I am trying to extract the text between particular tags and attributes. For now, I have only attempted this for tags. I am reading a ".gexf" file, which contains XML data, and storing its contents in a string. I then try to extract the text between the "nodes" tags. Here is my code so far:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Reads a GEXF (XML) file into memory and prints the raw content found between
 * each {@code <nodes>} start tag and its {@code </nodes>} end tag.
 *
 * <p>The extraction is regex-based, so it is a lightweight text scan rather than a
 * real XML parse: it does not understand comments, CDATA sections, or a
 * {@code <nodes>} element nested inside another {@code <nodes>} element.
 */
public class Main {

    /** File that {@link #main(String[])} reads when the program is run. */
    private static final String DEFAULT_FILE_PATH = "src/babel.gexf";

    /**
     * Matches one {@code <nodes ...>...</nodes>} element and captures its body.
     *
     * <ul>
     *   <li>{@code (?:\s[^>]*)?} lets the start tag carry attributes;</li>
     *   <li>{@code (?<!/)} rejects a self-closing {@code <nodes/>}, which has no body;</li>
     *   <li>{@code (.*?)} is reluctant so each element is matched separately;</li>
     *   <li>{@link Pattern#DOTALL} lets {@code .} cross line breaks, since the body
     *       normally spans many lines.</li>
     * </ul>
     */
    private static final Pattern NODES_PATTERN =
            Pattern.compile("<nodes(?:\\s[^>]*)?(?<!/)>(.*?)</nodes\\s*>", Pattern.DOTALL);

    /**
     * Reads the whole file into a string, terminating every line with {@code "\n"}.
     *
     * <p>NOTE(review): {@link FileReader} decodes with the platform default charset;
     * confirm that this matches the encoding of the input file.
     *
     * @param filePath path of the file to read
     * @return the file content, one {@code "\n"} appended after each line
     * @throws IOException if the file cannot be opened or read
     */
    public String readFile(String filePath) throws IOException {
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append('\n');
            }
            return text.toString();
        }
    }

    /**
     * Prints to standard output, one {@code println} per match, the body of every
     * {@code <nodes>} element found in {@code content}. The body is printed
     * unmodified, including any child markup and surrounding whitespace.
     *
     * @param content the XML text to scan
     * @throws IOException never thrown by this implementation; kept in the
     *     signature so existing callers continue to compile
     */
    public void getNodesContent(String content) throws IOException {
        final Matcher matcher = NODES_PATTERN.matcher(content);
        while (matcher.find()) {
            System.out.println(matcher.group(1));
        }
    }

    /**
     * Reads the default GEXF file and prints the content of its {@code <nodes>} elements.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the default file cannot be read
     */
    public static void main(String[] args) throws IOException {
        Main m = new Main();
        String result = m.readFile(DEFAULT_FILE_PATH);
        m.getNodesContent(result);
    }
}
Aucun commentaire:
Enregistrer un commentaire