@sophialwx There is no direct way to convert HTML document structure to JSON. However, you can achieve this by iterating over the document tree model. For example, see the following code:
// Usage: build a Document from the sample path and print the JSON string returned by getJson.
System.out.println(getJson(new Document("C:\\Temp\\in.docx")));
/**
 * Builds a JSON-like text representation of the document's node tree.
 *
 * <p>The output is a root object containing one object for the document, which in turn
 * contains one object per section; each section contains the object produced by
 * {@code handleContainer} for the section's body.
 *
 * @param doc the document whose sections are walked
 * @return the generated text
 */
private static String getJson(Document doc) {
    StringBuilder sb = new StringBuilder();
    int indent = 1;
    sb.append(openJson());
    sb.append(openElement(doc, indent++));
    for (Section section : doc.getSections()) {
        sb.append(openElement(section, indent++));
        // handleContainer tracks the depth through a mutable holder, so round-trip it.
        RefInt referenceToIndent = new RefInt(indent);
        handleContainer(sb, section.getBody(), referenceToIndent);
        indent = referenceToIndent.get();
        // handleContainer chooses the body's trailing comma from body.getNextSibling(),
        // but the body is the only child of the section written here. If the body has
        // following siblings (presumably header/footer nodes - verify against the DOM),
        // a "," would be left dangling right before the section's closing brace.
        int length = sb.length();
        if (length >= 2 && sb.charAt(length - 2) == ',' && sb.charAt(length - 1) == '\n') {
            sb.deleteCharAt(length - 2);
        }
        sb.append(closeElement(--indent, (section.getNextSibling() == null)));
    }
    // Close the document object, then the root object opened by openJson().
    sb.append(closeElement(--indent, true));
    sb.append(closeElement(0, true));
    return sb.toString();
}
/**
 * Appends the object for a composite node and, recursively, for its children.
 *
 * <p>A container without children is written as an empty object on one line. Otherwise the
 * object is opened, every child is delegated to {@code handleContainer} (composite children)
 * or {@code handleNode} (all other children), and the object is closed again.
 *
 * @param sb        buffer that receives the output
 * @param container node to write
 * @param indent    mutable current depth; incremented while the children are written and
 *                  restored before the closing brace is written
 */
private static void handleContainer(StringBuilder sb, CompositeNode container, RefInt indent) {
    boolean isLast = container.getNextSibling() == null;
    if (!container.hasChildNodes()) {
        sb.append(openAndCloseElement(container, indent.get(), isLast));
        return;
    }

    // Open at the current depth and leave the holder one level deeper for the children.
    sb.append(openElement(container, indent.set(indent.get() + 1) - 1));
    for (Node node : (Iterable<Node>) container.getChildNodes()) {
        CompositeNode childContainer = as(node, CompositeNode.class);
        if (childContainer != null) {
            handleContainer(sb, childContainer, indent);
        } else {
            handleNode(sb, node, indent);
        }
    }

    // Each child decides its trailing comma from getNextSibling(), yet handleNode writes
    // nothing for node types it does not handle. When the last child that produced output
    // is followed only by such nodes, its "," would dangle before the closing brace and
    // make the output invalid JSON, so remove it here.
    int length = sb.length();
    if (length >= 2 && sb.charAt(length - 2) == ',' && sb.charAt(length - 1) == '\n') {
        sb.deleteCharAt(length - 2);
    }
    sb.append(closeElement(indent.set(indent.get() - 1), isLast));
}
/**
 * Appends the object for a non-composite node. Only RUN nodes produce output; every other
 * node type is skipped without writing anything.
 *
 * @param sb     buffer that receives the output
 * @param node   node to write
 * @param indent mutable current depth; unchanged once the method returns
 */
private static void handleNode(StringBuilder sb, Node node, RefInt indent) {
    if (node.getNodeType() != NodeType.RUN) {
        return;
    }

    int openDepth = indent.set(indent.get() + 1) - 1;
    sb.append(openElement(node, openDepth));

    Run run = as(node, Run.class);
    // Short output: the text is the only member of the run object. For a longer form,
    // write "Text" with isLast == false and follow it with
    // handleFont(sb, run.getFont(), indent, true).
    String text = run.getText();
    sb.append(writeElement("Text", text, indent.get(), true));

    int closeDepth = indent.set(indent.get() - 1);
    boolean isLast = node.getNextSibling() == null;
    sb.append(closeElement(closeDepth, isLast));
}
/**
 * Appends a "Font" object holding the font's name and size.
 *
 * @param sb     buffer that receives the output
 * @param font   font whose name and size are written
 * @param indent mutable current depth; unchanged once the method returns
 * @param isLast {@code true} to omit the comma after the closing brace
 */
private static void handleFont(StringBuilder sb, Font font, RefInt indent, boolean isLast) {
    int openDepth = indent.set(indent.get() + 1) - 1;
    sb.append(openElement("Font", openDepth));

    int memberDepth = indent.get();
    sb.append(writeElement("Name", font.getName(), memberDepth, false))
      .append(writeElement("Size", font.getSize(), memberDepth, true));

    int closeDepth = indent.set(indent.get() - 1);
    sb.append(closeElement(closeDepth, isLast));
}
/**
 * Returns the text that opens the root object: an opening brace followed by a line break.
 *
 * @return the opening line of the output
 */
private static String openJson() {
    final String rootOpening = "{\n";
    return rootOpening;
}
/**
 * Opens an object whose key is the node's type name.
 *
 * @param node   node that names the object
 * @param indent number of leading spaces
 * @return the opening line, terminated by a line break
 */
private static String openElement(Node node, int indent) {
    String name = getNodeName(node);
    return openElement(name, indent);
}
/**
 * Opens an object with the given key.
 *
 * @param name   key of the object
 * @param indent number of leading spaces
 * @return the opening line, terminated by a line break
 */
private static String openElement(String name, int indent) {
    StringBuilder line = new StringBuilder();
    line.append(getIndent(indent));
    line.append(getQuoted(name));
    line.append(" : {\n");
    return line.toString();
}
/**
 * Writes an empty object, keyed by the node's type name, on a single line.
 *
 * @param node   node that names the object
 * @param indent number of leading spaces
 * @param isLast {@code true} to omit the trailing comma
 * @return the complete line, terminated by a line break
 */
private static String openAndCloseElement(Node node, int indent, boolean isLast) {
    StringBuilder line = new StringBuilder();
    line.append(getIndent(indent));
    line.append(getQuoted(getNodeName(node)));
    line.append(" : { }");
    line.append(getComma(isLast));
    line.append("\n");
    return line.toString();
}
/**
 * Writes a key with a quoted string value on a single line.
 *
 * @param name   key
 * @param value  value, written between double quotes
 * @param indent number of leading spaces
 * @param isLast {@code true} to omit the trailing comma
 * @return the complete line, terminated by a line break
 */
private static String writeElement(String name, String value, int indent, boolean isLast) {
    String padding = getIndent(indent);
    String key = getQuoted(name);
    String quotedValue = getQuoted(value);
    String separator = getComma(isLast);
    return padding + key + " : " + quotedValue + separator + "\n";
}
/**
 * Writes a key with an unquoted numeric value on a single line.
 *
 * @param name   key
 * @param value  number, written in its default {@code double} string form
 * @param indent number of leading spaces
 * @param isLast {@code true} to omit the trailing comma
 * @return the complete line, terminated by a line break
 */
private static String writeElement(String name, double value, int indent, boolean isLast) {
    StringBuilder line = new StringBuilder();
    line.append(getIndent(indent));
    line.append(getQuoted(name));
    line.append(" : ");
    line.append(value);
    line.append(getComma(isLast));
    line.append("\n");
    return line.toString();
}
/**
 * Writes the closing brace of an object on its own line.
 *
 * @param indent number of leading spaces
 * @param isLast {@code true} to omit the trailing comma
 * @return the closing line, terminated by a line break
 */
private static String closeElement(int indent, boolean isLast) {
    String padding = getIndent(indent);
    String separator = getComma(isLast);
    return padding + "}" + separator + "\n";
}
/**
 * Returns the name used as the JSON key for a node: the string form of its node type.
 *
 * @param node node to name
 * @return the result of {@code NodeType.toString} for the node's type
 */
private static String getNodeName(Node node) {
    final int nodeType = node.getNodeType();
    return NodeType.toString(nodeType);
}
/**
 * Wraps a value in double quotes, escaping it so the result is a valid JSON string.
 *
 * <p>Double quotes, backslashes and control characters below U+0020 are escaped; without
 * this, document text containing any of them would break the surrounding JSON. A
 * {@code null} value is written as the quoted text {@code null}, which is what plain string
 * concatenation produced before escaping was added.
 *
 * @param value text to quote; may be {@code null}
 * @return the escaped value enclosed in double quotes
 */
private static String getQuoted(String value) {
    String text = String.valueOf(value);
    StringBuilder quoted = new StringBuilder(text.length() + 2);
    quoted.append('"');
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
            case '"':
                quoted.append("\\\"");
                break;
            case '\\':
                quoted.append("\\\\");
                break;
            case '\b':
                quoted.append("\\b");
                break;
            case '\f':
                quoted.append("\\f");
                break;
            case '\n':
                quoted.append("\\n");
                break;
            case '\r':
                quoted.append("\\r");
                break;
            case '\t':
                quoted.append("\\t");
                break;
            default:
                if (c < 0x20) {
                    // Remaining control characters need a four-digit hexadecimal escape.
                    quoted.append("\\u00");
                    if (c < 0x10) {
                        quoted.append('0');
                    }
                    quoted.append(Integer.toHexString(c));
                } else {
                    quoted.append(c);
                }
                break;
        }
    }
    quoted.append('"');
    return quoted.toString();
}
/**
 * Returns the separator to place after an element.
 *
 * @param isLastElement whether the element is the last one in its parent
 * @return an empty string for the last element, otherwise a comma
 */
private static String getComma(boolean isLastElement) {
    if (isLastElement) {
        return "";
    }
    return ",";
}
/**
 * Returns a string made of {@code indent} space characters.
 *
 * @param indent number of spaces; zero yields an empty string
 * @return the padding string
 */
private static String getIndent(int indent) {
    char[] spaces = new char[indent];
    java.util.Arrays.fill(spaces, ' ');
    return java.lang.String.valueOf(spaces);
}
/**
 * Casts an object to the given type when possible.
 *
 * @param obj  object to cast; may be {@code null}
 * @param type target type
 * @param <T>  target type
 * @return {@code obj} as {@code T} when it is an instance of {@code type}, otherwise {@code null}
 */
public static <T> T as(Object obj, Class<T> type) {
    if (!type.isInstance(obj)) {
        return null;
    }
    return type.cast(obj);
}