How to read entire content of a word document like a Json/List in C#

Hi Team,

Is there a way to read all the content of a Word document (paragraphs, paragraph headers, and so on) into a single object such as JSON or a List? If so, will the object include a category that identifies each piece of content (e.g. paragraph header, paragraph content, shape)?

Thanks,
Karthikeyan

@Karthik_Test_account There is no built-in method to convert a document to JSON. However, you can achieve this by inspecting the Document Object Model. Here is simplified code that converts a document to JSON:

/// <summary>
/// Serializes the section/body structure of <paramref name="doc"/> into a JSON string.
/// </summary>
/// <param name="doc">The document whose sections are written.</param>
/// <returns>
/// A JSON object with a single document element that contains one element per section;
/// each section contains the content of its body.
/// </returns>
/// <remarks>
/// Element keys are node type names, so sibling nodes of the same type share the same key.
/// </remarks>
private static string GetJson(Document doc)
{
    StringBuilder sb = new StringBuilder();
    int indent = 1;
    sb.Append(OpenJson());
    sb.Append(OpenElement(doc, indent++));
    foreach (Section section in doc.Sections)
    {
        sb.Append(OpenElement(section, indent++));

        // Guard against a section that has no body node; it is then written as an empty element.
        if (section.Body != null)
        {
            HandleContainer(sb, section.Body, ref indent);

            // The body is the only section child written here, but HandleContainer picks
            // its trailing comma from the body's own siblings (e.g. headers/footers stored
            // after it). Its output always ends with an optional "," followed by "\n", so
            // drop that comma to avoid a dangling separator before the section closes.
            int commaIndex = sb.Length - 2;
            if (commaIndex >= 0 && sb[commaIndex] == ',' && sb[sb.Length - 1] == '\n')
            {
                sb.Remove(commaIndex, 1);
            }
        }

        sb.Append(CloseElement(--indent, (section.NextSibling == null)));
    }
    sb.Append(CloseElement(--indent, true));
    // Closes the root object opened by OpenJson().
    sb.Append(CloseElement(0, true));
    return sb.ToString();
}

/// <summary>
/// Writes <paramref name="container"/> and, recursively, its children to <paramref name="sb"/>.
/// </summary>
/// <param name="sb">The builder that receives the output.</param>
/// <param name="container">The composite node to write.</param>
/// <param name="indent">Current nesting level; it has the same value on return as on entry.</param>
private static void HandleContainer(StringBuilder sb, CompositeNode container, ref int indent)
{
    // A separating comma is only needed when a later sibling is actually written:
    // composite nodes always are, leaf nodes only when they are runs (see HandleNode).
    // Checking NextSibling alone leaves a dangling comma when only skipped nodes follow.
    bool isLast = true;
    for (Node sibling = container.NextSibling; sibling != null; sibling = sibling.NextSibling)
    {
        if (sibling is CompositeNode || sibling.NodeType == NodeType.Run)
        {
            isLast = false;
            break;
        }
    }

    if (!container.HasChildNodes)
    {
        sb.Append(OpenAndCloseElement(container, indent, isLast));
        return;
    }

    sb.Append(OpenElement(container, indent++));
    foreach (Node node in container.ChildNodes)
    {
        CompositeNode childContainer = node as CompositeNode;
        if (childContainer != null)
        {
            HandleContainer(sb, childContainer, ref indent);
        }
        else
        {
            HandleNode(sb, node, ref indent);
        }
    }
    sb.Append(CloseElement(--indent, isLast));
}

/// <summary>
/// Writes a leaf (non-composite) node to <paramref name="sb"/>.
/// Only runs are written; every other node type is skipped.
/// </summary>
/// <param name="sb">The builder that receives the output.</param>
/// <param name="node">The leaf node to write.</param>
/// <param name="indent">Current nesting level; it has the same value on return as on entry.</param>
private static void HandleNode(StringBuilder sb, Node node, ref int indent)
{
    switch (node.NodeType)
    {
        case NodeType.Run:
            // NodeType.Run identifies a Run, so a direct cast is used instead of an unchecked "as".
            Run run = (Run)node;

            // Only emit a separating comma when a later sibling is actually written
            // (a composite node or another run). Leaf nodes of other types are skipped,
            // so relying on NextSibling alone would leave a dangling comma before them.
            bool isLast = true;
            for (Node sibling = node.NextSibling; sibling != null; sibling = sibling.NextSibling)
            {
                if (sibling is CompositeNode || sibling.NodeType == NodeType.Run)
                {
                    isLast = false;
                    break;
                }
            }

            sb.Append(OpenElement(node, indent++));
            // Kept short on purpose: only the text is written. To include formatting,
            // write Text with isLast = false and follow it with
            // HandleFont(sb, run.Font, ref indent, true).
            sb.Append(WriteElement(nameof(run.Text), run.Text, indent, true));
            sb.Append(CloseElement(--indent, isLast));
            break;
        default:
            break;
    }
}

/// <summary>
/// Writes a "Font" element containing the name and size of <paramref name="font"/>.
/// </summary>
/// <param name="sb">The builder that receives the output.</param>
/// <param name="font">The font whose name and size are written.</param>
/// <param name="indent">Current nesting level; it has the same value on return as on entry.</param>
/// <param name="isLast">True when no comma should follow the closing brace.</param>
private static void HandleFont(StringBuilder sb, Font font, ref int indent, bool isLast)
{
    // Append returns the builder, so the four pieces are chained in output order;
    // the nesting level is raised for the two properties and lowered again for the close.
    sb.Append(OpenElement("Font", indent++))
      .Append(WriteElement(nameof(font.Name), font.Name, indent, false))
      .Append(WriteElement(nameof(font.Size), font.Size, indent, true))
      .Append(CloseElement(--indent, isLast));
}

/// <summary>Returns the text that opens the root JSON object.</summary>
private static string OpenJson() => "{\n";
/// <summary>Opens a JSON object whose key is the type name of <paramref name="node"/>.</summary>
private static string OpenElement(Node node, int indent)
{
    string key = GetNodeName(node);
    return OpenElement(key, indent);
}
/// <summary>Opens a JSON object with the given key at the given nesting level.</summary>
private static string OpenElement(string name, int indent)
{
    return string.Concat(GetIndent(indent), GetQuoted(name), " : {\n");
}
/// <summary>
/// Writes an empty JSON object keyed by the type name of <paramref name="node"/> on a single line.
/// </summary>
/// <param name="node">The node whose type name is used as the key.</param>
/// <param name="indent">Nesting level of the line.</param>
/// <param name="isLast">True when no comma should follow the element.</param>
private static string OpenAndCloseElement(Node node, int indent, bool isLast)
{
    string padding = GetIndent(indent);
    string key = GetQuoted(GetNodeName(node));
    string separator = GetComma(isLast);
    return string.Concat(padding, key, " : { }", separator, "\n");
}
/// <summary>Writes a single "name : value" line whose value is a quoted string.</summary>
/// <param name="name">The property name.</param>
/// <param name="value">The property value; it is wrapped in quotes.</param>
/// <param name="indent">Nesting level of the line.</param>
/// <param name="isLast">True when no comma should follow the value.</param>
private static string WriteElement(string name, string value, int indent, bool isLast)
{
    string padding = GetIndent(indent);
    string key = GetQuoted(name);
    string text = GetQuoted(value);
    string separator = GetComma(isLast);
    return string.Concat(padding, key, " : ", text) + separator + "\n";
}
/// <summary>Writes a single "name : value" line whose value is an unquoted number.</summary>
/// <param name="name">The property name.</param>
/// <param name="value">The numeric property value.</param>
/// <param name="indent">Nesting level of the line.</param>
/// <param name="isLast">True when no comma should follow the value.</param>
private static string WriteElement(string name, double value, int indent, bool isLast)
{
    // JSON numbers always use '.' as the decimal separator. Formatting with the current
    // culture would emit e.g. "12,5" on comma-decimal systems and break the output.
    string number = value.ToString(System.Globalization.CultureInfo.InvariantCulture);
    return GetIndent(indent) + GetQuoted(name) + " : " + number + GetComma(isLast) + "\n";
}
/// <summary>Writes the closing brace of a JSON object at the given nesting level.</summary>
private static string CloseElement(int indent, bool isLast)
{
    string closingBrace = GetIndent(indent) + "}";
    return closingBrace + GetComma(isLast) + "\n";
}
/// <summary>Returns the node's type, converted to a string, for use as an element key.</summary>
private static string GetNodeName(Node node) => node.NodeType.ToString();
/// <summary>
/// Wraps <paramref name="value"/> in double quotes and escapes it as a JSON string.
/// </summary>
/// <param name="value">The raw text; null is written as an empty string.</param>
/// <returns>A JSON string literal, including the surrounding quotes.</returns>
private static string GetQuoted(string value)
{
    if (string.IsNullOrEmpty(value))
    {
        return "\"\"";
    }

    System.Text.StringBuilder quoted = new System.Text.StringBuilder(value.Length + 2);
    quoted.Append('"');
    foreach (char c in value)
    {
        switch (c)
        {
            // Quotes and backslashes would otherwise terminate or corrupt the literal.
            case '"': quoted.Append("\\\""); break;
            case '\\': quoted.Append("\\\\"); break;
            case '\b': quoted.Append("\\b"); break;
            case '\f': quoted.Append("\\f"); break;
            case '\n': quoted.Append("\\n"); break;
            case '\r': quoted.Append("\\r"); break;
            case '\t': quoted.Append("\\t"); break;
            default:
                if (c < ' ')
                {
                    // Remaining control characters are not allowed raw inside a JSON string.
                    quoted.Append("\\u");
                    quoted.Append(((int)c).ToString("x4", System.Globalization.CultureInfo.InvariantCulture));
                }
                else
                {
                    quoted.Append(c);
                }
                break;
        }
    }
    quoted.Append('"');
    return quoted.ToString();
}
/// <summary>Returns the separator to place after an element: nothing for the last one, otherwise a comma.</summary>
private static string GetComma(bool isLastElement)
{
    if (isLastElement)
    {
        return string.Empty;
    }

    return ",";
}
/// <summary>Returns the leading whitespace for a nesting level: two spaces per level.</summary>
private static string GetIndent(int indent)
{
    int width = indent * 2;
    return string.Empty.PadLeft(width);
}

Alternatively, you can use DocumentVisitor to write the document structure into your own custom format.