Render paragraph as an image C++

I tried to do the same thing using Aspose.Words C++,

try {
    // Load the document
    auto doc1 = MakeObject<Document>(u"<FilePath>");
    if (doc1->get_Sections()->get_Count() == 0) {
        std::cerr << "The document has no sections." << std::endl;
        return -1; // Exit if only one section is found which is created by Aspose.Words for using evaluation mode
    }
    for (int s = 0; s < doc1->get_Sections()->get_Count(); ++s) {
        auto section = doc1->get_Sections()->idx_get(s);
        auto paragraphs = section->get_Body()->get_Paragraphs();

        for (int i = 0; i < paragraphs->get_Count(); ++i) {
            auto p = paragraphs->idx_get(i);

            // Skip if paragraph is null
            if (p == nullptr) {
                std::cerr << "Paragraph is null" << std::endl;
                continue;
            }

            // Clone the document to create a temporary document
            auto tmp = System::ExplicitCast<Document>(doc1->Clone(false));
            if (tmp == nullptr) {
                std::cerr << "Failed to clone the document." << std::endl;
                continue;
            }

            tmp->AppendChild(tmp->ImportNode(p->GetAncestor(NodeType::Section), false));
            tmp->get_FirstSection()->EnsureMinimum();
            tmp->get_FirstSection()->get_Body()->PrependChild(tmp->ImportNode(p, true));

            // Wrap the paragraph with a bookmark to calculate its bounds
            String tmpBookmarkName = u"tmp";
            tmp->get_FirstSection()->get_Body()->get_FirstParagraph()->PrependChild(MakeObject<BookmarkStart>(tmp, tmpBookmarkName));
            tmp->get_FirstSection()->get_Body()->get_FirstParagraph()->AppendChild(MakeObject<BookmarkEnd>(tmp, tmpBookmarkName));

            // Use LayoutCollector and LayoutEnumerator to calculate paragraph bounds
            auto collector = MakeObject<LayoutCollector>(tmp);
            auto enumerator = MakeObject<LayoutEnumerator>(tmp);

            enumerator->set_Current(collector->GetEntity(tmp->get_FirstSection()->get_Body()->get_FirstParagraph()->get_FirstChild()));
            while (enumerator->get_Type() != LayoutEntityType::Line)
                enumerator->MoveParent();

            RectangleF paraBounds = enumerator->get_Rectangle();

            enumerator->set_Current(collector->GetEntity(tmp->get_FirstSection()->get_Body()->get_FirstParagraph()->get_LastChild()));
            while (enumerator->get_Type() != LayoutEntityType::Line)
                enumerator->MoveParent();

            paraBounds = RectangleF::Union(paraBounds, enumerator->get_Rectangle());

            // Adjust page size and margins
            auto ps = tmp->get_FirstSection()->get_PageSetup();
            ps->set_LeftMargin(0);
            ps->set_RightMargin(0);
            ps->set_TopMargin(0);
            ps->set_BottomMargin(0);
            ps->set_PageWidth(paraBounds.get_Width());
            ps->set_PageHeight(paraBounds.get_Height());

            // Update the document's page layout to apply changes
            tmp->UpdatePageLayout();

            // Generate file name and check if not empty
            auto filename = String::Format(u"D:\\DocX\\para_{0}_{1}.png", s, i);
            if (filename.IsEmpty()) {
                std::cerr << "Filename is empty" << std::endl;
                continue;
            }

            // Save the result as an image
            tmp->Save(filename,SaveFormat::Png);
        }
    }
}
catch (Exception& e) {
    std::cerr << "An exception occurred: " << e.what() << std::endl;
}

I was able to get the first section alone which was created by aspose.words as I am using the Evaluation mode.

This was the output png that was returned, but I expected all the texts inside the document to be returned as this, but its returning Error: System::ArgumentNullException: Value can not be null.: value, I know there must be at least one section in every document but why am I getting null as argument?

@vignesh527 Please try using an unique bookmark name for each paragraph. Please see the following modified code that works fine on my side:

// Load the document
auto doc1 = System::MakeObject<Document>(u"C:\\Temp\\in.docx");
if (doc1->get_Sections()->get_Count() == 0) {
    std::cerr << "The document has no sections." << std::endl;
    return -1; // Exit if only one section is found which is created by Aspose.Words for using evaluation mode
}
for (int s = 0; s < doc1->get_Sections()->get_Count(); ++s) {
    auto section = doc1->get_Sections()->idx_get(s);
    auto paragraphs = section->get_Body()->get_Paragraphs();

    for (int i = 0; i < paragraphs->get_Count(); ++i) {
        auto p = paragraphs->idx_get(i);

        // Skip if paragraph is null
        if (p == nullptr) {
            std::cerr << "Paragraph is null" << std::endl;
            continue;
        }

        // Clone the document to create a temporary document
        auto tmp = System::ExplicitCast<Document>(doc1->Clone(false));
        if (tmp == nullptr) {
            std::cerr << "Failed to clone the document." << std::endl;
            continue;
        }

        tmp->AppendChild(tmp->ImportNode(p->GetAncestor(NodeType::Section), false));
        tmp->get_FirstSection()->EnsureMinimum();
        tmp->get_FirstSection()->get_Body()->PrependChild(tmp->ImportNode(p, true));

        // Wrap the paragraph with a bookmark to calculate its bounds
        String tmpBookmarkName = String::Format(u"tmp_{0}_{1}.png", s, i);
        tmp->get_FirstSection()->get_Body()->get_FirstParagraph()->PrependChild(System::MakeObject<BookmarkStart>(tmp, tmpBookmarkName));
        tmp->get_FirstSection()->get_Body()->get_FirstParagraph()->AppendChild(System::MakeObject<BookmarkEnd>(tmp, tmpBookmarkName));

        // Use LayoutCollector and LayoutEnumerator to calculate paragraph bounds
        auto collector = System::MakeObject<LayoutCollector>(tmp);
        auto enumerator = System::MakeObject<LayoutEnumerator>(tmp);

        enumerator->set_Current(collector->GetEntity(tmp->get_FirstSection()->get_Body()->get_FirstParagraph()->get_FirstChild()));
        while (enumerator->get_Type() != LayoutEntityType::Line)
            enumerator->MoveParent();

        auto paraBounds = enumerator->get_Rectangle();

        enumerator->set_Current(collector->GetEntity(tmp->get_FirstSection()->get_Body()->get_FirstParagraph()->get_LastChild()));
        while (enumerator->get_Type() != LayoutEntityType::Line)
            enumerator->MoveParent();

        paraBounds = RectangleF::Union(paraBounds, enumerator->get_Rectangle());

        // Adjust page size and margins
        auto ps = tmp->get_FirstSection()->get_PageSetup();
        ps->set_LeftMargin(0);
        ps->set_RightMargin(0);
        ps->set_TopMargin(0);
        ps->set_BottomMargin(0);
        ps->set_PageWidth(paraBounds.get_Width());
        ps->set_PageHeight(paraBounds.get_Height());

        // Update the document's page layout to apply changes
        tmp->UpdatePageLayout();

        // Generate file name and check if not empty
        auto filename = String::Format(u"C:\\Temp\\para_{0}_{1}.png", s, i);
        if (filename.IsEmpty()) {
            std::cerr << "Filename is empty" << std::endl;
            continue;
        }

        // Save the result as an image
        tmp->Save(filename, SaveFormat::Png);
    }
}

I tried giving each bookmarks unique names as you mentioned above, I still received the same Argument value null error. I debugged it, for the first section which is the watermark created by Aspose.Words it is working fine and generating a .png file at the specified location. For the next iteration after it reaches line 38
enumerator->set_Current(collector->GetEntity(tmp->get_FirstSection()->get_Body()->get_FirstParagraph()->get_FirstChild()));
it is returning an exception.

@vignesh527 Could you please attach your input document here for testing? we will check the issue and provide you more information.

@vignesh527 Thank you for additional information. The problem does not occur with licensed version of Aspose.Words. If you would like to test Aspose.Words without evaluation version limitations, you can request a free 30-days temporary license. Please see our documentation to learn more about licensing:
https://docs.aspose.com/words/cpp/licensing/

Sure, thanks for the update. I am performing an analysis to buy Aspose.Words for my project.From the initial analysis Aspose.Words seems to be better than other tools, I am trying to verify if we can achieve certain things with Aspose. I just wanted to know if I succeeded with the above script, will I be able to sort the contents in the document based on their X and Y axes values and extract them? Are there any possibilities to convert frames to paragraphs or text boxes, drawing objects or floating objects to just paragraphs using Aspose.Words? I am dealing with extracting texts from .DOCX files which contain complex structure and floating objects.

@vignesh527 There is no direct way to convert frames to shapes, but Aspose.Words provides access to frames properties via FrameFormat class. The above code demonstrates how to use LayoutCollector and LayoutEnumerator to calculate paragraphs’ bounding boxes. But there is no way to sort content in visual order using Aspose.Words.

1 Like

Thanks for the update!

1 Like