Free Support Forum - aspose.com

Different number of pages when generating PDF as opposed to Word

        // Repro snippet: load a sample document, empty it, strip its styles down to a
        // minimal set, add two custom table styles, then inspect the page count.
        // GetStreamFromFile is not shown in this thread; presumably it returns an object
        // whose Content is the file's bytes - confirm against its definition.
        var sourceDoc = GetStreamFromFile("WordDocWithTablesReplacedByHyperlinksVisitor.docx");
        var bytes = sourceDoc.Content;

        _mainDoc = ReadWordDocument(bytes);

        // Clear the document's section collection.
        _mainDoc.Sections.Clear();

        // NOTE(review): presumably drops resources (styles, lists) that nothing references
        // any more - confirm against the Aspose.Words documentation.
        _mainDoc.RemoveUnusedResources();

        // remove all styles except minimal required set
        // (one built-in default per style type handled by DeleteStyle: character, paragraph, list, table)
        var stylesToKeep = new List<StyleIdentifier>
    {
        StyleIdentifier.DefaultParagraphFont,
        StyleIdentifier.Normal,
        StyleIdentifier.NoList,
        StyleIdentifier.TableNormal
    };

        // Snapshot the style collection into a list before any style is deleted.
        var styles = _mainDoc.Styles.Cast<Style>().ToList();

        // The last argument is an empty array: no styles are preserved by name.
        CleanExistingStyles(_mainDoc, styles, stylesToKeep, (new List<string>() { }).ToArray());

        // Add the "CDM Range 1" / "CDM Range 2" table styles.
        CreateCdmRangeStyles(_mainDoc);

        _mainDoc.RemoveUnusedResources();

_mainDoc.PageCount should be 1, but it is larger

    /// <summary>
    /// Deletes every style that is neither listed in <paramref name="stylesToKeep"/> nor
    /// named in <paramref name="existingStyleNames"/>.
    /// </summary>
    /// <param name="doc">Document whose style collection is re-read after each deletion.</param>
    /// <param name="styles">Initial list of styles to scan.</param>
    /// <param name="stylesToKeep">Style identifiers that must never be deleted.</param>
    /// <param name="existingStyleNames">Style names that must never be deleted.</param>
    private static void CleanExistingStyles(Document doc, List<Style> styles, List<StyleIdentifier> stylesToKeep, string[] existingStyleNames)
    {
        var pending = styles;
        var rescan = true;

        // Each deletion invalidates the list being walked, so the scan restarts from a
        // fresh snapshot of doc.Styles until a full pass deletes nothing.
        while (rescan)
        {
            rescan = false;

            foreach (var candidate in pending)
            {
                var isProtected = stylesToKeep.Contains(candidate.StyleIdentifier)
                                  || existingStyleNames.Contains(candidate.Name);
                if (isProtected)
                {
                    continue;
                }

                DeleteStyle(candidate);

                var snapshot = doc.Styles.Cast<Style>().ToList();
                if (snapshot.Count > 0)
                {
                    // Abandon the stale list and start over on the current styles.
                    pending = snapshot;
                    rescan = true;
                    break;
                }
            }
        }
    }

    /// <summary>
    /// Removes <paramref name="style"/> by temporarily giving its name to the built-in
    /// default style of the same type and then restoring that default's original name.
    /// </summary>
    /// <param name="style">The style to delete.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The style's type is not Paragraph, Character, Table or List.
    /// </exception>
    private static void DeleteStyle(Style style)
    {
        // Choose the built-in default that matches the type of the style being deleted.
        StyleIdentifier fallbackId;
        switch (style.Type)
        {
            case StyleType.Paragraph:
                fallbackId = StyleIdentifier.Normal;
                break;
            case StyleType.Character:
                fallbackId = StyleIdentifier.DefaultParagraphFont;
                break;
            case StyleType.Table:
                fallbackId = StyleIdentifier.TableNormal;
                break;
            case StyleType.List:
                fallbackId = StyleIdentifier.NoList;
                break;
            default:
                throw new ArgumentOutOfRangeException();
        }

        var fallback = style.Styles[fallbackId];
        var originalName = fallback.Name;

        // Relies on the rename side effect noted by the original author: assigning a name
        // that is already in use replaces the style that held it.
        fallback.Name = style.Name;
        fallback.Name = originalName;
    }

    /// <summary>
    /// Adds two table styles, "CDM Range 1" and "CDM Range 2", to <paramref name="doc"/>.
    /// Each is created as a copy of the built-in TableNormal style and is based on it.
    /// </summary>
    /// <param name="doc">Document that receives the new styles.</param>
    private static void CreateCdmRangeStyles(Document doc)
    {
        var tableNormal = doc.Styles[StyleIdentifier.TableNormal];

        foreach (var rangeStyleName in new[] { "CDM Range 1", "CDM Range 2" })
        {
            var rangeStyle = doc.Styles.AddCopy(tableNormal);
            rangeStyle.Name = rangeStyleName;
            rangeStyle.BaseStyleName = tableNormal.Name;
        }
    }

    /// <summary>
    /// Builds a <see cref="Document"/> from the raw bytes of a Word file.
    /// </summary>
    /// <param name="wordContent">File content; may be <c>null</c> or empty.</param>
    /// <returns>
    /// The document loaded from <paramref name="wordContent"/>, or a document created with
    /// the parameterless constructor when no content is supplied.
    /// </returns>
    private static Document ReadWordDocument(byte[] wordContent)
    {
        // Check before creating the stream: new MemoryStream(null) throws
        // ArgumentNullException, so a null check placed inside the using block
        // can never take effect.
        if (wordContent == null || wordContent.Length == 0)
        {
            return new Document();
        }

        using (var ms = new MemoryStream(wordContent))
        {
            return new Document(ms);
        }
    }

Can you please provide an explanation for why this happens?

Thank you

@ibmromania,

Thanks for your inquiry. Please ZIP and attach your input Word document here for testing. We will investigate the issue on our side and provide you with more information.

@tahir.manzoor
I’ve attached the word input.
Word.zip (23.5 KB)

@adrian.anton,

Thanks for sharing the document. We have tested the scenario using the shared code example. Your code example removes the content from the document, and the Document.PageCount property returns 1. Could you please share more detail about what exactly you want to achieve using Aspose.Words? We will then provide you with more information about your query.

@tahir.manzoor
If you remove this line:
_mainDoc.Sections.Clear();
you will see that the issue reproduces.
The code that I’m using is pretty much the same as the one in the original post, with a few minor changes:

        // Second repro: same style clean-up as the original post, but the sections are
        // kept and the result is saved as both DOCX and PDF.
        var _mainDoc = new Document(@"C:\test.docx");
        // NOTE(review): presumably removes unused styles/lists from the document -
        // confirm against the Aspose.Words Document.Cleanup documentation.
        _mainDoc.Cleanup();

        // remove all styles except minimal required set
        var stylesToKeep = new List<StyleIdentifier>
        {
            StyleIdentifier.DefaultParagraphFont,
            StyleIdentifier.Normal,
            StyleIdentifier.NoList,
            StyleIdentifier.TableNormal
        };

        // Snapshot the style collection into a list before any style is deleted.
        var styles = _mainDoc.Styles.Cast<Aspose.Words.Style>().ToList();

        // The last argument is an empty array: no styles are preserved by name.
        CleanExistingStyles(_mainDoc, styles, stylesToKeep, (new List<string>() { }).ToArray());

        // Add the "CDM Range 1" / "CDM Range 2" table styles.
        CreateCdmRangeStyles(_mainDoc);

        _mainDoc.Cleanup();

        // Save the same document in both formats so their page counts can be compared.
        _mainDoc.Save(@"C:\test1.docx",SaveFormat.Docx);
        _mainDoc.Save(@"C:\test1.pdf",SaveFormat.Pdf);

I also attached another word document.
Thank you

test.zip (33.5 KB)

@adrian.anton,

Thanks for sharing the details. We have tested the scenario and managed to reproduce the same issue on our side. We have logged this problem in our issue tracking system as WORDSNET-17118 so that it can be fixed. You will be notified via this forum thread once this issue is resolved.

We apologize for your inconvenience.

The issues you have found earlier (filed as WORDSNET-17118) have been fixed in this Aspose.Words for .NET 18.8 update and this Aspose.Words for Java 18.8 update.