I am currently working on converting Word documents to HTML format. During this process, I am unlinking page fields to ensure that the page numbers in the HTML output match those in the original Word document. However, I am encountering errors when attempting to unlink page fields in certain documents.
Could you please provide insight into why this issue is occurring and suggest potential solutions to resolve it?
Snippet :
/// <summary>
/// Updates every PAGE field in <paramref name="page"/> and then unlinks it.
/// </summary>
/// <param name="page">Document whose PAGE fields are processed in place.</param>
/// <remarks>
/// The method never throws. A failure is written to standard error instead of
/// being discarded, so a document that cannot be processed is no longer silent.
/// </remarks>
private void FixPageNumber(ref Document page)
{
    try
    {
        // ToList() snapshots the matching fields before any of them is changed.
        var pageFields = page.Range.Fields.Where(f => f.Type == FieldType.FieldPage).ToList();
        foreach (var field in pageFields)
        {
            // Refresh the field first, then unlink it.
            field.Update();
            field.Unlink();
        }
    }
    catch (Exception ex)
    {
        // Best effort: keep the caller running, but make the failure visible.
        Console.Error.WriteLine("FixPageNumber failed: " + ex);
    }
}
/// <summary>
/// Copies <paramref name="sourceDoc"/> into <paramref name="destDoc"/> one page at a
/// time, unlinking the PAGE fields of each extracted page before it is appended.
/// </summary>
/// <param name="sourceDoc">Document the pages are extracted from.</param>
/// <param name="destDoc">Document that receives the extracted pages, in order.</param>
/// <remarks>
/// The method never throws. If a page fails, processing stops at that page and the
/// error is written to standard error; pages appended so far remain in
/// <paramref name="destDoc"/>.
/// </remarks>
private void CreateSectionBreak(ref Document sourceDoc, ref Document destDoc)
{
    try
    {
        // The source is not modified inside the loop, so read the page count once.
        int pageCount = sourceDoc.PageCount;
        for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
        {
            Document page = sourceDoc.ExtractPages(pageIndex, 1);

            // Unlink PAGE fields while the page is still a standalone document.
            FixPageNumber(ref page);

            destDoc.AppendDocument(page, ImportFormatMode.UseDestinationStyles);
            destDoc.LastSection.PageSetup.RestartPageNumbering = false;
        }
    }
    catch (Exception ex)
    {
        // Do not hide the failure: the destination document may be incomplete.
        Console.Error.WriteLine("CreateSectionBreak failed; destination document may be incomplete: " + ex);
    }
}
// Load the source document, explicitly requesting the DOCX load format.
Aspose.Words.Loading.LoadOptions wordloadoption = new Aspose.Words.Loading.LoadOptions();
wordloadoption.LoadFormat = LoadFormat.Docx;
Aspose.Words.Document doc = new Aspose.Words.Document(
    @"D:\Temp\FormattingIssue\HiddenBookmark.docx",
    wordloadoption);

// Create the destination from a clone of the source (Clone(false)), then clear its children.
Aspose.Words.Document tempDoc = (Document)doc.Clone(false);
tempDoc.RemoveAllChildren();

// Rebuild the document page by page into the destination.
CreateSectionBreak(ref doc, ref tempDoc);
Document :
PageNumberIssue.docx (1.1 MB)
@AlpeshChaudhariDev Please try modifying your code like this:
/// <summary>
/// Updates all fields of <paramref name="page"/> once and then unlinks every PAGE field.
/// </summary>
/// <param name="page">Document whose PAGE fields are processed in place.</param>
/// <remarks>
/// The method never throws. A failure is written to standard error instead of
/// being discarded.
/// </remarks>
private void FixPageNumber(ref Document page)
{
    try
    {
        // Update the whole document once instead of updating each field separately.
        page.UpdateFields();

        // ToList() snapshots the matching fields before any of them is changed.
        var pageFields = page.Range.Fields.Where(f => f.Type == FieldType.FieldPage).ToList();
        foreach (var field in pageFields)
        {
            field.Unlink();
        }
    }
    catch (Exception ex)
    {
        // Best effort: keep the caller running, but make the failure visible.
        Console.Error.WriteLine("FixPageNumber failed: " + ex);
    }
}
/// <summary>
/// Copies <paramref name="sourceDoc"/> into <paramref name="destDoc"/> one page at a
/// time. Each extracted page is collapsed into a single section that starts on a new
/// page, and its PAGE fields are unlinked before it is appended.
/// </summary>
/// <param name="sourceDoc">Document the pages are extracted from.</param>
/// <param name="destDoc">Document that receives the extracted pages, in order.</param>
/// <remarks>
/// The method never throws. If a page fails, processing stops at that page and the
/// error is written to standard error; pages appended so far remain in
/// <paramref name="destDoc"/>.
/// </remarks>
private void CreateSectionBreak(ref Document sourceDoc, ref Document destDoc)
{
    try
    {
        // The source is not modified inside the loop, so read the page count once.
        int pageCount = sourceDoc.PageCount;
        for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
        {
            Document page = sourceDoc.ExtractPages(pageIndex, 1);

            // Remove section breaks in the page: fold every further section
            // into the first one until only one section is left.
            while (page.Sections.Count > 1)
            {
                page.FirstSection.AppendContent(page.Sections[1]);
                page.Sections[1].Remove();
            }

            // Reset the section start so the page begins on a new page.
            page.FirstSection.PageSetup.SectionStart = SectionStart.NewPage;

            // Unlink PAGE fields while the page is still a standalone document.
            FixPageNumber(ref page);

            destDoc.AppendDocument(page, ImportFormatMode.UseDestinationStyles);
        }
    }
    catch (Exception ex)
    {
        // Do not hide the failure: the destination document may be incomplete.
        Console.Error.WriteLine("CreateSectionBreak failed; destination document may be incomplete: " + ex);
    }
}
Thanks @alexey.noskov, it's working fine. However, I'm facing a problem when I move the header/footer content into the document body. I am calling the SetHeaderFooterIntoBody
function after FixPageNumber,
and then I get duplicated page numbers in the HTML output. I am not able to identify why this is happening. Could you suggest how I can resolve this issue?
Here are the duplicated page numbers:
104, 107,109,111
Snippet for SetHeaderFooterIntoBody :
/// <summary>
/// Moves the content of the displayed header and footer into the main body of
/// <paramref name="pageDoc"/>: header nodes go to the start of the first section's
/// body, footer nodes to the end of the last section's body.
/// </summary>
/// <param name="pageDoc">Document whose header/footer content is moved in place.</param>
private void SetHeaderFooterIntoBody(ref Document pageDoc)
{
    var leadingSection = pageDoc.FirstSection;
    var trailingSection = pageDoc.LastSection;

    // The first-page variant is used when the section has a different
    // first-page header/footer; otherwise the primary one is used.
    HeaderFooterType headerKind = HeaderFooterType.HeaderPrimary;
    if (leadingSection.PageSetup.DifferentFirstPageHeaderFooter)
    {
        headerKind = HeaderFooterType.HeaderFirst;
    }

    HeaderFooterType footerKind = HeaderFooterType.FooterPrimary;
    if (trailingSection.PageSetup.DifferentFirstPageHeaderFooter)
    {
        footerKind = HeaderFooterType.FooterFirst;
    }

    HeaderFooter displayedHeader = leadingSection.HeadersFooters[headerKind];
    HeaderFooter displayedFooter = trailingSection.HeadersFooters[footerKind];

    if (displayedHeader != null)
    {
        // Take nodes from the end and insert each at the front of the body,
        // which keeps the header content in its original order.
        for (var node = displayedHeader.LastChild; node != null; node = displayedHeader.LastChild)
        {
            leadingSection.Body.PrependChild(node);
        }
    }

    if (displayedFooter != null)
    {
        // Take nodes from the start and append each to the end of the body.
        for (var node = displayedFooter.FirstChild; node != null; node = displayedFooter.FirstChild)
        {
            trailingSection.Body.AppendChild(node);
        }
    }
}
Source Document :
PageNumberIssue.docx (1.1 MB)
HTML Output :
PageNumberIssue.zip (589.1 KB)
@AlpeshChaudhariDev Unfortunately, I cannot reproduce the problem on my side using the latest 24.9 version of Aspose.Words. As I can see you are using an old 21.8 version. Please try using the latest version.
I have checked the latest version (24.9) of Aspose Words, and I found that this issue is still not resolved in the new version.
My Snippet for Convert Word to HTML :
// Apply the Aspose.Words license before any document is processed.
string strLicenscePath = @"D:\Aspose\ConsoleApps\AsposeWord\License\Aspose.Wordsfor.NET.lic";
Aspose.Words.License licWord = new Aspose.Words.License();
licWord.SetLicense(strLicenscePath);

// HTML export settings, assigned one by one in the same order as before.
Aspose.Words.Saving.HtmlSaveOptions options =
    new Aspose.Words.Saving.HtmlSaveOptions(Aspose.Words.SaveFormat.Html);
options.ExportImagesAsBase64 = true;
options.ExportXhtmlTransitional = true;
options.Encoding = System.Text.Encoding.UTF8;
// Header/footer export is switched off here; CreateSectionBreak is called below to build the document that is saved.
options.ExportHeadersFootersMode = Aspose.Words.Saving.ExportHeadersFootersMode.None;
options.ExportPageMargins = true;
options.ExportPageSetup = true;
options.UseHighQualityRendering = true;
options.ResolveFontNames = true;
options.AllowEmbeddingPostScriptFonts = true;
options.PrettyFormat = true;
options.SaveFormat = SaveFormat.Html;
options.ExportTocPageNumbers = true;
options.CssStyleSheetType = CssStyleSheetType.Inline;

// Load the source document, explicitly requesting the DOCX load format.
Aspose.Words.Loading.LoadOptions wordloadoption = new Aspose.Words.Loading.LoadOptions();
wordloadoption.LoadFormat = LoadFormat.Docx;
Aspose.Words.Document doc = new Aspose.Words.Document(
    @"D:\Temp\FormattingIssue\CosanFormattingIssue.docx",
    wordloadoption);

// Clean up the source document before it is split into pages.
doc.Cleanup();
doc.WebExtensionTaskPanes.Clear();
doc.RemoveExternalSchemaReferences();
doc.RemoveMacros();

// Create the destination from a clone of the source (Clone(false)), then clear its children.
Aspose.Words.Document tempDoc = (Document)doc.Clone(false);
tempDoc.RemoveAllChildren();

// Font embedding settings for the destination document.
tempDoc.FontInfos.EmbedTrueTypeFonts = true;
tempDoc.FontInfos.EmbedSystemFonts = true;
tempDoc.FontInfos.SaveSubsetFonts = true;

// Rebuild the document page by page, then write the HTML output.
CreateSectionBreak(ref doc, ref tempDoc);
tempDoc.Save(@"D:\Temp\FormattingIssue\CosanFormattingIssue.html", options);
// Copies sourceDoc into destDoc one page at a time. Each extracted page is
// collapsed into a single section that starts on a new page, its PAGE fields are
// unlinked, and its header/footer content is moved into the body before the page
// is appended.
//
// The method never throws. If a page fails, processing stops at that page and the
// error is written to standard error; pages appended so far remain in destDoc.
void CreateSectionBreak(ref Document sourceDoc, ref Document destDoc)
{
    try
    {
        // The source is not modified inside the loop, so read the page count once.
        int pageCount = sourceDoc.PageCount;
        for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
        {
            Document page = sourceDoc.ExtractPages(pageIndex, 1);

            // Remove section breaks in the page: fold every further section
            // into the first one until only one section is left.
            while (page.Sections.Count > 1)
            {
                page.FirstSection.AppendContent(page.Sections[1]);
                page.Sections[1].Remove();
            }

            // Reset the section start so the page begins on a new page.
            page.FirstSection.PageSetup.SectionStart = SectionStart.NewPage;

            // Unlink PAGE fields first, then move header/footer content into the body.
            FixPageNumber(ref page);
            SetHeaderFooterIntoBody(ref page);

            // Remove headers/footers since their content was already moved to the main body.
            page.GetChildNodes(NodeType.HeaderFooter, true).Clear();

            destDoc.AppendDocument(page, ImportFormatMode.UseDestinationStyles);
        }
    }
    catch (Exception ex)
    {
        // Do not hide the failure: the destination document may be incomplete.
        Console.Error.WriteLine("CreateSectionBreak failed; destination document may be incomplete: " + ex);
    }
}
// Updates all fields of the page document, then updates and unlinks every PAGE field.
//
// The method never throws. A failure is written to standard error instead of
// being discarded.
void FixPageNumber(ref Document page)
{
    try
    {
        page.UpdateFields();

        // ToList() snapshots the matching fields before any of them is changed.
        var pageFields = page.Range.Fields.Where(f => f.Type == FieldType.FieldPage).ToList();
        foreach (var field in pageFields)
        {
            // Refresh the field once more, then unlink it.
            field.Update();
            field.Unlink();
        }
    }
    catch (Exception ex)
    {
        // Best effort: keep the caller running, but make the failure visible.
        Console.Error.WriteLine("FixPageNumber failed: " + ex);
    }
}
// Moves the content of the displayed header and footer into the main body of
// pageDoc: header nodes go to the start of the first section's body, footer nodes
// to the end of the last section's body.
void SetHeaderFooterIntoBody(ref Document pageDoc)
{
    var headSection = pageDoc.FirstSection;
    var tailSection = pageDoc.LastSection;

    // The first-page variant is used when the section has a different
    // first-page header/footer; otherwise the primary one is used.
    bool useFirstPageHeader = headSection.PageSetup.DifferentFirstPageHeaderFooter;
    HeaderFooter displayedHeader = useFirstPageHeader
        ? headSection.HeadersFooters[HeaderFooterType.HeaderFirst]
        : headSection.HeadersFooters[HeaderFooterType.HeaderPrimary];

    bool useFirstPageFooter = tailSection.PageSetup.DifferentFirstPageHeaderFooter;
    HeaderFooter displayedFooter = useFirstPageFooter
        ? tailSection.HeadersFooters[HeaderFooterType.FooterFirst]
        : tailSection.HeadersFooters[HeaderFooterType.FooterPrimary];

    // Header: repeatedly move the last remaining node to the front of the body,
    // which keeps the header content in its original order.
    while (displayedHeader != null && displayedHeader.HasChildNodes)
    {
        headSection.Body.PrependChild(displayedHeader.LastChild);
    }

    // Footer: repeatedly move the first remaining node to the end of the body.
    while (displayedFooter != null && displayedFooter.HasChildNodes)
    {
        tailSection.Body.AppendChild(displayedFooter.FirstChild);
    }
}
Please check the duplicated page numbers in this HTML output:
51,53,72,144 and more.
Source Document :
CosanFormattingIssue.docx (1.5 MB)
HTML Output :
CosanFormattingIssue.zip (751.0 KB)
@AlpeshChaudhariDev Probably in your case it would be better to simply use page index instead of the PAGE
field value. Please try using the following modified code:
// Copies sourceDoc into destDoc one page at a time. Each extracted page is
// collapsed into a single section that starts on a new page, its PAGE fields are
// replaced with the 1-based page index, and its header/footer content is moved
// into the body before the page is appended.
//
// The method never throws. If a page fails, processing stops at that page and the
// error is written to standard error; pages appended so far remain in destDoc.
void CreateSectionBreak(ref Document sourceDoc, ref Document destDoc)
{
    try
    {
        // The source is not modified inside the loop, so read the page count once.
        int pageCount = sourceDoc.PageCount;
        for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
        {
            Document page = sourceDoc.ExtractPages(pageIndex, 1);

            // Remove section breaks in the page: fold every further section
            // into the first one until only one section is left.
            while (page.Sections.Count > 1)
            {
                page.FirstSection.AppendContent(page.Sections[1]);
                page.Sections[1].Remove();
            }

            // Reset the section start so the page begins on a new page.
            page.FirstSection.PageSetup.SectionStart = SectionStart.NewPage;

            // Use the page index (1-based) instead of the PAGE field value.
            FixPageNumber(ref page, pageIndex + 1);
            SetHeaderFooterIntoBody(ref page);

            // Remove headers/footers since their content was already moved to the main body.
            page.GetChildNodes(NodeType.HeaderFooter, true).Clear();

            destDoc.AppendDocument(page, ImportFormatMode.UseDestinationStyles);
        }
    }
    catch (Exception ex)
    {
        // Do not hide the failure: the destination document may be incomplete.
        Console.Error.WriteLine("CreateSectionBreak failed; destination document may be incomplete: " + ex);
    }
}
// Replaces every PAGE field in the page document with pageNumber written as text.
//
// The method never throws. A failure is written to standard error instead of
// being discarded.
void FixPageNumber(ref Document page, int pageNumber)
{
    try
    {
        DocumentBuilder builder = new DocumentBuilder(page);

        // The same text is written for every field, so format it once.
        string pageNumberText = pageNumber.ToString();

        // ToList() snapshots the matching fields before any of them is removed.
        var pageFields = page.Range.Fields.Where(f => f.Type == FieldType.FieldPage).ToList();
        foreach (var field in pageFields)
        {
            // Write the number at the field's position, then drop the field itself.
            builder.MoveToField(field, true);
            builder.Write(pageNumberText);
            field.Remove();
        }
    }
    catch (Exception ex)
    {
        // Best effort: keep the caller running, but make the failure visible.
        Console.Error.WriteLine("FixPageNumber failed: " + ex);
    }
}