Hi,
We have a 30-page document with a page number in the footer. After deleting a few pages, the page numbers in the footer stay the same as before instead of continuing consecutively.
Can you please advise?
This is our code to remove blank pages:
/// <summary>
/// Writes <paramref name="document"/> to a new DOCX stream, leaving out every page
/// whose first-section body renders to blank text.
/// </summary>
/// <param name="document">The document to scan page by page.</param>
/// <returns>
/// A <see cref="MemoryStream"/> holding the saved DOCX. When no blank page is found,
/// the original document is saved as-is.
/// </returns>
/// <remarks>
/// The page-numbering settings of the extracted page ranges are not touched here;
/// each range is appended exactly as <c>ExtractPages</c> returns it.
/// </remarks>
private MemoryStream RemoveBlankPages(Document document)
{
    var output = new MemoryStream();

    // Collect the zero-based indices of pages whose body text is empty or whitespace.
    var blankPages = new List<int>();
    int totalPages = document.PageCount;
    for (int pageIndex = 0; pageIndex < totalPages; pageIndex++)
    {
        Document singlePage = document.ExtractPages(pageIndex, 1);
        string bodyText = singlePage.FirstSection.Body.ToString(SaveFormat.Text);
        if (string.IsNullOrWhiteSpace(bodyText))
        {
            blankPages.Add(pageIndex);
        }
    }

    // Nothing to strip: save the source document unchanged.
    if (blankPages.Count == 0)
    {
        document.Save(output, SaveFormat.Docx);
        return output;
    }

    // Start from an empty shell of the source document and append the non-blank runs.
    Document result = (Document)document.Clone(false);
    result.RemoveAllChildren();

    // Index of the most recent blank page; -1 means "before the first page".
    int previousBlank = -1;
    foreach (int blankIndex in blankPages)
    {
        int runStart = previousBlank + 1;
        int runLength = blankIndex - runStart;
        if (runLength > 0)
        {
            result.AppendDocument(document.ExtractPages(runStart, runLength), ImportFormatMode.UseDestinationStyles);
        }
        previousBlank = blankIndex;
    }

    // Append whatever follows the last blank page, if anything.
    int tailStart = previousBlank + 1;
    if (document.PageCount > tailStart)
    {
        int tailLength = document.PageCount - tailStart;
        result.AppendDocument(document.ExtractPages(tailStart, tailLength), ImportFormatMode.UseDestinationStyles);
    }

    result.Save(output, SaveFormat.Docx);
    return output;
}
@randomuser123 the following code does what you want; it also fixes the issue with the page numbers and checks whether a page contains a shape, so that such pages are not added to the blank pages array:
// Load the source document and remember how many pages it lays out to.
Document doc = new Document(@"C:\Temp\input.docx");
int totalPages = doc.PageCount;

// Page-index boundaries: a leading -1, the index of every blank page, and finally totalPages.
// The pages strictly between two neighbouring boundaries form one run of non-blank pages.
ArrayList emptyPageNumbers = new ArrayList { -1 };
for (int pageIndex = 0; pageIndex < totalPages; pageIndex++)
{
    // Extract the page as a stand-alone document so it can be inspected on its own.
    Document pageDoc = doc.ExtractPages(pageIndex, 1);

    // Accumulate body text and shape count across all sections of the page.
    // Header and footer content is deliberately not considered.
    string bodyText = "";
    int shapeTotal = 0;
    foreach (Section section in pageDoc.Sections)
    {
        bodyText += section.Body.ToString(SaveFormat.Text);
        shapeTotal += section.Body.GetChildNodes(NodeType.Shape, true).Count;
    }

    // A page is blank only when it has no visible text and no Shape nodes.
    bool hasText = bodyText.Trim().Length > 0;
    if (!hasText && shapeTotal == 0)
    {
        emptyPageNumbers.Add(pageIndex);
    }
}
emptyPageNumbers.Add(totalPages);

// Rebuild the document from the non-blank page runs.
Document mergedDoc = (Document)doc.Clone(false);
mergedDoc.RemoveAllChildren();
for (int boundary = 1; boundary < emptyPageNumbers.Count; boundary++)
{
    int firstPage = (int)emptyPageNumbers[boundary - 1] + 1;
    int runLength = (int)emptyPageNumbers[boundary] - firstPage;
    if (runLength <= 0)
    {
        // Two adjacent blank pages (or a blank first/last page): nothing lies between them.
        continue;
    }

    Document pageRun = doc.ExtractPages(firstPage, runLength);
    mergedDoc.AppendDocument(pageRun, ImportFormatMode.KeepSourceFormatting);

    // Fold everything into the first section to avoid section breaks between runs.
    if (mergedDoc.Sections.Count > 1)
    {
        mergedDoc.FirstSection.AppendContent(mergedDoc.LastSection);
        mergedDoc.LastSection.Remove();
    }

    // Fix page numbering if it is present.
    mergedDoc.LastSection.PageSetup.RestartPageNumbering = true;
}
mergedDoc.Save(@"C:\Temp\output.docx");
Hi @eduardo.canal,
Thank you, your code works fine. However, it does not preserve page breaks from the original document. Can you please advise?
@randomuser123 What you need is simply to set RestartPageNumbering to false in the first section of each extracted sub-document:
// Extract the run of pages to keep as its own document.
Document subDoc = document.ExtractPages(index, count);
// Turn off the page-numbering restart on the extracted document's first section.
subDoc.FirstSection.PageSetup.RestartPageNumbering = false;
Please see the modified version of your original method:
/// <summary>
/// Saves <paramref name="document"/> to a new DOCX stream with its blank pages removed.
/// </summary>
/// <param name="document">The document to scan page by page.</param>
/// <returns>
/// A <see cref="MemoryStream"/> containing the saved DOCX, rewound to position 0 so it
/// can be read immediately. When no blank page is found the original document is saved as-is.
/// </returns>
/// <remarks>
/// A page counts as blank only when none of its section bodies contains text or Shape nodes.
/// The page-numbering restart flag is cleared on the first section of every kept page run,
/// so numbering is not restarted at the joins.
/// </remarks>
private static MemoryStream RemoveBlankPages(Document document)
{
    var stream = new MemoryStream();
    int pageCount = document.PageCount;

    // Boundaries of the page runs to keep: a leading -1, each blank page index and,
    // once at least one blank page is known, a trailing pageCount.
    var boundaries = new List<int> { -1 };
    for (int i = 0; i < pageCount; i++)
    {
        if (IsBlankPage(document.ExtractPages(i, 1)))
        {
            boundaries.Add(i);
        }
    }

    if (boundaries.Count == 1)
    {
        // No blank pages: save the source document unchanged.
        document.Save(stream, SaveFormat.Docx);
    }
    else
    {
        // The trailing sentinel lets the loop below also emit the pages after the last blank one.
        boundaries.Add(pageCount);

        // Start from an empty shell of the source document and append the non-blank runs.
        Document nonEmptyDocument = (Document)document.Clone(false);
        nonEmptyDocument.RemoveAllChildren();

        for (int b = 1; b < boundaries.Count; b++)
        {
            int index = boundaries[b - 1] + 1;
            int count = boundaries[b] - index;
            if (count > 0)
            {
                Document subDoc = document.ExtractPages(index, count);
                // Clear the restart flag so this run does not restart page numbering.
                subDoc.FirstSection.PageSetup.RestartPageNumbering = false;
                nonEmptyDocument.AppendDocument(subDoc, ImportFormatMode.UseDestinationStyles);
            }
        }

        nonEmptyDocument.Save(stream, SaveFormat.Docx);
    }

    // Rewind so callers that read from the stream do not start at its end.
    stream.Position = 0;
    return stream;
}

// Returns true when no section body of the extracted single-page document holds text or shapes.
private static bool IsBlankPage(Document page)
{
    foreach (Section section in page.Sections)
    {
        // Only section bodies are inspected; headers and footers are ignored.
        if (!string.IsNullOrWhiteSpace(section.Body.ToString(SaveFormat.Text)))
        {
            return false;
        }

        // A page holding only shapes (e.g. images) produces no text but is not blank.
        if (section.Body.GetChildNodes(NodeType.Shape, true).Count > 0)
        {
            return false;
        }
    }

    return true;
}
Thanks @alexey.noskov, works like a charm! Appreciate it
1 Like