// Load the sample file and parse its raw content into the working document.
var sourceDoc = GetStreamFromFile("WordDocWithTablesReplacedByHyperlinksVisitor.docx");
var bytes = sourceDoc.Content;
_mainDoc = ReadWordDocument(bytes);

// Remove every section, then call RemoveUnusedResources() on the emptied document.
_mainDoc.Sections.Clear();
_mainDoc.RemoveUnusedResources();

// Only this minimal set of built-in styles is allowed to survive the clean-up.
var stylesToKeep = new List<StyleIdentifier>();
stylesToKeep.Add(StyleIdentifier.DefaultParagraphFont);
stylesToKeep.Add(StyleIdentifier.Normal);
stylesToKeep.Add(StyleIdentifier.NoList);
stylesToKeep.Add(StyleIdentifier.TableNormal);

// Snapshot the style collection and delete everything outside the keep list;
// no additional style names are protected, hence the empty array.
var styles = _mainDoc.Styles.Cast<Style>().ToList();
CleanExistingStyles(_mainDoc, styles, stylesToKeep, new string[0]);

// Add the "CDM Range 1" / "CDM Range 2" table styles and run the resource clean-up again.
CreateCdmRangeStyles(_mainDoc);
_mainDoc.RemoveUnusedResources();
Expected result: _mainDoc.PageCount should be 1. Actual result: it is larger than 1.
/// <summary>
/// Deletes every style that is neither listed in <paramref name="stylesToKeep"/> nor
/// named in <paramref name="existingStyleNames"/>.
/// </summary>
/// <param name="doc">Document whose style collection is re-read after each deletion.</param>
/// <param name="styles">Initial snapshot of the styles to examine.</param>
/// <param name="stylesToKeep">Style identifiers that must never be deleted.</param>
/// <param name="existingStyleNames">Style names that must never be deleted.</param>
private static void CleanExistingStyles(Document doc, List<Style> styles, List<StyleIdentifier> stylesToKeep, string[] existingStyleNames)
{
    // The original implementation recursed once per deleted style (a tail call followed
    // by "break"). The loop below performs the same passes without growing the call stack.
    var candidates = styles;
    var rescan = true;
    while (rescan)
    {
        rescan = false;
        foreach (var style in candidates)
        {
            if (stylesToKeep.Contains(style.StyleIdentifier) || existingStyleNames.Contains(style.Name))
            {
                continue;
            }

            DeleteStyle(style);

            // The snapshot being enumerated is stale after a deletion, so take a fresh
            // one from the document and start over with it.
            var remainingStyles = doc.Styles.Cast<Style>().ToList();
            if (remainingStyles.Any())
            {
                candidates = remainingStyles;
                rescan = true;
                break;
            }
        }
    }
}
/// <summary>
/// Replaces <paramref name="style"/> with the built-in fallback style of the same style type
/// (Normal, Default Paragraph Font, Table Normal or No List).
/// </summary>
/// <param name="style">The style to get rid of.</param>
/// <exception cref="ArgumentOutOfRangeException">The style type is not paragraph, character, table or list.</exception>
private static void DeleteStyle(Style style)
{
    // Map the style type to the identifier of its built-in fallback.
    var kind = style.Type;
    StyleIdentifier fallbackId;
    if (kind == StyleType.Paragraph)
    {
        fallbackId = StyleIdentifier.Normal;
    }
    else if (kind == StyleType.Character)
    {
        fallbackId = StyleIdentifier.DefaultParagraphFont;
    }
    else if (kind == StyleType.Table)
    {
        fallbackId = StyleIdentifier.TableNormal;
    }
    else if (kind == StyleType.List)
    {
        fallbackId = StyleIdentifier.NoList;
    }
    else
    {
        throw new ArgumentOutOfRangeException();
    }

    var fallback = style.Styles[fallbackId];

    // Per the original author's note: giving the fallback the doomed style's name replaces
    // the style that carries that name. The fallback's own name is restored right after.
    var savedName = fallback.Name;
    fallback.Name = style.Name;
    fallback.Name = savedName;
}
/// <summary>
/// Adds two copies of the built-in Table Normal style to <paramref name="doc"/>, named
/// "CDM Range 1" and "CDM Range 2", each based on Table Normal.
/// </summary>
/// <param name="doc">Document that receives the new styles.</param>
private static void CreateCdmRangeStyles(Document doc)
{
    var tableNormal = doc.Styles[StyleIdentifier.TableNormal];

    // Both styles are created the same way: copy, rename, then set the base style.
    foreach (var cdmStyleName in new[] { "CDM Range 1", "CDM Range 2" })
    {
        var cdmStyle = doc.Styles.AddCopy(tableNormal);
        cdmStyle.Name = cdmStyleName;
        cdmStyle.BaseStyleName = tableNormal.Name;
    }
}
/// <summary>
/// Builds a <c>Document</c> from raw file content.
/// </summary>
/// <param name="wordContent">Bytes of the document file; may be null or empty.</param>
/// <returns>
/// A document loaded from <paramref name="wordContent"/>, or a new blank document when the
/// content is null or empty.
/// </returns>
private static Document ReadWordDocument(byte[] wordContent)
{
    // Check before creating the stream: MemoryStream's constructor throws on a null buffer,
    // which previously made the null branch unreachable.
    if (wordContent == null || wordContent.Length == 0)
    {
        return new Document();
    }

    using (var ms = new MemoryStream(wordContent))
    {
        return new Document(ms);
    }
}
Could you please explain why the page count is greater than 1 after these steps?
Thank you.