Can you suggest why a paragraph imported using the Document Library is sometimes split into multiple Runs instead of just one?
"Business Case and Benefits of Product" should be a single Run, but it is being split into 5 text Runs instead.
I am extracting that information using the following code:
/// <summary>
/// Decides whether <paramref name="paragraph"/> looks like a header and, if so,
/// returns its text and records it in <paramref name="headerToId"/>.
/// </summary>
/// <remarks>
/// A paragraph is accepted only when it is left-aligned or justified and every direct
/// child is either a bookmark marker or a bold run whose text is entirely upper case.
/// The text of all runs is concatenated, so a header that is stored as several runs
/// is still returned as a single string.
/// </remarks>
/// <param name="id">Base identifier; "#P_0" is appended to it to form the header id.</param>
/// <param name="paragraph">Paragraph to inspect.</param>
/// <param name="headerToId">
/// Map from trimmed header text to header id. It is updated only when a non-empty
/// header is detected.
/// </param>
/// <returns>The trimmed header text, or an empty string when the paragraph is not a header.</returns>
private static string GetHeader(string id, Paragraph paragraph, Dictionary<string,string> headerToId)
{
    var alignment = paragraph.ParagraphFormat.Alignment;
    if (alignment != ParagraphAlignment.Justify && alignment != ParagraphAlignment.Left)
    {
        return "";
    }

    var headerText = "";

    // Fetch the direct children once instead of rebuilding the collection per iteration.
    foreach (var paragraphChild in paragraph.GetChildNodes(NodeType.Any, false))
    {
        // Bookmarks carry no text and may legitimately sit inside a header.
        if (paragraphChild is BookmarkStart || paragraphChild is BookmarkEnd)
        {
            continue;
        }

        // Any other non-run child, or a non-bold run, disqualifies the paragraph.
        var run = paragraphChild as Run;
        if (run == null || !run.Font.Bold)
        {
            return "";
        }

        // Headers must be all upper case; the invariant culture keeps the check
        // independent of the machine's locale.
        var runText = run.Text.Trim();
        if (runText.ToUpperInvariant() != runText)
        {
            return "";
        }

        headerText += run.Text;
    }

    var header = headerText.Trim();

    // Covers paragraphs with no children, only bookmarks, or only whitespace runs:
    // these are not headers and must not be registered under an empty key.
    if (header.Length == 0)
    {
        return "";
    }

    headerToId[header] = id + "#P_0";
    return header;
}