When I search for a particular text in a PDF, only the first 4 occurrences matching the search text are highlighted on each page (if present); the rest of the occurrences are not highlighted. I have used the code below. Please help me figure out how to highlight all occurrences of the text in a PDF.
Also, when I use a text segment to get the text and then convert it to HTML, the text is not displayed.
{
    // Highlights every occurrence of a search term on each page of "s1.pdf"
    // and saves each page that contains a match as "<pageNumber>.html".
    //
    // NOTE(review): if only the first four matches per page get highlighted,
    // this looks like the Aspose.PDF evaluation-mode limit on collection size
    // rather than a defect in the loop below - confirm that a valid license is
    // applied before this code runs.

    // The source document is disposable; release it once all pages are processed.
    using (Document doc = new Document("s1.pdf"))
    {
        // Input string (the text to search for).
        string c = "a";

        // Build the search pattern. Because \s* also matches the empty string,
        // the first replace puts a space around every character, and each space
        // is then turned into \s* so a match may span spaces or line breaks
        // (for "a" the result is \s*a\s*).
        // NOTE(review): c is inserted into the pattern unescaped, so regex
        // metacharacters in the search term are interpreted as regex syntax.
        string formattedLine = Regex.Replace(c, @"\s*", " ").Replace(" ", @"\s*");

        // HTML export settings shared by every exported page.
        HtmlSaveOptions htmlOptions = new HtmlSaveOptions();
        htmlOptions.PartsEmbeddingMode = Aspose.Pdf.HtmlSaveOptions.PartsEmbeddingModes.EmbedAllIntoHtml;
        htmlOptions.LettersPositioningMethod = Aspose.Pdf.HtmlSaveOptions.LettersPositioningMethods.UseEmUnitsAndCompensationOfRoundingErrorsInCss;
        htmlOptions.SplitCssIntoPages = false;
        htmlOptions.RasterImagesSavingMode = Aspose.Pdf.HtmlSaveOptions.RasterImagesSavingModes.AsEmbeddedPartsOfPngPageBackground;
        htmlOptions.RemoveEmptyAreasOnTopAndBottom = true;
        htmlOptions.FontSavingMode = Aspose.Pdf.HtmlSaveOptions.FontSavingModes.SaveInAllFormats;

        foreach (Page page in doc.Pages)
        {
            // A fresh absorber per page keeps the fragment collection scoped to this page.
            // "(?i)" makes the match case-insensitive; TextSearchOptions(true) enables
            // regular-expression search, so it does not need to be set again later.
            TextFragmentAbsorber tfa = new TextFragmentAbsorber("(?i)" + formattedLine, new TextSearchOptions(true));
            page.Accept(tfa);

            TextFragmentCollection tfc = tfa.TextFragments;
            if (tfc.Count == 0)
            {
                Console.WriteLine("Not Found");
                continue;
            }

            Console.WriteLine(tfc.Count);

            // Highlight each match: black text on a yellow background.
            foreach (TextFragment frag in tfc)
            {
                frag.TextState.ForegroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Black);
                frag.TextState.BackgroundColor = Aspose.Pdf.Color.FromRgb(System.Drawing.Color.Yellow);
            }

            // Export the highlighted page on its own; dispose the temporary
            // single-page document as soon as it has been written.
            using (Document newDocument = new Document())
            {
                newDocument.Pages.Add(page);
                Console.WriteLine(page.Number);
                newDocument.Save(page.Number + ".html", htmlOptions);
            }
        }
    }
}