Problems
When I read text content from a PDF file using TextAbsorber, the text I receive is just one line, in addition to the Aspose message stating that the tool is an evaluation copy. The file under test is attached.
The code snippet used is given below.
/// <summary>
/// Extracts the text of a single page of a PDF file with an Aspose.PDF <c>TextAbsorber</c>.
/// </summary>
/// <param name="SourceFileName">Path of the PDF file to open.</param>
/// <param name="PageIndex">
/// Index passed to <c>Document.Pages[...]</c> to select the page.
/// NOTE(review): Aspose.PDF page collections are presumably 1-based - confirm against the caller.
/// </param>
/// <param name="TextFromFile">Receives the extracted text on success; left untouched on failure.</param>
/// <returns>
/// <c>true</c> when the text was extracted; <c>false</c> when any exception was thrown
/// (its message is shown in a message box).
/// </returns>
public bool GetTextUsingTextAbsorber(string SourceFileName, int PageIndex, ref string TextFromFile)
{
    try
    {
        // Document is disposable: release it as soon as the text has been copied out,
        // including when page lookup or extraction throws.
        using (Document pdfDocument = new Document(SourceFileName))
        {
            // Create TextAbsorber object to extract text
            TextAbsorber textAbsorber = new TextAbsorber();

            // Accept the absorber for the requested page only
            pdfDocument.Pages[PageIndex].Accept(textAbsorber);

            // Copy the extracted text out before the document is disposed
            TextFromFile = textAbsorber.Text;
        }

        return true;
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
        return false;
    }
}
The received output is as below.
Evaluation Only. Created with Aspose.PDF. Copyright 2002-2020 Aspose Pty Ltd.
Printing Time: 12:1
If I read the PDF contents using ParagraphAbsorber instead, the behaviour is still strange.
If I run the code directly, I get only 3 lines from the PDF file.
If I run the code with breakpoints, single-stepping in the debugger, I get many more lines from the file, but still not all of the text lines.
The code is given below.
/// <summary>
/// Collects the text of every paragraph found by an Aspose.PDF <c>ParagraphAbsorber</c>
/// on one page of a PDF file.
/// </summary>
/// <param name="SourcrFileName">
/// Path of the PDF file to open. The misspelled name is kept so that callers using
/// named arguments keep compiling.
/// </param>
/// <param name="PageIndex">
/// Index passed to <c>Document.Pages[...]</c> to select the page.
/// NOTE(review): Aspose.PDF page collections are presumably 1-based - confirm against the caller.
/// </param>
/// <param name="LinesFromPage">
/// List that receives one entry per paragraph. Each entry is the paragraph's lines,
/// each terminated by "\r\n", followed by one extra "\r\n". Entries are appended to
/// whatever the list already holds; a <c>null</c> list is replaced by a new one.
/// </param>
/// <returns>
/// <c>true</c> when the page was processed; <c>false</c> when any exception was thrown
/// (its message is shown in a message box).
/// </returns>
public bool GetAllLinesFromPage(string SourcrFileName, int PageIndex, ref List<string> LinesFromPage)
{
    try
    {
        if (LinesFromPage == null)
        {
            LinesFromPage = new List<string>();
        }

        // Open the file named by the parameter. The original body read an identifier
        // spelled "SourceFileName", which is not this method's parameter.
        using (Document doc = new Document(SourcrFileName))
        {
            ParagraphAbsorber absorber = new ParagraphAbsorber();

            // Visit only the requested page rather than the whole document,
            // so that PageIndex is actually honoured.
            absorber.Visit(doc.Pages[PageIndex]);

            foreach (PageMarkup markup in absorber.PageMarkups)
            {
                foreach (MarkupSection section in markup.Sections)
                {
                    foreach (MarkupParagraph paragraph in section.Paragraphs)
                    {
                        // Fully qualified so the block does not need an extra using directive.
                        System.Text.StringBuilder paragraphText = new System.Text.StringBuilder();

                        foreach (List<TextFragment> line in paragraph.Lines)
                        {
                            // A visual line may be split into several fragments; join them.
                            foreach (TextFragment fragment in line)
                            {
                                paragraphText.Append(fragment.Text);
                            }

                            paragraphText.Append("\r\n");
                        }

                        // Blank line marks the end of the paragraph.
                        paragraphText.Append("\r\n");
                        LinesFromPage.Add(paragraphText.ToString());
                    }
                }
            }
        }

        return true;
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
        return false;
    }
}
RT_25.78_Copy.pdf (100.3 KB)