Hi, I want to extract all the text from the attached PDF file, but with the code below only the first line is extracted. `resumeLink` is the external link to the PDF file. Can you please help me with this? I have attached the PDF file and the extracted text.
// Download the PDF referenced by resumeLink and load it into an Aspose.PDF Document.
byte[] contentBuffer = webClient.DownloadData(resumeLink);
MemoryStream ms = new MemoryStream(contentBuffer);
Document pdfDocument = new Document(ms);

// Accumulates the text of every page, in page order.
System.Text.StringBuilder builder = new System.Text.StringBuilder();
string extractedText = "";

// The text device and its options do not depend on the page, so build them once.
// Pure formatting mode is kept from the original snippet (the alternative is Raw).
TextDevice textDevice = new TextDevice();
TextExtractionOptions textExtOptions =
    new TextExtractionOptions(TextExtractionOptions.TextFormattingMode.Pure);
textDevice.ExtractionOptions = textExtOptions;

foreach (Page pdfPage in pdfDocument.Pages)
{
    // A fresh stream per page keeps each page's bytes separate.
    using (MemoryStream textStream = new MemoryStream())
    {
        // Write ONLY the current page into the stream. The original also called
        // Process(pdfDocument.Pages[1], textStream) here, which pushed page 1
        // into the same stream again on every iteration.
        textDevice.Process(pdfPage, textStream);

        // Decode the bytes written by the device; the snippet reads them as UTF-16
        // (Encoding.Unicode), as the original did.
        extractedText = Encoding.Unicode.GetString(textStream.ToArray());
    }

    builder.Append(extractedText);
}

// Append the text of all pages to the existing description.
description += builder.ToString();
image.png (9.4 KB)
Profile_Test.pdf (121.6 KB)