Hi Samsen,
// Lists every whitespace-delimited word found on page 6 of the PDF and prints,
// for each word: its text, its height, its width, a top offset and a left
// offset (all four scaled by 100), and finally the text with en dashes and
// underscores removed.
Document _pdfDocument = new Document("c:/pdftest/N3A_R13.pdf");

// Look the page up once; every measurement below refers to this same page.
var page = _pdfDocument.Pages[6];

// search all separate words using regular expression
var textFragmentAbsorber = new TextFragmentAbsorber(@"[^\s]+", new TextSearchOptions(true));
page.Accept(textFragmentAbsorber);
TextFragmentCollection textFragmentCollection = textFragmentAbsorber.TextFragments;

foreach (TextFragment textFragment in textFragmentCollection)
{
    Console.WriteLine("=====================================");
    Console.WriteLine(textFragment.Text);

    // Height and width of the fragment's rectangle.
    Console.WriteLine((float)textFragment.Rectangle.Height * 100);
    Console.WriteLine((float)textFragment.Rectangle.Width * 100);

    // Page height minus the fragment's Y indent minus its height, i.e. the
    // vertical offset re-expressed relative to the top edge of the page.
    Console.WriteLine(page.Rect.Height * 100 - (float)textFragment.Position.YIndent * 100 - (float)textFragment.Rectangle.Height * 100);

    // Horizontal offset of the fragment.
    Console.WriteLine((float)textFragment.Position.XIndent * 100);

    // The original assigned this value to an undeclared `Text`; print it like
    // the other values instead so the snippet compiles on its own.
    Console.WriteLine(textFragment.Text.Replace("–", "").Replace("_", ""));
}
Hi Nayyer,
Hi Samsen,
// Renders page 6 of the input PDF to a PNG at the requested resolution and
// draws outlines on top of it: yellow for every word-level text fragment,
// green for every text segment and black for every character. The annotated
// image is written to outFileImg.
string inFile = "c:/pdftest/N3A_R13.pdf";
string outFileImg = "c:/pdftest/N3A_R13_resultant.png";
int resolution = 150;

Aspose.Pdf.Document temppdfDocument = new Aspose.Pdf.Document(inFile);

// Put the page of interest into a fresh document so that it is addressed as
// Pages[1] below and is the first image the converter produces.
Aspose.Pdf.Document pdfDocument = new Aspose.Pdf.Document();
pdfDocument.Pages.Add(temppdfDocument.Pages[6]);

using (MemoryStream ms = new MemoryStream())
{
    PdfConverter conv = new PdfConverter(pdfDocument);
    conv.Resolution = new Resolution(resolution, resolution);
    conv.GetNextImage(ms, System.Drawing.Imaging.ImageFormat.Png);

    // Rewind so decoding starts at the first byte of the PNG regardless of
    // where the converter left the stream position.
    ms.Position = 0;

    // The bitmap wraps native GDI+ resources; dispose it deterministically.
    // It must be saved while ms is still open, hence the nesting.
    using (Bitmap bmp = (Bitmap)Bitmap.FromStream(ms))
    {
        using (System.Drawing.Graphics gr = System.Drawing.Graphics.FromImage(bmp))
        {
            // Scale by resolution / 72 and flip the Y axis (negative Y scale,
            // offset by the bitmap height) so that the PDF coordinates used
            // below land on the matching pixels of the rendered image.
            float scale = resolution / 72f;
            gr.Transform = new System.Drawing.Drawing2D.Matrix(scale, 0, 0, -scale, 0, bmp.Height);

            Page page = pdfDocument.Pages[1];

            // The page rectangle does not change while iterating; fetch it once.
            Aspose.Pdf.Rectangle pageRect = page.GetPageRect(true);

            // create TextAbsorber object to find all words
            TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber(@"[\S]+");
            textFragmentAbsorber.TextSearchOptions.IsRegularExpressionUsed = true;
            page.Accept(textFragmentAbsorber);

            // get the extracted text fragments
            TextFragmentCollection textFragmentCollection = textFragmentAbsorber.TextFragments;

            // loop through the fragments
            foreach (TextFragment textFragment in textFragmentCollection)
            {
                // Word outline.
                gr.DrawRectangle(
                    Pens.Yellow,
                    (float)textFragment.Position.XIndent,
                    (float)textFragment.Position.YIndent,
                    (float)textFragment.Rectangle.Width,
                    (float)textFragment.Rectangle.Height);

                // Built once per fragment; still printed once per character
                // below so the console output matches the original.
                string fragmentInfo = "TextFragment = " + textFragment.Text + " Page URY = " + pageRect.URY +
                    " TextFragment URY = " + textFragment.Rectangle.URY;

                // Segments and characters are addressed from 1 to Count inclusive.
                for (int segNum = 1; segNum <= textFragment.Segments.Count; segNum++)
                {
                    TextSegment segment = textFragment.Segments[segNum];

                    for (int charNum = 1; charNum <= segment.Characters.Count; charNum++)
                    {
                        CharInfo characterInfo = segment.Characters[charNum];
                        Console.WriteLine(fragmentInfo);

                        // Character outline.
                        gr.DrawRectangle(
                            Pens.Black,
                            (float)characterInfo.Rectangle.LLX,
                            (float)characterInfo.Rectangle.LLY,
                            (float)characterInfo.Rectangle.Width,
                            (float)characterInfo.Rectangle.Height);
                    }

                    // Segment outline, drawn after its characters.
                    gr.DrawRectangle(
                        Pens.Green,
                        (float)segment.Rectangle.LLX,
                        (float)segment.Rectangle.LLY,
                        (float)segment.Rectangle.Width,
                        (float)segment.Rectangle.Height);
                }
            }
        }

        bmp.Save(outFileImg, System.Drawing.Imaging.ImageFormat.Png);
    }
}