When I extract text from a PDF and write it to a new PDF, the Korean characters come out garbled. Korea Char.png (28.1 KB)
Korean Char broken.PNG (20.4 KB)
Korean Char.PNG (1.3 KB)
source code-0.PNG (12.2 KB)
source code.PNG (12.3 KB)
Could you please share the source PDF and the product version that you are using on your end?
I am getting an error when I retype your code, so could you please paste the code here as text instead of a screenshot so that we can help you further? Error.PNG (8.4 KB)
The Aspose.Pdf version is 17.12.0.
// Extract the text of the bound PDF into memory and keep its first line
// (terminated with CRLF) in `str`. `text` holds the full extracted text and
// `sCode` the individual lines for any later use.
PdfExtractor pdfExtractor = new PdfExtractor();
pdfExtractor.BindPdf(dataDir + "\\" + name + ext);
// Use parameterless ExtractText method
pdfExtractor.ExtractText();
MemoryStream tempMemoryStream = new MemoryStream();
pdfExtractor.GetText(tempMemoryStream);
string text = "";
string[] sCode = null;
// Specify Unicode encoding type in StreamReader constructor.
// Disposing the reader also disposes tempMemoryStream.
using (StreamReader streamReader = new StreamReader(tempMemoryStream, Encoding.Unicode))
{
    // Rewind so the reader starts at the first byte written by GetText.
    streamReader.BaseStream.Seek(0, SeekOrigin.Begin);
    text = streamReader.ReadToEnd();
    // Split on carriage returns only. The previous code passed
    // (char)StringSplitOptions.None as a second argument; that cast evaluates
    // to '\0', so NUL was silently treated as an extra separator rather than
    // acting as a split option.
    sCode = text.Split('\r');
    // Split always yields at least one element, so index 0 is safe even for
    // empty text.
    str = sCode[0] + "\r\n";
}
/**/
// Take the second entry of str0 as the value to record.
str = str0[1];
// Address column D on row (j + 1) of the sheet.
string cellAddress = "D" + (j + 1).ToString();
cell = sheet.Cells[cellAddress];
// Store the value with a leading apostrophe in front of it.
string cellValue = "'" + str + "";
cell.PutValue(cellValue);
// Open the template document
Document pdfDocument = new Document(".\\biome.pdf");
// Create a TextFragmentAbsorber to find all instances of the search phrase "name"
TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber("name");
// Accept the absorber for all the pages
pdfDocument.Pages.Accept(textFragmentAbsorber);
// Get the extracted text fragments
TextFragmentCollection textFragmentCollection = textFragmentAbsorber.TextFragments;
if (textFragmentCollection.Count > 0)
{
    // Get the first occurrence of the text and replace it.
    // NOTE(review): index 1 is kept from the original; Aspose.PDF collections
    // are documented as 1-based - confirm for the version in use.
    TextFragment textFragment = textFragmentCollection[1];
    // Update text and other properties
    textFragment.Text = str;
    // Mark the replacement font for embedding BEFORE assigning it. The
    // previous code set IsEmbedded on the fragment's old font and then
    // replaced that font, so the font actually used was never flagged.
    // NOTE(review): "Arial" may not contain Hangul glyphs; if Korean text is
    // still garbled, try a font that covers Korean - confirm on the target machine.
    var replacementFont = FontRepository.FindFont("Arial", FontStyles.Regular);
    replacementFont.IsEmbedded = true;
    textFragment.TextState.Font = replacementFont;
    textFragment.TextState.FontSize = 16;
    Console.WriteLine("\nText replaced successfully.\nFile saved at " + dataDir);
}
// Create a TextFragmentAbsorber to find all instances of the search phrase "number"
textFragmentAbsorber = new TextFragmentAbsorber("number");
// Accept the absorber for all the pages
pdfDocument.Pages.Accept(textFragmentAbsorber);
// Get the extracted text fragments
textFragmentCollection = textFragmentAbsorber.TextFragments;
if (textFragmentCollection.Count > 0)
{
    // Get the first occurrence of the text and replace it.
    // NOTE(review): index 1 is kept from the original; Aspose.PDF collections
    // are documented as 1-based - confirm for the version in use.
    TextFragment textFragment = textFragmentCollection[1];
    // Update text and other properties
    textFragment.Text = i.ToString();
    // Mark the replacement font for embedding BEFORE assigning it. The
    // previous code set IsEmbedded on the fragment's old font and then
    // replaced that font, so the font actually used was never flagged.
    var replacementFont = FontRepository.FindFont("Arial", FontStyles.Regular);
    replacementFont.IsEmbedded = true;
    textFragment.TextState.Font = replacementFont;
    textFragment.TextState.FontSize = 10;
    //textFragment.TextState.ForegroundColor = System.Drawing.Color.Blue;

    // The same per-iteration file is written, re-opened and deleted below,
    // so build its path once.
    string tempPdfPath = dataDir + "\\biome" + i.ToString() + ".pdf";
    pdfDocument.Save(tempPdfPath);
    // Open document
    //PdfFileMend mender = new PdfFileMend();
    // Save changes
    //mender.Save(dataDir + "\\biome" + i.ToString() + ".pdf");
    // Re-open the saved file and append its pages to the combined document.
    pdfDocument = new Document(tempPdfPath);
    pdfDocument1.Pages.Add(pdfDocument.Pages);
    // Remove the intermediate file once its pages have been appended.
    System.IO.File.Delete(tempPdfPath);
    Console.WriteLine("\nText replaced successfully.\nFile saved at " + dataDir);
}
//File.WriteAllText(dataDir + "output_out.txt", text);
// Show the current item number and file name on the label and repaint it
// immediately.
label1.Text = "[표지 생성 중(" + i.ToString() + ")] " + name + ext;
label1.Refresh();
// Write the workbook out on every 20th item and when i reaches Files.Length
// or Files.Length - 1.
if (i % 20 == 0 || Files.Length == i || Files.Length == i + 1)
{
    string xlsxPath = dataDir2 + @"\ZZZ" + fld + ".xlsx";
    string pdfPath = dataDir2 + @"\ZZZ" + fld + ".pdf";

    // Save as XLSX first, then reload that file and export it as PDF.
    workbook.Save(xlsxPath, Aspose.Cells.SaveFormat.Xlsx);
    Workbook reloadedWorkbook = new Workbook(xlsxPath);
    reloadedWorkbook.Save(pdfPath, Aspose.Cells.SaveFormat.Pdf);
}
biome.pdf (2.7 MB)
Please upgrade to the latest version and share your feedback. Moreover, the shared code uses several undeclared variables that are needed to run it, so we would like to request a sample console application so that we can reproduce the issue on our end.