We're sorry, Aspose doesn't work properly without JavaScript enabled.

Free Support Forum - aspose.com

Korean Char Broken

If you extract text from a PDF and write it to a new PDF, the Korean characters come out garbled. Korea Char.png (28.1 KB)
Korean Char broken.PNG (20.4 KB)
Korean Char.PNG (1.3 KB)
source code-0.PNG (12.2 KB)
source code.PNG (12.3 KB)

@hbhur102

Could you please share the source PDF and the product version that you are using on your end?

Uploaded the sample pdf.

1 분당차병원_10125980_TG74015549.pdf (518.2 KB)

@hbhur102

I am getting an error while typing out your code, so could you please paste the code here instead of a screenshot so that we can help you further? Error.PNG (8.4 KB)

Aspose.Pdf Version is 17.12.0

                    // Extract the text of the current input PDF (dataDir\name+ext) and
                    // write a value derived from it into column D of the worksheet.
                    PdfExtractor pdfExtractor = new PdfExtractor();
                    pdfExtractor.BindPdf(dataDir + "\\" + name + ext);

                    // Use parameterless ExtractText method
                    pdfExtractor.ExtractText();

                    MemoryStream tempMemoryStream = new MemoryStream();
                    pdfExtractor.GetText(tempMemoryStream);

                    string text = "";
                    string[] sCode = null;
                    // Specify Unicode encoding type in StreamReader constructor.
                    // Disposing the reader also disposes tempMemoryStream.
                    using (StreamReader streamReader = new StreamReader(tempMemoryStream, Encoding.Unicode))
                    {
                        // GetText left the stream positioned at its end; rewind before reading.
                        streamReader.BaseStream.Seek(0, SeekOrigin.Begin);
                        text = streamReader.ReadToEnd();

                        // Split on carriage returns only. The previous call,
                        // Split('\r', (char)StringSplitOptions.None), converted the enum
                        // value 0 into the character '\0' and therefore also split on NUL
                        // characters instead of passing split options.
                        sCode = text.Split(new char[] { '\r' }, StringSplitOptions.None);

                        // First line of the extracted text, terminated with CRLF.
                        str = sCode[0];
                        str = str + "\r\n";
                    }
                    /**/
                    // NOTE(review): this overwrites the value extracted above; str0 is
                    // declared outside this snippet — confirm which value is intended.
                    str = str0[1];

                    // Row j + 1 of column D. The value is prefixed with an apostrophe.
                    // NOTE(review): presumably to force the cell to be treated as text — confirm.
                    cell = sheet.Cells["D" + (j + 1).ToString()];
                    cell.PutValue("'" + str + "");

                    // Open the template document.
                    // NOTE(review): this path is relative to the process working directory,
                    // unlike the dataDir-based paths used elsewhere — confirm this is intended.
                    Document pdfDocument = new Document(".\\biome.pdf");

                    // Create TextAbsorber object to find all instances of the input search phrase
                    TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber("name");

                    // Accept the absorber for all the pages
                    pdfDocument.Pages.Accept(textFragmentAbsorber);

                    // Get the extracted text fragments
                    TextFragmentCollection textFragmentCollection = textFragmentAbsorber.TextFragments;
                    if (textFragmentCollection.Count > 0)
                    {
                        // Get first occurrence of text and replace (Aspose.Pdf collections are 1-based)
                        TextFragment textFragment = textFragmentCollection[1];
                        // Update text and other properties
                        textFragment.Text = str;

                        // Mark the replacement font as embedded BEFORE assigning it.
                        // Previously IsEmbedded was set on the fragment's old font, which was
                        // then replaced, so the new font was never flagged for embedding.
                        // NOTE(review): str may contain Korean text; verify that the chosen
                        // font actually provides Hangul glyphs (Arial typically does not),
                        // otherwise the replaced text can render garbled.
                        var embeddedFont = FontRepository.FindFont("Arial", FontStyles.Regular);
                        embeddedFont.IsEmbedded = true;
                        textFragment.TextState.Font = embeddedFont;
                        textFragment.TextState.FontSize = 16;

                        Console.WriteLine("\nText replaced successfully.\nFile saved at " + dataDir);
                    }

                    // Create TextAbsorber object to find all instances of the input search phrase
                    textFragmentAbsorber = new TextFragmentAbsorber("number");

                    // Accept the absorber for all the pages
                    pdfDocument.Pages.Accept(textFragmentAbsorber);

                    // Get the extracted text fragments
                    textFragmentCollection = textFragmentAbsorber.TextFragments;
                    // NOTE(review): when "number" is not found, nothing is saved or appended
                    // to pdfDocument1 for this iteration — confirm that is intended.
                    if (textFragmentCollection.Count > 0)
                    {
                        // Get first occurrence of text and replace (Aspose.Pdf collections are 1-based)
                        TextFragment textFragment = textFragmentCollection[1];
                        // Update text and other properties
                        textFragment.Text = i.ToString();

                        // Mark the replacement font as embedded BEFORE assigning it.
                        // Previously IsEmbedded was set on the fragment's old font, which was
                        // then replaced, so the new font was never flagged for embedding.
                        var embeddedFont = FontRepository.FindFont("Arial", FontStyles.Regular);
                        embeddedFont.IsEmbedded = true;
                        textFragment.TextState.Font = embeddedFont;
                        textFragment.TextState.FontSize = 10;

                        //textFragment.TextState.ForegroundColor = System.Drawing.Color.Blue;

                        // Temporary per-iteration file: the edited template is saved, reopened,
                        // its pages appended to the combined document, and then deleted.
                        string tempPdfPath = dataDir + "\\biome" + i.ToString() + ".pdf";

                        pdfDocument.Save(tempPdfPath);

                        // Reopen the saved copy
                        pdfDocument = new Document(tempPdfPath);

                        // Append its pages to the combined output (pdfDocument1 is declared outside this snippet)
                        pdfDocument1.Pages.Add(pdfDocument.Pages);

                        System.IO.File.Delete(tempPdfPath);

                        Console.WriteLine("\nText replaced successfully.\nFile saved at " + dataDir);
                    }

                    //File.WriteAllText(dataDir + "output_out.txt", text);

                    // Show progress: the label text is a Korean status prefix (roughly
                    // "generating cover page") with the counter i, followed by the current
                    // file name and extension. Refresh() is called right after the update.
                    // NOTE(review): label1 is presumably a WinForms control, so Refresh()
                    // would force an immediate repaint — confirm.
                    label1.Text = "[표지 생성 중(" + i.ToString() + ")] " + name + ext;
                    label1.Refresh();

                    // Save a checkpoint when i is a multiple of 20, or when i equals
                    // Files.Length or Files.Length - 1.
                    // NOTE(review): the two Files.Length comparisons look like they cover
                    // both 0- and 1-based counters for "last file" — confirm against the loop.
                    if (i % 20 == 0 || Files.Length == i || Files.Length == i + 1)
                    {
                        // Write the workbook to <dataDir2>\ZZZ<fld>.xlsx
                        workbook.Save(dataDir2 + @"\ZZZ" + fld + ".xlsx", Aspose.Cells.SaveFormat.Xlsx);

                        // Reload the file that was just written ...
                        Workbook workbook2 = new Workbook(dataDir2 + @"\ZZZ" + fld + ".xlsx");

                        // ... and export it as a PDF with the same base name.
                        workbook2.Save(dataDir2 + @"\ZZZ" + fld + ".pdf", Aspose.Cells.SaveFormat.Pdf);
                    }

biome.pdf (2.7 MB)

@hbhur102

We request that you upgrade to the latest version and share your feedback. Moreover, the shared code references several undeclared variables that are needed to run it, so we would like to ask you to share a sample console application so that we can reproduce the issue on our end.