Hello Aspose,
I am trying to convert PDF files into PNG images in a Docker + Linux environment (Ubuntu 18.04.5 LTS).
But characters in the original PDF files become corrupted when converted into PNG.
It seems that this is due to the lack of effective font settings in my script.
Could you teach me how to resolve this issue?
I have included the script that I wrote below.
As for the input file, it may take me some time to prepare it for you, for certain reasons.
// Converts every page of a PDF into a 300 DPI PNG image and a plain-text file.
//   args[0] - path of the source PDF
//   args[1] - root output directory; results go to <args[1]>/<pdf file name>/{png,txt}
String filepath = args[0];
String filename = Path.GetFileName(filepath);

// Make directories (CreateDirectory also creates the missing parent <args[1]>/<filename>)
String outRoot = args[1] + "/" + filename;
String txt_outpath = outRoot + "/" + "txt";
String png_outpath = outRoot + "/" + "png";
Directory.CreateDirectory(txt_outpath);
Directory.CreateDirectory(png_outpath);

// Register the custom font folder BEFORE opening the document.
// Constructing a FolderFontSource alone has no effect: it must be added to
// FontRepository.Sources so Aspose.PDF can look fonts up in it when rendering.
// NOTE(review): the path is relative to the process working directory — confirm
// it resolves correctly inside the Docker container.
Aspose.Pdf.Text.FolderFontSource source = new Aspose.Pdf.Text.FolderFontSource("../data/Fonts_windows");
Aspose.Pdf.Text.FontRepository.Sources.Add(source);

// Open document; dispose it once all pages have been processed
using (Aspose.Pdf.Document pdfDocument = new Aspose.Pdf.Document(filepath))
{
    pdfDocument.Save(outRoot + "/" + "result.pdf");

    // Create Aspose.Pdf.RenderingOptions to enable font hinting
    RenderingOptions opts = new RenderingOptions();
    opts.UseFontHinting = true;

    // The resolution is the same for every page, so build it once
    Resolution resolution = new Resolution(300);

    // Pages are 1-indexed in Aspose.PDF
    for (int pageCount = 1; pageCount <= pdfDocument.Pages.Count; pageCount++)
    {
        // Render the current page to PNG; the using block closes the stream
        using (FileStream imageStream = new FileStream(png_outpath + "/" + $"SplitDocumentPageByPageOut_{pageCount}.png", FileMode.Create))
        {
            // Create PNG device with the specified resolution and rendering options
            PngDevice pngDevice = new PngDevice(resolution);
            pngDevice.RenderingOptions = opts;

            // Convert the current page and save the image to the stream
            pngDevice.Process(pdfDocument.Pages[pageCount], imageStream);
        }

        // Extract the text of the current page
        TextAbsorber absorber = new TextAbsorber();
        pdfDocument.Pages[pageCount].Accept(absorber);
        string extractedText = absorber.Text;

        // Write the extracted text; the using block closes the writer even if writing fails
        using (TextWriter tw = new StreamWriter(txt_outpath + "/" + $"SplitDocumentPageByPageOut_{pageCount}.txt"))
        {
            tw.WriteLine(extractedText);
        }
    }
}