Insert Hindi Text Characters in Word Document using Mail Merge & Convert DOCX to PDF using Java

Hi,
We are having some challenges when converting DOCX to PDF. The DOCX contains Hindi text, which uses the Devanagari character set. We have checked multiple options, including fonts: the Unicode characters are rendered, but the ordering of the supplementary (combining vowel-sign) characters changes. Archive.zip (92.6 KB)

Below is a simple code snippet that reproduces the issue. We have also tried the latest version of the Word-to-PDF conversion.

import java.nio.charset.StandardCharsets;

import com.aspose.words.Document;
import com.aspose.words.DocumentBuilder;
import com.aspose.words.FieldMergeField;
import com.aspose.words.FieldMergingArgs;
import com.aspose.words.IFieldMergingCallback;
import com.aspose.words.ImageFieldMergingArgs;
import com.aspose.words.PdfCompliance;
import com.aspose.words.SaveFormat;
import com.aspose.words.SaveOptions;

/**
 * Reproduction case for Devanagari (Hindi) text rendering when a DOCX template
 * is mail-merged and then saved as PDF with Aspose.Words.
 *
 * The same Hindi word is supplied in four differently constructed strings so
 * that the rendering of each can be compared in the resulting PDF.
 */
public class AsposeHiindhi {

	/** Hindi sample word that every test value is derived from. */
	private static final String SAMPLE_WORD = "दिनांक";

	/**
	 * Runs the reproduction against the hard-coded template path.
	 *
	 * @param args unused
	 * @throws Exception if loading, merging or saving the document fails
	 */
	public static void main(String[] args) throws Exception {
		produceDocuments("/Users/rk/Products/Letters/AsposeJDK7/TestFile.docx");
	}

	/**
	 * Loads the template, merges the four test strings into the fields
	 * finBranch1..finBranch4, appends the same strings as plain paragraphs in
	 * the "Mangal" font, and saves the result as "TestFileOut.pdf".
	 *
	 * @param srcDoc path of the DOCX template containing the merge fields
	 * @throws Exception if loading, merging or saving the document fails
	 */
	public static void produceDocuments(String srcDoc) throws Exception {

		// Open the template document.
		Document doc = new Document(srcDoc);

		// Add a handler for the MergeField event.
		doc.getMailMerge().setFieldMergingCallback(new HandleMerge());

		// 1: the word as a plain string literal.
		String string1 = SAMPLE_WORD;

		// 2: the word assembled from its code units in logical (Unicode) order:
		// U+0926, U+093F, U+0928, U+093E, U+0902, U+0915.
		String string2 = new String(new char[] {'द', 'ि', 'न', 'ा', 'ं', 'क'});

		// 3: same code units, but with the vowel sign U+093F placed BEFORE the
		// consonant U+0926 (i.e. visual order) - presumably to compare how the
		// renderer treats a pre-reordered sequence.
		String string3 = new String(new char[] {'ि', 'द', 'न', 'ा', 'ं', 'क'});

		// 4: the word round-tripped through UTF-8 bytes. The decode must name
		// UTF-8 explicitly; new String(byte[]) would use the platform default
		// charset and corrupt the text on non-UTF-8 systems.
		String string4 = new String(SAMPLE_WORD.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8);

		// Execute mail merge.
		doc.getMailMerge().execute(
				new String[] {"finBranch1", "finBranch2", "finBranch3", "finBranch4"},
				new String[] {string1, string2, string3, string4});

		// Write the same values directly, bypassing mail merge, for comparison.
		DocumentBuilder builder = new DocumentBuilder(doc);
		builder.getFont().setName("Mangal");
		builder.writeln("Directwriting 1: " + string1);
		builder.writeln("Directwriting 2: " + string2);
		builder.writeln("Directwriting 3: " + string3);
		builder.writeln("Directwriting 4: " + string4);
		builder.writeln();

		// Save the document as PDF using the default PDF save options.
		SaveOptions opt = SaveOptions.createSaveOptions(SaveFormat.PDF);
		doc.save("TestFileOut.pdf", opt);

		System.out.println("==========================");
	}
}

 class HandleMerge implements IFieldMergingCallback {
    /**
     * This is called when merge field is actually merged with data in the document.
     */
    public void fieldMerging(final FieldMergingArgs args) throws Exception {
            FieldMergeField field = args.getField();
            DocumentBuilder docBuilder = new DocumentBuilder(args.getDocument());
            docBuilder.moveToMergeField(args.getFieldName());
            System.out.println("Fields Fonts: " + docBuilder.getFont().getName());
            System.out.println("Fields: " + field.getFieldName());
            docBuilder.getFont().setName("Devanagari MT");
            System.out.println("Fields Fonts: " + docBuilder.getFont().getName());
            docBuilder.write(args.getFieldValue().toString());
      }
	@Override
	public void imageFieldMerging(ImageFieldMergingArgs arg0) throws Exception {
		// TODO Auto-generated method stub
	}
}

@saikrishna.p,

Please also ZIP and attach the following font files here for further testing:

  • Mangal
  • Devanagari MT
  • DaxOT

fonts.zip (120.3 KB)

We are not using DaxOT, so you can ignore that font.

Thanks for the quick response.

@saikrishna.p,

Please upgrade to the latest 20.7 version of Aspose.Words for Java and try running the following code:

// Input DOCX and output PDF locations.
String inputPath = "E:\\Temp\\Archive\\TestFileOut.docx";
String outputPath = "E:\\temp\\Archive\\awjava-20.7.pdf";

Document document = new Document(inputPath);

// Register the HarfBuzz text shaper on the layout options before saving.
document.getLayoutOptions().setTextShaperFactory(
        com.aspose.words.shaping.harfbuzz.HarfBuzzTextShaperFactory.getInstance());

document.save(outputPath);

I have also attached the output PDF file here for your reference:

It worked, and we are considering upgrading to the latest version. Thanks for the support.

@saikrishna.p,

Good to know that you have sorted it out. Feel free to contact us at any time if you need further help or have any other issues or queries; we will be happy to assist you.