I have been struggling to make my app use parallel processing to speed up the merge process. Here is my C++ code:
using namespace Aspose::Words;
using namespace System;
using namespace System::IO;
using namespace System::Collections::Generic;
// Merges every .docx file found in a fixed folder into a single document and
// saves the result as "output.pdf". Returns 0 on success and 1 if an
// exception was caught.
int main() {
    try {
        String folderPath = u"C:\\Users\\baymane\\Projects\\facture-jumlee2\\temp\\2024-07-03_11_52_46\\202405050101\\docx\\";
        auto docxFiles = Directory::GetFiles(folderPath, u"*.docx");
        auto finalDoc = MakeObject<Document>();
        int length = docxFiles->get_Length();

        // A newly created Document already holds one empty section. Drop it
        // before appending so the merged output does not start with a blank
        // page. Only do this when something will be appended, so the document
        // that gets saved is never left without any section.
        if (length > 0) {
            finalDoc->RemoveAllChildren();
        }

        // Start at 0 and increment before printing so the progress line runs
        // from 1 to length (it previously ran from 2 to length + 1).
        int i = 0;
        for (const auto& docxFile : docxFiles) {
            ++i;
            auto tempDoc = MakeObject<Document>(docxFile);
            finalDoc->AppendDocument(tempDoc, ImportFormatMode::KeepSourceFormatting);
            std::cout << "Reached document " << i << " on " << length << std::endl;
        }

        // Save the merged document as a PDF
        finalDoc->Save(u"output.pdf", SaveFormat::Pdf);
        std::cout << "Finished." << std::endl;
    }
    catch (const Exception& e) {
        std::cerr << "An error occurred: " << e->get_Message().ToUtf8String() << std::endl;
        // Report the failure to the caller instead of exiting with success.
        return 1;
    }
    return 0;
}
I have also developed a Java app that does the same thing. I tried using ExecutorService and plain threads, but my app crashes with a memory exception even though I still have enough RAM. I looked into the problem and found no leads.
Here is the Java code for parallel processing:
package or.aspose;
import com.aspose.words.*;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.Array;
import java.util.*;
import java.io.File;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Stream;
import static or.aspose.helpers.DocxHelper.isSectionEmpty;
import static or.aspose.helpers.Helpers.*;
/**
 * Merges the DOCX files of each discovered folder into one PDF per folder,
 * distributing the folders over a fixed-size thread pool.
 *
 * NOTE(review): every running task keeps a whole merged document in memory, so
 * peak heap use grows with the number of tasks. The limit that matters is the
 * JVM heap size printed by {@link #main} (Runtime.maxMemory), which may be far
 * below the machine's physical RAM - confirm the -Xmx setting.
 */
public class Main {

    /**
     * Worker body for one task: converts each folder in its chunk to a merged PDF.
     *
     * @param taskId                identifier used only in log output
     * @param docxFoldersPath       relative folder paths assigned to this task
     * @param originalFolderPath    root folder the relative paths are appended to
     * @param destinationFolderPath folder the PDFs are written to
     * @return a "Task N completed" message
     * @throws RuntimeException wrapping any exception raised while processing
     */
    public static String processDocxFolderPath(int taskId, List<String> docxFoldersPath, String originalFolderPath, String destinationFolderPath) {
        // The former Thread.sleep(5000) placeholder was removed: it only delayed
        // every task by five seconds without doing any work.
        try {
            for (String folder : docxFoldersPath) {
                System.out.println("Task_" + taskId + ": Processing " + folder);
                generatePDFsForFolder(taskId, originalFolderPath, folder, destinationFolderPath);
            }
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Preserve the interrupt status for the pool thread.
                Thread.currentThread().interrupt();
            }
            throw new RuntimeException(e);
        }
        return "Task " + taskId + " completed";
    }

    /**
     * Distributes the elements of {@code list} round-robin over {@code splits} chunks.
     *
     * @param list   elements to distribute
     * @param splits number of chunks to create; must be positive
     * @return a list of exactly {@code splits} chunks (some may be empty)
     * @throws IllegalArgumentException if {@code splits} is not positive
     */
    public static List<List<String>> divideArray(List<String> list, int splits) {
        if (splits <= 0) {
            // Fail early with a clear message instead of a division by zero
            // or an index error further down.
            throw new IllegalArgumentException("splits must be positive, got " + splits);
        }
        List<List<String>> chunks = new ArrayList<>();
        for (int c = 0; c < splits; c++) {
            chunks.add(new ArrayList<>());
        }
        for (int i = 0; i < list.size(); i++) {
            chunks.get(i % splits).add(list.get(i));
        }
        return chunks;
    }

    /**
     * Finds the folders containing DOCX files under {@code originalFolderPath},
     * splits them into {@code numTasks} chunks and processes the chunks in
     * parallel, one pool thread per chunk. Failures of individual tasks are
     * printed and do not abort the remaining tasks.
     *
     * @param originalFolderPath root folder to scan
     * @param numTasks           number of chunks and pool threads
     * @throws Exception if licensing or folder discovery fails
     */
    public static void generatePDFs(String originalFolderPath, int numTasks) throws Exception {
        License wordLicense = new License();
        wordLicense.setLicense("Aspose.WordsforJava.lic");

        ArrayList<String> docxPaths = new ArrayList<>();
        Set<String> foundFolders = new HashSet<>(); // Using Set to avoid duplicate relative paths
        findDocxFiles(originalFolderPath, docxPaths, foundFolders);

        List<List<String>> splitFoundFolders = divideArray(foundFolders.stream().toList(), numTasks);
        for (List<String> folders : splitFoundFolders) {
            System.out.print("[");
            for (String folder : folders) {
                System.out.print(folder + ",");
            }
            System.out.println("]");
        }

        String destinationFolderPath = "C:\\Users\\baymane\\IdeaProjects\\test\\resources";
        ExecutorService executorService = Executors.newFixedThreadPool(numTasks);
        List<String> results = new ArrayList<>();
        try {
            List<CompletableFuture<String>> futures = new ArrayList<>();
            for (int i = 0; i < numTasks; i++) {
                final int taskId = i;
                futures.add(CompletableFuture.supplyAsync(
                        () -> processDocxFolderPath(taskId, splitFoundFolders.get(taskId), originalFolderPath, destinationFolderPath),
                        executorService));
            }
            for (CompletableFuture<String> future : futures) {
                try {
                    results.add(future.get()); // Blocking call to get the result
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    e.printStackTrace();
                } catch (ExecutionException e) {
                    e.printStackTrace();
                }
            }
        } finally {
            // Always release the pool threads, even if submitting a task failed.
            executorService.shutdown();
        }

        // Print results
        System.out.println("Parallel execution results:");
        results.forEach(System.out::println);
    }

    /**
     * Merges all DOCX files of one folder into a single PDF in {@code targetFolder}.
     * Sections reported empty by {@code isSectionEmpty} are dropped from each
     * source document before it is appended.
     *
     * @param taskId             identifier used only in log output
     * @param originalPath       root folder
     * @param folderRelativePath folder path appended directly to {@code originalPath}
     * @param targetFolder       folder the PDF is written to
     * @throws Exception if loading, merging or saving a document fails
     */
    public static void generatePDFsForFolder(int taskId, String originalPath, String folderRelativePath, String targetFolder) throws Exception {
        Document mergedDoc = new Document();
        // Call the method to find DOCX files in the folder
        List<String> docxPaths = listDocxFiles(originalPath + folderRelativePath);
        for (int i = 0; i < docxPaths.size(); i++) {
            System.out.println("Task_" + taskId + ":" + "merging document " + (i + 1) + "/" + docxPaths.size() + " in folder " + originalPath + folderRelativePath);
            Document doc1 = new Document(docxPaths.get(i));
            doc1.cleanup();

            // Walk the sections backwards by index so that removing one neither
            // skips its neighbour nor modifies the collection under an active
            // iterator (the previous for-each loop removed while iterating).
            SectionCollection sections = doc1.getSections();
            for (int s = sections.getCount() - 1; s >= 0; s--) {
                if (isSectionEmpty(sections.get(s))) {
                    sections.removeAt(s);
                }
            }
            mergedDoc.appendDocument(doc1, ImportFormatMode.KEEP_SOURCE_FORMATTING);
        }

        // A new Document starts with one empty section, which would render as a
        // blank first page. Remove it once real content follows it; if nothing
        // was appended it is kept so the document stays valid for saving.
        if (mergedDoc.getSections().getCount() > 1) {
            mergedDoc.getFirstSection().remove();
        }

        //createFoldersInDestination(targetFolder, foundFolders);
        String documentName = targetFolder + "\\" + folderRelativePath.replace("/", "_").replace("\\", "_") + ".pdf";
        mergedDoc.save(documentName, SaveFormat.PDF);
        System.out.println("Task_" + taskId + ": Documents merged as " + documentName);
    }

    /** Prints the JVM's maximum heap size, then runs the conversion with a single task. */
    public static void main(String[] args) throws Exception {
        //getFoldersOfDocxText();
        System.out.println(Runtime.getRuntime().maxMemory());
        generatePDFs("C:\\Users\\baymane\\Projects\\facture-jumlee2\\temp", 1);
    }
}
Is there any mistake I'm making? Is there an example of threading/parallel processing of Word documents in Java? I appreciate any help. Thanks.