I have no idea how we are going to do this via email, but I would like to iterate through all pages of a pdf document, with each page running in its own thread. I would like to limit the number of threads as a parameter. Nothing I try is working.
''Mase Woods
''10/25/2024
''Main Function to Process Scheduels and Tags for ThreadQueue
Function ProcessPlanset(ByVal CompanyID As Integer, ByVal DocID As Integer, ByVal documentBytesArray As Byte()) As Boolean
Dim iReturn As Boolean = True
Dim pdfDocument As Aspose.Pdf.Document
Dim Page As Aspose.Pdf.Page
Dim itPage As PageIteration
Dim itTotal As New PageIteration
Dim iPageReturn As Boolean
Try
Dim maxConcurrentThreads As Integer = 5 ' Set the maximum number of concurrent threads
' Initialize the semaphore with the maximum number of concurrent threads
semaphore = New Semaphore(maxConcurrentThreads, maxConcurrentThreads)
' Attach event handler
AddHandler PageProcessed, AddressOf OnPageProcessed
pdfDocument = New Aspose.Pdf.Document(FullFileName)
itTotal.FileName = pdfDocument.FileName
Dim pageNum As Integer = 0
For Each Page In pdfDocument.Pages
pageNum = pageNum + 1
itPage = New PageIteration
itPage.PageNumber = Page.Number
itPage.FileName = pdfDocument.FileName
' Initialize the countdown event with the number of pages
countdown = New CountdownEvent(pdfDocument.Pages.Count)
'iPageReturn = ProcessPage(Page)
Try
ThreadPool.QueueUserWorkItem(AddressOf ProcessPage, Page)
Catch ex As Exception
Stop
End Try
itPage.EndTime = Date.Now
itPage.ElapsedTime = itPage.GetTimeDifference(itPage.StartTime, itPage.EndTime)
itTotal.AllText = itTotal.AllText & (System.IO.Path.GetFileName(itPage.FileName) & " Page Number: " & itPage.PageNumber & ": " & itPage.ElapsedTime & vbCrLf)
Next
' Wait for all threads to complete
countdown.Wait()
itTotal.EndTime = Date.Now
itTotal.ElapsedTime = itTotal.GetTimeDifference(itTotal.StartTime, itTotal.EndTime)
itTotal.AllText = itTotal.AllText & (System.IO.Path.GetFileName(itTotal.FileName) & "TOTAL TIME: " & itTotal.ElapsedTime)
Catch ex As Exception
iReturn = False
End Try
Stop
Return iReturn
End Function
''Mase Woods
''10/25/2024
''Main Function to Process Page with Scheduels and Tags
Function ProcessPage(ByVal Page As Aspose.Pdf.Page) As Boolean
’ Wait until the semaphore is available
semaphore.WaitOne()
Dim iReturn As Boolean = True
'Find all tables on page
Try
Dim pageNumber As Integer = CInt(Page.Number)
' Your file processing logic here
Console.WriteLine($"Processing page {pageNumber}")
' Create a lock object for synchronization
Dim lockObject As New Object()
SyncLock lockObject
Dim absorber As New Aspose.Pdf.Text.TableAbsorber
Try
'absorber = getAbsorber(Page)
absorber.Visit(Page)
Catch ex As Exception
Console.WriteLine(ex.Message)
End Try
If absorber.TableList.Count > 0 Then 'Has no grids on page
'Find Grids that are Schdeules areturn a list of shedules
Dim ScheduleGrids As List(Of ScheduleGrid)
ScheduleGrids = getSchedules(absorber)
'Process Shcedules
If ScheduleGrids.Count > 0 Then 'Has grids and shedules
'Process Grids, from list of Shcedules
For Each grdTable In ScheduleGrids
Dim iprocess As Boolean
iprocess = processTags(grdTable)
Next
ScheduleGrids = Nothing
End If
End If
RaiseEvent PageProcessed(Page)
End SyncLock
Catch ex As Exception
iReturn = False
Finally
' Release the semaphore
semaphore.Release()
' Signal the countdown event
countdown.Signal()
End Try
Return iReturn
End Function
''Mase Woods
''10/25/2024
''Function Raised on Thread Conmpletion
Private Sub OnPageProcessed(page As Aspose.Pdf.Page)
Console.WriteLine($“Page {page.Number} has been processed.”)
End Sub