Free Support Forum - aspose.com

How to extract PDF form data using Aspose.PDF Cloud Python SDK

In the previous post, we considered how to extract data from a PDF form. Let us see how to retrieve data from the filled form. Most of the code is the same as in the previous post.

import asposestoragecloud
from asposestoragecloud.apis.storage_api import StorageApi

import asposepdfcloud
from asposepdfcloud.apis.pdf_api import PdfApi
from asposepdfcloud.rest import ApiException

class PdfCloudPyDemo(object):
    """Demo wrapper holding the storage and PDF cloud API objects.

    The constructor records the storage name, remote folder, form file name
    and local folder used by the example, then builds one API client and one
    API wrapper for each of the two SDKs.
    """

    def __init__(self):
        # Get App key and App SID from https://dashboard.aspose.cloud/#/apps
        storage_client = asposestoragecloud.api_client.ApiClient(
            apiKey='***',
            appSid='***',
        )
        storage = StorageApi(storage_client)

        pdf_client = asposepdfcloud.api_client.ApiClient(
            app_key='***',
            app_sid='***',
        )
        pdf = PdfApi(pdf_client)

        # Location of the sample form (remote storage/folder and local folder).
        self.storageName = "First Storage"
        self.folderName = "demos"
        self.formName = "Student Info Form.pdf"
        self.localFolder = "C:\\tmp\\"

        # Keep both the low-level clients and the API wrappers on the instance.
        self.storage_api_client = storage_client
        self.storage_api = storage
        self.pdf_api_client = pdf_client
        self.pdf_api = pdf

    # Extract Data
    def ExtractFormData(self):
        """Placeholder for the extraction step; does nothing and returns None."""
        return None

#run an example
formDemo = PdfCloudPyDemo()
resp = formDemo.ExtractFormData()
if resp is None:
    # ExtractFormData returns None when it has no response to hand back,
    # so there is nothing to map; avoid an AttributeError on resp.fields.
    dc = {}
else:
    # Map each field name to its first value; a field without any values
    # maps to None instead of raising IndexError.
    dc = {v.name: (v.values[0] if v.values else None) for v in resp.fields.list}
print(dc)

To retrieve the data we should use a pdf_api.get_fields(...) call:

def ExtractFormData(self):
    """Fetch the form fields of the configured PDF from cloud storage.

    Checks through ``self.storage_api`` that ``<folderName>/<formName>``
    exists in ``self.storageName`` and, if so, returns the result of
    ``self.pdf_api.get_fields`` for that form.

    Returns:
        The ``get_fields`` response, or ``None`` when the file does not
        exist ('File not found' is printed) or when an ``ApiException`` is
        raised (the exception is printed).
    """
    try:
        remote_path = "/".join((self.folderName, self.formName))
        existence = self.storage_api.get_is_exist(
            path=remote_path,
            storage=self.storageName,
        )
        # Guard clause: nothing to extract when the form is not in storage.
        if not existence.file_exist.is_exist:
            print('File not found')
            return None
        return self.pdf_api.get_fields(
            self.formName,
            folder=self.folderName,
            storage=self.storageName,
        )
    except ApiException as ex:
        print(ex)
    return None

In response, we will get a data transfer object of type asposepdfcloud.models.Fields. Thus, to get the values, we need to handle its list property appropriately. For example, we can map the list to a dictionary with the field name as the key:

formDemo = PdfCloudPyDemo()
# Keep the raw response under its own name so the mapping below reads from
# the object that was actually returned by this call.
resp = formDemo.ExtractFormData()
if resp is None:
    # ExtractFormData returns None when it has no response to hand back.
    formData = {}
else:
    # Field name -> first value; a field without any values maps to None.
    formData = {v.name: (v.values[0] if v.values else None) for v in resp.fields.list}
print(formData)

This topic has been moved to the related forum: https://forum.aspose.cloud/t/how-to-extract-pdf-form-data-using-aspose-pdf-cloud-python-sdk/3541