privateai / deid-examples

Example scripts that showcase how to use Private AI Text to de-identify, redact, hash, tokenize, mask and synthesize PII in text.
MIT License
74 stars 1 forks source link

raise HTTPError(message) requests.exceptions.HTTPError: The request returned with a 500 Internal Server Error #33

Closed upasana-mittal closed 1 month ago

upasana-mittal commented 1 month ago
from privateai_client import PAIClient
from privateai_client.objects import request_objects
import base64
import json
import requests

# Reproduction script: base64-encodes a local PDF, submits it through
# client.process_files_base64, prints the returned text and entities, and
# writes the returned file to disk.

# Create a client for the community endpoint URL and register an API key.
# "api-key" is presumably a placeholder for the real key.
client = PAIClient(url="https://api.private-ai.com/community")

client.add_api_key(api_key="api-key")

# Print whatever client.ping() returns as a connectivity check.
print(f'Able to access Private AI container: {client.ping()}')

input_pdf_path = 'Rahul_Jain_CBC_with_ESR(image).pdf'
# open(..., 'wb') below does not create directories, so 'output/' must already exist.
output_pdf_path = 'output/PAI_SYNTH_EN_medical-referral_2_redacted.pdf'

# Read the PDF as raw bytes and base64-encode it into an ASCII string.
with open(input_pdf_path, 'rb') as pdf_file:
    encoded_pdf = base64.b64encode(pdf_file.read()).decode('ascii')

# Selector of type "ENABLE" over the listed entity labels.
# Presumably this restricts detection to these labels; confirm against the API docs.
entity_type_selector = request_objects.entity_type_selector_obj(
type="ENABLE",
value=['NAME', 'NAME_GIVEN', 'NAME_FAMILY', 'DOB', 'LOCATION', 'ACCOUNT_NUMBER', 'DATE', 'EMAIL_ADDRESS',
       'HEALTHCARE_NUMBER', 'LOCATION_ADDRESS', 'ORGANIZATION_MEDICAL_FACILITY']

)

# Wrap the selector in an entity-detection object.
# NOTE(review): entity_detection is never referenced again in this script;
# the request below builds its own inline 'entity_detection' dict instead.
entity_detection = request_objects.entity_detection_obj(
entity_types=[entity_type_selector],
accuracy="standard"

)

# Submit the encoded PDF using a hand-written dict as the request object.
# NOTE(review): "entity_types" here is a flat list of label strings, whereas
# the selector built above carries separate type/value fields. This shape
# mismatch is the likely reason the server rejects the request; confirm
# against the API schema.
response = client.process_files_base64(
request_object={
    'file': {
        'data': encoded_pdf,
        'content_type': 'application/pdf'
    },
    'entity_detection': {
        "entity_types": ['NAME', 'NAME_GIVEN', 'NAME_FAMILY', 'DOB', 'LOCATION', 'ACCOUNT_NUMBER', 'DATE',
                         'EMAIL_ADDRESS',
                         'HEALTHCARE_NUMBER', 'LOCATION_ADDRESS', 'ORGANIZATION_MEDICAL_FACILITY']
    }
}

)

print('***** PROCESSED TEXT *****')
print(response.processed_text)

# NOTE(review): the header says "REDACTED TEXT", but what is printed is
# response.entities serialized as JSON.
print('***** REDACTED TEXT *****')
print(json.dumps(response.entities, indent=2))

# Decode the base64 file returned in the response and write it out as binary.
with open(output_pdf_path, 'wb') as redacted_pdf:
    redacted_pdf.write(base64.b64decode(response.processed_file))

I am getting this server error (500 Internal Server Error) when I run the script above. Please help.

guyd commented 1 month ago

Hi @upasana-mittal. Thanks for your interest in Private AI. We are currently looking into this and will get back to you shortly.

letmerecall commented 1 month ago

Hey @upasana-mittal!

Instead of passing a dictionary, can you try creating a request_objects.file_base64_obj and passing it as request_object?

i.e.,

instead of doing

# Original call from the report: "entity_types" is a flat list of label
# strings rather than a list of selector entries.
response = client.process_files_base64(
request_object={
    'file': {
        'data': encoded_pdf,
        'content_type': 'application/pdf'
    },
    'entity_detection': {
        "entity_types": ['NAME', 'NAME_GIVEN', 'NAME_FAMILY', 'DOB', 'LOCATION', 'ACCOUNT_NUMBER', 'DATE',
                         'EMAIL_ADDRESS',
                         'HEALTHCARE_NUMBER', 'LOCATION_ADDRESS', 'ORGANIZATION_MEDICAL_FACILITY']
    }
})

try

# Build the request from the client's request objects instead of a raw dict.
# encoded_pdf and entity_detection are the variables already defined in the
# reporter's script (the base64 PDF string and the entity_detection_obj).
request_obj = request_objects.file_base64_obj(
    file=request_objects.file_obj(
        data=encoded_pdf,
        content_type='application/pdf'
    ),
    entity_detection=entity_detection
)

# Pass the assembled request object to the same client call.
response = client.process_files_base64(request_object=request_obj)

If you still want to use a dictionary, the correct format for entity_detection would be:

# Dictionary form of the same request. Each "entity_types" item is a selector
# dict with "type" and "value" keys, mirroring the type=/value= arguments
# passed to entity_type_selector_obj in the reporter's script.
response = client.process_files_base64(
request_object={
    "file": {
        "data": encoded_pdf,
        "content_type": "application/pdf"
    },
    "entity_detection": {
        "entity_types": [
            {
                "type": "ENABLE",
                "value": [
                    "NAME",
                    "NAME_GIVEN",
                    "NAME_FAMILY",
                    "DOB",
                    "LOCATION",
                    "ACCOUNT_NUMBER",
                    "DATE",
                    "EMAIL_ADDRESS",
                    "HEALTHCARE_NUMBER",
                    "LOCATION_ADDRESS",
                    "ORGANIZATION_MEDICAL_FACILITY"
                ]
            }
        ]
    }
})

Also, note that in your current code the entity_detection object is not being used anywhere.

upasana-mittal commented 1 month ago

Hey @upasana-mittal!

Instead of passing a dictionary, can you try creating a request_objects.file_base64_obj and pass it to request_object?

i.e;

instead of doing

response = client.process_files_base64(
request_object={
    'file': {
        'data': encoded_pdf,
        'content_type': 'application/pdf'
    },
    'entity_detection': {
        "entity_types": ['NAME', 'NAME_GIVEN', 'NAME_FAMILY', 'DOB', 'LOCATION', 'ACCOUNT_NUMBER', 'DATE',
                         'EMAIL_ADDRESS',
                         'HEALTHCARE_NUMBER', 'LOCATION_ADDRESS', 'ORGANIZATION_MEDICAL_FACILITY']
    }
})

try

request_obj = request_objects.file_base64_obj(
    file=request_objects.file_obj(
        data=encoded_pdf,
        content_type='application/pdf'
    ),
    entity_detection=entity_detection
)

response = client.process_files_base64(request_object=request_obj)

if you still want to use a dictionary, the correct format for entity_detection would be:

response = client.process_files_base64(
request_object={
    "file": {
        "data": encoded_pdf,
        "content_type": "application/pdf"
    },
    "entity_detection": {
        "entity_types": [
            {
                "type": "ENABLE",
                "value": [
                    "NAME",
                    "NAME_GIVEN",
                    "NAME_FAMILY",
                    "DOB",
                    "LOCATION",
                    "ACCOUNT_NUMBER",
                    "DATE",
                    "EMAIL_ADDRESS",
                    "HEALTHCARE_NUMBER",
                    "LOCATION_ADDRESS",
                    "ORGANIZATION_MEDICAL_FACILITY"
                ]
            }
        ]
    }
})

Also, note that in your current code entity_detection object is not being used anywhere.

Thanks, @letmerecall! This worked for me. I will close this issue.