Open titipata opened 2 months ago
ChatGPT 4o for information extraction
import os
import base64
import requests
from openai import OpenAI
# Set up the OpenAI API client.
# The key is taken from the OPENAI_API_KEY environment variable instead of
# being hard-coded: an empty string literal can never authenticate, and a real
# key must not be committed alongside the code.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
def encode_image(image_path):
    """Read a file and return its contents as a base64 string.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The base64 encoding of the raw file bytes, decoded to a ``str``
        (an empty string for an empty file).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Binary mode: the bytes are encoded exactly as stored on disk.
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    # b64encode returns bytes; the output is pure ASCII, so decoding is safe.
    return base64.b64encode(raw_bytes).decode("utf-8")
def ask_question_about_image(image_path, question, model="gpt-4o", max_tokens=300):
    """Ask a question about an image through the OpenAI chat completions API.

    The image is sent inline as a base64 ``data:`` URL together with the text
    of the question in a single user message.

    Args:
        image_path: Path of the image file to send.
        question: Text prompt sent alongside the image.
        model: Name of the chat model to query. Defaults to ``"gpt-4o"``.
        max_tokens: Upper bound on the length of the reply. Defaults to 300;
            raise it when a long structured answer is expected, otherwise the
            reply may be cut off.

    Returns:
        The message content of the first choice in the API response.

    Raises:
        OSError: If the image file cannot be read.
        Any exception raised by the OpenAI client for a failed request is
        propagated unchanged.
    """
    # Local import keeps this function self-contained.
    import mimetypes

    # Encode the image
    base64_image = encode_image(image_path)

    # Label the data URL with the file's real type instead of always claiming
    # JPEG; fall back to JPEG when the extension is unknown or not an image.
    mime_type, _ = mimetypes.guess_type(str(image_path))
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # Prepare the payload for the API request
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": question,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                        },
                    },
                ],
            },
        ],
        "max_tokens": max_tokens,
    }

    # Make the API request
    response = client.chat.completions.create(**payload)

    # Extract and return the model's answer
    return response.choices[0].message.content
# Image file that is sent to the model together with the prompt below.
image_path = "ca_รายการจดทะเบียน_3.jpg"
# Extraction prompt: lists 25 fields (Thai label followed by the English JSON
# key in parentheses) and instructs the model to reply with JSON only, using
# "N/A" for any field that is not visible in the image.
question = """You are provided with a scanned or photographed image of a Thai vehicle registration book (สมุดทะเบียนรถ). Your task is to extract the following information from the image.
The extracted value is typically located on the right side of the key in the document.
Extract these details:
1. วันจดทะเบียน (date_of_registration)
2. เลขทะเบียน (registration_no)
3. จังหวัด (car_province)
4. ประเภท (vehicle_use)
5. รย (type)
6. ลักษณะ (body_style)
7. ยี่ห้อรถ (manufacturer)
8. แบบ (model)
9. รุ่นปี คศ (year)
10. สี (color)
11. เลขตัวรถ (chassis_number)
12. อยู่ที่ (chassis_location)
13. ยี่ห้อเครื่องยนต์ (engine_manufacturer)
14. เลขเครื่องยนต์ (engine_number)
15. อยู่ที่ (engine_location)
16. เชื้อเพลิง (fuel_type)
17. เลขถังแก๊ส (fuel_tank_number)
18. จำนวน (cylinders)
19. ซีซี (cubic_capacity)
20. แรงม้า (horse_power)
21. จำนวนเพลาและล้อ (axles_wheels_no)
22. น้ำหนักรถ (unladen_weight)
23. น้ำหนักบรรทุก/น้ำหนักเพลา (load_capacity)
24. น้ำหนักรวม (gross_weight)
25. ที่นั่ง (seats)
Instructions:
Carefully examine the image and locate each piece of information.
If a particular field is not visible or not present in the image, use the value "N/A" for that field.
Ensure all text extracted from the image is in its original language (Thai or English) as it appears in the document.
Return the extracted information in a JSON format, using the English key names provided in parentheses.
Only return the JSON output, without any additional explanation or text.
Example of expected output format:
{
"date_of_registration": "1 ม.ค. 2566",
"registration_no": "กข 1234",
"car_province": "กรุงเทพมหานคร",
...
"seats": "4"
}
"""
# Send the image and prompt to the model, then print the prompt followed by
# the model's reply.
answer = ask_question_about_image(image_path, question)
print(f"Question: {question}")
print(f"Answer: {answer}")
jiwer), ความแม่นยำ (เป็นเปอร์เซ็นต์)
import numpy as np
import pandas as pd
import jiwer
# Load the human-annotated ground truth and the model predictions. Every cell
# is read as a string and missing cells become "" so that all comparisons
# below are purely textual.
annotated_df = pd.read_excel('annotated_results.xlsx', dtype=str).fillna("")
predicted_df = pd.read_excel('predicted_results.xlsx', dtype=str).fillna("")

# Join the two tables on image_path; shared column names get the suffix
# "_df1" for the annotations and "_df2" for the predictions.
merged_df = pd.merge(annotated_df, predicted_df, on='image_path', suffixes=('_df1', '_df2'))
columns_of_interest = [
    'date_of_registration', 'registration_no', 'car_province', 'vehicle_use', 'type', 'body_style',
    'manufacturer', 'model', 'year', 'color', 'chassis_number', 'chassis_location', 'engine_manufacturer',
    'engine_number', 'engine_location', 'fuel_type', 'fuel_tank_number', 'cylinders', 'cubic_capacity',
    'horse_power', 'axles_wheels_no', 'unladen_weight', 'load_capacity', 'gross_weight', 'seats'
]

# minor data post-processing: drop the "ค.ศ." / "คศ." marker from predicted years.
# regex=False makes the dots literal on every pandas version; when the pattern
# is treated as a regular expression each "." matches any character and can
# swallow part of the year itself.
merged_df["year_df2"] = (
    merged_df["year_df2"]
    .str.replace("ค.ศ.", "", regex=False)
    .str.replace("คศ.", "", regex=False)
)

eval_list = []
for column in columns_of_interest:
    gt_col, pred_col = f'{column}_df1', f'{column}_df2'

    # Collect one character error rate per row. The list is created once per
    # column (not once per row) so that the mean below covers every row.
    cer_scores = []
    for _, row in merged_df.iterrows():
        gt = str(row[gt_col])
        pred = str(row[pred_col])
        # Rows without a ground-truth value are skipped for CER.
        if gt:
            cer_scores.append(jiwer.cer(gt, pred))

    # NaN marks a column with no non-empty ground truth at all.
    cer = np.mean(cer_scores) if cer_scores else np.nan

    # Exact-match accuracy in percent, computed over all merged rows.
    accuracy = (merged_df[gt_col] == merged_df[pred_col]).mean() * 100

    eval_list.append({
        "column_name": column,
        "cer": cer,
        "accuracy": accuracy
    })
eval_df = pd.DataFrame(eval_list)
ตรวจสอบข้อมูลในแต่ละ key จาก prediction และ ground truth ได้ด้วยการดึง 2 หลักมาเปรียบเทียบกัน
# Side-by-side view of one field: annotation (_df1) next to prediction (_df2).
# Set col to another field name to inspect it, e.g. col = "date_of_registration".
col = "year"
merged_df[[col + suffix for suffix in ("_df1", "_df2")]]
Progress tracking (a reminder to myself, 555):
Here are some notebooks that we can prepare during the day.