cambialens / lens-api-doc

10 stars 5 forks source link

Problem 400 while retrieving IPC in patent API #76

Closed ArdorZZ closed 1 week ago

ArdorZZ commented 1 week ago

Hi, I would like to retrieve "class_ipcr.symbol" using publication identifiers via the patent API. However, I get a 400 error back. Here's the Python code. Is there anything I can do about that?

import pandas as pd
import requests
import json

# Load the input workbook and add an empty 'IPC' column to be filled in later.
file_path = 'processed_patent_codes.xlsx'
df = pd.read_excel(file_path).assign(IPC=None)

# Endpoint and credentials used for every Lens patent search request.
lens_api_url = "https://api.lens.org/patent/search"
lens_api_token = "YOUR_LENS_API_TOKEN"
headers = {
    "Authorization": f"Bearer {lens_api_token}",
    "Content-Type": "application/json",
}

def fetch_ipc_from_lens(patent_numbers):
    """Fetch IPC classification records for a batch of patents from the Lens API.

    POSTs a ``terms`` query on the ``ids`` field to ``lens_api_url`` (using the
    module-level ``headers``) and asks the API to include only
    ``class_ipcr.symbol`` in each returned record.

    Args:
        patent_numbers: List of identifiers sent as the ``ids`` terms.

    Returns:
        The list stored under the ``"data"`` key of the JSON response, or an
        empty list when the batch is empty, the request fails, the status code
        is not 200, or the payload has no ``"data"`` key.
    """
    if not patent_numbers:
        # Nothing to look up; avoid a pointless network round trip.
        return []

    params = {
        "query": {
            "terms": {
                "ids": patent_numbers
            }
        },
        "include": ["class_ipcr.symbol"]
    }

    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.post(
            lens_api_url, headers=headers, json=params, timeout=60
        )
        if response.status_code != 200:
            print(f"Failed to fetch data from Lens API: {response.status_code}")
            # The body of an error response usually says what was wrong with
            # the request (e.g. for a 400), so surface it for diagnosis.
            print(f"Response body: {response.text}")
            return []
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Network failures and undecodable JSON are reported, not raised, so
        # the caller can carry on with the next batch.
        print(f"An error occurred while fetching IPC from Lens API: {e}")
        return []

    if isinstance(data, dict) and "data" in data:
        return data["data"]

    print("No data found for the requested patents.")
    return []

# Query the API in fixed-size batches and write the IPC symbols back into df.
# NOTE(review): the request built by fetch_ipc_from_lens sets no page size, so
# the API's default applies -- confirm a batch this large returns every match.
batch_size = 1000
for i in range(0, len(df), batch_size):
    batch_patent_numbers = df['patent_code'].iloc[i:i + batch_size].tolist()
    patent_data_list = fetch_ipc_from_lens(batch_patent_numbers)

    for patent_data in patent_data_list:
        # NOTE(review): rows are matched on the record's "lens_id"; if
        # 'patent_code' holds publication identifiers rather than Lens IDs,
        # this comparison will never match -- verify against the input file.
        patent_code = patent_data.get("lens_id")
        # `or []` also covers a key that is present but null.
        ipc_codes = patent_data.get("class_ipcr") or []
        # Skip entries without a symbol instead of aborting the whole run.
        ipc_codes_text = ', '.join(
            ipc["symbol"] for ipc in ipc_codes if ipc.get("symbol")
        )
        df.loc[df['patent_code'] == patent_code, 'IPC'] = ipc_codes_text

output_path = 'processed_patent_codes_with_ipc.xlsx'
df.to_excel(output_path, index=False)

print("Updated Excel file saved with IPC information.")
rosharma9 commented 1 week ago

Duplicate #77