Closed Hedongpeng closed 5 months ago
import json
import time

import requests

# Lens scholarly search endpoint that the requests below are POSTed to.
url = 'https://api.lens.org/scholarly/search'

# Headers sent with every request.
# NOTE(review): 'token' is presumably a placeholder for a real access token -
# confirm before running.
headers = {
    'Authorization': 'Bearer token',
    'Content-Type': 'application/json',
}
def load_ids(filename):
    """Read identifiers from a text file, one per line.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of the non-blank lines in file order, each with surrounding
        whitespace removed.
    """
    # 'utf-8-sig' reads plain UTF-8 and also drops a leading byte-order mark,
    # which would otherwise become part of the first identifier.
    with open(filename, 'r', encoding='utf-8-sig') as file:
        stripped = (line.strip() for line in file)
        # Blank lines would otherwise be passed on as empty identifiers.
        return [line for line in stripped if line]
def scroll(scroll_id, request_body):
    """Page through a search result set and print each page of records.

    Uses the module-level ``url``, ``headers`` and ``include_fields``.

    Args:
        scroll_id: ``None`` to send ``request_body`` unchanged as the first
            request; otherwise the scroll ID to continue from, in which case
            ``request_body`` is replaced by a scroll request.
        request_body: JSON-encoded body of the initial search request.

    Returns:
        None. Each page's records are printed. On a response that is neither
        200 nor 429 the error payload is printed and paging stops.
    """
    # Iterate instead of recursing so that a long result set or repeated
    # rate-limit retries cannot exhaust the interpreter's recursion limit.
    while True:
        if scroll_id is not None:
            # "include" must be serialised exactly once. Accept the field
            # list either as a list or as an already JSON-encoded string so
            # it never ends up as a string inside the request body.
            fields = include_fields
            if isinstance(fields, str):
                fields = json.loads(fields)
            request_body = json.dumps({
                "scroll_id": scroll_id,
                "include": fields,
            })

        # Make the API request
        response = requests.post(url, data=request_body, headers=headers)

        # Handle rate limiting by the API
        if response.status_code == 429:
            time.sleep(8)  # Wait 8 seconds before retrying
            continue

        # Stop and debug if the response is not OK
        if response.status_code != 200:
            print(response.json())
            return

        # Process the data and iterate with the new scroll_id
        data = response.json()
        records = data.get('data')
        if not records:
            # Stop on an empty page so the loop ends even if a scroll_id is
            # still being returned.
            return
        print(records)  # You can modify this part to process your data as needed

        scroll_id = data.get('scroll_id')
        if not scroll_id:
            return
# Fields requested for every record. This stays a plain list: it is placed
# inside the request dict and serialised once by json.dumps when the body is
# built. Encoding it here as well would send "include" as a string instead of
# an array, which is what the API rejects with
# "Mismatched input for fields - [include]".
# NOTE(review): an API response quoted further down in this file reports
# several of these names as unrecognized (e.g. authors.display_name,
# reference_count, ids.doi) - verify each name against the Lens field list.
include_fields = [
    "lens_id",
    "title",
    "abstract",
    "keywords",
    "date_published",
    "year_published",
    "publication_type",
    "external_ids.type",
    "authors.display_name",
    "author_count",
    "authors.affiliations.name",
    "references.lens_id",
    "referenced_by_count",
    "reference_count",
    "patent_citations.lens_id",
    "patent_citation_count",
    "ids.doi",
    "source.title",
    "source.publisher",
    "source.country",
    "source.asjc_code",
    "source.issn",
    "fields_of_study",
    "authors.affiliations.address.country_code",
    "authors.affiliations.address.city",
    "funding.country",
    "open_access.colour",
]

identifiers = load_ids('lens-ID.txt')  # Filename where IDs are stored

# Query the IDs in batches of 5000 and page through each batch's results.
for start in range(0, len(identifiers), 5000):
    ids_batch = identifiers[start:start + 5000]
    initial_request_body = json.dumps({
        "query": {"terms": {"lens_id": ids_batch}},
        "include": include_fields,
        # Ask the API to open a scroll context so that the response carries
        # a scroll_id for fetching the following pages.
        "scroll": "1m",
    })
    # Run once per batch, inside the loop, so every batch is fetched.
    scroll(scroll_id=None, request_body=initial_request_body)
# Error output:
# {'reference': '354d319c-c691-4a1b-a95f-02b8c352ce31', 'message': 'Mismatched input for fields - [include]', 'code': 400}
# {'reference': 'a8c8e39d-2c3c-408c-9c42-79132f1f7bd1', 'message': 'Mismatched input for fields - [include]', 'code': 400}
# {'reference': '268d7aa3-c0fd-4046-94c4-fc5aeb775be0', 'message': 'Mismatched input for fields - [include]', 'code': 400}
# {'reference': '78948b6a-35b0-4b53-b9bd-1ee956038ffc', 'message': 'Mismatched input for fields - [include]', 'code': 400}
# {'reference': '5efa2069-5783-40c0-a277-29ac7cb04aa5', 'message': 'Mismatched input for fields - [include]', 'code': 400}
# {'reference': '24d5e571-93d9-404e-b3f0-1df811304ace', 'message': 'Mismatched input for fields - [include]', 'code': 400}
Dear friend,
I have been attempting to use the academic API based on the guidelines provided in the manual. I have a text document containing Lens IDs, and I am trying to retrieve specific fields listed in the API manual. However, I keep encountering errors in Python. The error messages are as follows:
Failed to fetch data: {'reference': 'fd210d71-1900-452b-9b5c-7141f2c6e806', 'message': 'Unrecognized fields - [authors.display_name, referenced_by_count, reference_count, patent_citation_count, ids.doi, source.asjc_code, authors.affiliations.address.country_code, authors.affiliations.address.city]', 'code': 400}
Failed to fetch data: {'reference': '3d98d6e8-8c9e-4155-a1bc-5aade3c0eb51', 'message': 'Unrecognized fields - [authors.display_name, referenced_by_count, reference_count, patent_citation_count, ids.doi, source.asjc_code, authors.affiliations.address.country_code, authors.affiliations.address.city]', 'code': 400}

I would greatly appreciate any assistance you could provide. Thank you, and I hope you have a pleasant day.
Best regards,
Dongpeng
Here is my code:
import json

import requests

# Set the API URL and the authorization headers
url = 'https://api.lens.org/scholarly/search'

# NOTE(review): 'token' is presumably a placeholder for a real access token -
# confirm before running.
headers = {
    'Authorization': 'Bearer token',
    'Content-Type': 'application/json',
}
# Read the Lens IDs from a file
def load_ids(filename):
    """Read identifiers from a text file, one per line.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of the non-blank lines in file order, each with surrounding
        whitespace removed.
    """
    # 'utf-8-sig' reads plain UTF-8 and also drops a leading byte-order mark,
    # which would otherwise become part of the first identifier.
    with open(filename, 'r', encoding='utf-8-sig') as file:
        stripped = (line.strip() for line in file)
        # Blank lines would otherwise be passed on as empty identifiers.
        return [line for line in stripped if line]
# Define a function that sends the requests and processes the data
# NOTE(review): only the first statement of this function is visible here;
# the rest of its body appears to be missing from this paste - TODO restore it.
def fetch_data(ids): all_data = [] # Holds all of the collected data
# Split the whole list and process 5000 IDs at a time
# Save the data to a JSON file
def save_to_json(data, filename):
    """Write ``data`` to ``filename`` as JSON indented by four spaces.

    Args:
        data: JSON-serialisable object to write.
        filename: Path of the file to create or overwrite.
    """
    with open(filename, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4)
# Main function
def main():
    """Load the Lens IDs, fetch their records and save them as JSON."""
    lens_ids = load_ids('lens-ID.txt')  # Read the IDs from the file
    result_data = fetch_data(lens_ids)  # Fetch the data
    save_to_json(result_data, 'output.json')  # Save the data to a JSON file
# Run the main function only when this file is executed as a script
if __name__ == '__main__':
    main()