Closed mingjun1120 closed 2 months ago
This seems to be a duplicate of https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/422
Try this example (I have no idea if it will work, because I don't have access to Azure to test it):
"""
Basic example of scraping pipeline using SmartScraper using Azure OpenAI Key
"""
import os
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
# required environment variable in .env
# AZURE_OPENAI_KEY
graph_config = {
"llm": {
"api_key": os.environ["AZURE_OPENAI_KEY"],
"model": "azure/gpt-3.5-turbo",
},
"verbose": True,
"headless": False
}
# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************
smart_scraper_graph = SmartScraperGraph(
prompt="List me all the titles",
source="https://sport.sky.it/nba?gr=www",
config=graph_config
)
smart_scraper_graph = SmartScraperGraph(
prompt="""List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
event_end_date, event_end_time, location, event_mode, event_category,
third_party_redirect, no_of_days,
time_in_hours, hosted_or_attending, refreshments_type,
registration_available, registration_link""",
# also accepts a string with the already downloaded HTML code
source="https://www.hmhco.com/event",
config=graph_config
)
result = smart_scraper_graph.run()
print(result)
# ************************************************
# Get graph execution info
# ************************************************
graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
If it works, we'll put it in place instead of the current example.
@f-aguzzi, I got this error: "Model not supported".
Traceback (most recent call last):
File "C:\Users\GV631HJ\OneDrive - EY\Desktop\CIMB\venv\Lib\site-packages\scrapegraphai\graphs\abstract_graph.py", line 153, in
_create_llm
self.model_token = models_tokens["openai"][llm_params["model"]]
~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^
KeyError: 'azure/gpt-3.5-turbo'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\GV631HJ\OneDrive - EY\Desktop\CIMB\scrapegraph.py", line 27, in <module>
smart_scraper_graph = SmartScraperGraph(
^^^^^^^^^^^^^^^^^^
File "C:\Users\GV631HJ\OneDrive - EY\Desktop\CIMB\venv\Lib\site-packages\scrapegraphai\graphs\smart_scraper_graph.py", line 53, in __init__
super().__init__(prompt, config, source, schema)
File "C:\Users\GV631HJ\OneDrive - EY\Desktop\CIMB\venv\Lib\site-packages\scrapegraphai\graphs\abstract_graph.py", line 73, in __init__
self.llm_model = self._create_llm(config["llm"], chat=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\GV631HJ\OneDrive - EY\Desktop\CIMB\venv\Lib\site-packages\scrapegraphai\graphs\abstract_graph.py", line 155, in
_create_llm
raise KeyError("Model not supported") from exc
KeyError: 'Model not supported'
@f-aguzzi, I made some changes to the code (marked with "<--------- Updated" below), but it's still not working. I deployed gpt-4o in my Azure OpenAI environment, and the deployment is also named gpt-4o.
"""
Basic example of scraping pipeline using SmartScraper using Azure OpenAI Key
"""
import os
import requests
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
# required environment variable in .env
# AZURE_OPENAI_KEY
graph_config = {
"llm": {
"api_key": os.getenv("AZURE_OPENAI_API_KEY"), <--------- Updated
"model": "gpt-4o", <--------- Updated
},
"verbose": True,
"headless": False
}
# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************
smart_scraper_graph = SmartScraperGraph(
prompt="List me all the titles",
source="https://sport.sky.it/nba?gr=www",
config=graph_config
)
smart_scraper_graph = SmartScraperGraph(
prompt="""List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
event_end_date, event_end_time, location, event_mode, event_category,
third_party_redirect, no_of_days,
time_in_hours, hosted_or_attending, refreshments_type,
registration_available, registration_link""",
# also accepts a string with the already downloaded HTML code
source="https://www.hmhco.com/event",
config=graph_config
)
result = smart_scraper_graph.run()
print(result)
# ************************************************
# Get graph execution info
# ************************************************
graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
The error: Did not find openai_api_key
Traceback (most recent call last):
File "/teamspace/studios/this_studio/test.py", line 27, in <module>
smart_scraper_graph = SmartScraperGraph(
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/scrapegraphai/graphs/smart_scraper_graph.py", line 53, in __init__
super().__init__(prompt, config, source, schema)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/scrapegraphai/graphs/abstract_graph.py", line 73, in __init__
self.llm_model = self._create_llm(config["llm"], chat=True)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/scrapegraphai/graphs/abstract_graph.py", line 156, in _create_llm
return OpenAI(llm_params)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/scrapegraphai/models/openai.py", line 17, in __init__
super().__init__(**llm_config)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/pydantic/v1/main.py", line 341, in __init__
raise validation_error
pydantic.v1.error_wrappers.ValidationError: 1 validation error for OpenAI
__root__
Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass `openai_api_key` as a named parameter. (type=value_error)
⚡ ~
⚡ ~ python test.py
Traceback (most recent call last):
File "/teamspace/studios/this_studio/test.py", line 27, in <module>
smart_scraper_graph = SmartScraperGraph(
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/scrapegraphai/graphs/smart_scraper_graph.py", line 53, in __init__
super().__init__(prompt, config, source, schema)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/scrapegraphai/graphs/abstract_graph.py", line 73, in __init__
self.llm_model = self._create_llm(config["llm"], chat=True)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/scrapegraphai/graphs/abstract_graph.py", line 156, in _create_llm
return OpenAI(llm_params)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/scrapegraphai/models/openai.py", line 17, in __init__
super().__init__(**llm_config)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/pydantic/v1/main.py", line 341, in __init__
raise validation_error
pydantic.v1.error_wrappers.ValidationError: 1 validation error for OpenAI
__root__
Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass `openai_api_key` as a named parameter. (type=value_error)
You should have used: graph_config = { "llm": { "api_key": os.environ["AZURE_OPENAI_KEY"], "model": "azure/gpt-4o", }, "verbose": True, "headless": False }
There is a bug in how a model instance passed directly (instead of model details) is handled: the code does not assign self.model_token
when a model instance is passed directly.
https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/208ab267ceda30b4527222d9dfd61e5c5ed243c3/scrapegraphai/graphs/abstract_graph.py#L149-L151
Quick Fix
# If model instance is passed directly instead of the model details
if "model_instance" in llm_params:
try:
self.model_token = llm_params["model_tokens"]
except KeyError as exc:
raise KeyError("model_tokens not specified") from exc
return llm_params["model_instance"]
graph_config
graph_config = {
"llm": {
"model_instance": model_instance,
"model_tokens": <YOUR_MODEL_TOKEN>,
}
}
Thank you for the tip; please update to the new version.
Describe the bug I attempted to execute the
smart_scraper_schema_azure.py
script from the Scrapegraph-ai/example/azure GitHub directory, but encountered the following issue: To Reproduce (Code) I am using only the sample Azure OpenAI code.
Expected behavior It should be able to print the JSON output for the extracted data.
Desktop (please complete the following information):