ScrapeGraphAI / Scrapegraph-ai

Python scraper based on AI
https://scrapegraphai.com
MIT License
14.59k stars 1.19k forks source link

json.decoder.JSONDecodeError: Expecting value: line 10 column 5 (char 743) #262

Closed david-strejc closed 4 months ago

david-strejc commented 4 months ago

david @ blackbox:Programming/searchgraphai $ python first_try.py

Traceback (most recent call last): File "/home/david/.local/lib/python3.10/site-packages/langchain_core/output_parsers/json.py", line 66, in parse_result return parse_json_markdown(text) File "/home/david/.local/lib/python3.10/site-packages/langchain_core/utils/json.py", line 147, in parse_json_markdown return _parse_json(json_str, parser=parser) File "/home/david/.local/lib/python3.10/site-packages/langchain_core/utils/json.py", line 160, in _parse_json return parser(json_str) File "/home/david/.local/lib/python3.10/site-packages/langchain_core/utils/json.py", line 120, in parse_partial_json return json.loads(s, strict=strict) File "/usr/lib/python3.10/json/__init__.py", line 359, in loads return cls(**kw).decode(s) File "/usr/lib/python3.10/json/decoder.py", line 337, in decode obj, end = self.raw_decode(s, idx=_w(s, 0).end()) File "/usr/lib/python3.10/json/decoder.py", line 355, in raw_decode raise JSONDecodeError("Expecting value", s, err.value) from None json.decoder.JSONDecodeError: Expecting value: line 10 column 5 (char 743)

The above exception was the direct cause of the following exception:

Traceback (most recent call last): File "/home/david/Work/Programming/searchgraphai/first_try.py", line 24, in result = search_graph.run() File "/home/david/.local/lib/python3.10/site-packages/scrapegraphai/graphs/search_graph.py", line 120, in run self.final_state, self.execution_info = self.graph.execute(inputs) File "/home/david/.local/lib/python3.10/site-packages/scrapegraphai/graphs/base_graph.py", line 107, in execute result = current_node.execute(state) File "/home/david/.local/lib/python3.10/site-packages/scrapegraphai/nodes/graph_iterator_node.py", line 73, in execute state = asyncio.run(self._async_execute(state, batchsize)) File "/usr/lib/python3.10/asyncio/runners.py", line 44, in run return loop.run_until_complete(main) File "/usr/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete return future.result() File "/home/david/.local/lib/python3.10/site-packages/scrapegraphai/nodes/graph_iterator_node.py", line 128, in _async_execute answers = await tqdm.gather( File "/home/david/.local/lib/python3.10/site-packages/tqdm/asyncio.py", line 79, in gather res = [await f for f in cls.as_completed(ifs, loop=loop, timeout=timeout, File "/home/david/.local/lib/python3.10/site-packages/tqdm/asyncio.py", line 79, in res = [await f for f in cls.as_completed(ifs, loop=loop, timeout=timeout, File "/usr/lib/python3.10/asyncio/tasks.py", line 571, in _wait_for_one return f.result() # May raise f.exception(). 
File "/home/david/.local/lib/python3.10/site-packages/tqdm/asyncio.py", line 76, in wrap_awaitable return i, await f File "/home/david/.local/lib/python3.10/site-packages/scrapegraphai/nodes/graph_iterator_node.py", line 117, in _async_run return await asyncio.to_thread(graph.run) File "/usr/lib/python3.10/asyncio/threads.py", line 25, in to_thread return await loop.run_in_executor(None, func_call) File "/usr/lib/python3.10/concurrent/futures/thread.py", line 58, in run result = self.fn(*self.args, *self.kwargs) File "/home/david/.local/lib/python3.10/site-packages/scrapegraphai/graphs/smart_scraper_graph.py", line 112, in run self.final_state, self.execution_info = self.graph.execute(inputs) File "/home/david/.local/lib/python3.10/site-packages/scrapegraphai/graphs/base_graph.py", line 107, in execute result = current_node.execute(state) File "/home/david/.local/lib/python3.10/site-packages/scrapegraphai/nodes/generate_answer_node.py", line 135, in execute answer = map_chain.invoke({"question": user_prompt}) File "/home/david/.local/lib/python3.10/site-packages/langchain_core/runnables/base.py", line 3142, in invoke output = {key: future.result() for key, future in zip(steps, futures)} File "/home/david/.local/lib/python3.10/site-packages/langchain_core/runnables/base.py", line 3142, in output = {key: future.result() for key, future in zip(steps, futures)} File "/usr/lib/python3.10/concurrent/futures/_base.py", line 451, in result return self.get_result() File "/usr/lib/python3.10/concurrent/futures/_base.py", line 403, in get_result raise self._exception File "/usr/lib/python3.10/concurrent/futures/thread.py", line 58, in run result = self.fn(self.args, self.kwargs) File "/home/david/.local/lib/python3.10/site-packages/langchain_core/runnables/base.py", line 2499, in invoke input = step.invoke( File "/home/david/.local/lib/python3.10/site-packages/langchain_core/output_parsers/base.py", line 169, in invoke return self._call_with_config( File 
"/home/david/.local/lib/python3.10/site-packages/langchain_core/runnables/base.py", line 1626, in _call_with_config context.run( File "/home/david/.local/lib/python3.10/site-packages/langchain_core/runnables/config.py", line 347, in call_func_with_variable_args return func(input, kwargs) # type: ignore[call-arg] File "/home/david/.local/lib/python3.10/site-packages/langchain_core/output_parsers/base.py", line 170, in lambda inner_input: self.parse_result( File "/home/david/.local/lib/python3.10/site-packages/langchain_core/output_parsers/json.py", line 69, in parse_result raise OutputParserException(msg, llm_output=text) from e langchain_core.exceptions.OutputParserException: Invalid json output: Here is the JSON object:

{
  "title": null,
  "content": null,
  "links": [
    "https://honestcooking.com/",
    "https://www.facebook.com/sharer.php?u=https://honestcooking.com/chioggia-beet-and-fennel-salad/",
    "https://pinterest.com/pin/create/bookmarklet/?url=https://honestcooking.com/chioggia-beet-and-fennel-salad/&media=https://honestcooking.com/wp-content/uploads/2012/02/chioggia-beet-and-fennel-salad-495-400.jpg",
    "https://twitter.com/intent/tweet?text=Chioggia%20Beet%20and%20Fennel%20Salad&via=honestcooking&url=https://honestcooking.com/chioggia-beet-and-fennel-salad/",
    "mailto:?subject=Chioggia%20Beet%20and%20Fennel%20Salad&body=Chioggia%20Beet%20and%20Fennel%20Salad%20https://honestcooking.com/chioggia-beet-and-fennel-salad/",
    ...
  ],
  "images": [
    "https://honestcooking.com/wp-content/uploads/2020/09/Honest-Cooking-New-Logo-1-Line.png",
    "https://honestcooking.com/wp-content/uploads/2020/09/Honest-Cooking-New-Logo-1-Line.png",
    ...
  ]
}

david @ blackbox:Programming/searchgraphai $ david @ blackbox:Programming/searchgraphai $ david @ blackbox:Programming/searchgraphai $
david @ blackbox:Programming/searchgraphai $ cat first_try.py from scrapegraphai.graphs import SearchGraph

Define the configuration for the graph

graph_config = { "llm": { "model": "groq/llama3-8b-8192", "api_key": "", "temperature": 0 }, "embeddings": { "model": "ollama/nomic-embed-text", "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "max_results": 5, }

Create the SearchGraph instance

search_graph = SearchGraph( prompt="List me all the traditional recipes from Chioggia", config=graph_config )

Run the graph

result = search_graph.run() print(result) david @ blackbox:Programming/searchgraphai $

f-aguzzi commented 4 months ago

Is this the complete JSON? Does it actually contain ... at the end of links and images or is this a shortened version?

deox1111 commented 4 months ago

> Is this the complete JSON? Does it actually contain ... at the end of links and images or is this a shortened version?

I have the same problem; the output actually contains a literal `...`.

json.decoder.JSONDecodeError: Expecting value: line 8 column 5 (char 703) langchain_core.exceptions.OutputParserException: Invalid json output: Here is the JSON object:

{
  "links": [
    "https://honestcooking.com/",
    "https://www.facebook.com/sharer.php?u=https://honestcooking.com/chioggia-beet-and-fennel-salad/",
    "https://pinterest.com/pin/create/bookmarklet/?url=https://honestcooking.com/chioggia-beet-and-fennel-salad/&media=https://honestcooking.com/wp-content/uploads/2012/02/chioggia-beet-and-fennel-salad-495-400.jpg",
    "https://twitter.com/intent/tweet?text=Chioggia%20Beet%20and%20Fennel%20Salad&via=honestcooking&url=https://honestcooking.com/chioggia-beet-and-fennel-salad/",
    "mailto:?subject=Chioggia%20Beet%20and%20Fennel%20Salad&body=Chioggia%20Beet%20and%20Fennel%20Salad%20https://honestcooking.com/chioggia-beet-and-fennel-salad/",
    ...
  ],
  "images": [
    "https://honestcooking.com/wp-content/uploads/2020/09/Honest-Cooking-New-Logo-1-Line.png",
    "https://honestcooking.com/wp-content/uploads/2020/09/Honest-Cooking-New-Logo-1-Line.png",
    ...
  ]
}
PeriniM commented 4 months ago

Hey @david-strejc, try setting "format": "json" in the llm dict and see if you still get the error. Sometimes this is needed with local models.

deox1111 commented 4 months ago

> Hei @david-strejc try set the "format":"json" in the llm dict and see if you still get the error. Sometimes with local models is needed

That works, but when you raise max_results above 2 it starts throwing JSON errors (2 is fine; probably an API issue).

python3 first.py
{'recipes': [{'name': 'Chioggia Beet Salad', 'description': 'This simple chioggia beet salad is super simple, letting the beautiful bright colors and gently sweet crispness of the beets shine. It takes mere minutes to prepare, and is great as a side or appetizer.', 'ingredients': ['2 tablespoons slivered almonds or sliced', '1 chioggia beet', '2 handfuls arugula rocket', '1 oz goat cheese'], 'instructions': ["Gently toast the almonds in a dry skillet/frying pan or under a broiler/grill. Keep a close eye on them so they turn golden brown but don't burn then set aside to cool.", 'Peel the outside layer off the chioggia beet then carefully make thin slices with a mandolin or wide vegetable peeler. Try to make as complete circles as you can but even part slices are good.', 'Place the arugula on two plates and top with the slices of beet, crumbled goat cheese and the toasted almonds.', 'Shake together the vinaigrette ingredients (oil, vinegar and maple syrup) in a small jar or whisk in a small bowl. Drizzle over the salad and serve.']}, {'title': 'Roasted Chioggia Beet and Gorgonzola Salad', 'description': 'This simple fall salad features chioggia beets, a beautiful candy stripe beet, that is both sweet and earthy. The beets are paired with spicy arugula and creamy goronzola for a well balanced salad that is anything but ordinary.', 'ingredients': [{'name': '1 large bunch (about 1 lb) of chioggia (or any kind) beets', 'quantity': '1 large bunch (about 1 lb)'}, {'name': '3 cups loosely packed arugula', 'quantity': '3 cups'}, {'name': '1/4 small red onion, very thinly sliced', 'quantity': '1/4 small'}, {'name': '1/4 cup crumbled gorgonzola or blue cheese', 'quantity': '1/4 cup'}, {'name': '4-5 sprigs thyme, leaves removed from stalks', 'quantity': '4-5 sprigs'}, {'name': 'olive oil', 'quantity': ''}, {'name': 'balsamic glaze or a good balsamic vinegar', 'quantity': ''}, {'name': 'flaky sea salt, to taste', 'quantity': 'to taste'}], 'instructions': [{'step': "1. 
Preheat oven to 400F. Remove beet greens and save for later use. Wrap beets individually (or in sets of 2 if small) in aluminum foil and place on a baking sheet. Leave the skins on for this. Roast in oven for 45-60 minutes, until cooked through but still firm. Cooking time depends on the size of the beets. If you prefer not to use foil, you can place beets unwrapped in a baking dish with sides, filled with about a 1/2 inch of water and follow the same instructions. You will lose some of the nutrients in the beets to the water, but it's a good alternative to using foil. When ready, remove from oven, and open foil packets. Let cool until you can handle them. Peel with fingers or a peeler (the peels should come off very easily now). Note, this can be done up to 3 days ahead. Store beets whole in the fridge in an airtight container until ready to use.", 'step_number': 1}, {'step': '2. Slice beets thinly either using a sharp knife or mandolin into 1/8-inch thick circles. Add arugula to a medium platter or bowl. Top with beets, sliced onion and gorgonzola. Sprinkle with a pinch of sea salt and thyme, then give a little toss.', 'step_number': 2}, {'step': "3. Drizzle a little olive oil and balsamic glaze or balsamic vinegar if you don't have the glaze. Serve immediately.", 'step_number': 3}]}]}
from scrapegraphai.graphs import SearchGraph

# Reproduction script: builds a SearchGraph from a config dict, runs it,
# and prints whatever run() returns.

# Define the configuration for the graph
graph_config = {
    "llm": {
        "model": "groq/llama3-8b-8192",
        "api_key": "",  # empty string as posted (presumably redacted)
        "temperature": 0,
    },
    "embeddings": {
        "model": "ollama/nomic-embed-text",
        "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
    },
    # The commenter reports JSON errors once this is raised above 2.
    "max_results": 2,
    # NOTE(review): "format" is a top-level key of graph_config here, not an
    # entry inside the "llm" dict as the thread's suggestion describes.
    # Confirm which placement the library actually reads.
    "format":"json"
}

# Create the SearchGraph instance
search_graph = SearchGraph(
    prompt="List me all the traditional recipes from Chioggia",
    config=graph_config,
)

# Run the graph
result = search_graph.run()
print(result)
VinciGit00 commented 4 months ago

Hi, we are trying to fix it; please install the new beta.

VinciGit00 commented 4 months ago

OK, it should be fixed now. Please check the main branch and update.