myrulezzz opened 3 months ago
It should work via the llama-index packages, for example:
from llama_index.llms.ollama import Ollama
from llama_index.multi_modal_llms.ollama import OllamaMultiModal
from lavague.core import WorldModel, ActionEngine
from lavague.core.agents import WebAgent
from lavague.drivers.selenium import SeleniumDriver
ollama_url = "[YOUR_URL]"
OLLAMA_MODEL = "[MODEL_NAME]"
OLLAMA_MM_LLM_MODEL = "[MODEL_NAME]"
mm_llm = OllamaMultiModal(model=OLLAMA_MM_LLM_MODEL, base_url=ollama_url, request_timeout=10.0)
llm = Ollama(model=OLLAMA_MODEL, base_url=ollama_url)
selenium_driver = SeleniumDriver(headless=False)
action_engine = ActionEngine(llm=llm, driver=selenium_driver)
world_model = WorldModel(mm_llm=mm_llm)
agent = WebAgent(world_model, action_engine)
agent.get("https://huggingface.co/docs")
result = agent.run("Get the first paragraph of the PEFT quicktour")
print("output:\n", result.output)
When I run it, I get this error:
from langchain.memory import ChatMessageHistory

with new imports of:

from langchain_community.chat_message_histories import ChatMessageHistory

You can use the langchain cli to automatically upgrade many imports. Please see documentation here https://python.langchain.com/v0.2/docs/versions/v0_2/
  warn_deprecated(
Traceback (most recent call last):
  File "/Users/andreasstylianou/Desktop/./ollamaindex.py", line 10, in <module>
    from llama_index.llms.ollama import Ollama
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/__init__.py", line 21, in <module>
    from llama_index.indices.common.struct_store.base import SQLDocumentContextBuilder
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/indices/__init__.py", line 4, in <module>
    from llama_index.indices.document_summary.base import DocumentSummaryIndex
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/indices/document_summary/__init__.py", line 4, in <module>
    from llama_index.indices.document_summary.base import (
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/indices/document_summary/base.py", line 14, in <module>
    from llama_index.indices.base import BaseIndex
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/indices/base.py", line 6, in <module>
    from llama_index.chat_engine.types import BaseChatEngine, ChatMode
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/chat_engine/__init__.py", line 1, in <module>
    from llama_index.chat_engine.condense_question import CondenseQuestionChatEngine
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/chat_engine/condense_question.py", line 6, in <module>
    from llama_index.chat_engine.types import (
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/chat_engine/types.py", line 14, in <module>
    from llama_index.tools import ToolOutput
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/tools/__init__.py", line 5, in <module>
    from llama_index.tools.query_plan import QueryPlanTool
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/tools/query_plan.py", line 6, in <module>
    from llama_index.response_synthesizers import BaseSynthesizer, get_response_synthesizer
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/response_synthesizers/__init__.py", line 3, in <module>
    from llama_index.response_synthesizers.accumulate import Accumulate
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/response_synthesizers/accumulate.py", line 5, in <module>
    from llama_index.indices.service_context import ServiceContext
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/indices/service_context.py", line 10, in <module>
    from llama_index.indices.prompt_helper import PromptHelper
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/indices/prompt_helper.py", line 16, in <module>
    from llama_index.llm_predictor.base import LLMMetadata
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/llm_predictor/__init__.py", line 3, in <module>
    from llama_index.llm_predictor.base import LLMPredictor
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/llm_predictor/base.py", line 18, in <module>
    from llama_index.llms.utils import LLMType, resolve_llm
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/llms/__init__.py", line 23, in <module>
    from llama_index.llms.litellm import LiteLLM
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/llms/litellm.py", line 28, in <module>
    from llama_index.llms.litellm_utils import (
  File "/opt/homebrew/lib/python3.11/site-packages/llama_index/llms/litellm_utils.py", line 4, in <module>
    from openai.openai_object import OpenAIObject
ModuleNotFoundError: No module named 'openai.openai_object'
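Note: every frame in the traceback goes through /opt/homebrew/.../site-packages/llama_index/, i.e. the legacy monolithic llama-index (pre-0.10) layout, and its litellm_utils still imports openai.openai_object, which no longer exists in openai 1.x. A quick way to check what is actually installed (a diagnostic sketch using only the standard library; the names are PyPI distribution names):

import importlib.metadata

# Print the versions of the packages involved in the traceback; the legacy
# monolith ships as "llama-index" < 0.10 with no "llama-index-core" present.
for dist in ("llama-index", "llama-index-core", "openai", "lavague"):
    try:
        print(dist, importlib.metadata.version(dist))
    except importlib.metadata.PackageNotFoundError:
        print(dist, "not installed")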
I don't get this error. Can you try installing lavague and the llama_index modules in a fresh venv and tell me if you get the same error?
Also, I believe when we tested before, we did not find any Ollama multi-modal LLMs that performed well enough with LaVague to be useful. Some open-source LLMs and embedding models have performed okay, but it has been harder to find well-performing multi-modal models.
So do you have an example with an Ollama model and embeddings?
Regards, Andreas Stylianou
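For reference, a minimal sketch of pairing an Ollama LLM with an Ollama embedding model in the ActionEngine. This is untested with LaVague: the model names and URL are placeholders, and OllamaEmbedding is assumed to come from the llama-index-embeddings-ollama package.

from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from lavague.core import ActionEngine
from lavague.drivers.selenium import SeleniumDriver

ollama_url = "[YOUR_URL]"  # placeholder, e.g. a local Ollama server
llm = Ollama(model="[MODEL_NAME]", base_url=ollama_url)
# Embedding model served by the same Ollama instance (placeholder name)
embedding = OllamaEmbedding(model_name="[EMBED_MODEL_NAME]", base_url=ollama_url)

selenium_driver = SeleniumDriver(headless=False)
# ActionEngine accepts llm and embedding overrides (see its signature below)
action_engine = ActionEngine(llm=llm, driver=selenium_driver, embedding=embedding)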
Maybe a good way to benchmark whether open models might work is to take a known page that already works with LaVague via closed models (say, youtube.com), together with an example of the controls or data detected on that page. A person could then feed the same page to an open-source model to see which controls/data it detects and responds with, so we could quickly evaluate which models are worth setting up for more robust testing.
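As a very rough illustration of the scoring half of that idea (all names hypothetical, not LaVague code):

def score_model(detected: set, reference: set) -> float:
    # Fraction of the hand-labelled reference controls the model detected
    return len(detected & reference) / len(reference) if reference else 0.0

# Hypothetical reference list for a known-good page, and a candidate's output
reference_controls = {"search_bar", "sign_in_button", "video_link"}
detected_controls = {"search_bar", "video_link"}
print(score_model(detected_controls, reference_controls))  # ~0.67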
I have a similar error when trying to run with PaliGemma, seeing references to OpenAI. I have started a fresh venv, @lyie28. Here is an example, @myrulezzz:
from lavague.core import WorldModel, ActionEngine
from lavague.core.agents import WebAgent
from lavague.drivers.selenium import SeleniumDriver
from llama_index.multi_modal_llms.huggingface import HuggingFaceMultiModal
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
embed_model = HuggingFaceEmbedding(model_name="dunzhang/stella_en_400M_v5", trust_remote_code=True)
llm = HuggingFaceMultiModal.from_model_name("google/paligemma-3b-mix-224")
mm_llm = llm
selenium_driver = SeleniumDriver()
world_model = WorldModel(mm_llm=mm_llm)
action_engine = ActionEngine(driver=selenium_driver, llm=llm, embedding=embed_model)
agent = WebAgent(world_model, action_engine)
agent.get("https://huggingface.co/docs")
agent.run("Go on the quicktour of PEFT")
I get a slightly different error:
Traceback (most recent call last):
File "./test1.py", line 26, in <module>
action_engine = ActionEngine(driver=selenium_driver, llm=llm, embedding=embed_model)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "./venv/lib/python3.12/site-packages/lavague/core/action_engine.py", line 84, in __init__
python_engine = PythonEngine(driver, llm, embedding)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "./venv/lib/python3.12/site-packages/lavague/core/python_engine.py", line 66, in __init__
self.ocr_mm_llm = ocr_mm_llm or OpenAIMultiModal(
^^^^^^^^^^^^^^^^^
File "./venv/lib/python3.12/site-packages/llama_index/multi_modal_llms/openai/base.py", line 107, in __init__
self._messages_to_prompt = messages_to_prompt or generic_messages_to_prompt
^^^^^^^^^^^^^^^^^^^^^^^^
File "./venv/lib/python3.12/site-packages/pydantic/main.py", line 865, in __setattr__
if self.__pydantic_private__ is None or name not in self.__private_attributes__:
^^^^^^^^^^^^^^^^^^^^^^^^^
File "./venv/lib/python3.12/site-packages/pydantic/main.py", line 853, in __getattr__
return super().__getattribute__(item) # Raises AttributeError if appropriate
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'OpenAIMultiModal' object has no attribute '__pydantic_private__'. Did you mean: '__pydantic_complete__'?
I can see in the source code that if a Python Engine is not provided, ActionEngine defaults to one using OpenAI.
From action_engine.py:
def __init__(
self,
driver: BaseDriver,
navigation_engine: BaseEngine = None,
python_engine: BaseEngine = None,
navigation_control: BaseEngine = None,
llm: BaseLLM = None,
embedding: BaseEmbedding = None,
retriever: BaseHtmlRetriever = None,
prompt_template: PromptTemplate = NAVIGATION_ENGINE_PROMPT_TEMPLATE.prompt_template,
extractor: BaseExtractor = DynamicExtractor(),
time_between_actions: float = 1.5,
n_attempts: int = 5,
logger: AgentLogger = None,
):
if llm is None:
llm = get_default_context().llm
if embedding is None:
embedding = get_default_context().embedding
self.driver = driver
if retriever is None:
retriever = get_default_retriever(driver, embedding=embedding)
if navigation_engine is None:
navigation_engine = NavigationEngine(
driver=driver,
llm=llm,
retriever=retriever,
prompt_template=prompt_template,
extractor=extractor,
time_between_actions=time_between_actions,
n_attempts=n_attempts,
logger=logger,
embedding=embedding,
)
if python_engine is None:
python_engine = PythonEngine(driver, llm, embedding)
and then from python_engine.py:
def __init__(
self,
driver: BaseDriver,
llm: Optional[BaseLLM] = None,
embedding: Optional[BaseEmbedding] = None,
logger: Optional[AgentLogger] = None,
clean_html: Callable[[str], str] = trafilatura.extract,
ocr_mm_llm: Optional[BaseLLM] = None,
ocr_llm: Optional[BaseLLM] = None,
display: bool = False,
batch_size: int = 5,
confidence_threshold: float = 0.85,
temp_screenshots_path="./tmp_screenshots",
n_search_attemps=10,
):
self.llm = llm or get_default_context().llm
self.embedding = embedding or get_default_context().embedding
self.clean_html = clean_html
self.driver = driver
self.logger = logger
self.display = display
self.ocr_mm_llm = ocr_mm_llm or OpenAIMultiModal(
model="gpt-4o-mini", temperature=DEFAULT_TEMPERATURE
)
self.ocr_llm = ocr_llm or self.llm
self.batch_size = batch_size
self.confidence_threshold = confidence_threshold
self.temp_screenshots_path = temp_screenshots_path
self.n_search_attempts = n_search_attemps
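If that is right, one possible workaround (untested, and assuming PythonEngine is importable from lavague.core.python_engine, as the traceback paths suggest) is to build the PythonEngine yourself with a local multi-modal model as ocr_mm_llm, so the OpenAIMultiModal default is never constructed, and pass it in through ActionEngine's python_engine parameter:

from lavague.core.python_engine import PythonEngine

# Reusing the objects from the example above; ocr_mm_llm overrides the
# gpt-4o-mini default visible in python_engine.py
python_engine = PythonEngine(
    driver=selenium_driver,
    llm=llm,
    embedding=embed_model,
    ocr_mm_llm=mm_llm,
)
action_engine = ActionEngine(
    driver=selenium_driver,
    python_engine=python_engine,
    llm=llm,
    embedding=embed_model,
)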
I will be happy to contribute changes myself if it can be confirmed that this is a bug and not a missing feature. From the code above, it seems that the local-model use case is not implemented, so it could be beyond just fixing a bug.
Can I use the Ollama Python package to interact with LaVague?
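Not directly, as far as I can tell from the signatures above: ActionEngine and WorldModel take llama-index objects (BaseLLM, BaseEmbedding), not the raw ollama client. But the llama-index Ollama wrapper used in the first example talks to the same local Ollama server, e.g. (model name is a placeholder; http://localhost:11434 is just Ollama's default endpoint):

from llama_index.llms.ollama import Ollama

# Same server the ollama Python package would use, wrapped in the
# llama-index interface that LaVague expects
llm = Ollama(model="[MODEL_NAME]", base_url="http://localhost:11434")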