MaartenGr / BERTopic

Leveraging BERT and c-TF-IDF to create easily interpretable topics.
https://maartengr.github.io/BERTopic/
MIT License
5.99k stars 752 forks source link

Getting Started broken #1655

Open franz101 opened 9 months ago

franz101 commented 9 months ago
from bertopic import BERTopic
from sklearn.datasets import fetch_20newsgroups

docs = fetch_20newsgroups(subset='all',  remove=('headers', 'footers', 'quotes'))['data']

topic_model = BERTopic()
topics, probs = topic_model.fit_transform(docs)
---------------------------------------------------------------------------
PydanticUserError                         Traceback (most recent call last)
Cell In[56], line 1
----> 1 from bertopic import BERTopic
      2 from sklearn.datasets import fetch_20newsgroups
      4 docs = fetch_20newsgroups(subset='all',  remove=('headers', 'footers', 'quotes'))['data']

File ~/.pyenv/versions/3.10.13/lib/python3.10/site-packages/bertopic/__init__.py:1
----> 1 from bertopic._bertopic import BERTopic
      3 __version__ = "0.16.0"
      5 __all__ = [
      6     "BERTopic",
      7 ]

File ~/.pyenv/versions/3.10.13/lib/python3.10/site-packages/bertopic/_bertopic.py:52
     50 from bertopic.backend._utils import select_backend
     51 from bertopic.vectorizers import ClassTfidfTransformer
---> 52 from bertopic.representation import BaseRepresentation
     53 from bertopic.dimensionality import BaseDimensionalityReduction
     54 from bertopic.cluster._utils import hdbscan_delegator, is_supported_hdbscan

File ~/.pyenv/versions/3.10.13/lib/python3.10/site-packages/bertopic/representation/__init__.py:38
     36 # LangChain Generator
     37 try:
---> 38     from bertopic.representation._langchain import LangChain
     39 except ModuleNotFoundError:
     40     msg = "`pip install langchain` \n\n"

File ~/.pyenv/versions/3.10.13/lib/python3.10/site-packages/bertopic/representation/_langchain.py:2
      1 import pandas as pd
----> 2 from langchain.docstore.document import Document
      3 from scipy.sparse import csr_matrix
      4 from typing import Callable, Dict, Mapping, List, Tuple, Union

File ~/.pyenv/versions/3.10.13/lib/python3.10/site-packages/langchain/__init__.py:6
      3 from importlib import metadata
      4 from typing import Optional
----> 6 from langchain.agents import MRKLChain, ReActChain, SelfAskWithSearchChain
      7 from langchain.cache import BaseCache
      8 from langchain.chains import (
      9     ConversationChain,
     10     LLMBashChain,
   (...)
     18     VectorDBQAWithSourcesChain,
     19 )

File ~/.pyenv/versions/3.10.13/lib/python3.10/site-packages/langchain/agents/__init__.py:2
      1 """Interface for agents."""
----> 2 from langchain.agents.agent import (
      3     Agent,
      4     AgentExecutor,
      5     AgentOutputParser,
      6     BaseMultiActionAgent,
      7     BaseSingleActionAgent,
      8     LLMSingleActionAgent,
      9 )
     10 from langchain.agents.agent_toolkits import (
     11     create_csv_agent,
     12     create_json_agent,
   (...)
     21     create_vectorstore_router_agent,
     22 )
     23 from langchain.agents.agent_types import AgentType

File ~/.pyenv/versions/3.10.13/lib/python3.10/site-packages/langchain/agents/agent.py:16
     13 from pydantic import BaseModel, root_validator
     15 from langchain.agents.agent_types import AgentType
---> 16 from langchain.agents.tools import InvalidTool
     17 from langchain.base_language import BaseLanguageModel
     18 from langchain.callbacks.base import BaseCallbackManager

File ~/.pyenv/versions/3.10.13/lib/python3.10/site-packages/langchain/agents/tools.py:4
      1 """Interface for tools."""
      2 from typing import Optional
----> 4 from langchain.callbacks.manager import (
      5     AsyncCallbackManagerForToolRun,
      6     CallbackManagerForToolRun,
      7 )
      8 from langchain.tools.base import BaseTool, Tool, tool
     11 class InvalidTool(BaseTool):

File ~/.pyenv/versions/3.10.13/lib/python3.10/site-packages/langchain/callbacks/__init__.py:3
      1 """Callback handlers that allow listening to events in LangChain."""
----> 3 from langchain.callbacks.aim_callback import AimCallbackHandler
      4 from langchain.callbacks.argilla_callback import ArgillaCallbackHandler
      5 from langchain.callbacks.clearml_callback import ClearMLCallbackHandler

File ~/.pyenv/versions/3.10.13/lib/python3.10/site-packages/langchain/callbacks/aim_callback.py:4
      1 from copy import deepcopy
      2 from typing import Any, Dict, List, Optional, Union
----> 4 from langchain.callbacks.base import BaseCallbackHandler
      5 from langchain.schema import AgentAction, AgentFinish, LLMResult
      8 def import_aim() -> Any:

File ~/.pyenv/versions/3.10.13/lib/python3.10/site-packages/langchain/callbacks/base.py:7
      4 from typing import Any, Dict, List, Optional, Union
      5 from uuid import UUID
----> 7 from langchain.schema import (
      8     AgentAction,
      9     AgentFinish,
     10     BaseMessage,
     11     LLMResult,
     12 )
     15 class LLMManagerMixin:
     16     """Mixin for LLM callbacks."""

File ~/.pyenv/versions/3.10.13/lib/python3.10/site-packages/langchain/schema.py:152
    148 def messages_from_dict(messages: List[dict]) -> List[BaseMessage]:
    149     return [_message_from_dict(m) for m in messages]
--> 152 class ChatGeneration(Generation):
    153     """Output of a single generation."""
    155     text = ""

File ~/.pyenv/versions/3.10.13/lib/python3.10/site-packages/langchain/schema.py:159, in ChatGeneration()
    155 text = ""
    156 message: BaseMessage
    158 @root_validator
--> 159 def set_text(cls, values: Dict[str, Any]) -> Dict[str, Any]:
    160     values["text"] = values["message"].content
    161     return values

File ~/.pyenv/versions/3.10.13/lib/python3.10/site-packages/pydantic/deprecated/class_validators.py:222, in root_validator(pre, skip_on_failure, allow_reuse, *__args)
    212 warn(
    213     'Pydantic V1 style `@root_validator` validators are deprecated.'
    214     ' You should migrate to Pydantic V2 style `@model_validator` validators,'
   (...)
    217     stacklevel=2,
    218 )
    220 if __args:
    221     # Ensure a nice error is raised if someone attempts to use the bare decorator
--> 222     return root_validator()(*__args)  # type: ignore
    224 if allow_reuse is True:  # pragma: no cover
    225     warn(_ALLOW_REUSE_WARNING_MESSAGE, DeprecationWarning)

File ~/.pyenv/versions/3.10.13/lib/python3.10/site-packages/pydantic/deprecated/class_validators.py:228, in root_validator(pre, skip_on_failure, allow_reuse, *__args)
    226 mode: Literal['before', 'after'] = 'before' if pre is True else 'after'
    227 if pre is False and skip_on_failure is not True:
--> 228     raise PydanticUserError(
    229         'If you use `@root_validator` with pre=False (the default) you MUST specify `skip_on_failure=True`.'
    230         ' Note that `@root_validator` is deprecated and should be replaced with `@model_validator`.',
    231         code='root-validator-pre-skip',
    232     )
    234 wrap = partial(_decorators_v1.make_v1_generic_root_validator, pre=pre)
    236 def dec(f: Callable[..., Any] | classmethod[Any, Any, Any] | staticmethod[Any, Any]) -> Any:

PydanticUserError: If you use `@root_validator` with pre=False (the default) you MUST specify `skip_on_failure=True`. Note that `@root_validator` is deprecated and should be replaced with `@model_validator`.

For further information visit https://errors.pydantic.dev/2.4/u/root-validator-pre-skip
franz101 commented 9 months ago

Workaround: !pip install --upgrade pydantic\<2 (this pins Pydantic below version 2, avoiding the `@root_validator` error raised above)

MaartenGr commented 9 months ago

Thanks for sharing! Indeed, this seems to be an issue with LangChain. Updating LangChain might also be a solution.