mindsdb / mindsdb

The platform for building AI from enterprise data
https://mindsdb.com

[Bug, docker]: Too many error messages related to [nltk_data] #6664

Closed. kunaljubce closed this issue 1 year ago

kunaljubce commented 1 year ago

Short description of current behavior

Pulling and running the mindsdb Docker image throws multiple error messages related to nltk_data at startup.

kunal@LAPTOP-9VNPL0QQ:/mnt/c/Users/kunal/OneDrive/Documents/projects/mindsdb (staging)$ docker run -p 47334:47334 -p 47335:47335 mindsdb/mindsdb
Unable to find image 'mindsdb/mindsdb:latest' locally
latest: Pulling from mindsdb/mindsdb
a404e5416296: Pull complete
d70bbcbd9fa5: Pull complete
2f8d87f6e9b5: Pull complete
f0869fc58250: Pull complete
84168e770412: Pull complete
3fabd8144577: Pull complete
f3e3d6df7b96: Pull complete
4bf40298b20f: Pull complete
cb6529f60581: Pull complete
343d7172667d: Pull complete
Digest: sha256:5f5bc0838c8c90cd0232881c5249bf2d21c30e16a32dc1b21fbd755cb24da5e7
Status: Downloaded newer image for mindsdb/mindsdb:latest
[nltk_data] Error loading punkt: <urlopen error EOF occurred in
[nltk_data]     violation of protocol (_ssl.c:1129)>
[nltk_data] Error loading stopwords: <urlopen error EOF occurred in
[nltk_data]     violation of protocol (_ssl.c:1129)>
/opt/conda/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory
  warn(f"Failed to load image Python extension: {e}")

 ✓ telemetry enabled

Applying database migrations:

Performing database changes:
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('ai_table',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('updated_at', sa.DateTime(), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.Column('name', sa.String(), nullable=True),
sa.Column('integration_name', sa.String(), nullable=True),
sa.Column('integration_query', sa.String(), nullable=True),
sa.Column('query_fields', mindsdb.interfaces.storage.db.Json(), nullable=True),
sa.Column('predictor_name', sa.String(), nullable=True),
sa.Column('predictor_columns', mindsdb.interfaces.storage.db.Json(), nullable=True),
sa.Column('company_id', sa.Integer(), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_table('datasource',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('updated_at', sa.DateTime(), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.Column('name', sa.String(), nullable=True),
sa.Column('data', sa.String(), nullable=True),
sa.Column('creation_info', sa.String(), nullable=True),
sa.Column('analysis', sa.String(), nullable=True),
sa.Column('company_id', sa.Integer(), nullable=True),
sa.Column('mindsdb_version', sa.String(), nullable=True),
sa.Column('datasources_version', sa.String(), nullable=True),
sa.Column('integration_id', sa.Integer(), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_table('integration',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('updated_at', sa.DateTime(), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.Column('name', sa.String(), nullable=False),
sa.Column('data', mindsdb.interfaces.storage.db.Json(), nullable=True),
sa.Column('company_id', sa.Integer(), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_table('log',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.Column('log_type', sa.String(), nullable=True),
sa.Column('source', sa.String(), nullable=True),
sa.Column('company_id', sa.Integer(), nullable=True),
sa.Column('payload', sa.String(), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_table('predictor',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('updated_at', sa.DateTime(), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.Column('name', sa.String(), nullable=True),
sa.Column('data', mindsdb.interfaces.storage.db.Json(), nullable=True),
sa.Column('to_predict', mindsdb.interfaces.storage.db.Array(), nullable=True),
sa.Column('company_id', sa.Integer(), nullable=True),
sa.Column('mindsdb_version', sa.String(), nullable=True),
sa.Column('native_version', sa.String(), nullable=True),
sa.Column('datasource_id', sa.Integer(), nullable=True),
sa.Column('is_custom', sa.Boolean(), nullable=True),
sa.Column('learn_args', mindsdb.interfaces.storage.db.Json(), nullable=True),
sa.Column('update_status', sa.String(), nullable=True),
sa.Column('json_ai', mindsdb.interfaces.storage.db.Json(), nullable=True),
sa.Column('code', sa.String(), nullable=True),
sa.Column('lightwood_version', sa.String(), nullable=True),
sa.Column('dtype_dict', mindsdb.interfaces.storage.db.Json(), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_table('semaphor',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('updated_at', sa.DateTime(), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.Column('entity_type', sa.String(), nullable=True),
sa.Column('entity_id', sa.Integer(), nullable=True),
sa.Column('action', sa.String(), nullable=True),
sa.Column('company_id', sa.Integer(), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_table('stream',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(), nullable=False),
sa.Column('stream_in', sa.String(), nullable=False),
sa.Column('stream_out', sa.String(), nullable=False),
sa.Column('anomaly_stream', sa.String(), nullable=True),
sa.Column('integration', sa.String(), nullable=True),
sa.Column('predictor', sa.String(), nullable=False),
sa.Column('company_id', sa.Integer(), nullable=True),
sa.Column('updated_at', sa.DateTime(), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.Column('type', sa.String(), nullable=True),
sa.Column('connection_info', mindsdb.interfaces.storage.db.Json(), nullable=True),
sa.Column('learning_params', mindsdb.interfaces.storage.db.Json(), nullable=True),
sa.Column('learning_threshold', sa.Integer(), nullable=True),
sa.PrimaryKeyConstraint('id')
)
# ### end Alembic commands ###
Version 23.6.4.0
Configuration file:
   /root/mindsdb_config.json
Storage path:
   /root/mdb_storage
Dependencies for the handler 'phoenix' are not installed by default.
 If you want to use "phoenix" please install "['pyphoenix', 'phoenixdb', 'protobuf==3.20.*']"
Dependencies for the handler 'sqreamdb' are not installed by default.
 If you want to use "sqreamdb" please install "['pysqream>=3.2.5', 'pysqream_sqlalchemy>=0.8']"
Dependencies for the handler 'rocket_chat' are not installed by default.
 If you want to use "rocket_chat" please install "['rocketchat_API']"
Dependencies for the handler 'plaid' are not installed by default.
 If you want to use "plaid" please install "['plaid-python']"
Dependencies for the handler 'reddit' are not installed by default.
 If you want to use "reddit" please install "['praw']"
Dependencies for the handler 'confluence' are not installed by default.
 If you want to use "confluence" please install "['atlassian-python-api']"
Dependencies for the handler 'ludwig' are not installed by default.
 If you want to use "ludwig" please install "['ludwig[distributed]>=0.5.2', 'ray<=1.13.0', 'mindsdb>=22.10.2.1', 'mindsdb_sql >= 0.4.4,<0.5.0', 'hyperopt', 'dill', 'dask']"
Dependencies for the handler 'mendeley' are not installed by default.
 If you want to use "mendeley" please install "['mendeley']"
Dependencies for the handler 'access' are not installed by default.
 If you want to use "access" please install "['pyodbc', 'sqlalchemy-access']"
Dependencies for the handler 'druid' are not installed by default.
 If you want to use "druid" please install "['pydruid', 'sqlalchemy']"
Dependencies for the handler 'mlflow' are not installed by default.
 If you want to use "mlflow" please install "['mlflow', 'mindsdb>=22.10.2.1']"
Dependencies for the handler 'empress' are not installed by default.
 If you want to use "empress" please install "['pyodbc']"
Dependencies for the handler 'surrealdb' are not installed by default.
 If you want to use "surrealdb" please install "['pysurrealdb', 'websocket']"
Dependencies for the handler 'maxdb' are not installed by default.
 If you want to use "maxdb" please install "['jaydebeapi']"
Dependencies for the handler 'autokeras' are not installed by default.
 If you want to use "autokeras" please install "[]"
Dependencies for the handler 'cohere' are not installed by default.
 If you want to use "cohere" please install "['cohere==4.5.1']"
Dependencies for the handler 'gitlab' are not installed by default.
 If you want to use "gitlab" please install "['python-gitlab']"
Dependencies for the handler 'ignite' are not installed by default.
 If you want to use "ignite" please install "['pyignite']"
Dependencies for the handler 'altibase' are not installed by default.
 If you want to use "altibase" please install "['jaydebeapi']"
Dependencies for the handler 'hsqldb' are not installed by default.
 If you want to use "hsqldb" please install "['pyodbc==4.0.34']"
Dependencies for the handler 'rockset' are not installed by default.
 If you want to use "rockset" please install "['mysql-connector-python', 'rockset']"
Dependencies for the handler 'utilities' are not installed by default.
 If you want to use "utilities" please install "[]"
Dependencies for the handler 'derby' are not installed by default.
 If you want to use "derby" please install "['jaydebeapi']"
Dependencies for the handler 'github' are not installed by default.
 If you want to use "github" please install "['pygithub']"
Dependencies for the handler 'sendinblue' are not installed by default.
 If you want to use "sendinblue" please install "['sib_api_v3_sdk']"
Dependencies for the handler 'monkeylearn' are not installed by default.
 If you want to use "monkeylearn" please install "['monkeylearn==3.6.0', 'mindsdb>=22.10.2.1']"
Dependencies for the handler 'ingres' are not installed by default.
 If you want to use "ingres" please install "['pyodbc', 'sqlalchemy<2.0.0', 'ingres_sa_dialect==0.3']"
Dependencies for the handler 'jira' are not installed by default.
 If you want to use "jira" please install "['atlassian-python-api']"
Dependencies for the handler 'informix' are not installed by default.
 If you want to use "informix" please install "['IfxPy', 'sqlalchemy-informix']"
Dependencies for the handler 'binance' are not installed by default.
 If you want to use "binance" please install "['binance-connector']"
Dependencies for the handler 'cloud_spanner' are not installed by default.
 If you want to use "cloud_spanner" please install "['google-cloud-spanner']"
Dependencies for the handler 'stripe' are not installed by default.
 If you want to use "stripe" please install "['stripe']"
Dependencies for the handler 'TPOT' are not installed by default.
 If you want to use "TPOT" please install "['tpot<=0.11.7', 'dill']"
Dependencies for the handler 'statsforecast' are not installed by default.
 If you want to use "statsforecast" please install "['statsforecast>=1.4.0, <2.0', 'hierarchicalforecast<1.0']"
Dependencies for the handler 'shopify' are not installed by default.
 If you want to use "shopify" please install "['ShopifyAPI']"
Dependencies for the handler 'eventstoredb' are not installed by default.
 If you want to use "eventstoredb" please install "[]"
Dependencies for the handler 'strava' are not installed by default.
 If you want to use "strava" please install "['stravalib']"
Dependencies for the handler 'neuralforecast' are not installed by default.
 If you want to use "neuralforecast" please install "['neuralforecast>=1.4.0, <1.5.0', 'hierarchicalforecast<1.0', 'hyperopt<1.0']"
Dependencies for the handler 'nuo_jdbc' are not installed by default.
 If you want to use "nuo_jdbc" please install "['jaydebeapi']"
Dependencies for the handler 'newsapi' are not installed by default.
 If you want to use "newsapi" please install "['newsapi-python']"
Dependencies for the handler 'quickbooks' are not installed by default.
 If you want to use "quickbooks" please install "['qbosdk']"
Dependencies for the handler 'autosklearn' are not installed by default.
 If you want to use "autosklearn" please install "['auto-sklearn', 'mindsdb-evaluator>=0.0.6', 'dill']"
Dependencies for the handler 'paypal' are not installed by default.
 If you want to use "paypal" please install "['paypalrestsdk']"
Dependencies for the handler 'writer' are not installed by default.
 If you want to use "writer" please install "['langchain==0.0.186', 'pydantic==1.10.8', 'sentence_transformers']"
Dependencies for the handler 'dremio' are not installed by default.
 If you want to use "dremio" please install "['sqlalchemy_dremio']"
Dependencies for the handler 'FLAML' are not installed by default.
 If you want to use "FLAML" please install "['flaml<=1.2.3', 'dill']"
Dependencies for the handler 'llama_index' are not installed by default.
 If you want to use "llama_index" please install "['llama-index==0.6.11', 'langchain<=0.0.192', 'openai<=0.28.0', 'html2text']"
http API: starting...
mysql API: starting...
mongodb API: starting...
jobs API: starting...
chatbot API: starting...
[nltk_data] Error loading punkt: <urlopen error EOF occurred in
[nltk_data]     violation of protocol (_ssl.c:1129)>[nltk_data] Error loading punkt: <urlopen error EOF occurred in
[nltk_data]     violation of protocol (_ssl.c:1129)>

[nltk_data] Error loading punkt: <urlopen error EOF occurred in
[nltk_data]     violation of protocol (_ssl.c:1129)>
[nltk_data] Error loading stopwords: <urlopen error EOF occurred in
[nltk_data]     violation of protocol (_ssl.c:1129)>[nltk_data] Error loading stopwords: <urlopen error EOF occurred in
[nltk_data]     violation of protocol (_ssl.c:1129)>

[nltk_data] Error loading stopwords: <urlopen error EOF occurred in
[nltk_data]     violation of protocol (_ssl.c:1129)>
/opt/conda/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory
  warn(f"Failed to load image Python extension: {e}")
/opt/conda/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory
  warn(f"Failed to load image Python extension: {e}")
/opt/conda/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory
  warn(f"Failed to load image Python extension: {e}")

Video or screenshots

(screenshot of the startup output showing the repeated [nltk_data] error messages)

Expected behavior

Either the underlying nltk_data download failure should be fixed, or these error messages should be suppressed so they do not flood the startup logs.
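
For context, the "[nltk_data] Error loading ..." lines are emitted by NLTK's downloader when it cannot reach the download server (here the TLS handshake is cut short), and every worker process prints its own copy. Below is a minimal sketch of a guard that only attempts the download when the resource is actually missing and logs a single warning on failure. It assumes MindsDB or one of its dependencies triggers these downloads at startup; ensure_nltk_resource is a hypothetical helper, not an existing MindsDB function.

import logging

import nltk

logger = logging.getLogger(__name__)

def ensure_nltk_resource(lookup_path: str, package_id: str) -> bool:
    """Hypothetical helper: fetch an NLTK resource only if it is missing.

    lookup_path -- path for nltk.data.find, e.g. "tokenizers/punkt"
    package_id  -- id for nltk.download, e.g. "punkt"
    Returns True if the resource is available afterwards.
    """
    try:
        nltk.data.find(lookup_path)  # already on disk, nothing to do
        return True
    except LookupError:
        pass
    # quiet=True silences progress output; on failure nltk.download returns
    # False rather than raising, e.g. when a proxy breaks the TLS handshake.
    ok = nltk.download(package_id, quiet=True)
    if not ok:
        logger.warning("NLTK resource %s could not be downloaded; "
                       "related NLP features may be unavailable.", package_id)
    return ok

ensure_nltk_resource("tokenizers/punkt", "punkt")
ensure_nltk_resource("corpora/stopwords", "stopwords")

Even with such a guard, NLTK itself still prints one error line per failed attempt, so pre-downloading the data into the image (see the comment below) is probably the cleaner fix.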

How to reproduce the error

docker run -p 47334:47334 -p 47335:47335 mindsdb/mindsdb

Anything else?

No response

tomhuds commented 1 year ago
  1. Confirm that the nltk downloads are the cause of the error.
  2. Explore whether we can pre-download the nltk data into the Docker image (see the sketch below).
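
On point 2, here is a minimal sketch of a build-time pre-download step, assuming the image build has network access and that a script like this is invoked from the Dockerfile (for example with a RUN python download_nltk_data.py step); the script name and target directory are illustrative, not the current Dockerfile layout.

# download_nltk_data.py -- hypothetical build-time helper, run while building
# the Docker image so containers never download NLTK data at runtime.
import sys

import nltk

# Resources the startup logs show being requested.
RESOURCES = ["punkt", "stopwords"]

# Default search location when running as root; NLTK also honours NLTK_DATA.
TARGET_DIR = "/root/nltk_data"

failed = [pkg for pkg in RESOURCES
          if not nltk.download(pkg, download_dir=TARGET_DIR, quiet=True)]

if failed:
    # Fail the image build loudly instead of deferring the error to runtime.
    sys.exit(f"Could not pre-download NLTK resources: {', '.join(failed)}")

Assuming the runtime code only calls nltk.download() when nltk.data.find() cannot locate a resource (the usual pattern), baking the data into the image means the downloader is never contacted and the [nltk_data] error lines disappear, even in offline or proxy-restricted environments.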