DHARPA-Project / kiara_plugin.topic_modelling

Mozilla Public License 2.0
0 stars 2 forks source link

Error on import_table_from_local_folder_results #6

Closed stakats closed 2 days ago

stakats commented 5 days ago

Running this cell (under heading 1.3) in the Jupyter notebook generates an error (see below).

To Reproduce

Steps to reproduce the behavior:

  1. Clone this repository and navigate to it in Terminal (here using macOS 15.0).
  2. Create a new conda environment: `conda create -n kiara_tm -c conda-forge python=3.12 notebook -y`
  3. conda activate kiara_tm
  4. pip install git+https://github.com/DHARPA-Project/kiara_plugin.topic_modelling
  5. jupyter notebook
  6. Run the cell containing `import_table_from_local_folder_results = kiara.run_job('import.table.from.local_folder_path', inputs=import_table_from_local_folder_inputs, comment=" ")`

Note that the onboarding cells under headings 1.1 and 1.2 fail with similar errors at the same point.

Expected behavior

A clear and concise description of what you expected to happen.

Desktop (please complete the following information):

Error message

---------------------------------------------------------------------------
OperationalError                          Traceback (most recent call last)
File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/sqlalchemy/engine/base.py:1967, in Connection._exec_single_context(self, dialect, context, statement, parameters)
   1966     if not evt_handled:
-> 1967         self.dialect.do_execute(
   1968             cursor, str_statement, effective_parameters, context
   1969         )
   1971 if self._has_events or self.engine._has_events:

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/sqlalchemy/engine/default.py:941, in DefaultDialect.do_execute(self, cursor, statement, parameters, context)
    940 def do_execute(self, cursor, statement, parameters, context=None):
--> 941     cursor.execute(statement, parameters)

OperationalError: table metadata has no column named metadata_item_created

The above exception was the direct cause of the following exception:

OperationalError                          Traceback (most recent call last)
File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/processing/__init__.py:384, in ModuleProcessor.job_status_updated(self, job_id, status)
    383     for val in result_values.values():
--> 384         self._kiara.data_registry.store_value(val)
    385 except Exception as e:

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/registries/data/__init__.py:542, in DataRegistry.store_value(self, value, data_store)
    540 for env_hash in _value.pedigree.environments.values():
--> 542     self._persist_environment(env_hash, store=data_store)
    544 store: DataStore = self.get_archive(archive_id_or_alias=data_store)  # type: ignore

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/registries/data/__init__.py:519, in DataRegistry._persist_environment(self, env_hash, store)
    515     raise KiaraException(
    516         f"Can't persist data environment with hash '{env_hash}': no such environment registered."
    517     )
--> 519 self._kiara.metadata_registry.register_metadata_item(
    520     key=ENVIRONMENT_MARKER_KEY, item=environment, store=store
    521 )

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/registries/metadata/__init__.py:332, in MetadataRegistry.register_metadata_item(self, key, item, reference_item_type, reference_item_key, reference_item_id, replace_existing_references, allow_multiple_references, store)
    330 mounted_store: MetadataStore = self.get_archive(archive_id_or_alias=store)  # type: ignore
--> 332 result = mounted_store.store_metadata_item(
    333     key=key,
    334     item=item,
    335     reference_item_type=reference_item_type,
    336     reference_item_key=reference_item_key,
    337     reference_item_id=reference_item_id,
    338     replace_existing_references=replace_existing_references,
    339     allow_multiple_references=allow_multiple_references,
    340 )
    341 return result

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/registries/metadata/metadata_store/__init__.py:202, in MetadataStore.store_metadata_item(self, key, item, reference_item_type, reference_item_key, reference_item_id, replace_existing_references, allow_multiple_references, store)
    200 if not metadata_item_id:
--> 202     metadata_item_id = self._store_metadata_item(
    203         key=key,
    204         value_json=data_json,
    205         value_hash=data_hash,
    206         model_type_id=model_type,
    207         model_schema_hash=model_schema_hash,
    208     )
    209     self._schema_stored_item[data_hash] = metadata_item_id

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/registries/metadata/metadata_store/sqlite_store.py:508, in SqliteMetadataStore._store_metadata_item(self, key, value_json, value_hash, model_type_id, model_schema_hash)
    507 with self.sqlite_engine.connect() as conn:
--> 508     conn.execute(sql, params)
    509     result = conn.execute(query_metadata_id, query_metadata_params)

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/sqlalchemy/engine/base.py:1418, in Connection.execute(self, statement, parameters, execution_options)
   1417 else:
-> 1418     return meth(
   1419         self,
   1420         distilled_parameters,
   1421         execution_options or NO_OPTIONS,
   1422     )

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/sqlalchemy/sql/elements.py:515, in ClauseElement._execute_on_connection(self, connection, distilled_params, execution_options)
    514         assert isinstance(self, Executable)
--> 515     return connection._execute_clauseelement(
    516         self, distilled_params, execution_options
    517     )
    518 else:

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/sqlalchemy/engine/base.py:1640, in Connection._execute_clauseelement(self, elem, distilled_parameters, execution_options)
   1632 compiled_sql, extracted_params, cache_hit = elem._compile_w_cache(
   1633     dialect=dialect,
   1634     compiled_cache=compiled_cache,
   (...)
   1638     linting=self.dialect.compiler_linting | compiler.WARN_LINTING,
   1639 )
-> 1640 ret = self._execute_context(
   1641     dialect,
   1642     dialect.execution_ctx_cls._init_compiled,
   1643     compiled_sql,
   1644     distilled_parameters,
   1645     execution_options,
   1646     compiled_sql,
   1647     distilled_parameters,
   1648     elem,
   1649     extracted_params,
   1650     cache_hit=cache_hit,
   1651 )
   1652 if has_events:

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/sqlalchemy/engine/base.py:1846, in Connection._execute_context(self, dialect, constructor, statement, parameters, execution_options, *args, **kw)
   1845 else:
-> 1846     return self._exec_single_context(
   1847         dialect, context, statement, parameters
   1848     )

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/sqlalchemy/engine/base.py:1986, in Connection._exec_single_context(self, dialect, context, statement, parameters)
   1985 except BaseException as e:
-> 1986     self._handle_dbapi_exception(
   1987         e, str_statement, effective_parameters, cursor, context
   1988     )
   1990 return result

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/sqlalchemy/engine/base.py:2355, in Connection._handle_dbapi_exception(self, e, statement, parameters, cursor, context, is_sub_exec)
   2354     assert sqlalchemy_exception is not None
-> 2355     raise sqlalchemy_exception.with_traceback(exc_info[2]) from e
   2356 else:

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/sqlalchemy/engine/base.py:1967, in Connection._exec_single_context(self, dialect, context, statement, parameters)
   1966     if not evt_handled:
-> 1967         self.dialect.do_execute(
   1968             cursor, str_statement, effective_parameters, context
   1969         )
   1971 if self._has_events or self.engine._has_events:

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/sqlalchemy/engine/default.py:941, in DefaultDialect.do_execute(self, cursor, statement, parameters, context)
    940 def do_execute(self, cursor, statement, parameters, context=None):
--> 941     cursor.execute(statement, parameters)

OperationalError: (sqlite3.OperationalError) table metadata has no column named metadata_item_created
[SQL: INSERT OR IGNORE INTO metadata (metadata_item_id, metadata_item_created, metadata_item_key, metadata_item_hash, model_type_id, model_schema_hash, metadata_value) VALUES (?, ?, ?, ?, ?, ?, ?)]
[parameters: ('2f61b7b3-510a-4512-947e-a154583c85f2', '2024-10-10T17:27:54.526169+02:00', 'environment', 'zdpuB1fgWJtuL3tUuhncW8oiE4Qswf6opvzhnjKkQ1apQo8gA', 'info.runtime.kiara_plugins', 'zdpuAqC8m4UmDtf3w5CBJoGzEtGDE17XjdFsRwLhdpSz2bmmQ', '{"environment_type":"kiara_plugins","kiara_plugins":[{"name":"kiara_plugin.core_types","version":"0.5.1"},{"name":"kiara_plugin.onboarding","version":"0.5.1"},{"name":"kiara_plugin.tabular","version":"0.5.5"},{"name":"kiara_plugin.topic_modelling","version":"0.1.dev82+gce84eda"}]}')]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

During handling of the above exception, another exception occurred:

KiaraException                            Traceback (most recent call last)
File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/processing/synchronous.py:39, in SynchronousProcessor._add_processing_task(self, job_id, module, inputs, outputs, job_log)
     38     # output_wrap._sync()
---> 39     self.job_status_updated(job_id=job_id, status=JobStatus.SUCCESS)
     40 except Exception as e:

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/processing/__init__.py:387, in ModuleProcessor.job_status_updated(self, job_id, status)
    386 log_exception(e)
--> 387 raise KiaraException(
    388     msg=f"Failed to auto-save job results for job: {job_id}",
    389     parent=e,
    390 )

KiaraException: Failed to auto-save job results for job: f9fe88fc-de9d-410b-a7c2-a09e1d3efeb2

(sqlite3.OperationalError) table metadata has no column named metadata_item_created
[SQL: INSERT OR IGNORE INTO metadata (metadata_item_id, metadata_item_created, metadata_item_key, metadata_item_hash, model_type_id, model_schema_hash, metadata_value) VALUES (?, ?, ?, ?, ?, ?, ?)]
[parameters: ('2f61b7b3-510a-4512-947e-a154583c85f2', '2024-10-10T17:27:54.526169+02:00', 'environment', 'zdpuB1fgWJtuL3tUuhncW8oiE4Qswf6opvzhnjKkQ1apQo8gA', 'info.runtime.kiara_plugins', 'zdpuAqC8m4UmDtf3w5CBJoGzEtGDE17XjdFsRwLhdpSz2bmmQ', '{"environment_type":"kiara_plugins","kiara_plugins":[{"name":"kiara_plugin.core_types","version":"0.5.1"},{"name":"kiara_plugin.onboarding","version":"0.5.1"},{"name":"kiara_plugin.tabular","version":"0.5.5"},{"name":"kiara_plugin.topic_modelling","version":"0.1.dev82+gce84eda"}]}')]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

During handling of the above exception, another exception occurred:

Exception                                 Traceback (most recent call last)
Cell In[6], line 1
----> 1 import_table_from_local_folder_results = kiara.run_job('import.table.from.local_folder_path', inputs=import_table_from_local_folder_inputs, comment=" ")

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/interfaces/python_api/kiara_api.py:139, in KiaraAPI.run_job(self, operation, inputs, comment, operation_config)
    136 if inputs is None:
    137     inputs = {}
--> 139 return self._api.run_job(
    140     operation=operation,
    141     inputs=inputs,
    142     operation_config=operation_config,
    143     comment=comment,
    144 )

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/interfaces/python_api/base_api.py:3115, in BaseAPI.run_job(self, operation, inputs, operation_config, **job_metadata)
   3112 if inputs is None:
   3113     inputs = {}
-> 3115 job_id = self.queue_job(
   3116     operation=operation,
   3117     inputs=inputs,
   3118     operation_config=operation_config,
   3119     **job_metadata,
   3120 )
   3121 return self.context.job_registry.retrieve_result(job_id=job_id)

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/interfaces/python_api/base_api.py:3080, in BaseAPI.queue_job(self, operation, inputs, operation_config, **job_metadata)
   3077 else:
   3078     manifest = _operation
-> 3080 job_id = self.queue_manifest(manifest=manifest, inputs=inputs, **job_metadata)
   3082 return job_id

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/interfaces/python_api/base_api.py:2954, in BaseAPI.queue_manifest(self, manifest, inputs, **job_metadata)
   2948     inputs = {}
   2950 job_config = self.context.job_registry.prepare_job_config(
   2951     manifest=manifest, inputs=inputs
   2952 )
-> 2954 job_id = self.context.job_registry.execute_job(
   2955     job_config=job_config, wait=False, auto_save_result=save_values
   2956 )
   2958 if job_metadata:
   2959     self.context.metadata_registry.register_job_metadata_items(
   2960         job_id=job_id, items=job_metadata
   2961     )

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/registries/jobs/__init__.py:644, in JobRegistry.execute_job(self, job_config, wait, auto_save_result)
    642 except Exception as e:
    643     log.error("error.queue_job", job_id=job_id)
--> 644     raise e
    646 if wait:
    647     self._processor.wait_for(job_id)

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/registries/jobs/__init__.py:641, in JobRegistry.execute_job(self, job_config, wait, auto_save_result)
    638 self._active_jobs[job_config.job_hash] = job_id
    640 try:
--> 641     self._processor.queue_job(job_id=job_id)
    642 except Exception as e:
    643     log.error("error.queue_job", job_id=job_id)

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/processing/__init__.py:249, in ModuleProcessor.queue_job(self, job_id)
    247 job._exception = kpe
    248 log_exception(kpe)
--> 249 raise e

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/processing/__init__.py:216, in ModuleProcessor.queue_job(self, job_id)
    213     module._set_job_registry(self._kiara.job_registry)  # type: ignore
    215 try:
--> 216     self._add_processing_task(
    217         job_id=job_id,
    218         module=module,
    219         inputs=input_values,
    220         outputs=outputs,
    221         job_log=job.job_log,
    222     )
    223     return job
    225 except Exception as e:

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/processing/synchronous.py:41, in SynchronousProcessor._add_processing_task(self, job_id, module, inputs, outputs, job_log)
     39     self.job_status_updated(job_id=job_id, status=JobStatus.SUCCESS)
     40 except Exception as e:
---> 41     self.job_status_updated(job_id=job_id, status=e)

File ~/.anaconda3/envs/kiara_tm/lib/python3.12/site-packages/kiara/processing/__init__.py:257, in ModuleProcessor.job_status_updated(self, job_id, status)
    255 job = self._active_jobs.get(job_id, None)
    256 if job is None:
--> 257     raise Exception(
    258         f"Can't retrieve active job with id '{job_id}', no such job registered."
    259     )
    261 old_status = job.status
    263 result_values = None

Exception: Can't retrieve active job with id 'f9fe88fc-de9d-410b-a7c2-a09e1d3efeb2', no such job registered.

stakats commented 2 days ago

Deleting the kiara context (`kiara context delete`) resolved this issue.