Open Zethson opened 2 weeks ago
```python
!lamin init --storage ./run-tests --name run-tests --schema bionty

import lamindb as ln
import bionty as bt

adata = ln.core.datasets.anndata_pbmc68k_reduced()
curator = ln.Curator.from_anndata(
    adata, var_index=bt.Gene.ensembl_gene_id, organism="human"
)
curator.validate()
curator.validate()
```
leads to the following error:
{ "name": "IntegrityError", "message": "UNIQUE constraint failed: bionty_gene.ensembl_gene_id", "stack": "--------------------------------------------------------------------------- IntegrityError Traceback (most recent call last) File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/backends/utils.py:105, in CursorWrapper._execute(self, sql, params, *ignored_wrapper_args) 104 else: --> 105 return self.cursor.execute(sql, params) File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/backends/sqlite3/base.py:354, in SQLiteCursorWrapper.execute(self, query, params) 353 query = self.convert_query(query, param_names=param_names) --> 354 return super().execute(query, params) IntegrityError: UNIQUE constraint failed: bionty_gene.ensembl_gene_id The above exception was the direct cause of the following exception: IntegrityError Traceback (most recent call last) Cell In[3], line 1 ----> 1 curator.validate() File ~/PycharmProjects/lamindb/lamindb/_curate.py:548, in AnnDataCurator.validate(self, organism) 543 logger.important( 544 f\"validating metadata using registries of instance {colors.italic(self._using_key)}\" 545 ) 547 # add all validated records to the current instance --> 548 self._update_registry_all() 550 validated_var, non_validated_var = validate_categories( 551 self._adata.var.index, 552 field=self._var_field, (...) 558 **self._kwargs, # type: ignore 559 ) 560 validated_obs, non_validated_obs = validate_categories_in_df( 561 self._adata.obs, 562 fields=self.categoricals, (...) 
566 **self._kwargs, 567 ) File ~/PycharmProjects/lamindb/lamindb/_curate.py:517, in AnnDataCurator._update_registry_all(self, validated_only, **kwargs) 515 \"\"\"Save labels for all features.\"\"\" 516 logger.info(\"saving validated records of 'var_index'\") --> 517 self._save_from_var_index(validated_only=validated_only, **self._kwargs) 518 for name in self._obs_fields.keys(): 519 logger.info(f\"saving validated terms of '{name}'\") File ~/PycharmProjects/lamindb/lamindb/_curate.py:502, in AnnDataCurator._save_from_var_index(self, validated_only, organism) 498 def _save_from_var_index( 499 self, validated_only: bool = True, organism: str | None = None 500 ): 501 \"\"\"Save variable records.\"\"\" --> 502 update_registry( 503 values=list(self._adata.var.index), 504 field=self.var_index, 505 key=\"var_index\", 506 save_function=\".add_new_from_var_index()\", 507 using_key=self._using_key, 508 validated_only=validated_only, 509 organism=organism, 510 source=self._sources.get(\"var_index\"), 511 exclude=self._exclude.get(\"var_index\"), 512 ) File ~/PycharmProjects/lamindb/lamindb/_curate.py:1512, in update_registry(values, field, key, save_function, using_key, validated_only, df, organism, dtype, source, standardize, warning, exclude, **kwargs) 1510 if source: 1511 public_records = [r for r in public_records if r.source.uid == source.uid] -> 1512 ln_save(public_records) 1513 labels_saved[\"from public\"] = [ 1514 getattr(r, field.field.name) for r in public_records 1515 ] 1516 non_public_labels = [i for i in values if i not in labels_saved[\"from public\"]] File ~/PycharmProjects/lamindb/lamindb/_save.py:83, in save(records, ignore_conflicts) 79 if non_artifacts: 80 non_artifacts_old, non_artifacts_new = partition( 81 lambda r: r._state.adding or r.pk is None, non_artifacts 82 ) ---> 83 bulk_create(non_artifacts_new, ignore_conflicts=ignore_conflicts) 84 if non_artifacts_old: 85 bulk_update(non_artifacts_old) File ~/PycharmProjects/lamindb/lamindb/_save.py:114, in 
bulk_create(records, ignore_conflicts) 112 records_by_orm[record.__class__].append(record) 113 for registry, records in records_by_orm.items(): --> 114 registry.objects.bulk_create(records, ignore_conflicts=ignore_conflicts) File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/models/manager.py:87, in BaseManager._get_queryset_methods.<locals>.create_method.<locals>.manager_method(self, *args, **kwargs) 85 @wraps(method) 86 def manager_method(self, *args, **kwargs): ---> 87 return getattr(self.get_queryset(), name)(*args, **kwargs) File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/models/query.py:835, in QuerySet.bulk_create(self, objs, batch_size, ignore_conflicts, update_conflicts, update_fields, unique_fields) 833 if objs_without_pk: 834 fields = [f for f in fields if not isinstance(f, AutoField)] --> 835 returned_columns = self._batched_insert( 836 objs_without_pk, 837 fields, 838 batch_size, 839 on_conflict=on_conflict, 840 update_fields=update_fields, 841 unique_fields=unique_fields, 842 ) 843 connection = connections[self.db] 844 if ( 845 connection.features.can_return_rows_from_bulk_insert 846 and on_conflict is None 847 ): File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/models/query.py:1875, in QuerySet._batched_insert(self, objs, fields, batch_size, on_conflict, update_fields, unique_fields) 1870 for item in [objs[i : i + batch_size] for i in range(0, len(objs), batch_size)]: 1871 if bulk_return and ( 1872 on_conflict is None or on_conflict == OnConflict.UPDATE 1873 ): 1874 inserted_rows.extend( -> 1875 self._insert( 1876 item, 1877 fields=fields, 1878 using=self.db, 1879 on_conflict=on_conflict, 1880 update_fields=update_fields, 1881 unique_fields=unique_fields, 1882 returning_fields=self.model._meta.db_returning_fields, 1883 ) 1884 ) 1885 else: 1886 self._insert( 1887 item, 1888 fields=fields, (...) 
1892 unique_fields=unique_fields, 1893 ) File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/models/query.py:1847, in QuerySet._insert(self, objs, fields, returning_fields, raw, using, on_conflict, update_fields, unique_fields) 1840 query = sql.InsertQuery( 1841 self.model, 1842 on_conflict=on_conflict, 1843 update_fields=update_fields, 1844 unique_fields=unique_fields, 1845 ) 1846 query.insert_values(fields, objs, raw=raw) -> 1847 return query.get_compiler(using=using).execute_sql(returning_fields) File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/models/sql/compiler.py:1836, in SQLInsertCompiler.execute_sql(self, returning_fields) 1834 with self.connection.cursor() as cursor: 1835 for sql, params in self.as_sql(): -> 1836 cursor.execute(sql, params) 1837 if not self.returning_fields: 1838 return [] File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/backends/utils.py:79, in CursorWrapper.execute(self, sql, params) 78 def execute(self, sql, params=None): ---> 79 return self._execute_with_wrappers( 80 sql, params, many=False, executor=self._execute 81 ) File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/backends/utils.py:92, in CursorWrapper._execute_with_wrappers(self, sql, params, many, executor) 90 for wrapper in reversed(self.db.execute_wrappers): 91 executor = functools.partial(wrapper, executor) ---> 92 return executor(sql, params, many, context) File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/backends/utils.py:100, in CursorWrapper._execute(self, sql, params, *ignored_wrapper_args) 98 warnings.warn(self.APPS_NOT_READY_WARNING_MSG, category=RuntimeWarning) 99 self.db.validate_no_broken_transaction() --> 100 with self.db.wrap_database_errors: 101 if params is None: 102 # params default might be backend specific. 
103 return self.cursor.execute(sql) File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/utils.py:91, in DatabaseErrorWrapper.__exit__(self, exc_type, exc_value, traceback) 89 if dj_exc_type not in (DataError, IntegrityError): 90 self.wrapper.errors_occurred = True ---> 91 raise dj_exc_value.with_traceback(traceback) from exc_value File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/backends/utils.py:105, in CursorWrapper._execute(self, sql, params, *ignored_wrapper_args) 103 return self.cursor.execute(sql) 104 else: --> 105 return self.cursor.execute(sql, params) File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/backends/sqlite3/base.py:354, in SQLiteCursorWrapper.execute(self, query, params) 352 param_names = list(params) if isinstance(params, Mapping) else None 353 query = self.convert_query(query, param_names=param_names) --> 354 return super().execute(query, params) IntegrityError: UNIQUE constraint failed: bionty_gene.ensembl_gene_id" }
No response
I don't seem to have this issue with synthetic example data.
Report
leads to
Version information
No response