A minimal reproducible example will be added later; for now, here is an outline of the steps:
New instance
Import Gene from source (will use ENSEMBL 112 by default)
Attempt to curate a new dataset but have the Gene version pinned to 110. This happens when using the cellxgene Curator.
The curation then fails with errors like:
{
"name": "IntegrityError",
"message": "duplicate key value violates unique constraint \"bionty_gene_uid_key\"
DETAIL: Key (uid)=(3BEZYDTfG7t4) already exists.
",
"stack": "---------------------------------------------------------------------------
UniqueViolation Traceback (most recent call last)
File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/backends/utils.py:105, in CursorWrapper._execute(self, sql, params, *ignored_wrapper_args)
104 else:
--> 105 return self.cursor.execute(sql, params)
UniqueViolation: duplicate key value violates unique constraint \"bionty_gene_uid_key\"
DETAIL: Key (uid)=(3BEZYDTfG7t4) already exists.
The above exception was the direct cause of the following exception:
IntegrityError Traceback (most recent call last)
Cell In[8], line 4
1 curator = pts.PerturbationCurator(
2 adata, using_key=THIS_INSTANCE
3 )
----> 4 curator.validate()
File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/pertpy_datasets/perturbation_curator.py:98, in PerturbationCurator.validate(self)
96 def validate(self) -> bool:
97 \"\"\"Validates the AnnData object against cellxgene and pertpy's requirements.\"\"\"
---> 98 return super().validate()
File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/cellxgene_lamin/curate.py:285, in Curator.validate(self)
280 if len(matching_keys) == 0:
281 raise ValueError(
282 \"Unable to find an embedding key. Please calculate an embedding.\"
283 )
--> 285 return super().validate(organism=self.organism)
File ~/PycharmProjects/lamindb/lamindb/_curate.py:553, in AnnDataCurator.validate(self, organism)
548 logger.important(
549 f\"validating metadata using registries of instance {colors.italic(self._using_key)}\"
550 )
552 # add all validated records to the current instance
--> 553 self._update_registry_all()
555 validated_var, non_validated_var = validate_categories(
556 self._adata.var.index,
557 field=self._var_field,
(...)
563 **self._kwargs, # type: ignore
564 )
565 validated_obs, non_validated_obs = validate_categories_in_df(
566 self._adata.obs,
567 fields=self.categoricals,
(...)
571 **self._kwargs,
572 )
File ~/PycharmProjects/lamindb/lamindb/_curate.py:523, in AnnDataCurator._update_registry_all(self, validated_only, **kwargs)
521 def _update_registry_all(self, validated_only: bool = True, **kwargs):
522 \"\"\"Save labels for all features.\"\"\"
--> 523 self._save_from_var_index(validated_only=validated_only, **self._kwargs)
524 for name in self._obs_fields.keys():
525 self._update_registry(name, validated_only=validated_only, **self._kwargs)
File ~/PycharmProjects/lamindb/lamindb/_curate.py:509, in AnnDataCurator._save_from_var_index(self, validated_only, organism)
505 def _save_from_var_index(
506 self, validated_only: bool = True, organism: str | None = None
507 ):
508 \"\"\"Save variable records.\"\"\"
--> 509 update_registry(
510 values=list(self._adata.var.index),
511 field=self.var_index,
512 key=\"var_index\",
513 save_function=\".add_new_from_var_index()\",
514 using_key=self._using_key,
515 validated_only=validated_only,
516 organism=organism,
517 source=self._sources.get(\"var_index\"),
518 exclude=self._exclude.get(\"var_index\"),
519 )
File ~/PycharmProjects/lamindb/lamindb/_curate.py:1522, in update_registry(values, field, key, save_function, using_key, validated_only, df, organism, dtype, source, standardize, warning, exclude, **kwargs)
1520 logger.info(f\"saving validated records of '{key}'\")
1521 settings.verbosity = \"error\"
-> 1522 ln_save(public_records)
1523 labels_saved[\"from public\"] = [
1524 getattr(r, field.field.name) for r in public_records
1525 ]
1526 non_public_labels = [i for i in values if i not in labels_saved[\"from public\"]]
File ~/PycharmProjects/lamindb/lamindb/_save.py:83, in save(records, ignore_conflicts)
79 if non_artifacts:
80 non_artifacts_old, non_artifacts_new = partition(
81 lambda r: r._state.adding or r.pk is None, non_artifacts
82 )
---> 83 bulk_create(non_artifacts_new, ignore_conflicts=ignore_conflicts)
84 if non_artifacts_old:
85 bulk_update(non_artifacts_old)
File ~/PycharmProjects/lamindb/lamindb/_save.py:114, in bulk_create(records, ignore_conflicts)
112 records_by_orm[record.__class__].append(record)
113 for registry, records in records_by_orm.items():
--> 114 registry.objects.bulk_create(records, ignore_conflicts=ignore_conflicts)
File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/models/manager.py:87, in BaseManager._get_queryset_methods.<locals>.create_method.<locals>.manager_method(self, *args, **kwargs)
85 @wraps(method)
86 def manager_method(self, *args, **kwargs):
---> 87 return getattr(self.get_queryset(), name)(*args, **kwargs)
File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/models/query.py:835, in QuerySet.bulk_create(self, objs, batch_size, ignore_conflicts, update_conflicts, update_fields, unique_fields)
833 if objs_without_pk:
834 fields = [f for f in fields if not isinstance(f, AutoField)]
--> 835 returned_columns = self._batched_insert(
836 objs_without_pk,
837 fields,
838 batch_size,
839 on_conflict=on_conflict,
840 update_fields=update_fields,
841 unique_fields=unique_fields,
842 )
843 connection = connections[self.db]
844 if (
845 connection.features.can_return_rows_from_bulk_insert
846 and on_conflict is None
847 ):
File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/models/query.py:1875, in QuerySet._batched_insert(self, objs, fields, batch_size, on_conflict, update_fields, unique_fields)
1870 for item in [objs[i : i + batch_size] for i in range(0, len(objs), batch_size)]:
1871 if bulk_return and (
1872 on_conflict is None or on_conflict == OnConflict.UPDATE
1873 ):
1874 inserted_rows.extend(
-> 1875 self._insert(
1876 item,
1877 fields=fields,
1878 using=self.db,
1879 on_conflict=on_conflict,
1880 update_fields=update_fields,
1881 unique_fields=unique_fields,
1882 returning_fields=self.model._meta.db_returning_fields,
1883 )
1884 )
1885 else:
1886 self._insert(
1887 item,
1888 fields=fields,
(...)
1892 unique_fields=unique_fields,
1893 )
File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/models/query.py:1847, in QuerySet._insert(self, objs, fields, returning_fields, raw, using, on_conflict, update_fields, unique_fields)
1840 query = sql.InsertQuery(
1841 self.model,
1842 on_conflict=on_conflict,
1843 update_fields=update_fields,
1844 unique_fields=unique_fields,
1845 )
1846 query.insert_values(fields, objs, raw=raw)
-> 1847 return query.get_compiler(using=using).execute_sql(returning_fields)
File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/models/sql/compiler.py:1836, in SQLInsertCompiler.execute_sql(self, returning_fields)
1834 with self.connection.cursor() as cursor:
1835 for sql, params in self.as_sql():
-> 1836 cursor.execute(sql, params)
1837 if not self.returning_fields:
1838 return []
File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/backends/utils.py:79, in CursorWrapper.execute(self, sql, params)
78 def execute(self, sql, params=None):
---> 79 return self._execute_with_wrappers(
80 sql, params, many=False, executor=self._execute
81 )
File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/backends/utils.py:92, in CursorWrapper._execute_with_wrappers(self, sql, params, many, executor)
90 for wrapper in reversed(self.db.execute_wrappers):
91 executor = functools.partial(wrapper, executor)
---> 92 return executor(sql, params, many, context)
File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/backends/utils.py:100, in CursorWrapper._execute(self, sql, params, *ignored_wrapper_args)
98 warnings.warn(self.APPS_NOT_READY_WARNING_MSG, category=RuntimeWarning)
99 self.db.validate_no_broken_transaction()
--> 100 with self.db.wrap_database_errors:
101 if params is None:
102 # params default might be backend specific.
103 return self.cursor.execute(sql)
File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/utils.py:91, in DatabaseErrorWrapper.__exit__(self, exc_type, exc_value, traceback)
89 if dj_exc_type not in (DataError, IntegrityError):
90 self.wrapper.errors_occurred = True
---> 91 raise dj_exc_value.with_traceback(traceback) from exc_value
File ~/miniconda3/envs/lamindb/lib/python3.11/site-packages/django/db/backends/utils.py:105, in CursorWrapper._execute(self, sql, params, *ignored_wrapper_args)
103 return self.cursor.execute(sql)
104 else:
--> 105 return self.cursor.execute(sql, params)
IntegrityError: duplicate key value violates unique constraint \"bionty_gene_uid_key\"
DETAIL: Key (uid)=(3BEZYDTfG7t4) already exists.
"
}
I am not sure what the expected outcome here should be.
Should we somehow allow for duplicated Gene records with different versions? I think that we currently don't support that and it's not easy, right?
Should this throw a more informative error and state that there are already gene records with a newer version? I think that's not useful because in this case the user wants to map against a specific version.
Should we say "sorry but Genes are already at version 112 so we map against 112 instead"? That would also be unexpected behavior.
Report
A minimal reproducible example will be added later; for now, here is an outline of the steps:
New instance
Import Gene from source (will use ENSEMBL 112 by default)
Attempt to curate a new dataset but have the Gene version pinned to 110. This happens when using the cellxgene Curator.
I am not sure what the expected outcome here should be.
Version information
No response