LorenFrankLab / spyglass

Neuroscience data analysis framework for reproducible research built by Loren Frank Lab at UCSF
https://lorenfranklab.github.io/spyglass/
MIT License
94 stars 43 forks source link

AutomaticCuration.populate Query Syntax Error in spikesorting #1110

Closed emreybroyles closed 2 months ago

emreybroyles commented 2 months ago

Describe the bug sgss.AutomaticCuration.populate([(sgss.AutomaticCurationSelection & autocuration_key).proj()]) produces a SQL syntax error, likely due to the presence of many nested dictionaries in curation_labels and quality_metrics (at least, this is what ChatGPT told me)

To Reproduce Steps to reproduce the behavior:

This error is on file 01_spikesorting_batch_SC-Error.ipynb at file path /home/ebroyles/Src/spyglass/notebooks/eb_notebooks/01_spikesorting_batch_SC-Error.ipynb See error in cell 23

Click to expand/collapse error stack ```python --------------------------------------------------------------------------- QuerySyntaxError Traceback (most recent call last) Cell In [23], [line 102](vscode-notebook-cell:?execution_count=23&line=102) [100] autocuration_key.update({"auto_curation_params_name": auto_curation_params_name}) [101] sgss.AutomaticCurationSelection.insert1(autocuration_key, skip_duplicates=True) --> [102] sgss.AutomaticCuration.populate([(sgss.AutomaticCurationSelection & autocuration_key).proj()]) [103] auto_curation_key = (sgss.AutomaticCuration & autocuration_key).fetch1("auto_curation_key") [104] auto_curation_out_key = (sgss.Curation & auto_curation_key).fetch1("KEY") File ~/Src/spyglass/src/spyglass/utils/dj_mixin.py:612, in SpyglassMixin.populate(self, *restrictions, **kwargs) [610] if use_transact: # Pass single-process populate to super [611] kwargs["processes"] = processes --> [612] return super().populate(*restrictions, **kwargs) [613] else: # No transaction protection, use bare make [614] for key in keys: File ~/anaconda3/envs/spyglass/lib/python3.9/site-packages/datajoint/autopopulate.py:241, in AutoPopulate.populate(self, suppress_errors, return_exception_objects, reserve_jobs, order, limit, max_calls, display_progress, processes, make_kwargs, *restrictions) [237] if processes == 1: [238] for key in ( [239] if display_progress else keys [240] ): --> [241] error = self._populate1(key, jobs, **populate_kwargs) [242] if error is not None: [243] error_list.append(error) File ~/anaconda3/envs/spyglass/lib/python3.9/site-packages/datajoint/autopopulate.py:292, in AutoPopulate._populate1(self, key, jobs, suppress_errors, return_exception_objects, make_kwargs) [290] self.__class__._allow_insert = True [291] try: --> [292] make(dict(key), **(make_kwargs or {})) [293] as error: [294] try: File ~/Src/spyglass/src/spyglass/spikesorting/v0/spikesorting_curation.py:836, in AutomaticCuration.make(self, key) [834] c_key = (SpikeSorting & 
key).fetch("KEY")[0] [835] curation_key = {item: key[item] for item in key if item in c_key} --> [836] key["auto_curation_key"] = Curation.insert_curation( [837] curation_key, [838] parent_curation_id=parent_curation_id, [839] labels=labels, [840] merge_groups=merge_groups, [841] metrics=metrics, [842] description="auto curated", [843] ) [845] self.insert1(key) File ~/Src/spyglass/src/spyglass/spikesorting/v0/spikesorting_curation.py:149, in Curation.insert_curation(sorting_key, parent_curation_id, labels, merge_groups, metrics, description) [146] Curation.insert1(sorting_key, skip_duplicates=True) [148] # get the primary key for this curation --> [149] c_key = (Curation & sorting_key).fetch1("KEY") [150] curation_key = {item: sorting_key[item] for item in c_key} [152] return curation_key File ~/Src/spyglass/src/spyglass/utils/dj_mixin.py:791, in SpyglassMixin.fetch1(self, log_fetch, *args, **kwargs) [789] def fetch1(self, *args, log_fetch=True, **kwargs): [790] """Log fetch1 for export.""" --> [791] ret = super().fetch1(*args, **kwargs) [792] if log_fetch: [793] self._log_fetch(*args, **kwargs) File ~/anaconda3/envs/spyglass/lib/python3.9/site-packages/datajoint/fetch.py:348, in Fetch1.__call__(self, squeeze, download_path, *attrs) [346] else: # fetch some attributes, return as tuple [347] attributes = [a for a in attrs if not is_key(a)] --> [348] result = self._expression.proj(*attributes).fetch( [349] squeeze=squeeze, download_path=download_path, format="array" [350] ) [351] != 1: [352] raise DataJointError( [353] "fetch1 should only return one tuple. 
%d tuples found" % len(result) [354] ) File ~/Src/spyglass/src/spyglass/utils/dj_mixin.py:784, in SpyglassMixin.fetch(self, log_fetch, *args, **kwargs) [782] def fetch(self, *args, log_fetch=True, **kwargs): [783] """Log fetch for export.""" --> [784] ret = super().fetch(*args, **kwargs) [785] if log_fetch: [786] self._log_fetch(*args, **kwargs) File ~/anaconda3/envs/spyglass/lib/python3.9/site-packages/datajoint/fetch.py:258, in Fetch.__call__(self, offset, limit, order_by, format, as_dict, squeeze, download_path, *attrs) [256] == 1 else return_values [257] else: # fetch all attributes as a numpy.record_array or pandas.DataFrame --> [258] cur = self._expression.cursor( [259] as_dict=as_dict, limit=limit, offset=offset, order_by=order_by [260] ) [261] heading = self._expression.heading [262] if as_dict: File ~/anaconda3/envs/spyglass/lib/python3.9/site-packages/datajoint/expression.py:633, in QueryExpression.cursor(self, offset, limit, order_by, as_dict) [631] sql += " LIMIT %d" % limit + (" OFFSET %d" % offset if offset else "") [632] logger.debug(sql) --> [633] return self.connection.query(sql, as_dict=as_dict) File ~/anaconda3/envs/spyglass/lib/python3.9/site-packages/datajoint/connection.py:340, in Connection.query(self, query, args, as_dict, suppress_warnings, reconnect) [338] cursor = self._conn.cursor(cursor=cursor_class) [339] try: --> [340] self._execute_query(cursor, query, args, suppress_warnings) [341] except errors.LostConnectionError: [342] if not reconnect: File ~/anaconda3/envs/spyglass/lib/python3.9/site-packages/datajoint/connection.py:296, in Connection._execute_query(cursor, query, args, suppress_warnings) [294] cursor.execute(query, args) [295] except client.err.Error as err: --> [296] raise translate_query_error(err, query) QuerySyntaxError: ("You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '1: ['noise', 'reject', 'mua', 'mua', 'mua'], 2: ['noise', 'reject', 
'mua', 'mua'' at line 1", 'SELECT `curation_id`,`nwb_file_name`,`sort_group_id`,`sort_interval_name`,`preproc_params_name`,`team_name`,`sorter`,`sorter_params_name`,`artifact_removed_interval_list_name` FROM `spikesorting_curation`.`curation` WHERE ( (`nwb_file_name`="SC3820230606_copy_.nwb")AND(`sort_group_id`=14)AND(`sort_interval_name`="pos 1 valid times")AND(`preproc_params_name`="franklab_tetrode_hippocampus_min_seg")AND(`team_name`="sc_eb")AND(`sorter`="mountainsort4")AND(`sorter_params_name`="franklab_tetrode_hippocampus_30KHz_tmp")AND(`artifact_removed_interval_list_name`="SC3820230606_copy_.nwb_pos 1 valid times_100_franklab_tetrode_hippocampus_min_seg_ampl_100_prop_02_2ms_artifact_removed_valid_times")AND(`curation_id`=1)AND(`parent_curation_id`=0)AND(`description`="auto curated")AND(`curation_labels`={1: [\'noise\', \'reject\', \'mua\', \'mua\', \'mua\'], 2: [\'noise\', \'reject\', \'mua\', \'mua\', \'mua\'], 3: [\'noise\', \'reject\', \'mua\', \'mua\', \'mua\']})AND(`merge_groups`="[]")AND(`quality_metrics`={\'snr\': {\'1\': 3.440615653991699, \'2\': 3.4431686401367188, \'3\': 3.4531750679016113}, \'isi_violation\': {\'1\': 0.03923002662142001, \'2\': 0.04031183557760454, \'3\': 0.045673686881502154}, \'nn_isolation\': {\'1\': 0.9829000000000001, \'2\': 0.9776999999999999, \'3\': 0.9763999999999999}, \'nn_noise_overlap\': {\'1\': 0.5006, \'2\': 0.4776, \'3\': 0.5015}, \'peak_offset\': {\'1\': 0.0, \'2\': 0.0, \'3\': 0.0}, \'num_spikes\': {\'1\': 34184.0, \'2\': 35276.0, \'3\': 35470.0}})AND(`time_of_creation`=1726264743))') ```
CBroz1 commented 2 months ago

Hi @emreybroyles - Please provide a code snippet we can use to replicate this error, defining all variables in the snippet

For my own future reference, the following regex will clean up error stacks like this: %s/](.*) /] /

samuelbray32 commented 2 months ago

I think this came from #1072, which now restricts with the full spikesorting key (including the nested dictionaries @emreybroyles mentioned above). I'll fix it