Open jrober84 opened 4 months ago
In the extractor, update the early-return branch that is taken when there are no truncated records so that it sets a default value of False for the columns that are used later:
if len(trunc_records) == 0:
df = df.assign(is_extended=False)
df = df.assign(is_5p_extended=False)
df = df.assign(is_3p_extended=False)
return df
Traceback (most recent call last): File ".conda/envs/profile_dists/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3652, in get_loc return self._engine.get_loc(casted_key) File "pandas/_libs/index.pyx", line 147, in pandas._libs.index.IndexEngine.get_loc File "pandas/_libs/index.pyx", line 176, in pandas._libs.index.IndexEngine.get_loc File "pandas/_libs/hashtable_class_helper.pxi", line 7080, in pandas._libs.hashtable.PyObjectHashTable.get_item File "pandas/_libs/hashtable_class_helper.pxi", line 7088, in pandas._libs.hashtable.PyObjectHashTable.get_item KeyError: 'is_extended'
The above exception was the direct cause of the following exception:
Traceback (most recent call last): File ".conda/envs/profile_dists/bin/locidex", line 8, in
sys.exit(main())
File ".conda/envs/profile_dists/lib/python3.9/site-packages/locidex/main.py", line 51, in main
exec('locidex.' + task + '.run()')
File "", line 1, in
File ".conda/envs/profile_dists/lib/python3.9/site-packages/locidex/extract.py", line 272, in run
run_extract(config)
File ".conda/envs/profile_dists/lib/python3.9/site-packages/locidex/extract.py", line 202, in run_extract
exobj = extractor(hit_df,seq_data,sseqid_col='sseqid',queryid_col='qseqid',qstart_col='qstart',qend_col='qend',
File ".conda/envs/profile_dists/lib/python3.9/site-packages/locidex/classes/extractor.py", line 31, in init
loci_ranges = self.group_by_locus(self.df,sseqid_col, queryid_col,qlen_col,extend_threshold_ratio)
File ".conda/envs/profile_dists/lib/python3.9/site-packages/locidex/classes/extractor.py", line 543, in group_by_locus
is_extended = row['is_extended']
File ".conda/envs/profile_dists/lib/python3.9/site-packages/pandas/core/series.py", line 1007, in getitem
return self._get_value(key)
File ".conda/envs/profile_dists/lib/python3.9/site-packages/pandas/core/series.py", line 1116, in _get_value
loc = self.index.get_loc(label)
File ".conda/envs/profile_dists/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3654, in get_loc
raise KeyError(key) from err
KeyError: 'is_extended'
This can be fixed either by checking for these keys and defaulting to False when they are not found, or by ensuring that the columns are inserted into the DataFrame earlier.