BojarLab / glycowork

Package for processing and analyzing glycans and their role in biology.
https://Bojarlab.github.io/glycowork
MIT License
57 stars 12 forks source link

terminal2 feature set broken #53

Closed mattias-erhardsson closed 5 months ago

mattias-erhardsson commented 5 months ago

At least in get_heatmap() and get_differential_expression(), on the dev branch at the latest commit, using terminal2 as a feature set generates a lengthy error message that ends with a length-mismatch statement: the expected axis has twice as many elements as the new values. Interestingly, terminal1 and terminal3 work just fine.

Example code:

# Setup
from glycowork.motif.analysis import get_heatmap
import pandas as pd
data = {
    'Glycan': ['Gal(b1-3)GalNAc', 'GalOS(b1-3)GalNAc', 'Gal(b1-3)[Fuc(a1-?)]GalNAc', 'GlcNAc(b1-2)Man(a1-3)Man', 'Man(a1-6)[Man(a1-3)]Man(b1-4)GlcNAc(b1-4)GlcNAc', 'Neu5Ac(a2-6)Gal(b1-4)GlcNAc(b1-2)Man', 'Neu5Ac(a2-3)Gal(b1-3)GalNAc'],
    'Sample1': [1.1, 0.2, 0.3, 0.5, 0.7, 1.0, 0.6],
    'Sample2': [1.2, 0.1, 0.2, 0.4, 0.8, 0.9, 0.5],
    'Sample3': [0.1, 1.8, 1.9, 0.3, 0.6, 0.8, 1.2],
    'Sample4': [0.2, 1.1, 1.2, 0.2, 0.5, 0.7, 1.1],
    'Sample5': [1.3, 0.3, 0.4, 0.6, 0.9, 1.1, 0.7],
    'Sample6': [1.4, 0.4, 0.5, 0.7, 1.0, 1.2, 0.8],
    'Sample7': [0.3, 1.9, 2.0, 0.4, 0.7, 0.9, 1.3],
    'Sample8': [0.4, 1.2, 1.3, 0.3, 0.6, 0.8, 1.2]
}
data = pd.DataFrame(data)
# This works
get_heatmap(data,
           motifs = True,
           feature_set=['terminal1'])
# This fails
get_heatmap(data,
           motifs = True,
           feature_set=['terminal2'])
# This works
get_heatmap(data,
           motifs = True,
           feature_set=['terminal3'])
# This fails
get_heatmap(data,
           motifs = True,
           feature_set=['terminal1','terminal2','terminal3'])

Error:


ValueError Traceback (most recent call last) File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\glycowork\motif\processing.py:935, in rescue_glycans.<locals>.wrapper(*args, **kwargs) 933 try: 934 # Try running the original function --> 935 return func(*args, **kwargs) 936 except Exception: 937 # If an error occurs, attempt to rescue the glycan sequences

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\glycowork\motif\annotate.py:252, in annotate_dataset(glycans, motifs, feature_set, termini_list, condense, custom_motifs) 251 bag_out = pd.concat([bag_out, shadow_bag], axis = 1).reset_index(drop = True) --> 252 bag_out.index = glycans 253 bag_out.columns = ['Terminal_' + c for c in bag_out.columns]

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\generic.py:6313, in NDFrame.__setattr__(self, name, value) 6312 object.__getattribute__(self, name) -> 6313 return object.__setattr__(self, name, value) 6314 except AttributeError:

File properties.pyx:69, in pandas._libs.properties.AxisProperty.__set__()

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\generic.py:814, in NDFrame._set_axis(self, axis, labels) 813 labels = ensure_index(labels) --> 814 self._mgr.set_axis(axis, labels) 815 self._clear_item_cache()

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\internals\managers.py:238, in BaseBlockManager.set_axis(self, axis, new_labels) 236 def set_axis(self, axis: AxisInt, new_labels: Index) -> None: 237 # Caller is responsible for ensuring we have an Index object. --> 238 self._validate_set_axis(axis, new_labels) 239 self.axes[axis] = new_labels

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\internals\base.py:98, in DataManager._validate_set_axis(self, axis, new_labels) 97 elif new_len != old_len: ---> 98 raise ValueError( 99 f"Length mismatch: Expected axis has {old_len} elements, new " 100 f"values have {new_len} elements" 101 )

ValueError: Length mismatch: Expected axis has 14 elements, new values have 7 elements

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last) Cell In[39], line 2 1 # This fails ----> 2 get_heatmap(data, 3 motifs = True, 4 feature_set=['terminal1','terminal2','terminal3'])

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\glycowork\motif\analysis.py:263, in get_heatmap(df, motifs, feature_set, transform, datatype, rarity_filter, filepath, index_col, custom_motifs, return_plot, **kwargs) 261 raise ValueError("A heatmap needs to have at least two motifs.") 262 if datatype == 'response': --> 263 df = quantify_motifs(df, df.index.tolist(), feature_set, custom_motifs = custom_motifs) 264 elif datatype == 'presence': 265 # Count glycan motifs and remove rare motifs from the result 266 df_motif = annotate_dataset(df.index.tolist(), feature_set = feature_set, condense = True, custom_motifs = custom_motifs)

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\glycowork\motif\annotate.py:308, in quantify_motifs(df, glycans, feature_set, custom_motifs, remove_redundant) 306 df = pd.read_csv(df) if df.endswith(".csv") else pd.read_excel(df) 307 # Motif extraction --> 308 df_motif = annotate_dataset(glycans, feature_set = feature_set, 309 condense = True, custom_motifs = custom_motifs) 310 collect_dic = {} 311 df = df.T

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\glycowork\motif\processing.py:940, in rescue_glycans.<locals>.wrapper(*args, **kwargs) 938 rescued_args = [canonicalize_iupac(arg) if isinstance(arg, str) else [canonicalize_iupac(a) for a in arg] if isinstance(arg, list) and arg and isinstance(arg[0], str) else arg for arg in args] 939 # After rescuing, attempt to run the function again --> 940 return func(*rescued_args, **kwargs)

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\glycowork\motif\annotate.py:252, in annotate_dataset(glycans, motifs, feature_set, termini_list, condense, custom_motifs) 250 shadow_bag = pd.DataFrame([{i: j.count(i) for i in repertoire if '?' in i} for j in shadow_glycans]) 251 bag_out = pd.concat([bag_out, shadow_bag], axis = 1).reset_index(drop = True) --> 252 bag_out.index = glycans 253 bag_out.columns = ['Terminal_' + c for c in bag_out.columns] 254 shopping_cart.append(bag_out)

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\generic.py:6313, in NDFrame.__setattr__(self, name, value) 6311 try: 6312 object.__getattribute__(self, name) -> 6313 return object.__setattr__(self, name, value) 6314 except AttributeError: 6315 pass

File properties.pyx:69, in pandas._libs.properties.AxisProperty.__set__()

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\generic.py:814, in NDFrame._set_axis(self, axis, labels) 809 """ 810 This is called from the cython code when we set the index attribute 811 directly, e.g. series.index = [1, 2, 3]. 812 """ 813 labels = ensure_index(labels) --> 814 self._mgr.set_axis(axis, labels) 815 self._clear_item_cache()

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\internals\managers.py:238, in BaseBlockManager.set_axis(self, axis, new_labels) 236 def set_axis(self, axis: AxisInt, new_labels: Index) -> None: 237 # Caller is responsible for ensuring we have an Index object. --> 238 self._validate_set_axis(axis, new_labels) 239 self.axes[axis] = new_labels

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\internals\base.py:98, in DataManager._validate_set_axis(self, axis, new_labels) 95 pass 97 elif new_len != old_len: ---> 98 raise ValueError( 99 f"Length mismatch: Expected axis has {old_len} elements, new " 100 f"values have {new_len} elements" 101 )

ValueError: Length mismatch: Expected axis has 14 elements, new values have 7 elements

mattias-erhardsson commented 5 months ago

I think it's the latest commit at least; I did a fresh install of the dev branch.

mattias-erhardsson commented 5 months ago

Yeah I just reinstalled specifying commit 3098e80f64588ad5dffb88d11dac14a969eba448, same issue.

Bribak commented 5 months ago

That took me a while to catch :D Bottom line: poorly formatted if statements evaluated to True when they should not have, creating additional features that the function did not expect to be created.

Fixed in db9e9f9

mattias-erhardsson commented 5 months ago

Happy to provide some wild-goose chases :D