fastai / fastai2

Temporary home for fastai v2 while it's being developed
https://dev.fast.ai
Apache License 2.0
645 stars 235 forks source link

show_batch L issue in new commits #406

Closed fmobrj closed 4 years ago

fmobrj commented 4 years ago

I used datasets and dataloaders a lot before with text (fastai v2 0.0.8), whether for custom datasets with standard ULMFiT use, or for using transformers adapted to the fastai v2 pipeline.

I recently upgraded to the latest fastai v2 commit (0.0.17), and now I always get this error when using show_batch. Any thoughts?

    `AttributeError: 'L' object has no attribute 'truncate'`

    ---------------------------------------------------------------------------
    AttributeError                            Traceback (most recent call last)
    <ipython-input-39-90634fcc3c9e> in <module>
    ----> 1 dls.show_batch()

    ~/anaconda3/envs/fastaiv2/lib/python3.6/site-packages/fastai2/data/core.py in show_batch(self, b, max_n, ctxs, show, unique, **kwargs)
         97         if b is None: b = self.one_batch()
         98         if not show: return self._pre_show_batch(b, max_n=max_n)
    ---> 99         show_batch(*self._pre_show_batch(b, max_n=max_n), ctxs=ctxs, max_n=max_n, **kwargs)
        100         if unique: self.get_idxs = old_get_idxs
        101 

    ~/anaconda3/envs/fastaiv2/lib/python3.6/site-packages/fastcore/dispatch.py in __call__(self, *args, **kwargs)
         96         if not f: return args[0]
         97         if self.inst is not None: f = MethodType(f, self.inst)
    ---> 98         return f(*args, **kwargs)
         99 
        100     def __get__(self, inst, owner):

    ~/anaconda3/envs/fastaiv2/lib/python3.6/site-packages/fastai2/text/data.py in show_batch(x, y, samples, ctxs, max_n, trunc_at, **kwargs)
        107 def show_batch(x: TensorText, y, samples, ctxs=None, max_n=10, trunc_at=150, **kwargs):
        108     if ctxs is None: ctxs = get_empty_df(min(len(samples), max_n))
    --> 109     if trunc_at is not None: samples = L((s[0].truncate(trunc_at),*s[1:]) for s in samples)
        110     ctxs = show_batch[object](x, y, samples, max_n=max_n, ctxs=ctxs, **kwargs)
        111     display_df(pd.DataFrame(ctxs))

    ~/anaconda3/envs/fastaiv2/lib/python3.6/site-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
         45             return x
         46 
    ---> 47         res = super().__call__(*((x,) + args), **kwargs)
         48         res._newchk = 0
         49         return res

    ~/anaconda3/envs/fastaiv2/lib/python3.6/site-packages/fastcore/foundation.py in __init__(self, items, use_list, match, *rest)
        316         if items is None: items = []
        317         if (use_list is not None) or not _is_array(items):
    --> 318             items = list(items) if use_list else _listify(items)
        319         if match is not None:
        320             if is_coll(match): match = len(match)

    ~/anaconda3/envs/fastaiv2/lib/python3.6/site-packages/fastcore/foundation.py in _listify(o)
        252     if isinstance(o, list): return o
        253     if isinstance(o, str) or _is_array(o): return [o]
    --> 254     if is_iter(o): return list(o)
        255     return [o]
        256 

    ~/anaconda3/envs/fastaiv2/lib/python3.6/site-packages/fastai2/text/data.py in <genexpr>(.0)
        107 def show_batch(x: TensorText, y, samples, ctxs=None, max_n=10, trunc_at=150, **kwargs):
        108     if ctxs is None: ctxs = get_empty_df(min(len(samples), max_n))
    --> 109     if trunc_at is not None: samples = L((s[0].truncate(trunc_at),*s[1:]) for s in samples)
        110     ctxs = show_batch[object](x, y, samples, max_n=max_n, ctxs=ctxs, **kwargs)
        111     display_df(pd.DataFrame(ctxs))

    AttributeError: 'L' object has no attribute 'truncate'

The code that generated this:

    dsrc = Datasets(df, tfms=[tfms, [attrgetter("label"), Categorize()]], splits=splits)

    dsrc[0]

    Output:
    (TensorText([     0,    180,   1601,   1021,    987,   6497,   2304,  39181,  73478,
              75504,     48, 214641,      8,  20655,  26465,     41,  86210,    302,
             137156,      8,    184,   3181,   3369,  86233, 175754,      9, 114584,
              11126,     54,  51301,     22,  97606,     10, 130653,  18836,  18652,
              14462,    655,  84774,     90, 178579,     28,     48,   5776,   2304,
               8656,      5,      2]),
     TensorCategory(2))

    def transformer_padding(tokenizer=None, max_seq_len=None, sentence_pair=False): 
        if tokenizer.padding_side == 'right': pad_first=False
        else: pad_first=True
        max_seq_len = ifnone(max_seq_len, tokenizer.max_len) 
        return partial(pad_input_chunk, pad_first=pad_first, pad_idx=tokenizer.pad_token_id, seq_len=max_seq_len)

    bs = 1
    max_seq_len=sl
    padding=transformer_padding(xlmr_tok, max_seq_len)
    dls = dsrc.dataloaders(bs=bs, before_batch=[padding])

    o=dls.one_batch(); o[0].size(), o[1].size(), o[0]

    Output:

    (torch.Size([1, 119]),
     torch.Size([1]),
     TensorText([[     0,    180,  73839,      8,  87853, 146454,     85,    110,  84372,
               59197,    196, 113468, 220497,    196,  56649,   1255,   1027,  17914,
              197499,  39531,  19329,      4, 167485,  42677,   1156,      8,   3332,
                4100,  44778,    362,  38612, 107026,   1140,     10,  31810,      8,
               60449,   1952,      5,   1413,    557,     10,  89266,  38845,  10369,
                  99,    525,   1138,     10,  70560,   9153,    381,  68481,     41,
                  22,  97606,   2198,     10, 220497,   1651,     15,  67987,      5,
                 714,      4,  30041,    991,     87,      4,     28, 106392,      4,
                5360,    190,     31,      4,     48,   9335,  70885,   1530,    248,
               11704,     16,      6,      4,     28,     23,  54376,  36735,  13395,
                   7,      8, 129718,    352,   2776,  21635,   1156,      4,   1027,
                5059,   2456,    964,    329,     10,     41,     36, 113468,   1119,
                  56,   2968,  87853,  53647,    196,   1646,   6615,     28,  50047,
                   5,      2]], device='cuda:0'))

    dls.show_batch()
mikonapoli commented 4 years ago

I cannot guarantee it will work, but try updating fastcore with `pip install fastcore -U`.