rentruewang / koila

Prevent PyTorch's `CUDA error: out of memory` in just 1 line of code.
https://koila.rentruewang.com
MIT License
1.82k stars 63 forks source link

Got an error when using lazy. #33

Open SinoKiwi opened 1 year ago

SinoKiwi commented 1 year ago

I'm doing an NMT task. I use my own data loading function rather than a torch Dataset. I got an "'int' object has no attribute 'size'" error. Here's my data loading code:

def get_batches(sz, pad=0):
    """Yield padded ``Batch`` objects of up to ``sz`` pairs from ``datatmp``.

    Every item of the module-level ``datatmp`` is read as ``item[0]`` (source
    token list) and ``item[1]`` (target token list). Inside a batch each
    sequence is right-padded with ``pad`` to the longest sequence on its side.

    Args:
        sz: Maximum number of pairs per batch.
        pad: Padding value; it is also forwarded to ``Batch``.

    Yields:
        One ``Batch`` per chunk of ``datatmp``. The last chunk holds fewer
        than ``sz`` pairs when ``len(datatmp)`` is not a multiple of ``sz``.
    """
    total = len(datatmp)
    for start in range(0, total, sz):
        # Clamp the chunk end: indexing start..start+sz unconditionally
        # raised IndexError on a trailing partial batch.
        stop = min(start + sz, total)
        src_rows = [datatmp[k][0] for k in range(start, stop)]
        trg_rows = [datatmp[k][1] for k in range(start, stop)]

        # The result is unused; the call is kept only so the random stream it
        # draws from advances exactly as it did before.
        randint(1, 2)

        src_width = max(len(row) for row in src_rows)
        trg_width = max(len(row) for row in trg_rows)

        # Build new padded lists (list concatenation), leaving datatmp untouched.
        src_padded = [row + [pad] * (src_width - len(row)) for row in src_rows]
        trg_padded = [row + [pad] * (trg_width - len(row)) for row in trg_rows]

        # float64 is what the previous np.ndarray(shape=...) buffers used; the
        # tensors are cast to long right after, as before.
        sr = np.array(src_padded, dtype=np.float64)
        tg = np.array(trg_padded, dtype=np.float64)

        src = Variable(torch.from_numpy(sr), requires_grad=False).long()
        trg = Variable(torch.from_numpy(tg), requires_grad=False).long()
        yield Batch(src, trg, pad)  # Batch is only a simple class
class Batch:
    """Hold one batch of source/target data together with its training masks."""

    def __init__(self, src, trg=None, pad=0):
        """Store ``src`` and, when ``trg`` is given, the derived target fields.

        Args:
            src: Source batch; compared element-wise against ``pad``.
            trg: Optional target batch, sliced along its second axis.
            pad: Padding value excluded by the masks.

        When ``trg`` is None only ``src`` and ``src_mask`` are set.
        """
        self.src = src
        # True where src differs from pad, with an extra axis inserted at -2.
        self.src_mask = (src != pad).unsqueeze(-2)
        if trg is None:
            return

        # trg drops the last column, trg_y drops the first one.
        self.trg = trg[:, :-1]
        self.trg_y = trg[:, 1:]
        self.trg_mask = self.make_std_mask(self.trg, pad)
        # Number of entries in trg_y that are not padding.
        not_padding = self.trg_y != pad
        self.ntokens = not_padding.data.sum()

    @staticmethod
    def make_std_mask(tgt, pad):
        """Create a mask to hide padding and future words.

        Combines the non-padding mask of ``tgt`` with the result of
        ``subsequent_mask(tgt.size(-1))``. ``subsequent_mask`` is defined
        outside this class; presumably it masks future positions.
        """
        padding_mask = (tgt != pad).unsqueeze(-2)
        future_mask = subsequent_mask(tgt.size(-1)).type_as(padding_mask.data)
        return padding_mask & Variable(future_mask)

P.S. The code is adapted from 'The Annotated Transformer'.

SinoKiwi commented 1 year ago

Oh, the code where I used lazy looks like this:

def get_batches(sz, pad=0):
    """Yield padded ``Batch`` objects whose tensors went through ``lazy``.

    Every item of the module-level ``datatmp`` is read as ``item[0]`` (source
    token list) and ``item[1]`` (target token list). Inside a batch each
    sequence is right-padded with ``pad`` to the longest sequence on its
    side, and both tensors are passed through ``lazy(..., batch=0)`` before
    ``Batch`` is built.

    Args:
        sz: Maximum number of pairs per batch.
        pad: Padding value; it is also forwarded to ``Batch``.

    Yields:
        One ``Batch`` per chunk of ``datatmp``. The last chunk holds fewer
        than ``sz`` pairs when ``len(datatmp)`` is not a multiple of ``sz``.
    """
    total = len(datatmp)
    for start in range(0, total, sz):
        # Clamp the chunk end: indexing start..start+sz unconditionally
        # raised IndexError on a trailing partial batch.
        stop = min(start + sz, total)
        # Each appended element is a list of tokens.
        src_rows = [datatmp[k][0] for k in range(start, stop)]
        trg_rows = [datatmp[k][1] for k in range(start, stop)]

        # The result is unused; the call is kept only so the random stream it
        # draws from advances exactly as it did before.
        randint(1, 2)

        src_width = max(len(row) for row in src_rows)
        trg_width = max(len(row) for row in trg_rows)

        # Build new padded lists (list concatenation), leaving datatmp untouched.
        src_padded = [row + [pad] * (src_width - len(row)) for row in src_rows]
        trg_padded = [row + [pad] * (trg_width - len(row)) for row in trg_rows]

        # float64 is what the previous np.ndarray(shape=...) buffers used; the
        # tensors are cast to long right after, as before.
        sr = np.array(src_padded, dtype=np.float64)
        tg = np.array(trg_padded, dtype=np.float64)

        src = Variable(torch.from_numpy(sr), requires_grad=False).long()
        trg = Variable(torch.from_numpy(tg), requires_grad=False).long()
        (src, trg) = lazy(src, trg, batch=0)  # Here
        yield Batch(src, trg, pad)
rentruewang commented 1 year ago

Hmm, sorry for the late response. It seems to me that you're using PyTorch 0.4.* right? I didn't test versions <1 so I'm not sure where the issue comes from. If I had to guess, it's perhaps because of the mismatch between the API of Tensor vs Variable.

SinoKiwi commented 1 year ago

I'm sorry, but I get the same error when I use torch 2.0.0 and just torch.from_numpy (without Variable). Here is the output:

Traceback (most recent call last):
  File "train.py", line 515, in <module>
    run_epoch(get_batches(4, BLANK_ID), model, 
  File "train.py", line 299, in run_epoch
    for i, batch in enumerate(tqdm(data_iter)):
  File "/usr/local/miniconda3/lib/python3.8/site-packages/tqdm/std.py", line 1178, in __iter__
    for obj in iterable:
  File "train.py", line 473, in get_batches
    yield Batch(src, trg, pad)
  File "train.py", line 275, in __init__
    self.src_mask = (src != pad).unsqueeze(-2)
  File "/usr/local/miniconda3/lib/python3.8/site-packages/koila/lazy.py", line 320, in __ne__
    return lazy_forward(Tensor.__ne__, prepasses.symmetric, self, other)
  File "/usr/local/miniconda3/lib/python3.8/site-packages/koila/lazy.py", line 504, in lazy_forward
    out = LazyTensor(LazyFunction(func, shape_func)(*args, **kwargs))
  File "/usr/local/miniconda3/lib/python3.8/site-packages/koila/lazy.py", line 51, in __call__
    prepass = self.prepass_func(*args, **kwargs)
  File "/usr/local/miniconda3/lib/python3.8/site-packages/koila/prepasses.py", line 142, in symmetric
    shape = shapes.coerce(input.size(), other.size(), broadcast=True, scalars=True)
AttributeError: 'int' object has no attribute 'size'
rentruewang commented 1 year ago

I see. Seems to be an oversight on my part where I didn't handle broadcasting mechanism with primitives. Thanks for the feedback!