timeseriesAI / tsai

State-of-the-art Deep Learning library for Time Series and Sequences in Pytorch / fastai
https://timeseriesai.github.io/tsai/
Apache License 2.0

Exception occured in `ProgressCallback` when calling event `after_batch`: unsupported format string passed to TensorBase.__format__ #716

Closed. YYKKKKXX closed this issue 1 year ago.

YYKKKKXX commented 1 year ago

Hello,

I've run into an error while using tsai. I was trying to do a regression task when the error occurred. However, my data doesn't actually contain any strings.

This is the error stack:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[57], line 1
----> 1 learn.fit_one_cycle(10,1e-3)

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\callback\schedule.py:119, in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt, start_epoch)
    116 lr_max = np.array([h['lr'] for h in self.opt.hypers])
    117 scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    118           'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 119 self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd, start_epoch=start_epoch)

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\learner.py:264, in Learner.fit(self, n_epoch, lr, wd, cbs, reset_opt, start_epoch)
    262 self.opt.set_hypers(lr=self.lr if lr is None else lr)
    263 self.n_epoch = n_epoch
--> 264 self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\learner.py:199, in Learner._with_events(self, f, event_type, ex, final)
    198 def _with_events(self, f, event_type, ex, final=noop):
--> 199     try: self(f'before_{event_type}');  f()
    200     except ex: self(f'after_cancel_{event_type}')
    201     self(f'after_{event_type}');  final()

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\learner.py:253, in Learner._do_fit(self)
    251 for epoch in range(self.n_epoch):
    252     self.epoch=epoch
--> 253     self._with_events(self._do_epoch, 'epoch', CancelEpochException)

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\learner.py:199, in Learner._with_events(self, f, event_type, ex, final)
    198 def _with_events(self, f, event_type, ex, final=noop):
--> 199     try: self(f'before_{event_type}');  f()
    200     except ex: self(f'after_cancel_{event_type}')
    201     self(f'after_{event_type}');  final()

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\learner.py:247, in Learner._do_epoch(self)
    246 def _do_epoch(self):
--> 247     self._do_epoch_train()
    248     self._do_epoch_validate()

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\learner.py:239, in Learner._do_epoch_train(self)
    237 def _do_epoch_train(self):
    238     self.dl = self.dls.train
--> 239     self._with_events(self.all_batches, 'train', CancelTrainException)

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\learner.py:199, in Learner._with_events(self, f, event_type, ex, final)
    198 def _with_events(self, f, event_type, ex, final=noop):
--> 199     try: self(f'before_{event_type}');  f()
    200     except ex: self(f'after_cancel_{event_type}')
    201     self(f'after_{event_type}');  final()

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\learner.py:205, in Learner.all_batches(self)
    203 def all_batches(self):
    204     self.n_iter = len(self.dl)
--> 205     for o in enumerate(self.dl): self.one_batch(*o)

File D:\anaconda3\envs\ppg2bp\lib\site-packages\tsai\learner.py:40, in one_batch(self, i, b)
     38 b_on_device = to_device(b, device=self.dls.device) if self.dls.device is not None else b
     39 self._split(b_on_device)
---> 40 self._with_events(self._do_one_batch, 'batch', CancelBatchException)

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\learner.py:201, in Learner._with_events(self, f, event_type, ex, final)
    199 try: self(f'before_{event_type}');  f()
    200 except ex: self(f'after_cancel_{event_type}')
--> 201 self(f'after_{event_type}');  final()

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\learner.py:172, in Learner.__call__(self, event_name)
--> 172 def __call__(self, event_name): L(event_name).map(self._call_one)

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastcore\foundation.py:156, in L.map(self, f, *args, **kwargs)
--> 156 def map(self, f, *args, **kwargs): return self._new(map_ex(self, f, *args, gen=False, **kwargs))

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastcore\basics.py:840, in map_ex(iterable, f, gen, *args, **kwargs)
    838 res = map(g, iterable)
    839 if gen: return res
--> 840 return list(res)

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastcore\basics.py:825, in bind.__call__(self, *args, **kwargs)
    823     if isinstance(v,_Arg): kwargs[k] = args.pop(v.i)
    824 fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
--> 825 return self.func(*fargs, **kwargs)

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\learner.py:176, in Learner._call_one(self, event_name)
    174 def _call_one(self, event_name):
    175     if not hasattr(event, event_name): raise Exception(f'missing {event_name}')
--> 176     for cb in self.cbs.sorted('order'): cb(event_name)

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\callback\core.py:62, in Callback.__call__(self, event_name)
     60     try: res = getcallable(self, event_name)()
     61     except (CancelBatchException, CancelBackwardException, CancelEpochException, CancelFitException, CancelStepException, CancelTrainException, CancelValidException): raise
---> 62     except Exception as e: raise modify_exception(e, f'Exception occured in `{self.__class__.__name__}` when calling event `{event_name}`:\n\t{e.args[0]}', replace=True)
     63 if event_name=='after_fit': self.run=True #Reset self.run to True at each end of fit
     64 return res

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\callback\core.py:60, in Callback.__call__(self, event_name)
     58 res = None
     59 if self.run and _run: 
---> 60     try: res = getcallable(self, event_name)()
     61     except (CancelBatchException, CancelBackwardException, CancelEpochException, CancelFitException, CancelStepException, CancelTrainException, CancelValidException): raise
     62     except Exception as e: raise modify_exception(e, f'Exception occured in `{self.__class__.__name__}` when calling event `{event_name}`:\n\t{e.args[0]}', replace=True)

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\callback\progress.py:33, in ProgressCallback.after_batch(self)
     31 def after_batch(self):
     32     self.pbar.update(self.iter+1)
---> 33     if hasattr(self, 'smooth_loss'): self.pbar.comment = f'{self.smooth_loss:.4f}'

File D:\anaconda3\envs\ppg2bp\lib\site-packages\torch\_tensor.py:870, in Tensor.__format__(self, format_spec)
    868 def __format__(self, format_spec):
    869     if has_torch_function_unary(self):
--> 870         return handle_torch_function(Tensor.__format__, (self,), self, format_spec)
    871     if self.dim() == 0 and not self.is_meta and type(self) is Tensor:
    872         return self.item().__format__(format_spec)

File D:\anaconda3\envs\ppg2bp\lib\site-packages\torch\overrides.py:1551, in handle_torch_function(public_api, relevant_args, *args, **kwargs)
   1545     warnings.warn("Defining your `__torch_function__ as a plain method is deprecated and "
   1546                   "will be an error in future, please define it as a classmethod.",
   1547                   DeprecationWarning)
   1549 # Use `public_api` instead of `implementation` so __torch_function__
   1550 # implementations can do equality/identity comparisons.
-> 1551 result = torch_func_method(public_api, types, args, kwargs)
   1553 if result is not NotImplemented:
   1554     return result

File D:\anaconda3\envs\ppg2bp\lib\site-packages\fastai\torch_core.py:372, in TensorBase.__torch_function__(cls, func, types, args, kwargs)
    370 if cls.debug and func.__name__ not in ('__str__','__repr__'): print(func, types, args, kwargs)
    371 if _torch_handled(args, cls._opt, func): types = (torch.Tensor,)
--> 372 res = super().__torch_function__(func, types, args, ifnone(kwargs, {}))
    373 dict_objs = _find_args(args) if args else _find_args(list(kwargs.values()))
    374 if issubclass(type(res),TensorBase) and dict_objs: res.set_meta(dict_objs[0],as_copy=True)

File D:\anaconda3\envs\ppg2bp\lib\site-packages\torch\_tensor.py:1295, in Tensor.__torch_function__(cls, func, types, args, kwargs)
   1292     return NotImplemented
   1294 with _C.DisableTorchFunctionSubclass():
-> 1295     ret = func(*args, **kwargs)
   1296     if func in get_default_nowrap_functions():
   1297         return ret

File D:\anaconda3\envs\ppg2bp\lib\site-packages\torch\_tensor.py:873, in Tensor.__format__(self, format_spec)
    871 if self.dim() == 0 and not self.is_meta and type(self) is Tensor:
    872     return self.item().__format__(format_spec)
--> 873 return object.__format__(self, format_spec)

TypeError: Exception occured in `ProgressCallback` when calling event `after_batch`:
    unsupported format string passed to TensorBase.__format__
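
From the last frames of the traceback, the failure appears to come from formatting the 0-dim smooth_loss tensor (a fastai TensorBase) directly with an f-string. Here is a minimal sketch of what I think is happening (assuming an affected fastai/torch combination; the TensorBase(0.1234) value is just an illustrative stand-in for smooth_loss, not code taken from tsai):

from fastai.torch_core import TensorBase

t = TensorBase(0.1234)    # 0-dim TensorBase, like smooth_loss above
print(f"{t.item():.4f}")  # works: .item() yields a plain Python float
print(f"{t:.4f}")         # raises the same TypeError on affected versions,
                          # because Tensor.__format__ only special-cases
                          # 0-dim tensors whose type is exactly torch.Tensor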

My code is:

import torch
from torch import nn
import numpy as np
import pandas as pd  # needed for pd.read_csv below
import os
from tsai.all import *
from tsai.basics import *

csv_file = "features1.csv"
csv_data = pd.read_csv(csv_file, low_memory=False)
ts_features_df = pd.DataFrame(csv_data).iloc[:, 1:]  # drop the first (index) column

# transform a pandas dataframe into X and y numpy arrays
X_df, y_df = df2xy(ts_features_df, target_col='SBP')
splits = get_splits(y_df, valid_size=.2, stratify=False, random_state=23, shuffle=True)
check_data(X_df, y_df, splits)

tfms  = [None, [TSRegression()]]
batch_tfms = TSStandardize(by_sample=True, by_var=True)
dls = get_ts_dls(X_df, y_df, splits=splits, tfms=tfms, batch_tfms=batch_tfms, bs=2)
dls.show_batch()

tst = TST(1,1,66)
learn = Learner(dls, tst, metrics=[mae, rmse, mse, msle, mape])
learn.summary()

learn.fit_one_cycle(10,1e-3)

And this is the output of check_data(X_df, y_df, splits):

X      - shape: [6 samples x 1 features x 66 timesteps]  type: ndarray  dtype:float64  isnan: 0
y      - shape: (6,)  type: ndarray  dtype:float64  isnan: 0
splits - n_splits: 2 shape: [5, 1]  overlap: False

Thanks.

oguiza commented 1 year ago

Hi @YYKKKKXX, I don't know what's causing your issue. I've tried to reproduce it with some dummy data, but I couldn't. Are you using the latest version of tsai (>=0.3.5)? Here's what I used:

from tsai.basics import *
my_setup()

# output
# os              : Linux-5.10.147+-x86_64-with-glibc2.31
# python          : 3.9.16
# tsai            : 0.3.5
# fastai          : 2.7.11
# fastcore        : 1.5.28
# torch           : 1.13.1+cu116
# cpu cores       : 1
# threads per cpu : 2
# RAM             : 12.68 GB
# GPU memory      : N/A
from tsai.basics import *
from tsai.models.TST import TST

X_df = np.random.rand(6, 1, 66)
y_df = np.random.rand(6)
splits = get_splits(y_df, valid_size=.2, stratify=False, random_state=23, shuffle=True, show_plot=False)
check_data(X_df, y_df, splits)

tfms  = [None, [TSRegression()]]
batch_tfms = TSStandardize(by_sample=True, by_var=True)
dls = get_ts_dls(X_df, y_df, splits=splits, tfms=tfms, batch_tfms=batch_tfms, bs=2)
dls.show_batch()

tst = TST(1,1,66)
learn = Learner(dls, tst, metrics=[mae, rmse, mse, msle, mape])
learn.summary()

learn.fit_one_cycle(10, 1e-3)

Alternative (and simpler) ways to do the same are:

from tsai.basics import *

X_df = np.random.rand(6, 1, 66)
y_df = np.random.rand(6)
splits = get_splits(y_df, valid_size=.2, stratify=False, random_state=23, shuffle=True, show_plot=False)
check_data(X_df, y_df, splits)

tfms  = [None, [TSRegression()]]
batch_tfms = TSStandardize(by_sample=True, by_var=True)
dls = get_ts_dls(X_df, y_df, splits=splits, tfms=tfms, batch_tfms=batch_tfms, bs=2)
dls.show_batch()

learn = ts_learner(dls, arch="TST", metrics=[mae, rmse, mse, msle, mape])
learn.summary()

learn.fit_one_cycle(10, 1e-3)

or this:

from tsai.basics import *

X_df = np.random.rand(6, 1, 66)
y_df = np.random.rand(6)
splits = get_splits(y_df, valid_size=.2, stratify=False, random_state=23, shuffle=True, show_plot=False)
check_data(X_df, y_df, splits)

tfms  = [None, [TSRegression()]]
batch_tfms = TSStandardize(by_sample=True, by_var=True)
learn = TSRegressor(X_df, y_df, splits=splits, tfms=tfms, batch_tfms=batch_tfms, bs=2, arch="TST", metrics=[mae, rmse, mse, msle, mape])
learn.dls.show_batch()
learn.summary()
learn.fit_one_cycle(10, 1e-3)

YYKKKKXX commented 1 year ago

Hi @oguiza

Thanks for the reply and the suggestions. I am using the latest version of tsai (0.3.5).

Later, I ran your code to see if it would work, but I got the same error as before.

However, learn.summary() runs without error, and all layers are trainable:

TST (Input shape: 2 x 1 x 66)
============================================================================
Layer (type)         Output Shape         Param #    Trainable 
============================================================================
                     2 x 66 x 128        
Linear                                    256        True      
Dropout                                                        
Linear                                    16384      True      
Linear                                    16384      True      
Linear                                    16384      True      
Linear                                    16384      True      
Dropout                                                        
____________________________________________________________________________
                     2 x 128 x 66        
Transpose                                                      
BatchNorm1d                               256        True      
____________________________________________________________________________
                     2 x 66 x 128        
Transpose                                                      
____________________________________________________________________________
                     2 x 66 x 256        
Linear                                    33024      True      
GELU                                                           
Dropout                                                        
____________________________________________________________________________
                     2 x 66 x 128        
Linear                                    32896      True      
Dropout                                                        
____________________________________________________________________________
                     2 x 128 x 66        
Transpose                                                      
BatchNorm1d                               256        True      
____________________________________________________________________________
                     2 x 66 x 128        
Transpose                                                      
Linear                                    16384      True      
Linear                                    16384      True      
Linear                                    16384      True      
Linear                                    16384      True      
Dropout                                                        
____________________________________________________________________________
                     2 x 128 x 66        
Transpose                                                      
BatchNorm1d                               256        True      
____________________________________________________________________________
                     2 x 66 x 128        
Transpose                                                      
____________________________________________________________________________
                     2 x 66 x 256        
Linear                                    33024      True      
GELU                                                           
Dropout                                                        
____________________________________________________________________________
                     2 x 66 x 128        
Linear                                    32896      True      
Dropout                                                        
____________________________________________________________________________
                     2 x 128 x 66        
Transpose                                                      
BatchNorm1d                               256        True      
____________________________________________________________________________
                     2 x 66 x 128        
Transpose                                                      
Linear                                    16384      True      
Linear                                    16384      True      
Linear                                    16384      True      
Linear                                    16384      True      
Dropout                                                        
____________________________________________________________________________
                     2 x 128 x 66        
Transpose                                                      
BatchNorm1d                               256        True      
____________________________________________________________________________
                     2 x 66 x 128        
Transpose                                                      
____________________________________________________________________________
                     2 x 66 x 256        
Linear                                    33024      True      
GELU                                                           
Dropout                                                        
____________________________________________________________________________
                     2 x 66 x 128        
Linear                                    32896      True      
Dropout                                                        
____________________________________________________________________________
                     2 x 128 x 66        
Transpose                                                      
BatchNorm1d                               256        True      
____________________________________________________________________________
                     2 x 66 x 128        
Transpose                                                      
GELU                                                           
____________________________________________________________________________
                     2 x 8448            
Flatten                                                        
____________________________________________________________________________
                     2 x 1               
Linear                                    8449       True      
____________________________________________________________________________

Total params: 404,609
Total trainable params: 404,609
Total non-trainable params: 0

Optimizer used: <function Adam at 0x0000024DB47B6310>
Loss function: FlattenedLoss of MSELoss()

Callbacks:
  - TrainEvalCallback
  - CastToTensor
  - Recorder
  - ProgressCallback

Thank you very much for looking at this.

oguiza commented 1 year ago

That's very strange. You're saying that you get the issue even when running the exact code I used?

from tsai.basics import *

X_df = np.random.rand(6, 1, 66)
y_df = np.random.rand(6)
splits = get_splits(y_df, valid_size=.2, stratify=False, random_state=23, shuffle=True, show_plot=False)
check_data(X_df, y_df, splits)

tfms  = [None, [TSRegression()]]
batch_tfms = TSStandardize(by_sample=True, by_var=True)
learn = TSRegressor(X_df, y_df, splits=splits, tfms=tfms, batch_tfms=batch_tfms, bs=2, arch="TST", metrics=[mae, rmse, mse, msle, mape])
learn.dls.show_batch()
learn.summary()
learn.fit_one_cycle(10, 1e-3)

Could you please run this code and post the output here?

from tsai.basics import *
my_setup()

Have you set up a new environment to run tsai? Could you run your code in a different environment (like Google Colab) or create a fresh one?

YYKKKKXX commented 1 year ago

Hi @oguiza

I recently downgraded my tsai version to 0.3.4 (pip install tsai==0.3.4), and it finally works. This is the output of my_setup() now:

os              : Windows-10-10.0.22000-SP0
python          : 3.8.13
tsai            : 0.3.4
fastai          : 2.7.11
fastcore        : 1.5.28
torch           : 1.13.1+cu117
device          : 1 gpu (['NVIDIA GeForce RTX 3060 Laptop GPU'])
cpu cores       : 14
threads per cpu : 1
RAM             : 15.73 GB
GPU memory      : [6.0] GB
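
For anyone who can't downgrade: assuming the root cause is that TensorBase has no __format__ that converts 0-dim tensors to a Python scalar before formatting, a monkey-patch along these lines might also work (an untested sketch on my side, not an official fix):

from fastai.torch_core import TensorBase

def _tensorbase_format(self, format_spec):
    # Format 0-dim tensors via their Python scalar, mirroring what
    # torch.Tensor.__format__ does for plain 0-dim tensors.
    if self.ndim == 0:
        return self.item().__format__(format_spec)
    return super(TensorBase, self).__format__(format_spec)

TensorBase.__format__ = _tensorbase_format

If applied before training, ProgressCallback's f'{self.smooth_loss:.4f}' would then format the scalar value instead of falling through to object.__format__.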

Thank you for looking at this.

oguiza commented 1 year ago

I'll close this issue based on the feedback provided.