ssbuild / chatglm_finetuning

chatglm 6b finetuning and alpaca finetuning

RuntimeError: "LayerNormKernelImpl" not implemented for 'Half' #65

Closed ARES3366 closed 1 year ago

ARES3366 commented 1 year ago

Hello, one more question: running python3 train.py throws the error below. I'm on Python 3.8.16 with no CUDA available. How should I proceed?

Traceback (most recent call last):

/data/z/zhangxiaoyuan/work/test/chatglm-fineturn/chatglm_finetuning/train.py:198
    trainer.fit(model, train_dataloaders=train_datasets)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:608 in fit
    call._call_and_handle_interrupt(self, self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ...)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py:38 in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:650 in _fit_impl
    self._run(model, ckpt_path=self.ckpt_path)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:1112 in _run
    results = self._run_stage()
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:1191 in _run_stage
    self._run_train()
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:1214 in _run_train
    self.fit_loop.run()
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py:199 in run
    self.advance(*args, **kwargs)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:267 in advance
    self._outputs = self.epoch_loop.run(self._data_fetcher)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py:199 in run
    self.advance(*args, **kwargs)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py:213 in advance
    batch_output = self.batch_loop.run(kwargs)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py:199 in run
    self.advance(*args, **kwargs)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py:88 in advance
    outputs = self.optimizer_loop.run(optimizers, kwargs)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py:199 in run
    self.advance(*args, **kwargs)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py:202 in advance
    result = self._run_optimization(kwargs, self._optimizers[self.optim_progress.opt...
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py:249 in _run_optimization
    self._optimizer_step(optimizer, opt_idx, kwargs.get("batch_idx", 0), closure...
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py:370 in _optimizer_step
    self.trainer._call_lightning_module_hook("optimizer_step", self.trainer.current_epoch, batch_idx, ...)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:1356 in _call_lightning_module_hook
    output = fn(*args, **kwargs)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/core/module.py:1742 in optimizer_step
    optimizer.step(closure=optimizer_closure)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/core/optimizer.py:169 in step
    step_output = self._strategy.optimizer_step(self._optimizer, self._optimizer_idx...
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/strategies/strategy.py:234 in optimizer_step
    return self.precision_plugin.optimizer_step(optimizer, model=model, optimizer_idx=opt_idx, closure=closure, **kwargs)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/precision_plugin.py:119 in optimizer_step
    return optimizer.step(closure=closure, **kwargs)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/torch/optim/lr_scheduler.py:69 in wrapper
    return wrapped(*args, **kwargs)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/torch/optim/optimizer.py:280 in wrapper
    out = func(*args, **kwargs)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/torch/utils/_contextlib.py:115 in decorate_context
    return func(*args, **kwargs)
/data/z/zhangxiaoyuan/work/test/chatglm-fineturn/chatglm_finetuning/deep_training/nlp/optimizer/lion/lion.py:72 in step
    loss = closure()
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/precision_plugin.py:105 in _wrap_closure
    closure_result = closure()
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py:149 in __call__
    self._result = self.closure(*args, **kwargs)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py:135 in closure
    step_output = self._step_fn()
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py:419 in _training_step
    training_step_output = self.trainer._call_strategy_hook("training_step", kwargs...
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:1494 in _call_strategy_hook
    output = fn(*args, **kwargs)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/pytorch_lightning/strategies/strategy.py:378 in training_step
    return self.model.training_step(*args, **kwargs)
/data/z/zhangxiaoyuan/work/test/chatglm-fineturn/chatglm_finetuning/deep_training/nlp/models/transformer.py:564 in training_step
    outputs = self.compute_loss(**batch)
/data/z/zhangxiaoyuan/work/test/chatglm-fineturn/chatglm_finetuning/deep_training/nlp/models/transformer.py:395 in compute_loss
    return self.model.compute_loss(**kwargs)
/data/z/zhangxiaoyuan/work/test/chatglm-fineturn/chatglm_finetuning/deep_training/nlp/models/transformer.py:141 in compute_loss
    return self.model(*args, **batch)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl
    return forward_call(*args, **kwargs)
/data/z/zhangxiaoyuan/work/test/chatglm-fineturn/chatglm_finetuning/deep_training/nlp/models/chatglm/__init__.py:1074 in forward
    transformer_outputs = self.transformer(input_ids=input_ids, position_ids=position_ids, attention_mask=attention_mask, ...)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl
    return forward_call(*args, **kwargs)
/data/z/zhangxiaoyuan/work/test/chatglm-fineturn/chatglm_finetuning/deep_training/nlp/models/chatglm/__init__.py:922 in forward
    layer_ret = layer(hidden_states, position_ids=position_ids, attention_mask=attention_mask, ...)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl
    return forward_call(*args, **kwargs)
/data/z/zhangxiaoyuan/work/test/chatglm-fineturn/chatglm_finetuning/deep_training/nlp/models/chatglm/__init__.py:596 in forward
    attention_input = self.input_layernorm(hidden_states)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/torch/nn/modules/module.py:1501 in _call_impl
    return forward_call(*args, **kwargs)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/torch/nn/modules/normalization.py:190 in forward
    return F.layer_norm(input, self.normalized_shape, self.weight, self.bias, self.eps)
/data/z/zhangxiaoyuan/conda/envs/glm/lib/python3.8/site-packages/torch/nn/functional.py:2515 in layer_norm
    return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.c...

RuntimeError: "LayerNormKernelImpl" not implemented for 'Half'
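For context, the failing frame is the CPU LayerNorm call on fp16 tensors: the model weights are presumably loaded in half precision (the "precision": 16 path mentioned below), and this PyTorch build has no half-precision LayerNorm kernel for CPU, so the first input_layernorm call fails as soon as training starts on a machine without CUDA. A minimal repro sketch outside the repo (plain torch, nothing from train.py) that triggers the same error:

```python
# Minimal repro sketch, not repo code: LayerNorm in fp16 on CPU raises the same
# RuntimeError, while the fp32 version of the exact same call works.
import torch
import torch.nn as nn

norm = nn.LayerNorm(8).half()                  # fp16 weights, like an fp16 checkpoint
x = torch.randn(2, 8, dtype=torch.float16)     # fp16 activations on CPU

try:
    norm(x)
except RuntimeError as err:
    print(err)                                 # "LayerNormKernelImpl" not implemented for 'Half'

print(norm.float()(x.float()).shape)           # casting everything to float32 works: torch.Size([2, 8])
```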

ssbuild commented 1 year ago

1. Please confirm you are running the latest code from this repository.
2. Check the "precision": 16 setting in config/config_small.json.
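If there is genuinely no CUDA device, my reading of point 2 is that the 16-bit path cannot run on CPU, so precision would need to be 32 and the weights kept in float32 (or training moved to a GPU where the fp16 kernels exist). Below is a self-contained sketch of CPU training at 32-bit precision with PyTorch Lightning; the toy module, data, and Trainer flags are illustrative, not the repo's classes or config keys:

```python
# Self-contained sketch (illustrative names, not the repo's code): CPU-only
# training works once everything stays in float32, i.e. 32-bit precision
# instead of the fp16 path that triggers the LayerNorm error above.
import torch
import torch.nn as nn
import pytorch_lightning as pl
from torch.utils.data import DataLoader, TensorDataset


class ToyModule(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.norm = nn.LayerNorm(8)          # same op that failed in fp16 on CPU
        self.head = nn.Linear(8, 1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        return nn.functional.mse_loss(self.head(self.norm(x)), y)

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=1e-2)


data = DataLoader(TensorDataset(torch.randn(16, 8), torch.randn(16, 1)), batch_size=4)
model = ToyModule().float()                  # keep weights in fp32; no .half() on CPU

trainer = pl.Trainer(accelerator="cpu", precision=32, max_epochs=1,
                     logger=False, enable_checkpointing=False)
trainer.fit(model, train_dataloaders=data)
```

Applied to train.py, the same idea would mean setting the config precision to 32 and making sure the checkpoint is not cast to half on load, though that is my interpretation rather than something stated in the repo docs.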