When using multimodal datasets to train ovis1_6-gemma2-9b, an error occurred: RuntimeError: self and mat2 must have the same dtype, but got BFloat16 and Char #2514
Train: 0%| | 0/1980 [00:00<?, ?it/s]/opt/conda/envs/www2025/lib/python3.12/site-packages/bitsandbytes/autograd/_functions.py:316: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization
warnings.warn(f"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization")
Traceback (most recent call last):
File "/home/tom/fssd/WWW2025/swift/swift/cli/sft.py", line 5, in
sft_main()
File "/home/tom/fssd/WWW2025/swift/swift/utils/run_utils.py", line 32, in x_main
result = llm_x(args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/tom/fssd/WWW2025/swift/swift/llm/sft.py", line 546, in llm_sft
return trainer_train(args, model, template, train_dataset, val_dataset, callbacks=callbacks, msg=msg)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tom/fssd/WWW2025/swift/swift/llm/sft.py", line 496, in trainer_train
trainer.train(training_args.resume_from_checkpoint)
File "/home/tom/fssd/WWW2025/swift/swift/trainers/mixin.py", line 493, in train
res = super().train(resume_from_checkpoint, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/transformers/trainer.py", line 2122, in train
return inner_training_loop(
^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/transformers/trainer.py", line 2474, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/transformers/trainer.py", line 3572, in training_step
loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tom/fssd/WWW2025/swift/swift/trainers/trainers.py", line 161, in compute_loss
outputs = model(**inputs)
^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1844, in _call_impl
return inner()
^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1769, in inner
args_kwargs_result = hook(self, args, kwargs) # type: ignore[misc]
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tom/fssd/WWW2025/swift/swift/llm/utils/template.py", line 350, in _pre_forward_hook
res_extra.append(self._post_encode(module, d))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tom/fssd/WWW2025/swift/swift/llm/utils/template.py", line 1355, in _postencode
, inputsembeds, labels, = self.model.merge_multimodal(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tom/.cache/huggingface/modules/transformers_modules/Ovis1.6-Gemma2-9B/modeling_ovis.py", line 376, in merge_multimodal
visual_tokens = self.visual_tokenizer(torch.cat([x for x in pixel_values], dim=0))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/accelerate/hooks.py", line 170, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tom/.cache/huggingface/modules/transformers_modules/Ovis1.6-Gemma2-9B/modeling_ovis.py", line 223, in forward
features = self.encode(pixel_values)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tom/.cache/huggingface/modules/transformers_modules/Ovis1.6-Gemma2-9B/modeling_ovis.py", line 198, in encode
output = self.backbone(pixel_values, output_hidden_states=True, return_dict=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/accelerate/hooks.py", line 170, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/transformers/models/siglip/modeling_siglip.py", line 1190, in forward
return self.vision_model(
^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/accelerate/hooks.py", line 170, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/transformers/models/siglip/modeling_siglip.py", line 1101, in forward
pooler_output = self.head(last_hidden_state) if self.use_head else None
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/accelerate/hooks.py", line 170, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/transformers/models/siglip/modeling_siglip.py", line 1128, in forward
hidden_state = self.attention(probe, hidden_state, hidden_state)[0]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/accelerate/hooks.py", line 170, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/torch/nn/modules/activation.py", line 1368, in forward
attn_output, attn_output_weights = F.multi_head_attention_forward(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/www2025/lib/python3.12/site-packages/torch/nn/functional.py", line 6251, in multi_head_attention_forward
attn_output = linear(attn_output, out_proj_weight, out_proj_bias)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: self and mat2 must have the same dtype, but got BFloat16 and Char
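For context: "Char" is how PyTorch prints torch.int8. The failing call is the plain F.linear inside nn.MultiheadAttention (SigLIP's SiglipMultiheadAttentionPoolingHead), whose out_proj was replaced by a bitsandbytes Linear8bitLt when the model was loaded with load_in_8bit=True. F.multi_head_attention_forward reads out_proj.weight directly instead of calling the module's forward(), so the bfloat16 attention output is multiplied against the raw int8 weight storage. The snippet below is a minimal standalone sketch of that mismatch (it does not use swift or Ovis); the exact wording of the raised error may vary by device and PyTorch version.

```python
import torch
import torch.nn.functional as F

# Minimal sketch of the dtype mismatch (standalone, not using swift/Ovis):
# a bfloat16 activation hits F.linear with a raw int8 weight, which is what
# happens when multi_head_attention_forward uses out_proj.weight directly
# from a bitsandbytes Linear8bitLt instead of calling its forward().
x = torch.randn(1, 1152, dtype=torch.bfloat16)                      # pooled hidden state
w_int8 = torch.randint(-128, 127, (1152, 1152), dtype=torch.int8)   # quantized weight storage

try:
    F.linear(x, w_int8)  # bypasses the Linear8bitLt dequantization path
except RuntimeError as e:
    # expected: a dtype-mismatch error such as
    # "self and mat2 must have the same dtype, but got BFloat16 and Char"
    print(e)
```

The full log follows.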
Loading checkpoint shards: 0%| | 0/5 [00:00<?, ?it/s] Loading checkpoint shards: 20%|██ | 1/5 [00:04<00:18, 4.72s/it] Loading checkpoint shards: 40%|████ | 2/5 [00:10<00:16, 5.53s/it] Loading checkpoint shards: 60%|██████ | 3/5 [00:16<00:11, 5.60s/it] Loading checkpoint shards: 80%|████████ | 4/5 [00:23<00:06, 6.30s/it] Loading checkpoint shards: 100%|██████████| 5/5 [00:24<00:00, 4.25s/it] Loading checkpoint shards: 100%|██████████| 5/5 [00:24<00:00, 4.90s/it] [INFO:swift] model.max_model_len: 8192 [INFO:swift] model.hf_device_map: {'': device(type='cuda', index=0)} [INFO:swift] model_config: OvisConfig { "_attn_implementation_autoset": true, "_name_or_path": "/home/tom/fssd/WWW2025/Ovis1.6-Gemma2-9B", "architectures": [ "Ovis" ], "auto_map": { "AutoConfig": "configuration_ovis.OvisConfig", "AutoModelForCausalLM": "modeling_ovis.Ovis" }, "conversation_formatter_class": "GemmaConversationFormatter", "disable_tie_weight": false, "hidden_size": 3584, "keys_to_ignore_at_inference": [ "past_key_values" ], "llm_attn_implementation": "eager", "llm_config": { "_attn_implementation_autoset": false, "_name_or_path": "google/gemma-2-9b-it", "add_cross_attention": false, "architectures": [ "Gemma2ForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "attn_logit_softcapping": 50.0, "bad_words_ids": null, "begin_suppress_tokens": null, "bos_token_id": 2, "cache_implementation": "hybrid", "chunk_size_feed_forward": 0, "cross_attention_hidden_size": null, "decoder_start_token_id": null, "diversity_penalty": 0.0, "do_sample": false, "early_stopping": false, "encoder_no_repeat_ngram_size": 0, "eos_token_id": 1, "exponential_decay_length_penalty": null, "final_logit_softcapping": 30.0, "finetuning_task": null, "forced_bos_token_id": null, "forced_eos_token_id": null, "head_dim": 256, "hidden_act": "gelu_pytorch_tanh", "hidden_activation": "gelu_pytorch_tanh", "hidden_size": 3584, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "initializer_range": 0.02, "intermediate_size": 14336, "is_decoder": false, "is_encoder_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "length_penalty": 1.0, "max_length": 20, "max_position_embeddings": 8192, "min_length": 0, "model_type": "gemma2", "no_repeat_ngram_size": 0, "num_attention_heads": 16, "num_beam_groups": 1, "num_beams": 1, "num_hidden_layers": 42, "num_key_value_heads": 8, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_scores": false, "pad_token_id": 0, "prefix": null, "problem_type": null, "pruned_heads": {}, "query_pre_attn_scalar": 256, "remove_invalid_values": false, "repetition_penalty": 1.0, "return_dict": true, "return_dict_in_generate": false, "rms_norm_eps": 1e-06, "rope_theta": 10000.0, "sep_token_id": null, "sliding_window": 4096, "sliding_window_size": 4096, "suppress_tokens": null, "task_specific_params": null, "temperature": 1.0, "tf_legacy_loss": false, "tie_encoder_decoder": false, "tie_word_embeddings": true, "tokenizer_class": null, "top_k": 50, "top_p": 1.0, "torch_dtype": "bfloat16", "torchscript": false, "typical_p": 1.0, "use_bfloat16": false, "use_cache": true, "vocab_size": 256000 }, "model_type": "ovis", "multimodal_max_length": 8192, "quantization_config": { "_load_in_4bit": false, "_load_in_8bit": true, "bnb_4bit_compute_dtype": "bfloat16", "bnb_4bit_quant_storage": "uint8", "bnb_4bit_quant_type": "nf4", "bnb_4bit_use_double_quant": true, "llm_int8_enable_fp32_cpu_offload": false, "llm_int8_has_fp16_weight": false, "llm_int8_skip_modules": null, "llm_int8_threshold": 
6.0, "load_in_4bit": false, "load_in_8bit": true, "quant_method": "bitsandbytes" }, "torch_dtype": "bfloat16", "transformers_version": "4.46.1", "use_cache": true, "visual_tokenizer_config": { "_attn_implementation_autoset": false, "_name_or_path": "", "add_cross_attention": false, "architectures": null, "backbone_config": { "_attn_implementation_autoset": false, "_name_or_path": "google/siglip-so400m-patch14-384", "add_cross_attention": false, "architectures": null, "attention_dropout": 0.0, "bad_words_ids": null, "begin_suppress_tokens": null, "bos_token_id": null, "chunk_size_feed_forward": 0, "cross_attention_hidden_size": null, "decoder_start_token_id": null, "diversity_penalty": 0.0, "do_sample": false, "early_stopping": false, "encoder_no_repeat_ngram_size": 0, "eos_token_id": null, "exponential_decay_length_penalty": null, "finetuning_task": null, "forced_bos_token_id": null, "forced_eos_token_id": null, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 1152, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "image_size": 384, "intermediate_size": 4304, "is_decoder": false, "is_encoder_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-06, "length_penalty": 1.0, "max_length": 20, "min_length": 0, "model_type": "siglip_vision_model", "no_repeat_ngram_size": 0, "num_attention_heads": 16, "num_beam_groups": 1, "num_beams": 1, "num_channels": 3, "num_hidden_layers": 27, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_scores": false, "pad_token_id": null, "patch_size": 14, "prefix": null, "problem_type": null, "pruned_heads": {}, "remove_invalid_values": false, "repetition_penalty": 1.0, "return_dict": true, "return_dict_in_generate": false, "sep_token_id": null, "suppress_tokens": null, "task_specific_params": null, "temperature": 1.0, "tf_legacy_loss": false, "tie_encoder_decoder": false, "tie_word_embeddings": true, "tokenizer_class": null, "top_k": 50, "top_p": 1.0, "torch_dtype": null, "torchscript": false, "typical_p": 1.0, "use_bfloat16": false }, "backbone_kwargs": {}, "bad_words_ids": null, "begin_suppress_tokens": null, "bos_token_id": null, "chunk_size_feed_forward": 0, "cross_attention_hidden_size": null, "decoder_start_token_id": null, "depths": null, "diversity_penalty": 0.0, "do_sample": false, "drop_cls_token": false, "early_stopping": false, "encoder_no_repeat_ngram_size": 0, "eos_token_id": null, "exponential_decay_length_penalty": null, "finetuning_task": null, "forced_bos_token_id": null, "forced_eos_token_id": null, "hidden_stride": 2, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "is_decoder": false, "is_encoder_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "length_penalty": 1.0, "max_length": 20, "min_length": 0, "model_type": "siglip_visual_tokenizer", "no_repeat_ngram_size": 0, "num_beam_groups": 1, "num_beams": 1, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_scores": false, "pad_token_id": null, "prefix": null, "problem_type": null, "pruned_heads": {}, "remove_invalid_values": false, "repetition_penalty": 1.0, "return_dict": true, "return_dict_in_generate": false, "sep_token_id": null, "suppress_tokens": null, "task_specific_params": null, "tau": 1.0, "temperature": 1.0, "tf_legacy_loss": false, "tie_encoder_decoder": false, "tie_word_embeddings": true, "tokenize_function": "softmax", "tokenizer_class": null, "top_k": 50, "top_p": 1.0, "torch_dtype": null, "torchscript": false, "typical_p": 1.0, "use_bfloat16": false, 
"vocab_size": 65536 } }
[INFO:swift] model.generation_config: GenerationConfig { "bos_token_id": 2, "eos_token_id": 1, "max_new_tokens": 2048, "pad_token_id": 0 }
[INFO:swift] Setting model.config.use_cache: False
[INFO:swift] target_modules: ^(llm)(?!.*(lm_head|output|emb|wte|shared)).*
[INFO:swift] modules_to_save: []
[INFO:swift] lora_config: get_wrapped_class.<locals>.PeftWrapper(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path='/home/tom/fssd/WWW2025/Ovis1.6-Gemma2-9B', revision=None, task_type='CAUSAL_LM', inference_mode=False, r=8, target_modules='^(llm)(?!.*(lm_head|output|emb|wte|shared)).*', lora_alpha=32, lora_dropout=0.05, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=[], init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, use_dora=False, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False), lora_dtype=None, lorap_lr_ratio=None, lorap_emb_lr=1e-06)
[INFO:swift] [base_model.model.llm.model.embed_tokens.weight]: requires_grad=False, dtype=torch.bfloat16, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.self_attn.q_proj.base_layer.weight]: requires_grad=False, dtype=torch.int8, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.self_attn.q_proj.lora_A.default.weight]: requires_grad=True, dtype=torch.float32, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.self_attn.q_proj.lora_B.default.weight]: requires_grad=True, dtype=torch.float32, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.self_attn.k_proj.base_layer.weight]: requires_grad=False, dtype=torch.int8, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.self_attn.k_proj.lora_A.default.weight]: requires_grad=True, dtype=torch.float32, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.self_attn.k_proj.lora_B.default.weight]: requires_grad=True, dtype=torch.float32, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.self_attn.v_proj.base_layer.weight]: requires_grad=False, dtype=torch.int8, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.self_attn.v_proj.lora_A.default.weight]: requires_grad=True, dtype=torch.float32, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.self_attn.v_proj.lora_B.default.weight]: requires_grad=True, dtype=torch.float32, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.self_attn.o_proj.base_layer.weight]: requires_grad=False, dtype=torch.int8, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.self_attn.o_proj.lora_A.default.weight]: requires_grad=True, dtype=torch.float32, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.self_attn.o_proj.lora_B.default.weight]: requires_grad=True, dtype=torch.float32, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.mlp.gate_proj.base_layer.weight]: requires_grad=False, dtype=torch.int8, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.mlp.gate_proj.lora_A.default.weight]: requires_grad=True, dtype=torch.float32, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.mlp.gate_proj.lora_B.default.weight]: requires_grad=True, dtype=torch.float32, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.mlp.up_proj.base_layer.weight]: requires_grad=False, dtype=torch.int8, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.mlp.up_proj.lora_A.default.weight]: requires_grad=True, dtype=torch.float32, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.mlp.up_proj.lora_B.default.weight]: requires_grad=True, dtype=torch.float32, device=cuda:0
[INFO:swift] [base_model.model.llm.model.layers.0.mlp.down_proj.base_layer.weight]: requires_grad=False, dtype=torch.int8, device=cuda:0
[INFO:swift] ...
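For reference, a parameter summary like the [INFO:swift] lines above can be reproduced with plain PyTorch; the helper below is a hypothetical sketch (not part of swift) that makes the int8 base layers versus float32 LoRA adapters easy to spot.

```python
import torch

def dump_param_dtypes(model: torch.nn.Module, limit: int = 20) -> None:
    # Hypothetical helper, not part of swift: print the same kind of
    # requires_grad / dtype / device summary as the log lines above.
    for i, (name, param) in enumerate(model.named_parameters()):
        if i >= limit:
            print("...")
            break
        print(f"[{name}]: requires_grad={param.requires_grad}, "
              f"dtype={param.dtype}, device={param.device}")
```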
[INFO:swift] PeftModelForCausalLM(
(base_model): LoraModel(
(model): Ovis(
(llm): Gemma2ForCausalLM(
(model): Gemma2Model(
(embed_tokens): Embedding(256000, 3584, padding_idx=0)
(layers): ModuleList(
(0-41): 42 x Gemma2DecoderLayer(
(self_attn): Gemma2Attention(
(q_proj): lora.Linear8bitLt(
(base_layer): Linear8bitLt(in_features=3584, out_features=4096, bias=False)
(lora_dropout): ModuleDict(
(default): Dropout(p=0.05, inplace=False)
)
(lora_A): ModuleDict(
(default): Linear(in_features=3584, out_features=8, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=8, out_features=4096, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
(lora_magnitude_vector): ModuleDict()
)
(k_proj): lora.Linear8bitLt(
(base_layer): Linear8bitLt(in_features=3584, out_features=2048, bias=False)
(lora_dropout): ModuleDict(
(default): Dropout(p=0.05, inplace=False)
)
(lora_A): ModuleDict(
(default): Linear(in_features=3584, out_features=8, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=8, out_features=2048, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
(lora_magnitude_vector): ModuleDict()
)
(v_proj): lora.Linear8bitLt(
(base_layer): Linear8bitLt(in_features=3584, out_features=2048, bias=False)
(lora_dropout): ModuleDict(
(default): Dropout(p=0.05, inplace=False)
)
(lora_A): ModuleDict(
(default): Linear(in_features=3584, out_features=8, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=8, out_features=2048, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
(lora_magnitude_vector): ModuleDict()
)
(o_proj): lora.Linear8bitLt(
(base_layer): Linear8bitLt(in_features=4096, out_features=3584, bias=False)
(lora_dropout): ModuleDict(
(default): Dropout(p=0.05, inplace=False)
)
(lora_A): ModuleDict(
(default): Linear(in_features=4096, out_features=8, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=8, out_features=3584, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
(lora_magnitude_vector): ModuleDict()
)
(rotary_emb): Gemma2RotaryEmbedding()
)
(mlp): Gemma2MLP(
(gate_proj): lora.Linear8bitLt(
(base_layer): Linear8bitLt(in_features=3584, out_features=14336, bias=False)
(lora_dropout): ModuleDict(
(default): Dropout(p=0.05, inplace=False)
)
(lora_A): ModuleDict(
(default): Linear(in_features=3584, out_features=8, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=8, out_features=14336, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
(lora_magnitude_vector): ModuleDict()
)
(up_proj): lora.Linear8bitLt(
(base_layer): Linear8bitLt(in_features=3584, out_features=14336, bias=False)
(lora_dropout): ModuleDict(
(default): Dropout(p=0.05, inplace=False)
)
(lora_A): ModuleDict(
(default): Linear(in_features=3584, out_features=8, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=8, out_features=14336, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
(lora_magnitude_vector): ModuleDict()
)
(down_proj): lora.Linear8bitLt(
(base_layer): Linear8bitLt(in_features=14336, out_features=3584, bias=False)
(lora_dropout): ModuleDict(
(default): Dropout(p=0.05, inplace=False)
)
(lora_A): ModuleDict(
(default): Linear(in_features=14336, out_features=8, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=8, out_features=3584, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
(lora_magnitude_vector): ModuleDict()
)
(act_fn): PytorchGELUTanh()
)
(input_layernorm): Gemma2RMSNorm((3584,), eps=1e-06)
(pre_feedforward_layernorm): Gemma2RMSNorm((3584,), eps=1e-06)
(post_feedforward_layernorm): Gemma2RMSNorm((3584,), eps=1e-06)
(post_attention_layernorm): Gemma2RMSNorm((3584,), eps=1e-06)
)
)
(norm): Gemma2RMSNorm((3584,), eps=1e-06)
)
(lm_head): Linear(in_features=3584, out_features=256000, bias=False)
)
(visual_tokenizer): SiglipVisualTokenizer(
(backbone): SiglipVisionModel(
(vision_model): SiglipVisionTransformer(
(embeddings): SiglipVisionEmbeddings(
(patch_embedding): Conv2d(3, 1152, kernel_size=(14, 14), stride=(14, 14), padding=valid)
(position_embedding): Embedding(729, 1152)
)
(encoder): SiglipEncoder(
(layers): ModuleList(
(0-26): 27 x SiglipEncoderLayer(
(self_attn): SiglipSdpaAttention(
(k_proj): Linear8bitLt(in_features=1152, out_features=1152, bias=True)
(v_proj): Linear8bitLt(in_features=1152, out_features=1152, bias=True)
(q_proj): Linear8bitLt(in_features=1152, out_features=1152, bias=True)
(out_proj): Linear8bitLt(in_features=1152, out_features=1152, bias=True)
)
(layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
(mlp): SiglipMLP(
(activation_fn): PytorchGELUTanh()
(fc1): Linear8bitLt(in_features=1152, out_features=4304, bias=True)
(fc2): Linear8bitLt(in_features=4304, out_features=1152, bias=True)
)
(layer_norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
)
)
)
(post_layernorm): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
(head): SiglipMultiheadAttentionPoolingHead(
(attention): MultiheadAttention(
(out_proj): Linear8bitLt(in_features=1152, out_features=1152, bias=True)
)
(layernorm): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
(mlp): SiglipMLP(
(activation_fn): PytorchGELUTanh()
(fc1): Linear8bitLt(in_features=1152, out_features=4304, bias=True)
(fc2): Linear8bitLt(in_features=4304, out_features=1152, bias=True)
)
)
)
)
(head): Sequential(
(0): Linear8bitLt(in_features=4608, out_features=65531, bias=False)
(1): LayerNorm((65531,), eps=1e-05, elementwise_affine=True)
)
)
(vte): VisualEmbedding(65536, 3584)
)
)
)
[INFO:swift] PeftModelForCausalLM: 10233.9195M Params (27.0090M Trainable [0.2639%]), 0.0061M Buffers.
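The module tree above shows where the error comes from: inside SiglipMultiheadAttentionPoolingHead, the nn.MultiheadAttention out_proj has been swapped for a Linear8bitLt, and multi_head_attention_forward feeds its raw int8 weight straight into F.linear. One possible workaround (an assumption on my side, not a confirmed fix, and I am not sure whether swift exposes this on its CLI) is to keep the visual tokenizer out of 8-bit quantization via llm_int8_skip_modules:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Hedged workaround sketch: exclude the vision side from 8-bit quantization so
# nn.MultiheadAttention never sees a raw int8 out_proj weight. The module names
# below are guesses taken from the structure printed above and may need adjusting.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_skip_modules=["visual_tokenizer", "vte", "lm_head"],
)

model = AutoModelForCausalLM.from_pretrained(
    "/home/tom/fssd/WWW2025/Ovis1.6-Gemma2-9B",  # local path from the log above
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
    trust_remote_code=True,
)
```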
[INFO:swift] system: None
[INFO:swift] args.lazy_tokenize: True
Generating train split: 1000 examples [00:00, 53817.98 examples/s]
[INFO:swift] train_dataset: Dataset({ features: ['query', 'response', 'images'], num_rows: 990 })
[INFO:swift] val_dataset: Dataset({ features: ['query', 'response', 'images'], num_rows: 10 })
[INFO:swift] Setting max_partition: 9. You can adjust this hyperparameter through the environment variable: `MAX_PARTITION`.
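The train_dataset above has the features ['query', 'response', 'images']. For completeness, one line of such a JSONL dataset would look roughly like the sample below (illustrative values only: the image path is a placeholder, and the response text is taken from the [LABELS] line later in the log).

```python
import json

# Illustrative sample with the same features the log reports for train_dataset
# ('query', 'response', 'images'). Values are placeholders, not real data.
sample = {
    "query": "What is shown on this page?",
    "response": "活动页面",                     # label text seen in the [LABELS] log line
    "images": ["/path/to/example_image.jpg"],  # placeholder path
}

with open("train.jsonl", "a", encoding="utf-8") as f:
    f.write(json.dumps(sample, ensure_ascii=False) + "\n")
```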
[INFO:swift] training_args: Seq2SeqTrainingArguments(
_n_gpu=1,
acc_strategy=token,
accelerator_config={'split_batches': False, 'dispatch_batches': False, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.95,
adam_epsilon=1e-08,
additional_saved_files=[],
auto_find_batch_size=False,
batch_eval_metrics=False,
bf16=True,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=1,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=1000,
eval_strategy=IntervalStrategy.STEPS,
eval_use_gather_object=False,
evaluation_strategy=None,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
generation_config=GenerationConfig {
"bos_token_id": 2,
"eos_token_id": 1,
"max_new_tokens": 2048,
"pad_token_id": 0
}
,
generation_max_length=None,
generation_num_beams=None,
gradient_accumulation_steps=1,
gradient_checkpointing=True,
gradient_checkpointing_kwargs=None,
greater_is_better=False,
group_by_length=False,
half_precision_backend=auto,
hub_always_push=False,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=HubStrategy.EVERY_SAVE,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_for_metrics=[],
include_inputs_for_metrics=False,
include_num_input_tokens_seen=False,
include_tokens_per_second=False,
jit_mode_eval=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=0.0001,
length_column_name=length,
load_best_model_at_end=False,
local_rank=0,
log_level=passive,
log_level_replica=warning,
log_on_each_node=True,
logging_dir=/home/tom/fssd/WWW2025/output/ovis1_6-gemma2-9b/v14-20241126-214907/ovis1_6-gemma2-9b/v0-20241126-215001/runs,
logging_first_step=True,
logging_nan_inf_filter=True,
logging_steps=1000,
logging_strategy=IntervalStrategy.STEPS,
loss_name=None,
lr_scheduler_kwargs={},
lr_scheduler_type=SchedulerType.COSINE,
max_grad_norm=1,
max_steps=-1,
metric_for_best_model=loss,
metric_warmup_step=0,
mp_parameters=,
neftune_noise_alpha=None,
no_cuda=False,
num_train_epochs=2,
optim=OptimizerNames.ADAMW_TORCH,
optim_args=None,
optim_target_modules=None,
output_dir=/home/tom/fssd/WWW2025/output/ovis1_6-gemma2-9b/v14-20241126-214907/ovis1_6-gemma2-9b/v0-20241126-215001,
overwrite_output_dir=False,
past_index=-1,
per_device_eval_batch_size=1,
per_device_train_batch_size=1,
predict_with_generate=False,
prediction_loss_only=False,
push_to_hub=False,
push_to_hub_model_id=None,
push_to_hub_organization=None,
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
ray_scope=last,
remove_unused_columns=False,
report_to=['tensorboard'],
restore_callback_states_from_checkpoint=False,
resume_from_checkpoint=None,
run_name=/home/tom/fssd/WWW2025/output/ovis1_6-gemma2-9b/v14-20241126-214907/ovis1_6-gemma2-9b/v0-20241126-215001,
save_on_each_node=False,
save_only_model=False,
save_safetensors=True,
save_steps=1000,
save_strategy=IntervalStrategy.STEPS,
save_total_limit=2,
seed=42,
skip_memory_metrics=True,
sortish_sampler=False,
split_batches=None,
tf32=None,
torch_compile=False,
torch_compile_backend=None,
torch_compile_mode=None,
torch_empty_cache_steps=None,
torchdynamo=None,
tpu_metrics_debug=False,
tpu_num_cores=None,
train_dataset_sample=-1,
train_sampler_random=True,
use_cpu=False,
use_ipex=False,
use_legacy_prediction_loop=False,
use_liger_kernel=False,
use_mps_device=False,
warmup_ratio=0.05,
warmup_steps=0,
weight_decay=0.1,
)
[ERROR:swift] There are error run git command.
/home/tom/fssd/WWW2025/swift/swift/trainers/mixin.py:93: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Seq2SeqTrainer.__init__`. Use `processing_class` instead.
  super().__init__(
[INFO:swift] [LABELS_IDS] [-100 * 214, 23515, 30582, 107]
[INFO:swift] [LABELS] [-100 * 214]活动页面
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
[2024-11-26 21:50:40,589] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[INFO:swift] The SftArguments will be saved in: /home/tom/fssd/WWW2025/output/ovis1_6-gemma2-9b/v14-20241126-214907/ovis1_6-gemma2-9b/v0-20241126-215001/sft_args.json
[INFO:swift] The Seq2SeqTrainingArguments will be saved in: /home/tom/fssd/WWW2025/output/ovis1_6-gemma2-9b/v14-20241126-214907/ovis1_6-gemma2-9b/v0-20241126-215001/training_args.json
[INFO:swift] The logging file will be saved in: /home/tom/fssd/WWW2025/output/ovis1_6-gemma2-9b/v14-20241126-214907/ovis1_6-gemma2-9b/v0-20241126-215001/logging.jsonl
Train: 0%| | 0/1980 [00:03<?, ?it/s]