Issue (status: Open) — reported by nlpcat, 11 months ago.
"""Convert bigcode/gpt_bigcode-santacoder to CTranslate2 using ALiBi positions.

Registers a custom loader for ``GPTBigCodeConfig`` that builds the decoder
spec with multi-query attention and ALiBi (instead of the model's learned
absolute position embeddings), converts the checkpoint, then generates a
short sample from the converted model.
"""

try:
    import transformers
except ImportError:
    pass

from ctranslate2.specs import (
    transformer_spec,
)
from ctranslate2.converters.transformers import (
    TransformersConverter,
    register_loader,
    ModelLoader,
    _SUPPORTED_ACTIVATIONS,
)


@register_loader("GPTBigCodeConfig")
class GPTBigCodeMHALoader(ModelLoader):
    """Loader mapping a HF GPTBigCode checkpoint onto a CTranslate2 decoder spec."""

    @property
    def architecture_name(self):
        return "GPTBigCodeForCausalLM"

    def get_model_spec(self, model):
        # Build the decoder spec with multi-query attention and ALiBi
        # (positive positions). NOTE(review): the upstream checkpoint uses
        # learned position embeddings (wpe); whether CTranslate2's ALiBi
        # path supports the multi-query layout is unconfirmed — the runtime
        # ValueError in this issue suggests it may not.
        spec = transformer_spec.TransformerDecoderModelSpec.from_config(
            model.config.n_layer,
            model.config.n_head,
            pre_norm=True,
            activation=_SUPPORTED_ACTIVATIONS[model.config.activation_function],
            multi_query_attention=True,
            alibi=True,
            alibi_use_positive_positions=True,
        )
        self.set_decoder(spec.decoder, model.transformer)
        self.set_linear(spec.decoder.projection, model.lm_head)
        return spec

    def set_vocabulary(self, spec, tokens):
        spec.register_vocabulary(tokens)

    def get_vocabulary(self, model, tokenizer):
        # Pad the tokenizer vocabulary with placeholder tokens up to
        # model.config.vocab_size so the embedding shapes line up.
        tokens = super().get_vocabulary(model, tokenizer)
        missing = model.config.vocab_size - len(tokens)
        for index in range(missing):
            tokens.append("<extra_id_%d>" % index)
        return tokens

    def set_config(self, config, model, tokenizer):
        # Copy the special tokens from the HF tokenizer into the CT2 config.
        config.bos_token = tokenizer.bos_token
        config.eos_token = tokenizer.eos_token
        config.unk_token = tokenizer.unk_token

    def set_decoder(self, spec, module):
        spec.scale_embeddings = False
        self.set_embeddings(spec.embeddings, module.wte)
        # Position embeddings (wpe) intentionally skipped: ALiBi replaces them.
        # self.set_position_encodings(spec.position_encodings, module.wpe)
        self.set_layer_norm(spec.layer_norm, module.ln_f)
        for layer_spec, layer in zip(spec.layer, module.h):
            attn_spec = layer_spec.self_attention
            self.set_layer_norm(attn_spec.layer_norm, layer.ln_1)
            self.set_linear(attn_spec.linear[0], layer.attn.c_attn)
            self.set_linear(attn_spec.linear[1], layer.attn.c_proj)
            self.set_layer_norm(layer_spec.ffn.layer_norm, layer.ln_2)
            self.set_linear(layer_spec.ffn.linear_0, layer.mlp.c_fc)
            self.set_linear(layer_spec.ffn.linear_1, layer.mlp.c_proj)


converter = TransformersConverter(
    "bigcode/gpt_bigcode-santacoder",
    load_as_float16=False,
    low_cpu_mem_usage=True,
    trust_remote_code=False,
)
converter.convert("./bigcode_alibi", force=True)

import ctranslate2

generator = ctranslate2.Generator("./bigcode_alibi")
results = generator.generate_batch([["python"]], max_length=100)
Error raised by `generator.generate_batch`:
ValueError: can't index dimension 3 for a storage with rank 3
error: