I followed the installation instructions but have been unable to run the first example.
I'd be glad to contribute a slick script that installs all the requirements with one command, once I can successfully run it myself.
TypeError Traceback (most recent call last)
Cell In[3], line 1
----> 1 m = TransformerLanguageModel.from_pretrained(
2 "checkpoints/Pre-trained-BioGPT",
3 "checkpoint.pt",
4 "data",
5 tokenizer='moses',
6 bpe='fastbpe',
7 bpe_codes="data/bpecodes",
8 min_len=100,
9 max_len_b=1024)
File ~/venvs/biogpt/lib/python3.10/site-packages/fairseq/models/fairseq_model.py:267, in BaseFairseqModel.from_pretrained(cls, model_name_or_path, checkpoint_
file, data_name_or_path, **kwargs)
244 """
245 Load a :class:`~fairseq.models.FairseqModel` from a pre-trained model
246 file. Downloads and caches the pre-trained model file if needed.
(...)
263 model archive path.
264 """
265 from fairseq import hub_utils
--> 267 x = hub_utils.from_pretrained(
268 model_name_or_path,
269 checkpoint_file,
270 data_name_or_path,
271 archive_map=cls.hub_models(),
272 **kwargs,
273 )
274 logger.info(x["args"])
275 return hub_utils.GeneratorHubInterface(x["args"], x["task"], x["models"])
File ~/venvs/biogpt/lib/python3.10/site-packages/fairseq/hub_utils.py:73, in from_pretrained(model_name_or_path, checkpoint_file, data_name_or_path, archive_m
ap, **kwargs)
70 if "user_dir" in kwargs:
71 utils.import_user_module(argparse.Namespace(user_dir=kwargs["user_dir"]))
---> 73 models, args, task = checkpoint_utils.load_model_ensemble_and_task(
74 [os.path.join(model_path, cpt) for cpt in checkpoint_file.split(os.pathsep)],
75 arg_overrides=kwargs,
76 )
78 return {
79 "args": args,
80 "task": task,
81 "models": models,
82 }
File ~/venvs/biogpt/lib/python3.10/site-packages/fairseq/checkpoint_utils.py:469, in load_model_ensemble_and_task(filenames, arg_overrides, task, strict, suff
ix, num_shards, state)
467 argspec = inspect.getfullargspec(task.build_model)
468 if "from_checkpoint" in argspec.args:
--> 469 model = task.build_model(cfg.model, from_checkpoint=True)
470 else:
471 model = task.build_model(cfg.model)
File ~/venvs/biogpt/lib/python3.10/site-packages/fairseq/tasks/language_modeling.py:191, in LanguageModelingTask.build_model(self, args, from_checkpoint)
190 def build_model(self, args, from_checkpoint=False):
--> 191 model = super().build_model(args, from_checkpoint)
192 for target in self.targets:
193 if target not in model.supported_targets:
File ~/venvs/biogpt/lib/python3.10/site-packages/fairseq/tasks/fairseq_task.py:671, in LegacyFairseqTask.build_model(self, args, from_checkpoint)
659 """
660 Build the :class:`~fairseq.models.BaseFairseqModel` instance for this
661 task.
(...)
667 a :class:`~fairseq.models.BaseFairseqModel` instance
668 """
669 from fairseq import models, quantization_utils
--> 671 model = models.build_model(args, self, from_checkpoint)
672 model = quantization_utils.quantize_model_scalar(model, args)
673 return model
File ~/venvs/biogpt/lib/python3.10/site-packages/fairseq/models/__init__.py:106, in build_model(cfg, task, from_checkpoint)
98 ARCH_CONFIG_REGISTRY[model_type](cfg)
100 assert model is not None, (
101 f"Could not infer model type from {cfg}. "
102 "Available models: {}".format(MODEL_DATACLASS_REGISTRY.keys())
103 + f" Requested model type: {model_type}"
104 )
--> 106 return model.build_model(cfg, task)
File ~/venvs/biogpt/lib/python3.10/site-packages/fairseq/models/transformer_lm.py:300, in TransformerLanguageModel.build_model(cls, args, task)
289 embed_tokens = AdaptiveInput(
290 len(task.source_dictionary),
291 task.source_dictionary.pad(),
(...)
297 args.quant_noise_pq_block_size,
298 )
299 else:
--> 300 embed_tokens = cls.build_embedding(
301 args, task.source_dictionary, args.decoder_input_dim
302 )
304 if args.tie_adaptive_weights:
305 assert args.adaptive_input
File ~/venvs/biogpt/lib/python3.10/site-packages/fairseq/models/transformer_lm.py:321, in TransformerLanguageModel.build_embedding(cls, args, dictionary, embe
d_dim, path)
319 @classmethod
320 def build_embedding(cls, args, dictionary, embed_dim, path=None):
--> 321 embed_tokens = Embedding(len(dictionary), embed_dim, dictionary.pad())
322 return embed_tokens
TypeError: object of type 'NoneType' has no len()
This is my installation script at the moment:
#!/usr/bin/env bash
# One-shot environment setup for BioGPT: creates a Python 3.10 virtualenv,
# builds fairseq v0.12.0, Moses, and fastBPE, installs Python deps, and
# downloads the pre-trained BioGPT checkpoint.
#
# Fail fast on any error, unset variable, or broken pipe — the original
# script would keep going after a failed step and produce confusing errors
# much later (e.g. a half-populated data dir).
set -euo pipefail

export ve_name='biogpt'
export py_version=3.10

# NOTE(review): piping a shortened URL straight into bash executes unreviewed
# remote code — consider downloading, inspecting, and pinning this script.
curl bit.ly/cfgvelinux -L | bash
. activate_ve "$ve_name"

ve_data_path="$HOME/venvs/$ve_name/data"
ve_code_path="$HOME/venvs/$ve_name/code"
# -p: idempotent on reruns (plain mkdir aborts if the dir already exists).
mkdir -p "$ve_code_path" "$ve_data_path"

# --- fairseq (pinned to v0.12.0, as required by BioGPT) ---
cd "$ve_code_path"
git clone https://github.com/pytorch/fairseq
cd fairseq
git checkout v0.12.0
pip install .
python setup.py build_ext --inplace

# --- Moses tokenizer ---
cd "$ve_code_path"
git clone https://github.com/moses-smt/mosesdecoder.git
export MOSES="$ve_code_path/mosesdecoder"

# --- fastBPE ---
cd "$ve_code_path"
git clone https://github.com/glample/fastBPE.git
export FASTBPE="$ve_code_path/fastBPE"
cd "$FASTBPE"
g++ -std=c++11 -pthread -O3 fastBPE/main.cc -IfastBPE -o fast

# --- Python dependencies ---
pip install sacremoses scikit-learn
pip install torch==1.12.0

# --- Pre-trained checkpoint ---
# Use the variable instead of the previously hardcoded ~/venvs/biogpt/...
# path so renaming ve_name keeps the script consistent.
mkdir -p "$ve_data_path/checkpoints"
cd "$ve_data_path/checkpoints"
wget https://msramllasc.blob.core.windows.net/modelrelease/BioGPT/checkpoints/Pre-trained-BioGPT.tgz
tar -zxvf Pre-trained-BioGPT.tgz

# NOTE(review): the reported "object of type 'NoneType' has no len()" comes
# from fairseq building the model with an empty source dictionary. This
# script never populates the `data` directory that from_pretrained() is
# pointed at (dict.txt / bpecodes from the BioGPT repo) — presumably the
# BioGPT repository itself must also be cloned and its `data` assets fetched,
# and the example run from the BioGPT checkout so the relative
# "data"/"data/bpecodes" paths resolve. TODO: confirm against the BioGPT README.
I followed the installation instructions but have been unable to run the first example. I'd be glad to contribute a slick script that installs all the requirements with one command, once I can successfully run it myself.
This is my installation script at the moment: