My objective is to train a document classification model but I am facing memory issues. Data consists of 78200 training samples and 26067 dev samples with total number of unique labels = 13. Average number of tokens present in each sample is 2000 tokens. These samples are converted to spacy docbin format after preprocessing for training.
The model failed to train even on a 64 GB SageMaker system, after failing on lower-memory systems with 100% memory usage, despite lowering the batch size to nlp.batch_size = 4 and batcher.size = 1.
MemoryError((50178, 96), dtype('int32'))
Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/spacy/training/loop.py", line 122, in train
raise e
File "/usr/local/lib/python3.6/dist-packages/spacy/training/loop.py", line 105, in train
for batch, info, is_best_checkpoint in training_step_iterator:
File "/usr/local/lib/python3.6/dist-packages/spacy/training/loop.py", line 226, in train_while_improving
score, other_scores = evaluate()
File "/usr/local/lib/python3.6/dist-packages/spacy/training/loop.py", line 281, in evaluate
scores = nlp.evaluate(dev_corpus(nlp))
File "/usr/local/lib/python3.6/dist-packages/spacy/language.py", line 1415, in evaluate
for eg, doc in zip(examples, docs):
File "/usr/local/lib/python3.6/dist-packages/spacy/language.py", line 1575, in pipe
for doc in docs:
File "/usr/local/lib/python3.6/dist-packages/spacy/util.py", line 1598, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/trainable_pipe.pyx", line 73, in pipe
File "/usr/local/lib/python3.6/dist-packages/spacy/util.py", line 1547, in minibatch
batch = list(itertools.islice(items, int(batch_size)))
File "/usr/local/lib/python3.6/dist-packages/spacy/util.py", line 1598, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/trainable_pipe.pyx", line 79, in pipe
File "/usr/local/lib/python3.6/dist-packages/spacy/util.py", line 1617, in raise_error
raise e
File "spacy/pipeline/trainable_pipe.pyx", line 75, in spacy.pipeline.trainable_pipe.TrainablePipe.pipe
File "/usr/local/lib/python3.6/dist-packages/spacy/pipeline/tok2vec.py", line 125, in predict
tokvecs = self.model.predict(docs)
File "/usr/local/lib/python3.6/dist-packages/thinc/model.py", line 315, in predict
return self._func(self, X, is_train=False)[0]
File "/usr/local/lib/python3.6/dist-packages/thinc/layers/chain.py", line 54, in forward
Y, inc_layer_grad = layer(X, is_train=is_train)
File "/usr/local/lib/python3.6/dist-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/usr/local/lib/python3.6/dist-packages/thinc/layers/with_array.py", line 40, in forward
return _list_forward(cast(Model[List2d, List2d], model), Xseq, is_train)
File "/usr/local/lib/python3.6/dist-packages/thinc/layers/with_array.py", line 76, in _list_forward
Yf, get_dXf = layer(Xf, is_train)
File "/usr/local/lib/python3.6/dist-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/usr/local/lib/python3.6/dist-packages/thinc/layers/chain.py", line 54, in forward
Y, inc_layer_grad = layer(X, is_train=is_train)
File "/usr/local/lib/python3.6/dist-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/usr/local/lib/python3.6/dist-packages/thinc/layers/residual.py", line 40, in forward
Y, backprop_layer = model.layers[0](X, is_train)
File "/usr/local/lib/python3.6/dist-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/usr/local/lib/python3.6/dist-packages/thinc/layers/chain.py", line 54, in forward
Y, inc_layer_grad = layer(X, is_train=is_train)
File "/usr/local/lib/python3.6/dist-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/usr/local/lib/python3.6/dist-packages/thinc/layers/chain.py", line 54, in forward
Y, inc_layer_grad = layer(X, is_train=is_train)
File "/usr/local/lib/python3.6/dist-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/usr/local/lib/python3.6/dist-packages/thinc/layers/chain.py", line 54, in forward
Y, inc_layer_grad = layer(X, is_train=is_train)
File "/usr/local/lib/python3.6/dist-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/usr/local/lib/python3.6/dist-packages/thinc/layers/maxout.py", line 52, in forward
best, which = model.ops.maxout(Z)
File "thinc/backends/numpy_ops.pyx", line 151, in thinc.backends.numpy_ops.NumpyOps.maxout
numpy.core._exceptions.MemoryError: Unable to allocate 18.4 MiB for an array with shape (50178, 96) and data type int32
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.6/dist-packages/spacy/__main__.py", line 4, in <module>
setup_cli()
File "/usr/local/lib/python3.6/dist-packages/spacy/cli/_util.py", line 71, in setup_cli
command(prog_name=COMMAND)
File "/usr/local/lib/python3.6/dist-packages/click/core.py", line 1128, in __call__
return self.main(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/click/core.py", line 1053, in main
rv = self.invoke(ctx)
File "/usr/local/lib/python3.6/dist-packages/click/core.py", line 1659, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/usr/local/lib/python3.6/dist-packages/click/core.py", line 1395, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/lib/python3.6/dist-packages/click/core.py", line 754, in invoke
return __callback(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/typer/main.py", line 500, in wrapper
return callback(**use_params) # type: ignore
File "/usr/local/lib/python3.6/dist-packages/spacy/cli/train.py", line 45, in train_cli
train(config_path, output_path, use_gpu=use_gpu, overrides=overrides)
File "/usr/local/lib/python3.6/dist-packages/spacy/cli/train.py", line 75, in train
train_nlp(nlp, output_path, use_gpu=use_gpu, stdout=sys.stdout, stderr=sys.stderr)
File "/usr/local/lib/python3.6/dist-packages/spacy/training/loop.py", line 126, in train
save_checkpoint(False)
File "/usr/local/lib/python3.6/dist-packages/spacy/training/loop.py", line 67, in save_checkpoint
before_to_disk(nlp).to_disk(output_path / DIR_MODEL_LAST)
File "/usr/local/lib/python3.6/dist-packages/spacy/language.py", line 1988, in to_disk
util.to_disk(path, serializers, exclude)
File "/usr/local/lib/python3.6/dist-packages/spacy/util.py", line 1286, in to_disk
writer(path / key)
File "/usr/local/lib/python3.6/dist-packages/spacy/language.py", line 1986, in <lambda>
serializers[name] = lambda p, proc=proc: proc.to_disk(p, exclude=["vocab"]) # type: ignore[misc]
File "spacy/pipeline/trainable_pipe.pyx", line 318, in spacy.pipeline.trainable_pipe.TrainablePipe.to_disk
File "/usr/local/lib/python3.6/dist-packages/spacy/util.py", line 1286, in to_disk
writer(path / key)
File "spacy/pipeline/trainable_pipe.pyx", line 317, in spacy.pipeline.trainable_pipe.TrainablePipe.to_disk.lambda7
File "/usr/local/lib/python3.6/dist-packages/thinc/model.py", line 525, in to_disk
file_.write(self.to_bytes())
File "/usr/local/lib/python3.6/dist-packages/thinc/model.py", line 517, in to_bytes
return srsly.msgpack_dumps(msg)
File "/usr/local/lib/python3.6/dist-packages/srsly/_msgpack_api.py", line 14, in msgpack_dumps
return msgpack.dumps(data, use_bin_type=True)
File "/usr/local/lib/python3.6/dist-packages/srsly/msgpack/__init__.py", line 55, in packb
return Packer(**kwargs).pack(o)
File "srsly/msgpack/_packer.pyx", line 285, in srsly.msgpack._packer.Packer.pack
File "srsly/msgpack/_packer.pyx", line 291, in srsly.msgpack._packer.Packer.pack
File "srsly/msgpack/_packer.pyx", line 288, in srsly.msgpack._packer.Packer.pack
File "srsly/msgpack/_packer.pyx", line 235, in srsly.msgpack._packer.Packer._pack
File "srsly/msgpack/_packer.pyx", line 264, in srsly.msgpack._packer.Packer._pack
File "srsly/msgpack/_packer.pyx", line 235, in srsly.msgpack._packer.Packer._pack
File "srsly/msgpack/_packer.pyx", line 235, in srsly.msgpack._packer.Packer._pack
How to reproduce the behaviour
My objective is to train a document classification model but I am facing memory issues. Data consists of
78200
training samples and 26067
dev samples with total number of unique labels = 13. Average number of tokens present in each sample is 2000 tokens. These samples are converted to spacy docbin format after preprocessing for training. The model failed to train even on
64 GB
sagemaker system after failing on lower-memory systems with 100% memory usage and lowering the batch size to nlp.batch_size = 4
and batcher.size = 1
. Training:
Training Config
Error Logs
Your Environment