SmerkyG / gptcore

Fast modular code to create and train cutting edge LLMs
Apache License 2.0
62 stars 9 forks source link

7900xt x4 train error #9

Open win10ogod opened 5 months ago

win10ogod commented 5 months ago

7900x4:~/gptcore$ python cli.py train -c configs/gptalpha.cfg.py
Traceback (most recent call last):
  File "/usr/lib/python3.10/pydoc.py", line 443, in safeimport
    module = __import__(path)
  File "/home/user/gptcore/dataset/__init__.py", line 35, in <module>
    class PipedDatasetWrapper(typing.Generic[T_co], torch.utils.data.datapipes.datapipe.IterDataPipe[T_co]):
  File "/home/user/.local/lib/python3.10/site-packages/torch/utils/data/datapipes/_typing.py", line 373, in __new__
    return super().__new__(cls, name, bases, namespace, **kwargs)  # type: ignore[call-overload]
  File "/home/user/.local/lib/python3.10/site-packages/torch/utils/data/datapipes/_typing.py", line 260, in __new__
    return super().__new__(cls, name, bases, namespace, **kwargs)  # type: ignore[call-overload]
  File "/usr/lib/python3.10/abc.py", line 106, in __new__
    cls = super().__new__(mcls, name, bases, namespace, **kwargs)
TypeError: Cannot create a consistent method resolution order (MRO) for bases Generic, IterDataPipe

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/user/gptcore/util/config.py", line 479, in process
    located = locate(fullid, Missing)
  File "/home/user/gptcore/util/locate.py", line 57, in locate
    nextmodule = pydoc.safeimport('.'.join(parts[:n+1]), forceload)
  File "/usr/lib/python3.10/pydoc.py", line 458, in safeimport
    raise ErrorDuringImport(path, sys.exc_info())
pydoc.ErrorDuringImport: problem in dataset - TypeError: Cannot create a consistent method resolution order (MRO) for bases Generic, IterDataPipe

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/user/gptcore/cli.py", line 176, in <module>
    cli()
  File "/home/user/gptcore/cli.py", line 89, in cli
    disk_cfg = util.config.eval_first_expr(disk_cfg_str, macros)
  File "/home/user/gptcore/util/config.py", line 630, in eval_first_expr
    return ConfigParser().eval_first_expr(unparsed_input, incoming_macros)
  File "/home/user/gptcore/util/config.py", line 399, in eval_first_expr
    return self.process(node.value)
  File "/home/user/gptcore/util/config.py", line 552, in process
    rv = self.create_factory(node, node.args, node.keywords, immediate=True)
  File "/home/user/gptcore/util/config.py", line 618, in create_factory
    positional_placeholders_count, placeholders, args, kwargs = self.process_args_and_keywords(node_args=node_args, node_keywords=node_keywords)
  File "/home/user/gptcore/util/config.py", line 588, in process_args_and_keywords
    value = self.process(kw.value)
  File "/home/user/gptcore/util/config.py", line 522, in process
    rv = self.create_factory(node, node.args, node.keywords, immediate=False)
  File "/home/user/gptcore/util/config.py", line 618, in create_factory
    positional_placeholders_count, placeholders, args, kwargs = self.process_args_and_keywords(node_args=node_args, node_keywords=node_keywords)
  File "/home/user/gptcore/util/config.py", line 588, in process_args_and_keywords
    value = self.process(kw.value)
  File "/home/user/gptcore/util/config.py", line 522, in process
    rv = self.create_factory(node, node.args, node.keywords, immediate=False)
  File "/home/user/gptcore/util/config.py", line 607, in create_factory
    func_ident = self.process(func_node)
  File "/home/user/gptcore/util/config.py", line 560, in process
    raise ConfigParseError(node, self.unparsed_input, msg="Internal exception during configuration parsing " + str(e))
util.config.ConfigParseError: Internal exception during configuration parsing problem in dataset - TypeError: Cannot create a consistent method resolution order (MRO) for bases Generic, IterDataPipe at line 70, col 36
    datamodule_factory=lambda: dataset.DM(
                               ^^^^^^^^^^^

SmerkyG commented 5 months ago

Looks like issue #11 might be relevant to you for this

On Sat, Mar 30, 2024, 11:22 PM win10ogod @.***> wrote:

7900x4:~/gptcore$ python cli.py train -c configs/gptalpha.cfg.py
Traceback (most recent call last):
  File "/usr/lib/python3.10/pydoc.py", line 443, in safeimport
    module = __import__(path)
  File "/home/user/gptcore/dataset/__init__.py", line 35, in <module>
    class PipedDatasetWrapper(typing.Generic[T_co], torch.utils.data.datapipes.datapipe.IterDataPipe[T_co]):
  File "/home/user/.local/lib/python3.10/site-packages/torch/utils/data/datapipes/_typing.py", line 373, in __new__
    return super().__new__(cls, name, bases, namespace, **kwargs)  # type: ignore[call-overload]
  File "/home/user/.local/lib/python3.10/site-packages/torch/utils/data/datapipes/_typing.py", line 260, in __new__
    return super().__new__(cls, name, bases, namespace, **kwargs)  # type: ignore[call-overload]
  File "/usr/lib/python3.10/abc.py", line 106, in __new__
    cls = super().__new__(mcls, name, bases, namespace, **kwargs)
TypeError: Cannot create a consistent method resolution order (MRO) for bases Generic, IterDataPipe

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/user/gptcore/util/config.py", line 479, in process
    located = locate(fullid, Missing)
  File "/home/user/gptcore/util/locate.py", line 57, in locate
    nextmodule = pydoc.safeimport('.'.join(parts[:n+1]), forceload)
  File "/usr/lib/python3.10/pydoc.py", line 458, in safeimport
    raise ErrorDuringImport(path, sys.exc_info())
pydoc.ErrorDuringImport: problem in dataset - TypeError: Cannot create a consistent method resolution order (MRO) for bases Generic, IterDataPipe

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/user/gptcore/cli.py", line 176, in <module>
    cli()
  File "/home/user/gptcore/cli.py", line 89, in cli
    disk_cfg = util.config.eval_first_expr(disk_cfg_str, macros)
  File "/home/user/gptcore/util/config.py", line 630, in eval_first_expr
    return ConfigParser().eval_first_expr(unparsed_input, incoming_macros)
  File "/home/user/gptcore/util/config.py", line 399, in eval_first_expr
    return self.process(node.value)
  File "/home/user/gptcore/util/config.py", line 552, in process
    rv = self.create_factory(node, node.args, node.keywords, immediate=True)
  File "/home/user/gptcore/util/config.py", line 618, in create_factory
    positional_placeholders_count, placeholders, args, kwargs = self.process_args_and_keywords(node_args=node_args, node_keywords=node_keywords)
  File "/home/user/gptcore/util/config.py", line 588, in process_args_and_keywords
    value = self.process(kw.value)
  File "/home/user/gptcore/util/config.py", line 522, in process
    rv = self.create_factory(node, node.args, node.keywords, immediate=False)
  File "/home/user/gptcore/util/config.py", line 618, in create_factory
    positional_placeholders_count, placeholders, args, kwargs = self.process_args_and_keywords(node_args=node_args, node_keywords=node_keywords)
  File "/home/user/gptcore/util/config.py", line 588, in process_args_and_keywords
    value = self.process(kw.value)
  File "/home/user/gptcore/util/config.py", line 522, in process
    rv = self.create_factory(node, node.args, node.keywords, immediate=False)
  File "/home/user/gptcore/util/config.py", line 607, in create_factory
    func_ident = self.process(func_node)
  File "/home/user/gptcore/util/config.py", line 560, in process
    raise ConfigParseError(node, self.unparsed_input, msg="Internal exception during configuration parsing " + str(e))
util.config.ConfigParseError: Internal exception during configuration parsing problem in dataset - TypeError: Cannot create a consistent method resolution order (MRO) for bases Generic, IterDataPipe at line 70, col 36
    datamodule_factory=lambda: dataset.DM(
                               ^^^^^^^^^^^

— Reply to this email directly, view it on GitHub https://github.com/SmerkyG/gptcore/issues/9, or unsubscribe https://github.com/notifications/unsubscribe-auth/ACDK33WFDR7EG5S5WLATTOTY256OJAVCNFSM6AAAAABFQB5W4SVHI2DSMVQWIX3LMV43ASLTON2WKOZSGIYTMNZVGY3TONY . You are receiving this because you are subscribed to this thread.Message ID: @.***>

opooladz commented 5 months ago

Yeah, getting Docker to work on Colab doesn't seem to be that straightforward. Some recommend udocker, but I haven't been able to get either to work...

Here is another repo that claims to work, but I have no idea whether it does.

https://github.com/drengskapur/docker-in-colab