Closed: Yuki2L0ve closed this issue 9 months ago.
@shumingma Is this the same issue as https://github.com/microsoft/torchscale/issues/67 ?
That is to say, you haven't fixed the first problem yet? And how do I solve problem (2)?
The latest release, torchscale 0.3.0 (https://pypi.org/project/torchscale/), has fixed these problems. Please give it a try. Thanks!
>>> import torchscale
>>> from torchscale.architecture.config import EncoderDecoderConfig
>>> from torchscale.architecture.encoder_decoder import EncoderDecoder
>>>
>>> config = EncoderDecoderConfig(vocab_size=64000)
>>> encdec = EncoderDecoder(config)
>>> print(encdec)
EncoderDecoder(
  (encoder): Encoder(
    (dropout_module): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-11): 12 x EncoderLayer(
        (self_attn): MultiheadAttention(
          (k_proj): Linear(in_features=768, out_features=768, bias=True)
          (v_proj): Linear(in_features=768, out_features=768, bias=True)
          (q_proj): Linear(in_features=768, out_features=768, bias=True)
          (out_proj): Linear(in_features=768, out_features=768, bias=True)
          (inner_attn_ln): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (dropout_module): Dropout(p=0.0, inplace=False)
        )
        (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (dropout_module): Dropout(p=0.0, inplace=False)
        (ffn): FeedForwardNetwork(
          (activation_dropout_module): Dropout(p=0.0, inplace=False)
          (dropout_module): Dropout(p=0.0, inplace=False)
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (ffn_layernorm): LayerNorm((3072,), eps=1e-05, elementwise_affine=True)
        )
        (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      )
    )
    (layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (decoder): Decoder(
    (dropout_module): Dropout(p=0.0, inplace=False)
    (output_projection): Linear(in_features=768, out_features=64000, bias=False)
    (layers): ModuleList(
      (0-11): 12 x DecoderLayer(
        (dropout_module): Dropout(p=0.0, inplace=False)
        (self_attn): MultiheadAttention(
          (k_proj): Linear(in_features=768, out_features=768, bias=True)
          (v_proj): Linear(in_features=768, out_features=768, bias=True)
          (q_proj): Linear(in_features=768, out_features=768, bias=True)
          (out_proj): Linear(in_features=768, out_features=768, bias=True)
          (inner_attn_ln): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (dropout_module): Dropout(p=0.0, inplace=False)
        )
        (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (encoder_attn): MultiheadAttention(
          (k_proj): Linear(in_features=768, out_features=768, bias=True)
          (v_proj): Linear(in_features=768, out_features=768, bias=True)
          (q_proj): Linear(in_features=768, out_features=768, bias=True)
          (out_proj): Linear(in_features=768, out_features=768, bias=True)
          (dropout_module): Dropout(p=0.0, inplace=False)
        )
        (encoder_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (ffn): FeedForwardNetwork(
          (activation_dropout_module): Dropout(p=0.0, inplace=False)
          (dropout_module): Dropout(p=0.0, inplace=False)
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (ffn_layernorm): LayerNorm((3072,), eps=1e-05, elementwise_affine=True)
        )
        (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      )
    )
    (layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
)
>>> import torch
>>> from torchscale.architecture.config import RetNetConfig
>>> from torchscale.architecture.retnet import RetNetDecoder
>>>
>>> config = RetNetConfig(vocab_size=64000)
>>> retnet = RetNetDecoder(config)
>>> print(retnet)
RetNetDecoder(
  (dropout_module): Dropout(p=0.0, inplace=False)
  (output_projection): Linear(in_features=768, out_features=64000, bias=False)
  (layers): ModuleList(
    (0-11): 12 x DecoderLayer(
      (dropout_module): Dropout(p=0.0, inplace=False)
      (retention): MultiScaleRetention(
        (q_proj): Linear(in_features=768, out_features=768, bias=False)
        (k_proj): Linear(in_features=768, out_features=768, bias=False)
        (v_proj): Linear(in_features=768, out_features=1280, bias=False)
        (g_proj): Linear(in_features=768, out_features=1280, bias=False)
        (out_proj): Linear(in_features=1280, out_features=768, bias=False)
        (group_norm): RMSNorm()
      )
      (retention_layer_norm): RMSNorm()
      (ffn): GLU(
        (activation_dropout_module): Dropout(p=0.0, inplace=False)
        (dropout_module): Dropout(p=0.0, inplace=False)
        (fc1): Linear(in_features=768, out_features=1280, bias=False)
        (fc2): Linear(in_features=1280, out_features=768, bias=False)
        (gate): Linear(in_features=768, out_features=1280, bias=False)
      )
      (final_layer_norm): RMSNorm()
    )
  )
  (layer_norm): RMSNorm()
  (retnet_rel_pos): RetNetRelPos()
)
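The sessions above were produced with the new release. As a quick sanity check that the upgrade actually took effect locally (this assumes torchscale was installed with pip under the distribution name "torchscale"), the installed version can be read at runtime:

# Minimal sketch: confirm the installed torchscale version is 0.3.0 or newer.
# Assumes the package was installed via pip as "torchscale".
from importlib.metadata import version

print(version("torchscale"))  # expect "0.3.0" or later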
I got two errors:

(1)

from torchscale.architecture.config import EncoderDecoderConfig
from torchscale.architecture.encoder_decoder import EncoderDecoder

config = EncoderDecoderConfig(vocab_size=64000)
encdec = EncoderDecoder(config)
print(encdec)

Traceback (most recent call last):
AttributeError: 'EncoderDecoderConfig' object has no attribute 'normalize_output'
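For anyone who cannot upgrade right away: a possible stopgap, based only on the error text and not on the library's documented API, is to set the missing attribute on the config object before building the model. The proper fix is still the torchscale 0.3.0 upgrade mentioned above.

# Hypothetical stopgap for the older release, inferred from the AttributeError:
# supply the attribute the Decoder appears to expect. The value True is an
# assumption (it matches the final layer_norm in the printout above).
from torchscale.architecture.config import EncoderDecoderConfig
from torchscale.architecture.encoder_decoder import EncoderDecoder

config = EncoderDecoderConfig(vocab_size=64000)
config.normalize_output = True  # assumed value; upgrading is the real fix
encdec = EncoderDecoder(config)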
(2)

import torch
from torchscale.architecture.config import RetNetConfig
from torchscale.architecture.retnet import RetNetDecoder

config = RetNetConfig(vocab_size=64000)
retnet = RetNetDecoder(config)
print(retnet)

It shows "Cannot find reference 'RetNetConfig' in 'config.py'", "Cannot find reference 'retnet' in '__init__.py'", and "Unresolved reference 'RetNetDecoder'".

So how do I fix them?
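The "Cannot find reference" messages read like IDE static-analysis warnings rather than runtime errors. A quick way to tell whether the retnet module is actually present in the installed package (assuming torchscale itself imports cleanly in the active environment) is:

# Check at runtime whether the installed torchscale ships the RetNet module.
# Assumes torchscale itself is importable in the active environment.
import importlib.util

spec = importlib.util.find_spec("torchscale.architecture.retnet")
print(spec)  # None means the installed version predates RetNet; otherwise the IDE warning is stale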