PaddlePaddle / Paddle

PArallel Distributed Deep LEarning: Machine Learning Framework from Industrial Practice (『飞桨』核心框架,深度学习&机器学习高性能单机、分布式训练和跨平台部署)
http://www.paddlepaddle.org/
Apache License 2.0
22.24k stars 5.58k forks source link

动静转换,执行异常 #63143

Closed wang-qa closed 7 months ago

wang-qa commented 7 months ago

bug描述 Describe the Bug

环境

使用官网示例:https://www.paddlepaddle.org.cn/tutorials/projectdetail/3714002

运行环境:Python 3.11.2,paddlepaddle 2.6.1(CPU)

执行步骤脚本 见补充信息

预期:正常执行,无报错

实际:如下

其他补充信息 Additional Supplementary Information

执行文件

#!/usr/bin/env python3
# -*- coding:utf-8 -*-

# @Description: 动态图转静态图模型保存
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

# @Description: 动态图转静态图模型保存

##############################################################################
""" 定义网络结构 """
import paddle
# paddle.device.set_device('cpu')

# Run Paddle's installation check (paddle.utils.run_check) before anything else.
paddle.utils.run_check()
# paddle.disable_signal_handler()

# Define the handwritten digit recognition model
import paddle

# 定义手写数字识别模型
class MNIST(paddle.nn.Layer):
    """Handwritten digit recognition model built from one fully connected layer.

    The layer takes 784 input features and produces 10 output values.
    """

    def __init__(self):
        super().__init__()
        # Single fully connected layer: 784 input features -> 10 outputs.
        self.fc = paddle.nn.Linear(in_features=784, out_features=10)

    # The decorator makes this dynamic-graph forward pass run in static-graph mode.
    @paddle.jit.to_static
    def forward(self, inputs):
        """Run the forward computation: pass ``inputs`` through the linear layer."""
        return self.fc(inputs)

##############################################################################
""" 训练并保存 """
import paddle
import paddle.nn.functional as F

# Make sure image data loaded from paddle.vision.datasets.MNIST is of type np.ndarray
paddle.vision.set_image_backend('cv2')

# Image normalization helper: maps image data in the range [0, 255] to [-1, 1]
def norm_img(img):
    """Normalize image data and flatten it to ``[batch_size, 784]``.

    Values are transformed with ``img / 127.5 - 1``, which maps the range
    [0, 255] to [-1, 1]. The first dimension of ``img`` is used as the
    batch size for the reshape.
    """
    n_samples = img.shape[0]
    scaled = img / 127.5 - 1
    return paddle.reshape(scaled, [n_samples, 784])

def train(model):
    """Train ``model`` on the MNIST training split with SGD.

    Runs 10 epochs over shuffled batches of 16 samples with a learning rate
    of 0.001, and prints the mean loss every 1000 batches.
    """
    model.train()
    # MNIST training split, served in shuffled mini-batches of 16.
    mnist_train = paddle.vision.datasets.MNIST(mode='train')
    loader = paddle.io.DataLoader(mnist_train, batch_size=16, shuffle=True)
    sgd = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters())

    num_epochs = 10
    for epoch_idx in range(num_epochs):
        for step, batch in enumerate(loader()):
            inputs = norm_img(batch[0]).astype('float32')
            targets = batch[1].astype('int64')

            # Forward pass, then the mean cross-entropy loss over the batch.
            predictions = model(inputs)
            mean_loss = paddle.mean(F.cross_entropy(predictions, targets))

            # Report the current loss once every 1000 batches.
            if step % 1000 == 0:
                print("epoch_id: {}, batch_id: {}, loss is: {}".format(epoch_idx, step, mean_loss.numpy()))

            # Backward pass and parameter update, then reset the gradients.
            mean_loss.backward()
            sgd.step()
            sgd.clear_grad()

# Build the model and train it.
model = MNIST()

train(model)

# Save the trained parameters to disk.
paddle.save(model.state_dict(), './mnist.pdparams')
print("==>Trained model saved in ./mnist.pdparams")

##############################################################################
""" 部署调用 """
# Deployment: save the inference model
from paddle.static import InputSpec
# Load the trained model parameters
state_dict = paddle.load("./mnist.pdparams")
# Load the trained parameters into the network
model.set_state_dict(state_dict)
# Switch the model to evaluation mode
model.eval()

# Save the inference model; the first input dimension is left as None
paddle.jit.save(
    layer=model,
    path="inference/mnist",
    input_spec=[InputSpec(shape=[None, 784], dtype='float32')])

print("==>Inference model saved in inference/mnist.")

import numpy as np
import paddle
import paddle.nn.functional as F
# Make sure image data loaded from paddle.vision.datasets.MNIST is of type np.ndarray
paddle.vision.set_image_backend('cv2')

# Read the MNIST test data and take the first sample
mnist_test = paddle.vision.datasets.MNIST(mode='test')
test_image, label = mnist_test[0]
# Print the digit label of the image that was read
print("The label of readed image is : ", label)

# Convert the test image to a tensor and reshape it to [1, 784]
test_image = paddle.reshape(paddle.to_tensor(test_image), [1, 784])
# Then normalize the image
test_image = norm_img(test_image)
# Load the saved model
loaded_model = paddle.jit.load("./inference/mnist")
# Run prediction with the loaded model
preds = loaded_model(test_image)
pred_label = paddle.argmax(preds)
# Print the prediction result
print("The predicted label is : ", pred_label.numpy())
absl-py==2.1.0
anyio==4.3.0
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
astor==0.8.1
asttokens==2.4.1
astunparse==1.6.3
async-lru==2.0.4
attrs==23.2.0
Babel==2.14.0
beautifulsoup4==4.12.3
bleach==6.1.0
certifi==2024.2.2
cffi==1.16.0
chardet==5.2.0
charset-normalizer==3.3.2
comm==0.2.2
debugpy==1.8.1
decorator==5.1.1
defusedxml==0.7.1
docutils==0.20.1
doxypypy==0.8.8.7
doxyqml==0.5.3
executing==2.0.1
fastjsonschema==2.19.1
flatbuffers==24.3.25
flit==3.9.0
flit_core==3.9.0
fqdn==1.5.1
gast==0.5.4
gi-docgen==2023.3
google-pasta==0.2.0
grpcio==1.62.1
h11==0.14.0
h5py==3.10.0
httpcore==1.0.5
httpx==0.27.0
idna==3.6
install==1.3.5
ipykernel==6.29.4
ipython==8.23.0
ipywidgets==8.1.2
isoduration==20.11.0
jedi==0.19.1
Jinja2==3.1.3
json5==0.9.24
jsonpointer==2.4
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
jupyter==1.0.0
jupyter-console==6.6.3
jupyter-events==0.10.0
jupyter-lsp==2.2.4
jupyter_client==8.6.1
jupyter_core==5.7.2
jupyter_server==2.13.0
jupyter_server_terminals==0.5.3
jupyterlab==4.1.5
jupyterlab_pygments==0.3.0
jupyterlab_server==2.25.4
jupyterlab_widgets==3.0.10
kapidox==5.106.0
keras==3.1.1
libclang==18.1.1
lxml==5.1.1
Mako==1.3.2
Markdown==3.6
markdown-it-py==3.0.0
MarkupSafe==2.1.5
matplotlib-inline==0.1.6
mdurl==0.1.2
meson==1.4.0
mistune==3.0.2
ml-dtypes==0.3.2
namex==0.0.7
nbclient==0.10.0
nbconvert==7.16.3
nbformat==5.10.3
nest-asyncio==1.6.0
networkx==3.2.1
notebook==7.1.2
notebook_shim==0.2.4
numpy==1.26.4
opt-einsum==3.3.0
optree==0.11.0
overrides==7.7.0
packaging==24.0
paddlepaddle==2.6.1
pandocfilters==1.5.1
parso==0.8.3
pexpect==4.9.0
pillow==10.2.0
platformdirs==4.2.0
prometheus_client==0.20.0
prompt-toolkit==3.0.43
protobuf==4.25.3
psutil==5.9.8
ptyprocess==0.7.0
pure-eval==0.2.2
pycairo==1.26.0
pycparser==2.22
Pygments==2.17.2
PyGObject==3.48.1
python-dateutil==2.9.0.post0
python-json-logger==2.0.7
PyYAML==6.0.1
pyzmq==25.1.2
qtconsole==5.5.1
QtPy==2.4.1
referencing==0.34.0
requests==2.31.0
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rich==13.7.1
rpds-py==0.18.0
Send2Trash==1.8.2
setools==4.5.0.dev0
six==1.16.0
smartypants==2.0.1
sniffio==1.3.1
soupsieve==2.5
stack-data==0.6.3
tensorboard==2.16.2
tensorboard-data-server==0.7.2
tensorflow==2.16.1
tensorflow-io-gcs-filesystem==0.36.0
termcolor==2.4.0
terminado==0.18.1
tinycss2==1.2.1
tomli_w==1.0.0
tornado==6.4
traitlets==5.14.2
types-python-dateutil==2.9.0.20240316
typing_extensions==4.10.0
typogrify==2.0.7
ufw==0.36.1
uri-template==1.3.0
urllib3==2.2.1
wcwidth==0.2.13
webcolors==1.13
webencodings==0.5.1
websocket-client==1.7.0
Werkzeug==3.0.1
widgetsnbextension==4.0.10
wrapt==1.16.0
SigureMo commented 7 months ago

paddle.jit.save(model, 'runs/mnist_experiment111/model', [paddle.static.InputSpec([-1, 1, 28, 28])])

输入不是 [16, 784] 吗?为什么这里使用 [-1, 1, 28, 28] 呢?使用 [-1, 784] 就可以了

wang-qa commented 7 months ago

paddle.jit.save(model, 'runs/mnist_experiment111/model', [paddle.static.InputSpec([-1, 1, 28, 28])])

输入不是 [16, 784] 吗?为什么这里使用 [-1, 1, 28, 28] 呢?使用 [-1, 784] 就可以了

代码未做任何修改,直接用示例运行,就是这个错误

SigureMo commented 7 months ago

直接用示例运行

哪里的示例呢?官网吗?具体链接是?

wang-qa commented 7 months ago

直接用示例运行

哪里的示例呢?官网吗?具体链接是?

链接:https://www.paddlepaddle.org.cn/tutorials/projectdetail/3991815

更正一下code

#!/usr/bin/env python3
# -*- coding:utf-8 -*-

# @Description: 动态图转静态图模型保存

##############################################################################
""" 定义网络结构 """
import paddle
# paddle.device.set_device('cpu')

# Run Paddle's installation check (paddle.utils.run_check) before anything else.
paddle.utils.run_check()
# paddle.disable_signal_handler()

# Define the handwritten digit recognition model
import paddle

# 定义手写数字识别模型
class MNIST(paddle.nn.Layer):
    """Handwritten digit recognition model built from one fully connected layer.

    The layer takes 784 input features and produces 10 output values.
    """

    def __init__(self):
        super().__init__()
        # Single fully connected layer: 784 input features -> 10 outputs.
        self.fc = paddle.nn.Linear(in_features=784, out_features=10)

    # The decorator makes this dynamic-graph forward pass run in static-graph mode.
    @paddle.jit.to_static
    def forward(self, inputs):
        """Run the forward computation: pass ``inputs`` through the linear layer."""
        return self.fc(inputs)

##############################################################################
""" 训练并保存 """
import paddle
import paddle.nn.functional as F

# Make sure image data loaded from paddle.vision.datasets.MNIST is of type np.ndarray
paddle.vision.set_image_backend('cv2')

# Image normalization helper: maps image data in the range [0, 255] to [-1, 1]
def norm_img(img):
    """Normalize image data and flatten it to ``[batch_size, 784]``.

    Values are transformed with ``img / 127.5 - 1``, which maps the range
    [0, 255] to [-1, 1]. The first dimension of ``img`` is used as the
    batch size for the reshape.
    """
    n_samples = img.shape[0]
    scaled = img / 127.5 - 1
    return paddle.reshape(scaled, [n_samples, 784])

def train(model):
    """Train ``model`` on the MNIST training split with SGD.

    Runs 10 epochs over shuffled batches of 16 samples with a learning rate
    of 0.001, and prints the mean loss every 1000 batches.
    """
    model.train()
    # MNIST training split, served in shuffled mini-batches of 16.
    mnist_train = paddle.vision.datasets.MNIST(mode='train')
    loader = paddle.io.DataLoader(mnist_train, batch_size=16, shuffle=True)
    sgd = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters())

    num_epochs = 10
    for epoch_idx in range(num_epochs):
        for step, batch in enumerate(loader()):
            inputs = norm_img(batch[0]).astype('float32')
            targets = batch[1].astype('int64')

            # Forward pass, then the mean cross-entropy loss over the batch.
            predictions = model(inputs)
            mean_loss = paddle.mean(F.cross_entropy(predictions, targets))

            # Report the current loss once every 1000 batches.
            if step % 1000 == 0:
                print("epoch_id: {}, batch_id: {}, loss is: {}".format(epoch_idx, step, mean_loss.numpy()))

            # Backward pass and parameter update, then reset the gradients.
            mean_loss.backward()
            sgd.step()
            sgd.clear_grad()

# Build the model and train it.
model = MNIST()

train(model)

# Save the trained parameters to disk.
paddle.save(model.state_dict(), './mnist.pdparams')
print("==>Trained model saved in ./mnist.pdparams")

##############################################################################
""" 部署调用 """
# Deployment: save the inference model
from paddle.static import InputSpec
# Load the trained model parameters
state_dict = paddle.load("./mnist.pdparams")
# Load the trained parameters into the network
model.set_state_dict(state_dict)
# Switch the model to evaluation mode
model.eval()

# Save the inference model; the first input dimension is left as None
paddle.jit.save(
    layer=model,
    path="inference/mnist",
    input_spec=[InputSpec(shape=[None, 784], dtype='float32')])

print("==>Inference model saved in inference/mnist.")

import numpy as np
import paddle
import paddle.nn.functional as F
# Make sure image data loaded from paddle.vision.datasets.MNIST is of type np.ndarray
paddle.vision.set_image_backend('cv2')

# Read the MNIST test data and take the first sample
mnist_test = paddle.vision.datasets.MNIST(mode='test')
test_image, label = mnist_test[0]
# Print the digit label of the image that was read
print("The label of readed image is : ", label)

# Convert the test image to a tensor and reshape it to [1, 784]
test_image = paddle.reshape(paddle.to_tensor(test_image), [1, 784])
# Then normalize the image
test_image = norm_img(test_image)
# Load the saved model
loaded_model = paddle.jit.load("./inference/mnist")
# Run prediction with the loaded model
preds = loaded_model(test_image)
pred_label = paddle.argmax(preds)
# Print the prediction result
print("The predicted label is : ", pred_label.numpy())

image

debug run 8629cd79-c1ad-46e5-aadd-738ae81d2522

SigureMo commented 7 months ago

更新后的代码可以跑啊,无论 CPU 还是 GPU

wang-qa commented 7 months ago

更新后的代码可以跑啊,无论 CPU 还是 GPU

估计跟电脑有关 我是安装的cpu版本,debug可以跑,正常执行就报错

zhwesky2010 commented 7 months ago

@wang-qa 你好,更新后的代码,我这边也是可以直接运行的,应该是你这边机器有问题或者之前代码有问题导致的。使用新的代码运行就可以。 infoflow 2024-04-11 16-05-10