pytorch / tutorials

PyTorch tutorials.
https://pytorch.org/tutorials/

[BUG] - Segmentation fault occurs when following the tutorial #2690

Open statfs opened 1 year ago

statfs commented 1 year ago

Add Link

https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html

Describe the bug

Following the tutorial, I wrote the code below and found that a segmentation fault occurs when the tensor (and the GraphModule model) is moved to cuda:0.


# main.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# LeNet Model definition
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc1_drop = nn.Dropout()
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.reshape(-1, 320)
        x = F.relu(self.fc1(x))
        x = self.fc1_drop(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# quantize the FP32 LeNet with Intel Neural Compressor and save the INT8 model to ./output
def save_quant():
    model = Net()
    model.load_state_dict(torch.load('./lenet_mnist_model.pth'))
    print(model)
    model = model.to("cuda:0")
    model.eval()
    from torchvision import datasets, transforms
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, download=True,
            transform=transforms.Compose([ transforms.ToTensor(), ])
            ),
        batch_size=1)

    # launch code for Intel® Neural Compressor
    from neural_compressor.experimental import Quantization
    quantizer = Quantization("./conf.yaml")
    quantizer.model = model
    quantizer.calib_dataloader = test_loader
    quantizer.eval_dataloader = test_loader
    q_model = quantizer()
    # q_model = q_model.to("cuda:0")
    q_model.save('./output')

# load the saved INT8 model and run inference on CPU -- this works
def good_quant():
    from neural_compressor.utils.pytorch import load
    model = Net()
    int8_model = load('./output', model)
    first = np.random.rand(1,1,28,28).astype(np.float32)
    first = torch.from_numpy(first)
    print(f"when tensor on={first.device}")
    x = int8_model(first)
    print(x)

# load the saved INT8 model, move it to cuda:0, and run inference -- this segfaults
def bad_quant():
    from neural_compressor.utils.pytorch import load
    model = Net()
    device = "cuda:0"
    int8_model = load('./output', model).to(device)
    first = np.random.rand(1,1,28,28).astype(np.float32)
    first = torch.from_numpy(first).to(device)
    print(f"when tensor on={first.device}")
    x = int8_model(first)
    print(x)

save_quant()
good_quant()
bad_quant()

and the YAML file:

# conf.yaml
version: 2.0

model:
    name: LeNet
    framework: pytorch_fx

evaluation:
    accuracy:
        metric:
            topk: 1

tuning:
  accuracy_criterion:
    relative: 0.01

Expected Result: the quantized model runs smoothly and efficiently on the GPU

Actual Result: segmentation fault

Segmentation fault (core dumped)

[screenshot of the terminal output showing the segmentation fault]
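
A quick way to get more detail on where the crash happens (a sketch, assuming the repro above is saved as main.py) is to enable Python's built-in faulthandler so a Python-level traceback is printed when the process receives SIGSEGV:

# add at the top of main.py, or run the script as: python -X faulthandler main.py
import faulthandler
faulthandler.enable()  # dump the Python stack of every thread on a fatal signal

A native backtrace from gdb (gdb --args python main.py, then run and bt at the crash) would additionally show whether the fault is inside PyTorch or neural_compressor C++ code.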

Describe your environment

cc @ezyang @gchanan @zou3519 @kadeng @frank-wei @jgong5 @mingfeima @XiaobingSuper @sanchitintel @ashokei @jingxu10 @jerryzh168 @jianyuh @raghuramank100 @jamesr66a @vkuzo @Xia-Weiwen @leslie-fang-intel

colesbury commented 8 months ago

Marked as high priority due to the segmentation fault.

HDCharles commented 8 months ago

It looks like @ftian1 @holly1238 @yqhu wrote/landed the tutorial; can one of you take a look at this?

The PyTorch quantization oncall is listed for this issue, but the tutorial is for an external repo and isn't actually using any of the PyTorch quantization APIs.
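
For context, the in-tree path that oncall covers looks roughly like the sketch below (post-training FX graph mode quantization of the Net from the repro, with the default fbgemm qconfig assumed); the tutorial instead drives quantization through neural_compressor, which as far as I can tell builds on similar FX machinery for its pytorch_fx backend.

# For comparison only: a minimal sketch of quantizing Net with the in-tree
# FX graph mode API (not what the Intel Neural Compressor tutorial does).
import torch
from torch.ao.quantization import get_default_qconfig_mapping
from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx

model = Net().eval()                                  # Net as defined in the repro above
example_inputs = (torch.randn(1, 1, 28, 28),)
qconfig_mapping = get_default_qconfig_mapping("fbgemm")

prepared = prepare_fx(model, qconfig_mapping, example_inputs)
with torch.no_grad():
    prepared(*example_inputs)                         # calibration pass
int8_model = convert_fx(prepared)                     # quantized GraphModule (CPU kernels)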

malfet commented 8 months ago

Transferring back to tutorials, though it looks like the crash comes from the neural_compressor library.
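
One way to confirm that (a sketch based on the bad_quant repro above, splitting the device move from the forward pass to see which step faults):

# Sketch: run bad_quant step by step to see whether .to("cuda:0") or the
# forward pass is what segfaults in the neural_compressor-loaded model.
import numpy as np
import torch
from neural_compressor.utils.pytorch import load

int8_model = load('./output', Net())                # Net from the repro above
print("loaded INT8 model")

int8_model = int8_model.to("cuda:0")
print("moved model to cuda:0")

x = torch.from_numpy(np.random.rand(1, 1, 28, 28).astype(np.float32)).to("cuda:0")
print("moved input to cuda:0")

out = int8_model(x)
print("forward pass finished:", out.shape)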

ftian1 commented 8 months ago

Thanks for raising this bug. I am checking it and will fix it ASAP.