diegoalejogm / gans

Generative Adversarial Networks implemented in PyTorch and Tensorflow
MIT License
823 stars 352 forks source link

RuntimeError: Expected object of device type cuda but got device type cpu for argument #2 'mat1' in call to _th_addmm #12

Open tjf801 opened 4 years ago

tjf801 commented 4 years ago

I am trying to run this program, but it is returning RuntimeError: Expected object of device type cuda but got device type cpu for argument #2 'mat1' in call to _th_addmm.

Note: I am using the notebook file as an actual python file.

from IPython import display

from utils import Logger

import torch
from torch import nn, optim
from torch.autograd.variable import Variable
from torchvision import transforms, datasets

DATA_FOLDER = './torch_data/VGAN/MNIST'
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(torch.cuda.is_available(), device)

def mnist_data():
    compose = transforms.Compose([
        transforms.ToTensor(), transforms.Normalize([0.5], [0.5])])
    out_dir = '{}/dataset'.format(DATA_FOLDER)
    return datasets.MNIST(root=out_dir, train=True, transform=compose, download=True)

# Load data
data = mnist_data()
# Create loader with data, so that we can iterate over it
data_loader = torch.utils.data.DataLoader(data, batch_size=100, shuffle=True)
# Num batches
num_batches = len(data_loader)

class DiscriminatorNet(torch.nn.Module):
    """
    A three hidden-layer discriminative neural network
    """
    def __init__(self):
        super(DiscriminatorNet, self).__init__()
        n_features = 784
        n_out = 1

        self.hidden0 = nn.Sequential( 
            nn.Linear(n_features, 1024),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3)
        )
        self.hidden1 = nn.Sequential(
            nn.Linear(1024, 512),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3)
        )
        self.hidden2 = nn.Sequential(
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3)
        )
        self.out = nn.Sequential(
            torch.nn.Linear(256, n_out),
            torch.nn.Sigmoid()
        )

    def forward(self, x):
        x = self.hidden0(x)
        x = self.hidden1(x)
        x = self.hidden2(x)
        x = self.out(x)
        return x

class GeneratorNet(torch.nn.Module):
    """
    A three hidden-layer generative neural network
    """
    def __init__(self):
        super(GeneratorNet, self).__init__()
        n_features = 100
        n_out = 784

        self.hidden0 = nn.Sequential(
            nn.Linear(n_features, 256),
            nn.LeakyReLU(0.2)
        )
        self.hidden1 = nn.Sequential(            
            nn.Linear(256, 512),
            nn.LeakyReLU(0.2)
        )
        self.hidden2 = nn.Sequential(
            nn.Linear(512, 1024),
            nn.LeakyReLU(0.2)
        )

        self.out = nn.Sequential(
            nn.Linear(1024, n_out),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.hidden0(x)
        x = self.hidden1(x)
        x = self.hidden2(x)
        x = self.out(x)
        return x

def images_to_vectors(images):
    return images.view(images.size(0), 784)

def vectors_to_images(vectors):
    return vectors.view(vectors.size(0), 1, 28, 28)

def noise(size):
    '''
    Generates a 1-d vector of gaussian sampled random values
    '''
    n = Variable(torch.randn(size, 100))
    return n

discriminator = DiscriminatorNet()
generator = GeneratorNet()
if torch.cuda.is_available():
    discriminator.cuda()
    generator.cuda()

# Optimizers
d_optimizer = optim.Adam(discriminator.parameters(), lr=0.0002)
g_optimizer = optim.Adam(generator.parameters(), lr=0.0002)

# Loss function
loss = nn.BCELoss()

# Number of steps to apply to the discriminator
d_steps = 1  # In Goodfellow et. al 2014 this variable is assigned to 1
# Number of epochs
num_epochs = 200

def real_data_target(size):
    '''
    Tensor containing ones, with shape = size
    '''
    data = Variable(torch.ones(size, 1))
    if torch.cuda.is_available(): return data.cuda()
    return data

def fake_data_target(size):
    '''
    Tensor containing zeros, with shape = size
    '''
    data = Variable(torch.zeros(size, 1))
    if torch.cuda.is_available(): return data.cuda()
    return data

def train_discriminator(optimizer, real_data, fake_data):
    # Reset gradients
    optimizer.zero_grad()

    # 1.1 Train on Real Data
    prediction_real = discriminator(real_data)
    # Calculate error and backpropagate
    error_real = loss(prediction_real, real_data_target(real_data.size(0)))
    error_real.backward()

    # 1.2 Train on Fake Data
    prediction_fake = discriminator(fake_data)
    # Calculate error and backpropagate
    error_fake = loss(prediction_fake, fake_data_target(real_data.size(0)))
    error_fake.backward()

    # 1.3 Update weights with gradients
    optimizer.step()

    # Return error
    return error_real + error_fake, prediction_real, prediction_fake

def train_generator(optimizer, fake_data):
    # 2. Train Generator
    # Reset gradients
    optimizer.zero_grad()
    # Sample noise and generate fake data
    prediction = discriminator(fake_data)
    # Calculate error and backpropagate
    error = loss(prediction, real_data_target(prediction.size(0)))
    error.backward()
    # Update weights with gradients
    optimizer.step()
    # Return error
    return error

num_test_samples = 16
test_noise = noise(num_test_samples)

logger = Logger(model_name='VGAN', data_name='MNIST')

for epoch in range(num_epochs):
    for n_batch, (real_batch,_) in enumerate(data_loader):

        # 1. Train Discriminator
        real_data = Variable(images_to_vectors(real_batch))
        if torch.cuda.is_available(): real_data = real_data.cuda()
        # Generate fake data
        fake_data = generator(noise(real_data.size(0))).detach()
        # Train D
        d_error, d_pred_real, d_pred_fake = train_discriminator(d_optimizer,
                                                                real_data, fake_data)

        # 2. Train Generator
        # Generate fake data
        fake_data = generator(noise(real_batch.size(0)))
        # Train G
        g_error = train_generator(g_optimizer, fake_data)
        # Log error
        logger.log(d_error, g_error, epoch, n_batch, num_batches)

        # Display Progress
        if (n_batch) % 100 == 0:
            display.clear_output(True)
            # Display Images
            test_images = vectors_to_images(generator(test_noise)).data.cpu()
            logger.log_images(test_images, num_test_samples, epoch, n_batch, num_batches);
            # Display status Logs
            logger.display_status(
                epoch, num_epochs, n_batch, num_batches,
                d_error, g_error, d_pred_real, d_pred_fake
            )
        # Model Checkpoints
        logger.save_models(generator, discriminator, epoch)
tjf801 commented 4 years ago

Full stack traceback:

GAN>py MNIST_GAN.py

True cuda:0
Traceback (most recent call last):
  File "MNIST_GAN.py", line 195, in <module>
    fake_data = generator(noise(real_data.size(0))).detach()
  File "C:\Program Files\Python37\lib\site-packages\torch\nn\modules\module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "MNIST_GAN.py", line 92, in forward
    x = self.hidden0(x)
  File "C:\Program Files\Python37\lib\site-packages\torch\nn\modules\module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Program Files\Python37\lib\site-packages\torch\nn\modules\container.py", line 92, in forward
    input = module(input)
  File "C:\Program Files\Python37\lib\site-packages\torch\nn\modules\module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Program Files\Python37\lib\site-packages\torch\nn\modules\linear.py", line 87, in forward
    return F.linear(input, self.weight, self.bias)
  File "C:\Program Files\Python37\lib\site-packages\torch\nn\functional.py", line 1370, in linear
    ret = torch.addmm(bias, input, weight.t())
RuntimeError: Expected object of device type cuda but got device type cpu for argument #2 'mat1' in call to _th_addmm
testpiano commented 4 years ago

I am running my code using (anaconda3) and (pycharm or jupyter notebook) under window 10 64bit Sometime in jupyter works but not in pycharm inspite of same code and same anaconda environment.

My current problem is introduced below:

I encountered the same problem, when I run my code with

model.cuda()

But running without it is no problem.

This mean I couldn't run my code with GPU, inspite of the

torch.cuda.is_available()

tells me True.

import torch

from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

import pandas as pd

wine = load_wine()    
wine
pd.DataFrame(wine.data,columns = wine.feature_names)    

wine.target

wine_data = wine.data[0:130]   
wine_target = wine.target[0:130]  
train_X,test_X,train_Y,test_Y = train_test_split(wine_data,wine_target, test_size = 0.2)   

print(len(train_X))   
print(len(test_X))    

train_X = torch.from_numpy(train_X).float()  
train_Y = torch.from_numpy(train_Y).long()   

test_X = torch.from_numpy(test_X).float()
test_Y = torch.from_numpy(test_Y).long()

print(train_X.shape)
print(train_Y.shape)

train=TensorDataset(train_X,train_Y)   

print(train[0])

train_loader = DataLoader(train,batch_size=16,shuffle=True)  

class Net(nn.Module):   
    def __init__(self):
        super(Net,self).__init__()  
        self.fc1 = nn.Linear(13,96) 
        self.fc2 = nn.Linear(96,2)

    def forward(self,x):  
        x=F.relu(self.fc1(x))  
        x=self.fc2(x)           
        return F.log_softmax(x) 

model = Net()  
model.cuda()  #★★★★★

#print(torch.cuda.is_available())

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(),lr = 0.01)

for epoch in range(300):   
    total_loss = 0     
    for train_x,train_y in train_loader:   
        train_x,train_y = Variable(train_x),Variable(train_y) 
        optimizer.zero_grad() 
        output = model(train_x)
        loss = criterion(output, train_y)
        loss.backward()  
        optimizer.step()
        total_loss += loss.item() 

    if (epoch+1)%50==0:
        print(epoch+1, total_loss) 

test_x,test_y = Variable(test_X),Variable(test_Y)  
result = torch.max(model(test_x).data,1)[1]   
accuracy = sum(test_y.data.numpy()==result.numpy())/len(test_y.data.numpy()) 

print(accuracy)

Traceback (most recent call last): File "C:\Users\Anaconda3\envs\myconda\lib\site-packages\IPython\core\interactiveshell.py", line 3326, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "", line 1, in runfile('C:/Users/Desktop/python_test/test.py', wdir='C:/Users/Desktop/python_test') File "C:\Program Files\JetBrains\PyCharm Community Edition 2019.2.4\helpers\pydev_pydev_bundle\pydev_umd.py", line 197, in runfile pydev_imports.execfile(filename, global_vars, local_vars) # execute the script File "C:\Program Files\JetBrains\PyCharm Community Edition 2019.2.4\helpers\pydev_pydev_imps_pydev_execfile.py", line 18, in execfile exec(compile(contents+"\n", file, 'exec'), glob, loc) File "C:/Users/Desktop/python_test/test.py", line 66, in output = model(train_x)
File "C:\Users\Anaconda3\envs\myconda\lib\site-packages\torch\nn\modules\module.py", line 541, in call result = self.forward(*input, *kwargs) File "C:/Users/Desktop/python_test/test.py", line 49, in forward x=F.relu(self.fc1(x))
File "C:\Users\Anaconda3\envs\myconda\lib\site-packages\torch\nn\modules\module.py", line 541, in call result = self.forward(
input, **kwargs) File "C:\Users\Anaconda3\envs\myconda\lib\site-packages\torch\nn\modules\linear.py", line 87, in forward return F.linear(input, self.weight, self.bias) File "C:\Users\Anaconda3\envs\myconda\lib\site-packages\torch\nn\functional.py", line 1370, in linear ret = torch.addmm(bias, input, weight.t())

RuntimeError: Expected object of device type cuda but got device type cpu for argument #2 'mat1' in call to _th_addmm

I NEED SOLUTIONS!!! I HOPE SOMEBODY to HAVE SOLUTIONS

testpiano commented 4 years ago

Check List

Reassign the model and data to gpu train_x = train_x.cuda() # train_x.to(device) Reassign the model and data to cpu result = result.cpu()

Pratikrocks commented 4 years ago

I am also facing this same issue, does anyone have a solution to it??

liangjiubujiu commented 4 years ago

new_layer=new_layer.cuda()

codeprb commented 4 years ago

Did someone get a solution for this?

diegoalejogm commented 4 years ago

@liangjiubujiu Can you specify what you mean?

It used to work in previous versions of PyTorch. Can you send a fix or explain to me what you did to help the rest?

diegoalejogm commented 4 years ago

@codeprb @Pratikrocks I am checking the solution, you can try running it on CPU if you remove all the if torch.cuda.is_available(): <variable>.cuda() statements in the meantime

karam-nus commented 3 years ago

Each tensor (input/custom intermediate ones created for project specific purpose) should be moved to the device in use - as mentioned in a previous reply by @liangjiubujiu

new_layer=new_layer.cuda()

jf7peng commented 3 years ago

Hello, did u fix your error? I have the same problem when I used nn.linear(), i tested all my data that are exactly on my gpu while it raised Tensor for argument #2 'mat1' is on CPU, but expected it to be on GPU (while checking arguments for addmm)

ZhuoerFeng commented 3 years ago

The reason might be you are calculating Tensors from different devices. I got this problem when CrossEntropyLoss cpu and cuda Tensors :) use tensor1.cuda() to convert a cpu tensor to cuda tensor (you got to check which one is fetched from cpu device)