Unable to run pre-trained model on GTX 1050Ti

Hello. I get the error: CUDA out of memory. Tried to allocate 352.00 MiB (GPU 0; 4.00 GiB total capacity; 2.04 GiB already allocated; 302.80 MiB free; 593.57 MiB cached) when I try to predict disparities. Is there any change I can make to your code that would make it possible to predict disparities on this GPU? Full error log:

C:\Users\sarim\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\functional.py:2457: UserWarning: nn.functional.upsample is deprecated. Use nn.functional.interpolate instead.
  warnings.warn("nn.functional.upsample is deprecated. Use nn.functional.interpolate instead.")
C:\Users\sarim\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\functional.py:2539: UserWarning: Default upsampling behavior when mode=bilinear is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details.
  "See the documentation of nn.Upsample for details.".format(mode))
C:\Users\sarim\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\functional.py:2539: UserWarning: Default upsampling behavior when mode=trilinear is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details.
  "See the documentation of nn.Upsample for details.".format(mode))
Traceback (most recent call last):
  File "C:/Users/sarim/PycharmProjects/thesis/DPC/psmnet/submission.py", line 125, in <module>
    main()
  File "C:/Users/sarim/PycharmProjects/thesis/DPC/psmnet/submission.py", line 112, in main
    pred_disp = test(imgL,imgR)
  File "C:/Users/sarim/PycharmProjects/thesis/DPC/psmnet/submission.py", line 83, in test
    output = model(imgL,imgR)
  File "C:\Users\sarim\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Users\sarim\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\parallel\data_parallel.py", line 150, in forward
    return self.module(*inputs[0], **kwargs[0])
  File "C:\Users\sarim\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Users\sarim\PycharmProjects\thesis\DPC\psmnet\models\stackhourglass.py", line 159, in forward
    pred3 = disparityregression(self.maxdisp)(pred3)
  File "C:\Users\sarim\AppData\Local\Programs\Python\Python37\lib\site-packages\torch\nn\modules\module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Users\sarim\PycharmProjects\thesis\DPC\psmnet\models\submodule.py", line 62, in forward
    disp = self.disp.repeat(x.size()[0],1,x.size()[2],x.size()[3])
RuntimeError: CUDA out of memory. Tried to allocate 352.00 MiB (GPU 0; 4.00 GiB total capacity; 2.04 GiB already allocated; 302.80 MiB free; 593.57 MiB cached)

Process finished with exit code 1

Edit: I got it to work by changing your code in stackhourglass.py (in class PSMNet):

    def forward(self, left, right):
        """Predict disparity for a stereo pair, dropping intermediates early.

        Large intermediate tensors are ``del``-ed as soon as they are no
        longer needed so their Python references do not outlive their use.
        NOTE(review): this only lowers peak GPU memory when autograd is not
        retaining the same tensors (e.g. when the caller runs inference under
        ``torch.no_grad()``) -- confirm in the calling script.

        Args:
            left: Left (reference) image batch. Its dimensions 2 and 3 give
                the height and width of the returned disparity maps.
            right: Right (target) image batch, passed through the same
                feature extractor as ``left``.

        Returns:
            ``(pred1, pred2, pred3)`` when ``self.training`` is true,
            otherwise only ``pred3``.
        """
        refimg_fea = self.feature_extraction(left)
        targetimg_fea = self.feature_extraction(right)

        batch, channels, height, width = refimg_fea.size()
        num_disp = self.maxdisp // 4

        # matching
        # Allocate the cost volume directly on the device (and with the dtype)
        # of the features instead of building a CPU FloatTensor and copying it
        # over with .cuda(); this also lets the model run on CPU.
        cost = refimg_fea.new_zeros(
            (batch, channels * 2, num_disp, height, width))

        for i in range(num_disp):
            if i > 0:
                # Shift the target features by i columns before pairing them
                # with the reference features.
                cost[:, :channels, i, :, i:] = refimg_fea[:, :, :, i:]
                cost[:, channels:, i, :, i:] = targetimg_fea[:, :, :, :-i]
            else:
                cost[:, :channels, i, :, :] = refimg_fea
                cost[:, channels:, i, :, :] = targetimg_fea
        cost = cost.contiguous()
        del refimg_fea
        del targetimg_fea

        cost0 = self.dres0(cost)
        del cost
        cost0 = self.dres1(cost0) + cost0

        out1, pre1, post1 = self.dres2(cost0, None, None)
        out1 = out1 + cost0

        out2, pre2, post2 = self.dres3(out1, pre1, post1)
        del post1
        del pre2
        out2 = out2 + cost0

        out3, pre3, post3 = self.dres4(out2, pre1, post2)
        del post3
        del pre3
        del post2
        del pre1
        out3 = out3 + cost0
        del cost0

        cost1 = self.classif1(out1)
        del out1
        cost2 = self.classif2(out2) + cost1
        del out2
        cost3 = self.classif3(out3) + cost2
        del out3

        # Target size for upsampling back to full disparity range and the
        # spatial size of the input image.
        full_size = [self.maxdisp, left.size()[2], left.size()[3]]

        if self.training:
            # F.interpolate replaces the deprecated F.upsample;
            # align_corners=False is the default the old call already used,
            # stated explicitly so no warning is emitted.
            cost1 = F.interpolate(cost1, full_size, mode='trilinear',
                                  align_corners=False)
            cost2 = F.interpolate(cost2, full_size, mode='trilinear',
                                  align_corners=False)

            cost1 = torch.squeeze(cost1, 1)
            pred1 = F.softmax(cost1, dim=1)
            pred1 = disparityregression(self.maxdisp)(pred1)

            cost2 = torch.squeeze(cost2, 1)
            pred2 = F.softmax(cost2, dim=1)
            pred2 = disparityregression(self.maxdisp)(pred2)

        del cost1
        del cost2
        cost3 = F.interpolate(cost3, full_size, mode='trilinear',
                              align_corners=False)
        cost3 = torch.squeeze(cost3, 1)
        pred3 = F.softmax(cost3, dim=1)
        del cost3
        pred3 = disparityregression(self.maxdisp)(pred3)

        if self.training:
            return pred1, pred2, pred3
        else:
            return pred3

mileyan / pseudo_lidar

Unable to run pre-trained model on GTX 1050Ti #23