BIDData / BIDMat

A CPU and GPU-accelerated matrix library for data mining
BSD 3-Clause "New" or "Revised" License
265 stars 73 forks source link

GMat memory allocation error #52

Closed fhorse closed 8 years ago

fhorse commented 8 years ago

I'm running into an error in GMat's allocation during a matrix multiply operation run in the bidmach/Scala REPL:

[user1 ~]$ bidmach
Loading /homes/user1/git/BIDMach/lib/bidmach_init.scala...
import BIDMat.{CMat, CSMat, DMat, Dict, FMat, FND, GMat, GDMat, GIMat, GLMat, GSMat, GSDMat, HMat, IDict, Image, IMat, LMat, Mat, SMat, SBMat, SDMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMat.Solvers._
import BIDMat.Plotting._
import BIDMach.Learner
import BIDMach.models.{FM, GLM, KMeans, KMeansw, LDA, LDAgibbs, Model, NMF, SFA, RandomForest}
import BIDMach.networks.DNN
import BIDMach.datasources.{DataSource, MatDS, FilesDS, SFilesDS}
import BIDMach.mixins.{CosineSim, Perplexity, Top, L1Regularizer, L2Regularizer}
import BIDMach.updaters.{ADAGrad, Batch, BatchNorm, IncMult, IncNorm, Telescoping}
import BIDMach.causal.IPTW
1 CUDA device found, CUDA version 7.0

Welcome to Scala version 2.11.2 (Java HotSpot(TM) 64-Bit Server VM, Java 1.7.0_80).
Type in expressions to have them evaluated.
Type :help for more information.

scala> val f = grand(256,40000)
f: BIDMat.GMat =
   0.88383   0.36261   0.67829   0.41245   0.98139   0.49479   0.73682  0.072556   0.28022   0.86777   0.63998   0.60432   0.34435   0.63388   0.34711   0.37487  0.027557  0.029789   0.74355   0.78470   0.63183  0.019272   0.48245...
   0.81384   0.74068   0.59507  0.056584  0.065755   0.73149   0.94198   0.16632   0.38067   0.63685   0.62824  0.063965   0.88211   0.41027   0.60684   0.16312   0.76326  0.054253   0.91629   0.22353   0.57114   0.19324   0.82873...
        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..

scala> val g = grand(256,80000)
g: BIDMat.GMat =
    0.38165  0.0094942    0.33977    0.52591    0.36995    0.93983    0.53091    0.21534    0.49424    0.17834    0.31689    0.11909    0.30771    0.26260    0.69630    0.36673    0.47742    0.33469   0.032663    0.82122...
    0.28144    0.34397    0.38882    0.94709    0.77737    0.72754   0.060895    0.39984    0.80299    0.10944    0.33119    0.64009    0.10358    0.35609    0.54367    0.44958    0.17401    0.52444   0.010265    0.95499...
         ..         ..         ..         ..         ..         ..         ..         ..         ..         ..         ..         ..         ..         ..         ..         ..         ..         ..         ..         ..

scala> val h = f ^* g
java.lang.IllegalArgumentException: Negative capacity: -84901888
  at java.nio.Buffer.<init>(Buffer.java:191)
  at java.nio.ByteBuffer.<init>(ByteBuffer.java:276)
  at java.nio.ByteBuffer.<init>(ByteBuffer.java:284)
  at java.nio.MappedByteBuffer.<init>(MappedByteBuffer.java:89)
  at java.nio.DirectByteBuffer.<init>(DirectByteBuffer.java:162)
  at jcuda.runtime.JCuda.cudaMallocHostNative(Native Method)
  at jcuda.runtime.JCuda.cudaMallocHost(JCuda.java:3902)
  at BIDMat.GMat$.apply(GMat.scala:1661)
  at BIDMat.GMat$.newOrCheckGMat(GMat.scala:2411)
  at BIDMat.GMat$.newOrCheckGMat(GMat.scala:2445)
  at BIDMat.GMat.GTMult(GMat.scala:658)
  at BIDMat.GMat.$up$times(GMat.scala:1141)
  ... 33 elided

But if the column dimensions are different, it works:

scala> val f = grand(256,38970)
f: BIDMat.GMat =
   0.88383   0.36261   0.67829   0.41245   0.98139   0.49479   0.73682  0.072556   0.28022   0.86777   0.63998   0.60432   0.34435   0.63388   0.34711   0.37487  0.027557  0.029789   0.74355   0.78470   0.63183  0.019272   0.48245...
   0.81384   0.74068   0.59507  0.056584  0.065755   0.73149   0.94198   0.16632   0.38067   0.63685   0.62824  0.063965   0.88211   0.41027   0.60684   0.16312   0.76326  0.054253   0.91629   0.22353   0.57114   0.19324   0.82873...
        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..

scala> val g = grand(256,176965)
g: BIDMat.GMat =
   0.34660   0.30266   0.70115   0.53805   0.47667   0.99830   0.72554   0.33751   0.22702   0.13273   0.40718   0.33229   0.30885   0.42985   0.74604   0.45333   0.60975   0.50820   0.68527   0.97660   0.59849   0.29411   0.39118...
   0.85486   0.14368   0.16848   0.82664   0.89762   0.60588   0.30885   0.46973   0.11753   0.47522   0.95762  0.047029   0.28322   0.90902   0.11110   0.60526   0.70672   0.17768   0.51984   0.25233   0.64633  0.097652   0.22967...
        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..        ..

scala> val h = f ^* g
h: BIDMat.GMat =
  64.222  64.623  67.955  61.407  62.221  61.621  62.616  65.751  68.504  64.667  65.463  62.693  70.220  69.879  65.115  68.670  70.400  67.562  68.949  62.765  62.149  68.763  66.013  67.540  65.336  66.513  64.797  66.269...
  66.547  63.118  70.510  65.141  64.089  61.146  66.066  67.357  68.428  67.383  64.990  62.570  69.941  71.695  66.835  66.209  70.526  66.446  70.568  63.454  62.166  65.496  65.838  67.402  65.898  67.939  63.881  65.722...
      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..      ..

The relevant code is in GMat.scala, line 1661:

1654  def apply(nr:Int, nc:Int):GMat = {
1655    val retv = new GMat(nr, nc, new Pointer(), 1L*nr*nc) 
1656    if (Mat.debugMem) {
1657      println("GMat %d %d, %d %f" format (nr, nc, SciFunctions.getGPU, SciFunctions.GPUmem._1))
1658      if (nr*nc > Mat.debugMemThreshold) throw new RuntimeException("GMat alloc too large");
1659    }
1660    var err = if (1L*nr*nc*Sizeof.FLOAT > Mat.hostAllocSize) {
1661      cudaMallocHost(retv.data, 1L*nr*nc*Sizeof.FLOAT);
1662    } else {
1663      cudaMalloc(retv.data, 1L*nr*nc*Sizeof.FLOAT);
1664    }
1665    cudaDeviceSynchronize;
1666    if (err == 0) err = cudaGetLastError();
1667    if (err != 0) throw new RuntimeException("CUDA alloc failed " + cudaGetErrorString(err));
1668    retv
1669  } 

Notice that:

scala> 1L*40000*80000*4
res2: Long = 12800000000

scala> 1L*(40000*80000*4)
res3: Long = -84901888

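The latter is the value that cudaMallocHost() was complaining about: the requested size of 12,800,000,000 bytes wraps around to -84901888 when it is truncated to a 32-bit Int. The version of the code I'm using is: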

[user1 BIDMat]$ git log
commit 5c4f7ed945d7d7aac34f8fa2544258e40a7c1568
Author: John Canny <jfc@cs.berkeley.edu>
Date:   Tue Nov 17 16:26:12 2015 -0800

    added HDFSIO.scala

fhorse commented 8 years ago

BTW, my version of BIDMach/BIDMat is built on RHEL6.5, kernel version 2.6.32, cuda version 7.0, if that helps.

fhorse commented 8 years ago

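OK. The problem is the size of the array being allocated: its byte count exceeds the largest value an int can hold, and the capacity parameter of the Buffer constructor in Buffer.java is an int, so the value arrives there as a negative number: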

  189       Buffer(int mark, int pos, int lim, int cap) {       // package-private
  190           if (cap < 0)
  191               throw new IllegalArgumentException("Negative capacity: " + cap);
  192           this.capacity = cap;
  193           limit(lim);
  194           position(pos);
  195           if (mark >= 0) {
  196               if (mark > pos)
  197                   throw new IllegalArgumentException("mark > position: ("
  198                                                      + mark + " > " + pos + ")");
  199               this.mark = mark;
  200           }
  201       }