scalanlp / breeze

Breeze is a numerical processing library for Scala.
www.scalanlp.org
Apache License 2.0
3.44k stars 691 forks source link

matrix multiply fails on transposed matrix, but succeeds on copy of transposed matrix #850

Open philwalk opened 1 year ago

philwalk commented 1 year ago

This seems like a bug to me, hopefully it's not a misunderstanding on my part.

The expression on line 35 executes as expected, but the seemingly identical expression on line 38 causes a runtime crash. The main difference between the expression X.t and (X.t).copy AFAICT is the tranposed matrix has an offset of 1 column into a larger underlying array, due to the way it was created.

The behavior is the same with both scala 2.13.10 and scala 3.2.1. I have tested in the following environments:

The code:

import breeze.linalg._

object ExpressionCrash {

  def main(args: Array[String]): Unit = {
    val yAndX = testmatrix
    printf("yAndX: %d x %d\n%s\n", yAndX.rows, yAndX.cols, yAndX)
    val y = yAndX(::,0) // column zero is y
    printf("y: %d\n%s\n", y.size, y)
    val X = yAndX(::,1 until yAndX.cols) // remainder of columns are predictors
    printf("X:  %d x %d\n%s\n", X.rows, X.cols, X)

    val N = X.cols.toDouble
    val Iᴛ: DenseMatrix[Double] = DenseMatrix.eye[Double](X.rows)
    val Iɴ: DenseMatrix[Double] = DenseMatrix.eye[Double](X.cols)

    val ιᴛ: DenseVector[Double] = DenseVector.ones[Double](X.rows)
    val ιɴ: DenseVector[Double] = DenseVector.ones[Double](X.cols)

    val T = X.rows.toDouble
    val Jᴛ: DenseMatrix[Double] = Iᴛ - 1.0/T * ιᴛ * ιᴛ.t
    val Jɴ: DenseMatrix[Double] = Iɴ - 1.0/T * ιɴ * ιɴ.t

    val ȳ = ιᴛ.t * y/T
    printf("ȳ: %s\n", ȳ)

    var Wxz: DenseVector[Double] = DenseVector.zeros(X.cols)
    val Z = DenseVector.ones[Double](X.rows)

    printf(" Jɴ dims: %s x %s\n", Jɴ.rows,  Jɴ.cols)
    printf("X.t dims: %s x %s\n", X.t.rows, X.t.cols)
    printf(" Jᴛ dims: %s x %s\n", Jᴛ.rows,  Jᴛ.cols)
    printf("  Z dims: %s x %s\n", X.rows,   X.cols)

    Wxz = Jɴ * (X.t).copy * Jᴛ * Z                                   // <<<<<< this succeeds
    printf("Wxz: %s\n", Wxz.data.take(4).toSeq)

    Wxz = Jɴ * X.t        * Jᴛ * Z                                   // <<<<<< this crashes
    printf("Wxz: %s\n", Wxz.data.take(4).toSeq)
  }

  def J(T: Double): DenseMatrix[Double] = {
    val Iᴛ: DenseMatrix[Double] = DenseMatrix.eye[Double](T.toInt)
    val ιᴛ = DenseVector.ones[Double](T.toInt)
    val Jᴛ = Iᴛ - 1.0/T * ιᴛ * ιᴛ.t
    Jᴛ
  }

  lazy val (testrows, testcols) = (4, 3)
  lazy val testmatrix = new DenseMatrix(testcols, testrows, testdata.toArray).t
  lazy val testdata = Seq(
  67,  33,  64,
  62,  69,  78,
  76,  58,  93,
  57,  60,  60,
  ).map( _.toDouble ).toArray
}

Output with stack dump:

yAndX: 4 x 3
67.0  33.0  64.0
62.0  69.0  78.0
76.0  58.0  93.0
57.0  60.0  60.0
y: 4
DenseVector(67.0, 62.0, 76.0, 57.0)
X:  4 x 2
33.0  64.0
69.0  78.0
58.0  93.0
60.0  60.0
Nov 26, 2022 10:38:52 AM dev.ludovic.netlib.blas.InstanceBuilder initializeNative
WARNING: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS
return java 11 instance
ȳ: 65.5
 Jɴ dims: 2 x 2
X.t dims: 2 x 4
 Jᴛ dims: 4 x 4
  Z dims: 4 x 2
Wxz: ArraySeq(0.0, 0.0)
Exception in thread "main" java.lang.IndexOutOfBoundsException: Index 12 out of bounds for length 12
        at dev.ludovic.netlib.blas.AbstractBLAS.checkIndex(AbstractBLAS.java:51)
        at dev.ludovic.netlib.blas.AbstractBLAS.dgemm(AbstractBLAS.java:295)
        at breeze.linalg.operators.DenseMatrixMultiplyOps$impl_OpMulMatrix_DMD_DMD_eq_DMD$.apply(DenseMatrixOps.expanded.scala:192)
        at breeze.linalg.operators.DenseMatrixMultiplyOps$impl_OpMulMatrix_DMD_DMD_eq_DMD$.apply(DenseMatrixOps.expanded.scala:186)
        at breeze.linalg.ImmutableNumericOps.$times(NumericOps.scala:119)
        at breeze.linalg.ImmutableNumericOps.$times$(NumericOps.scala:27)
        at breeze.linalg.DenseMatrix.$times(DenseMatrix.scala:52)
        at ExpressionCrash$.main(ExpressionCrash.scala:38)
        at ExpressionCrash.main(ExrpressionCrash.scala)
        at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.base/java.lang.reflect.Method.invoke(Method.java:566)
        at dotty.tools.scripting.ScriptingDriver.compileAndRun(ScriptingDriver.scala:36)
        at dotty.tools.scripting.Main$.main(Main.scala:45)
        at dotty.tools.MainGenericRunner$.run$1(MainGenericRunner.scala:249)
        at dotty.tools.MainGenericRunner$.main(MainGenericRunner.scala:268)
        at dotty.tools.MainGenericRunner.main(MainGenericRunner.scala)

philwalk commented 1 year ago

Here's a somewhat cleaner version of the test script, showing exactly what the problem is:


import breeze.linalg._

object ExpressionCrash {

  def main(args: Array[String]): Unit = {
    val (matA, matB) = createTriggerData
    matA * matB.copy  // <<<<<< this succeeds
    matA * matB       // <<<<<< this crashes
  }

  def createTriggerData: (DenseMatrix[Double], DenseMatrix[Double]) = {
    val yAndX = testmatrix
    val y = yAndX(::,0) // column zero is y
    val X = yAndX(::,1 until yAndX.cols) // remainder of columns are predictors

    val Iᴛ: DenseMatrix[Double] = DenseMatrix.eye[Double](X.rows)
    val Iɴ: DenseMatrix[Double] = DenseMatrix.eye[Double](X.cols)

    val ιᴛ: DenseVector[Double] = DenseVector.ones[Double](X.rows)
    val ιɴ: DenseVector[Double] = DenseVector.ones[Double](X.cols)

    val T = X.rows.toDouble
    val Jᴛ: DenseMatrix[Double] = Iᴛ - 1.0/T * ιᴛ * ιᴛ.t
    val Jɴ: DenseMatrix[Double] = Iɴ - 1.0/T * ιɴ * ιɴ.t

    val ȳ = ιᴛ.t * y/T

    var Wxz: DenseVector[Double] = DenseVector.zeros(X.cols)
    val Z = DenseVector.ones[Double](X.rows)
    (Jɴ, X.t)
  }

  def J(T: Double): DenseMatrix[Double] = {
    val Iᴛ: DenseMatrix[Double] = DenseMatrix.eye[Double](T.toInt)
    val ιᴛ = DenseVector.ones[Double](T.toInt)
    val Jᴛ = Iᴛ - 1.0/T * ιᴛ * ιᴛ.t
    Jᴛ
  }

  lazy val (testrows, testcols) = (4, 3)
  lazy val testmatrix = new DenseMatrix(testcols, testrows, testdata.toArray).t
  lazy val testdata = Seq(
  67,  33,  64,
  62,  69,  78,
  76,  58,  93,
  57,  60,  60,
  ).map( _.toDouble ).toArray
}

And here's the somewhat cleaner crash dump:

$ jsrc/expressionCrash.sc
Dec 02, 2022 4:21:06 PM dev.ludovic.netlib.blas.InstanceBuilder initializeNative
WARNING: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS
Dec 02, 2022 4:21:06 PM dev.ludovic.netlib.blas.InstanceBuilder initializeJava
WARNING: Failed to load implementation from:dev.ludovic.netlib.blas.VectorBLAS
java.lang.IndexOutOfBoundsException: Index 12 out of bounds for length 12
        at dev.ludovic.netlib.blas.AbstractBLAS.checkIndex(AbstractBLAS.java:51)
        at dev.ludovic.netlib.blas.AbstractBLAS.dgemm(AbstractBLAS.java:295)
        at breeze.linalg.operators.DenseMatrixMultiplyOps$impl_OpMulMatrix_DMD_DMD_eq_DMD$.apply(DenseMatrixOps.expanded.scala:192)
        at breeze.linalg.operators.DenseMatrixMultiplyOps$impl_OpMulMatrix_DMD_DMD_eq_DMD$.apply(DenseMatrixOps.expanded.scala:186)
        at breeze.linalg.ImmutableNumericOps.$times(NumericOps.scala:119)
        at breeze.linalg.ImmutableNumericOps.$times$(NumericOps.scala:27)
        at breeze.linalg.DenseMatrix.$times(DenseMatrix.scala:52)
        at ExpressionCrash$.main(expressionCrash.sc:10)
        at ExpressionCrash.main(expressionCrash.sc)
        at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:78)
        at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.base/java.lang.reflect.Method.invoke(Method.java:567)
        at dotty.tools.scripting.ScriptingDriver.compileAndRun(ScriptingDriver.scala:33)
        at dotty.tools.scripting.Main$.process(Main.scala:45)
        at dotty.tools.MainGenericRunner$.run$1(MainGenericRunner.scala:250)
        at dotty.tools.MainGenericRunner$.process(MainGenericRunner.scala:270)
        at dotty.tools.MainGenericRunner$.main(MainGenericRunner.scala:281)
        at dotty.tools.MainGenericRunner.main(MainGenericRunner.scala)

BTW, it seems possible that this is a bug in dev.ludovic.netlib.blas.AbstractBLAS.checkIndex rather than breeze. On line 295, the method is called with index==12, which is clearly beyond the end of the array, but the actual expression would seem to only require 11. Perhaps offsetb is incorrectly being added (should it be offsetb -1?).

I opened an issue here: netlib issue 19