/**
 * Reports whether a string carries no usable content.
 *
 * @param s the string to inspect; may be `null`
 * @return `true` when `s` is `null` or is empty once trimmed, `false` otherwise
 */
def cond(s: String): Boolean =
  // Option(null) is None, and forall on None is true, which covers the null case.
  Option(s).forall(_.trim.isEmpty)
/**
 * Swaps a leading marker on a string: a leading "@@@@" becomes "######",
 * and a leading "######" becomes "@@@@".
 *
 * @param str the string to rewrite; may be `null`
 * @return `null` when `cond(str)` holds; otherwise `str` with its leading
 *         marker swapped, or `str` itself when neither marker is present
 */
def transform(str: String): String = {
  val atMarker = "@@@@"
  val hashMarker = "######"
  str match {
    // A variable pattern also matches null, so cond sees null inputs too.
    case blank if cond(blank) => null
    // The prefix test runs against a lower-cased copy; the tail is taken from the original.
    case s if s.toLowerCase.startsWith(atMarker) =>
      hashMarker + s.substring(atMarker.length)
    case s if s.toLowerCase.startsWith(hashMarker) =>
      atMarker + s.substring(hashMarker.length)
    case unchanged => unchanged
  }
}
// Conditional array buffer processing: collects x, optionally y, and (when z is set)
// their transform(...) counterparts, then returns the distinct values as an array.
// NOTE(review): makeUdf is defined outside this snippet; presumably it wraps the
// lambda as a UDF under test — confirm against its definition.
val u = makeUdf((x: String, y: String, z: Boolean) => {
var r = new mutable.ArrayBuffer[String]()
// `:+` yields a new buffer rather than appending in place, hence the `var` and reassignment.
// x is always kept, with no null/blank check.
r = r :+ x
// y is kept only when it is neither null nor blank.
if (!cond(y)) {
r = r :+ y
if (z) {
r = r :+ transform(y)
}
}
// transform returns null for a null/blank x, so a null entry can be appended here.
if (z) {
r = r :+ transform(x)
}
// Duplicates are removed before the buffer is converted to an array.
r.distinct.toArray
})
The exception is as follows for all of the above, just different variables. It is the comparison of boolean vs int in all cases.
OpcodeSuite:
- not boolean *** FAILED ***
-
org.apache.spark.sql.catalyst.ExtendedAnalysisException: [DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "(value = 0)" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("BOOLEAN" and "INT"). SQLSTATE: 42K09;
'Project [value#1, NOT NOT (value#1 = 0) AS new#10]
+- Repartition 1, true
+- LocalRelation [value#1]
at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.dataTypeMismatch(package.scala:77)
at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.dataTypeMismatch(package.scala:70)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$7(CheckAnalysis.scala:371)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$7$adapted(CheckAnalysis.scala:353)
at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:252)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:251)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:251)
at scala.collection.immutable.Vector.foreach(Vector.scala:2124)
at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:251)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:251)
..
The following tests in OpcodeSuite fail when ANSI mode is enabled. I don't really know what is going on, but it's likely a small issue:
// Logical NOT of a single Boolean argument.
val myudf: Boolean => Boolean = { x => !x }
// Logical AND of two Boolean arguments.
val myudf: (Boolean, Boolean) => Boolean = { (a, b) => a && b }
// Negated logical AND (NAND) of two Boolean arguments.
val myudf: (Boolean, Boolean) => Boolean = { (a, b) => !(a && b) }
// Logical OR of two Boolean arguments.
val myudf: (Boolean, Boolean) => Boolean = { (a, b) => a || b }
Conditional array buffer processing:
I am filing this issue as noted in PR https://github.com/NVIDIA/spark-rapids/pull/11553. I am going to disable ANSI mode in this test for now, but as the exception shows, the planner will complain.
The exception is as follows for all of the above, just different variables. It is the comparison of boolean vs int in all cases.