jpmml / jpmml-sparkml

Java library and command-line application for converting Apache Spark ML pipelines to PMML
GNU Affero General Public License v3.0
267 stars 80 forks source link

PMMLBuilder fails when SQLTransformer uses cast(id as double) #62

Closed fttt closed 5 years ago

fttt commented 5 years ago

Version info: Spark 2.4.0, JPMML 1.5.0

I want to change a column's type inside a PipelineModel. This works in the PipelineModel itself, but not in the PMML build. Can anyone help me? Thanks!

When I run this code:

import org.apache.spark.ml.{Pipeline, PipelineStage}
import org.jpmml.sparkml.PMMLBuilder

import scala.collection.mutable.ListBuffer

object aaaa { def main(args: Array[String]): Unit = { import org.apache.spark.ml.feature.SQLTransformer import org.apache.spark.sql.SparkSession

// Reproduction body: builds a one-stage pipeline whose only stage is a
// SQLTransformer that casts `id` to Double, then tries to export it to PMML.

// Local-mode Spark session with Hive support enabled.
val spark = SparkSession
  .builder()
  .master("local")
  .enableHiveSupport()
  .getOrCreate()

// Two-row input frame; `id` comes from integer literals, `v1`/`v2` from doubles
// (the second row's `v2` is NaN).
val df = spark.createDataFrame(
  Seq((0, 1.0, 3.0), (2, 2.0, Double.NaN))).toDF("id", "v1", "v2")
df.printSchema()
// The statement under test: a CAST expression over the `id` column.
val sqlTrans = new SQLTransformer()
  .setStatement(
  "SELECT cast(id as Double)  FROM __THIS__")

// Pipeline stages are collected in a mutable buffer; only the SQLTransformer is added.
val stagesArray = new ListBuffer[PipelineStage]()

stagesArray.append(sqlTrans)
val pp = new Pipeline().setStages(stagesArray.toArray)
val ppmodel = pp.fit(df)
// Transforming with the fitted pipeline is reported to succeed on the Spark side.
ppmodel.transform(df).show()
  // PMML export uses the schema of the original (untransformed) frame.
  val schema = df.schema
  // According to the stack trace reported with this snippet, build() throws
  // IllegalArgumentException: cast(id#54 as double) from ExpressionTranslator.
  new PMMLBuilder(schema,ppmodel).build()

} }

It returns this error:

Exception in thread "main" java.lang.IllegalArgumentException: cast(id#54 as double)
    at org.jpmml.sparkml.ExpressionTranslator.translateInternal(ExpressionTranslator.java:229)
    at org.jpmml.sparkml.ExpressionTranslator.translate(ExpressionTranslator.java:72)
    at org.jpmml.sparkml.ExpressionTranslator.translate(ExpressionTranslator.java:67)
    at org.jpmml.sparkml.feature.SQLTransformerConverter.encodeFeatures(SQLTransformerConverter.java:110)
    at org.jpmml.sparkml.feature.SQLTransformerConverter.registerFeatures(SQLTransformerConverter.java:141)
    at org.jpmml.sparkml.PMMLBuilder.build(PMMLBuilder.java:110)
    at aaaa$.main(aaaa.scala:33)
    at aaaa.main(aaaa.scala)