/**
* This method converts query exception into error string, which then persist to query result
* metadata
*/
def processQueryException(ex: Exception): String = {
getRootCause(ex) match {
case r: ParseException =>
handleQueryException(r, "Syntax error")
case r: AmazonS3Exception =>
incrementCounter(MetricConstants.S3_ERR_CNT_METRIC)
handleQueryException(
r,
"Fail to read data from S3. Cause",
Some(r.getServiceName),
Some(r.getStatusCode))
case r: AWSGlueException =>
incrementCounter(MetricConstants.GLUE_ERR_CNT_METRIC)
// Redact Access denied in AWS Glue service
r match {
case accessDenied: AccessDeniedException =>
accessDenied.setErrorMessage(
"Access denied in AWS Glue service. Please check permissions.")
case _ => // No additional action for other types of AWSGlueException
}
handleQueryException(
r,
"Fail to read data from Glue. Cause",
Some(r.getServiceName),
Some(r.getStatusCode))
case r: AnalysisException =>
handleQueryException(r, "Fail to analyze query. Cause")
case r: SparkException =>
handleQueryException(r, "Spark exception. Cause")
case r: Exception =>
handleQueryException(r, "Fail to run query. Cause")
}
}
While the root cause of the exception is parsed and returned as part of the contract, the actual ex should be logged as well.
https://github.com/opensearch-project/opensearch-spark/blame/85fec7edbeef7d38de4bea9b824bfa41f10099fd/spark-sql-application/src/main/scala/org/apache/spark/sql/FlintJobExecutor.scala#L455C5-L455C17