Open DrPepper8888 opened 3 months ago
混淆矩阵(confusion matrix)、分类报告(classification report)以及ROC AUC评估
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# `predictions` is assumed to be the model's prediction DataFrame,
# carrying a "trueLabel" column and a "prediction" column.
evaluator = MulticlassClassificationEvaluator(
    labelCol="trueLabel",
    predictionCol="prediction",
    metricName="accuracy",
)
accuracy = evaluator.evaluate(predictions)

# The confusion matrix is just the count of every
# (true label, predicted label) pair.
confusion_matrix = predictions.groupBy("trueLabel", "prediction").count()

# Show the full matrix without truncating cell values.
confusion_matrix.show(truncate=False)
from pyspark.sql.functions import col, udf
from pyspark.sql.types import ArrayType, DoubleType


def classification_report(tp, tn, fp, fn):
    """Return [precision, recall, f1] from per-row confusion-matrix counts.

    Zero denominators yield 0.0 instead of raising ZeroDivisionError.
    Values are returned as floats to match the DoubleType element type.
    """
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1 = (
        2 * precision * recall / (precision + recall)
        if (precision + recall) > 0
        else 0.0
    )
    return [precision, recall, f1]


# FIX: a UDF that returns a list must declare ArrayType(DoubleType()) as its
# return type — the original passed a Python list of DoubleType instances,
# which is not a valid returnType.
report_udf = udf(classification_report, ArrayType(DoubleType()))

# FIX: apply the UDF row-wise over the DataFrame's columns. The original
# misspelled `report_udf` as `report_udd` (NameError), referenced the
# undefined name `predictions_with_features`, and passed collected Row
# objects where Column expressions are required.
# NOTE(review): assumes `predictions` has numeric "TP", "TN", "FP", "FN"
# columns — confirm against the upstream pipeline.
predictions_with_metrics = predictions.withColumn(
    "metrics",
    report_udf(col("TP"), col("TN"), col("FP"), col("FN")),
)

# Show the per-row classification report.
predictions_with_metrics.select("label", "metrics").show(truncate=False)
from pyspark.ml.evaluation import BinaryClassificationEvaluator

# `binary_predictions` is assumed to be a binary model's prediction DataFrame.
# FIX: ROC AUC must be computed from the continuous scores, not the hard 0/1
# "prediction" column — feeding hard labels collapses the ROC curve to a
# single operating point and misreports the AUC. Spark ML classifiers emit
# their scores in the "rawPrediction" column by default.
# NOTE(review): confirm `binary_predictions` actually carries "rawPrediction";
# if only probabilities are kept, use the "probability" column instead.
evaluator = BinaryClassificationEvaluator(
    rawPredictionCol="rawPrediction",
    labelCol="trueLabel",
    metricName="areaUnderROC",
)
auc = evaluator.evaluate(binary_predictions)

print(f"ROC AUC: {auc}")
混淆矩阵(confusion matrix)、分类报告(classification report)以及ROC AUC评估
1. 混淆矩阵(Confusion Matrix)
2. 分类报告(Classification Report)
3. ROC AUC