import unittest

from sklearn.metrics import accuracy_score


class TestIrisPredictions(unittest.TestCase):
    def setUp(self):
        # Prepare both pipelines for use in the tests
        # (SimplePipeline and PipelineWithDataEngineering are the
        # pipeline classes built earlier).
        self.pipeline_v1 = SimplePipeline()
        self.pipeline_v2 = PipelineWithDataEngineering()
        self.pipeline_v1.run_pipeline()
        self.pipeline_v2.run_pipeline()

        # The benchmark simply predicts the same class (1.0)
        # for every test entry.
        self.benchmark_predictions = [1.0] * len(self.pipeline_v1.y_test)
    def test_accuracy_higher_than_benchmark(self):
        # Given
        benchmark_accuracy = accuracy_score(
            y_true=self.pipeline_v1.y_test,
            y_pred=self.benchmark_predictions)

        predictions = self.pipeline_v1.predict(self.pipeline_v1.X_test)

        # When
        actual_accuracy = accuracy_score(
            y_true=self.pipeline_v1.y_test,
            y_pred=predictions)

        # Then
        print(f'model accuracy: {actual_accuracy}, benchmark accuracy: {benchmark_accuracy}')
        self.assertTrue(actual_accuracy > benchmark_accuracy)
    def test_accuracy_compared_to_previous_version(self):
        # When
        v1_accuracy = self.pipeline_v1.get_accuracy()
        v2_accuracy = self.pipeline_v2.get_accuracy()
        print(f'pipeline v1 accuracy: {v1_accuracy}')
        print(f'pipeline v2 accuracy: {v2_accuracy}')

        # Then
        self.assertTrue(v2_accuracy >= v1_accuracy)
Together these make up the model prediction quality tests: a benchmark test, which requires accuracy to beat a naive baseline that always predicts the same class, and a comparison against the previous pipeline version, which requires v2 to be at least as accurate as v1.
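To run the suite, the standard library runner works unchanged. The sketch below is only illustrative: the module name in the command-line example is assumed, and the explicit loader/runner form is handy in a notebook, where unittest.main() would otherwise try to exit the process.

# From the command line (assuming the tests live in test_iris_predictions.py):
#   python -m unittest test_iris_predictions -v
#
# From a notebook or interactive session:
import unittest

suite = unittest.TestLoader().loadTestsFromTestCase(TestIrisPredictions)
unittest.TextTestRunner(verbosity=2).run(suite)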