Closed chakravarthik27 closed 7 months ago
# Build one question-answering harness that evaluates the OpenAI
# "gpt-3.5-turbo-instruct" model on the "test-tiny" split of four datasets.
harness = Harness(
    task="question-answering",
    model={"model": "gpt-3.5-turbo-instruct", "hub": "openai"},
    # Every dataset entry has the same shape and uses the same split.
    data=[
        {"data_source": source, "split": "test-tiny"}
        for source in ("BoolQ", "NQ-open", "MedQA", "LogiQA")
    ],
    config={
        # Generation settings passed for the model under test.
        "model_parameters": {
            "max_tokens": 32,
            "temperature": 0.2,
        },
        # Answers are scored with the "llm_eval" metric, using the same
        # OpenAI model as the evaluator.
        "evaluation": {
            "metric": "llm_eval",
            "model": "gpt-3.5-turbo-instruct",
            "hub": "openai",
        },
        "tests": {
            # Default pass-rate threshold; the two robustness tests listed
            # below set their own, stricter threshold.
            "defaults": {"min_pass_rate": 0.65},
            "robustness": {
                "uppercase": {"min_pass_rate": 0.75},
                "add_typo": {"min_pass_rate": 0.75},
            },
        },
    },
)
Generate the test cases, run them, and report the results:
# Chain the three harness steps in order: generate, then run, then report.
# Kept as a single expression so the value of report() is still the
# statement's result (e.g. displayed when this is the last line of a cell).
(
    harness
    .generate()
    .run()
    .report()
)
generate, run and report