writer / replaCy

spaCy match and replace, maintaining conjugation
https://pypi.org/project/replacy/
MIT License
34 stars 8 forks source link

Tests Helper Module #94

Closed sam-writer closed 3 years ago

sam-writer commented 3 years ago

We use this pattern everywhere we use replaCy:

import os
from typing import Any, Dict, List, Tuple

import pytest
import spacy

from service import SomeClass

# Load the small English spaCy pipeline once at import time.
nlp = spacy.load("en_core_web_sm")

# `otherkwargs` is a placeholder for whatever else the service constructor needs.
r = SomeClass(nlp, otherkwargs)
# The replaCy matcher under test is exposed as an attribute of the service object.
r_matcher = r.replaCy

def generate_cases(
    match_dict: Dict[str, Any]
) -> Tuple[List[Tuple[str, str]], List[Tuple[str, str]]]:
    """Flatten the per-rule test sentences of a match dict into case lists.

    Each rule entry is read as ``match_dict[rule]["test"]["positive"]`` and
    ``match_dict[rule]["test"]["negative"]``; a rule missing any of those
    keys raises ``KeyError``.

    Args:
        match_dict: Mapping of rule name to that rule's definition.

    Returns:
        A ``(positives, negatives)`` pair. Each element is a list of
        ``(rule_name, sentence)`` tuples, ordered by rule and then by the
        sentence's position within the rule.
    """
    positives: List[Tuple[str, str]] = []
    negatives: List[Tuple[str, str]] = []
    for name, rule in match_dict.items():
        cases = rule["test"]
        # Look both keys up before extending, so a malformed rule fails
        # before any of its sentences are collected.
        wanted, unwanted = cases["positive"], cases["negative"]
        positives.extend((name, sentence) for sentence in wanted)
        negatives.extend((name, sentence) for sentence in unwanted)
    return positives, negatives

# Built at import time so the lists are available to the
# @pytest.mark.parametrize decorators below when the module is collected.
positive_cases, negative_cases = generate_cases(r_matcher.match_dict)

@pytest.mark.parametrize("match_name,positive_sent", positive_cases)
def test_positive_detection(match_name: str, positive_sent: str):
    """Check that a positive example sentence triggers its own rule.

    Args:
        match_name: Name of the rule the sentence belongs to.
        positive_sent: Sentence that the rule is expected to match.
    """
    spans = r_matcher(positive_sent)
    # Only count spans produced by the rule under test: a match from some
    # other rule must not make this case pass.
    spans_from_this_rule = [s for s in spans if s._.match_name == match_name]
    # Printed so a failing case shows which rule and sentence were involved.
    print(match_name, positive_sent)
    assert len(spans_from_this_rule) > 0, "Positive case should trigger rule"

@pytest.mark.parametrize("match_name,negative_sent", negative_cases)
def test_rules_negative_detection(match_name: str, negative_sent: str):
    """Check that a negative example sentence does not trigger its own rule.

    Matches produced by other rules are ignored; only spans whose
    ``match_name`` equals the rule under test count as failures.
    """
    found = r_matcher(negative_sent)
    offending = [span for span in found if span._.match_name == match_name]
    # Printed so a failing case shows which rule and sentence were involved.
    print(match_name, negative_sent)
    assert not offending, "Negative case should NOT trigger rule"

We should put this in replaCy in such a way that a user can do something like:

from replacy.testing import MatchTester

import spacy
from service import SomeClass

# Same setup as today: load the spaCy pipeline and build the service object.
nlp = spacy.load("en_core_web_sm")
# `otherkwargs` is a placeholder for whatever else the service constructor needs.
r = SomeClass(nlp, otherkwargs)
# The replaCy matcher to be tested is exposed as an attribute of the service object.
r_matcher = r.replaCy

def test_matches():
    # Sketch of the desired user-facing API, not working code.
    # NOTE(review): MatchTester is the proposed helper and does not exist yet;
    # presumably constructing it would run every positive and negative case
    # in the matcher's match dict - confirm once the helper is designed.
    MatchTester(r_matcher)
    # Always passes on its own; any failure would have to be raised by the
    # MatchTester call above.
    assert True

I am not sure how to do this idiomatically, but in whatever way is idiomatic for pytest, we should make it really easy to test all the positive and negative cases in the user's match dict.