justmars / statute-patterns

Construction of regex patterns for Philippine statutory law.
https://lawsql.com
BSD 3-Clause "New" or "Revised" License
0 stars 0 forks source link

Count number of unique statutes per text #3

Closed justmars closed 1 year ago

justmars commented 1 year ago
class StatuteCounted(NamedTuple):
    """Pair a statute with the number of times it was detected in a text."""

    statute: StatuteBase  # statute built from a category and a serial id
    mentions: int  # number of times that category/serial id pair was yielded

    @classmethod
    def extract_statutes(cls, content: str) -> Iterator[str]:
        """Yield one ``"<category> <serial id>"`` string per statute mention.

        The strings are meant to be fed to ``Counter()``; a statute that is
        matched several times in ``content`` is therefore yielded several
        times.

        Args:
            content: Text passed to ``StatuteID.get_statute_matches``.

        Yields:
            ``f"{statute_category} {statute_serial_id}"`` for every matched
            ``StatuteBase``. A match whose ``is_single`` is false contributes
            one string per ``StatuteBase`` found in its
            ``deconstructed_identifiers`` instead.
        """
        statutes = StatuteID.get_statute_matches(content)
        if not statutes:
            return
        for s in statutes:
            if not isinstance(s, StatuteBase):  # cat + idx must exist
                continue
            if s.is_single:
                yield f"{s.statute_category} {s.statute_serial_id}"
                continue
            # more than one StatuteBase should exist
            for dx in s.deconstructed_identifiers:
                if isinstance(dx, StatuteBase):  # cat + idx must exist
                    yield f"{dx.statute_category} {dx.statute_serial_id}"

    @classmethod
    def count_statutes(cls, content: str) -> Iterator["StatuteCounted"]:
        """Yield one ``StatuteCounted`` per unique statute found in ``content``.

        Results follow the order in which each statute is first yielded by
        ``extract_statutes``. A serial text that cannot be turned back into a
        ``StatuteBase`` (unknown category, missing serial id, or a rejected
        combination) is logged via ``logger.error`` and skipped, so one bad
        entry does not abort the remaining counts.

        Args:
            content: Text to scan for statute mentions.

        Yields:
            ``StatuteCounted(statute=..., mentions=...)`` for each unique
            serial text that could be rebuilt.
        """
        counted = Counter(cls.extract_statutes(content))
        for serial_text, mentions in counted.items():
            # Split only once: the first token is the category and everything
            # after it is the serial id, so a serial id containing whitespace
            # is not truncated.
            # NOTE(review): assumes the category's string form has no
            # whitespace - confirm against StatuteCategory.
            elements = serial_text.split(maxsplit=1)
            try:
                cat = StatuteCategory(elements[0])
                idx = elements[1]
                base = StatuteBase(statute_category=cat, statute_serial_id=idx)
            except Exception as e:
                msg = f"Bad {elements=} in forming statute base; see {e=}"
                logger.error(msg)
                continue
            # Yield outside the try so only construction errors are caught.
            yield cls(statute=base, mentions=mentions)
justmars commented 1 year ago

See addition here: https://github.com/justmars/lawrgx/blob/7b8ed0012e713282fda83db60a32b0300e0627cb/lawrgx/rules.py#L67