rdatools / rdabase

Redistricting analytics data & shared code
MIT License
0 stars 0 forks source link

Normalizing metrics for scatter plots so NE is always “better” #75

Open alecramsay opened 1 month ago

alecramsay commented 1 month ago

Some thoughts:

### RATE SPLITTING ###

# Rating county- & district-splitting are inverses of each other.
# Sometimes counties >> districts; sometimes counties << districts.

# Upper end of the "best" raw splitting target: best_target() weights this
# value, and rate_county_splitting() uses it directly when the number of
# counties does not exceed the number of districts.
MAX_SPLITTING: float = 1.20  # 90–10 => 95–5 splits
# Lower end of the "best" raw splitting target that best_target() blends in.
MIN_SPLITTING: float = 1.00  # No splits still vs. 97–03 splits
# The "worst" raw value is the "best" value multiplied by this factor.
WORST_MULTIPLIER: float = 1.33  # 1/3 bigger

def best_target(n: float, m: float) -> float:
    """Return the best-case raw splitting target for a pair of counts.

    The result is a weighted blend of MAX_SPLITTING and MIN_SPLITTING. The
    weight on MAX_SPLITTING is ``(min(n, m) - 1) / max(n, m)``; MIN_SPLITTING
    receives the remaining weight, so the two weights always sum to 1.

    Spreadsheet equivalent of the calculation:

    =LAMBDA(n, m, most, least, (((MIN(n, m) - 1) / MAX(n, m)) * most) + ((1 - ((MIN(n, m) - 1) / MAX(n, m))) * least))

    Args:
        n: One of the two counts (the caller passes the number of counties).
        m: The other count (the caller passes the number of districts).

    Returns:
        The blended target value.

    Raises:
        ZeroDivisionError: If the larger of the two counts is zero.
    """

    # Share of the blend taken by MAX_SPLITTING; the order of n and m does not
    # matter because only their min and max are used.
    max_share: float = (min(n, m) - 1) / max(n, m)
    min_share: float = 1 - max_share

    return (max_share * MAX_SPLITTING) + (min_share * MIN_SPLITTING)

def rate_county_splitting(
    raw_county_splitting: float, n_counties: int, n_districts: int
) -> int:
    """Turn a raw county-splitting measurement into a rating.

    The raw value is clipped to a [best, worst] range, unitized over that
    range, inverted, and rescaled by a Normalizer. The top rating (100) is
    reserved for raw values that are not above 1.0; any other input that
    would reach 100 is reported as 99 instead.

    Args:
        raw_county_splitting: The raw county-splitting measurement.
        n_counties: Number of counties.
        n_districts: Number of districts.

    Returns:
        The normalized rating taken from the Normalizer (presumably on a
        0-100 scale -- confirm against the Normalizer implementation).
    """
    scaler: Normalizer = Normalizer(raw_county_splitting)

    # The practical ideal raw measurement depends on the # of counties & districts:
    # interpolate when counties outnumber districts, otherwise use the maximum.
    if n_counties > n_districts:
        ideal: float = best_target(n_counties, n_districts)
    else:
        ideal = MAX_SPLITTING
    poorest: float = ideal * WORST_MULTIPLIER

    # The order of these steps matters; each one operates on the prior result.
    scaler.clip(ideal, poorest)
    scaler.unitize(ideal, poorest)
    scaler.invert()
    scaler.rescale()

    score: int = scaler.normalized_num

    # 09-07-21 - Preserve max value (100) for only when no counties are split
    perfect_despite_splits: bool = (score == 100) and (raw_county_splitting > 1.0)
    return 99 if perfect_despite_splits else score