mggg / maup

The geospatial toolkit for redistricting data.
https://maup.readthedocs.io/en/latest/
MIT License
65 stars 23 forks source link

Example in README loses votes and contains non-explicit assumptions #34

Closed InnovativeInventor closed 3 years ago

InnovativeInventor commented 3 years ago
import geopandas as gpd
import geopandas
import maup

# Load the example layers from ./examples. `districts` is read here but is
# not used anywhere else in this snippet.
blocks = geopandas.read_file("zip://./examples/blocks.zip")
precincts = geopandas.read_file("zip://./examples/precincts.zip")
districts = geopandas.read_file("zip://./examples/districts.zip")

# Precinct-level columns to be prorated down to blocks.
election_columns = ["PRES16D", "PRES16R"]

# `assignment` is looked up against the precinct index below, so it
# presumably holds, for each block, the label of the precinct it was
# assigned to -- confirm against the maup.assign documentation.
assignment = maup.assign(blocks, precincts)
# Each block's weight is its TOTPOP divided by the *precinct's own* TOTPOP.
# NOTE(review): these weights sum to 1 within a precinct only if the precinct
# TOTPOP equals the summed TOTPOP of the blocks assigned to it; nothing in
# this script checks that assumption.
weights = blocks.TOTPOP / assignment.map(precincts.TOTPOP)
prorated = maup.prorate(assignment, precincts[election_columns], weights)
blocks[election_columns] = prorated

# Print the block-level and precinct-level column totals so they can be
# compared side by side.
print(blocks[election_columns].sum())
print(precincts[election_columns].sum())

Forcing precinct TOTPOP to equal block TOTPOP doesn't resolve the issue:

import math

import geopandas as gpd
import geopandas
import maup

# Load the example layers from ./examples. `districts` is read here but is
# not used anywhere else in this snippet.
blocks = geopandas.read_file("zip://./examples/blocks.zip")
precincts = geopandas.read_file("zip://./examples/precincts.zip")
districts = geopandas.read_file("zip://./examples/districts.zip")
# Rescale precinct TOTPOP so its grand total matches the block grand total.
precincts["TOTPOP"] *= blocks.TOTPOP.sum() / precincts.TOTPOP.sum()

# The rescaling above is floating-point arithmetic, so the two totals are
# compared with a tolerance; an exact `==` can fail on rounding error alone.
assert math.isclose(precincts["TOTPOP"].sum(), blocks["TOTPOP"].sum())
election_columns = ["PRES16D", "PRES16R"]

# Weight each block by its TOTPOP relative to the (rescaled) TOTPOP of the
# precinct it is assigned to, then prorate the precinct columns to blocks.
# NOTE(review): only the grand totals were forced to agree above; the
# per-precinct totals are not checked here.
assignment = maup.assign(blocks, precincts)
weights = blocks.TOTPOP / assignment.map(precincts.TOTPOP)
prorated = maup.prorate(assignment, precincts[election_columns], weights)
blocks[election_columns] = prorated

# Print the block-level and precinct-level column totals so they can be
# compared side by side.
print(blocks[election_columns].sum())
print(precincts[election_columns].sum())
InnovativeInventor commented 3 years ago

Note: forcing the source and target geometries to be cropped to each other like so does not resolve the issue.

# Crop each layer's geometries to the other layer. Order matters: the second
# call receives the precinct geometries already replaced by the first call.
precincts["geometry"] = maup.crop_to(precincts, blocks)
blocks["geometry"] = maup.crop_to(blocks, precincts)

Additionally, cropping then dropping the cropped out blocks also does not resolve the issue:

# Discard every block whose geometry has zero area (the cropped-out blocks).
blocks = blocks.drop(index=blocks.loc[blocks.area == 0].index)
amybecker commented 3 years ago

This works. You need to BOTH drop the empty geometries and weight with respect to assignment.

import geopandas as gpd
import geopandas
import maup
import math

# Load the example layers from ./examples. `districts` is read here but is
# not used anywhere else in this snippet.
blocks = geopandas.read_file("zip://./examples/blocks.zip")
precincts = geopandas.read_file("zip://./examples/precincts.zip")
districts = geopandas.read_file("zip://./examples/districts.zip")

# Crop each layer to the other, then keep only blocks that still have a
# non-empty geometry. The explicit .copy() makes `blocks` an independent
# frame, so the column assignments below do not write into a slice of the
# original (which triggers pandas' SettingWithCopyWarning).
precincts["geometry"] = maup.crop_to(precincts, blocks)
blocks["geometry"] = maup.crop_to(blocks, precincts)
blocks = blocks[~blocks["geometry"].is_empty].copy()
election_columns = ["PRES16D", "PRES16R"]

assignment = maup.assign(blocks, precincts)
blocks['assignment'] = assignment

# Weight each block by its share of the TOTPOP summed over the blocks that
# share its assignment, so the weights within each assigned precinct sum to 1
# by construction (rather than relying on the precinct's own TOTPOP column).
weights = blocks.TOTPOP / assignment.map(blocks.TOTPOP.groupby(assignment).sum())
# Sanity check: one unit of weight per precinct.
# NOTE(review): this assumes every precinct receives at least one block and
# that its assigned blocks have a non-zero TOTPOP total; otherwise the sum
# will not equal len(precincts).
assert math.isclose(sum(weights), len(precincts))
prorated = maup.prorate(assignment, precincts[election_columns], weights)
blocks[election_columns] = prorated

# Print the block-level and precinct-level column totals so they can be
# compared side by side.
print(blocks[election_columns].sum())
print(precincts[election_columns].sum())