mansueto-institute / kblock

Python tools for generating street block delineations and geospatial attributes
GNU General Public License v3.0
1 star 0 forks source link

starmap w/ generator function (memory feature enhancement) #116

Open nmarchio opened 1 year ago

manmartgarc commented 1 year ago

from multiprocessing import Pool
from typing import Generator, Tuple

import geopandas as gpd
import pandas as pd

def new_function(
    block_id: str,
    blocks: gpd.GeoSeries,
    buildings: gpd.GeoSeries,
    streets: gpd.GeoSeries,
) -> dict:
    """Compute summary attributes for a single block.

    Args:
        block_id: Identifier copied unchanged into the result.
        blocks: Geometry of the block (the first row is used for the area).
        buildings: Building geometries belonging to the block.
        streets: Street geometries belonging to the block; may be empty.

    Returns:
        A dict with the keys ``block_id``, ``block_area``,
        ``building_count`` and ``street_length``.

        * ``block_area`` is the area of the first block geometry after
          reprojecting to EPSG:3395, scaled by 1e-6 (m^2 -> km^2, since
          EPSG:3395 uses metres). NaN when ``blocks`` is empty.
        * ``building_count`` is the number of rows left after exploding
          multi-part building geometries into single parts.
        * ``street_length`` is the summed length of every street geometry
          after reprojecting to EPSG:3395, scaled by 0.001 (m -> km).
          0.0 when ``streets`` is empty.
    """
    # A block id with no matching block geometry previously raised
    # IndexError on ``.iloc[0]``; report the area as missing instead so one
    # bad id does not abort a whole parallel run.
    if blocks.empty:
        block_area = float("nan")
    else:
        block_area = (blocks.to_crs(3395).area * 1e-6).iloc[0]

    building_count = buildings.explode(ignore_index=True).shape[0]

    # Streets can span several rows per block (or none at all). Summing
    # covers both cases and gives the same value as ``.iloc[0]`` when
    # exactly one row is present.
    if streets.empty:
        street_length = 0.0
    else:
        street_length = (streets.to_crs(3395).length * 0.001).sum()

    return {
        "block_id": block_id,
        "block_area": block_area,
        "building_count": building_count,
        "street_length": street_length,
    }

def make_inputs(
    blocks: gpd.GeoDataFrame,
    buildings: gpd.GeoDataFrame,
    streets: gpd.GeoDataFrame,
) -> Generator[
    Tuple[str, gpd.GeoSeries, gpd.GeoSeries, gpd.GeoSeries], None, None
]:
    """Lazily yield the per-block inputs, one tuple per block id.

    Block ids are taken from ``buildings["block_id"]`` in order of first
    appearance, so a block that has no buildings is never yielded.

    Args:
        blocks: Frame with ``block_id`` and ``geometry`` columns.
        buildings: Frame with ``block_id`` and ``geometry`` columns.
        streets: Frame with ``block_id`` and ``geometry`` columns.

    Yields:
        ``(block_id, block_geometry, building_geometry, street_geometry)``
        where each geometry item is the ``geometry`` column restricted to
        the rows of that frame whose ``block_id`` matches. A frame with no
        matching rows contributes an empty series.
    """
    # Group each frame once up front. The previous implementation rebuilt a
    # full boolean mask over all three frames for every block id, which
    # costs O(n_blocks * n_rows). ``indices`` maps each id to the row
    # positions (ascending), so slicing keeps the original row order.
    block_rows = blocks.groupby("block_id", sort=False).indices
    building_rows = buildings.groupby("block_id", sort=False).indices
    street_rows = streets.groupby("block_id", sort=False).indices

    block_geometry = blocks["geometry"]
    building_geometry = buildings["geometry"]
    street_geometry = streets["geometry"]

    for block_id in buildings["block_id"].unique():
        # ``.get(..., [])`` yields an empty selection for ids that are absent
        # from a frame (NaN ids included, which groupby drops), matching the
        # all-False mask of the original comparison.
        yield (
            block_id,
            block_geometry.iloc[block_rows.get(block_id, [])],
            building_geometry.iloc[building_rows.get(block_id, [])],
            street_geometry.iloc[street_rows.get(block_id, [])],
        )

def _apply_new_function(args: tuple) -> dict:
    """Unpack one ``make_inputs`` tuple and forward it to ``new_function``.

    Defined at module level (not inside the ``__main__`` guard) so that it
    can be pickled and resolved by worker processes.
    """
    return new_function(*args)


if __name__ == "__main__":
    # Load the three input layers; each is expected to carry ``block_id``
    # and ``geometry`` columns (see ``make_inputs``).
    syc_streets = gpd.read_parquet("input_data/syc_streets.parquet")
    syc_blocks = gpd.read_parquet("input_data/syc_blocks.parquet")
    syc_buildings = gpd.read_parquet("input_data/syc_buildings.parquet")

    inputs = make_inputs(syc_blocks, syc_buildings, syc_streets)

    with Pool() as pool:
        # ``Pool.starmap`` converts a generator into a full list before
        # dispatching, which defeats the lazy generator. ``imap`` pulls
        # items as it dispatches them and preserves input order. The
        # iterator must be drained inside the ``with`` block because leaving
        # it terminates the pool. Pass ``chunksize=...`` to reduce IPC
        # overhead when there are many small blocks.
        results = list(pool.imap(_apply_new_function, inputs))
    df = pd.DataFrame.from_records(
        results,
        columns=["block_id", "block_area", "building_count", "street_length"],
    )
    print(df.head())
nmarchio commented 1 year ago

https://gist.github.com/nmarchio/a1d04340e006481e0ae2c9eaa3b960dc