Status: Open · nmarchio opened 1 year ago
from multiprocessing import Pool
from typing import Generator, Tuple

import geopandas as gpd
import pandas as pd


def new_function(
    block_id: str,
    blocks: gpd.GeoSeries,
    buildings: gpd.GeoSeries,
    streets: gpd.GeoSeries,
) -> dict:
    """Compute summary metrics for a single block.

    Args:
        block_id: Identifier of the block; copied unchanged into the result.
        blocks: Geometry of the block itself. Only the first row is used.
        buildings: Building geometries belonging to the block.
        streets: Street geometries belonging to the block. May be empty.

    Returns:
        A dict with the keys ``block_id``, ``block_area``, ``building_count``
        and ``street_length``.

    Raises:
        IndexError: If ``blocks`` is empty, i.e. the block has no polygon.
    """
    # EPSG:3395 (World Mercator) uses metre units, so ``* 1e-6`` turns m^2
    # into km^2 and ``* 0.001`` turns m into km.
    # NOTE(review): Mercator is not an equal-area projection, so both values
    # are increasingly inflated away from the equator - confirm this is
    # acceptable for the data being processed.
    block_area = blocks.to_crs(3395).area * 1e-6

    # Multi-part geometries are split so that every part counts as a building.
    building_count = buildings.explode(ignore_index=True).shape[0]

    # Sum over all street rows instead of reading only the first one: a block
    # without streets then reports 0.0 instead of raising IndexError, and a
    # block with several street rows reports their total length.
    street_length = (streets.to_crs(3395).length * 0.001).sum()

    return {
        "block_id": block_id,
        "block_area": block_area.iloc[0],
        "building_count": building_count,
        "street_length": street_length,
    }


def _block_positions(frame: gpd.GeoDataFrame) -> dict:
    """Map each ``block_id`` value in *frame* to the row positions holding it."""
    return frame.groupby("block_id", sort=False).indices


def make_inputs(
    blocks: gpd.GeoDataFrame,
    buildings: gpd.GeoDataFrame,
    streets: gpd.GeoDataFrame,
) -> Generator[
    Tuple[str, gpd.GeoSeries, gpd.GeoSeries, gpd.GeoSeries], None, None
]:
    """Yield one ``new_function`` argument tuple per block that has buildings.

    Args:
        blocks: Block layer with ``block_id`` and ``geometry`` columns.
        buildings: Building layer with ``block_id`` and ``geometry`` columns.
        streets: Street layer with ``block_id`` and ``geometry`` columns.

    Yields:
        ``(block_id, block_geometry, building_geometry, street_geometry)``
        for every distinct ``block_id`` found in ``buildings``, in order of
        first appearance. A layer with no rows for that id contributes an
        empty selection.
    """
    # Group each layer once up front. Filtering every full frame with a
    # boolean mask per block costs O(number of blocks * number of rows).
    layers = [
        (frame["geometry"], _block_positions(frame))
        for frame in (blocks, buildings, streets)
    ]

    for block_id in buildings["block_id"].unique():
        block_geom, building_geom, street_geom = (
            # An id absent from a layer selects no rows (empty selection).
            geometry.iloc[positions.get(block_id, [])]
            for geometry, positions in layers
        )
        yield (block_id, block_geom, building_geom, street_geom)


def main() -> None:
    """Load the input layers, compute block metrics in parallel, print a preview."""
    syc_streets = gpd.read_parquet("input_data/syc_streets.parquet")
    syc_blocks = gpd.read_parquet("input_data/syc_blocks.parquet")
    syc_buildings = gpd.read_parquet("input_data/syc_buildings.parquet")

    inputs = make_inputs(syc_blocks, syc_buildings, syc_streets)

    # Pool() starts one worker per CPU; starmap unpacks each yielded tuple
    # into the positional arguments of new_function.
    with Pool() as pool:
        results = pool.starmap(new_function, inputs)

    df = pd.DataFrame.from_records(
        results,
        columns=["block_id", "block_area", "building_count", "street_length"],
    )
    print(df.head())


if __name__ == "__main__":
    main()
https://gist.github.com/nmarchio/a1d04340e006481e0ae2c9eaa3b960dc