Closed maodonovan closed 5 months ago
Can you share a minimal reproducible notebook so we can investigate? My suspicion is this is an issue with the code, not Colab.
here is a paste from the colab syntax - and the error produced.
How is it that talk show hosts are accorded the status priests used to have?
Michael O'Donovan tel +27829251744 skype maodonovan ///calculating.amends.conclude
email: @.*** Website: https://quantumcity.co.za http://develappmentalstate.co.za
On Thu, 18 Apr 2024 at 18:34, cperry-goog @.***> wrote:
Can you share a minimal reproducible notebook so we can investigate? My suspicion is this is an issue with the code, not Colab.
— Reply to this email directly, view it on GitHub https://github.com/googlecolab/colabtools/issues/4512#issuecomment-2064473934, or unsubscribe https://github.com/notifications/unsubscribe-auth/AAXYWB2AT4DFOZPD4WT2AEDY57YXVAVCNFSM6AAAAABGNBOXQGVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMZDANRUGQ3TGOJTGQ . You are receiving this because you authored the thread.Message ID: @.***>
@. First, select a region from either the Natural Earth low res (fastest), Natural Earth high res or World Bank high res shapefiles: region_border_source = 'Natural Earth (Low Res 110m)' @. ["Natural Earth (Low Res 110m)", "Natural Earth (High Res 10m)", "World Bank (High Res 10m)"] region = 'ZWE (Zimbabwe)' @. ["", "AGO (Angola)", "BDI (Burundi)", "BEN (Benin)", "BFA (Burkina Faso)", "BGD (Bangladesh)", "BRN (Brunei)", "BTN (Bhutan)", "BWA (Botswana)", "CAF (Central African Republic)", "CIV (C\u00f4te d'Ivoire)", "CMR (Cameroon)", "COD (Democratic Republic of the Congo)", "COG (Republic of the Congo)", "COM (Comoros)", "CPV (Cape Verde)", "DJI (Djibouti)", "DZA (Algeria)", "EGY (Egypt)", "ERI (Eritrea)", "ETH (Ethiopia)", "GAB (Gabon)", "GHA (Ghana)", "GIN (Guinea)", "GMB (The Gambia)", "GNB (Guinea-Bissau)", "GNQ (Equatorial Guinea)", "IDN (Indonesia)", "IOT (British Indian Ocean Territory)", "KEN (Kenya)", "KHM (Cambodia)", "LAO (Laos)", "LBR (Liberia)", "LKA (Sri Lanka)", "LSO (Lesotho)", "MDG (Madagascar)", "MDV (Maldives)", "MOZ (Mozambique)", "MRT (Mauritania)", "MUS (Mauritius)", "MWI (Malawi)", "MYS (Malaysia)", "MYT (Mayotte)", "NAM (Namibia)", "NER (Niger)", "NGA (Nigeria)", "NPL (Nepal)", "PHL (Philippines)", "REU (R\u00e9union)", "RWA (Rwanda)", "SDN (Sudan)", "SEN (Senegal)", "SGP (Singapore)", "SHN (Saint Helena, Ascension and Tristan da Cunha)", "SLE (Sierra Leone)", "SOM (Somalia)", "STP (S\u00e3o Tom\u00e9 and Pr\u00edncipe)", "SWZ (Eswatini)", "SYC (Seychelles)", "TGO (Togo)", "THA (Thailand)", "TLS (Timor-Leste)", "TUN (Tunisia)", "TZA (Tanzania)", "UGA (Uganda)", "VNM (Vietnam)", "ZAF (South Africa)", "ZMB (Zambia)", "ZWE (Zimbabwe)"] @. or specify an area of interest in WKT format (assumes crs='EPSG:4326'); this tool might be useful. your_own_wkt_polygon = '' @. {type:"string"} @. Select type of data to download here: data_type = 'polygons' @.*** ["polygons", "points"]
!sudo apt-get install swig !pip install s2geometry pygeos geopandas
import functools import glob import gzip import multiprocessing import os import shutil import tempfile from typing import List, Optional, Tuple
import geopandas as gpd from IPython import display import pandas as pd import s2geometry as s2 import shapely import tensorflow as tf import tqdm.notebook
BUILDING_DOWNLOAD_PATH = ('gs://open-buildings-data/v3/' f'{data_type}_s2_level_6_gzip_no_header')
def get_filename_and_region_dataframe( region_border_source: str, region: str, your_own_wkt_polygon: str) -> Tuple[str, gpd.geodataframe.GeoDataFrame]: """Returns output filename and a geopandas dataframe with one region row."""
if your_own_wkt_polygon:
filename = f'open_buildingsv3{data_type}_your_own_wkt_polygon.csv.gz'
region_df = gpd.GeoDataFrame(
geometry=gpd.GeoSeries.from_wkt([your_own_wkt_polygon]),
crs='EPSG:4326')
if not isinstance(region_df.iloc[0].geometry,
shapely.geometry.polygon.Polygon) and not isinstance(
region_df.iloc[0].geometry,
shapely.geometry.multipolygon.MultiPolygon):
raise ValueError("your_own_wkt_polygon
must be a POLYGON or "
"MULTIPOLYGON.")
print(f'Preparing your_own_wkt_polygon.')
return filename, region_df
if not region: raise ValueError('Please select a region or set your_own_wkt_polygon.')
if region_border_source == 'Natural Earth (Low Res 110m)': url = ('https://www.naturalearthdata.com/http//www.naturalearthdata.com/' 'download/110m/cultural/ne_110m_admin_0_countries.zip') !wget -N {url} display.clear_output() region_shapefile_path = os.path.basename(url) source_name = 'ne_110m' elif region_border_source == 'Natural Earth (High Res 10m)': url = ('https://www.naturalearthdata.com/http//www.naturalearthdata.com/' 'download/10m/cultural/ne_10m_admin_0_countries.zip') !wget -N {url} display.clear_output() region_shapefile_path = os.path.basename(url) source_name = 'ne_10m' elif region_border_source == 'World Bank (High Res 10m)': url = ('https://development-data-hub-s3-public.s3.amazonaws.com/ddhfiles/' '779551/wb_countries_admin0_10m.zip') !wget -N {url} !unzip -o {os.path.basename(url)} display.clear_output() region_shapefile_path = 'WB_countries_Admin0_10m' source_name = 'wb_10m'
region_iso_a3 = region.split(' ')[0] filename = (f'open_buildingsv3{datatype}' f'{sourcename}{region_iso_a3}.csv.gz') region_df = gpd.read_file(region_shapefile_path).query( f'ISO_A3 == "{region_iso_a3}"').dissolve(by='ISO_A3')[['geometry']] print(f'Preparing {region} from {region_border_source}.') return filename, region_df
def get_bounding_box_s2_covering_tokens( region_geometry: shapely.geometry.base.BaseGeometry) -> List[str]: region_bounds = region_geometry.bounds s2_lat_lng_rect = s2.S2LatLngRect_FromPointPair( s2.S2LatLng_FromDegrees(region_bounds[1], region_bounds[0]), s2.S2LatLng_FromDegrees(region_bounds[3], region_bounds[2])) coverer = s2.S2RegionCoverer()
coverer.set_fixed_level(6) coverer.set_max_cells(1000000) return [cell.ToToken() for cell in coverer.GetCovering(s2_lat_lng_rect)]
def s2_token_to_shapely_polygon( s2_token: str) -> shapely.geometry.polygon.Polygon: s2_cell = s2.S2Cell(s2.S2CellId_FromToken(s2_token, len(s2_token))) coords = [] for i in range(4): s2_lat_lng = s2.S2LatLng(s2_cell.GetVertex(i)) coords.append((s2_lat_lng.lng().degrees(), s2_lat_lng.lat().degrees())) return shapely.geometry.Polygon(coords)
def download_s2_token(
s2_token: str, region_df: gpd.geodataframe.GeoDataFrame) -> Optional[str]:
"""Downloads the matching CSV file with polygons for the s2_token
.
NOTE: Only polygons inside the region are kept. NOTE: Passing output via a temporary file to reduce memory usage.
Args: s2_token: S2 token for which to download the CSV file with building polygons. The S2 token should be at the same level as the files in BUILDING_DOWNLOAD_PATH. region_df: A geopandas dataframe with only one row that contains the region for which to keep polygons.
Returns:
Either filepath which contains a gzipped CSV without header for the
s2_token
subfiltered to only contain building polygons inside the region
or None which means that there were no polygons inside the region for this
s2_token
.
"""
s2_cell_geometry = s2_token_to_shapely_polygon(s2_token)
region_geometry = region_df.iloc[0].geometry
prepared_region_geometry = shapely.prepared.prep(region_geometry)
if not prepared_region_geometry.intersects(s2_cell_geometry): return None try:
# directly to pd.read_csv.
with tf.io.gfile.GFile(
os.path.join(BUILDING_DOWNLOAD_PATH, f'{s2_token}_buildings.csv.gz'),
'rb') as gf:
# If the s2 cell is fully covered by country geometry then can skip
# filtering as we need all rows.
if prepared_region_geometry.covers(s2_cell_geometry):
with tempfile.NamedTemporaryFile(mode='w+b', delete=False) as tmp_f:
shutil.copyfileobj(gf, tmp_f)
return tmp_f.name
# Else take the slow path.
# NOTE: We read in chunks to save memory.
csv_chunks = pd.read_csv(
gf, chunksize=2000000, dtype=object, compression='gzip', header=None)
tmp_f = tempfile.NamedTemporaryFile(mode='w+b', delete=False)
tmp_f.close()
for csv_chunk in csv_chunks:
points = gpd.GeoDataFrame(
geometry=gpd.points_from_xy(csv_chunk[1], csv_chunk[0]),
crs='EPSG:4326')
# sjoin 'within' was faster than using shapely's 'within' directly.
points = gpd.sjoin(points, region_df, predicate='within')
csv_chunk = csv_chunk.iloc[points.index]
csv_chunk.to_csv(
tmp_f.name,
mode='ab',
index=False,
header=False,
compression={
'method': 'gzip',
'compresslevel': 1
})
return tmp_f.name
except tf.errors.NotFoundError: return None
display.clear_output() filename, region_df = get_filename_and_region_dataframe(region_border_source, region, your_own_wkt_polygon)
for f in glob.glob('/tmp/openbuildings*'): os.remove(f)
with gzip.open(f'/tmp/{filename}', 'wt') as merged: if data_type == "polygons": merged.write(','.join([ 'latitude', 'longitude', 'area_in_meters', 'confidence', 'geometry', 'full_plus_code' ]) + '\n') else: merged.write(','.join([ 'latitude', 'longitude', 'area_in_meters', 'confidence', 'full_plus_code' ]) + '\n') download_s2_token_fn = functools.partial(download_s2_token, region_df=region_df) s2_tokens = get_bounding_box_s2_covering_tokens(region_df.iloc[0].geometry)
with open(f'/tmp/{filename}', 'ab') as merged: with multiprocessing.Pool(4) as e: for fname in tqdm.notebook.tqdm( e.imap_unordered(download_s2_token_fn, s2_tokens), total=len(s2_tokens)): if fname: with open(fname, 'rb') as tmp_f: shutil.copyfileobj(tmp_f, merged) os.unlink(fname)
ERROR:fiona._env:`/vsizip/ne_110m_admin_0_countries.zip' does not exist in the file system, and is not recognized as a supported dataset name.
CPLE_OpenFailedError Traceback (most recent call last)
fiona/ogrext.pyx in fiona.ogrext.gdal_open_vector()
fiona/_err.pyx in fiona._err.exc_wrap_pointer()
CPLE_OpenFailedError: '/vsizip/ne_110m_admin_0_countries.zip' does not exist in the file system, and is not recognized as a supported dataset name.
During handling of the above exception, another exception occurred:
DriverError Traceback (most recent call last)
A paste will not work -- please create a new public notebook with minimal code to reproduce the error.
Describe the current behavior Attempting to download openbuilding polygon data for Botswana
Describe the expected behavior Allowed to proceed to the script to download to Google drive. However specifying the country (BWA) via World Bank or Natural Earth results in a file not found and error. The files do not appear to be on World Bank or Natural earth sites (where specified)
What web browser you are using Firefox
Additional context