ETL and web server for scraping time series from semi-structured Excel files and transforming them into open-format distributions, based on an experimental extension of the National Metadata Profile from the APN's open data policy.
The ETL flow is interrupted if a catalog download fails #45
```
Traceback (most recent call last):
  File "/Users/abenassi/github/series-tiempo-ar-scraping/series_tiempo_ar_scraping/download.py", line 33, in download
    verify=verify)
  File "/Users/abenassi/anaconda/envs/series-tiempo-ar-scraping-new/lib/python3.7/site-packages/requests/api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "/Users/abenassi/anaconda/envs/series-tiempo-ar-scraping-new/lib/python3.7/site-packages/requests/api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "/Users/abenassi/anaconda/envs/series-tiempo-ar-scraping-new/lib/python3.7/site-packages/requests/sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "/Users/abenassi/anaconda/envs/series-tiempo-ar-scraping-new/lib/python3.7/site-packages/requests/sessions.py", line 668, in send
    history = [resp for resp in gen] if allow_redirects else []
  File "/Users/abenassi/anaconda/envs/series-tiempo-ar-scraping-new/lib/python3.7/site-packages/requests/sessions.py", line 668, in <listcomp>
    history = [resp for resp in gen] if allow_redirects else []
  File "/Users/abenassi/anaconda/envs/series-tiempo-ar-scraping-new/lib/python3.7/site-packages/requests/sessions.py", line 247, in resolve_redirects
    **adapter_kwargs
  File "/Users/abenassi/anaconda/envs/series-tiempo-ar-scraping-new/lib/python3.7/site-packages/requests/sessions.py", line 646, in send
    r = adapter.send(request, **kwargs)
  File "/Users/abenassi/anaconda/envs/series-tiempo-ar-scraping-new/lib/python3.7/site-packages/requests/adapters.py", line 514, in send
    raise SSLError(e, request=request)
requests.exceptions.SSLError: HTTPSConnectionPool(host='www.economia.gob.ar', port=443): Max retries exceeded with url: /download/infoeco/catalogo_sspm_prod.xlsx (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1056)')))

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/Users/abenassi/anaconda/envs/series-tiempo-ar-scraping-new/bin/etl", line 11, in <module>
    load_entry_point('series-tiempo-ar-scraping', 'console_scripts', 'etl')()
  File "/Users/abenassi/anaconda/envs/series-tiempo-ar-scraping-new/lib/python3.7/site-packages/click/core.py", line 764, in __call__
    return self.main(*args, **kwargs)
  File "/Users/abenassi/anaconda/envs/series-tiempo-ar-scraping-new/lib/python3.7/site-packages/click/core.py", line 717, in main
    rv = self.invoke(ctx)
  File "/Users/abenassi/anaconda/envs/series-tiempo-ar-scraping-new/lib/python3.7/site-packages/click/core.py", line 956, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/Users/abenassi/anaconda/envs/series-tiempo-ar-scraping-new/lib/python3.7/site-packages/click/core.py", line 555, in invoke
    return callback(*args, **kwargs)
  File "/Users/abenassi/github/series-tiempo-ar-scraping/series_tiempo_ar_scraping/main.py", line 52, in cli
    main(config, log_level)
  File "/Users/abenassi/github/series-tiempo-ar-scraping/series_tiempo_ar_scraping/main.py", line 65, in main
    config=config
  File "/Users/abenassi/github/series-tiempo-ar-scraping/series_tiempo_ar_scraping/base.py", line 717, in __init__
    super().__init__(identifier, parent, context)
  File "/Users/abenassi/github/series-tiempo-ar-scraping/series_tiempo_ar_scraping/base.py", line 72, in __init__
    self.init_childs()
  File "/Users/abenassi/github/series-tiempo-ar-scraping/series_tiempo_ar_scraping/base.py", line 732, in init_childs
    for catalog in self.catalogs_from_config.keys()
  File "/Users/abenassi/github/series-tiempo-ar-scraping/series_tiempo_ar_scraping/base.py", line 732, in <listcomp>
    for catalog in self.catalogs_from_config.keys()
  File "/Users/abenassi/github/series-tiempo-ar-scraping/series_tiempo_ar_scraping/base.py", line 268, in __init__
    super().__init__(identifier, parent, context)
  File "/Users/abenassi/github/series-tiempo-ar-scraping/series_tiempo_ar_scraping/base.py", line 70, in __init__
    self.init_metadata()
  File "/Users/abenassi/github/series-tiempo-ar-scraping/series_tiempo_ar_scraping/base.py", line 272, in init_metadata
    self.fetch_metadata_file()
  File "/Users/abenassi/github/series-tiempo-ar-scraping/series_tiempo_ar_scraping/base.py", line 291, in fetch_metadata_file
    config,
  File "/Users/abenassi/github/series-tiempo-ar-scraping/series_tiempo_ar_scraping/base.py", line 648, in download_with_config
    download.download_to_file(url, file_path, **config)
  File "/Users/abenassi/github/series-tiempo-ar-scraping/series_tiempo_ar_scraping/download.py", line 58, in download_to_file
    content = download(url, **kwargs)
  File "/Users/abenassi/github/series-tiempo-ar-scraping/series_tiempo_ar_scraping/download.py", line 44, in download
    raise DownloadException() from download_exception
series_tiempo_ar_scraping.download.DownloadException
```
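For context on the root cause: the SSLError above means Python's default certificate store cannot validate the certificate chain of www.economia.gob.ar (a missing local issuer certificate). Since download.py already forwards a verify option to requests.get (visible at line 33 of the first traceback), one plausible workaround is to point verify at a CA bundle that includes the missing issuer. A minimal sketch at the requests level, where the bundle path is a hypothetical placeholder:

```python
import requests

CATALOG_URL = (
    "https://www.economia.gob.ar/download/infoeco/catalogo_sspm_prod.xlsx"
)

# verify accepts a path to a CA bundle containing the issuer certificate
# that the default store is missing; verify=False would skip certificate
# checks entirely (insecure, only reasonable as a temporary measure).
response = requests.get(CATALOG_URL, verify="/path/to/ca_bundle.pem")  # hypothetical path
response.raise_for_status()

with open("catalogo_sspm_prod.xlsx", "wb") as f:
    f.write(response.content)
```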
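On the issue itself: the DownloadException raised in download.py escapes the Catalog initialization in base.py's init_childs list comprehension, so one unreachable catalog aborts the entire ETL run. A minimal sketch of per-catalog error handling that would let the run skip the failing catalog and continue, where build_catalog is a hypothetical stand-in for the constructor invoked in that comprehension:

```python
import logging

from series_tiempo_ar_scraping.download import DownloadException

logger = logging.getLogger(__name__)


def init_catalogs_safely(catalogs_from_config, build_catalog):
    """Build each catalog, skipping those whose download fails.

    `build_catalog` is a hypothetical callable standing in for the
    Catalog constructor used in base.py's init_childs comprehension.
    """
    catalogs = []
    for identifier in catalogs_from_config.keys():
        try:
            catalogs.append(build_catalog(identifier))
        except DownloadException:
            # Log the failure and move on to the remaining catalogs
            # instead of letting the exception kill the whole run.
            logger.exception("Skipping catalog %s: download failed", identifier)
    return catalogs
```

A variant could also collect the failed identifiers and surface them in the final ETL report, so a broken catalog stays visible without blocking the rest of the run.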