datasets / covid-19

Novel Coronavirus 2019 time series data on cases
https://datahub.io/core/covid-19
1.16k stars 605 forks source link

Executing on 3/14/2020 raises ValidationError & CastError #10

Closed binarytrails closed 4 years ago

binarytrails commented 4 years ago
CastError                                 Traceback (most recent call last)
~/.local/lib/python3.8/site-packages/dataflows/base/schema_validator.py in schema_validator(resource, iterator, field_names, on_error)
     48             for f in schema_fields:
---> 49                 row[f.name] = f.cast_value(row.get(f.name))
     50         except CastError as e:

~/.local/lib/python3.8/site-packages/tableschema/field.py in cast_value(self, value, constraints)
    145             if cast_value == config.ERROR:
--> 146                 raise exceptions.CastError((
    147                     'Field "{field.name}" can\'t cast value "{value}" '

CastError: Field "Deaths" can't cast value "None" for type "number" with format "default"
During handling of the above exception, another exception occurred:

ValidationError                           Traceback (most recent call last)
<ipython-input-11-4036c1aa3210> in <module>
     18 extra_value = {'name': 'Case', 'type': 'number'}
     19 
---> 20 Flow(
     21       load(f'{BASE_URL}{CONFIRMED}'),
     22       load(f'{BASE_URL}{RECOVERED}'),

~/.local/lib/python3.8/site-packages/dataflows/base/flow.py in results(self, on_error)
     10 
     11     def results(self, on_error=None):
---> 12         return self._chain().results(on_error=on_error)
     13 
     14     def process(self):

~/.local/lib/python3.8/site-packages/dataflows/base/datastream_processor.py in results(self, on_error)
     92     def results(self, on_error=None):
     93         ds = self._process()
---> 94         results = [
     95             list(schema_validator(res.res, res, on_error=on_error))
     96             for res in ds.res_iter

~/.local/lib/python3.8/site-packages/dataflows/base/datastream_processor.py in <listcomp>(.0)
     93         ds = self._process()
     94         results = [
---> 95             list(schema_validator(res.res, res, on_error=on_error))
     96             for res in ds.res_iter
     97         ]

~/.local/lib/python3.8/site-packages/dataflows/base/schema_validator.py in schema_validator(resource, iterator, field_names, on_error)
     44         field_names = [f.name for f in schema.fields]
     45     schema_fields = [f for f in schema.fields if f.name in field_names]
---> 46     for i, row in enumerate(iterator):
     47         try:
     48             for f in schema_fields:

~/.local/lib/python3.8/site-packages/dataflows/processors/dumpers/dumper_base.py in row_counter(self, resource, iterator)
     67     def row_counter(self, resource, iterator):
     68         counter = 0
---> 69         for row in iterator:
     70             counter += 1
     71             yield row

~/.local/lib/python3.8/site-packages/dataflows/processors/dumpers/file_dumper.py in rows_processor(self, resource, writer, temp_file)
     74 
     75     def rows_processor(self, resource, writer, temp_file):
---> 76         for row in resource:
     77             writer.write_row(row)
     78             yield row

~/.local/lib/python3.8/site-packages/dataflows/base/schema_validator.py in schema_validator(resource, iterator, field_names, on_error)
     49                 row[f.name] = f.cast_value(row.get(f.name))
     50         except CastError as e:
---> 51             if not on_error(resource['name'], row, i, e):
     52                 continue
     53 

~/.local/lib/python3.8/site-packages/dataflows/base/schema_validator.py in raise_exception(res_name, row, i, e)
     20 
     21 def raise_exception(res_name, row, i, e):
---> 22     raise ValidationError(res_name, row, i, e)
     23 
     24 

ValidationError: 
ROW: {'Date': datetime.date(2020, 3, 14), 'Province/State': None, 'Country/Region': 'Thailand', 'Lat': Decimal('15.0'), 'Long': Decimal('101.0'), 'Confirmed': None, 'Recovered': None, 'Deaths': 'None'}
----
zelima commented 4 years ago

@binarytrails is this still happening? I've just pushed the latest data, and it seems OK.

binarytrails commented 4 years ago

Did you try running it for that date (3/14/2020) to reproduce the error?

anuveyatsu commented 4 years ago

@binarytrails this should be fixed now. See https://github.com/datasets/covid-19/issues/6#issuecomment-603288217