Data-Liberation-Front / csvlint.rb

The gem behind http://csvlint.io
MIT License
283 stars 86 forks source link

Cannot validate "number" data type using CSVW #212

Open jakubklimek opened 6 years ago

jakubklimek commented 6 years ago

Expected Behaviour

When a CSVW schema contains "datatype": "number" in a column definition, the values in that column should be validated as numbers.

Current Behaviour (for problems)

csvlint crashes with:

NOTE: Csvlint::Schema.load_from_json is deprecated; use load_from_uri instead. It will be removed on or after 2018-01-01.
Csvlint::Schema.load_from_json called from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/cli.rb:59.
./var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/csvw/number_format.rb:178:in `parse': undefined method `gsub!' for 1.0:Float (NoMethodError)
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/csvw/column.rb:265:in `block in <class:Column>'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/csvw/column.rb:105:in `block in validate'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/csvw/column.rb:103:in `each'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/csvw/column.rb:103:in `validate'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/csvw/table.rb:64:in `block in validate_row'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/csvw/table.rb:64:in `map'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/csvw/table.rb:64:in `validate_row'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/csvw/table_group.rb:38:in `validate_row'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/validate.rb:200:in `parse_contents'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/validate.rb:166:in `validate_line'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/validate.rb:146:in `parse_line'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/validate.rb:110:in `block in validate_stream'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/validate.rb:108:in `each_line'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/validate.rb:108:in `validate_stream'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/validate.rb:101:in `validate'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/validate.rb:86:in `initialize'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/cli.rb:141:in `new'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/cli.rb:141:in `validate_csv'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/cli.rb:86:in `block in fetch_schema_tables'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/cli.rb:79:in `each'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/cli.rb:79:in `fetch_schema_tables'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/lib/csvlint/cli.rb:22:in `validate'
        from /var/lib/gems/2.3.0/gems/thor-0.20.0/lib/thor/command.rb:27:in `run'
        from /var/lib/gems/2.3.0/gems/thor-0.20.0/lib/thor/invocation.rb:126:in `invoke_command'
        from /var/lib/gems/2.3.0/gems/thor-0.20.0/lib/thor.rb:387:in `dispatch'
        from /var/lib/gems/2.3.0/gems/thor-0.20.0/lib/thor/base.rb:466:in `start'
        from /var/lib/gems/2.3.0/gems/csvlint-0.4.0/bin/csvlint:9:in `<top (required)>'
        from /usr/local/bin/csvlint:23:in `load'
        from /usr/local/bin/csvlint:23:in `<main>'

Steps to Reproduce (for problems)

CSVW schema:

{
  "@context": ["http://www.w3.org/ns/csvw",{"@language": "cs"}],
  "url": "test.csv",
  "dc:description": "Registr kandidátů",
  "tableSchema": {
    "aboutUrl": "https://data.czso.cz/zdroj/volby-do-poslanecké-sněmovny-parlamentu-čr-2017/kandidát/{VOLKRAJ}/{KSTRANA}/{PORCISLO}",
    "columns": [{
      "name": "VOLKRAJ",
      "titles": "VOLKRAJ",
      "dc:description": "Číslo volebního kraje",
      "required": false,
      "datatype": "number",
      "aboutUrl": "https://data.czso.cz/zdroj/volby-do-poslanecké-sněmovny-parlamentu-čr-2017/kandidátní-listina/{VOLKRAJ}/{KSTRANA}",
      "propertyUrl": ":volebníKraj",
      "valueUrl": "https://data.czso.cz/zdroj/volební-kraj/{VOLKRAJ}"
    },{
      "name": "KSTRANA",
      "titles": "KSTRANA",
      "dc:description": "Vylosované číslo strany (vazba na RKL)",
      "required": false,
      "datatype": "string",
      "propertyUrl": ":subjekt",
      "valueUrl": "https://data.czso.cz/zdroj/volby-do-poslanecké-sněmovny-parlamentu-čr-2017/politický-subjekt/{KSTRANA}"
    },{
      "name": "PORCISLO",
      "titles": "PORCISLO",
      "dc:description": "Pořadí na KL (hlasovacím lístku)",
      "required": false,
      "datatype": "string",
      "propertyUrl": ":pořadí"
    },{
      "name": "JMENO",
      "titles": "JMENO",
      "dc:description": "Jméno, příp. i další jména",
      "required": true,
      "datatype": "string",
      "propertyUrl": "foaf:givenName"
    },{
      "name": "PRIJMENI",
      "titles": "PRIJMENI",
      "dc:description": "Příjmení",
      "required": true,
      "datatype": "string",
      "propertyUrl": "foaf:familyName"
    },{
      "name": "TITULPRED",
      "titles": "TITULPRED",
      "dc:description": "Tituly před jménem",
      "required": true,
      "datatype": "string"
    },{
      "name": "TITULZA",
      "titles": "TITULZA",
      "dc:description": "Tituly za příjmením",
      "required": false,
      "datatype": "string"
    },{
      "name": "VEK",
      "titles": "VEK",
      "dc:description": "Věk",
      "required": true,
      "datatype": "string",
      "propertyUrl": "foaf:age"
    },{
      "name": "POVOLANI",
      "titles": "POVOLANI",
      "dc:description": "Povolání",
      "required": true,
      "datatype": "string",
      "propertyUrl": "s:jobTitle"
    },{
      "name": "BYDLISTEN",
      "titles": "BYDLISTEN",
      "dc:description": "Název obce - bydliště",
      "required": true,
      "datatype": "string",
      "suppressOutput": true
    },{
      "name": "BYDLISTEK",
      "titles": "BYDLISTEK",
      "dc:description": "Kód obce - bydliště",
      "required": true,
      "datatype": "string",
      "propertyUrl": ":obecTrvaléhoPobytu",
      "valueUrl": "https://data.czso.cz/zdroj/obec/{BYDLISTEK}"
    },{
      "name": "PSTRANA",
      "titles": "PSTRANA",
      "dc:description": "Kód politické příslušnosti",
      "required": true,
      "datatype": "string",
      "propertyUrl": ":jeČlenem",
      "valueUrl": "https://data.czso.cz/zdroj/politický-subjekt/{PSTRANA}"
    },{
      "name": "NSTRANA",
      "titles": "NSTRANA",
      "dc:description": "Kód navrhující strany",
      "required": true,
      "datatype": "string",
      "propertyUrl": ":navrhl",
      "aboutUrl": "https://data.czso.cz/zdroj/politický-subjekt/{PSTRANA}",
      "valueUrl": "https://data.czso.cz/zdroj/volby-do-poslanecké-sněmovny-parlamentu-čr-2017/kandidát/{VOLKRAJ}/{KSTRANA}/{PORCISLO}"
    },{
      "name": "PLATNOST",
      "titles": "PLATNOST",
      "dc:description": "Platnost (A = platný, N = neplatný)",
      "required": true,
      "datatype": "string"
    },{
      "name": "POCHLASU",
      "titles": "POCHLASU",
      "dc:description": "Počet přednostních hlasů",
      "required": false,
      "datatype": "string"
    },{
      "name": "POCPROC",
      "titles": "POCPROC",
      "dc:description": "Procento přednostních hlasů, je-li větší nebo rovno 5",
      "required": false,
      "datatype": "string"
    },{
      "name": "POCPROCVSE",
      "titles": "POCPROCVSE",
      "dc:description": "Procento přednostních hlasů",
      "required": false,
      "datatype": "string"
    },{
      "name": "MANDAT",
      "titles": "MANDAT",
      "dc:description": "A = zvolen, N = nezvolen",
      "required": false,
      "datatype": "string"
    },{
      "name": "PORADIMAND",
      "titles": "PORADIMAND",
      "dc:description": "Pořadí přidělení mandátu (1-36, 0=strana nezískala žádný mandát)",
      "required": false,
      "datatype": "string"
    },{
      "name": "PORADINAHR",
      "titles": "PORADINAHR",
      "dc:description": "Pořadí náhradníka (1-35, 0=strana nezískala žádný mandát)",
      "required": false,
      "datatype": "string"
    },{
      "virtual": true,
      "aboutUrl": "https://data.czso.cz/zdroj/volby-do-poslanecké-sněmovny-parlamentu-čr-2017/kandidátní-listina/{VOLKRAJ}/{KSTRANA}",
      "propertyUrl": "rdf:type",
      "valueUrl": ":KandidátníListina"
    },{
      "virtual": true,
      "aboutUrl": "https://data.czso.cz/zdroj/volby-do-poslanecké-sněmovny-parlamentu-čr-2017/kandidátní-listina/{VOLKRAJ}/{KSTRANA}",
      "propertyUrl": ":kandidát",
      "valueUrl": "https://data.czso.cz/zdroj/volby-do-poslanecké-sněmovny-parlamentu-čr-2017/kandidát/{VOLKRAJ}/{KSTRANA}/{PORCISLO}"
    },{
      "virtual": true,
      "propertyUrl": "rdf:type",
      "valueUrl": ":Kandidát"
    }
    ],
    "primaryKey": ["VOLKRAJ","KSTRANA","PORCISLO"]
  }
}

CSV:

VOLKRAJ,KSTRANA,PORCISLO,JMENO,PRIJMENI,TITULPRED,TITULZA,VEK,POVOLANI,BYDLISTEN,BYDLISTEK,PSTRANA,NSTRANA,PLATNOST,POCHLASU,POCPROC,POCPROCVSE,MANDAT,PORADIMAND,PORADINAHR
1,1,1,"Jana","Černochová","Mgr.","",43,"starostka MČ Praha 2","Praha",1,53,53,"A",,,,"",,
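  1. Save the CSVW schema above as test.json and the CSV above as test.csv, both in the same directory (the schema refers to the CSV by the relative "url": "test.csv")
  2. From that directory, run csvlint -s test.json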

Your Environment

Bash on Ubuntu on Windows; csvlint 0.4.0 (Ruby 2.3.0 gem path, per the stack trace above), installed using gem install csvlint