Datenschule / jedeschule-scraper

MIT License
22 stars 15 forks source link

NI: Scrape new API-based website #111

Closed k-nut closed 3 years ago

knutator2 commented 3 years ago

Runs through, but I still get an error for 2 schools. Maybe we should check whether the school type is there: in both failing items `sdb_art` is present but set to `None`.

2021-02-08 14:05:03 [scrapy.core.scraper] ERROR: Error processing {'id': 16680, 'schulnr': 84001, 'schulname': 'Fritz-Reuter-Schule', 'namensZusatz': 'KGS Bevensen', 'telefon': '05821 543101', 'fax': '05821 543120', 'email': 'kgsbb@t-online.de', 'homepage': 'https://www.kgs-bad-bevensen.com', 'art_id': 60, 'traeger_id': 186, 'traegerschaft_id': 1, 'modified': '2020-12-27 14:09:51', 'created': None, 'daniscode': None, 'sdb_adressen': [{'id': 16680, 'ort_id': 3570, 'strasse': 'Klein Bünstorfer Straße 7', 'modified': '2020-12-27 14:09:51', 'pivot': {'schule_id': 16680, 'adressen_id': 16680, 'hauptsitz': 1}, 'sdb_ort': {'id': 3570, 'plz': 29549, 'ort': 'Bad Bevensen', 'ortsteil': None, 'ags_id': 673, 'modified': '2021-01-07 10:43:54'}}], 'hauptsitz': {'id': 16680, 'ort_id': 3570, 'strasse': 'Klein Bünstorfer Straße 7', 'modified': '2020-12-27 14:09:51', 'sdb_ort': {'id': 3570, 'plz': 29549, 'ort': 'Bad Bevensen', 'ortsteil': None, 'ags_id': 673, 'modified': '2021-01-07 10:43:54', 'sdb_ag': {'id': 673, 'scags': 3605407002, 'gemeindename': 'Bad Bevensen, Stadt', 'plz': 29549, 'regionen_id': '9', 'modified': '2019-11-29 06:56:33', 'dienstellen_code': 3, 'kreis_code': 360, 'sdb_dienststelle': {'id': 3, 'dienststelle': 'RLSB Lüneburg', 'homepage': 'https://www.rlsb.de/organisation/lueneburg', 'modified': '2020-12-28 10:08:00'}}}}, 'ag': {'id': 673, 'scags': 3605407002, 'gemeindename': 'Bad Bevensen, Stadt', 'plz': 29549, 'regionen_id': '9', 'modified': '2019-11-29 06:56:33', 'dienstellen_code': 3, 'kreis_code': 360, 'sdb_kreis': {'id': 37, 'skschl': 360, 'kreis': 'Uelzen', 'zusatz': None}, 'sdb_region': {'id': 9, 'region': 'Lüneburger Heide', 'modified': '2019-11-28 08:29:48'}}, 'kreis': {'id': 37, 'skschl': 360, 'kreis': 'Uelzen', 'zusatz': None}, 'region': {'id': 9, 'region': 'Lüneburger Heide', 'modified': '2019-11-28 08:29:48'}, 'dienststelle': {'id': 3, 'dienststelle': 'RLSB Lüneburg', 'homepage': 'https://www.rlsb.de/organisation/lueneburg', 'modified': '2020-12-28 
10:08:00'}, 'sdb_traeger': {'id': 186, 'privat': 0, 'name': 'Landkreis Uelzen', 'modified': '2019-11-28 08:30:54'}, 'gliederungen': [{'id': 24, 'gliederung': 'KGS - Hauptschule -', 'bereich_id': 2, 'modified': '2020-12-01 09:16:42'}, {'id': 25, 'gliederung': 'KGS - Realschule -', 'bereich_id': 2, 'modified': '2020-12-01 09:16:45'}, {'id': 22, 'gliederung': 'KGS - Gymnasium SEK I -', 'bereich_id': 2, 'modified': '2020-12-01 09:16:39'}, {'id': 23, 'gliederung': 'KGS - Gymnasium SEK II -', 'bereich_id': 3, 'modified': '2020-12-01 09:16:41'}], 'besonderheiten': [{'id': 8, 'besonderheit': 'Ganztagsschule', 'link': '', 'modified': '2020-12-27 10:51:44'}, {'id': 11, 'besonderheit': 'Schule ohne Rassismus - Schule mit Courage', 'link': 'http://www.schule-ohne-rassismus.org', 'modified': '2020-12-27 10:51:44'}], 'sdb_kommentare': [], 'sdb_traegerschaft': {'id': 1, 'bezeichnung': 'Öffentlich', 'modified': '2019-12-03 07:26:55'}, 'sdb_art': None}
Traceback (most recent call last):
  File "/Users/knut/Development/jedeschule-scraper/venv/lib/python3.8/site-packages/twisted/internet/defer.py", line 654, in _runCallbacks
    current.result = callback(current.result, *args, **kw)
  File "/Users/knut/Development/jedeschule-scraper/jedeschule/pipelines/school_pipeline.py", line 17, in process_item
    school = spider.normalize(item)
  File "/Users/knut/Development/jedeschule-scraper/jedeschule/spiders/niedersachsen.py", line 57, in normalize
    school_type=item.get("sdb_art", {}).get('art'),
AttributeError: 'NoneType' object has no attribute 'get'
2021-02-08 14:07:03 [scrapy.core.scraper] ERROR: Error processing {'id': 12545, 'schulnr': 51007, 'schulname': 'Carl-Friedrich-Gauß-Schule', 'namensZusatz': 'Kooperative Gesamtschule Hemmingen', 'telefon': '0511 4103-200', 'fax': '0511 4103-211', 'email': 'info@kgshemmingen.de', 'homepage': 'http://www.KGShemmingen.de', 'art_id': 60, 'traeger_id': 367, 'traegerschaft_id': 1, 'modified': '2020-12-27 14:08:12', 'created': None, 'daniscode': None, 'sdb_adressen': [{'id': 12545, 'ort_id': 975, 'strasse': 'Hohe Bünte 4', 'modified': '2020-12-27 14:08:12', 'pivot': {'schule_id': 12545, 'adressen_id': 12545, 'hauptsitz': 1}, 'sdb_ort': {'id': 975, 'plz': 30966, 'ort': 'Hemmingen', 'ortsteil': None, 'ags_id': 173, 'modified': '2020-12-27 14:04:21'}}], 'hauptsitz': {'id': 12545, 'ort_id': 975, 'strasse': 'Hohe Bünte 4', 'modified': '2020-12-27 14:08:12', 'sdb_ort': {'id': 975, 'plz': 30966, 'ort': 'Hemmingen', 'ortsteil': None, 'ags_id': 173, 'modified': '2020-12-27 14:04:21', 'sdb_ag': {'id': 173, 'scags': 2410007007, 'gemeindename': 'Hemmingen, Stadt', 'plz': 30966, 'regionen_id': '11', 'modified': '2019-11-29 06:56:33', 'dienstellen_code': 2, 'kreis_code': 241, 'sdb_dienststelle': {'id': 2, 'dienststelle': 'RLSB Hannover', 'homepage': 'https://www.rlsb.de/organisation/hannover', 'modified': '2020-12-28 10:07:51'}}}}, 'ag': {'id': 173, 'scags': 2410007007, 'gemeindename': 'Hemmingen, Stadt', 'plz': 30966, 'regionen_id': '11', 'modified': '2019-11-29 06:56:33', 'dienstellen_code': 2, 'kreis_code': 241, 'sdb_kreis': {'id': 45, 'skschl': 241, 'kreis': 'Hannover Region', 'zusatz': None}, 'sdb_region': {'id': 11, 'region': 'Hannover-Hildesheim', 'modified': '2019-11-28 08:29:48'}}, 'kreis': {'id': 45, 'skschl': 241, 'kreis': 'Hannover Region', 'zusatz': None}, 'region': {'id': 11, 'region': 'Hannover-Hildesheim', 'modified': '2019-11-28 08:29:48'}, 'dienststelle': {'id': 2, 'dienststelle': 'RLSB Hannover', 'homepage': 'https://www.rlsb.de/organisation/hannover', 'modified': 
'2020-12-28 10:07:51'}, 'sdb_traeger': {'id': 367, 'privat': 0, 'name': 'Stadt Hemmingen', 'modified': '2019-11-28 08:30:54'}, 'gliederungen': [{'id': 24, 'gliederung': 'KGS - Hauptschule -', 'bereich_id': 2, 'modified': '2020-12-01 09:16:42'}, {'id': 25, 'gliederung': 'KGS - Realschule -', 'bereich_id': 2, 'modified': '2020-12-01 09:16:45'}, {'id': 22, 'gliederung': 'KGS - Gymnasium SEK I -', 'bereich_id': 2, 'modified': '2020-12-01 09:16:39'}, {'id': 23, 'gliederung': 'KGS - Gymnasium SEK II -', 'bereich_id': 3, 'modified': '2020-12-01 09:16:41'}], 'besonderheiten': [{'id': 8, 'besonderheit': 'Ganztagsschule', 'link': '', 'modified': '2020-12-27 10:51:44'}], 'sdb_kommentare': [], 'sdb_traegerschaft': {'id': 1, 'bezeichnung': 'Öffentlich', 'modified': '2019-12-03 07:26:55'}, 'sdb_art': None}
Traceback (most recent call last):
  File "/Users/knut/Development/jedeschule-scraper/venv/lib/python3.8/site-packages/twisted/internet/defer.py", line 654, in _runCallbacks
    current.result = callback(current.result, *args, **kw)
  File "/Users/knut/Development/jedeschule-scraper/jedeschule/pipelines/school_pipeline.py", line 17, in process_item
    school = spider.normalize(item)
  File "/Users/knut/Development/jedeschule-scraper/jedeschule/spiders/niedersachsen.py", line 57, in normalize
    school_type=item.get("sdb_art", {}).get('art'),
AttributeError: 'NoneType' object has no attribute 'get'
k-nut commented 3 years ago

@knutator2 I pushed another little commit that should take care of this. Please review again :)