ThreeSixtyGiving / datatester

Scripts to assess the quality of data from http://data.threesixtygiving.org
MIT License
3 stars 2 forks source link

Data duplicated #37

Closed BibianaC closed 5 years ago

BibianaC commented 5 years ago

The Fore website is currently down.

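The Dunhill Medical Trust (the publisher just before The Fore) and The Fore have the same count and total_amount, even though The Fore's entry has "downloads": false. It looks like the previous publisher's aggregates were carried over to The Fore.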

Registry

Screen Shot 2019-05-16 at 13 37 22

status.json

{
    "datagetter_aggregates": {
      "count": 141,
      "currencies": {
        "GBP": {
          "count": 141,
          "currency_symbol": "£",
          "max_amount": 161255,
          "min_amount": 4739,
          "total_amount": 6036961.779999999
        }
      },
      "distinct_funding_org_identifier": [
        "GB-CHC-1140372"
      ],
      "distinct_funding_org_identifier_count": 1,
      "distinct_recipient_org_identifier_count": 133,
      "duplicate_ids_count": 0,
      "funding_org_identifier_prefixes": {
        "GB-CHC": 1
      },
      "funding_org_identifiers_unrecognised_prefixes": {},
      "id_count": 141,
      "max_award_date": "2018-12-07",
      "min_award_date": "2009-04-01",
      "recipient_org_identifier_prefixes": {
        "GB-CHC": 122,
        "GB-COH": 2,
        "GB-NIC": 1,
        "GB-SC": 8
      },
      "recipient_org_identifiers_unrecognised_prefixes": {},
      "unique_ids_count": 141
    },
    "datagetter_metadata": {
      "acceptable_license": true,
      "datetime_downloaded": "2019-05-15T20:02:18+00:00",
      "downloads": true,
      "file_size": 46440,
      "file_type": "xlsx",
      "json": "data/json_all/a002400000qytqwAAA.json",
      "valid": true
    },
    "description": "Grants awarded between 2009 and September 2017.",
    "distribution": [
      {
        "accessURL": "http://dunhillmedical.org.uk/our-work-in-action/search-grants-for-community-based-charities/",
        "downloadURL": "https://dunhillmedical.org.uk/wp-content/uploads/2019/01/2019-01-22-DMT-final-file.xlsx",
        "title": "Dunhill Medical Trust grants for community-based organisations"
      }
    ],
    "identifier": "a002400000qytqwAAA",
    "issued": "2017-09-27",
    "license": "https://creativecommons.org/licenses/by/4.0/",
    "license_name": "Creative Commons Attribution 4.0 International (CC BY 4.0)",
    "modified": "2019-01-23T10:41:19.000+0000",
    "publisher": {
      "logo": "http://www.threesixtygiving.org/wp-content/uploads/DMT-Logo.png",
      "name": "The Dunhill Medical Trust",
      "prefix": "360G-dunhillmedical",
      "website": "http://www.dunhillmedical.org.uk"
    },
    "title": "Dunhill Medical Trust grants for community-based organisations"
  },
  {
    "datagetter_aggregates": {
      "count": 141,
      "currencies": {
        "GBP": {
          "count": 141,
          "currency_symbol": "£",
          "max_amount": 161255,
          "min_amount": 4739,
          "total_amount": 6036961.779999999
        }
      },
      "distinct_funding_org_identifier": [
        "GB-CHC-1140372"
      ],
      "distinct_funding_org_identifier_count": 1,
      "distinct_recipient_org_identifier_count": 133,
      "duplicate_ids_count": 0,
      "funding_org_identifier_prefixes": {
        "GB-CHC": 1
      },
      "funding_org_identifiers_unrecognised_prefixes": {},
      "id_count": 141,
      "max_award_date": "2018-12-07",
      "min_award_date": "2009-04-01",
      "recipient_org_identifier_prefixes": {
        "GB-CHC": 122,
        "GB-COH": 2,
        "GB-NIC": 1,
        "GB-SC": 8
      },
      "recipient_org_identifiers_unrecognised_prefixes": {},
      "unique_ids_count": 141
    },
    "datagetter_metadata": {
      "acceptable_license": true,
      "datetime_downloaded": "2019-05-15T20:02:19+00:00",
      "downloads": false,
      "file_size": 46440,
      "file_type": "xlsx",
      "json": "data/json_all/a002400000qykbrAAA.json",
      "valid": true
    },
    "description": "",
    "distribution": [
      {
        "accessURL": "http://www.thefore.org/charities/",
        "downloadURL": "http://www.thefore.org/wp-content/uploads/2017/09/The-Fore-Pilot-2012-16-360Giving-Data.xlsx",
        "title": "Grants awarded since 2012"
      }
    ],
    "identifier": "a002400000qykbrAAA",
    "issued": "2017-08-02",
    "license": "https://creativecommons.org/licenses/by/4.0/",
    "license_name": "Creative Commons Attribution 4.0 International (CC BY 4.0)",
    "modified": "2018-08-03T10:26:32.000+0000",
    "publisher": {
      "logo": "http://www.threesixtygiving.org/wp-content/uploads/TheFore_logo_dark.png",
      "name": "The Fore",
      "prefix": "360G-bulldogtrust",
      "website": "http://www.thefore.org"
    },
    "title": "Grants awarded since 2012"
  },
BibianaC commented 5 years ago

@KDuerden I have added the issue about data duplication.

@robredpath FYI this issue has been added.

robredpath commented 5 years ago

This is resolved by https://github.com/ThreeSixtyGiving/datagetter/pull/43