simonw / public-notes

Public notes as issue threads
25 stars 0 forks source link

Senate lobbying disclosures in Datasette Cloud #16

Open simonw opened 9 months ago

simonw commented 9 months ago

Using data from this API: https://lda.senate.gov/api/

simonw commented 9 months ago

I used a Jupyter notebook to extract all ~500 filings for 2024 so far. Export as Markdown:

# Anonymous rate limit is 15/minute so I will sleep 4s between calls
import httpx
url = "https://lda.senate.gov/api/v1/filings/?filing_year=2024&ordering=-dt_posted"
import pathlib, time, json
path = pathlib.Path("/Users/simon/Dropbox/Development/us-senate-lobbying")
path.mkdir(exist_ok=True)
i = 1
while url:
    data = httpx.get(url, headers={"Accept": "application/json"}).json()
    (path / "2024" / f"{i}.json").write_text(json.dumps(data), "utf-8")
    url = data["next"]
    print(i)
    time.sleep(4)
    i += 1
import sqlite_utils
db = sqlite_utils.Database("/tmp/senate.db")
for file in path.glob("2024/*.json"):
    data = json.load(open(file))["results"]
    db["filings"].insert_all(data, pk="filing_uuid")
!open /tmp/senate.db
len(data), type(data), data[0].keys()
(25,
 list,
 dict_keys(['url', 'filing_uuid', 'filing_type', 'filing_type_display', 'filing_year', 'filing_period', 'filing_period_display', 'filing_document_url', 'filing_document_content_type', 'income', 'expenses', 'expenses_method', 'expenses_method_display', 'posted_by_name', 'dt_posted', 'termination_date', 'registrant_country', 'registrant_ppb_country', 'registrant_address_1', 'registrant_address_2', 'registrant_different_address', 'registrant_city', 'registrant_state', 'registrant_zip', 'registrant', 'client', 'lobbying_activities', 'conviction_disclosures', 'foreign_entities', 'affiliated_organizations']))
# Find the longest
all_docs = []
for file in path.glob("2024/*.json"):
    data = json.load(open(file))["results"]
    all_docs.extend(data)
docs = sorted(all_docs, key=lambda d: len(repr(d)), reverse=True)[:5]
doc = docs[1]
doc.keys()
dict_keys(['url', 'filing_uuid', 'filing_type', 'filing_type_display', 'filing_year', 'filing_period', 'filing_period_display', 'filing_document_url', 'filing_document_content_type', 'income', 'expenses', 'expenses_method', 'expenses_method_display', 'posted_by_name', 'dt_posted', 'termination_date', 'registrant_country', 'registrant_ppb_country', 'registrant_address_1', 'registrant_address_2', 'registrant_different_address', 'registrant_city', 'registrant_state', 'registrant_zip', 'registrant', 'client', 'lobbying_activities', 'conviction_disclosures', 'foreign_entities', 'affiliated_organizations'])
interesting = [pair for pair in doc.items() if not isinstance(pair[1], str) and not isinstance(pair[1], int) and pair[1] is not None]; len(interesting)
import json
print(json.dumps(dict(interesting), indent=2))

    {
      "registrant": {
        "id": 401108021,
        "url": "https://lda.senate.gov/api/v1/registrants/401108021/",
        "house_registrant_id": null,
        "name": "THE GENERATION FOUNDATION",
        "description": null,
        "address_1": "20 Air Street",
        "address_2": "London, UK",
        "address_3": "W1B 5AN",
        "address_4": null,
        "city": null,
        "state": null,
        "state_display": null,
        "zip": null,
        "country": "GB",
        "country_display": "United Kingdom",
        "ppb_country": "GB",
        "ppb_country_display": "United Kingdom",
        "contact_name": "ALEXANDER MARSHALL",
        "contact_telephone": "+44 20 7534 4700",
        "dt_updated": "2023-12-01T08:41:32.213592-05:00"
      },
      "client": {
        "id": 58795,
        "url": "https://lda.senate.gov/api/v1/clients/58795/",
        "client_id": 58795,
        "name": "THE GENERATION FOUNDATION",
        "general_description": "Charitable Foundation.",
        "client_government_entity": null,
        "client_self_select": true,
        "state": null,
        "state_display": null,
        "country": "GB",
        "country_display": "United Kingdom",
        "ppb_state": null,
        "ppb_state_display": null,
        "ppb_country": "GB",
        "ppb_country_display": "United Kingdom",
        "effective_date": "2024-02-15"
      },
      "lobbying_activities": [
        {
          "general_issue_code": "LBR",
          "general_issue_code_display": "Labor Issues/Antitrust/Workplace",
          "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
          "foreign_entity_issues": null,
          "lobbyists": [
            {
              "lobbyist": {
                "id": 145494,
                "prefix": null,
                "prefix_display": null,
                "first_name": "GRACE",
                "nickname": null,
                "middle_name": null,
                "last_name": "EDDY",
                "suffix": null,
                "suffix_display": null
              },
              "covered_position": null,
              "new": true
            }
          ],
          "government_entities": []
        },
        {
          "general_issue_code": "INS",
          "general_issue_code_display": "Insurance",
          "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
          "foreign_entity_issues": null,
          "lobbyists": [
            {
              "lobbyist": {
                "id": 145494,
                "prefix": null,
                "prefix_display": null,
                "first_name": "GRACE",
                "nickname": null,
                "middle_name": null,
                "last_name": "EDDY",
                "suffix": null,
                "suffix_display": null
              },
              "covered_position": null,
              "new": true
            }
          ],
          "government_entities": []
        },
        {
          "general_issue_code": "UTI",
          "general_issue_code_display": "Utilities",
          "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
          "foreign_entity_issues": null,
          "lobbyists": [
            {
              "lobbyist": {
                "id": 145494,
                "prefix": null,
                "prefix_display": null,
                "first_name": "GRACE",
                "nickname": null,
                "middle_name": null,
                "last_name": "EDDY",
                "suffix": null,
                "suffix_display": null
              },
              "covered_position": null,
              "new": true
            }
          ],
          "government_entities": []
        },
        {
          "general_issue_code": "URB",
          "general_issue_code_display": "Urban Development/Municipalities",
          "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
          "foreign_entity_issues": null,
          "lobbyists": [
            {
              "lobbyist": {
                "id": 145494,
                "prefix": null,
                "prefix_display": null,
                "first_name": "GRACE",
                "nickname": null,
                "middle_name": null,
                "last_name": "EDDY",
                "suffix": null,
                "suffix_display": null
              },
              "covered_position": null,
              "new": true
            }
          ],
          "government_entities": []
        },
        {
          "general_issue_code": "UNM",
          "general_issue_code_display": "Unemployment",
          "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
          "foreign_entity_issues": null,
          "lobbyists": [
            {
              "lobbyist": {
                "id": 145494,
                "prefix": null,
                "prefix_display": null,
                "first_name": "GRACE",
                "nickname": null,
                "middle_name": null,
                "last_name": "EDDY",
                "suffix": null,
                "suffix_display": null
              },
              "covered_position": null,
              "new": true
            }
          ],
          "government_entities": []
        },
        {
          "general_issue_code": "MIA",
          "general_issue_code_display": "Media (information/publishing)",
          "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
          "foreign_entity_issues": null,
          "lobbyists": [
            {
              "lobbyist": {
                "id": 145494,
                "prefix": null,
                "prefix_display": null,
                "first_name": "GRACE",
                "nickname": null,
                "middle_name": null,
                "last_name": "EDDY",
                "suffix": null,
                "suffix_display": null
              },
              "covered_position": null,
              "new": true
            }
          ],
          "government_entities": []
        },
        {
          "general_issue_code": "MAN",
          "general_issue_code_display": "Manufacturing",
          "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
          "foreign_entity_issues": null,
          "lobbyists": [
            {
              "lobbyist": {
                "id": 145494,
                "prefix": null,
                "prefix_display": null,
                "first_name": "GRACE",
                "nickname": null,
                "middle_name": null,
                "last_name": "EDDY",
                "suffix": null,
                "suffix_display": null
              },
              "covered_position": null,
              "new": true
            }
          ],
          "government_entities": []
        },
        {
          "general_issue_code": "HOU",
          "general_issue_code_display": "Housing",
          "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
          "foreign_entity_issues": null,
          "lobbyists": [
            {
              "lobbyist": {
                "id": 145494,
                "prefix": null,
                "prefix_display": null,
                "first_name": "GRACE",
                "nickname": null,
                "middle_name": null,
                "last_name": "EDDY",
                "suffix": null,
                "suffix_display": null
              },
              "covered_position": null,
              "new": true
            }
          ],
          "government_entities": []
        },
        {
          "general_issue_code": "FUE",
          "general_issue_code_display": "Fuel/Gas/Oil",
          "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
          "foreign_entity_issues": null,
          "lobbyists": [
            {
              "lobbyist": {
                "id": 145494,
                "prefix": null,
                "prefix_display": null,
                "first_name": "GRACE",
                "nickname": null,
                "middle_name": null,
                "last_name": "EDDY",
                "suffix": null,
                "suffix_display": null
              },
              "covered_position": null,
              "new": true
            }
          ],
          "government_entities": []
        },
        {
          "general_issue_code": "FIN",
          "general_issue_code_display": "Financial Institutions/Investments/Securities",
          "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
          "foreign_entity_issues": null,
          "lobbyists": [
            {
              "lobbyist": {
                "id": 145494,
                "prefix": null,
                "prefix_display": null,
                "first_name": "GRACE",
                "nickname": null,
                "middle_name": null,
                "last_name": "EDDY",
                "suffix": null,
                "suffix_display": null
              },
              "covered_position": null,
              "new": true
            }
          ],
          "government_entities": []
        },
        {
          "general_issue_code": "FAM",
          "general_issue_code_display": "Family issues/Abortion/Adoption",
          "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
          "foreign_entity_issues": null,
          "lobbyists": [
            {
              "lobbyist": {
                "id": 145494,
                "prefix": null,
                "prefix_display": null,
                "first_name": "GRACE",
                "nickname": null,
                "middle_name": null,
                "last_name": "EDDY",
                "suffix": null,
                "suffix_display": null
              },
              "covered_position": null,
              "new": true
            }
          ],
          "government_entities": []
        },
        {
          "general_issue_code": "ENV",
          "general_issue_code_display": "Environment/Superfund",
          "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
          "foreign_entity_issues": null,
          "lobbyists": [
            {
              "lobbyist": {
                "id": 145494,
                "prefix": null,
                "prefix_display": null,
                "first_name": "GRACE",
                "nickname": null,
                "middle_name": null,
                "last_name": "EDDY",
                "suffix": null,
                "suffix_display": null
              },
              "covered_position": null,
              "new": true
            }
          ],
          "government_entities": []
        },
        {
          "general_issue_code": "ENG",
          "general_issue_code_display": "Energy/Nuclear",
          "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
          "foreign_entity_issues": null,
          "lobbyists": [
            {
              "lobbyist": {
                "id": 145494,
                "prefix": null,
                "prefix_display": null,
                "first_name": "GRACE",
                "nickname": null,
                "middle_name": null,
                "last_name": "EDDY",
                "suffix": null,
                "suffix_display": null
              },
              "covered_position": null,
              "new": true
            }
          ],
          "government_entities": []
        },
        {
          "general_issue_code": "ECN",
          "general_issue_code_display": "Economics/Economic Development",
          "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
          "foreign_entity_issues": null,
          "lobbyists": [
            {
              "lobbyist": {
                "id": 145494,
                "prefix": null,
                "prefix_display": null,
                "first_name": "GRACE",
                "nickname": null,
                "middle_name": null,
                "last_name": "EDDY",
                "suffix": null,
                "suffix_display": null
              },
              "covered_position": null,
              "new": true
            }
          ],
          "government_entities": []
        },
        {
          "general_issue_code": "CAW",
          "general_issue_code_display": "Clean Air and Water (quality)",
          "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
          "foreign_entity_issues": null,
          "lobbyists": [
            {
              "lobbyist": {
                "id": 145494,
                "prefix": null,
                "prefix_display": null,
                "first_name": "GRACE",
                "nickname": null,
                "middle_name": null,
                "last_name": "EDDY",
                "suffix": null,
                "suffix_display": null
              },
              "covered_position": null,
              "new": true
            }
          ],
          "government_entities": []
        }
      ],
      "conviction_disclosures": [],
      "foreign_entities": [
        {
          "name": "GENERATION INVESTMENT MANAGEMENT LLP",
          "contribution": "0.00",
          "ownership_percentage": "100.00",
          "address": "20 Air Street",
          "city": "London",
          "state": null,
          "state_display": null,
          "country": "GB",
          "country_display": "United Kingdom",
          "ppb_city": "London",
          "ppb_state": null,
          "ppb_state_display": null,
          "ppb_country": "GB",
          "ppb_country_display": "United Kingdom"
        }
      ],
      "affiliated_organizations": []
    }
simonw commented 9 months ago

Exported to Datasette Lite: https://lite.datasette.io/?csv=https://gist.github.com/simonw/a029af7104178e449533fc95b4c4c529

simonw commented 9 months ago

Was inspired to add this feature to Datasette:

simonw commented 9 months ago

Example of lobbying_activities column that includes government_entities:

[
    {
        "general_issue_code": "INS",
        "general_issue_code_display": "Insurance",
        "description": "Section 11503 of the 2023 National Defense Authorization Act",
        "foreign_entity_issues": "",
        "lobbyists": [
            {
                "lobbyist": {
                    "id": 144910,
                    "prefix": "ms",
                    "prefix_display": "MS.",
                    "first_name": "EMILY",
                    "nickname": null,
                    "middle_name": "ELAINE",
                    "last_name": "COYLE",
                    "suffix": null,
                    "suffix_display": null
                },
                "covered_position": null,
                "new": false
            },
            {
                "lobbyist": {
                    "id": 143755,
                    "prefix": null,
                    "prefix_display": null,
                    "first_name": "MARGO",
                    "nickname": null,
                    "middle_name": null,
                    "last_name": "KLOSTERMAN",
                    "suffix": null,
                    "suffix_display": null
                },
                "covered_position": null,
                "new": false
            }
        ],
        "government_entities": [
            {
                "id": 2,
                "name": "HOUSE OF REPRESENTATIVES"
            },
            {
                "id": 1,
                "name": "SENATE"
            }
        ]
    }
]
simonw commented 9 months ago

Only one record has conviction_disclosures:

[
    {
        "lobbyist": {
            "id": 73797,
            "prefix": null,
            "prefix_display": null,
            "first_name": "JACOB",
            "nickname": null,
            "middle_name": null,
            "last_name": "WOHL",
            "suffix": null,
            "suffix_display": null
        },
        "date": "2022-11-30",
        "description": "Illegal campaign robo calls, telecommunications fraud. Cuyahoga County, Ohio. Offense took place in September 2020. Section 2913.05 Ohio code"
    },
    {
        "lobbyist": {
            "id": 46860,
            "prefix": "mr",
            "prefix_display": "MR.",
            "first_name": "JACK",
            "nickname": null,
            "middle_name": "MACAULEY",
            "last_name": "BURKMAN",
            "suffix": null,
            "suffix_display": null
        },
        "date": "2022-11-30",
        "description": "Illegal campaign robo calls, telecommunications fraud. Cuyahoga County, Ohio. Offense took place in September 2020. Section 2913.05 Ohio code"
    }
]
simonw commented 9 months ago

registrant column is always populated and always in this shape:

{
    "id": 401107756,
    "url": "https://lda.senate.gov/api/v1/registrants/401107756/",
    "house_registrant_id": null,
    "name": "MODERN FORTIS LLC",
    "description": null,
    "address_1": "700 12th Street NW",
    "address_2": "Suite 700",
    "address_3": null,
    "address_4": null,
    "city": "Washington",
    "state": "DC",
    "state_display": "District of Columbia",
    "zip": "20005",
    "country": "US",
    "country_display": "United States of America",
    "ppb_country": "US",
    "ppb_country_display": "United States of America",
    "contact_name": "JOSEPH CAMERON",
    "contact_telephone": "+1 774-306-1300",
    "dt_updated": "2024-02-20T13:25:07.968728-05:00"
}

Likewise client:

{
    "id": 57149,
    "url": "https://lda.senate.gov/api/v1/clients/57149/",
    "client_id": 57149,
    "name": "DIVE EQUIPMENT & MARKETING ASSOCIATION",
    "general_description": "Non-profit international trade association of scuba diving industry professionals",
    "client_government_entity": null,
    "client_self_select": false,
    "state": "CA",
    "state_display": "California",
    "country": "US",
    "country_display": "United States of America",
    "ppb_state": "CA",
    "ppb_state_display": "California",
    "ppb_country": "US",
    "ppb_country_display": "United States of America",
    "effective_date": "2023-09-01"
}
simonw commented 9 months ago

foreign_entities example:

[
    {
        "name": "MR. ITAI MENDELSOHN",
        "contribution": "0.00",
        "ownership_percentage": "6.61",
        "address": "Ha-Yetsira ST 3, Floor 15",
        "city": "Ramat Gan",
        "state": null,
        "state_display": null,
        "country": "IL",
        "country_display": "Israel",
        "ppb_city": "Ramat Gan",
        "ppb_state": null,
        "ppb_state_display": null,
        "ppb_country": "IL",
        "ppb_country_display": "Israel"
    },
    {
        "name": "MR. ROY AMIR",
        "contribution": "0.00",
        "ownership_percentage": "6.61",
        "address": "Ha-Yetsira St 3, Floor 15",
        "city": "Ramat Gan",
        "state": null,
        "state_display": null,
        "country": "IL",
        "country_display": "Israel",
        "ppb_city": "Ramat Gan",
        "ppb_state": null,
        "ppb_state_display": null,
        "ppb_country": "IL",
        "ppb_country_display": "Israel"
    },
    {
        "name": "MR. DOR SKULER",
        "contribution": "0.00",
        "ownership_percentage": "6.61",
        "address": "Ha-Yetsira St 3, Floor 15",
        "city": "Ramat Gan",
        "state": null,
        "state_display": null,
        "country": "IL",
        "country_display": "Israel",
        "ppb_city": "Ramat Gan",
        "ppb_state": null,
        "ppb_state_display": null,
        "ppb_country": "IL",
        "ppb_country_display": "Israel"
    }
]
simonw commented 9 months ago

affiliated_organizations example:

[
    {
        "name": "COUNTY OF DUPAGE, IL",
        "url": null,
        "address_1": "421 North County Farm Road",
        "address_2": null,
        "address_3": null,
        "address_4": null,
        "city": "Wheaton",
        "state": "IL",
        "state_display": "Illinois",
        "zip": "60187",
        "country": "US",
        "country_display": "United States of America",
        "ppb_city": null,
        "ppb_state": "IL",
        "ppb_state_display": "Illinois",
        "ppb_country": "US",
        "ppb_country_display": "United States of America"
    }
]

I think that's examples of everything now. I'm going to write a sqlite-utils importer that tries to get this stuff all in tables.

simonw commented 9 months ago

I'll need many-to-many tables between filings and affiliated_organizations and foreign_entities and conviction_disclosures (which will also foreign key to lobbyist)

simonw commented 9 months ago

Looks like I need to pull this out into a separate table too:

{
    "general_issue_code": "LBR",
    "general_issue_code_display": "Labor Issues/Antitrust/Workplace",
    "description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation."
}

That's the first set of keys in a lobbying_activities record. I'm going to call that an issue_code.

general_issue_code -> code general_issue_code_display -> display description -> description.

simonw commented 9 months ago

foreign_entity_issues is occasionally present on a lobbying activity where it's either a blank string or null or a populated string:

CleanShot 2024-02-27 at 17 18 05@2x

simonw commented 9 months ago

This horrible query got me the lobbyist counts:

with activities as (
  select value from json_each(lobbying_activities), raw
),
lobbyist_lists as (
  select json_extract(value, '$.lobbyists') as l2 from activities
),
lobbyists as (
  select json_extract(value, '$.lobbyist') as lobbyist from json_each(l2), lobbyist_lists
)
select lobbyist, count(*) from lobbyists group by lobbyist order by count(*) desc

https://lite.datasette.io/?install=datasette-copyable&csv=https://gist.github.com/simonw/a029af7104178e449533fc95b4c4c529#/data?sql=with+activities+as+(%0A++select+value+from+json_each(lobbying_activities)%2C+raw%0A)%2C%0Alobbyist_lists+as+(%0A++select+json_extract(value%2C+'%24.lobbyists')+as+l2+from+activities%0A)%2C%0Alobbyists+as+(%0A++select+json_extract(value%2C+'%24.lobbyist')+as+lobbyist+from+json_each(l2)%2C+lobbyist_lists%0A)%0Aselect+lobbyist%2C+count(*)+from+lobbyists+group+by+lobbyist+order+by+count(*)+desc

lobbyist count(*)
{"id":52794,"prefix":null,"prefix_display":null,"first_name":"JUSTIN","nickname":null,"middle_name":null,"last_name":"LEBLANC","suffix":null,"suffix_display":null} 17
{"id":141889,"prefix":null,"prefix_display":null,"first_name":"STACY","nickname":null,"middle_name":null,"last_name":"MCBRIDE","suffix":null,"suffix_display":null} 16
{"id":145494,"prefix":null,"prefix_display":null,"first_name":"GRACE","nickname":null,"middle_name":null,"last_name":"EDDY","suffix":null,"suffix_display":null} 15
{"id":70784,"prefix":null,"prefix_display":null,"first_name":"ERIC","nickname":null,"middle_name":null,"last_name":"LAUSTEN","suffix":null,"suffix_display":null} 14
{"id":145482,"prefix":null,"prefix_display":null,"first_name":"PETER","nickname":null,"middle_name":null,"last_name":"DEFAZIO","suffix":null,"suffix_display":null} 13
{"id":145265,"prefix":null,"prefix_display":null,"first_name":"NADEGE","nickname":null,"middle_name":null,"last_name":"BLANC","suffix":null,"suffix_display":null} 13
{"id":145264,"prefix":null,"prefix_display":null,"first_name":"JEAN","nickname":null,"middle_name":"ALI","last_name":"BLANC","suffix":null,"suffix_display":null} 13
{"id":144980,"prefix":null,"prefix_display":null,"first_name":"TYLER","nickname":null,"middle_name":null,"last_name":"NEWMAN","suffix":null,"suffix_display":null} 12
{"id":145465,"prefix":null,"prefix_display":null,"first_name":"KIM","nickname":null,"middle_name":null,"last_name":"CORBIN","suffix":null,"suffix_display":null} 11
{"id":58561,"prefix":null,"prefix_display":null,"first_name":"MATHEW","nickname":null,"middle_name":null,"last_name":"LAPINSKI","suffix":null,"suffix_display":null} 10
simonw commented 9 months ago

I may need to do something to help Datasette better display M2M stuff here. A very quick hack that lets you execute extra SQL queries in table cells to pull in related data perhaps?

simonw commented 9 months ago

I'm going to write a single Python function which, given a filing, inserts everything about it into a bunch of different tables - creating those tables if they do not exist.

simonw commented 9 months ago

Found one with different values for is_new and covered_position in lobbying_activities_lobbyist:

CleanShot 2024-02-27 at 18 08 18@2x

simonw commented 9 months ago

Fun query:

select
  last_name,
  json_group_array(id || ': ' || name),
  count(*)
from
  lobbyists
group by
  last_name
having
  count(*) > 1
order by
  count(*) desc

CleanShot 2024-02-27 at 18 11 32@2x

simonw commented 9 months ago

It looks like the really interesting data in the filings isn't available in the JSON - e.g. on https://lda.senate.gov/filings/public/filing/7466c7ee-8bd1-4496-94df-5d5322d25e2d/print/w

So I built a Val that can get it: https://simonw-pinkjellyfish.web.val.run/?url=https://lda.senate.gov/filings/public/filing/7466c7ee-8bd1-4496-94df-5d5322d25e2d/print/

const { TurndownService } = await import("npm:turndown");

export default async function(req: Request): Promise<Response> {
  const searchParams = new URL(req.url).searchParams;
  const url = searchParams.get("url");
  const response = await fetch(url);
  const html = await response.text();
  const issues =
    html.split("<p>16. Specific lobbying issues</p>")[1].split("17. House(s) of Congress and Federal agencies")[0];
  const markdown = new TurndownService().turndown(issues);
  return Response.json({ content: markdown });
}

Updated that to return just plain text:

  const markdown = new turndown.default().turndown(issues);
  return new Response(markdown);
}
simonw commented 9 months ago

Here's my Python so far:

all_docs = []
for file in path.glob("2024/*.json"):
    data = json.load(open(file))["results"]
    all_docs.extend(data)

def lobbyist_string(lobbyist, include_id=True):
    bits  = []
    if include_id:
        bits.append(str(lobbyist["id"]))
    for key in ("prefix_display", "first_name", "nickname", "middle_name", "last_name", "suffix_display"):
        value = lobbyist.get(key, "")
        if value:
            bits.append(str(value))
    return " ".join(bits)

def insert_filing(db, filing):
    filing = copy.deepcopy(filing)
    client = filing.pop("client")
    registrant = filing.pop("registrant")
    filing["registrant_id"] = registrant["id"]
    filing["client_id"] = client["id"]
    db["clients"].insert(client, pk="id", alter=True, replace=True)
    db["registrants"].insert(registrant, pk="id", alter=True, replace=True)
    lobbying_activities = filing.pop("lobbying_activities")
    filing_id = filing["filing_uuid"]
    db["filings"].insert(
        filing,
        pk="filing_uuid",
        foreign_keys=(
            ("client_id", "clients", "id"),
            ("registrant_id", "registrants", "id"),
        ),
        alter=True,
        replace=True,
        column_order=("client_id", "registrant_id"),
    )
    # The lobbying activities are the most complicated bit
    for lobbying_activity in lobbying_activities:
        # general_issue_code/general_issue_code_display/description becomes issue_code
        issue_code = {
            "code": lobbying_activity["general_issue_code"],
            "name": lobbying_activity["general_issue_code_display"],
            "description": lobbying_activity["description"],
        }
        db["issue_codes"].insert(issue_code, pk="code", replace=True)
        foreign_entity_issues = lobbying_activity.get("foreign_entity_issues") or ""
        government_entities = lobbying_activity.get("government_entities") or []
        lobbyists = lobbying_activity.get("lobbyists") or []
        lobbying_activity_id = db["lobbying_activities"].insert({
            "filing_id": filing_id,
            "issue_code": issue_code["code"],
            "lobbyists": [lobbyist_string(lobbyist["lobbyist"]) for lobbyist in lobbyists],
            "government_entities": [ent["name"] for ent in government_entities],
            "foreign_entity_issues": foreign_entity_issues,
        }, pk="id", alter=True, foreign_keys=(
            ("filing_id", "filings", "filing_uuid"),
            ("issue_code", "issue_codes", "code"),
        )).last_pk
        # Insert the lobbyist and relate it back to both tables
        for lobbyist in lobbyists:
            covered_position = lobbyist["covered_position"]
            is_new = lobbyist["new"]
            db["lobbyists"].insert(
                dict(lobbyist["lobbyist"], name=lobbyist_string(lobbyist["lobbyist"], include_id=False)),
                pk="id", replace=True, alter=True
            )
            lobbyist_id = lobbyist["lobbyist"]["id"]
            db["lobbying_activities_lobbyist"].insert({
                "lobbyist_id": lobbyist_id,
                "lobbying_activity_id": lobbying_activity_id,
                "filing_id": filing_id,
                "is_new": is_new,
                "covered_position": covered_position,
            }, alter=True, pk="id", foreign_keys=(
                ("lobbyist_id", "lobbyists", "id"),
                ("lobbying_activity_id", "lobbying_activities", "id"),
                ("filing_id", "filings", "filing_uuid"),
            ))

db = sqlite_utils.Database("/tmp/filings.db")
for table in db.table_names():
    db[table].drop()

for doc in all_docs:
    insert_filing(db, doc)
simonw commented 9 months ago

The report I created a Val for looks to be in a pretty rare format - I found https://lda.senate.gov/filings/public/filing/bbeaaa0f-187c-4a9f-8a30-31ba78cc242f/print/ with the same format (possibly filing_type of 1A) but no others.

simonw commented 9 months ago

This could be useful:

curl 'https://lda.senate.gov/filings/public/filing/00ff141a-e343-4a04-9b37-39d63d1a81d6/print/' | strip-tags -m

Strips tags and minifies a bit. Might be good input to an LLM.