Open simonw opened 9 months ago
I used a Jupyter notebook to extract all ~500 filings for 2024 so far. Export as Markdown:
# Anonymous rate limit is 15/minute so I will sleep 4s between calls
import httpx
url = "https://lda.senate.gov/api/v1/filings/?filing_year=2024&ordering=-dt_posted"
import pathlib, time, json
path = pathlib.Path("/Users/simon/Dropbox/Development/us-senate-lobbying")
path.mkdir(exist_ok=True)
i = 1
while url:
data = httpx.get(url, headers={"Accept": "application/json"}).json()
(path / "2024" / f"{i}.json").write_text(json.dumps(data), "utf-8")
url = data["next"]
print(i)
time.sleep(4)
i += 1
import sqlite_utils
db = sqlite_utils.Database("/tmp/senate.db")
for file in path.glob("2024/*.json"):
data = json.load(open(file))["results"]
db["filings"].insert_all(data, pk="filing_uuid")
!open /tmp/senate.db
len(data), type(data), data[0].keys()
(25,
list,
dict_keys(['url', 'filing_uuid', 'filing_type', 'filing_type_display', 'filing_year', 'filing_period', 'filing_period_display', 'filing_document_url', 'filing_document_content_type', 'income', 'expenses', 'expenses_method', 'expenses_method_display', 'posted_by_name', 'dt_posted', 'termination_date', 'registrant_country', 'registrant_ppb_country', 'registrant_address_1', 'registrant_address_2', 'registrant_different_address', 'registrant_city', 'registrant_state', 'registrant_zip', 'registrant', 'client', 'lobbying_activities', 'conviction_disclosures', 'foreign_entities', 'affiliated_organizations']))
# Find the longest
all_docs = []
for file in path.glob("2024/*.json"):
data = json.load(open(file))["results"]
all_docs.extend(data)
docs = sorted(all_docs, key=lambda d: len(repr(d)), reverse=True)[:5]
doc = docs[1]
doc.keys()
dict_keys(['url', 'filing_uuid', 'filing_type', 'filing_type_display', 'filing_year', 'filing_period', 'filing_period_display', 'filing_document_url', 'filing_document_content_type', 'income', 'expenses', 'expenses_method', 'expenses_method_display', 'posted_by_name', 'dt_posted', 'termination_date', 'registrant_country', 'registrant_ppb_country', 'registrant_address_1', 'registrant_address_2', 'registrant_different_address', 'registrant_city', 'registrant_state', 'registrant_zip', 'registrant', 'client', 'lobbying_activities', 'conviction_disclosures', 'foreign_entities', 'affiliated_organizations'])
interesting = [pair for pair in doc.items() if not isinstance(pair[1], str) and not isinstance(pair[1], int) and pair[1] is not None]; len(interesting)
import json
print(json.dumps(dict(interesting), indent=2))
{
"registrant": {
"id": 401108021,
"url": "https://lda.senate.gov/api/v1/registrants/401108021/",
"house_registrant_id": null,
"name": "THE GENERATION FOUNDATION",
"description": null,
"address_1": "20 Air Street",
"address_2": "London, UK",
"address_3": "W1B 5AN",
"address_4": null,
"city": null,
"state": null,
"state_display": null,
"zip": null,
"country": "GB",
"country_display": "United Kingdom",
"ppb_country": "GB",
"ppb_country_display": "United Kingdom",
"contact_name": "ALEXANDER MARSHALL",
"contact_telephone": "+44 20 7534 4700",
"dt_updated": "2023-12-01T08:41:32.213592-05:00"
},
"client": {
"id": 58795,
"url": "https://lda.senate.gov/api/v1/clients/58795/",
"client_id": 58795,
"name": "THE GENERATION FOUNDATION",
"general_description": "Charitable Foundation.",
"client_government_entity": null,
"client_self_select": true,
"state": null,
"state_display": null,
"country": "GB",
"country_display": "United Kingdom",
"ppb_state": null,
"ppb_state_display": null,
"ppb_country": "GB",
"ppb_country_display": "United Kingdom",
"effective_date": "2024-02-15"
},
"lobbying_activities": [
{
"general_issue_code": "LBR",
"general_issue_code_display": "Labor Issues/Antitrust/Workplace",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
"foreign_entity_issues": null,
"lobbyists": [
{
"lobbyist": {
"id": 145494,
"prefix": null,
"prefix_display": null,
"first_name": "GRACE",
"nickname": null,
"middle_name": null,
"last_name": "EDDY",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": true
}
],
"government_entities": []
},
{
"general_issue_code": "INS",
"general_issue_code_display": "Insurance",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
"foreign_entity_issues": null,
"lobbyists": [
{
"lobbyist": {
"id": 145494,
"prefix": null,
"prefix_display": null,
"first_name": "GRACE",
"nickname": null,
"middle_name": null,
"last_name": "EDDY",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": true
}
],
"government_entities": []
},
{
"general_issue_code": "UTI",
"general_issue_code_display": "Utilities",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
"foreign_entity_issues": null,
"lobbyists": [
{
"lobbyist": {
"id": 145494,
"prefix": null,
"prefix_display": null,
"first_name": "GRACE",
"nickname": null,
"middle_name": null,
"last_name": "EDDY",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": true
}
],
"government_entities": []
},
{
"general_issue_code": "URB",
"general_issue_code_display": "Urban Development/Municipalities",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
"foreign_entity_issues": null,
"lobbyists": [
{
"lobbyist": {
"id": 145494,
"prefix": null,
"prefix_display": null,
"first_name": "GRACE",
"nickname": null,
"middle_name": null,
"last_name": "EDDY",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": true
}
],
"government_entities": []
},
{
"general_issue_code": "UNM",
"general_issue_code_display": "Unemployment",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
"foreign_entity_issues": null,
"lobbyists": [
{
"lobbyist": {
"id": 145494,
"prefix": null,
"prefix_display": null,
"first_name": "GRACE",
"nickname": null,
"middle_name": null,
"last_name": "EDDY",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": true
}
],
"government_entities": []
},
{
"general_issue_code": "MIA",
"general_issue_code_display": "Media (information/publishing)",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
"foreign_entity_issues": null,
"lobbyists": [
{
"lobbyist": {
"id": 145494,
"prefix": null,
"prefix_display": null,
"first_name": "GRACE",
"nickname": null,
"middle_name": null,
"last_name": "EDDY",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": true
}
],
"government_entities": []
},
{
"general_issue_code": "MAN",
"general_issue_code_display": "Manufacturing",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
"foreign_entity_issues": null,
"lobbyists": [
{
"lobbyist": {
"id": 145494,
"prefix": null,
"prefix_display": null,
"first_name": "GRACE",
"nickname": null,
"middle_name": null,
"last_name": "EDDY",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": true
}
],
"government_entities": []
},
{
"general_issue_code": "HOU",
"general_issue_code_display": "Housing",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
"foreign_entity_issues": null,
"lobbyists": [
{
"lobbyist": {
"id": 145494,
"prefix": null,
"prefix_display": null,
"first_name": "GRACE",
"nickname": null,
"middle_name": null,
"last_name": "EDDY",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": true
}
],
"government_entities": []
},
{
"general_issue_code": "FUE",
"general_issue_code_display": "Fuel/Gas/Oil",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
"foreign_entity_issues": null,
"lobbyists": [
{
"lobbyist": {
"id": 145494,
"prefix": null,
"prefix_display": null,
"first_name": "GRACE",
"nickname": null,
"middle_name": null,
"last_name": "EDDY",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": true
}
],
"government_entities": []
},
{
"general_issue_code": "FIN",
"general_issue_code_display": "Financial Institutions/Investments/Securities",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
"foreign_entity_issues": null,
"lobbyists": [
{
"lobbyist": {
"id": 145494,
"prefix": null,
"prefix_display": null,
"first_name": "GRACE",
"nickname": null,
"middle_name": null,
"last_name": "EDDY",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": true
}
],
"government_entities": []
},
{
"general_issue_code": "FAM",
"general_issue_code_display": "Family issues/Abortion/Adoption",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
"foreign_entity_issues": null,
"lobbyists": [
{
"lobbyist": {
"id": 145494,
"prefix": null,
"prefix_display": null,
"first_name": "GRACE",
"nickname": null,
"middle_name": null,
"last_name": "EDDY",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": true
}
],
"government_entities": []
},
{
"general_issue_code": "ENV",
"general_issue_code_display": "Environment/Superfund",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
"foreign_entity_issues": null,
"lobbyists": [
{
"lobbyist": {
"id": 145494,
"prefix": null,
"prefix_display": null,
"first_name": "GRACE",
"nickname": null,
"middle_name": null,
"last_name": "EDDY",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": true
}
],
"government_entities": []
},
{
"general_issue_code": "ENG",
"general_issue_code_display": "Energy/Nuclear",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
"foreign_entity_issues": null,
"lobbyists": [
{
"lobbyist": {
"id": 145494,
"prefix": null,
"prefix_display": null,
"first_name": "GRACE",
"nickname": null,
"middle_name": null,
"last_name": "EDDY",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": true
}
],
"government_entities": []
},
{
"general_issue_code": "ECN",
"general_issue_code_display": "Economics/Economic Development",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
"foreign_entity_issues": null,
"lobbyists": [
{
"lobbyist": {
"id": 145494,
"prefix": null,
"prefix_display": null,
"first_name": "GRACE",
"nickname": null,
"middle_name": null,
"last_name": "EDDY",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": true
}
],
"government_entities": []
},
{
"general_issue_code": "CAW",
"general_issue_code_display": "Clean Air and Water (quality)",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation.",
"foreign_entity_issues": null,
"lobbyists": [
{
"lobbyist": {
"id": 145494,
"prefix": null,
"prefix_display": null,
"first_name": "GRACE",
"nickname": null,
"middle_name": null,
"last_name": "EDDY",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": true
}
],
"government_entities": []
}
],
"conviction_disclosures": [],
"foreign_entities": [
{
"name": "GENERATION INVESTMENT MANAGEMENT LLP",
"contribution": "0.00",
"ownership_percentage": "100.00",
"address": "20 Air Street",
"city": "London",
"state": null,
"state_display": null,
"country": "GB",
"country_display": "United Kingdom",
"ppb_city": "London",
"ppb_state": null,
"ppb_state_display": null,
"ppb_country": "GB",
"ppb_country_display": "United Kingdom"
}
],
"affiliated_organizations": []
}
Exported to Datasette Lite: https://lite.datasette.io/?csv=https://gist.github.com/simonw/a029af7104178e449533fc95b4c4c529
Was inspired to add this feature to Datasette:
Example of lobbying_activities
column that includes government_entities
:
[
{
"general_issue_code": "INS",
"general_issue_code_display": "Insurance",
"description": "Section 11503 of the 2023 National Defense Authorization Act",
"foreign_entity_issues": "",
"lobbyists": [
{
"lobbyist": {
"id": 144910,
"prefix": "ms",
"prefix_display": "MS.",
"first_name": "EMILY",
"nickname": null,
"middle_name": "ELAINE",
"last_name": "COYLE",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": false
},
{
"lobbyist": {
"id": 143755,
"prefix": null,
"prefix_display": null,
"first_name": "MARGO",
"nickname": null,
"middle_name": null,
"last_name": "KLOSTERMAN",
"suffix": null,
"suffix_display": null
},
"covered_position": null,
"new": false
}
],
"government_entities": [
{
"id": 2,
"name": "HOUSE OF REPRESENTATIVES"
},
{
"id": 1,
"name": "SENATE"
}
]
}
]
Only one record has conviction_disclosures
:
[
{
"lobbyist": {
"id": 73797,
"prefix": null,
"prefix_display": null,
"first_name": "JACOB",
"nickname": null,
"middle_name": null,
"last_name": "WOHL",
"suffix": null,
"suffix_display": null
},
"date": "2022-11-30",
"description": "Illegal campaign robo calls, telecommunications fraud. Cuyahoga County, Ohio. Offense took place in September 2020. Section 2913.05 Ohio code"
},
{
"lobbyist": {
"id": 46860,
"prefix": "mr",
"prefix_display": "MR.",
"first_name": "JACK",
"nickname": null,
"middle_name": "MACAULEY",
"last_name": "BURKMAN",
"suffix": null,
"suffix_display": null
},
"date": "2022-11-30",
"description": "Illegal campaign robo calls, telecommunications fraud. Cuyahoga County, Ohio. Offense took place in September 2020. Section 2913.05 Ohio code"
}
]
registrant
column is always populated and always in this shape:
{
"id": 401107756,
"url": "https://lda.senate.gov/api/v1/registrants/401107756/",
"house_registrant_id": null,
"name": "MODERN FORTIS LLC",
"description": null,
"address_1": "700 12th Street NW",
"address_2": "Suite 700",
"address_3": null,
"address_4": null,
"city": "Washington",
"state": "DC",
"state_display": "District of Columbia",
"zip": "20005",
"country": "US",
"country_display": "United States of America",
"ppb_country": "US",
"ppb_country_display": "United States of America",
"contact_name": "JOSEPH CAMERON",
"contact_telephone": "+1 774-306-1300",
"dt_updated": "2024-02-20T13:25:07.968728-05:00"
}
Likewise client
:
{
"id": 57149,
"url": "https://lda.senate.gov/api/v1/clients/57149/",
"client_id": 57149,
"name": "DIVE EQUIPMENT & MARKETING ASSOCIATION",
"general_description": "Non-profit international trade association of scuba diving industry professionals",
"client_government_entity": null,
"client_self_select": false,
"state": "CA",
"state_display": "California",
"country": "US",
"country_display": "United States of America",
"ppb_state": "CA",
"ppb_state_display": "California",
"ppb_country": "US",
"ppb_country_display": "United States of America",
"effective_date": "2023-09-01"
}
foreign_entities
example:
[
{
"name": "MR. ITAI MENDELSOHN",
"contribution": "0.00",
"ownership_percentage": "6.61",
"address": "Ha-Yetsira ST 3, Floor 15",
"city": "Ramat Gan",
"state": null,
"state_display": null,
"country": "IL",
"country_display": "Israel",
"ppb_city": "Ramat Gan",
"ppb_state": null,
"ppb_state_display": null,
"ppb_country": "IL",
"ppb_country_display": "Israel"
},
{
"name": "MR. ROY AMIR",
"contribution": "0.00",
"ownership_percentage": "6.61",
"address": "Ha-Yetsira St 3, Floor 15",
"city": "Ramat Gan",
"state": null,
"state_display": null,
"country": "IL",
"country_display": "Israel",
"ppb_city": "Ramat Gan",
"ppb_state": null,
"ppb_state_display": null,
"ppb_country": "IL",
"ppb_country_display": "Israel"
},
{
"name": "MR. DOR SKULER",
"contribution": "0.00",
"ownership_percentage": "6.61",
"address": "Ha-Yetsira St 3, Floor 15",
"city": "Ramat Gan",
"state": null,
"state_display": null,
"country": "IL",
"country_display": "Israel",
"ppb_city": "Ramat Gan",
"ppb_state": null,
"ppb_state_display": null,
"ppb_country": "IL",
"ppb_country_display": "Israel"
}
]
affiliated_organizations
example:
[
{
"name": "COUNTY OF DUPAGE, IL",
"url": null,
"address_1": "421 North County Farm Road",
"address_2": null,
"address_3": null,
"address_4": null,
"city": "Wheaton",
"state": "IL",
"state_display": "Illinois",
"zip": "60187",
"country": "US",
"country_display": "United States of America",
"ppb_city": null,
"ppb_state": "IL",
"ppb_state_display": "Illinois",
"ppb_country": "US",
"ppb_country_display": "United States of America"
}
]
I think that's examples of everything now. I'm going to write a sqlite-utils
importer that tries to get this stuff all in tables.
I'll need many-to-many tables between filings and affiliated_organizations
and foreign_entities
and conviction_disclosures
(which will also foreign key to lobbyist
)
Looks like I need to pull this out into a separate table too:
{
"general_issue_code": "LBR",
"general_issue_code_display": "Labor Issues/Antitrust/Workplace",
"description": "(1) Issues related to targets at sub-national, national, regional and global levels for net zero greenhouse gas emissions, emissions reductions and deforestation. (2) Issues related to carbon & other greenhouse gas pricing. (3) Issues related to high emitting technologies and activities and incentives for low carbon solutions. (4) Issues related to protection of nature. (5) Issues related to barriers to net zero transition and sustainable capitalism and sustainable investment. (6) Issues related to corporate sustainability disclosure, due diligence and transition planning, and sustainable investment regulation."
}
That's the first set of keys in a lobbying_activities
record. I'm going to call that an issue_code
.
general_issue_code
-> code
general_issue_code_display
-> display
description
-> description
.
foreign_entity_issues
is occasionally present on a lobbying activity where it's either a blank string or null or a populated string:
This horrible query got me the lobbyist counts:
with activities as (
select value from json_each(lobbying_activities), raw
),
lobbyist_lists as (
select json_extract(value, '$.lobbyists') as l2 from activities
),
lobbyists as (
select json_extract(value, '$.lobbyist') as lobbyist from json_each(l2), lobbyist_lists
)
select lobbyist, count(*) from lobbyists group by lobbyist order by count(*) desc
lobbyist | count(*) |
---|---|
{"id":52794,"prefix":null,"prefix_display":null,"first_name":"JUSTIN","nickname":null,"middle_name":null,"last_name":"LEBLANC","suffix":null,"suffix_display":null} | 17 |
{"id":141889,"prefix":null,"prefix_display":null,"first_name":"STACY","nickname":null,"middle_name":null,"last_name":"MCBRIDE","suffix":null,"suffix_display":null} | 16 |
{"id":145494,"prefix":null,"prefix_display":null,"first_name":"GRACE","nickname":null,"middle_name":null,"last_name":"EDDY","suffix":null,"suffix_display":null} | 15 |
{"id":70784,"prefix":null,"prefix_display":null,"first_name":"ERIC","nickname":null,"middle_name":null,"last_name":"LAUSTEN","suffix":null,"suffix_display":null} | 14 |
{"id":145482,"prefix":null,"prefix_display":null,"first_name":"PETER","nickname":null,"middle_name":null,"last_name":"DEFAZIO","suffix":null,"suffix_display":null} | 13 |
{"id":145265,"prefix":null,"prefix_display":null,"first_name":"NADEGE","nickname":null,"middle_name":null,"last_name":"BLANC","suffix":null,"suffix_display":null} | 13 |
{"id":145264,"prefix":null,"prefix_display":null,"first_name":"JEAN","nickname":null,"middle_name":"ALI","last_name":"BLANC","suffix":null,"suffix_display":null} | 13 |
{"id":144980,"prefix":null,"prefix_display":null,"first_name":"TYLER","nickname":null,"middle_name":null,"last_name":"NEWMAN","suffix":null,"suffix_display":null} | 12 |
{"id":145465,"prefix":null,"prefix_display":null,"first_name":"KIM","nickname":null,"middle_name":null,"last_name":"CORBIN","suffix":null,"suffix_display":null} | 11 |
{"id":58561,"prefix":null,"prefix_display":null,"first_name":"MATHEW","nickname":null,"middle_name":null,"last_name":"LAPINSKI","suffix":null,"suffix_display":null} | 10 |
I may need to do something to help Datasette better display M2M stuff here. A very quick hack that lets you execute extra SQL queries in table cells to pull in related data perhaps?
I'm going to write a single Python function which, given a filing, inserts everything about it into a bunch of different tables - creating those tables if they do not exist.
Found one with different values for is_new
and covered_position
in lobbying_activities_lobbyist
:
Fun query:
select
last_name,
json_group_array(id || ': ' || name),
count(*)
from
lobbyists
group by
last_name
having
count(*) > 1
order by
count(*) desc
It looks like the really interesting data in the filings isn't available in the JSON - e.g. on https://lda.senate.gov/filings/public/filing/7466c7ee-8bd1-4496-94df-5d5322d25e2d/print/w
So I built a Val that can get it: https://simonw-pinkjellyfish.web.val.run/?url=https://lda.senate.gov/filings/public/filing/7466c7ee-8bd1-4496-94df-5d5322d25e2d/print/
const { TurndownService } = await import("npm:turndown");
export default async function(req: Request): Promise<Response> {
const searchParams = new URL(req.url).searchParams;
const url = searchParams.get("url");
const response = await fetch(url);
const html = await response.text();
const issues =
html.split("<p>16. Specific lobbying issues</p>")[1].split("17. House(s) of Congress and Federal agencies")[0];
const markdown = new TurndownService().turndown(issues);
return Response.json({ content: markdown });
}
Updated that to return just plain text:
const markdown = new turndown.default().turndown(issues);
return new Response(markdown);
}
Here's my Python so far:
all_docs = []
for file in path.glob("2024/*.json"):
data = json.load(open(file))["results"]
all_docs.extend(data)
def lobbyist_string(lobbyist, include_id=True):
bits = []
if include_id:
bits.append(str(lobbyist["id"]))
for key in ("prefix_display", "first_name", "nickname", "middle_name", "last_name", "suffix_display"):
value = lobbyist.get(key, "")
if value:
bits.append(str(value))
return " ".join(bits)
def insert_filing(db, filing):
filing = copy.deepcopy(filing)
client = filing.pop("client")
registrant = filing.pop("registrant")
filing["registrant_id"] = registrant["id"]
filing["client_id"] = client["id"]
db["clients"].insert(client, pk="id", alter=True, replace=True)
db["registrants"].insert(registrant, pk="id", alter=True, replace=True)
lobbying_activities = filing.pop("lobbying_activities")
filing_id = filing["filing_uuid"]
db["filings"].insert(
filing,
pk="filing_uuid",
foreign_keys=(
("client_id", "clients", "id"),
("registrant_id", "registrants", "id"),
),
alter=True,
replace=True,
column_order=("client_id", "registrant_id"),
)
# The lobbying activities are the most complicated bit
for lobbying_activity in lobbying_activities:
# general_issue_code/general_issue_code_display/description becomes issue_code
issue_code = {
"code": lobbying_activity["general_issue_code"],
"name": lobbying_activity["general_issue_code_display"],
"description": lobbying_activity["description"],
}
db["issue_codes"].insert(issue_code, pk="code", replace=True)
foreign_entity_issues = lobbying_activity.get("foreign_entity_issues") or ""
government_entities = lobbying_activity.get("government_entities") or []
lobbyists = lobbying_activity.get("lobbyists") or []
lobbying_activity_id = db["lobbying_activities"].insert({
"filing_id": filing_id,
"issue_code": issue_code["code"],
"lobbyists": [lobbyist_string(lobbyist["lobbyist"]) for lobbyist in lobbyists],
"government_entities": [ent["name"] for ent in government_entities],
"foreign_entity_issues": foreign_entity_issues,
}, pk="id", alter=True, foreign_keys=(
("filing_id", "filings", "filing_uuid"),
("issue_code", "issue_codes", "code"),
)).last_pk
# Insert the lobbyist and relate it back to both tables
for lobbyist in lobbyists:
covered_position = lobbyist["covered_position"]
is_new = lobbyist["new"]
db["lobbyists"].insert(
dict(lobbyist["lobbyist"], name=lobbyist_string(lobbyist["lobbyist"], include_id=False)),
pk="id", replace=True, alter=True
)
lobbyist_id = lobbyist["lobbyist"]["id"]
db["lobbying_activities_lobbyist"].insert({
"lobbyist_id": lobbyist_id,
"lobbying_activity_id": lobbying_activity_id,
"filing_id": filing_id,
"is_new": is_new,
"covered_position": covered_position,
}, alter=True, pk="id", foreign_keys=(
("lobbyist_id", "lobbyists", "id"),
("lobbying_activity_id", "lobbying_activities", "id"),
("filing_id", "filings", "filing_uuid"),
))
db = sqlite_utils.Database("/tmp/filings.db")
for table in db.table_names():
db[table].drop()
for doc in all_docs:
insert_filing(db, doc)
The report I created a Val for looks to be in a pretty rare format - I found https://lda.senate.gov/filings/public/filing/bbeaaa0f-187c-4a9f-8a30-31ba78cc242f/print/ with the same format (possibly filing_type
of 1A
) but no others.
This could be useful:
curl 'https://lda.senate.gov/filings/public/filing/00ff141a-e343-4a04-9b37-39d63d1a81d6/print/' | strip-tags -m
Strips tags and minifies a bit. Might be good input to an LLM.
Using data from this API: https://lda.senate.gov/api/