jtleider / censusdata

Download data from Census API
MIT License
139 stars 29 forks source link

SSLError: HTTPSConnectionPool(host='api.census.gov', port=443): Max retries exceeded with url: #11

Closed MorganWeiss closed 4 years ago

MorganWeiss commented 4 years ago

I am getting this error now when I am trying to download data with your api:

`SSLError: HTTPSConnectionPool(host='api.census.gov', port=443): Max retries exceeded with url: /data/2012/acs/acs5?get=NAME,B24123_377E,B24123_378E,B24123_379E,B24123_380E,B24123_381E,B24123_382E,B24123_383E,B24123_384E,B24123_385E,B24123_386E,B24123_387E,B24123_388E,B24123_389E,B24123_390E,B24123_391E,B24123_392E,B24123_393E,B24123_394E,B24123_395E,B24123_396E,B24123_397E,B24123_398E,B24123_399E,B24123_400E,B24123_401E,B24123_402E,B24123_403E,B24123_404E,B24123_405E,B24123_406E,B24123_407E,B24123_408E,B24123_409E,B24123_410E,B24123_411E,B24123_412E,B24123_413E,B24123_414E,B24123_415E,B24123_416E,B24123_417E,B24123_418E,B24123_419E,B24123_420E,B24123_421E,B24123_422E,B24123_423E,B24123_424E,B24123_425E&for=tract:&in=state:01+county:&key=e39a53c23358c749629da6f31d8f03878d4088d6 (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')])")))

I tried the same code on another computer and it worked fine but I need it to work on this computer. How do I fix this?

Here is my entire code:

import pandas as pd
import censusdata
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 2)
#import statsmodels.formula.api as sm
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import censusgeocode as cg
import numpy as np
from numbers import Number
import plotly
import matplotlib.pyplot as plt
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import requests
import pandas
import geopandas
import json
import math
from haversine import haversine
from ipfn import ipfn
import networkx
from matplotlib import pyplot
from matplotlib import patheffects
from shapely.geometry import LineString, MultiLineString

# First batch of variable codes: B24123_377E through B24123_526E,
# followed by B24124_001E through B24124_048E (198 codes in total).
variable_list1 = (
    ['B24123_{}E'.format(number) for number in range(377, 527)]
    + ['B24124_{:03d}E'.format(number) for number in range(1, 49)]
)
# Second batch of variable codes: B24124_049E through B24124_251E
# (203 codes in total), zero-padded to three digits.
variable_list2 = ['B24124_{:03d}E'.format(number) for number in range(49, 252)]

# Keep both batches of variable codes together so each can be downloaded in turn.
all_variable_lists = [variable_list1, variable_list2]
# Show how many variable codes the first batch contains.
print(len(variable_list1))
#2) For each year, download the relevant variables for each tract
def download_year(year,variable_list,State,County,Tract,key='e39a53c23358c749629da6f31d8f03878d4088d6'):
    """Download the requested 'acs5' variables for one year and geography.

    Args:
        year: Survey year passed to ``censusdata.download``; also written
            (as a string) into the ``Year`` column of the result.
        variable_list: Variable codes to request.
        State: Value for the ``state`` level of the geography.
        County: Value for the ``county`` level of the geography.
        Tract: Value for the ``tract`` level of the geography.
        key: Census API key sent with the request. Defaults to the key this
            script has always used, so existing callers are unaffected.

    Returns:
        The object returned by ``censusdata.download`` with an added
        ``Year`` column holding ``str(year)``.
    """
    geography = censusdata.censusgeo(
        [('state', State), ('county', County), ('tract', Tract)]
    )
    df = censusdata.download('acs5', year, geography, variable_list, key=key)
    df['Year'] = str(year)
    return df
#3) Define function to download for a single year and state 
def callback_arg(i,variable_list,year):
    """Download every county and tract of state number ``i`` for one year.

    The download is best-effort: a failure for one state is reported and
    skipped so the remaining states can still be fetched.

    Args:
        i: Numeric state code; values below 10 are sent with a leading zero.
        variable_list: Variable codes to request.
        year: Survey year to download.

    Returns:
        The frame produced by ``download_year``, or ``None`` when the
        download raised an error.
    """
    print('Downloading - ',year,'State', i,' of 57')
    # State codes are sent as two-character strings, so single digits are
    # prefixed with '0'.
    state_code = '0' + str(i) if i < 10 else str(i)
    try:
        return download_year(year, variable_list, state_code, '*', '*')
    except Exception as exc:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still stop a long-running download,
        # and report the error instead of hiding it.
        print('Download failed - ', year, 'State', i, ':', repr(exc))
        return None

#3) Function to download for all states and all years, do some slight formatting
def download_all_data(variable_list,max_year):
    """Download all states for every year from 2012 to ``max_year`` and tidy up.

    Each state/year is fetched once through ``callback_arg``; states whose
    download failed (``None``) are skipped. The combined frame is converted
    to strings and gains ``state``, ``Census_tract`` and ``County_name``
    columns parsed from the original index labels.

    Args:
        variable_list: Variable codes to request.
        max_year: Last year to download (inclusive); the first is 2012.

    Returns:
        A string-typed DataFrame with the former index in a
        ``Location+Type`` column plus the three parsed columns.

    Raises:
        ValueError: If no state/year could be downloaded at all.
    """
    frames = []
    for year in range(2012, max_year + 1):
        for i in range(0, 57):
            state_df = callback_arg(i, variable_list, year)
            if state_df is not None:
                frames.append(state_df)
    if not frames:
        raise ValueError(
            'No data could be downloaded for years 2012-{}'.format(max_year)
        )

    # Concatenate once at the end instead of growing a frame with repeated
    # DataFrame.append calls (quadratic, and removed in pandas 2.0).
    df2 = pd.concat(frames).reset_index()
    df2 = df2.rename(columns = {"index": "Location+Type"}).astype(str)

    # NOTE(review): assumes each label reads like
    # "Census Tract <n>, <county>, <state>: ..." -- confirm against the
    # censusdata index format.
    location = df2["Location+Type"].str.split(':').str[0]
    df2['state'] = location.str.split(', ').str[2]
    # Take the third space-separated token of the first comma-separated part
    # for every row (not just the value at row label 0).
    df2['Census_tract'] = location.str.split(',').str[0].str.split(' ').str[2]
    df2['County_name'] = location.str.split(', ').str[1]
    return df2
#4) Write the resulting data to a CSV file
def write_to_csv(df2,name = 'Employment Data Part 9'):
    """Save ``df2`` as CSV by calling its ``to_csv`` method with ``name`` as the target."""
    target_path = name
    df2.to_csv(target_path)
#5) Run the entire download sequence for both variable lists and write the merged result to CSV

# Download each batch of variables for 2012 through 2018, one frame per batch.
list_of_dfs = []
for var_list in all_variable_lists:
    list_of_dfs += [download_all_data(var_list, 2018)]

# Give both frames a positional 'index' column, then join them on that
# column together with the shared descriptive columns.
x1, x2 = list_of_dfs[0].reset_index(), list_of_dfs[1].reset_index()
x3 = pd.merge(
    x1,
    x2,
    on=['index', 'Location+Type', 'Year', 'state', 'Census_tract', 'County_name'],
)
write_to_csv(x3)
`
jtleider commented 4 years ago

It looks like this may be an issue with the networking setup or SSL certificates on the machine. This does not look like an issue that would be specific to the censusdata package, which relies on the requests package for contacting the census API.

You could try manually going to the API in a web browser and seeing if you still get an error. If you do not, tell me exactly where in the code you sent this error comes up and I may be able to look into this further, although I suspect I will have a limited ability to troubleshoot this issue.