Open guanxin522 opened 7 years ago
按照课本用 pip install pandas 安装,装上的应该是最新版的 pandas,库里有些接口已经被弃用了。
下面是根据作者的思路重新写的代码(主函数入口那里请注意一下,上传文本时格式出了问题):
from __future__ import print_function

import sys
from collections import OrderedDict

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Python 2 only: switch the interpreter-wide default encoding to UTF-8 so that
# implicit str/unicode conversions of non-ASCII text do not raise. Python 3
# has no `reload` builtin and no `sys.setdefaultencoding`, and already
# defaults to UTF-8, so the hack is skipped there.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")
def read_data(path='A0201.xls'):
    """Parse the workbook into a nested ``group -> column -> age -> value`` map.

    The sheet is read twice with different ``skiprows`` values so that pandas
    picks a different header row each time:

    * ``skiprows=2``: the first data row supplies the group names (every
      third cell starting at column 1) and the first column, from the third
      data row on, supplies the age labels.
    * ``skiprows=4``: the data rows line up one-to-one with those age labels,
      and the header row supplies the per-group column labels.

    NOTE(review): the three columns per group are presumably total / male /
    female counts of a census table -- confirm against the workbook.

    Args:
        path: Location of the Excel workbook. Defaults to ``'A0201.xls'`` in
            the current working directory.

    Returns:
        An ``OrderedDict`` keyed by group name (spaces removed). Each value is
        an ``OrderedDict`` keyed by column label (pandas' ``.N`` duplicate
        suffix removed) whose values are ``OrderedDict`` objects mapping age
        label (spaces removed) to the cell value.
    """
    header_frame = pd.read_excel(path, skiprows=2)

    # Every group occupies three adjacent columns after the leading age
    # column, so the group names sit in columns 1, 4, 7, ...
    race_list = header_frame.iloc[0, 1::3].tolist()

    # Must be a real list: it is indexed by row position below, and on
    # Python 3 ``map`` would only return a one-shot iterator.
    age_list = [
        str(label).replace(' ', '')
        for label in header_frame.iloc[2:, 0].tolist()
    ]

    data_frame = pd.read_excel(path, skiprows=4)

    def get_number(lines):
        """Convert one group's columns into ``{column label: {age: value}}``."""
        ret_dict = OrderedDict()
        for column, cells in lines.to_dict('dict').items():
            by_age = OrderedDict()
            for row_index, value in cells.items():
                by_age[age_list[int(row_index)]] = value
            # pandas renames repeated headers to "name.1", "name.2", ...;
            # strip that suffix to recover the original label.
            ret_dict[column.split('.', 1)[0]] = by_age
        return ret_dict

    # Start column of every complete three-column group, derived from the
    # actual sheet width instead of a hard-coded column count.
    group_starts = range(1, data_frame.shape[1] - 2, 3)

    result_dict = OrderedDict()
    for race, start in zip(race_list, group_starts):
        race_name = str(race.replace(' ', ''))
        result_dict[race_name] = get_number(data_frame.iloc[:, start:start + 3])
    return result_dict
def calc_mean(d):
    """Return the count-weighted mean of the numeric age keys in ``d``.

    Args:
        d: Mapping of age label (string) to a count. Only labels made up
            entirely of digits take part in the calculation; every other
            label is ignored.

    Returns:
        ``sum(age * count) / sum(count)`` as a float, taken over the numeric
        labels only.

    Raises:
        ZeroDivisionError: If the counts of the numeric labels sum to zero
            (including when ``d`` is empty or has no numeric labels).
    """
    total = 0
    total_age = 0
    for age, count in d.items():
        # Non-numeric labels cannot be turned into an age, so skip them.
        if age.isdigit():
            total += count  # running sum of the counts included so far
            total_age += int(age) * count
    if total == 0:
        # Same exception type the bare division would raise, with a message
        # that says what was wrong with the input.
        raise ZeroDivisionError(
            "cannot compute a mean age: no numeric age keys with a non-zero count"
        )
    return total_age / float(total)
if __name__ == '__main__':
    import io
    import json

    # Dump the parsed table to record.json. ensure_ascii=False keeps the
    # non-ASCII text readable in the file instead of \uXXXX escapes.
    payload = json.dumps(read_data(), ensure_ascii=False, indent=4)
    # On Python 2 json.dumps may hand back a byte string; io.open in text
    # mode only accepts unicode, so decode it first.
    if isinstance(payload, bytes):
        payload = payload.decode("utf-8")
    # Write UTF-8 explicitly so the output does not depend on the platform's
    # default encoding.
    with io.open("record.json", "w", encoding="utf-8") as f:
        f.write(payload)
def read_excel(): """读取人口普查分民族/年龄/性别统计 """ excel_content = pd.read_excel("A0201.xls", skiprows=2) race_list = excel_content.irow(0)[1:][::3].tolist()
去掉字符中间的空格
这是作者给出的 8.1.1 节的代码,运行不了,报错 AttributeError: 'DataFrame' object has no attribute 'irow'。查了一下,原来是 irow 和 icol 已经被弃用了。看了官方文档,应该用 iloc 代替(文档全是英文,我很难看懂)。换成 iloc 之后运行又出错:'DataFrame' object has no attribute 'tolist',还是运行不了。另外注释太少了,这本书属于入门书,注释少就看不懂,比如 excel_content.irow(0)[1:][::3].tolist() 这一句就看不懂;书前面没有讲 lambda,代码里用了却没有加注释,读起来很困难。这段代码运行不起来,第 8 章后面的代码也运行不起来,希望作者及时更正一下。