roberthsu2003 / __2024_07_29_1_3_night__

2024_07_29_1_3_巨匠AI
22 stars 2 forks source link

請使用以下的 DataFrame,並以 .py 檔完成下列題目。 #15

Open roberthsu2003 opened 6 days ago

roberthsu2003 commented 6 days ago
import os
import pandas as pd
import json

def parse_data(filename):
    """Load one ridership CSV and replace its station codes with station names.

    The CSV at ``filename`` is read with pandas and four of its columns are
    renamed to their Chinese labels.  The station list is loaded from
    '車站基本資料集.json' (opened relative to the current working directory)
    and joined on the station code; ``merge`` uses its default inner join, so
    rows whose code is not in the station list are dropped.

    Args:
        filename: Path of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A DataFrame with the columns 乘車日 (converted to datetime),
        車站名稱, 進站人數 and 出站人數.
    """
    column_names = {
        "trnOpDate": "乘車日",
        "staCode": "車站代碼",
        "gateInComingCnt": "進站人數",
        "gateOutGoingCnt": "出站人數",
    }
    ridership = pd.read_csv(filename).rename(columns=column_names)

    with open('車站基本資料集.json', encoding='utf-8') as station_file:
        raw_stations = json.load(station_file)
    # Cast the station code to an integer so it can be matched against the
    # CSV's code column.
    stations = pd.DataFrame(
        raw_stations, columns=['stationCode', 'stationName']
    ).astype({'stationCode': 'int32'})

    result = (
        ridership
        .merge(stations, left_on='車站代碼', right_on='stationCode')
        .rename(columns={'stationName': '車站名稱'})
        .reindex(columns=['乘車日', '車站名稱', '進站人數', '出站人數'])
    )
    # The date is stringified first so to_datetime parses it as text
    # (presumably YYYYMMDD values — confirm against the CSV files).
    result['乘車日'] = pd.to_datetime(result['乘車日'].astype(str))
    return result

def main():
    """Combine every ridership CSV into one date-indexed frame and print its summary.

    Looks in the '每日各站進出站人數' directory under the current working
    directory, keeps the entries whose name contains '每日各站進出站人數',
    parses them in sorted filename order with ``parse_data``, concatenates
    the results and uses 乘車日 as the index.
    """
    csv_dir = os.path.join(os.getcwd(), '每日各站進出站人數')
    csv_names = sorted(
        name for name in os.listdir(csv_dir) if '每日各站進出站人數' in name
    )
    frames = [parse_data(os.path.join(csv_dir, name)) for name in csv_names]
    df_done1 = pd.concat(frames).set_index('乘車日')
    # DataFrame.info() writes the summary itself and returns None, so it is
    # called directly; wrapping it in print() would add a stray "None" line.
    df_done1.info()

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

取出所有基隆的資料

取出2021年所有台北車站的資料(資料集中的車站名稱寫作「臺北」)

FriendlyLu commented 6 days ago
# 請使用以下的dataFrame,並且使用py檔。 #15
# 取出所有基隆的資料
# 取出2021年所有台北車站的資料

import os
import pandas as pd
import json

def parse_data(filename):
    """Read a ridership CSV and return it with station names and parsed dates.

    Steps: read ``filename`` with pandas, rename the four source columns to
    Chinese labels, load the station list from '車站基本資料集.json' in the
    current working directory, inner-join the two on the station code
    (``pd.merge`` default), keep four columns and convert 乘車日 to datetime.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A DataFrame with columns 乘車日, 車站名稱, 進站人數, 出站人數.
    """
    stations_path = '車站基本資料集.json'
    wanted_columns = ['乘車日', '車站名稱', '進站人數', '出站人數']

    daily = pd.read_csv(filename)
    daily = daily.rename(columns={
        "trnOpDate": "乘車日",
        "staCode": "車站代碼",
        "gateInComingCnt": "進站人數",
        "gateOutGoingCnt": "出站人數",
    })

    with open(stations_path, encoding='utf-8') as fp:
        station_table = pd.DataFrame(
            json.load(fp), columns=['stationCode', 'stationName']
        )
    # Integer codes are needed for the join against the CSV's code column.
    station_table['stationCode'] = station_table['stationCode'].astype('int32')

    joined = pd.merge(
        daily, station_table, left_on='車站代碼', right_on='stationCode'
    )
    joined = joined.rename(columns={'stationName': '車站名稱'})
    trimmed = joined.reindex(columns=wanted_columns)
    # Stringify before parsing so the date values are parsed as text.
    parsed_dates = pd.to_datetime(trimmed['乘車日'].astype(str))
    return trimmed.assign(**{'乘車日': parsed_dates})

def main():
    """Build the combined ridership frame and print summaries of two subsets.

    Every entry of the '每日各站進出站人數' directory (under the current
    working directory) whose name contains '每日各站進出站人數' is parsed
    with ``parse_data`` in sorted filename order.  The results are
    concatenated and indexed by 乘車日.  Summaries are then printed for the
    whole frame, for the Keelung rows, and for the 2021 Taipei rows.
    """
    csv_dir = os.path.join(os.getcwd(), '每日各站進出站人數')
    all_csv_files = sorted(
        filename for filename in os.listdir(csv_dir)
        if '每日各站進出站人數' in filename
    )
    all_final_dfs = [
        parse_data(os.path.join(csv_dir, filename))
        for filename in all_csv_files
    ]
    df_done1 = pd.concat(all_final_dfs).set_index('乘車日')
    # DataFrame.info() prints the summary itself and returns None, so it is
    # called directly rather than through print() (which would emit "None").
    df_done1.info()

    # All rows for Keelung station.
    df_keelung = df_done1.query('車站名稱 == "基隆"')
    df_keelung.info()

    # All 2021 rows for Taipei station.  A year mask is used instead of a
    # string label slice because the slice requires the concatenated index
    # to be sorted, while the mask works for any row order.
    in_2021 = df_done1.index.year == 2021
    df_taipei_2021 = df_done1[in_2021].query('車站名稱 == "臺北"')
    df_taipei_2021.info()

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
yawenny commented 5 days ago
import os
import pandas as pd
import json

def parse_data(filename):
    """Turn one ridership CSV into a frame keyed by date and station name.

    The CSV is read and its columns renamed.  The station list from
    '車站基本資料集.json' (relative to the current working directory) is then
    inner-joined on the station code, so rows with codes missing from the
    list are not returned.

    Args:
        filename: Path of the CSV file handed to ``pd.read_csv``.

    Returns:
        A DataFrame holding 乘車日 (datetime), 車站名稱, 進站人數 and
        出站人數, in that column order.
    """
    def read_ridership():
        # Map the CSV's field names onto the Chinese column labels.
        labels = dict(
            trnOpDate="乘車日",
            staCode="車站代碼",
            gateInComingCnt="進站人數",
            gateOutGoingCnt="出站人數",
        )
        return pd.read_csv(filename).rename(columns=labels)

    def read_stations():
        with open('車站基本資料集.json', encoding='utf-8') as handle:
            records = json.load(handle)
        frame = pd.DataFrame(records, columns=['stationCode', 'stationName'])
        # Integer codes so the join key matches the CSV's code column.
        return frame.astype({'stationCode': 'int32'})

    # The CSV is read before the station list, so a missing CSV is reported
    # first.
    ridership = read_ridership()
    stations = read_stations()

    merged = pd.merge(
        ridership, stations, left_on='車站代碼', right_on='stationCode'
    )
    named = merged.rename(columns={'stationName': '車站名稱'})
    output = named.reindex(columns=['乘車日', '車站名稱', '進站人數', '出站人數'])
    date_text = output['乘車日'].astype(str)
    output['乘車日'] = pd.to_datetime(date_text)
    return output

def main():
    """Print the Keelung rows and the 2021 Taipei rows of the combined data.

    Directory entries under '每日各站進出站人數' (in the current working
    directory) whose name contains '每日各站進出站人數' are parsed with
    ``parse_data`` in sorted order, concatenated and indexed by 乘車日.
    """
    folder = os.path.join(os.getcwd(), '每日各站進出站人數')
    frames = []
    for entry in sorted(os.listdir(folder)):
        if '每日各站進出站人數' not in entry:
            continue
        frames.append(parse_data(os.path.join(folder, entry)))
    all_rows = pd.concat(frames).set_index('乘車日')

    # Rows for Keelung station.
    keelung = all_rows.query('車站名稱=="基隆"')
    print(keelung)

    # Rows for Taipei station, narrowed to 2021 through partial string
    # indexing on the datetime index.
    taipei = all_rows.query('車站名稱=="臺北"')
    print(taipei.loc['2021'])

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Oscar-Lee-20211298 commented 5 days ago
import os
import pandas as pd
import json

def parse_data(filename):
    """Parse one ridership CSV into a tidy frame with station names.

    Args:
        filename: Path of the CSV file to load with ``pd.read_csv``.

    Returns:
        A DataFrame with the columns 乘車日, 車站名稱, 進站人數 and 出站人數.
        乘車日 holds datetimes.  Only rows whose station code appears in
        '車站基本資料集.json' (read from the current working directory)
        are kept.
    """
    final_columns = ('乘車日', '車站名稱', '進站人數', '出站人數')

    raw = pd.read_csv(filename)
    renamed = raw.rename(
        columns={
            "trnOpDate": "乘車日",
            "staCode": "車站代碼",
            "gateInComingCnt": "進站人數",
            "gateOutGoingCnt": "出站人數",
        }
    )

    with open('車站基本資料集.json', encoding='utf-8') as source:
        station_records = json.load(source)
    station_frame = pd.DataFrame(
        station_records, columns=['stationCode', 'stationName']
    )
    # Integer codes so the join key matches the CSV's code column.
    station_frame = station_frame.astype({'stationCode': 'int32'})

    # how='inner' is merge's default, spelled out here for the reader.
    with_names = renamed.merge(
        station_frame,
        how='inner',
        left_on='車站代碼',
        right_on='stationCode',
    ).rename(columns={'stationName': '車站名稱'})

    tidy = with_names.reindex(columns=list(final_columns))
    tidy['乘車日'] = pd.to_datetime(tidy['乘車日'].astype(str))
    return tidy

def main():
    """Build the combined ridership frame and summarise two station subsets.

    Entries of the '每日各站進出站人數' directory (under the current working
    directory) whose name contains '每日各站進出站人數' are parsed with
    ``parse_data`` in sorted filename order, concatenated and indexed by
    乘車日.  Summaries are printed for the whole frame, the Keelung rows and
    the 2021 Taipei rows.
    """
    csv_dir = os.path.join(os.getcwd(), '每日各站進出站人數')
    sorted_csv_files = sorted(
        filename for filename in os.listdir(csv_dir)
        if '每日各站進出站人數' in filename
    )
    all_final_dfs = [
        parse_data(os.path.join(csv_dir, filename))
        for filename in sorted_csv_files
    ]
    df_done1 = pd.concat(all_final_dfs).set_index('乘車日')
    # DataFrame.info() prints the summary itself and returns None, so it is
    # called directly rather than through print() (which would emit "None").
    df_done1.info()

    # All rows for Keelung station.
    keelung_data = df_done1[df_done1['車站名稱'] == '基隆']

    # All 2021 rows for Taipei station.
    # NOTE(review): the station list appears to spell the name '臺北'; the
    # original literal '台北' is kept as an accepted alternative — confirm
    # against 車站基本資料集.json.
    taipei_names = ['臺北', '台北']
    is_taipei = df_done1['車站名稱'].isin(taipei_names)
    taipei_2021_data = df_done1[is_taipei & (df_done1.index.year == 2021)]

    print("基隆的資料:")
    keelung_data.info()

    print("2021年台北車站的資料:")
    taipei_2021_data.info()

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
grace588 commented 1 day ago
import os
import pandas as pd
import json

def parse_data(filename):
    """Read a ridership CSV, attach station names and parse the travel date.

    The station list comes from '車站基本資料集.json', opened relative to the
    current working directory.  The join is pandas' default inner merge on
    the station code.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A DataFrame with columns 乘車日 (datetime), 車站名稱, 進站人數,
        出站人數.
    """
    source_to_label = {
        "trnOpDate": "乘車日",
        "staCode": "車站代碼",
        "gateInComingCnt": "進站人數",
        "gateOutGoingCnt": "出站人數",
    }
    counts = pd.read_csv(filename).rename(source_to_label, axis='columns')

    with open('車站基本資料集.json', encoding='utf-8') as json_file:
        payload = json.load(json_file)
    lookup = pd.DataFrame(payload, columns=['stationCode', 'stationName'])
    # Integer codes so the join key matches the CSV's code column.
    lookup = lookup.astype({'stationCode': 'int32'})

    combined = pd.merge(
        counts, lookup, left_on='車站代碼', right_on='stationCode'
    )
    combined = combined.rename({'stationName': '車站名稱'}, axis='columns')
    selected = combined.reindex(
        columns=['乘車日', '車站名稱', '進站人數', '出站人數']
    )
    # Stringify before parsing so the date values are parsed as text.
    selected['乘車日'] = pd.to_datetime(selected['乘車日'].astype(str))
    return selected

def main():
    """Print the Keelung rows and the 2021 Taipei rows of the combined data.

    Entries of the '每日各站進出站人數' directory (under the current working
    directory) whose name contains '每日各站進出站人數' are parsed with
    ``parse_data`` in sorted filename order, concatenated and indexed by
    乘車日.
    """
    csv_dir = os.path.join(os.getcwd(), '每日各站進出站人數')
    sorted_csv_files = sorted(
        filename for filename in os.listdir(csv_dir)
        if '每日各站進出站人數' in filename
    )
    all_final_dfs = [
        parse_data(os.path.join(csv_dir, filename))
        for filename in sorted_csv_files
    ]
    df_done1 = pd.concat(all_final_dfs).set_index('乘車日')

    # 1. All rows for Keelung station.
    keelung_data = df_done1[df_done1['車站名稱'] == '基隆']
    print("基隆資料:")
    print(keelung_data)

    # 2. All 2021 rows for Taipei station.
    # NOTE(review): the station list appears to name the station '臺北', not
    # '台北車站'; the original literal and the '台北' spelling are kept as
    # accepted alternatives — confirm against 車站基本資料集.json.
    taipei_names = ['臺北', '台北', '台北車站']
    is_taipei = df_done1['車站名稱'].isin(taipei_names)
    taipei_2021_data = df_done1[is_taipei & (df_done1.index.year == 2021)]
    print("2021年台北車站資料:")
    print(taipei_2021_data)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()