parth1902 / Scrape-FBref-data

To scrape data from fbref.com provided by StatsBomb
115 stars 44 forks source link

None type has no attribute text #4

Open NADoebler opened 1 year ago

NADoebler commented 1 year ago

Hi there,

I am getting the following error:

` --------------------------------------------------------------------------- AttributeError Traceback (most recent call last) c:\Users**\Work Folders\Documents\Fußball\Scrapping.ipynb Cell 8 in <cell line: 1>() ----> [1](vscode-notebook-cell:/c%3A/Users/**/Work%20Folders/Documents/Fu%C3%9Fball/Scrapping.ipynb#X10sZmlsZQ%3D%3D?line=0) BL_teams_1 = get_team_data('https://fbref.com/en/comps/20/','/Bundesliga-Stats', 'for')

c:\Users**\Work Folders\Documents\Fußball\Scrapping.ipynb Cell 8 in get_team_data(top, end, text) 158 def get_team_data(top,end,text): --> [159](vscode-notebook-cell:/c%3A/Users/***/Work%20Folders/Documents/Fu%C3%9Fball/Scrapping.ipynb#X10sZmlsZQ%3D%3D?line=158) df1 = frame_for_category_team('stats',top,end,stats3,text) 160 df2 = frame_for_category_team('keepers',top,end,keepers3,text) 161 df3 = frame_for_category_team('keepersadv',top,end,keepersadv2,text)

c:\Users**\Work Folders\Documents\Fußball\Scrapping.ipynb Cell 8 in frame_for_category_team(category, top, end, features, text) 123 url = (top + category + end) 124 player_table, team_table = get_tables(url,text) --> [125](vscode-notebook-cell:/c%3A/Users/***/Work%20Folders/Documents/Fu%C3%9Fball/Scrapping.ipynb#X10sZmlsZQ%3D%3D?line=124) df_team = get_frame_team(features, team_table) 126 return df_team

c:\Users**\Work Folders\Documents\Fußball\Scrapping.ipynb Cell 8 in get_frame_team(features, team_table) 94 for row in rows_squad: [95](vscode-notebook-cell:/c%3A/Users/*****Work%20Folders/Documents/Fu%C3%9Fball/Scrapping.ipynb#X10sZmlsZQ%3D%3D?line=94) if(row.find('th',{"scope":"row"}) != None): ---> 96 name = row.find('th',{"data-stat":"squad"}).text.strip().encode().decode("utf-8") 97 if 'squad' in pre_df_squad: 98 pre_df_squad['squad'].append(name)

AttributeError: 'NoneType' object has no attribute 'text' `

Flow791 commented 1 year ago

FBref has renamed some stats.

I think all you have to do is use these stat names in Scrape_FBref.ipynb:

#standard(stats)
stats = ["player","nationality","position","team","age","birth_year","games","games_starts","minutes","goals","assists","pens_made","pens_att","cards_yellow","cards_red","goals_per90","assists_per90","goals_assists_per90","goals_pens_per90","goals_assists_pens_per90","xg","npxg","xa","xg_per90","xa_per90","xg_xa_per90","npxg_per90","npxg_xa_per90"]
stats3 = ["players_used","possession","games","games_starts","minutes","goals","assists","pens_made","pens_att","cards_yellow","cards_red","goals_per90","assists_per90","goals_assists_per90","goals_pens_per90","goals_assists_pens_per90","xg","npxg","xa","xg_per90","xa_per90","xg_xa_per90","npxg_per90","npxg_xa_per90"] 
#goalkeeping(keepers)
keepers = ["player","nationality","position","team","age","birth_year","gk_games","gk_games_starts","gk_minutes","gk_goals_against","gk_goals_against_per90","gk_shots_on_target_against","gk_saves","gk_save_pct","gk_wins","gk_ties","gk_losses","gk_clean_sheets","gk_clean_sheets_pct","gk_pens_att","gk_pens_allowed","gk_pens_saved","gk_pens_missed"]
keepers3 = ["players_used","gk_games","gk_games_starts","gk_minutes","gk_goals_against","gk_goals_against_per90","gk_shots_on_target_against","gk_saves","gk_save_pct","gk_wins","gk_ties","gk_losses","gk_clean_sheets","gk_clean_sheets_pct","gk_pens_att","gk_pens_allowed","gk_pens_saved","gk_pens_missed"]
#advanced goalkeeping(keepersadv)
keepersadv = ["player","nationality","position","team","age","birth_year","minutes_90s","gk_goals_against","gk_pens_allowed","gk_free_kick_goals_against","gk_corner_kick_goals_against","gk_own_goals_against","gk_psxg","gk_psnpxg_per_shot_on_target_against","gk_psxg_net","gk_psxg_net_per90","gk_passes_completed_launched","gk_passes_launched","gk_passes_pct_launched","gk_passes","gk_passes_throws","gk_pct_passes_launched","gk_passes_length_avg","gk_goal_kicks","gk_pct_goal_kicks_launched","gk_goal_kick_length_avg","gk_crosses","gk_crosses_stopped","gk_crosses_stopped_pct","gk_def_actions_outside_pen_area","gk_def_actions_outside_pen_area_per90","gk_avg_distance_def_actions"]
keepersadv2 = ["minutes_90s","gk_goals_against","gk_pens_allowed","gk_free_kick_goals_against","gk_corner_kick_goals_against","gk_own_goals_against","gk_psxg","gk_psnpxg_per_shot_on_target_against","gk_psxg_net","gk_psxg_net_per90","gk_passes_completed_launched","gk_passes_launched","gk_passes_pct_launched","gk_passes","gk_passes_throws","gk_pct_passes_launched","gk_passes_length_avg","gk_goal_kicks","gk_pct_goal_kicks_launched","gk_goal_kick_length_avg","gk_crosses","gk_crosses_stopped","gk_crosses_stopped_pct","gk_def_actions_outside_pen_area","gk_def_actions_outside_pen_area_per90","gk_avg_distance_def_actions"]
#shooting(shooting)
shooting = ["player","nationality","position","team","age","birth_year","minutes_90s","goals","pens_made","pens_att","shots_total","shots_on_target","shots_free_kicks","shots_on_target_pct","shots_total_per90","shots_on_target_per90","goals_per_shot","goals_per_shot_on_target","xg","npxg","npxg_per_shot","xg_net","npxg_net"]
shooting2 = ["minutes_90s","goals","pens_made","pens_att","shots_total","shots_on_target","shots_free_kicks","shots_on_target_pct","shots_total_per90","shots_on_target_per90","goals_per_shot","goals_per_shot_on_target","xg","npxg","npxg_per_shot","xg_net","npxg_net"]
shooting3 = ["goals","pens_made","pens_att","shots_total","shots_on_target","shots_free_kicks","shots_on_target_pct","shots_total_per90","shots_on_target_per90","goals_per_shot","goals_per_shot_on_target","xg","npxg","npxg_per_shot","xg_net","npxg_net"]
#passing(passing)
passing = ["player","nationality","position","team","age","birth_year","minutes_90s","passes_completed","passes","passes_pct","passes_total_distance","passes_progressive_distance","passes_completed_short","passes_short","passes_pct_short","passes_completed_medium","passes_medium","passes_pct_medium","passes_completed_long","passes_long","passes_pct_long","assists","xa","xa_net","assisted_shots","passes_into_final_third","passes_into_penalty_area","crosses_into_penalty_area","progressive_passes"]
passing2 = ["passes_completed","passes","passes_pct","passes_total_distance","passes_progressive_distance","passes_completed_short","passes_short","passes_pct_short","passes_completed_medium","passes_medium","passes_pct_medium","passes_completed_long","passes_long","passes_pct_long","assists","xa","xa_net","assisted_shots","passes_into_final_third","passes_into_penalty_area","crosses_into_penalty_area","progressive_passes"]
#passtypes(passing_types)
passing_types = ["player","nationality","position","team","age","birth_year","minutes_90s","passes","passes_live","passes_dead","passes_free_kicks","through_balls","passes_pressure","passes_switches","crosses","corner_kicks","corner_kicks_in","corner_kicks_out","corner_kicks_straight","passes_ground","passes_low","passes_high","passes_left_foot","passes_right_foot","passes_head","throw_ins","passes_other_body","passes_completed","passes_offsides","passes_oob","passes_intercepted","passes_blocked"]
passing_types2 = ["passes","passes_live","passes_dead","passes_free_kicks","through_balls","passes_pressure","passes_switches","crosses","corner_kicks","corner_kicks_in","corner_kicks_out","corner_kicks_straight","passes_ground","passes_low","passes_high","passes_left_foot","passes_right_foot","passes_head","throw_ins","passes_other_body","passes_completed","passes_offsides","passes_oob","passes_intercepted","passes_blocked"]
#goal and shot creation(gca)
gca = ["player","nationality","position","team","age","birth_year","minutes_90s","sca","sca_per90","sca_passes_live","sca_passes_dead","sca_dribbles","sca_shots","sca_fouled","gca","gca_per90","gca_passes_live","gca_passes_dead","gca_dribbles","gca_shots","gca_fouled","gca_defense"]
gca2 = ["sca","sca_per90","sca_passes_live","sca_passes_dead","sca_dribbles","sca_shots","sca_fouled","gca","gca_per90","gca_passes_live","gca_passes_dead","gca_dribbles","gca_shots","gca_fouled","gca_defense"]
#defensive actions(defense)
defense = ["player","nationality","position","team","age","birth_year","minutes_90s","tackles","tackles_won","tackles_def_3rd","tackles_mid_3rd","tackles_att_3rd","dribble_tackles","dribbles_vs","dribble_tackles_pct","dribbled_past","pressures","pressure_regains","pressure_regain_pct","pressures_def_3rd","pressures_mid_3rd","pressures_att_3rd","blocks","blocked_shots","blocked_shots_saves","blocked_passes","interceptions","clearances","errors"]
defense2 = ["tackles","tackles_won","tackles_def_3rd","tackles_mid_3rd","tackles_att_3rd","dribble_tackles","dribbles_vs","dribble_tackles_pct","dribbled_past","pressures","pressure_regains","pressure_regain_pct","pressures_def_3rd","pressures_mid_3rd","pressures_att_3rd","blocks","blocked_shots","blocked_shots_saves","blocked_passes","interceptions","clearances","errors"]
#possession(possession)
possession = ["player","nationality","position","team","age","birth_year","minutes_90s","touches","touches_def_pen_area","touches_def_3rd","touches_mid_3rd","touches_att_3rd","touches_att_pen_area","touches_live_ball","dribbles_completed","dribbles","dribbles_completed_pct","players_dribbled_past","nutmegs","carries","carry_distance","carry_progressive_distance","progressive_carries","carries_into_final_third","carries_into_penalty_area","pass_targets","passes_received","passes_received_pct","miscontrols","dispossessed"]
possession2 = ["touches","touches_def_pen_area","touches_def_3rd","touches_mid_3rd","touches_att_3rd","touches_att_pen_area","touches_live_ball","dribbles_completed","dribbles","dribbles_completed_pct","players_dribbled_past","nutmegs","carries","carry_distance","carry_progressive_distance","progressive_carries","carries_into_final_third","carries_into_penalty_area","pass_targets","passes_received","passes_received_pct","miscontrols","dispossessed"]
#playingtime(playingtime)
playingtime = ["player","nationality","position","team","age","birth_year","minutes_90s","games","minutes","minutes_per_game","minutes_pct","games_starts","minutes_per_start","games_subs","minutes_per_sub","unused_subs","points_per_match","on_goals_for","on_goals_against","plus_minus","plus_minus_per90","plus_minus_wowy","on_xg_for","on_xg_against","xg_plus_minus","xg_plus_minus_per90","xg_plus_minus_wowy"]
playingtime2 = ["games","minutes","minutes_per_game","minutes_pct","games_starts","minutes_per_start","games_subs","minutes_per_sub","unused_subs","points_per_match","on_goals_for","on_goals_against","plus_minus","plus_minus_per90","plus_minus_wowy","on_xg_for","on_xg_against","xg_plus_minus","xg_plus_minus_per90","xg_plus_minus_wowy"]
#miscellaneous(misc)
misc = ["player","nationality","position","team","age","birth_year","minutes_90s","cards_yellow","cards_red","cards_yellow_red","fouls","fouled","offsides","crosses","interceptions","tackles_won","pens_won","pens_conceded","own_goals","ball_recoveries","aerials_won","aerials_lost","aerials_won_pct"]
misc2 = ["cards_yellow","cards_red","cards_yellow_red","fouls","fouled","offsides","crosses","interceptions","tackles_won","pens_won","pens_conceded","own_goals","ball_recoveries","aerials_won","aerials_lost","aerials_won_pct"]
rharmdev commented 1 year ago

Still getting this problem, does anyone have a fix?

mirkodim commented 1 year ago

Fixed it by adding the following if-statement before the .encode part.

for f in features_wanted_player:
            cell = row.find("td", {"data-stat": f})
            if cell is not None:
                a = cell.text.strip().encode()

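This is for the player table, but it should work the same way for the squad table. Note that every statement that uses `a` afterwards (the `.decode("utf-8")` call and the code that appends the value) must also be placed inside the `if cell is not None:` block; otherwise `a` is undefined whenever a cell is missing.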

philbywalsh commented 7 months ago

Also, note that the "data-stat" value for the squad column has changed from "squad" to "team".

HugoEnzo commented 5 months ago

IndexError Traceback (most recent call last) Cell In[2], line 1 ----> 1 df_outfield = get_outfield_data('https://fbref.com/en/comps/9/','/Premier-League-Stats') 2 df_outfield

Cell In[1], line 136, in get_outfield_data(top, end) 135 def get_outfield_data(top, end): --> 136 df1 = frame_for_category('stats',top,end,stats) 137 df2 = frame_for_category('shooting',top,end,shooting2) 138 df3 = frame_for_category('passing',top,end,passing2)

Cell In[1], line 120, in frame_for_category(category, top, end, features) 118 def frame_for_category(category,top,end,features): 119 url = (top + category + end) --> 120 player_table, team_table = get_tables(url,'for') 121 df_player = get_frame(features, player_table) 122 return df_player

Cell In[1], line 61, in get_tables(url, text) 59 team_table = all_tables[0] 60 team_vs_table = all_tables[1] ---> 61 player_table = all_tables[2] 62 if text == 'for': 63 return player_table, team_table

IndexError: list index out of range

I am getting this error after making the recommended changes. Can someone help me with it?

HugoEnzo commented 5 months ago

I also get these errors when I try to add the following check: `for f in features_wanted_player: cell = row.find("td", {"data-stat": f}) if cell is not None: a = cell.text.strip().encode()`

Errors:

Cell In[11], line 260 257 scraper = FbRefScraper(leagues=["Premier League"], seasons=[2019]) 258 # Outfield players 259 # Add csv_path to store df to csv --> 260 df_outfield = scraper.scrape_players(csv_path="outfield.csv") 261 print(df_outfield.head()) 263 # Team data

Cell In[11], line 206, in FbRefScraper.scrape_players(self, csv_path) 204 link[0] = link[0] + f"{year_links[league][season]}/" 205 link[1] = f"/{season-1}-{season}-" + link[1].replace("/", "") --> 206 df_outfield_league = get_outfield_data(link[0], link[1]) 207 df_outfield_league["season"] = season 208 df_outfield = df_outfield.append(df_outfield_league, ignore_index=True)

Cell In[11], line 146, in get_outfield_data(top, end) 145 def get_outfield_data(top, end): --> 146 df1 = frame_for_category('stats',top,end,stats) 147 df2 = frame_for_category('shooting',top,end,shooting2) 148 df3 = frame_for_category('passing',top,end,passing2)

Cell In[11], line 133, in frame_for_category(category, top, end, features) 131 url = (top + category + end) 132 player_table, team_table = get_tables(url) --> 133 df_player = get_frame(features, player_table) 134 return df_player

Cell In[11], line 90, in get_frame(features, player_table) 88 if cell is not None: 89 a = cell.text.strip().encode() ---> 90 text=a.decode("utf-8") 91 if(text == ''): 92 text = '0'

UnboundLocalError: cannot access local variable 'a' where it is not associated with a value

I also encounter the following error if I try to fix the error above by moving `text=a.decode("utf-8")` into the `if cell is not None:` block:

Cell In[12], line 90, in get_frame(features, player_table) 88 a = cell.text.strip().encode() 89 text=a.decode("utf-8") ---> 90 if(text == ''): 91 text = '0' 92 if((f!='player')&(f!='nationality')&(f!='position')&(f!='team')&(f!='age')&(f!='birth_year')):

UnboundLocalError: cannot access local variable 'text' where it is not associated with a value