SteveHong1901 / UCL-STAT0006-Data-Analysis-Project-1

0 stars 0 forks source link

I'm tr #9

Open SteveHong1901 opened 1 week ago

SteveHong1901 commented 1 week ago

# Word-frequency analysis over European university names.
#
# The base list (`european_universities`, defined outside this snippet) is
# extended with native-language (non-English) names so that words such as
# "Universität" or "Università" show up in the counts as well.
european_universities_extended = european_universities + [
    "Universidad de Barcelona",
    "Université Paris-Sorbonne",
    "Universität Heidelberg",
    "Ecole Polytechnique",
    "Université de Genève",
    "Università degli Studi di Bologna",
    "Universidade de Lisboa",
    "Universidade de Coimbra",
    "École Normale Supérieure",
    "Universidade do Porto",
    "Università degli Studi di Milano",
    "Universität Wien",
    "Universität Hamburg",
    "Universität zu Köln",
    "Università di Pisa",
    "École des Ponts ParisTech",
    "Universität Stuttgart",
    "Università di Padova",
    "Universität Zürich",
    "Université Catholique de Louvain",
    "Ecole Normale Supérieure de Lyon",
]

# Collapse every name into one space-separated string so it can be tokenized
# in a single pass.
all_words_extended = " ".join(european_universities_extended)

# Tally whitespace-delimited tokens. Matching is exact (case- and
# accent-sensitive), so e.g. "Ecole" and "École" are counted as different words.
word_counts_extended = Counter()
word_counts_extended.update(all_words_extended.split())

# Tabulate the (word, count) pairs, then order the rows from most to least
# frequent.
word_freq_df_extended = pd.DataFrame(
    word_counts_extended.items(),
    columns=["Word", "Frequency"],
)
word_freq_df_extended = word_freq_df_extended.sort_values(
    by="Frequency",
    ascending=False,
)

# NOTE(review): `ace_tools` is an external helper that is not defined in this
# snippet -- confirm it is available in the runtime before relying on it.
import ace_tools as tools

tools.display_dataframe_to_user(
    name="Common Words in European University Names (Extended)",
    dataframe=word_freq_df_extended,
)

# Peek at the 20 most frequent words; as a bare expression this is only
# rendered in an interactive / notebook session.
word_freq_df_extended.head(20)