import pandas as pd
from pandas_ods_reader import read_ods
import json
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from numpy import linalg
from numpy.linalg import svd
from collections import Counter
from mpl_toolkits.mplot3d import Axes3D
import random
import scipy as sp
# === === Helper functions === ===
def write_json(data, filename):
    """Serialize ``data`` as JSON into ``filename``.

    The file is opened in text write mode, so an existing file is
    overwritten. Nothing is returned.
    """
    with open(filename, mode="w") as out_file:
        json.dump(data, fp=out_file)
def get_json(filename):
    """Open ``filename`` and return its parsed JSON content."""
    with open(filename) as source:
        return json.load(source)
def add_character_name(row, character_map):
    """Return the display name of the character that ``row`` refers to.

    ``row['unnamed.1']`` is compared against the ``'ID'`` column of
    ``character_map``; the ``'Character display name'`` value of the first
    matching entry is returned. An ``IndexError`` is raised when no entry
    matches.
    """
    character_id = row['unnamed.1']
    id_matches = character_map['ID'] == character_id
    display_names = character_map[id_matches]['Character display name']
    return tuple(display_names)[0]
def add_work_name(row, character_map):
    """Return the fictional work of the character that ``row`` refers to.

    ``row['unnamed.1']`` is compared against the ``'ID'`` column of
    ``character_map``; the ``'Fictional work'`` value of the first matching
    entry is returned. An ``IndexError`` is raised when no entry matches.
    """
    character_id = row['unnamed.1']
    id_matches = character_map['ID'] == character_id
    works = character_map[id_matches]['Fictional work']
    return tuple(works)[0]
#=================================
# Load the input data. All paths are relative, so the files must be reachable
# from the directory the script is run in.
# pd.read_html returns a list of DataFrames (one per table it finds); the
# two-name unpacking below therefore requires codebook.html to yield exactly
# two tables, otherwise it raises ValueError.
character_map, bap_map = pd.read_html("codebook.html")
# Tables stored as JSON files, each read into a DataFrame.
df_bap = pd.read_json("July2021_df_bap.json")
df_traits = pd.read_json("July2021_df_traits.json")
df_std = pd.read_json("June2021_df_std_original.json")
df_n = pd.read_json("June2021_df_n_original.json")
# Plain parsed JSON object (presumably a dict, judging by the name -- TODO confirm).
clean_column_dict = get_json("July2021_cleaned_column_dict.json")
def runSVD(df1, dropcols=('unnamed.1', 'name', 'work'), n=None):
    """Run a full singular value decomposition on the columns of ``df1``.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Input table. Whatever columns remain after ``dropcols`` are removed
        are converted with ``DataFrame.to_numpy`` and passed to
        ``numpy.linalg.svd``, so they must be numeric.
    dropcols : iterable of column labels, optional
        Columns to remove before decomposing. Labels that are not present in
        ``df1`` are ignored. Pass an empty sequence to keep every column.
    n : optional
        Unused. Kept only so that existing calls passing it keep working.

    Returns
    -------
    tuple
        ``(df1, U, D, V, Sig, X, remakeX)`` where

        * ``df1`` is the input table without the dropped columns (the very
          same object when nothing was dropped),
        * ``U``, ``D``, ``V`` are the three outputs of ``numpy.linalg.svd``;
          note that ``V`` is the already-transposed right factor, so
          ``X == U @ Sig @ V``,
        * ``Sig`` is the ``M x N`` matrix with ``D`` on its diagonal,
        * ``X`` is the decomposed array and ``remakeX`` its reconstruction.

    Raises
    ------
    AssertionError
        If the reconstruction does not reproduce ``X`` within tolerance.
    """
    # Imported here because ``import scipy`` alone does not guarantee that the
    # ``scipy.linalg`` submodule has been loaded.
    from scipy.linalg import diagsvd

    # dict.fromkeys de-duplicates while keeping order, so a label listed twice
    # is dropped once, exactly like the one-at-a-time loop it replaces.
    present = [col for col in dict.fromkeys(dropcols) if col in df1.columns]
    if present:
        df1 = df1.drop(columns=present)

    X = df1.to_numpy()
    # Decompose.
    U, D, V = np.linalg.svd(X)
    M, N = X.shape
    # Pad the singular values with zero rows/columns to match the shape of X.
    Sig = diagsvd(D, M, N)
    # Rebuild X from its factors as a sanity check.
    remakeX = np.dot(U, np.dot(Sig, V))
    # Compare element-wise: summing signed differences lets large errors of
    # opposite sign cancel out (or go negative) and pass unnoticed.
    if not np.allclose(remakeX, X, rtol=1e-5, atol=1e-5):
        raise AssertionError("SVD reconstruction does not match the input matrix")
    return df1, U, D, V, Sig, X, remakeX
# SVD of the trait table as loaded, without centring. runSVD's default
# dropcols ('unnamed.1', 'name', 'work') are removed before decomposing, so
# df1 is df_traits without those columns.
df1, U, D, V, Sig, X, remakeX = runSVD(df_traits)
# Centre the data by subtracting one fixed constant from every entry.
# NOTE: this is not the per-trait average; the value 50 is hard-coded
# (presumably the midpoint of a 0-100 rating scale -- TODO confirm).
# Alternative kept for reference: the grand mean over all columns.
#df1_means = df1.mean().mean()
df1_means = 50
df1_normed = df1 - df1_means
# SVD of the centred table. dropcols=[] because the default drop columns were
# already removed from df1 by the first runSVD call.
df2, U2, D2, V2, Sig2, X2, remakeX2 = runSVD(df1_normed,dropcols=[])
# runSVD rebuilds X2 as U2 . (Sig2 . V2); keep the inner product Sig2 . V2 on
# its own here.
SigV2 = np.dot(Sig2,V2)
# Column labels of the centred table, in the same order as the columns of X2.
col2 = df2.columns
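The data files that the script loads (`codebook.html` and the `July2021_*.json` / `June2021_*.json` files) should be downloaded from the repo and placed where this script can access them. (Alternatively, delete the corresponding loading lines if you don't need a particular object.)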
The script to run SVD on the original data:
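Note that the script loads these objects from files: `codebook.html`, `July2021_df_bap.json`, `July2021_df_traits.json`, `June2021_df_std_original.json`, `June2021_df_n_original.json`, and `July2021_cleaned_column_dict.json`.
These files should be downloaded from the repo and placed where the script can access them. (Alternatively, delete the corresponding loading lines if you don't need a particular object.)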