Open pigbreeder opened 2 years ago
# Replace missing values with None (two alternative spellings).
df = df.where(df.notnull(), None)
# Same replacement after casting every column to object dtype.
# Handling for empty ODPS tables.
df = df.astype(object).where(pd.notnull(df), None)
# Integer array using the nullable Int64 dtype, built from values that include NaN.
arr = pd.array([1, 2, np.nan], dtype=pd.Int64Dtype())
# Select the rows that contain at least one missing value.
df[df.isna().any(axis=1)]
# Strip surrounding whitespace from every object-dtype column.
df_obj = df.select_dtypes(['object'])
df[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())
# Render the three ratio columns as percentages with two decimals.
t1.style.format({'L1_intra_pct': "{:.2%}",'L1_inter_pct': "{:.2%}",'true_pct': "{:.2%}"})
df.groupby("A").filter(lambda x: len(x) > 1) # filter
df.groupby('B').apply(lambda x: x.sample(frac=0.5)) # sample
# Interpret 'created_at' as epoch seconds and format it as 'YYYY-mm-dd HH:MM:SS' text.
df['created_time'] = pd.to_datetime(df['created_at'],unit='s').dt.strftime('%Y-%m-%d %H:%M:%S')
jupyter 中用from tqdm.autonotebook import tqdm
from tqdm.notebook import tqdm
pandas
tqdm.pandas()
df.progress_apply()
用完消失
trange(10,leave=False,desc='test')
print可能会导致输出多行进度条,可以将print语句改为tqdm.write,代码如下
# Emit messages through tqdm.write instead of print so the progress bar is
# not redrawn on multiple lines.
for _ in tqdm(range(10), ascii=True):
    tqdm.write("come on")
    time.sleep(0.1)
from random import random
from time import sleep
from tqdm import tqdm
epochs = 2
train_data_num = 10
for i in range(epochs):
    # One progress bar per epoch, sized by the number of training items.
    with tqdm(total=train_data_num) as t:
        # Description will be displayed on the left; it is constant within
        # an epoch, so it is set once instead of on every step.
        t.set_description('Epoch %i' % i)
        # Iterate exactly `train_data_num` times so the number of update()
        # calls always matches the bar's `total`.
        for _ in range(train_data_num):
            # Postfix will be displayed on the right,
            # formatted automatically based on argument's datatype
            t.set_postfix(loss=random(), acc=random())
            sleep(0.1)
            t.update(1)
import seaborn as sns
# Histogram of df1.top1_lzd_prob drawn with stat="probability".
sns.histplot(df1.top1_lzd_prob,stat="probability")
import matplotlib.pyplot as plt
# Grouped bar chart of 'accu' per 'threshold' (hue = 'name') on a 12x8 figure.
fig, ax1 = plt.subplots(figsize=(12, 8))
tdf = pd.DataFrame(datas,columns=['threshold','accu','name','cnt'])
a=sns.barplot(data = tdf,x='threshold',y='accu',hue='name',palette=sns.color_palette('bright')[:2],ax=ax1)
# NOTE(review): show_values (defined later in these notes) has no return
# statement, so `aa` is None, and it only uses `space` when orient == "h".
aa = show_values(a,space=.15)
# Twin axes created from ax1 (nothing is drawn on it in this snippet).
ax2 = ax1.twinx()
# kde/hist on the same figure
# https://stackoverflow.com/questions/46045750/seaborn-distplot-displot-with-multiple-distributions
sns.displot(data=df3, x='pred_score', hue='label', kind='kde', fill=True,palette=sns.color_palette('bright')[:2], height=8, aspect=1.5)
####################
# bar
a=sns.barplot(data = tdf,x='threshold',y='accu',hue='name',palette=sns.color_palette('bright')[:2],ax=ax1)
# https://stackoverflow.com/questions/63945535/seaborn-plotting-histogram-and-lineplot-on-the-same-figure-with-2-y-axis
####################
# write value on plot
# https://juejin.cn/post/7116375204573642759
def show_values(axs, orient="v", space=.01):
    """Write each bar's numeric value as a text label next to the bar.

    Args:
        axs: A single axes-like object exposing ``patches`` and ``text``, or
            a NumPy array of such objects (every element is annotated).
        orient: ``"v"`` labels each patch with its height, centred just
            above it; ``"h"`` labels each patch with its width, just to the
            right of it. Any other value leaves the axes untouched.
        space: Gap added to the right end of the bar before the label.
            Only used when ``orient == "h"``; vertical labels are always
            offset by 1% of the bar height.

    Returns:
        None. Labels are added to the axes in place.
    """

    def _single(ax):
        # Annotate every patch of one axes object.
        if orient == "v":
            for p in ax.patches:
                height = p.get_height()
                # A bar with a NaN/inf height has no meaningful value or
                # position; skip it instead of writing a "nan" label.
                if not np.isfinite(height):
                    continue
                _x = p.get_x() + p.get_width() / 2
                _y = p.get_y() + height + (height * 0.01)
                value = '{:.1f}'.format(height)
                ax.text(_x, _y, value, ha="center")
        elif orient == "h":
            for p in ax.patches:
                width = p.get_width()
                # Same guard as above, for horizontal bars.
                if not np.isfinite(width):
                    continue
                _x = p.get_x() + width + float(space)
                # Vertically centre the label on the bar.
                _y = p.get_y() + p.get_height() - (p.get_height() * 0.5)
                value = '{:.1f}'.format(width)
                ax.text(_x, _y, value, ha="left")

    if isinstance(axs, np.ndarray):
        # Array of axes (any shape): annotate each element.
        for _, ax in np.ndenumerate(axs):
            _single(ax)
    else:
        _single(axs)
# Usage example: draw a bar plot of `tip` per `day`, then label each bar.
# NOTE(review): newer seaborn releases deprecate `ci` in favour of
# `errorbar` — confirm against the installed version.
p = sns.barplot(x="day", y="tip", data=data, ci=None)
# show values on barplot
show_values(p)
####################
# Transpose df2 through its raw values, using df2's index as the new column labels.
df3 = pd.DataFrame(df2.values.T,columns=df2.index)
# Build a dict that maps each lakes.id to the lakes.value at the same position.
area_dict = dict(zip(lakes.id, lakes.value))
# Request the classification report as a dict, wrap it in a DataFrame and transpose it.
clsf_report = pd.DataFrame(classification_report(y_true = base_label_trun, y_pred = predict_label, output_dict=True)).transpose()
groupby 返回多个值,同时有name
pandas中groupby函数中参数as_index和group_keys的区别
# Per (venture, path_en) group, return a named Series whose 'a' entry is the
# fraction of rows with label == 1.
df.groupby(['venture','path_en'],as_index=False).apply(lambda x:pd.Series({'a':len(x[x['label']==1])/len(x)}))
# Strip whitespace from object-dtype columns; other columns are passed through unchanged.
data_frame_trimmed = data_frame.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
最简单绘多图
https://stackoverflow.com/questions/40071096/how-to-plot-multiple-lines-in-one-figure-in-pandas-python-based-on-data-from-mul
# Show the result of every top-level expression in a notebook cell,
# not only the last one.
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"