Open matsuken92 opened 5 years ago
# Split the rows into three disjoint groups by coupling type:
#   cut 1 -> "1JHC", cut 2 -> "1JHN", cut 3 -> every other type.
train_type1_cut = train.type == "1JHC"
train_type2_cut = train.type == "1JHN"
# "Neither 1JHC nor 1JHN", expressed as the complement of the union.
train_type3_cut = ~(train_type1_cut | train_type2_cut)

test_type1_cut = test.type == "1JHC"
test_type2_cut = test.type == "1JHN"
test_type3_cut = ~(test_type1_cut | test_type2_cut)

# Sanity check: within each frame the masks must cover every row exactly once.
for _frame, _masks in (
    (train, (train_type1_cut, train_type2_cut, train_type3_cut)),
    (test, (test_type1_cut, test_type2_cut, test_type3_cut)),
):
    assert sum(m.sum() for m in _masks) == _frame.shape[0]

# Persist each mask as its own .npy file.
for _path, _mask in (
    ("../processed/v001/train_type1_cut.npy", train_type1_cut),
    ("../processed/v001/train_type2_cut.npy", train_type2_cut),
    ("../processed/v001/train_type3_cut.npy", train_type3_cut),
    ("../processed/v001/test_type1_cut.npy", test_type1_cut),
    ("../processed/v001/test_type2_cut.npy", test_type2_cut),
    ("../processed/v001/test_type3_cut.npy", test_type3_cut),
):
    np.save(_path, _mask)
1: 709416, 2: 43363, 3: 3905368
# Violin plot of scalar_coupling_constant for each value of the "type" column.
plt.figure(figsize=(10, 7))
sns.violinplot(x="type", y="scalar_coupling_constant", data=train)
# v002 split: four disjoint groups by coupling type.
#   cut 1 -> "1JHC", cut 2 -> "1JHN", cut 3 -> "2JHH", cut 4 -> every other type.
train_type1_cut = train.type=="1JHC"
test_type1_cut = test.type=="1JHC"
train_type2_cut = train.type=="1JHN"
test_type2_cut = test.type=="1JHN"
train_type3_cut = train.type=="2JHH"
test_type3_cut = test.type=="2JHH"
train_type4_cut = (train.type!="1JHC")&(train.type!="1JHN")&(train.type!="2JHH")
test_type4_cut = (test.type!="1JHC")&(test.type!="1JHN")&(test.type!="2JHH")

# Sanity check: the four masks must cover every row of each frame exactly once.
assert (train_type1_cut.sum()+train_type2_cut.sum()
        +train_type3_cut.sum()+train_type4_cut.sum())==train.shape[0]
assert (test_type1_cut.sum()+test_type2_cut.sum()
        +test_type3_cut.sum()+test_type4_cut.sum())==test.shape[0]

# Make sure the output directory exists before writing the masks.
Path("../processed/v002/").mkdir(parents=True, exist_ok=True)
np.save("../processed/v002/train_type1_cut.npy", train_type1_cut)
np.save("../processed/v002/train_type2_cut.npy", train_type2_cut)
np.save("../processed/v002/train_type3_cut.npy", train_type3_cut)
np.save("../processed/v002/train_type4_cut.npy", train_type4_cut)
np.save("../processed/v002/test_type1_cut.npy", test_type1_cut)
np.save("../processed/v002/test_type2_cut.npy", test_type2_cut)
np.save("../processed/v002/test_type3_cut.npy", test_type3_cut)
# Save the type-4 mask itself; the type-3 mask was previously written here by mistake.
np.save("../processed/v002/test_type4_cut.npy", test_type4_cut)
1: 709416, 2: 43363, 3: 378036, 4: 3527332
https://www.kaggle.com/c/champs-scalar-coupling/discussion/94237#latest-545925