def D_H_L_mapping(train=None, test=None):
    """Target-encode the preference/attribute code columns of train and test.

    For every column listed below, each category value is replaced by the
    mean of ``train['target']`` over the train rows that share that value.
    The same train-derived lookup is then applied to ``test``.

    Args:
        train: DataFrame containing a ``target`` column and every column
            listed in ``encoded_columns``.
        test: DataFrame containing every column listed in
            ``encoded_columns``.

    Returns:
        The tuple ``(train, test)``. These are the same objects that were
        passed in; the listed columns are overwritten in place.

    Notes:
        * A test value that never occurs in the train column has no entry
          in the lookup, so ``Series.map`` leaves it as NaN.
        * The train encoding uses the mean over all train rows, i.e. each
          row's own ``target`` is part of the value it is encoded with.
    """
    # Same columns, in the same order, as the original hand-written sequence.
    encoded_columns = (
        'person_prefer_d_1_l',
        'person_prefer_d_1_n',
        'person_prefer_d_1_m',
        'person_prefer_d_1_s',
        'person_prefer_d_2_l',
        'person_prefer_d_2_n',
        'person_prefer_d_2_m',
        'person_prefer_d_2_s',
        'person_prefer_d_3_l',
        'person_prefer_d_3_n',
        'person_prefer_d_3_m',
        'person_prefer_d_3_s',
        'contents_attribute_d_l',
        'contents_attribute_d_m',
        'contents_attribute_d_n',
        'contents_attribute_d_s',
        'person_prefer_h_1_l',
        'person_prefer_h_1_m',
        'person_prefer_h_2_l',
        'person_prefer_h_2_m',
        'person_prefer_h_3_l',
        'person_prefer_h_3_m',
        'contents_attribute_h_l',
        'contents_attribute_h_m',
        'contents_attribute_l_n',
        'contents_attribute_l_s',
        'contents_attribute_l_m',
        'contents_attribute_l_l',
    )

    for column in encoded_columns:
        # The means must be taken from the raw train column, so compute the
        # lookup before the column is overwritten below.
        category_means = train.groupby(column)['target'].mean()
        # Build the category -> mean lookup once and reuse it for both frames.
        lookup = dict(zip(category_means.keys(), category_means.values))
        train[column] = train[column].map(lookup)
        test[column] = test[column].map(lookup)

    return train, test
결과
score가 0.687로 증가했습니다.
그리고 0이 11,000개, 1이 33,000개 정도 되는 것으로 보여서 threshold를 다시 0.35로 지정해서 모델을 돌려 보고 있습니다.
그 이후에 stacking과 voting 중에 어떤 방식이 더 좋은지 확인해 보고 다시 issue에 올리도록 하겠습니다.
Feature engineering
결과
score가 0.687로 증가했습니다. 그리고 0이 11,000개, 1이 33,000개 정도 되는 것으로 보여서 threshold를 다시 0.35로 지정해서 모델을 돌려 보고 있습니다. 그 이후에 stacking과 voting 중에 어떤 방식이 더 좋은지 확인해 보고 다시 issue에 올리도록 하겠습니다.