Open maxis1718 opened 10 years ago
Done
docfeature_b20_m60_e20_c0_f0 (using "patscore_p2_s0" as pattern scores)
docfeature_b20_m60_e20_c0_f1 (using "patscore_p2_s0" as pattern scores)
{
// setting id
"setting": 1
// settings
"section": "b20_m60_e20",
"f": 0,
"c": 0,
// target sub-collection: features.location
"feature_type": "location"
// doc ids
// "features": [0, 1, ...]
}
{
_id: 0,
"setting" : 1
"emotion" : "sleepy",
"udocID" : 38000,
"feature" :
[
("#position@beginning_lonely", 1.1214049371732575),
("#position@end_tired", 1.8017507433907276),
("#position@end_good", 0.8710341080806533),
("#position@middle_frustrated", 2.71088318141498),
...
]
},
{
_id: 1,
"setting" : 1,
"emotion" : "sleepy",
"udocID" : 38001,
"feature" :
[
("#position@beginning_lonely", 1.1214049371732575),
("#position@end_tired", 1.8017507433907276),
("#position@end_good", 0.8710341080806533),
("#position@middle_frustrated", 2.71088318141498),
...
]
}
insert setting metadata into features.settings
using pymongo
setting = {
"section": "b20_m60_e20",
"feature_name": "position",
"counting_unit_type": 0,
"feature_value_type": 0
}
# ObjectID is generated by client, which means it is obtained immediately :P
setting_id = str(db['features.settings'].insert( setting ))
get setting_id (or just reuse the setting_id returned by the insertion above)
setting_id = str(db['features.settings'].find_one(setting)['_id'])
insert position feature with setting id into features.position
mdoc = {
"emotion": "sleepy",
"udocID": 38000,
"feature": [ ("#position@beginning_lonely",1.1214049371732575), ("#position@end_tired",1.8017507433907276) ],
"setting": setting_id # looks like 5369fb11d4388c0aa4c5ca4e
}
db['features.position'].insert(mdoc)
create index
db['features.position'].create_index("setting")
config setting and fetch setting_id
setting_id = str(db['features.settings'].find_one(setting)['_id'])
find all features fitting that setting
db['features.position'].find({ "setting": setting_id })
extraction from documents
extract position features
document_scoring.py
insert to MongoDB
settings
mongo > features.settings
> db.features.settings.findOne()
{
"_id" : ObjectId("536aedefd4388c7e3e30aa4b"),
"counting_unit_type" : 0,
"feature_value_type" : 0,
"feature_name" : "position",
"section" : "b20_m60_e20"
}
features
mongo > features.keywords
mongo > features.patterns
mongo > features.position
> db.features.position.findOne()
{
"_id" : ObjectId("536aee18d4388c7e3e30aa4c"),
"emotion" : "sleepy",
"setting" : "536aedefd4388c7e3e30aa4b",
"udocID" : 38000,
"feature" : {
"#position@beginning_lonely" : 1.1214,
"#position@end_tired" : 1.8017,
"#position@end_good" : 0.8710,
"#position@middle_frustrated" : 2.7108,
...
toSVM.py
generate vectors
input: setting
{
"counting_unit_type" : 0,
"feature_value_type" : 0,
"feature_name" : "position",
"section" : "b20_m60_e20"
}
output:
[
(29001, '29 0:1.9005 1:1.3218 2:1.4900, ... ),
(29002, '29 0:0.9664 1:0.0 2:0.0 3:1.7429 4:2.4847, ... ),
...
]
generate testing & training files
train.txt, test.txt, gold.txt
Note: feature indices within each libsvm line must be in ascending order
db.features.settings
{
"_id" : ObjectId("537086fcd4388c7e81676914"),
"feature_name" : "position",
"section" : "b20_m60_e20",
"counting_unit_type" : 0,
"feature_value_type" : 0
}
db.features.position.findOne({setting: "537086fcd4388c7e81676914", udocID: 32874})
{
"emotion" : "lonely",
"setting" : "537086fcd4388c7e81676914",
"udocID" : 32874,
"feature" : [
["#position@middle_blah", 0],
["#position@middle_aggravated", 0],
["#position@middle_bouncy", 0],
["#position@middle_blank", 0],
["#position@middle_crushed", 0.5],
...
]
normalize beginning/middle/end (前中後): 1,2,1 vs. 2,4,2