codexgigassys / codex-backend

Codex Gigas malware DNA profiling search engine discovers malware patterns and characteristics, assisting individuals who are interested in malware hunting.
MIT License
154 stars 38 forks source link

Some signature descriptions are being saved with a blank space at the beginning, so when searching for them later (without the blank space) they don't match #120

Closed CrimsonGlory closed 7 years ago

CrimsonGlory commented 7 years ago

Mongo queries to search for documents in the av_analysis collection that have a signature with trailing whitespace (first query) or leading whitespace (second query):
db.av_analysis.find({"scans.result": {$regex: / $/m} },{"scans.result": 1}).count()
db.av_analysis.find({"scans.result": {$regex: /^ /m} },{"scans.result": 1}).count()

CrimsonGlory commented 7 years ago

Ideally, a script to fix previously saved scan results should be written. But since very few of them have leading or trailing whitespace, it is not worth the effort. Untested attempt:

// One-off mongo shell maintenance script: strips leading/trailing whitespace
// from every `scans[].result` string of the matching av_analysis documents.
//
// Updates are queued on a bulk operation and flushed every BATCH_SIZE
// documents. A Bulk object cannot be reused once execute() has been called,
// so a fresh one is initialized after each flush.
//
// NOTE(review): the query is pinned to a single sha1, which looks like a
// trial run on one sample. Remove the "sha1" condition to process the whole
// collection.
var BATCH_SIZE = 1000;
var pending = 0; // number of updates queued on `bulk` and not yet executed
var bulk = db.av_analysis.initializeOrderedBulkOp();

db.av_analysis.find({"sha1": "3fa91f0f116cc0f19e8105bc51b91b7639605663", "scans.result": /^\s+|\s+$/ },{ "scans": 1}).forEach(
    function(doc) {
        var trimmedScans = doc.scans.map(function(scan) {
            // Only strings can be trimmed; null/missing (or any other
            // non-string) results are passed through untouched.
            if (typeof scan.result === "string") {
                scan.result = scan.result.trim();
            }
            return scan;
        });
        // printjson shows the actual content; print() would only emit
        // "[object Object]" for each scan entry.
        printjson(trimmedScans);

        bulk.find({ "_id": doc._id }).update({
            "$set": { "scans": trimmedScans }
        });
        pending++;

        if (pending === BATCH_SIZE) {
            bulk.execute();
            // An executed Bulk cannot accept or run further operations.
            bulk = db.av_analysis.initializeOrderedBulkOp();
            pending = 0;
        }
    }
);

// Flush the remainder. Executing a bulk with no queued operations throws,
// so only do it when something is actually pending.
if (pending > 0) {
    bulk.execute();
}