Open that1guy opened 9 years ago
FYI, I can do this on client-side if that's helpful. Just say the word so we're not duplicating efforts.
I'm cleaning up code on my end. Before I delete my de-dupe code, here it is:
var HashTable = require('hashtable');
exports.dedupe = function(result, promise){
var userLat = result.location.latitude;
var userLong = result.location.longitude;
var response = result.external;
var deDupeExternalID = new HashTable();
var deDupeHeading = new HashTable();
var duplicates = [];
var originals = [];
deDupeExternalID.put(response.postings[0].external_url, 0);
//console.log("ID is unique: " + response.postings[0].external_id);
deDupeHeading.put(response.postings[0].heading, 0);
//console.log("Heading is unique: " + response.postings[0].heading);
for (var i = 1; i < response.postings.length; i++) {
result = response.postings[i];
if(typeof deDupeExternalID.get(result.external_url) === 'undefined'){
//console.log("URL is unique: " + result.external_url);
deDupeExternalID.put(result.external_url, i);
if(typeof deDupeHeading.get(result.heading) === 'undefined'){
//console.log("Heading is unique: "+ result.heading);
deDupeHeading.put(result.heading, i);
// TODO: Clean up HTML
originals.push(convertToHTSObjStructure(result, userLat, userLong));
} else {
duplicates.push(result);
console.log("Duplicate Heading: "+ result.heading);
}
} else {
duplicates.push(result);
console.log("Duplicate URL: "+result.external_url);
}
}
console.log("!!!!!!!!!~~~~ DONE WITH DEDUPE ~~~~!!!!!!!!!");
console.log(duplicates.length + " Duplicates");
console.log(originals.length + " Originals");
console.log("!!!!!!!!!~~~~ DONE WITH DEDUPE ~~~~!!!!!!!!!");
promise(null, originals);
};
This is becoming our only weak spot in search that I can see. Knock this down and I think we're golden.
https://staging-posting-api.hashtagsell.com/v1/postings/?start=0&count=35&filters[mandatory][contains][heading]=htc&filters[optional][exact][categoryCode]=SELE,SAPL&geo[lookup]=true&geo[min]=0&geo[max]=12890000
I have code that will de-dupe records returned from mongo.find. This should probably be included somewhere in the waterfall block before returning results. Give me your suggestion on how to implement it, and I will do this on a separate branch and send a pull request.