Open benel opened 4 years ago
Co-occurrences computation appears to be negligible in comparison with data downloading:
Time | Task |
---|---|
0,1 s – 0,2 s | getView(user) |
1,3 s – 2,0 s | getView(corpora, viewpoints) |
1,3 s – 2,0 s | getView(corpora) |
0,4 s – 0,6 s | getView(viewpoints) |
0,001 s | restructuring viewpoints |
0,002 s | restructuring items |
0,019 s | co-occurrences |
Tested on the stained-glasses portfolio (1771 items, 608 topics). @garnier5
Pour info, voici le code (indépendant de Porphyry mais qui s'en inspire grandement) que j'avais utilisé pour faire mes mesures :
const Hypertopic = require('hypertopic');
// Identifier of the user whose view is requested first (`/user/${USER}`)
// and whose entry is then read from the response.
const USER = 'vitraux';
// Service URLs handed to the Hypertopic constructor.
const SERVICES = [
'http://argos2.hypertopic.org',
'http://steatite.hypertopic.org'
];
// Timestamp (in milliseconds) of the previous checkpoint.
let start = Date.now();

/**
 * Prints the number of milliseconds elapsed since the previous checkpoint,
 * followed by the given label, then makes "now" the new checkpoint.
 *
 * @param {*} x - Label or value printed next to the elapsed time.
 */
const logWithTime = (x) => {
  const now = Date.now();
  const elapsed = now - start;
  console.log(elapsed, x);
  start = now;
};
// Entry of the user view for USER; reassigned once the first request resolves.
let user = {};
// Viewpoint objects taken from the downloaded data, each tagged with its id.
let viewpoints = [];
// Named items taken from the downloaded corpora, each tagged with its id and corpus id.
let items = [];
// Client built from SERVICES; the getView requests of this script go through it.
const hypertopic = new Hypertopic(SERVICES);
// Benchmark pipeline. Each step ends with a logWithTime() call so that the
// time spent in that step alone is printed:
//   1. request the user view,
//   2. request the views of all the user's viewpoints and corpora,
//   3. restructure viewpoints,
//   4. restructure items,
//   5. compute the topic -> items index (co-occurrences).
hypertopic.getView(`/user/${USER}`)
  .then((x) => {
    user = x[USER];
    logWithTime(user);
    // A user may lack one of the two lists; treat a missing list as empty.
    const viewpointPaths = (user.viewpoint || []).map((y) => `/viewpoint/${y.id}`);
    const corpusPaths = (user.corpus || []).map((y) => `/corpus/${y.id}`);
    return viewpointPaths.concat(corpusPaths);
  })
  // Call getView through the client so that `this` is preserved whatever
  // the way the method is implemented.
  .then((paths) => hypertopic.getView(paths))
  .then((x) => {
    logWithTime('GOT corpora and viewpoints');
    return x;
  })
  .then((data) => {
    for (const v of user.viewpoint || []) {
      const viewpoint = data[v.id];
      viewpoint.id = v.id;
      viewpoints.push(viewpoint);
    }
    logWithTime({viewpoints: viewpoints.length});
    return data;
  })
  .then((data) => {
    // Keys of a corpus object that are skipped when collecting items.
    const corpusAttributes = ['id', 'name', 'user'];
    for (const corpus of user.corpus || []) {
      const corpusData = data[corpus.id];
      for (const itemId in corpusData) {
        if (corpusAttributes.includes(itemId)) continue;
        const item = corpusData[itemId];
        // Only items with a non-empty name are kept.
        if (item.name && item.name.length) {
          item.id = itemId;
          item.corpus = corpus.id;
          items.push(item);
        }
      }
    }
    logWithTime({items: items.length});
    return data;
  })
  .then(() => {
    const selectedItems = items; //worst case
    const topicsItems = new Map();
    for (const e of selectedItems) {
      for (const t of _getRecursiveItemTopics(e)) {
        push(topicsItems, t, e.id);
      }
    }
    logWithTime({topics: topicsItems.size});
  })
  .catch((error) => {
    // Report the failure explicitly instead of leaving an unhandled rejection.
    console.error(error);
    process.exitCode = 1;
  });
/**
 * Looks a topic up across the loaded viewpoints.
 *
 * @param {string} id - Topic identifier, used as a key in each viewpoint object.
 * @returns {*} The value found under `id` in the first viewpoint holding a
 *   truthy one, or null when no viewpoint does.
 */
function _getTopic(id) {
  const owner = viewpoints.find((viewpoint) => viewpoint[id]);
  return owner ? owner[id] : null;
}
/**
 * Records that an item is indexed under a topic.
 *
 * @param {Map} map - Index from topic id to the Set of item ids; mutated in place.
 * @param {*} topicId - Key under which the item is recorded.
 * @param {*} itemId - Item identifier added to the topic's set.
 */
function push(map, topicId, itemId) {
  const existing = map.get(topicId);
  // Reuse the set already stored for this topic, otherwise start a new one.
  const bucket = existing ? existing.add(itemId) : new Set([itemId]);
  map.set(topicId, bucket);
}
/**
 * Builds the path leading to a topic by following, at each level, the first
 * entry of the topic's `broader` list.
 *
 * Unlike a naive recursion, this stops cleanly when `broader` is an empty
 * list and when the hierarchy loops back on a topic already visited.
 *
 * @param {string} topicId - Identifier of the topic the path ends with.
 * @returns {Array} Topic ids ordered from the most general ancestor down to
 *   `topicId` itself (always the last element).
 */
function _getTopicPath(topicId) {
  const path = [];
  const visited = new Set();
  let currentId = topicId;
  // Walk upwards; the visited set guards against cyclic hierarchies.
  while (!visited.has(currentId)) {
    visited.add(currentId);
    path.push(currentId);
    const topic = _getTopic(currentId);
    const parent = topic && topic.broader && topic.broader[0];
    if (!parent) break;
    currentId = parent.id;
  }
  // Ids were collected from the topic up to the root; callers expect root first.
  return path.reverse();
}
/**
 * Computes one topic path per topic attached to an item.
 *
 * @param {Object} item - Item whose optional `topic` list holds objects with an `id`.
 * @returns {Array} One path (as returned by _getTopicPath) per attached topic;
 *   empty when the item has no `topic` list.
 */
function _getItemTopicsPaths(item) {
  const attachedTopics = item.topic || [];
  return attachedTopics.map(({ id }) => _getTopicPath(id));
}
/**
 * Lists every topic id found on the paths of an item's topics, i.e. the
 * attached topics together with their ancestors.
 *
 * @param {Object} item - Item passed on to _getItemTopicsPaths.
 * @returns {Array} The paths concatenated in order; an id shared by several
 *   paths appears once per path.
 */
function _getRecursiveItemTopics(item) {
  const paths = _getItemTopicsPaths(item);
  return paths.reduce((allTopics, path) => allTopics.concat(path), []);
}
Tested on the stained-glasses portfolio (always the same item, 14 topics).
10 000 items
Time | Task |
---|---|
0,05s – 0,1s | getView(user) |
7,0 s – 8,0 s | getView(corpora, viewpoints) |
0,000 s | restructuring viewpoints |
0,009 s | restructuring items |
0,090 s | co-occurrences |
50 000 items
Time | Task |
---|---|
0,05s – 0,1s | getView(user) |
30s – 35s | getView(corpora, viewpoints) |
0,002 s | restructuring viewpoints |
0,030 s | restructuring items |
0,376 s | co-occurrences |
100 000 items
Time | Task |
---|---|
0,1s – 0,2s | getView(user) |
65s – 75s | getView(corpora, viewpoints) |
0,007 s | restructuring viewpoints |
0,050 s | restructuring items |
0,815 s | co-occurrences |
An unpublished paper by Fréderic Merle, Aurélien Bénel and Yann Barloy (written in 2013-2014) compared the efficiency of 3 (or even 4) index structures usable to speed up the multidimensional browsing algorithm. One of them appeared to be far more efficient than those that were tested in earlier and current versions of Porphyry.
Please note, however, that as with any index, building and updating it takes time.