Closed gabssnake closed 6 years ago
Had to rebase to include e4a1020
So, this was an executive decision in https://github.com/mcollina/bloomrun/pull/45, to allow a huge speed up.
Are you open to having the feature if perf is maintained? I have an application that currently depends on it. Maybe it can be limited to one level deep for saneness, if that is an issue.
@gabssnake yes.
Ok, I’ll give it a go.
Is an opt-in flag an acceptable option?
Maybe bloomrun({ indexing: 'depth', recurse: true })
.
Good news! It looks like there is no negative impact in performance. We might even gain a few microseconds with this PR! No flag is needed.
Data was taken on a 2.2 Ghz Intel i7 with 8GB 1600 Mhz DDR3. See source code for raw data generation.
Two implementations are compared:
baseline
which is the current master branchexperimental
which is master plus this PR, which provides deep lookupTwo scenarios are tested:
3-entries
: list
method on a bloomrun set with 3 entries400-entries
: list
method on a bloomrun set with 400 entriesIn the graph below:
Shoutout to my colleague and friend Guillaume Artero, who did this awesome ECDF visualisation and provided valuable insight into its meaning.
Wow, good work!
@mcdonnelldean what do you think?
@gabssnake epic! @mcollina Yup I'm happy to have the feature back as long as perf is maintained
@mcdonnelldean as you anticipated, there seems to be some cases that will not pass. I’ll see what’s going on.
Been trying to understand what should be the expected behaviour. Would you agree that the following seems reasonable?
test('preserve insertion order regardless of depth (plain)', function (t) {
t.plan(1)
var instance = bloomrun()
instance.add({ some: 'action' }, 1)
instance.add({ some: 'action', hello: 'there' }, 2)
instance.add({ some: 'action', hello: 'there', friend: 'yes' }, 3)
instance.add({ some: 'action' }, 4)
instance.add({ some: 'action' }, 5)
var pattern = { some: 'action', hello: 'there', friend: 'yes', other: 'field' }
t.deepEqual(instance.list(pattern), [1, 2, 3, 4, 5])
})
test('preserve insertion order regardless of depth (nested)', function (t) {
t.plan(1)
var instance = bloomrun()
instance.add({ nested: { another: 'value' } }, 1)
instance.add({ nested: { another: 'value', heavier: 'here' } }, 2)
instance.add({ nested: { another: 'value' } }, 3)
instance.add({ nested: { heavier: 'here' } }, 4)
instance.add({ nested: { heavier: 'here' } }, 5)
var pattern = { nested: { another: 'value', heavier: 'here' } }
t.deepEqual(instance.list(pattern), [1, 2, 3, 4, 5])
})
test('preserve depth and then insertion order (plain)', function (t) {
t.plan(1)
var instance = bloomrun({ indexing: 'depth' })
instance.add({ some: 'action' }, 1)
instance.add({ some: 'action', hello: 'there' }, 2)
instance.add({ some: 'action', hello: 'there', friend: 'yes' }, 3)
instance.add({ some: 'action' }, 4)
instance.add({ some: 'action' }, 5)
var pattern = { some: 'action', hello: 'there', friend: 'yes', other: 'field' }
t.deepEqual(instance.list(pattern), [3, 2, 1, 4, 5])
})
test('preserve depth and then insertion order (nested)', function (t) {
t.plan(1)
var instance = bloomrun({ indexing: 'depth' })
instance.add({ nested: { another: 'value' } }, 1)
instance.add({ nested: { another: 'value', heavier: 'here' } }, 2)
instance.add({ nested: { another: 'value' } }, 3)
instance.add({ nested: { heavier: 'here' } }, 4)
instance.add({ nested: { heavier: 'here' } }, 5)
var pattern = { nested: { another: 'value', heavier: 'here' } }
t.deepEqual(instance.list(pattern), [2, 1, 3, 4, 5])
})
Then it must follow that this is also true:
test('preserve insertion order regardless of depth (plain), buckets', function (t) {
t.plan(1)
var instance = bloomrun()
instance.add({ friend: 'yes' }, 1)
instance.add({ some: 'action', hello: 'there' }, 2)
instance.add({ hello: 'there', some: 'action', friend: 'yes' }, 3)
instance.add({ hello: 'there' }, 4)
instance.add({ some: 'action' }, 5)
var pattern = { some: 'action', hello: 'there', friend: 'yes', other: 'field' }
t.deepEqual(instance.list(pattern), [1, 2, 3, 4, 5])
})
test('preserve insertion order regardless of depth (nested), buckets', function (t) {
t.plan(1)
var instance = bloomrun()
instance.add({ nested: { another: 'value' } }, 1)
instance.add({ nested: { heavier: 'here', another: 'value' } }, 2)
instance.add({ nested: { another: 'value' } }, 3)
instance.add({ plain: 'there' }, 4)
instance.add({ nested: { heavier: 'here' }, }, 5)
var pattern = { nested: { another: 'value', heavier: 'here' }, plain: 'there' }
t.deepEqual(instance.list(pattern), [1, 2, 3, 4, 5])
})
test('preserve depth and then insertion order (plain), buckets', function (t) {
t.plan(1)
var instance = bloomrun({ indexing: 'depth' })
instance.add({ friend: 'yes' }, 1)
instance.add({ some: 'action', hello: 'there' }, 2)
instance.add({ hello: 'there', some: 'action', friend: 'yes' }, 3)
instance.add({ other: 'field' }, 4)
instance.add({ some: 'action' }, 5)
var pattern = { some: 'action', hello: 'there', friend: 'yes', other: 'field' }
t.deepEqual(instance.list(pattern), [3, 2, 1, 4, 5])
})
test('preserve depth and then insertion order (nested), buckets', function (t) {
t.plan(1)
var instance = bloomrun({ indexing: 'depth' })
instance.add({ nested: { another: 'value' } }, 1)
instance.add({ nested: { another: 'value', heavier: 'here' } }, 2)
instance.add({ nested: { heavier: 'here' } }, 3)
instance.add({ nested: { also: 'there' } }, 4)
instance.add({ nested: { heavier: 'here' } }, 5)
var pattern = { nested: { another: 'value', heavier: 'here', also: 'there' } }
t.deepEqual(instance.list(pattern), [2, 1, 3, 4, 5])
})
The first set of tests will pass in v2 and mostly v3. The second set fails in both versions.
@gabssnake the problem you are trying to solve is how to handle overlaps between buckets. When using the indexing depth, buckets are ordered, and then the objects within the buckets are matched (in order).
When using depth indexing, order can only guaranteed within the same bucket. It is possible that some object would match generally before another, but the overall bucket ordering prevent that. Of course, any solution that address that problem is very welcomed (even a clarification on the README).
It is indeed subtle! For now, I would like to tackle only the first set of tests for this PR (functionality equivalent to v2).
Does that sound reasonable @mcdonnelldean @mcollina ?
@gabssnake 👍 , go ahead.
Closing for lack of activity, feel free to reopen!
Covers the following case, which used to work in v2 but fails in v3.