test: patterns with nested objects are supported

gabssnake commented 7 years ago

Covers the following case, which used to work in v2 but fails in v3.

test('patterns with nested objects are supported', function (t) {
  t.plan(3)

  var instance = bloomrun()
  var pattern = { group: '123', something: { another: 'value' } }
  instance.add(pattern, 'found me!')

  t.deepEqual(instance.lookup(pattern), 'found me!')
  t.deepEqual(instance.list(pattern), ['found me!'])

  instance.remove(pattern, 'found me!')
  t.deepEqual(instance.lookup(pattern), null)
})

gabssnake commented 7 years ago

Had to rebase to include e4a1020

mcollina commented 7 years ago

So, this was an executive decision in https://github.com/mcollina/bloomrun/pull/45, to allow a huge speed up.

gabssnake commented 7 years ago

Are you open to having the feature if perf is maintained? I have an application that currently depends on it. Maybe it can be limited to one level deep for saneness, if that is an issue.

mcollina commented 7 years ago

@gabssnake yes.

gabssnake commented 7 years ago

Ok, I’ll give it a go.

Is an opt-in flag an acceptable option?

Maybe bloomrun({ indexing: 'depth', recurse: true }).

gabssnake commented 7 years ago

Good news! It looks like there is no negative impact in performance. We might even gain a few microseconds with this PR! No flag is needed.

Data was taken on a 2.2 Ghz Intel i7 with 8GB 1600 Mhz DDR3. See source code for raw data generation.

Two implementations are compared:

baseline which is the current master branch
experimental which is master plus this PR, which provides deep lookup

Two scenarios are tested:

3-entries: list method on a bloomrun set with 3 entries
400-entries: list method on a bloomrun set with 400 entries

In the graph below:

on y axis, probability of lookup time being inferior to the time seen in the x axis
on x axis, the lookup time is in microseconds

Shoutout to my colleague and friend Guillaume Artero, who did this awesome ECDF visualisation and provided valuable insight into its meaning.

mcollina commented 7 years ago

Wow, good work!

@mcdonnelldean what do you think?

mcdonnelldean commented 7 years ago

@gabssnake epic! @mcollina Yup I'm happy to have the feature back as long as perf is maintained

gabssnake commented 7 years ago

@mcdonnelldean as you anticipated, there seems to be some cases that will not pass. I’ll see what’s going on.

gabssnake commented 7 years ago

Been trying to understand what should be the expected behaviour. Would you agree that the following seems reasonable?

test('preserve insertion order regardless of depth (plain)', function (t) {
  t.plan(1)
  var instance = bloomrun()

  instance.add({ some: 'action' }, 1)
  instance.add({ some: 'action', hello: 'there' }, 2)
  instance.add({ some: 'action', hello: 'there', friend: 'yes' }, 3)
  instance.add({ some: 'action' }, 4)
  instance.add({ some: 'action' }, 5)

  var pattern = { some: 'action', hello: 'there', friend: 'yes', other: 'field' }
  t.deepEqual(instance.list(pattern), [1, 2, 3, 4, 5])
})

test('preserve insertion order regardless of depth (nested)', function (t) {
  t.plan(1)
  var instance = bloomrun()

  instance.add({ nested: { another: 'value' } }, 1)
  instance.add({ nested: { another: 'value', heavier: 'here' } }, 2)
  instance.add({ nested: { another: 'value' } }, 3)
  instance.add({ nested: { heavier: 'here' } }, 4)
  instance.add({ nested: { heavier: 'here' } }, 5)

  var pattern = { nested: { another: 'value', heavier: 'here' } }
  t.deepEqual(instance.list(pattern), [1, 2, 3, 4, 5])
})

test('preserve depth and then insertion order (plain)', function (t) {
  t.plan(1)
  var instance = bloomrun({ indexing: 'depth' })

  instance.add({ some: 'action' }, 1)
  instance.add({ some: 'action', hello: 'there' }, 2)
  instance.add({ some: 'action', hello: 'there', friend: 'yes' }, 3)
  instance.add({ some: 'action' }, 4)
  instance.add({ some: 'action' }, 5)

  var pattern = { some: 'action', hello: 'there', friend: 'yes', other: 'field' }
  t.deepEqual(instance.list(pattern), [3, 2, 1, 4, 5])
})

test('preserve depth and then insertion order (nested)', function (t) {
  t.plan(1)
  var instance = bloomrun({ indexing: 'depth' })

  instance.add({ nested: { another: 'value' } }, 1)
  instance.add({ nested: { another: 'value', heavier: 'here' } }, 2)
  instance.add({ nested: { another: 'value' } }, 3)
  instance.add({ nested: { heavier: 'here' } }, 4)
  instance.add({ nested: { heavier: 'here' } }, 5)

  var pattern = { nested: { another: 'value', heavier: 'here' } }
  t.deepEqual(instance.list(pattern), [2, 1, 3, 4, 5])
})

Then it must follow that this is also true:

test('preserve insertion order regardless of depth (plain), buckets', function (t) {
  t.plan(1)
  var instance = bloomrun()

  instance.add({ friend: 'yes' }, 1)
  instance.add({ some: 'action', hello: 'there' }, 2)
  instance.add({ hello: 'there', some: 'action', friend: 'yes' }, 3)
  instance.add({ hello: 'there' }, 4)
  instance.add({ some: 'action' }, 5)

  var pattern = { some: 'action', hello: 'there', friend: 'yes', other: 'field' }
  t.deepEqual(instance.list(pattern), [1, 2, 3, 4, 5])
})

test('preserve insertion order regardless of depth (nested), buckets', function (t) {
  t.plan(1)
  var instance = bloomrun()

  instance.add({ nested: { another: 'value' } }, 1)
  instance.add({ nested: { heavier: 'here', another: 'value' } }, 2)
  instance.add({ nested: { another: 'value' } }, 3)
  instance.add({ plain: 'there' }, 4)
  instance.add({ nested: { heavier: 'here' }, }, 5)

  var pattern = { nested: { another: 'value', heavier: 'here' }, plain: 'there' }
  t.deepEqual(instance.list(pattern), [1, 2, 3, 4, 5])
})

test('preserve depth and then insertion order (plain), buckets', function (t) {
  t.plan(1)
  var instance = bloomrun({ indexing: 'depth' })

  instance.add({ friend: 'yes' }, 1)
  instance.add({ some: 'action', hello: 'there' }, 2)
  instance.add({ hello: 'there', some: 'action', friend: 'yes' }, 3)
  instance.add({ other: 'field' }, 4)
  instance.add({ some: 'action' }, 5)

  var pattern = { some: 'action', hello: 'there', friend: 'yes', other: 'field' }
  t.deepEqual(instance.list(pattern), [3, 2, 1, 4, 5])
})

test('preserve depth and then insertion order (nested), buckets', function (t) {
  t.plan(1)
  var instance = bloomrun({ indexing: 'depth' })

  instance.add({ nested: { another: 'value' } }, 1)
  instance.add({ nested: { another: 'value', heavier: 'here' } }, 2)
  instance.add({ nested: { heavier: 'here' } }, 3)
  instance.add({ nested: { also: 'there' } }, 4)
  instance.add({ nested: { heavier: 'here' } }, 5)

  var pattern = { nested: { another: 'value', heavier: 'here', also: 'there' } }
  t.deepEqual(instance.list(pattern), [2, 1, 3, 4, 5])
})

The first set of tests will pass in v2 and mostly v3. The second set fails in both versions.

mcollina commented 7 years ago

@gabssnake the problem you are trying to solve is how to handle overlaps between buckets. When using the indexing depth, buckets are ordered, and then the objects within the buckets are matched (in order).

When using depth indexing, order can only guaranteed within the same bucket. It is possible that some object would match generally before another, but the overall bucket ordering prevent that. Of course, any solution that address that problem is very welcomed (even a clarification on the README).

gabssnake commented 7 years ago

It is indeed subtle! For now, I would like to tackle only the first set of tests for this PR (functionality equivalent to v2).

Does that sound reasonable @mcdonnelldean @mcollina ?

mcollina commented 7 years ago

@gabssnake 👍 , go ahead.

mcollina commented 6 years ago

Closing for lack of activity, feel free to reopen!

mcollina / bloomrun

test: patterns with nested objects are supported #52