klaudiosinani / dsforest

Disjoint-set forests for ES6
MIT License
16 stars 0 forks source link

Merging complex shapes #21

Open tommedema opened 5 years ago

tommedema commented 5 years ago

In my case I have complex shapes like so:

const a = { names: ['Tom, 'Peter'], deviceIds: ['A', 'B'], emails: ['one@gmail.com'] }

const b = { names: [], deviceIds: ['A', 'C'], emails: ['two@gmail.com'] }

If I then do union(a, b) I would expect a result like:

const rootNode = { names: ['Tom', 'Peter'], deviceIds: ['A', 'B', 'C'], emails: 'one@gmail.com, two@gmail.com'] }

i.e. the shapes are merged with duplicates filtered out. How would I achieve this with dsforest?

tommedema commented 5 years ago

I've since implemented this myself. Let me know if this is solid / useful:

  get uniqueForestNodes(): T[] {
    return Object.keys(this.size).reduce((acc, key) => {
      const entry = this.parent[key]
      if (entry !== undefined) {
        acc.push(entry)
      }

      return acc
    }, [] as T[])
  }

  /**
   * Find the first matching value in the set. Useful to union (group)
   * together complex shapes based on a comparison function.
   */
  public scanMatch(
    value: T,
    matchFn: (comparisonNode: T, originalNode: T) => boolean
  ): T | undefined {
    const thisId = this.idAccessorFn(value);
    const matchNode = this.uniqueForestNodes
      .find(
        n => {
          const nodeId = this.idAccessorFn(n)
          const result = nodeId !== thisId && matchFn(n, value)

          return result
        }
      )

    if (matchNode !== undefined) {
      const matchId = this.idAccessorFn(matchNode)

      const entry = this.parent[matchId]
      if (entry !== value && entry !== undefined) {
        this.parent[matchId] = this._findSet(entry);
      }

      return this.parent[matchId];
    }

    return undefined
  }

  public unionMergeUnique(
    x: T,
    y: T,
    mergeFn: (mergeFrom: T, mergeTo: T) => T
  ): this {
    if (this.includes(x) && this.includes(y)) {
      let xRep = this._findSet(x);
      let yRep = this._findSet(y);

      if (xRep !== yRep && xRep !== undefined && yRep !== undefined) {
        let xRepId = this.idAccessorFn(xRep);
        let yRepId = this.idAccessorFn(yRep);
        const rankDiff = this.rank[xRepId] - this.rank[yRepId];

        if (rankDiff === 0) {
          this.rank[xRepId] += 1;
        } else if (rankDiff < 0) {
          [xRep, yRep] = [yRep, xRep];
          [xRepId, yRepId] = [yRepId, xRepId];
        }

        this.parent[yRepId] = mergeFn(yRep, xRep);
        this.size[xRepId] += this.size[yRepId];
        delete this.size[yRepId];
        this.sets -= 1;
      }
    }

    return this;
  }

Usage:

export const groupRootUsers = (
  rootUsers: IUser[]
): IUser[] => {
  // Define a new ID for each root user
  const rootUsersWithIds = rootUsers.map(u => ({...u, id: uuid()}))

  // Prepare a disjoint set
  const set = new DisjointSet<IIdUser>(u => u.id)

  // Create a new root for each user
  rootUsersWithIds.forEach(u => set.makeSet(u))

  // Group users with identical email or device ID
  const nodes = set.forestNodes
  for (const rootUser of nodes) {
    const match = set.scanMatch(
      rootUser,
      (comparisonNode) => {
        for (const email of rootUser.emails) {
          if (comparisonNode.emails.includes(email)) {
            return true
          }
        }

        for (const deviceId of rootUser.deviceIds) {
          if (comparisonNode.deviceIds.includes(deviceId)) {
            return true
          }
        }

        return false
      }
    )

    if (match !== undefined) {
      set.unionMergeUnique(
        rootUser,
        match,
        (mergeFrom: IIdUser, mergeTo: IIdUser) => {
          const mergeKeys: Array<keyof Omit<IIdUser, 'id'>> = ['names', 'emails', 'deviceIds', 'uniqueCommentDates']
          for (const mergeKey of mergeKeys) {
            for (const value of mergeFrom[mergeKey]) {
              if (!mergeTo[mergeKey].includes(value)) {
                mergeTo[mergeKey].push(value)
              }
            }
          }

          return mergeTo
        }
      )
    }
  }

  return set.uniqueForestNodes
}