Swatinem / diff

implementation of myers diff algorithm
82 stars 9 forks source link

Any plan for an ES+ refresh? #1

Closed WebReflection closed 4 years ago

WebReflection commented 4 years ago

I am trying to use the smallest, most efficient, algorithm for µdomdiff, and while in the original domdiff I also implemented E. Meyers diffing, it bails out quickly but I've found your implementation super interesting and concise. (also hat tip for the 100% code coverage, there are not so many that do that 👍).

That being said, this module has no updates whatsoever since 6 years ago, so I went ahead and created a slightly more performant version, that also minifies better than current one.

Please have a look:

const {from} = Array;
const {create} = Object;

export const DELETE  = 'del';
export const INSERT  = 'ins';
export const NOOP    = 'nop';
export const REPLACE = 'rep';

export const diff = (a, b, eql = (a, b) => (a === b)) => {
  const lcs = (astart, aend, bstart, bend) => {
    while (astart < aend && bstart < bend && eql(a[astart], b[bstart])) {
      astart++;
      bstart++;
    }
    while (astart < aend && bstart < bend && eql(a[aend - 1], b[bend - 1])) {
      aend--;
      bend--;
    }
    if (astart === aend) {
      while (bstart < bend)
        modb[bstart++] = true;
    }
    else if (bend === bstart) {
      while (astart < aend)
        moda[astart++] = true;
    }
    else {
      const {x, y, u, v} = snake(astart, aend, bstart, bend);
      lcs(astart, x, bstart, y);
      lcs(u, aend, v, bend);
    }
  };
  const snake = (astart, aend, bstart, bend) => {
    const N = aend - astart;
    const M = bend - bstart;
    const kdn = astart - bstart;
    const kup = aend - bend;
    const delta = N - M;
    const deltaOdd = delta & 1;
    const Dmax = (N + M + 1) / 2;
    down[kdn + 1] = astart;
    up[kup - 1] = aend;
    for (let D = 0; D <= Dmax; D++) {
      let k, x, y;
      for (k = kdn - D; k <= kdn + D; k += 2) {
        if (k === kdn - D)
          x = down[k + 1];
        else {
          x = down[k - 1] + 1;
          if ((k < kdn + D) && (down[k + 1] >= x))
            x = down[k + 1];
        }
        y = x - k;
        while (x < aend && y < bend && eql(a[x], b[y])) {
          x++;
          y++;
        }
        down[k] = x;
        if (deltaOdd && (kup - D < k) && (k < kup + D) && up[k] <= down[k])
          return {
            x: down[k],
            y: down[k] - k,
            u: up[k],
            v: up[k] - k,
          };
      }
      for (k = kup - D; k <= kup + D; k += 2) {
        if (k === kup + D)
          x = up[k - 1];
        else {
          x = up[k + 1] - 1;
          if ((k > kup - D) && (up[k - 1] < x))
            x = up[k - 1];
        }
        y = x - k;
        while (x > astart && y > bstart && eql(a[x - 1], b[y - 1])) {
          x--;
          y--;
        }
        up[k] = x;
        if (!deltaOdd && (kdn - D <= k) && (k <= kdn + D) && up[k] <= down[k])
          return {
            x: down[k],
            y: down[k] - k,
            u: up[k],
            v: up[k] - k,
          };
      }
    }
  };
  const aend = a.length;
  const bend = b.length;
  const moda = from(Array(aend), () => false);
  const modb = from(Array(bend), () => false);
  const down = create(null);
  const up = create(null);
  const result = [];
  let astart = 0;
  let bstart = 0;
  lcs(0, aend, 0, bend);
  while (astart < aend || bstart < bend) {
    if (astart < aend && bstart < bend) {
      if (!moda[astart] && !modb[bstart]) {
        result.push(NOOP);
        astart++;
        bstart++;
        continue;
      }
      else if (moda[astart] && modb[bstart]) {
        result.push(REPLACE);
        astart++;
        bstart++;
        continue;
      }
    }
    if (astart < aend && (bstart >= bend || moda[astart])) {
      result.push(DELETE);
      astart++;
    }
    if (bstart < bend && (astart >= aend || modb[bstart])) {
      result.push(INSERT);
      bstart++;
    }
  }
  return result;
};

If you don't care/have plans to update this module as is, would it be OK if I publish it either with a major update of your current name, which is quite the best name for this kind of module, or if I publish it with a different name, maybe using the same license and attributions you have here, even if my stuff is usually MIT/ISC?

Thanks in advance for any sort of outcome 👋

Swatinem commented 4 years ago

update of your current name, which is quite the best name for this kind of module

hm… even though it has name: "diff", this actually was never published to npm, so the package named diff on npm is actually a different one :-D

In either case, I don’t really much care what happens with this package, as I haven’t touched it in 6 years apparently.

WebReflection commented 4 years ago

So how do I eventually re-publish this?

Thanks in advance for suggestions.

... and btw, I've made other few changes, my latest status is this one, if interested (TL;DR using Int8Array to avoid the fancy Array dance + dropping eql as I've never seen it used in a meaningful way anywhere ... worth saying I'm trying to reduce the size as much as I can, but apparently this is the best I can get):

export const DELETE  = 'del';
export const INSERT  = 'ins';
export const NOOP    = 'nop';
export const REPLACE = 'rep';

export const diff = (a, b) => {
  const lcs = (astart, aend, bstart, bend) => {
    while (astart < aend && bstart < bend && (a[astart] === b[bstart])) {
      astart++;
      bstart++;
    }
    while (astart < aend && bstart < bend && (a[aend - 1] === b[bend - 1])) {
      aend--;
      bend--;
    }
    if (astart === aend) {
      while (bstart < bend)
        modb[bstart++] = 1;
    }
    else if (bend === bstart) {
      while (astart < aend)
        moda[astart++] = 1;
    }
    else {
      const path = snake(astart, aend, bstart, bend);
      lcs(astart, path[0], bstart, path[1]);
      lcs(path[2], aend, path[3], bend);
    }
  };
  const snake = (astart, aend, bstart, bend) => {
    const N = aend - astart;
    const M = bend - bstart;
    const kdn = astart - bstart;
    const kup = aend - bend;
    const delta = N - M;
    const deltaOdd = delta & 1;
    const Dmax = (N + M + 1) / 2;
    down[kdn + 1] = astart;
    up[kup - 1] = aend;
    for (let D = 0; D <= Dmax; D++) {
      let k = 0, x = 0, y = 0;
      for (k = kdn - D; k <= kdn + D; k += 2) {
        if (k === kdn - D)
          x = down[k + 1];
        else {
          x = down[k - 1] + 1;
          if ((k < kdn + D) && (x <= down[k + 1]))
            x = down[k + 1];
        }
        y = x - k;
        while (x < aend && y < bend && (a[x] === b[y])) {
          x++;
          y++;
        }
        down[k] = x;
        if (deltaOdd && (kup - D < k) && (k < kup + D) && up[k] <= down[k])
          return [down[k], down[k] - k, up[k], up[k] - k];
      }
      for (k = kup - D; k <= kup + D; k += 2) {
        if (k === kup + D)
          x = up[k - 1];
        else {
          x = up[k + 1] - 1;
          if ((k > kup - D) && (up[k - 1] < x))
            x = up[k - 1];
        }
        y = x - k;
        while (x > astart && y > bstart && (a[x - 1] === b[y - 1])) {
          x--;
          y--;
        }
        up[k] = x;
        if (!deltaOdd && (kdn - D <= k) && (k <= kdn + D) && up[k] <= down[k])
          return [down[k], down[k] - k, up[k], up[k] - k];
      }
    }
  };
  const aend = a.length;
  const bend = b.length;
  const moda = new Int8Array(aend);
  const modb = new Int8Array(bend);
  const down = {};
  const up = {};
  const result = [];
  let astart = 0;
  let bstart = 0;
  lcs(0, aend, 0, bend);
  while (astart < aend || bstart < bend) {
    if (astart < aend && bstart < bend) {
      if (!moda[astart] && !modb[bstart]) {
        result.push(NOOP);
        astart++;
        bstart++;
        continue;
      }
      else if (moda[astart] && modb[bstart]) {
        result.push(REPLACE);
        astart++;
        bstart++;
        continue;
      }
    }
    if (astart < aend && (bend <= bstart || moda[astart])) {
      result.push(DELETE);
      astart++;
    }
    if (bstart < bend && (aend <= astart || modb[bstart])) {
      result.push(INSERT);
      bstart++;
    }
  }
  return result;
};
Swatinem commented 4 years ago

Maybe you misunderstood. I don’t own the diff name on npm. So you can publish under whatever name you like. Maybe just link to my repo. I mostly just copy-pasted this from somewhere else as well, so I don’t think it matters that much.

WebReflection commented 4 years ago

Thank you!