uhop / node-re2

node.js bindings for RE2: fast, safe alternative to backtracking regular expression engines.
Other
479 stars 53 forks source link

Native Regex and RE2 capture group indices inconsistencies #170

Closed teebu closed 1 year ago

teebu commented 1 year ago

RE2 version: 1.19.0

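There is an inconsistency between RE2 and the native RegExp in the capture group indices (the `indices` array produced by the `d` flag). The first match agrees, but for the second match RE2 reports the wrong start/end positions: [ 1, 7 ] instead of the native [ 4, 10 ].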

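It looks like something is wrong with the offset calculation: in the output below, the RE2 indices for the second match ([ 1, 7 ]) equal the native ones ([ 4, 10 ]) minus 3, which is the lastIndex at which the second search started. So the indices appear to be computed relative to the search start position rather than the start of the input string.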

// Reproduction: run the same pattern over the same string with the native RegExp
// and with RE2, logging every match so the `indices` arrays can be compared.
let m;
// Flags: d = report match `indices`, g = global (exec() advances lastIndex), m = multiline.
const regex_test = /(foo*)/dgm;
const str = "foo fooooo";

regex_test.lastIndex = 0; // reset pointer before the loop
while ((m = regex_test.exec(str)) !== null) {
    console.log('regex_test.lastIndex:', regex_test.lastIndex)
    // Stop if a match at index 0 left lastIndex at 0 (presumably a guard against looping forever).
    if (m.index === 0 && regex_test.lastIndex === 0) break;
    console.log(m);
}

// RE2
// Build the RE2 instance from the native regex object so both loops use the same pattern
// (presumably the flags carry over too; the RE2 results also contain `indices`).
let re2_regex = new RE2(regex_test);
console.log('RE2')
re2_regex.lastIndex = 0; // reset pointer before the loop
while ((m = re2_regex.exec(str)) !== null) {
    console.log('re2_regex.lastIndex:', re2_regex.lastIndex)
    // Same early-exit check as in the native loop above.
    if (m.index === 0 && re2_regex.lastIndex === 0) break;
    console.log(m);
}

Result:

regex_test.lastIndex: 3
[
  'foo',
  'foo',
  index: 0,
  input: 'foo fooooo',
  groups: undefined,
  indices: [ [ 0, 3 ], [ 0, 3 ], groups: undefined ]
]
regex_test.lastIndex: 10
[
  'fooooo',
  'fooooo',
  index: 4,
  input: 'foo fooooo',
  groups: undefined,
  indices: [ [ 4, 10 ], [ 4, 10 ], groups: undefined ]
]

RE2
re2_regex.lastIndex: 3
[
  'foo',
  'foo',
  index: 0,
  input: 'foo fooooo',
  groups: undefined,
  indices: [ [ 0, 3 ], [ 0, 3 ], groups: undefined ]
]
re2_regex.lastIndex: 10
[
  'fooooo',
  'fooooo',
  index: 4,
  input: 'foo fooooo',
  groups: undefined,
  indices: [ [ 1, 7 ], [ 1, 7 ], groups: undefined ]
]