no-context / moo

Optimised tokenizer/lexer generator! 🐄 Uses /y for performance. Moo.
BSD 3-Clause "New" or "Revised" License
821 stars 65 forks source link

has() returns false for keyword tokens that aren't included in lexer.groups #50

Closed deltaidea closed 7 years ago

deltaidea commented 7 years ago
const lexer = moo.compile({
  keyword: 'foo',
  identifier: /[a-z]+/
})
> lexer.has('keyword')
  false
> lexer.has('identifier')
  true

You haven't pushed the latest code to GitHub (npm is ahead of master). Here's a solution:

  Lexer.prototype.has = function(tokenType) {
    var stateKeys = Object.getOwnPropertyNames(this.states)
    for (var i = stateKeys.length; i--; ) {
      var state = this.states[stateKeys[i]];
      for (var j = state.groups.length; j--; ) {
        var group = state.groups[j]
        if (group.tokenType === tokenType) return true
        if (group.keywords) {
          var keywordKeys = Object.getOwnPropertyNames(group.keywords)
          for (var k = keywordKeys.length; k--; ) {
            var keyword = group.keywords[keywordKeys[k]]
            if (keyword.tokenType === tokenType) return true
          }
        }
      }
    }
    return false
  }

Tests

describe('has', () => {

  test('works with multiple states', () => {
    // Example from the readme.
    let lexer = moo.states({
      main: {
        strstart: {match: '`', push: 'lit'},
        ident:    /\w+/,
        lbrace:   {match: '{', push: 'main'},
        rbrace:   {match: '}', pop: 1},
        colon:    ':',
        space:    {match: /\s+/, lineBreaks: true},
      },
      lit: {
        interp:   {match: '${', push: 'main'},
        escape:   /\\./,
        strend:   {match: '`', pop: 1},
        const:    {match: /(?:[^$`]|\$(?!\{))+/, lineBreaks: true},
      },
    })
    expect(lexer.has('strstart')).toEqual(true)
    expect(lexer.has('ident')).toEqual(true)
    expect(lexer.has('interp')).toEqual(true)
    expect(lexer.has('main')).toEqual(false)
  })

  test('works with keywords', () => {
    let lexer = moo.compile({
      keyword: 'foo',
      identifier: /[a-z]+/
    })
    expect(lexer.has('keyword')).toEqual(true)
    expect(lexer.has('identifier')).toEqual(true)
    expect(lexer.has('')).toEqual(false)
    expect(lexer.has('foo')).toEqual(false)
  })

})
deltaidea commented 7 years ago

This would also close #47.

tjvr commented 7 years ago

You haven't pushed the latest code to GitHub (npm is ahead of master)

Oops, thanks. Fixed!

tjvr commented 7 years ago

Your solution looks to be along the right lines; please could you send a PR? Thanks! :-)

deltaidea commented 7 years ago

Sure! Which implementation style would you prefer: the one above or this:

  Lexer.prototype.has = function(tokenType) {
    // Both lexer.states and group.keywords are created with no prototype
    // so it's safe to iterate without the `hasOwnProperty` check.
    for (var s in this.states) {
      for (var g = this.states[s].groups.length; g--; ) {
        if (this.states[s].groups[g].tokenType === tokenType) return true
        if (this.states[s].groups[g].keywords) {
          for (var k in this.states[s].groups[g].keywords ) {
            if (this.states[s].groups[g].keywords[k].tokenType === tokenType) return true
          }
        }
      }
    }
    return false
  }

It's less verbose but with more property accesses.

tjvr commented 7 years ago

I think it should be alright to use in here (we're not particularly concerned about perf, since one should only call has at compile-time); so I think I'd prefer the second one, since that seems easier to read—but without the verbosity/repetition :-)

deltaidea commented 7 years ago

Got it! Did it even better (#51).