zaach / jison

Bison in JavaScript.
http://jison.org
4.36k stars 450 forks source link

C Declarations Section #188

Closed matthewkastor closed 11 years ago

matthewkastor commented 11 years ago

Does Jison support the Declarations Section?

From the Bison docs:

The C declarations section contains macro definitions and declarations of functions and variables that are used in the actions in the grammar rules. These are copied to the beginning of the parser file so that they precede the definition of yyparse. You can use #include to get the declarations from a header file. If you don't need any C declarations, you may omit the %{ and %} delimiters that bracket this section.

I want to have some custom functions built into the parser so I can use them in the actions of grammar rules. I haven't been able to get it to work though.

With the following three files in the same directory, run node build.pseudocode.js

pseudocode.jison

/* lexical grammar */
%lex
%%

\s+                   /* skip whitespace */
[0-9]+("."[0-9]+)?\b  return 'NUMBER'
"with"                return 'WITH'
<<EOF>>               return 'EOF'
.                     return 'INVALID'

/lex
%{
    // Helper referenced by the NUMBER action below; returns twice its argument.
    function custom (param) {
        return param * 2;
    }
%}
%% /* language grammar */

/* Start rule: a sequence of expressions followed by EOF. Its action prints
   the accumulated value and returns it from the parse. */
program
    : expressions EOF
        {
            typeof console !== 'undefined' ? console.log($1) : print($1);
            return $1;
        }
    ;

/* One or more `e` items; successive values are joined with a single space. */
expressions
    : e
    | expressions e 
        {
            $$ = $1 + ' ' + $2;
        }
    ;

/* A single item: a NUMBER (parsed base-10 and passed through custom())
   or the literal keyword matched by WITH. */
e
    : NUMBER
        {
            $$ = custom(parseInt(yytext, 10));
        }
    | WITH
        {
            $$ = yytext;
        }
    ;
%%

pseudocode.example

with 2500
2000 with

build.pseudocode.js

// Repro script: builds a parser from pseudocode.jison in memory, writes the
// generated source to pseudocode.js, then parses pseudocode.example.
var fs = require('fs');
var jison = require('jison');

// Read the grammar text and construct the parser directly from it.
var grammar = fs.readFileSync('./pseudocode.jison', 'utf8');
var parser = new jison.Parser(grammar);

// I suppose I could do `parser.generate() + function custom () {}.toString()`
// but that doesn't seem like a good idea either.
fs.writeFileSync('pseudocode.js', parser.generate(), 'utf8');

// I could do global.custom = function custom ...
// but I don't want to do that.
// Note: this parse uses the in-memory `parser` object, not the pseudocode.js
// file written just above.
parser.parse(
    fs.readFileSync('./pseudocode.example', 'utf8')
);

I get the following error saying that my custom function is not defined, and I can't find my function anywhere in the generated parser.

C:\directory>node build.pseudocode.js

undefined:16
            this.$ = custom(parseInt(yytext, 10));
                     ^
ReferenceError: custom is not defined
    at Object.eval (eval at buildProductions (C:\directory\node_modules\jison\lib\jison.js:250:30))
    at typal_constructor.parse (C:\directory\node_modules\jison\lib\jison.js:1387:40)
    at Object.<anonymous> (C:\directory\build.pseudocode.js:10:8)
    at Module._compile (module.js:449:26)
    at Object.Module._extensions..js (module.js:467:10)
    at Module.load (module.js:356:32)
    at Function.Module._load (module.js:312:12)
    at Module.runMain (module.js:492:10)
    at process.startup.processNextTick.process._tickCallback (node.js:244:9)
GerHobbelt commented 11 years ago

Since JISON generates JavaScript it works a little differently, but the %{ ... %} plus everything-after-the-last-%% concepts from lex/flex + yacc/bison are available in JISON: see gist https://gist.github.com/GerHobbelt/6400988 where I show a clipped&stripped grammar file where these bits are used/shown, among a few other things that are 'good to know' when you work with JISON. Read the code in that grammar file in the gist and you should be able to copy & paste the console.log()s and maybe a few other bits and see them appear in your generated JavaScript too.

Met vriendelijke groeten / Best regards,

Ger Hobbelt


web: http://www.hobbelt.com/ http://www.hebbut.net/ mail: ger@hobbelt.com

mobile: +31-6-11 120 978

On Sat, Aug 31, 2013 at 9:15 PM, Matthew Kastor <notifications@github.com> wrote:

Does Jison support the Declarations Section (http://dinosaur.compilertools.net/bison/bison_6.html#SEC35)?

From the Bison docs:

The C declarations section contains macro definitions and declarations of functions and variables that are used in the actions in the grammar rules. These are copied to the beginning of the parser file so that they precede the definition of yyparse. You can use #include to get the declarations from a header file. If you don't need any C declarations, you may omit the %{ and %} delimiters that bracket this section.

I want to have some custom functions built into the parser so I can use them in the actions of grammar rules. I haven't been able to get it to work though.

With the following three files in the same directory, run node build.pseudocode.js

pseudocode.jison

/* lexical grammar */ %lex %%

\s+ /* skip whitespace */ [0-9]+("."[0-9]+)?\b return 'NUMBER' "with" return 'WITH' <<EOF>> return 'EOF' . return 'INVALID'

/lex %{ function custom (param) { return param * 2; } %} %% /* language grammar */

program : expressions EOF { typeof console !== 'undefined' ? console.log($1) : print($1); return $1; } ;

expressions : e | expressions e { $$ = $1 + ' ' + $2; } ;

e : NUMBER { $$ = custom(parseInt(yytext, 10)); } | WITH { $$ = yytext; } ; %%

pseudocode.example

with 2500 2000 with

build.pseudocode.js

var fs = require('fs'); var jison = require('jison');

var grammar = fs.readFileSync('./pseudocode.jison', 'utf8'); var parser = new jison.Parser(grammar);

fs.writeFileSync('pseudocode.js', parser.generate(), 'utf8');

parser.parse( fs.readFileSync('./pseudocode.example', 'utf8') );

I get the following error saying that my custom function is not defined, and I can't find my function anywhere in the generated parser.

C:\directory>node build.pseudocode.js

undefined:16 this.$ = custom(parseInt(yytext, 10)); ^ ReferenceError: custom is not defined at Object.eval (eval at buildProductions (C:\directory\node_modules\jison\lib\jison.js:250:30)) at typal_constructor.parse (C:\directory\node_modules\jison\lib\jison.js:1387:40) at Object.<anonymous> (C:\directory\build.pseudocode.js:10:8) at Module._compile (module.js:449:26) at Object.Module._extensions..js (module.js:467:10) at Module.load (module.js:356:32) at Function.Module._load (module.js:312:12) at Module.runMain (module.js:492:10) at process.startup.processNextTick.process._tickCallback (node.js:244:9)

— Reply to this email directly or view it on GitHub: https://github.com/zaach/jison/issues/188

zaach commented 11 years ago

Jison supports them but not with a parser that's created dynamically, since the module (with the extra code sections) is never generated. I suppose as a workaround, you could try var parser = eval(new jison.Parser(grammar).generate()).

Is there a reason you aren't generating the parser and writing it to a file, rather than regenerating it from the grammar each time?

matthewkastor commented 11 years ago

Thanks for the gist @GerHobbelt It's huge but it looks like a good reference document.

@zaach I tried the eval trick but it didn't work. I was generating the parser and writing it to a file, but it looks like I have to use the CLI to get it to write the extra code sections, or maybe there's some option to the generate method I need to supply? I don't know. I just ran jison pseudocode.jison -o pseudocode.js and saw my custom functions show up in the output this time. I'm looking at https://github.com/zaach/jison/blob/master/lib/cli.js#L75 and seeing that there's no way for me to get there from scripts and have it do what I want. Maybe having main accept an options object and then setting opts = options || opts would let me require cli.js and generate a parser file.

I'm just thinking that eventually I'll be trying to set up a jake task and I'll have to kludge in a child process to fire up the cli which will immediately start node and I'll have to hook up event listeners and pigeons to everything just to get the file to come out right and manage some sort of control over the build.

I'll tinker with it later and send a pull request if you would consider letting people access main in the cli script or maybe move it all into jison so cli is just a require and a bit of scripting to package the command line args into an options argument. That way we'd be able to generate the files in scripts and from command lines.

matthewkastor commented 11 years ago

I just tried pseudocode.jison in the web demo. It says that the parser was generated successfully but when I try to parse pseudocode.example it tells me that custom is not defined. Since generating the parser on the command line simply copies the C Declarations and the Additional Code sections into the middle of the anonymous function returning the new Parser, it will work as expected. Though, I suspect the web demo is using the generate function that I used above instead of the generator from cli.js and ends up with a parser which is missing the C Declarations and Additional Code. Given that both sections are copied into a function which is immediately executed, they'll be effective if copied to the in memory parser and the parser will work as intended. Without copying them in, the parser will fail in any parser generated on the fly which uses either of these sections to define actions and variables crucial to parsing whatever they're working on.

I know things are different in C, but the locations described for the sections of user-defined code appear to be just prior to and after return new Parser; in the generated parser. I know that hoisting in JavaScript will reorder things so that any var or function in either section will in effect be declared before the initialization code at the bottom of this anonymous function. It's just the case where someone writes code to execute immediately in either of these sections that would get messed up. It seems like the order is supposed to be: C Declarations; output = new Parser; Additional Code; return output — with output being some meaningful name to access yyparse through. That way they can set up an environment prior to parser instantiation that both the constructor function and the prototype can see, and then just before the parser is returned they have another chance to augment that environment in any way necessary, including doing weird things to the parser to get their grammar files to work.

I'm thinking about this too much probably. I can see keeping the file generation functions in cli.js, somehow getting the parser generator to produce the source code as the cli does when calling generate, eliminating the parser generation functions from the cli and simply calling them from the main module so there's only one place to manage code generation from, then tweaking the template to get the C Declarations and Additional Code into the right spots.

I will think about it some more. I've probably missed something important.

zaach commented 11 years ago

Using main from the cli shouldn't be necessary. Just use the Generator constructor like the cli does instead of Parser. Most likely, there's a bug with the latter.

var generator = new jison.Generator(grammar, settings);
generator.generate(settings);

I'll look into the bug.

matthewkastor commented 11 years ago

@zaach Thanks for fixing it up. The Generator and Parser seem to be doing the same thing now when calling their generate methods. The generator and parser both appear to be including the code unless the moduleType is set to 'amd'.

Here is a little test using the pseudocode.jison file from above.

/**
 * Generates the parser described in `grammarFile` and searches it for
 *  the provided regex.
 * @param {String} modType The type of module to generate.
 * @param {String} grammarFile The path to the grammar file.
 * @param {Boolean} useGenerator Set true to use jison.Generator, otherwise
 *  jison.Parser will be used.
 * @param {RegExp} regex a regular expression to search for.
 * @returns {Boolean} Returns true or false depending on whether or not the
 *  regex was found.
 */
function generatePseudocodeParser (modType, grammarFile, useGenerator, regex) {
    "use strict";
    var fs = require('fs');
    var jison = require('jison');

    var pseudocodeFileContents;

    var grammarFileContent = fs.readFileSync(grammarFile, 'utf8');

    var generatorOptions = {
        type : 'lalr',
        debug: false,
        moduleType: modType || 'commonjs',
        moduleName: 'pseudocode'
    };

    if(useGenerator) {
        var generator = new jison.Generator(grammarFileContent, generatorOptions);
        pseudocodeFileContents = generator.generate();
    } else {
        var parser = new jison.Parser(grammarFileContent, generatorOptions);
        pseudocodeFileContents = parser.generate(generatorOptions);
    }

    return regex.test(pseudocodeFileContents);
}

/**
 * Calls generatePseudocodeParser for every combination of builder
 * (generator first, then parser) and module type ('commonjs', 'js', 'amd'),
 * looking for `function custom` in the output for ./pseudocode.jison.
 * Logs one line per failing combination as a single string; the string is
 * empty when nothing failed.
 */
function test () {
    var grammarPath = './pseudocode.jison';
    var pattern = /function custom/;
    var builders = [true, false];
    var kinds = ['commonjs', 'js', 'amd'];
    var report = '';
    var b, k;

    for (b = 0; b < builders.length; b += 1) {
        for (k = 0; k < kinds.length; k += 1) {
            if (generatePseudocodeParser(kinds[k], grammarPath, builders[b], pattern)) {
                continue;
            }
            report += 'module type ' + kinds[k] + ' fails using ' +
                (builders[b] ? 'generator' : 'parser') + '\n';
        }
    }

    console.log(report);
}

// Run the check as soon as the script is loaded.
test();
zaach commented 11 years ago

Thanks @matthewkastor, I've added a test for each type. :+1:

matthewkastor commented 11 years ago

Happy to help. :D