brython-dev / brython

Brython (Browser Python) is an implementation of Python 3 running in the browser
BSD 3-Clause "New" or "Revised" License
6.31k stars 513 forks source link

[OPTIMISATION] Improving argument parsing #2283

Open denis-migdal opened 9 months ago

denis-migdal commented 9 months ago

Current tasklist: https://github.com/brython-dev/brython/issues/2283#issuecomment-1779264084

See [2275] for other optimizations about function calls.

========================================================

I started working on the optimisation of $B.args0().

I will edit this first message in the future to give a link to a github dedicated to this project + to give a summary of the results and the current progression of the project.

I will use the messages below to discuss about this issue.

Some ideas here.

denis-migdal commented 9 months ago

@PierreQuentel I started to rewrite $B.args0. Currently, it only supports functions call with positional and *t parameters.

Note: We can't give positional and *t arguments on a **args parameter (at least with the implementation of Python I tried).

It seems currently x2 to x5 faster than current implementation. Tomorrow I will post a github with the code I use to test it.

Do you see any issues with this parser (cf below) ? Is there something I did not took into account, or do you see a set of parameters that would produce bad results ?

function SimpleParser(fct, args) {

    const result = {};

    const kargs  = args[args.length-2];
    const kwargs = args[args.length-1];

    const args_names    = fct.$infos.arg_names;
    const args_defaults = fct.$infos.__defaults__;
    const default_offset = args_names.length - args_defaults.length;
    const varargs_name  = fct.$infos.vararg;
    const kwargs_name   = fct.$infos.kwarg;

    const max = Math.max( args.length-2, args_names.length );

    // positional parameters...
    let offset = 0;
    for( ; offset < max ; ++offset)
        result[ args_names[offset] ] = args[offset];

    // vararg parameter
    if( varargs_name !== null )
        result[varargs_name] = args.slice( args_names.length, -2 );

    // positionnal only
    if( kargs === null && kwargs === null ) {

        if( default_offset < offset )
            throw new Error('XXX');

        // default parameters
        for(let i = offset - default_offset;
                i < args_defaults.length;
                ++i)
            result[ args_names[offset++] ] = args_defaults[i];

        if( kwargs_name !== null )
            result[kwargs_name] = __BRYTHON__.obj_dict({});

        return result;
    }

    //TODO: named / **args arguments
    //TODO: *arg  / **args parameters

    return result;
}
denis-migdal commented 8 months ago

@PierreQuentel Normally I should have a complete version now (?). I didn't fully tested it. I do not show the proper error messages (but that won't affect performances).

Do you see an issue with this implementation ?

On the very limited test I made, I am generally x1.7 faster than current implementation. In fact it is even faster as my measures include the time taken by the loop (so in fact I am normally ~x1.85 faster). Note: It only shows when testing >10000000 arguments parsing, else results are too unstables. Indeed, for small durations, browsers time measurement aren't very precise.

Some variables can still be precomputed to speed things up, and maybe some little tweaks could increase performances a little more :

There are some changes for function calls. The last arguments is always named arguments, therefore we don't need the strange {$kw:[]} structure, only an array or null if no named parameters. This could give us a very small speed increase too.

Here the web page I used to test it : test.html.zip This is quite dirty but it does the trick.

And here my argument parser :

function SimpleParser(fct, args) {

    const result = {};

    //TODO: rename :
    // - args   = when calling   the function.
    // - params = when declaring the function.

    const args_names    = fct.$infos.arg_names;
    const nb_pos_params = args_names.length;
    const varargs_name  = fct.$infos.vararg;
    const kwargs_name   = fct.$infos.kwarg;
    const nb_pos_args   = args.length-1;

    const min = Math.min( nb_pos_args, nb_pos_params );

    const kargss  = args[nb_pos_args];

    // positional parameters...
    let offset = 0;
    for( ; offset < min ; ++offset)
        result[ args_names[offset] ] = args[offset];

    // vararg parameter
    //TODO: NOT SURE FOR kargss !!!!
    if( varargs_name !== null )
        result[varargs_name] = args.slice( nb_pos_params, -1 );
    else if( nb_pos_args > nb_pos_params ) {
        throw new Error('Too much pos parameters');
    }

    // positionnal only
    if( kargss === null ) {

        const args_defaults = fct.$infos.__defaults__;
        const nb_defaults   = args_defaults.length;
        const default_offset= nb_pos_params - nb_defaults;

        if( default_offset < offset )
            throw new Error('Not enough pos parameters');

        // default parameters
        for(let i = offset - default_offset;
                i < nb_defaults;
                ++i)
            result[ args_names[offset++] ] = args_defaults[i];

        if( kwargs_name !== null )
            result[kwargs_name] = __BRYTHON__.obj_dict({});

        return result;
    }

    const kargs_names   = fct.$infos.karg_names ?? args_names.slice(); // TODO: missing !!!

    const keys  = kargs_names.slice(offset);
    // other structs possibles ???

    const extra = {};

    for(let id = 0; id < kargss.length; ++id ) {

        const
[test.html.zip](https://github.com/brython-dev/brython/files/13060472/test.html.zip)
 kargs = kargss[id];
        for(let argname in kargs) {

            let i = keys.indexOf(argname);

            if( i === -1) {
                if( kwargs_name === null )
                    throw new Error('Unfound named parameters or duplicate');

                // not quite optimized for *kwargs parameters...
                // no need for keys indexOf if *kwargs parameters.
                if(argname in result || argname in extra)
                    throw new Error('Defined many times !');

                extra[argname] = kargs[argname];
                continue;
            }

            result[ argname ] = kargs[argname];
            //delete keys[i];
            keys[i] = null;
        }
    }

    if( kwargs_name !== null )
        result[kwargs_name] = __BRYTHON__.obj_dict(extra);

    const kargs_defaults= fct.$infos.__kwdefaults__;

    // checks default values...
    for(let ioffset = 0; ioffset < keys.length; ++ioffset) {

        const key = keys[ioffset];
        if( key !== null ) {
            if( ! (key in kargs_defaults ))
                throw new Error('missing values !!');

            result[key] = kargs_defaults[key];
        }

    }

    return result;
}
denis-migdal commented 8 months ago

Hmmm... it seems I can be even faster for named argument processing :

This could be tested. But I think, first, we should determine whether the proposed implementation is correct.

denis-migdal commented 8 months ago

Still doing some tests (but I'll try to stop now xD).

Sources: parse_args.zip

See also : https://jfmengels.net/optimizing-javascript-is-hard/

PierreQuentel commented 8 months ago

Denis, I'm sorry but I need a Pull Request with a complete replacement for argument parsing, that is, a new version of the functions in py_utils.js that passes all the tests in the built-in test suite (/tests/index.html).

Otherwise it's impossible to know if your version correctly covers all the cases, and comparing its speed to that of the current implementation won't be useful.

denis-migdal commented 8 months ago

Denis, I'm sorry but I need a Pull Request with a complete replacement for argument parsing, that is, a new version of the functions in py_utils.js that passes all the tests in the built-in test suite (/tests/index.html).

Something isn't right.

I downloaded the zip from github, remplaced $B.arg0 in py_utils.js, called python make_dist.py, tested /test/index.html ... and I passed all test the first time... This isn't normal.

Are the tests relying more on $B.parse_args ? Do you have tests for all possible combination of parameters and arguments ? Is there another script I have to call to build the Brython files ?

EDIT: I throw an exception in my new function and still pass all the tests. It seems clear that my function isn't called. EDIT: did the same in parse_args.

denis-migdal commented 8 months ago

Also :

$ python3.12 make_release.py 
/tmp/brython-master/scripts/make_release.py:54: SyntaxWarning: invalid escape sequence '\d'
  content = re.sub("npm/brython@\d\.\d+\.\d+", "npm/brython@" + vname,
/tmp/brython-master/scripts/make_release.py:56: SyntaxWarning: invalid escape sequence '\d'
  content = re.sub("npm/brython@\d\.\d+\s", "npm/brython@" + vname2,
/tmp/brython-master/scripts/make_release.py:58: SyntaxWarning: invalid escape sequence '\d'
  content = re.sub("npm/brython@\d\.\d+\.x", "npm/brython@" + vname2 + '.x',
/tmp/brython-master/scripts/make_release.py:60: SyntaxWarning: invalid escape sequence '\d'
  content = re.sub("npm/brython@\d\s", "npm/brython@" + vname1,
/tmp/brython-master/scripts/make_release.py:62: SyntaxWarning: invalid escape sequence '\d'
  content = re.sub("npm/brython@\d\.x\.y", "npm/brython@" + vname1 + '.x.y',
/tmp/brython-master/scripts/make_release.py:64: SyntaxWarning: invalid escape sequence '\.'
  content = re.sub("3\.\d+\.x", f'3.{version.version[1]}.x', content)
Brython [3, 12]
CPython (3, 12)
Traceback (most recent call last):
  File "/tmp/brython-master/scripts/make_release.py", line 17, in <module>
    import make_ast_classes       # generates /src/py_ast_classes.js
    ^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/brython-master/scripts/make_ast_classes.py", line 12, in <module>
    f = open('Python.asdl', encoding='utf-8')
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: 'Python.asdl'
PierreQuentel commented 8 months ago

I downloaded the zip from github, remplaced $B.arg0 in py_utils.js, called python make_dist.py, tested /test/index.html ... and I passed all test the first time... This isn't normal.

I don't know what is wrong here.

You don't have to run make_dist.py, tests/index.html uses the individual Brython scripts (brython_builtins.js, py_utils.js, etc...).

Can you call _/src/pyutils.js in the address bar to see if it is the version you have modified ?

The error message in make_release.py does not surprise me. It is not a script meant to be used by Brython users, only the release manager :-). It uses files downloaded from various places by the script scripts/downloads.py.

denis-migdal commented 8 months ago

It seems the files are cached. I have to go to src/py_utils.js, then refresh the page, in order to update it after modifying it.

Now I get some errors :) Gonna see if I can fix them quick ;).

denis-migdal commented 8 months ago

It seems to work quite well (for now I got an error at line 529 of the basic test suite).

Got few small errors to fix :

My code may be a little slower as I don't have access to some pre-computed data in the format I'd need, so I have to compute them when parsing arguments.

For now, I also use the original $B.args0() to build the excepted exception.

So we'll have to discuss a little once I'd pass all tests ;).

I also have one question :

denis-migdal commented 8 months ago

Another question (sorry I am not quite used to Python) :

denis-migdal commented 8 months ago

Yeah I'm stuck at 1043 of the basic test suite because of this /. I don't understand this behavior :

>>> class X:
...     def foo(self, **args):
...             print(args)
... 
>>> x = X()
>>> x.foo(self=x)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: X.foo() got multiple values for argument 'self'
>>> class X:
...     def foo(self, /, **args):
...             print(args)
... 
>>> x = X()
>>> x.foo(self=x)
{'self': <__main__.X object at 0x7feb070463e0>} 

EDIT: I think I understand it now. I found no Python documentation on it. But it seems everything in the left-side of "/" can only be set using position argument, and it won't raise an error when using a key in a **args with the same name as a parameter name in the left-side of "/".

PierreQuentel commented 8 months ago

Your guess is correct. The reference here is the Language Reference, specifically the section about function definition.

denis-migdal commented 8 months ago

Thanks.

I'm struggling to fix little details like this xD. Passed the first test suite though ^^.

denis-migdal commented 8 months ago
completed all tests in 44.59 s.
failed : []

^^

Normal time with the current implementation : ~45 s.

However, comparison isn't fair :

denis-migdal commented 8 months ago

Currently, before any code cleaning, I am (on the very limited tests I made) :

But as I said before, tests are biased against my method as I test them first.

denis-migdal commented 8 months ago

Added a little shortcut, now x1.3 to x1.7 faster for positional arguments (ofc on the very limited speed tests I made).

Could be faster if I pre-computed stuff upon function creation.

EDIT: Also could remove some tests if functions had several args0() functions depending if it has *t, **args, etc. parameters.

denis-migdal commented 8 months ago

@PierreQuentel What should be the next step knowing that :

PierreQuentel commented 8 months ago

Great news Denis ! Can you submit a PR so that I can test what you have done so far ?

denis-migdal commented 8 months ago

Ok.

Tomorrow I'll rename some variables and add some comments, before submitting a PR.

denis-migdal commented 8 months ago

PR made. See #2287

denis-migdal commented 8 months ago

To speed up even more performances :

To have cleaner code :

PierreQuentel commented 8 months ago

Having a different version of $B.args0 if the function has default values or not is unfortunately not possible, because the default values can be set at run time, like in

def f(x):
  pass

try:
  f() # here, f() has no defaults
  raise Exception('should have raised TypeError')
except TypeError:
  pass

f.__defaults__ = (0,)
# f() now has a default value for x, so f() no more raises TypeError
f()
denis-migdal commented 8 months ago

Having a different version of $B.args0 if the function has default values or not is unfortunately not possible, because the default values can be set at run time

WTF Python xD.

Well, it could be possible if __defaults__ was implemented using a setter, that'd also modify the args0 used by the function ? Like fct.$args0(...). But I guess that'd become a little tricky ?

PierreQuentel commented 8 months ago

__defaults__ is already implemented as a setter in py_builtin_functions.js / $B.function.__setattr__ . It sets the attribute $defaults of the function object. The generated code could start with a test on f.$defaults and choose a different version of $B.args0 depending on its value ?

denis-migdal commented 8 months ago

The generated code could start with a test on f.$defaults and choose a different version of $B.args0 depending on its value ?

If we do such check before each call of $B.args0(), that'd kill performances.

I think it'd be better to initially set a fct.args0 = $B.args0_XXXX() during build time, and to update the value of fct.args0 inside the __defaults__ setter whenever its value is modified.

PierreQuentel commented 8 months ago

I think it'd be better to initially set a fct.args0 = $B.args0_XXXX() during build time, and to update the value of fct.args0 inside the defaults setter whenever its value is modified.

Agreed. In fact, a specific, optimized argument parsing function could be created for each function and updated if __defaults__ or __kwdefaults__ is set.

denis-migdal commented 8 months ago

Agreed. In fact, a specific, optimized argument parsing function could be created for each function and updated if __defaults__ or __kwdefaults__ is set.

That would be technically possible. But I am not sure if it is something we want :

Maybe we can build some functions with a cache :

  1. having some prebuilt function
  2. identify the function structure among ~500 possibilities.
  3. search if this function already exists
  4. if not create it and add it to the list.

But that is adding some complexity in the code. I think it'd be better, in a first time, to have some special functions for some special cases (e.g. no arguments), and then having a set of more generic functions for other cases.

We could build the generic functions depending on some parameters, little by little. But I'd be afraid to do a fully customized function directly.

denis-migdal commented 8 months ago

I tried to create a function for that. Couldn't test it in the train though, so if you want to try it :

const DEFAULTS = {
    NONE: 1,
    SOME: 2,
    ALL: 3
}

// deno run generator.js
// hasPos / posDefaults are pos parameters excluding posOnly parameters.
function generate_args0(hasPosOnly, posOnlyDefaults, hasPos, posDefaults, hasVargars, hasNamedOnly, namedOnlyDefaults, hasKWargs) {

    let fct =
`function args0_NEW(fct, args) {

    const HAS_KW        = args[args.length-1]?.$kw !== undefined;
    let ARGS_POS_COUNT        = args.length;
    let ARGS_NAMED            = null;

    if( HAS_KW ) {
        --ARGS_POS_COUNT;
        ARGS_NAMED = args[ARGS_POS_COUNT].$kw
    }

    const result = {};

    // using const should enable the browser to perform some optimisation.
    const $INFOS = fct.$infos;
    const $CODE  = $INFOS.__code__;

    const PARAMS_NAMES        = $INFOS.arg_names;
    const PARAMS_POS_COUNT    = $CODE.co_argcount;
    const PARAMS_NAMED_COUNT  = $CODE.co_kwonlyargcount;

    const PARAMS_VARARGS_NAME = $INFOS.vararg;
    const PARAMS_KWARGS_NAME  = $INFOS.kwarg;

    const PARAMS_POS_DEFAULTS = $INFOS.__defaults__;
    const PARAMS_POS_DEFAULTS_COUNT = PARAMS_POS_DEFAULTS.length;

    const PARAMS_POS_DEFAULTS_OFFSET= PARAMS_POS_COUNT - PARAMS_POS_DEFAULTS_COUNT;

    let offset = 0;
`;

    if( hasPosOnly || hasPos ) {
        fct +=
`
    const min = Math.min( ARGS_POS_COUNT, PARAMS_POS_COUNT );
    for( ; offset < min ; ++offset)
        result[ PARAMS_NAMES[offset] ] = args[offset];
`
    }

    if( hasVargars ) {
        fct +=
`
    result[PARAMS_VARARGS_NAME] = $B.fast_tuple( Array.prototype.slice.call(args, PARAMS_POS_COUNT, ARGS_POS_COUNT ) ); //TODO: opti, better way to construct tuple from subarray ?
`
    } else {
        fct +=
`
    if( ARGS_POS_COUNT > PARAMS_POS_COUNT ) {
        args0(fct, args);
        throw new Error('Too much positional arguments given (args0 should have raised an error) !');
    }
`
    }

    fct += `
    if( ARGS_NAMED === null ) {
    `

    if( hasPos || hasPosOnly ) {

        if( posOnlyDefaults !== DEFAULTS.ALL && posDefaults !== DEFAULTS.ALL ) {

            fct += `
        if( offset < PARAMS_POS_DEFAULTS_OFFSET ) {
            args0(fct, args);
            throw new Error('Not enough positional arguments given (args0 should have raised an error) !');
        }
`
        }

        if( posOnlyDefaults !== DEFAULTS.NONE && posDefaults !== DEFAULTS.NONE) {
            fct += `
        for(let i = offset - PARAMS_POS_DEFAULTS_OFFSET;
            i < PARAMS_POS_DEFAULTS_COUNT;
            ++i)
            result[ PARAMS_NAMES[offset++] ] = PARAMS_POS_DEFAULTS[i];`
        }

        if( hasKWargs ) {
            fct += `
        result[PARAMS_KWARGS_NAME] = __BRYTHON__.obj_dict({});`
        }

        if( hasNamedOnly && namedOnlyDefaults !== DEFAULTS.ALL) {
            fct += `
        args0(fct, args);
        throw new Error('Named argument expected (args0 should have raised an error) !');
`
        } else {
            fct += `
        let kwargs_defaults = $INFOS.__kwdefaults__.$jsobj;
        if( kwargs_defaults === undefined || kwargs_defaults === null ) {
            kwargs_defaults = $INFOS.__kwdefaults__.$strings;
        }
        for(let key in kwargs_defaults)    
            result[ key ] = kwargs_defaults[key];
`
        }
    }

    fct += `
        return result;
    }
`
    if( ! hasPos && ! hasNamedOnly && ! hasKWargs ) {
        fct += `
    args0(fct, args);
    throw new Error('No named arguments expected !!!');
}
`;
        return fct;
    }

    if( namedOnlyDefaults !== DEFAULTS.NONE) {
        fct += `
    let kwargs_defaults = $INFOS.__kwdefaults__.$jsobj;
    if( kwargs_defaults === undefined  || kwargs_defaults == null ) {

        kwargs_defaults = $INFOS.__kwdefaults__.$strings;
        if( kwargs_defaults === undefined  || kwargs_defaults == null )
            kwargs_defaults = {}
    }
`
    }

    if( hasPosOnly ) {
        fct += `
    const PARAMS_POSONLY_COUNT         = $CODE.co_posonlyargcount;
    const PARAMS_POS_DEFAULTS_MAXID    =  PARAMS_POS_DEFAULTS_COUNT + PARAMS_POS_DEFAULTS_OFFSET;

    if( offset < PARAMS_POSONLY_COUNT ) {

        `;
        if( posOnlyDefaults !== DEFAULTS.SOME) {
            fct += `
        if( offset < PARAMS_POS_DEFAULTS_OFFSET ) {
            args0(fct, args);
            throw new Error('Not enough positional parameters given (args0 should have raised an error) !');
        }
`
        }
        if( posOnlyDefaults === DEFAULTS.NONE) {
            fct += `
        args0(fct, args);
        throw new Error('Not enough positional parameters given (args0 should have raised an error) !');
`;
        }

        fct += `
        const max = PARAMS_POS_DEFAULTS_COUNT - (PARAMS_POS_COUNT - PARAMS_POSONLY_COUNT);

        // default parameters
        for(let i = offset - PARAMS_POS_DEFAULTS_OFFSET;
                i < max;
                ++i)
            result[ PARAMS_NAMES[offset++] ] = PARAMS_POS_DEFAULTS[i];
    }
`
    }

    if( hasKWargs) {

        fct += `
    const extra = {};

    let nb_extra_args = 0;
`
    }

    fct += `

    let nb_named_args = 0;       

    for(let id = 0; id < ARGS_NAMED.length; ++id ) {

        const _kargs = ARGS_NAMED[id]
        let kargs  = _kargs.$jsobj;
        if( kargs === undefined || kargs === null) {
            kargs = _kargs.$strings
            if(kargs === undefined || kargs === null) 
                kargs= _kargs; // I don't think I can do better.
        }

        for(let argname in kargs) {
            `;

            if( ! hasKWargs ) {
                fct += `
            result[ argname ] = kargs[argname];
            ++nb_named_args;
`;
            }

            if( hasKWargs ) {
                if( ! hasNamedOnly && ! hasPos ) {

                    fct += `
            extra[ argname ] = kargs[argname];
            ++nb_extra_args;
`
                } else {
                    fct += `
            if( PARAMS_NAMES.indexOf(argname, PARAMS_POSONLY_COUNT) !== -1 ) {
                result[ argname ] = kargs[argname];
                ++nb_named_args;
            } else {
                extra[ argname ] = kargs[argname];
                ++nb_extra_args;
            }
`
                }
            }

            fct += `
        }
    }
`

    fct += `
    let found = 0;
    let ioffset = offset;
`;

    if( (hasPosOnly && posOnlyDefaults !== DEFAULTS.ALL) || (hasPos && posDefaults !== DEFAULTS.ALL) ) {
        fct += `
    for( ; ioffset < PARAMS_POS_DEFAULTS_OFFSET; ++ioffset) {

        const key = PARAMS_NAMES[ioffset];
        if( key in result ) // maybe could be speed up using "!(key in result)"
            continue;

        args0(fct, args);
        throw new Error('Missing a named arguments (args0 should have raised an error) !');
    }
`
    }
    if( (hasPosOnly && posOnlyDefaults !== DEFAULTS.NONE) || (hasPos && posDefaults !== DEFAULTS.NONE) ) {
        fct += `
    for( ; ioffset < PARAMS_POS_DEFAULTS_MAXID; ++ioffset) {

        const key = PARAMS_NAMES[ioffset];
        if( key in result )
            continue;

        result[key] = PARAMS_POS_DEFAULTS[ioffset - PARAMS_POS_DEFAULTS_OFFSET];
    ++found;
    }
`
    }

    if( hasNamedOnly ) {

        fct += `
        for( ; ioffset < PARAMS_NAMES.length; ++ioffset) {

            const key = PARAMS_NAMES[ioffset];
            if( key in result )
                continue;
`
        if( namedOnlyDefaults === DEFAULTS.SOME) {
            fct += `
            if( ! (key in kwargs_defaults) ) {
                args0(fct, args);
                throw new Error('Missing a named arguments (args0 should have raised an error) !');
            }
`
        }
        if( namedOnlyDefaults === DEFAULTS.NONE ) {
            fct += `
            args0(fct, args);
            throw new Error('Missing a named arguments (args0 should have raised an error) !');
`
        }

        if( namedOnlyDefaults !== DEFAULTS.NONE) {
            fct += `

            result[key] = kwargs_defaults[key];
            ++found;
`;
        }

        fct += `
        }
`;
    }

    fct += `
        if( found + nb_named_args !== PARAMS_NAMES.length - offset) {
            args0(fct, args);
            throw new Error('Inexistant or duplicate named arguments (args0 should have raised an error) !');
        }
`;

    if( hasKWargs ) {
        fct += `
    if( Object.keys(extra).length !== nb_extra_args ) {
        args0(fct, args);
        throw new Error('Duplicate name given to **kargs parameter (args0 should have raised an error) !');
    }
    result[PARAMS_KWARGS_NAME] = __BRYTHON__.obj_dict(extra);
`
    }

    fct += `
    return result
    `;

    fct += `}`;

    return fct;
}

//console.log( generate_args0(true, DEFAULTS.SOME, true, DEFAULTS.ALL, true, true, DEFAULTS.NONE, true) );

console.log( generate_args0(false, DEFAULTS.NONE, false, DEFAULTS.NONE, false, false, DEFAULTS.NONE, false) );
denis-migdal commented 8 months ago

In the previous code, I didn't removed some variables that might not be used in some cases.

Be also reminded that argument parsing isn't the only thing that is costly during a function call :

denis-migdal commented 8 months ago

I tried to test my argument parser builder.

It seems quicker (in some cases, -20% in argument parsing only ???). Though it is difficult do evaluate as browser is making optimizations on conditions when the function is executed many time. Would need to alternate different type of call to test it in a more "real life" situation.

Some optimisation are currently lacking in my function :

@PierreQuentel How should we proceed ?

I can make a new $B.args0() that'd generate a new argument parser, then execute it, in order to pass the unit test. It'll be obviously slower as I'd do it on the fly, it is just for testing purpose.

Currently, I can't do the real test speed. I'd need at least to have the $B.args0 parser inside fct functions, replaced by fct.args0 (defaulting to $B.args0). Then I think I can add a JS function to window and call it to artificially modify a function fct.args0 manually in order to do the speed test.

PierreQuentel commented 8 months ago

For this kind of test, I usually edit ast_to_js.js to generate a different code for a function depending on its name. I choose a fancy name (eg "f7868IKJHKJHKGHJ") and insert this kind of code in $B.ast.FunctionDef.prototype.to_js() :

if(this.name == "f7868IKJHKJHKGHJ"){
    js += (specific code for tests)
}else{
    js += (standard)
}

Then I can test Brython code

def f7868IKJHKJHKGHJ(*args, **kw):
    ...

In this case the code would probably be around line 1903.

Do you want me to make the change or can you give it a try ?

denis-migdal commented 8 months ago

Thanks.

Mmm... I think I might be able to do it. I'll test it another day as I am currently working on something else.

denis-migdal commented 8 months ago

Okay, here my road map :

PierreQuentel commented 8 months ago

What do you mean by "attribute getter" ?

If it is attribute resolution (compute the result of getattr(object, attr)) it is done in py_builtin_functions.js / $B.$getattr

denis-migdal commented 8 months ago

What do you mean by "attribute getter" ?

Sorry, it was a typo.

I meant the setter for __defaults__ and __kw_defaults__ or a stuff like that, that might change the parsing function to use when modified.

PierreQuentel commented 8 months ago

Génial ! I could reproduce the same results, around 25% faster than the current implementation.

The next step would be to set the parsing function, after the section in FunctionDef.prototype.to_js() commented with "//Set admin info". Something like

// Set parsing function
// Compute arguments required for generate_args0, based on those defined
// in this function (has_posonlyargs, _defaults, kw_defaults, etc.)
let hasPosOnly = ...,
js += `${name2}.arg_parser = $B.generate_args0(${hasPosOnly}, ...)\n`

Setting function defaults is done by $B.make_function_defaults() in py_builtin_functions.js. With this new version it would also reset the function attribute arg_parser.

denis-migdal commented 8 months ago

The next step would be to set the parsing function, after the section in FunctionDef.prototype.to_js() commented with "//Set admin info". Something like

It doesn't work for the function run15 : I set run15.args_parser = ..., but inside the function, run15.args_parser isn't defined.

If I set run.args_parser = ..., I can access it inside the function. However, if I write js += this.name + ".args_parser = ....", I get another error for other functions.

EDIT: but if I put it in run15.$infos.parse_args it works...

denis-migdal commented 8 months ago

I passed all unit test. Still have some garbage to remove when needed.

@PierreQuentel I have several questions :

denis-migdal commented 8 months ago

Okay, I think I'm done for the current PR.

PierreQuentel commented 8 months ago

It's dangerous to access the dictionary keys / values from $jsobj or $strings, the internal implementation of dictionary might change in the future. It's better to use dict internal methods such as dict.$get_string or dict.$iter_items_with_hash(), which abstract the implementation.

I found another issue with your new implementation. In Python, an instance of a class with methods keys() and __getitem__ can be passed under the form **kwarg

def f(**kw):
    return kw

class D:

  def keys(self):
      yield 'a'

  def __getitem__(self, key):
      return 99

d = D()

result = f(**d)
assert result == {'a': 99}, result
denis-migdal commented 8 months ago

It's dangerous to access the dictionary keys / values from $jsobj or $strings, the internal implementation of dictionary might change in the future. It's better to use dict internal methods such as dict.$get_string or dict.$iter_items_with_hash(), which abstract the implementation.

Ok, then I'll pre-compute a $kw_defaults for the defaults values, and see which abstraction method I'll use for **kwargs.

I found another issue with your new implementation. In Python, an instance of a class with methods keys() and __getitem__ can be passed under the form **kwarg

I assume dictionaries also have this keys() method ? I also have to verify that the type of the key is a string and raise an error if it isn't ? I'll have to start building the error messages myself.

It'll be slower when parsing **kwargs, but well can't have everything.

denis-migdal commented 8 months ago

Done.

Maybe not in the most optimal way to loop over dict elements for **kwargs arguments.

I may be wrong, but I don't think you have an unit test for when we give a dict with non-string key as a **kwargs.

PierreQuentel commented 8 months ago

Could you make a PR to fix this issue in the current development version ?

denis-migdal commented 8 months ago

Could you make a PR to fix this issue in the current development version ?

Wouldn't it be better to directly merge #2316 as it seems faster in all conditions (-5% to -10% in total exec time / -20% when no heavy browser opti) ?

I just need to remove "USE_PERSO_ARGS0_EVERYWHERE" condition once you validate it.

denis-migdal commented 8 months ago

Done. Question : do you still use fct.$defaults ?

denis-migdal commented 8 months ago

@PierreQuentel

denis-migdal commented 7 months ago

I think there is a way to improve argument parsing for some kw arguments:

def foo(p, /, a,b, **kargs):
     pass
foo(4, c=3, b=2,a=1)

can be written in JS as :

var a,b,c;
foo( 4, (c=3,b=2,a=1 , a), b, {$kw:[{c}]}); // preserve order of execution. TBF for some arguments we do not need to preserve order.

Indeed, for named argument going to positional, non positional-only, parameters, we can precompute its "position", therefore :

denis-migdal commented 7 months ago

Seems 1.22x faster for 2 keyword arguments put in positional (not-only) parameters. Execution speed is similar to using positional arguments.

For reference :

    <!DOCTYPE html>
    <html>
        <head>
            <!-- Required meta tags-->
            <meta charset="utf-8">
            <title>X</title>

            <!-- Brython -->
            <script src="https://raw.githack.com/brython-dev/brython/master/www/src/brython.js"></script>
            <!--<script src="https://raw.githack.com/brython-dev/brython/master/www/src/brython_stdlib.js"></script>-->

        </head>
        <body>
        </body>
            <script>
N = 10000000;
            </script>
            <script type="module">
__BRYTHON__.imported["__main__"] = {};
__BRYTHON__.frames_stack = [];
// Javascript code generated from ast
var $B = __BRYTHON__,
    _b_ = $B.builtins,
    locals___main__ = $B.imported["__main__"],
    locals = locals___main__,
    frame = ["__main__", locals, "__main__", locals]
frame.__file__ = '<string>'
locals.__name__ = '__main__'
locals.__doc__ = _b_.None
locals.__annotations__ = locals.__annotations__ || $B.empty_dict()
frame.$f_trace = $B.enter_frame(frame)
$B.set_lineno(frame, 1)

var _frames = $B.frames_stack.slice()
var stack_length = $B.frames_stack.length
try{
  $B.set_lineno(frame, 1)
  var module = $B.$import_from("browser", ["window"], {}, 0, locals);
  $B.set_lineno(frame, 3)
  function foo1428(){
    var locals___main___foo,
        locals
    locals___main___foo = locals = $B.args0(foo1428, arguments)
    var frame = ["foo", locals, "__main__", locals___main__, foo1428]
    if(locals.$has_generators){
      frame.$has_generators = true
    }
    frame.__file__ = '<string>'
    frame.$lineno = 3
    frame.$f_trace = $B.enter_frame(frame)
    try{
      $B.js_this = this
      $B.set_lineno(frame, 4)
      var result = $B.rich_op('__add__', locals___main___foo.i, locals___main___foo.j, [11, 12, 13, 14])
      if(frame.$f_trace !== _b_.None){
        $B.trace_return(result)
      }
      $B.leave_frame()
      return result
    }catch(err){
      $B.set_exc(err, frame)
      if((! err.$in_trace_func) && frame.$f_trace !== _b_.None){
        frame.$f_trace = $B.trace_exception()
      }
      $B.leave_frame();throw err
    }
  }
  foo1428.$is_func = true
  foo1428.$infos = {
    __module__: "__main__",
    __name__: "foo",
    __qualname__: "foo",
    __defaults__: $B.fast_tuple([]),
    __globals__: _b_.globals(),
    __kwdefaults__: _b_.None,
    __doc__: _b_.None,
    __code__:{
      co_argcount: 2,
      co_filename: '<string>',
      co_firstlineno: 3,
      co_flags: 3,
      co_freevars: $B.fast_tuple([]),
      co_kwonlyargcount: 0,
      co_name: 'foo',
      co_nlocals: 2,
      co_posonlyargcount: 0,
      co_qualname: 'foo',
      co_varnames: $B.fast_tuple(['i','j'])
    },
    arg_names: ['i','j'],
    vararg: null,
    kwarg: null
  }
  $B.make_function_defaults(foo1428)
  locals___main__.foo = foo1428
  locals___main__.foo.__annotations__ = $B.empty_dict()

  $B.set_lineno(frame, 6)
  var v1429 = 0
  locals___main__.acc = v1429
  $B.set_lineno(frame, 8)
  var v1430 = $B.$call($B.$getattr_pep657($B.$getattr_pep657(locals___main__.window, 'Date', [6, 6, 17]), 'now', [13, 13, 21]), [18, 18, 23])()
  locals___main__.beg = v1430
  frame.$lineno = 9
  var no_break_1431 = true,
      iterator_1431 = $B.$call(_b_.range, [9, 9, 21])(N)
  for(var next_1431 of $B.make_js_iterator(iterator_1431, frame, 9)){
    var v1432 = next_1431
    locals___main__.i = v1432
    $B.set_lineno(frame, 10)
    var vyyy;
    locals___main__.acc = $B.augm_assign(locals___main__.acc, '+=', $B.$call(locals___main__.foo, [10, 10, 25])(
            (vyyy = $B.rich_op('__add__', locals___main__.i, 1, [16, 17, 18, 19]), locals___main__.i), vyyy ) )
  }
  $B.set_lineno(frame, 11)
  var v1433 = $B.$call($B.$getattr_pep657($B.$getattr_pep657(locals___main__.window, 'Date', [6, 6, 17]), 'now', [13, 13, 21]), [18, 18, 23])()
  locals___main__.end = v1433
  $B.set_lineno(frame, 13);
  $B.$call(_b_.print, [0, 0, 14])($B.rich_op('__sub__', locals___main__.end, locals___main__.beg, [6, 9, 10, 13]))
  $B.leave_frame({locals, value: _b_.None})
}catch(err){
  $B.set_exc(err, frame)
  if((! err.$in_trace_func) && frame.$f_trace !== _b_.None){
    frame.$f_trace = $B.trace_exception()
  }
  $B.leave_frame({locals, value: _b_.None})
  throw err
}

            </script>
            <script type="module">
__BRYTHON__.imported["__main__"] = {};
__BRYTHON__.frames_stack = [];
// Javascript code generated from ast
var $B = __BRYTHON__,
    _b_ = $B.builtins,
    locals___main__ = $B.imported["__main__"],
    locals = locals___main__,
    frame = ["__main__", locals, "__main__", locals]
frame.__file__ = '<string>'
locals.__name__ = '__main__'
locals.__doc__ = _b_.None
locals.__annotations__ = locals.__annotations__ || $B.empty_dict()
frame.$f_trace = $B.enter_frame(frame)
$B.set_lineno(frame, 1)

var _frames = $B.frames_stack.slice()
var stack_length = $B.frames_stack.length
try{
  $B.set_lineno(frame, 1)
  var module = $B.$import_from("browser", ["window"], {}, 0, locals);
  $B.set_lineno(frame, 3)
  function foo1428(){
    var locals___main___foo,
        locals
    locals___main___foo = locals = $B.args0(foo1428, arguments)
    var frame = ["foo", locals, "__main__", locals___main__, foo1428]
    if(locals.$has_generators){
      frame.$has_generators = true
    }
    frame.__file__ = '<string>'
    frame.$lineno = 3
    frame.$f_trace = $B.enter_frame(frame)
    try{
      $B.js_this = this
      $B.set_lineno(frame, 4)
      var result = $B.rich_op('__add__', locals___main___foo.i, locals___main___foo.j, [11, 12, 13, 14])
      if(frame.$f_trace !== _b_.None){
        $B.trace_return(result)
      }
      $B.leave_frame()
      return result
    }catch(err){
      $B.set_exc(err, frame)
      if((! err.$in_trace_func) && frame.$f_trace !== _b_.None){
        frame.$f_trace = $B.trace_exception()
      }
      $B.leave_frame();throw err
    }
  }
  foo1428.$is_func = true
  foo1428.$infos = {
    __module__: "__main__",
    __name__: "foo",
    __qualname__: "foo",
    __defaults__: $B.fast_tuple([]),
    __globals__: _b_.globals(),
    __kwdefaults__: _b_.None,
    __doc__: _b_.None,
    __code__:{
      co_argcount: 2,
      co_filename: '<string>',
      co_firstlineno: 3,
      co_flags: 3,
      co_freevars: $B.fast_tuple([]),
      co_kwonlyargcount: 0,
      co_name: 'foo',
      co_nlocals: 2,
      co_posonlyargcount: 0,
      co_qualname: 'foo',
      co_varnames: $B.fast_tuple(['i','j'])
    },
    arg_names: ['i','j'],
    vararg: null,
    kwarg: null
  }
  $B.make_function_defaults(foo1428)
  locals___main__.foo = foo1428
  locals___main__.foo.__annotations__ = $B.empty_dict()

  $B.set_lineno(frame, 6)
  var v1429 = 0
  locals___main__.acc = v1429
  $B.set_lineno(frame, 8)
  var v1430 = $B.$call($B.$getattr_pep657($B.$getattr_pep657(locals___main__.window, 'Date', [6, 6, 17]), 'now', [13, 13, 21]), [18, 18, 23])()
  locals___main__.beg = v1430
  frame.$lineno = 9
  var no_break_1431 = true,
      iterator_1431 = $B.$call(_b_.range, [9, 9, 21])(N)
  for(var next_1431 of $B.make_js_iterator(iterator_1431, frame, 9)){
    var v1432 = next_1431
    locals___main__.i = v1432
    $B.set_lineno(frame, 10)
    locals___main__.acc = $B.augm_assign(locals___main__.acc, '+=', $B.$call(locals___main__.foo, [10, 10, 25])({$kw:[{j: $B.rich_op('__add__', locals___main__.i, 1, [16, 17, 18, 19]), i: locals___main__.i}]}))
  }
  $B.set_lineno(frame, 11)
  var v1433 = $B.$call($B.$getattr_pep657($B.$getattr_pep657(locals___main__.window, 'Date', [6, 6, 17]), 'now', [13, 13, 21]), [18, 18, 23])()
  locals___main__.end = v1433
  $B.set_lineno(frame, 13);
  $B.$call(_b_.print, [0, 0, 14])($B.rich_op('__sub__', locals___main__.end, locals___main__.beg, [6, 9, 10, 13]))
  $B.leave_frame({locals, value: _b_.None})
}catch(err){
  $B.set_exc(err, frame)
  if((! err.$in_trace_func) && frame.$f_trace !== _b_.None){
    frame.$f_trace = $B.trace_exception()
  }
  $B.leave_frame({locals, value: _b_.None})
  throw err
}

            </script>
                        <script type="module">
__BRYTHON__.imported["__main__"] = {};
__BRYTHON__.frames_stack = [];
// Javascript code generated from ast
var $B = __BRYTHON__,
    _b_ = $B.builtins,
    locals___main__ = $B.imported["__main__"],
    locals = locals___main__,
    frame = ["__main__", locals, "__main__", locals]
frame.__file__ = '<string>'
locals.__name__ = '__main__'
locals.__doc__ = _b_.None
locals.__annotations__ = locals.__annotations__ || $B.empty_dict()
frame.$f_trace = $B.enter_frame(frame)
$B.set_lineno(frame, 1)

var _frames = $B.frames_stack.slice()
var stack_length = $B.frames_stack.length
try{
  $B.set_lineno(frame, 1)
  var module = $B.$import_from("browser", ["window"], {}, 0, locals);
  $B.set_lineno(frame, 3)
  function foo1428(){
    var locals___main___foo,
        locals
    locals___main___foo = locals = $B.args0(foo1428, arguments)
    var frame = ["foo", locals, "__main__", locals___main__, foo1428]
    if(locals.$has_generators){
      frame.$has_generators = true
    }
    frame.__file__ = '<string>'
    frame.$lineno = 3
    frame.$f_trace = $B.enter_frame(frame)
    try{
      $B.js_this = this
      $B.set_lineno(frame, 4)
      var result = $B.rich_op('__add__', locals___main___foo.i, locals___main___foo.j, [11, 12, 13, 14])
      if(frame.$f_trace !== _b_.None){
        $B.trace_return(result)
      }
      $B.leave_frame()
      return result
    }catch(err){
      $B.set_exc(err, frame)
      if((! err.$in_trace_func) && frame.$f_trace !== _b_.None){
        frame.$f_trace = $B.trace_exception()
      }
      $B.leave_frame();throw err
    }
  }
  foo1428.$is_func = true
  foo1428.$infos = {
    __module__: "__main__",
    __name__: "foo",
    __qualname__: "foo",
    __defaults__: $B.fast_tuple([]),
    __globals__: _b_.globals(),
    __kwdefaults__: _b_.None,
    __doc__: _b_.None,
    __code__:{
      co_argcount: 2,
      co_filename: '<string>',
      co_firstlineno: 3,
      co_flags: 3,
      co_freevars: $B.fast_tuple([]),
      co_kwonlyargcount: 0,
      co_name: 'foo',
      co_nlocals: 2,
      co_posonlyargcount: 0,
      co_qualname: 'foo',
      co_varnames: $B.fast_tuple(['i','j'])
    },
    arg_names: ['i','j'],
    vararg: null,
    kwarg: null
  }
  $B.make_function_defaults(foo1428)
  locals___main__.foo = foo1428
  locals___main__.foo.__annotations__ = $B.empty_dict()

  $B.set_lineno(frame, 6)
  var v1429 = 0
  locals___main__.acc = v1429
  $B.set_lineno(frame, 8)
  var v1430 = $B.$call($B.$getattr_pep657($B.$getattr_pep657(locals___main__.window, 'Date', [6, 6, 17]), 'now', [13, 13, 21]), [18, 18, 23])()
  locals___main__.beg = v1430
  frame.$lineno = 9
  var no_break_1431 = true,
      iterator_1431 = $B.$call(_b_.range, [9, 9, 21])(N)
  for(var next_1431 of $B.make_js_iterator(iterator_1431, frame, 9)){
    var v1432 = next_1431
    locals___main__.i = v1432
    $B.set_lineno(frame, 10)
    locals___main__.acc = $B.augm_assign(locals___main__.acc, '+=', $B.$call(locals___main__.foo, [10, 10, 25])($B.rich_op('__add__', locals___main__.i, 1, [16, 17, 18, 19]), locals___main__.i))
  }
  $B.set_lineno(frame, 11)
  var v1433 = $B.$call($B.$getattr_pep657($B.$getattr_pep657(locals___main__.window, 'Date', [6, 6, 17]), 'now', [13, 13, 21]), [18, 18, 23])()
  locals___main__.end = v1433
  $B.set_lineno(frame, 13);
  $B.$call(_b_.print, [0, 0, 14])($B.rich_op('__sub__', locals___main__.end, locals___main__.beg, [6, 9, 10, 13]))
  $B.leave_frame({locals, value: _b_.None})
}catch(err){
  $B.set_exc(err, frame)
  if((! err.$in_trace_func) && frame.$f_trace !== _b_.None){
    frame.$f_trace = $B.trace_exception()
  }
  $B.leave_frame({locals, value: _b_.None})
  throw err
}
            </script>
    </html>
denis-migdal commented 7 months ago

Hum, we could even pre-separate named arguments into pos params, named params, and kwargs param. Which would remove one condition in a loop, and in some cases (i.e. when there are no kwarg arguments) also speed up a little generation of the **kargs param.