mctools / simplebuild

A very simple to use build system for projects with primarily C++/Python code, intended for usage by scientific developers without a strong SW-engineering background.
https://mctools.github.io/simplebuild/
Apache License 2.0
3 stars 1 forks source link

Add build timings #70

Open tkittel opened 8 months ago

tkittel commented 8 months ago

It would be nice to be able to easily tell where our build time is being spent. To that end, it would be useful to time all compilation and link commands, so that we can potentially come up with some nice speedups.

One option is to use --exportcmds and execute the commands again while timing them.

tkittel commented 8 months ago

For reference, here is how far I got on a script which does this based on an exported json file of commands:


# Single-slot holder for the settings shared by every _worker call: a dict
# with the keys 'env' (environment passed to the subprocess) and 'cwd'
# (directory that reported file names are made relative to). It must be
# populated in the process that runs _worker before _worker is called.
_docompile_shared_data = [ None ]
def _worker( cmd_data ):
    """Run one exported command in a shell and measure how long it takes.

    cmd_data is a dict with the keys 'file', 'command' and 'directory'.
    The command string is executed through the shell with 'directory' as
    working directory and the shared environment.

    Returns a tuple (file path relative to the shared cwd, elapsed seconds).
    Raises subprocess.CalledProcessError if the command exits non-zero.
    """
    import subprocess
    import os
    import time

    shared_data = _docompile_shared_data[0]
    assert shared_data is not None, \
        "_docompile_shared_data[0] must be set before calling _worker"
    cmd_env = shared_data['env']
    cwd = shared_data['cwd']

    thefile = os.path.relpath(cmd_data['file'],cwd)
    thecmd = cmd_data['command']
    theworkdir = cmd_data['directory']

    print(f"Compiling {thefile}")
    # perf_counter is monotonic and high resolution; time.time() follows the
    # wall clock and can jump if the system time is adjusted mid-measurement.
    t0 = time.perf_counter()
    subprocess.run( thecmd,
                    shell=True,  # the command is a single shell string
                    cwd=theworkdir,
                    check=True,
                    env=cmd_env )
    elapsed = time.perf_counter() - t0
    return ( thefile, elapsed )

def _init_worker_shared_data( shared_data ):
    """Pool initializer: store the shared settings in this worker process."""
    _docompile_shared_data[0] = shared_data

def time_cmds( json_cmd_file, *, repeat = 1, nprocs = 5 ):
    """Execute all commands from an exported JSON file and print timings.

    json_cmd_file: path to a JSON file holding a list of command entries
        (each entry is handed unchanged to _worker).
    repeat: how many times each command is executed; the fastest run of
        each file is the one reported.
    nprocs: number of worker processes executing commands in parallel.

    Prints one line per file, slowest first, with its time, its share of
    the total and the cumulative share, followed by the total. The total is
    the sum of the reported per-file times, not the elapsed wall time.
    """
    import json
    import pathlib
    import os
    from collections import defaultdict
    from multiprocessing import Pool

    cmds = json.loads(
        pathlib.Path(json_cmd_file).read_text( encoding = 'utf-8' )
    ) * repeat
    cwd = os.getcwd()

    #FIXME _simple_build_system -> .
    from _simple_build_system.envsetup import apply_envunsetup_to_dict
    cmd_env = os.environ.copy()
    # NOTE(review): presumably this undoes the simplebuild environment setup
    # in the copied dict -- confirm against envsetup.
    apply_envunsetup_to_dict( cmd_env )
    shared_data = dict( env = cmd_env,
                        cwd = cwd )

    #FIXME cmds = [e for e in cmds if 'mcpl2phits/main.c' in e['file']]

    # Hand the shared settings to each worker through the pool initializer.
    # Relying on a module global set in the parent only works with the "fork"
    # start method; with "spawn"/"forkserver" the workers would see None.
    with Pool( nprocs,
               initializer = _init_worker_shared_data,
               initargs = ( shared_data, ) ) as pool:
        worker_results = pool.map( _worker, cmds )

    # Collect all measurements per file (more than one when repeat > 1).
    timings = defaultdict( list )
    for fn, t in worker_results:
        timings[fn].append( t )

    # Report the minimum rather than the mean of the repeated measurements.
    results = { f: min(times) for f, times in timings.items() }
    total = sum( results.values() )
    cumulative = 0.0
    # Slowest first; ties are ordered by file name, descending.
    ranked = sorted( ( (t, f) for f, t in results.items() ), reverse = True )
    for t, f in ranked:
        cumulative += t
        # Guard against a zero total so the percentages cannot divide by zero.
        pct = t*100.0/total if total else 0.0
        cumul_pct = cumulative*100.0/total if total else 0.0
        print(f'{t:>7.3f}s [{pct:>7.3f} %] [{cumul_pct:>7.3f} %] {f}')
    print( f"Total: {total} seconds" )

if __name__ == '__main__':
    # Command line: <script> COMMANDS_JSON [REPEAT]
    import sys
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        raise SystemExit( f"Usage: {sys.argv[0]} <commands.json> [repeat]" )
    # The optional second argument is the number of repetitions per command.
    repeat = 1 if len(sys.argv) <= 2 else int(sys.argv[2])
    time_cmds( sys.argv[1], repeat = repeat )
tkittel commented 8 months ago

Not surprisingly, a lot of the time is spent on python-c++ code...