Tiendil / smart-imports

smart imports for Python
BSD 3-Clause "New" or "Revised" License
39 stars 6 forks source link

Implement import hook to avoid parsing module two times #11

Open Tiendil opened 5 years ago

Tiendil commented 5 years ago

A module's source is parsed to an AST two times:

It is probably possible to implement an import hook that compiles and analyzes a single AST tree. Since smart_imports does not modify the AST, it should be totally safe.

Tiendil commented 5 years ago

It is a more complex task than I thought at the beginning.

Python does not give a clean way to process the AST before it is compiled by the module loader. Because of that, a custom module Finder/Loader has to be implemented.

But, for unknown reasons,

    tree = ast.parse(source)
    compile(tree, path, 'exec')

is much slower than

    compile(source, path, 'exec')

Probably because of converting AST nodes between C and Python code.

So there is no sense in import hooks, since they will not speed up code analysis.

P.S. pyflame does not help, since compile is implemented in C. P.P.S. perf does not help either: I don't know how to use it properly, or it requires a debug build of the Python executable.

Tiendil commented 5 years ago

Here is the incomplete import hooks code:


import ast
import sys

import importlib
import importlib.util

class NewModuleLoader(importlib._bootstrap_external._LoaderBasics):
    """Loader wrapper that keeps the AST of a module's source next to its code.

    When the wrapped loader can provide source text, ``create_module`` parses
    it once into ``self.tree`` and compiles that tree into ``self.code``, so
    the same AST can be analysed and executed. Modules without source are
    handed over to the wrapped loader unchanged.

    Attributes:
        fullname: dotted name of the module this loader was created for.
        orignial_loader: the loader being wrapped (the attribute name keeps
            its historical spelling because callers pass it as a keyword).
        tree: parsed ``ast.Module`` of the source, or ``None``.
        code: code object compiled from ``tree``, or ``None``.
    """

    __slots__ = ('orignial_loader', 'fullname', 'tree', 'code')

    def __init__(self, fullname, orignial_loader):
        self.tree = None
        self.code = None
        self.fullname = fullname
        self.orignial_loader = orignial_loader

    def get_source(self, fullname):
        """Return the source text reported by the wrapped loader."""
        return self.orignial_loader.get_source(fullname=fullname)

    def get_code(self, fullname):
        """Return the code compiled from the AST, else the wrapped loader's code."""
        if self.code is None:
            return self.orignial_loader.get_code(fullname)

        return self.code

    def is_package(self, fullname):
        """Report whether the module is a package, as the wrapped loader sees it.

        The inherited implementation relies on ``get_filename``, which this
        class does not define, so the question is delegated instead.

        Raises:
            ImportError: if the wrapped loader cannot answer.
        """
        checker = getattr(self.orignial_loader, 'is_package', None)

        if checker is None:
            raise ImportError('loader for {!r} cannot tell whether it is a package'.format(fullname),
                              name=fullname)

        return checker(fullname)

    def create_module(self, spec):
        """Create the module object, parsing and compiling its source if any.

        Args:
            spec: the module spec; only ``spec.origin`` is read here, the
                whole spec is forwarded to the wrapped loader when there is
                no source to parse.

        Returns:
            A new module object.
        """
        reader = getattr(self.orignial_loader, 'get_source', None)

        try:
            source = reader(fullname=self.fullname) if reader is not None else None
        except ImportError:
            # the wrapped loader could not read the source; let it load
            # the module (or report the failure) on its own
            source = None

        if source:
            # compile() rejects None as filename, so fall back to a placeholder
            filename = spec.origin if spec.origin is not None else '<{}>'.format(self.fullname)

            self.tree = ast.parse(source, filename)
            self.code = compile(self.tree, filename, 'exec')

            return type(sys)(self.fullname)

        # no source (extension, built-in, bytecode-only modules, ...):
        # the wrapped loader may need to build a special module object
        creator = getattr(self.orignial_loader, 'create_module', None)
        module = creator(spec) if creator is not None else None

        if module is None:
            module = type(sys)(self.fullname)

        return module

    def exec_module(self, module):
        """Execute the module.

        Uses the code compiled in ``create_module`` when it exists; otherwise
        the wrapped loader's own ``exec_module`` is preferred, because a
        source-less module may have no code object at all.
        """
        executor = getattr(self.orignial_loader, 'exec_module', None)

        if self.code is None and executor is not None:
            executor(module)
            return

        super().exec_module(module)

class DeprecatedModuleLoader:
    """Thin proxy around a loader that is driven through ``load_module``.

    Stores the module name and the wrapped loader, and forwards
    ``load_module`` / ``get_source`` calls to that loader.
    """

    __slots__ = ('orignial_loader', 'fullname')

    def __init__(self, fullname, orignial_loader):
        self.orignial_loader = orignial_loader
        self.fullname = fullname

    def load_module(self, fullname):
        """Load the module by delegating to the wrapped loader."""
        wrapped = self.orignial_loader
        return wrapped.load_module(fullname=fullname)

    def get_source(self, fullname):
        """Fetch the module source by delegating to the wrapped loader."""
        wrapped = self.orignial_loader
        return wrapped.get_source(fullname=fullname)

class ModuleFinder:
    """Meta path finder that wraps the loaders of selected modules.

    The finder does not locate modules itself: for module names that match
    ``include`` (and do not match ``exclude``) it asks the finders registered
    after it in ``sys.meta_path`` and wraps the loader they produce.

    Attributes:
        include: iterable of name prefixes to process.
        exclude: iterable of name prefixes to skip; checked before ``include``.
    """

    __slots__ = ('include', 'exclude')

    def __init__(self, include=(), exclude=()):
        self.include = include
        self.exclude = exclude

    def processing_required(self, fullname):
        """Return True when ``fullname`` must be handled by this finder.

        Matching is a plain string-prefix test; exclusions take precedence.
        """
        if any(fullname.startswith(excluded_name) for excluded_name in self.exclude):
            return False

        return any(fullname.startswith(included_name) for included_name in self.include)

    def get_finders_to_process(self):
        """Return the finders placed after this one in ``sys.meta_path``.

        Finders placed before this one have already been consulted by the
        import system. An empty list is returned when this finder is not
        registered in ``sys.meta_path``.
        """
        try:
            index = sys.meta_path.index(self)
        except ValueError:
            # the finder is not registered for processing imports,
            # so it must not consult any other finder
            return []

        return sys.meta_path[index + 1:]

    def find_spec(self, fullname, path, target=None):
        """Find the spec via the later finders and wrap its loader.

        Returns:
            The spec found by the first later finder that knows the module,
            with its loader replaced by a wrapper, or ``None`` when the module
            is not selected for processing or no finder knows it.
        """
        if not self.processing_required(fullname):
            return None

        for finder in self.get_finders_to_process():

            if hasattr(finder, 'find_spec'):
                spec = finder.find_spec(fullname=fullname,
                                        path=path,
                                        target=target)

                if spec is None:
                    continue

                # a spec may carry no loader (e.g. a namespace package on some
                # Python versions); wrapping None would only fail later, at
                # import time, so such specs are returned untouched
                if spec.loader is not None:
                    spec.loader = NewModuleLoader(fullname=fullname,
                                                  orignial_loader=spec.loader)

                return spec

            # legacy finders that expose only the old find_module() API
            if hasattr(finder, 'find_module'):
                loader = finder.find_module(fullname=fullname,
                                            path=path)

                if loader is not None:
                    return importlib.util.spec_from_loader(name=fullname,
                                                           loader=DeprecatedModuleLoader(fullname=fullname,
                                                                                         orignial_loader=loader))

        return None

    def invalidate_caches(self):
        """Do nothing: the finder keeps no caches at the moment."""
        # TODO: invalidate internal caches if any
        pass

def register(include=(), exclude=()):
    """Create a ModuleFinder for the given prefixes and install it.

    The new finder is put at the very front of ``sys.meta_path``.
    """
    hook = ModuleFinder(include=include, exclude=exclude)

    # equivalent to inserting at index 0
    sys.meta_path[:0] = [hook]