kombiHQ / kombi

Inline crawlers loaded with task holders #72

Open paulondc opened 4 years ago

paulondc commented 4 years ago

Currently, inline crawlers are only available in the runtime that loaded the task holder configuration. Instead, we want to make them available to all processes during the execution of task holders.

asvbarbosa commented 2 years ago

Summary

Kombi's inline crawlers are now working in my implementation.

Modules updated:

- kombi.Crawler.Crawler
- kombi.TaskHolder.Loader.PythonLoader

Disclosure

Please note that this change was made on my own, without any help or revision from the contributors, so use it at your own risk.

Example of inline crawlers in the config YAML file:

```yaml
crawlers:
  clientCam < clientCamera: "*_{sequence:S}_{shot}_*v{clientVersion}.*"
```
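The loader (shown later under kombi.TaskHolder.Loader.PythonLoader) splits this key on '<' into the name to register and the base crawler type. A small sketch of that parsing:

```python
# Sketch of how the config key above is parsed by the loader.
crawlerKey = "clientCam < clientCamera"

parts = crawlerKey.split('<')   # ['clientCam ', ' clientCamera']
name = parts[0].strip()         # 'clientCam'    -> new type to register
baseType = parts[1].strip()     # 'clientCamera' -> existing base type
```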

An error is raised at line 442 of the Crawler module because, after the taskHolder is dispatched to the render farm, it has to retrieve the custom crawler type "clientCam" defined in the config file from the Crawler class variable "__registeredTypes". That variable gets re-defined in the new process, so the inline custom crawler type is left out.
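A minimal sketch of the failure mode (simplified registry and hypothetical names, not kombi's actual code): types registered at config-load time live only in the process that registered them, so a fresh process that re-imports the module sees only the built-in types.

```python
# Simplified sketch of the problem; hypothetical names, not kombi's code.
# Process A loads the config and registers the inline crawler type:
registeredTypes = {'clientCamera': object}   # built-in types at import time
registeredTypes['clientCam'] = type('ClientCam', (object,), {})

# Process B (the render farm) re-imports the module, so its registry is
# rebuilt with only the built-in types; the inline type is gone:
freshRegistry = {'clientCamera': object}
try:
    freshRegistry['clientCam']
except KeyError:
    print("inline crawler type 'clientCam' is unknown in the new process")
```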

kombi.Crawler.Crawler

[ update ]

Adds a new function to create custom crawlers. The returned class carries two extra vars that make it possible to re-create the custom Crawler later.


```python
def create_custom_crawler(varExtractionExpression, BaseCrawler, baseCrawlerType=None):
    '''
    Return a custom crawler class.

    :param varExtractionExpression: str // String value used as a pattern to check
                                          against the var 'baseName'.
    :param BaseCrawler: class // Registered Crawler class for the new custom class
                                 to inherit from.
    :param baseCrawlerType: str // The string type of the BaseCrawler. This helps
                                   re-create custom Crawlers defined in the config file.

    :return: _CustomCrawler
    '''
    class _CustomCrawler(BaseCrawler):
        namePattern = varExtractionExpression

        def __init__(self, *args, **kwargs):
            super(_CustomCrawler, self).__init__(*args, **kwargs)

            # assigning variables
            self.assignVars(
                VarExtractor(
                    self.var('baseName'),
                    self.namePattern
                )
            )

            self.setVar("customType", True)
            self.setVar("baseType", baseCrawlerType)  # new

        @classmethod
        def test(cls, data, parentCrawler=None):
            # perform the tests for the base classes
            if super(_CustomCrawler, cls).test(data, parentCrawler):
                return VarExtractor(data.baseName(), cls.namePattern).match()

            return False

    return _CustomCrawler
```
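A hedged usage sketch, assuming the config example above (`Crawler.registeredType` and `Crawler.register` are taken from the PythonLoader snippets below; the import path is an assumption):

```python
from kombi.Crawler import Crawler  # assumed import path

# The type names come from the config YAML example above.
BaseCrawler = Crawler.registeredType('clientCamera')

ClientCam = create_custom_crawler(
    "*_{sequence:S}_{shot}_*v{clientVersion}.*",  # varExtractionExpression
    BaseCrawler,
    baseCrawlerType='clientCamera'
)

# register the class under the inline name so it can be looked up later
Crawler.register('clientCam', ClientCam)
```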
Modified the methods "createFromJson" and "toJson" to support the new var('baseType'). In "toJson", please note that I added "namePattern" to the crawlerContents variable.
```python
    def toJson(self):
        """
        Serialize the crawler to json (it can be recovered later using fromJson).
        """
        crawlerContents = {
            "vars": {},
            "contextVarNames": [],
            "tags": {},
            "namePattern":getattr(self,"namePattern") if hasattr(self, "namePattern") else None,
            "children": None,
            "initializationData": self.initializationData()
        }

        # serializing the children as well when caching is enabled
        if not self.isLeaf() and self.__childrenCache is not None:
            crawlerContents['children'] = []
            for child in self.__childrenCache:
                crawlerContents['children'].append(child.toJson())

        for varName in self.varNames():
            crawlerContents['vars'][varName] = self.var(varName)

        assert 'type' in crawlerContents['vars'], \
            "Missing type var, cannot serialize crawler (perhaps it was not created through Crawler.create)."

        for varName in self.contextVarNames():
            crawlerContents['contextVarNames'].append(varName)

        for tagName in self.tagNames():
            crawlerContents['tags'][tagName] = self.tag(tagName)

        return json.dumps(
            crawlerContents,
            indent=4,
            separators=(',', ': ')
        )

    @staticmethod
    def createFromJson(jsonContents):
        """
        Create a crawler based on the jsonContents (serialized via toJson).
        """
        contents = json.loads(jsonContents)
        crawlerType = contents["vars"]["type"]
        initializationData = contents['initializationData']

        # creating crawler
        if contents["vars"].get("customType") and contents.get("namePattern"):
            baseCrawlerType = contents["vars"].get("baseType") or crawlerType
            baseCrawler = Crawler.__registeredTypes[baseCrawlerType]
            customClass = create_custom_crawler(contents['namePattern'], baseCrawler, baseCrawlerType)
            Crawler.register(crawlerType, customClass)

            crawler = customClass(initializationData)
        else:
            crawler = Crawler.__registeredTypes[crawlerType](initializationData)

        # (the remainder of the method is unchanged)
```

This replaces the original, which errors:

```python
    @staticmethod
    def createFromJson(jsonContents):
        """
        Create a crawler based on the jsonContents (serialized via toJson).
        """
        contents = json.loads(jsonContents)
        crawlerType = contents["vars"]["type"]
        initializationData = contents['initializationData']

        # creating crawler
        crawler = Crawler.__registeredTypes[crawlerType](initializationData)
```
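A hedged round-trip sketch under the modified methods above (`crawler` stands for any crawler instance built from an inline type):

```python
# Round-trip sketch, assuming the modified toJson/createFromJson above.

# In the process that loaded the config, the serialized JSON now carries
# 'namePattern' plus the 'baseType' var:
jsonContents = crawler.toJson()

# In a fresh render-farm process, createFromJson detects the custom type,
# rebuilds the class via create_custom_crawler, re-registers it under its
# original name and only then instantiates it:
restored = Crawler.createFromJson(jsonContents)
```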

kombi.TaskHolder.Loader.PythonLoader

[ update ]

```python
    @classmethod
    def __parseInlineCrawlers(cls, contents):
        """
        Parse the custom inline crawlers defined in the contents.
        """
        if 'crawlers' in contents:
            # vars checking
            if not isinstance(contents['crawlers'], dict):
                raise PythonLoaderContentError('Expecting a list of vars!')

            for crawlerKey, varExtractorExpression in contents['crawlers'].items():
                parts = crawlerKey.split('<')
                baseType = None
                BaseCrawler = Crawler
                if len(parts) > 1:
                    baseType = parts[1].strip()
                    BaseCrawler = Crawler.registeredType(baseType)

                Crawler.register(
                    parts[0].strip(),
                    cls.__customInlineCrawler(varExtractorExpression, BaseCrawler, baseType)
                )

    @classmethod
    def __customInlineCrawler(cls, varExtractionExpression, BaseCrawler, baseType=None):
        """
        Return a custom crawler class.
        """
        from ...Crawler.Crawler import create_custom_crawler
        return create_custom_crawler(varExtractionExpression, BaseCrawler, baseType)
```

This replaces the original:

```python
    @classmethod
    def __parseInlineCrawlers(cls, contents):
        """
        Parse the custom inline crawlers defined in the contents.
        """
        if 'crawlers' in contents:
            # vars checking
            if not isinstance(contents['crawlers'], dict):
                raise PythonLoaderContentError('Expecting a list of vars!')

            for crawlerKey, varExtractorExpression in contents['crawlers'].items():
                parts = crawlerKey.split('<')
                BaseCrawler = Crawler
                if parts > 1:
                    BaseCrawler = Crawler.registeredType(parts[1].strip())

                Crawler.register(
                    parts[0].strip(),
                    cls.__customInlineCrawler(varExtractorExpression, BaseCrawler)
                )

    @classmethod
    def __customInlineCrawler(cls, varExtractionExpression, BaseCrawler):
        """
        Return a custom crawler class.
        """
        class _CustomCrawler(BaseCrawler):
            namePattern = varExtractionExpression

            def __init__(self, *args, **kwargs):
                super(_CustomCrawler, self).__init__(*args, **kwargs)

                # assigning variables
                self.assignVars(
                    VarExtractor(
                        self.var('baseName'),
                        self.namePattern
                    )
                )

            @classmethod
            def test(cls, data, parentCrawler=None):
                # perform the tests for the base classes
                if super(_CustomCrawler, cls).test(data, parentCrawler):
                    return VarExtractor(data.baseName(), cls.namePattern).match()

                return False

        return _CustomCrawler
```
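As an aside, the original compares the list `parts` directly to `1`; under Python 3 that comparison raises a TypeError, which the update avoids with `len(parts) > 1`:

```python
parts = "clientCam < clientCamera".split('<')

# parts > 1   -> TypeError under Python 3:
#                '>' not supported between instances of 'list' and 'int'
len(parts) > 1  # True: a base crawler type was declared
```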