Open paulondc opened 4 years ago
Kombi's inline crawlers are now working in my implementation.
Modules updated:
Please note that this change was made on my own without any of the contributors help / revision, therefore use it at your own risk.
crawlers:
clientCam < clientCamera: "*_{sequence:S}_{shot}_*v{clientVersion}.*"
An error at line 442 in the Crawler module is raised because, after the taskHolder is dispatched to the render farm, the taskHolder has to retrieve the custom crawler type "clientCam" (defined in the config file) from the Crawler class variable "__registeredTypes", which gets re-defined in the new process, so the inline custom crawler type is left out.
Adds a new function to create custom Crawlers. The return class has extra 2 vars to make possible to recreate custom Crawler.
def create_custom_crawler(varExtractionExpression, BaseCrawler, baseCrawlerType=None):
    """
    Return a custom crawler class.

    The returned class carries two extra vars ('customType' and 'baseType')
    so that a custom crawler serialized with toJson can be recreated later
    by createFromJson in a different process.

    :param varExtractionExpression: str // String value used as a pattern to
        check against the var 'baseName'.
    :param BaseCrawler: class // Registered Crawler class for the new custom
        class to inherit from.
    :param baseCrawlerType: str // The string type of the BaseCrawler; used
        when re-creating custom crawlers defined in the config file.
    :return: _CustomCrawler
    """
    class _CustomCrawler(BaseCrawler):
        # pattern matched against the crawler's 'baseName' var
        namePattern = varExtractionExpression

        def __init__(self, *args, **kwargs):
            super(_CustomCrawler, self).__init__(*args, **kwargs)
            # assigning variables extracted from the base name
            self.assignVars(
                VarExtractor(
                    self.var('baseName'),
                    self.namePattern
                )
            )
            # markers consumed by toJson/createFromJson so this dynamic
            # class can be rebuilt after serialization
            self.setVar("customType", True)
            self.setVar("baseType", baseCrawlerType)

        @classmethod
        def test(cls, data, parentCrawler=None):
            # perform the tests for the base classes first
            if super(_CustomCrawler, cls).test(data, parentCrawler):
                return VarExtractor(data.baseName(), cls.namePattern).match()
            return False

    return _CustomCrawler
> Modified method "createFromJson" and "toJson" to support the new var('baseType').
on "toJson", please note that I added "namePattern" to the crawlerContents variable.
```python
def toJson(self):
    """
    Serialize the crawler to json (it can be recovered later using fromJson).

    The payload carries the crawler vars, context var names, tags, the
    optional 'namePattern' (only defined on inline custom crawler classes;
    None otherwise) and the initialization data. Children are embedded only
    when the children cache has already been populated.
    """
    crawlerContents = {
        "vars": {},
        "contextVarNames": [],
        "tags": {},
        # three-argument getattr replaces the hasattr/getattr combination;
        # regular crawler classes don't define 'namePattern'
        "namePattern": getattr(self, "namePattern", None),
        "children": None,
        "initializationData": self.initializationData()
    }
    # serializing the children as well when caching is enabled
    if not self.isLeaf() and self.__childrenCache is not None:
        crawlerContents['children'] = []
        for child in self.__childrenCache:
            crawlerContents['children'].append(child.toJson())
    for varName in self.varNames():
        crawlerContents['vars'][varName] = self.var(varName)
    # 'type' is required so createFromJson can look the class back up
    assert 'type' in crawlerContents['vars'], \
        "Missing type var, cannot serialize crawler (perhaps it was not created through Crawler.create)."
    for varName in self.contextVarNames():
        crawlerContents['contextVarNames'].append(varName)
    for tagName in self.tagNames():
        crawlerContents['tags'][tagName] = self.tag(tagName)
    return json.dumps(
        crawlerContents,
        indent=4,
        separators=(',', ': ')
    )
@staticmethod
def createFromJson(jsonContents):
    """
    Create a crawler based on the jsonContents (serialized via toJson).

    When the serialized vars mark the crawler as a custom type and a
    'namePattern' was serialized, the dynamic custom class is rebuilt and
    registered in this process before instantiation.
    """
    contents = json.loads(jsonContents)
    # 'type' is always serialized by toJson (asserted there)
    crawlerType = contents["vars"]["type"]
    initializationData = contents['initializationData']
    # creating crawler
    if contents["vars"].get("customType") and contents.get("namePattern"):
        # inline custom crawler: fall back to the concrete type itself when
        # no base type was recorded
        baseCrawlerType = contents["vars"].get("baseType") or crawlerType
        baseCrawler = Crawler.__registeredTypes[baseCrawlerType]
        customClass = create_custom_crawler(contents['namePattern'], baseCrawler, baseCrawlerType)
        # register so later lookups by type succeed in this process as well
        Crawler.register(crawlerType, customClass)
        crawler = customClass(initializationData)
    else:
        crawler = Crawler.__registeredTypes[crawlerType](initializationData)
    # NOTE(review): the excerpt ends here; the remainder of the method
    # (restoring vars/tags/children and returning the crawler) is not shown.
instead of the original, which errors:
@staticmethod
def createFromJson(jsonContents):
    """
    Create a crawler based on the jsonContents (serialized via toJson).
    """
    contents = json.loads(jsonContents)
    crawlerType = contents["vars"]["type"]
    initializationData = contents['initializationData']
    # creating crawler
    # NOTE(review): this lookup is the reported failure (error at line 442
    # of the Crawler module) — inline custom crawler types registered in the
    # launching process are absent from '__registeredTypes' in a fresh
    # process, so a custom 'type' raises a KeyError here.
    crawler = Crawler.__registeredTypes[crawlerType](initializationData)
    # NOTE(review): the excerpt ends here; the remainder of the method is
    # not shown in the issue text.
@classmethod
def __parseInlineCrawlers(cls, contents):
    """
    Parse the custom inline crawlers defined in the contents.

    Each key may optionally name a base type after '<'
    (e.g. "clientCam < clientCamera"); otherwise the new crawler inherits
    straight from Crawler.
    """
    if 'crawlers' not in contents:
        return

    # vars checking
    if not isinstance(contents['crawlers'], dict):
        raise PythonLoaderContentError('Expecting a list of vars!')

    for key, extractionExpression in contents['crawlers'].items():
        pieces = key.split('<')
        if len(pieces) > 1:
            # explicit base type requested after '<'
            parentType = pieces[1].strip()
            parentClass = Crawler.registeredType(parentType)
        else:
            parentType = None
            parentClass = Crawler
        Crawler.register(
            pieces[0].strip(),
            cls.__customInlineCrawler(extractionExpression, parentClass, parentType)
        )
@classmethod
def __customInlineCrawler(cls, varExtractionExpression, BaseCrawler, baseType=None):
    """
    Return a custom crawler class built for an inline definition.
    """
    # local import (presumably to avoid a circular import at module load
    # time — confirm); the shared factory lives in the Crawler module so
    # serialized crawlers are recreated through the exact same code path
    from ...Crawler.Crawler import create_custom_crawler

    return create_custom_crawler(varExtractionExpression, BaseCrawler, baseType)
instead of the original:
@classmethod
def __parseInlineCrawlers(cls, contents):
    """
    Parse the custom inline crawlers defined in the contents.
    """
    if 'crawlers' in contents:
        # vars checking
        if not isinstance(contents['crawlers'], dict):
            raise PythonLoaderContentError('Expecting a list of vars!')
        for crawlerKey, varExtractorExpression in contents['crawlers'].items():
            parts = crawlerKey.split('<')
            BaseCrawler = Crawler
            # bug fix: the original 'if parts > 1:' compared a list with an
            # int (TypeError on python 3); the intent is to check whether a
            # base type was supplied after '<'
            if len(parts) > 1:
                BaseCrawler = Crawler.registeredType(parts[1].strip())
            Crawler.register(
                parts[0].strip(),
                cls.__customInlineCrawler(varExtractorExpression, BaseCrawler)
            )
@classmethod
def __customInlineCrawler(cls, varExtractionExpression, BaseCrawler):
    """
    Return a custom crawler class.
    """
    class _InlineCrawler(BaseCrawler):
        # pattern matched against the crawler's 'baseName' var
        namePattern = varExtractionExpression

        def __init__(self, *args, **kwargs):
            super(_InlineCrawler, self).__init__(*args, **kwargs)
            # assigning variables extracted from the base name
            extractor = VarExtractor(self.var('baseName'), self.namePattern)
            self.assignVars(extractor)

        @classmethod
        def test(cls, data, parentCrawler=None):
            # perform the tests for the base classes
            if not super(_InlineCrawler, cls).test(data, parentCrawler):
                return False
            return VarExtractor(data.baseName(), cls.namePattern).match()

    return _InlineCrawler
Currently, inline crawlers are only available in the runtime that loaded the task holder configuration; instead, we want to make them available to all processes during the execution of task holders.