Open rafikg opened 2 years ago
from typing import Optional, Any, List

import hyperscan


def on_match(id: int, start: int, end: int, flags: int,
             context: Optional[Any] = None) -> Optional[bool]:
    """Record one Hyperscan match in ``context['results']``.

    Passed to ``db.scan`` below as ``match_event_handler``.

    Args:
        id: Identifier of the expression that matched.
        start: Start offset of the match in the scanned buffer.
        end: End offset of the match in the scanned buffer.
        flags: Match flags supplied by the caller of the handler (unused).
        context: Mapping with a ``'results'`` list that collects
            ``(id, start, end)`` tuples; when ``None`` nothing is recorded.

    Returns:
        ``False`` in every case.
        NOTE(review): a falsy return is presumed to let scanning continue;
        confirm against the python-hyperscan callback documentation.
    """
    # The signature allows context=None, so guard before subscripting it.
    if context is not None:
        context['results'].append((id, start, end))
    return False


db = hyperscan.Database()

patterns = (
    # expression, id, flags
    (br'O+M', 0, hyperscan.HS_FLAG_CASELESS | hyperscan.HS_FLAG_SOM_LEFTMOST),
)

# Transpose the (expression, id, flags) rows into the parallel sequences
# that db.compile() takes.
expressions, ids, flags = zip(*patterns)
db.compile(
    expressions=expressions,
    ids=ids,
    elements=len(patterns),
    flags=flags,
)

lines = ['Om', 'OOm', 'oom', 'sroom', 'communication', 'surveillance']
context = {'results': []}

# Scan all lines as one newline-joined byte buffer, so the reported
# offsets are relative to the start of the whole buffer.
text = str.encode("\n".join(lines))
print(text)

db.scan(text, match_event_handler=on_match, context=context)
for result in context['results']:
    print(result)

# Output reported in the issue:
# (0, 0, 2)   -> Om
# (0, 3, 6)   -> OOm
# (0, 7, 10)  -> oom
# (0, 13, 16) -> oom (in sroom)
# (0, 18, 20) -> om (in communication)
With re.findall():
re.findall(rb'O+M', text, flags=re.IGNORECASE)
[b'Om', b'OOm', b'oom', b'oom']
with
re.findall()