crdoconnor / xeger

Library to generate random strings from regular expressions.
Other
117 stars 21 forks source link

Regexp raises a KeyError #7

Open horvatha opened 7 years ago

horvatha commented 7 years ago

The regexp below works with rstr.xeger, but with xeger it raises "KeyError: CATEGORY_DIGIT".

In [44]: re = r'(1[0-2]|0[1-9])(:[0-5]\d){2} (A|P)M'

In [45]: rstr.xeger(re)
Out[45]: '05:55:17 PM'

In [46]: xeger.Xeger().xeger(re)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-46-28a9a27bb76a> in <module>()
----> 1 xeger.Xeger().xeger(re)

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in xeger(self, string_or_regex)
     80 
     81         parsed = re.sre_parse.parse(pattern)
---> 82         result = self._build_string(parsed)
     83         self._cache.clear()
     84         return result

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in _build_string(self, parsed)
     87         newstr = []
     88         for state in parsed:
---> 89             newstr.append(self._handle_state(state))
     90         return ''.join(newstr)
     91 

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in _handle_state(self, state)
     92     def _handle_state(self, state):
     93         opcode, value = state
---> 94         return self._cases[str(opcode).lower()](value)
     95 
     96     def _handle_group(self, value):

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in <lambda>(x)
     69             "groupref": lambda x: self._cache[x],
     70             'min_repeat': lambda x: self._handle_repeat(*x),
---> 71             'max_repeat': lambda x: self._handle_repeat(*x),
     72             'negate': lambda x: [False],
     73         }

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in _handle_repeat(self, start_range, end_range, value)
    113         times = randint(start_range, end_range)
    114         for i in xrange(times):
--> 115             result.append(''.join(self._handle_state(i) for i in value))
    116         return ''.join(result)

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in <genexpr>(.0)
    113         times = randint(start_range, end_range)
    114         for i in xrange(times):
--> 115             result.append(''.join(self._handle_state(i) for i in value))
    116         return ''.join(result)

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in _handle_state(self, state)
     92     def _handle_state(self, state):
     93         opcode, value = state
---> 94         return self._cases[str(opcode).lower()](value)
     95 
     96     def _handle_group(self, value):

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in <lambda>(x)
     64             'branch':
     65                 lambda x: ''.join(self._handle_state(i) for i in choice(x[1])),
---> 66             "subpattern": lambda x: self._handle_group(x),
     67             "assert": lambda x: ''.join(self._handle_state(i) for i in x[1]),
     68             "assert_not": lambda x: '',

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in _handle_group(self, value)
     95 
     96     def _handle_group(self, value):
---> 97         result = ''.join(self._handle_state(i) for i in value[1])
     98         if value[0]:
     99             self._cache[value[0]] = result

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in <genexpr>(.0)
     95 
     96     def _handle_group(self, value):
---> 97         result = ''.join(self._handle_state(i) for i in value[1])
     98         if value[0]:
     99             self._cache[value[0]] = result

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in _handle_state(self, state)
     92     def _handle_state(self, state):
     93         opcode, value = state
---> 94         return self._cases[str(opcode).lower()](value)
     95 
     96     def _handle_group(self, value):

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in <lambda>(x)
     58                 lambda x: choice(string.printable.replace(unichr(x), '')),
     59             "at": lambda x: '',
---> 60             "in": lambda x: self._handle_in(x),
     61             "any": lambda x: choice(string.printable.replace('\n', '')),
     62             "range": lambda x: [unichr(i) for i in xrange(x[0], x[1] + 1)],

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in _handle_in(self, value)
    101 
    102     def _handle_in(self, value):
--> 103         candidates = list(itertools.chain(*(self._handle_state(i) for i in value)))
    104         if candidates[0] is False:
    105             candidates = set(string.printable).difference(candidates[1:])

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in <genexpr>(.0)
    101 
    102     def _handle_in(self, value):
--> 103         candidates = list(itertools.chain(*(self._handle_state(i) for i in value)))
    104         if candidates[0] is False:
    105             candidates = set(string.printable).difference(candidates[1:])

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in _handle_state(self, state)
     92     def _handle_state(self, state):
     93         opcode, value = state
---> 94         return self._cases[str(opcode).lower()](value)
     95 
     96     def _handle_group(self, value):

~/virtualenvs/jupyter_scipy_pandas/lib/python3.5/site-packages/xeger/xeger.py in <lambda>(x)
     61             "any": lambda x: choice(string.printable.replace('\n', '')),
     62             "range": lambda x: [unichr(i) for i in xrange(x[0], x[1] + 1)],
---> 63             "category": lambda x: self._categories[x](),
     64             'branch':
     65                 lambda x: ''.join(self._handle_state(i) for i in choice(x[1])),

KeyError: CATEGORY_DIGIT
abellosovic commented 6 years ago

Same for me. The problem is \d; you can use a workaround.

puittenbroek commented 6 years ago

What is the workaround exactly?

horvatha commented 6 years ago

@puittenbroek Write [0-9] instead of \d.

poussik commented 5 years ago

Solved in 0.3.4 after this merge: https://github.com/crdoconnor/xeger/pull/9

asm0dey commented 4 years ago

This still happens for me from time to time with the following code:

In [14]: x.xeger("((self\\.)?)?([a-zA-Z0-9_]+)\\[(['\"]?)([a-zA-Z0-9_]+)\\4\] if (['\"]?)\\5\\6 in \\1\\3 else (.*),?")                                                                                                                      
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-14-5a512d5d9301> in <module>
----> 1 x.xeger("((self\\.)?)?([a-zA-Z0-9_]+)\\[(['\"]?)([a-zA-Z0-9_]+)\\4\] if (['\"]?)\\5\\6 in \\1\\3 else (.*),?")

/tmp/xeger/.venv/lib/python3.8/site-packages/xeger/xeger.py in xeger(self, string_or_regex)
     86 
     87         parsed = re.sre_parse.parse(pattern)
---> 88         result = self._build_string(parsed)
     89         self._cache.clear()
     90         return result

/tmp/xeger/.venv/lib/python3.8/site-packages/xeger/xeger.py in _build_string(self, parsed)
    106         newstr = []
    107         for state in parsed:
--> 108             newstr.append(self._handle_state(state))
    109         return ''.join(newstr)
    110 

/tmp/xeger/.venv/lib/python3.8/site-packages/xeger/xeger.py in _handle_state(self, state)
    111     def _handle_state(self, state):
    112         opcode, value = state
--> 113         return self._cases[str(opcode).lower()](value)
    114 
    115     def _handle_group(self, value):

/tmp/xeger/.venv/lib/python3.8/site-packages/xeger/xeger.py in <lambda>(x)
     73             "assert": lambda x: ''.join(self._handle_state(i) for i in x[1]),
     74             "assert_not": lambda x: '',
---> 75             "groupref": lambda x: self._cache[x],
     76             'min_repeat': lambda x: self._handle_repeat(*x),
     77             'max_repeat': lambda x: self._handle_repeat(*x),

KeyError: 1