Status: Open — jayvdb opened this issue 1 year ago
[ 49s] =================================== FAILURES =================================== [ 49s] _________________ TestUnpickleRaw.test__construct_simple_trie __________________ [ 49s] [ 49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__construct_simple_trie> [ 49s] [ 49s] @skipIf(not ahocorasick.unicode, "Run only with unicode build") [ 49s] def test__construct_simple_trie(self): [ 49s] [ 49s] r""" [ 49s] trie for set {he, her, his, him, it} [ 49s] [ 49s] #0 -> [h #1 ] -> [e #2*] -> [r #3*] [ 49s] | \-> [i #4 ] -> [s #5*] [ 49s] | \-> [m #6*] [ 49s] | [ 49s] +--> [i #7 ] -> [t #8 ] [ 49s] """ [ 49s] values = ["HE", "HER", "HIS", "HIM", "IT"] [ 49s] [ 49s] node0 = self.create_raw_node(0, [('h', 1), ('i', 7)]) [ 49s] node1 = self.create_raw_node(0, [('e', 2), ('i', 4)]) [ 49s] node2 = self.create_raw_node(1, [('r', 3)]) # HE [ 49s] node3 = self.create_raw_node(1, []) # HER [ 49s] node4 = self.create_raw_node(0, [('s', 5), ('m', 6)]) [ 49s] node5 = self.create_raw_node(1, []) # HIS [ 49s] node6 = self.create_raw_node(1, []) # HIM [ 49s] node7 = self.create_raw_node(0, [('t', 8)]) [ 49s] node8 = self.create_raw_node(1, []) # IT [ 49s] [ 49s] self.count = 9 [ 49s] self.raw = node0 + node1 + node2 + node3 + node4 + node5 + node6 + node7 + node8 [ 49s] self.kind = ahocorasick.TRIE [ 49s] self.values = values [ 49s] self.word_count = 5 [ 49s] [ 49s] > A = self.create_automaton() [ 49s] [ 49s] tests/test_unpickle.py:166: [ 49s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ [ 49s] [ 49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__construct_simple_trie> [ 49s] use_exact_raw = False [ 49s] [ 49s] def create_automaton(self, use_exact_raw=False): [ 49s] # alter values that were set in setUp [ 49s] if use_exact_raw: [ 49s] raw = self.raw [ 49s] else: [ 49s] raw = [self.create_raw_count(self.count) + self.raw] [ 49s] [ 49s] args = (raw, self.kind, self.store, self.key_type, [ 49s] self.word_count, self.longest, self.values); [ 
49s] [ 49s] > return ahocorasick.Automaton(*args) [ 49s] E ValueError: Data truncated [parsing children of node #2]: chunk #0 @ offset 54, expected at least 840 bytes [ 49s] [ 49s] tests/test_unpickle.py:111: ValueError [ 49s] _ TestUnpickleRaw.test__construct_simple_trie__split_across_a_few_chunks_unicode _ [ 49s] [ 49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__construct_simple_trie__split_across_a_few_chunks_unicode> [ 49s] [ 49s] @skipIf(not ahocorasick.unicode, "Run only with unicode build") [ 49s] def test__construct_simple_trie__split_across_a_few_chunks_unicode(self): [ 49s] [ 49s] r""" [ 49s] trie for set {he, her, his, him, it} [ 49s] [ 49s] #0 -> [h #1 ] -> [e #2*] -> [r #3*] [ 49s] | \-> [i #4 ] -> [s #5*] [ 49s] | \-> [m #6*] [ 49s] | [ 49s] +--> [i #7 ] -> [t #8 ] [ 49s] """ [ 49s] values = ["HE", "HER", "HIS", "HIM", "IT"] [ 49s] [ 49s] node0 = self.create_raw_node(0, [('h', 1), ('i', 7)]) [ 49s] node1 = self.create_raw_node(0, [('e', 2), ('i', 4)]) [ 49s] node2 = self.create_raw_node(1, [('r', 3)]) # HE [ 49s] node3 = self.create_raw_node(1, []) # HER [ 49s] node4 = self.create_raw_node(0, [('s', 5), ('m', 6)]) [ 49s] node5 = self.create_raw_node(1, []) # HIS [ 49s] node6 = self.create_raw_node(1, []) # HIM [ 49s] node7 = self.create_raw_node(0, [('t', 8)]) [ 49s] node8 = self.create_raw_node(1, []) # IT [ 49s] [ 49s] self.count = 9 [ 49s] self.raw = [ [ 49s] self.create_raw_count(2) + node0 + node1, [ 49s] self.create_raw_count(3) + node2 + node3 + node4, [ 49s] self.create_raw_count(1) + node5, [ 49s] self.create_raw_count(3) + node6 + node7 + node8 [ 49s] ] [ 49s] self.kind = ahocorasick.TRIE [ 49s] self.values = values [ 49s] self.word_count = 5 [ 49s] [ 49s] A = self.create_automaton(USE_EXACT_RAW) [ 49s] self.assertEqual(len(A), 5) [ 49s] > self.assertEqual(A.get("he"), "HE") [ 49s] E KeyError [ 49s] [ 49s] tests/test_unpickle.py:211: KeyError [ 49s] _______ TestUnpickleRaw.test__construct_simple_trie__wrong_index_unicode _______ 
[ 49s] [ 49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__construct_simple_trie__wrong_index_unicode> [ 49s] [ 49s] @skipIf(not ahocorasick.unicode, "Run only with unicode build") [ 49s] def test__construct_simple_trie__wrong_index_unicode(self): [ 49s] """ [ 49s] trie for set {he} [ 49s] [ 49s] #0 -> [h #1*] -> [e #2*] [ 49s] """ [ 49s] [ 49s] node0 = self.create_raw_node(0, [('h', 1)]) [ 49s] node1 = self.create_raw_node(1, [('e', 2)]) # expect python value [ 49s] node2 = self.create_raw_node(1, []) # also python value [ 49s] [ 49s] self.count = 3 [ 49s] self.raw = node0 + node1 + node2 [ 49s] self.kind = ahocorasick.TRIE [ 49s] self.values = ["HE"] # but we provide a too short collection [ 49s] self.word_count = 2 [ 49s] [ 49s] with self.assertRaises(IndexError): [ 49s] > self.create_automaton() [ 49s] E AssertionError: IndexError not raised [ 49s] [ 49s] tests/test_unpickle.py:257: AssertionError [ 49s] _________________ TestUnpickleRaw.test__malicious_fail_pointer _________________ [ 49s] [ 49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__malicious_fail_pointer> [ 49s] [ 49s] def test__malicious_fail_pointer(self): [ 49s] """ [ 49s] trie with just one node [ 49s] """ [ 49s] [ 49s] builder = self.create_node_builder(0, []) [ 49s] builder.fail = 42 [ 49s] [ 49s] self.count = 1 [ 49s] self.raw = builder.dump() [ 49s] self.kind = ahocorasick.TRIE [ 49s] [ 49s] with self.assertRaisesRegex(ValueError, "Node #0 malformed: the fail link points to.*"): [ 49s] > self.create_automaton() [ 49s] [ 49s] tests/test_unpickle.py:354: [ 49s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ [ 49s] [ 49s] def create_automaton(self, use_exact_raw=False): [ 49s] # alter values that were set in setUp [ 49s] if use_exact_raw: [ 49s] raw = self.raw [ 49s] else: [ 49s] raw = [self.create_raw_count(self.count) + self.raw] [ 49s] [ 49s] args = (raw, self.kind, self.store, self.key_type, [ 49s] self.word_count, self.longest, 
self.values); [ 49s] [ 49s] > return ahocorasick.Automaton(*args) [ 49s] E IndexError: list index out of range [ 49s] [ 49s] tests/test_unpickle.py:111: IndexError [ 49s] _____________ TestUnpickleRaw.test__malicious_next_pointer_unicode _____________ [ 49s] [ 49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__malicious_next_pointer_unicode> [ 49s] [ 49s] @skipIf(not ahocorasick.unicode, "Run only with unicode build") [ 49s] def test__malicious_next_pointer_unicode(self): [ 49s] """ [ 49s] #0 -> [? #1 ] [ 49s] """ [ 49s] [ 49s] node0 = self.create_raw_node(0, [('?', 1)]) [ 49s] node1 = self.create_raw_node(0, [('x', 16)]) # the second node point to non-existent node [ 49s] [ 49s] self.count = 2 [ 49s] self.raw = node0 + node1 [ 49s] self.kind = ahocorasick.TRIE [ 49s] [ 49s] with self.assertRaisesRegex(ValueError, "Node #1 malformed: next link #0 points to.*"): [ 49s] > self.create_automaton() [ 49s] [ 49s] tests/test_unpickle.py:323: [ 49s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ [ 49s] [ 49s] def create_automaton(self, use_exact_raw=False): [ 49s] # alter values that were set in setUp [ 49s] if use_exact_raw: [ 49s] raw = self.raw [ 49s] else: [ 49s] raw = [self.create_raw_count(self.count) + self.raw] [ 49s] [ 49s] args = (raw, self.kind, self.store, self.key_type, [ 49s] self.word_count, self.longest, self.values); [ 49s] [ 49s] > return ahocorasick.Automaton(*args) [ 49s] E IndexError: list index out of range [ 49s] [ 49s] tests/test_unpickle.py:111: IndexError [ 49s] _________________ TestUnpickleRaw.test__truncated_raw__case_2 __________________ [ 49s] [ 49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__truncated_raw__case_2> [ 49s] [ 49s] def test__truncated_raw__case_2(self): [ 49s] """ [ 49s] trie for set {he} [ 49s] [ 49s] #0 -> [h #1 ] -> [e #2*] [ 49s] """ [ 49s] [ 49s] node0 = self.create_raw_node(0, [('h', 1)]) [ 49s] node1 = self.create_raw_node(0, [('e', 2)]) [ 49s] node2 = 
self.create_raw_node(1, []) [ 49s] raw = node0 + node1 + node2 [ 49s] [ 49s] self.count = 3 [ 49s] self.kind = ahocorasick.TRIE [ 49s] [ 49s] for length in range(len(raw)): [ 49s] self.raw = raw[:length] # truncate data and expect fail [ 49s] with self.assertRaisesRegex(ValueError, "Data truncated.*"): [ 49s] > self.create_automaton() [ 49s] [ 49s] tests/test_unpickle.py:307: [ 49s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ [ 49s] [ 49s] def create_automaton(self, use_exact_raw=False): [ 49s] # alter values that were set in setUp [ 49s] if use_exact_raw: [ 49s] raw = self.raw [ 49s] else: [ 49s] raw = [self.create_raw_count(self.count) + self.raw] [ 49s] [ 49s] args = (raw, self.kind, self.store, self.key_type, [ 49s] self.word_count, self.longest, self.values); [ 49s] [ 49s] > return ahocorasick.Automaton(*args) [ 49s] E IndexError: list index out of range [ 49s] [ 49s] tests/test_unpickle.py:111: IndexError [ 49s] ______________________ TestUnpickleRaw.test__values_leaks ______________________ [ 49s] [ 49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__values_leaks> [ 49s] [ 49s] def test__values_leaks(self): [ 49s] [ 49s] # create not connected nodes, but each hold a value [ 49s] good_nodes = 1000 [ 49s] raw = b'' [ 49s] values = [] [ 49s] for i in range(good_nodes): [ 49s] raw += self.create_raw_node(1, []) [ 49s] values.append(tuple("node %d" % i)) [ 49s] [ 49s] # create the last node that will cause error -- malformed next pointer [ 49s] raw += self.create_raw_node(1, [('_', 10000)]) [ 49s] values.append(tuple("never reached")) [ 49s] [ 49s] self.count = good_nodes + 1 [ 49s] self.raw = raw [ 49s] self.kind = ahocorasick.TRIE [ 49s] self.values = values [ 49s] [ 49s] with self.assertRaises(ValueError): [ 49s] > self.create_automaton() [ 49s] E AssertionError: ValueError not raised [ 49s] [ 49s] tests/test_unpickle.py:376: AssertionError
@jayvdb Thanks! I wonder whether we really support 32-bit platforms at all. Could you share some specifics about your OS, architecture, and compiler environment?