WojciechMula / pyahocorasick

Python module (C extension and plain Python) implementing the Aho-Corasick algorithm
BSD 3-Clause "New" or "Revised" License
914 stars 122 forks source link

unpickle test failures on i586 #178

Open jayvdb opened 1 year ago

jayvdb commented 1 year ago
[   49s] =================================== FAILURES ===================================
[   49s] _________________ TestUnpickleRaw.test__construct_simple_trie __________________
[   49s] 
[   49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__construct_simple_trie>
[   49s] 
[   49s]     @skipIf(not ahocorasick.unicode, "Run only with unicode build")
[   49s]     def test__construct_simple_trie(self):
[   49s]     
[   49s]         r"""
[   49s]         trie for set {he, her, his, him, it}
[   49s]     
[   49s]         #0 -> [h #1 ] -> [e #2*] -> [r #3*]
[   49s]          |           \-> [i #4 ] -> [s #5*]
[   49s]          |                      \-> [m #6*]
[   49s]          |
[   49s]          +--> [i #7 ] -> [t #8 ]
[   49s]         """
[   49s]         values = ["HE", "HER", "HIS", "HIM", "IT"]
[   49s]     
[   49s]         node0 = self.create_raw_node(0, [('h', 1), ('i', 7)])
[   49s]         node1 = self.create_raw_node(0, [('e', 2), ('i', 4)])
[   49s]         node2 = self.create_raw_node(1, [('r', 3)])  # HE
[   49s]         node3 = self.create_raw_node(1, [])  # HER
[   49s]         node4 = self.create_raw_node(0, [('s', 5), ('m', 6)])
[   49s]         node5 = self.create_raw_node(1, [])  # HIS
[   49s]         node6 = self.create_raw_node(1, [])  # HIM
[   49s]         node7 = self.create_raw_node(0, [('t', 8)])
[   49s]         node8 = self.create_raw_node(1, [])  # IT
[   49s]     
[   49s]         self.count = 9
[   49s]         self.raw = node0 + node1 + node2 + node3 + node4 + node5 + node6 + node7 + node8
[   49s]         self.kind = ahocorasick.TRIE
[   49s]         self.values = values
[   49s]         self.word_count = 5
[   49s]     
[   49s] >       A = self.create_automaton()
[   49s] 
[   49s] tests/test_unpickle.py:166: 
[   49s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
[   49s] 
[   49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__construct_simple_trie>
[   49s] use_exact_raw = False
[   49s] 
[   49s]     def create_automaton(self, use_exact_raw=False):
[   49s]         # alter values that were set in setUp
[   49s]         if use_exact_raw:
[   49s]             raw = self.raw
[   49s]         else:
[   49s]             raw = [self.create_raw_count(self.count) + self.raw]
[   49s]     
[   49s]         args = (raw, self.kind, self.store, self.key_type,
[   49s]                 self.word_count, self.longest, self.values);
[   49s]     
[   49s] >       return ahocorasick.Automaton(*args)
[   49s] E       ValueError: Data truncated [parsing children of node #2]: chunk #0 @ offset 54, expected at least 840 bytes
[   49s] 
[   49s] tests/test_unpickle.py:111: ValueError
[   49s] _ TestUnpickleRaw.test__construct_simple_trie__split_across_a_few_chunks_unicode _
[   49s] 
[   49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__construct_simple_trie__split_across_a_few_chunks_unicode>
[   49s] 
[   49s]     @skipIf(not ahocorasick.unicode, "Run only with unicode build")
[   49s]     def test__construct_simple_trie__split_across_a_few_chunks_unicode(self):
[   49s]     
[   49s]         r"""
[   49s]         trie for set {he, her, his, him, it}
[   49s]     
[   49s]         #0 -> [h #1 ] -> [e #2*] -> [r #3*]
[   49s]          |           \-> [i #4 ] -> [s #5*]
[   49s]          |                      \-> [m #6*]
[   49s]          |
[   49s]          +--> [i #7 ] -> [t #8 ]
[   49s]         """
[   49s]         values = ["HE", "HER", "HIS", "HIM", "IT"]
[   49s]     
[   49s]         node0 = self.create_raw_node(0, [('h', 1), ('i', 7)])
[   49s]         node1 = self.create_raw_node(0, [('e', 2), ('i', 4)])
[   49s]         node2 = self.create_raw_node(1, [('r', 3)])  # HE
[   49s]         node3 = self.create_raw_node(1, [])  # HER
[   49s]         node4 = self.create_raw_node(0, [('s', 5), ('m', 6)])
[   49s]         node5 = self.create_raw_node(1, [])  # HIS
[   49s]         node6 = self.create_raw_node(1, [])  # HIM
[   49s]         node7 = self.create_raw_node(0, [('t', 8)])
[   49s]         node8 = self.create_raw_node(1, [])  # IT
[   49s]     
[   49s]         self.count = 9
[   49s]         self.raw = [
[   49s]             self.create_raw_count(2) + node0 + node1,
[   49s]             self.create_raw_count(3) + node2 + node3 + node4,
[   49s]             self.create_raw_count(1) + node5,
[   49s]             self.create_raw_count(3) + node6 + node7 + node8
[   49s]         ]
[   49s]         self.kind = ahocorasick.TRIE
[   49s]         self.values = values
[   49s]         self.word_count = 5
[   49s]     
[   49s]         A = self.create_automaton(USE_EXACT_RAW)
[   49s]         self.assertEqual(len(A), 5)
[   49s] >       self.assertEqual(A.get("he"), "HE")
[   49s] E       KeyError
[   49s] 
[   49s] tests/test_unpickle.py:211: KeyError
[   49s] _______ TestUnpickleRaw.test__construct_simple_trie__wrong_index_unicode _______
[   49s] 
[   49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__construct_simple_trie__wrong_index_unicode>
[   49s] 
[   49s]     @skipIf(not ahocorasick.unicode, "Run only with unicode build")
[   49s]     def test__construct_simple_trie__wrong_index_unicode(self):
[   49s]         """
[   49s]         trie for set {he}
[   49s]     
[   49s]         #0 -> [h #1*] -> [e #2*]
[   49s]         """
[   49s]     
[   49s]         node0 = self.create_raw_node(0, [('h', 1)])
[   49s]         node1 = self.create_raw_node(1, [('e', 2)])  # expect python value
[   49s]         node2 = self.create_raw_node(1, [])  # also python value
[   49s]     
[   49s]         self.count = 3
[   49s]         self.raw = node0 + node1 + node2
[   49s]         self.kind = ahocorasick.TRIE
[   49s]         self.values = ["HE"]  # but we provide a too short collection
[   49s]         self.word_count = 2
[   49s]     
[   49s]         with self.assertRaises(IndexError):
[   49s] >           self.create_automaton()
[   49s] E           AssertionError: IndexError not raised
[   49s] 
[   49s] tests/test_unpickle.py:257: AssertionError
[   49s] _________________ TestUnpickleRaw.test__malicious_fail_pointer _________________
[   49s] 
[   49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__malicious_fail_pointer>
[   49s] 
[   49s]     def test__malicious_fail_pointer(self):
[   49s]         """
[   49s]         trie with just one node
[   49s]         """
[   49s]     
[   49s]         builder = self.create_node_builder(0, [])
[   49s]         builder.fail = 42
[   49s]     
[   49s]         self.count = 1
[   49s]         self.raw = builder.dump()
[   49s]         self.kind = ahocorasick.TRIE
[   49s]     
[   49s]         with self.assertRaisesRegex(ValueError, "Node #0 malformed: the fail link points to.*"):
[   49s] >           self.create_automaton()
[   49s] 
[   49s] tests/test_unpickle.py:354: 
[   49s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
[   49s] 
[   49s]     def create_automaton(self, use_exact_raw=False):
[   49s]         # alter values that were set in setUp
[   49s]         if use_exact_raw:
[   49s]             raw = self.raw
[   49s]         else:
[   49s]             raw = [self.create_raw_count(self.count) + self.raw]
[   49s]     
[   49s]         args = (raw, self.kind, self.store, self.key_type,
[   49s]                 self.word_count, self.longest, self.values);
[   49s]     
[   49s] >       return ahocorasick.Automaton(*args)
[   49s] E       IndexError: list index out of range
[   49s] 
[   49s] tests/test_unpickle.py:111: IndexError
[   49s] _____________ TestUnpickleRaw.test__malicious_next_pointer_unicode _____________
[   49s] 
[   49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__malicious_next_pointer_unicode>
[   49s] 
[   49s]     @skipIf(not ahocorasick.unicode, "Run only with unicode build")
[   49s]     def test__malicious_next_pointer_unicode(self):
[   49s]         """
[   49s]         #0 -> [? #1 ]
[   49s]         """
[   49s]     
[   49s]         node0 = self.create_raw_node(0, [('?', 1)])
[   49s]         node1 = self.create_raw_node(0, [('x', 16)])  # the second node point to non-existent node
[   49s]     
[   49s]         self.count = 2
[   49s]         self.raw = node0 + node1
[   49s]         self.kind = ahocorasick.TRIE
[   49s]     
[   49s]         with self.assertRaisesRegex(ValueError, "Node #1 malformed: next link #0 points to.*"):
[   49s] >           self.create_automaton()
[   49s] 
[   49s] tests/test_unpickle.py:323: 
[   49s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
[   49s] 
[   49s]     def create_automaton(self, use_exact_raw=False):
[   49s]         # alter values that were set in setUp
[   49s]         if use_exact_raw:
[   49s]             raw = self.raw
[   49s]         else:
[   49s]             raw = [self.create_raw_count(self.count) + self.raw]
[   49s]     
[   49s]         args = (raw, self.kind, self.store, self.key_type,
[   49s]                 self.word_count, self.longest, self.values);
[   49s]     
[   49s] >       return ahocorasick.Automaton(*args)
[   49s] E       IndexError: list index out of range
[   49s] 
[   49s] tests/test_unpickle.py:111: IndexError
[   49s] _________________ TestUnpickleRaw.test__truncated_raw__case_2 __________________
[   49s] 
[   49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__truncated_raw__case_2>
[   49s] 
[   49s]     def test__truncated_raw__case_2(self):
[   49s]         """
[   49s]         trie for set {he}
[   49s]     
[   49s]         #0 -> [h #1 ] -> [e #2*]
[   49s]         """
[   49s]     
[   49s]         node0 = self.create_raw_node(0, [('h', 1)])
[   49s]         node1 = self.create_raw_node(0, [('e', 2)])
[   49s]         node2 = self.create_raw_node(1, [])
[   49s]         raw = node0 + node1 + node2
[   49s]     
[   49s]         self.count = 3
[   49s]         self.kind = ahocorasick.TRIE
[   49s]     
[   49s]         for length in range(len(raw)):
[   49s]             self.raw = raw[:length]  # truncate data and expect fail
[   49s]             with self.assertRaisesRegex(ValueError, "Data truncated.*"):
[   49s] >               self.create_automaton()
[   49s] 
[   49s] tests/test_unpickle.py:307: 
[   49s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
[   49s] 
[   49s]     def create_automaton(self, use_exact_raw=False):
[   49s]         # alter values that were set in setUp
[   49s]         if use_exact_raw:
[   49s]             raw = self.raw
[   49s]         else:
[   49s]             raw = [self.create_raw_count(self.count) + self.raw]
[   49s]     
[   49s]         args = (raw, self.kind, self.store, self.key_type,
[   49s]                 self.word_count, self.longest, self.values);
[   49s]     
[   49s] >       return ahocorasick.Automaton(*args)
[   49s] E       IndexError: list index out of range
[   49s] 
[   49s] tests/test_unpickle.py:111: IndexError
[   49s] ______________________ TestUnpickleRaw.test__values_leaks ______________________
[   49s] 
[   49s] self = <test_unpickle.TestUnpickleRaw testMethod=test__values_leaks>
[   49s] 
[   49s]     def test__values_leaks(self):
[   49s]     
[   49s]         # create not connected nodes, but each hold a value
[   49s]         good_nodes = 1000
[   49s]         raw = b''
[   49s]         values = []
[   49s]         for i in range(good_nodes):
[   49s]             raw += self.create_raw_node(1, [])
[   49s]             values.append(tuple("node %d" % i))
[   49s]     
[   49s]         # create the last node that will cause error -- malformed next pointer
[   49s]         raw += self.create_raw_node(1, [('_', 10000)])
[   49s]         values.append(tuple("never reached"))
[   49s]     
[   49s]         self.count = good_nodes + 1
[   49s]         self.raw = raw
[   49s]         self.kind = ahocorasick.TRIE
[   49s]         self.values = values
[   49s]     
[   49s]         with self.assertRaises(ValueError):
[   49s] >           self.create_automaton()
[   49s] E           AssertionError: ValueError not raised
[   49s] 
[   49s] tests/test_unpickle.py:376: AssertionError
pombredanne commented 1 year ago

@jayvdb Thanks... I wonder if we really support 32-bit platforms at all... do you have some specifics on your OS, architecture, and compiler environment?