I think the regex is matching the "Mar" in "Marseilles" as the month abbreviation for "March", and that is confusing the date detection.
import unittest
import datefinder
class AAATestDateFinder(unittest.TestCase):
    """Reproduce a ``datefinder.find_dates`` miscount on text containing "Marseilles".

    Each input string below contains exactly one date, so every test expects
    ``datefinder.find_dates`` to yield exactly one result.  The per-test
    comments record the outcome observed by the reporter.
    """

    def _assert_single_date(self, text):
        """Fail unless ``datefinder.find_dates`` yields exactly one date for *text*."""
        # Use a unittest assertion rather than a bare ``assert``: bare asserts
        # are stripped under ``python -O`` and do not report the actual count.
        matches = list(datefinder.find_dates(text))
        self.assertEqual(len(matches), 1, "dates found: %r" % (matches,))

    def test_birth_date(self):
        # This works. Yay!
        text = '12 September 1797 at Gaillace, Albi, southern France as Anne Marguerite Adelaide Emily de Vialar'
        self._assert_single_date(text)

    def test_death_date(self):
        # This fails. Boo!
        text = '24 August 1856 at Marseilles, Bouches-du-Rhône, France of natural causes'
        self._assert_single_date(text)

    def test_august_in_birth_string(self):
        # This works, so it's not something related to the date string.
        text = '24 August 1856 at Gaillace, Albi, southern France as Anne Marguerite Adelaide Emily de Vialar'
        self._assert_single_date(text)

    def test_august_in_short_string(self):
        # This fails, so it's probably not the non-ASCII character in "Rhône".
        text = '24 August 1856 at Marseilles'
        self._assert_single_date(text)

    def test_short_string_only(self):
        # This works.
        text = '24 August 1856'
        self._assert_single_date(text)
# Run the test cases above only when this file is executed directly as a
# script, not when it is imported by another module or a test runner.
if __name__ == '__main__':
    unittest.main()
I think the regex is matching the "Mar" in "Marseilles" as the month abbreviation for "March", and that is confusing the date detection.