akoumjian / datefinder

Find dates inside text using Python and get back datetime objects
http://datefinder.readthedocs.org/en/latest/
MIT License
635 stars 167 forks source link

"Marseilles" seems to confuse the regex #114

Open ccurvey opened 5 years ago

ccurvey commented 5 years ago

I think the regex is matching the "Mar" in "Marseilles" as an abbreviation for "March", so `find_dates` no longer returns exactly one date when that word follows the date.

import unittest

import datefinder

class AAATestDateFinder(unittest.TestCase):
    """Reproduction cases for a date followed by the word "Marseilles".

    Every test passes one string containing a single written date to
    ``datefinder.find_dates`` and expects exactly one match back.  The
    comment in each test records whether that case passed or failed when
    the issue was reported.
    """

    def _assert_single_date(self, text):
        """Assert that ``datefinder.find_dates`` yields exactly one match for *text*."""
        matches = list(datefinder.find_dates(text))
        # assertEqual, unlike a bare ``assert``, is not stripped by
        # ``python -O`` and lets us show what was actually matched.
        self.assertEqual(
            len(matches), 1, "find_dates returned %r" % (matches,)
        )

    def test_birth_date(self):
        """Date followed by a long place/name tail without "Marseilles"."""
        # Reported as passing.
        text = '12 September 1797 at Gaillace, Albi, southern France as Anne Marguerite Adelaide Emily de Vialar'
        self._assert_single_date(text)

    def test_death_date(self):
        """Date followed by "Marseilles" and a non-ASCII place name."""
        # Reported as failing.
        text = '24 August 1856 at Marseilles, Bouches-du-Rhône, France of natural causes'
        self._assert_single_date(text)

    def test_august_in_birth_string(self):
        """Same date as the failing case, but with the passing case's tail."""
        # Reported as passing, so the date string itself is not the problem.
        text = '24 August 1856 at Gaillace, Albi, southern France as Anne Marguerite Adelaide Emily de Vialar'
        self._assert_single_date(text)

    def test_august_in_short_string(self):
        """Date followed only by "at Marseilles" (pure ASCII)."""
        # Reported as failing, so it is probably not the non-ASCII
        # character in "Rhône".
        text = '24 August 1856 at Marseilles'
        self._assert_single_date(text)

    def test_short_string_only(self):
        """The bare date with nothing after it."""
        # Reported as passing.
        text = '24 August 1856'
        self._assert_single_date(text)

if __name__ == "__main__":
    # Run the test cases in this module when it is executed as a script.
    unittest.main()