import requests
from extractnet import Extractor
# Download the article HTML. The explicit timeout keeps the script from
# hanging forever if the server never responds.
raw_html = requests.get(
    'https://apnews.com/article/6e58b5742b36e3de53298cf73fbfdf48',
    timeout=30,
).text

# Run the extractnet pipeline on the downloaded page.
results = Extractor().extract(raw_html)

# Print every extracted field, each followed by a separator line.
for key, value in results.items():
    print(key)
    print(value)
    print('------------')
Traceback:
Traceback (most recent call last):
File "main.py", line 16, in <module>
results = Extractor().extract(raw_html)
File "/home/doc/Documents/Projects/extract_net_test/venv/lib/python3.8/site-packages/extractnet/pipeline.py", line 88, in extract
return self.postprocess(html, output, documents_meta_data, **kwargs)
File "/home/doc/Documents/Projects/extract_net_test/venv/lib/python3.8/site-packages/extractnet/pipeline.py", line 134, in postprocess
return attribute_sanity_check(results, **sanity_check_params)
File "/home/doc/Documents/Projects/extract_net_test/venv/lib/python3.8/site-packages/extractnet/util.py", line 223, in attribute_sanity_check
content['date'] = dateparser.parse(date)
File "/home/doc/Documents/Projects/extract_net_test/venv/lib/python3.8/site-packages/dateparser/conf.py", line 92, in wrapper
return f(*args, **kwargs)
File "/home/doc/Documents/Projects/extract_net_test/venv/lib/python3.8/site-packages/dateparser/__init__.py", line 61, in parse
data = parser.get_date_data(date_string, date_formats)
File "/home/doc/Documents/Projects/extract_net_test/venv/lib/python3.8/site-packages/dateparser/date.py", line 428, in get_date_data
parsed_date = _DateLocaleParser.parse(
File "/home/doc/Documents/Projects/extract_net_test/venv/lib/python3.8/site-packages/dateparser/date.py", line 178, in parse
return instance._parse()
File "/home/doc/Documents/Projects/extract_net_test/venv/lib/python3.8/site-packages/dateparser/date.py", line 182, in _parse
date_data = self._parsers[parser_name]()
File "/home/doc/Documents/Projects/extract_net_test/venv/lib/python3.8/site-packages/dateparser/date.py", line 196, in _try_freshness_parser
return freshness_date_parser.get_date_data(self._get_translated_date(), self._settings)
File "/home/doc/Documents/Projects/extract_net_test/venv/lib/python3.8/site-packages/dateparser/date.py", line 234, in _get_translated_date
self._translated_date = self.locale.translate(
File "/home/doc/Documents/Projects/extract_net_test/venv/lib/python3.8/site-packages/dateparser/languages/locale.py", line 131, in translate
relative_translations = self._get_relative_translations(settings=settings)
File "/home/doc/Documents/Projects/extract_net_test/venv/lib/python3.8/site-packages/dateparser/languages/locale.py", line 158, in _get_relative_translations
self._generate_relative_translations(normalize=True))
File "/home/doc/Documents/Projects/extract_net_test/venv/lib/python3.8/site-packages/dateparser/languages/locale.py", line 172, in _generate_relative_translations
pattern = DIGIT_GROUP_PATTERN.sub(r'?P<n>\d+', pattern)
File "/home/doc/Documents/Projects/extract_net_test/venv/lib/python3.8/site-packages/regex/regex.py", line 710, in _compile_replacement_helper
is_group, items = _compile_replacement(source, pattern, is_unicode)
File "/home/doc/Documents/Projects/extract_net_test/venv/lib/python3.8/site-packages/regex/_regex_core.py", line 1737, in _compile_replacement
raise error("bad escape \\%s" % ch, source.string, source.pos)
regex._regex_core.error: bad escape \d at position 7
I was able to make it work by replacing the pattern definition in `_generate_relative_translations` in dateparser's `languages/locale.py`:
Hi,
I got this error while trying to run the extractor:
Code:
Traceback:
I was able to make it work by replacing the pattern definition in `_generate_relative_translations` in dateparser's `languages/locale.py`:
Was (this produces the error): `pattern = DIGIT_GROUP_PATTERN.sub(r'?P<n>\d+', pattern)`
Changed to (this works fine):
Could this be a Python version issue?
I use Python 3.8.10.