Bad hostname found, kept as-is: معمای محمدرضا، شاه تبریز؛ سایه سپاه در فوتبال و اقتصاد
Traceback (most recent call last):
File "/app/zimit/lib/python3.12/site-packages/idna/core.py", line 295, in ulabel
label = label.encode('ascii')
^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-4: ordinal not in range(128)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/app/zimit/lib/python3.12/site-packages/warc2zim/url_rewriting.py", line 234, in normalize
hostname = idna.decode(hostname)
^^^^^^^^^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/idna/core.py", line 393, in decode
s = ulabel(label)
^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/idna/core.py", line 297, in ulabel
check_label(label)
File "/app/zimit/lib/python3.12/site-packages/idna/core.py", line 261, in check_label
raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label)))
idna.core.InvalidCodepoint: Codepoint U+0020 at position 6 of 'معمای محمدرضا، شاه تبریز؛ سایه سپاه در فوتبال و اقتصاد' not allowed
Traceback (most recent call last):
File "/usr/bin/zimit", line 8, in <module>
sys.exit(zimit.zimit())
^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/zimit/zimit.py", line 580, in zimit
run(sys.argv[1:])
File "/app/zimit/lib/python3.12/site-packages/zimit/zimit.py", line 512, in run
return warc2zim(warc2zim_args)
^^^^^^^^^^^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/warc2zim/main.py", line 90, in main
return converter.run()
^^^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/warc2zim/converter.py", line 271, in run
self.add_items_for_warc_record(record)
File "/app/zimit/lib/python3.12/site-packages/warc2zim/converter.py", line 548, in add_items_for_warc_record
payload_item = WARCPayloadItem(
^^^^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/warc2zim/items.py", line 39, in __init__
(self.title, self.content) = Rewriter(path, record, existing_zim_paths).rewrite(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/warc2zim/content_rewriting/generic.py", line 91, in rewrite
return self.rewrite_html(head_template, css_insert)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/warc2zim/content_rewriting/generic.py", line 148, in rewrite_html
return HtmlRewriter(self.url_rewriter, head_insert, css_insert).rewrite(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/warc2zim/content_rewriting/html.py", line 87, in rewrite
self.feed(content)
File "/usr/lib/python3.12/html/parser.py", line 111, in feed
self.goahead(0)
File "/usr/lib/python3.12/html/parser.py", line 171, in goahead
k = self.parse_starttag(i)
^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/html/parser.py", line 338, in parse_starttag
self.handle_starttag(tag, attrs)
File "/app/zimit/lib/python3.12/site-packages/warc2zim/content_rewriting/html.py", line 112, in handle_starttag
self.send(transform_attrs(attrs, url_rewriter, self.css_rewriter))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/warc2zim/content_rewriting/html.py", line 51, in transform_attrs
return " ".join(format_attr(*attr) for attr in processed_attrs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/warc2zim/content_rewriting/html.py", line 51, in <genexpr>
return " ".join(format_attr(*attr) for attr in processed_attrs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/warc2zim/content_rewriting/html.py", line 50, in <genexpr>
processed_attrs = (process_attr(attr, url_rewriter, css_rewriter) for attr in attrs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/warc2zim/content_rewriting/html.py", line 25, in process_attr
return (attr[0], url_rewriter(attr[1]))
^^^^^^^^^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/warc2zim/content_rewriting/html.py", line 107, in <lambda>
url_rewriter = lambda url: self.url_rewriter( # noqa: E731
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/warc2zim/url_rewriting.py", line 303, in __call__
item_path = normalize(HttpUrl(item_absolute_url))
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/app/zimit/lib/python3.12/site-packages/warc2zim/url_rewriting.py", line 109, in __init__
HttpUrl.check_validity(value)
File "/app/zimit/lib/python3.12/site-packages/warc2zim/url_rewriting.py", line 138, in check_validity
raise ValueError(f"Unsupported upper-case chars in hostname : {value}")
ValueError: Unsupported upper-case chars in hostname : http://xn--%20-nzed9cfd2omai79idc2v%20%D9%86%DB%8C%D8%B1%D9%88%DB%8C%20%D8%A7%D9%86%D8%AA%D8%B8%D8%A7%D9%85%DB%8C%20%D8%AF%D8%B1%20%D8%B2%D9%85%D8%A7%D9%86%20%D9%81%D8%B1%D9%85%D8%A7%D9%86%D8%AF%D9%87%DB%8C%20%D9%82%D8%A7%D9%84%DB%8C%D8%A8%D8%A7%D9%81%D8%8C%20%D9%85%D8%B3%D9%88%D9%88%D9%84%20%D8%A8%D8%A7%D8%B2%D8%AF%D8%A7%D8%B4%D8%AA%20%D9%88%20%D8%A7%D8%B9%D8%AA%D8%B1%D8%A7%D9%81%E2%80%8C%D9%87%D8%A7%DB%8C%20%D8%B2%DB%8C%D8%B1%20%D8%B4%DA%A9%D9%86%D8%AC%D9%87%20%D9%87%D9%85%D8%B3%D8%B1%D9%85%20%D8%A8%D9%88%D8%AF
Observed in pipeline run https://farm.openzim.org/pipeline/295c6632-eaa0-4f02-8129-8afbe2d1f575 for recipe https://farm.openzim.org/recipes/iranwire.com_persian