As the title states: if a Tumblr post contains at least one media file pointing to a hostname that no longer exists, no HTML file will be created. This is doubly confusing because the JSON export does seem to work. As a result, I now have three different counts of how many posts actually got saved: the number of individual post HTML files, the number of JSON files, and the post count reported by tumblr_backup.
Ideally those would always be in sync, and the HTML file would simply be created with the image missing.
This may also affect the media export, as the post job seems to hard-crash upon encountering this error, so images after the broken link might not be saved. However, I have been unable to find a good post to actually test this.
Caught exception while saving post 72577921848:
Traceback (most recent call last):
File "/home/cenodis/.local/lib/python3.10/site-packages/urllib3/connection.py", line 198, in _new_conn
sock = connection.create_connection(
File "/home/cenodis/.local/lib/python3.10/site-packages/urllib3/util/connection.py", line 60, in create_connection
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
File "/usr/lib/python3.10/socket.py", line 955, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno -2] Name or service not known
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/cenodis/.local/lib/python3.10/site-packages/tumblr_backup/wget.py", line 687, in _retrieve_loop
err, doctype = gethttp(url, hstat, doctype, logger, retry_counter, use_dns_check)
File "/home/cenodis/.local/lib/python3.10/site-packages/tumblr_backup/wget.py", line 297, in gethttp
resp = urlopen(url, use_dns_check, request_headers, preload_content=False, enforce_content_length=False)
File "/home/cenodis/.local/lib/python3.10/site-packages/tumblr_backup/wget.py", line 831, in urlopen
return poolman.request('GET', url, headers=req_headers, retries=HTTP_RETRY, **kwargs)
File "/home/cenodis/.local/lib/python3.10/site-packages/urllib3/_request_methods.py", line 136, in request
return self.request_encode_url(
File "/home/cenodis/.local/lib/python3.10/site-packages/urllib3/_request_methods.py", line 183, in request_encode_url
return self.urlopen(method, url, **extra_kw)
File "/home/cenodis/.local/lib/python3.10/site-packages/tumblr_backup/wget.py", line 231, in urlopen
return super().urlopen(method, url, redirect, **kw)
File "/home/cenodis/.local/lib/python3.10/site-packages/urllib3/poolmanager.py", line 444, in urlopen
response = conn.urlopen(method, u.request_uri, **kw)
File "/home/cenodis/.local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 847, in urlopen
retries = retries.increment(
File "/home/cenodis/.local/lib/python3.10/site-packages/urllib3/util/retry.py", line 463, in increment
raise reraise(type(error), error, _stacktrace)
File "/home/cenodis/.local/lib/python3.10/site-packages/urllib3/util/util.py", line 39, in reraise
raise value
File "/home/cenodis/.local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 793, in urlopen
response = self._make_request(
File "/home/cenodis/.local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 496, in _make_request
conn.request(
File "/home/cenodis/.local/lib/python3.10/site-packages/urllib3/connection.py", line 400, in request
self.endheaders()
File "/usr/lib/python3.10/http/client.py", line 1278, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/usr/lib/python3.10/http/client.py", line 1038, in _send_output
self.send(msg)
File "/usr/lib/python3.10/http/client.py", line 976, in send
self.connect()
File "/home/cenodis/.local/lib/python3.10/site-packages/urllib3/connection.py", line 238, in connect
self.sock = self._new_conn()
File "/home/cenodis/.local/lib/python3.10/site-packages/urllib3/connection.py", line 205, in _new_conn
raise NameResolutionError(self.host, self, e) from e
urllib3.exceptions.NameResolutionError: <tumblr_backup.wget.WGHTTPConnection object at 0x7f7a60682140>: Failed to resolve 'i40.tinypic.com' ([Errno -2] Name or service not known)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/cenodis/.local/lib/python3.10/site-packages/tumblr_backup/main.py", line 1928, in save_post
f.write(self.get_post())
File "/home/cenodis/.local/lib/python3.10/site-packages/tumblr_backup/main.py", line 1806, in get_post
content = self.get_content()
File "/home/cenodis/.local/lib/python3.10/site-packages/tumblr_backup/main.py", line 1525, in get_content
append_try('body')
File "/home/cenodis/.local/lib/python3.10/site-packages/tumblr_backup/main.py", line 1512, in append_try
elt = re.sub(r"""(?i)(<img\s(?:[^>]*\s)?src\s*=\s*["'])(.*?)(["'][^>]*>)""",
File "/usr/lib/python3.10/re.py", line 209, in sub
return _compile(pattern, flags).sub(repl, string, count)
File "/home/cenodis/.local/lib/python3.10/site-packages/tumblr_backup/main.py", line 1697, in get_inline_image
saved_name = self.download_media(image_url, filename=image_filename)
File "/home/cenodis/.local/lib/python3.10/site-packages/tumblr_backup/main.py", line 1785, in download_media
return self._download_media_inner(url, get_path, path_parts, media_path)
File "/home/cenodis/.local/lib/python3.10/site-packages/tumblr_backup/main.py", line 1958, in _download_media_inner
wget_retrieve(url, dstpath, post_id=self.ident, post_timestamp=self.post['timestamp'])
File "/home/cenodis/.local/lib/python3.10/site-packages/tumblr_backup/wget.py", line 848, in __call__
_retrieve_loop(
File "/home/cenodis/.local/lib/python3.10/site-packages/tumblr_backup/wget.py", line 694, in _retrieve_loop
hostname = normalized_host(None, conn.host, conn.port)
AttributeError: 'str' object has no attribute 'host'
As the title states: if a Tumblr post contains at least one media file pointing to a hostname that no longer exists, no HTML file will be created. This is doubly confusing because the JSON export does seem to work. As a result, I now have three different counts of how many posts actually got saved: the number of individual post HTML files, the number of JSON files, and the post count reported by tumblr_backup.
Ideally those would always be in sync, and the HTML file would simply be created with the image missing.
This may also affect the media export, as the post job seems to hard-crash upon encountering this error, so images after the broken link might not be saved. However, I have been unable to find a good post to actually test this.
Example post (in this case the dead host is an old tinypic server): https://rosexknight.tumblr.com/post/72577921848/i-tried
Command:
tumblr-backup --save-video --save-audio --json -i --internet-archive
Version: tumblr-backup 1.0.3 (PyPI)

Stacktrace: