Bug report

I have had trouble confirming this, but `urllib.request.urlretrieve` seems to ignore a `Host` header supplied through the installed opener's `addheaders`, even though it does honor the `User-agent` and `Referer` entries. I have two functions that perform the same download, one using `urlretrieve` and one using `requests`. The `requests` version works as expected; if I remove its `Host` header, it fails in the same way the `urlretrieve` version fails.

def download_helper(url, fname):
    """Download *url* to the file *fname* via ``urllib.request.urlretrieve``.

    Installs a global opener whose ``addheaders`` list carries User-agent,
    Referer and Host entries, then replaces ssl's default HTTPS context
    factory with ``ssl._create_unverified_context`` before fetching.
    """
    import ssl

    extra_headers = [
        ('User-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0'),
        ('Referer', "https://www.amd.com/en/support/graphics/amd-radeon-6000-series/amd-radeon-6700-series/amd-radeon-rx-6700-xt"),
        ('Host', 'us.download.nvidia.com'),
    ]

    # Build and install the opener first, exactly as the report does.
    custom_opener = urllib.request.build_opener()
    custom_opener.addheaders = extra_headers
    urllib.request.install_opener(custom_opener)

    # Module-wide patch: every later default HTTPS context comes from the
    # unverified factory.
    ssl._create_default_https_context = ssl._create_unverified_context

    urllib.request.urlretrieve(url, filename=fname)

def download_helper2(url, fname):
    """Stream *url* into the file *fname* using ``requests``.

    The request is sent with ``verify=False`` and with explicit referer,
    User-Agent and Host headers. The body is written to *fname* in
    1024-byte chunks.
    """
    my_referer = "https://www.amd.com/en/support/graphics/amd-radeon-6000-series/amd-radeon-6700-series/amd-radeon-rx-6700-xt"
    request_headers = {
        'referer': my_referer,
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0',
        'Host': 'us.download.nvidia.com',
    }
    # With stream=True the response must be closed explicitly; the ``with``
    # block does that even when opening or writing the file raises.
    with requests.get(url, verify=False, stream=True, headers=request_headers) as resp:
        with open(fname, 'wb') as file:
            for data in resp.iter_content(chunk_size=1024):
                file.write(data)

# Fetch by raw IP address; download_helper2 sends the Host header itself.
download_helper2(
    'https://192.229.211.70/Windows/516.94/516.94-desktop-win10-win11-64bit-international-dch-whql.exe',
    r'516.94-desktop-win10-win11-64bit-international-dch-whql.exe',
)

Your environment

CPython versions tested on: 3.10.6
Operating system and architecture: Windows Server 2022 10.0.20348.887 ; AMD64

# https://docs.python.org/3/howto/urllib2.html?highlight=urllib2#fetching-urls
import shutil
import tempfile
import urllib.request


def download_helper2(url, fname):
    """Download *url* to the file *fname* using an explicit ``Request``.

    The headers, including Host, are attached to the
    ``urllib.request.Request`` object itself, and the response body is
    copied to *fname* with ``shutil.copyfileobj``.
    """
    HOST = "us.download.nvidia.com"
    headers = {
        "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0",
        "Referer": "https://www.amd.com/en/support/graphics/amd-radeon-6000-series/amd-radeon-6700-series/amd-radeon-rx-6700-xt",
        "Host": HOST,
        "test": "test",
    }
    request = urllib.request.Request(url=url, headers=headers)
    with urllib.request.urlopen(request) as response:
        with open(fname, "wb") as file_:
            shutil.copyfileobj(response, file_)


download_helper2("http://localhost:8000/test", "/tmp/test")

python / cpython

urllib: urlretrieve() seems to ignore provided host header #96287

Bug report

Your environment