Closed AndoniZubimendi closed 8 years ago
I solved this by applying the patch below. It seems the site blocks downloads when no valid User-Agent header is sent.
diff --git a/downloader.py b/downloader.py
old mode 100644
new mode 100755
index 3babc73..478eedb
--- a/downloader.py
+++ b/downloader.py
@@ -6,14 +6,17 @@ import sys, getopt
from lxml import html
# saves downloaded asset to a directory
-def download_to_file(directory, url, session):
+def download_to_file(directory, url, session, headers):
if not os.path.exists(directory):
- resource = session.get("https://www.packtpub.com" + url)
+ resource = session.get("https://www.packtpub.com" + url, verify=True, headers=headers)
target = open(directory, 'w')
target.write(resource.content)
target.close()
def main(argv):
+ headers = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 " +
+ "(KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"}
email = ''
password = ''
directory = 'packt_ebooks'
@@ -53,7 +56,7 @@ def main(argv):
# initial request to get the "csrf token" for the login
url = "https://www.packtpub.com/"
- start_req = session.get(url)
+ start_req = session.get(url, verify=True, headers=headers)
# extract the "csrf token" (form_build_id) to submit with login POST
tree = html.fromstring(start_req.content)
@@ -68,10 +71,10 @@ def main(argv):
form_build_id=form_build_id)
# login
- session.post(url, data=login_data)
+ session.post(url, data=login_data, verify=True, headers=headers)
# get the ebooks page
- books_page = session.get("https://www.packtpub.com/account/my-ebooks")
+ books_page = session.get("https://www.packtpub.com/account/my-ebooks", verify=True, headers=headers)
books_tree = html.fromstring(books_page.content)
# login successful?
@@ -114,25 +117,25 @@ def main(argv):
if len(pdf) > 0 and 'pdf' in formats:
filename = path + "/" + title + ".pdf"
print "Downloading PDF:", pdf[0]
- download_to_file(filename, pdf[0], session)
+ download_to_file(filename, pdf[0], session, headers)
# epub
if len(epub) > 0 and 'epub' in formats:
filename = path + "/" + title + ".epub"
print "Downloading EPUB:", epub[0]
- download_to_file(filename, epub[0], session)
+ download_to_file(filename, epub[0], session, headers)
# mobi
if len(mobi) > 0 and 'mobi' in formats:
filename = path + "/" + title + ".mobi"
print "Downloading MOBI:", mobi[0]
- download_to_file(filename, mobi[0], session)
+ download_to_file(filename, mobi[0], session, headers)
# code
if len(code) > 0 and includeCode:
filename = path + "/" + title + " [CODE].zip"
print "Downloading CODE:", code[0]
- download_to_file(filename, code[0], session)
+ download_to_file(filename, code[0], session, headers)
if __name__ == "__main__":
Hi @AndoniZubimendi! I was able to reproduce the error. I tested it with your patch and it works perfectly again. Thanks for looking out!! 👍 ⚡ I'm committing the change right now.
Currently, I'm getting the same issue... Could you please help me out with this?
The error is:
Attempting to login...
Traceback (most recent call last):
File "downloader.py", line 374, in <module>
main(sys.argv[1:])
File "downloader.py", line 291, in main
form_build_id = tree.xpath('//form[@id="packt-user-login-form"]//input[@name="form_build_id"]/@id')[0]
IndexError: list index out of range
I've tried this with both python 2.7.16 and python 3.5.4rc1
I tried to use the script today and it throws an error. The credentials are fine.
The error is:
Very useful script by the way