Open stepersy opened 1 year ago
Ciao, io sono riuscito a farlo usando il seguente script; il login, non so perché, mi restituisce 401, quindi ho usato i cookie prelevati dal browser.
import requests
import fitz
import os
# Session cookies sent with every page download. The values below are
# placeholders to be replaced with the ones from a logged-in browser session.
COOKIES = {
    "connect.sid": "XXXXXXXXXXX",
    "elmo_vc": "YYYYYYYYY",
    "PHPSESSID": "ZZZZZ",
    "shbookInitialized": "true",
}

# Identifier of the book to download and the number of its last page.
BOOK_ID = 'GT2023_G3452521A'
LAST_PAGE_NUMBER = 384

# URL template for a single page; the remaining "%s" is filled with the page
# number at download time.
BOOK_URL = "https://mydbook.giuntitvp.it/books/{}/pdf/pages/%s?type=".format(BOOK_ID)

# Credentials used only by login().
USERNAME = 'USERNAME@email.com'
PASSWORD = 'password'

# Directory where the downloaded page files are stored (named after the book).
TEMP_DIR = BOOK_ID
def getnamevalue(input):
    """Extract the ``name`` and ``value`` attributes from one ``<input>`` tag.

    Attributes are parsed as whole ``key="value"`` pairs rather than as
    whitespace-separated tokens split on ``=``, so values that contain spaces
    or ``=`` characters (for example base64 padding in hidden form tokens) are
    returned intact, and single-quoted or unquoted values no longer raise.

    Args:
        input: Text of a single HTML input tag, with or without the leading
            ``<`` (e.g. ``input type="hidden" name="lt" value="x"``).

    Returns:
        A ``(name, value)`` tuple, where ``value`` is ``""`` if the tag has no
        ``value`` attribute, or ``None`` if the tag has no non-empty ``name``
        attribute.
    """
    import re  # local import: `re` is not among the file's top-level imports

    # One match per attribute: key, then a double-quoted, single-quoted or
    # bare value. Consuming every attribute (not only name/value) prevents
    # text inside another attribute's quotes from being mistaken for one.
    attribute = r"""([^\s"'<>/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'<>]+))"""

    data = {}
    for match in re.finditer(attribute, input):
        key = match.group(1).lower()
        if key not in ("name", "value"):
            continue
        # Exactly one of the three value groups participates in a match.
        data[key] = next(g for g in match.groups()[1:] if g is not None)

    if data.get("name", "") == "":
        return None
    return data.get("name", ""), data.get("value", "")
def getlogindata(username, password):
    """Submit the CAS login form and return the decoded JSON reply.

    The login page is fetched first so that the fields of its ``<input>``
    tags (as parsed by ``getnamevalue``) can be merged into the POST payload.
    Both requests go through one ``requests.Session`` so that cookies set by
    the login page are sent back with the form submission; with independent
    ``requests.get`` / ``requests.post`` calls those cookies were dropped.

    Args:
        username: Account e-mail / user name.
        password: Account password.

    Returns:
        The parsed JSON body of the login response, or ``None`` when the
        response status is not 200 or the body is not valid JSON.
    """
    service_url = 'https://mydbook.giuntitvp.it/authentication/cas?iframe=false'
    return_url = 'https://mydbook.giuntitvp.it/app/home'
    login_url = "https://centralauthentication.giunti.it/cas/login?service=https://mydbook.giuntitvp.it/authentication/cas?iframe=false&return=https://mydbook.giuntitvp.it/app/home"

    # NOTE(review): "service" and "return" are deliberately sent twice, as in
    # the original payload -- confirm against a real browser submission.
    data = {
        "username": username,
        "password": password,
        "submit": "Invia",
        "service": [service_url, service_url],
        "return": [return_url, return_url],
    }

    with requests.Session() as session:
        page = session.get(login_url)
        for cookie in page.cookies:
            print(cookie.name, cookie.value)

        # Collect every "<input ..." fragment found on lines containing one.
        tags = [
            piece
            for line in page.text.split("\n")
            if '<input' in line
            for piece in line.split("<")
            if piece.startswith('input')
        ]

        # Merge the form's fields into the payload; a field name that is
        # already present is turned into (or appended to) a list of values.
        for namevalue in (getnamevalue(tag) for tag in tags):
            if namevalue is None:
                continue
            n, v = namevalue
            if n not in data:
                data[n] = v
            elif isinstance(data[n], str):
                data[n] = [data[n], v]
            else:
                data[n].append(v)

        # Show the payload for debugging without echoing the password.
        print(dict(data, password="***"))

        r = session.post(login_url, data=data)

    print(r.status_code)
    print(r.encoding)
    print(r.text)
    if r.status_code != 200:
        return None

    # "utf-8-sig" makes the decoder drop a leading byte-order mark, if any.
    r.encoding = "utf-8-sig"
    try:
        return r.json()
    except ValueError:
        # The body was not JSON (e.g. an HTML page): report it as a failed
        # login instead of raising from inside the JSON decoder.
        return None
def login(username, password):
    """Log in and return the ``"<accessToken>/<userId>"`` string.

    Args:
        username: Account e-mail / user name.
        password: Account password.

    Returns:
        ``accessToken + "/" + userId`` on success, otherwise ``None`` after
        printing the reason ("Incorrect credentials!", "Login failed!" or
        "Unauthorized!").
    """
    logindata = getlogindata(username, password)
    print(logindata)

    # Test for an empty/None result first: getlogindata returns None on a
    # failed request, and `"error" in None` would raise TypeError.
    if not logindata:
        print("Login failed!")
        return None

    if "error" in logindata:
        if logindata.get("error") == "1":
            print("Incorrect credentials!")
        else:
            print("Login failed!")
        return None

    userid = str(logindata["userId"])
    if userid == "0":
        print("Unauthorized!")
        return None

    return logindata["accessToken"] + "/" + userid
def downloadfile(url):
    """Download ``url`` with the configured cookies and return its bytes.

    Args:
        url: Address of the resource to fetch.

    Returns:
        The response body as ``bytes``, or ``None`` when the HTTP status is
        not 200.
    """
    # The context manager releases the streamed connection on every path,
    # including the early return for a non-200 status.
    with requests.get(
        url,
        stream=True,
        headers={"Referer": "https://mydbook.giuntitvp.it"},
        cookies=COOKIES,
    ) as r:
        if r.status_code != 200:
            return None
        # Join the chunks once instead of growing a bytes object with "+=".
        return b"".join(r.iter_content(chunk_size=102400))
def get_page(p):
    """Download page ``p`` and store it as ``TEMP_DIR/page_<p>.jpg``.

    Nothing is written when the download returns no data.

    Args:
        p: Page number (integer) substituted into ``BOOK_URL``.
    """
    url = BOOK_URL % str(p)
    dta = downloadfile(url)
    if not dta:
        return
    # "with" closes the file even if the write fails part-way.
    with open(os.path.join(TEMP_DIR, "page_%d.jpg" % p), "wb") as f:
        f.write(dta)
def download_book():
    """Fetch every page of the book, from 1 through ``LAST_PAGE_NUMBER``.

    Each page number is printed before the page is requested via
    ``get_page``.
    """
    page_numbers = range(1, LAST_PAGE_NUMBER + 1)
    for page_number in page_numbers:
        print(page_number)
        get_page(page_number)
def create_pdf():
    """Assemble the downloaded page images into ``<BOOK_ID>.pdf``.

    For each page number from 1 through ``LAST_PAGE_NUMBER`` the image file
    ``TEMP_DIR/page_<n>.jpg`` is converted to a one-page PDF and drawn onto a
    new page of the same dimensions. Page files that do not exist (``get_page``
    writes nothing when a download fails) are skipped with a message instead
    of aborting the whole build. Every opened document is closed explicitly.
    """
    doc = fitz.open()  # output PDF that collects the pictures
    try:
        for p in range(1, LAST_PAGE_NUMBER + 1):
            f = os.path.join(TEMP_DIR, "page_%d.jpg" % p)
            if not os.path.isfile(f):
                print("Skipping page %d: %s not found" % (p, f))
                continue

            img = fitz.open(f)  # open the picture as a document
            try:
                rect = img[0].rect  # picture dimensions
                pdfbytes = img.convert_to_pdf()  # picture as a PDF stream
            finally:
                img.close()  # no longer needed

            imgPDF = fitz.open("pdf", pdfbytes)  # open the stream as a PDF
            try:
                # New page with the picture's dimensions, filled by the image.
                page = doc.new_page(width=rect.width, height=rect.height)
                page.show_pdf_page(rect, imgPDF, 0)
            finally:
                imgPDF.close()
            print(p)

        if len(doc) == 0:
            # Nothing was added, so there is nothing meaningful to save.
            print("No pages found in %s; PDF not created" % TEMP_DIR)
            return
        doc.save("%s.pdf" % BOOK_ID)
    finally:
        doc.close()
if __name__ == "__main__":
    # Make sure the folder that receives the page files exists.
    os.makedirs(TEMP_DIR, exist_ok=True)
    # Login is left disabled: the author reports it returns 401, so the
    # browser cookies in COOKIES are used for authentication instead.
    # login(USERNAME, PASSWORD)
    download_book()
    create_pdf()
Ho provato a creare un file .py con lo script di @ckhmer1, tenendo aperto il libro digitale. Non so se quello che sto facendo sia giusto: ha solo creato dei JPEG non visualizzabili.
Ciao! Innanzitutto grazie mille per lo stupendo lavoro che stai facendo! Ho visto che è stata aggiunta la possibilità di scaricare da GiuntiTVP mediante One Shot Link. Come si usa? Ho provato a incollare il link di un mio libro (https://mydbook.giuntitvp.it/app/books/GIAC67_G6760798D/pdfParts?prependCollection=BL35LIV07_MYDBOOK2019) ma continua a rispondere "Unable to view this publicly. Aborting...". Grazieee :)