Open Lomar5115 opened 2 years ago
What's the lab name? I'll give it a look.
Try my updated function ;D This gets all content. Note that on Windows some labs can have file names that are too long.
def _lab_request_headers(host):
    """Return the headers used for the lab metadata and lab asset requests.

    Only ``Host`` varies between calls; every other value is read from names
    defined elsewhere in the script (``referer``, ``access_token``, ...).
    """
    return {
        "Host": host,
        "Origin": referer,
        "Authorization": access_token,
        "User-Agent": user_agent,
        "Accept": accept,
        "X-Requested-With": x_requested_with,
        "Accept-Encoding": accept_encodings,
        "sec-fetch-mode": sec_fetch_mode,
        "sec-fetch-dest": sec_fetch_dest,
        "Referer": referer,
    }


def _lab_write_html(html, path, uuid):
    """Write one lab HTML page to *path*; do nothing when *html* is None."""
    if html is None:
        return
    # Replace external asset links so the page uses the local "data" folder.
    remote_prefix = "https://assets.ine.com/cybersecurity-lab-images/" + uuid
    with open(path, 'w', encoding='utf-8') as fp:
        fp.write(str(html).replace(remote_prefix, "data"))


def _lab_fetch_to_file(url, path, headers=None, skip_if_complete=False):
    """Stream *url* into the file at *path*.

    Returns True when the server answered 200 (the file was written, or it
    was skipped because an equally large copy already exists) and False for
    any other status code. Network and file-system errors propagate.
    """
    with requests.get(url, headers=headers, stream=True) as response:
        if response.status_code != 200:
            return False
        if skip_if_complete and os.path.isfile(path):
            # A missing or malformed Content-Length simply forces a
            # re-download instead of raising.
            length = response.headers.get("content-length", "")
            if length.isdigit() and os.path.getsize(path) >= int(length):
                print("Download skipped already downloaded: "
                      + os.path.basename(path))
                return True
        with open(path, 'wb') as fp:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                if chunk:
                    fp.write(chunk)
    return True


def _lab_download_solution_video(data, folder):
    """Download the solution video sources and caption tracks into *folder*.

    Returns silently when the lab has no ``solution_video`` or the media
    endpoint does not answer 200 with a usable playlist.
    """
    video_id = data.get('solution_video')
    if not video_id:
        return
    header = {
        "Host": "video.rmotr.com",
        "Origin": referer,
        "Authorization": access_token,
        "connection": "keep-alive",
        "User-Agent": user_agent,
        "Accept": accept,
        "Accept-Encoding": accept_encodings,
        "sec-fetch-mode": sec_fetch_mode,
        "sec-fetch-dest": sec_fetch_dest,
        "sec-fetch-site": sec_fetch_site,
    }
    # parent id is taken from global parent_id in downloader
    querystring = {
        "parent_type": "course",
        "parent_id": parent_id,
    }
    url = "https://video.rmotr.com/api/v1/videos/{}/media".format(video_id)
    response = requests.get(url, headers=header, params=querystring)
    if response.status_code != 200:
        return
    try:
        playlist = json.loads(response.text)['playlist'][0]
        sources = playlist['sources']
    except (ValueError, KeyError, IndexError, TypeError):
        return
    tracks = playlist.get('tracks') or []

    # Files are numbered rather than titled to keep paths short (Windows
    # path-length limit).
    # NOTE(review): '.vvt' is possibly a typo for WebVTT's '.vtt'; kept so
    # files from earlier runs are still recognised - confirm.
    for suffix, entries in (('.VOD.mp4', sources), ('.VOD.vvt', tracks)):
        for count, entry in enumerate(entries, start=1):
            target = os.path.join(folder, str(count) + suffix)
            try:
                file_url = entry['file']
            except (KeyError, TypeError):
                continue
            try:
                _lab_fetch_to_file(file_url, target, skip_if_complete=True)
            except Exception:
                # One failed file must not stop the remaining downloads.
                print(colored("Connection error: Possible long file names.", 'red'))


def download_lab(uuid, lab_index):
    """Download everything available for one lab into ``Lab<index>.<name>/``.

    Fetches the lab metadata, then saves on a best-effort basis: the
    description, solutions and tasks pages (``index.html``,
    ``solutions.html``, ``tasks.html``), the numbered lab images
    (``data/imageN.png``), the walkthrough PDF linked from the solutions
    page, and the solution video with its caption tracks. Not every lab has
    every kind of content; a missing item is skipped, and a failure in one
    section is reported without stopping the others.

    Args:
        uuid: identifier of the lab, substituted into ``lab_url`` and
            ``labimage_url``.
        lab_index: number used in the output folder name.
    """
    # content meta
    out = requests.get(lab_url.format(uuid),
                       headers=_lab_request_headers("content-api.rmotr.com"))
    data = json.loads(out.text)

    # Every later step needs the folder name, so resolve it once up front
    # and stop with a visible message when the metadata carries no name.
    lab_name = data.get('name') if isinstance(data, dict) else None
    if not lab_name:
        print(colored("Lab {}: metadata has no 'name', nothing downloaded."
                      .format(uuid), 'red'))
        return
    data_name = fix_string_filename(lab_name)
    subfolder_name = 'Lab' + str(lab_index) + '.' + data_name
    data_dir = os.path.join(subfolder_name, "data")

    # prepare subfolders
    try:
        os.makedirs(data_dir, exist_ok=True)
    except OSError as err:
        print(colored("{}: cannot create folders ({})"
                      .format(subfolder_name, err), 'red'))
        return

    # description / solutions / tasks html
    pages = (
        ("description_html", "index.html"),
        ("solutions_html", "solutions.html"),
        ("tasks_html", "tasks.html"),
    )
    for key, page in pages:
        try:
            _lab_write_html(data.get(key),
                            os.path.join(subfolder_name, page), uuid)
        except Exception as err:
            print(colored("{}: could not save {} ({})"
                          .format(subfolder_name, page, err), 'red'))

    asset_headers = _lab_request_headers("assets.ine.com")

    # imageX.png - keep requesting the next number until the server stops
    # answering 200
    try:
        slide_number = 1
        while _lab_fetch_to_file(
                labimage_url.format(uuid, str(slide_number)),
                os.path.join(data_dir, "image{}.png".format(slide_number)),
                headers=asset_headers):
            slide_number += 1
    except Exception as err:
        print(colored("{}: image download stopped ({})"
                      .format(subfolder_name, err), 'red'))

    # solutions pdf / walk through pdf
    try:
        solutions_html = data.get('solutions_html')
        anchors = (BeautifulSoup(solutions_html, 'lxml').find_all('a')
                   if solutions_html else [])
        if anchors:
            # NOTE(review): the link *text* of the first anchor is used as
            # the URL (as before); confirm it always equals the href.
            pdf_url = anchors[0].text.strip()
            if pdf_url.startswith(("http://", "https://")):
                _lab_fetch_to_file(
                    pdf_url,
                    os.path.join(subfolder_name, 'walkthrough.pdf'),
                    headers=asset_headers)
    except Exception as err:
        print(colored("{}: could not save walkthrough.pdf ({})"
                      .format(subfolder_name, err), 'red'))

    # solutions video
    try:
        _lab_download_solution_video(data, subfolder_name)
    except Exception as err:
        print(colored("{}: solution video download failed ({})"
                      .format(subfolder_name, err), 'red'))
On the line containing
filename = data_name+"-solutions.mp4"
Why was that used? Also why is it not included in this line?
filename = os.path.join(subfolder_name, str(count)+'.VOD.mp4')
Thanks
Windows has an issue with long file names.
If you are running on Linux, it's not an issue, though.
Yeah but the filename = data_name+"-solutions.mp4"
is never used.
it depends on the course. some have solutions videos and some don't. some have pdfs and some don't.
that's why each thing has try: and except:
I don't understand what I should do here. Should I test each of these parts, or something else? Can you just send the working version for Windows?
can i dm you? please share a link i need to download materials
just have to replace the function in your script with the one above.
It'll try each possible download. If it exists, it'll download it.
if not it'll hit an except: and move to next possible download type
Not all labs have the same content. Note that for some courses the Windows 256-character path limit is an issue; you might have to deal with that.
actually the course doesn't contain any lab or pdfs, course number 40 and 41
I've added those functions, but for some of them I don't know where the right place to put them is. I've attached the file; can you fix it and send it to me, please? ine2.py.txt
ok, I've added your lines to the script but unfortunately, i don't see any files after successful download. Do you have any idea?
Hello community, I used this script two months ago and everything was fine; it downloaded everything. Now I use the same script with the same code, but it either shows errors or downloads a lab page with nothing on it: there is no data folder (which should contain the images and files), and the HTML file is not named "index" but has the same name as the lab, with nothing on the page.
Is there anyone who faces the same problem? I think this is because INE has changed their labs. I've tried a lot, but nothing works; every time another error shows up.
Heeelp pleaaase ..:(
File "C:\Users\USER\Desktop\INE-courses-downloader-master\Ine.py", line 768, in <module>
downloader(course)
File "C:\Users\USER\Desktop\INE-courses-downloader-master\Ine.py", line 627, in downloader
download_lab(k["uuid"], lab_index)
File "C:\Users\USER\Desktop\INE-courses-downloader-master\Ine.py", line 339, in download_lab
subfolder_name = 'Lab'+str(lab_index)+'.'+data["name"]
KeyError: 'name'
Lab function:
def download_lab(uuid, lab_index):