Open bi7je opened 4 years ago
You want to have your url to look something like this https://learning.oreilly.com/videos/ccna-200-301/9780136582700 instead of whatever you have there.
I did a workaround for this and it is working.
Line 22 changed for _def init(self, url, output_folder, username, password, domain, downloaderpath): Line 26 changed for self.domain = url Line 56 changed for _videourl = video.get('href')
Works like a charm after that ;)
My full .py below:
# A resumable Safari Books Online Video downloader
# Main reference: https://mvdwoord.github.io/tools/2017/02/02/safari-downloader.html
from bs4 import BeautifulSoup
import requests
import os
import subprocess
import unicodedata
import string
import config
# Create a config.py file with the following content:
# class Config:
# URL = 'https://www.safaribooksonline.com/library/view/strata-data-conference/9781491985373/'
# DOMAIN = 'https://www.safaribooksonline.com'
# OUTPUT_FOLDER = 'D:\\Strata Data Conference 2017 Singapore'
# USERNAME = 'your_email_address'
# PASSWORD = 'your_password'
# DOWNLOADER = './youtube-dl.exe' # Please download from https://github.com/rg3/youtube-dl
class SafariDownloader:
def __init__(self, url, output_folder, username, password, domain, downloader_path):
self.output_folder = output_folder
self.username = username
self.password = password
self.domain = url
self.downloader_path = downloader_path
req = requests.get(url)
soup = BeautifulSoup(req.text, 'html.parser')
self.topics = soup.find_all('li', class_='toc-level-1') # top-level topic titles
# Update youtube-dl first
subprocess.run([self.downloader_path, "-U"])
def validify(self, filename):
valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
valid_chars = frozenset(valid_chars)
# The unicodedata.normalize call replaces accented characters with the unaccented equivalent,
# which is better than simply stripping them out. After that all disallowed characters are removed.
cleaned_filename = unicodedata.normalize('NFKD', filename).encode('ascii', 'ignore').decode('ascii')
return ''.join(c for c in cleaned_filename if c in valid_chars)
def download(self):
for topic in self.topics:
topic_name = topic.a.text
# Creating folder to put the videos in
save_folder = '{}/{}'.format(self.output_folder, topic_name)
os.makedirs(save_folder, exist_ok=True)
# You can choose to skip these topic_name, comment these three lines if you do not want to skip any
# if topic_name in ('Keynotes', 'Strata Business Summit', 'Sponsored'):
# print("Skipping {}...".format(topic_name))
# continue
for index, video in enumerate(topic.ol.find_all('a')):
video_name = '{:03d} - {}'.format(index + 1, video.text)
video_name = self.validify(video_name)
video_url = video.get('href')
video_out = '{}/{}.mp4'.format(save_folder, video_name)
# Check if file already exists
if os.path.isfile(video_out):
print("File {} already exists! Skipping...".format(video_out))
continue
print("Downloading {} ...".format(video_name))
subprocess.run([self.downloader_path, "-u", self.username, "-p", self.password, "--verbose", "--output", video_out, video_url])
if __name__ == '__main__':
app_config = config.Config
downloader = SafariDownloader(url=app_config.URL, output_folder=app_config.OUTPUT_FOLDER,
username=app_config.USERNAME, password=app_config.PASSWORD,
domain=app_config.DOMAIN, downloader_path=app_config.DOWNLOADER)
downloader.download()
Hey
Im getting the following error when trying to download the ccna 200-301 playlist:
Any idea what the problem might be?
Im kinda thinking that this line might be the problem seeing that the domain and actual course url are pretty much attached to each other, but i dont know how to seperate them.
WARNING: Could not send HEAD request to https://www.safaribooksonline.comhttps://learning.oreilly.com/library/view/ccna-200-301/9780136582700/CCVC_1_1_1.html: <urlopen error [Errno -2] Name or service not known>