ytdl-org / youtube-dl

Command-line program to download videos from YouTube.com and other video sites
http://ytdl-org.github.io/youtube-dl/
The Unlicense
131.15k stars 9.92k forks source link

Bibel TV Multi Episode download #30210

Open geosone opened 2 years ago

geosone commented 2 years ago

Checklist

Description

It would be great if you could pass the URL of a series and have it download all the available episodes. For example, https://www.bibeltv.de/mediathek/serien/316567-ein-engel-auf-erden which, at the moment of writing, has 2 episodes present: https://www.bibeltv.de/mediathek/videos/305339-freundinnen and https://www.bibeltv.de/mediathek/videos/305344-ich-wuensch-mir-einen-blauen-mond

dirkf commented 2 years ago

This patch seems to work.

--- old/youtube_dl/extractor/extractors.py
+++ new/youtube_dl/extractor/extractors.py
@@ -112,7 +112,10 @@
     BFMTVLiveIE,
     BFMTVArticleIE,
 )
-from .bibeltv import BibelTVIE
+from .bibeltv import (
+    BibelTVIE,
+    BibelTVSerienIE,
+)
 from .bigflix import BigflixIE
 from .bild import BildIE
 from .bilibili import (

--- old/youtube_dl/extractor/bibeltv.py
+++ new/youtube_dl/extractor/bibeltv.py
@@ -1,11 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals
+
+import re

 from .common import InfoExtractor

 class BibelTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?:crn/)?(?P<id>\d+)'
+    _VALID_URL_HOST = r'https?://(?:www\.)?bibeltv\.de'
+    _VALID_URL_PATH_TMPL = r'/mediathek/%s/(?:crn/)?(?P<id>\d+)'
+    _VALID_URL_PATH = _VALID_URL_PATH_TMPL % 'videos'
+    _VALID_URL = _VALID_URL_HOST + _VALID_URL_PATH
     _TESTS = [{
         'url': 'https://www.bibeltv.de/mediathek/videos/329703-sprachkurs-in-malaiisch',
         'md5': '252f908192d611de038b8504b08bf97f',
@@ -17,7 +22,20 @@
             'timestamp': 1608316701,
             'uploader_id': '5840105145001',
             'upload_date': '20201218',
-        }
+        },
+        'skip': 'no longer available',
+    }, {
+        'url': 'https://www.bibeltv.de/mediathek/videos/305339-freundinnen',
+        'md5': 'e895c65cbe7ecb0f09541e16e1fc4868',
+        'info_dict': {
+            'id': 'ref:305339',
+            'ext': 'mp4',
+            'title': 'Freundinnen',
+            'description': 'md5:c211b593ec8532631bd9e6c4f0e13881',
+            'timestamp': 1569487812,
+            'uploader_id': '5840105145001',
+            'upload_date': '20190926',
+        },
     }, {
         'url': 'https://www.bibeltv.de/mediathek/videos/crn/326374',
         'only_matching': True,
@@ -28,3 +46,42 @@
         crn_id = self._match_id(url)
         return self.url_result(
             self.BRIGHTCOVE_URL_TEMPLATE % crn_id, 'BrightcoveNew')
+
+
+class BibelTVSerienIE(BibelTVIE):
+    _VALID_URL = BibelTVIE._VALID_URL_HOST + BibelTVIE._VALID_URL_PATH_TMPL % 'serien'
+    _EPISODE_CLASS_TMPL = r'class\s*=\s*(?P<{qq}>%(q)s)serie-list-card(?P={qq})'
+    _EPISODE_URL = (r'''(?x)
+        <a\s[^>]*?
+        (?:(?P<class>%(class1)s)\s[^>]*?)?
+        href\s*=\s*(?P<q>%%(q)s)%(url)s[\w-]+(?P=q)
+        (?(class)|\s[^>]*?%(class2)s)
+        ''' % {'url': BibelTVIE._VALID_URL_PATH,
+               'class1': _EPISODE_CLASS_TMPL.format(qq='q1'),
+               'class2': _EPISODE_CLASS_TMPL.format(qq='q2'), }) % {'q': r'\'|"|\b', }
+    _TESTS = [{
+        'url': 'https://www.bibeltv.de/mediathek/serien/316567-ein-engel-auf-erden',
+        'info_dict': {
+            'id': '316567',
+            'title': 'Ein Engel auf Erden',
+            'description': 'md5:581eb7ffd9f9aeec4271d5e011381ee3',
+        },
+        'playlist_mincount': 2,
+    }, ]
+
+    def _real_extract(self, url):
+        crn_id = self._match_id(url)
+        webpage = self._download_webpage(url, crn_id)
+        title = self._og_search_title(webpage, fatal=False)
+        descr = self._og_search_description(webpage)
+
+        result = self.playlist_from_matches(
+            re.finditer(self._EPISODE_URL, webpage), 
+            playlist_id=crn_id, playlist_title=title, 
+            getter=lambda m: self.BRIGHTCOVE_URL_TEMPLATE % m.group('id'),
+            ie='BrightcoveNew')
+
+        if result and descr:
+            result['description'] = descr
+
+        return result
geosone commented 2 years ago

This patch works great. Thanks!

geosone commented 2 years ago

But it doesn't detect the other episodes in the season dropdown, for example here: https://www.bibeltv.de/mediathek/serien/327413-eine-himmlische-familie

dirkf commented 2 years ago

For multi-season series, you'd have to parse the JSON in the <script> element with id __NEXT_DATA__ (this article describes why that happens). The series data is a JSON object at .props.pageProps.seriePageData, in which .videos is a list of episodes in this format:

          {
            'id': 8502,
            ...
            'seasonNumber': 2,
            'episodeNumber': 9,
            ...
          }