itsmehemant7 / PyMovieDb

A Simple IMDb API for Python
MIT License
90 stars 18 forks source link

Include trailers #23

Open albfan opened 3 months ago

albfan commented 3 months ago

I was unable to parse this in a better way, but this does the job:

diff --git i/PyMovieDb/imdb.py w/PyMovieDb/imdb.py
index 6eecd57..4bd3da1 100644
--- i/PyMovieDb/imdb.py
+++ w/PyMovieDb/imdb.py
@@ -144,6 +144,25 @@ class IMDB:
                     # invalid char(s) is/are not in description/trailer/reviewBody schema
                     return self.NA

+        trailers = []
+        trailer_url = result.get("trailer", {"embedUrl": None}).get("embedUrl")
+        if trailer_url:
+            try:
+                response = self.session.get(trailer_url)
+                #print(response.html.text)
+                #result = response.html.xpath("//video")[0].text
+                s = response.html.text
+                import re
+                m = re.findall(r'"url":"https://imdb-video.media-imdb.com[^"]+mp4[^"]+Expires[^"]+"', s)
+                for t_url in m:
+                    t = {}
+                    t["url"] = json.loads("{"+m[0]+"}").get("url")
+                    trailers.append(t)
+                #result = f"""{result}"""
+            except IndexError:
+                t = {}
+                t["url"] = trailer_url
+                trailers.append(t)
         output = {
             "type": result.get('@type'),
             "name": result.get('name'),
@@ -176,6 +195,10 @@ class IMDB:
             "datePublished": result.get("datePublished"),
             "keywords": result.get("keywords"),
             "duration": result.get("duration"),
+            "trailer": {
+                "thumbnail": result.get("trailer", {"thumbnailUrl": None}).get("thumbnailUrl"),
+                "links": trailers
+            },
             "actor": [
                 {"name": actor.get("name"), "url": actor.get("url")} for actor in result.get("actor", [])
             ],

Trailers have a deeper structure that includes the stream quality, like:

  {
    "url": "https://imdb-video.media-imdb.com/whatever",
    "__typename":"PlaybackURL"
  },
  {
    "displayName": {
      "value":"SD",
      "language":"en-US",
      "__typename":"LocalizedString"
    },
    "videoMimeType":"MP4",
    "videoDefinition":"DEF_SD"
  }

but I was unable to find a way to parse that with XPath expressions.

The final result should look like:

   "trailers": [
       {
         "definition": "480p",
         "thumbnail": url,
         "url": url
       },
       {
         "definition": "SD",
         "thumbnail": url,
         "url": url
       }
   ]

while right now it is:

   "trailer": {
       "thumbnail": url,
       "links": [
         {
            "url": url
         },
         {
            "url": url
         }, ...
      ]
   }