omdbapi / OMDb-API

The Open Movie Database Bug Tracking
420 stars 21 forks source link

Trailer URLs? #180

Open rootm0s opened 4 years ago

rootm0s commented 4 years ago

Is it possible to add support for fetching trailer URL for a specific title?

I searched the open and closed issues but couldn't find anything about it. If anyone has a good idea for pulling that data from another source, let me know.

rootm0s commented 4 years ago

I wrote this Python 3 script that fetches the IMDb trailer URL, but it would be neater to use the API for it. It requires the imdbID, which I fetch from the OMDb API.

from json import JSONDecoder
from requests import get

class trailer:
    """Scrape the direct trailer video URL for a title from IMDb pages.

    The lookup takes two HTTP requests: the title page, which carries a JSON
    object with ``trailer.embedUrl``, and then the embed page, which carries
    the JSON describing the video player.
    """

    def __init__(self):
        self.text = None
        self.imdbID = None
        # Only video URLs containing this host name are accepted by url().
        self.subdomain = "imdb-video.media-imdb.com"
        # Adjacent string literals are used instead of a backslash
        # continuation inside the string, which would embed the next line's
        # indentation in the header value.
        self.useragent = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/60.0.3112.113 Safari/537.36"
            )
        }

    def parse(self, text, decoder=JSONDecoder()):
        """
        Find JSON objects in text, and yield the decoded JSON data. Does not
        attempt to look for JSON arrays, text, or other JSON types outside of
        a parent JSON object.

        https://stackoverflow.com/questions/54235528/how-to-find-json-object-in-text-with-python

        Args:
            text: Arbitrary text (e.g. an HTML page) that may embed JSON.
            decoder: Decoder used for each candidate object.

        Yields:
            Each top-level JSON object found, in order of appearance.
        """
        pos = 0

        while True:
            match = text.find('{', pos)
            if match == -1:
                break
            try:
                # Decode in place from ``match``; passing the start index
                # avoids copying the tail of the text for every brace. The
                # returned end index is absolute within ``text``.
                result, end = decoder.raw_decode(text, match)
            except ValueError:
                # Not valid JSON at this brace; try the next one.
                pos = match + 1
            else:
                yield result
                pos = end

    def _embed_url(self, html):
        """Return the embed-page URL built from ``trailer.embedUrl``, or None.

        When several JSON objects carry the key, the last one wins.
        """
        found = None
        for result in self.parse(html):
            try:
                found = "https://imdb.com/" + result["trailer"]["embedUrl"] + "/imdb/embed"
            except (KeyError, TypeError):
                # Key missing, or "trailer"/"embedUrl" has an unexpected type.
                continue
        return found

    def _video_url(self, html):
        """Return the video URL from the first player object found.

        Returns the URL if it contains ``self.subdomain``, False if a URL was
        found but does not, and None when no JSON object has the expected
        structure.
        """
        for result in self.parse(html):
            try:
                # The second entry of videoInfoList is the one that is read.
                url = result["videoPlayerObject"]["video"]["videoInfoList"][1]["videoUrl"]
            except (KeyError, IndexError, TypeError):
                # Missing key, list shorter than two entries, or wrong type.
                continue
            if isinstance(url, str) and self.subdomain in url:
                return url
            return False
        return None

    def url(self, imdbID):
        """
        First part pulls HTML data from IMDB using imdbID to locate the
        embedded trailer page, e.g.
        https://imdb.com/video/imdb/vi4095655705/imdb/embed

        Second part pulls the HTML data once again to attempt to find the
        juicy trailer URL.

        Returns:
         * On error: False or None
         * On success: URL
        """
        try:
            page = get("https://www.imdb.com/title/{imdbID}".format(imdbID=imdbID),
                       headers=self.useragent, timeout=25)
        except Exception:
            # Best effort: any failure to fetch the page is reported as False.
            return False

        trailer_url = self._embed_url(page.text)
        if trailer_url is None:
            # No trailer metadata on the title page.
            return False

        try:
            player = get(trailer_url, headers=self.useragent, timeout=25)
        except Exception:
            return False

        return self._video_url(player.text)

# Stand-in for an OMDb API response (film "1917"); only "imdbID" is read below.
fake_info = {
    "Title": "1917",
    "Year": "2019",
    "Rated": "R",
    "Released": "10 Jan 2020",
    "Runtime": "119 min",
    "Genre": "Drama, War",
    "Director": "Sam Mendes",
    "Writer": "Sam Mendes, Krysty Wilson-Cairns",
    "Actors": "Dean-Charles Chapman, George MacKay, Daniel Mays, Colin Firth",
    "Plot": "April 6th, 1917. As a regiment assembles to wage war deep in enemy territory, two soldiers are assigned to race against time and deliver a message that will stop 1,600 men from walking straight into a deadly trap.",
    "Language": "English, French, German",
    "Country": "USA, UK, India, Spain, Canada",
    "Awards": "Won 3 Oscars. Another 110 wins & 161 nominations.",
    "Poster": "https://m.media-amazon.com/images/M/MV5BOTdmNTFjNDEtNzg0My00ZjkxLTg1ZDAtZTdkMDc2ZmFiNWQ1XkEyXkFqcGdeQXVyNTAzNzgwNTg@._V1_SX300.jpg",
    "Ratings": [
        {"Source": "Internet Movie Database", "Value": "8.4/10"},
        {"Source": "Rotten Tomatoes", "Value": "89%"},
        {"Source": "Metacritic", "Value": "78/100"},
    ],
    "Metascore": "78",
    "imdbRating": "8.4",
    "imdbVotes": "272,841",
    "imdbID": "tt8579674",
    "Type": "movie",
    "DVD": "N/A",
    "BoxOffice": "N/A",
    "Production": "N/A",
    "Website": "N/A",
    "Response": "True",
}

# Resolve and print the trailer URL for the sample title.
print(trailer().url(fake_info["imdbID"]))
matinwd commented 2 years ago

I wrote this Python 3 script that fetches the IMDb trailer URL, but it would be neater to use the API for it. It requires the imdbID, which I fetch from the OMDb API.

from json import JSONDecoder
from requests import get

class trailer:
    """Scrape the direct trailer video URL for a title from IMDb pages.

    The lookup takes two HTTP requests: the title page, which carries a JSON
    object with ``trailer.embedUrl``, and then the embed page, which carries
    the JSON describing the video player.
    """

    def __init__(self):
        self.text = None
        self.imdbID = None
        # Only video URLs containing this host name are accepted by url().
        self.subdomain = "imdb-video.media-imdb.com"
        # Adjacent string literals are used instead of a backslash
        # continuation inside the string, which would embed the next line's
        # indentation in the header value.
        self.useragent = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/60.0.3112.113 Safari/537.36"
            )
        }

    def parse(self, text, decoder=JSONDecoder()):
        """
        Find JSON objects in text, and yield the decoded JSON data. Does not
        attempt to look for JSON arrays, text, or other JSON types outside of
        a parent JSON object.

        https://stackoverflow.com/questions/54235528/how-to-find-json-object-in-text-with-python

        Args:
            text: Arbitrary text (e.g. an HTML page) that may embed JSON.
            decoder: Decoder used for each candidate object.

        Yields:
            Each top-level JSON object found, in order of appearance.
        """
        pos = 0

        while True:
            match = text.find('{', pos)
            if match == -1:
                break
            try:
                # Decode in place from ``match``; passing the start index
                # avoids copying the tail of the text for every brace. The
                # returned end index is absolute within ``text``.
                result, end = decoder.raw_decode(text, match)
            except ValueError:
                # Not valid JSON at this brace; try the next one.
                pos = match + 1
            else:
                yield result
                pos = end

    def _embed_url(self, html):
        """Return the embed-page URL built from ``trailer.embedUrl``, or None.

        When several JSON objects carry the key, the last one wins.
        """
        found = None
        for result in self.parse(html):
            try:
                found = "https://imdb.com/" + result["trailer"]["embedUrl"] + "/imdb/embed"
            except (KeyError, TypeError):
                # Key missing, or "trailer"/"embedUrl" has an unexpected type.
                continue
        return found

    def _video_url(self, html):
        """Return the video URL from the first player object found.

        Returns the URL if it contains ``self.subdomain``, False if a URL was
        found but does not, and None when no JSON object has the expected
        structure.
        """
        for result in self.parse(html):
            try:
                # The second entry of videoInfoList is the one that is read.
                url = result["videoPlayerObject"]["video"]["videoInfoList"][1]["videoUrl"]
            except (KeyError, IndexError, TypeError):
                # Missing key, list shorter than two entries, or wrong type.
                continue
            if isinstance(url, str) and self.subdomain in url:
                return url
            return False
        return None

    def url(self, imdbID):
        """
        First part pulls HTML data from IMDB using imdbID to locate the
        embedded trailer page, e.g.
        https://imdb.com/video/imdb/vi4095655705/imdb/embed

        Second part pulls the HTML data once again to attempt to find the
        juicy trailer URL.

        Returns:
         * On error: False or None
         * On success: URL
        """
        try:
            page = get("https://www.imdb.com/title/{imdbID}".format(imdbID=imdbID),
                       headers=self.useragent, timeout=25)
        except Exception:
            # Best effort: any failure to fetch the page is reported as False.
            return False

        trailer_url = self._embed_url(page.text)
        if trailer_url is None:
            # No trailer metadata on the title page.
            return False

        try:
            player = get(trailer_url, headers=self.useragent, timeout=25)
        except Exception:
            return False

        return self._video_url(player.text)

# Stand-in for an OMDb API response (film "1917"); only "imdbID" is read below.
fake_info = {
    "Title": "1917",
    "Year": "2019",
    "Rated": "R",
    "Released": "10 Jan 2020",
    "Runtime": "119 min",
    "Genre": "Drama, War",
    "Director": "Sam Mendes",
    "Writer": "Sam Mendes, Krysty Wilson-Cairns",
    "Actors": "Dean-Charles Chapman, George MacKay, Daniel Mays, Colin Firth",
    "Plot": "April 6th, 1917. As a regiment assembles to wage war deep in enemy territory, two soldiers are assigned to race against time and deliver a message that will stop 1,600 men from walking straight into a deadly trap.",
    "Language": "English, French, German",
    "Country": "USA, UK, India, Spain, Canada",
    "Awards": "Won 3 Oscars. Another 110 wins & 161 nominations.",
    "Poster": "https://m.media-amazon.com/images/M/MV5BOTdmNTFjNDEtNzg0My00ZjkxLTg1ZDAtZTdkMDc2ZmFiNWQ1XkEyXkFqcGdeQXVyNTAzNzgwNTg@._V1_SX300.jpg",
    "Ratings": [
        {"Source": "Internet Movie Database", "Value": "8.4/10"},
        {"Source": "Rotten Tomatoes", "Value": "89%"},
        {"Source": "Metacritic", "Value": "78/100"},
    ],
    "Metascore": "78",
    "imdbRating": "8.4",
    "imdbVotes": "272,841",
    "imdbID": "tt8579674",
    "Type": "movie",
    "DVD": "N/A",
    "BoxOffice": "N/A",
    "Production": "N/A",
    "Website": "N/A",
    "Response": "True",
}

# Resolve and print the trailer URL for the sample title.
print(trailer().url(fake_info["imdbID"]))

Nice....Any idea for PHP?

rootm0s commented 2 years ago

No sorry, not using PHP much.