Open rootm0s opened 4 years ago
I wrote this Python 3 script that fetches the IMDb trailer URL, but it would be neater to use the API for it. It requires the imdbID, which I fetch from the OMDb API.
from json import JSONDecoder
from requests import get
class trailer:
    """Scrape the direct trailer video URL for an IMDb title.

    The lookup takes two HTTP requests: the title page is scanned for an
    embedded JSON object carrying ``trailer.embedUrl``, then the embed page
    built from it is scanned for the player JSON that holds the video URL.
    """

    def __init__(self):
        # Retained for backward compatibility; no method in this class reads them.
        self.text = None
        self.imdbID = None
        # A video URL is only accepted when it contains this host name.
        self.subdomain = "imdb-video.media-imdb.com"
        # Browser-like User-Agent header sent with every request.
        self.useragent = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/60.0.3112.113 Safari/537.36"
            )
        }

    def parse(self, text, decoder=JSONDecoder()):
        """
        Find JSON objects in text, and yield the decoded JSON data. Does not
        attempt to look for JSON arrays, text, or other JSON types outside of
        a parent JSON object.

        https://stackoverflow.com/questions/54235528/how-to-find-json-object-in-text-with-python
        """
        pos = 0
        while True:
            match = text.find("{", pos)
            if match == -1:
                break
            try:
                # Decode in place from offset `match`; slicing text[match:]
                # would copy the rest of the page for every "{" encountered.
                result, end = decoder.raw_decode(text, match)
            except ValueError:
                # Not valid JSON at this brace: resume right after it.
                pos = match + 1
            else:
                yield result
                # `end` is an absolute index into `text`.
                pos = end

    def _embed_page_url(self, html):
        """Return the embed-page URL built from the first ``trailer.embedUrl`` in html, or None."""
        for result in self.parse(html):
            try:
                embed = result["trailer"]["embedUrl"]
            except (KeyError, TypeError):
                # Object has no trailer entry (or it is null / not a mapping).
                continue
            if isinstance(embed, str):
                return "https://imdb.com/" + embed + "/imdb/embed"
        return None

    def _video_url(self, html):
        """Return the player video URL found in html: URL, False if on an unexpected host, None if absent."""
        for result in self.parse(html):
            try:
                # Index 1 is the entry the original script selected.
                candidate = result["videoPlayerObject"]["video"]["videoInfoList"][1]["videoUrl"]
            except (KeyError, IndexError, TypeError):
                # Not the player object, or the list is shorter than expected.
                continue
            if isinstance(candidate, str) and self.subdomain in candidate:
                return candidate
            return False
        return None

    def url(self, imdbID):
        """
        First part pulls HTML data from IMDB using imdbID to locate the
        embedded trailer page, e.g. https://imdb.com/video/imdb/vi4095655705/imdb/embed
        Second part pulls that page's HTML to find the actual trailer URL.

        Returns:
        * False if a request fails, the title page exposes no trailer, or the
          video URL does not contain ``self.subdomain``
        * None if the embed page contains no recognisable video entry
        * On success: URL
        """
        title_url = "https://www.imdb.com/title/{imdbID}".format(imdbID=imdbID)
        try:
            page = get(title_url, headers=self.useragent, timeout=25)
        except Exception:
            # Best-effort scraper: any transport failure is reported as False.
            return False

        trailer_url = self._embed_page_url(page.text)
        if trailer_url is None:
            # Explicit check instead of relying on an unbound local being
            # swallowed by a blanket except further down.
            return False

        try:
            page = get(trailer_url, headers=self.useragent, timeout=25)
        except Exception:
            return False

        return self._video_url(page.text)
fake_info = {'Title': '1917',
'Year': '2019',
'Rated': 'R',
'Released': '10 Jan 2020',
'Runtime': '119 min',
'Genre': 'Drama, War',
'Director': 'Sam Mendes',
'Writer': 'Sam Mendes, Krysty Wilson-Cairns',
'Actors': 'Dean-Charles Chapman, George MacKay, Daniel Mays, Colin Firth',
'Plot': 'April 6th, 1917. As a regiment assembles to wage war deep in enemy territory, two soldiers are assigned to race against time and deliver a message that will stop 1,600 men from walking straight into a deadly trap.',
'Language': 'English, French, German', 'Country': 'USA, UK, India, Spain, Canada',
'Awards': 'Won 3 Oscars. Another 110 wins & 161 nominations.',
'Poster': 'https://m.media-amazon.com/images/M/MV5BOTdmNTFjNDEtNzg0My00ZjkxLTg1ZDAtZTdkMDc2ZmFiNWQ1XkEyXkFqcGdeQXVyNTAzNzgwNTg@._V1_SX300.jpg',
'Ratings': [{'Source': 'Internet Movie Database', 'Value': '8.4/10'}, {'Source': 'Rotten Tomatoes', 'Value': '89%'}, {'Source': 'Metacritic', 'Value': '78/100'}],
'Metascore': '78',
'imdbRating': '8.4',
'imdbVotes': '272,841',
'imdbID': 'tt8579674',
'Type': 'movie',
'DVD': 'N/A',
'BoxOffice': 'N/A',
'Production': 'N/A',
'Website': 'N/A',
'Response': 'True'}
print(trailer().url(fake_info["imdbID"]))
I wrote this Python 3 script that fetches the IMDb trailer URL, but it would be neater to use the API for it. It requires the imdbID, which I fetch from the OMDb API.
from json import JSONDecoder

from requests import get


class trailer:
    """Scrape the direct trailer video URL for an IMDb title.

    The lookup takes two HTTP requests: the title page is scanned for an
    embedded JSON object carrying ``trailer.embedUrl``, then the embed page
    built from it is scanned for the player JSON that holds the video URL.
    """

    def __init__(self):
        # Retained for backward compatibility; no method in this class reads them.
        self.text = None
        self.imdbID = None
        # A video URL is only accepted when it contains this host name.
        self.subdomain = "imdb-video.media-imdb.com"
        # Browser-like User-Agent header sent with every request.
        self.useragent = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/60.0.3112.113 Safari/537.36"
            )
        }

    def parse(self, text, decoder=JSONDecoder()):
        """
        Find JSON objects in text, and yield the decoded JSON data. Does not
        attempt to look for JSON arrays, text, or other JSON types outside of
        a parent JSON object.

        https://stackoverflow.com/questions/54235528/how-to-find-json-object-in-text-with-python
        """
        pos = 0
        while True:
            match = text.find("{", pos)
            if match == -1:
                break
            try:
                # Decode in place from offset `match`; slicing text[match:]
                # would copy the rest of the page for every "{" encountered.
                result, end = decoder.raw_decode(text, match)
            except ValueError:
                # Not valid JSON at this brace: resume right after it.
                pos = match + 1
            else:
                yield result
                # `end` is an absolute index into `text`.
                pos = end

    def url(self, imdbID):
        """
        First part pulls HTML data from IMDB using imdbID to locate the
        embedded trailer page, e.g. https://imdb.com/video/imdb/vi4095655705/imdb/embed
        Second part pulls that page's HTML to find the actual trailer URL.

        Returns:
        * False if a request fails, the title page exposes no trailer, or the
          video URL does not contain ``self.subdomain``
        * None if the embed page contains no recognisable video entry
        * On success: URL
        """
        title_url = "https://www.imdb.com/title/{imdbID}".format(imdbID=imdbID)
        try:
            page = get(title_url, headers=self.useragent, timeout=25)
        except Exception:
            # Best-effort scraper: any transport failure is reported as False.
            return False

        trailer_url = None
        for result in self.parse(page.text):
            try:
                embed = result["trailer"]["embedUrl"]
            except (KeyError, TypeError):
                # Object has no trailer entry (or it is null / not a mapping).
                continue
            if isinstance(embed, str):
                trailer_url = "https://imdb.com/" + embed + "/imdb/embed"
                break
        if trailer_url is None:
            # Explicit check instead of relying on an unbound local being
            # swallowed by a blanket except further down.
            return False

        try:
            page = get(trailer_url, headers=self.useragent, timeout=25)
        except Exception:
            return False

        for result in self.parse(page.text):
            try:
                # Index 1 is the entry the original script selected.
                candidate = result["videoPlayerObject"]["video"]["videoInfoList"][1]["videoUrl"]
            except (KeyError, IndexError, TypeError):
                # Not the player object, or the list is shorter than expected.
                continue
            if isinstance(candidate, str) and self.subdomain in candidate:
                return candidate
            return False
        return None


# Sample OMDb-style record used to demonstrate the lookup; only "imdbID" is
# read by the demo below.
fake_info = {
    "Title": "1917",
    "Year": "2019",
    "Rated": "R",
    "Released": "10 Jan 2020",
    "Runtime": "119 min",
    "Genre": "Drama, War",
    "Director": "Sam Mendes",
    "Writer": "Sam Mendes, Krysty Wilson-Cairns",
    "Actors": "Dean-Charles Chapman, George MacKay, Daniel Mays, Colin Firth",
    "Plot": "April 6th, 1917. As a regiment assembles to wage war deep in enemy territory, two soldiers are assigned to race against time and deliver a message that will stop 1,600 men from walking straight into a deadly trap.",
    "Language": "English, French, German",
    "Country": "USA, UK, India, Spain, Canada",
    "Awards": "Won 3 Oscars. Another 110 wins & 161 nominations.",
    "Poster": "https://m.media-amazon.com/images/M/MV5BOTdmNTFjNDEtNzg0My00ZjkxLTg1ZDAtZTdkMDc2ZmFiNWQ1XkEyXkFqcGdeQXVyNTAzNzgwNTg@._V1_SX300.jpg",
    "Ratings": [
        {"Source": "Internet Movie Database", "Value": "8.4/10"},
        {"Source": "Rotten Tomatoes", "Value": "89%"},
        {"Source": "Metacritic", "Value": "78/100"},
    ],
    "Metascore": "78",
    "imdbRating": "8.4",
    "imdbVotes": "272,841",
    "imdbID": "tt8579674",
    "Type": "movie",
    "DVD": "N/A",
    "BoxOffice": "N/A",
    "Production": "N/A",
    "Website": "N/A",
    "Response": "True",
}

# Run the demo only when executed as a script, so importing this module does
# not trigger network requests.
if __name__ == "__main__":
    print(trailer().url(fake_info["imdbID"]))
Nice....Any idea for PHP?
No sorry, not using PHP much.
Is it possible to add support for fetching trailer URL for a specific title?
I searched the open and closed issues but couldn't find anything about it. If anyone has a good idea for pulling that data from another source, let me know.