# Collect image URLs from the Google Images result page held in `html`.
#
# Strategy 1: every <div class="rg_meta"> carries a JSON object as its text;
# the value stored under "ou" is collected into `results`.
# NOTE(review): "ou" is presumably the original image URL - confirm.
soup = BeautifulSoup(html, "html.parser")
rg_meta = soup.find_all("div", {"class": "rg_meta"})
metadata = [json.loads(e.text) for e in rg_meta]
results = [d["ou"] for d in metadata]

if not results:
    # Strategy 2: no rg_meta divs were found, so look for the JSON array that
    # follows "data:" inside an `AF_initDataCallback({...})` call.
    # `[^<]` keeps the match from running across a '<', and the capture group
    # grabs the bracketed array literal itself.
    regex = re.escape("AF_initDataCallback({")
    regex += r'[^<]*?data:[^<]*?' + r'(\[[^<]+\])'

    # The payload layout is undocumented, so every lookup below is
    # best-effort: a missing index/key or an unexpected None simply means
    # "no URL here" and is skipped. Only these lookup errors are silenced,
    # so unrelated failures (and Ctrl-C) still surface.
    lookup_errors = (IndexError, KeyError, TypeError)

    # NOTE(review): indentation was lost in the source; the nesting below was
    # reconstructed from data flow (`data` is only defined inside the loop).
    for txt in re.findall(regex, html):
        try:
            data = json.loads(txt)
        except ValueError:
            # The captured text is not valid JSON; try the next payload.
            continue

        # First known layout of the payload.
        try:
            for d in data[31][0][12][2]:
                try:
                    results.append(d[1][3][0])
                except lookup_errors:
                    pass
        except lookup_errors:
            pass

        if not results:
            # Second known layout, tried only when the first yielded nothing.
            try:
                for d in data[56][1][0][0][1][0]:
                    try:
                        entry = d[0][0]["444383007"]
                        results.append(entry[1][3][0])
                    except lookup_errors:
                        pass
            except lookup_errors:
                pass
# Skip to content
# kelciour/debug_console.py
# Secret
# Last active last month • Report abuse
# Code
# Revisions 2
# Google Images Debug
# debug_console.py
import re
import json
import requests
from bs4 import BeautifulSoup
# Fetch a Google Images result page for `query` and dump the HTTP status and
# the raw response body to stdout for debugging.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36",
}
query = "cat"

# Pass the query string through `params` so that requests URL-encodes it;
# formatting `query` straight into the URL breaks as soon as it contains
# characters such as '&', '#' or '+'.
r = requests.get(
    "https://www.google.com/search",
    params={"tbm": "isch", "q": query, "safe": "active"},
    headers=headers,
    timeout=15,
)

print('-----------------------')
print(r.status_code)
print('-----------------------')

html = r.text

print('-----------------------')
print(html)
print('-----------------------')
# Parse the fetched page: every <div class="rg_meta"> carries a JSON object as
# its text, and the value stored under "ou" is collected into `results`.
soup = BeautifulSoup(html, "html.parser")
rg_meta = soup.find_all("div", {"class": "rg_meta"})
metadata = [json.loads(e.text) for e in rg_meta]
results = [d["ou"] for d in metadata]

if not results:
    # Fallback pattern: the JSON array that follows "data:" inside an
    # `AF_initDataCallback({...})` call. `[^<]` keeps the match from running
    # across a '<', and the capture group grabs the bracketed array literal.
    # NOTE(review): the pattern is built here but not applied anywhere in
    # this span - confirm where the matching/extraction step lives.
    regex = re.escape("AF_initDataCallback({")
    regex += r'[^<]*?data:[^<]*?' + r'(\[[^<]+\])'
# Print every collected entry of `results`, separated by blank lines,
# followed by the total count.
print('-----------------------')
print(' IMAGES ')
print('-----------------------')
print('\n\n'.join(results))
print('-----------------------')
print('Found Images:', len(results))

# Page text that was captured together with the script (not Python code):
# Add a quote, <Cmd+Shift+.> Add code, <Cmd+e> Add a link, <Cmd+k>
# Directly mention a user or team Reference an issue or pull request
# Leave a comment
# Footer © 2022 GitHub, Inc.
# Footer navigation Terms Privacy Security Status Docs Contact GitHub
# Pricing API Training Blog About