Open boludoz opened 8 months ago
from selenium_driverless import webdriver
from selenium_driverless.types.by import By
import asyncio
async def main():
    """Open the saved OTP page and look up an input inside its OTP iframe.

    A marker is printed after each stage so the failing step is visible.
    Any exception raised while searching is printed rather than propagated.
    """
    iframe_xpath = "//iframe[contains(@*, 'OTP')]"
    otp_input_xpath = "//*[@id='wcri-html']/body/div[2]/div[1]/div/div/div/fieldset/div/input[1]"

    chrome_options = webdriver.ChromeOptions()
    async with webdriver.Chrome(options=chrome_options) as driver:
        await driver.get('file:///C:/Users/Admin/Downloads/test_otp.mhtml')
        try:
            otp_frame = await driver.find_element(By.XPATH, iframe_xpath)
            print("STEP 1 -----------------------------------------------------------------")
            # The iframe's own document is the search root for the nested lookup.
            frame_document = await otp_frame.content_document
            print("STEP 2 -----------------------------------------------------------------")
            await frame_document.find_element(By.XPATH, otp_input_xpath)
            print("END -----------------------------------------------------------------")
        except Exception as exc:
            print(f"[ERROR] While finding OTP: {exc}")


asyncio.run(main())
I liked NoDriver so much that I based my approach on selenium_driverless: I'm reverse engineering selenium_driverless to try to implement iframe handling that works as if you were opening a new tab.
The problem seems to be within find_element_by_text. Unfortunately, it's too much for me.
Fixed:
async def find_element_by_text(
    self,
    text: str,
    best_match: Optional[bool] = False,
    return_enclosing_element: Optional[bool] = True,
) -> Union[element.Element, None]:
    """
    Find and return the first element containing <text>, or the best match.

    The whole document (including frames) is fetched, a DOM text search is
    performed, and every hit is turned into an element. Hits that are text
    nodes are replaced by their parent element. The same node ids are then
    looked up inside every IFRAME subtree of the document.

    :param text: text to search for; surrounding whitespace is ignored.
    :type text: str
    :param best_match: when True, which is MUCH more expensive (thus much slower),
        the candidate whose full text length is closest to len(text) is returned.
        This helps a lot when, for example, you search for "login" and want the
        login button rather than one of many scripts/meta/headings that merely
        contain the string "login".
    :type best_match: bool
    :param return_enclosing_element: accepted for interface compatibility;
        not used by this implementation.
    :type return_enclosing_element: bool
    :return: the matching element, or None when nothing was found.
    :rtype: element.Element | None
    """
    doc = await self.send(cdp.dom.get_document(-1, True))
    # Strip before searching so the DOM query and the best-match length
    # comparison below both work on the same string.
    text = text.strip()
    search_id, nresult = await self.send(cdp.dom.perform_search(text, True))
    if nresult:
        node_ids = await self.send(cdp.dom.get_search_results(search_id, 0, nresult))
    else:
        # No hits: there is no result range to fetch.
        node_ids = []
    await self.send(cdp.dom.discard_search_results(search_id))
    if not node_ids:
        node_ids = []

    items = []
    for nid in node_ids:
        node = util.filter_recurse(doc, lambda n: n.node_id == nid)
        try:
            elem = element.create(node, self, doc)
        except Exception:  # node not present in the fetched tree; skip this hit
            continue
        if elem.node_type == 3:
            # A text node is plain text and useless for our purpose, so use the
            # parent element instead (most tags can hold text between their
            # opening and closing tags; "img", "video" and "br" cannot).
            if not elem.parent:
                # refresh once to be sure the parent is really missing
                await elem.update()
            # when it really has no parent, fall back to the text node itself
            items.append(elem.parent or elem)
        else:
            # just add the element itself
            items.append(elem)

    # Since the entire doc was already fetched, including shadow roots and
    # frames, also look the hits up inside every iframe subtree.
    iframes = util.filter_recurse_all(doc, lambda node: node.node_name == "IFRAME")
    if iframes:
        iframes_elems = [
            element.create(iframe, self, iframe.content_document)
            for iframe in iframes
        ]
        for nid in node_ids:
            for iframe_elem in iframes_elems:
                iframe_text_nodes = util.filter_recurse_all(
                    iframe_elem, lambda n: n.node_id == nid
                )
                if not iframe_text_nodes:
                    continue
                iframe_text_elems = [
                    element.create(text_node, self, iframe_elem.tree)
                    for text_node in iframe_text_nodes
                ]
                # Skip hits without a parent: a None entry would break the
                # best_match branch, which reads .text_all on every candidate.
                items.extend(
                    text_elem.parent
                    for text_elem in iframe_text_elems
                    if text_elem.parent
                )

    try:
        if not items:
            return
        if best_match:
            closest_by_length = min(
                items, key=lambda el: abs(len(text) - len(el.text_all))
            )
            return closest_by_length or items[0]
        # naively just return the first usable result
        for elem in items:
            if elem:
                return elem
    finally:
        await self.send(cdp.dom.disable())
The problem now is that I can't click on it.
@boludoz Can you provide document example that you tried to work on and source code that does not work?
@boludoz Can you provide document example that you tried to work on and source code that does not work?
<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
<iframe src="https://apoia.se/"
width="800" height="600"
frameborder="0" scrolling="yes">
</iframe>
</body>
</html>
@boludoz
So, after some research and tweaking, I found that we can use an iframe as an explicit tab, because it is added to the driver as a target. That means we can get the tab we need (the iframe, in our case) from the driver's targets. This can be done by filtering on ID (target_id for the target and frame_id for the iframe), since both are strings and are the same for a frame and its frame tab.
Once we have the required tab, we have to update its websocket URL, since nodriver currently maps it wrongly for some target types (like iframe).
Once all that is done, we can use the tab as usual.
Minimal example:
import logging
import nodriver as uc
logging.basicConfig(level=logging.DEBUG)
path_to_website = (
r"your path to website"
)
async def main():
    """Click a link inside a cross-origin iframe by driving the iframe as its own tab.

    The iframe shows up in ``driver.targets`` as a separate target whose
    ``target_id`` equals the ``frame_id`` of the ``<iframe>`` element, so the
    matching target is looked up and then used like a regular tab.

    :raises RuntimeError: when no target matches the iframe's frame id.
    """
    driver = await uc.start()
    tab = await driver.get(path_to_website)
    await tab
    # Have to wait until the iframe is registered in the browser as a target
    await tab.sleep(2)

    # Get the iframe tag from the DOM; only its frame_id is needed
    iframe = await tab.select("iframe")

    # Find the target that belongs to the iframe. A bare next(filter(...))
    # would leak StopIteration out of this coroutine, which Python reports as
    # an unhelpful "coroutine raised StopIteration" RuntimeError.
    wanted_id = str(iframe.frame_id)
    iframe_tab = next(
        (t for t in driver.targets if str(t.target.target_id) == wanted_id),
        None,
    )
    if iframe_tab is None:
        raise RuntimeError(f"no browser target found for iframe frame_id {wanted_id}")

    # Fixing websocket url
    iframe_tab.websocket_url = iframe_tab.websocket_url.replace("iframe", "page")

    button = await iframe_tab.select(
        "#__next > div > div > div.plasmic_apoiase_home_all__EEZNz.PlasmicHome_sectionHero__4yCHj > div > div > div.plasmic_apoiase_home_all__EEZNz.PlasmicHeroHeadline_column__fdMx7__IfB4x > div > div > a"
    )
    await button.click()
    # Keep the browser open so the result of the click can be inspected
    await tab.sleep(1000)


if __name__ == "__main__":
    uc.loop().run_until_complete(main())
@boludoz
So, after some research and tweaking, I found that we can use an iframe as an explicit tab, because it is added to the driver as a target. That means we can get the tab we need (the iframe, in our case) from the driver's targets. This can be done by filtering on ID (target_id for the target and frame_id for the iframe), since both are strings and are the same for a frame and its frame tab.
Once we have the required tab, we have to update its websocket URL, since nodriver currently maps it wrongly for some target types (like iframe).
Once all that is done, we can use the tab as usual.
Minimal example:
import logging

import nodriver as uc

logging.basicConfig(level=logging.DEBUG)

path_to_website = (
    r"your path to website"
)


async def main():
    """Click a link inside a cross-origin iframe by driving the iframe as its own tab.

    The iframe shows up in ``driver.targets`` as a separate target whose
    ``target_id`` equals the ``frame_id`` of the ``<iframe>`` element, so the
    matching target is looked up and then used like a regular tab.

    :raises RuntimeError: when no target matches the iframe's frame id.
    """
    driver = await uc.start()
    tab = await driver.get(path_to_website)
    await tab
    # Have to wait until the iframe is registered in the browser as a target
    await tab.sleep(2)

    # Get the iframe tag from the DOM; only its frame_id is needed
    iframe = await tab.select("iframe")

    # Find the target that belongs to the iframe. A bare next(filter(...))
    # would leak StopIteration out of this coroutine, which Python reports as
    # an unhelpful "coroutine raised StopIteration" RuntimeError.
    wanted_id = str(iframe.frame_id)
    iframe_tab = next(
        (t for t in driver.targets if str(t.target.target_id) == wanted_id),
        None,
    )
    if iframe_tab is None:
        raise RuntimeError(f"no browser target found for iframe frame_id {wanted_id}")

    # Fixing websocket url
    iframe_tab.websocket_url = iframe_tab.websocket_url.replace("iframe", "page")

    button = await iframe_tab.select(
        "#__next > div > div > div.plasmic_apoiase_home_all__EEZNz.PlasmicHome_sectionHero__4yCHj > div > div > div.plasmic_apoiase_home_all__EEZNz.PlasmicHeroHeadline_column__fdMx7__IfB4x > div > div > a"
    )
    await button.click()
    # Keep the browser open so the result of the click can be inspected
    await tab.sleep(1000)


if __name__ == "__main__":
    uc.loop().run_until_complete(main())
I love you so much friend, you really are a genius.
@boludoz If it solved your problem, consider closing this issue.
@boludoz If it solved your problem, consider closing this issue.
In that laboratory example it works, but on a real page it doesn't. The iframe must be hidden in the DOM, or something like that. I think the problem is the shadow root.
@boludoz
If you could elaborate more on this problem by providing non-working example, it would be easier to help. If u cannot provide such an example, I don't think I will be of help here
@boludoz
If you could elaborate more on this problem by providing non-working example, it would be easier to help. If u cannot provide such an example, I don't think I will be of help here
I didn't find a shadow root here, but when I try to find the iframe, it isn't found. In the test with the example HTML it works perfectly; it is as if they were different types of iframe.
I will keep investigating and will let you know of any news. Apparently the iframe is hidden: I replaced the filter with a for loop to debug it, and it is still not able to find the iframe.
This would probably help a lot : let me know when you have working code 👍
from nodriver import *
import json
async def main():
    """Load the iframe test page and print its frame tree as indented JSON."""
    browser = await start()
    tab = await browser.get('https://nowsecure.nl/iframes.html')
    await tab
    frame_tree = await tab.send(cdp.page.get_frame_tree())
    tree_as_json = frame_tree.to_json()
    print(json.dumps(tree_as_json, indent=4))


if __name__ == '__main__':
    event_loop = loop()
    event_loop.run_until_complete(main())
result:
{
"frame": {
"id": "668AE9308C450E1DC6BAA7F012B48A54",
"loaderId": "0B5C430EEB56D85056D637C7B464F014",
"url": "https://nowsecure.nl/iframes.html",
"domainAndRegistry": "nowsecure.nl",
"securityOrigin": "https://nowsecure.nl",
"mimeType": "text/html",
"secureContextType": "Secure",
"crossOriginIsolatedContextType": "NotIsolated",
"gatedAPIFeatures": [],
"adFrameStatus": {
"adFrameType": "none"
}
},
"childFrames": [
{
"frame": {
"id": "2B3B5340E99DEB0029848C38D13CC4EF",
"loaderId": "B2859CDD24B827E4039DB6407F025A62",
"url": "https://nowsecure.nl/mouse.html",
"domainAndRegistry": "nowsecure.nl",
"securityOrigin": "https://nowsecure.nl",
"mimeType": "text/html",
"secureContextType": "Secure",
"crossOriginIsolatedContextType": "NotIsolated",
"gatedAPIFeatures": [],
"parentId": "668AE9308C450E1DC6BAA7F012B48A54",
"name": "iframe0",
"adFrameStatus": {
"adFrameType": "none",
"explanations": []
}
},
"childFrames": [
{
"frame": {
"id": "26EF37AEC1E7F443A3A9E53AA526DD0D",
"loaderId": "401F6DA4EC626470E1DE3DCCEDE09436",
"url": "about:blank",
"domainAndRegistry": "",
"securityOrigin": "://",
"mimeType": "text/html",
"secureContextType": "Secure",
"crossOriginIsolatedContextType": "NotIsolated",
"gatedAPIFeatures": [],
"parentId": "2B3B5340E99DEB0029848C38D13CC4EF",
"name": "",
"adFrameStatus": {
"adFrameType": "none",
"explanations": []
}
}
}
]
},
{
"frame": {
"id": "3A0C0B06764092B1DB7F28C202EBC790",
"loaderId": "FF4552B820C9B8A5AF3903350FEEB0D8",
"url": "blob:https://nowsecure.nl/99ac4434-cb74-486d-bb6e-413df1273c39",
"domainAndRegistry": "",
"securityOrigin": "https://nowsecure.nl",
"mimeType": "text/html",
"secureContextType": "Secure",
"crossOriginIsolatedContextType": "NotIsolated",
"gatedAPIFeatures": [],
"parentId": "668AE9308C450E1DC6BAA7F012B48A54",
"name": "iframe1",
"adFrameStatus": {
"adFrameType": "none",
"explanations": []
}
},
"childFrames": [
{
"frame": {
"id": "1A18EBDF6588FCD48B226FC7C7568746",
"loaderId": "7B58DD268E7BB2B44D2AD77A572487A3",
"url": "about:srcdoc",
"domainAndRegistry": "",
"securityOrigin": "://",
"mimeType": "text/html",
"secureContextType": "Secure",
"crossOriginIsolatedContextType": "NotIsolated",
"gatedAPIFeatures": [],
"parentId": "3A0C0B06764092B1DB7F28C202EBC790",
"name": "iframe-in-iframe",
"adFrameStatus": {
"adFrameType": "none",
"explanations": []
}
}
}
]
},
{
"frame": {
"id": "10CAF055964D3655CD6E4B53958F1ADB",
"loaderId": "8E15344567C5A95855A43725FDB9B01F",
"url": "about:blank",
"domainAndRegistry": "",
"securityOrigin": "://",
"mimeType": "text/html",
"secureContextType": "Secure",
"crossOriginIsolatedContextType": "NotIsolated",
"gatedAPIFeatures": [],
"parentId": "668AE9308C450E1DC6BAA7F012B48A54",
"name": "",
"adFrameStatus": {
"adFrameType": "none",
"explanations": []
}
}
}
]
}
Also
>>> from nodriver import *
>>> browser = await start()
>>> tab = await browser.get('https://nowsecure.nl/iframes.html')
>>> await tab
>>> iframes = await tab.select_all('iframe')
>>> boxes = await iframes[0].query_selector_all('.box')
>>> for box in boxes:
await box.scroll_into_view()
await box.mouse_move()
await box.flash()
await tab.wait(1)
@ultrafunkamsterdam
Regarding your code: it will probably not work in real life, because your iframes use same-origin sources. With an iframe from a different origin it will not work. An example file was provided by @boludoz. (This is caused by cross-site protection, or something with a similar name.)
@boludoz Can you provide document example that you tried to work on and source code that does not work?
<!DOCTYPE html> <html lang="es"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> </head> <body> <iframe src="https://apoia.se/" width="800" height="600" frameborder="0" scrolling="yes"> </iframe> </body> </html>
@boludoz
Does your website (Razer), with its OTP, use the same origin for the iframe? That is, is the source of the iframe the same as the website you visit? If yes, then the answer from @ultrafunkamsterdam will probably solve your problem.
untested
config = uc.Config()
config.add_argument('--disable-web-security')
driver = await uc.start(config)
iirc it will tell you 'a unsupported option is used', but will work cross origin iframes
untested
config = uc.Config() config.add_argument('--disable-web-security') driver = await uc.start(config)
iirc it will tell you 'a unsupported option is used', but will work cross origin iframes
Do you have an email to send you information?
Iframe control (1).pdf I’ve shared some commands that I’ve managed to reverse engineer and they’ve been successful. I haven’t yet turned them into code, but I’ve tried to provide as detailed a description as possible.
The funniest thing is that I managed to navigate through the frames by modifying get and navigate but I couldn't get what they have inside.
I made a little progress, I'm just having a hard time running the javascripts.
import asyncio
import nodriver as uc
from nodriver import *
import asyncio
async def main():
    """Read the body HTML of the first iframe of a saved page via an isolated world.

    Steps: select the first ``<iframe>``, create an isolated world for its
    frame, resolve the iframe's content document into a remote object inside
    that world, and call a JavaScript function on it. Intermediate values are
    printed after each step.
    """
    config = uc.Config()
    config.add_argument('--disable-web-security')
    browser = await uc.start(config)
    tab = await browser.get('file:///C:/Users/franc/Downloads/The%20New%20Razer%20Gold%20&%20Silver.mhtml')

    iframes = await tab.select_all('iframe')
    iframe = iframes[0]
    print(iframe.backend_node_id)
    print(iframe.frame_id)
    print("IFRAMES -------------------")

    # NOTE(review): "grant_univeral_access" looks misspelled but appears to
    # mirror the protocol's own parameter name - confirm before "fixing" it.
    isolated_world_context = await tab.send(cdp.page.create_isolated_world(frame_id=iframe.frame_id, world_name='isolated', grant_univeral_access=True))
    print(isolated_world_context)
    print("isolated_world_context -------------------")

    # Resolve the iframe's *document*, not the <iframe> tag. The original code
    # passed the tag's backend node id as node_id, which is a different id space.
    resolve_node_id = await tab.send(cdp.dom.resolve_node(node_id=iframe.content_document.node_id, execution_context_id=isolated_world_context, object_group='isolated'))
    print(resolve_node_id.object_id)
    print("resolve_node_id -------------------")

    arguments = [cdp.runtime.CallArgument(object_id=resolve_node_id.object_id)]
    result_js = await tab.send(
        cdp.runtime.call_function_on(
            "() => { return document.body.innerHTML; }",
            object_id=resolve_node_id.object_id,
            arguments=arguments,
            await_promise=True,
            user_gesture=True,
            return_by_value=True,
        )
    )
    print(result_js)
    print("call_function_on_result -------------------")
    exit()


asyncio.run(main())
At this point, without any real example of the website I am unable to help you...
At this point, without any real example of the website I am unable to help you...
Can you invite me with your email without the at sign?
I made a little progress, I'm just having a hard time running the javascripts.
import asyncio

import nodriver as uc
from nodriver import *


async def main():
    """Read the body HTML of the first iframe of a saved page via an isolated world.

    Steps: select the first ``<iframe>``, create an isolated world for its
    frame, resolve the iframe's content document into a remote object inside
    that world, and call a JavaScript function on it. Intermediate values are
    printed after each step.
    """
    config = uc.Config()
    config.add_argument('--disable-web-security')
    browser = await uc.start(config)
    tab = await browser.get('file:///C:/Users/franc/Downloads/The%20New%20Razer%20Gold%20&%20Silver.mhtml')

    iframes = await tab.select_all('iframe')
    iframe = iframes[0]
    print(iframe.backend_node_id)
    print(iframe.frame_id)
    print("IFRAMES -------------------")

    # NOTE(review): "grant_univeral_access" looks misspelled but appears to
    # mirror the protocol's own parameter name - confirm before "fixing" it.
    isolated_world_context = await tab.send(cdp.page.create_isolated_world(frame_id=iframe.frame_id, world_name='isolated', grant_univeral_access=True))
    print(isolated_world_context)
    print("isolated_world_context -------------------")

    # Resolve the iframe's *document*, not the <iframe> tag. The original code
    # passed the tag's backend node id as node_id, which is a different id space.
    resolve_node_id = await tab.send(cdp.dom.resolve_node(node_id=iframe.content_document.node_id, execution_context_id=isolated_world_context, object_group='isolated'))
    print(resolve_node_id.object_id)
    print("resolve_node_id -------------------")

    arguments = [cdp.runtime.CallArgument(object_id=resolve_node_id.object_id)]
    result_js = await tab.send(
        cdp.runtime.call_function_on(
            "() => { return document.body.innerHTML; }",
            object_id=resolve_node_id.object_id,
            arguments=arguments,
            await_promise=True,
            user_gesture=True,
            return_by_value=True,
        )
    )
    print(result_js)
    print("call_function_on_result -------------------")
    exit()


asyncio.run(main())
This is indeed quirky. The iframe document (not the tag) should be passed..
myframe.content_document.node_id
I made a little progress, I'm just having a hard time running the javascripts.
import asyncio

import nodriver as uc
from nodriver import *


async def main():
    """Read the body HTML of the first iframe of a saved page via an isolated world.

    Steps: select the first ``<iframe>``, create an isolated world for its
    frame, resolve the iframe's content document into a remote object inside
    that world, and call a JavaScript function on it. Intermediate values are
    printed after each step.
    """
    config = uc.Config()
    config.add_argument('--disable-web-security')
    browser = await uc.start(config)
    tab = await browser.get('file:///C:/Users/franc/Downloads/The%20New%20Razer%20Gold%20&%20Silver.mhtml')

    iframes = await tab.select_all('iframe')
    iframe = iframes[0]
    print(iframe.backend_node_id)
    print(iframe.frame_id)
    print("IFRAMES -------------------")

    # NOTE(review): "grant_univeral_access" looks misspelled but appears to
    # mirror the protocol's own parameter name - confirm before "fixing" it.
    isolated_world_context = await tab.send(cdp.page.create_isolated_world(frame_id=iframe.frame_id, world_name='isolated', grant_univeral_access=True))
    print(isolated_world_context)
    print("isolated_world_context -------------------")

    # Resolve the iframe's *document*, not the <iframe> tag. The original code
    # passed the tag's backend node id as node_id, which is a different id space.
    resolve_node_id = await tab.send(cdp.dom.resolve_node(node_id=iframe.content_document.node_id, execution_context_id=isolated_world_context, object_group='isolated'))
    print(resolve_node_id.object_id)
    print("resolve_node_id -------------------")

    arguments = [cdp.runtime.CallArgument(object_id=resolve_node_id.object_id)]
    result_js = await tab.send(
        cdp.runtime.call_function_on(
            "() => { return document.body.innerHTML; }",
            object_id=resolve_node_id.object_id,
            arguments=arguments,
            await_promise=True,
            user_gesture=True,
            return_by_value=True,
        )
    )
    print(result_js)
    print("call_function_on_result -------------------")
    exit()


asyncio.run(main())
This is indeed quirky. The iframe document (not the tag) should be passed..
myframe.content_document.node_id
I'm sorry, it looks like that code had errors. I have made even more progress; the only problem I have is that apparently my iframe is not detected with conventional methods.
That has worked perfectly. The other thing I would like is a high-level function to execute CDP/JavaScript commands, like the one Selenium has. I would appreciate it a lot.
import asyncio
import nodriver as uc
from nodriver import *
import asyncio
async def main():
    """Read, then fill, an input inside the first iframe of a saved page.

    Steps: select the first ``<iframe>``, create an isolated world for its
    frame, request the child nodes of the iframe's content document, resolve
    that document into a remote object, then call two JavaScript functions on
    it: one returning the body HTML and one writing a value into an input
    located by XPath. Intermediate values are printed after each step.
    """
    config = uc.Config()
    config.add_argument('--disable-web-security')
    browser = await uc.start(config)
    tab = await browser.get('file:///C:/Users/franc/Downloads/The%20New%20Razer%20Gold%20&%20Silver.mhtml')

    iframes = await tab.select_all('iframe')
    iframe = iframes[0]
    print(iframe.backend_node_id)
    print(iframe.frame_id)
    print("IFRAMES -------------------")

    isolated_world_context = await tab.send(cdp.page.create_isolated_world(frame_id=iframe.frame_id, world_name='isolated', grant_univeral_access=True))
    print(isolated_world_context)
    print("isolated_world_context -------------------")

    # Ask the browser to push the whole subtree of the iframe document.
    # The command's result is deliberately NOT stored: the original code
    # assigned it to content_document.node_id, overwriting the id that
    # resolve_node needs below.
    await tab.send(cdp.dom.request_child_nodes(node_id=iframe.content_document.node_id, depth=-1))
    print(iframe.content_document.node_id)
    print("iframe.content_document.node_id -------------------")

    resolve_node_id = await tab.send(cdp.dom.resolve_node(node_id=iframe.content_document.node_id, execution_context_id=isolated_world_context, object_group='isolated'))
    print(resolve_node_id.object_id)
    print("resolve_node_id -------------------")

    arguments = [cdp.runtime.CallArgument(object_id=resolve_node_id.object_id)]
    result_js = await tab.send(
        cdp.runtime.call_function_on(
            "() => { return document.body.innerHTML; }",
            object_id=resolve_node_id.object_id,
            arguments=arguments,
            await_promise=True,
            user_gesture=True,
            return_by_value=True,
        )
    )
    print(result_js)
    print("call_function_on_result -------------------")

    arguments = [cdp.runtime.CallArgument(value='1')]  # the value you want to enter
    # The JavaScript source is kept verbatim (including its original comment).
    result_js = await tab.send(
        cdp.runtime.call_function_on(
            """
(value) => {
// Asegúrate de que este XPath corresponda al campo de entrada que quieres llenar
let xpath = '//*[@id="wcri-html"]/body/div[2]/div[1]/div/div/div/fieldset/div/input[1]';
let inputElement = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
inputElement.value = value;
}
""",
            object_id=resolve_node_id.object_id,
            arguments=arguments,
            await_promise=True,
            user_gesture=True,
            return_by_value=True,
        )
    )
    exit()


asyncio.run(main())
The following code works for me, but I'm still struggling to get a more systematic way to interact.
The problem seems to be that the iframe has the following characteristic:
CrossOriginIsolatedContextType.NOT_ISOLATED_FEATURE_DISABLED
Use --disable-web-security as an option. It will disable all isolation and let you interact with the frame directly. However, it will show a warning under the URL bar stating that it is an unsupported option (but it is supported).
If I recall correctly, this even enables the find() and select() methods to work, but what you're doing is such a niche case, and not something you come across often in regular use cases or sites.
Use --disable-web-security as an option. It will disable all isolation and let you interact with the frame directly. However, it will show a warning under the URL bar stating that it is an unsupported option (but it is supported).
If I recall correctly, this even enables the find() and select() methods to work, but what you're doing is such a niche case, and not something you come across often in regular use cases or sites.
Unfortunately, this option has not worked for me, and what seems niche becomes, in my experience, the norm. It seems that there is an error in the filter of your find function (you must add an additional for loop over each nid). I have made a custom one and it worked; I have shared it above, and it has some shortcomings with normal frames. Doing this without using a selector is 100% possible, although I would like to find some way to make this iframe visible like the rest of the frames; still, it seems much more practical to use find.
I admit that I am very angry with the person who designed the iframe.
can you please share the code? i can't select the payment iframe on the nike website
Hey, I have the same problem. I want to click on the FunCaptcha "Next" button, which is inside nested iframes. I managed to click inside one iframe using the code above, but now I am stuck. I really love nodriver and I don't want to leave it because of this one problem.
select_all('iframe') timeout error
Yeah, same here. I turned the first iframe into a tab object and can select the next iframe, but I can't select anything inside it.
Use selectors
didnt work
i can select the next iframe but inside the next iframe is hidden cant select anything inside it
# Script injected into every new document before page scripts run: it keeps the
# native attachShadow under `_as` and replaces attachShadow with a wrapper that
# always calls it with {mode: "open"}, ignoring the caller's params.
force_open_shadow_roots = """
Element.prototype._as = Element.prototype.attachShadow;
Element.prototype.attachShadow = function (params) {
return this._as({mode: "open"})
};
"""
driver.execute_cdp_cmd('Page.enable', {})
driver.execute_cdp_cmd(
    'Page.addScriptToEvaluateOnNewDocument',
    {'source': force_open_shadow_roots},
)
How to write this code in NoDriver?
# Script injected into every new document before page scripts run: it keeps the
# native attachShadow under `_as` and replaces attachShadow with a wrapper that
# always calls it with {mode: "open"}, ignoring the caller's params.
force_open_shadow_roots = """ Element.prototype._as = Element.prototype.attachShadow; Element.prototype.attachShadow = function (params) { return this._as({mode: "open"}) }; """
driver.execute_cdp_cmd('Page.enable', {})
driver.execute_cdp_cmd(
    'Page.addScriptToEvaluateOnNewDocument',
    {'source': force_open_shadow_roots},
)
How to write this code in NoDriver?
page.send(cdp.page.add_scr.....) look it up.....
i can select the next iframe but inside the next iframe is hidden cant select anything inside it
Check if the iframe shows up in browser.targets and work from there
i can select the next iframe but inside the next iframe is hidden cant select anything inside it
Check if the iframe shows up in browser.targets and work from there
it doesnt show up and thats the problem
async def main():
    """Dump the frame tree, then look up the target whose id equals the root frame id.

    The frame id is read from the root "frame" entry of the frame tree. If a
    target with that id exists it is used as a tab: its websocket URL is
    rewritten, the first iframe inside it is selected, and the first link
    inside that iframe is clicked. If no target matches, a message is printed.
    """
    driver = await start()
    tab = await driver.get("https://nowsecure.nl/iframes.html")
    await tab

    frame_data = await tab.send(cdp.page.get_frame_tree())
    print(json.dumps(frame_data.to_json(), indent=4))
    frame_id = frame_data.to_json()["frame"]["id"]

    try:
        # next() raises StopIteration when no target carries the wanted id
        iframe_tab = next(
            candidate
            for candidate in driver.targets
            if str(candidate.target.target_id) == str(frame_id)
        )
        # Fixing websocket url
        patched_url = iframe_tab.websocket_url.replace("iframe", "page")
        iframe_tab.websocket_url = patched_url

        next_iframe = await iframe_tab.select("iframe")
        link_element = await next_iframe.query_selector("a")
        await link_element.click()
        print("all_good")
    except StopIteration:
        # Handle the case when no iframe is found
        print("No matching iframe found")


import asyncio
asyncio.run(main())
I'm facing the exact same problem. I'm trying to click on the reCAPTCHA checkbox on the demo page: https://www.google.com/recaptcha/api2/demo I can interact with the form fields, but I can't find the checkbox and can't click on it, as it's in an iframe. Any help to overcome this would be much appreciated, as I can't find any reference or docs for this scenario. Thanks a lot!
async def main():
    """Dump the frame tree, then look up the target whose id equals the root frame id.

    The frame id is read from the root "frame" entry of the frame tree. If a
    target with that id exists it is used as a tab: its websocket URL is
    rewritten, the first iframe inside it is selected, and the first link
    inside that iframe is clicked. If no target matches, a message is printed.
    """
    driver = await start()
    tab = await driver.get("https://nowsecure.nl/iframes.html")
    await tab

    frame_data = await tab.send(cdp.page.get_frame_tree())
    print(json.dumps(frame_data.to_json(), indent=4))
    frame_id = frame_data.to_json()["frame"]["id"]

    try:
        # next() raises StopIteration when no target carries the wanted id
        iframe_tab = next(
            candidate
            for candidate in driver.targets
            if str(candidate.target.target_id) == str(frame_id)
        )
        # Fixing websocket url
        patched_url = iframe_tab.websocket_url.replace("iframe", "page")
        iframe_tab.websocket_url = patched_url

        next_iframe = await iframe_tab.select("iframe")
        link_element = await next_iframe.query_selector("a")
        await link_element.click()
        print("all_good")
    except StopIteration:
        # Handle the case when no iframe is found
        print("No matching iframe found")


import asyncio
asyncio.run(main())
shadowRoot close Can't find it
I'm facing the exact same problem. I'm trying to click on the reCAPTCHA checkbox on the demo page: https://www.google.com/recaptcha/api2/demo I can interact with the form fields, but I can't find the checkbox and can't click on it, as it's in an iframe. Any help to overcome this would be much appreciated, as I can't find any reference or docs for this scenario. Thanks a lot!
import json
from nodriver import start, cdp, loop
import nodriver as uc
async def switch_to_frame(browser, frame):
    """Return the browser target that belongs to the given iframe element.

    Rough JavaScript equivalent of what this achieves:

        let iframe = document.querySelector("YOUR_IFRAME_SELECTOR")
        let iframe_tab = iframe.contentWindow.document.body;

    :param browser: object exposing ``targets``; each target has ``target.target_id``.
    :param frame: iframe element exposing ``frame_id``.
    :return: the first target whose ``target_id`` equals ``frame.frame_id``
        (both compared as strings).
    :raises RuntimeError: when no target matches. A bare ``next(filter(...))``
        would also end in a RuntimeError here, because StopIteration cannot
        leave a coroutine, but with an unhelpful message.
    """
    wanted_id = str(frame.frame_id)
    for candidate in browser.targets:
        if str(candidate.target.target_id) == wanted_id:
            return candidate
    raise RuntimeError(f"no browser target found for iframe frame_id {wanted_id}")
async def main():
    """Open the login page and click the reCAPTCHA checkbox inside its cross-origin iframe.

    The page is scrolled and given time to load, the reCAPTCHA ``<iframe>`` is
    selected, the matching browser target is obtained via ``switch_to_frame``,
    and the checkbox is clicked through that target.
    """
    browser_args = ['--disable-web-security']
    browser = await uc.start(browser_args=browser_args)
    tab = browser.main_tab
    tab = await browser.get("http://www.yescaptcha.cn/auth/login")
    for _ in range(10):
        await tab.scroll_down(50)
    await tab
    await tab.sleep(15)

    # solve Ordinary iframe
    # query_selector = await tab.select_all('button[class="widgetLabel moveFromRightLabel-enter-done"]', include_frames=True)
    # print('query_selector:', query_selector)
    # if len(query_selector) == 1:
    #     await query_selector[0].click()

    # solve Cross-origin iframe
    recaptcha0 = await tab.select('iframe[title="reCAPTCHA"]')
    print('recaptcha0:', recaptcha0.frame_id)
    # # for tar in browser.targets:
    # #     print('target_id:', tar.target.target_id)
    iframe_tab = await switch_to_frame(browser, recaptcha0)
    # # iframe_tab: uc.Tab = next(
    # #     filter(
    # #         lambda x: str(x.target.target_id) == str(recaptcha0.frame_id), browser.targets
    # #     )
    # # )
    print('iframe_tabwebsocket_url:', iframe_tab.websocket_url)
    iframe_tab.websocket_url = iframe_tab.websocket_url.replace("iframe", "page")
    button = await iframe_tab.select("span#recaptcha-anchor")
    await button.click()
    # Block until Enter is pressed so the browser stays open for inspection
    input('stop')


if __name__ == "__main__":
    loop().run_until_complete(main())
Everything in this thread is in vain and won't work in the real world. Selecting an element within an iframe, or within nested iframes, is simpler with JavaScript. You simply write the JS code and run it with evaluate. Example:
#Click
await page.evaluate("document.querySelector(...all your selector..).click()")
#Value
await page.evaluate("document.querySelector(...all your selectors..).value='123'")
It is very important to run the driver with the option: '--disable-web-security' to avoid the policy of "Blocked a frame with origin .... from accessing a cross-origin frame", which of course is the usual in real websites.
@andvid72 - Are you sure that works for cross-domain? The below selector doesn't see any elements (even with --disable-web-security)
await page.evaluate("document.querySelector('iframe span#recaptcha-anchor').click()")
@nathankurtyka - You must run the driver with '--disable-site-isolation-trials' in this DOM. Besides, you are also wrongly selecting the node. This is the full script with the correct approach:
import nodriver as uc
async def main():
    """Click the reCAPTCHA checkbox through JavaScript evaluated in the top-level page.

    The browser is started with web security and site isolation trials
    disabled so that the page script can reach into the iframe's
    ``contentWindow.document``.
    """
    browser_args = ['--disable-web-security','--disable-site-isolation-trials']
    browser = await uc.start(browser_args=browser_args)
    tab = await browser.get("http://www.yescaptcha.cn/auth/login")
    # evaluate() must be awaited: without it the coroutine is only created,
    # never run, and the tab is closed before any click happens.
    await tab.evaluate("document.querySelector('[title=reCAPTCHA]').contentWindow.document.querySelector('#recaptcha-anchor').click()")
    await tab.close()


if __name__ == "__main__":
    uc.loop().run_until_complete(main())
@nathankurtyka - You must run the driver with '--disable-site-isolation-trials' in this DOM. Besides, you are also wrongly selecting the node. This is the full script with the correct approach:
import nodriver as uc


async def main():
    """Click the reCAPTCHA checkbox through JavaScript evaluated in the top-level page.

    The browser is started with web security and site isolation trials
    disabled so that the page script can reach into the iframe's
    ``contentWindow.document``.
    """
    browser_args = ['--disable-web-security','--disable-site-isolation-trials']
    browser = await uc.start(browser_args=browser_args)
    tab = await browser.get("http://www.yescaptcha.cn/auth/login")
    # evaluate() must be awaited: without it the coroutine is only created,
    # never run, and the tab is closed before any click happens.
    await tab.evaluate("document.querySelector('[title=reCAPTCHA]').contentWindow.document.querySelector('#recaptcha-anchor').click()")
    await tab.close()


if __name__ == "__main__":
    uc.loop().run_until_complete(main())
May I ask for your advice https://www.icj-cij.org How to operate this website with shadow-root (closed) attribute? I failed my test。
@nathankurtyka - You must run the driver with '--disable-site-isolation-trials' in this DOM. Besides, you are also wrongly selecting the node. This is the full script with the correct approach:
import nodriver as uc


async def main():
    """Click the reCAPTCHA checkbox through JavaScript evaluated in the top-level page.

    The browser is started with web security and site isolation trials
    disabled so that the page script can reach into the iframe's
    ``contentWindow.document``.
    """
    browser_args = ['--disable-web-security','--disable-site-isolation-trials']
    browser = await uc.start(browser_args=browser_args)
    tab = await browser.get("http://www.yescaptcha.cn/auth/login")
    # evaluate() must be awaited: without it the coroutine is only created,
    # never run, and the tab is closed before any click happens.
    await tab.evaluate("document.querySelector('[title=reCAPTCHA]').contentWindow.document.querySelector('#recaptcha-anchor').click()")
    await tab.close()


if __name__ == "__main__":
    uc.loop().run_until_complete(main())
JS is not needed when those browser_args are used :)
@nathankurtyka- Вы должны запустить драйвер с '-disable-site-isolation-trials' в этом DOM. Кроме того, вы также неправильно выбрали узел. Это полный скрипт с таким подходом:
import nodriver as uc


async def main():
    """Click the reCAPTCHA checkbox through JavaScript evaluated in the top-level page.

    The browser is started with web security and site isolation trials
    disabled so that the page script can reach into the iframe's
    ``contentWindow.document``.
    """
    browser_args = ['--disable-web-security','--disable-site-isolation-trials']
    browser = await uc.start(browser_args=browser_args)
    tab = await browser.get("http://www.yescaptcha.cn/auth/login")
    # evaluate() must be awaited: without it the coroutine is only created,
    # never run, and the tab is closed before any click happens.
    await tab.evaluate("document.querySelector('[title=reCAPTCHA]').contentWindow.document.querySelector('#recaptcha-anchor').click()")
    await tab.close()


if __name__ == "__main__":
    uc.loop().run_until_complete(main())
JS не нужен, когда использовались браузерные аргументы :)
Hello. I'm fiddling with the FunCaptcha. First I need to click the Next button, which is in a nested iframe: the button is in the third iframe. I tried everything, but I can't click it. Selenium can click it via switch_to.frame. But I need to do it through nodriver, since it is not detected as a bot. In the photo, frame 1 is found, but frames 2 and 3 are not, although the selector shows that there are 3 frames.
I'm having difficulty selecting an iframe and using click inside it, I'm using the find and click functions but they don't work inside the iframe. How do I select it?
I really think that nodriver is worth it and I am not going to get off the boat because of this simple obstacle.
My question is, how can I access an iframe within a website?