Closed milahu closed 8 months ago
https://github.com/kaliiiiiiiiii/Selenium-Driverless/blob/24a3513305f833fac600ec8e31bcd5e9df955162/src/selenium_driverless/types/webelement.py#L285-L287 https://developer.mozilla.org/en-US/docs/Web/API/Element/getElementsByTagName
https://github.com/kaliiiiiiiiii/Selenium-Driverless/blob/24a3513305f833fac600ec8e31bcd5e9df955162/src/selenium_driverless/types/deserialize.py#L481 https://github.com/kaliiiiiiiiii/Selenium-Driverless/blob/24a3513305f833fac600ec8e31bcd5e9df955162/src/selenium_driverless/types/deserialize.py#L276
@milahu Do you see any issue here?
WebElement("None", obj_id=None, node_id="None", backend_node_id=10, context_id=None)
I don't see any issue with that. The deserialisation from Chrome just doesn't return any more information for nested values than the backendNodeId
on them, which is just a reference to the element.
@milahu
Is there any other issue with By.TAG_NAME
you encountered?
I don't see any issue with that.
all variants of driver.find_element
should return the same result for the same element
> document.getElementsByTagName("b")[0] === document.querySelectorAll("b")[0]
true
selenium_driverless/types/webelement.py
if by == By.TAG_NAME:
return await self.execute_script("return obj.getElementsByTagName(arguments[0])",
value, serialization="deep", unique_context=True, timeout=10)
elif by == By.CSS_SELECTOR:
return await self.execute_script("return obj.querySelectorAll(arguments[0])", value, timeout=10,
unique_context=True)
so getElementsByTagName
and querySelectorAll
are wrapped in the same code
i guess the problem is the different type of return value
> document.getElementsByTagName("b")
HTMLCollection [b#b1, b1: b#b1]
> document.querySelectorAll("b")
NodeList [b#b1]
selenium_driverless/types/deserialize.py
elif _type == 'htmlcollection':
_res = JSNodeList(obj_id=obj_id, target=target, class_name=class_name, isolated_exec_id=isolated_exec_id,
frame_id=frame_id)
for idx, _deep in enumerate(_value):
_res.append(
await parse_deep(_deep, target, isolated_exec_id=isolated_exec_id, frame_id=frame_id, loop=loop))
return _res
if class_name in ['NodeList']:
elems = []
obj = JSRemoteObj(target=target, obj_id=obj_id,
isolated_exec_id=isolated_exec_id, frame_id=frame_id)
for idx in range(int(description[-2])):
elems.append(await obj.__exec__("return obj[arguments[0]]", idx, serialization="deep",
execution_context_id=context_id))
return elems
fix: use the same code for NodeList
and HTMLCollection
- if class_name in ['NodeList']:
+ if class_name in ['NodeList', 'HTMLCollection']:
... and remove the elif _type == 'htmlcollection'
block
so both return a list
of WebElement
side note:
i just noticed that find_element
is wasteful
because it calls find_elements
to get all elements with querySelectorAll
etc
instead of calling querySelector
to get only the first element
all variants of driver.find_element should return the same result for the same element
Yeah, that would be the optimal case. Unfortunately, though, that's just not the way Chromium deserialises. Of course, we could fetch that information by serialising it to JSON at the JS level already.
document.getElementsByTagName("b") HTMLCollection [b#b1, b1: b#b1]
Huh, if I remember correctly, though, the serialised HTMLCollection
is a list of _type=node
We'll have to do some debugging to check what we actually get at https://github.com/kaliiiiiiiiii/Selenium-Driverless/blob/24a3513305f833fac600ec8e31bcd5e9df955162/src/selenium_driverless/types/deserialize.py#L652-L658
side note: i just noticed that find_element is wasteful
Yep, that indeed is the case, and to be resolved.
See here: the other variants of find_element
make additional calls to get the elements, because the serialised types don't include any RemoteObject references to the elements.
push (ninja edits)
fix: use the same code for
NodeList
and HTMLCollection
- if class_name in ['NodeList']:
+ if class_name in ['NodeList', 'HTMLCollection']:
... and remove the
elif _type == 'htmlcollection'
block so both return a list
of WebElement
driver.find_element(By.TAG_NAME, "...")
fails to find elements.
All other variants of
driver.find_element
work. This was part of #144.
repro
```py #!/usr/bin/env python3 import asyncio import base64 import sys import os import time import datetime import traceback from selenium_driverless import webdriver from selenium_driverless.types.by import By from cdp_socket.exceptions import CDPError from selenium_driverless.types.webelement import NoSuchElementException async def main(): async with webdriver.Chrome() as driver: url = "data:text/html,asdf" await driver.get(url) # wait for page load await asyncio.sleep(1) # find elem # FIXME WebElement("None", obj_id=None, node_id="None", backend_node_id=10, context_id=None) for tag_name in ["b", "B"]: try: elem = await driver.find_element(By.TAG_NAME, tag_name) elem = f"{repr(elem)}: text={repr(await elem.text)}" except NoSuchElementException as exc: elem = exc print(f"found elem by tag name {tag_name}: {elem}") # FIXME NoSuchElementException try: elem = await driver.find_element(By.CSS_SELECTOR, "b") elem = f"{repr(elem)}: text={repr(await elem.text)}" except NoSuchElementException as exc: elem = exc print(f"found elem by css selector: {elem}") for xpath in ["//b", "/html/body/b", '//*[@id="b1"]']: try: elem = await driver.find_element(By.XPATH, "//b") elem = f"{repr(elem)}: text={repr(await elem.text)}" except NoSuchElementException as exc: elem = exc print(f"found elem by xpath {xpath}: {elem}") try: elem = await driver.find_element(By.ID, "b1") elem = f"{repr(elem)}: text={repr(await elem.text)}" except NoSuchElementException as exc: elem = exc print(f"found elem by id: {elem}") #await asyncio.sleep(99999) asyncio.run(main()) ``` example output ``` found elem by tag name b: WebElement("None", obj_id=None, node_id="None", backend_node_id=6, context_id=None): text='asdf' found elem by tag name B: WebElement("None", obj_id=None, node_id="None", backend_node_id=6, context_id=None): text='asdf' found elem by css selector: WebElement("HTMLElement", obj_id=-6200668007811274967.3.7, node_id="None", backend_node_id=6, context_id=3): text='asdf' found elem by xpath 
//b: WebElement("HTMLElement", obj_id=-6200668007811274967.3.9, node_id="None", backend_node_id=6, context_id=3): text='asdf' found elem by xpath /html/body/b: WebElement("HTMLElement", obj_id=-6200668007811274967.3.11, node_id="None", backend_node_id=6, context_id=3): text='asdf' found elem by xpath //*[@id="b1"]: WebElement("HTMLElement", obj_id=-6200668007811274967.3.13, node_id="None", backend_node_id=6, context_id=3): text='asdf' found elem by id: WebElement("HTMLElement", obj_id=-6200668007811274967.3.15, node_id="None", backend_node_id=6, context_id=3): text='asdf' ```