hansalemaos / a_selenium2df

Get all attributes from each Selenium element in record time
https://pypi.org/project/a-selenium2df/
MIT License
4 stars 2 forks source link
pandas python selenium

Get all attributes from each Selenium element in record time

# Tested with:
# https://github.com/ultrafunkamsterdam/undetected-chromedriver
# Python 3.9.13
# Windows 10

$ pip install a-selenium2df

from a_selenium2df import get_df
from auto_download_undetected_chromedriver import download_undetected_chromedriver
import undetected_chromedriver as uc
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By

if __name__ == "__main__":
    # Obtain a chromedriver executable and start a Chrome session with it.
    # NOTE(review): presumably the helper downloads the driver into the given
    # folder and returns the path of the executable -- confirm against its docs.
    folderchromedriver = "f:\\seleniumdriver2"
    path = download_undetected_chromedriver(
        folder_path_for_exe=folderchromedriver,
        undetected=True,
    )
    driver = uc.Chrome(driver_executable_path=path)
    driver.get(r"https://www.whitehouse.gov/")

    # Both get_df calls below take the same four positional arguments,
    # so they are bundled once and unpacked at each call site.
    selenium_objects = (driver, By, WebDriverWait, expected_conditions)

    # with_methods=False is the faster variant, but only the standard
    # methods of the webelements are available afterwards.
    # get_df parses the attributes of all matched webelements in a single
    # query, whereas pure Selenium sends one query per item -- hence the
    # speed-up.
    #
    # %timeit get_df(driver, By, WebDriverWait, expected_conditions, queryselector="a", with_methods=False,    )
    # 289 ms ± 4.89 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    df = get_df(*selenium_objects, queryselector="a", with_methods=False)

    # with_methods=True is slower, but the result includes many useful
    # methods and takes care of frame switching.
    #
    # %timeit get_df(driver, By, WebDriverWait, expected_conditions, queryselector="a", with_methods=True,    )
    # 908 ms ± 40.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    df = get_df(*selenium_objects, queryselector="a", with_methods=True)

# Now you can use the power of pandas to locate webelements:
# print(df.loc[df.aa_outerText.str.contains('Vaccines.gov',na=False)].to_string())
#
#
# element      frame  elements_in_frame           aa_relList             aa_text                 aa_origin           aa_host       aa_hostname aa_pathname                    aa_href          aa_offsetParent aa_offsetTop aa_offsetLeft aa_offsetWidth aa_offsetHeight        aa_innerText        aa_outerText                                   aa_className                                   aa_classList        aa_innerHTML                                                                                                                                                                            aa_outerHTML aa_scrollWidth aa_scrollHeight aa_clientWidth aa_clientHeight aa_previousElementSibling aa_nextElementSibling            aa_parentNode         aa_parentElement  aa_firstChild   aa_lastChild aa_nextSibling      aa_textContent               aa_rel aa_firstElementChild aa_lastElementChild aa_childElementCount js_toString js_attachInternals js_blur js_click js_focus js_after js_animate js_append js_attachShadow js_before js_closest js_computedStyleMap js_getAttribute js_getAttributeNS js_getAttributeNames js_getAttributeNode js_getAttributeNodeNS js_getBoundingClientRect js_getClientRects js_getElementsByClassName js_getElementsByTagName js_getElementsByTagNameNS js_getInnerHTML js_hasAttribute js_hasAttributeNS js_hasAttributes js_hasPointerCapture js_insertAdjacentElement js_insertAdjacentHTML js_insertAdjacentText js_matches js_prepend js_querySelector js_querySelectorAll js_releasePointerCapture js_remove js_removeAttribute js_removeAttributeNS js_removeAttributeNode js_replaceChildren js_replaceWith js_requestFullscreen js_requestPointerLock js_scroll js_scrollBy js_scrollIntoView js_scrollIntoViewIfNeeded js_scrollTo js_setAttribute js_setAttributeNS js_setAttributeNode js_setAttributeNodeNS js_setPointerCapture js_toggleAttribute js_webkitMatchesSelector js_webkitRequestFullScreen js_webkitRequestFullscreen js_checkVisibility js_getAnimations js_setHTML js_appendChild 
js_cloneNode js_compareDocumentPosition js_contains js_getRootNode js_hasChildNodes js_insertBefore js_isDefaultNamespace js_isEqualNode js_isSameNode js_lookupNamespaceURI js_lookupPrefix js_normalize js_removeChild js_replaceChild js_addEventListener js_dispatchEvent js_removeEventListener js_wheel js_change_html_value se_send_keys se_find_elements se_find_element se_is_displayed se_is_enabled se_is_selected se_clear se_click se_switch_to_frame se_location_once_scrolled_into_view                                     se_get_screenshot_as_file se_screenshot                           aa_window_handle                             aa_window_switch
# 68   <selenium.webdriver.remote.webelement.WebElement (session="28e4140b6737882b119b3c693a8fd816", element="93eee670-1b4b-4896-b0e3-55357578413c")>  mainframe                122  noopener noreferrer  Visit Vaccines.gov  https://www.vaccines.gov  www.vaccines.gov  www.vaccines.gov           /  https://www.vaccines.gov/  [object HTMLDivElement]          333          <NA>            882              57  Visit Vaccines.gov  Visit Vaccines.gov  home-topper__btn home-topper__btn--mobile btn  home-topper__btn home-topper__btn--mobile btn  Visit Vaccines.gov  <a href="https://www.vaccines.gov/" class="home-topper__btn home-topper__btn--mobile btn" target="_blank" rel="noopener noreferrer">\n\t\t\t\t\t\t\tVisit Vaccines.gov\t\t\t\t\t\t</a>            878              53            878              53   [object HTMLDivElement]                  <NA>  [object HTMLDivElement]  [object HTMLDivElement]  [object Text]  [object Text]  [object Text]  Visit Vaccines.gov  noopener noreferrer                 <NA>                <NA>                 <NA>          ()                 ()      ()       ()       ()       ()         ()        ()              ()        ()         ()                  ()              ()                ()                   ()                  ()                    ()                       ()                ()                        ()                      ()                        ()              ()              ()                ()               ()                   ()                       ()                    ()                    ()         ()         ()               ()                  ()                       ()        ()                 ()                   ()                     ()                 ()             ()                   ()                    ()        ()          ()                ()                        ()          ()              ()                ()                  ()                    ()                   ()                 ()     
                  ()                         ()                         ()                 ()               ()         ()             ()           ()                         ()          ()             ()               ()              ()                    ()             ()            ()                    ()              ()           ()             ()              ()                  ()               ()                     ()       ()                   ()           ()               ()              ()              ()            ()             ()       ()       ()               None                                  ()   C:\Users\Gamer\anaconda3\envs\dfdir\seleniumpictures\68.png            ()  CDwindow-5EB51E090E4DDEB92ECFA74D9FBF90D3  CDwindow-5EB51E090E4DDEB92ECFA74D9FBF90D3()
# 69   <selenium.webdriver.remote.webelement.WebElement (session="28e4140b6737882b119b3c693a8fd816", element="e720ec81-d95e-4acd-88f6-3546937ec66d")>  mainframe                122  noopener noreferrer  Visit Vaccines.gov  https://www.vaccines.gov  www.vaccines.gov  www.vaccines.gov           /  https://www.vaccines.gov/                     <NA>         <NA>          <NA>           <NA>            <NA>  Visit Vaccines.gov  Visit Vaccines.gov                           home-topper__btn btn                           home-topper__btn btn  Visit Vaccines.gov                           <a href="https://www.vaccines.gov/" class="home-topper__btn btn" target="_blank" rel="noopener noreferrer">\n\t\t\t\t\t\t\tVisit Vaccines.gov\t\t\t\t\t\t</a>           <NA>            <NA>           <NA>            <NA>                      <NA>                  <NA>  [object HTMLDivElement]  [object HTMLDivElement]  [object Text]  [object Text]  [object Text]  Visit Vaccines.gov  noopener noreferrer                 <NA>                <NA>                 <NA>          ()                 ()      ()       ()       ()       ()         ()        ()              ()        ()         ()                  ()              ()                ()                   ()                  ()                    ()                       ()                ()                        ()                      ()                        ()              ()              ()                ()               ()                   ()                       ()                    ()                    ()         ()         ()               ()                  ()                       ()        ()                 ()                   ()                     ()                 ()             ()                   ()                    ()        ()          ()                ()                        ()          ()              ()                ()                  ()                    ()                   ()                 ()     
                  ()                         ()                         ()                 ()               ()         ()             ()           ()                         ()          ()             ()               ()              ()                    ()             ()            ()                    ()              ()           ()             ()              ()                  ()               ()                     ()       ()                   ()           ()               ()              ()              ()            ()             ()       ()       ()               None                                  ()   C:\Users\Gamer\anaconda3\envs\dfdir\seleniumpictures\69.png            ()  CDwindow-5EB51E090E4DDEB92ECFA74D9FBF90D3  CDwindow-5EB51E090E4DDEB92ECFA74D9FBF90D3()
# 101  <selenium.webdriver.remote.webelement.WebElement (session="28e4140b6737882b119b3c693a8fd816", element="bc9ffa93-6cd1-454f-8785-7967ebb8f91a")>  mainframe                122  noopener noreferrer  Visit Vaccines.gov  https://www.vaccines.gov  www.vaccines.gov  www.vaccines.gov           /  https://www.vaccines.gov/  [object HTMLDivElement]          149           137            264              57  Visit Vaccines.gov  Visit Vaccines.gov                                            btn                                            btn  Visit Vaccines.gov                                        <a href="https://www.vaccines.gov/" class="btn" target="_blank" rel="noopener noreferrer">\n\t\t\t\t\t\t\t\tVisit Vaccines.gov\t\t\t\t\t\t\t</a>            260              53            260              53    [object HTMLHRElement]                  <NA>  [object HTMLDivElement]  [object HTMLDivElement]  [object Text]  [object Text]  [object Text]  Visit Vaccines.gov  noopener noreferrer                 <NA>                <NA>                 <NA>          ()                 ()      ()       ()       ()       ()         ()        ()              ()        ()         ()                  ()              ()                ()                   ()                  ()                    ()                       ()                ()                        ()                      ()                        ()              ()              ()                ()               ()                   ()                       ()                    ()                    ()         ()         ()               ()                  ()                       ()        ()                 ()                   ()                     ()                 ()             ()                   ()                    ()        ()          ()                ()                        ()          ()              ()                ()                  ()                    ()                   ()                 ()     
                  ()                         ()                         ()                 ()               ()         ()             ()           ()                         ()          ()             ()               ()              ()                    ()             ()            ()                    ()              ()           ()             ()              ()                  ()               ()                     ()       ()                   ()           ()               ()              ()              ()            ()             ()       ()       ()               None                                  ()  C:\Users\Gamer\anaconda3\envs\dfdir\seleniumpictures\101.png            ()  CDwindow-5EB51E090E4DDEB92ECFA74D9FBF90D3  CDwindow-5EB51E090E4DDEB92ECFA74D9FBF90D3()
#

# print(df.loc[df.aa_outerText.str.contains('Vaccines.gov',na=False)].iloc[0])
# Here is a detailed view of a single element (one row of the DataFrame).
# It contains enough information to locate any item on the page.

# i n d e x █ index                               █ 68                                                                                                                                                                       █
# ████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████
# 0         █ element                             █ <selenium.webdriver.remote.webelement.WebElement (session="28e4140b6737882b119b3c693a8fd816", element="93eee670-1b4b-4896-b0e3-55357578413c")>                           █
# 1         █ frame                               █ mainframe                                                                                                                                                                █
# 2         █ elements_in_frame                   █ 122                                                                                                                                                                      █
# 3         █ aa_relList                          █ noopener noreferrer                                                                                                                                                      █
# 4         █ aa_text                             █ Visit Vaccines.gov                                                                                                                                                       █
# 5         █ aa_origin                           █ https://www.vaccines.gov                                                                                                                                                 █
# 6         █ aa_host                             █ www.vaccines.gov                                                                                                                                                         █
# 7         █ aa_hostname                         █ www.vaccines.gov                                                                                                                                                         █
# 8         █ aa_pathname                         █ /                                                                                                                                                                        █
# 9         █ aa_href                             █ https://www.vaccines.gov/                                                                                                                                                █
# 10        █ aa_offsetParent                     █ [object HTMLDivElement]                                                                                                                                                  █
# 11        █ aa_offsetTop                        █ 333                                                                                                                                                                      █
# 12        █ aa_offsetLeft                       █ <NA>                                                                                                                                                                     █
# 13        █ aa_offsetWidth                      █ 882                                                                                                                                                                      █
# 14        █ aa_offsetHeight                     █ 57                                                                                                                                                                       █
# 15        █ aa_innerText                        █ Visit Vaccines.gov                                                                                                                                                       █
# 16        █ aa_outerText                        █ Visit Vaccines.gov                                                                                                                                                       █
# 17        █ aa_className                        █ home-topper__btn home-topper__btn--mobile btn                                                                                                                            █
# 18        █ aa_classList                        █ home-topper__btn home-topper__btn--mobile btn                                                                                                                            █
# 19        █ aa_innerHTML                        █ Visit Vaccines.gov                                                                                                                                                       █
# 20        █ aa_outerHTML                        █ <a href="https://www.vaccines.gov/" class="home-topper__btn home-topper__btn--mobile btn" target="_blank" rel="noopener noreferrer">\n\t\t\t\t\tVisit Vaccines.gov</a>   █
# 21        █ aa_scrollWidth                      █ 878                                                                                                                                                                      █
# 22        █ aa_scrollHeight                     █ 53                                                                                                                                                                       █
# 23        █ aa_clientWidth                      █ 878                                                                                                                                                                      █
# 24        █ aa_clientHeight                     █ 53                                                                                                                                                                       █
# 25        █ aa_previousElementSibling           █ [object HTMLDivElement]                                                                                                                                                  █
# 26        █ aa_nextElementSibling               █ <NA>                                                                                                                                                                     █
# 27        █ aa_parentNode                       █ [object HTMLDivElement]                                                                                                                                                  █
# 28        █ aa_parentElement                    █ [object HTMLDivElement]                                                                                                                                                  █
# 29        █ aa_firstChild                       █ [object Text]                                                                                                                                                            █
# 30        █ aa_lastChild                        █ [object Text]                                                                                                                                                            █
# 31        █ aa_nextSibling                      █ [object Text]                                                                                                                                                            █
# 32        █ aa_textContent                      █ Visit Vaccines.gov                                                                                                                                                       █
# 33        █ aa_rel                              █ noopener noreferrer                                                                                                                                                      █
# 34        █ aa_firstElementChild                █ <NA>                                                                                                                                                                     █
# 35        █ aa_lastElementChild                 █ <NA>                                                                                                                                                                     █
# 36        █ aa_childElementCount                █ <NA>                                                                                                                                                                     █
# 37        █ js_toString                         █ ()                                                                                                                                                                       █
# 38        █ js_attachInternals                  █ ()                                                                                                                                                                       █
# 39        █ js_blur                             █ ()                                                                                                                                                                       █
# 40        █ js_click                            █ ()                                                                                                                                                                       █
# 41        █ js_focus                            █ ()                                                                                                                                                                       █
# 42        █ js_after                            █ ()                                                                                                                                                                       █
# 43        █ js_animate                          █ ()                                                                                                                                                                       █
# 44        █ js_append                           █ ()                                                                                                                                                                       █
# 45        █ js_attachShadow                     █ ()                                                                                                                                                                       █
# 46        █ js_before                           █ ()                                                                                                                                                                       █
# 47        █ js_closest                          █ ()                                                                                                                                                                       █
# 48        █ js_computedStyleMap                 █ ()                                                                                                                                                                       █
# 49        █ js_getAttribute                     █ ()                                                                                                                                                                       █
# 50        █ js_getAttributeNS                   █ ()                                                                                                                                                                       █
# 51        █ js_getAttributeNames                █ ()                                                                                                                                                                       █
# 52        █ js_getAttributeNode                 █ ()                                                                                                                                                                       █
# 53        █ js_getAttributeNodeNS               █ ()                                                                                                                                                                       █
# 54        █ js_getBoundingClientRect            █ ()                                                                                                                                                                       █
# 55        █ js_getClientRects                   █ ()                                                                                                                                                                       █
# 56        █ js_getElementsByClassName           █ ()                                                                                                                                                                       █
# 57        █ js_getElementsByTagName             █ ()                                                                                                                                                                       █
# 58        █ js_getElementsByTagNameNS           █ ()                                                                                                                                                                       █
# 59        █ js_getInnerHTML                     █ ()                                                                                                                                                                       █
# 60        █ js_hasAttribute                     █ ()                                                                                                                                                                       █
# 61        █ js_hasAttributeNS                   █ ()                                                                                                                                                                       █
# 62        █ js_hasAttributes                    █ ()                                                                                                                                                                       █
# 63        █ js_hasPointerCapture                █ ()                                                                                                                                                                       █
# 64        █ js_insertAdjacentElement            █ ()                                                                                                                                                                       █
# 65        █ js_insertAdjacentHTML               █ ()                                                                                                                                                                       █
# 66        █ js_insertAdjacentText               █ ()                                                                                                                                                                       █
# 67        █ js_matches                          █ ()                                                                                                                                                                       █
# 68        █ js_prepend                          █ ()                                                                                                                                                                       █
# 69        █ js_querySelector                    █ ()                                                                                                                                                                       █
# 70        █ js_querySelectorAll                 █ ()                                                                                                                                                                       █
# 71        █ js_releasePointerCapture            █ ()                                                                                                                                                                       █
# 72        █ js_remove                           █ ()                                                                                                                                                                       █
# 73        █ js_removeAttribute                  █ ()                                                                                                                                                                       █
# 74        █ js_removeAttributeNS                █ ()                                                                                                                                                                       █
# 75        █ js_removeAttributeNode              █ ()                                                                                                                                                                       █
# 76        █ js_replaceChildren                  █ ()                                                                                                                                                                       █
# 77        █ js_replaceWith                      █ ()                                                                                                                                                                       █
# 78        █ js_requestFullscreen                █ ()                                                                                                                                                                       █
# 79        █ js_requestPointerLock               █ ()                                                                                                                                                                       █
# 80        █ js_scroll                           █ ()                                                                                                                                                                       █
# 81        █ js_scrollBy                         █ ()                                                                                                                                                                       █
# 82        █ js_scrollIntoView                   █ ()                                                                                                                                                                       █
# 83        █ js_scrollIntoViewIfNeeded           █ ()                                                                                                                                                                       █
# 84        █ js_scrollTo                         █ ()                                                                                                                                                                       █
# 85        █ js_setAttribute                     █ ()                                                                                                                                                                       █
# 86        █ js_setAttributeNS                   █ ()                                                                                                                                                                       █
# 87        █ js_setAttributeNode                 █ ()                                                                                                                                                                       █
# 88        █ js_setAttributeNodeNS               █ ()                                                                                                                                                                       █
# 89        █ js_setPointerCapture                █ ()                                                                                                                                                                       █
# 90        █ js_toggleAttribute                  █ ()                                                                                                                                                                       █
# 91        █ js_webkitMatchesSelector            █ ()                                                                                                                                                                       █
# 92        █ js_webkitRequestFullScreen          █ ()                                                                                                                                                                       █
# 93        █ js_webkitRequestFullscreen          █ ()                                                                                                                                                                       █
# 94        █ js_checkVisibility                  █ ()                                                                                                                                                                       █
# 95        █ js_getAnimations                    █ ()                                                                                                                                                                       █
# 96        █ js_setHTML                          █ ()                                                                                                                                                                       █
# 97        █ js_appendChild                      █ ()                                                                                                                                                                       █
# 98        █ js_cloneNode                        █ ()                                                                                                                                                                       █
# 99        █ js_compareDocumentPosition          █ ()                                                                                                                                                                       █
# 100       █ js_contains                         █ ()                                                                                                                                                                       █
# 101       █ js_getRootNode                      █ ()                                                                                                                                                                       █
# 102       █ js_hasChildNodes                    █ ()                                                                                                                                                                       █
# 103       █ js_insertBefore                     █ ()                                                                                                                                                                       █
# 104       █ js_isDefaultNamespace               █ ()                                                                                                                                                                       █
# 105       █ js_isEqualNode                      █ ()                                                                                                                                                                       █
# 106       █ js_isSameNode                       █ ()                                                                                                                                                                       █
# 107       █ js_lookupNamespaceURI               █ ()                                                                                                                                                                       █
# 108       █ js_lookupPrefix                     █ ()                                                                                                                                                                       █
# 109       █ js_normalize                        █ ()                                                                                                                                                                       █
# 110       █ js_removeChild                      █ ()                                                                                                                                                                       █
# 111       █ js_replaceChild                     █ ()                                                                                                                                                                       █
# 112       █ js_addEventListener                 █ ()                                                                                                                                                                       █
# 113       █ js_dispatchEvent                    █ ()                                                                                                                                                                       █
# 114       █ js_removeEventListener              █ ()                                                                                                                                                                       █
# 115       █ js_wheel                            █ ()                                                                                                                                                                       █
# 116       █ js_change_html_value                █ ()                                                                                                                                                                       █
# 117       █ se_send_keys                        █ ()                                                                                                                                                                       █
# 118       █ se_find_elements                    █ ()                                                                                                                                                                       █
# 119       █ se_find_element                     █ ()                                                                                                                                                                       █
# 120       █ se_is_displayed                     █ ()                                                                                                                                                                       █
# 121       █ se_is_enabled                       █ ()                                                                                                                                                                       █
# 122       █ se_is_selected                      █ ()                                                                                                                                                                       █
# 123       █ se_clear                            █ ()                                                                                                                                                                       █
# 124       █ se_click                            █ ()                                                                                                                                                                       █
# 125       █ se_switch_to_frame                  █ None                                                                                                                                                                     █
# 126       █ se_location_once_scrolled_into_view █ ()                                                                                                                                                                       █
# 127       █ se_get_screenshot_as_file           █ C:\Users\Gamer\anaconda3\envs\dfdir\seleniumpictures\68.png                                                                                                              █
# 128       █ se_screenshot                       █ ()                                                                                                                                                                       █
# 129       █ aa_window_handle                    █ CDwindow-5EB51E090E4DDEB92ECFA74D9FBF90D3                                                                                                                                █
# 130       █ aa_window_switch                    █ CDwindow-5EB51E090E4DDEB92ECFA74D9FBF90D3()                                                                                                                              █
# i n d e x █ rows █
# ██████████████████
# Series    █ 131  █

# Here is one way to click on an element:
# js_click() automatically switches to the correct frame before clicking
# df.loc[df.aa_outerText.str.contains("Vaccines.gov", na=False)].iloc[0].js_click()