Closed Danp2 closed 1 year ago
The following code is a first attempt to improve the speed. There is room for additional improvements, so feel free to suggest changes.
#include "wd_helper.au3"
#include "wd_capabilities.au3"
#include <_HtmlTable2Array.au3>
#include <IE.au3>
; Benchmark script: loads a local table.html in Firefox and times three ways of
; reading its first <table> into an array, printing the elapsed seconds for each.
; Silence WebDriver debug logging so it does not distort the timings.
$_WD_DEBUG=$_WD_DEBUG_None
; Error message boxes and OutputDebug are enabled only when running compiled.
_WD_Option("errormsgbox", (@Compiled = 1))
_WD_Option("OutputDebug", (@Compiled = 1))
; "" evaluates as False inside SetupGecko, so the browser is started with a visible window.
local $sCapabilities=SetupGecko("")
_WD_Startup()
local $_WD_CreateSession=_WD_CreateSession($sCapabilities)
; Build a file:/// URL for table.html located next to this script (backslashes -> forward slashes).
local $url="file:///"&stringreplace(@scriptdir&"\table.html","\","/")
_WD_Navigate($_WD_CreateSession,$url)
_WD_LoadWait($_WD_CreateSession)
local $hTimer,$TimerDiff
; --- Method 1: the stock _WD_GetTable ---
$hTimer = TimerInit()
$aTable = _WD_GetTable($_WD_CreateSession, "//table")
; TimerDiff returns milliseconds; convert to seconds for the report.
$TimerDiff=TimerDiff($hTimer)/1000
consolewrite(@crlf&"second : "&$TimerDiff&@crlf)
;~ the data is acquired slowly
_ArrayDisplay($aTable)
;~ do you like this solution?
; --- Method 2: fetch the page HTML once via JavaScript, write it into an embedded
; Shell.Explorer.2 (IE) control and let the IE UDF convert the table ---
$hTimer = TimerInit()
$Shell_Explorer_2_GUICreate=GUICreate("")
$oIE_se2=ObjCreate("Shell.Explorer.2")
GUICtrlCreateObj($oIE_se2,0,0)
_ienavigate($oIE_se2,"about:blank")
local $html=_WD_ExecuteScript($_WD_CreateSession,'return document.documentElement.outerHTML',Default,Default, $_WD_JSON_Value)
_IEDocWriteHTML($oIE_se2,$html)
; Index 0 selects the first table of the document written above.
local $oTable=_IETableGetCollection($oIE_se2,0)
; NOTE(review): the second argument is presumably the UDF's transpose flag - confirm against IE.au3.
$aTable = _IETableWriteToArray($oTable,true)
$TimerDiff=TimerDiff($hTimer)/1000
consolewrite(@crlf&"second : "&$TimerDiff&@crlf)
_ArrayDisplay($aTable)
; --- Method 3: the alternative JavaScript-based _WD_GetTable2 defined in this script ---
$hTimer = TimerInit()
$aTable = _WD_GetTable2($_WD_CreateSession, "//table")
$TimerDiff=TimerDiff($hTimer)/1000
consolewrite(@crlf&"second : "&$TimerDiff&@crlf) ;
_ArrayDisplay($aTable)
; Tear down the browser session and stop the driver.
_WD_DeleteSession($_WD_CreateSession)
_WD_Shutdown()
; #FUNCTION# ====================================================================================================================
; Name ..........: _WD_GetTable2
; Description ...: Read a table (or table-like element tree) into a 2D array using a single JavaScript call.
; Syntax ........: _WD_GetTable2($sSession, $sBaseElement[, $sRowsSelector = Default[, $sColsSelector = Default]])
; Parameters ....: $sSession      - Session ID from _WD_CreateSession
;                  $sBaseElement  - XPath that locates the table element
;                  $sRowsSelector - [optional] CSS selector for rows, evaluated on the table element. Default is "tr"
;                  $sColsSelector - [optional] CSS selector for cells, evaluated on each row. Default is "td, th"
; Return values .: Success - 2D array [rows][widest row]; cells missing from shorter rows are left empty
;                  Failure - "" and sets @error (element not found, script failure, or no rows/cells matched)
; Remarks .......: All cell texts are collected in the browser in one round trip. The array is allocated once and filled
;                  in place, instead of transposing and appending every row with _ArrayTranspose / _ArrayAdd.
; ===============================================================================================================================
Func _WD_GetTable2($sSession, $sBaseElement, $sRowsSelector = Default, $sColsSelector = Default)
	Local Const $sFuncName = "_WD_GetTable2"
	Local $aTable = "" ; returned unchanged on every failure path
	$_WD_HTTPRESULT = 0
	$_WD_HTTPRESPONSE = ''

	If $sRowsSelector = Default Then $sRowsSelector = "tr"
	If $sColsSelector = Default Then $sColsSelector = "td, th"

	; Get the table element
	Local $sElement = _WD_FindElement($sSession, $_WD_LOCATOR_ByXPath, $sBaseElement)
	Local $iErr = @error

	If $iErr = $_WD_ERROR_Success Then
		; https://stackoverflow.com/questions/64842157
		Local $sScript = "return [...arguments[0].querySelectorAll(arguments[1])]" & _
				".map(row => [...row.querySelectorAll(arguments[2])]" & _
				".map(cell => cell.textContent));"

		; The selectors are embedded in a JSON argument list, so escape backslashes and double quotes
		Local $sRowsArg = StringReplace(StringReplace($sRowsSelector, "\", "\\"), '"', '\"')
		Local $sColsArg = StringReplace(StringReplace($sColsSelector, "\", "\\"), '"', '\"')
		Local $sArgs = __WD_JsonElement($sElement) & ', "' & $sRowsArg & '", "' & $sColsArg & '"'

		; Result is an array of rows, each row being an array of cell texts
		Local $aRows = _WD_ExecuteScript($sSession, $sScript, $sArgs, Default, $_WD_JSON_Value)
		$iErr = @error

		If $iErr = $_WD_ERROR_Success Then
			Local $iRows = UBound($aRows)
			Local $iCols = 0

			; Rows may differ in length (colspan, header rows), so size the array by the widest one
			For $i = 0 To $iRows - 1
				If UBound($aRows[$i]) > $iCols Then $iCols = UBound($aRows[$i])
			Next

			If $iRows = 0 Or $iCols = 0 Then
				; Nothing matched the row / cell selectors
				$iErr = $_WD_ERROR_NoMatch
			Else
				Local $aResult[$iRows][$iCols]
				Local $aRow

				For $i = 0 To $iRows - 1
					$aRow = $aRows[$i]
					For $j = 0 To UBound($aRow) - 1
						$aResult[$i][$j] = $aRow[$j]
					Next
				Next

				$aTable = $aResult
			EndIf
		EndIf
	EndIf

	Return SetError(__WD_Error($sFuncName, $iErr), 0, $aTable)
EndFunc ;==>_WD_GetTable2
; Configure the WebDriver UDF for geckodriver / Firefox and return the capabilities string.
; $bHeadless - when it evaluates to True the '--headless' argument is added.
Func SetupGecko($bHeadless = False)
	; Driver executable, its command line and the port it listens on
	_WD_Option('Driver', 'geckodriver.exe')
	_WD_Option('DriverParams', '--log trace')
	_WD_Option('Port', 4444)

	; Start a fresh capabilities definition for Firefox
	_WD_CapabilitiesStartup()
	_WD_CapabilitiesAdd('alwaysMatch', 'firefox')
	_WD_CapabilitiesAdd('browserName', 'firefox')
	_WD_CapabilitiesAdd('acceptInsecureCerts', True)

	; REMARKS
	; A 32bit geckodriver.exe may require the 'binary' option to be set.
	; A 64bit geckodriver.exe should not need it, but setting it does no harm,
	; so it is added whenever the browser path lookup succeeds.
	Local $sFirefoxPath = _WD_GetBrowserPath("firefox")
	If Not @error Then _WD_CapabilitiesAdd('binary', $sFirefoxPath)

	If $bHeadless Then
		_WD_CapabilitiesAdd('args', '--headless')
	EndIf

	_WD_CapabilitiesDump(@ScriptLineNumber) ; dump current Capabilities setting to console - only for testing in this demo
	Return _WD_CapabilitiesGet()
EndFunc ;==>SetupGecko
HTML file can be downloaded from here
These are my results --
Method | Seconds |
---|---|
W/O HT2A | 170.46 |
With HT2A | 53.44 |
IE | 1.03 |
JS | 5.20 |
Edit: Reformatted and added results without _HtmlTable2Array
I was testing the first version on each supported browser and noticed that IE mode is slow. I will test it again with the new version ASAP.
Here is an abbreviated example using a React table --
local $url="https://glin.github.io/reactable/articles/examples.html"
_WD_Navigate($sSession, $url)
_WD_LoadWait($sSession)
$sTable = "(//div[@role='table'])[1]"
$sRows = "div[role='row']"
$sCols = "div[role='row'] > div[class^='rt-t']"
$aTest1 = _WD_GetTable2($sSession, $sTable, $sRows, $sCols)
_ArrayDisplay($aTest1, "React Table #2")
JS 5.20
which part is related to JS test ?
These are my results --
Method | Seconds |
---|---|
W/O HT2A | 170.46 |
With HT2A | 53.44 |
IE | 1.03 |
JS | 5.20 |
Edit: Reformatted and added results without _HtmlTable2Array
I assume that IE version is only for testing and you do not plan to implement this feature ?
@mlipok Correct. The version using IE was suggested by a forum member as a way to improve speed. The new method uses JS to reduce the overhead of making repetitive calls to the webdriver.
See the JS code in _WD_GetTable2 above for an early version of the improved code. FYI, I just merged the latest revisions in #485.
Here's a further breakdown on processing time --
When | Seconds |
---|---|
Post JS | 2.0330771 |
Post AI | 5.7190732 |
The array processing in AutoIt is adding 3.5 seconds to the overall time it takes for the function to execute. Anyone have suggestions on how to reduce this even further? Maybe there's a way to modify the JS code so that we can avoid transposing the array. 🤔
so using: https://www.w3schools.com/html/html_tables.asp and:
[...document.querySelector('table#customers').querySelectorAll('tr')]
.map(row => [...row.querySelectorAll('td, th')]
.map(cell => cell.textContent));
Right... the result from JS is currently an array of arrays. This is then being manipulated in AU3 to create a multidimensional array, which slows down the overall function.
I was able to create such JS code:
var ElementSelector = 'table#customers'
var RowsSelector = 'tr'
var ColsSelector = 'td, th'
GetTable(ElementSelector, RowsSelector, ColsSelector)
/**
 * Collect the text of every cell of a table into one delimited string.
 *
 * Cells of a row are separated by '|' and rows by '\n'. No separator is
 * emitted after the last row, so splitting the result on '\n' does not
 * produce a spurious empty final row.
 *
 * @param {string} ElementSelector CSS selector of the table element.
 * @param {string} RowsSelector    CSS selector of the rows, evaluated on the table.
 * @param {string} ColsSelector    CSS selector of the cells, evaluated on each row.
 * @returns {string} The delimited table text.
 */
function GetTable(ElementSelector, RowsSelector, ColsSelector) {
  var text = '';
  var row_set = document.querySelector(ElementSelector).querySelectorAll(RowsSelector);
  for (let irow = 0, cell_set; irow < row_set.length; irow++) {
    if (irow > 0) { text += '\n'; }
    cell_set = row_set[irow].querySelectorAll(ColsSelector);
    // No per-cell console logging here: it dominates the run time on large tables.
    for (let icell = 0; icell < cell_set.length; icell++) {
      if (icell > 0) { text += '|'; }
      text += cell_set[icell].textContent;
    }
  }
  return text;
}
my base was: function GetOptions(SelectElement)
from _WD_ElementSelectAction()
it should be easy to use _ArrayAdd()
like it was used here:
https://github.com/Danp2/au3WebDriver/blob/90f909c0c1f8395469119cec81282276855b9c65/wd_helper.au3#L1670
Please perform further step to test it.
small improvements
var ElementSelector = 'table#customers'
var RowsSelector = 'tr'
var ColsSelector = 'td, th'
GetTable(ElementSelector, RowsSelector, ColsSelector)
/**
 * Serialise a table to text: cells joined with '|', rows joined with '\n'.
 *
 * @param {string} ElementSelector CSS selector of the table element.
 * @param {string} RowsSelector    CSS selector of the rows, evaluated on the table.
 * @param {string} ColsSelector    CSS selector of the cells, evaluated on each row.
 * @returns {string} The delimited table text.
 */
function GetTable(ElementSelector, RowsSelector, ColsSelector) {
  const tableRows = document.querySelector(ElementSelector).querySelectorAll(RowsSelector);
  // Template literal keeps the same string coercion as `text += cell.textContent`.
  const rowToText = (rowNode) =>
    Array.from(rowNode.querySelectorAll(ColsSelector), (cellNode) => `${cellNode.textContent}`).join('|');
  return Array.from(tableRows, (rowNode) => rowToText(rowNode)).join('\n');
}
my base was: function GetOptions(SelectElement) from _WD_ElementSelectAction()
it should be easy to use _ArrayAdd() like it was used here:
Thanks for your efforts. I'm unsure if this is a good direction to go since the number of columns isn't fixed like in the situation you referenced.
This works for basic array assignment --
Local $aResult = [[1, 2, 3, 4, 5], [2, 2, 3, 4, 5]]
_ArrayDisplay($aResult)
This is the same basic format that is returned via JS, except it is a string. So how can we make something like this work?
Local $sResponse = '[[1, 2, 3, 4, 5], [2, 2, 3, 4, 5]]'
Local $aResult = Execute($sResponse) ; or Eval(), etc.
_ArrayDisplay($aResult)
since the number of columns isn't fixed
I'm not sure what exactly you mean... (English issues on my side.)
Could you give an example, please?
Do you mean that a table can have any number of columns, but for a given table the number is fixed?
I mean that there are a known number of columns in the scenario you referenced (7 in the case of GetOptions), whereas the number of columns can vary depending on the target table with _WD_GetTable.
Let me know if that still isn't clear and I'll try again.
I found some code by @jguinch that quickly build the array from a string.
; https://www.autoitscript.com/forum/topic/179113-is-there-a-easy-way-to-parse-string-to-array/?do=findComment&comment=1285706
; Convert a bracketed list-of-lists string, e.g. "[[1, 2, 3], [4, 5]]", into a 2D array.
; The result has one row per inner "[...]" group and as many columns as the longest group;
; cells missing from shorter groups are left unset. Quoted items lose their surrounding quotes.
Func __Make2Array($s)
	; First pass: capture the text between each pair of inner brackets
	Local $aRowText = StringRegExp($s, "(?<=[\[,])\s*\[(.*?)\]\s*[,\]]", 3)
	Local $iRowCount = UBound($aRowText)
	Local $iWidest = 0, $iWidth

	; Second pass: split every captured row into its items (in place) and track the widest row
	For $iRow = 0 To $iRowCount - 1
		$aRowText[$iRow] = StringRegExp($aRowText[$iRow], "(?:^|,)\s*(?|'([^']*)'|""([^""]*)""|(.*?))(?=\s*(?:,|$))", 3)
		$iWidth = UBound($aRowText[$iRow])
		If $iWidest < $iWidth Then $iWidest = $iWidth
	Next

	; Copy the nested arrays into a rectangular 2D array
	Local $aGrid[$iRowCount][$iWidest]
	Local $aCells
	For $iRow = 0 To $iRowCount - 1
		$aCells = $aRowText[$iRow]
		For $iCol = 0 To UBound($aCells) - 1
			$aGrid[$iRow][$iCol] = $aCells[$iCol]
		Next
	Next

	Return $aGrid
EndFunc
I mean that there are a known number of columns in the scenario you referenced (7 in the case of GetOptions), whereas the number of columns can vary depending on the target table with _WD_GetTable.
it still should be easy to use _ArrayAdd()
like it was used here:
https://github.com/Danp2/au3WebDriver/blob/90f909c0c1f8395469119cec81282276855b9c65/wd_helper.au3#L1669-L1670
with this modified JavaScript :
var ElementSelector = 'table#customers'
var RowsSelector = 'tr'
var ColsSelector = 'td, th'
GetTable(ElementSelector, RowsSelector, ColsSelector)
/**
 * Serialise a table to delimited text and report how many columns it needs.
 *
 * Cells of a row are separated by '|' and rows by '\n'.
 *
 * @param {string} ElementSelector CSS selector of the table element.
 * @param {string} RowsSelector    CSS selector of the rows, evaluated on the table.
 * @param {string} ColsSelector    CSS selector of the cells, evaluated on each row.
 * @returns {{text: string, columns: number}} The delimited text and the cell
 *   count of the widest row (0 when no row matched).
 */
function GetTable(ElementSelector, RowsSelector, ColsSelector) {
  var text = '';
  var columns_cnt = 0;
  var row_set = document.querySelector(ElementSelector).querySelectorAll(RowsSelector);
  for (let irow = 0, cell_set; irow < row_set.length; irow++) {
    if (irow > 0) { text += '\n'; }
    cell_set = row_set[irow].querySelectorAll(ColsSelector);
    // Keep the maximum: the caller sizes its array from this value, so the
    // width of the last row alone would be too small for ragged tables.
    columns_cnt = Math.max(columns_cnt, cell_set.length);
    for (let icell = 0; icell < cell_set.length; icell++) {
      if (icell > 0) { text += '|'; }
      text += cell_set[icell].textContent;
    }
  }
  return {
    text: text,
    columns: columns_cnt
  };
}
Let me know if that still isn't clear and I'll try again.
let me know if that still isn't so easy and I'll try again ;)
btw.
do we still need to use _HtmlTableGetWriteToArray
?
There can be some issues with the delimiters.
For example, a specific cell can contain \n or even |,
and such a case will break our returned text,
so try this instead:
var ElementSelector = 'table#customers'
var RowsSelector = 'tr'
var ColsSelector = 'td, th'
GetTable(ElementSelector, RowsSelector, ColsSelector)
/**
 * Serialise a table to delimited text and report how many columns it needs.
 *
 * Multi-character delimiters are used so that a '|' or a line break inside a
 * cell does not split it: cells are separated by '@|@' and rows by '@\n@'.
 *
 * @param {string} ElementSelector CSS selector of the table element.
 * @param {string} RowsSelector    CSS selector of the rows, evaluated on the table.
 * @param {string} ColsSelector    CSS selector of the cells, evaluated on each row.
 * @returns {{text: string, columns: number}} The delimited text and the cell
 *   count of the widest row (0 when no row matched).
 */
function GetTable(ElementSelector, RowsSelector, ColsSelector) {
  var text = '';
  var columns_cnt = 0;
  var row_set = document.querySelector(ElementSelector).querySelectorAll(RowsSelector);
  for (let irow = 0, cell_set; irow < row_set.length; irow++) {
    if (irow > 0) { text += '@\n@'; }
    cell_set = row_set[irow].querySelectorAll(ColsSelector);
    // Keep the maximum: the caller sizes its array from this value, so the
    // width of the last row alone would be too small for ragged tables.
    columns_cnt = Math.max(columns_cnt, cell_set.length);
    for (let icell = 0; icell < cell_set.length; icell++) {
      if (icell > 0) { text += '@|@'; }
      text += cell_set[icell].textContent;
    }
  }
  return {
    text: text,
    columns: columns_cnt
  };
}
and some improvements for error checking
var ElementSelector = 'table#customers'
var RowsSelector = 'tr'
var ColsSelector = 'td, th'
GetTable(ElementSelector, RowsSelector, ColsSelector)
/**
 * Serialise a table to delimited text, with error codes for missing elements.
 *
 * Cells are separated by '@|@' and rows by '@\n@'.
 *
 * @param {string} ElementSelector CSS selector of the table element.
 * @param {string} RowsSelector    CSS selector of the rows, evaluated on the table.
 * @param {string} ColsSelector    CSS selector of the cells, evaluated on each row.
 * @returns {{text: string, columns: number}} On success the delimited text and
 *   the cell count of the widest row. On failure `text` is '' and `columns` is
 *   -1 (table not found) or -2 (no rows matched).
 */
function GetTable(ElementSelector, RowsSelector, ColsSelector) {
  var text = '';
  var columns = 0;
  var table = document.querySelector(ElementSelector);
  if (table == null) {
    return {text: '', columns: -1};
  }
  var row_set = table.querySelectorAll(RowsSelector);
  // querySelectorAll never returns null; an unmatched selector gives an empty
  // list, so test the length, and do it before anything dereferences a row.
  if (row_set.length == 0) {
    return {text: '', columns: -2};
  }
  for (let irow = 0, cell_set; irow < row_set.length; irow++) {
    if (irow > 0) { text += '@\n@'; }
    cell_set = row_set[irow].querySelectorAll(ColsSelector);
    if (cell_set.length > columns) { columns = cell_set.length; }
    for (let icell = 0; icell < cell_set.length; icell++) {
      if (icell > 0) { text += '@|@'; }
      text += cell_set[icell].textContent;
    }
  }
  return {text: text, columns: columns};
}
Final (I hope) error-checking improvements:
var ElementSelector = 'table#customers';
var RowsSelector = 'tr';
var ColsSelector = 'td, th';
GetTable(ElementSelector, RowsSelector, ColsSelector);
/**
 * Serialise a table to delimited text, with error codes for missing elements.
 *
 * Cells are separated by '@|@' and rows by '@\n@'.
 *
 * @param {string} ElementSelector CSS selector of the table element.
 * @param {string} RowsSelector    CSS selector of the rows, evaluated on the table.
 * @param {string} ColsSelector    CSS selector of the cells, evaluated on each row.
 * @returns {{text: string, columns: number}} On success the delimited text and
 *   the cell count of the widest row. On failure `text` is '' and `columns` is
 *   -1 (table not found), -2 (no rows matched) or -3 (a row has no cells).
 */
function GetTable(ElementSelector, RowsSelector, ColsSelector) {
  var text = '';
  var columns = 0;
  var table = document.querySelector(ElementSelector);
  if (table == null) { return {text: '', columns: -1}; }
  var row_set = table.querySelectorAll(RowsSelector);
  if (row_set.length == 0) { return {text: '', columns: -2}; }
  for (let irow = 0, cell_set; irow < row_set.length; irow++) {
    if (irow > 0) { text += '@\n@'; }
    cell_set = row_set[irow].querySelectorAll(ColsSelector);
    if (cell_set.length == 0) { return {text: '', columns: -3}; }
    // Track the widest row while iterating; the first row alone can be
    // narrower than later ones, and this also avoids re-querying the DOM.
    if (cell_set.length > columns) { columns = cell_set.length; }
    for (let icell = 0; icell < cell_set.length; icell++) {
      if (icell > 0) { text += '@|@'; }
      text += cell_set[icell].textContent;
    }
  }
  return {text: text, columns: columns};
}
In order to test it, try changing each of these parameters:
var ElementSelector = 'table#customers'
var RowsSelector = 'tr'
var ColsSelector = 'td, th'
for example
do we need to return also rows
:
var obj ={
text: text,
columns: table.querySelector(RowsSelector).querySelectorAll(ColsSelector).length,
rows: .....
}
?
let me know if that still isn't so easy and I'll try again ;)
How is the target array declared in AutoIt if you don't know the number of columns?
do we still need to use _HtmlTableGetWriteToArray ?
No, see https://github.com/Danp2/au3WebDriver/pull/485/commits/983bc07a04059df94667a61989408e95702e8261.
I will commit my latest changes that include the usage of Make2Array
shortly, and then we can check to see if further error checking is needed.
let me know if that still isn't so easy and I'll try again ;)
How is the target array declared in AutoIt if you don't know the number of columns?
for this reason I added return not as text but as JSON object:
var obj ={
text: text,
columns: columns_cnt
};
and number of column is known.
@mlipok Please check out the latest update to the PR and try it out.
Which testing snippet is current ?
Try this --
#include "wd_helper.au3"
#include "wd_capabilities.au3"
#include <IE.au3>
$_WD_DEBUG=$_WD_DEBUG_None
_WD_Option("errormsgbox", (@Compiled = 1))
_WD_Option("OutputDebug", (@Compiled = 1))
local $sCapabilities=SetupGecko("")
_WD_Startup()
local $_WD_CreateSession=_WD_CreateSession($sCapabilities)
local $url="file:///"&stringreplace(@scriptdir&"\table.html","\","/")
_WD_Navigate($_WD_CreateSession,$url)
_WD_LoadWait($_WD_CreateSession)
local $hTimer,$TimerDiff
$hTimer = TimerInit()
$aTable = _WD_GetTable($_WD_CreateSession, "//table")
$TimerDiff=TimerDiff($hTimer)/1000
consolewrite(@crlf&"second : "&$TimerDiff&@crlf)
_ArrayDisplay($aTable, "AI")
;~ do you like this solution?
$hTimer = TimerInit()
$Shell_Explorer_2_GUICreate=GUICreate("")
$oIE_se2=ObjCreate("Shell.Explorer.2")
GUICtrlCreateObj($oIE_se2,0,0)
_ienavigate($oIE_se2,"about:blank")
local $html=_WD_ExecuteScript($_WD_CreateSession,'return document.documentElement.outerHTML',Default,Default, $_WD_JSON_Value)
_IEDocWriteHTML($oIE_se2,$html)
local $oTable=_IETableGetCollection($oIE_se2,0)
$aTable = _IETableWriteToArray($oTable,true)
$TimerDiff=TimerDiff($hTimer)/1000
consolewrite(@crlf&"second : "&$TimerDiff&@crlf)
_ArrayDisplay($aTable, "IE")
_WD_DeleteSession($_WD_CreateSession)
_WD_Shutdown()
Func SetupGecko($bHeadless = False)
_WD_Option('Driver', 'geckodriver.exe')
_WD_Option('DriverParams', '--log trace')
_WD_Option('Port', 4444)
_WD_CapabilitiesStartup()
_WD_CapabilitiesAdd('alwaysMatch', 'firefox')
_WD_CapabilitiesAdd('browserName', 'firefox')
_WD_CapabilitiesAdd('acceptInsecureCerts', True)
; REMARKS
; When using 32bit geckodriver.exe, you may need to set 'binary' option.
; This shouldn't be needed when using 64bit geckodriver.exe,
; but at the same time setting it is not affecting the script.
Local $sPath = _WD_GetBrowserPath("firefox")
If Not @error Then
_WD_CapabilitiesAdd('binary', $sPath)
EndIf
If $bHeadless Then _WD_CapabilitiesAdd('args', '--headless')
_WD_CapabilitiesDump(@ScriptLineNumber) ; dump current Capabilities setting to console - only for testing in this demo
Local $sCapabilities = _WD_CapabilitiesGet()
Return $sCapabilities
EndFunc ;==>SetupGecko
This demo shows a standard table and one with non-standard markers --
#include "wd_helper.au3"
#include "wd_capabilities.au3"
local $sCapabilities=SetupGecko("")
_WD_Startup()
local $sSession=_WD_CreateSession($sCapabilities)
local $url="https://glin.github.io/reactable/articles/examples.html"
_WD_Navigate($sSession,$url)
_WD_LoadWait($sSession)
$sTable = "(//div[@role='table'])[1]"
$sRows = "div[role='row']"
$sCols = "div[role='row'] > div[class^='rt-t']"
$aTest1 = _WD_GetTable($sSession, $sTable, $sRows, $sCols)
_ArrayDisplay($aTest1, "React Table")
$url="https://www.w3schools.com/html/html_tables.asp"
_WD_Navigate($sSession,$url)
_WD_LoadWait($sSession)
$sTable = "//table[@id='customers']"
$aTest2 = _WD_GetTable($sSession, $sTable)
_ArrayDisplay($aTest2, "Standard Table")
_WD_DeleteSession($sSession)
_WD_Shutdown()
Func SetupGecko($bHeadless = False)
_WD_Option('Driver', 'geckodriver.exe')
_WD_Option('DriverParams', '--log trace')
_WD_Option('Port', 4444)
_WD_CapabilitiesStartup()
_WD_CapabilitiesAdd('alwaysMatch', 'firefox')
_WD_CapabilitiesAdd('browserName', 'firefox')
_WD_CapabilitiesAdd('acceptInsecureCerts', True)
; REMARKS
; When using 32bit geckodriver.exe, you may need to set 'binary' option.
; This shouldn't be needed when using 64bit geckodriver.exe,
; but at the same time setting it is not affecting the script.
Local $sPath = _WD_GetBrowserPath("firefox")
If Not @error Then
_WD_CapabilitiesAdd('binary', $sPath)
ConsoleWrite("wd_demo.au3: _WD_GetBrowserPath() > " & $sPath & @CRLF)
EndIf
If $bHeadless Then _WD_CapabilitiesAdd('args', '--headless')
_WD_CapabilitiesDump(@ScriptLineNumber) ; dump current Capabilities setting to console - only for testing in this demo
Local $sCapabilities = _WD_CapabilitiesGet()
Return $sCapabilities
EndFunc ;==>SetupGecko
I'll check ASAP, probably tomorrow.
Hello. I just finished my testing with the recent PR #485.
here is my testing script:
#Region ; *** Dynamically added Include files ***
#include <Timers.au3> ; added:07/28/23 23:17:44
#EndRegion ; *** Dynamically added Include files ***
#include <Array.au3>
#include "..\wd_helper.au3"
#include "..\wd_capabilities.au3"
Global $b_AddSleep = True
Global $hTimer = TimerInit()
Global $sCapabilities = SetupGecko("")
_WD_Startup()
Global $sSession = _WD_CreateSession($sCapabilities)
_WD_DebugSwitch($_WD_DEBUG_None)
_Example()
_Example2()
_WD_DeleteSession($sSession)
_WD_Shutdown()
; Times the existing _WD_GetTable three times: once on a React (div-based) table
; and twice on the standard w3schools "customers" table. Each measurement resets
; the global $hTimer right before the call and reports through _Log().
Func _Example()
Local $url = "https://glin.github.io/reactable/articles/examples.html"
_WD_Navigate($sSession, $url)
_WD_LoadWait($sSession)
; The React table is built from <div role=...> elements, so custom row / cell selectors are needed.
Local $sTable = "(//div[@role='table'])[1]"
Local $sRows = "div[role='row']"
Local $sCols = "div[role='row'] > div[class^='rt-t']"
; Optional pause before starting the timer (controlled by the global $b_AddSleep).
If $b_AddSleep Then Sleep(5000)
$hTimer = TimerInit()
Local $aTest1 = _WD_GetTable($sSession, $sTable, $sRows, $sCols)
#forceref $aTest1
_Log(@ScriptLineNumber & ' _WD_GetTable - react')
;~ _ArrayDisplay($aTest1, "React Table")
; Second target: a plain HTML <table>, read with the default selectors.
$url = "https://www.w3schools.com/html/html_tables.asp"
_WD_Navigate($sSession, $url)
_WD_LoadWait($sSession)
$sTable = "//table[@id='customers']"
If $b_AddSleep Then Sleep(5000)
$hTimer = TimerInit()
Local $aTest2 = _WD_GetTable($sSession, $sTable)
#forceref $aTest2
_Log(@ScriptLineNumber & ' _WD_GetTable - Standard Table - test 1')
;~ _ArrayDisplay($aTest2, "Standard Table")
; Repeat the same read on the already loaded page; two pauses give a 10 s gap when $b_AddSleep is set.
If $b_AddSleep Then Sleep(5000)
If $b_AddSleep Then Sleep(5000)
$hTimer = TimerInit()
Local $aTest2b = _WD_GetTable($sSession, $sTable)
#forceref $aTest2b
_Log(@ScriptLineNumber & ' _WD_GetTable - Standard Table - test 2')
;~ _ArrayDisplay($aTest2b, "Standard Table")
EndFunc ;==>_Example
; Times the experimental _WD_GetTable2 twice on the w3schools "customers" table.
; The global $hTimer is reset right before each call and the elapsed time is
; reported through _Log().
Func _Example2()
Local $url = "https://www.w3schools.com/html/html_tables.asp"
_WD_Navigate($sSession, $url)
_WD_LoadWait($sSession)
Local $sTable = "//table[@id='customers']"
; Optional pause before starting the timer (controlled by the global $b_AddSleep).
If $b_AddSleep Then Sleep(5000)
$hTimer = TimerInit()
; First run: the returned array is discarded, only the timing matters.
_WD_GetTable2($sSession, $sTable)
_Log(@ScriptLineNumber & ' _WD_GetTable2 - Standard Table - test 1 end')
; Second run on the already loaded page; two pauses give a 10 s gap when $b_AddSleep is set.
If $b_AddSleep Then Sleep(5000)
If $b_AddSleep Then Sleep(5000)
$hTimer = TimerInit()
Local $aTest3 = _WD_GetTable2($sSession, $sTable)
#forceref $aTest3
_Log(@ScriptLineNumber & ' _WD_GetTable2 - Standard Table - test 2 end')
;~ _ArrayDisplay($aTest3, "Standard Table")
EndFunc ;==>_Example2
; Configure the WebDriver UDF for geckodriver / Firefox and return the capabilities string.
; $bHeadless - when it evaluates to True the '--headless' argument is added.
Func SetupGecko($bHeadless = False)
	; Driver executable, its command line and the port it listens on
	_WD_Option('Driver', 'geckodriver.exe')
	_WD_Option('DriverParams', '--log trace')
	_WD_Option('Port', 4444)

	; Start a fresh capabilities definition for Firefox
	_WD_CapabilitiesStartup()
	_WD_CapabilitiesAdd('alwaysMatch', 'firefox')
	_WD_CapabilitiesAdd('browserName', 'firefox')
	_WD_CapabilitiesAdd('acceptInsecureCerts', True)

	; REMARKS
	; A 32bit geckodriver.exe may require the 'binary' option to be set.
	; A 64bit geckodriver.exe should not need it, but setting it does no harm,
	; so it is added (and echoed to the console) whenever the path lookup succeeds.
	Local $sFirefoxPath = _WD_GetBrowserPath("firefox")
	If @error = 0 Then
		_WD_CapabilitiesAdd('binary', $sFirefoxPath)
		ConsoleWrite("wd_demo.au3: _WD_GetBrowserPath() > " & $sFirefoxPath & @CRLF)
	EndIf

	If $bHeadless Then
		_WD_CapabilitiesAdd('args', '--headless')
	EndIf

	_WD_CapabilitiesDump(@ScriptLineNumber) ; dump current Capabilities setting to console - only for testing in this demo
	Return _WD_CapabilitiesGet()
EndFunc ;==>SetupGecko
; #FUNCTION# ====================================================================================================================
; Name ..........: _WD_GetTable2
; Description ...: Read a table into a 2D array. The browser serialises all cells into one delimited string,
;                  which is then split into the array with a single _ArrayAdd call.
; Syntax ........: _WD_GetTable2($sSession, $sBaseElement[, $sRowsSelector = Default[, $sColsSelector = Default]])
; Parameters ....: $sSession      - Session ID from _WD_CreateSession
;                  $sBaseElement  - XPath that locates the table element
;                  $sRowsSelector - [optional] CSS selector for rows, evaluated on the table element. Default is "tr"
;                  $sColsSelector - [optional] CSS selector for cells, evaluated on each row. Default is "td, th"
; Return values .: Success - 2D array [rows][widest row]
;                  Failure - "" and sets @error (element not found, script failure, or no rows / cells matched)
; Remarks .......: The script receives the located element and both selectors as arguments[0..2] instead of using
;                  hard-coded values. It reports columns = -1 (no table), -2 (no rows) or -3 (a row without cells);
;                  any of these is returned to the caller as $_WD_ERROR_NoMatch.
;                  Cells are separated by '@|@' and rows by '@' & @LF & '@' so that a plain '|' or a line break
;                  inside a cell does not split it.
; ===============================================================================================================================
Func _WD_GetTable2($sSession, $sBaseElement, $sRowsSelector = Default, $sColsSelector = Default)
	Local Const $sFuncName = "_WD_GetTable2"
	Local $aTable = "" ; declared up front so the final Return is valid on every path
	$_WD_HTTPRESULT = 0
	$_WD_HTTPRESPONSE = ''

	If $sRowsSelector = Default Then $sRowsSelector = "tr"
	If $sColsSelector = Default Then $sColsSelector = "td, th"

	Local Static $sJavaScript = _
			"return GetTable(arguments[0], arguments[1], arguments[2]);" & @CRLF & _
			"function GetTable(table, RowsSelector, ColsSelector) {" & @CRLF & _
			"	var text = '';" & @CRLF & _
			"	var columns = 0;" & @CRLF & _
			"	if (table == null) { return {text: '', columns: -1}; }" & @CRLF & _
			"	var row_set = table.querySelectorAll(RowsSelector);" & @CRLF & _
			"	if (row_set.length == 0) { return {text: '', columns: -2}; }" & @CRLF & _
			"	for (var irow = 0; irow < row_set.length; irow++) {" & @CRLF & _
			"		if (irow > 0) { text += '@\n@'; }" & @CRLF & _
			"		var cell_set = row_set[irow].querySelectorAll(ColsSelector);" & @CRLF & _
			"		if (cell_set.length == 0) { return {text: '', columns: -3}; }" & @CRLF & _
			"		if (cell_set.length > columns) { columns = cell_set.length; }" & @CRLF & _
			"		for (var icell = 0; icell < cell_set.length; icell++) {" & @CRLF & _
			"			if (icell > 0) { text += '@|@'; }" & @CRLF & _
			"			text += cell_set[icell].textContent;" & @CRLF & _
			"		}" & @CRLF & _
			"	}" & @CRLF & _
			"	return {text: text, columns: columns};" & @CRLF & _
			"}" & @CRLF & _
			""

	; Get the table element
	Local $sElement = _WD_FindElement($sSession, $_WD_LOCATOR_ByXPath, $sBaseElement)
	Local $iErr = @error

	If $iErr = $_WD_ERROR_Success Then
		; The selectors are embedded in a JSON argument list, so escape backslashes and double quotes
		Local $sRowsArg = StringReplace(StringReplace($sRowsSelector, "\", "\\"), '"', '\"')
		Local $sColsArg = StringReplace(StringReplace($sColsSelector, "\", "\\"), '"', '\"')
		Local $sArgs = __WD_JsonElement($sElement) & ', "' & $sRowsArg & '", "' & $sColsArg & '"'

		Local $sResponse = _WD_ExecuteScript($sSession, $sJavaScript, $sArgs)
		$iErr = @error

		If $iErr = $_WD_ERROR_Success Then
			_Log(@ScriptLineNumber & ' _WD_GetTable2 - JS')
			Local $oJSON = Json_Decode($sResponse)

			; Extract target data from results and convert to array
			Local $iColumns = Number(Json_Get($oJSON, "[value][columns]"))

			If $iColumns < 1 Then
				; Negative values are the script's "nothing found" codes; they must not reach the array declaration
				$iErr = $_WD_ERROR_NoMatch
			Else
				Local $sText = Json_Get($oJSON, "[value][text]")
				Local $aRows[0][$iColumns]
				_ArrayAdd($aRows, $sText, 0, '@|@', '@' & @LF & '@', $ARRAYFILL_FORCE_SINGLEITEM)
				$aTable = $aRows
			EndIf
		EndIf
	EndIf

	Return SetError(__WD_Error($sFuncName, $iErr), 0, $aTable)
EndFunc ;==>_WD_GetTable2
; Write "! <comment> >> <elapsed> ms" to the console, where <elapsed> is the time
; passed since the global $hTimer was last (re)initialised with TimerInit().
Func _Log($s_Comment)
	Local $sElapsed = TimerDiff($hTimer) & ' ms'
	ConsoleWrite("! " & $s_Comment & ' >> ' & $sElapsed & @CRLF)
EndFunc ;==>_Log
and my results
! 36 _WD_GetTable - react >> 35.7476 ms
! 49 _WD_GetTable - Standard Table - test 1 >> 21.0249 ms
! 57 _WD_GetTable - Standard Table - test 2 >> 31.8675 ms
! 175 _WD_GetTable2 - JS >> 15.6884 ms
! 73 _WD_GetTable2 - Standard Table - test 1 end >> 18.1065 ms
! 175 _WD_GetTable2 - JS >> 27.8653 ms
! 80 _WD_GetTable2 - Standard Table - test 2 end >> 29.3002 ms
and here is my JS
var ElementSelector = 'table#customers';
var RowsSelector = 'tr';
var ColsSelector = 'td, th';
var result = GetTable(ElementSelector, RowsSelector, ColsSelector);
return result;
/**
 * Serialise a table to delimited text, with error codes for missing elements.
 *
 * Cells are separated by '@|@' and rows by '@\n@'.
 *
 * @param {string} ElementSelector CSS selector of the table element.
 * @param {string} RowsSelector    CSS selector of the rows, evaluated on the table.
 * @param {string} ColsSelector    CSS selector of the cells, evaluated on each row.
 * @returns {{text: string, columns: (number|string)}} On success the delimited
 *   text and the cell count of the widest row. On failure `text` is '' and
 *   `columns` is '-1' (table not found), '-2' (no rows matched) or '-3'
 *   (a row has no cells).
 */
function GetTable(ElementSelector, RowsSelector, ColsSelector) {
  var text = '';
  var columns = 0;
  var table = document.querySelector(ElementSelector);
  // Each failure returns immediately. The former "if (...) {...} {...}" form
  // had no `else`, so execution fell through, dereferenced the missing
  // element and finally overwrote the error code.
  if (table == null) {
    return {text: '', columns: '-1'};
  }
  var row_set = table.querySelectorAll(RowsSelector);
  if (row_set.length == 0) {
    return {text: '', columns: '-2'};
  }
  for (var irow = 0, cell_set; irow < row_set.length; irow++) {
    if (irow > 0) { text += '@\n@'; }
    cell_set = row_set[irow].querySelectorAll(ColsSelector);
    if (cell_set.length == 0) {
      return {text: '', columns: '-3'};
    }
    // The caller sizes its array from this value, so report the widest row.
    if (cell_set.length > columns) { columns = cell_set.length; }
    for (var icell = 0; icell < cell_set.length; icell++) {
      if (icell > 0) { text += '@|@'; }
      text += cell_set[icell].textContent;
    }
  }
  return {text: text, columns: columns};
}
I think we need to measure different tables with many columns and many rows, on different browsers. EDIT: tests in progress....
Quick test:
#Region ; *** Dynamically added Include files ***
#include <Timers.au3> ; added:07/28/23 23:17:44
#EndRegion ; *** Dynamically added Include files ***
#include <Array.au3>
#include "..\wd_helper.au3"
#include "..\wd_capabilities.au3"
Global $b_AddSleep = True
Global $hTimer = TimerInit()
Global $sCapabilities = SetupGecko("")
_WD_Startup()
Global $sSession = _WD_CreateSession($sCapabilities)
_WD_DebugSwitch($_WD_DEBUG_None)
;~ _Example()
;~ _Example2()
_Example3()
_WD_DeleteSession($sSession)
_WD_Shutdown()
Func _Example()
Local $url = "https://glin.github.io/reactable/articles/examples.html"
_WD_Navigate($sSession, $url)
_WD_LoadWait($sSession)
Local $sTable = "(//div[@role='table'])[1]"
Local $sRows = "div[role='row']"
Local $sCols = "div[role='row'] > div[class^='rt-t']"
If $b_AddSleep Then Sleep(5000)
$hTimer = TimerInit()
Local $aTest1 = _WD_GetTable($sSession, $sTable, $sRows, $sCols)
#forceref $aTest1
_Log(@ScriptLineNumber & ' _WD_GetTable - react')
;~ _ArrayDisplay($aTest1, "React Table")
$url = "https://www.w3schools.com/html/html_tables.asp"
_WD_Navigate($sSession, $url)
_WD_LoadWait($sSession)
$sTable = "//table[@id='customers']"
If $b_AddSleep Then Sleep(5000)
$hTimer = TimerInit()
Local $aTest2 = _WD_GetTable($sSession, $sTable)
#forceref $aTest2
_Log(@ScriptLineNumber & ' _WD_GetTable - Standard Table - test 1')
;~ _ArrayDisplay($aTest2, "Standard Table")
If $b_AddSleep Then Sleep(5000)
If $b_AddSleep Then Sleep(5000)
$hTimer = TimerInit()
Local $aTest2b = _WD_GetTable($sSession, $sTable)
#forceref $aTest2b
_Log(@ScriptLineNumber & ' _WD_GetTable - Standard Table - test 2')
;~ _ArrayDisplay($aTest2b, "Standard Table")
EndFunc ;==>_Example
Func _Example2()
Local $url = "https://www.w3schools.com/html/html_tables.asp"
_WD_Navigate($sSession, $url)
_WD_LoadWait($sSession)
Local $sTable = "//table[@id='customers']"
If $b_AddSleep Then Sleep(5000)
$hTimer = TimerInit()
_WD_GetTable2($sSession, $sTable)
_Log(@ScriptLineNumber & ' _WD_GetTable2 - Standard Table - test 1 end')
If $b_AddSleep Then Sleep(5000)
If $b_AddSleep Then Sleep(5000)
$hTimer = TimerInit()
Local $aTest3 = _WD_GetTable2($sSession, $sTable)
#forceref $aTest3
_Log(@ScriptLineNumber & ' _WD_GetTable2 - Standard Table - test 2 end')
;~ _ArrayDisplay($aTest3, "Standard Table")
EndFunc ;==>_Example2
; Compares table readers on a large, manually generated table: the tester is
; asked to create a 100 x 10 table on the generator page, after which the table
; is read twice, timed via the global $hTimer / _Log(), and displayed.
Func _Example3()
Local $url = "https://codebeautify.org/html-table-generator"
_WD_Navigate($sSession, $url)
_WD_LoadWait($sSession)
; Blocks until the tester has created the table in the browser and dismissed the box.
MsgBox($MB_TOPMOST, "", 'create table with 100 rows and 10 cols')
; Optional pause before starting the timer (controlled by the global $b_AddSleep).
If $b_AddSleep Then Sleep(5000)
$hTimer = TimerInit()
Local $aTable_Big1 = _WD_GetTable($sSession, "//table[@id='demoTable']")
_Log(@ScriptLineNumber & ' Example3 - _WD_GetTable - test 1 end')
_ArrayDisplay($aTable_Big1)
If $b_AddSleep Then Sleep(5000)
$hTimer = TimerInit()
; NOTE(review): _WD_GetTable3 is not defined in the visible part of this script, it is given a CSS selector
; rather than the XPath used above, and the log label below says "_WD_GetTable2" - confirm which function is meant.
Local $aTable_Big2 = _WD_GetTable3($sSession, "table#demoTable")
_Log(@ScriptLineNumber & ' Example3 - _WD_GetTable2 - test 2 end')
_ArrayDisplay($aTable_Big2)
EndFunc ;==>_Example3
Func SetupGecko($bHeadless = False)
_WD_Option('Driver', 'geckodriver.exe')
_WD_Option('DriverParams', '--log trace')
_WD_Option('Port', 4444)
_WD_CapabilitiesStartup()
_WD_CapabilitiesAdd('alwaysMatch', 'firefox')
_WD_CapabilitiesAdd('browserName', 'firefox')
_WD_CapabilitiesAdd('acceptInsecureCerts', True)
; REMARKS
; When using 32bit geckodriver.exe, you may need to set 'binary' option.
; This shouldn't be needed when using 64bit geckodriver.exe,
; but at the same time setting it is not affecting the script.
Local $sPath = _WD_GetBrowserPath("firefox")
If Not @error Then
_WD_CapabilitiesAdd('binary', $sPath)
ConsoleWrite("wd_demo.au3: _WD_GetBrowserPath() > " & $sPath & @CRLF)
EndIf
If $bHeadless Then _WD_CapabilitiesAdd('args', '--headless')
_WD_CapabilitiesDump(@ScriptLineNumber) ; dump current Capabilities setting to console - only for testing in this demo
Local $sCapabilities = _WD_CapabilitiesGet()
Return $sCapabilities
EndFunc ;==>SetupGecko
; Name ..........: _WD_GetTable2
; Description ...: Return all cells of a table as a 2D array, collecting the cell text with a single Javascript call.
; Parameters ....: $sSession      - Session ID from _WD_CreateSession
;                  $sBaseElement  - XPath of the table to return
;                  $sRowsSelector - [optional] Rows CSS selector. Default is "tr".
;                  $sColsSelector - [optional] Columns CSS selector. Default is "td, th".
; Return values .: Success - 2D array.
;                  Failure - "" (empty string) and sets @error to the value reported by _WD_FindElement /
;                            _WD_ExecuteScript, or to $_WD_ERROR_NoMatch when no rows or no cells were found.
; Remarks .......: The located element and both selectors are passed to the script as arguments, so the function
;                  works for any table (the selectors were previously hard-coded inside the script).
;                  The script reports the widest row as the column count; shorter rows are left with empty cells.
;                  Rows are joined with '@' & @LF & '@' and cells with '@|@', so cell text containing one of
;                  these sequences will be split incorrectly.
;                  Calls _Log(), which must be supplied by the calling script.
Func _WD_GetTable2($sSession, $sBaseElement, $sRowsSelector = Default, $sColsSelector = Default)
	Local Const $sFuncName = "_WD_GetTable2"
	Local $sElement, $aTable = ''
	$_WD_HTTPRESULT = 0
	$_WD_HTTPRESPONSE = ''

	If $sRowsSelector = Default Then $sRowsSelector = "tr"
	If $sColsSelector = Default Then $sColsSelector = "td, th"

	; Every failure path returns early with a negative column count:
	; -1 = no table element, -2 = no rows matched, -3 = no cells matched in any row
	Local Static $sJavaScript = _
			"return GetTable(arguments[0], arguments[1], arguments[2]);" & @CRLF & _
			"function GetTable(table, RowsSelector, ColsSelector) {" & @CRLF & _
			"  if (table == null) {" & @CRLF & _
			"    return {text: '', columns: -1};" & @CRLF & _
			"  }" & @CRLF & _
			"  var row_set = table.querySelectorAll(RowsSelector);" & @CRLF & _
			"  if (row_set.length == 0) {" & @CRLF & _
			"    return {text: '', columns: -2};" & @CRLF & _
			"  }" & @CRLF & _
			"  var text = '';" & @CRLF & _
			"  var columns = 0;" & @CRLF & _
			"  for (var irow = 0, cell_set; irow < row_set.length; irow++) {" & @CRLF & _
			"    if (irow > 0) {text += '@\n@';}" & @CRLF & _
			"    cell_set = row_set[irow].querySelectorAll(ColsSelector);" & @CRLF & _
			"    if (cell_set.length > columns) {columns = cell_set.length;}" & @CRLF & _
			"    for (var icell = 0; icell < cell_set.length; icell++) {" & @CRLF & _
			"      if (icell > 0) {text += '@|@';}" & @CRLF & _
			"      text += cell_set[icell].textContent;" & @CRLF & _
			"    }" & @CRLF & _
			"  }" & @CRLF & _
			"  if (columns == 0) {" & @CRLF & _
			"    return {text: '', columns: -3};" & @CRLF & _
			"  }" & @CRLF & _
			"  return {text: text, columns: columns};" & @CRLF & _
			"}"

	; Get the table element
	$sElement = _WD_FindElement($sSession, $_WD_LOCATOR_ByXPath, $sBaseElement)
	Local $iErr = @error

	If $iErr = $_WD_ERROR_Success Then
		; https://stackoverflow.com/questions/64842157
		; https://github.com/Danp2/au3WebDriver/issues/486#issuecomment-1649427932
		Local $sArgs = __WD_JsonElement($sElement) & ', "' & $sRowsSelector & '", "' & $sColsSelector & '"'
		Local $sResponse = _WD_ExecuteScript($sSession, $sJavaScript, $sArgs)
		$iErr = @error

		If $iErr = $_WD_ERROR_Success Then
			_Log(@ScriptLineNumber & ' _WD_GetTable2 - JS')
			Local $oJSON = Json_Decode($sResponse)

			; Extract target data from results and convert to array
			Local $iColumns = Number(Json_Get($oJSON, "[value][columns]"))

			If $iColumns > 0 Then
				Local $sText = Json_Get($oJSON, "[value][text]")
				Local $aTemp[0][$iColumns] ; accurate size temp array
				_ArrayAdd($aTemp, $sText, 0, '@|@', '@' & @LF & '@', $ARRAYFILL_FORCE_SINGLEITEM)
				$aTable = $aTemp
			Else
				; A zero or negative count must never be used as an array dimension
				$iErr = $_WD_ERROR_NoMatch
			EndIf
		EndIf
	EndIf

	Return SetError(__WD_Error($sFuncName, $iErr), 0, $aTable)
EndFunc   ;==>_WD_GetTable2
; Name ..........: _WD_GetTable3
; Description ...: Return all cells of a table as a 2D array, collecting the cell text with a single Javascript call.
; Parameters ....: $sSession      - Session ID from _WD_CreateSession
;                  $sBaseElement  - CSS selector of the table to return (e.g. "table#demoTable")
;                  $sRowsSelector - [optional] Rows CSS selector. Default is "tr".
;                  $sColsSelector - [optional] Columns CSS selector. Default is "td, th".
; Return values .: Success - 2D array.
;                  Failure - "" (empty string) and sets @error to the value reported by _WD_FindElement /
;                            _WD_ExecuteScript, or to $_WD_ERROR_NoMatch when no rows or no cells were found.
; Remarks .......: The base element is located with $_WD_LOCATOR_ByCSSSelector and its lookup error is honoured
;                  (the CSS selector used to be handed to an XPath lookup whose result was ignored).
;                  The located element and both selectors are passed to the script as arguments, so the function
;                  works for any table (the selectors were previously hard-coded inside the script).
;                  The script reports the widest row as the column count; shorter rows are left with empty cells.
;                  Rows are joined with '@' & @LF & '@' and cells with '@|@', so cell text containing one of
;                  these sequences will be split incorrectly.
;                  Calls _Log(), which must be supplied by the calling script.
Func _WD_GetTable3($sSession, $sBaseElement, $sRowsSelector = Default, $sColsSelector = Default)
	Local Const $sFuncName = "_WD_GetTable3"
	Local $sElement, $aTable = ''
	$_WD_HTTPRESULT = 0
	$_WD_HTTPRESPONSE = ''

	If $sRowsSelector = Default Then $sRowsSelector = "tr"
	If $sColsSelector = Default Then $sColsSelector = "td, th"

	; Every failure path returns early with a negative column count:
	; -1 = no table element, -2 = no rows matched, -3 = no cells matched in any row
	Local Static $sJavaScript = _
			"return GetTable(arguments[0], arguments[1], arguments[2]);" & @CRLF & _
			"function GetTable(table, RowsSelector, ColsSelector) {" & @CRLF & _
			"  if (table == null) {" & @CRLF & _
			"    return {text: '', columns: -1};" & @CRLF & _
			"  }" & @CRLF & _
			"  var row_set = table.querySelectorAll(RowsSelector);" & @CRLF & _
			"  if (row_set.length == 0) {" & @CRLF & _
			"    return {text: '', columns: -2};" & @CRLF & _
			"  }" & @CRLF & _
			"  var text = '';" & @CRLF & _
			"  var columns = 0;" & @CRLF & _
			"  for (var irow = 0, cell_set; irow < row_set.length; irow++) {" & @CRLF & _
			"    if (irow > 0) {text += '@\n@';}" & @CRLF & _
			"    cell_set = row_set[irow].querySelectorAll(ColsSelector);" & @CRLF & _
			"    if (cell_set.length > columns) {columns = cell_set.length;}" & @CRLF & _
			"    for (var icell = 0; icell < cell_set.length; icell++) {" & @CRLF & _
			"      if (icell > 0) {text += '@|@';}" & @CRLF & _
			"      text += cell_set[icell].textContent;" & @CRLF & _
			"    }" & @CRLF & _
			"  }" & @CRLF & _
			"  if (columns == 0) {" & @CRLF & _
			"    return {text: '', columns: -3};" & @CRLF & _
			"  }" & @CRLF & _
			"  return {text: text, columns: columns};" & @CRLF & _
			"}"

	; Get the table element
	$sElement = _WD_FindElement($sSession, $_WD_LOCATOR_ByCSSSelector, $sBaseElement)
	Local $iErr = @error

	If $iErr = $_WD_ERROR_Success Then
		; https://stackoverflow.com/questions/64842157
		; https://github.com/Danp2/au3WebDriver/issues/486#issuecomment-1649427932
		Local $sArgs = __WD_JsonElement($sElement) & ', "' & $sRowsSelector & '", "' & $sColsSelector & '"'
		Local $sResponse = _WD_ExecuteScript($sSession, $sJavaScript, $sArgs)
		$iErr = @error

		If $iErr = $_WD_ERROR_Success Then
			_Log(@ScriptLineNumber & ' _WD_GetTable3 - JS')
			Local $oJSON = Json_Decode($sResponse)

			; Extract target data from results and convert to array
			Local $iColumns = Number(Json_Get($oJSON, "[value][columns]"))

			If $iColumns > 0 Then
				Local $sText = Json_Get($oJSON, "[value][text]")
				Local $aTemp[0][$iColumns] ; accurate size temp array
				_ArrayAdd($aTemp, $sText, 0, '@|@', '@' & @LF & '@', $ARRAYFILL_FORCE_SINGLEITEM)
				$aTable = $aTemp
			Else
				; A zero or negative count must never be used as an array dimension
				$iErr = $_WD_ERROR_NoMatch
			EndIf
		EndIf
	EndIf

	Return SetError(__WD_Error($sFuncName, $iErr), 0, $aTable)
EndFunc   ;==>_WD_GetTable3
; Write $s_Comment to the console together with the time elapsed since $hTimer was last set.
; $hTimer is not declared here; it is taken from the calling script.
Func _Log($s_Comment)
	Local $sLine = "! " & $s_Comment & ' >> ' & TimerDiff($hTimer) & ' ms' & @CRLF
	ConsoleWrite($sLine)
EndFunc   ;==>_Log
results:
! 95 Example3 - _WD_GetTable - test 1 end >> 85.4319 ms
! 273 _WD_GetTable3 - JS >> 21.0069 ms
! 101 Example3 - _WD_GetTable2 - test 2 end >> 27.7723 ms
I know that my _WD_GetTable2
and _WD_GetTable3
are hastily cobbled-together functions, and that they must be fixed up to follow the UDF code style.
But do you agree that this is good direction ?
@mlipok I haven't verified this, but I suspect the bulk of the speed difference is the retrieval of the initial base element, which you are performing as part of the Javascript functionality. Since my version is making an extra webdriver call, it will naturally be slower. However, my version maintains compatibility while yours introduces a script breaking change (xpath vs CSS for the base selector).
However, my version maintains compatibility while yours introduces a script breaking change (xpath vs CSS for the base selector).
I know. And in the sentence below
and that they must be fixed up to follow the UDF code style.
I was trying to say that I already know I must fix a few things.
The main thing I'm asking is should I propose a separate PR that uses my JS way of getting the contents of the table.
I feel the current implementation has sufficiently addressed the speed issue. Your version is 1-2 seconds faster in my brief testing, which is good. But the difference may shrink once you've addressed the compatibility issue, added error checking, etc. Then there are the concerns of maintainability, compatibility, etc.
Keep working on your version if you think it's warranted. When you have finalized the changes, we can review & make a final decision.
Your version is 1-2 seconds faster in my brief testing,
This is true on small tables. Did you test it on 100 cells and 1000 cells ?
Yes, I tested it on the table that started this whole discussion, which is 20 cols and 1000 rows. Your version is faster at around 203 ms per run. Mine averages around 1968 ms.
Here's some interesting results. I went back and retested the prior release of _WD_GetTable
, and it appears that _HtmlTableGetWriteToArray
was causing the slowdown.
With _HtmlTableGetWriteToArray -- 54998 ms avg W/O _HtmlTableGetWriteToArray -- 1288 ms avg
So my new version is actually slower than the older version (at least in this scenario). 🤔
I was wondering how to get table from here: https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Win/
But as I see, this is a dynamically filled table, so it is necessary to go down to the last element.
please take a look:
#Region ; *** Dynamically added Include files ***
#include <Timers.au3> ; added:07/28/23 23:17:44
#EndRegion ; *** Dynamically added Include files ***
#include <Array.au3>
#include "..\wd_helper.au3"
#include "..\wd_capabilities.au3"
; --- Script driver: set up a Firefox session, run Example 4, then tear everything down ---
; When True, _Example4 pauses for 5 seconds after navigating
Global $b_AddSleep = True
; Reference point for _Log(); _Example4 restarts it before each measurement
Global $hTimer = TimerInit()
; "" evaluates to False in SetupGecko's headless test, so '--headless' is not added
Global $sCapabilities = SetupGecko("")
_WD_Startup()
Global $sSession = _WD_CreateSession($sCapabilities)
; NOTE(review): presumably switches off the UDF debug output so it does not distort the timings - confirm
_WD_DebugSwitch($_WD_DEBUG_None)
_Example4()
_WD_DeleteSession($sSession)
_WD_Shutdown()
; Example 4: read the div-based grid of the chromium-browser-snapshots listing three times and log the
; duration of each run: _WD_GetTableEx by XPath, _WD_GetTableEx by CSS selector and _WD_GetTable_Orginall.
; Relies on the script-level variables $sSession, $b_AddSleep and $hTimer.
Func _Example4()
	Local $sPage = "https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Win/"
	_WD_Navigate($sSession, $sPage)
	If $b_AddSleep Then
		Sleep(5000)
	EndIf
	_WD_LoadWait($sSession)

	; Wait (up to 3 minutes) for the loader spinner element carrying the "ng-hide" class
	Local $sSpinnerXPath = '//img[@class="loader-spinner ng-hide" and @ng-show="loading"]'
	_WD_WaitElement($sSession, $_WD_LOCATOR_ByXPath, $sSpinnerXPath, Default, 3 * 60 * 1000)

	; The grid is built from <div> elements, so rows and cells need custom CSS selectors
	Local $sRowsCss = "div[ng-style='headerScrollerStyle()'], div[ng-style='rowStyle(row)']"
	Local $sCellsCss = "div[ng-repeat='col in renderedColumns'], div[ng-repeat='row in renderedColumns']"
	Local $sGridXPath = "//div[@ng-grid='gridOptions']"

	; Test 1: _WD_GetTableEx, grid located by XPath
	$hTimer = TimerInit()
	Local $aGridByXPath = _WD_GetTableEx($sSession, $_WD_LOCATOR_ByXPath, $sGridXPath, $sRowsCss, $sCellsCss)
	_Log(@ScriptLineNumber & ' Example4 - _WD_GetTableEx - test 1 end')
	_ArrayDisplay($aGridByXPath)

	; Test 2: _WD_GetTableEx, grid located by CSS selector
	$hTimer = TimerInit()
	Local $aGridByCss = _WD_GetTableEx($sSession, $_WD_LOCATOR_ByCSSSelector, "div[ng-grid='gridOptions']", $sRowsCss, $sCellsCss)
	_Log(@ScriptLineNumber & ' Example4 - _WD_GetTableEx - test 2 end')
	_ArrayDisplay($aGridByCss)

	; Test 3: the copy of the original implementation
	$hTimer = TimerInit()
	Local $aGridOriginal = _WD_GetTable_Orginall($sSession, $sGridXPath, $sRowsCss, $sCellsCss)
	_Log(@ScriptLineNumber & ' Example4 - _WD_GetTable_Orginall - test 3 end')
	_ArrayDisplay($aGridOriginal)
EndFunc   ;==>_Example4
; Configure the WebDriver UDF for geckodriver and build the capabilities for a Firefox session.
; $bHeadless - [optional] when it evaluates to True the '--headless' argument is added. Default is False.
; Returns the value obtained from _WD_CapabilitiesGet().
Func SetupGecko($bHeadless = False)
	; Driver executable, its command line and the port
	_WD_Option('Driver', 'geckodriver.exe')
	_WD_Option('DriverParams', '--log trace')
	_WD_Option('Port', 4444)

	; Start a fresh capabilities definition for Firefox
	_WD_CapabilitiesStartup()
	_WD_CapabilitiesAdd('alwaysMatch', 'firefox')
	_WD_CapabilitiesAdd('browserName', 'firefox')
	_WD_CapabilitiesAdd('acceptInsecureCerts', True)

	; REMARKS
	; With the 32bit geckodriver.exe the 'binary' option may be required.
	; With the 64bit geckodriver.exe it should not be needed,
	; but setting it anyway does not affect the script.
	Local $sFirefoxPath = _WD_GetBrowserPath("firefox")
	If @error = 0 Then
		_WD_CapabilitiesAdd('binary', $sFirefoxPath)
		ConsoleWrite("wd_demo.au3: _WD_GetBrowserPath() > " & $sFirefoxPath & @CRLF)
	EndIf

	If $bHeadless Then
		_WD_CapabilitiesAdd('args', '--headless')
	EndIf

	; Dump the current capabilities to the console - only for testing in this demo
	_WD_CapabilitiesDump(@ScriptLineNumber)

	Return _WD_CapabilitiesGet()
EndFunc   ;==>SetupGecko
; #FUNCTION# ====================================================================================================================
; Name ..........: _WD_GetTableEx
; Description ...: Return all elements of a table.
; Syntax ........: _WD_GetTableEx($sSession, $sStrategy, $sTableElementLocator[, $sRowsSelector = Default[, $sColsSelector = Default]])
; Parameters ....: $sSession             - Session ID from _WD_CreateSession
;                  $sStrategy            - Locator strategy. See defined constant $_WD_LOCATOR_* for allowed values.
;                  $sTableElementLocator - Indicates how the WebDriver should traverse through the HTML DOM to locate the desired element.
;                  $sRowsSelector        - [optional] Rows CSS selector. Default is "tr".
;                  $sColsSelector        - [optional] Columns CSS selector. Default is "td, th".
; Return values .: Success - 2D array.
;                  Failure - "" (empty string) and sets @error to one of the following values:
;                  - $_WD_ERROR_Exception
;                  - $_WD_ERROR_NoMatch
; Author ........: danylarson
; Modified ......: water, danp2, mLipok
; Remarks .......: The cell text is collected by a single Javascript call. The script returns early with a negative
;                  column count when nothing can be read (-1 = no table element, -2 = no rows matched, -3 = no cells
;                  matched in any row); every such result is reported as $_WD_ERROR_NoMatch.
;                  The widest row determines the column count; shorter rows are left with empty cells.
;                  Rows are joined with '@' & @LF & '@' and cells with '@|@', so cell text containing one of these
;                  sequences will be split incorrectly.
;                  Calls _Log(), which must be supplied by the calling script.
; Related .......: _WD_FindElement, _WD_ExecuteScript, _WD_LastHTTPResult
; Link ..........: https://www.autoitscript.com/forum/topic/191990-webdriver-udf-w3c-compliant-version-01182020/page/18/?tab=comments#comment-1415164
; Example .......: No
; ===============================================================================================================================
Func _WD_GetTableEx($sSession, $sStrategy, $sTableElementLocator, $sRowsSelector = Default, $sColsSelector = Default)
	Local Const $sFuncName = "_WD_GetTableEx"
	Local $sElement, $aTable = ''

	If $sRowsSelector = Default Then $sRowsSelector = "tr"
	If $sColsSelector = Default Then $sColsSelector = "td, th"

	; Get the table element
	$sElement = _WD_FindElement($sSession, $sStrategy, $sTableElementLocator)
	Local $iErr = @error

	If $iErr = $_WD_ERROR_Success Then
		$_WD_HTTPRESULT = 0
		$_WD_HTTPRESPONSE = ''

		; https://stackoverflow.com/questions/64842157
		; Each failure branch returns immediately; the former "} {" blocks were not "else" branches,
		; so the error results were always overwritten and a null table raised a script exception.
		Local Static $sJavaScript = _
				"return GetTable(arguments[0], arguments[1], arguments[2]);" & @CRLF & _
				"" & @CRLF & _
				"function GetTable(table, RowsSelector, ColsSelector) {" & @CRLF & _
				"  if (table == null) {" & @CRLF & _
				"    return {text: '', columns: -1};" & @CRLF & _
				"  }" & @CRLF & _
				"  var row_set = table.querySelectorAll(RowsSelector);" & @CRLF & _
				"  if (row_set.length == 0) {" & @CRLF & _
				"    return {text: '', columns: -2};" & @CRLF & _
				"  }" & @CRLF & _
				"  var text = '';" & @CRLF & _
				"  var columns = 0;" & @CRLF & _
				"  for (var irow = 0, cell_set; irow < row_set.length; irow++) {" & @CRLF & _
				"    if (irow > 0) {text += '@\n@';}" & @CRLF & _
				"    cell_set = row_set[irow].querySelectorAll(ColsSelector);" & @CRLF & _
				"    if (cell_set.length > columns) {columns = cell_set.length;}" & @CRLF & _
				"    for (var icell = 0; icell < cell_set.length; icell++) {" & @CRLF & _
				"      if (icell > 0) {text += '@|@';}" & @CRLF & _
				"      text += cell_set[icell].textContent;" & @CRLF & _
				"    }" & @CRLF & _
				"  }" & @CRLF & _
				"  if (columns == 0) {" & @CRLF & _
				"    return {text: '', columns: -3};" & @CRLF & _
				"  }" & @CRLF & _
				"  return {text: text, columns: columns};" & @CRLF & _
				"}"

		Local $sArgs = __WD_JsonElement($sElement) & ', "' & $sRowsSelector & '", "' & $sColsSelector & '"'

		; Debug output for the demo: dump the script that is about to be executed
		ConsoleWrite("! " & @CRLF)
		ConsoleWrite($sJavaScript & @CRLF)
		ConsoleWrite("! " & @CRLF)

		Local $sResponse = _WD_ExecuteScript($sSession, $sJavaScript, $sArgs)
		$iErr = @error

		If $iErr = $_WD_ERROR_Success Then
			_Log(@ScriptLineNumber & ' _WD_GetTableEx - JS')
			Local $oJSON = Json_Decode($sResponse)

			; Extract target data from results and convert to array
			Local $iColumns = Number(Json_Get($oJSON, "[value][columns]"))

			If $iColumns > 0 Then
				Local $sText = Json_Get($oJSON, "[value][text]")
				Local $aTemp[0][$iColumns] ; accurate size temp array
				_ArrayAdd($aTemp, $sText, 0, '@|@', '@' & @LF & '@', $ARRAYFILL_FORCE_SINGLEITEM)
				$aTable = $aTemp
			Else
				; A zero or negative count must never be used as an array dimension
				$iErr = $_WD_ERROR_NoMatch
			EndIf
		EndIf
	EndIf

	Return SetError(__WD_Error($sFuncName, $iErr), 0, $aTable)
EndFunc   ;==>_WD_GetTableEx
; #FUNCTION# ====================================================================================================================
; Name ..........: _WD_GetTable_Orginall
; Description ...: Return all elements of a table.
; Syntax ........: _WD_GetTable_Orginall($sSession, $sBaseElement[, $sRowsSelector = Default[, $sColsSelector = Default]])
; Parameters ....: $sSession      - Session ID from _WD_CreateSession
;                  $sBaseElement  - XPath of the table to return
;                  $sRowsSelector - [optional] Rows CSS selector. Default is "tr".
;                  $sColsSelector - [optional] Columns CSS selector. Default is "td, th".
; Return values .: Success - 2D array.
;                  Failure - "" (empty string) and sets @error to one of the following values:
;                  - $_WD_ERROR_Exception
;                  - $_WD_ERROR_NoMatch
; Author ........: danylarson
; Modified ......: water, danp2
; Remarks .......: Copy of _WD_GetTable with an added _Log() call, kept for timing comparisons.
;                  _Log() must be supplied by the calling script.
; Related .......: _WD_FindElement, _WD_ElementAction, _WD_LastHTTPResult
; Link ..........: https://www.autoitscript.com/forum/topic/191990-webdriver-udf-w3c-compliant-version-01182020/page/18/?tab=comments#comment-1415164
; Example .......: No
; ===============================================================================================================================
Func _WD_GetTable_Orginall($sSession, $sBaseElement, $sRowsSelector = Default, $sColsSelector = Default)
	Local Const $sFuncName = "_WD_GetTable_Orginall"
	Local $vTable = ''
	$_WD_HTTPRESULT = 0
	$_WD_HTTPRESPONSE = ''

	If $sRowsSelector = Default Then $sRowsSelector = "tr"
	If $sColsSelector = Default Then $sColsSelector = "td, th"

	; Get the table element; without it there is nothing to read
	Local $sTableElement = _WD_FindElement($sSession, $_WD_LOCATOR_ByXPath, $sBaseElement)
	Local $iErr = @error
	If $iErr <> $_WD_ERROR_Success Then
		Return SetError(__WD_Error($sFuncName, $iErr), 0, $vTable)
	EndIf

	; https://stackoverflow.com/questions/64842157
	; Collect the text of every cell, grouped by row, in one script call
	Local $sScript = "return [...arguments[0].querySelectorAll(arguments[1])].map(row => [...row.querySelectorAll(arguments[2])].map(cell => cell.textContent));"
	Local $sArgs = __WD_JsonElement($sTableElement) & ', "' & $sRowsSelector & '", "' & $sColsSelector & '"'
	Local $sResult = _WD_ExecuteScript($sSession, $sScript, $sArgs)
	$iErr = @error
	_Log(@ScriptLineNumber & ' _WD_GetTable_Orginall - JS')

	If $iErr = $_WD_ERROR_Success Then
		; Extract target data from results and convert to array:
		; take the response from its 10th character on, leaving off its last two characters
		Local $iPayloadLen = StringLen($sResult) - 11
		Local $sPayload = StringMid($sResult, 10, $iPayloadLen)
		$vTable = __Make2Array($sPayload)
	EndIf

	Return SetError(__WD_Error($sFuncName, $iErr), 0, $vTable)
EndFunc   ;==>_WD_GetTable_Orginall
; Write $s_Comment to the console together with the time elapsed since the global $hTimer was last set.
Func _Log($s_Comment)
	Local $sLine = "! " & $s_Comment & ' >> ' & TimerDiff($hTimer) & ' ms' & @CRLF
	ConsoleWrite($sLine)
EndFunc   ;==>_Log
Especially on this part:
$hTimer = TimerInit()
Local $aTable_Big1 = _WD_GetTable_Orginall($sSession, $sXpath, $sRowsSelector, $sColsSelector)
_Log(@ScriptLineNumber & ' Example4 - _WD_GetTable_Orginall - test 3 end')
_ArrayDisplay($aTable_Big1)
The _WD_GetTable_Orginall
is a copy of _WD_GetTable
with added _Log(@ScriptLineNumber & ' _WD_GetTable_Orginall - JS')
you will notice that your implementation gets fewer rows.
btw.
I also plan to add new parameter to get InnerHTML
from cells instead of text.
This will give you possibility to get links or diagnose if in rows/cell is specific <img>
for example check mark.
you will notice that your implementation gets fewer rows.
I committed a fix for this yesterday, so please retest with the latest code and let me know if you still see an issue with the number of rows.
I committed a fix for this yesterday, so please retest with the latest code and let me know if you still see an issue with the number of rows.
Issue fixed.
These are my results --
     ! 165 _WD_GetTableEx - JS >> 20.3798 ms
     ! 40 Example4 - _WD_GetTableEx - test 1 end >> 25.0085 ms
     ! 165 _WD_GetTableEx - JS >> 45.0395 ms
     ! 45 Example4 - _WD_GetTableEx - test 2 end >> 49.4001 ms
     ! 220 _WD_GetTable_Original - JS >> 71.303 ms
     ! 50 Example4 - _WD_GetTable_Original - test 3 end >> 73.3695 ms
Have you investigated why only the first 33 rows are returned?
Feature request
Is your feature request related to a problem? Please describe
Original discussion here ==> https://www.autoitscript.com/forum/topic/208640-webdriver-udf-help-support-iv/?do=findComment&comment=1521243
Describe the solution you'd like
Use Javascript to perform the majority of the HTML processing
Describe alternatives you've considered
See original discussion
Additional context