Open zdavatz opened 2 weeks ago
require 'selenium-webdriver'
# Validate the command line: exactly one argument consisting of five digits.
valid_input = ARGV.size == 1 && ARGV.first.match(/\A\d{5}\z/)
unless valid_input
  puts "Usage: ruby search_certificates.rb <five-digit-number>"
  exit 1
end

# The validated argument is the number that will be searched for.
search_number = ARGV.first

# Start a Chrome session and prepare an explicit-wait helper (60-second timeout).
driver = Selenium::WebDriver.for(:chrome)
wait = Selenium::WebDriver::Wait.new(timeout: 60)

# Open the certificate search page.
driver.get('https://www.swissreg.ch/database-client/search/query/certificates')
# Draw a 3px solid red border around +element+ by running a small JavaScript
# snippet through +driver+.
#
# @param driver  object responding to +execute_script+ (the WebDriver session)
# @param element the element passed to the script as arguments[0]
# @return whatever +execute_script+ returns
def highlight_element(driver, element)
  border_script = "arguments[0].style.border='3px solid red'"
  driver.execute_script(border_script, element)
end
# Locate an element, highlight it and return it, making up to three attempts
# when a stale element reference is reported along the way.
#
# @param driver  object responding to +find_element+ (also handed to
#                +highlight_element+)
# @param wait    object whose +until+ yields until the lookup succeeds
# @param locator locator passed unchanged to +driver.find_element+
# @return the located element
# @raise [Selenium::WebDriver::Error::StaleElementReferenceError] when the
#   third attempt is stale as well; any other error propagates immediately
def find_element_with_retry(driver, wait, locator)
  3.times do |round|
    begin
      located = wait.until { driver.find_element(locator) }
      highlight_element(driver, located)
      return located
    rescue Selenium::WebDriver::Error::StaleElementReferenceError
      # Third stale reference in a row: stop retrying and propagate it.
      raise if round == 2
    end
  end
end
# Locate the search box, run the query and print every result with its link.
# The whole interaction sits inside begin/ensure so the browser is closed on
# every exit path, including `exit` and unexpected exceptions.
begin
  # Wait for the search input field to be present.
  begin
    search_field = find_element_with_retry(driver, wait, css: 'input[data-cy="search-field-input"]')
  rescue Selenium::WebDriver::Error::TimeoutError
    puts "Unable to locate the search input field. Please check the selector and the page structure."
    exit 1 # the ensure clause below still closes the browser
  end

  # Type the number, then press Enter to perform the search.
  search_field.send_keys(search_number)
  search_field.send_keys(:enter)

  # Poll for result items for up to 10 seconds instead of always sleeping the
  # full 10 seconds. An empty Array is truthy in Ruby, so the block yields nil
  # while nothing has been rendered to keep the wait polling.
  results_wait = Selenium::WebDriver::Wait.new(timeout: 10)
  results =
    begin
      results_wait.until do
        items = driver.find_elements(css: 'ipi-dynamic-result-item-renderer')
        items unless items.empty?
      end
    rescue Selenium::WebDriver::Error::TimeoutError
      []
    end

  if results.empty?
    # find_elements returns an empty list rather than raising, so the
    # no-results case has to be detected explicitly.
    puts "No results found. Please check the search input and try again."
  else
    results.each do |result|
      puts result.text
      # A result without a link is printed without a URL instead of aborting
      # the remaining results.
      link = result.find_elements(tag_name: 'a').first
      puts link.attribute('href') if link
    end
  end
ensure
  # Close the browser
  driver.quit
end
A Watir script that will find the link:
require 'watir'
require 'nokogiri'
require 'logger'
# Single-number lookup: opens the certificate search page in headless Chrome
# via Watir, submits the five-digit number given on the command line, then
# parses the rendered HTML with Nokogiri, logs diagnostics about the page and
# looks for the detail link. Exits 0 when the link is found, 1 on errors.

# Configure logging
logger = Logger.new(STDOUT)
logger.level = Logger::DEBUG

# Check if a five-digit number is provided as a command-line argument
if ARGV.length != 1 || !ARGV[0].match(/\A\d{5}\z/)
  puts "Usage: ruby search_certificates.rb <five-digit-number>"
  exit 1
end

# Extract the five-digit number from the command-line argument
search_number = ARGV[0]

begin
  # Set up the Watir browser
  browser = Watir::Browser.new :chrome, headless: true

  # Attempt to navigate to the webpage with error handling
  logger.info "Attempting to access the webpage"
  browser.goto('https://www.swissreg.ch/database-client/search/query/certificates')

  # Wait for the search input field to be present
  logger.info "Waiting for the search input field to be present..."
  search_field = browser.text_field(data_cy: 'search-field-input')
  search_field.wait_until(&:present?)

  # Enter the search number into the search input field
  logger.info "Entering the search number: #{search_number}"
  search_field.set(search_number)

  # Submit the search form. Enter is sent to the field itself so the key press
  # does not depend on which element currently has focus.
  logger.info "Submitting the search form..."
  search_field.send_keys :enter

  # Wait for the search results to load
  logger.info "Waiting for the search results to load..."
  sleep(10) # Adjust the sleep time as needed

  # Get the page HTML
  page_html = browser.html

  # Parse the page content with Nokogiri
  doc = Nokogiri::HTML(page_html)

  # Look for Angular-specific elements and classes
  angular_elements = doc.css('div.ipi-detail-link.ng-star-inserted')
  logger.debug "Angular Elements Found: #{angular_elements.length}"

  # Look for input fields with Angular attributes
  ng_input_fields = doc.css('input[ng-reflect-name]')
  logger.debug "Angular Input Fields: #{ng_input_fields.length}"
  ng_input_fields.each do |input|
    logger.debug "Input Field: name='#{input['ng-reflect-name']}', type='#{input['type']}'"
  end

  # Additional Angular-specific searches
  ng_search_elements = doc.css('[data-cy="search-field-input"]')
  logger.debug "Search Field Elements: #{ng_search_elements.length}"

  # Detailed debugging of page content
  logger.debug "Page Title: #{doc.title}"

  # Extract and log JavaScript content
  js_scripts = doc.css('script[type="text/javascript"]')
  logger.debug "JavaScript Scripts Found: #{js_scripts.length}"
  js_scripts.each do |script|
    logger.debug "JavaScript Content: #{script.content}"
  end

  # If none of the probes matched anything, log a preview of the document and
  # stop with a failure status.
  if angular_elements.empty? && ng_input_fields.empty? && ng_search_elements.empty?
    logger.error "No Angular elements found. Possible rendering issue."
    logger.debug "Full Document Preview:"
    logger.debug page_html[0..1000] # First 1001 characters (inclusive range)
    exit 1
  end

  # NOTE(review): these messages are emitted unconditionally, before the link
  # lookup below has even run — confirm they are still wanted.
  logger.warn "WARNING: This script may not fully interact with JavaScript-rendered pages."
  logger.warn "Consider using a tool like Selenium for complete interaction."

  # Output debug information about the page
  puts "Page investigation complete. Unable to fully process dynamic content."

  # Find the mat-sidenav-content element
  sidenav_content = doc.at_css('mat-sidenav-content')
  if sidenav_content
    logger.debug "mat-sidenav-content Found"
    # Output the content of mat-sidenav-content
    logger.debug "mat-sidenav-content HTML: #{sidenav_content.to_html}"
  else
    logger.error "mat-sidenav-content not found"
  end

  # Find the link using a CSS selector
  link = doc.at('a.ipi-detail-link.ng-star-inserted')
  if link
    logger.debug "Link Found: #{link.text}"
    logger.debug "Link URL: #{link['href']}"
    # Abort the script after finding the link
    logger.info "Link found. Aborting the script."
    exit 0
  else
    logger.error "Link not found"
  end
rescue StandardError => e
  logger.error "An error occurred:"
  logger.error e.message
  logger.error e.backtrace.join("\n")
  exit 1
ensure
  # Close the browser. `browser` is nil when Watir::Browser.new itself failed,
  # so use safe navigation to avoid masking the original error with a
  # NoMethodError.
  browser&.close
end
A script that will use an input file:
require 'watir'
require 'nokogiri'
require 'logger'
# Batch lookup: reads search numbers (one per line) from the file given on the
# command line, and for each one opens the certificate search page in headless
# Chrome via Watir, submits the number and prints the first detail link found
# in the rendered HTML.

# Configure logging
logger = Logger.new(STDOUT)
logger.level = Logger::DEBUG

# Check if the file path is provided as a command-line argument
if ARGV.length != 1
  puts "Usage: ruby search_certificates.rb <file_path>"
  exit 1
end

# Read the list of search numbers from the file. Surrounding whitespace is
# removed and blank lines are skipped so that a trailing newline or an empty
# line does not trigger an empty search.
file_path = ARGV[0]
search_numbers = File.readlines(file_path).map(&:strip).reject(&:empty?)

# CSS selector of the detail link, and how long (in seconds) to wait for it.
link_selector = 'a.ipi-detail-link.ng-star-inserted'
result_timeout = 10

begin
  # Set up the Watir browser
  browser = Watir::Browser.new :chrome, headless: true

  search_numbers.each do |search_number|
    # Attempt to navigate to the webpage with error handling
    logger.info "Attempting to access the webpage for search number: #{search_number}"
    browser.goto('https://www.swissreg.ch/database-client/search/query/certificates')

    # Wait for the search input field to be present
    logger.info "Waiting for the search input field to be present..."
    search_field = browser.text_field(data_cy: 'search-field-input')
    search_field.wait_until(&:present?)

    # Enter the search number into the search input field
    logger.info "Entering the search number: #{search_number}"
    search_field.set(search_number)

    # Submit the search form by pressing Enter
    logger.info "Submitting the search form..."
    search_field.send_keys :enter

    # Wait until the detail link shows up in the DOM, for at most
    # result_timeout seconds, instead of sleeping for a fixed interval.
    logger.info "Waiting for the search results to load..."
    begin
      browser.element(css: link_selector).wait_until(timeout: result_timeout, &:exists?)
    rescue Watir::Wait::TimeoutError
      # No link appeared in time; the lookup below reports it as not found.
    end

    # Get the page HTML
    page_html = browser.html

    # Parse the page content with Nokogiri
    doc = Nokogiri::HTML(page_html)

    # Find the link using a CSS selector
    link = doc.at(link_selector)
    if link
      logger.debug "Link Found: #{link.text}"
      logger.debug "Link URL: #{link['href']}"
      # Output the link information
      puts "Link Found for search number #{search_number}: #{link.text} - #{link['href']}"
    else
      logger.error "Link not found for search number #{search_number}"
    end
  end
rescue StandardError => e
  logger.error "An error occurred:"
  logger.error e.message
  logger.error e.backtrace.join("\n")
  exit 1
ensure
  # Close the browser. `browser` is nil when Watir::Browser.new itself failed,
  # so use safe navigation to avoid masking the original error with a
  # NoMethodError.
  browser&.close
end
I think we can go with manually editable fields here, as there are only about 20 active patents.