Open adrianshort opened 6 years ago
# Idox serves each tab of an application as a separate page, so every tab
# needs its own request. The link we start from points at the summary tab, e.g.
# "https://planningregister.sutton.gov.uk/online-applications/applicationDetails.do?keyVal=PC6337KC08T00&activeTab=summary"
tab_urls = %w[details contacts dates].map { |tab| detail_page_link.gsub( 'summary', tab ) }

# The first tab's result becomes the accumulator; the later tabs are merged into it.
ret = DataFetch::DetailIdox.new.scrape( tab_urls.shift )
tab_urls.each do |tab_url|
  # Pause for ten seconds between consecutive requests.
  sleep(10)
  ret.merge!( DataFetch::DetailIdox.new.scrape( tab_url ) )
end
The table content differs depending on whether an agent was involved, hence this approach of checking each row's header.
# DataFetch::DetailIdox
def scrape( url )
agent = Mechanize.new
page = agent.get url
app_hash = {}
heads = agent.page.search( "th" )
cols = agent.page.search( "td" )
heads.each_with_index do |head, index|
# "details"
["Application Type","Expected Decision Level","Case Officer","Parish","Ward","District Reference",
"Applicant Name","Agent Name","Agent Company Name","Agent Address","Agent Phone Number",
"Environmental Assessment Requested"].each do |item|
app_hash.merge!( item.parameterize.underscore.to_sym => cols[index].text ) if cols[index] && head.text == item
end
# "dates"
["Application Received Date","Application ValiDated Date","Actual Committee Date",
"Neighbour Consultation Expiry Date","Statutory Expiry Date","Agreed Expiry Date",
"Decision Issued Date","Permission Expiry Date","Temporary Permission Expiry Date"].each do |item|
app_hash.merge! item.parameterize.underscore.to_sym => parse_date( cols[index].text ) if cols[index] && head.text.strip == item
end
end
# Map cols to ours:
key_map = {:case_officer=>:officer,
:neighbour_consultation_expiry_date=>:comments_close_at,
...}
ret = app_hash.map {|k, v| [key_map[k], v] }.to_h
ret.except!(nil)
ret
end
private

# Converts a date string scraped from the page, e.g. "Mon 17 Sep 2018".
#
# str - the cell text to convert; may be blank.
#
# Returns "" when str is blank, otherwise whatever Time.zone.parse returns for str.
def parse_date( str )
  str.blank? ? "" : Time.zone.parse( str )
end
See also #20.