File "/home/wo/opp-tools/bin/../opp/scraper.py", line 846, in check_steppingstone
targets = set(u for u in page.xpath('//a/@href') if re.search('.pdf$', u, re.I))
File "/home/wo/opp-tools/bin/../opp/webpage.py", line 34, in xpath
return self.lxmldoc().xpath(xp)
File "/home/wo/opp-tools/bin/../opp/webpage.py", line 30, in lxmldoc
self._lxmldoc = lxml.html.document_fromstring(html_undeclared)
File "/usr/local/lib/python3.4/dist-packages/lxml/html/__init__.py", line 752, in document_fromstring
value = etree.fromstring(html, parser, **kw)
File "src/lxml/lxml.etree.pyx", line 3213, in lxml.etree.fromstring (src/lxml/lxml.etree.c:77697)
File "src/lxml/parser.pxi", line 1819, in lxml.etree._parseMemoryDocument (src/lxml/lxml.etree.c:116494)
File "src/lxml/parser.pxi", line 1700, in lxml.etree._parseDoc (src/lxml/lxml.etree.c:115040)
File "src/lxml/parser.pxi", line 1040, in lxml.etree._BaseParser._parseUnicodeDoc (src/lxml/lxml.etree.c:109165)
File "src/lxml/parser.pxi", line 573, in lxml.etree._ParserContext._handleParseResultDoc (src/lxml/lxml.etree.c:103404)
File "src/lxml/parser.pxi", line 683, in lxml.etree._handleParseResult (src/lxml/lxml.etree.c:105058)
File "src/lxml/parser.pxi", line 622, in lxml.etree._raiseParseError (src/lxml/lxml.etree.c:104143)
lxml.etree.XMLSyntaxError: line 13: b'ID research already defined'
http://www.u.arizona.edu/~rhealey/personal.html contains two elements with id="research". As a consequence, lxml throws an exception (the XMLSyntaxError shown in the traceback above), which the scraper doesn't catch.