Actual:
Traceback (most recent call last):
File "<stdin>", line 3, in <module>
File "scrapemark.py", line 35, in scrape
return pattern.scrape(html, url, get, post, headers, cookie_jar)
File "scrapemark.py", line 93, in scrape
if _match(self._nodes, _remove_comments(html), 0, captures, url, cookie_jar) == -1:
File "scrapemark.py", line 370, in _match
if not _run_special_nodes(special, html[i:], captures, base_url, cookie_jar):
File "scrapemark.py", line 391, in _run_special_nodes
if not _run_special_node(node, s, captures, base_url, cookie_jar):
File "scrapemark.py", line 403, in _run_special_node
i = _match(node[1], s, i, nested_captures, base_url, cookie_jar)
File "scrapemark.py", line 350, in _match
attrs_matched = _match_attrs(node[4], attrs, nested_captures, base_url, cookie_jar)
File "scrapemark.py", line 379, in _match_attrs
m = attr_node[0].match(attrs[name])
TypeError: expected string or buffer
What version of the product are you using? On what operating system?
scrapemark-0.9, from the source distribution
Mac OS X Version 10.6.3
Python 2.6.1 (r261:67515, Feb 11 2010, 00:51:29)
[GCC 4.2.1 (Apple Inc. build 5646)] on darwin
Please provide any additional information below.
Below is a workaround:
diff -ub scrapemark.py.orig scrapemark.py
--- scrapemark.py.orig 2010-04-28 01:00:58.000000000 -0400
+++ scrapemark.py 2010-04-28 00:59:03.000000000 -0400
@@ -541,7 +541,10 @@
 def _parse_attrs(s):
     attrs = {}
     for m in _attr_re.finditer(s):
-        attrs[m.group(1)] = m.group(3) or m.group(4)
+        value = m.group(3)
+        if value is None:
+            value = m.group(4)
+        attrs[m.group(1)] = value
     return attrs
 
 def _next_tag(s, i, tag_open_re, tag_close_re, depth=1): # returns (tag body, substringindex after tag)
Reported by kaben.na...@gmail.com, Apr 27, 2010
What steps will reproduce the problem?
At the Python console, type:
import scrapemark
scrapemark.scrape(
    '{* <a href="{{ [links].url }}">{{ [links].title }}</a> *}',
    html = '<a href="">Some text</a>' )
What is the expected output? What do you see instead?
Expected:
{'links': [{'title': u'Some text', 'url': u''}]}
Actual:
Traceback (most recent call last):
File "<stdin>", line 3, in <module>
File "scrapemark.py", line 35, in scrape
return pattern.scrape(html, url, get, post, headers, cookie_jar)
File "scrapemark.py", line 93, in scrape
if _match(self._nodes, _remove_comments(html), 0, captures, url, cookie_jar) == -1:
File "scrapemark.py", line 370, in _match
if not _run_special_nodes(special, html[i:], captures, base_url, cookie_jar):
File "scrapemark.py", line 391, in _run_special_nodes
if not _run_special_node(node, s, captures, base_url, cookie_jar):
File "scrapemark.py", line 403, in _run_special_node
i = _match(node[1], s, i, nested_captures, base_url, cookie_jar)
File "scrapemark.py", line 350, in _match
attrs_matched = _match_attrs(node[4], attrs, nested_captures, base_url, cookie_jar)
File "scrapemark.py", line 379, in _match_attrs
m = attr_node[0].match(attrs[name])
TypeError: expected string or buffer
What version of the product are you using? On what operating system?
scrapemark-0.9, from the source distribution
Mac OS X Version 10.6.3
Python 2.6.1 (r261:67515, Feb 11 2010, 00:51:29)
[GCC 4.2.1 (Apple Inc. build 5646)] on darwin
Please provide any additional information below.
Below is a workaround: