In [12]: br = mechanize.Browser()
In [13]: br.open("http://www.example.com/")
Out[13]: <response_seek_wrapper at 0x3717bc0 whose wrapped object = <closeable_response at 0x371d328 whose fp = <socket._fileobject object at 0x03615E70>>>
In [14]: br.forms()
ValueError Traceback (most recent call last)
<ipython-input-20-3214840519ff> in <module>()
----> 1 for form in br.forms():
2 pass
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_mechanize.pyc in forms(self)
418 if not self.viewing_html():
419 raise BrowserStateError("not viewing HTML")
--> 420 return self._factory.forms()
421
422 def global_form(self):
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_html.pyc in forms(self)
555 try:
556 self._forms_genf = CachingGeneratorFunction(
--> 557 self._forms_factory.forms())
558 except: # XXXX define exception!
559 self.set_response(self._response)
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_html.pyc in forms(self)
235 _urljoin=_rfc3986.urljoin,
236 _urlparse=_rfc3986.urlsplit,
--> 237 _urlunparse=_rfc3986.urlunsplit,
238 )
239 self.global_form = forms[0]
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_form.pyc in ParseResponseEx(response, select_default, form_parser_class,
request_class, entitydefs, encoding, _urljoin, _urlparse, _urlunparse)
842 _urljoin=_urljoin,
843 _urlparse=_urlparse,
--> 844 _urlunparse=_urlunparse,
845 )
846
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_form.pyc in _ParseFileEx(file, base_uri, select_default, ignore_errors, form_parser_class, request_class, entitydefs, backwards_compat, encoding, _urljoin, _urlparse, _urlunparse)
979 data = file.read(CHUNK)
980 try:
--> 981 fp.feed(data)
982 except ParseError, e:
983 e.base_uri = base_uri
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_form.pyc in feed(self, data)
756 def feed(self, data):
757 try:
--> 758 _sgmllib_copy.SGMLParser.feed(self, data)
759 except _sgmllib_copy.SGMLParseError, exc:
760 raise ParseError(exc)
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_sgmllib_copy.pyc in feed(self, data)
108
109 self.rawdata = self.rawdata + data
--> 110 self.goahead(0)
111
112 def close(self):
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_sgmllib_copy.pyc in goahead(self, end)
190 if match:
191 name = match.group(1)
--> 192 self.handle_charref(name)
193 i = match.end(0)
194 if rawdata[i-1] != ';': i = i-1
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_form.pyc in handle_charref(self, name)
652 def handle_charref(self, name):
653 #debug("%s", name)
--> 654 self.handle_data(unescape_charref(name, self._encoding))
655
656 def unescape_attr(self, name):
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_form.pyc in unescape_charref(data, encoding)
147 if name.startswith("x"):
148 name, base= name[1:], 16
--> 149 uc = unichr(int(name, base))
150 if encoding is None:
151 return uc
ValueError: invalid literal for int() with base 10: '176C'
In [21]: br = mechanize.Browser()
In [22]: br.open(url)
Out[22]: <response_seek_wrapper at 0x371acb0 whose wrapped object = <closeable_response at 0x371ac60 whose fp = <socket._fileobject object at 0x03615F70>>>
In [23]: for form in br.forms():
....: pass
....:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-23-3214840519ff> in <module>()
----> 1 for form in br.forms():
2 pass
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_mechanize.pyc in forms(self)
418 if not self.viewing_html():
419 raise BrowserStateError("not viewing HTML")
--> 420 return self._factory.forms()
421
422 def global_form(self):
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_html.pyc in forms(self)
555 try:
556 self._forms_genf = CachingGeneratorFunction(
--> 557 self._forms_factory.forms())
558 except: # XXXX define exception!
559 self.set_response(self._response)
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_html.pyc in forms(self)
235 _urljoin=_rfc3986.urljoin,
236 _urlparse=_rfc3986.urlsplit,
--> 237 _urlunparse=_rfc3986.urlunsplit,
238 )
239 self.global_form = forms[0]
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_form.pyc in ParseResponseEx(response, select_default, form_parser_class,
request_class, entitydefs, encoding, _urljoin, _urlparse, _urlunparse)
842 _urljoin=_urljoin,
843 _urlparse=_urlparse,
--> 844 _urlunparse=_urlunparse,
845 )
846
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_form.pyc in _ParseFileEx(file, base_uri, select_default, ignore_errors, form_parser_class, request_class, entitydefs, backwards_compat, encoding, _urljoin, _urlparse, _urlunparse)
979 data = file.read(CHUNK)
980 try:
--> 981 fp.feed(data)
982 except ParseError, e:
983 e.base_uri = base_uri
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_form.pyc in feed(self, data)
756 def feed(self, data):
757 try:
--> 758 _sgmllib_copy.SGMLParser.feed(self, data)
759 except _sgmllib_copy.SGMLParseError, exc:
760 raise ParseError(exc)
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_sgmllib_copy.pyc in feed(self, data)
108
109 self.rawdata = self.rawdata + data
--> 110 self.goahead(0)
111
112 def close(self):
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_sgmllib_copy.pyc in goahead(self, end)
190 if match:
191 name = match.group(1)
--> 192 self.handle_charref(name)
193 i = match.end(0)
194 if rawdata[i-1] != ';': i = i-1
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_form.pyc in handle_charref(self, name)
652 def handle_charref(self, name):
653 #debug("%s", name)
--> 654 self.handle_data(unescape_charref(name, self._encoding))
655
656 def unescape_attr(self, name):
c:\python27\lib\site-packages\mechanize-0.2.5-py2.7.egg\mechanize\_form.pyc in unescape_charref(data, encoding)
147 if name.startswith("x"):
148 name, base= name[1:], 16
--> 149 uc = unichr(int(name, base))
150 if encoding is None:
151 return uc
ValueError: invalid literal for int() with base 10: '176C'
Hi,
I have the same problem, and it seems to be very common these days.
I don't know whether it will ever be fixed, but until then mechanize will remain useless :(
In [10]: import re
In [11]: import mechanize
In [12]: br = mechanize.Browser()
In [13]: br.open("http://www.example.com/")
Out[13]: <response_seek_wrapper at 0x3717bc0 whose wrapped object = <closeable_response at 0x371d328 whose fp = <socket._fileobject object at 0x03615E70>>>
In [14]: br.forms()