jsumners / feedparser

Automatically exported from code.google.com/p/feedparser
Other
0 stars 0 forks source link

TypeError in isProbablyDownloadable() #388

Closed GoogleCodeExporter closed 9 years ago

GoogleCodeExporter commented 9 years ago
feedparser-5.1.3 on Python-2.7.3 on Fedora 18 x86_64

I tried this:
  import feedparser
  d = feedparser.parse('http://blog.martin-graesslin.com/blog/feed/')

And got this traceback:

/home/aron/.virtualenvs/pp/lib/python2.7/site-packages/feedparser.pyc in parse(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers, response_headers)
   3999         source.setByteStream(_StringIO(data))
   4000         try:
-> 4001             saxparser.parse(source)
   4002         except xml.sax.SAXException, e:
   4003             result['bozo'] = 1

/usr/lib64/python2.7/xml/sax/expatreader.pyc in parse(self, source)
    105         self.reset()
    106         self._cont_handler.setDocumentLocator(ExpatLocator(self))
--> 107         xmlreader.IncrementalParser.parse(self, source)
    108 
    109     def prepareParser(self, source):

/usr/lib64/python2.7/xml/sax/xmlreader.pyc in parse(self, source)
    121         buffer = file.read(self._bufsize)
    122         while buffer != "":
--> 123             self.feed(buffer)
    124             buffer = file.read(self._bufsize)
    125         self.close()

/usr/lib64/python2.7/xml/sax/expatreader.pyc in feed(self, data, isFinal)
    205             # document. When feeding chunks, they are not normally final -
    206             # except when invoked from close.
--> 207             self._parser.Parse(data, isFinal)
    208         except expat.error, e:
    209             exc = SAXParseException(expat.ErrorString(e.code), e, self)

/usr/lib64/python2.7/xml/sax/expatreader.pyc in end_element_ns(self, name)
    347             pair = tuple(pair)
    348 
--> 349         self._cont_handler.endElementNS(pair, None)
    350 
    351     # this is not used (call directly to ContentHandler)

/home/aron/.virtualenvs/pp/lib/python2.7/site-packages/feedparser.pyc in 
endElementNS(self, name, qname)
   1852                         break
   1853             localname = str(localname).lower()
-> 1854             self.unknown_endtag(localname)
   1855 
   1856         def error(self, exc):

/home/aron/.virtualenvs/pp/lib/python2.7/site-packages/feedparser.pyc in 
unknown_endtag(self, tag)
    699                 raise AttributeError()
    700             method = getattr(self, methodname)
--> 701             method()
    702         except AttributeError:
    703             self.pop(prefix + suffix)

/home/aron/.virtualenvs/pp/lib/python2.7/site-packages/feedparser.pyc in 
_end_description(self)
   1603             self._end_content()
   1604         else:
-> 1605             value = self.popContent('description')
   1606         self._summaryKey = None
   1607     _end_abstract = _end_description

/home/aron/.virtualenvs/pp/lib/python2.7/site-packages/feedparser.pyc in 
popContent(self, tag)
   1019 
   1020     def popContent(self, tag):
-> 1021         value = self.pop(tag)
   1022         self.incontent -= 1
   1023         self.contentparams.clear()

/home/aron/.virtualenvs/pp/lib/python2.7/site-packages/feedparser.pyc in 
pop(self, element, stripWhitespace)
    925         # rely on elements that we sanitize)
    926         if PARSE_MICROFORMATS and is_htmlish and element in ['content', 'description', 'summary']:
--> 927             mfresults = _parseMicroformats(output, self.baseuri, self.encoding)
    928             if mfresults:
    929                 for tag in mfresults.get('tags', []):

/home/aron/.virtualenvs/pp/lib/python2.7/site-packages/feedparser.pyc in 
_parseMicroformats(htmlSource, baseURI, encoding)
   2518     p.vcard = p.findVCards(p.document)
   2519     p.findTags()
-> 2520     p.findEnclosures()
   2521     p.findXFN()
   2522     return {"tags": p.tags, "enclosures": p.enclosures, "xfn": p.xfn, "vcard": p.vcard}

/home/aron/.virtualenvs/pp/lib/python2.7/site-packages/feedparser.pyc in 
findEnclosures(self)
   2492         enclosure_match = re.compile(r'\benclosure\b')
   2493         for elm in self.document(all, {'href': re.compile(r'.+')}):
-> 2494             if not enclosure_match.search(elm.get('rel', u'')) and not self.isProbablyDownloadable(elm):
   2495                 continue
   2496             if elm.attrMap not in self.enclosures:

/home/aron/.virtualenvs/pp/lib/python2.7/site-packages/feedparser.pyc in 
isProbablyDownloadable(self, elm)
   2451     def isProbablyDownloadable(self, elm):
   2452         attrsD = elm.attrMap
-> 2453         if 'href' not in attrsD:
   2454             return 0
   2455         linktype = attrsD.get('type', '').strip()

TypeError: argument of type 'NoneType' is not iterable

Original issue reported on code.google.com by agrif...@gmail.com on 18 Jan 2013 at 1:54

GoogleCodeExporter commented 9 years ago
It seems I can work around the problem by setting feedparser.PARSE_MICROFORMATS = 
False, but it's a shame this is a global rather than a keyword argument to 
feedparser.parse(), since it means callers on all threads must adopt the same 
behavior.

Original comment by agrif...@gmail.com on 18 Jan 2013 at 2:03

GoogleCodeExporter commented 9 years ago
I'm not seeing this behavior in master with BeautifulSoup 3.2.1 (3.2.x is the 
only tested and supported version of BeautifulSoup). However, I intend to 
completely remove the microformat parsing code Real Soon Now (TM).

Original comment by kurtmckee on 20 Jan 2013 at 7:35