Open ndarville opened 10 years ago
from baseparser import BaseParser
from BeautifulSoup import BeautifulSoup
class InformationParser(BaseParser):
    """Parser for news articles served from nyhederne.tv2.dk.

    NOTE(review): the class is named "Information" although both URLs below
    point at TV 2; presumably it was copied from another parser. The name is
    kept so existing references to the class keep working.
    """

    # Regex for article URLs: the host followed by one path segment made of
    # word characters. Raw string so that ``\w`` reaches the regex engine
    # intact (the previous ``(w+)`` only matched runs of the letter "w").
    # Presumably consumed by BaseParser to filter links -- confirm there.
    feeder_pat = r'^http://nyhederne.tv2.dk/(\w+)/'

    # Listing page(s) for this site.
    # Presumably the pages BaseParser scans for article links -- confirm there.
    feeder_pages = ['http://nyhederne.tv2.dk/list']

    def _parse(self, html):
        """Retrieve and serve the required fields to create an entry.

        Parses ``html`` with BeautifulSoup and stores the extracted values on
        the instance as ``meta``, ``title``, ``date``, ``byline`` and ``body``.
        Nothing is returned.

        Raises:
            AttributeError: if an expected element is absent from the page,
                because ``soup.find`` then yields ``None`` and the chained
                attribute access fails.
        """
        soup = BeautifulSoup(
            html,
            convertEntities=BeautifulSoup.HTML_ENTITIES,
            fromEncoding='utf-8',
        )

        self.meta = soup.findAll('meta')
        self.title = soup.find('h1').getText()
        self.date = soup.find('time', {'itemprop': 'datetime'}).getText()

        # The byline text is not read from the 'page-byline' div itself but
        # from the node two steps after it in parse order.
        # NOTE(review): this assumes that node is a tag (it must offer
        # getText) -- fragile if the page markup changes; confirm against a
        # live page.
        byline_div = soup.find('div', 'page-byline')
        self.byline = byline_div.next.next.getText()

        # The original line read `"" soup.find(...)`, which is a syntax
        # error; the stray empty-string literal has been removed.
        self.body = soup.find('div', 'page-body').getText()