Closed damian0604 closed 6 years ago
sudo pip3 install cssselect
In [1]: import requests
In [2]: from lxml.html import fromstring
In [3]: response = requests.get('https://www.nu.nl/buitenland/5176460/britse-regering-zet-23-russische-diplomaten-vergiftiging-skripal.html')
In [4]: tree = fromstring(response.text)
In [5]: tree.xpath('//h1/text()')
Out[5]:
['\n Britse regering zet 23 Russische diplomaten uit om vergiftiging Skripal\n ',
'\n ']
# of, met een CSS-selector in plaats van XPath:
In [9]: e = tree.cssselect('.fluid')[0]
In [10]: e.text
Out[10]: '\n Britse regering zet 23 Russische diplomaten uit om vergiftiging Skripal\n '
links:
# coding: utf-8
# In[8]:
import requests
from lxml.html import fromstring
# Fetch the sueddeutsche.de search-results page (query "corporate social
# responsibility", sorted by date) and parse the HTML into an lxml tree.
# The timeout keeps the cell from blocking forever if the server never responds.
tree = fromstring(requests.get('http://www.sueddeutsche.de/news?search=corporate+social+responsibility&sort=date&all%5B%5D=dep&all%5B%5D=typ&all%5B%5D=sys&all%5B%5D=time', timeout=30).text)
# Direct text nodes of every element whose class attribute is exactly "entrylist__title".
title = tree.xpath('//*[@class="entrylist__title"]/text()')
print(title)
# In[ ]:
# Direct text nodes of every element whose class attribute is exactly "entrylist__overtitle".
overline = tree.xpath('//*[@class="entrylist__overtitle"]/text()')
# Print the extracted list itself; a bare print() would only emit an empty line.
print(overline)
# In[10]:
# All <a> elements whose class attribute is exactly "entrylist__link".
links = tree.xpath('//a[@class="entrylist__link"]')
# In[11]:
# First matching anchor, kept for interactive inspection; None when no anchor matched,
# so an empty result does not abort the run with an IndexError.
l1 = links[0] if links else None
# In[14]:
# "echtelinks" is Dutch for "real links": the href attribute of each matched anchor.
echtelinks = [l.attrib["href"] for l in links]
# In[15]:
# Bare expression so the notebook displays the list as the cell's output.
echtelinks
xpath checker eruit; de voorbeelden kloppen niet meer
eventueel ook regexp eruit?
misschien css selector erin?