Closed asmcos closed 1 year ago
The XPath of the page has changed; the corrected XPath code is as follows.
```python
def _gather_pages(self, stock, page):
    ....
    # gather the content of the first page
    page = etree.HTML(response.text)
    trs = page.xpath('//*[@id="mainlist"]/div/ul/li[1]/table/tbody/tr')
    have_one = False
    for item in trs:
        have_one = True
        read_amount = item.xpath("./td[1]//text()")[0]
        comments = item.xpath("./td[2]//text()")[0]
        title = item.xpath("./td[3]/div/a//text()")[0]
        content_link = item.xpath("./td[3]/div/a/@href")[0]
        author = item.xpath("./td[4]//text()")[0]
        time = item.xpath("./td[5]//text()")[0]
        tmp = pd.DataFrame([read_amount, comments, title, content_link, author, time]).T
        columns = [ "read amount", "comments", "title", "content link", "author", "create time" ]
        tmp.columns = columns
        self.dataframe = pd.concat([self.dataframe, tmp])
        #print(title)
    if have_one == False:
        return "break"
    ...
```
Thank you very much for the information! We have updated and checked the code. Please take a look!
The XPath of the page has changed; the corrected XPath code is as follows.