from scrapy import Spider, Request
from scrapy.crawler import CrawlerProcess
from scrapy_playwright.page import PageMethod
class Bookspider(Spider):
    """Scrape name, price and stock of books listed on books.toscrape.com.

    Every catalogue page is requested through scrapy-playwright with
    ``playwright_include_page`` enabled, so the callback receives the live
    Playwright page and is responsible for closing it.
    """

    name = "book"

    def start_requests(self):
        """Yield one Playwright-backed request for each of catalogue pages 1-4."""
        for i in range(1, 5):
            url = f"https://books.toscrape.com/catalogue/page-{i}.html"
            yield Request(
                url=url,
                callback=self.parse,
                # Close the page as well when the request fails before
                # ``parse`` gets a chance to run.
                errback=self.errback_close_page,
                meta={
                    "playwright": True,
                    "playwright_include_page": True,
                },
            )

    async def parse(self, response):
        """Yield one ``{'name', 'price', 'stock'}`` dict per book on the page.

        A field is ``None`` when its element is missing from the article.
        The Playwright page is closed once extraction ends, including when
        extraction raises.
        """
        print('info is ',response.meta)
        page = response.meta["playwright_page"]
        try:
            all_items = await page.query_selector_all('//*[@id="default"]/div[1]/div/div/div/section/div[2]/ol/li/article')
            for item in all_items:
                yield {
                    'name': await self._inner_text(item, '//h3'),
                    'price': await self._inner_text(item, '//*[@class="price_color"]'),
                    'stock': await self._inner_text(item, '.availability'),
                }
        finally:
            # Without this, any error raised above would leave the page open.
            await page.close()

    async def errback_close_page(self, failure):
        """Close the Playwright page attached to a failed request, if any."""
        page = failure.request.meta.get("playwright_page")
        if page is not None:
            await page.close()

    @staticmethod
    async def _inner_text(item, selector):
        """Return the inner text of ``selector`` under ``item``, or ``None`` if absent."""
        element = await item.query_selector(selector)
        if element is None:
            return None
        return await element.inner_text()
if __name__ == "__main__":
    # Imported here so the script entry point is self-contained.
    from scrapy.utils.project import get_project_settings

    # A bare CrawlerProcess() runs with Scrapy's built-in defaults only, so
    # options set in the project's settings.py (e.g. SCHEDULER,
    # DUPEFILTER_CLASS, DOWNLOAD_HANDLERS, TWISTED_REACTOR) would be ignored.
    # Passing the project settings makes the script honour them.
    process = CrawlerProcess(get_project_settings())
    process.crawl(Bookspider)
    # Starts the reactor and blocks until the crawl has finished.
    process.start()
I used the scrapy_playwright library to test how it works. However, after configuring the project to use scrapy-redis, I found that no requests are recorded in Redis. I need your help, thanks.
I used the scrapy_playwright library to test how it works. However, after configuring the project to use scrapy-redis, I found that no requests are recorded in Redis. I need your help, thanks.