Open ldybyz opened 4 years ago
from pyspider.libs.base_handler import *


class Handler(BaseHandler):
    """pyspider handler that collects movie entries from the Douban explore page.

    The start page is requested with ``fetch_type='js'`` and an injected
    script that clicks the ``.more`` element; the rendered document is then
    parsed by :meth:`phantomjs_parser`.
    """

    crawl_config = {
    }

    @every(minutes=24 * 60)
    def on_start(self):
        """Queue the explore page for a JS-rendered fetch (period: 24 * 60 minutes)."""
        # NOTE(review): the click is deferred by 2000 ms via setTimeout. Whether
        # the JS fetcher is still holding the page open when that timer fires
        # cannot be determined from this script alone -- confirm against the
        # fetcher's timing, since a snapshot taken earlier would only contain
        # the initially loaded items.
        self.crawl(
            'https://movie.douban.com/explore#!type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start=0',
            fetch_type='js',
            js_script=""" function() { setTimeout("$('.more').click();console.log('finish');", 2000); }""",
            callback=self.phantomjs_parser,
        )

    @config(age=10 * 24 * 60 * 60)
    def phantomjs_parser(self, response):
        """Return one ``{"rate", "url"}`` dict per ``a.item`` anchor in the response.

        ``rate`` is the text of the ``p strong`` element inside the anchor and
        ``url`` is the anchor's ``href`` attribute.
        """
        return [{
            "rate": x('p strong').text(),
            "url": x.attr.href,
        } for x in response.doc('a.item').items()]
This is the code. I have tried many variations and the js_script does run, but the returned data is only what the page contains on first load.
phantomjs_1 | phantomjs fetcher running on port 25555
from pyspider.libs.base_handler import *


class Handler(BaseHandler):
    """pyspider handler that requests the Douban explore page with a JS fetch."""

    crawl_config = {
    }

    @every(minutes=24 * 60)
    def on_start(self):
        """Queue the explore page for a JS-rendered fetch (period: 24 * 60 minutes)."""
        # NOTE(review): the callback ``self.phantomjs_parser`` is not defined in
        # this excerpt -- presumably it is declared further down in the full
        # script; verify before running.
        self.crawl(
            'https://movie.douban.com/explore#!type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start=0',
            fetch_type='js',
            js_script=""" function() { setTimeout("$('.more').click();console.log('finish');", 2000); }""",
            callback=self.phantomjs_parser,
        )
This is the code. I have tried many variations and the js_script does run, but the returned data is only what the page contains on first load.