Hi all :)
I have an issue.
When I don't use pymongo in Scrapy (after configuring scrapy-fake-useragent in settings), Scrapy runs fine.
But when I do use pymongo, Scrapy never works — it keeps stopping.
Can you tell me what the problem is? :)
This is the Scrapy code to reproduce the issue:
import scrapy
import pymongo
import requests
connection = pymongo.MongoClient()
# connection = pymongo.MongoClient()
db_link = connection.CoupangLinkTestDB
db_product = connection.CoupangLinkProductTestDB
class TesterSpider(scrapy.Spider):
name = 'tester4'
def start_requests(self):
yield scrapy.Request(url='https://www.amazon.jp', callback=self.router_start, dont_filter=True)
def router_start(self, response):
link = ['https://www.amazon.com/s/ref=s9_acss_bw_cts_Computer_T1_w?fst=as%3Aoff&rh=n%3A16225007011%2Cn%3A172456&bbn=16225007011&ie=UTF8&qid=1487012920&rnid=16225007011&pf_rd_m=ATVPDKIKX0DER&pf_rd_s=merchandised-search-4&pf_rd_r=PF0Q3FXYDG3N1QMXFA8Y&pf_rd_t=101&pf_rd_p=74069509-93ef-4a3c-8dca-a9e3fa773a64&pf_rd_i=16225007011','https://www.amazon.com/s?bbn=16225007011&rh=n%3A16225007011%2Cn%3A193870011&dc&fst=as%3Aoff&pf_rd_i=16225007011&pf_rd_m=ATVPDKIKX0DER&pf_rd_p=74069509-93ef-4a3c-8dca-a9e3fa773a64&pf_rd_r=PF0Q3FXYDG3N1QMXFA8Y&pf_rd_s=merchandised-search-4&pf_rd_t=101&qid=1487012920&rnid=16225007011&ref=s9_acss_bw_cts_Computer_T2_w','https://www.amazon.com/s?bbn=16225007011&rh=n%3A16225007011%2Cn%3A13896617011&dc&fst=as%3Aoff&pf_rd_i=16225007011&pf_rd_m=ATVPDKIKX0DER&pf_rd_p=74069509-93ef-4a3c-8dca-a9e3fa773a64&pf_rd_r=PF0Q3FXYDG3N1QMXFA8Y&pf_rd_s=merchandised-search-4&pf_rd_t=101&qid=1487012920&rnid=16225007011&ref=s9_acss_bw_cts_Computer_T3_w','https://www.amazon.com/s?bbn=16225007011&rh=n%3A16225007011%2Cn%3A172504&dc&fst=as%3Aoff&pf_rd_i=16225007011&pf_rd_m=ATVPDKIKX0DER&pf_rd_p=74069509-93ef-4a3c-8dca-a9e3fa773a64&pf_rd_r=PF0Q3FXYDG3N1QMXFA8Y&pf_rd_s=merchandised-search-4&pf_rd_t=101&qid=1487012920&rnid=16225007011&ref=s9_acss_bw_cts_Computer_T4_w']
for i in link:
yield scrapy.Request(url=i, callback=self.product_1, priority=20000, dont_filter=True, meta={'link':i})
print('product_1 request')
for i in link:
yield scrapy.Request(url=i, callback=self.product_2, priority=1000, dont_filter=True, meta={'link':i})
print('product_2 request')
for i in link:
yield scrapy.Request(url=i, callback=self.product_3, priority=100, dont_filter=True, meta={'link':i})
print('product_3 request')
for i in link:
yield scrapy.Request(url=i, callback=self.product_4, priority=10, dont_filter=True, meta={'link':i})
print('product_4 request')
# db_start = 40
# db_end = 50
# for i in db_link.product_6.find({})[db_start:db_end]:
# yield scrapy.Request(url=i['link'], callback=self.product_1, priority=20000, dont_filter=True, meta={'link':i['link']})
# print('product_1 request')
# for i in db_link.product_6.find({})[db_start:db_end]:
# yield scrapy.Request(url=i['link'], callback=self.product_2, priority=1000, dont_filter=True, meta={'link':i['link']})
# print('product_2 request')
# for i in db_link.product_6.find({})[db_start:db_end]:
# yield scrapy.Request(url=i['link'], callback=self.product_3, priority=100, dont_filter=True, meta={'link':i['link']})
# print('product_3 request')
# for i in db_link.product_6.find({})[db_start:db_end]:
# yield scrapy.Request(url=i['link'], callback=self.product_4, priority=10, dont_filter=True, meta={'link':i['link']})
# print('product_4 request')
def product_1(self, response):
print('product_1 run')
yield scrapy.Request(url=response.meta['link'] + ' ', callback=self.product, dont_filter=True)
def product_2(self, response):
print('product_2 run')
yield scrapy.Request(url=response.meta['link'] + ' ', callback=self.product, dont_filter=True)
def product_3(self, response):
print('product_3 run')
yield scrapy.Request(url=response.meta['link'] + ' ', callback=self.product, dont_filter=True)
def product_4(self, response):
print('product_4 run')
yield scrapy.Request(url=response.meta['link'] + ' ', callback=self.product, dont_filter=True)
def product(self, response):
for i in range(1,1000):
print(i)
Hi @doi-h! This would definitely be a good question for https://stackoverflow.com/. Make sure to provide errors/logs with your question as well. Thanks!
Hi all :) I have an issue. When I don't use pymongo in Scrapy (after configuring scrapy-fake-useragent in settings), Scrapy runs fine. But when I do use pymongo, Scrapy never works — it keeps stopping. Can you tell me what the problem is? :)
This is the Scrapy code to reproduce the issue.
This is the settings.py: