Closed CNMan closed 7 years ago
感觉发到 XXNET 那边可能人多一点,这边基本上没人说话。
简单实现了一个,没有做详细测试,希望可行:
import sys, requests, threading
# Maximum number of worker threads that may be running at the same time.
max_thread_num = 200

# First number of the range to probe (inclusive): 1000000000000.
num_from = 10 ** 12

# End of the range (exclusive): 1300000000000, so the last number
# probed is 1299999999999.
num_to = 13 * 10 ** 11
class GET_URL():
    """Probe a numeric range of URLs with worker threads and log non-404 hits.

    Each number in ``[num_from, num_to)`` is turned into
    ``url_prefix + str(number) + url_suffix`` and fetched with
    ``requests.get``.  Every URL whose status code is not 404 is appended,
    one per line, to ``output_path``.

    Parameters:
        num_from: first number to probe (inclusive).
        num_to: end of the range (exclusive).
        max_thread_num: upper bound on concurrently running workers.
        url_prefix: text placed before the number in each URL.
        url_suffix: text placed after the number in each URL.
        output_path: file that receives the non-404 URLs (append mode).
        timeout: seconds passed to ``requests.get``; ``None`` waits forever.

    Attributes:
        lock: guards ``num_from``, ``thread_num``, the output file and the
            progress/error output.
        thread_num: number of workers currently accounted as running.
    """

    def __init__(self, num_from, num_to, max_thread_num,
                 url_prefix="http://www.example.com/example/",
                 url_suffix=".png",
                 output_path="status_code.txt",
                 timeout=30):
        self.lock = threading.Lock()
        self.num_from = num_from
        self.num_to = num_to
        self.thread_num = 0
        self.max_thread_num = max_thread_num
        self.url_prefix = url_prefix
        self.url_suffix = url_suffix
        self.output_path = output_path
        self.timeout = timeout

    def _next_url(self):
        """Atomically claim the next number; return its URL or None when done."""
        # The check and the increment happen under one lock so that two
        # workers can never claim the same number or skip one.
        with self.lock:
            if self.num_from >= self.num_to:
                return None
            number = self.num_from
            self.num_from += 1
        return self.url_prefix + str(number) + self.url_suffix

    def _record(self, url):
        """Append ``url`` to the output file while holding the lock."""
        # ``with`` releases the lock and closes the file even if the write
        # fails, so a disk error cannot leave the lock held.
        with self.lock:
            with open(self.output_path, "a") as out:
                out.write(str(url) + "\n")

    def worker(self):
        """Thread body: fetch claimed URLs until the range is exhausted.

        A failed request is reported on stderr and the worker moves on to
        the next URL.  On exit, for any reason, ``thread_num`` is
        decremented and the progress line on stdout is refreshed.
        """
        try:
            while True:
                url = self._next_url()
                if url is None:
                    break
                try:
                    r = requests.get(url, timeout=self.timeout)
                except requests.RequestException as exc:
                    # One unreachable URL must not end the whole worker.
                    with self.lock:
                        sys.stderr.write(
                            "Request failed: %s (%s)\n" % (url, exc))
                    continue
                if r.status_code != 404:
                    self._record(url)
        finally:
            with self.lock:
                self.thread_num -= 1
                sys.stdout.write("Threads exist: %d \r" % self.thread_num)
                sys.stdout.flush()

    def start(self):
        """Start as many workers as free thread slots and remaining work allow.

        Returns immediately without printing anything when no thread slot
        is free.  Does not wait for the started workers to finish.
        """
        with self.lock:
            new_thread_num = self.max_thread_num - self.thread_num
            remaining = self.num_to - self.num_from
        if new_thread_num < 1:
            return
        # Never start more workers than there are numbers left to probe.
        if new_thread_num > remaining:
            new_thread_num = remaining
        print("Running, please wait...")
        for _ in range(new_thread_num):
            # Count the worker before it starts so its own decrement in
            # ``worker`` can never run ahead of this increment.
            with self.lock:
                self.thread_num += 1
            t = threading.Thread(target=self.worker)
            t.start()
# Build the scanner from the module-level settings and launch its worker
# threads; start() returns without waiting for the workers to finish.
get_url = GET_URL(
    num_from=num_from,
    num_to=num_to,
    max_thread_num=max_thread_num,
)
get_url.start()
@xyuanmu 想下载个1.2.9版本使用,可是为什么windows版本会被chrome的下载器拦截呢?说会危害计算机安全,1.3.0没问题。
chrome 的拦截不是太准确。一般情况可以忽略。
@xyuanmu 多谢!
需要尝试下载以下范围内的全部链接(只记录非 404 的链接地址亦可): http://www.example.com/example/1000000000000.png ...... http://www.example.com/example/1299999999999.png
因为数量巨大,需要多线程(最好能自定义多少个线程),麻烦大神弄个python脚本,多谢:)