# 多线程爬取URL状态,检查URL是否有效
import os
import threading
import time
import urllib
import urllib.request
# URLs that answered HTTP 200; shared (append-only) across worker threads.
result = []
# Persist scan hits so results survive the (long-running) process.
def record_result(con):
    """Append one scan result to result.log in the current working directory.

    con: the text to record (a URL here); a trailing newline is added.
    """
    path = os.path.join(os.getcwd(), "result.log")
    # Mode "a" keeps results from earlier runs; the with-block guarantees
    # the handle is closed even if the write raises.
    with open(path, "a", encoding="utf-8") as out:
        out.write(con + "\n")
# Probe a candidate URL and record it when it exists (HTTP 200).
def get_status(url):
    """Request *url* with a 1-second timeout; on HTTP 200, append it to the
    module-level `result` list and log it via record_result().

    Any network failure (timeout, DNS error, HTTPError, connection reset)
    or malformed URL is treated as "candidate does not exist" and ignored —
    this is a deliberate best-effort scan, not a bug swallow.
    """
    try:
        # Close the response explicitly; the original leaked the socket.
        with urllib.request.urlopen(url, timeout=1) as resp:
            code = resp.getcode()
        if code == 200:
            result.append(url)
            record_result(url)
    except (OSError, ValueError):
        # URLError/HTTPError are OSError subclasses; ValueError covers
        # malformed URL strings.
        pass
# Shared counter state for candidate-URL generation (used by gen_url):
# a 7-digit sequence number plus an epoch-seconds timestamp.
nus = 0
# Scan window in epoch seconds; presumably ~2020-09-27/28 in the author's
# local timezone — TODO confirm, since strftime uses time.localtime.
tim = 1601222400
end = 1601265600
# end = 1601308800
# Worker body: claim the next counter value and probe the derived URLs.
# The original let 1000 threads read-modify-write the bare globals, a data
# race that skips and duplicates candidates; a lock serializes the counter.
_counter_lock = threading.Lock()

def gen_url(num):
    """Thread target: repeatedly claim the next (timestamp, sequence) pair
    and probe the three file-extension variants of the candidate URL.

    num: thread index; kept for interface compatibility, unused in the body.
    """
    global nus, tim
    while True:
        # Atomically claim one counter value, then release the lock before
        # the slow network probes.
        with _counter_lock:
            if tim >= end:
                return
            cur_tim, cur_nus = tim, nus
            if nus == 9999999:
                nus = 0
                tim += 1
            else:
                nus += 1
        dat = time.strftime("%m%d%H%M%S", time.localtime(cur_tim))
        urn = dat + str(cur_nus).zfill(7)
        print(urn)
        url = "https://aliyuncs.com/20" + urn
        get_status(url + ".jpg")
        get_status(url + ".png")
        get_status(url + ".xlsx")
def main():
    """Spawn the worker threads that enumerate and probe candidate URLs,
    then wait for all of them to finish before reporting completion."""
    # Main thread banner.
    print('这是主线程:', threading.current_thread().name)
    thread_list = []
    # Create the workers first, then start them.
    for i in range(1000):
        t = threading.Thread(target=gen_url, args=(i,))
        thread_list.append(t)
    for t in thread_list:
        t.start()
    # The original slept and never joined, so the "finished" message could
    # print while workers were still running; join for a true completion.
    for t in thread_list:
        t.join()
    print('主线程结束!' + threading.current_thread().name)
# main()  # entry point left disabled by the author
print(result)  # NOTE: with main() commented out this always prints []
# Last modification: February 28th, 2021 at 03:38 pm
# © Reprint permitted with proper attribution