# 多线程爬取URL状态,检查URL是否有效
import os
import threading
import time
import urllib
import urllib.request

# Shared list of URLs that answered HTTP 200; appended from worker threads.
result = []

# Write a scan hit to the result file.
def record_result(con):
    """Append one line of scan output to result.log in the current directory.

    Parameters:
        con: the text (a hit URL) to record.

    NOTE(review): called concurrently from many threads with no lock;
    single short appends are usually safe in practice, but confirm.
    """
    path = os.path.join(os.getcwd(), "result.log")
    # Context manager guarantees the file is closed even if write() raises.
    with open(path, "a", encoding="utf-8") as out:
        out.write(con + "\n")

# Probe the HTTP status of a URL.
def get_status(url):
    """Fetch *url* and record it (in `result` and result.log) on HTTP 200.

    Parameters:
        url: absolute URL to probe, with a 1-second timeout.

    All network failures (DNS errors, timeouts, and the HTTPError urllib
    raises for non-2xx responses) are deliberately swallowed: this is a
    best-effort scanner and misses are expected.
    """
    try:
        # `with` closes the response, avoiding socket leaks across many calls.
        with urllib.request.urlopen(url, timeout=1) as resp:
            code = resp.getcode()
    except Exception:
        return
    # Keep the bookkeeping outside the try so its own errors are not hidden.
    if code == 200:
        result.append(url)
        record_result(url)

# Scanner state shared by every worker thread.
# NOTE(review): `nus` and `tim` are read-modify-written by up to 1000
# threads with no lock, so duplicate or skipped values are possible.
nus = 0
# Unix timestamps bounding the scan window (interpreted via localtime,
# so the wall-clock meaning depends on the host timezone — confirm).
tim = 1601222400
end = 1601265600
# end = 1601308800

# Thread body: keep generating candidate URLs and probing them.
def gen_url(num):
    """Enumerate (timestamp, 7-digit counter) pairs and probe each URL.

    Parameters:
        num: thread index supplied by main(); currently unused.

    For every candidate id, three extensions (.jpg/.png/.xlsx) are tried.
    NOTE(review): the shared globals `nus`/`tim` are updated without any
    synchronization, so concurrent workers race on them — presumably an
    accepted trade-off for a brute-force scan; confirm.
    """
    global nus,tim
    while tim < end:
        # Format the timestamp as MMDDHHMMSS in *local* time (no year digits;
        # the literal "20" below supplies the leading part of the name).
        dat = time.strftime("%m%d%H%M%S", time.localtime(tim))
        # Candidate id = timestamp digits + zero-padded 7-digit sequence.
        urn = dat+str(nus).zfill(7)
        print(urn)
        url = "https://aliyuncs.com/20"+urn
        get_status(url+".jpg")
        get_status(url+".png")
        get_status(url+".xlsx")
        # Roll the counter; after 9,999,999 advance one second and restart it.
        if nus == 9999999:
            nus = 0
            tim+=1
        else:
            nus+=1

def main():
    """Spawn 1000 scanner threads, pause briefly, then return.

    The workers are intentionally not joined: the function prints its
    exit message after one second while the threads keep running.
    """
    # Report which thread is acting as the main thread.
    print('这是主线程:', threading.current_thread().name)

    # Build all worker threads first, then launch them in a second pass.
    workers = [threading.Thread(target=gen_url, args=(idx,)) for idx in range(1000)]
    for worker in workers:
        worker.start()

    # Give the workers a head start before announcing the main thread's exit.
    time.sleep(1)
    print('主线程结束!' + threading.current_thread().name)

# Entry point left disabled; uncomment to launch the scan.
# main()
# With main() disabled this prints the (still empty) shared hit list.
print(result)
# Last modification: February 28th, 2021 at 03:38 pm
# 硬币投入口  (blog-footer text — "coin slot"/donation box — copy-paste residue,
# commented out so the file remains valid Python)