import os
- import requests
- import threading
- from faker import Faker
# Shared Faker instance (zh_CN locale); the download helpers call
# faker.user_agent() on it to obtain a random User-Agent per request.
faker = Faker(locale="zh_CN")
- from pyquery import PyQuery as pq
- '''下载单本漫画'''
def get_doc(url, timeout=30):
    """Fetch ``url`` and return its HTML parsed as a PyQuery document.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        A PyQuery object built from the text of the response body.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # A fresh random User-Agent is generated for every request.
    headers = {'User-Agent': faker.user_agent()}
    response = requests.get(url, headers=headers, timeout=timeout)
    return pq(response.text)
def make_file(file_path, file_name):
    """Ensure the directory ``file_path/file_name`` exists and return its path.

    Args:
        file_path: Parent directory in which the folder should live.
        file_name: Name of the folder to create (may contain sub-folders).

    Returns:
        The joined path ``os.path.join(file_path, file_name)``.

    Raises:
        FileExistsError: If the target path exists but is not a directory.
    """
    target = os.path.join(file_path, file_name)
    # exist_ok=True replaces the separate "exists?" check, so there is no
    # window in which another thread/process can create the folder between
    # the check and the makedirs call.
    os.makedirs(target, exist_ok=True)
    return target
def save_img_noreferer(path, src, retries=3, timeout=30):
    """Download the image at ``src`` into the directory ``path`` (best effort).

    The file is stored under the last ``/``-separated segment of the URL that
    was finally fetched. Request failures and unexpected status codes are
    skipped silently so that one bad image does not stop a batch download.

    Args:
        path: Existing directory in which to store the image.
        src: URL of the image.
        retries: Maximum number of extra attempts after an HTTP 522 response.
        timeout: Seconds to wait for the server on each attempt.

    Returns:
        None.
    """
    attempts_left = retries
    tried_png = False
    while True:
        # A new random User-Agent is used for every attempt.
        headers = {'User-Agent': faker.user_agent()}
        try:
            response = requests.get(src, headers=headers, timeout=timeout)
        except requests.RequestException:
            # The request itself failed (e.g. blocked by anti-crawler
            # measures, connection error, timeout): skip this image.
            return

        status = response.status_code
        if status in (200, 206):
            # The image keeps the name it has on the server.
            name = src.split('/')[-1]
            with open(os.path.join(path, name), 'wb') as f:
                f.write(response.content)
            return

        if status == 522 and attempts_left > 0:
            # 522: retry, but only a bounded number of times instead of
            # recursing without limit.
            attempts_left -= 1
            continue

        if status == 404 and not tried_png and src.endswith('.jpg'):
            # 404 may mean the image is stored as PNG. Swap only the file
            # extension, and only once, so a missing image cannot loop
            # forever or corrupt other parts of the URL containing "jpg".
            src = src[:-len('jpg')] + 'png'
            tried_png = True
            continue

        # Any other status (or exhausted retries): skip this image.
        return
# Serialises access to the shared URL list that the worker threads pop from.
glock = threading.Lock()


def save_imgs(srcs, path):
    """Worker loop for multi-threaded downloading.

    Repeatedly takes one URL off the end of the shared list ``srcs`` while
    holding ``glock`` and downloads it into ``path`` with
    ``save_img_noreferer``. Returns once the list is empty.

    Args:
        srcs: Shared, mutable list of image URLs; it is consumed in place.
        path: Directory passed on to ``save_img_noreferer``.
    """
    while True:
        with glock:
            if not srcs:
                return
            next_src = srcs.pop()
        # The download runs outside the lock so workers proceed in parallel.
        save_img_noreferer(path, next_src)
def main():
    """Interactively download every page of one comic into ``./本子/<title>``.

    Prompts for the comic's URL, reads the page count, title and image host
    from that page, builds one image URL per page and downloads them with a
    pool of worker threads running ``save_imgs``. Returns only after all
    worker threads have finished.
    """
    # Strip pasted whitespace and a trailing slash; otherwise the last URL
    # segment (and therefore the comic id) would be empty.
    url_main = input('请输入下载漫画的地址:').strip().rstrip('/')
    # Comic id: the last URL segment, cut at the first letter 'o'.
    num = url_main.split('/')[-1].split('o')[0]
    doc = get_doc(url_main)

    # Page count: first space-separated token of the selected element's text.
    page = doc('.ld_box > div:nth-child(2) > .ld_body').text().split(' ')[0]
    title = doc('#comicdetail > h1').text() + 'id=' + num + 'page=' + page
    # Host of the image server: taken from the value of the third attribute
    # of the first matching <img>.
    # NOTE(review): presumably that attribute holds the image URL - confirm.
    src_format = doc('[align=center]>img')[0].items()[2][1].split('/')[2]
    print('当前漫画服务器地址为:'+src_format+',数字越大下载越慢,请耐心等待')
    pic_format = 'jpg'  # default image format

    # Remove characters that are not allowed in file names.
    title = title.translate(str.maketrans('', '', '\\/:*?"<>|'))
    print('开始下载:'+title)
    path = make_file('./本子', title)
    print(title+'用有'+page+'页')

    srcs = [
        'https://{}/galleries/{}/{}.{}'.format(src_format, num, i, pic_format)
        for i in range(1, int(page) + 1)
    ]

    # At most 64 workers; more threads than pages would only start and exit.
    worker_count = min(64, len(srcs))
    workers = [
        threading.Thread(target=save_imgs, args=(srcs, path))
        for _ in range(worker_count)
    ]
    for worker in workers:
        worker.start()
    # Wait for the downloads so main() returns only when they are finished.
    for worker in workers:
        worker.join()
# Start the interactive downloader only when run as a script, not on import.
if __name__ == "__main__":
    main()
|