# -*- coding: utf-8 -*-

import multiprocessing
import os
from urllib.parse import urljoin, urlsplit

import requests
from lxml import etree
from retrying import retry

# Build the URL pool: one listing-page URL for every category id 16..25
# combined with every page index 0..24 (250 URLs in total).
urllist = [
    "http://www.zhuangxiule.cn/c{}p{}/".format(i, x)
    for i in range(16, 26)
    for x in range(0, 25)
]
def _save_image(image_url, save_dir, timeout):
    """Download one image and write it into ``save_dir``.

    Args:
        image_url: Absolute http(s) URL of the image.
        save_dir: Directory the file is written to (created if missing).
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The path of the written file, or ``None`` when the URL has no usable
        file name or the download failed.
    """
    # Take the name from the URL *path* so query strings do not end up in it.
    filename = os.path.basename(urlsplit(image_url).path)
    if not filename:
        return None
    try:
        response = requests.get(image_url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as exc:
        # One broken image must not abort (and re-trigger) the whole page.
        print("skip image {}: {}".format(image_url, exc))
        return None
    # exist_ok avoids the check-then-create race between worker processes.
    os.makedirs(save_dir, exist_ok=True)
    target = os.path.join(save_dir, filename)
    with open(target, "wb") as f:
        f.write(response.content)
    return target


@retry(stop_max_attempt_number=3)
def get_data(url, save_dir="f:/img/", timeout=3):
    """Scrape one listing page and download every image of its detail pages.

    The listing page is fetched and each child of the
    ``dl.list-left.public-box`` element that carries a title and a link is
    followed; every ``<img src>`` found on the linked detail page is saved
    into ``save_dir``.

    Args:
        url: URL of the listing page.
        save_dir: Directory images are written to.
        timeout: Timeout in seconds for every HTTP request.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched; the ``retry`` decorator re-runs the call up to 3 times
            before the error propagates.
    """
    response = requests.get(url, timeout=timeout)
    listing = etree.HTML(response.content)
    if listing is None:
        return

    # Match the title and the detail-page URL of each entry on the listing.
    entries = listing.xpath(
        "//div[@class='main']/dl[@class='list-left public-box']/*"
    )
    for entry in entries:
        titles = entry.xpath("./a/span/text()")
        hrefs = entry.xpath("./a/@href")
        if not titles or not hrefs:
            # Nothing to follow without both a title and a link.
            continue
        print(titles)

        # Request the detail page; hrefs may be relative to the listing URL.
        detail_url = urljoin(url, hrefs[0])
        try:
            detail = requests.get(detail_url, timeout=timeout)
            detail.raise_for_status()
        except requests.RequestException as exc:
            print("skip page {}: {}".format(detail_url, exc))
            continue
        detail_page = etree.HTML(detail.content)
        if detail_page is None:
            continue

        # Download every image referenced by the detail page.
        for src in detail_page.xpath("//img/@src"):
            image_url = urljoin(detail_url, src)
            if urlsplit(image_url).scheme not in ("http", "https"):
                # e.g. inline "data:" URIs, which requests cannot fetch.
                continue
            _save_image(image_url, save_dir, timeout)
if __name__ == "__main__":
    # Create the process pool (5 worker processes).
    pool = multiprocessing.Pool(5)
    try:
        # Apply get_data to every URL in the pool; blocks until all are done.
        pool.map(get_data, urllist)
    finally:
        # Always release the workers, even when a task raised.
        pool.close()
        pool.join()

# (Web-page residue from the original post: "Bookmark" / "Print" buttons; not part of the script.)