# -*- coding: utf-8 -*-
import multiprocessing
import os
from urllib.parse import urljoin, urlsplit

import requests
from lxml import etree
from retrying import retry
# Build the pool of listing-page URLs: every combination of the first
# placeholder in 16..25 and the second in 0..24 (250 URLs in total).
# NOTE(review): the "c"/"p" URL segments presumably mean category and page
# number -- confirm against the site.
urllist = [
    "http://www.zhuangxiule.cn/c{}p{}/".format(category, page)
    for category in range(16, 26)
    for page in range(25)
]
@retry(stop_max_attempt_number=3)
def get_data(url, save_dir="f:/img/", timeout=3):
    """Download every image linked from the detail pages of one listing page.

    Fetches ``url``, walks the children of
    ``//div[@class='main']/dl[@class='list-left public-box']``, and for each
    child that has a title: prints the title, requests the linked detail
    page and saves every ``<img>`` found there into ``save_dir``.

    Args:
        url: Address of the listing page to crawl.
        save_dir: Directory the image files are written to; created on
            demand. Defaults to the original hard-coded ``"f:/img/"``.
        timeout: Timeout in seconds applied to every HTTP request.

    Raises:
        Exceptions from ``requests`` or file I/O propagate to the ``retry``
        decorator, which re-runs the whole function (3 attempts at most)
        before letting the last exception escape.
    """
    response = requests.get(url, timeout=timeout)
    listing = etree.HTML(response.content)
    if listing is None:
        # Nothing parseable came back (e.g. an empty body): nothing to crawl.
        return

    # Match the title and the detail-page URL of each entry on the listing page.
    entries = listing.xpath(
        "//div[@class='main']/dl[@class='list-left public-box']/*"
    )
    for entry in entries:
        # NOTE(review): the variable name was missing in the source text;
        # "title" is inferred from the XPath it is assigned from.
        title = entry.xpath("./a/span/text()")
        if not title:
            continue
        print(title)

        hrefs = entry.xpath("./a/@href")
        if not hrefs:
            # No link to follow; requesting None would only raise and burn
            # the retry attempts for the whole listing page.
            continue

        # Request the detail page and collect the address of every image on it.
        detail_url = urljoin(url, hrefs[0])
        detail_response = requests.get(detail_url, timeout=timeout)
        detail = etree.HTML(detail_response.content)
        if detail is None:
            continue

        # Create the download directory; exist_ok avoids the check-then-create
        # race between the pool's worker processes.
        os.makedirs(save_dir, exist_ok=True)

        for src in detail.xpath("//img/@src"):
            # Resolve relative sources against the page they were found on.
            image_url = urljoin(detail_url, src)
            parts = urlsplit(image_url)
            if parts.scheme not in ("http", "https"):
                # e.g. inline "data:" images, which requests cannot fetch.
                continue
            # Last path segment, without any query string or fragment.
            filename = parts.path.rsplit("/", 1)[-1]
            if not filename:
                continue

            image_response = requests.get(image_url, timeout=timeout)
            if not image_response.ok:
                # Do not store an HTTP error page under an image file name.
                continue
            with open(os.path.join(save_dir, filename), "wb") as f:
                f.write(image_response.content)
if __name__ == '__main__':
    # Create a pool of 5 worker processes.
    pool = multiprocessing.Pool(5)
    try:
        # Apply get_data to every URL in urllist; map() blocks until all
        # of them have been processed.
        pool.map(get_data, urllist)
    finally:
        # Stop accepting work and wait for the workers to exit, even when
        # map() re-raised an exception from one of them.
        pool.close()
        pool.join()
继续阅读与本文标签相同的文章

- 2019 年度 “CCF 杰出会员” 公布,清华北大等86人当选(2026-05-18,栏目: 教程)
- 3步轻松搞定Spring Boot缓存(2026-05-18,栏目: 教程)
- 5G机皇已来 三星Galaxy Note10+5G正式登陆中国(2026-05-18,栏目: 教程)
- 威特动力:从“制造”到“智造”的跨越(2026-05-18,栏目: 教程)
- Nreal携手运营商KDDI,共推日本MR生态建设(2026-05-18,栏目: 教程)
