from gevent import monkey, joinall, spawn
monkey.patch_all()
import requests
import re
import os
# Download root: a "meizitu2" folder located next to this script.
_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'meizitu2',
)
# Album page URLs; appended to by get_all_grils and read by the entry point.
girl_list = list()
def save_imgs(name, url):
    """Download the image embedded in page *url* into the album folder *name*.

    The page is fetched, the first ``img src="..."`` attribute found in its
    HTML is taken as the image address, and the image bytes are written to
    ``_DIR/<name>/<last path segment of the image address>``.

    Args:
        name: Album title used as the folder name. ``?`` is removed and
            ``:`` is replaced by a space before it is used as a path.
        url: Address of the HTML page that embeds the image.

    Returns:
        None. Progress and skipped pages are reported with ``print``.
    """
    # Presumably these characters are dropped because they are not allowed
    # in Windows file names -- TODO confirm.
    name = name.replace('?', '').replace(':', ' ')

    album_dir = os.path.join(_DIR, name)
    if not os.path.exists(album_dir):
        # makedirs also creates a missing _DIR, and exist_ok tolerates another
        # greenlet creating the same folder between the check and this call.
        os.makedirs(album_dir, exist_ok=True)
        print('create path', album_dir)

    res = requests.get(url)
    match = re.search(r'img src="(.*?)"', res.text)
    if match is None:
        # Nothing to download on this page; report it instead of crashing
        # with AttributeError on ``None.group``.
        print('no image found:', url)
        return
    source = match.group(1)

    # Header names must not contain spaces, otherwise the request carries
    # malformed header lines.
    # NOTE(review): the Referer presumably satisfies the image host's
    # hotlink check -- confirm.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'upgrade-insecure-requests': '1',
        'Referer': "https://www.mzitu.com/1",
    }

    # Download before opening the target file so that a timeout or an error
    # response does not leave an empty or bogus image file behind.
    image = requests.get(source, headers=headers, timeout=3)
    if image.status_code != 200:
        print('download failed:', source, image.status_code)
        return

    with open(os.path.join(album_dir, source.split('/')[-1]), 'wb') as f:
        f.write(image.content)
    print('download successful:', source)
def get_girl_pics(url):
    """Save the picture from every page of the album at *url*.

    The album's first page supplies the title (text of the ``<h2>`` whose
    class starts with ``main-``) and the last page number (the last
    ``<span>N</span>`` on the page). Page 1 is *url* itself; pages 2..N are
    addressed as ``<url>/<page number>``. Each page is handed to
    ``save_imgs`` together with the title.

    Args:
        url: Address of the album's first page.

    Returns:
        None. An album whose page layout is not recognised is reported with
        ``print`` and skipped.
    """
    res = requests.get(url)

    page_numbers = re.findall(r'<span>(\d+)</span>', res.text)
    # NOTE(review): the class name is matched loosely ("main-" + anything)
    # because its suffix is not reliably known here; presumably it is
    # "main-title" -- confirm against the live markup.
    title_match = re.search(r'<h2 class="main-[^"]*">(.*?)</h2>', res.text)
    if not page_numbers or title_match is None:
        # Avoid IndexError / AttributeError when the page is not an album
        # page (error page, changed layout, ...).
        print('unexpected album page layout:', url)
        return

    last_page = int(page_numbers[-1])
    title = title_match.group(1)

    save_imgs(title, url)
    for page in range(2, last_page + 1):
        save_imgs(title, '{}/{}'.format(url, page))
def get_all_grils(url):
    """Append every album link found on the listing page *url* to ``girl_list``.

    A link is the ``href`` of an ``<a ... target="_blank">`` that directly
    follows an ``<li>`` in the page's HTML.
    """
    listing = requests.get(url)
    album_links = re.findall(r'<li><a href="(.*?)" target="_blank">', listing.text)
    # girl_list is only mutated, never rebound, so no ``global`` is required.
    girl_list.extend(album_links)
def get_url_lists():
    """Build the list of listing-page URLs and harvest album links from each.

    The first listing page is fetched to read the highest page number from
    its pagination markup. The URLs ``<base>``, ``<base>/page/2`` ...
    ``<base>/page/N`` are then printed and fetched concurrently, one
    ``get_all_grils`` greenlet per URL. The call returns once all of them
    have finished.

    Returns:
        None. Album links are accumulated by ``get_all_grils``.
    """
    url = 'https://www.mzitu.com/mm'
    url_pages = [url]
    res = requests.get(url)

    # Only digit runs are captured so the value is always safe for int().
    # NOTE(review): the class name is matched loosely (anything ending in
    # "-nav") because its prefix is not reliably known here; presumably it
    # is "meta-nav" -- confirm against the live markup.
    girl_pages = re.findall(r'</span>(\d+)<span class="[^"]*-nav', res.text)
    # With no pagination found, fall back to the first page only instead of
    # raising IndexError.
    last_page = int(girl_pages[-1]) if girl_pages else 1

    for page in range(2, last_page + 1):
        # The separator before "page/" is required: without it the URL
        # becomes ".../mmpage/N".
        url_pages.append('{}/page/{}'.format(url, page))
    print(url_pages)

    joinall([spawn(get_all_grils, page_url) for page_url in url_pages])
if __name__ == '__main__':
    # Step 1: populate girl_list with the album URLs from the listing pages.
    get_url_lists()
    # Step 2: spawn one greenlet per album and wait until all have finished.
    album_jobs = [spawn(get_girl_pics, album_url) for album_url in girl_list]
    joinall(album_jobs)