圆方圆学院零基础入门学习Python(绝对干货,值得学习)
改写pipelines.py
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import os
from urllib import request
from scrapy.pipelines.images import ImagesPipeline
from Geely import settings
# class GeelyPipeline( ):
#     def __init__(self):
#         # os.path.dirname()获取当前文件的路径,os.path.join()获取当前目录并拼接成新目录
#         self.path = os.path.join(os.path.dirname(__file__), 'images')
#         # 判断路径是否存在
#         if not os.path.exists(self.path):
#             os.mkdir(self.path)
#     def process_item(self, item, spider):
#         # 分类存储
#         catagory = item['catagory']
#         urls = item['image_urls']
#         catagory_path = os.path.join(self.path, catagory)
#         # 如果没有该路径即创建一个
#         if not os.path.exists(catagory_path):
#             os.mkdir(catagory_path)
#         for url in urls:
#             # 以_进行切割并取最后一个单元
#             image_name = url.split('_')[-1]
#             request.urlretrieve(url, os.path.join(catagory_path, image_name))
#         return item
# 继承ImagesPipeline
class GeelyImagesPipeline(ImagesPipeline):
    """Image pipeline that files each downloaded image under its item's category.

    The parent pipeline stores every image as ``full/<hash>.<ext>``.  This
    subclass replaces the ``full/`` prefix with the value of the item's
    ``'catagory'`` field (the key is spelled that way by the item definition
    and must not be "corrected" here), so images end up in
    ``<IMAGES_STORE>/<catagory>/<hash>.<ext>``.
    """

    def get_media_requests(self, item, info):
        """Return the download requests for ``item``, each tagged with the item.

        Called before the download requests are sent.  The item is attached
        to every request as ``request.item`` so that ``file_path`` can read
        the category later, when only the request is available.

        Args:
            item: The scraped item whose image URLs should be downloaded.
            info: The media pipeline's per-spider bookkeeping object.

        Returns:
            A list of requests, one per image URL produced by the parent.
        """
        # Materialise the parent's result: it is iterated here and then
        # returned, which would hand back an exhausted iterator if the
        # parent ever produced a lazy iterable instead of a list.
        media_requests = list(
            super(GeelyImagesPipeline, self).get_media_requests(item, info)
        )
        for media_request in media_requests:
            media_request.item = item
        return media_requests

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the storage path (relative to ``IMAGES_STORE``) for an image.

        Called when an image is about to be stored.

        Args:
            request: The download request; expected to carry the ``item``
                attribute set by ``get_media_requests``.
            response: The download response, forwarded to the parent.
            info: The media pipeline's bookkeeping object, forwarded as is.
            item: The item being processed.  Newer Scrapy releases pass it as
                a keyword argument; when absent, ``request.item`` is used.

        Returns:
            ``'<catagory>/<file name>'`` when the item has a category,
            otherwise the parent's default ``'full/<file name>'`` path.
        """
        default_path = super(GeelyImagesPipeline, self).file_path(
            request, response, info
        )

        source_item = item if item is not None else getattr(request, 'item', None)
        category = source_item.get('catagory') if source_item is not None else None
        if not category:
            # Without a category there is no sub-folder to build; keep the
            # parent's layout instead of crashing inside os.path.join(None).
            return default_path

        # makedirs(exist_ok=True) also creates IMAGES_STORE itself when it is
        # missing and avoids the race between exists() and mkdir().
        os.makedirs(os.path.join(settings.IMAGES_STORE, category), exist_ok=True)

        image_name = default_path.replace('full/', '')
        return os.path.join(category + '/', image_name)
继续阅读与本文标签相同的文章
-
使用 Docker 部署 Spring Boot 项目
2026-05-18栏目: 教程
-
发布订阅模式
2026-05-18栏目: 教程
-
Docker 用ansible给主机安装docker
2026-05-18栏目: 教程
-
Docker centos7 with sshd (sshd centos7 常用镜像)
2026-05-18栏目: 教程
-
Docker 做资源限制
2026-05-18栏目: 教程
