圆方圆学院零基础入门学习Python(绝对干货,值得学习)

 

改写pipelines.py

    # -*- coding: utf-8 -*-

     # Define your item pipelines here
     #
     # Don't forget to add your pipeline to the ITEM_PIPELINES setting
     # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html

     import os
     from urllib import request

     from scrapy.pipelines.images import ImagesPipeline
     from Geely import settings

     # class GeelyPipeline( ):

     #     def __init__(self):
     #         #os.path.dirname()获取当前文件的路径,os.path.join()获取当前目录并拼接成新目录
     #         self.path = os.path.join(os.path.dirname(__file__), \'images\')

     #         # 判断路径是否存在
     #         if not os.path.exists(self.path):  
     #             os.mkdir(self.path)

     #     def process_item(self, item, spider):

     #         #分类存储
     #         catagory = item[\'catagory\']
     #         urls = item[\'image_urls\']

     #         catagory_path = os.path.join(self.path, catagory)

     #         #如果没有该路径即创建一个
     #         if not os.path.exists(catagory_path): 
     #             os.mkdir(catagory_path)

     #         for url in urls:
     #             #以_进行切割并取最后一个单元
     #             image_name = url.split(\'_\')[-1] 
     #             request.urlretrieve(url,os.path.join(catagory_path,image_name))

     #         return item

     # 继承ImagesPipeline
     class GeelyImagesPipeline(ImagesPipeline):
         """Image pipeline that files each downloaded image under its item's category.

         The parent ``ImagesPipeline`` performs the actual download; this subclass
         only changes where the file is stored, replacing the parent's ``full/``
         prefix with the value of the item's ``'catagory'`` field.
         """

         def get_media_requests(self, item, info):
             """Build the download requests for ``item`` and tag each with the item.

             Args:
                 item: The scraped item whose images are to be downloaded.
                 info: Pipeline bookkeeping object, forwarded to the parent unchanged.

             Returns:
                 A list of the parent's requests, each carrying an ``item``
                 attribute so ``file_path`` can look up the category later.
             """
             # Materialise once so the same request objects are both tagged and returned.
             requests = list(super().get_media_requests(item, info))
             for media_request in requests:
                 media_request.item = item
             return requests

         def file_path(self, request, response=None, info=None, *, item=None):
             """Return the storage path (relative to IMAGES_STORE) for one image.

             Args:
                 request: The download request; expected to carry the ``item``
                     attribute attached by ``get_media_requests``.
                 response: Forwarded to the parent implementation.
                 info: Forwarded to the parent implementation.
                 item: Optional item passed by keyword; when given it takes
                     precedence over ``request.item``.

             Returns:
                 ``'<catagory>/<image name>'`` when the item has a category,
                 otherwise the parent's default path unchanged.
             """
             default_path = super().file_path(request, response, info)

             if item is None:
                 item = getattr(request, 'item', None)
             # NOTE: the key is spelled 'catagory' to match the item definition.
             catagory = item.get('catagory') if item is not None else None
             if not catagory:
                 # Without a category there is no sub-directory to build;
                 # keep the parent's layout rather than failing in os.path.join.
                 return default_path

             # makedirs(exist_ok=True) also creates IMAGES_STORE itself and does not
             # fail if the directory already exists.
             catagory_path = os.path.join(settings.IMAGES_STORE, catagory)
             os.makedirs(catagory_path, exist_ok=True)

             # Drop the parent's 'full/' prefix and keep only the generated file name.
             image_name = default_path.replace('full/', '')
             return os.path.join(catagory + '/', image_name)


 

收藏 打印