新建发送邮件类

import smtplib
from email.mime.text import MIMEText
from email.header import Header

class SendMail:
    """Send plain-text e-mail through an SMTP server with fixed settings.

    The sender account, recipient list, server host, password and port are
    hard-coded placeholder values assigned in ``__init__``.
    """

    def __init__(self):
        self.sender = 'xx@qq.com'
        # Recipients; may be set to your own QQ mailbox or any other mailbox.
        self.receivers = ['xx1@qq.com', 'xx2@qq.com']
        self.smtp_server = 'smtp.qq.com'
        self.smtp_pwd = 'xx'
        # The attribute name is spelled "stmp" (sic) in the original; it is
        # kept so any code reading the attribute keeps working.
        self.stmp_port = 25

    def sendMessage(self, title, msg):
        """Send ``msg`` as a UTF-8 plain-text mail with subject ``title``.

        Args:
            title: Subject line of the mail.
            msg: Body text of the mail.

        Raises:
            smtplib.SMTPException: Propagated from the SMTP session
                (connect, STARTTLS, login or send).
            OSError: If the connection to the server cannot be established.
        """
        # MIMEText arguments: body text, subtype ("plain"), and charset.
        message = MIMEText(msg, 'plain', 'utf-8')
        message['From'] = self.sender
        # RFC 5322 address lists are comma-separated, not semicolon-separated.
        message['To'] = ', '.join(self.receivers)
        message['Subject'] = Header(title, 'utf-8')

        # The context manager issues QUIT and closes the socket even when
        # login or sending fails, so the connection is never leaked.
        with smtplib.SMTP(self.smtp_server, self.stmp_port) as smtp_obj:
            smtp_obj.starttls()
            smtp_obj.login(self.sender, self.smtp_pwd)
            smtp_obj.sendmail(self.sender, self.receivers, message.as_string())
        print('success')

爬取英语学习资料

比如爬取英语学习链接:http://www.hjenglish.com/new/c1020/,将当前页的文章爬取下来,并通过邮件发送到指定邮箱:

from bs4 import BeautifulSoup
import time, os
import xlwt
import requests
import datetime
import threading
import schedule
from mymodule.SendMail import *

def getLinks(url):
    """Fetch a listing page and return the absolute URLs of its articles.

    NOTE(review): the function name, the parser name ('lxml') and the CSS
    selector ('.big-link.title-article') were reconstructed from garbled
    text in the original listing -- confirm against the upstream source.

    Args:
        url: Address of the listing page to download.

    Returns:
        A list of article URLs, each prefixed with the site origin. The list
        is empty when the request or the parsing fails; the error is printed
        rather than raised.
    """
    headers = {
        'Host': 'www.hjenglish.com',
        'Referer': 'http://www.hjenglish.com/new/cet/',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
    }
    try:
        # A timeout keeps a stalled server from hanging the job forever.
        res = requests.get(url, headers=headers, timeout=10)
        try:
            res.raise_for_status()
            page = BeautifulSoup(res.text, 'lxml')
        finally:
            # Release the connection even when the status check raises.
            res.close()

        return ['http://www.hjenglish.com' + adom['href']
                for adom in page.select('.big-link.title-article')]
    except Exception as err:
        print(err)
        # Return an empty list so callers can always iterate the result.
        return []

def spiderLink(url, lock):
    """Download one article page and mail every (title, content) pair on it.

    NOTE(review): the function name, the local names, the parser name
    ('lxml') and the '.article-header .title' selector were reconstructed
    from garbled text in the original listing -- confirm against the
    upstream source.

    Reads the module-level ``sender`` object and increments the module-level
    ``total`` counter once per mailed article, holding ``lock`` while doing
    so. Any error is printed instead of being raised.

    Args:
        url: Address of the article page.
        lock: Lock object guarding updates to ``total``.
    """
    global total
    print('当前线程', threading.current_thread().name)
    headers = {
        'Host': 'www.hjenglish.com',
        'Referer': 'http://www.hjenglish.com/new/cet/',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
    }
    try:
        # A timeout keeps a stalled server from blocking this worker forever.
        res = requests.get(url, headers=headers, timeout=10)
        try:
            if res.status_code != 200:
                return
            page = BeautifulSoup(res.text, "lxml")
        finally:
            # Close the response on every path, including non-200 replies.
            res.close()

        titles = [title.getText() for title in page.select('.article-header .title')]
        contents = [contentDom.getText() for contentDom in page.select('#J-article-content')]
        print(titles, contents)
        # zip() stops at the shorter list, so a page with unequal numbers of
        # titles and bodies cannot trigger an IndexError mid-loop.
        for title, content in zip(titles, contents):
            with lock:
                total = total + 1
            print(title, content)
            sender.sendMessage(title, content)
    except Exception as err:
        print(err)

def my_job():
    """Crawl the listing page and process every article in its own thread.

    NOTE(review): the names ``getLinks``, ``spiderLink`` and ``spider_links``
    were reconstructed from garbled text in the original listing -- confirm
    against the upstream source.

    Starts one thread per article link, waits for all of them, prints the
    elapsed time and resets the module-level ``total`` counter to 0. On any
    error the message is printed and the process terminates immediately.
    """
    global total
    try:
        starttime = datetime.datetime.now()
        url = 'http://www.hjenglish.com/new/c1020/'

        lock = threading.Lock()
        # Tolerate a None result so a failed listing fetch means "no work"
        # instead of a TypeError while building the thread list.
        spider_links = getLinks(url) or []
        threads = [threading.Thread(target=spiderLink, args=(link, lock))
                   for link in spider_links]

        for worker in threads:
            worker.start()

        for worker in threads:
            worker.join()

        elapsed = datetime.datetime.now() - starttime
        # total_seconds() also counts whole days, unlike the .seconds field.
        print('have spend ', str(int(elapsed.total_seconds())) + 's')
        total = 0
    except Exception as err:
        # os._exit() skips normal interpreter cleanup, so flush explicitly,
        # and exit non-zero so the failure is visible to the caller.
        print(err, flush=True)
        os._exit(1)

if __name__ == '__main__':
    try:
        # Module-level state used while the job runs: the mailer instance
        # and the running count of processed articles.
        sender = SendMail()
        total = 0
        my_job()
    except Exception as err:
        # os._exit() skips normal interpreter cleanup, so flush explicitly,
        # and exit non-zero so the failure is visible to the caller.
        print(err, flush=True)
        os._exit(1)
收藏 打印