# 用爬虫下载简单的页面,可以拿来练手。(A simple crawler that downloads a few pages; handy for practice.)
import requests
class TiebaSpider:
    """Download the first listing pages of a Baidu Tieba forum as HTML files.

    Each page is fetched with ``requests`` and written to the current working
    directory as ``<forum name>-第<page number>页码.html``.
    """

    def __init__(self, tieba_name_crawl, page_count=4, timeout=10):
        """Store the forum name and prepare the URL template and headers.

        :param tieba_name_crawl: forum name, used as the ``kw`` query value.
        :param page_count: number of listing pages to download; defaults to
            4, the value that was previously hard-coded.
        :param timeout: seconds to wait on the server before giving up, so a
            stalled connection cannot block the crawl indefinitely.
        """
        self.tieba_name = tieba_name_crawl
        self.page_count = page_count
        self.timeout = timeout
        # The trailing ``{}`` is filled in with the ``pn`` offset of a page.
        self.url_ = 'https://tieba.baidu.com/f?kw=' + tieba_name_crawl + '&ie=utf-8&pn={}'
        # The standard header name uses a hyphen; spelled with an underscore
        # it is sent as an unrelated header and the agent is not overridden.
        self.headers = {'User-Agent': 'WSF'}

    def make_url_list(self):
        """Build the list of page URLs to download.

        :return: one URL per page, with ``pn`` advancing by 50 per page.
        """
        return [self.url_.format(page * 50) for page in range(self.page_count)]

    def download_url(self, url_str):
        """Fetch one page with ``requests.get`` and return its raw body.

        :param url_str: URL to download.
        :return: response body as bytes.
        """
        response = requests.get(url_str, headers=self.headers, timeout=self.timeout)
        return response.content

    def save_result(self, result, page_num):
        """Write a downloaded page to disk.

        :param result: page body as bytes.
        :param page_num: 1-based page number, used in the file name.
        :return: None
        """
        file_path = "{}-第{}页码.html".format(self.tieba_name, page_num)
        # Binary mode: the body is stored exactly as received.
        with open(file_path, 'wb') as f:
            f.write(result)

    def run(self):
        """Download every page in order and save each one to its own file.

        :return: None
        """
        url_list = self.make_url_list()
        print(url_list)
        # enumerate yields the 1-based page number directly instead of
        # searching the list again for every URL.
        for page_num, url_str in enumerate(url_list, start=1):
            self.save_result(self.download_url(url_str), page_num)
if __name__ == '__main__':
    # Script entry point: build a spider for the 'lol' forum and run it.
    tieba_spider = TiebaSpider('lol')
    tieba_spider.run()
继续阅读与本文标签相同的文章
-
零基础Python教程033期 循环中的else语句,感叹人生苦短,我学python
2026-05-18栏目: 教程
-
Python高级进阶#015 pyqt5进度条QProgressBar结合使用qbasictimer
2026-05-18栏目: 教程
-
Cassandra编年史
2026-05-18栏目: 教程
-
网站建设——部署与发布入门篇(基于阿里云服务器)
2026-05-18栏目: 教程
-
K8S从懵圈到熟练 - 节点下线姊妹篇
2026-05-18栏目: 教程
