# -*- coding:utf-8 -*-
"""Print the text of a Baidu Wenku document using headless Chrome.

The script prompts for a document URL, opens it in a headless Chrome
session, clicks the ``moreBtn goBtn`` element, then scrolls every page
into view and prints the text of that page's last text layer.

If an expected element is missing and the page shows the "trial reading
has ended" banner, a notice is printed instead of a traceback.  Any other
missing-element failure is re-raised unchanged.
"""

import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException

chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36")

driver = webdriver.Chrome(chrome_options=chrome_options)

try:
    driver.maximize_window()

    url = input("输入文档链接,搞快点:")
    driver.get(url)

    error_str = ""

    try:
        # The page counter text carries a "/" that is stripped before int().
        page_num = driver.find_element_by_xpath("//span[@class='page-count']").text

        # Scroll the banner into view before clicking the button.
        # NOTE(review): presumably the button expands the remaining pages
        # and is only clickable once on screen -- confirm against the site.
        find_button = driver.find_element_by_xpath("//div[@class='doc-banner-text']")
        driver.execute_script("arguments[0].scrollIntoView();", find_button)
        button = driver.find_element_by_xpath("//span[@class='moreBtn goBtn']")
        button.click()

        for i in range(1, int(page_num.strip('/')) + 1):
            page = driver.find_element_by_xpath("//div[@data-page-no='{}']".format(i))
            driver.execute_script("arguments[0].scrollIntoView();", page)
            # NOTE(review): presumably gives lazily loaded page content time
            # to render after scrolling -- confirm the delay is sufficient.
            time.sleep(0.3)
            # NOTE(review): the class name 'reader-txt- ' looks truncated (it
            # ends in a dash and a space); verify it against the live page
            # markup.  An empty match list raises IndexError here.
            print(driver.find_elements_by_xpath("//div[@data-page-no='{}']//div[@class='reader-txt- ']".format(i))[-1].text)

    except NoSuchElementException:
        # Use the plural lookup so a missing banner yields an empty list
        # instead of raising a second NoSuchElementException that would
        # hide the original failure.
        banners = driver.find_elements_by_xpath("//div[@class='doc-bottom-text']")
        if not banners:
            # Not the trial-ended case: surface the real missing element.
            raise
        if banners[0].text == "试读已结束,如需继续阅读或下载":
            error_str = "\n------------------------------------------------------------------\n\n" \
                        "----------百度文库提示试读已结束啦,无法爬取全文,等会再试试吧----------\n\n" \
                        "------------------------------------------------------------------"

    finally:
        print(error_str)

finally:
    # Always shut the browser down so no headless Chrome process is left
    # running, whether scraping succeeded or failed.
    driver.quit()
继续阅读与本文标签相同的文章
下一篇 :
java编程思想之并发
-
在 NAS SMB 卷上搭建 IIS + WordPress + MYSQL
2026-05-19栏目: 教程
-
SpringBoot2 整合 ElasticSearch框架,实现高性能搜索引擎
2026-05-19栏目: 教程
-
大神引路:新手云大使入门导引
2026-05-19栏目: 教程
-
一行命令导致的数据丢失,阿里工程师是如何恢复的?
2026-05-19栏目: 教程
-
二层、三层、四层交换机的区别!!
2026-05-19栏目: 教程
