# Official site: www.youquhome.cn
"""Download every chapter of a web novel from its chapter-index page.

Each chapter is saved to its own text file under ``./小说`` and, once all
chapters have been fetched, their text is also appended to a single merged
file in the same directory.
"""
import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}

# Chapter-index page of the book; change this to the URL of your own book.
goal_url = 'http://www./13/13959/'

OUTPUT_DIR = './小说'        # directory that receives all output files
MERGED_NAME = '小说名字'     # base name of the merged whole-novel file
REQUEST_TIMEOUT = 10         # seconds; keeps a stalled connection from hanging the run

# Characters that cannot appear in a file name on common file systems.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def _fetch_soup(url):
    """Fetch *url* and return its HTML parsed with the lxml parser.

    The response encoding is switched to the one detected from the body
    before decoding. Raises ``requests.RequestException`` on network
    failure, timeout, or an HTTP error status.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = response.apparent_encoding
    return BeautifulSoup(response.text, 'lxml')


def _safe_filename(name):
    """Return *name* with characters that are invalid in file names replaced by ``_``."""
    return _UNSAFE_FILENAME_CHARS.sub('_', name)


def main():
    """Download all chapters once, writing per-chapter files and a merged file.

    Raises:
        RuntimeError: if the index page has no ``<div id="list">`` element.
        requests.RequestException: if any page cannot be fetched.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # The chapter links are the <a> elements inside <div id="list">.
    chapter_index = _fetch_soup(goal_url).find('div', id='list')
    if chapter_index is None:
        raise RuntimeError(
            'No <div id="list"> found at %s; check goal_url.' % goal_url
        )
    book_list = chapter_index.find_all('a')

    chapter_texts = []  # chapter bodies in index order, joined once at the end
    for book in tqdm(book_list):
        book_name = book.text
        book_url = book.get('href')
        if not book_url:
            tqdm.write('Skipping link without href: %r' % book_name)
            continue

        # urljoin resolves both site-absolute and relative hrefs against the
        # index URL, so no assumption about the host name's length is needed.
        chapter_soup = _fetch_soup(urljoin(goal_url, book_url))
        info = chapter_soup.find('div', id='content')
        if info is None:
            tqdm.write('Skipping chapter without content: %r' % book_name)
            continue

        chapter_text = info.text
        chapter_path = os.path.join(OUTPUT_DIR, _safe_filename(book_name) + '.txt')
        # Append mode is kept from the original script: chapters that share a
        # title accumulate in the same file, and a re-run appends again.
        with open(chapter_path, 'a', encoding='utf-8') as f:
            f.write(chapter_text)
        chapter_texts.append(chapter_text)

    # Merge: write all chapters into one file, reusing the text fetched above
    # instead of downloading every chapter a second time.
    merged_path = os.path.join(OUTPUT_DIR, MERGED_NAME + '.txt')
    with open(merged_path, 'a', encoding='utf-8') as f:
        f.write(''.join(chapter_texts))


if __name__ == '__main__':
    main()
|