緣由
唐家三少的《龍王傳說》已經連載了不少章節,剛好看到瀏覽器裡有推薦,於是就稍微看了看。然而總覺得頁面廣告太多,閱讀體驗太差,乾脆用 Python 把小說內容抓下來整理一下。
環境
Windows、Python 2.x、requests、lxml
代碼
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Python 2-only workaround: `reload` is a builtin there, and reloading `sys`
# makes `setdefaultencoding` callable again so the implicit str/unicode
# conversion codec can be switched to UTF-8 (the script later writes
# non-ASCII text to a file opened without an explicit encoding).
import sys
reload(sys)
sys.setdefaultencoding("utf-8")
import requests
def getHtml(url, headers=None, timeout=30):
    """Download *url* with an HTTP GET and return the raw response body.

    Args:
        url: Address of the page to fetch.
        headers: Optional dict of HTTP request headers to send.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection, which would freeze the whole scrape.

    Returns:
        The undecoded response body (``Response.content``).

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    r = requests.get(url, headers=headers, timeout=timeout)
    return r.content
def useXpath(html, base_url='http://www.aiquxs.com/read/41/41742/',
             output_path='d://xiaoshu8888.txt'):
    """Scrape every chapter linked from an index page into one text file.

    Parses the index page, then for each chapter link downloads the chapter
    page with ``getHtml`` and appends the chapter title followed by the text
    nodes of the ``#booktext`` element to *output_path*.

    Args:
        html: Source of the table-of-contents page.
        base_url: Prefix joined with each chapter's ``href``; also used to
            build the ``Referer`` header sent with chapter requests.
        output_path: File the chapters are appended to (UTF-8 encoded).

    Returns:
        None. Progress is printed and the result is written to disk.
    """
    import io

    from lxml import etree

    index_tree = etree.HTML(html)
    headers = {
        'Referer': base_url + 'index.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
    }
    # Read the title and href from the same <a> element so the two can never
    # drift out of step, which separate text()/@href queries allow whenever
    # one anchor lacks text or an href.
    links = index_tree.xpath('//*[@id="list"]/dl/dd/a')
    # An explicit encoding removes the dependency on the process-wide default
    # codec when writing non-ASCII text.
    with io.open(output_path, 'a', encoding='utf-8') as f:
        for link in links:
            href = link.get('href')
            if not href:
                continue  # an anchor without a target has no chapter to fetch
            title = u''.join(link.xpath('text()'))
            url = base_url + href
            # One parenthesised argument prints the same text under Python 2
            # and also parses under Python 3.
            print(u' '.join((u'正在抓取 ', title, u' 網址為: ' + url)))
            f.write(title + u'\n')  # chapter title
            chapter_tree = etree.HTML(getHtml(url, headers))  # chapter page
            for item in chapter_tree.xpath('//*[@id="booktext"]/text()'):
                f.write(item + u'\n')
if __name__ == '__main__':
    # Table-of-contents page that lists the chapter links.
    index_url = 'http://www.aiquxs.com/read/41/41742/index.html'
    useXpath(getHtml(index_url))
運行圖片

結束語
喜歡的話,歡迎關注、打賞、收藏,謝謝!