# http1.1: http://files.blogjava.net/sunchaojin/http1.3.pdf
# 1.查看網頁源碼:Chrome頁面Ctrl + U、F12
# 2.使用pycharm創建網頁文件,源碼包括:images文件夾、css文件、html文件
# 3.安裝庫lxml、BeautifulSoup4、Requests
# https://www.crummy.com/software/BeautifulSoup/bs4/doc.zh/
# http://beautifulsoup.readthedocs.io/zh_CN/latest/
# http://docs.python-requests.org/zh_CN/latest/user/quickstart.html
# response 成功:status_code: 200
from bs4 import BeautifulSoup
import requests
import time
# Addresses of listing pages 1 through 8 (cid=38) to be crawled.
urls = [
    'http://www.duzhe.com/index.php?v=listing&cid=38&page={}'.format(page)
    for page in range(1, 9)
]
def get_list(url, data=None):
    """Download one listing page and print the title and like text of each entry.

    Args:
        url: Address of the listing page to fetch.
        data: Not used by this function; kept only so the signature stays
            compatible with existing callers.

    Returns:
        A list of ``{'title': ..., 'like': ...}`` dicts in page order. Each
        dict is also printed as it is collected.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # A timeout keeps a single unresponsive page from hanging the whole run.
    wb_data = requests.get(url, timeout=10)
    # Wait one second after each request (presumably to throttle the crawl).
    time.sleep(1)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    titles = soup.select('#con_warp > div > div > div.left_p > ul > li > div.con_top > h3 > a')
    likes = soup.select('#con_warp > div > div > div.left_p > ul > li > div.icons_warp > a:nth-of-type(1)')

    entries = []
    # zip() stops at the shorter of the two result lists, so only complete
    # title/like pairs are reported.
    for title, like in zip(titles, likes):
        # Use a distinct local name so the `data` parameter is not shadowed.
        entry = {
            'title': title.get_text(),
            'like': like.get_text(),
        }
        print(entry)
        entries.append(entry)
    return entries
# Crawl every listing page in order; get_list prints the entries it finds.
for single_url in urls:
    get_list(single_url)