課時9 解析網頁中的元素

from bs4 import BeautifulSoup
# Local copy of the practice page that will be parsed.
path = 'C:/Users/Google/Desktop/web/index.html'

with open(path, 'r') as wb_data:
    # Parse the opened HTML file with the lxml parser.
    soup = BeautifulSoup(wb_data, 'lxml')
    # One CSS selector per field; each select() call returns a list of matching tags.
    titles = soup.select('body > div.container > div.row > div.col-md-9 > div.row > div.col-sm-4.col-lg-4.col-md-4 > div.thumbnail > div.caption > h4 > a')
    images = soup.select('body > div.container > div.row > div.col-md-9 > div.row > div.col-sm-4.col-lg-4.col-md-4 > div.thumbnail > img')
    reviews = soup.select('div.ratings > p.pull-right')
    prices = soup.select('div.caption > h4.pull-right')
    # p:nth-of-type(2) picks the second <p> inside each div.ratings.
    stars = soup.select('body > div > div > div.col-md-9 > div > div > div > div.ratings > p:nth-of-type(2)')

# zip() pairs the i-th match of every list and stops at the shortest list.
for title, image, review, price, star in zip(titles, images, reviews, prices, stars):
    data = {
        'title': title.get_text(),
        'image': image.get('src'),
        'review': review.get_text(),
        'price': price.get_text(),
        # Count the <span> tags under this <p> whose class attribute is
        # exactly 'glyphicon glyphicon-star'.
        'star': len(star.find_all("span", class_='glyphicon glyphicon-star'))
    }
    print(data)
  • 需要注意的點
'star': len(star.find_all("span", class_='glyphicon glyphicon-star'))

這句代碼的作用是需要在

1.jpg

這裡面，計算出 class 值等於 glyphicon glyphicon-star 的 span 標籤的個數。

from bs4 import BeautifulSoup
# Location of the locally saved practice page.
path = 'C:/Users/Google/Desktop/web/index.html'

# CSS selectors, one per field extracted from the page.
TITLE_SELECTOR = 'body > div.container > div.row > div.col-md-9 > div.row > div.col-sm-4.col-lg-4.col-md-4 > div.thumbnail > div.caption > h4 > a'
IMAGE_SELECTOR = 'body > div.container > div.row > div.col-md-9 > div.row > div.col-sm-4.col-lg-4.col-md-4 > div.thumbnail > img'
REVIEW_SELECTOR = 'div.ratings > p.pull-right'
PRICE_SELECTOR = 'div.caption > h4.pull-right'
STAR_SELECTOR = 'body > div > div > div.col-md-9 > div > div > div > div.ratings > p:nth-of-type(2)'

# The file only needs to stay open while BeautifulSoup reads it.
with open(path, 'r') as html_file:
    soup = BeautifulSoup(html_file, 'lxml')

titles = soup.select(TITLE_SELECTOR)
images = soup.select(IMAGE_SELECTOR)
reviews = soup.select(REVIEW_SELECTOR)
prices = soup.select(PRICE_SELECTOR)
stars = soup.select(STAR_SELECTOR)

# Show the raw matches for the star selector before building the records.
print(stars)

# Combine the i-th match of every list into one record and print it.
for title, image, review, price, star in zip(titles, images, reviews, prices, stars):
    data = {
        'title': title.get_text(),
        'image': image.get('src'),
        'review': review.get_text(),
        'price': price.get_text(),
        # Number of <span> tags whose class attribute is exactly
        # 'glyphicon glyphicon-star'.
        'star': len(star.find_all("span", class_='glyphicon glyphicon-star'))
    }
    print(data)
最后編輯于
©著作權歸作者所有，轉載或內容合作請聯繫作者
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

推薦閱讀更多精彩內容