python學習—第二周第一節練習項目:在MongoDB中篩選房源

篩選結果

![`GK$TO1TNSIR@0OR輸出結果.png

代碼

from bs4 import BeautifulSoup
import requests
import pymongo

# Connect to the MongoDB server on localhost:27017 and bind the collection
# that the crawler writes to and the final filter reads from.
client = pymongo.MongoClient(host='localhost', port=27017)
fangjia = client.get_database('fangjia')
sheet_line = fangjia.get_collection('sheet_line')

def gender_get(classname):
    """Translate an icon's class list into a gender label.

    Returns 'boy' only when ``classname`` is exactly ``['member_boy_ico']``;
    any other value yields 'girl'.
    """
    return 'boy' if classname == ['member_boy_ico'] else 'girl'

def wb_analyse(url, timeout=10):
    """Fetch one listing detail page and store its title and rent in MongoDB.

    Args:
        url: Address of the listing detail page.
        timeout: Seconds to wait on the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the crawl
            indefinitely.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (a timeout included).
        ValueError: If the rent text cannot be parsed by ``int()``.
    """
    wb_data = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    titles = soup.select('div.pho_info > h4 > em')
    addresses = soup.select('div.pho_info > p > span.pr5')
    rents = soup.select('div.day_l > span')
    imgs = soup.select('#curBigImage')
    ownerimgs = soup.select('div.js_box.clearfix > div.member_pic > a > img')
    ownnames = soup.select('div.js_box.clearfix > div.w_240 > h6 > a')
    genders = soup.select('div.js_box.clearfix > div.w_240 > h6 > span')

    # Only title and rent are stored, but every selection still takes part in
    # zip(), which stops at the shortest list: a record is written only when
    # all seven selectors matched something on the page.
    for title, rent, *_ in zip(titles, rents, addresses, imgs, ownerimgs,
                               ownnames, genders):
        data = {
            'title': title.get_text(),
            # Stored as an int so that rent can be compared numerically later.
            'rent': int(rent.get_text()),
        }
        sheet_line.insert_one(data)

def url_get(wbpage, timeout=10):
    """Fetch one search-result page and analyse every listing it links to.

    Args:
        wbpage: Address of the search-result page.
        timeout: Seconds to wait on the server for the search page before
            ``requests`` gives up, so a stalled connection cannot hang the run.

    Raises:
        requests.exceptions.RequestException: If the search page cannot be
            fetched (a timeout included).
    """
    wbdata = requests.get(wbpage, timeout=timeout)
    soup = BeautifulSoup(wbdata.text, 'lxml')
    for link in soup.select('#page_list > ul > li > a'):
        urlwb = link.get('href')
        if not urlwb:
            # An anchor without an href has no detail page to fetch; passing
            # None on would make requests raise instead of continuing.
            continue
        wb_analyse(urlwb)

# Search-result pages 1 and 2 (the upper bound of range() is exclusive).
urls = ["http://bj.xiaozhu.com/search-duanzufang-p{}-0/".format(number) for number in range(1,3)]

# Crawl every search page; each listing found is written to sheet_line.
for single_url in urls:
    url_get(single_url)


# Let MongoDB do the filtering: only documents whose rent is >= 500 are
# returned. Documents with a missing or non-numeric 'rent' are simply not
# matched, instead of raising KeyError/TypeError in a Python-side comparison.
for item in sheet_line.find({'rent': {'$gte': 500}}):
    print(item)

總結

  1. 剛開始篩選的時候,print的結果都是空的。後來發現,之前寫的代碼中rent存的是字符串,需要先用int()轉換為整數才能比較大小
最后編輯于
©著作權歸作者所有,轉載或內容合作請聯繫作者
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

推薦閱讀更多精彩內容