4.爬取翻頁

# -*- coding: utf-8 -*-
import scrapy
from Boss.items import BossItem

class ZhipinSpider(scrapy.Spider):
    """Scrape job listings from zhipin.com search results, page by page.

    Only the first results page is seeded in ``start_urls``.  After the
    items of a page have been yielded, ``parse`` schedules the request for
    the following page itself, until ``max_page`` has been requested.
    """

    name = 'zhipin'
    allowed_domains = ['zhipin.com']
    # Only page 1 is listed here; the remaining pages are requested from
    # ``parse``.  (The alternative is to list every page URL up front.)
    start_urls = ['https://www.zhipin.com/c101280600/?query=python&page=1&ka=page-1']
    # Number of the results page currently being processed.
    page = 1
    # Last results page to request (inclusive).
    max_page = 20

    def parse(self, response):
        """Yield one ``BossItem`` per listing, then request the next page.

        Args:
            response: The downloaded search-results page.

        Yields:
            A ``BossItem`` with the keys ``job``, ``salary``, ``company``,
            ``position``, ``require``, ``info`` and ``hr`` for every ``li``
            found under the ``job-list`` container, followed by a
            ``scrapy.Request`` for the next page while ``page`` has not
            passed ``max_page``.
        """
        # The expression must start with ``//`` (descendant search from the
        # document root); an ``http:`` prefix is not valid XPath.
        job_list = response.xpath("//div[@class='job-list']//li")
        for job in job_list:
            item = BossItem()
            item["job"] = job.xpath(".//div[@class='job-title']/text()").extract_first()
            item["salary"] = job.xpath(".//span[@class='red']/text()").extract_first()
            item["company"] = job.xpath(".//div[@class='company-text']//a/text()").extract_first()
            item["position"] = job.xpath(".//div[@class='info-primary']/p//text()[1]").extract_first()
            # ``extract_first()`` returns None when nothing matches, and
            # ``None + str`` raises TypeError; default to '' so a listing
            # with a missing text node does not abort the whole page.
            item["require"] = (
                job.xpath(".//div[@class='info-primary']/p//text()[2]").extract_first(default='')
                + job.xpath(".//div[@class='info-primary']/p//text()[3]").extract_first(default='')
            )
            item["info"] = " ".join(job.xpath(".//div[@class='company-text']/p//text()").extract())
            item["hr"] = " ".join(job.xpath(".//div[@class='info-publis']//h3[@class='name']/text()").extract())
            yield item

        # Move on to the next results page.
        self.page += 1
        # Inclusive bound: the previous ``< 20`` check never requested
        # page 20.
        if self.page <= self.max_page:
            # Hand a new request back to the scheduler.  Pages are fetched
            # one after another: the next page is only requested once the
            # current one has been downloaded and parsed.
            url = 'https://www.zhipin.com/c101280600/?query=python&page=%d&ka=page-%d' % (self.page, self.page)
            yield scrapy.Request(url=url, callback=self.parse)




©著作權歸作者所有,轉載或內容合作請聯系作者
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

推薦閱讀更多精彩內容