# Scrape JD.com product reviews (爬取京東商品評論)
#--*--coding:utf-8--*--
import requests
import json
import os
import sys
import random
import time
'''proxies = {
    "http": "proxy.xx.com:911",
    "https": "proxy.xx.com:911",
}'''
# The comment endpoint is split around the page number: a request URL is
# built as url1 + <page number> + url2.
url1 = (
    'https://sclub.jd.com/comment/productPageComments.action'
    '?callback=fetchJSON_comment98vv33573&productId=5118016'
    '&score=0&sortType=5&page='
)
url2 = '&pageSize=10&isShadowSku=0&fold=1'

# HTTP headers sent with every request: a desktop Chrome User-Agent string
# and the JD home page as the referer.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    'Accept': 'text/html;q=0.9,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Connection': 'close',
    'Referer': 'https://www.jd.com/',
}

# Cookie jar sent with every request.
cookie = {'__jdu': '10846'}

# The 30 page numbers 0..29, each exactly once, in random order.
ran_num = random.sample(range(30), 30)

# Output file, opened for appending as UTF-8 text; it stays open for the
# rest of the script.
f = open('c:/users/ffan2/desktop/jd.txt', 'a', encoding='utf-8')
'''for i in range(0,1):
    #url='https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv17182&productId=4554969&score=0&sortType=5&page='+str(i)+'&pageSize=10&isShadowSku=0&fold=1'
    url='https://sclub.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv33573&productId=5118016&score=0&sortType=5&page='+str(i)+'&pageSize=10&isShadowSku=0&fold=1'
    # crawl multiple pages
    print (url)
    r = requests.get(url,headers=headers,cookies=cookie,proxies=proxies)
    #print(r.status_code)
    html=str(r.content, encoding = "GBK")
    f.write(html)
print('done')
    #print(html)
'''
# Fetch every comment page once, in the shuffled order held in ran_num.
# Each page is appended to the output file `f` as soon as it arrives, so a
# failure part-way through does not lose the pages already downloaded, and
# the raw bytes of all pages end up concatenated in `html`.
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get() can block forever
REQUEST_DELAY = 5     # seconds to pause between consecutive requests

pages = []
try:
    for index, page_no in enumerate(ran_num):
        if index:
            # Throttle between requests only: no pause before the first
            # request and none after the last one.
            time.sleep(REQUEST_DELAY)
        url = url1 + str(page_no) + url2
        r = requests.get(url=url, headers=headers, cookies=cookie,
                         timeout=REQUEST_TIMEOUT)  # ,proxies=proxies
        pages.append(r.content)
        # `f` is a text-mode file, so the response bytes must be decoded
        # before writing.
        # NOTE(review): GBK is the encoding the disabled single-page version
        # of this script used for this endpoint - confirm it still applies.
        # errors='replace' keeps one bad byte from aborting the whole crawl.
        f.write(r.content.decode('GBK', errors='replace'))
        print("當前抓取頁面:",url,"狀態:",r)
finally:
    # Flush and release the output file even if a request raised.
    f.close()

# Joined once at the end instead of re-copying the buffer on every page.
html = b''.join(pages)
print('done--------------------')