本文是上一篇的續篇,後期將進行封裝,以提高可用性。
虎撲在展示圖片的時候偷懶了,直接給出了原圖鏈接,直接獲取這個鏈接,下載到本地即可。
這里使用PhantomJS來打開瀏覽器。
使用urllib2下載圖片
def downloadPicture(pic_url, pic_path):
    """Download one picture with urllib2 and save it to a local file.

    Args:
        pic_url: URL of the picture to fetch.
        pic_path: Path of the local file to write. The file is opened in
            binary write mode, so an existing file is overwritten.
    """
    # Imported locally so this function does not depend on which modules the
    # rest of the file happens to import.
    import contextlib
    import shutil

    # urllib2 responses are not context managers, so closing() is used to
    # release the connection even when writing the file fails.
    with contextlib.closing(urllib2.urlopen(pic_url)) as response:
        with open(pic_path, "wb") as img_file:
            # Stream in chunks instead of holding the whole image in memory.
            shutil.copyfileobj(response, img_file)
逐個獲取每張圖片
def getPictures(elem_url, nums, path):
    """Download the pictures of one gallery, one page at a time.

    Page URLs are built by replacing the "-<index>.html" suffix of
    `elem_url` with "-1.html", "-2.html", ... up to "-<nums>.html". Each
    page is loaded with the module-level `driver`, the `src` attribute of
    the image inside the `pic_bg` div is read, and that file is saved into
    `path` through downloadPicture().

    Any error stops the loop and is printed instead of being raised; a
    summary with the number of pictures downloaded is always printed.

    Args:
        elem_url: URL of a gallery page ending in "-<index>.html".
        nums: Number of pages (one picture each) to fetch, starting at 1.
        path: Directory in which the pictures are saved.
    """
    # Imported locally so this function does not depend on which modules the
    # rest of the file happens to import.
    import re

    downloaded = 0
    try:
        # Strip the whole "-<index>.html" suffix; slicing a fixed two
        # characters off would only work for single-digit page indexes.
        match = re.match(r'(.*?)-\d+\.html', elem_url)
        if match is None:
            raise ValueError("unexpected gallery URL: " + elem_url)
        base_url = match.group(1)

        # Set proxy once: the opener is installed globally, so there is no
        # need to rebuild it for every page. The host below is a placeholder
        # that has to be replaced with a real proxy address.
        proxy_support = urllib2.ProxyHandler({"http": "http://yout_proxy:8080/"})
        opener = urllib2.build_opener(proxy_support)
        urllib2.install_opener(opener)

        for index in range(1, nums + 1):
            html_url = base_url + '-' + str(index) + '.html'
            driver.get(html_url)
            # The expression must start with "//" (search anywhere in the
            # document); a leading "http:" makes it an invalid XPath.
            elem = driver.find_element_by_xpath("//div[@class='pic_bg']/div/img")
            url = elem.get_attribute("src")
            # '*' is not allowed in Windows file names, so drop it.
            file_name = os.path.basename(url).replace('*', '')
            # os.path.join picks the right separator for the platform.
            downloadPicture(url, os.path.join(path, file_name))
            downloaded += 1
    except Exception as e:
        # Best effort: report the failure and keep what was downloaded.
        print("Error:  %s" % (e,))
    finally:
        print("Download " + str(downloaded) + ' pictures\n')
主程序
if __name__ == "__main__":
    # Gallery to scrape: how many pictures it holds and the URL of its
    # first page.
    nums = 15
    elem_url = "http://photo.hupu.com/nba/p29556-1.html"

    # Pictures are stored in a "hupupic" folder under the current working
    # directory; the folder is created if it does not exist yet.
    pic_path = sub_folder = os.path.join(os.getcwd(), "hupupic")
    if not os.path.exists(pic_path):
        os.mkdir(pic_path)

    getPictures(elem_url, nums, pic_path)