一、爬取的網頁截圖
image.png
二、通過爬蟲生成的xlsx截圖
hahow課程網頁爬取ajax的接口處理數據后整理到xlsx中.png
三、代碼
import requests
import pandas as pd
# Address of the course-listing AJAX endpoint.
# NOTE(review): "xxx" looks like a placeholder - set the real endpoint before running.
url = "xxx"
# Browser-like User-Agent so the request looks like it comes from a normal client.
headers = {"user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"}

# Seconds to wait for the server; without a timeout requests.get may block forever.
REQUEST_TIMEOUT = 10

# JSON keys read from every course record, in output-column order.
COURSE_FIELDS = ("title", "price", "averageRating", "numSoldTickets")
# Spreadsheet column headers, aligned one-to-one with COURSE_FIELDS.
COLUMN_NAMES = ["課程標題", "課程價格", "課程評分", "課程售賣數量"]
# File the spreadsheet is written to.
OUTPUT_PATH = "new_course.xlsx"


def extract_course_rows(data):
    """Convert course records into row lists suitable for a DataFrame.

    Args:
        data: Iterable of dicts, one per course, as found under the
            response's "data" key.

    Returns:
        One list per course holding the values of COURSE_FIELDS in order.
        A field missing from a record becomes None (an empty cell) instead
        of raising KeyError and aborting the whole export.
    """
    return [[info.get(field) for field in COURSE_FIELDS] for info in data]


def save_courses(rows, path=OUTPUT_PATH):
    """Write the course rows to an xlsx file at *path*, without the index column."""
    frame = pd.DataFrame(rows, columns=COLUMN_NAMES)
    frame.to_excel(path, index=False, engine="openpyxl")


def main():
    """Fetch the course list, export it to xlsx and print the outcome."""
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Connection errors, timeouts and malformed URLs end up here; report
        # them the same way as a bad status instead of dumping a traceback.
        print(f"網頁讀取失敗: {exc}")
        return

    if response.status_code != 200:
        print("網頁讀取失敗")
        return

    # The page was read successfully: the payload keeps the courses under "data".
    rows = extract_course_rows(response.json()["data"])
    save_courses(rows)
    print("Save!")


if __name__ == "__main__":
    main()