簡單總結
關聯分析、數值比較:散點圖、曲線圖
分布分析:灰度圖、密度圖
涉及分類的分析:柱狀圖、箱式圖
1.Matplotlib基礎知識
使用Python進行數據分析並可視化,離不開一個重要的包:matplotlib。這是一個開源的包,裡面有非常豐富的可視化圖形處理功能。官方文檔(Matplotlib)裡面有詳細的介紹,以及各種圖形種類。
本文主要是學習Matplotlib的基本使用方法;需要使用到更多方法時,可以詳細閱讀官方文檔。
安裝
a. 使用 pip install -U matplotlib;b. 安裝 Anaconda(自帶這個包)
導入包
import matplotlib.pyplot as plt
import numpy as np

# IPython magic: show figures inline, directly in the cell output.
%matplotlib inline
幾種基本圖形繪制
# Plot x, x**2 and x**3 over [0, 2] on a single set of axes using the
# pyplot interface.
x = np.linspace(0, 2, 100)  # 100 evenly spaced sample points in [0, 2]

plt.plot(x, x, label='linear')
plt.plot(x, x**2, label='quadratic')
plt.plot(x, x**3, label='cubic')

plt.xlabel('x label')
plt.ylabel('y label')
plt.title("Simple Plot")
plt.legend()  # legend entries come from the label= of each plot() call
plt.show()
def my_plotter(ax, data1, data2, param_dict):
    """
    A helper function to make a graph

    Parameters
    ----------
    ax : Axes
        The axes to draw to
    data1 : array
        The x data
    data2 : array
        The y data
    param_dict : dict
        Dictionary of kwargs to pass to ax.plot

    Returns
    -------
    out : list
        list of artists added
    """
    out = ax.plot(data1, data2, **param_dict)
    return out


# which you would then use as:
data1, data2, data3, data4 = np.random.randn(4, 100)
fig, ax = plt.subplots(1, 1)
my_plotter(ax, data1, data2, {'marker': 'x'})
子圖集
# Create a figure with one row and two columns of axes, then draw one
# dataset on each via the my_plotter helper.
fig, (ax1, ax2) = plt.subplots(1, 2)
my_plotter(ax1, data1, data2, {'marker': 'x'})
my_plotter(ax2, data3, data4, {'marker': 'o'})
import numpy as np
import matplotlib.pyplot as plt

# Stacked bar chart: the women's bars are drawn on top of the men's bars.
N = 5
menMeans = (20, 35, 30, 35, 27)
womenMeans = (25, 32, 34, 20, 25)
menStd = (2, 3, 4, 1, 2)
womenStd = (3, 5, 2, 3, 3)
ind = np.arange(N)    # the x locations for the groups
width = 0.35          # the width of the bars: can also be len(x) sequence

p1 = plt.bar(ind, menMeans, width, yerr=menStd)
# bottom=menMeans starts each women's bar where the men's bar ends.
p2 = plt.bar(ind, womenMeans, width,
             bottom=menMeans, yerr=womenStd)

plt.ylabel('Scores')
plt.title('Scores by group and gender')
plt.xticks(ind, ('G1', 'G2', 'G3', 'G4', 'G5'))
plt.yticks(np.arange(0, 81, 10))
plt.legend((p1[0], p2[0]), ('Men', 'Women'))

plt.show()
def scatterplot(x_data, y_data, x_label, y_label, title):
    """Draw a scatter plot of *y_data* against *x_data* on a new figure.

    Parameters
    ----------
    x_data, y_data
        Values passed to ``ax.scatter`` as the x and y coordinates.
    x_label, y_label : str
        Axis labels.
    title : str
        Axes title.

    Returns
    -------
    None
        The figure and axes are created internally and not returned.
    """
    fig, ax = plt.subplots()
    # Small, semi-transparent markers.
    ax.scatter(x_data, y_data, s = 10, color = '#539caf', alpha = 0.75)
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)


# NOTE(review): daily_data is not defined anywhere in the visible text;
# presumably a DataFrame with 'temp' and 'cnt' columns -- confirm before running.
scatterplot(x_data = daily_data['temp']
            , y_data = daily_data['cnt']
            , x_label = 'Normalized temperature (C)'
            , y_label = 'Check outs'
            , title = 'Number of Check Outs vs Temperature')
import numpy as np
import matplotlib.pyplot as plt

# Grouped bar chart: men's and women's bars side by side for each group.
men_means, men_std = (20, 35, 30, 35, 27), (2, 3, 4, 1, 2)
women_means, women_std = (25, 32, 34, 20, 25), (3, 5, 2, 3, 3)

ind = np.arange(len(men_means))  # the x locations for the groups
width = 0.35  # the width of the bars

fig, ax = plt.subplots()
# Shift each series half a bar width left/right of the group position.
rects1 = ax.bar(ind - width/2, men_means, width, yerr=men_std,
                color='SkyBlue', label='Men')
rects2 = ax.bar(ind + width/2, women_means, width, yerr=women_std,
                color='IndianRed', label='Women')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(ind)
ax.set_xticklabels(('G1', 'G2', 'G3', 'G4', 'G5'))
ax.legend()


def autolabel(rects, xpos='center'):
    """
    Attach a text label above each bar in *rects*, displaying its height.

    *xpos* indicates which side to place the text w.r.t. the center of
    the bar. It can be one of the following {'center', 'right', 'left'}.

    The labels are drawn on the module-level ``ax`` created above.
    """
    xpos = xpos.lower()  # normalize the case of the parameter
    ha = {'center': 'center', 'right': 'left', 'left': 'right'}
    offset = {'center': 0.5, 'right': 0.57, 'left': 0.43}  # x_txt = x + w*off

    for rect in rects:
        height = rect.get_height()
        # Place the text slightly (1%) above the top of the bar.
        ax.text(rect.get_x() + rect.get_width()*offset[xpos], 1.01*height,
                '{}'.format(height), ha=ha[xpos], va='bottom')


autolabel(rects1, "left")
autolabel(rects2, "right")

plt.show()
2. GAFAJA股票數據可視化分析
(谷歌,亞馬遜,Facebook,蘋果,京東,阿里巴巴)幾大互聯網科技巨頭的股票數據,可以直接從雅虎財經網站上下載,選擇一年的數據進行分析。根據分析我們可以看到,科技巨頭也隨著2018年經濟不景氣受到了很大的影響。但是這也許對很多人來說又是一個最佳的投資時期!不過還是有一家公司做到了從年初到年末上漲24.3%的優秀業績。我們來看看是哪家?
數據下載地址:根據公司名字搜索一下,然後選擇 Historical Data,設置查詢期間,點擊 Apply,然後點擊 Download。
BABA Historical Prices | Alibaba Group Holding Limited A Stock - Yahoo Finance (finance.yahoo.com)
#coding:utf-8%matplotlibinlineimportpandasaspdimportnumpyasnpimportmatplotlib.pyplotaspltappDF=pd.read_csv(r"Downloads\AAPL.csv")fbDF=pd.read_csv(r"Downloads\FB.csv")jdDF=pd.read_csv(r"Downloads\JD.csv")babaDF=pd.read_csv(r"Downloads\BABA.csv")amznDF=pd.read_csv(r"Downloads\AMZN.csv")googleDF=pd.read_csv(r"Downloads\GOOGL.csv")
APPLE
# Inspect the column dtypes as loaded from the CSV.
appDF.dtypes
# Output:
#   Date          object
#   Open         float64
#   High         float64
#   Low          float64
#   Close        float64
#   Adj Close    float64
#   Volume         int64
#   dtype: object

appDF['Date'] = pd.to_datetime(appDF['Date'])  # change datatype
# Index the rows by date. Passing the Series (not the column name) keeps
# 'Date' as a regular column as well, as the info() output below shows.
appDF.set_index(appDF.Date, inplace=True)
appDF.index
# Output:
#   DatetimeIndex(['2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05',
#                  '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11',
#                  '2018-01-12', '2018-01-16',
#                  ...
#                  '2018-12-14', '2018-12-17', '2018-12-18', '2018-12-19',
#                  '2018-12-20', '2018-12-21', '2018-12-24', '2018-12-26',
#                  '2018-12-27', '2018-12-28'],
#                 dtype='datetime64[ns]', name='Date', length=250, freq=None)

appDF.info()
# Output:
#   <class 'pandas.core.frame.DataFrame'>
#   DatetimeIndex: 250 entries, 2018-01-02 to 2018-12-28
#   Data columns (total 7 columns):
#   Date         250 non-null datetime64[ns]
#   Open         250 non-null float64
#   High         250 non-null float64
#   Low          250 non-null float64
#   Close        250 non-null float64
#   Adj Close    250 non-null float64
#   Volume       250 non-null int64
#   dtypes: datetime64[ns](1), float64(5), int64(1)
#   memory usage: 15.6 KB

# Summary statistics of the columns.
appDF.describe()
Open High Low Close Adj Close Volume count 250.000000 250.000000 250.000000 250.000000 250.000000 2.500000e+02 mean 189.233760 191.120640 187.306320 189.178680 187.908454 3.401187e+07 std 20.456809 20.528269 20.387978 20.539151 20.836598 1.465514e+07 min 148.149994 151.550003 146.589996 146.830002 146.830002 1.251390e+07 25% 173.452503 174.962498 172.080002 173.472500 171.660324 2.351965e+07 50% 186.319999 187.534996 184.965003 186.180001 185.077881 3.161740e+07 75% 207.840000 209.437500 205.937496 207.875003 206.795952 4.077780e+07 max 230.779999 233.470001 229.779999 232.070007 231.263092 9.624670e+07
def change(column):
    """Print and return the cumulative price change of a stock.

    change = (current price - buy price) / buy price, where the buy price is
    the first value of *column* and the current price is its last value.

    Parameters
    ----------
    column : pandas.Series
        The closing-price column, in chronological order.

    Returns
    -------
    The relative change as a fraction (not a percentage).
    """
    # Use .iloc for positional access: the Series is indexed by date, and
    # integer keys with plain [] on a non-integer index rely on a deprecated
    # positional fallback.
    # Buy price: first record.
    buyPrice = column.iloc[0]
    # Current price: last record.
    curPrice = column.iloc[-1]
    # Cumulative change.
    priceChange = (curPrice - buyPrice) / buyPrice
    # Report whether the stock rose, stayed flat or fell.
    if priceChange > 0:
        print('股票累計上漲=', priceChange*100, '%')
    elif priceChange == 0:
        print('股票累沒有變化=', priceChange*100, '%')
    else:
        print('股票累計下跌', priceChange*100, '%')
    return priceChange


closeCol = appDF['Close']
appChange = change(closeCol)
# Output:
#   股票累計下跌 -9.305700374599455 %
# Inspect the column dtypes as loaded from the CSV.
fbDF.dtypes
# Output:
#   Date          object
#   Open         float64
#   High         float64
#   Low          float64
#   Close        float64
#   Adj Close    float64
#   Volume         int64
#   dtype: object

fbDF['Date'] = pd.to_datetime(fbDF['Date'])  # change datatype
# Index the rows by date; passing the Series is done the same way for every
# company in this notebook.
fbDF.set_index(fbDF.Date, inplace=True)
fbDF.index
# Output:
#   DatetimeIndex(['2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05',
#                  '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11',
#                  '2018-01-12', '2018-01-16',
#                  ...
#                  '2018-12-14', '2018-12-17', '2018-12-18', '2018-12-19',
#                  '2018-12-20', '2018-12-21', '2018-12-24', '2018-12-26',
#                  '2018-12-27', '2018-12-28'],
#                 dtype='datetime64[ns]', name='Date', length=250, freq=None)

# Summary statistics of the columns.
fbDF.describe()
Open High Low Close Adj Close Volume count 250.000000 250.000000 250.000000 250.000000 250.000000 2.500000e+02 mean 171.621040 173.769240 169.460560 171.672640 171.672640 2.766240e+07 std 19.595486 19.305557 19.958815 19.852291 19.852291 1.920073e+07 min 123.099998 129.740005 123.019997 124.059998 124.059998 9.588600e+06 25% 157.847503 160.939995 156.077495 158.142494 158.142494 1.779380e+07 50% 175.010002 177.040001 172.875000 174.794998 174.794998 2.179760e+07 75% 184.922493 186.510006 183.424996 185.289998 185.289998 3.031708e+07 max 215.720001 218.619995 214.270004 217.500000 217.500000 1.698037e+08
# Cumulative change of Facebook's closing price over the period.
closeCol = fbDF['Close']
fbChange = change(closeCol)
# Output:
#   股票累計下跌 -26.57920931076187 %
JingDong
# Inspect the column dtypes as loaded from the CSV.
jdDF.dtypes
# Output:
#   Date          object
#   Open         float64
#   High         float64
#   Low          float64
#   Close        float64
#   Adj Close    float64
#   Volume         int64
#   dtype: object

jdDF['Date'] = pd.to_datetime(jdDF['Date'])  # change datatype
jdDF.set_index(jdDF.Date, inplace=True)  # index the rows by date
jdDF.index
# Output:
#   DatetimeIndex(['2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05',
#                  '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11',
#                  '2018-01-12', '2018-01-16',
#                  ...
#                  '2018-12-14', '2018-12-17', '2018-12-18', '2018-12-19',
#                  '2018-12-20', '2018-12-21', '2018-12-24', '2018-12-26',
#                  '2018-12-27', '2018-12-28'],
#                 dtype='datetime64[ns]', name='Date', length=250, freq=None)

# Cumulative change of JD's closing price over the period.
closeCol = jdDF['Close']
jdChange = change(closeCol)
# Output:
#   股票累計下跌 -49.838263628425686 %
Alibaba
# Inspect the column dtypes as loaded from the CSV.
babaDF.dtypes
# Output:
#   Date          object
#   Open         float64
#   High         float64
#   Low          float64
#   Close        float64
#   Adj Close    float64
#   Volume         int64
#   dtype: object

babaDF['Date'] = pd.to_datetime(babaDF['Date'])  # change datatype
babaDF.set_index(babaDF.Date, inplace=True)  # index the rows by date
babaDF.index
# Output:
#   DatetimeIndex(['2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05',
#                  '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11',
#                  '2018-01-12', '2018-01-16',
#                  ...
#                  '2018-12-14', '2018-12-17', '2018-12-18', '2018-12-19',
#                  '2018-12-20', '2018-12-21', '2018-12-24', '2018-12-26',
#                  '2018-12-27', '2018-12-28'],
#                 dtype='datetime64[ns]', name='Date', length=250, freq=None)

# Cumulative change of Alibaba's closing price over the period.
closeCol = babaDF['Close']
babaChange = change(closeCol)
# Output:
#   股票累計下跌 -24.26354448996062 %
AMAZON
# Inspect the column dtypes as loaded from the CSV.
amznDF.dtypes
# Output:
#   Date          object
#   Open         float64
#   High         float64
#   Low          float64
#   Close        float64
#   Adj Close    float64
#   Volume         int64
#   dtype: object

amznDF['Date'] = pd.to_datetime(amznDF['Date'])  # change datatype
amznDF.set_index(amznDF.Date, inplace=True)  # index the rows by date
amznDF.index
# Output:
#   DatetimeIndex(['2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05',
#                  '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11',
#                  '2018-01-12', '2018-01-16',
#                  ...
#                  '2018-12-14', '2018-12-17', '2018-12-18', '2018-12-19',
#                  '2018-12-20', '2018-12-21', '2018-12-24', '2018-12-26',
#                  '2018-12-27', '2018-12-28'],
#                 dtype='datetime64[ns]', name='Date', length=250, freq=None)

# Cumulative change of Amazon's closing price over the period.
closeCol = amznDF['Close']
amznChange = change(closeCol)
# Output:
#   股票累計上漲= 24.306776862206565 %

# Cross-check the same figure by hand. .iloc is used because the Series is
# indexed by date, so first/last must be taken by position.
(amznDF.Close.iloc[-1] - amznDF.Close.iloc[0]) / amznDF.Close.iloc[0] * 100  # amazon
# Output:
#   24.306776862206565

# First row of the frame.
amznDF.head(1)
Date Open High Low Close Adj Close Volume Date 2018-01-02 2018-01-02 1172.0 1190.0 1170.51001 1189.01001 1189.01001 2694500
# Show the last row of amznDF.
amznDF.tail(1)
Date Open High Low Close Adj Close Volume Date 2018-12-28 2018-12-28 1473.349976 1513.469971 1449.0 1478.02002 1478.02002 8825600
# Inspect the column dtypes as loaded from the CSV.
googleDF.dtypes
# Output:
#   Date          object
#   Open         float64
#   High         float64
#   Low          float64
#   Close        float64
#   Adj Close    float64
#   Volume         int64
#   dtype: object

googleDF['Date'] = pd.to_datetime(googleDF['Date'])  # change datatype
googleDF.set_index(googleDF.Date, inplace=True)  # index the rows by date
googleDF.index
# Output:
#   DatetimeIndex(['2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05',
#                  '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11',
#                  '2018-01-12', '2018-01-16',
#                  ...
#                  '2018-12-14', '2018-12-17', '2018-12-18', '2018-12-19',
#                  '2018-12-20', '2018-12-21', '2018-12-24', '2018-12-26',
#                  '2018-12-27', '2018-12-28'],
#                 dtype='datetime64[ns]', name='Date', length=250, freq=None)

# Cumulative change of Google's closing price over the period.
closeCol = googleDF['Close']
googleChange = change(closeCol)
# Output:
#   股票累計下跌 -2.472014607028055 %

# Line chart of Alibaba's closing price. x is omitted on purpose: DataFrame.plot
# expects x to be a column label or position, and when it is omitted the
# frame's index (the dates set above) is used for the x axis.
babaDF.plot(y='Close')
# x-axis label
plt.xlabel('時間')
# y-axis label
plt.ylabel('股價(美元)')
# figure title
plt.title('2018年阿里巴巴股價走勢')
# show grid lines
plt.grid(True)
# display the figure
plt.show()
# Scatter plot of Alibaba's closing price against traded volume.
babaDF.plot(x='Volume', y='Close', kind='scatter')
# x-axis label
plt.xlabel('成交量')
# y-axis label
plt.ylabel('股價(美元)')
# figure title
plt.title('成交量和股價')
# show grid lines
plt.grid(True)
# display the figure
plt.show()
# Pairwise correlation of the numeric columns. numeric_only=True excludes the
# datetime 'Date' column, which is still part of the frame; newer pandas
# versions no longer drop non-numeric columns by default.
babaDF.corr(numeric_only=True)
Open High Low Close Adj Close Volume Open 1.000000 0.995051 0.993075 0.985336 0.985336 -0.133665 High 0.995051 1.000000 0.994309 0.993553 0.993553 -0.106145 Low 0.993075 0.994309 1.000000 0.994654 0.994654 -0.168921 Close 0.985336 0.993553 0.994654 1.000000 1.000000 -0.145040 Adj Close 0.985336 0.993553 0.994654 1.000000 1.000000 -0.145040 Volume -0.133665 -0.106145 -0.168921 -0.145040 -0.145040 1.000000
# Draw Google first; this creates the axes (ax1) that the others share.
# x is omitted so that each frame's date index is used for the x axis
# (DataFrame.plot expects x to be a column label or position, not an Index).
ax1 = googleDF.plot(y='Close', label='谷歌')
# Passing ax=ax1 draws the remaining companies on the same axes.
# Amazon
amznDF.plot(ax=ax1, y='Close', label='亞馬遜')
# Facebook
fbDF.plot(ax=ax1, y='Close', label='Facebook')
# Apple
appDF.plot(ax=ax1, y='Close', label='蘋果')
# Alibaba
babaDF.plot(ax=ax1, y='Close', label='阿里巴巴')
# JD.com (the original comment said Tencent, but the data and label are JD)
jdDF.plot(ax=ax1, y='Close', label='京東')
# x-axis label
plt.xlabel('時間')
# y-axis label
plt.ylabel('股價(美元)')
# figure title
# NOTE(review): the title says 'GAFATA' while the bar chart later in this
# file is titled 'GAFAJA' -- confirm which acronym is intended.
plt.title('2018年GAFATA股價累計漲幅比較')
# show grid lines
plt.grid(True)
plt.show()
# Draw Google first; this creates the axes (ax2) that Amazon shares.
# x is omitted so that each frame's date index is used for the x axis
# (DataFrame.plot expects x to be a column label or position, not an Index).
ax2 = googleDF.plot(y='Close', label='谷歌')
# Passing ax=ax2 draws Amazon on the same axes.
amznDF.plot(ax=ax2, y='Close', label='亞馬遜')
# x-axis label
plt.xlabel('時間')
# y-axis label
plt.ylabel('股價(美元)')
# figure title
# NOTE(review): the title says 'GAFATA' but only Google and Amazon are drawn
# here -- confirm the intended title.
plt.title('2018年GAFATA股價累計漲幅比較')
# show grid lines
plt.grid(True)
plt.show()
# Draw Facebook first; this creates the axes (ax3) that the others share.
# x is omitted so that each frame's date index is used for the x axis
# (DataFrame.plot expects x to be a column label or position, not an Index).
ax3 = fbDF.plot(y='Close', label='Facebook')
# Apple
appDF.plot(ax=ax3, y='Close', label='蘋果')
# Alibaba
babaDF.plot(ax=ax3, y='Close', label='阿里巴巴')
# JD.com (the original comment said Tencent, but the data and label are JD)
jdDF.plot(ax=ax3, y='Close', label='京東')
# x-axis label
plt.xlabel('時間')
# y-axis label
plt.ylabel('股價(美元)')
# figure title
# NOTE(review): the title says 'GAFATA' but only four companies are drawn
# here -- confirm the intended title.
plt.title('2018年GAFATA股價累計漲幅比較')
# show grid lines
plt.grid(True)
plt.show()
# Mean closing price of each of the six companies.
gafataMeanList = [
    googleDF['Close'].mean(),  # Google
    amznDF['Close'].mean(),    # Amazon
    fbDF['Close'].mean(),      # Facebook
    appDF['Close'].mean(),     # Apple
    babaDF['Close'].mean(),    # Alibaba
    jdDF['Close'].mean(),      # JD.com
]
# Build a pandas Series keyed by company name. The last entry is JD's mean,
# so it is labelled '京東' (it was previously mislabelled as Tencent).
gafataMeanSer = pd.Series(gafataMeanList,
                          index=['谷歌',
                                 '亞馬遜',
                                 'Facebook',
                                 '蘋果',
                                 '阿里巴巴',
                                 '京東'])
gafataMeanSer.plot(kind='bar', label='GAFAJA')
# figure title
plt.title('2018年GAFAJA股價平均值')
# x-axis label
plt.xlabel('公司名稱')
# y-axis label
plt.ylabel('股價平均值(美元)')
plt.grid(True)
plt.show()
# Combine the six closing-price columns side by side (axis=1), aligned on
# their date index. The Series are concatenated directly; seeding the list
# with an empty DataFrame adds no columns and is unnecessary.
closeDf = pd.concat([googleDF['Close'],  # Google
                     amznDF['Close'],    # Amazon
                     fbDF['Close'],      # Facebook
                     appDF['Close'],     # Apple
                     babaDF['Close'],    # Alibaba
                     jdDF['Close'],      # JD.com
                     ], axis=1)
# Rename the columns to the company names.
closeDf.columns = ['谷歌', '亞馬遜', 'Facebook', 'APPLE', '阿里巴巴', 'JD']
closeDf.head()
谷歌 亞馬遜 Facebook APPLE 阿里巴巴 JD Date 2018-01-02 1073.209961 1189.010010 181.419998 172.259995 183.649994 43.279999 2018-01-03 1091.520020 1204.199951 184.669998 172.229996 184.000000 43.509998 2018-01-04 1095.760010 1209.589966 184.330002 173.029999 185.710007 43.669998 2018-01-05 1110.290039 1229.140015 186.850006 175.000000 190.699997 45.639999 2018-01-08 1114.209961 1246.869995 188.279999 174.350006 190.330002 46.099998
# Box plot with one box per column of closeDf.
closeDf.plot(kind='box')
plt.grid(True)
plt.show()
股票總結:在2018年,京東的表現最差,股價累計跌了將近一半,總市值還剩300億。亞馬遜表現最好,上漲24.3%,總市值7200多億。其他幾支股票都下跌了!蘋果去年市值最高時達到了一萬億,現在是7400多億。