# Install randomForest only when it is missing, so that re-running the script
# does not reinstall the package on every execution.
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest")
}
# (Original author's note: only the caret package provides a confusion matrix.)

# Read the data set; the first line of data.txt supplies the column names.
npdata <- read.table("data.txt", header = TRUE)

# Fix the RNG seed so the same random split is drawn on every run.
set.seed(123)

# Split into training and test sets: draw 70% of the row indices at random,
# without replacement, for training.
train_sub <- sample(nrow(npdata), floor(0.7 * nrow(npdata)))
train_data <- npdata[train_sub, ]
# Select the complement by positive index: negative indexing with an empty
# `train_sub` would return zero rows instead of every row.
test_data <- npdata[setdiff(seq_len(nrow(npdata)), train_sub), ]
library(randomForest)
# Random forest fit.
# Re-seed the RNG so the forest itself is reproducible, then model `npl`
# against every other column of the training set, growing 50 trees and
# recording variable-importance measures.
set.seed(100)
np.rf <- randomForest(
  npl ~ .,
  data = train_data,
  importance = TRUE,
  ntree = 50
)
# Check prediction accuracy on the training set: plot the forest's
# predictions against the observed `npl` values.
np_predict <- predict(np.rf, train_data)
plot(train_data$npl, np_predict,
     main = '訓(xùn)練集', xlab = 'npl', ylab = 'Predict')
# Reference line y = x (intercept 0, slope 1): points on this line are
# predicted exactly. The previous abline(1, 1) drew y = 1 + x instead.
abline(0, 1)

# Evaluate predictive performance on the held-out test set the same way.
# `np_predict` is overwritten and ends up holding the test-set predictions.
np_predict <- predict(np.rf, test_data)
plot(test_data$npl, np_predict,
     main = '預(yù)測集', xlab = 'npl', ylab = 'Predict')
abline(0, 1)
# Inspect goodness of fit: print the fitted forest's summary.
print(np.rf)

# Variable importance, as a table and as a plot. The table is printed
# explicitly so it also appears when the script is run through source(),
# which does not auto-print top-level values by default.
print(importance(np.rf))
varImpPlot(np.rf, main = "variable importance")

# Error against the number of trees grown, used to judge how many trees
# are needed.
plot(np.rf)
# Cross-validated error as a function of the number of predictors, used to
# decide how many variables to keep.
# Select the predictors by name rather than by position: dropping "column 1"
# only removes the response when `npl` happens to be the first column;
# otherwise the response would leak into the predictor set.
rfcv_predictors <- train_data[, names(train_data) != "npl", drop = FALSE]
result <- rfcv(rfcv_predictors, train_data$npl, cv.fold = 10)

# Print explicitly so the error table is shown under source() as well.
print(result$error.cv)

# Error vs. number of variables, with the variable count on a log-scaled axis.
with(result, plot(n.var, error.cv, log = "x", type = "o", lwd = 2))