python與xgb做特征重要性分析

代碼

import pandas as pd
import xgboost as xgb
import operator

def get_data(path="first_result2.csv", n_features=11, target="target"):
    """Load the training CSV and return the feature frame and the labels.

    Every object-dtype column is target-encoded: each category value is
    replaced by the mean of ``target`` over the rows that hold that value,
    which turns string columns into float columns.

    Args:
        path: CSV file to read.
        n_features: Number of leading columns to use as features.
        target: Name of the label column.

    Returns:
        A ``(x_train, y_train)`` tuple: a DataFrame with the first
        ``n_features`` columns (after encoding) and the ``target`` Series.
    """
    train = pd.read_csv(path)
    # The features are the leading columns of the file (11 by default).
    features = list(train.columns[:n_features])
    y_train = train[target]
    # Target (mean) encoding of the categorical columns.  Assign the encoded
    # column back explicitly: an in-place ``replace`` on ``train[feat]`` acts
    # on a temporary selection and is not guaranteed to update ``train``.
    for feat in train.select_dtypes(include=['object']).columns:
        category_means = train.groupby(feat)[target].mean()
        # Missing values have no group, so they stay NaN after the mapping.
        train[feat] = train[feat].map(category_means)
    x_train = train[features]
    return x_train, y_train

 x_train, y_train = get_data()

#這里的參數(shù)自己改
xgb_params = {'booster':'gbtree','objective': 'binary:logistic', "eta": 0.01, "max_depth": 5,  "silent": 0,"colsample_bytree":0.7}
num_rounds = 1000

dtrain = xgb.DMatrix(x_train, label=y_train)
gbdt = xgb.train(xgb_params, dtrain, num_rounds)
importance = gbdt.get_fscore()
importance = sorted(importance.items(), key=operator.itemgetter(1))

print importance

結果

[('gender', 578), ('is_sys', 1202), ('is_font_cem', 1448), ('is_sup_cem', 1507), ('ite_phone_num', 1669), ('is_dou_kard', 1729), ('is_auto', 1796), ('age', 2235), ('now_month', 2596), (' avg_flow', 2914), ('avr_cost', 4149)]
最后編輯于
©著作權歸作者所有,轉載或內容合作請聯繫作者
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

推薦閱讀更多精彩內容