# -*- coding: utf-8 -*-
import pandas
from sklearn.preprocessing import MinMaxScaler

# Load the table whose numeric columns are rescaled below.
data = pandas.read_csv('D:\\PDM\\6.1\\data1.csv')

# Min-Max scaling: map each column linearly onto [0, 1].
# scikit-learn transformers expect 2-D input of shape (n_samples, n_features),
# so each column is selected with double brackets (a one-column DataFrame)
# and the (n, 1) result is flattened back to 1-D before being stored.
scaler = MinMaxScaler()
data['標準化累計票房'] = scaler.fit_transform(data[['累計票房']]).ravel()
data['標準化豆瓣評分'] = scaler.fit_transform(data[['豆瓣評分']]).ravel()

# Z-score standardization: zero mean, unit variance per column.
# NOTE: these assignments reuse the column names written by the Min-Max step
# above, so the Min-Max results are overwritten (kept as in the original).
from sklearn.preprocessing import scale
data['標準化累計票房'] = scale(data['累計票房'])
data['標準化豆瓣評分'] = scale(data['豆瓣評分'])

# Normalizer: rescale a vector to unit norm.
# Normalizer works per sample (row), so the whole column is presented as a
# single row of shape (1, n); [0] then extracts that normalized row.
from sklearn.preprocessing import Normalizer
scaler = Normalizer()
data['歸一化累計票房'] = scaler.fit_transform(
    data['累計票房'].values.reshape(1, -1)
)[0]
data['歸一化豆瓣評分'] = scaler.fit_transform(
    data['豆瓣評分'].values.reshape(1, -1)
)[0]
# --- One-hot (dummy) encoding of a categorical column ---
import pandas

# Load the table containing the categorical '癥狀' column.
data = pandas.read_csv('D:\\PDM\\6.1\\data2.csv')

# Convert the column to pandas' categorical dtype before encoding.
data['癥狀'] = data['癥狀'].astype('category')

# Expand '癥狀' into one indicator column per category, named '癥狀_<value>'.
dummiesData = pandas.get_dummies(data, columns=['癥狀'], prefix=['癥狀'], prefix_sep="_")
import pandas

# `Imputer` was removed from sklearn.preprocessing in scikit-learn 0.22;
# its replacement is sklearn.impute.SimpleImputer. Fall back to the old
# location only for scikit-learn releases that predate sklearn.impute.
try:
    from sklearn.impute import SimpleImputer as Imputer
except ImportError:
    from sklearn.preprocessing import Imputer

# Load the table whose missing values are filled in below.
data = pandas.read_csv('D:\\PDM\\6.1\\data3.csv')

# Supported strategies include 'mean', 'median', 'most_frequent'.
imputer = Imputer(strategy='mean')

# Fill missing values in the column with the column mean. The result is kept
# in a variable because a bare expression is silently discarded when this
# file runs as a script; `data` itself is left unmodified, as before.
imputedData = imputer.fit_transform(data[['累計票房']])