def __randomSampling(self, df_index, scale):
    """Draw a simple random sample of index entries without replacement.

    Args:
        df_index: Index (or other 1-D sequence) to sample from.
        scale: Fraction of entries to draw; ``int(len(df_index) * scale)``
            entries are returned.

    Returns:
        A NumPy array holding the sampled entries in random order. The
        array is empty when the computed sample size is zero.

    Raises:
        ValueError: Propagated from ``np.random.choice`` when the computed
            sample size is negative or exceeds the population size.
    """
    population = np.asarray(df_index)
    sample_size = int(len(population) * scale)
    if sample_size == 0:
        # Also covers an empty index, which np.random.choice may reject.
        return population[:0]
    # Uniform weights are np.random.choice's default, so no explicit
    # probability vector is needed.
    return np.random.choice(population, sample_size, replace=False)
def RandomSampling(self, scale):
    """Split ``self.df`` into a simple random sample and the remaining rows.

    Args:
        scale: Fraction of rows to draw; forwarded to the sampling helper.

    Returns:
        Tuple ``(df_choice, df_not_choice)``: the sampled rows and the rows
        of ``self.df`` whose index label was not sampled.
    """
    chosen_labels = self.__randomSampling(self.df.index, scale)
    # The helper samples from self.df.index, i.e. it yields labels, so the
    # rows are selected by label rather than by position.
    df_choice = self.df.loc[chosen_labels, :]
    # ``~`` inverts the membership mask; unary minus on a boolean array is
    # rejected by NumPy.
    df_not_choice = self.df.loc[~self.df.index.isin(df_choice.index)]
    return (df_choice, df_not_choice)
def __repetitionRandomSampling(self, df_index, scale):
    """Draw a random sample of index entries with replacement.

    Args:
        df_index: Index to sample from; it must support indexing with an
            integer array (as a pandas Index does).
        scale: Fraction of entries to draw; ``int(len(df_index) * scale)``
            entries are returned, and the same entry may appear repeatedly.

    Returns:
        ``df_index`` indexed by the randomly drawn positions. An empty
        slice of ``df_index`` is returned when the sample size is zero.

    Raises:
        ValueError: Propagated from ``np.random.randint`` when the computed
            sample size is negative.
    """
    population_size = len(df_index)
    sample_size = int(population_size * scale)
    if sample_size == 0:
        # Also covers an empty index, for which randint has no valid range.
        return df_index[:0]
    # randint's upper bound is exclusive, so passing population_size makes
    # every position, including the last one, eligible.
    positions = np.random.randint(0, population_size, size=sample_size)
    return df_index[positions]
def RepetitionRandomSampling(self, scale):
    """Split ``self.df`` into a with-replacement random sample and the rest.

    Args:
        scale: Fraction of rows to draw; forwarded to the sampling helper.

    Returns:
        Tuple ``(df_choice, df_not_choice)``: the sampled rows (a row may
        occur more than once) and the rows of ``self.df`` whose index label
        was never drawn.
    """
    chosen_labels = self.__repetitionRandomSampling(self.df.index, scale)
    # The helper samples from self.df.index, i.e. it yields labels, so the
    # rows are selected by label rather than by position.
    df_choice = self.df.loc[chosen_labels, :]
    # ``~`` inverts the membership mask; unary minus on a boolean array is
    # rejected by NumPy.
    df_not_choice = self.df.loc[~self.df.index.isin(df_choice.index)]
    return (df_choice, df_not_choice)
def __systematicSampling(self, df_index, scale):
    """Pick evenly spaced index entries, starting from the first one.

    Args:
        df_index: Index (or other indexable sequence) to sample from.
        scale: Fraction of entries to pick; ``int(len(df_index) * scale)``
            entries are returned, spaced ``1 / scale`` positions apart.

    Returns:
        A list of the picked entries in positional order. The list is
        empty when the computed sample size is zero or negative.
    """
    population_size = len(df_index)
    sample_size = int(population_size * scale)
    if sample_size <= 0:
        # Nothing to pick; this also avoids dividing by a zero scale.
        return []
    step = 1.0 / scale
    # The modulo wraps positions around when scale > 1 pushes them past
    # the end of the index.
    return [
        df_index[int(i * step) % population_size]
        for i in range(sample_size)
    ]
def SystematicSampling(self, scale):
    """Split ``self.df`` into a systematic (evenly spaced) sample and the rest.

    Args:
        scale: Fraction of rows to pick; forwarded to the sampling helper.

    Returns:
        Tuple ``(df_choice, df_not_choice)``: the picked rows and the rows
        of ``self.df`` whose index label was not picked.
    """
    chosen_labels = self.__systematicSampling(self.df.index, scale)
    # The helper picks from self.df.index, i.e. it yields labels, so the
    # rows are selected by label rather than by position.
    df_choice = self.df.loc[chosen_labels, :]
    # ``~`` inverts the membership mask; unary minus on a boolean array is
    # rejected by NumPy.
    df_not_choice = self.df.loc[~self.df.index.isin(df_choice.index)]
    return (df_choice, df_not_choice)
- 分層抽樣
先按對觀察指標影響較大的某種特徵,將總體分為若干個類別(層),再從每一層內按上述抽樣方法抽取一定比例的觀察單位,合起來組成樣本。
def StratifiedSampling(self, sampling_type, scale):
    """Draw a stratified sample: sample each stratum separately and combine.

    Strata are the distinct values of column ``0`` of ``self.df_col``,
    visited in order of first appearance.

    Args:
        sampling_type: Sampling method applied inside every stratum. Only
            ``'rs'`` (random sampling), ``'rrs'`` (random sampling with
            replacement) and ``'ss'`` (systematic sampling) are supported.
        scale: Fraction of each stratum to draw, expected in (0, 1).

    Returns:
        Tuple ``(df_choice, df_not_choice)``: the sampled rows of
        ``self.df`` and the rows whose index label was not sampled.

    Raises:
        Exception: If ``sampling_type`` is not one of the supported codes.
    """
    samplers = {
        'rs': self.__randomSampling,
        'rrs': self.__repetitionRandomSampling,
        'ss': self.__systematicSampling,
    }
    if sampling_type not in samplers:
        raise Exception('不支持的隨機類型。')
    draw = samplers[sampling_type]

    strata = self.df_col[0]
    chosen = []
    for value in strata.unique():
        stratum_index = self.df_col[strata == value].index
        chosen.extend(draw(stratum_index, scale))

    # NOTE(review): the sampled df_col index values are used as positions
    # into self.df; this presumes df_col carries a positional (0..n-1)
    # index aligned with self.df's rows - confirm against the constructor.
    df_choice = self.df.iloc[chosen]
    # ``~`` inverts the membership mask; unary minus on a boolean array is
    # rejected by NumPy.
    df_not_choice = self.df.iloc[~self.df.index.isin(df_choice.index)]
    return (df_choice, df_not_choice)
最后編輯于 :
©著作權歸作者所有,轉載或內容合作請聯繫作者
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。