Building a Bidirectional LSTM in TensorFlow

The steps are the same as for building a unidirectional LSTM:

  1. Set the hyperparameters (hidden state size, learning rate, input/output dimensions, etc.)
  2. Define the input and output placeholders
  3. Define the network structure and write the computation calls (dynamic_rnn and the like, which produce the LSTM cells' output states when run)
  4. Define the weights and biases of the fully connected layer that maps the LSTM output states to unnormalized class scores
  5. Compute the unnormalized output scores (logits)
  6. Define the softmax layer
  7. Define the loss
  8. Define the optimizer and the training op
  9. sess.run()

The code is given below:

learning_rate = 0.1
state_size = 128 # hidden layer num of features
n_classes = 19
n_features = 23

# inputs
x = tf.placeholder(tf.float32, [None, None, n_features], name='input_placeholder')  # [batch_size, time_steps, n_features]
y = tf.placeholder(tf.float32, [None, None, n_classes], name='labels_placeholder')  # [batch_size, time_steps, n_classes], one-hot

batch_size = tf.placeholder(tf.int32, (), name='batch_size')
time_steps = tf.placeholder(tf.int32, (), name='time_steps')

# bidirectional RNN
lstm_fw_cell = tf.contrib.rnn.BasicLSTMCell(state_size)
lstm_bw_cell = tf.contrib.rnn.BasicLSTMCell(state_size)

init_fw = lstm_fw_cell.zero_state(batch_size, dtype=tf.float32)
init_bw = lstm_bw_cell.zero_state(batch_size, dtype=tf.float32)

# note the first dimension: 2 * state_size, because the forward and backward
# outputs are concatenated before the fully connected layer
weights = tf.get_variable("weights", [2 * state_size, n_classes], dtype=tf.float32,
                         initializer = tf.random_normal_initializer(mean=0, stddev=1))
biases = tf.get_variable("biases", [n_classes], dtype=tf.float32, 
                        initializer = tf.random_normal_initializer(mean=0, stddev=1))

outputs, final_states = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,
                                                       lstm_bw_cell,
                                                       x,
                                                       initial_state_fw = init_fw,
                                                       initial_state_bw = init_bw)

outputs = tf.concat(outputs, 2)   # concatenate the forward and backward outputs along the feature axis
state_out = tf.matmul(tf.reshape(outputs, [-1, 2 * state_size]), weights) + biases  # again note the 2 * state_size dimension
logits = tf.reshape(state_out, [batch_size, time_steps, n_classes])

loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=y))  # mean cross-entropy over all positions
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)  # optimization method
train_op = optimizer.minimize(loss_op)

# softmax
probs = tf.nn.softmax(logits, -1)   # -1 is also the default: apply softmax over the last axis
predict = tf.argmax(probs, -1)  # column of the largest probability along the last axis, counting from 0
                                # shape becomes [batch_size, time_steps]
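
Before training, it is worth sanity-checking the graph with random data. Below is a minimal smoke test; the batch size and sequence length (4 and 7) are arbitrary, and numpy is assumed to be available:

import numpy as np

def smoke_test():
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        bsz, steps = 4, 7  # arbitrary sizes, just for the shape check
        dummy_x = np.random.randn(bsz, steps, n_features).astype(np.float32)
        preds = sess.run(predict, feed_dict={x: dummy_x,
                                             batch_size: bsz,
                                             time_steps: steps})
        print(preds.shape)  # (4, 7): one class index per time step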

def train_network(num_epochs = 100):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())  # initialize variables
        
        interval = 5
        
        for epoch in range(num_epochs):
            # training loop; get_dump_seq_data is my own batch generator
            for idx, (time_step, inputs, labels, idxes) in enumerate(get_dump_seq_data(1)):
                _ = sess.run([train_op],
                             feed_dict={x: inputs,
                                        y: labels,
                                        batch_size: len(inputs),
                                        time_steps: time_step})
            print("epoch %d train done" % epoch)
            # epoch finished; periodically compute the loss and accuracy
            
            if epoch % interval == 0 and epoch > 1:
                # training-set metrics
                acc_record, total_df, total_acc, loss = compute_accuracy(sess, 1)  # my own helper function; not central to the architecture
                # dev-set metrics
                acc_record_dev, total_df_dev, total_acc_dev, loss_dev = compute_accuracy(sess, 0)
                print("train_acc: %.6f, train_loss: %.6f; dev_acc: %.6f, dev_loss: %.6f" % (total_acc, loss, total_acc_dev, loss_dev))
                print("- " * 50)
                if num_epochs - epoch <= interval:
                    return acc_record, total_df, acc_record_dev, total_df_dev
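
One caveat: loss_op averages the cross-entropy over every position of the padded [batch_size, time_steps] grid, so with variable-length sequences the padding steps leak into the loss. A common fix is to mask them out. This is a sketch only; the seq_len placeholder (each sequence's true length) is hypothetical and not part of the code above:

seq_len = tf.placeholder(tf.int32, [None], name='seq_len')  # hypothetical: true length of each sequence
mask = tf.sequence_mask(seq_len, maxlen=time_steps, dtype=tf.float32)  # [batch, time], 1.0 on real steps
xent = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y)  # per-step loss, [batch, time]
masked_loss = tf.reduce_sum(xent * mask) / tf.reduce_sum(mask)  # average over real steps only

With such lengths available, passing sequence_length=seq_len to tf.nn.bidirectional_dynamic_rnn is also worthwhile, since otherwise the backward cell starts reading from the padding.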
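
For reference, tf.contrib was removed in TensorFlow 2.x. A roughly equivalent network there takes a few lines of Keras; this is a sketch under the same hyperparameters, not a drop-in replacement for the graph above:

model = tf.keras.Sequential([
    tf.keras.layers.Bidirectional(  # concatenates forward and backward outputs by default
        tf.keras.layers.LSTM(state_size, return_sequences=True),  # per-time-step outputs
        input_shape=(None, n_features)),  # variable sequence length
    tf.keras.layers.Dense(n_classes),  # per-step logits, like the weights/biases above
])
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True))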