Contents
- Audio decoding flow
- Decoding audio to PCM
- Playing audio with AudioTrack
- References
- Takeaways
In the previous article we walked through the FFmpeg decoding flow with its key functions and structs, and implemented a video decoder. In this article we implement an audio decoder; the decoding flow is basically the same as for video. The raw audio data FFmpeg decodes is in PCM format, and on Android PCM audio can be played back with either AudioTrack or OpenSL ES.
Let's look at the decoding flow first.
1. Audio Decoding Flow
This is basically the same as the video decoding flow from the previous article. The points to watch are audio resampling and how sample data is laid out for the different sample formats.
1.1 Decoding steps
- avformat_open_input: open the media file
- avformat_find_stream_info: read the stream info and initialize the AVFormatContext
- Find the index of the audio stream
- avcodec_find_decoder: find the decoder matching the audio stream's codec_id
- avcodec_open2: initialize the AVCodecContext with the given AVCodec
- Initialize the output file and the decoding AVPacket and AVFrame structs
- Allocate a resampling SwrContext and initialize the resampler
- av_read_frame: read the stream frame by frame
- avcodec_send_packet
- avcodec_receive_frame
- swr_convert: resample
- Write to a PCM file, or play with AudioTrack or OpenSL ES
- Release resources
1.2 Background
FFmpeg's audio sample formats:
enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    AV_SAMPLE_FMT_U8,   ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,  ///< signed 16 bits
    AV_SAMPLE_FMT_S32,  ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,  ///< float
    AV_SAMPLE_FMT_DBL,  ///< double
    AV_SAMPLE_FMT_U8P,  ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P, ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P, ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP, ///< float, planar
    AV_SAMPLE_FMT_DBLP, ///< double, planar
    AV_SAMPLE_FMT_S64,  ///< signed 64 bits
    AV_SAMPLE_FMT_S64P, ///< signed 64 bits, planar
    AV_SAMPLE_FMT_NB    ///< Number of sample formats. DO NOT USE if linking dynamically
};
The P suffix (planar) determines how the data in AVFrame.data is laid out. Without P (packed), samples are interleaved as LRLRLRLRLR...; with P, they are grouped per channel as LLLLLRRRRR, so for stereo data[0] holds all the L samples and data[1] all the R samples (note: these are sample points, not bytes). A PCM player expects the interleaved LRLR layout.
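To make the layout concrete, here is a minimal Java sketch (an illustrative helper, not code from the decoder below) that interleaves two planar 16-bit channel buffers into the packed LRLR order a PCM player expects:

public class PcmLayout {
    // Interleave planar stereo data (left = all L samples, right = all R samples)
    // into packed LRLR... order. The unit is a sample point, not a byte;
    // assumes left and right have the same length.
    public static short[] interleave(short[] left, short[] right) {
        short[] packed = new short[left.length * 2];
        for (int i = 0; i < left.length; i++) {
            packed[2 * i] = left[i];      // L sample
            packed[2 * i + 1] = right[i]; // R sample
        }
        return packed;
    }
}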
2. Decoding to PCM: Code Implementation
The full implementation, with detailed comments:
#include <jni.h>
#include <string>
#include <unistd.h>
extern "C" {
#include "include/libavcodec/avcodec.h"
#include "include/libavformat/avformat.h"
#include "include/log.h"
#include <libswscale/swscale.h>
#include <libavutil/imgutils.h>
#include <libswresample/swresample.h>
}
extern "C"
JNIEXPORT jint JNICALL
Java_android_spport_mylibrary2_Demo_decodeAudio(JNIEnv *env, jobject thiz, jstring video_path,
                                                jstring pcm_path) {
    //Allocate the AVFormatContext; remember to free it
    AVFormatContext *pFormatContext = avformat_alloc_context();
    const char *url = env->GetStringUTFChars(video_path, 0);
    //1. Open the media file
    int result = avformat_open_input(&pFormatContext, url, NULL, NULL);
    if (result != 0) {
        LOGE("open input error url =%s,result=%d", url, result);
        return -1;
    }
    //2. Read the stream info and populate the AVFormatContext
    result = avformat_find_stream_info(pFormatContext, NULL);
    if (result < 0) {
        LOGE("open input avformat_find_stream_info,result=%d", result);
        return -1;
    }
    //3. Find the index of the audio stream
    int audioIndex = -1;
    for (int i = 0; i < pFormatContext->nb_streams; ++i) {
        AVMediaType codecType = pFormatContext->streams[i]->codecpar->codec_type;
        if (AVMEDIA_TYPE_AUDIO == codecType) {
            audioIndex = i;
            break;
        }
    }
    if (audioIndex == -1) {
        LOGE("could not find an audio stream");
        return -1;
    }
    AVCodecParameters *pCodecParameters = pFormatContext->streams[audioIndex]->codecpar;
    //4. Find the decoder matching the stream's codec_id
    AVCodec *pCodec = avcodec_find_decoder(pCodecParameters->codec_id);
    if (pCodec == NULL) {
        LOGE("Couldn't find Codec");
        return -1;
    }
    //5. Allocate an AVCodecContext, fill it from the stream parameters,
    //   then initialize it with the given AVCodec
    AVCodecContext *pCodecContext = avcodec_alloc_context3(pCodec);
    avcodec_parameters_to_context(pCodecContext, pCodecParameters);
    int openResult = avcodec_open2(pCodecContext, pCodec, NULL);
    if (openResult < 0) {
        LOGE("avcodec open2 result %d", openResult);
        return -1;
    }
    const char *pcmPathStr = env->GetStringUTFChars(pcm_path, NULL);
    //Create a binary file opened for reading and writing; an existing file is truncated
    FILE *pcmFile = fopen(pcmPathStr, "wb+");
    if (pcmFile == NULL) {
        LOGE("fopen output file error");
        return -1;
    }
    //6. Initialize the decoding AVPacket and AVFrame structs
    AVPacket *packet = av_packet_alloc();
    AVFrame *pFrame = av_frame_alloc();
    //7. Allocate the resampling SwrContext
    SwrContext *swrContext = swr_alloc();
    int numBytes = 0;
    uint8_t *outData[2] = {0};
    int dstNbSamples = 0; // number of output samples per frame after resampling
    int outChannel = 2;   // output channel count after resampling
    // Planar (P) formats group the samples per channel (for stereo, data[0] is all L,
    // data[1] is all R; grouped per sample point, not per byte), while packed formats
    // interleave them as LRLRLR... (each LR pair is one audio sample). A PCM player
    // expects the interleaved LRLR layout, so we interleave when writing below.
    AVSampleFormat outFormat = AV_SAMPLE_FMT_S16P; // output sample format after resampling
    int outSampleRate = 44100; // output sample rate after resampling
    // A channel layout is a different enum from a channel count; convert with av_get_default_channel_layout
    swrContext = swr_alloc_set_opts(swrContext, // pass the context allocated above (NULL would allocate a new one)
                                    av_get_default_channel_layout(outChannel), // output channel layout
                                    outFormat,       // output sample format
                                    outSampleRate,   // output sample rate
                                    av_get_default_channel_layout(pCodecContext->channels), // input channel layout
                                    pCodecContext->sample_fmt,  // input sample format
                                    pCodecContext->sample_rate, // input sample rate
                                    0,
                                    0);
    //Initialize the resampler
    int swrInit = swr_init(swrContext);
    if (swrInit < 0) {
        LOGE("swr init error swrInit=%d", swrInit);
        return -1;
    }
    int frame_cnt = 0;
    // 1152 is a common per-frame sample count (e.g. MP3); * 8 bytes leaves headroom per channel
    outData[0] = (uint8_t *) av_malloc(1152 * 8);
    outData[1] = (uint8_t *) av_malloc(1152 * 8);
    //8. Read the stream packet by packet
    while (av_read_frame(pFormatContext, packet) >= 0) {
        if (packet->stream_index == audioIndex) {
            //9. Send the demuxed packet to the decoder
            int ret = avcodec_send_packet(pCodecContext, packet);
            if (ret) {
                LOGE("Failed to avcodec_send_packet(pAVCodecContext, pAVPacket) ,ret =%d", ret);
                av_packet_unref(packet);
                break;
            }
            //10. Drain the decoded frames from the decoder in a loop
            while (!avcodec_receive_frame(pCodecContext, pFrame)) {
                // nb_samples is not the same for every packet; e.g. a first packet of
                // 47 samples followed by packets of 1152
                // Bytes per sample point for the output format
                numBytes = av_get_bytes_per_sample(outFormat);
                // After changing the sample rate, recompute the output sample count
                dstNbSamples = av_rescale_rnd(pFrame->nb_samples,
                                              outSampleRate,
                                              pCodecContext->sample_rate,
                                              AV_ROUND_ZERO);
                // Resample; the return value is the number of samples actually converted per channel
                int convertedSamples = swr_convert(swrContext,
                                                   outData,
                                                   dstNbSamples,
                                                   (const uint8_t **) pFrame->data,
                                                   pFrame->nb_samples);
                LOGI("avcodec_receive_frame");
                // Log the parameters once, on the first frame
                static bool show = true;
                if (show) {
                    LOGE("numBytes pFrame->nb_samples=%d dstNbSamples=%d,numBytes=%d,pCodecContext->sample_rate=%d,outSampleRate=%d",
                         pFrame->nb_samples, dstNbSamples, numBytes, pCodecContext->sample_rate, outSampleRate);
                    show = false;
                }
                // Write in LRLRLR order (interleave per sample point, numBytes bytes each),
                // so the file can be played by a PCM player
                for (int index = 0; index < convertedSamples; index++) {
                    for (int channel = 0; channel < outChannel; channel++) {
                        fwrite((char *) outData[channel] + numBytes * index, 1, numBytes, pcmFile);
                    }
                }
                frame_cnt++;
            }
        }
        av_packet_unref(packet); // unref every packet once it has been consumed
    }
LOGI("frame count is %d", frame_cnt);
swr_free(&swrContext);
av_free(outPcmBuffer);
avcodec_close(pCodecContext);
avformat_close_input(&pFormatContext);
env->ReleaseStringUTFChars(video_path, url);
env->ReleaseStringUTFChars(pcm_path, pcmPathStr);
return 0;
}
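For reference, the JNI function above needs a matching declaration on the Java side. Here is a minimal sketch of the Demo class it implies; the System.loadLibrary name is a placeholder (use your actual native library target), and the other method signatures are inferred from their usage in MainActivity below:

package android.spport.mylibrary2;

public class Demo {
    static {
        // Placeholder name: load the native library that contains decodeAudio
        System.loadLibrary("mylibrary2");
    }

    public native String stringFromJNI();

    public native int decodeVideo(String videoPath, String yuvPath);

    // Matches Java_android_spport_mylibrary2_Demo_decodeAudio above
    public native int decodeAudio(String videoPath, String pcmPath);
}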
3. Playing PCM Audio with AudioTrack
In this section we take the PCM data decoded in the previous section and play it with AudioTrack.
We already learned and practiced AudioTrack in 音視頻開發之旅(三)AudioTrack播放PCM音頻, so here is a quick recap.
public AudioTrack(int streamType, int sampleRateInHz, int channelConfig, int audioFormat,
        int bufferSizeInBytes, int mode)
The sample rate sampleRateInHz, channel configuration channelConfig, audio format audioFormat and buffer size bufferSizeInBytes were covered there, so let's look at streamType and mode.
streamType is the type of the audio stream; the options are:
- AudioManager#STREAM_VOICE_CALL: phone call audio
- AudioManager#STREAM_SYSTEM: system sounds
- AudioManager#STREAM_RING: ringtone
- AudioManager#STREAM_MUSIC: music
- AudioManager#STREAM_ALARM: alarm
- AudioManager#STREAM_NOTIFICATION: notification sounds
Here we use AudioManager#STREAM_MUSIC.
Now let's focus on mode:
@param mode streaming or static buffer.
MODE_STATIC and MODE_STREAM
MODE_STATIC: put all the audio data into one fixed buffer up front and hand it to the AudioTrack in a single transfer. Simple and effective; typically used for ringtones, system prompts and other audio with a small memory footprint.
MODE_STREAM: write the audio data to the AudioTrack chunk after chunk, with each write blocking until the data has been handed from the Java layer to the native layer for playback. This avoids keeping large audio entirely in memory, at the cost of repeated Java/native round trips and blocking writes, so it is less efficient.
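As a contrast to the STREAM helper below, here is a minimal STATIC-mode sketch (illustrative only; it assumes the 44.1 kHz, 16-bit stereo PCM produced by our decoder and a clip small enough to hold in memory):

import android.media.AudioFormat;
import android.media.AudioManager;
import android.media.AudioTrack;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

public class AudioTrackStaticSketch {
    // STATIC mode: load the whole clip, write it once, then play.
    public static AudioTrack playStatic(String pcmPath) throws IOException {
        File file = new File(pcmPath);
        byte[] pcmData = new byte[(int) file.length()];
        try (FileInputStream fis = new FileInputStream(file)) {
            int off = 0, n;
            while (off < pcmData.length && (n = fis.read(pcmData, off, pcmData.length - off)) > 0) {
                off += n;
            }
        }
        AudioTrack track = new AudioTrack(AudioManager.STREAM_MUSIC,
                44100, AudioFormat.CHANNEL_OUT_STEREO, AudioFormat.ENCODING_PCM_16BIT,
                pcmData.length,            // the buffer must hold the entire clip
                AudioTrack.MODE_STATIC);
        track.write(pcmData, 0, pcmData.length); // write all the data before play()
        track.play();
        return track;
    }
}

Note that in STATIC mode write() comes before play(), the reverse of the STREAM flow used in the helper class below.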
We use STREAM mode here; the related helper class is as follows.
package android.spport.mylibrary2;
import android.media.AudioFormat;
import android.media.AudioManager;
import android.media.AudioTrack;
import android.util.Log;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
public class AudioTrackStreamHelper {
    private static final String TAG = "AudioTrackStreamHelper";
    private AudioTrack audioTrack;
    private int sampleRateInHz;
    private int channels;
    private int audioFormat;
    private int bufferSize;
    private int mode = -1;
    private boolean hasPcmFile = false;
    private File pcmFile;
    private Thread audioTrackThread;

    public void initAudioTrackParams(String path) {
        sampleRateInHz = 44100;
        channels = AudioFormat.CHANNEL_OUT_STEREO;
        audioFormat = AudioFormat.ENCODING_PCM_16BIT;
        bufferSize = AudioTrack.getMinBufferSize(sampleRateInHz, channels, audioFormat);
        pcmFile = new File(path); //"raw.pcm"
        if (pcmFile.exists()) {
            hasPcmFile = true;
        }
    }

    private int initAudioTrackWithMode(int mode, int bufferSize) {
        if (audioTrack != null) {
            audioTrack.setPlaybackPositionUpdateListener(null);
            audioTrack.release();
            audioTrack = null;
        }
        audioTrack = new AudioTrack(AudioManager.STREAM_MUSIC, sampleRateInHz, channels, audioFormat, bufferSize, mode);
        if (audioTrack != null) {
            Log.i(TAG, "initAudioTrackWithMode: state=" + audioTrack.getState() + " playState=" + audioTrack.getPlayState());
            return audioTrack.getState();
        }
        return AudioTrack.STATE_UNINITIALIZED;
    }

    public boolean isHasPcmFile() {
        return hasPcmFile;
    }
    public void play() {
        releaseAudioTrack();
        int state = initAudioTrackWithMode(AudioTrack.MODE_STREAM, bufferSize);
        if (state == AudioTrack.STATE_UNINITIALIZED) {
            Log.e(TAG, "run: state is uninit");
            return;
        }
        audioTrackThread = new Thread(new Runnable() {
            @Override
            public void run() {
                FileInputStream fileInputStream = null;
                try {
                    fileInputStream = new FileInputStream(pcmFile);
                    byte[] buffer = new byte[bufferSize / 2];
                    int readCount;
                    Log.d(TAG, "run: ThreadId=" + Thread.currentThread() + " playState=" + audioTrack.getPlayState());
                    //In STREAM mode, play() can be called before writing
                    audioTrack.play();
                    while (fileInputStream.available() > 0) {
                        readCount = fileInputStream.read(buffer);
                        if (readCount <= 0) {
                            continue;
                        }
                        if (audioTrack == null) {
                            return;
                        } else {
                            Log.i(TAG, "run: audioTrack.getState()" + audioTrack.getState() + " audioTrack.getPlayState()=" + audioTrack.getPlayState());
                        }
                        //Write the PCM data to the AudioTrack chunk by chunk. Since write() runs on a
                        //worker thread, rapid taps may trigger stop()/release() on the main thread and
                        //make write() throw IllegalStateException: Unable to retrieve AudioTrack pointer for write(),
                        //hence the play-state checks here
                        if (readCount > 0 && audioTrack != null && audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && audioTrack.getState() == AudioTrack.STATE_INITIALIZED) {
                            audioTrack.write(buffer, 0, readCount);
                        }
                    }
                } catch (IOException | IllegalStateException e) {
                    e.printStackTrace();
                    Log.e(TAG, "play: " + e.getMessage());
                } finally {
                    if (fileInputStream != null) {
                        try {
                            fileInputStream.close();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }
                    Log.d(TAG, "playWithStreamMode: end ThreadID=" + Thread.currentThread());
                }
            }
        });
        audioTrackThread.start();
    }
    public void pausePlay() {
        if (audioTrack != null) {
            if (audioTrack.getState() > AudioTrack.STATE_UNINITIALIZED) {
                audioTrack.pause();
            }
            Log.d(TAG, "pausePlay: isPlaying false getPlayState= " + audioTrack.getPlayState());
        }
        if (audioTrackThread != null) {
            audioTrackThread.interrupt();
        }
    }

    private void releaseAudioTrack() {
        if (audioTrack != null && audioTrack.getState() == AudioTrack.STATE_INITIALIZED) {
            audioTrack.stop();
            audioTrack.release();
            Log.d(TAG, "releaseAudioTrack: stopped and released");
        }
        if (audioTrackThread != null) {
            audioTrackThread.interrupt();
        }
    }

    public void destroy() {
        if (audioTrack != null) {
            audioTrack.release();
            audioTrack = null;
        }
        if (audioTrackThread != null) {
            audioTrackThread.interrupt();
            audioTrackThread = null;
        }
    }
}
由于是Java代碼,可以在java層在直接調用,省去了JNI的消耗。
public class MainActivity extends AppCompatActivity {
    private Demo demo;
    AudioTrackStaticModeHelper audioTrackHelper;
    AudioTrackStreamHelper audioTrackStreamHelper;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        // Example of a call to a native method
        TextView tv = findViewById(R.id.sample_text);
        checkPermission();
        demo = new Demo();
        tv.setText(demo.stringFromJNI());
        String folderurl = Environment.getExternalStorageDirectory().getPath();
        File externalFilesDir = getExternalFilesDir(null);
        Log.i("MainActivity", "externalFilesDir: " + externalFilesDir);
        // demo.decodeVideo(folderurl + "/input.mp4", externalFilesDir + "/output7.yuv");
        demo.decodeAudio(folderurl + "/input.mp4", externalFilesDir + "/audio.pcm");
        initAudioTrackStreamMode(externalFilesDir);
    }

    private void initAudioTrackStreamMode(File externalFilesDir) {
        audioTrackStreamHelper = new AudioTrackStreamHelper();
        audioTrackStreamHelper.initAudioTrackParams(externalFilesDir + "/audio.pcm");
        audioTrackStreamHelper.play();
    }
}
由于我們FFmpeg解碼時同步的,所以可以采用這種方式,但是解碼本事是耗時操作,應該創建解碼線程,然后播放PCM時也可以直接送給AudioTrack進行播放,而不用先寫入到PCM文件再設置播放。這些都是可優化點。我們在后續音視頻同步時再進行優化。
The code has been pushed to GitHub: https://github.com/ayyb1988/ffmpegvideodecodedemo
Feedback is welcome; let's learn and grow together.
4. References
- 《音視頻開發進階》
- ffmpeg主體架構分析
- FFmpeg開發筆記(七):ffmpeg解碼音頻保存為PCM并使用軟件播放
- Android NDK開發之旅35--FFmpeg+AudioTrack音頻播放
- 音視頻開發之旅(三)AudioTrack播放PCM音頻
5. Takeaways
- Learned the audio decoding flow
- Implemented audio decoding
- Fixed distorted audio playback caused by skipping resampling and using the wrong output sample format
- Played the decoded PCM with AudioTrack in STREAM mode
Thanks for reading.
In the next article we will learn and practice another way to play audio: OpenSL ES. Follow the 音視頻開發之旅 official account to learn and grow together.
Feedback is always welcome.