ffplay.c source code analysis - the audio part

FFmpeg code, version 3.3:

The thread model in ffplay

(Figure: the audio thread model)

Overview

The threads responsible for audio playback are mainly the ones shown above:

1. The read thread - read_thread
The read thread is started from the main method.
This is the same as in the video thread model; the difference is that the data we follow through the loop is audio data.

  • Read loop
    This thread runs the read loop: it keeps calling the av_read_frame method to read pre-decode data packets.
  • Push into queue
    Each packet obtained is then pushed into the packet queue of its own stream (video/audio/subtitle each have their own queue), as the sketch below shows.
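
A heavily condensed sketch of that loop (seeking, EOF and error handling are omitted; packet_queue_put is ffplay's own helper):

    AVPacket pkt1, *pkt = &pkt1;
    for (;;) {
        //read the next demuxed packet from the container
        if (av_read_frame(ic, pkt) < 0)
            break; /* EOF/error handling omitted */
        //route the packet to the queue of its own stream
        if (pkt->stream_index == is->audio_stream)
            packet_queue_put(&is->audioq, pkt);
        else if (pkt->stream_index == is->video_stream)
            packet_queue_put(&is->videoq, pkt);
        else if (pkt->stream_index == is->subtitle_stream)
            packet_queue_put(&is->subtitleq, pkt);
        else
            av_packet_unref(pkt);
    }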

2. The per-stream decode thread - audio_thread
In the read thread, after the AVFormatContext is initialized and the AVStream information is obtained, a decode thread (Decoder Thread) is started for each kind of stream found.
ffplay handles 3 kinds of streams here: video, audio and subtitle.

  • Read loop
    The thread takes packets from its own stream's packet queue,
    then feeds them to the decoder (via the new send/receive API; avcodec_decode_video2 is the old API). See the sketch below.

  • Push into queue
    Decoding yields the decoded data as an AVFrame, whose pts is then determined.
    Finally the frame is pushed into the frame queue.
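
For reference, a minimal sketch of the send/receive decode pattern the loop is built around (simplified; ffplay wraps this, together with serial and flush handling, in decoder_decode_frame — avctx, pkt and frame are assumed to be set up already):

    //feed one compressed packet to the decoder
    if (avcodec_send_packet(avctx, pkt) < 0)
        return -1;
    //drain every frame the decoder can produce from it
    while (avcodec_receive_frame(avctx, frame) == 0) {
        /* frame now holds decoded data (PCM samples for audio);
           stamp its pts, then push it into the frame queue */
    }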

3. Playback setup
SDL audio playback mainly consists of registering an audio callback; inside the callback, we write our decoded data into the buffer address that is passed in.
SDL then hands that buffer to the audio playback device, which plays it.
For details, see the article SDL2庫(4)-Android 端源碼簡要分析(AudioSubSystem).

That is the whole flow in a nutshell.


Audio parameters

A struct is defined to hold the basic audio parameters.

typedef struct AudioParams {
    //sample rate
    int freq;
    //number of channels
    int channels;
    //channel layout: unlike channels (a plain count), this is a bitmask
    //describing which channels are present, e.g. AV_CH_LAYOUT_STEREO
    int64_t channel_layout;
    //sample format of the audio
    enum AVSampleFormat fmt;
    //size of one sample frame = bytes per sample * number of channels
    int frame_size;
    //bytes per second
    int bytes_per_sec;
} AudioParams;
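
As a concrete illustration (values assumed here, not taken from the source): for 44100 Hz stereo S16 audio the fields would be filled like this:

    AudioParams p;
    p.freq           = 44100;                //44.1 kHz
    p.channels       = 2;                    //stereo
    p.channel_layout = AV_CH_LAYOUT_STEREO;  //bitmask form of "2 channels"
    p.fmt            = AV_SAMPLE_FMT_S16;    //2 bytes per sample
    p.frame_size     = 2 * 2;                //2 bytes/sample * 2 channels = 4 bytes
    p.bytes_per_sec  = 44100 * 4;            //44100 frames/s * 4 bytes = 176400 bytes/s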

ffplay initialization (main_thread)

The overall initialization flow is largely the same as in the previous article, ffplay.c 源碼分析- 視頻部分
(it covers initializing FFmpeg, processing the passed-in parameters, initializing SDL, and starting the read_thread via the stream_open function),
so it is not described again here.
Let's look specifically at the audio-related parts.

Starting the corresponding decode thread

stream_component_open opens the corresponding AVStream and starts its decode thread.
ffplay handles three kinds of streams here (video, audio and subtitle, each opening its own decode thread).

The audio part of stream_component_open

   switch (avctx->codec_type) {
    case AVMEDIA_TYPE_AUDIO:
        //the audio filter part is ignored here
        sample_rate    = avctx->sample_rate;
        nb_channels    = avctx->channels;
        channel_layout = avctx->channel_layout;

        /* prepare audio output */
        //open the audio device; the return value is the size of the device's audio buffer
        if ((ret = audio_open(is, channel_layout, nb_channels, sample_rate, &is->audio_tgt)) < 0)
            goto fail;
        //save the hardware parameters we got back
        is->audio_hw_buf_size = ret;
        is->audio_src = is->audio_tgt;
        is->audio_buf_size  = 0;
        is->audio_buf_index = 0;

        /* init averaging filter */
        //this belongs to the A/V sync algorithm; ignore it for now
        is->audio_diff_avg_coef  = exp(log(0.01) / AUDIO_DIFF_AVG_NB);
        is->audio_diff_avg_count = 0;
        /* since we do not have a precise enough audio FIFO fullness,
           we correct audio sync only if larger than this threshold */
        is->audio_diff_threshold = (double)(is->audio_hw_buf_size) / is->audio_tgt.bytes_per_sec;

        is->audio_stream = stream_index;
        is->audio_st = ic->streams[stream_index];
        //initialize the decoder
        decoder_init(&is->auddec, avctx, &is->audioq, is->continue_read_thread);
        if ((is->ic->iformat->flags & (AVFMT_NOBINSEARCH | AVFMT_NOGENSEARCH | AVFMT_NO_BYTE_SEEK)) && !is->ic->iformat->read_seek) {
            is->auddec.start_pts = is->audio_st->start_time;
            is->auddec.start_pts_tb = is->audio_st->time_base;
        }
        //start the audio decode thread
        if ((ret = decoder_start(&is->auddec, audio_thread, is)) < 0)
            goto out;
        //start playback (unpause the device)
        SDL_PauseAudioDevice(audio_dev, 0);
        break;
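
For intuition on audio_diff_threshold (numbers assumed for illustration): if audio_open returns a hardware buffer of 8192 bytes and the target is 44100 Hz stereo S16 (bytes_per_sec = 176400), then the threshold is 8192 / 176400 ≈ 0.046 s — sync corrections are only applied once the drift exceeds roughly one hardware buffer's worth of audio.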

Now let's look closely at the part that opens the audio device:
audio_open

static int audio_open(void *opaque, int64_t wanted_channel_layout, int wanted_nb_channels, int wanted_sample_rate, struct AudioParams *audio_hw_params)
{
    //the structs SDL uses to record audio parameters: wanted_spec is what we ask for, spec is what we get
    SDL_AudioSpec wanted_spec, spec;
    const char *env;
    static const int next_nb_channels[] = {0, 0, 1, 6, 2, 6, 4, 6};
    static const int next_sample_rates[] = {0, 44100, 48000, 96000, 192000};
    int next_sample_rate_idx = FF_ARRAY_ELEMS(next_sample_rates) - 1;

    env = SDL_getenv("SDL_AUDIO_CHANNELS");
    if (env) {
        wanted_nb_channels = atoi(env);
        wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels);
    }
    if (!wanted_channel_layout || wanted_nb_channels != av_get_channel_layout_nb_channels(wanted_channel_layout)) {
        wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels);
        wanted_channel_layout &= ~AV_CH_LAYOUT_STEREO_DOWNMIX;
    }
    //note that the channel count can be derived from the layout via av_get_channel_layout_nb_channels
    wanted_nb_channels = av_get_channel_layout_nb_channels(wanted_channel_layout);
    wanted_spec.channels = wanted_nb_channels;
    wanted_spec.freq = wanted_sample_rate;
    if (wanted_spec.freq <= 0 || wanted_spec.channels <= 0) {
        av_log(NULL, AV_LOG_ERROR, "Invalid sample rate or channel count!\n");
        return -1;
    }
    while (next_sample_rate_idx && next_sample_rates[next_sample_rate_idx] >= wanted_spec.freq)
        next_sample_rate_idx--;
    //the format is hard-coded to AUDIO_S16SYS here
    wanted_spec.format = AUDIO_S16SYS;
    wanted_spec.silence = 0;
    //samples is the SDL audio buffer size in sample frames, i.e. how many frames each callback must fill (not frames per second)
    wanted_spec.samples = FFMAX(SDL_AUDIO_MIN_BUFFER_SIZE, 2 << av_log2(wanted_spec.freq / SDL_AUDIO_MAX_CALLBACKS_PER_SEC));
    //when SDL plays audio, each time it needs data it hands the callback a buffer position;
    //filling that buffer with our audio data inside the callback is all playback requires
    wanted_spec.callback = sdl_audio_callback;
    wanted_spec.userdata = opaque;
    //open the audio device
    while (!(audio_dev = SDL_OpenAudioDevice(NULL, 0, &wanted_spec, &spec, SDL_AUDIO_ALLOW_FREQUENCY_CHANGE | SDL_AUDIO_ALLOW_CHANNELS_CHANGE))) {
        av_log(NULL, AV_LOG_WARNING, "SDL_OpenAudio (%d channels, %d Hz): %s\n",
               wanted_spec.channels, wanted_spec.freq, SDL_GetError());
        wanted_spec.channels = next_nb_channels[FFMIN(7, wanted_spec.channels)];
        if (!wanted_spec.channels) {
            wanted_spec.freq = next_sample_rates[next_sample_rate_idx--];
            wanted_spec.channels = wanted_nb_channels;
            if (!wanted_spec.freq) {
                av_log(NULL, AV_LOG_ERROR,
                       "No more combinations to try, audio open failed\n");
                return -1;
            }
        }
        wanted_channel_layout = av_get_default_channel_layout(wanted_spec.channels);
    }
    if (spec.format != AUDIO_S16SYS) {
        av_log(NULL, AV_LOG_ERROR,
               "SDL advised audio format %d is not supported!\n", spec.format);
        return -1;
    }
    if (spec.channels != wanted_spec.channels) {
        wanted_channel_layout = av_get_default_channel_layout(spec.channels);
        if (!wanted_channel_layout) {
            av_log(NULL, AV_LOG_ERROR,
                   "SDL advised channel count %d is not supported!\n", spec.channels);
            return -1;
        }
    }
    //finally, record the parameters the hardware device actually gave us
    audio_hw_params->fmt = AV_SAMPLE_FMT_S16;
    audio_hw_params->freq = spec.freq;
    audio_hw_params->channel_layout = wanted_channel_layout;
    audio_hw_params->channels =  spec.channels;
    audio_hw_params->frame_size = av_samples_get_buffer_size(NULL, audio_hw_params->channels, 1, audio_hw_params->fmt, 1);
    audio_hw_params->bytes_per_sec = av_samples_get_buffer_size(NULL, audio_hw_params->channels, audio_hw_params->freq, audio_hw_params->fmt, 1);
    if (audio_hw_params->bytes_per_sec <= 0 || audio_hw_params->frame_size <= 0) {
        av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size failed\n");
        return -1;
    }
    return spec.size;
}

The most noteworthy line here is
wanted_spec.callback = sdl_audio_callback;
When SDL plays audio, each time it needs data to play it passes the buffer position to this callback,
and by filling that buffer with our audio data inside the callback, we realize playback.
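
To make the samples formula above concrete (using ffplay's constants SDL_AUDIO_MAX_CALLBACKS_PER_SEC = 30 and SDL_AUDIO_MIN_BUFFER_SIZE = 512): at 44100 Hz, freq / 30 = 1470, av_log2(1470) = 10, and 2 << 10 = 2048 sample frames per callback. That works out to about 44100 / 2048 ≈ 21.5 callbacks per second, comfortably under the 30-per-second cap.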

The audio decode thread - audio_thread

When read_thread finds the audio stream, it has already initialized the AVCodec and AVCodecContext; through the decoder_start method it starts audio_thread.
audio_thread creates an AVFrame to receive the decoded data,
then enters the decode loop:
it keeps taking pre-decode packets from the queue and feeding them to the decoder,
and pushes the resulting decoded frames into the corresponding sample queue.

Initializing parameters

Create the AVFrame. Unlike video_thread there is no frame-rate guessing here; the relevant pieces (condensed from audio_thread) are:

    //create the AVFrame that will receive the decoded samples
    AVFrame *frame = av_frame_alloc();
    ...
    //inside the decode loop, the time base of each decoded frame
    //is derived from its sample rate
    AVRational tb = (AVRational){1, frame->sample_rate};

Starting the decode loop

Decoding works the same way as in video playback, so it is not explained again in detail here. Each decoded frame is pushed into the sample queue; a condensed sketch follows.
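
A condensed sketch of that loop as it appears in ffplay 3.3's audio_thread (filter-graph support and error handling omitted):

    do {
        //decode the next audio frame from the packet queue
        if ((got_frame = decoder_decode_frame(&is->auddec, frame, NULL)) < 0)
            goto the_end;
        if (got_frame) {
            //for audio, the time base of a decoded frame is 1/sample_rate
            tb = (AVRational){1, frame->sample_rate};
            //grab a writable slot in the sample queue
            if (!(af = frame_queue_peek_writable(&is->sampq)))
                goto the_end;
            //stamp pts/serial/duration, then hand the frame over
            af->pts = (frame->pts == AV_NOPTS_VALUE) ? NAN : frame->pts * av_q2d(tb);
            af->serial = is->auddec.pkt_serial;
            af->duration = av_q2d((AVRational){frame->nb_samples, frame->sample_rate});
            av_frame_move_ref(af->frame, frame);
            frame_queue_push(&is->sampq);
        }
    } while (ret >= 0 || ret == AVERROR(EAGAIN) || ret == AVERROR_EOF); //ret only changes in the (omitted) filter path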

The audio setup part

As described above, we have to process our data inside the registered callback. The callback runs on the RunAudio thread that SDL creates.
For the details of how SDL runs it, see SDL2庫(4)-Android 端源碼簡要分析(AudioSubSystem).

/* prepare a new audio buffer */
static void sdl_audio_callback(void *opaque, Uint8 *stream, int len)
{
    VideoState *is = opaque;
    int audio_size, len1;

    audio_callback_time = av_gettime_relative();
    
    //len is the number of bytes SDL wants; stream points to the buffer to fill (its start)
    while (len > 0) {
        if (is->audio_buf_index >= is->audio_buf_size) {
           //decode one frame; audio_size is the number of bytes produced
           audio_size = audio_decode_frame(is);
           //a negative value means failure
           if (audio_size < 0) {
                /* if error, just output silence */
               is->audio_buf = NULL;
               is->audio_buf_size = SDL_AUDIO_MIN_BUFFER_SIZE / is->audio_tgt.frame_size * is->audio_tgt.frame_size;
           } else {
               if (is->show_mode != SHOW_MODE_VIDEO)
                   update_sample_display(is, (int16_t *)is->audio_buf, audio_size);
               //record audio_buf_size
               is->audio_buf_size = audio_size;
           }
           //reset the read index to 0
           is->audio_buf_index = 0;
        }
        //len1 is how much unconsumed data is left in audio_buf this round
        len1 = is->audio_buf_size - is->audio_buf_index;
        //never copy more than the space still available
        if (len1 > len)
            len1 = len;
        //if no volume adjustment is needed, the audio data can be copied straight in
        if (!is->muted && is->audio_buf && is->audio_volume == SDL_MIX_MAXVOLUME)
            memcpy(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, len1);
        else {
            memset(stream, 0, len1);
            if (!is->muted && is->audio_buf)
                //mix via SDL_MixAudioFormat, which can scale the volume
                SDL_MixAudioFormat(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, AUDIO_S16SYS, len1, is->audio_volume);
        }
        //advance the offsets; if there is still space, loop and decode more frames into it
        len -= len1;
        stream += len1;
        is->audio_buf_index += len1;
    }
    
    //audio_write_buf_size records how much decoded data is still unwritten (buffered in audio_buf)
    is->audio_write_buf_size = is->audio_buf_size - is->audio_buf_index;
    /* Let's assume the audio driver that is used by SDL has two periods. */
    if (!isnan(is->audio_clock)) {
        //update the audio clock (and slave the external clock to it)
        set_clock_at(&is->audclk, is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec, is->audio_clock_serial, audio_callback_time / 1000000.0);
        sync_clock_to_slave(&is->extclk, &is->audclk);
    }
}

audio_decode_frame

static int audio_decode_frame(VideoState *is)
{
    int data_size, resampled_data_size;
    int64_t dec_channel_layout;
    av_unused double audio_clock0;
    int wanted_nb_samples;
    Frame *af;

    if (is->paused)
        return -1;

    do {
        //take a frame from the sample queue
        if (!(af = frame_queue_peek_readable(&is->sampq)))
            return -1;
        frame_queue_next(&is->sampq);
    } while (af->serial != is->audioq.serial);
    
    //compute data_size, the byte size of the decoded frame
    data_size = av_samples_get_buffer_size(NULL, af->frame->channels,
                                           af->frame->nb_samples,
                                           af->frame->format, 1);
    
    //determine the channel_layout
    dec_channel_layout =
        (af->frame->channel_layout && af->frame->channels == av_get_channel_layout_nb_channels(af->frame->channel_layout)) ?
        af->frame->channel_layout : av_get_default_channel_layout(af->frame->channels);
    wanted_nb_samples = synchronize_audio(is, af->frame->nb_samples);
    
    //if the decoded format differs from the target format, the audio data must be resampled
    if (af->frame->format        != is->audio_src.fmt            ||
        dec_channel_layout       != is->audio_src.channel_layout ||
        af->frame->sample_rate   != is->audio_src.freq           ||
        (wanted_nb_samples       != af->frame->nb_samples && !is->swr_ctx)) {
        swr_free(&is->swr_ctx);
        //allocate and configure the resampler (SwrContext)
        is->swr_ctx = swr_alloc_set_opts(NULL,
                                         is->audio_tgt.channel_layout, is->audio_tgt.fmt, is->audio_tgt.freq,
                                         dec_channel_layout,           af->frame->format, af->frame->sample_rate,
                                         0, NULL);
        if (!is->swr_ctx || swr_init(is->swr_ctx) < 0) {
            av_log(NULL, AV_LOG_ERROR,
                   "Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
                    af->frame->sample_rate, av_get_sample_fmt_name(af->frame->format), af->frame->channels,
                    is->audio_tgt.freq, av_get_sample_fmt_name(is->audio_tgt.fmt), is->audio_tgt.channels);
            swr_free(&is->swr_ctx);
            return -1;
        }
        is->audio_src.channel_layout = dec_channel_layout;
        is->audio_src.channels       = af->frame->channels;
        is->audio_src.freq = af->frame->sample_rate;
        is->audio_src.fmt = af->frame->format;
    }
    
    //do the resampling
    if (is->swr_ctx) {
        const uint8_t **in = (const uint8_t **)af->frame->extended_data;
        uint8_t **out = &is->audio_buf1;
        //the +256 is a fixed safety margin of extra samples, leaving headroom for data the resampler may still have buffered (e.g. from compensation)
        int out_count = (int64_t)wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate + 256;
        int out_size  = av_samples_get_buffer_size(NULL, is->audio_tgt.channels, out_count, is->audio_tgt.fmt, 0);
        int len2;
        if (out_size < 0) {
            av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
            return -1;
        }
        if (wanted_nb_samples != af->frame->nb_samples) {
            if (swr_set_compensation(is->swr_ctx, (wanted_nb_samples - af->frame->nb_samples) * is->audio_tgt.freq / af->frame->sample_rate,
                                        wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate) < 0) {
                av_log(NULL, AV_LOG_ERROR, "swr_set_compensation() failed\n");
                return -1;
            }
        }
        av_fast_malloc(&is->audio_buf1, &is->audio_buf1_size, out_size);
        if (!is->audio_buf1)
            return AVERROR(ENOMEM);
        //perform the conversion
        len2 = swr_convert(is->swr_ctx, out, out_count, in, af->frame->nb_samples);
        if (len2 < 0) {
            av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
            return -1;
        }
        if (len2 == out_count) {
            av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
            if (swr_init(is->swr_ctx) < 0)
                swr_free(&is->swr_ctx);
        }
        is->audio_buf = is->audio_buf1;
        //recompute the size of the resampled data; this is what gets returned
        resampled_data_size = len2 * is->audio_tgt.channels * av_get_bytes_per_sample(is->audio_tgt.fmt);
    } else {
        is->audio_buf = af->frame->data[0];
        resampled_data_size = data_size;
    }

    audio_clock0 = is->audio_clock;
    /* update the audio clock with the pts */
    if (!isnan(af->pts))
        //update the clock to the end of this frame: its pts plus its duration (nb_samples / sample_rate)
        is->audio_clock = af->pts + (double) af->frame->nb_samples / af->frame->sample_rate;
    else
        is->audio_clock = NAN;
    is->audio_clock_serial = af->serial;
#ifdef DEBUG
    {
        static double last_clock;
        printf("audio: delay=%0.3f clock=%0.3f clock0=%0.3f\n",
               is->audio_clock - last_clock,
               is->audio_clock, audio_clock0);
        last_clock = is->audio_clock;
    }
#endif
    return resampled_data_size;
}

Two things here are worth noting:

  1. If the decoded audio does not match the output format, it is resampled.
  2. The timestamp that is set is the time at which all the data of the current frame will have finished playing.
    In the earlier sdl_audio_callback, we saw the final clock synchronization:
  set_clock_at(&is->audclk, is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec, is->audio_clock_serial, audio_callback_time / 1000000.0);

The timestamp finally passed to the clock is the end-of-frame timestamp minus the delay of everything not yet played: the decoded-but-unwritten data (audio_write_buf_size) plus two hardware buffers of latency, converted to seconds via bytes_per_sec.
In other words, because the data we hand over still has to drain through those buffers, subtracting that delay from the frame-end pts yields the pts of the audio actually being heard at this moment.
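
A quick numeric check (values assumed for illustration): with audio_hw_buf_size = 8192 bytes, audio_write_buf_size = 4096 bytes and bytes_per_sec = 176400, the subtracted delay is (2 * 8192 + 4096) / 176400 ≈ 0.116 s, so the audio clock is set about 116 ms behind the pts at the end of the last decoded frame.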
