ffmpeg 解码流程解析(2)

967 阅读7分钟

上一文讲到了avformat_open_input方法,主要是负责AVFormatContext结构体的初始化,根据url资源地址探测文件格式等准备工作

本文从紧接分析avformat_find_stream_info,头文件定义在libavformat/avformat.h

注释的大概意思是,通过读取若干个packet来获取流信息,对于MPEG这种没有 header 的文件格式比较有用,也可以计算像 MPEG-2 这种支持 repeat mode 的真实帧率。(MPEG-2 支持对于大量静止的画面设置 repeat mode,重复的帧不用编码和存储,可以减少体积)

这个方法并不会改变文件的逻辑位置,用于探测检查的buffer会被缓存下来为后面使用。

avformat_find_stream_info
/**
 * Read packets of a media file to get stream information. This
 * is useful for file formats with no headers such as MPEG. This
 * function also computes the real framerate in case of MPEG-2 repeat
 * frame mode.
 * The logical file position is not changed by this function;
 * examined packets may be buffered for later processing.
 *
 * @param ic media file handle
 * @param options  If non-NULL, an ic.nb_streams long array of pointers to
 *                 dictionaries, where i-th member contains options for
 *                 codec corresponding to i-th stream.
 *                 On return each dictionary will be filled with options that were not found.
 * @return >=0 if OK, AVERROR_xxx on error
 *
 * @note this function isn't guaranteed to open all the codecs, so
 *       options being non-empty at return is a perfectly normal behavior.
 *
 * @todo Let the user decide somehow what information is needed so that
 *       we do not waste time getting stuff the user does not need.
 */
int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options);
  • *ic:填充的上下文对象。
  • **options:可选字典项 实现在libavformat/utils中,这个函数是在是太长了,就不全部粘贴出来了,看一段分析一段
  max_stream_analyze_duration = max_analyze_duration;
  max_subtitle_analyze_duration = max_analyze_duration;
    if (!max_analyze_duration) {
        max_stream_analyze_duration =
        max_analyze_duration        = 5*AV_TIME_BASE;
        max_subtitle_analyze_duration = 30*AV_TIME_BASE;
        if (!strcmp(ic->iformat->name, "flv"))
            max_stream_analyze_duration = 90*AV_TIME_BASE;
        if (!strcmp(ic->iformat->name, "mpeg") || !strcmp(ic->iformat->name, "mpegts"))
            max_stream_analyze_duration = 7*AV_TIME_BASE;
    }+;
}

设置一些基本的常量,定义音视频流/字幕流的分析时长,还会根据文件格式的不同,设置的有所不同,比如flv/mpeg等格式,逻辑比较简单,接着往下看

for (;;) {
        const AVPacket *pkt;
        int analyzed_all_streams;
        if (ff_check_interrupt(&ic->interrupt_callback)) {
            ret = AVERROR_EXIT;
            av_log(ic, AV_LOG_DEBUG, "interrupted\n");
            // #1
            break;
        }

        /* check if one codec still needs to be handled */
        for (i = 0; i < ic->nb_streams; i++) {
            int fps_analyze_framecount = 20;
            int count;

            st = ic->streams[i];
            if (!has_codec_parameters(st, NULL))
                break;
            /* If the timebase is coarse (like the usual millisecond precision
             * of mkv), we need to analyze more frames to reliably arrive at
             * the correct fps. */
            if (av_q2d(st->time_base) > 0.0005)
                fps_analyze_framecount *= 2;
            if (!tb_unreliable(st->internal->avctx))
                fps_analyze_framecount = 0;
            if (ic->fps_probe_size >= 0)
                fps_analyze_framecount = ic->fps_probe_size;
            if (st->disposition & AV_DISPOSITION_ATTACHED_PIC)
                fps_analyze_framecount = 0;
            /* variable fps and no guess at the real fps */
            count = (ic->iformat->flags & AVFMT_NOTIMESTAMPS) ?
                       st->internal->info->codec_info_duration_fields/2 :
                       st->internal->info->duration_count;
            if (!(st->r_frame_rate.num && st->avg_frame_rate.num) &&
                st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
                if (count < fps_analyze_framecount)
                    break;
            }
            // Look at the first 3 frames if there is evidence of frame delay
            // but the decoder delay is not set.
            if (st->internal->info->frame_delay_evidence && count < 2 && st->internal->avctx->has_b_frames == 0)
                break;
            if (!st->internal->avctx->extradata &&
                (!st->internal->extract_extradata.inited ||
                 st->internal->extract_extradata.bsf) &&
                extract_extradata_check(st))
                break;
            if (st->first_dts == AV_NOPTS_VALUE &&
                !(ic->iformat->flags & AVFMT_NOTIMESTAMPS) &&
                st->codec_info_nb_frames < ((st->disposition & AV_DISPOSITION_ATTACHED_PIC) ? 1 : ic->max_ts_probe) &&
                (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO ||
                 st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO))
                break;
        }
        analyzed_all_streams = 0;
        if (!missing_streams || !*missing_streams)
            if (i == ic->nb_streams) {
                analyzed_all_streams = 1;
                /* NOTE: If the format has no header, then we need to read some
                 * packets to get most of the streams, so we cannot stop here. */
                if (!(ic->ctx_flags & AVFMTCTX_NOHEADER)) {
                    /* If we found the info for all the codecs, we can stop. */
                    ret = count;
                    av_log(ic, AV_LOG_DEBUG, "All info found\n");
                    flush_codecs = 0;
                     // #2
                    break;
                }
            }
        /* We did not get all the codec info, but we read too much data. */
        if (read_size >= probesize) {
            ret = count;
            av_log(ic, AV_LOG_DEBUG,
                   "Probe buffer size limit of %"PRId64" bytes reached\n", probesize);
            for (i = 0; i < ic->nb_streams; i++)
                if (!ic->streams[i]->r_frame_rate.num &&
                    ic->streams[i]->internal->info->duration_count <= 1 &&
                    ic->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
                    strcmp(ic->iformat->name, "image2"))
                    av_log(ic, AV_LOG_WARNING,
                           "Stream #%d: not enough frames to estimate rate; "
                           "consider increasing probesize\n", i);
            break;
        }

        /* NOTE: A new stream can be added there if no header in file
         * (AVFMTCTX_NOHEADER). */
        ret = read_frame_internal(ic, pkt1);
        if (ret == AVERROR(EAGAIN))
            continue;

        if (ret < 0) {
            /* EOF or error*/
            eof_reached = 1;
             // #3
            break;
        }

        if (!(ic->flags & AVFMT_FLAG_NOBUFFER)) {
            ret = avpriv_packet_list_put(&ic->internal->packet_buffer,
                                     &ic->internal->packet_buffer_end,
                                     pkt1, NULL, 0);
            if (ret < 0)
 				// #4
                goto unref_then_goto_end;

            pkt = &ic->internal->packet_buffer_end->pkt;
        } else {
            pkt = pkt1;
        }

        st = ic->streams[pkt->stream_index];
        if (!(st->disposition & AV_DISPOSITION_ATTACHED_PIC))
            read_size += pkt->size;

        avctx = st->internal->avctx;
        if (!st->internal->avctx_inited) {
            ret = avcodec_parameters_to_context(avctx, st->codecpar);
            if (ret < 0)
				 // #5
                goto unref_then_goto_end;
            st->internal->avctx_inited = 1;
        }

        if (pkt->dts != AV_NOPTS_VALUE && st->codec_info_nb_frames > 1) {
            /* check for non-increasing dts */
            if (st->internal->info->fps_last_dts != AV_NOPTS_VALUE &&
                st->internal->info->fps_last_dts >= pkt->dts) {
                av_log(ic, AV_LOG_DEBUG,
                       "Non-increasing DTS in stream %d: packet %d with DTS "
                       "%"PRId64", packet %d with DTS %"PRId64"\n",
                       st->index, st->internal->info->fps_last_dts_idx,
                       st->internal->info->fps_last_dts, st->codec_info_nb_frames,
                       pkt->dts);
                st->internal->info->fps_first_dts =
                st->internal->info->fps_last_dts  = AV_NOPTS_VALUE;
            }
            /* Check for a discontinuity in dts. If the difference in dts
             * is more than 1000 times the average packet duration in the
             * sequence, we treat it as a discontinuity. */
            if (st->internal->info->fps_last_dts != AV_NOPTS_VALUE &&
                st->internal->info->fps_last_dts_idx > st->internal->info->fps_first_dts_idx &&
                (pkt->dts - (uint64_t)st->internal->info->fps_last_dts) / 1000 >
                (st->internal->info->fps_last_dts     - (uint64_t)st->internal->info->fps_first_dts) /
                (st->internal->info->fps_last_dts_idx - st->internal->info->fps_first_dts_idx)) {
                av_log(ic, AV_LOG_WARNING,
                       "DTS discontinuity in stream %d: packet %d with DTS "
                       "%"PRId64", packet %d with DTS %"PRId64"\n",
                       st->index, st->internal->info->fps_last_dts_idx,
                       st->internal->info->fps_last_dts, st->codec_info_nb_frames,
                       pkt->dts);
                st->internal->info->fps_first_dts =
                st->internal->info->fps_last_dts  = AV_NOPTS_VALUE;
            }

            /* update stored dts values */
            if (st->internal->info->fps_first_dts == AV_NOPTS_VALUE) {
                st->internal->info->fps_first_dts     = pkt->dts;
                st->internal->info->fps_first_dts_idx = st->codec_info_nb_frames;
            }
            st->internal->info->fps_last_dts     = pkt->dts;
            st->internal->info->fps_last_dts_idx = st->codec_info_nb_frames;
        }
        if (st->codec_info_nb_frames>1) {
            int64_t t = 0;
            int64_t limit;

            if (st->time_base.den > 0)
                t = av_rescale_q(st->internal->info->codec_info_duration, st->time_base, AV_TIME_BASE_Q);
            if (st->avg_frame_rate.num > 0)
                t = FFMAX(t, av_rescale_q(st->codec_info_nb_frames, av_inv_q(st->avg_frame_rate), AV_TIME_BASE_Q));

            if (   t == 0
                && st->codec_info_nb_frames>30
                && st->internal->info->fps_first_dts != AV_NOPTS_VALUE
                && st->internal->info->fps_last_dts  != AV_NOPTS_VALUE) {
                int64_t dur = av_sat_sub64(st->internal->info->fps_last_dts, st->internal->info->fps_first_dts);
                t = FFMAX(t, av_rescale_q(dur, st->time_base, AV_TIME_BASE_Q));
            }

            if (analyzed_all_streams)                                limit = max_analyze_duration;
            else if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE) limit = max_subtitle_analyze_duration;
            else                                                     limit = max_stream_analyze_duration;

            if (t >= limit) {
                av_log(ic, AV_LOG_VERBOSE, "max_analyze_duration %"PRId64" reached at %"PRId64" microseconds st:%d\n",
                       limit,
                       t, pkt->stream_index);
                if (ic->flags & AVFMT_FLAG_NOBUFFER)
                    av_packet_unref(pkt1);
                break;
            }
            if (pkt->duration) {
                if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE && pkt->pts != AV_NOPTS_VALUE && st->start_time != AV_NOPTS_VALUE && pkt->pts >= st->start_time) {
                    st->internal->info->codec_info_duration = FFMIN(pkt->pts - st->start_time, st->internal->info->codec_info_duration + pkt->duration);
                } else
                    st->internal->info->codec_info_duration += pkt->duration;
                st->internal->info->codec_info_duration_fields += st->parser && st->need_parsing && avctx->ticks_per_frame ==2 ? st->parser->repeat_pict + 1 : 2;
            }
        }
        if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
#if FF_API_R_FRAME_RATE
            ff_rfps_add_frame(ic, st, pkt->dts);
#endif
            if (pkt->dts != pkt->pts && pkt->dts != AV_NOPTS_VALUE && pkt->pts != AV_NOPTS_VALUE)
                st->internal->info->frame_delay_evidence = 1;
        }
        if (!st->internal->avctx->extradata) {
            ret = extract_extradata(ic, st, pkt);
            if (ret < 0)
                goto unref_then_goto_end;
        }

        /* If still no information, we try to open the codec and to
         * decompress the frame. We try to avoid that in most cases as
         * it takes longer and uses more memory. For MPEG-4, we need to
         * decompress for QuickTime.
         *
         * If AV_CODEC_CAP_CHANNEL_CONF is set this will force decoding of at
         * least one frame of codec data, this makes sure the codec initializes
         * the channel configuration and does not only trust the values from
         * the container. */
        try_decode_frame(ic, st, pkt,
                         (options && i < orig_nb_streams) ? &options[i] : NULL);

        if (ic->flags & AVFMT_FLAG_NOBUFFER)
            av_packet_unref(pkt1);

        st->codec_info_nb_frames++;
        count++;
    }

这里是一个死循环,整理下逻辑,大概的流程如下

for (;;) {
    if 所有stream 满足 has_codec_parameters(st, ..)
       || probe_size > 设置值 {
        break 退出;
    } else {
        //继续读取 packet
        read_frame_internal(ic, &pkt1);
        //尝试对读取到的 packet 解码
        try_decode_frame(ic, st, pkt, ...);
    }
}

这样就清晰很多了,设置一个死循环,跳出的满足2大条件之一,不然就一直读取并解码packet

  • 所有的stream流,都满足has_codec_parameters(后面分析),大概意思就是满足了基本的流参数。
  • probe_size,也就是总共读取的数据大小,大于一个设置值就可以。
has_codec_parameters
static int has_codec_parameters(AVStream *st, const char **errmsg_ptr)
{
    AVCodecContext *avctx = st->internal->avctx;

#define FAIL(errmsg) do {                                         \
        if (errmsg_ptr)                                           \
            *errmsg_ptr = errmsg;                                 \
        return 0;                                                 \
    } while (0)

    if (   avctx->codec_id == AV_CODEC_ID_NONE
        && avctx->codec_type != AVMEDIA_TYPE_DATA)
        FAIL("unknown codec");
    switch (avctx->codec_type) {
    case AVMEDIA_TYPE_AUDIO:
        if (!avctx->frame_size && determinable_frame_size(avctx))
            FAIL("unspecified frame size");
        if (st->internal->info->found_decoder >= 0 &&
            avctx->sample_fmt == AV_SAMPLE_FMT_NONE)
            FAIL("unspecified sample format");
        if (!avctx->sample_rate)
            FAIL("unspecified sample rate");
        if (!avctx->channels)
            FAIL("unspecified number of channels");
        if (st->internal->info->found_decoder >= 0 && !st->internal->nb_decoded_frames && avctx->codec_id == AV_CODEC_ID_DTS)
            FAIL("no decodable DTS frames");
        break;
    case AVMEDIA_TYPE_VIDEO:
        if (!avctx->width)
            FAIL("unspecified size");
        if (st->internal->info->found_decoder >= 0 && avctx->pix_fmt == AV_PIX_FMT_NONE)
            FAIL("unspecified pixel format");
        if (st->codecpar->codec_id == AV_CODEC_ID_RV30 || st->codecpar->codec_id == AV_CODEC_ID_RV40)
            if (!st->sample_aspect_ratio.num && !st->codecpar->sample_aspect_ratio.num && !st->codec_info_nb_frames)
                FAIL("no frame in rv30/40 and no sar");
        break;
    case AVMEDIA_TYPE_SUBTITLE:
        if (avctx->codec_id == AV_CODEC_ID_HDMV_PGS_SUBTITLE && !avctx->width)
            FAIL("unspecified size");
        break;
    case AVMEDIA_TYPE_DATA:
        if (avctx->codec_id == AV_CODEC_ID_NONE) return 1;
    }

    return 1;
}

可以看到音频要检测是否拿到 frame size,sample format, sample rate, channels 等重要参数,视频则会检测视频的 width, pixel format 等等,而且一些需要dts信息校验,那就不可避免需要进行解码逻辑

read_frame_internal

读取帧的原始编码数据,这个方法比较重要,而且相对复杂点,后面单独出一章来讲一下,这里先跳过

try_decode_frame

从方法名应该也能看出来,尝试着去解码一些帧数据

/* returns 1 or 0 if or if not decoded data was returned, or a negative error */
static int try_decode_frame(AVFormatContext *s, AVStream *st,
                            const AVPacket *avpkt, AVDictionary **options)
{
    AVCodecContext *avctx = st->internal->avctx;
    const AVCodec *codec;
    int got_picture = 1, ret = 0;
    //申请帧数据结构体内存
    AVFrame *frame = av_frame_alloc();
    //字幕
    AVSubtitle subtitle;
    AVPacket pkt = *avpkt;
    int do_skip_frame = 0;
    enum AVDiscard skip_frame;

    if (!frame)
        return AVERROR(ENOMEM);
	#1
    if (!avcodec_is_open(avctx) &&
        st->internal->info->found_decoder <= 0 &&
        (st->codecpar->codec_id != -st->internal->info->found_decoder || !st->codecpar->codec_id)) {
        AVDictionary *thread_opt = NULL;

        codec = find_probe_decoder(s, st, st->codecpar->codec_id);

        if (!codec) {
            st->internal->info->found_decoder = -st->codecpar->codec_id;
            ret                     = -1;
            goto fail;
        }

        /* Force thread count to 1 since the H.264 decoder will not extract
         * SPS and PPS to extradata during multi-threaded decoding. */
        av_dict_set(options ? options : &thread_opt, "threads", "1", 0);
        /* Force lowres to 0. The decoder might reduce the video size by the
         * lowres factor, and we don't want that propagated to the stream's
         * codecpar */
        av_dict_set(options ? options : &thread_opt, "lowres", "0", 0);
        if (s->codec_whitelist)
            av_dict_set(options ? options : &thread_opt, "codec_whitelist", s->codec_whitelist, 0);
        ret = avcodec_open2(avctx, codec, options ? options : &thread_opt);
        if (!options)
            av_dict_free(&thread_opt);
        if (ret < 0) {
            st->internal->info->found_decoder = -avctx->codec_id;
            goto fail;
        }
        st->internal->info->found_decoder = 1;
    } else if (!st->internal->info->found_decoder)
        st->internal->info->found_decoder = 1;

    if (st->internal->info->found_decoder < 0) {
        ret = -1;
        goto fail;
    }

    if (avpriv_codec_get_cap_skip_frame_fill_param(avctx->codec)) {
        do_skip_frame = 1;
        skip_frame = avctx->skip_frame;
        avctx->skip_frame = AVDISCARD_ALL;
    }

    while ((pkt.size > 0 || (!pkt.data && got_picture)) &&
           ret >= 0 &&
           (!has_codec_parameters(st, NULL) || !has_decode_delay_been_guessed(st) ||
            (!st->codec_info_nb_frames &&
             (avctx->codec->capabilities & AV_CODEC_CAP_CHANNEL_CONF)))) {
        got_picture = 0;
        if (avctx->codec_type == AVMEDIA_TYPE_VIDEO ||
            avctx->codec_type == AVMEDIA_TYPE_AUDIO) {
            ret = avcodec_send_packet(avctx, &pkt);
            if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
                break;
            if (ret >= 0)
                pkt.size = 0;
            ret = avcodec_receive_frame(avctx, frame);
            if (ret >= 0)
                got_picture = 1;
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                ret = 0;
        } else if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE) {
            ret = avcodec_decode_subtitle2(avctx, &subtitle,
                                           &got_picture, &pkt);
            if (got_picture)
                avsubtitle_free(&subtitle);
            if (ret >= 0)
                pkt.size = 0;
        }
        if (ret >= 0) {
            if (got_picture)
                st->internal->nb_decoded_frames++;
            ret       = got_picture;
        }
    }

    if (!pkt.data && !got_picture)
        ret = -1;

fail:
    if (do_skip_frame) {
        avctx->skip_frame = skip_frame;
    }

    av_frame_free(&frame);
    return ret;
}
  • 判断是否已经打开 avcodec(条件为: AVCodecInternal 是否存在,found_decoder,codec_id)
  • 如果未初始化decoder,则从codec_list[]查找与 codec_id 匹配的 AVCodec 结构体对象
  • 调用 avcodec_open2() 打开及初始化 codec
  • 判断是否需要跳过 1 帧
  • 循环开始解码,如果是videoaudio ,发 packet 进入解码队列进行解码,并接受解码后的帧数据,解码 字幕数据

总结一下,这个方法avformat_find_stream_info,主要是通过对文件流的一些探测,来判断是否达到了视频播放的条件,所以,如果需要尽量提升首屏秒开的效果,这个流程可以控制不要花费太多时间,比如探测的数据大小probe_size,探测的时长duration等。