【音视频开发】6. FFmpeg API 封装 ADTS 音频流

39 阅读6分钟

使用 FFmpeg API 封装 ADTS 音频流

1、AAC 与 ADTS 与 ADIF

  • AAC:Advanced Audio Coding,由 MPEG-2 Part 7 与 MPEG-4 Part 3 标准定义的有损音频压缩格式
  • ADTS:Audio Data Transport Stream,AAC 音频封装格式,每个帧都带有自己的头,可以从任意帧开始解码
  • ADIF:Audio Data Interchange Format,AAC 音频封装格式,整个文件只有一个位于开头的头

2、ADTS 帧解析

ADTS 帧组成
  • ADTS 帧由头部和数据部分组成:不带 CRC 时(protection_absent = 1)头部为 7 字节,带 CRC 时为 9 字节;本文只处理 7 字节的情况
  • 头的固定部分:前 28b,帧之间相同
  • 头的可变部分:后 28b,帧之间不同
ADTS 头固定部分
  • 同步字:用于识别帧的起始
// 12bits syncword: 0xfff
adts_header_buf[0] = 0xff;
adts_header_buf[1] = 0xf0;
  • 标准:MPEG-4 或 MPEG-2
// 1bit ID: 0 for MPEG-4, 1 for MPEG-2
adts_header_buf[1] |= (0 << 3);
  • layer
// 2bits layer: 0
adts_header_buf[1] |= (0 << 1);
  • CRC 校验标识
// 1bit protection absent: set to 1 if there is no CRC and 0 if there is CRC
adts_header_buf[1] |= 1;
  • AAC 级别(profile):取值为 MPEG-4 Audio Object Type 减 1,例如 AAC Main 填 0,AAC-LC 填 1
// 2bits profile
adts_header_buf[2] = (profile) << 6;
  • 采样率索引
// 4bits sampling_frequency_index
adts_header_buf[2] |= (sampling_frequency_index & 0x0f) << 2;

// Maps a sample rate in Hz to the value placed in the 4-bit
// sampling_frequency_index field of the ADTS header.
static const std::unordered_map<int, int> sampling_frequency_map = {
    {96000, 0x0},
    {88200, 0x1},
    {64000, 0x2},
    {48000, 0x3},
    {44100, 0x4},
    {32000, 0x5},
    {24000, 0x6},
    {22050, 0x7},
    {16000, 0x8},
    {12000, 0x9},
    {11025, 0xa},
    {8000,  0xb},
    {7350,  0xc}
};
  • 私有位:填 0
// 1bit private bit: 0
adts_header_buf[2] |= (0 << 1);
  • 声道配置(channel_configuration):取值 1~6 时等于声道数,7 表示 8 声道(7.1),0 表示声道布局由码流内的 PCE 描述
// 3bits channel_configuration
adts_header_buf[2] |= (channels & 0x04) >> 2;
adts_header_buf[3] = (channels & 0x03) << 6;
  • original copy
// 1bit original_copy: 0
adts_header_buf[3] |= (0 << 5);
  • home
// 1bit home: 0
adts_header_buf[3] |= (0 << 4);
ADTS 头可变部分
  • 版权识别
// 1bit copyright_identification_bit: 0
adts_header_buf[3] |= (0 << 3);
// 1bit copyright_identification_start: 0
adts_header_buf[3] |= (0 << 2);
  • 帧长度:头 + 数据的总字节数
// 13bits aac_frame_length
adts_header_buf[3] |= ((aac_frame_length & 0x1800) >> 11);
adts_header_buf[4] = (uint8_t) ((aac_frame_length & 0x7f8) >> 3);
adts_header_buf[5] = (uint8_t) ((aac_frame_length & 0x7) << 5);
  • ADTS 缓冲区满度
// 11bits adts_buffer_fullness: 0x7ff for variable bitrate stream
adts_header_buf[5] |= 0x1f;
adts_header_buf[6] = 0xfc;
  • 携带的 AAC 原始数据块数量:字段值为 n 时,表示该 ADTS 帧内含 n + 1 个 AAC 原始数据块
// 2bits number_of_raw_data_blocks_in_frame: 0
// it indicates that there are 0+1 AAC original frames in the ADTS frame
adts_header_buf[6] |= 0x0;
3、代码实战 —— 从 mp4 中提取 ADTS 流
  • 需求:给定一个含有 aac 编码音频的 mp4 文件,输出一个每个帧都含 adts 头的 aac 文件
  • 思路:提取每一帧的音频编码数据,添加 ADTS 头
  • 代码示例的环境:
    • 工具链:VS2022,C++20
    • 依赖1:ffmpeg7.1:avcodec,avformat,avutil
    • 依赖2:glog
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
}

#include <glog/logging.h>

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <string>
#include <unordered_map>

// Size in bytes of the ADTS header produced in this file (the variant without CRC).
static constexpr int ADTS_HEADER_LEN = 7;

// Per-thread scratch storage that receives the text produced by ErrorToString
thread_local static char error_buffer[AV_ERROR_MAX_STRING_SIZE] = {};

/**
 * @brief Describe an FFmpeg error code as text
 * @param error_code FFmpeg error code
 * @return whatever av_make_error_string returns for error_buffer; the buffer
 *         is overwritten by the next call made on the same thread
 */
static char *ErrorToString(const int error_code) {
    // Clear the whole buffer first so no text from an earlier call can remain.
    std::memset(error_buffer, 0, sizeof(error_buffer));

    char *const description = av_make_error_string(error_buffer, sizeof(error_buffer), error_code);
    return description;
}

// Maps a sample rate in Hz to the value written into the 4-bit
// sampling_frequency_index field of the ADTS header. GenerateHeaderADTS
// fails for any rate that is not a key of this table.
static const std::unordered_map<int, int> sampling_frequency_map = {
    {96000, 0x0},
    {88200, 0x1},
    {64000, 0x2},
    {48000, 0x3},
    {44100, 0x4},
    {32000, 0x5},
    {24000, 0x6},
    {22050, 0x7},
    {16000, 0x8},
    {12000, 0x9},
    {11025, 0xa},
    {8000,  0xb},
    {7350,  0xc}
};

/**
 * @brief Generate ADTS header
 * @param adts_header_buf ADTS header buffer
 * @param data_len data length
 * @param profile AAC profile
 * @param sample_rate sample rate
 * @param nb_channels channels
 * @return 0 on success, -1 on failure
 */
bool GenerateHeaderADTS(uint8_t *adts_header_buf, int data_len, int profile, int sample_rate, int nb_channels) {
    int sampling_frequency_index{};
    uint32_t aac_frame_length = data_len + ADTS_HEADER_LEN;

    auto it = sampling_frequency_map.find(sample_rate);
    if (it == sampling_frequency_map.end()) {
        LOG(ERROR) << "unsupported sample_rate " << sample_rate;
        return false;
    }
    sampling_frequency_index = it->second;

    /* ======================== */
    /* ADTS fixes header 28bits */
    /* ======================== */

    // 12bits syncword: 0xfff
    adts_header_buf[0] = 0xff;
    adts_header_buf[1] = 0xf0;

    // 1bit ID: 0 for MPEG-4, 1 for MPEG-2
    adts_header_buf[1] |= (0 << 3);

    // 2bits layer: 0
    adts_header_buf[1] |= (0 << 1);

    // 1bit protection absent: set to 1 if there is no CRC and 0 if there is CRC
    adts_header_buf[1] |= 1;

    // 2bits profile
    adts_header_buf[2] = (profile) << 6;

    // 4bits sampling_frequency_index
    adts_header_buf[2] |= (sampling_frequency_index & 0x0f) << 2;

    // 1bit private bit: 0
    adts_header_buf[2] |= (0 << 1);

    // 3bits channel_configuration
    adts_header_buf[2] |= (nb_channels & 0x04) >> 2;
    adts_header_buf[3] = (nb_channels & 0x03) << 6;

    // 1bit original_copy: 0
    adts_header_buf[3] |= (0 << 5);

    // 1bit home: 0
    adts_header_buf[3] |= (0 << 4);

    /* =========================== */
    /* ADTS variable header 28bits */
    /* =========================== */

    // 1bit copyright_identification_bit: 0
    adts_header_buf[3] |= (0 << 3);

    // 1bit copyright_identification_start: 0
    adts_header_buf[3] |= (0 << 2);

    // 13bits aac_frame_length
    adts_header_buf[3] |= ((aac_frame_length & 0x1800) >> 11);
    adts_header_buf[4] = (uint8_t) ((aac_frame_length & 0x7f8) >> 3);
    adts_header_buf[5] = (uint8_t) ((aac_frame_length & 0x7) << 5);

    // 11bits adts_buffer_fullness: 0x7ff for variable bitrate stream
    adts_header_buf[5] |= 0x1f;
    adts_header_buf[6] = 0xfc;

    // 2bits number_of_raw_data_blocks_in_frame: 0
    // it indicates that there are 0+1 AAC original frames in the ADTS frame
    adts_header_buf[6] |= 0x0;

    return true;
}

/**
 * @brief Copy every packet of the first AAC stream to ofs, each prefixed with an ADTS header
 * @param fmt_ctx opened input context to read packets from
 * @param pkt caller-allocated packet reused for every read; unreferenced before returning
 * @param ofs output stream that receives header + payload for each AAC packet
 * @return true when the end of the input was reached and every AAC packet was written;
 *         false on invalid arguments, when no AAC stream exists, on a read error,
 *         when an ADTS header cannot be generated, or when writing fails
 */
static bool InnerExtractAudioStreamADTS(AVFormatContext *fmt_ctx, AVPacket *pkt, std::ofstream &ofs) {
    if (!fmt_ctx || !pkt || !ofs) {
        return false;
    }

    // find aac audio stream and remember its parameters for the header fields
    int aac_stream_index = -1;
    const AVCodecParameters *codec_params = nullptr;
    for (unsigned int i = 0; i < fmt_ctx->nb_streams; ++i) {
        const AVCodecParameters *params = fmt_ctx->streams[i]->codecpar;
        if (params->codec_type == AVMEDIA_TYPE_AUDIO && params->codec_id == AV_CODEC_ID_AAC) {
            aac_stream_index = static_cast<int>(i);
            codec_params = params;
            break;
        }
    }
    if (aac_stream_index < 0) {
        LOG(ERROR) << "Could not find AAC audio stream";
        return false;
    }

    while (true) {
        const int errnum = av_read_frame(fmt_ctx, pkt);
        if (errnum == AVERROR_EOF) {
            LOG(INFO) << "End of input file";
            return true;
        }
        if (errnum < 0) {
            // Any other read error leaves the output incomplete, so report failure.
            LOG(ERROR) << "Could not read frame: " << ErrorToString(errnum);
            return false;
        }

        // Packets of other streams (video, subtitles, ...) are dropped.
        if (pkt->stream_index != aac_stream_index) {
            av_packet_unref(pkt);
            continue;
        }

        uint8_t adts_header_buf[ADTS_HEADER_LEN] = {0};
        const bool header_ok = GenerateHeaderADTS(adts_header_buf,
                                                  pkt->size,
                                                  codec_params->profile,
                                                  codec_params->sample_rate,
                                                  codec_params->ch_layout.nb_channels);
        if (!header_ok) {
            LOG(ERROR) << "Could not generate ADTS header";
            av_packet_unref(pkt);
            return false;
        }

        if (!ofs.write(reinterpret_cast<char *>(adts_header_buf), ADTS_HEADER_LEN) ||
            !ofs.write(reinterpret_cast<char *>(pkt->data), pkt->size)) {
            LOG(ERROR) << "Could not write aac file: ofstream is broken";
            av_packet_unref(pkt);
            return false;
        }
        LOG(INFO) << "Extracted " << pkt->size << " bytes of AAC data";
        av_packet_unref(pkt);
    }
}

/**
 * @brief Extract AAC audio stream from input_file and save it to output_file
 * @param input_file input file
 * @param output_file output file
 */
void ExtractAudioStreamADTS(const std::string &input_file, const std::string &output_file) {
    int errnum{};

    // open input_file
    AVFormatContext *fmt_ctx = nullptr;
    if ((errnum = avformat_open_input(&fmt_ctx, input_file.c_str(), nullptr, nullptr)) < 0) {
        LOG(ERROR) << "Could not open source file "" << input_file << "": " << ErrorToString(errnum);
        return;
    }
    if ((errnum = avformat_find_stream_info(fmt_ctx, nullptr)) < 0) {
        LOG(ERROR) << "Could not find stream information: " << ErrorToString(errnum);
        avformat_close_input(&fmt_ctx);
        return;
    }

    av_dump_format(fmt_ctx, 0, input_file.c_str(), 0);

    // open output_file
    std::ofstream ofs(output_file, std::ios::out | std::ios::binary);
    if (!ofs.is_open()) {
        LOG(ERROR) << "Could not open output file "" << output_file << """;
        avformat_close_input(&fmt_ctx);
        return;
    }

    // allocate AVPacket
    AVPacket *pkt = av_packet_alloc();
    if (pkt == nullptr) {
        LOG(ERROR) << "Could not allocate AVPacket: av_packet_alloc()";
        avformat_close_input(&fmt_ctx);
        return;
    }

    InnerExtractAudioStreamADTS(fmt_ctx, pkt, ofs);

    ofs.close();
    av_packet_free(&pkt);
    avformat_close_input(&fmt_ctx);
}

// Example entry point. The `#if 0` guard keeps it out of the build.
#if 0
int main(int argc, char *argv[]) {
    // Set up glog before the first LOG() call
    google::InitGoogleLogging(argv[0]);
    FLAGS_logtostderr = true;
    FLAGS_minloglevel = google::GLOG_INFO;

    // Read "input.mp4" and write its AAC stream to "output.aac"
    ExtractAudioStreamADTS("input.mp4", "output.aac");

    google::ShutdownGoogleLogging();
    return 0;
}
#endif