ffmpeg将pcm_f32le编码为aac

247 阅读6分钟

ffmpeg相关的测试命令

#从mp4文件中提取pcm
ffmpeg -i input.mp4 -ar 44100 -ac 2 -f f32le output.pcm

#播放音频pcm
ffplay -ar 44100 -ac 2 -f f32le output.pcm

#ffmpeg将pcm转aac
ffmpeg -ar 44100 -ac 2 -f f32le -i output.pcm -c:a aac -b:a 192k output.aac

c语言实现将pcm_f32le编码为aac

#include<stdio.h>
#include<stdlib.h>
#include <libavcodec/codec.h>
#include <libavcodec/avcodec.h>

#define ADTS_HEADER_LEN 7

/*
  功能:pcm 转 aac
  pcm信息:
    Stream #0:0: Audio: pcm_f32le, 44100 Hz, 1 channels, flt, 1411 kb/s
  aac信息:
    Stream #0:0: Audio: aac (LC), 44100 Hz, mono, fltp, 68 kb/s
*/

/*
 * Sample rates addressable from an ADTS header. The array position of a rate
 * is the 4-bit sampling_frequency_index written into the header.
 */
static const int sampling_frequency_map[] = {
  96000,  // 0x0
  88200,  // 0x1
  64000,  // 0x2
  48000,  // 0x3
  44100,  // 0x4
  32000,  // 0x5
  24000,  // 0x6
  22050,  // 0x7
  16000,  // 0x8
  12000,  // 0x9
  11025,  // 0xa
  8000,   // 0xb
  7350,   // 0xc
  // 0xd, 0xe and 0xf are reserved
};

/*
 * Fill a 7-byte ADTS header (protection_absent = 1, i.e. no CRC) describing
 * one raw AAC frame.
 *
 * Parameters:
 *   adts_header_buf: output buffer, at least 7 bytes
 *   data_length:     size in bytes of the AAC payload that follows the header
 *   profile:         value of the 2-bit ADTS profile field (0..3); it is
 *                    written as-is, no "object type - 1" conversion is done
 *   sample_rate:     sample rate in Hz; must be listed in sampling_frequency_map
 *   nb_channels:     channel count; 1..6 are written directly and 8 is written
 *                    as channel configuration 7
 *
 * Returns 0 on success. Returns -1 (after printing a diagnostic to stderr)
 * when an argument cannot be represented in the header; the buffer is left
 * untouched in that case.
 */
static int fill_adts_header(char * const adts_header_buf, const int data_length,
                const int profile, const int sample_rate,
                const int nb_channels)
{
  const int header_len = 7;         // fixed ADTS header size without CRC
  const int max_frame_len = 0x1fff; // aac_frame_length is a 13-bit field
  const int table_size = (int)(sizeof(sampling_frequency_map) / sizeof(sampling_frequency_map[0]));
  // Work on unsigned bytes: storing 0xff into a plain (possibly signed) char
  // is an implementation-defined conversion.
  unsigned char * const hdr = (unsigned char *)adts_header_buf;
  int sampling_frequency_index = -1;
  int channel_configuration = 0;
  int adts_len = 0;

  if (!adts_header_buf) {
    fprintf(stderr, "ADTS header buffer is NULL.\n");
    return -1;
  }

  // The frame length field counts header + payload and must fit in 13 bits.
  if (data_length < 0 || data_length > max_frame_len - header_len) {
    fprintf(stderr, "The payload length of %d bytes does not fit in an ADTS frame.\n", data_length);
    return -1;
  }
  adts_len = data_length + header_len;

  // profile is a 2-bit field; rejecting other values also avoids shifting a
  // negative number, which is undefined behaviour.
  if (profile < 0 || profile > 3) {
    fprintf(stderr, "The profile %d cannot be stored in an ADTS header.\n", profile);
    return -1;
  }

  // channel_configuration is a 3-bit field: 1..6 equal the channel count,
  // 7 denotes 8 channels; other counts have no fixed configuration.
  if (nb_channels >= 1 && nb_channels <= 6) {
    channel_configuration = nb_channels;
  } else if (nb_channels == 8) {
    channel_configuration = 7;
  } else {
    fprintf(stderr, "The channel count %d cannot be stored in an ADTS header.\n", nb_channels);
    return -1;
  }

  for (int i = 0; i < table_size; i++) {
    if (sampling_frequency_map[i] == sample_rate) {
      sampling_frequency_index = i;
      break;
    }
  }
  if (sampling_frequency_index < 0) {
    fprintf(stderr, "The sampling frequency of %d Hz is not supported.\n", sample_rate);
    return -1;
  }

  hdr[0] = 0xff;                  // syncword 0xfff, upper 8 bits
  hdr[1] = 0xf0                   // syncword 0xfff, lower 4 bits
         | (0 << 3)               // ID: 0 = MPEG-4, 1 = MPEG-2 (1 bit)
         | (0 << 1)               // layer: always 00 (2 bits)
         | 1;                     // protection_absent: 1 = no CRC (1 bit)

  hdr[2] = (unsigned char)((profile << 6)                   // profile (2 bits)
         | ((sampling_frequency_index & 0x0f) << 2)         // sampling frequency index (4 bits)
         | (0 << 1)                                         // private bit (1 bit)
         | ((channel_configuration & 0x04) >> 2));          // channel configuration, top bit

  hdr[3] = (unsigned char)(((channel_configuration & 0x03) << 6) // channel configuration, low 2 bits
         | (0 << 5)                                         // original/copy (1 bit)
         | (0 << 4)                                         // home (1 bit)
         | (0 << 3)                                         // copyright id bit (1 bit)
         | (0 << 2)                                         // copyright id start (1 bit)
         | ((adts_len & 0x1800) >> 11));                    // frame length, top 2 bits

  hdr[4] = (unsigned char)((adts_len & 0x7f8) >> 3);        // frame length, middle 8 bits
  hdr[5] = (unsigned char)(((adts_len & 0x7) << 5)          // frame length, low 3 bits
         | 0x1f);                                           // buffer fullness 0x7ff, top 5 bits
  // Low 6 bits of buffer fullness, then number_of_raw_data_blocks_in_frame = 0
  // (the frame holds number_of_raw_data_blocks_in_frame + 1 raw AAC blocks).
  hdr[6] = 0xfc;

  return 0;
}

/*
 * Check whether the encoder advertises support for a given sample format.
 *
 * Returns 1 when sample_fmt appears in codec->sample_fmts, 0 when it does not
 * or when codec is NULL. When the codec publishes no list at all
 * (sample_fmts == NULL, which FFmpeg documents as "unknown"), 1 is returned
 * so that the decision is left to avcodec_open2() instead of dereferencing a
 * NULL pointer here.
 */
static int check_sample_fmt(const AVCodec *codec, enum AVSampleFormat sample_fmt)
{
  if (!codec)
    return 0;

  const enum AVSampleFormat *p = codec->sample_fmts;
  if (!p)
    return 1;

  /* the list is terminated by AV_SAMPLE_FMT_NONE */
  for (; *p != AV_SAMPLE_FMT_NONE; p++) {
    if (*p == sample_fmt)
      return 1;
  }
  return 0;
}

/*
 * Send one frame to the encoder and write every packet it produces to output,
 * each one preceded by an ADTS header.
 *
 * Parameters:
 *   ctx:    opened encoder context
 *   frame:  frame to encode, or NULL to flush the encoder
 *   pkt:    scratch packet; it is unreferenced again before returning
 *   output: destination stream
 *
 * Returns 0 on success (including "encoder needs more input" and end of
 * stream), -1 if encoding, header construction or writing fails.
 */
static int encode(AVCodecContext *ctx, AVFrame *frame, AVPacket *pkt,
                   FILE *output)
{
  int ret;

  /* send the frame for encoding */
  ret = avcodec_send_frame(ctx, frame);
  if (ret < 0) {
    fprintf(stderr, "Error sending the frame to the encoder\n");
    return -1;
  }

  /* read all the available output packets (in general there may be any
   * number of them) */
  while (ret >= 0) {
    ret = avcodec_receive_packet(ctx, pkt);
    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
      return 0;
    else if (ret < 0) {
      fprintf(stderr, "Error encoding audio frame\n");
      return -1;
    }

    /* Every packet is written as: ADTS header, then the raw AAC payload.
     * (Original author's note: not applicable to packets demuxed from a TS
     * stream, which already carry an ADTS header.) */
    char adts_header_buf[ADTS_HEADER_LEN] = {0};
    if (fill_adts_header(adts_header_buf,
                         pkt->size,
                         ctx->profile,
                         ctx->sample_rate,
                         ctx->ch_layout.nb_channels) < 0) {
      /* never emit a zeroed header: the output would be unparseable */
      fprintf(stderr, "Could not build the ADTS header\n");
      av_packet_unref(pkt);
      return -1;
    }

    if (fwrite(adts_header_buf, 1, ADTS_HEADER_LEN, output) != (size_t)ADTS_HEADER_LEN ||
        fwrite(pkt->data, 1, pkt->size, output) != (size_t)pkt->size) {
      fprintf(stderr, "Error writing the encoded packet\n");
      av_packet_unref(pkt);
      return -1;
    }

    av_packet_unref(pkt);
  }

  return 0;
}

/*
  Purpose: encode a raw PCM file to an ADTS AAC file. Sample rate and channel
  count are carried over unchanged. The input must be interleaved 32-bit float
  PCM (pcm_f32le); the encoder is always fed planar float (fltp).
  Parameters:
      pcm_file_path: path of the input PCM file
      aac_file_path: path of the output AAC file
      sample_rate:   sample rate of both the PCM input and the AAC output
      nb_channels:   channel count of both (only 1 and 2 are accepted)
      bit_rate:      AAC bit rate in bit/s (e.g. 128000, 64000)
  Returns: 0 on success, -1 on failure
*/
int pcm_to_aac(const char * const pcm_file_path, const char * const aac_file_path,
               const int sample_rate, const int nb_channels, const int bit_rate)
{
  /* Everything the cleanup code inspects is declared and initialised here,
     before the first "goto end", so cleanup never reads an indeterminate
     value. */
  const AVCodec *codec = NULL;
  AVCodecContext *ctx = NULL;
  AVFrame *frame = NULL;
  AVPacket *pkt = NULL;
  FILE *pcm_file = NULL;
  FILE *aac_file = NULL;
  float *tmp_buffer = NULL;
  AVChannelLayout src_layout;
  int bytes_frame = 0;
  int floats_per_frame = 0;
  int bytes_read = 0;
  int count = 0;
  long long pcm_file_size = 0;
  int ret = -1;

  // open the PCM input
  pcm_file = fopen(pcm_file_path, "rb");
  if (!pcm_file) {
    perror("open pcm file failed");
    ret = -1;
    goto end;
  }

  // open the AAC output
  aac_file = fopen(aac_file_path, "wb");
  if (!aac_file) {
    perror("open aac file failed");
    ret = -1;
    goto end;
  }

  /* find the AAC encoder */
  codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
  if (!codec) {
    fprintf(stderr, "AV_CODEC_ID_AAC codec not found\n");
    ret = -1;
    goto end;
  }

  ctx = avcodec_alloc_context3(codec);
  if (!ctx) {
    fprintf(stderr, "Could not allocate audio codec context\n");
    ret = -1;
    goto end;
  }

  /* AAC bit rate: a higher bit rate gives better audio quality */
  ctx->bit_rate = bit_rate;
  ctx->sample_rate = sample_rate;

  /* the encoder is fed planar float; check that it accepts that format */
  ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
  if (!check_sample_fmt(codec, ctx->sample_fmt)) {
    fprintf(stderr, "Encoder does not support sample format %s",
            av_get_sample_fmt_name(ctx->sample_fmt));
    ret = -1;
    goto end;
  }

  /* Copy the layout by value into a function-scope object. Keeping a pointer
     to a compound literal created inside the if-block would dangle once that
     block ends. */
  if (nb_channels == 1) {
    src_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
  } else if (nb_channels == 2) {
    src_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO;
  } else {
    fprintf(stderr, "nb_channels %d not support!", nb_channels);
    ret = -1;
    goto end;
  }

  ret = av_channel_layout_copy(&ctx->ch_layout, &src_layout);
  if (ret < 0) {
    fprintf(stderr, "av_channel_layout_copy() error!");
    ret = -1;
    goto end;
  }

  /* open it */
  ret = avcodec_open2(ctx, codec, NULL);
  if (ret < 0) {
    fprintf(stderr, "Could not open codec\n");
    ret = -1;
    goto end;
  }

  /* the read/convert loop below relies on a fixed number of samples per frame */
  if (ctx->frame_size <= 0) {
    fprintf(stderr, "Encoder did not report a fixed frame size\n");
    ret = -1;
    goto end;
  }

  /* packet for holding encoded output */
  pkt = av_packet_alloc();
  if (!pkt) {
    fprintf(stderr, "Could not allocate the packet\n");
    ret = -1;
    goto end;
  }

  /* frame containing input raw audio */
  frame = av_frame_alloc();
  if (!frame) {
    fprintf(stderr, "Could not allocate audio frame\n");
    ret = -1;
    goto end;
  }

  frame->nb_samples = ctx->frame_size;
  frame->format = ctx->sample_fmt;
  ret = av_channel_layout_copy(&frame->ch_layout, &ctx->ch_layout);
  if (ret < 0) {
    fprintf(stderr, "av_channel_layout_copy() error!");
    ret = -1;
    goto end;
  }

  /* allocate the data buffers */
  ret = av_frame_get_buffer(frame, 0);
  if (ret < 0) {
    fprintf(stderr, "Could not allocate audio data buffers\n");
    ret = -1;
    goto end;
  }

  /* one encoder frame worth of interleaved float samples */
  floats_per_frame = ctx->ch_layout.nb_channels * ctx->frame_size;
  bytes_frame = (int)sizeof(float) * floats_per_frame;

  printf("frame_size: %d\n", ctx->frame_size);
  printf("sample_fmt: %d, name: %s\n", ctx->sample_fmt, av_get_sample_fmt_name(ctx->sample_fmt));
  printf("ch_layout.nb_channels: %d\n", ctx->ch_layout.nb_channels);
  printf("bit_rate: %lld\n", (long long)ctx->bit_rate);
  printf("bytes_frame: %d\n", bytes_frame);

  tmp_buffer = malloc((size_t)bytes_frame);
  if (!tmp_buffer) {
    fprintf(stderr, "Could not allocate tmp_buffer\n");
    ret = -1;
    goto end;
  }
  /* note: this is the size of the pointer, not of the allocation */
  printf("sizeof(tmp_buffer): %zu\n", sizeof(tmp_buffer));

  while ((bytes_read = (int)fread(tmp_buffer, 1, (size_t)bytes_frame, pcm_file)) > 0) {
    pcm_file_size += bytes_read;
    count++;
    if (count % 100 == 0) {
      printf("count: %d, bytes_read: %d\n", count, bytes_read);
    }

    /* A short read (normally the last chunk of the file) leaves the tail of
       tmp_buffer holding samples from the previous frame; replace them with
       silence so stale audio is not encoded a second time. */
    for (int i = bytes_read / (int)sizeof(float); i < floats_per_frame; i++) {
      tmp_buffer[i] = 0.0f;
    }

    /* the encoder may still hold a reference to the frame's buffers from the
       previous call, so make sure they are writable before overwriting them */
    ret = av_frame_make_writable(frame);
    if (ret < 0) {
      fprintf(stderr, "Could not make the audio frame writable\n");
      ret = -1;
      goto end;
    }

    // PCM file layout is interleaved:  LR LR LR ...
    // frame layout is planar:          LLL... RRR...
    for (int ch = 0; ch < ctx->ch_layout.nb_channels; ch++) {
      float *data = (float*) frame->data[ch];

      for (int i = 0; i < ctx->frame_size; i++) {
        data[i] = tmp_buffer[ctx->ch_layout.nb_channels * i + ch];
      }
    }

    ret = encode(ctx, frame, pkt, aac_file);
    if (ret < 0) {
      fprintf(stderr, "encode failed!\n");
      ret = -1;
      goto end;
    }
  }

  /* fread() returns 0 both at end of file and on error; tell them apart */
  if (ferror(pcm_file)) {
    fprintf(stderr, "Error reading the pcm file\n");
    ret = -1;
    goto end;
  }

  printf("count: %d, bytes_read: %d, pcm_file_size: %lld\n", count, bytes_read, pcm_file_size);

  /* flush the encoder */
  ret = encode(ctx, NULL, pkt, aac_file);
  if (ret < 0) {
    fprintf(stderr, "flush the encoder failed!\n");
    ret = -1;
    goto end;
  }

  // success
  ret = 0;

end:
  // release resources
  if (pcm_file) {
    fclose(pcm_file);
  }
  if (aac_file) {
    /* fclose() flushes buffered output; a failure here means data was lost */
    if (fclose(aac_file) != 0 && ret == 0) {
      perror("close aac file failed");
      ret = -1;
    }
  }

  if (ctx) {
    avcodec_free_context(&ctx);
  }
  if (frame) {
    av_frame_free(&frame);
  }
  if (pkt) {
    av_packet_free(&pkt);
  }
  free(tmp_buffer);
  tmp_buffer = NULL;

  return ret;
}

/*
 * Entry point: converts ../output.pcm (44100 Hz, 2 channels, f32le) to
 * ../output.aac at 128 kbit/s. The process exit status reflects the result
 * of the conversion; command-line arguments are not used.
 */
int main(int argc, char** argv)
{
  (void)argc;
  (void)argv;

  const char *pcm_file_path = "../output.pcm";
  const char *aac_file_path = "../output.aac";
  int sample_rate = 44100;
  int nb_channels = 2;
  int bit_rate = 128000;

  int ret = pcm_to_aac(pcm_file_path, aac_file_path, sample_rate, nb_channels, bit_rate);
  printf("main ret: %d\n", ret);

  /* report failure to the caller instead of always exiting with 0 */
  return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}