【音视频开发】5. FFmpeg API 内存模型 —— AVPacket 和 AVFrame

本文讲解了 FFmpeg API 的内存模型：AVPacket 与 AVFrame 的引用计数原理及内存结构。

FFmpeg API 内存模型 —— AVPacket 和 AVFrame

1、引用计数原理

AVPacket 结构体

AVPacket 主要用于存储压缩的音视频数据，即解复用后/解码前、编码后/复用前的数据
包含缓冲区信息、显示时间戳、解码时间戳等信息

// Abridged view of FFmpeg's AVPacket, which carries compressed audio/video data.
typedef struct AVPacket {
    // Reference-counted handle to the payload buffer.
    AVBufferRef *buf;
    // Presentation timestamp.
    int64_t pts;
    // Decoding timestamp.
    int64_t dts;
    // Payload bytes.
    uint8_t *data;
    // Payload size in bytes.
    int size;
    // ... (remaining fields omitted)
} AVPacket;

// Abridged view of FFmpeg's AVBufferRef, the handle type stored in AVPacket::buf.
typedef struct AVBufferRef {
    // Underlying buffer object (opaque here); presumably shared by all references
    // and holding the reference count -- confirm against libavutil/buffer.h.
    AVBuffer *buffer;
    // Presumably the start of the referenced data -- confirm.
    uint8_t *data;
    // Presumably the size of data in bytes -- confirm.
    size_t   size;
} AVBufferRef;

内存的申请和释放

// Demo: allocate an AVPacket, attach a payload buffer, inspect it, then release both.
// NOTE(review): the results of av_packet_alloc() and av_new_packet() are not checked;
// if either call fails, the packet->data / packet->buf accesses below are invalid.
// Production code should test them first.
const char *str = "Hello, World!";
auto size = static_cast<int>(std::strlen(str) + 1); // +1 so the terminating '\0' is copied too

AVPacket *packet = av_packet_alloc(); // allocate AVPacket
av_new_packet(packet, size); // allocate AVBufferRef

std::memcpy(packet->data, str, size); // fill the payload with the string, including '\0'
fprintf(stderr, "packet->data: \"%s\"\n", packet->data); // Hello, World!
fprintf(stderr, "packet->size: %d\n", packet->size); // 14

int ref_count = av_buffer_get_ref_count(packet->buf); // get reference count
fprintf(stderr, "reference count: %d\n", ref_count); // 1

av_packet_unref(packet); // free AVBufferRef
av_packet_free(&packet); // free AVPacket

浅拷贝

原理是增加引用计数，av_packet_clone = av_packet_alloc() + av_packet_ref()

// Demo: shallow-copy a packet twice and watch the shared buffer's reference count
// go 1 -> 2 -> 3, then drop back to 1 as two of the references are released.
// NOTE(review): allocation and ref/clone results are not checked; a failure would make
// the ->buf accesses below invalid. Production code should test them first.
constexpr int size = 1024;
int ref_count{};

AVPacket *packet1 = av_packet_alloc();
av_new_packet(packet1, size); // packet1 is the first (and only) reference to the new buffer

ref_count = av_buffer_get_ref_count(packet1->buf);
fprintf(stderr, "reference count: %d\n", ref_count); // 1

AVPacket *packet2 = av_packet_alloc();
av_packet_ref(packet2, packet1); // shallow copy

// Both packets report the same count, as expected when they share one buffer.
ref_count = av_buffer_get_ref_count(packet1->buf);
fprintf(stderr, "reference count: %d\n", ref_count); // 2
ref_count = av_buffer_get_ref_count(packet2->buf);
fprintf(stderr, "reference count: %d\n", ref_count); // 2

AVPacket *packet3 = av_packet_clone(packet1); // shallow copy
ref_count = av_buffer_get_ref_count(packet3->buf);
fprintf(stderr, "reference count: %d\n", ref_count); // 3

// Release two of the three references; packet3 keeps the buffer alive.
av_packet_unref(packet1);
av_packet_unref(packet2);
ref_count = av_buffer_get_ref_count(packet3->buf);
fprintf(stderr, "reference count: %d\n", ref_count); // 1

av_packet_unref(packet3); // last reference released
av_packet_free(&packet1);
av_packet_free(&packet2);
av_packet_free(&packet3);

引用移动

// Demo: move the buffer reference from packet1 to packet2. The reference count
// stays at 1 and packet1->buf becomes null.
// NOTE(review): allocation results are not checked; a failure would make the ->buf
// accesses below invalid. Production code should test them first.
constexpr int size = 1024;
int ref_count{};

AVPacket *packet1 = av_packet_alloc();
av_new_packet(packet1, size);

ref_count = av_buffer_get_ref_count(packet1->buf);
fprintf(stderr, "reference count: %d\n", ref_count); // 1

AVPacket *packet2 = av_packet_alloc();
av_packet_move_ref(packet2, packet1); // move AVBufferRef
fprintf(stderr, "!packet1->buf: %d\n", !packet1->buf); // 1 (packet1 no longer references the buffer)

ref_count = av_buffer_get_ref_count(packet2->buf);
fprintf(stderr, "reference count: %d\n", ref_count); // 1 (ownership moved, not duplicated)

av_packet_unref(packet2); // release the buffer now referenced by packet2
av_packet_free(&packet2);
av_packet_free(&packet1); // packet1 holds no buffer; only the AVPacket itself is freed

2、AVFrame 内存结构

YUV 视频格式

Y 亮度参量（黑白画面），UV 色度参量（U蓝V红）
packed 打包格式：每个像素点的 YUV 分量交叉排列
planar 平面格式：使用三个数组分开连续地存放 YUV 三个分量
采样表示：444（1Y ----1UV）、422（2Y----1UV，水平除二）、420（4Y----1UV，水平垂直都除二）
Stride 对齐：假设图像为 100 × 100、每行字节数按 16 对齐，则 RGB 300 => 304，Y 100 => 112，U 50 => 64，V 50 => 64

PCM 音频格式

packed 打包格式：左右声道的样本交替存储， L1 R1 L2 R2 L3 R3 L4 R4
planar 平面格式：左右声道的样本分别连续存储，L1 L2 L3 L4 ......R1 R2 R3 R4

AVFrame 结构体

AVFrame 主要用于存储解码后的原始音视频数据，解码后/编码前，引用计数原理与 AVPacket 类似
包括：视频帧的宽度和高度、plane 数据数组、行长度数组、每个声道的样本数、格式等
plane：一片连续的缓冲区
data[]
- packed 视频：YUV 交织存储在 data[0]；
- planar 视频：data[0] 指向 Y-plane，data[1] 指向 U-plane，data[2] 指向 V-plane
- packed 音频：LR 交织存储在 data[0]
- planar 音频：data[0] 指向 L-plane，data[1] 指向 R-plane
linesize[]
- packed 视频：linesize[0] 表示一行图像所占空间，需 stride 对齐
- planar 视频：linesize[i] 表示一行图像在当前 plane 所占空间
- 音频：仅可设置 linesize[0]，表示一个音频 plane 的大小

// Abridged view of FFmpeg's AVFrame, which carries decoded (raw) audio/video data.
typedef struct AVFrame {
    // Per-plane data pointers: packed formats use data[0] only; planar formats use
    // one entry per plane (Y/U/V for video, one per channel for audio).
    uint8_t *data[AV_NUM_DATA_POINTERS];
    // Video: bytes occupied by one image row in each plane (stride-aligned).
    // Audio: only linesize[0] is used; it gives the size of one audio plane.
    int linesize[AV_NUM_DATA_POINTERS];
    // Video frame dimensions.
    int width, height;
    // Audio: number of samples per channel.
    int nb_samples;
    // Sample or pixel format (e.g. AV_SAMPLE_FMT_S16, AV_PIX_FMT_YUV420P).
    int format;
    // Reference-counted buffers backing the data planes.
    AVBufferRef *buf[AV_NUM_DATA_POINTERS];
    // ...... (remaining fields omitted)
} AVFrame;

// Abridged view of FFmpeg's AVBufferRef, the element type of AVFrame::buf[].
typedef struct AVBufferRef {
    // Underlying buffer object (opaque here); presumably shared by all references
    // and holding the reference count -- confirm against libavutil/buffer.h.
    AVBuffer *buffer;
    // Presumably the start of the referenced data -- confirm.
    uint8_t *data;
    // Presumably the size of data in bytes -- confirm.
    size_t   size;
} AVBufferRef;

音频帧内存分析

// Demo: allocate buffers for a packed and a planar stereo S16 audio frame and compare
// linesize[0] (the size of one audio plane) between the two layouts.
// NOTE(review): av_frame_alloc() / av_frame_get_buffer() results are not checked;
// production code should test them before using the frame.
AVFrame *frame1 = av_frame_alloc(); // allocate AVFrame
frame1->nb_samples = 1024;
frame1->format = AV_SAMPLE_FMT_S16; // 16-bit packed PCM
frame1->ch_layout = AV_CHANNEL_LAYOUT_STEREO; // 2 channels
av_frame_get_buffer(frame1, 0); // allocate AVBufferRef[] according to the format
// Packed: both channels share one plane -> 1024 samples * 2 channels * 2 bytes = 4096
fprintf(stderr, "frame1->linesize[0]: %d\n", frame1->linesize[0]); // 1024*2*2=4096

AVFrame *frame2 = av_frame_alloc();
frame2->nb_samples = 1024;
frame2->format = AV_SAMPLE_FMT_S16P; // 16-bit planar PCM
frame2->ch_layout = AV_CHANNEL_LAYOUT_STEREO; // 2 channels
av_frame_get_buffer(frame2, 0); // allocate AVBufferRef[] according to the format
// Planar: one plane per channel -> 1024 samples * 2 bytes = 2048 per plane
fprintf(stderr, "frame2->linesize[0]: %d\n", frame2->linesize[0]); // 1024*2=2048

av_frame_unref(frame1); // free AVBufferRef[]
av_frame_unref(frame2); // free AVBufferRef[]
av_frame_free(&frame1); // free AVFrame
av_frame_free(&frame2); // free AVFrame

视频帧内存分析

// Demo: allocate buffers for a planar (YUV420P) and a packed (YUYV422) 640x480 video
// frame and print the per-plane row sizes.
// NOTE(review): av_frame_alloc() / av_frame_get_buffer() results are not checked;
// production code should test them before using the frame.
// NOTE(review): the printed values equal the nominal row sizes presumably because
// 640 / 320 / 1280 already satisfy the alignment chosen by av_frame_get_buffer();
// other widths may print padded (stride-aligned) values -- confirm on the target platform.
AVFrame *frame1 = av_frame_alloc(); // allocate AVFrame
frame1->format = AV_PIX_FMT_YUV420P; // planar YUV 4:2:0, (1 U & V sample per 2x2 Y samples)
frame1->width = 640;
frame1->height = 480;
av_frame_get_buffer(frame1, 0); // allocate AVBufferRef[] according to the format
fprintf(stderr, "frame1->linesize[0]: %d\n", frame1->linesize[0]); // Y: 640
fprintf(stderr, "frame1->linesize[1]: %d\n", frame1->linesize[1]); // U: 320=640/2
fprintf(stderr, "frame1->linesize[2]: %d\n", frame1->linesize[2]); // V: 320=640/2

AVFrame *frame2 = av_frame_alloc();
frame2->format = AV_PIX_FMT_YUYV422; // packed YUV 4:2:2, YUYV
frame2->width = 640;
frame2->height = 480;
av_frame_get_buffer(frame2, 0); // allocate the single interleaved plane
// Packed: Y, U and V share one row -> 640 (Y) + 320 (U) + 320 (V) bytes
fprintf(stderr, "frame2->linesize[0]: %d\n", frame2->linesize[0]); // YUYV: 1280=640+320+320

av_frame_unref(frame1); // free AVBufferRef[]
av_frame_unref(frame2); // free AVBufferRef[]
av_frame_free(&frame1); // free AVFrame
av_frame_free(&frame2); // free AVFrame