1. 利用ffmpeg将mp3转为pcm，并在pcm数据前加上wav头，就是一个完整的wav文件
2. 代码
#include "utils.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/avutil.h>
#include <libavutil/attributes.h>
#include <libavutil/channel_layout.h>
#include <libavutil/common.h>
#include <libavutil/file.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>
#include <libavutil/timestamp.h>
#include <libswresample/swresample.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
#define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000
/*
 * On-disk layout of a canonical 44-byte WAV header:
 *   WaveHeader (12) + WaveChunkHeader("fmt ", 8) + WaveFmtBody (16)
 *   + WaveChunkHeader("data", 8)
 *
 * The WAV format mandates little-endian fields of exactly 32/16 bits,
 * so use <stdint.h> fixed-width types instead of the non-portable BSD
 * u_int/u_short typedefs. Field order gives natural alignment, so no
 * padding is inserted and sizeof() matches the on-disk sizes.
 * NOTE(review): values are written in host byte order — correct on
 * little-endian machines only; confirm target before porting.
 */
typedef struct {
uint32_t magic; /* 'RIFF' */
uint32_t length; /* file length minus the 8 bytes of magic+length */
uint32_t type; /* 'WAVE' */
} WaveHeader;
typedef struct {
uint16_t format; /* encoding; 0x0001 = PCM */
uint16_t channels; /* number of channels */
uint32_t sample_fq; /* sample rate in Hz */
uint32_t byte_p_sec; /* bytes per second = rate * channels * bytes/sample */
uint16_t byte_p_spl; /* block align: bytes per multi-channel sample frame */
uint16_t bit_p_spl; /* bits per sample: 8, 12 or 16 */
} WaveFmtBody;
typedef struct {
uint32_t type; /* chunk id, e.g. 'fmt ' or 'data' */
uint32_t length; /* chunk payload length in bytes */
} WaveChunkHeader;
/* Build a little-endian FOURCC from four characters. */
#define COMPOSE_ID(a,b,c,d) ((a) | ((b)<<8) | ((c)<<16) | ((d)<<24))
#define WAV_RIFF COMPOSE_ID('R','I','F','F')
#define WAV_WAVE COMPOSE_ID('W','A','V','E')
#define WAV_FMT COMPOSE_ID('f','m','t',' ')
#define WAV_DATA COMPOSE_ID('d','a','t','a')
/*
 * Write a canonical 44-byte WAV header at the very beginning of fp.
 *
 * The format is hard-coded to match the swr_convert() output used by
 * main(): PCM, 2 channels, 44100 Hz, 16-bit samples.
 *
 * fp       - file opened for update ("wb+"); position is clobbered
 * data_len - number of PCM payload bytes that follow the header
 *
 * Returns 0 on success, -1 on allocation or write failure.
 */
int insert_wave_header(FILE* fp, long data_len)
{
int len;
WaveHeader* header;
WaveChunkHeader* chunk;
WaveFmtBody* body;
char* buf;
fseek(fp, 0, SEEK_SET); /* header lives at the start of the file */
len = sizeof(WaveHeader)+sizeof(WaveFmtBody)+sizeof(WaveChunkHeader)*2; /* 44 bytes */
buf = malloc(len); /* FIX: result was used unchecked */
if (buf == NULL)
return -1;
header = (WaveHeader*)buf;
header->magic = WAV_RIFF;
/* RIFF length = everything after magic+length: 'WAVE' (4) + both
 * chunk headers + fmt body + PCM payload. */
header->length = data_len + sizeof(WaveFmtBody)+sizeof(WaveChunkHeader)*2 + 4;
header->type = WAV_WAVE;
/* FIX: the three chunk/body pointers below were assigned from char*
 * without a cast — a constraint violation that strict compilers reject. */
chunk = (WaveChunkHeader*)(buf+sizeof(WaveHeader));
chunk->type = WAV_FMT;
chunk->length = 16; /* sizeof(WaveFmtBody) */
body = (WaveFmtBody*)(buf+sizeof(WaveHeader)+sizeof(WaveChunkHeader));
body->format = (u_short)0x0001; /* PCM encoding */
body->channels = (u_short)0x02; /* stereo */
body->sample_fq = 44100; /* 44.1 kHz sample rate */
body->byte_p_sec = 176400; /* 44100 * 2 channels * 2 bytes per sample */
body->byte_p_spl = (u_short)0x4; /* block align: 2 channels * 2 bytes */
body->bit_p_spl = (u_short)16; /* 16-bit samples */
chunk = (WaveChunkHeader*)(buf+sizeof(WaveHeader)+sizeof(WaveChunkHeader)+sizeof(WaveFmtBody));
chunk->type = WAV_DATA;
chunk->length = data_len;
if (fwrite(buf, 1, len, fp) != (size_t)len) { /* FIX: write was unchecked */
free(buf);
return -1;
}
free(buf);
return 0;
}
/* Per-file decoding state shared by init_ffmpeg() and main(). */
typedef struct {
int videoindex; /* index of best video stream, or negative if none */
int sndindex; /* index of best audio stream, or -1 if none */
AVFormatContext* pFormatCtx; /* demuxer context for the input file */
AVCodecContext* sndCodecCtx; /* decoder context of the audio stream */
AVCodec* sndCodec; /* decoder matching sndCodecCtx->codec_id */
SwrContext *swr_ctx; /* resampler: decoded format -> S16 stereo 44.1kHz */
/* Destination buffer for swr_convert(); 16-byte aligned for SIMD. */
DECLARE_ALIGNED(16,uint8_t,audio_buf) [AVCODEC_MAX_AUDIO_FRAME_SIZE * 4];
}AudioState;
/*
 * Open the input file, locate its best audio (and video) stream and
 * open a decoder for the audio stream.
 *
 * is       - zeroed AudioState to fill in
 * filepath - path of the input media file
 *
 * Returns 0 on success, -1 on any failure.
 */
int init_ffmpeg(AudioState* is, char* filepath)
{
/* FIX: videoindex was left uninitialized and then passed to
 * av_find_best_stream() as wanted_stream_nb — undefined behavior
 * (the original run log printed videoindex=-1381258232). */
is->videoindex = -1;
is->sndindex = -1;
if(NULL == filepath)
{
dbmsg("input file is NULL");
return -1;
}
avcodec_register_all();
/* FIX: dropped avfilter_register_all() — no libavfilter header is
 * included (implicit declaration) and no filter is used here. */
av_register_all();
is->pFormatCtx = avformat_alloc_context();
if(avformat_open_input(&is->pFormatCtx, filepath, NULL, NULL)!=0)
return -1;
if(avformat_find_stream_info(is->pFormatCtx, NULL)<0)
return -1;
/* FIX: pass the real filename instead of 0 (the log showed "from '(null)'"). */
av_dump_format(is->pFormatCtx, 0, filepath, 0);
is->videoindex = av_find_best_stream(is->pFormatCtx, AVMEDIA_TYPE_VIDEO, is->videoindex, -1, NULL, 0);
is->sndindex = av_find_best_stream(is->pFormatCtx, AVMEDIA_TYPE_AUDIO,is->sndindex, is->videoindex, NULL, 0);
dbmsg("videoindex=%d, sndindex=%d", is->videoindex, is->sndindex);
if(is->sndindex >= 0)
{
is->sndCodecCtx = is->pFormatCtx->streams[is->sndindex]->codec;
is->sndCodec = avcodec_find_decoder(is->sndCodecCtx->codec_id);
if(is->sndCodec == NULL)
{
dbmsg("Codec not found");
return -1;
}
if(avcodec_open2(is->sndCodecCtx, is->sndCodec, NULL) < 0)
return -1;
}
return 0;
}
/*
 * mp3towav: decode argv[1] with FFmpeg, resample every frame to
 * interleaved S16 stereo 44.1kHz PCM, write the PCM to ./test.wav
 * (leaving 44 bytes of room), then back-fill the WAV header.
 */
int main(int argc, char **argv)
{
int ret;
FILE* fp;
long file_data_size = 0; /* PCM byte count; must start at 0 */
int len1, len2, got_frame;
AVPacket *packet = av_mallocz(sizeof(AVPacket));
AVFrame *frame = av_frame_alloc();
AudioState* is = (AudioState*) av_mallocz(sizeof(AudioState));
uint8_t *out[] = { is->audio_buf };
if(argc < 2) /* FIX: argv[1] was read without checking argc */
{
dbmsg("usage: %s <input file>", argv[0]);
return -1;
}
fp = fopen("./test.wav", "wb+");
if(fp == NULL) /* FIX: fopen() result was used unchecked */
{
dbmsg("cannot open ./test.wav");
return -1;
}
len1 = sizeof(WaveHeader)+sizeof(WaveFmtBody)+sizeof(WaveChunkHeader)*2;
fseek(fp,len1, SEEK_SET); /* reserve the 44-byte WAV header up front */
dbmsg("len1=%d",len1);
/* Step 1: set up FFmpeg, then decode + resample to PCM. */
if( (ret=init_ffmpeg(is, argv[1])) != 0) /* 1.1 init FFmpeg */
{
dbmsg("init_ffmpeg error");
fclose(fp);
return -1;
}
while( (av_read_frame(is->pFormatCtx, packet)>=0) ) /* 1.2 read frames */
{
/* FIX: every packet is now released; the original leaked each
 * packet's payload (av_free_packet ran only once, after the loop,
 * and the continue paths freed nothing). */
if(packet->stream_index != is->sndindex)
{
av_free_packet(packet);
continue;
}
if((ret=avcodec_decode_audio4(is->sndCodecCtx, frame, &got_frame, packet)) < 0) /* 1.3 decode */
{
dbmsg("file eof");
av_free_packet(packet);
break;
}
if(got_frame <= 0) /* no data yet, feed more packets */
{
av_free_packet(packet);
continue;
}
/* 1.4 lazily create the resampler from the first decoded frame
 * (decoded mp3 is planar S16P; we need interleaved S16). */
if(NULL==is->swr_ctx)
{
dbmsg("frame: channels=%d,format=%d, sample_rate=%d", frame->channels, frame->format, frame->sample_rate);
is->swr_ctx = swr_alloc_set_opts(NULL, AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_S16, 44100, av_get_default_channel_layout(frame->channels), frame->format, frame->sample_rate, 0, NULL);
if(is->swr_ctx == NULL)
{
dbmsg("swr_ctx == NULL");
}
swr_init(is->swr_ctx);
}
len2 = swr_convert(is->swr_ctx, out, 44100,(const uint8_t **)frame->extended_data, frame->nb_samples);
if(len2 > 0) /* len2 = samples per channel actually converted */
{
file_data_size += len2;
/* 1.5 write the converted PCM: len2 samples * 2 channels of s16 */
fwrite((short *)is->audio_buf, sizeof(short), (size_t) len2* 2, fp);
}
av_free_packet(packet);
}
file_data_size *= 4; /* samples -> bytes: 2 channels * 2 bytes each */
dbmsg("file_data_size=%ld", file_data_size);
/* Step 2: back-fill the WAV header at offset 0. */
ret = insert_wave_header(fp, file_data_size);
av_free(packet);
av_frame_free(&frame); /* FIX: av_free(frame) leaked the frame's buffers */
if(is->swr_ctx != NULL)
swr_free(&is->swr_ctx); /* FIX: resampler was never released */
if(is->sndCodecCtx != NULL) /* guard: no audio stream -> NULL ctx */
avcodec_close(is->sndCodecCtx);
avformat_close_input(&is->pFormatCtx);
av_free(is); /* FIX: AudioState itself was never freed */
fclose(fp);
return ret;
}
3.运行结果
cong@msi:/work/ffmpeg/test/alsa/testalsa/5mp3towav$ make run
export LD_LIBRARY_PATH=/work/ffmpeg/out/lib/ \
&& ./mp3towav /work/ffmpeg/test/resource//test.mp3
mp3towav.c:main[150]: len1=44
[mp3 @ 0x14d3620] Skipping 0 bytes of junk at 197687.
libavutil/crc.c:av_crc_init[313]:
[mp3 @ 0x14d3620] Estimating duration from bitrate, this may be inaccurate
Input #0, mp3, from '(null)':
Metadata:
artist : 佚名
title : 法国国歌 马赛曲
TYER : 2013-10-26
Duration: 00:03:28.20, start: 0.000000, bitrate: 199 kb/s
Stream #0:0: Audio: mp3, 44100 Hz, stereo, s16p, 192 kb/s
Stream #0:1: Video: mjpeg, yuvj420p(pc, bt470bg/unknown/unknown), 600x600 [SAR 1:1 DAR 1:1], 90k tbr, 90k tbn, 90k tbc
Metadata:
title : e
comment : Cover (front)
mp3towav.c:init_ffmpeg[120]: videoindex=-1381258232, sndindex=0
mp3towav.c:main[173]: frame: channnels=2,format=6, sample_rate=44100
mp3towav.c:main[186]: file_data_size=36725760
ls查看
cong@msi:/work/ffmpeg/test/alsa/testalsa/5mp3towav$ ls -l
total 36064
-rw-rw-r-- 1 cong cong 885 Sep 11 11:25 Makefile
-rwxrwxr-x 1 cong cong 64126 Sep 11 11:44 mp3towav
-rw-rw-r-- 1 cong cong 6183 Sep 11 11:24 mp3towav.c
-rw-rw-r-- 1 cong cong 115344 Sep 11 11:44 mp3towav.o
-rw-rw-r-- 1 cong cong 36725804 Sep 11 11:44 test.wav
-rw-rw-r-- 1 cong cong 333 Sep 9 11:31 utils.h
4. 说明
mp3towav.c:main[173]: AV_CH_LAYOUT_STEREO=3, AV_SAMPLE_FMT_S16=1, freq=44100
mp3towav.c:main[174]: frame: channnels=2, default_layout=3, format=6, sample_rate=44100
ffmpeg中:include/libavutil/samplefmt.h
enum AVSampleFormat {
AV_SAMPLE_FMT_NONE = -1,
AV_SAMPLE_FMT_U8, ///< unsigned 8 bits
AV_SAMPLE_FMT_S16, ///< signed 16 bits --> 1 这个是pcm的数据格式
AV_SAMPLE_FMT_S32, ///< signed 32 bits
AV_SAMPLE_FMT_FLT, ///< float
AV_SAMPLE_FMT_DBL, ///< double
AV_SAMPLE_FMT_U8P, ///< unsigned 8 bits, planar
AV_SAMPLE_FMT_S16P, ///< signed 16 bits, planar -->6 这个是ffmepg解码之后的数据格式
AV_SAMPLE_FMT_S32P, ///< signed 32 bits, planar
AV_SAMPLE_FMT_FLTP, ///< float, planar
AV_SAMPLE_FMT_DBLP, ///< double, planar
AV_SAMPLE_FMT_NB ///< Number of sample formats. DO NOT USE if linking dynamically
};
interleaved -->理解为交叉存取 --> AV_SAMPLE_FMT_S16是两个声道的声音是交叉存储的
planar --> 理解为平面存取 --> AV_SAMPLE_FMT_S16P是先存1个声道的数据再存另一个声道的数据
AV_SAMPLE_FMT_S16P is planar signed 16 bit audio, i.e. 2 bytes for each sample which is same for AV_SAMPLE_FMT_S16.
The only difference is in AV_SAMPLE_FMT_S16 samples of each channel are interleaved i.e. if you have two channel audio then the samples buffer will look like
c1 c1 c2 c2 c1 c1 c2 c2… -->AV_SAMPLE_FMT_S16的数据组织方式
where c1 is a sample for channel1 and c2 is sample for channel2.
while for one frame of planar audio you will have something like
c1 c1 c1 c1 … c2 c2 c2 c2 … -->AV_SAMPLE_FMT_S16P的数据组织方式
now how is it stored in AVFrame:
for planar audio:
data[i] will contain the data of channel i (assuming channel 0 is first channel).
however if you have more channels than 8 then data for the rest of the channels can be found in the extended_data attribute of AVFrame.
for non-planar audio
data[0] will contain the data for all channels in an interleaved manner.
参考文章:
What is the difference between AV_SAMPLE_FMT_S16P and AV_SAMPLE_FMT_S16?