前言
最近研究了 iOS 的硬解码,在此做一个简单总结。
注:本篇只针对h264的裸码流进行解码
H264
组成
- H264的码流由NALU单元组成,NALU单元包含视频图像数据和H264的参数信息。
- 其中视频图像数据就是CMBlockBuffer,而H264的参数信息则可以组合成FormatDesc。具体来说参数信息包含SPS(Sequence Parameter Set)和PPS(Picture Parameter Set)。
我们能发现每一段数据前面都有startcode固定信息作为开头,然后是类型接着是真实数据。 根据nalu的类型我们可以判断它是pps,sps,idr还是其他帧。
分割数据类型
h264的解码主要首先就是去获得pps和sps参数
// Build the CMVideoFormatDescription from the cached SPS/PPS parameter sets.
// NOTE(review): this is an excerpt of -initH264Decoder (shown in full below);
// it assumes _sps/_pps and their sizes have already been populated.
const uint8_t* const parameterSetPointers[2] = { _sps, _pps };
const size_t parameterSetSizes[2] = { _spsSize, _ppsSize };
OSStatus status = CMVideoFormatDescriptionCreateFromH264ParameterSets(kCFAllocatorDefault,
2, //param count
parameterSetPointers,
parameterSetSizes,
4, //nal start code size
&_decoderFormatDescription);
因为都有相同的头部所以我们可以直接拆分startcode来得到每个类型的数据
1.通过NSInputStream读取视频流
// Open an input stream over the raw .h264 file so bytes can be pulled on demand.
self.fileStream = [NSInputStream inputStreamWithFileAtPath:fileName];
[self.fileStream open];
2.将分割出来的数据装到对象里面
// 用于获取到每一帧的包
// Returns the next NALU (including its leading 4-byte Annex-B start code)
// from the stream, or nil when no complete NALU is available.
-(VideoPacket*)nextPacket
{
    // Top up the scan buffer toward _bufferCap bytes.
    if(_bufferSize < _bufferCap && self.fileStream.hasBytesAvailable) {
        NSInteger readBytes = [self.fileStream read:_buffer + _bufferSize maxLength:_bufferCap - _bufferSize];
        if(readBytes > 0) { // read:maxLength: returns -1 on stream error; never add a negative count
            _bufferSize += readBytes; // advance to the new end of valid data
        }
    }
    // A packet must begin with the 4-byte start code (0x00 00 00 01); also
    // guard against comparing uninitialized memory when fewer than 4 bytes are buffered.
    if(_bufferSize < 4 || memcmp(_buffer, KStartCode, 4) != 0) {
        return nil;
    }
    if(_bufferSize >= 5) { // need at least the start code plus 1 byte of NALU payload
        uint8_t *bufferBegin = _buffer + 4;         // first byte after the leading start code
        uint8_t *bufferEnd = _buffer + _bufferSize; // one past the last valid byte
        while(bufferBegin != bufferEnd) {
            if(*bufferBegin == 0x01) { // candidate final byte of the NEXT start code
                if(memcmp(bufferBegin - 3, KStartCode, 4) == 0) { // confirmed: the next NALU begins here
                    NSInteger packetSize = bufferBegin - _buffer - 3;
                    VideoPacket *vp = [[VideoPacket alloc] initWithSize:packetSize];
                    memcpy(vp.buffer, _buffer, packetSize); // copy out the completed NALU
                    // Shift the remaining bytes down to the front of the buffer.
                    memmove(_buffer, _buffer + packetSize, _bufferSize - packetSize);
                    _bufferSize -= packetSize;
                    return vp;
                }
            }
            ++bufferBegin;
        }
        // End of stream with no trailing start code: flush the final NALU
        // instead of silently dropping the last frame of the file.
        if(!self.fileStream.hasBytesAvailable && _bufferSize > 4) {
            VideoPacket *vp = [[VideoPacket alloc] initWithSize:_bufferSize];
            memcpy(vp.buffer, _buffer, _bufferSize);
            _bufferSize = 0;
            return vp;
        }
    }
    return nil;
}
3.NALU 起始码之后的第一个字节(即整个包的第 5 个字节)的低 5 位表示数据类型,转为 10 进制后 7 是 SPS,8 是 PPS,5 是 IDR(I帧)信息,据此缓存参数集并将 I帧、B帧、P帧送去解码
// Converts one Annex-B NALU to AVCC framing (4-byte big-endian length header)
// and dispatches it by NALU type: SPS/PPS are cached for decoder setup,
// IDR and other slices are handed to VideoToolbox for decoding.
- (void)decodeVideoPacket:(VideoPacket *)vp {
    // Overwrite the 4-byte start code with the NALU length, big-endian (AVCC).
    uint32_t nalSize = (uint32_t)(vp.size - 4);
    uint8_t *pNalSize = (uint8_t*)(&nalSize);
    vp.buffer[0] = *(pNalSize + 3);
    vp.buffer[1] = *(pNalSize + 2);
    vp.buffer[2] = *(pNalSize + 1);
    vp.buffer[3] = *(pNalSize);
    CVPixelBufferRef pixelBuffer = NULL;
    int nalType = vp.buffer[4] & 0x1F; // low 5 bits of the NALU header = nal_unit_type
    switch (nalType) {
        case 0x05: // IDR (key frame)
            NSLog(@"Nal type is IDR frame");
            if([self initH264Decoder]) {
                pixelBuffer = [self decode:vp];
            }
            break;
        case 0x07: // SPS
            NSLog(@"Nal type is SPS");
            free(_sps); // release any previously cached copy (free(NULL) is a no-op)
            _spsSize = vp.size - 4;
            _sps = malloc(_spsSize);
            memcpy(_sps, vp.buffer + 4, _spsSize);
            break;
        case 0x08: // PPS
            NSLog(@"Nal type is PPS");
            free(_pps); // release any previously cached copy
            _ppsSize = vp.size - 4;
            _pps = malloc(_ppsSize);
            memcpy(_pps, vp.buffer + 4, _ppsSize);
            break;
        default: // non-IDR slices (P/B frames)
            NSLog(@"Nal type is B/P frame");
            if([self initH264Decoder]) { // ensure a session exists, same as the IDR path
                pixelBuffer = [self decode:vp];
            }
            break;
    }
    if(pixelBuffer) {
        // Deliver on the main queue. dispatch_sync onto the current queue
        // deadlocks, so call directly when already on the main thread.
        void (^deliver)(void) = ^{
            if (self.delegate && [self.delegate respondsToSelector:@selector(WJVideoToolBoxDecoderPixelBuffer:)]) {
                [self.delegate WJVideoToolBoxDecoderPixelBuffer:pixelBuffer];
            }
        };
        if ([NSThread isMainThread]) {
            deliver();
        } else {
            dispatch_sync(dispatch_get_main_queue(), deliver);
        }
        CVPixelBufferRelease(pixelBuffer); // balance the retain done in the decode callback
    }
    NSLog(@"Read Nalu size %ld", (long)vp.size); // cast so %ld matches on all architectures
}
VideoToolbox 解码
其实拿到pps和sps解码部分的代码就非常简单了
1.初始化解码器
// Lazily creates the VideoToolbox decompression session from the cached
// SPS/PPS. Returns YES when a session already exists or was created
// successfully, NO when parameter sets are missing or any creation step fails.
-(BOOL)initH264Decoder {
    if(_deocderSession) {
        return YES; // already initialized
    }
    if(_sps == NULL || _pps == NULL) {
        return NO; // parameter sets not received yet — cannot build a format description
    }
    const uint8_t* const parameterSetPointers[2] = { _sps, _pps };
    const size_t parameterSetSizes[2] = { _spsSize, _ppsSize };
    OSStatus status = CMVideoFormatDescriptionCreateFromH264ParameterSets(kCFAllocatorDefault,
                                                                          2, //param count
                                                                          parameterSetPointers,
                                                                          parameterSetSizes,
                                                                          4, //nal start code size
                                                                          &_decoderFormatDescription);
    if(status != noErr) {
        NSLog(@"IOS8VT: reset decoder session failed status=%d", (int)status);
        return NO; // was unconditionally YES before, which let callers decode with no session
    }
    // Request NV12 output.
    // kCVPixelFormatType_420YpCbCr8Planar is YUV420
    // kCVPixelFormatType_420YpCbCr8BiPlanarFullRange is NV12
    uint32_t v = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
    CFNumberRef pixelFormat = CFNumberCreate(NULL, kCFNumberSInt32Type, &v);
    const void *keys[] = { kCVPixelBufferPixelFormatTypeKey };
    const void *values[] = { pixelFormat };
    // Use CFType callbacks so the dictionary retains/releases its contents.
    CFDictionaryRef attrs = CFDictionaryCreate(NULL, keys, values, 1,
                                               &kCFTypeDictionaryKeyCallBacks,
                                               &kCFTypeDictionaryValueCallBacks);
    CFRelease(pixelFormat); // dictionary holds its own reference; fixes the original leak
    VTDecompressionOutputCallbackRecord callBackRecord;
    callBackRecord.decompressionOutputCallback = didDecompress;
    callBackRecord.decompressionOutputRefCon = NULL;
    status = VTDecompressionSessionCreate(kCFAllocatorDefault,
                                          _decoderFormatDescription,
                                          NULL, attrs,
                                          &callBackRecord,
                                          &_deocderSession);
    CFRelease(attrs);
    return status == noErr; // report session-creation failure to the caller
}
- 创建解码回调
// VTDecompressionSession output callback. Hands the decoded frame back through
// sourceFrameRefCon (a CVPixelBufferRef* passed to
// VTDecompressionSessionDecodeFrame). The buffer is retained here; the
// synchronous caller is responsible for releasing it.
static void didDecompress( void *decompressionOutputRefCon, void *sourceFrameRefCon, OSStatus status, VTDecodeInfoFlags infoFlags, CVImageBufferRef pixelBuffer, CMTime presentationTimeStamp, CMTime presentationDuration ){
    if (status != noErr || sourceFrameRefCon == NULL) {
        return; // decode failed or nowhere to store the result — leave the destination untouched
    }
    CVPixelBufferRef *outputPixelBuffer = (CVPixelBufferRef *)sourceFrameRefCon;
    *outputPixelBuffer = CVPixelBufferRetain(pixelBuffer);
}
- 执行解码
// Synchronously decodes one AVCC-framed NALU. Returns the decoded frame
// (retained by the output callback — the caller must CVPixelBufferRelease it),
// or NULL on failure.
-(CVPixelBufferRef)decode:(VideoPacket*)vp {
    CVPixelBufferRef outputPixelBuffer = NULL;
    if (_deocderSession == NULL) {
        return NULL; // no session yet (SPS/PPS missing or setup failed)
    }
    CMBlockBufferRef blockBuffer = NULL;
    // kCFAllocatorNull: wrap vp.buffer without copying. Safe only because the
    // decode below is synchronous (flags == 0), so vp outlives the use.
    OSStatus status = CMBlockBufferCreateWithMemoryBlock(kCFAllocatorDefault,
                                                         (void*)vp.buffer, vp.size,
                                                         kCFAllocatorNull,
                                                         NULL, 0, vp.size,
                                                         0, &blockBuffer);
    if(status == kCMBlockBufferNoErr) {
        CMSampleBufferRef sampleBuffer = NULL;
        const size_t sampleSizeArray[] = {vp.size};
        status = CMSampleBufferCreateReady(kCFAllocatorDefault,
                                           blockBuffer,
                                           _decoderFormatDescription,
                                           1, 0, NULL, 1, sampleSizeArray,
                                           &sampleBuffer);
        // Check against noErr: this is a CMSampleBuffer result, not a
        // CMBlockBuffer one (the constants happen to share the value 0).
        if (status == noErr && sampleBuffer) {
            VTDecodeFrameFlags flags = 0; // synchronous decode on this thread
            VTDecodeInfoFlags flagOut = 0;
            OSStatus decodeStatus = VTDecompressionSessionDecodeFrame(_deocderSession,
                                                                      sampleBuffer,
                                                                      flags,
                                                                      &outputPixelBuffer,
                                                                      &flagOut);
            if(decodeStatus == kVTInvalidSessionErr) {
                NSLog(@"IOS8VT: Invalid session, reset decoder session");
            } else if(decodeStatus == kVTVideoDecoderBadDataErr) {
                NSLog(@"IOS8VT: decode failed status=%d(Bad data)", (int)decodeStatus);
            } else if(decodeStatus != noErr) {
                NSLog(@"IOS8VT: decode failed status=%d", (int)decodeStatus);
            }
            CFRelease(sampleBuffer);
        }
        CFRelease(blockBuffer);
    }
    return outputPixelBuffer;
}