The previous articles in this series covered the underlying audio/video fundamentals, and you may have come away feeling that the barrier to entry is very high. In reality, audio/video development is not as hard as it looks; stick with it and you will make progress. In this article we build a simple effects player to get a first, hands-on feel for video playback. Without further ado, here is what the demo looks like.
Video playback generally involves these stages: protocol parsing, demuxing, video decoding, audio decoding, video and audio rendering, and audio/video synchronization. Briefly, here is what each step does (a small sync sketch follows the list):

1. Protocol parsing: usually only needed when playing network video. The streaming protocol wraps the media data, and parsing it recovers the actual media payload.
2. Demuxing: the media data is packed inside a container format such as MP4 or FLV. Parsing the container separates out the raw video and audio streams.
3. Decoding: the H.264 and AAC streams obtained by demuxing are decoded into individual video frames and audio samples.
4. Rendering: the decoded frames are drawn to the screen and the decoded audio is played back.
5. Audio/video synchronization: video frames may be rendered too fast, or the audio may fall behind, so the two timelines have to be kept in step.
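For step 5, here is a minimal sketch of the sync decision itself, assuming two hypothetical inputs that a player would track: `videoPTS`, the presentation timestamp of the next decoded frame, and `audioClock`, the current audio playback position, both in seconds. This is not code from the demo, just an illustration of the idea:

```swift
import Foundation

// A minimal sketch of the A/V sync decision (hypothetical inputs, not from the demo).
enum SyncAction {
    case drop               // video is behind the audio: discard the frame
    case render             // close enough: show the frame now
    case wait(TimeInterval) // video is ahead: wait before showing it
}

func syncDecision(videoPTS: TimeInterval, audioClock: TimeInterval,
                  threshold: TimeInterval = 0.04) -> SyncAction {
    let diff = videoPTS - audioClock
    if diff < -threshold { return .drop }
    if diff > threshold { return .wait(diff) }
    return .render
}
```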
Looking at this list, it might feel hopelessly complex. In practice, though, we rarely have to do most of it ourselves: the platform APIs, or the well-known open-source project FFmpeg, already implement the bulk of the logic. Generally we do not need to worry about how to parse protocols, demux, or decode; libraries such as FFmpeg have encapsulated those details, and we only have to call their methods. Standing on the shoulders of giants, we can focus on our own requirements.
Next I will show how, on iOS, we can stand on those shoulders and build a player with special effects.
0x02 Demuxing, Protocol Parsing & Decoding
iOS ships an audio/video framework, AVFoundation, which provides high-level media APIs. Let's see how AVFoundation's AVPlayer can handle protocol parsing, demuxing, and decoding for us.
Anyone who has done iOS development knows AVPlayer as a video player whose easy-to-use API gets video on screen quickly. This time we use it purely as our decoder.
Initialize the AVPlayer:

```swift
func initDecoder(url: URL) {
    asset = AVAsset.init(url: url)
    asset?.addObserver(self, forKeyPath: "tracks", options: .new, context: nil)
    playItem = AVPlayerItem.init(asset: asset!, automaticallyLoadedAssetKeys: ["tracks", "duration"])
    decoder = AVPlayer.init(playerItem: playItem)
    let attributes = [kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_420YpCbCr8BiPlanarFullRange]
    // AVPlayerItemVideoOutput lets us pull out the frames that AVPlayer has decoded
    playItemOutput = AVPlayerItemVideoOutput.init(pixelBufferAttributes: attributes)
    if let playItemOutput = playItemOutput {
        playItem?.add(playItemOutput)
    }
}
```
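The code above registers a KVO observer for the asset's "tracks" key, but the callback itself is not shown. A sketch of what such a handler might do, assuming the class keeps the `decoder` and `displayLink` properties used elsewhere in this post:

```swift
// A sketch (not from the original project): once "tracks" has loaded,
// start playback and un-pause the display link that drives frame requests.
override func observeValue(forKeyPath keyPath: String?, of object: Any?,
                           change: [NSKeyValueChangeKey: Any]?,
                           context: UnsafeMutableRawPointer?) {
    if keyPath == "tracks" {
        DispatchQueue.main.async {
            self.decoder?.play()
            self.displayLink?.isPaused = false
        }
    } else {
        super.observeValue(forKeyPath: keyPath, of: object, change: change, context: context)
    }
}
```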
Set up a timer (a CADisplayLink) that requests the frames we need while playing:

```swift
func initTimeDriver() {
    displayLink = CADisplayLink.init(target: self, selector: #selector(displayLinkAction(sender:)))
    displayLink?.add(to: RunLoop.current, forMode: .common)
    displayLink?.isPaused = true
    displayLink?.preferredFramesPerSecond = rate
}
```
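`rate` is not defined in the snippet above; one reasonable way to choose it is to read the video track's nominal frame rate once the asset's tracks are available. A sketch of that idea (the helper name is mine, not from the original code):

```swift
import AVFoundation

// A sketch: derive the display-link rate from the asset's video track.
// nominalFrameRate is 0 when unknown, so fall back to 30 fps.
func preferredRate(for asset: AVAsset) -> Int {
    let fps = asset.tracks(withMediaType: .video).first?.nominalFrameRate ?? 0
    return fps > 0 ? Int(fps.rounded()) : 30
}
```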
Request each frame and hand it to the rendering layer:

```swift
@objc func displayLinkAction(sender: CADisplayLink) {
    DispatchQueue.global().async {
        if let decoder = self.decoder {
            if decoder.currentTime() == .zero {
                debugPrint("drop current time")
                return
            }
            debugPrint("decode \(decoder.currentTime())")
            let time = decoder.currentTime()
            self.renderCurrentFrame(time: time)
        }
    }
}
```
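Driving the request off `decoder.currentTime()` works; `AVPlayerItemVideoOutput` can also map the display link's own timestamp into item time, which is the pattern Apple's sample code tends to use. A sketch of that variant, with the same properties assumed as above:

```swift
// A sketch of an alternative callback: ask the video output which item time
// corresponds to the upcoming display refresh, then render that frame.
@objc func displayLinkAction(sender: CADisplayLink) {
    guard let output = self.playItemOutput else { return }
    let hostTime = sender.timestamp + sender.duration // next vsync, in host time
    let itemTime = output.itemTime(forHostTime: hostTime)
    if output.hasNewPixelBuffer(forItemTime: itemTime) {
        self.renderCurrentFrame(time: itemTime)
    }
}
```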
Start rendering:

```swift
// renderView is the rendering layer, implemented with OpenGL ES
func renderCurrentFrame(time: CMTime) {
    DispatchQueue.main.async {
        if self.playItem != nil, let output = self.playItemOutput {
            if output.hasNewPixelBuffer(forItemTime: time) {
                let pix = output.copyPixelBuffer(forItemTime: time, itemTimeForDisplay: nil)
                if let pix = pix {
                    self.renderView?.render(pix: pix, type: self.playType.rawValue)
                }
            }
        }
    }
}
```
That completes the decoding side; next up is the rendering layer.
0x03 Rendering
On iOS the rendering layer is usually written with OpenGL ES or Metal. OpenGL ES is now deprecated on iOS and Metal is the recommended choice; I implemented an OpenGL ES version first and will follow up with a Metal version later. Let's walk through the OpenGL ES renderer.
Initialize the view:

```swift
override init(frame: CGRect) {
    super.init(frame: frame)
    // Set up the OpenGL ES context
    self.setupContent()
    // Initialize the OpenGL ES texture cache
    self.setupVideoCache()
    // Load the vertex and fragment shaders
    self.setupProgram()
    // Load the vertex data
    self.setupVerData()
}

func setupContent() {
    self.contentScaleFactor = UIScreen.main.scale
    context = EAGLContext.init(api: .openGLES3)
    assert(context != nil, "context init fail ....")
    let isSuccess = EAGLContext.setCurrent(context)
    assert(isSuccess, "setup context fail ....")
}

func setupProgram() {
    if self.program != 0 {
        glDeleteProgram(self.program)
    }
    self.program = GLESTool.loadShader(vShaderName: "videoVsh", fShaderName: "videoFsh")
    assert(self.program != 0, "compile program fail ....")
    glUseProgram(self.program)
}
```
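`GLESTool.loadShader` is not listed in this post. A sketch of what such a helper has to do, compiling both shaders and linking them into a program (this version takes GLSL source strings rather than the bundle file names used above):

```swift
import OpenGLES

// A sketch of a compile-and-link helper along the lines of GLESTool.loadShader.
enum ShaderLoader {
    static func compile(_ source: String, type: GLenum) -> GLuint {
        let shader = glCreateShader(type)
        source.withCString { cString in
            var ptr: UnsafePointer<GLchar>? = cString
            glShaderSource(shader, 1, &ptr, nil)
        }
        glCompileShader(shader)
        var status: GLint = 0
        glGetShaderiv(shader, GLenum(GL_COMPILE_STATUS), &status)
        assert(status == GL_TRUE, "shader compile failed")
        return shader
    }

    static func loadProgram(vertexSource: String, fragmentSource: String) -> GLuint {
        let vsh = compile(vertexSource, type: GLenum(GL_VERTEX_SHADER))
        let fsh = compile(fragmentSource, type: GLenum(GL_FRAGMENT_SHADER))
        let program = glCreateProgram()
        glAttachShader(program, vsh)
        glAttachShader(program, fsh)
        glLinkProgram(program)
        var status: GLint = 0
        glGetProgramiv(program, GLenum(GL_LINK_STATUS), &status)
        assert(status == GL_TRUE, "program link failed")
        // Shader objects can be deleted once they are linked into the program
        glDeleteShader(vsh)
        glDeleteShader(fsh)
        return program
    }
}
```

Back in the view, the next step loads the vertex data: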
```swift
func setupVerData() {
    // Vertex positions (x, y, z) for a full-screen quad
    let attrArr: [GLfloat] = [1.0, 1.0, 0.0,  -1.0, 1.0, 0.0,  -1.0, -1.0, 0.0,  1.0, -1.0, 0.0]
    // Texture coordinates (s, t) for the same four vertices
    let texturePosition: [GLfloat] = [1.0, 1.0,  0.0, 1.0,  0.0, 0.0,  1.0, 0.0]
    // Indices of the two triangles that make up the quad
    let indicate: [GLubyte] = [0, 1, 2,  0, 3, 2]
    glGenBuffers(1, &vbo)
    glBindBuffer(GLenum(GL_ARRAY_BUFFER), vbo)
    glBufferData(GLenum(GL_ARRAY_BUFFER),
                 GLsizeiptr(MemoryLayout<GLfloat>.size * (attrArr.count + texturePosition.count)),
                 nil, GLenum(GL_STATIC_DRAW))
    glBufferSubData(GLenum(GL_ARRAY_BUFFER), 0,
                    GLsizeiptr(MemoryLayout<GLfloat>.size * attrArr.count), attrArr)
    glBufferSubData(GLenum(GL_ARRAY_BUFFER), MemoryLayout<GLfloat>.size * attrArr.count,
                    MemoryLayout<GLfloat>.size * texturePosition.count, texturePosition)
    glVertexAttribPointer(0, 3, GLenum(GL_FLOAT), GLboolean(GL_FALSE),
                          GLsizei(MemoryLayout<GLfloat>.size * 3), nil)
    glEnableVertexAttribArray(0)
    glVertexAttribPointer(1, 2, GLenum(GL_FLOAT), GLboolean(GL_FALSE),
                          GLsizei(MemoryLayout<GLfloat>.size * 2),
                          UnsafeRawPointer(bitPattern: MemoryLayout<GLfloat>.size * attrArr.count))
    glEnableVertexAttribArray(1)
    glGenBuffers(1, &veo)
    glBindBuffer(GLenum(GL_ELEMENT_ARRAY_BUFFER), veo)
    glBufferData(GLenum(GL_ELEMENT_ARRAY_BUFFER), GLsizeiptr(MemoryLayout<GLubyte>.stride * 6),
                 indicate, GLenum(GL_STATIC_DRAW))
}
```
```swift
func setupVideoCache() {
    let error: CVReturn = CVOpenGLESTextureCacheCreate(kCFAllocatorDefault, nil, self.context, nil, &videoTextureCache)
    if error != noErr {
        assert(false, "video cache fail....")
        return
    }
}
```
Hand the decoded picture to the GPU for rendering:

```swift
func render(pix: CVPixelBuffer, type: Int) {
    // Play type: normal, black-and-white, 3x3 grid, or LUT-filter playback
    self.playType = type
    self.pixelBuffer = pix
    if type == 3, self.lutTextureID == 0 {
        // Upload the LUT image used by the filter
        self.setupLutTexture(imageName: "lookup", uniformName: "lut")
    }
    // Upload the shared uniforms to OpenGL ES
    self.updateCommonUniform()
    // Upload the decoded frame into GPU memory
    self.uploadGPUPixel()
    // Start rendering
    self.render()
}
```
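`setupLutTexture` loads the lookup-table image into a texture and binds it to the `lut` sampler; it is not listed in this post. A sketch of the idea, assuming the LUT ships as a 512x512 PNG in the bundle, that `lutTextureID` and `program` are properties of the view as above, and that texture unit 2 is free (units 0 and 1 hold the Y and UV planes):

```swift
import UIKit
import OpenGLES

// A sketch (not the original implementation): upload a lookup-table PNG as an RGBA texture.
func setupLutTexture(imageName: String, uniformName: String) {
    guard let cgImage = UIImage(named: imageName)?.cgImage else { return }
    let width = cgImage.width
    let height = cgImage.height
    // Redraw the image into a plain RGBA byte buffer the GPU can consume
    var pixels = [UInt8](repeating: 0, count: width * height * 4)
    let cgContext = CGContext(data: &pixels, width: width, height: height,
                              bitsPerComponent: 8, bytesPerRow: width * 4,
                              space: CGColorSpaceCreateDeviceRGB(),
                              bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue)
    cgContext?.draw(cgImage, in: CGRect(x: 0, y: 0, width: width, height: height))

    glGenTextures(1, &lutTextureID)
    glActiveTexture(GLenum(GL_TEXTURE2)) // assumption: unit 2, since 0 and 1 hold Y and UV
    glBindTexture(GLenum(GL_TEXTURE_2D), lutTextureID)
    glTexParameteri(GLenum(GL_TEXTURE_2D), GLenum(GL_TEXTURE_MIN_FILTER), GL_LINEAR)
    glTexParameteri(GLenum(GL_TEXTURE_2D), GLenum(GL_TEXTURE_MAG_FILTER), GL_LINEAR)
    glTexParameteri(GLenum(GL_TEXTURE_2D), GLenum(GL_TEXTURE_WRAP_S), GL_CLAMP_TO_EDGE)
    glTexParameteri(GLenum(GL_TEXTURE_2D), GLenum(GL_TEXTURE_WRAP_T), GL_CLAMP_TO_EDGE)
    glTexImage2D(GLenum(GL_TEXTURE_2D), 0, GL_RGBA, GLsizei(width), GLsizei(height),
                 0, GLenum(GL_RGBA), GLenum(GL_UNSIGNED_BYTE), pixels)
    glUniform1i(glGetUniformLocation(self.program, uniformName), 2)
}
```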
Upload the decoded YUV image to the GPU:

```swift
// Upload the decoded YUV image into GPU memory (YUV layout was covered in an earlier article)
func uploadGPUPixel() {
    guard let pixelBuffer = pixelBuffer else { return }
    let width = CVPixelBufferGetWidth(pixelBuffer)
    let height = CVPixelBufferGetHeight(pixelBuffer)
    let planeCount = CVPixelBufferGetPlaneCount(pixelBuffer)
    // Pick the YUV -> RGB conversion matrix from the buffer's color attachment
    let colorAttachments = CVBufferGetAttachment(pixelBuffer, kCVImageBufferYCbCrMatrixKey, nil)?.takeUnretainedValue() as? String
    if colorAttachments != nil {
        if CFStringCompare(colorAttachments! as CFString, kCVImageBufferYCbCrMatrix_ITU_R_601_4, .compareCaseInsensitive) == .compareEqualTo {
            // BT.601 color space
            colorConverMat = kColorConversion601
        } else {
            // BT.709 color space
            colorConverMat = kColorConversion709
        }
    } else {
        assert(false, "get color space fail ...")
    }
    let colorMatLocation = glGetUniformLocation(self.program, "colorConversionMatrix")
    glUniformMatrix3fv(colorMatLocation, 1, GLboolean(GL_FALSE), colorConverMat)
    // Projection matrix
    let projectionLoc = glGetUniformLocation(self.program, "vprojection")
    var projectionMatrix: KSMatrix4 = KSMatrix4()
    ksMatrixLoadIdentity(&projectionMatrix)
    let aspect: Float = Float(self.frame.size.width / self.frame.size.height)
    ksPerspective(&projectionMatrix, 0.0, aspect, 0.10, 1000.0)
    glUniformMatrix4fv(projectionLoc, 1, GLboolean(GL_FALSE), &projectionMatrix.m.0.0)
    // Model-view matrix: rotate 180 degrees around X to flip the upside-down decoded image
    let mvlocation = glGetUniformLocation(self.program, "vmatrix")
    var modelViewMatrix: KSMatrix4 = KSMatrix4()
    ksMatrixLoadIdentity(&modelViewMatrix)
    ksRotate(&modelViewMatrix, 180, 1, 0, 0)
    ksRotate(&modelViewMatrix, yAngle, 0, 1, 0)
    ksRotate(&modelViewMatrix, zAngle, 0, 0, 1)
    ksScale(&modelViewMatrix, 0.5, 0.5, 0.5)
    glUniformMatrix4fv(mvlocation, 1, GLboolean(GL_FALSE), &modelViewMatrix.m.0.0)
    // Y (luma) plane -> texture unit 0
    glActiveTexture(GLenum(GL_TEXTURE0))
    let error = CVOpenGLESTextureCacheCreateTextureFromImage(kCFAllocatorDefault, videoTextureCache!, pixelBuffer, nil, GLenum(GL_TEXTURE_2D), GLint(GL_LUMINANCE), GLsizei(width), GLsizei(height), GLenum(GL_LUMINANCE), GLenum(GL_UNSIGNED_BYTE), 0, &lumaTexture)
    if error != noErr {
        assert(false, "upload image to cache fail ...")
    }
    glBindTexture(CVOpenGLESTextureGetTarget(lumaTexture!), CVOpenGLESTextureGetName(lumaTexture!))
    glTexParameteri(GLenum(GL_TEXTURE_2D), GLenum(GL_TEXTURE_MIN_FILTER), GL_LINEAR)
    glTexParameteri(GLenum(GL_TEXTURE_2D), GLenum(GL_TEXTURE_MAG_FILTER), GL_LINEAR)
    // Wrap mode
    glTexParameteri(GLenum(GL_TEXTURE_2D), GLenum(GL_TEXTURE_WRAP_S), GL_CLAMP_TO_EDGE)
    glTexParameteri(GLenum(GL_TEXTURE_2D), GLenum(GL_TEXTURE_WRAP_T), GL_CLAMP_TO_EDGE)
    let tex1 = glGetUniformLocation(self.program, "sampleY")
    glUniform1i(tex1, 0)
    if planeCount == 2 {
        // UV (chroma) plane -> texture unit 1, at half resolution
        glActiveTexture(GLenum(GL_TEXTURE1))
        let uvError = CVOpenGLESTextureCacheCreateTextureFromImage(kCFAllocatorDefault, videoTextureCache!, pixelBuffer, nil, GLenum(GL_TEXTURE_2D), GLint(GL_LUMINANCE_ALPHA), GLsizei(width / 2), GLsizei(height / 2), GLenum(GL_LUMINANCE_ALPHA), GLenum(GL_UNSIGNED_BYTE), 1, &chromaTexture)
        if uvError != noErr {
            assert(false, "upload image to cache fail ...")
        }
        glBindTexture(CVOpenGLESTextureGetTarget(chromaTexture!), CVOpenGLESTextureGetName(chromaTexture!))
        glTexParameteri(GLenum(GL_TEXTURE_2D), GLenum(GL_TEXTURE_MIN_FILTER), GL_LINEAR)
        glTexParameteri(GLenum(GL_TEXTURE_2D), GLenum(GL_TEXTURE_MAG_FILTER), GL_LINEAR)
        // Wrap mode
        glTexParameteri(GLenum(GL_TEXTURE_2D), GLenum(GL_TEXTURE_WRAP_S), GL_CLAMP_TO_EDGE)
        glTexParameteri(GLenum(GL_TEXTURE_2D), GLenum(GL_TEXTURE_WRAP_T), GL_CLAMP_TO_EDGE)
        let tex2 = glGetUniformLocation(self.program, "sampleUV")
        glUniform1i(tex2, 1)
    }
}
```
Start the draw:

```swift
private func render() {
    // Reset the viewport
    self.resetViewPort()
    // Draw the primitives
    glDrawElements(GLenum(GL_TRIANGLES), GLsizei(6), GLenum(GL_UNSIGNED_BYTE), nil)
    // Present the finished frame to the layer
    self.context.presentRenderbuffer(Int(GL_RENDERBUFFER))
    // Clear the per-frame cache
    self.clearCache()
}
```
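`resetViewPort` and `clearCache` are not listed either. The important part of `clearCache` is releasing the per-frame CoreVideo textures and flushing the texture cache; a sketch, assuming the `lumaTexture`, `chromaTexture`, and `videoTextureCache` properties used above:

```swift
// A sketch of clearCache: drop the per-frame Y/UV textures and flush the
// CVOpenGLESTextureCache so its buffers can be recycled.
func clearCache() {
    lumaTexture = nil
    chromaTexture = nil
    if let cache = videoTextureCache {
        CVOpenGLESTextureCacheFlush(cache, 0)
    }
}
```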
At this point the rendering side of the playback API is complete. Now let's look at the vertex and fragment shaders in detail.
Vertex shader:

```glsl
#version 300 es
// Primitive position, i.e. where the texture will be drawn
layout (location = 0) in vec4 vPosition;
// Texture coordinate
layout (location = 1) in vec2 aTexcoord;
// Model-view matrix
uniform mat4 vmatrix;
// Projection matrix
uniform mat4 vprojection;
// Texture coordinate passed on to the fragment shader
out vec2 textCoord;
void main() {
    // Pass the texture coordinate through to the fragment shader
    textCoord = aTexcoord;
    // Primitive position; the decoded image is upside down, so the model-view
    // matrix includes a 180-degree rotation to correct it
    gl_Position = vPosition * vmatrix * vprojection;
}
```
Fragment shader:

```glsl
#version 300 es
precision mediump float;
// Texture coordinate passed in from the vertex shader
in vec2 textCoord;
// Final output color
out vec4 fragColor;
// Y (luma) plane of the decoded YUV image
uniform sampler2D sampleY;
// UV (chroma) plane of the decoded YUV image
uniform sampler2D sampleUV;
// LUT texture used for the filter effect
uniform sampler2D lut;
// Matrix that converts YUV to RGB
uniform mat3 colorConversionMatrix;
// Play type
uniform int type;

// Normal playback
void normal() {
    vec3 yuv;
    vec3 rgb;
    yuv.x = texture(sampleY, textCoord).r;
    yuv.yz = texture(sampleUV, textCoord).ra - vec2(0.5, 0.5);
    rgb = colorConversionMatrix * yuv;
    fragColor = vec4(rgb, 1);
}

// Playback with the color stripped out (black and white)
void clearColor() {
    vec3 yuv;
    vec3 rgb;
    yuv.x = texture(sampleY, textCoord).r;
    yuv.yz = texture(sampleUV, textCoord).ra - vec2(0.5, 0.5);
    rgb = colorConversionMatrix * vec3(yuv.x, 0, 0);
    fragColor = vec4(rgb, 1);
}

// 3x3 grid playback
void ninePlay() {
    // Map each third of the screen back onto the full [0, 1] texture range
    vec2 tcd = textCoord;
    if (tcd.x < 1.0 / 3.0) {
        tcd.x = tcd.x * 3.0;
    } else if (tcd.x < 2.0 / 3.0) {
        tcd.x = (tcd.x - 1.0 / 3.0) * 3.0;
    } else {
        tcd.x = (tcd.x - 2.0 / 3.0) * 3.0;
    }
    if (tcd.y <= 1.0 / 3.0) {
        tcd.y = tcd.y * 3.0;
    } else if (tcd.y < 2.0 / 3.0) {
        tcd.y = (tcd.y - 1.0 / 3.0) * 3.0;
    } else {
        tcd.y = (tcd.y - 2.0 / 3.0) * 3.0;
    }
    vec3 yuv;
    vec3 rgb;
    yuv.x = texture(sampleY, tcd).r;
    yuv.yz = texture(sampleUV, tcd).ra - vec2(0.5, 0.5);
    rgb = colorConversionMatrix * yuv;
    fragColor = vec4(rgb, 1);
}

// Filtered playback: look the RGB color up in a 512x512 LUT laid out as an 8x8 grid of 64x64 tiles
void filterPlay() {
    vec3 yuv;
    vec3 rgb;
    yuv.x = texture(sampleY, textCoord).r;
    yuv.yz = texture(sampleUV, textCoord).ra - vec2(0.5, 0.5);
    rgb = colorConversionMatrix * yuv;
    // The blue channel selects the two neighbouring tiles to sample
    float blueColor = rgb.b * 63.0;
    vec2 quad1;
    quad1.y = floor(floor(blueColor) / 8.0);
    quad1.x = floor(blueColor) - (quad1.y * 8.0);
    vec2 quad2;
    quad2.y = floor(ceil(blueColor) / 8.0);
    quad2.x = ceil(blueColor) - (quad2.y * 8.0);
    // Red and green select the texel inside each tile
    vec2 texPos1;
    texPos1.x = (quad1.x * 0.125) + 0.5 / 512.0 + ((0.125 - 1.0 / 512.0) * rgb.r);
    texPos1.y = (quad1.y * 0.125) + 0.5 / 512.0 + ((0.125 - 1.0 / 512.0) * rgb.g);
    vec2 texPos2;
    texPos2.x = (quad2.x * 0.125) + 0.5 / 512.0 + ((0.125 - 1.0 / 512.0) * rgb.r);
    texPos2.y = (quad2.y * 0.125) + 0.5 / 512.0 + ((0.125 - 1.0 / 512.0) * rgb.g);
    vec4 newColor1 = texture(lut, texPos1);
    vec4 newColor2 = texture(lut, texPos2);
    // Blend the two samples by the fractional part of the blue index
    vec4 newColor = mix(newColor1, newColor2, fract(blueColor));
    fragColor = mix(vec4(rgb, 0.0), vec4(newColor.rgb, 0.0), 1.0);
}

void main() {
    if (type == 0) {
        normal();
    } else if (type == 1) {
        clearColor();
    } else if (type == 2) {
        ninePlay();
    } else if (type == 3) {
        filterPlay();
    }
}
```
With OpenGL ES vertex and fragment shaders we can manipulate every single pixel efficiently, any way we like; you are now someone who gets to play with pixels, haha.
0x04 Summary
To sum up, three simple steps give us a special-effects video player, and there is plenty of room to build on it: image enhancement, beautification, filters, whatever cool player you want. Joking aside, the rendering layer is not actually simple, but work through a few classic books on rendering and you will be able to talk shop with the masters.