持续创作,加速成长!这是我参与「掘金日新计划 · 6 月更文挑战」的第14天,点击查看活动详情
所谓的图像压缩,实际上就是将原图像中的部分像素点去掉,从而在一定程度上实现对图像的降采样,常说的图像金字塔便是对图像压缩最直观的一种解释,如下面的图像金字塔示意图所示。
在示例图中,第0层图像表示原图,指的是未进行降采样的图片,后面的每一层都是在前一层的基础上进行降采样得到的图像,一般都是先进行高斯滤波再降采样,所以也被称为高斯金字塔;与之相反的过程是由小图像恢复到大图像,被称为上采样,而拉普拉斯金字塔保存的是高斯金字塔每一层与其下一层上采样结果之间的差值,用于在上采样时补回降采样丢失的细节。
下面通过opencv给出的函数进行说明,代码非常简单,调用pyrDown即可完成整个过程,opencv在内部已经帮我们封装好。
import cv2
# Build an image pyramid with OpenCV's built-in down-sampling function.
image_path = './images/2.jpg'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals a missing or unreadable file by returning None
    # rather than raising; fail here with a clear message instead of letting
    # pyrDown raise an opaque error on the empty input.
    raise FileNotFoundError('cannot read image: ' + image_path)

# Down-sample repeatedly; every level is computed from the previous one.
img1 = cv2.pyrDown(image)
img2 = cv2.pyrDown(img1)
img3 = cv2.pyrDown(img2)
img4 = cv2.pyrDown(img3)

# Show every down-sampled level in its own window.
cv2.imshow('img', img1)
cv2.imshow('rst1', img2)
cv2.imshow('rst2', img3)
cv2.imshow('rst3', img4)

# Block until a key is pressed, then close all windows.
cv2.waitKey()
cv2.destroyAllWindows()
我把原图放一下,原图非常大,使用上述代码处理时速度非常快,需要原图的自取
处理后的结果如下所示
图像比较大,不能够一次截图整个图像,所以只是将部分结果进行展示。
之前facebook提出过一种算法,可以将大图像进行压缩,但是压缩后的图像非常的怪异,整个图像发生了较大的形变,先上他的处理结果图
整幅远景图像通过旋转矩阵等一系列转换后,变成了畸变较大的近景图,不过其算法的原理还是值得学习,下面我将代码附上,喜欢的可以研究一下。
import sys
from PIL import Image
from math import pi,sin,cos,tan,atan2,asin,acos,hypot,floor,fabs,sqrt,radians
from numpy import clip,array,matrix
def convertCoordinate(x, y, z, sequence):
    """Build the 3x3 rotation matrix that converts coordinate system A to B.

    Three elementary rotations (about the X, Y and Z axes) are composed in
    the order selected by ``sequence``.

    Args:
        x: Rotation angle about the X axis, in radians.
        y: Rotation angle about the Y axis, in radians.
        z: Rotation angle about the Z axis, in radians.
        sequence: Order in which the rotations are applied to a column
            vector: 0 is x->y->z, 1 is x->z->y, 2 is y->x->z, 3 is y->z->x,
            4 is z->x->y, 5 is z->y->x.

    Returns:
        A 3x3 ``numpy.matrix`` holding the combined rotation.

    Raises:
        ValueError: If ``sequence`` is not one of 0..5.
    """
    mx = matrix([
        [1, 0, 0],
        [0, cos(x), -sin(x)],
        [0, sin(x), cos(x)],
    ])
    my = matrix([
        [cos(y), 0, sin(y)],
        [0, 1, 0],
        [-sin(y), 0, cos(y)],
    ])
    mz = matrix([
        [cos(z), -sin(z), 0],
        [sin(z), cos(z), 0],
        [0, 0, 1],
    ])
    # Factors are listed left to right as they appear in the product, so the
    # rotation applied first to a column vector is the right-most one.
    products = {
        0: (mz, my, mx),
        1: (my, mz, mx),
        2: (mz, mx, my),
        3: (mx, mz, my),
        4: (my, mx, mz),
        5: (mx, my, mz),
    }
    try:
        left, middle, right = products[sequence]
    except (KeyError, TypeError):
        # Previously an unknown sequence reached `return A` with A unbound.
        raise ValueError(
            "sequence must be an integer from 0 to 5, got %r" % (sequence,)
        ) from None
    return (left.dot(middle)).dot(right)
# Rotation matrices used by outImgToXYZ for faces 1-4: A<n> is applied to the
# 2-D face coordinates first, B<n> after the stretch and translation step.
_FACE_TILT = asin(1.0 / sqrt(3))  # roughly 35.26 degrees

A1 = convertCoordinate(x=radians(0), y=radians(90), z=radians(-45), sequence=5)
B1 = convertCoordinate(x=radians(45), y=-_FACE_TILT - pi / 2.0, z=radians(0), sequence=2)

A2 = convertCoordinate(x=radians(0), y=radians(0), z=radians(45), sequence=2)
B2 = convertCoordinate(x=radians(-45), y=_FACE_TILT - pi, z=radians(0), sequence=2)

A3 = convertCoordinate(x=radians(180), y=radians(90), z=radians(-45), sequence=1)
B3 = convertCoordinate(x=radians(135), y=_FACE_TILT - pi / 2.0, z=radians(0), sequence=2)

A4 = convertCoordinate(x=radians(45), y=radians(-90), z=radians(0), sequence=3)
B4 = convertCoordinate(x=radians(45), y=_FACE_TILT - pi / 2.0, z=radians(0), sequence=2)
def _cornerFaceToXYZ(first, second, u, v, stretchAxis, shiftX, shiftZ):
    """Map the 2-D point (u, v) of one corner face to (x, y, z) floats."""
    # First coordinate conversion: rotate the in-plane point into 3-D.
    c = first.dot(matrix([u, v, 0]).T)
    c[stretchAxis] = c[stretchAxis] * sqrt(3)  # stretch
    # Second coordinate conversion: translate, then rotate again.
    c[0] = c[0] - shiftX
    c[2] = c[2] - shiftZ
    d = second.dot(c)
    # Return plain floats rather than 1x1 matrices so callers do not depend
    # on implicit array-to-scalar conversion (deprecated in newer NumPy).
    return (float(d[0, 0]), float(d[1, 0]), float(d[2, 0]))


def outImgToXYZ(i, j, face, halfOutSize, toward):
    """Get x, y, z coordinates from output-image pixel coordinates.

    The output pixel is first rescaled to (a, b) = (i, j) * 4 / (2 * halfOutSize)
    and then mapped into 3-D space according to the face it belongs to.
    NOTE(review): the original comment describes the target as a cube whose
    coordinates run from -1 to 1 -- confirm against the algorithm's reference.

    Args:
        i: X coordinate of the pixel in the output image.
        j: Y coordinate of the pixel in the output image.
        face: Face number: 0 is the central face, 1 left top, 2 left bottom,
            3 right top, 4 right bottom.
        halfOutSize: Half of the output image width.
        toward: Viewing direction; only 0 (front face) is implemented.

    Returns:
        A tuple ``(x, y, z)`` of floats.

    Raises:
        ValueError: If ``toward`` is not 0 or ``face`` is not in 0..4
            (previously these cases failed with an unbound-variable error).
    """
    if toward != 0:
        raise ValueError("unsupported toward value: %r (only 0 is implemented)" % (toward,))

    a = i * 4.0 / (halfOutSize * 2)
    b = j * 4.0 / (halfOutSize * 2)

    if face == 0:
        # Central face: lies on the plane x = sqrt(3) - 1.
        return (sqrt(3) - 1, a - 2.0, 2.0 - b)
    if face == 1:  # left top
        return _cornerFaceToXYZ(A1, B1, b - 1, a - 1, 2, 1 - sqrt(3), sqrt(2))
    if face == 2:  # left bottom (the only face stretched along axis 0)
        return _cornerFaceToXYZ(A2, B2, b - 3, a - 1, 0, sqrt(2), 1 - sqrt(3))
    if face == 3:  # right top
        return _cornerFaceToXYZ(A3, B3, b - 1, a - 3, 2, sqrt(3) - 1, sqrt(2))
    if face == 4:  # right bottom
        return _cornerFaceToXYZ(A4, B4, b - 3, a - 3, 2, sqrt(3) - 1, sqrt(2))
    raise ValueError("unsupported face number: %r (expected 0..4)" % (face,))
def convertBack(imgIn, imgOut):
    """Fill ``imgOut`` from ``imgIn`` using an inverse transformation.

    For every output pixel the face it belongs to is determined, the pixel is
    mapped to a 3-D point with ``outImgToXYZ``, the point is converted to a
    horizontal angle (theta) and a vertical angle (phi), and the matching
    source pixel is copied. ``imgOut`` is modified in place.

    Args:
        imgIn: Source image; must provide ``size`` and ``load()``.
            NOTE(review): the angle-to-pixel mapping presumably expects a
            panorama whose width is twice its height -- confirm.
        imgOut: Destination image; must provide ``size`` and ``load()``.
    """
    inWidth, inHeight = imgIn.size
    outWidth, outHeight = imgOut.size
    inPix = imgIn.load()
    outPix = imgOut.load()

    edge = inWidth / 4  # width of the viewing angle
    halfOutSize = outWidth / 2
    span = 2.0 * edge
    maxRow = inHeight - 1

    for i in range(outWidth):
        distX = fabs(halfOutSize - i)
        isLeft = i < halfOutSize
        isRight = i > halfOutSize
        for j in range(outHeight):
            if distX + fabs(halfOutSize - j) <= halfOutSize:
                face = 0  # central diamond
            elif isLeft and j < halfOutSize:
                face = 1  # top-left corner
            elif isLeft and j > halfOutSize:
                face = 2  # bottom-left corner
            elif isRight and j < halfOutSize:
                face = 3  # top-right corner
            elif isRight and j > halfOutSize:
                face = 4  # bottom-right corner
            else:
                # On a centre line but outside the diamond: treat as face 0.
                face = 0

            (x, y, z) = outImgToXYZ(i, j, face, halfOutSize, 0)
            theta = atan2(y, x)  # horizontal angle
            phi = atan2(z, hypot(x, y))  # vertical angle

            # Matching coordinates in the source image.
            uf = span * (theta + pi) / pi
            vf = span * (pi / 2 - phi) / pi

            # Use explicit integer indices: wrap the column around the image
            # width and clamp the row to the valid range, instead of handing
            # a float row to the pixel-access object.
            col = int(uf) % inWidth
            row = min(max(int(vf), 0), maxRow)
            outPix[i, j] = inPix[col, row]
# Load the source image, convert it into a square output image, then save
# and display the result.
imgIn = Image.open("./images/2.jpg")
inSize = imgIn.size

# The output is a square whose side is a quarter of the input width times sqrt(2).
outSide = int(inSize[0] / 4 * sqrt(2))
imgOut = Image.new("RGB", (outSide, outSide), "black")

convertBack(imgIn, imgOut)
imgOut.save("out.jpg")
imgOut.show()
这个算法对上面10000x5000的图像进行转换时耗时大概五六分钟,毕竟计算的复杂度还是非常高的,而高斯金字塔的计算在毫秒级。两者相比,阅读opencv内部封装函数的实现也会带来很大的提高。