python实现大图像的压缩

377 阅读5分钟

持续创作,加速成长!这是我参与「掘金日新计划 · 6 月更文挑战」的第14天,点击查看活动详情

所谓的图像压缩,实际上就是将原图像中的部分像素点去掉,从而在一定程度上实现对图像的降采样。常说的图像金字塔便是对这种压缩最直观的一种解释,下图是图像金字塔的示意图:

1.png

在示例图中,第0层图像表示原图,即未进行降采样的图片,后面的每一层都是在前一层的基础上进行降采样得到的图像。降采样之前一般先进行高斯滤波,所以也被称为高斯金字塔。与之相反的过程是由小图像恢复到大图像,被称为上采样(opencv中对应的函数是pyrUp);由于降采样时丢失的细节无法通过上采样恢复,需要额外保存每一层图像与其下一层上采样结果之间的差值,由这些差值图像构成的金字塔被称为拉普拉斯金字塔。

下面通过opencv给出的函数进行说明,代码非常简单,调用pyrDown即可完成整个过程,opencv在内部已经帮我们封装好。

import cv2

# Build an image pyramid with OpenCV's built-in down-sampling function.
image = cv2.imread('./images/2.jpg')
# cv2.imread reports failure by returning None rather than raising, so stop
# here with a clear message instead of failing later inside pyrDown.
if image is None:
    raise OSError("cannot read image './images/2.jpg'")
# Down-sample repeatedly; each level is computed from the previous one.
img1 = cv2.pyrDown(image)
img2 = cv2.pyrDown(img1)
img3 = cv2.pyrDown(img2)
img4 = cv2.pyrDown(img3)
# Show every down-sampled level in its own window.
cv2.imshow('img', img1)
cv2.imshow('rst1', img2)
cv2.imshow('rst2', img3)
cv2.imshow('rst3', img4)
# Keep the windows open until a key is pressed, then close them all.
cv2.waitKey()
cv2.destroyAllWindows()

我把原图放一下,原图非常大,使用上述代码处理时速度非常快,需要原图的自取

2.jpg

处理后的结果如下所示

image.png 图像比较大,无法一次截下整个图像,所以只展示了部分结果。 之前facebook提出过一种算法,可以对大图像进行压缩,但是压缩后的图像非常怪异,整个图像发生了较大的形变,先上它的处理结果图:

out.jpg

整幅远景图像在经过旋转矩阵等一系列转换后,变成了畸变较大的近景图。不过其算法的原理还是值得学习,下面我将代码附上,喜欢的可以研究一下。

import sys
from PIL import Image
from math import pi,sin,cos,tan,atan2,asin,acos,hypot,floor,fabs,sqrt,radians
from numpy import clip,array,matrix

# Convert coordinates from frame A to frame B by composing axis rotations.
def convertCoordinate(x, y, z, sequence):
    """Compose rotations about the X, Y and Z axes into a single matrix.

    Args:
        x: Rotation angle about the X axis, in radians.
        y: Rotation angle about the Y axis, in radians.
        z: Rotation angle about the Z axis, in radians.
        sequence: Order in which the three rotations act on a column vector:
            0 = x->y->z, 1 = x->z->y, 2 = y->x->z,
            3 = y->z->x, 4 = z->x->y, 5 = z->y->x.

    Returns:
        A 3x3 ``numpy.matrix``, the product of the three elementary rotation
        matrices in the requested order.

    Raises:
        ValueError: If ``sequence`` is not one of 0..5.
    """
    mx = matrix([
        [1, 0, 0],
        [0, cos(x), -sin(x)],
        [0, sin(x), cos(x)],
    ])
    my = matrix([
        [cos(y), 0, sin(y)],
        [0, 1, 0],
        [-sin(y), 0, cos(y)],
    ])
    mz = matrix([
        [cos(z), -sin(z), 0],
        [sin(z), cos(z), 0],
        [0, 0, 1],
    ])
    # Factors are listed left to right as they appear in the product; the
    # right-most factor is the rotation that acts on a column vector first.
    factors = {
        0: (mz, my, mx),
        1: (my, mz, mx),
        2: (mz, mx, my),
        3: (mx, mz, my),
        4: (my, mx, mz),
        5: (mx, my, mz),
    }
    if sequence not in factors:
        # Previously this fell through and failed with an unbound local.
        raise ValueError(
            "sequence must be an integer from 0 to 5, got %r" % (sequence,)
        )
    left, middle, right = factors[sequence]
    return (left.dot(middle)).dot(right)



# Pre-computed rotation matrices used by outImgToXYZ for faces 1..4.
# For face k, A<k> is applied to the flat face coordinates first and B<k> is
# applied after the stretch / translation step.
A1 = convertCoordinate(x=radians(0), y=radians(90), z=radians(-45), sequence=5)
B1 = convertCoordinate(x=radians(45), y=-asin(1.0/sqrt(3))-pi/2.0, z=radians(0), sequence=2)
A2 = convertCoordinate(x=radians(0), y=radians(0), z=radians(45), sequence=2)
B2 = convertCoordinate(x=radians(-45), y=asin(1.0/sqrt(3))-pi, z=radians(0), sequence=2)
A3 = convertCoordinate(x=radians(180), y=radians(90), z=radians(-45), sequence=1)
B3 = convertCoordinate(x=radians(135), y=asin(1.0/sqrt(3))-pi/2.0, z=radians(0), sequence=2)
A4 = convertCoordinate(x=radians(45), y=radians(-90), z=radians(0), sequence=3)
B4 = convertCoordinate(x=radians(45), y=asin(1.0/sqrt(3))-pi/2.0, z=radians(0), sequence=2)

# Get x, y, z coordinates from output-image pixel coordinates.
def outImgToXYZ(i,j,face,halfOutSize,toward):
    """Map an output-image pixel to a point in 3-D space.

    The original author describes the target as a cube whose coordinates run
    from -1 to 1 (not verified here).

    Args:
        i: X coordinate of the pixel in the output image.
        j: Y coordinate of the pixel in the output image.
        face: Region the pixel belongs to: 0 = "down" (original label),
            1 = left top, 2 = left bottom, 3 = right top, 4 = right bottom.
        halfOutSize: Half of the output image width.
        toward: Layout selector; only 0 ("front face") is implemented.

    Returns:
        A tuple ``(x, y, z)`` of plain floats, or ``None`` when ``toward`` is
        not 0 (unchanged behaviour of the original code).

    Raises:
        ValueError: If ``face`` is not one of 0..4.
    """
    if toward != 0:
        # No other orientation is implemented; keep the historical None.
        return None

    # Rescale pixel coordinates so the full output width spans 0..4.
    a = i * 4.0 / (halfOutSize * 2)
    b = j * 4.0 / (halfOutSize * 2)

    if face == 0:  # down
        return (sqrt(3) - 1, a - 2.0, 2.0 - b)

    # Per-face parameters: rotation pair, origin shift of the flat
    # coordinates, axis stretched by sqrt(3), and the translation subtracted
    # before the second rotation.
    if face == 1:  # left top
        first, second = A1, B1
        flat = (b - 1, a - 1)
        stretchAxis = 2
        shift = (1 - sqrt(3), 0, sqrt(2))
    elif face == 2:  # left bottom
        # NOTE(review): this face stretches/translates along axis 0 where the
        # other three use axis 2 - kept exactly as in the original.
        first, second = A2, B2
        flat = (b - 3, a - 1)
        stretchAxis = 0
        shift = (sqrt(2), 0, 1 - sqrt(3))
    elif face == 3:  # right top
        first, second = A3, B3
        flat = (b - 1, a - 3)
        stretchAxis = 2
        shift = (sqrt(3) - 1, 0, sqrt(2))
    elif face == 4:  # right bottom
        first, second = A4, B4
        flat = (b - 3, a - 3)
        stretchAxis = 2
        shift = (sqrt(3) - 1, 0, sqrt(2))
    else:
        # Previously this fell through and failed with an unbound local.
        raise ValueError("face must be an integer from 0 to 4, got %r" % (face,))

    # First coordinate conversion.
    c = first.dot(matrix([flat[0], flat[1], 0]).T)
    c[stretchAxis] = c[stretchAxis] * sqrt(3)  # stretch
    # Second coordinate conversion: translate, then rotate.
    c[0] = c[0] - shift[0]
    c[1] = c[1] - shift[1]
    c[2] = c[2] - shift[2]
    d = second.dot(c)
    # Return scalars, as face 0 does, instead of 1x1 matrices that callers
    # would have to coerce implicitly (NumPy deprecates that conversion).
    return (float(d[0, 0]), float(d[1, 0]), float(d[2, 0]))

# Convert using an inverse transformation.
def convertBack(imgIn,imgOut):
    """Fill ``imgOut`` in place with pixels sampled from ``imgIn``.

    Every output pixel is assigned a face, mapped to a 3-D point with
    ``outImgToXYZ``, converted to a horizontal and a vertical angle, and the
    source pixel at those angles is copied over.

    Args:
        imgIn: Source image; must expose ``size`` and ``load()``.
            Presumably a panorama whose width is twice its height, since the
            vertical angle spans half the width - TODO confirm.
        imgOut: Destination image, modified in place; same interface.
    """
    srcSize = imgIn.size
    dstSize = imgOut.size
    srcPixels = imgIn.load()
    dstPixels = imgOut.load()
    quarterWidth = srcSize[0]/4   # "view width" in the original comments
    half = dstSize[0] / 2
    for col in range(dstSize[0]):
        for row in range(dstSize[1]):
            # The central diamond is face 0; the four corners are faces 1..4.
            # Pixels lying exactly on a centre line outside the diamond fall
            # back to face 0.
            if fabs(half - col) + fabs(half - row) <= half:
                face = 0
            elif col < half:
                if row < half:
                    face = 1    # top-left corner
                elif row > half:
                    face = 2    # bottom-left corner
                else:
                    face = 0
            elif col > half:
                if row < half:
                    face = 3    # top-right corner
                elif row > half:
                    face = 4    # bottom-right corner
                else:
                    face = 0
            else:
                face = 0

            x, y, z = outImgToXYZ(col, row, face, half, 0)
            theta = atan2(y,x)  # horizontal angle
            planar = hypot(x,y)
            phi = atan2(z,planar)  # vertical angle
            # Matching coordinates in the source image.
            uf = ( 2.0*quarterWidth*(theta + pi) / pi )
            vf = ( 2.0*quarterWidth * (pi/2 - phi)/pi)
            srcCol = int(uf) % srcSize[0]       # wrap around horizontally
            srcRow = clip(vf,0,srcSize[1]-1)    # clamp vertically
            dstPixels[col, row] = srcPixels[srcCol, srcRow]

# Load the source image, allocate a square black RGB canvas whose side is
# (source width / 4) * sqrt(2) truncated to an integer, fill it with
# convertBack, then save and preview the result.
imgIn = Image.open("./images/2.jpg")
inSize = imgIn.size
outSide = int(inSize[0] / 4 * sqrt(2))
imgOut = Image.new("RGB", (outSide, outSide), "black")
convertBack(imgIn, imgOut)
imgOut.save("out.jpg")
imgOut.show()

这个算法对上面10000x5000的图像进行转换时耗时大约五六分钟,毕竟它是在Python中逐像素循环计算,复杂度非常高;而高斯金字塔的计算在毫秒级。对比两者之后再去阅读opencv内部封装函数的实现,也会有很大的收获。