1. 使用 PyTorch 实现 GPU 加速的卷积滤波(如边缘检测)
import torch
import torch.nn as nn
import cv2
import numpy as np
# Sobel edge detection with a PyTorch convolution, on the GPU when available.
# Reads "input.jpg", filters each colour channel with the horizontal Sobel
# kernel and writes the result to "edge_detection_gpu.jpg".
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

image = cv2.imread("input.jpg")
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise OSError("cv2.imread could not load 'input.jpg' (missing or unreadable)")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# HWC uint8 -> float NCHW with a batch dimension of 1.
image_tensor = torch.from_numpy(image).float().permute(2, 0, 1)
image_tensor = image_tensor.unsqueeze(0).to(device)

# groups=3 makes this a depthwise convolution: every input channel is filtered
# independently by its own single-channel kernel, so the weight tensor has
# shape (out_channels, in_channels / groups, kH, kW) == (3, 1, 3, 3).
conv_layer = nn.Conv2d(
    in_channels=3,
    out_channels=3,
    kernel_size=3,
    padding=1,
    groups=3,
    bias=False,
).to(device)

sobel_x = torch.tensor(
    [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]],
    dtype=torch.float32,
)
# One copy of the kernel per channel -> (3, 1, 3, 3).
sobel_kernel = sobel_x.repeat(3, 1, 1, 1).to(device)

with torch.no_grad():
    # copy_ (unlike assigning to .data) fails loudly on a shape mismatch.
    conv_layer.weight.copy_(sobel_kernel)
    output_tensor = conv_layer(image_tensor)

# The Sobel response is signed; take the magnitude so that edges of both
# polarities survive the conversion to uint8.
output = output_tensor.squeeze(0).permute(1, 2, 0).abs().cpu().numpy()
output = np.clip(output, 0, 255).astype(np.uint8)
cv2.imwrite("edge_detection_gpu.jpg", cv2.cvtColor(output, cv2.COLOR_RGB2BGR))
2. 使用 OpenCV 的 CUDA 模块加速高斯模糊
import cv2
import time
# Gaussian blur with OpenCV's CUDA module, timed over 100 applications.
# Reads "input.jpg" and writes the blurred image to "blur_gpu.jpg".
cuda_device_count = cv2.cuda.getCudaEnabledDeviceCount()
print("CUDA devices:", cuda_device_count)
if cuda_device_count == 0:
    # Fail early with a clear message instead of an opaque error on upload().
    raise RuntimeError("No CUDA-enabled device available to OpenCV")

image = cv2.imread("input.jpg")
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise OSError("cv2.imread could not load 'input.jpg' (missing or unreadable)")

# Copy the image from host memory into a GpuMat.
gpu_image = cv2.cuda_GpuMat()
gpu_image.upload(image)

# 15x15 Gaussian filter, 8-bit 3-channel in and out; sigma is passed as 0.
gaussian_filter = cv2.cuda.createGaussianFilter(
    cv2.CV_8UC3,
    cv2.CV_8UC3,
    (15, 15),
    0,
)

# perf_counter is the monotonic, high-resolution clock meant for benchmarks.
# NOTE(review): apply() is called without an explicit stream; presumably it
# blocks until the result is ready - confirm before trusting the timing.
start_time = time.perf_counter()
for _ in range(100):
    gpu_blur = gaussian_filter.apply(gpu_image)
end_time = time.perf_counter()

# Copy the last result back to host memory.
result = gpu_blur.download()
print(f"GPU Time: {end_time - start_time:.4f} seconds")
cv2.imwrite("blur_gpu.jpg", result)
3. 使用 CuPy 加速图像傅里叶变换
import cupy as cp
import cv2
import numpy as np
import time
# 2-D FFT magnitude spectrum computed on the GPU with CuPy.
# Reads "input.jpg" as grayscale and writes the log-magnitude spectrum,
# scaled to 0..255, to "fft_spectrum_gpu.jpg".
image = cv2.imread("input.jpg", cv2.IMREAD_GRAYSCALE)
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise OSError("cv2.imread could not load 'input.jpg' (missing or unreadable)")

image_gpu = cp.asarray(image)

start_time = time.perf_counter()
fft_gpu = cp.fft.fft2(image_gpu)
fft_shift = cp.fft.fftshift(fft_gpu)
# log1p(x) == log(1 + x): stays finite where the magnitude is exactly 0,
# whereas log(0) would yield -inf and break the min-max normalisation below.
magnitude_spectrum = cp.log1p(cp.abs(fft_shift))
# CuPy launches GPU work asynchronously; wait for it to finish so the timer
# measures the computation rather than just the kernel launches.
cp.cuda.Stream.null.synchronize()
end_time = time.perf_counter()

magnitude_cpu = cp.asnumpy(magnitude_spectrum)
print(f"GPU FFT Time: {end_time - start_time:.4f} seconds")

magnitude_cpu = cv2.normalize(magnitude_cpu, None, 0, 255, cv2.NORM_MINMAX)
cv2.imwrite("fft_spectrum_gpu.jpg", magnitude_cpu.astype(np.uint8))
4. 使用 Numba 编写自定义 GPU 核函数(图像反色)
from numba import cuda
import numpy as np
import cv2
import time
# Load the image to invert; the (height, width, channels) unpacking below
# requires the 3-dimensional array that a colour read produces.
image = cv2.imread("input.jpg")
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise OSError("cv2.imread could not load 'input.jpg' (missing or unreadable)")
height, width, channels = image.shape
@cuda.jit
def invert_colors_kernel(image):
    """Invert every channel of every pixel of ``image`` in place.

    Each thread handles one pixel: the first grid coordinate indexes axis 0
    of ``image`` and the second indexes axis 1. Threads that fall outside the
    array (the grid is usually rounded up to whole blocks) do nothing.

    Args:
        image: 3-D device array indexed as ``image[axis0, axis1, channel]``.
            The ``255 - value`` inversion presumably assumes 8-bit samples -
            confirm against the caller.
    """
    x, y = cuda.grid(2)
    if x < image.shape[0] and y < image.shape[1]:
        # Use the actual channel count instead of a hard-coded 3 so the
        # kernel also works for e.g. 4-channel images.
        for c in range(image.shape[2]):
            image[x, y, c] = 255 - image[x, y, c]
# Launch the inversion kernel over the whole image, time it, and write the
# result to "inverted_gpu.jpg".
image_gpu = cuda.to_device(image)

# One thread per pixel; round the grid up so partial blocks at the image
# border are still covered (the kernel bounds-checks its coordinates).
threads_per_block = (16, 16)
blocks_per_grid_x = (height + threads_per_block[0] - 1) // threads_per_block[0]
blocks_per_grid_y = (width + threads_per_block[1] - 1) // threads_per_block[1]
blocks_per_grid = (blocks_per_grid_x, blocks_per_grid_y)

# Warm-up: a @cuda.jit kernel is compiled on its first launch, which would
# otherwise dominate the measurement. Run it once on a scratch buffer of the
# same dtype and rank (not on image_gpu - the kernel works in place and a
# second pass would undo the inversion).
warmup_gpu = cuda.to_device(np.zeros((2, 2, channels), dtype=image.dtype))
invert_colors_kernel[(1, 1), threads_per_block](warmup_gpu)
cuda.synchronize()

start_time = time.perf_counter()
invert_colors_kernel[blocks_per_grid, threads_per_block](image_gpu)
# Kernel launches return immediately; wait for completion before stopping
# the clock.
cuda.synchronize()
end_time = time.perf_counter()

image_cpu = image_gpu.copy_to_host()
print(f"GPU Invert Time: {end_time - start_time:.6f} seconds")
cv2.imwrite("inverted_gpu.jpg", image_cpu)
5. 使用 PyTorch 实现实时风格迁移(GPU加速)
import torch
import torchvision.models as models
from torchvision import transforms
from PIL import Image
# Set-up for neural style transfer: pick the device, load the VGG-19 feature
# extractor and turn "content.jpg" / "style.jpg" into normalised batch tensors.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Newer torchvision releases deprecate `pretrained=True` in favour of the
# `weights=` enum; use the enum when this torchvision provides it and fall
# back to the legacy flag otherwise.
vgg19_weights = getattr(models, "VGG19_Weights", None)
if vgg19_weights is not None:
    vgg19 = models.vgg19(weights=vgg19_weights.IMAGENET1K_V1)
else:
    vgg19 = models.vgg19(pretrained=True)
model = vgg19.features.to(device).eval()
# The network is only a fixed feature extractor: freeze its weights so no
# gradients are computed or stored for them.
for parameter in model.parameters():
    parameter.requires_grad_(False)

preprocess = transforms.Compose([
    transforms.Resize(512),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Force 3-channel RGB: a grayscale or RGBA file would otherwise produce a
# tensor whose channel count does not match the 3-value mean/std above.
# The `with` block closes the underlying file once the pixels are converted.
with Image.open("content.jpg") as content_file:
    content_image = content_file.convert("RGB")
with Image.open("style.jpg") as style_file:
    style_image = style_file.convert("RGB")

# Add the batch dimension and move to the selected device.
content_tensor = preprocess(content_image).unsqueeze(0).to(device)
style_tensor = preprocess(style_image).unsqueeze(0).to(device)
def _gram_matrix(features):
    """Return the normalised Gram matrix of a (batch, channels, H, W) tensor."""
    batch, channels, height, width = features.shape
    flat = features.reshape(batch, channels, height * width)
    return torch.bmm(flat, flat.transpose(1, 2)) / (channels * height * width)


def _extract_features(model, image, layer_indices):
    """Feed ``image`` through ``model``'s children in order and return
    ``{index: output}`` for every child index listed in ``layer_indices``.

    Raises:
        ValueError: if a requested index is never reached.
    """
    wanted = set(layer_indices)
    if not wanted:
        return {}
    last_needed = max(wanted)
    collected = {}
    activation = image
    for index, layer in enumerate(model.children()):
        if isinstance(layer, torch.nn.ReLU):
            # Apply ReLU out of place: an in-place ReLU would overwrite the
            # output collected from the preceding layer.
            activation = torch.relu(activation)
        else:
            activation = layer(activation)
        if index in wanted:
            collected[index] = activation
        if index >= last_needed:
            break  # nothing further down the network is needed
    missing = wanted - collected.keys()
    if missing:
        raise ValueError(f"model has no layers at indices {sorted(missing)}")
    return collected


def style_transfer(model, content_input, style_input, iterations=500, *,
                   content_layers=(21,), style_layers=(0, 5, 10, 19, 28),
                   content_weight=1.0, style_weight=1e6):
    """Optimise an image to match content features and style statistics.

    Starting from a copy of ``content_input``, the image is updated with
    L-BFGS to minimise
    ``content_weight * content_loss + style_weight * style_loss``, where the
    content loss is the MSE between feature maps at ``content_layers`` and
    the style loss is the MSE between Gram matrices at ``style_layers``.

    Args:
        model: module whose children are applied sequentially as the feature
            extractor (e.g. ``vgg19(...).features``).
        content_input: (batch, channels, H, W) content image tensor.
        style_input: (batch, channels, H', W') style image tensor; its
            spatial size may differ from the content image.
        iterations: number of ``optimizer.step`` calls.
        content_layers: child indices of ``model`` used for the content loss.
        style_layers: child indices of ``model`` used for the style loss.
            NOTE(review): the defaults are intended for torchvision's
            ``vgg19().features`` layout - confirm for other models.
        content_weight: multiplier of the content loss.
        style_weight: multiplier of the style loss.

    Returns:
        The optimised image tensor (same shape and device as
        ``content_input``, with ``requires_grad=True``).

    Raises:
        ValueError: if no layers are given or an index is not in ``model``.
    """
    content_layers = tuple(content_layers)
    style_layers = tuple(style_layers)
    if not content_layers and not style_layers:
        raise ValueError("at least one content or style layer is required")
    all_layers = content_layers + style_layers

    # The targets are constants of the optimisation, so build them without
    # recording an autograd graph.
    with torch.no_grad():
        content_targets = _extract_features(model, content_input, content_layers)
        style_targets = {
            index: _gram_matrix(feature)
            for index, feature in _extract_features(
                model, style_input, style_layers
            ).items()
        }

    # detach() guarantees a leaf tensor, which the optimiser requires.
    input_image = content_input.detach().clone().requires_grad_(True)
    optimizer = torch.optim.LBFGS([input_image])
    mse_loss = torch.nn.functional.mse_loss

    def closure():
        """Evaluate the loss and store its gradient on ``input_image``."""
        optimizer.zero_grad()
        features = _extract_features(model, input_image, all_layers)
        total_loss = input_image.new_zeros(())
        for index in content_layers:
            total_loss = total_loss + content_weight * mse_loss(
                features[index], content_targets[index]
            )
        for index in style_layers:
            total_loss = total_loss + style_weight * mse_loss(
                _gram_matrix(features[index]), style_targets[index]
            )
        # Differentiate with respect to the image only, so no gradients are
        # accumulated on the model's parameters.
        input_image.grad = torch.autograd.grad(total_loss, input_image)[0]
        return total_loss

    for _ in range(iterations):
        optimizer.step(closure)
    return input_image
# Run the optimisation and save the result to "style_transfer_gpu.jpg".
output_image = style_transfer(model, content_tensor, style_tensor)
# Drop only the batch dimension and move the result to host memory.
output_image = output_image.detach().squeeze(0).cpu()

# The network input was normalised with these per-channel statistics (the
# same values as in the Normalize step of `preprocess`); undo that so the
# saved image has natural colours.
normalize_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
normalize_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
output_image = output_image * normalize_std + normalize_mean
# Keep values inside [0, 1] so the conversion to 8-bit cannot overflow.
output_image = output_image.clamp(0.0, 1.0)

output_image = transforms.ToPILImage()(output_image)
output_image.save("style_transfer_gpu.jpg")
关键说明
- 硬件依赖:需 NVIDIA GPU 并安装正确版本的 CUDA 和 cuDNN。
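- 库安装:
pip install torch torchvision opencv-python-headless cupy numba
注意:pip 提供的 opencv-python / opencv-python-headless 预编译包不包含 CUDA 模块,示例 2 中的 cv2.cuda 需要自行从源码编译启用 CUDA 的 OpenCV(含 opencv_contrib)。CuPy 建议安装与本机 CUDA 版本匹配的预编译包(如 cupy-cuda11x 或 cupy-cuda12x),直接安装 cupy 会从源码编译。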
- 性能对比:与 CPU 版本相比,GPU 加速通常快 10-100 倍(取决于任务复杂度)。对于小图像或简单运算,主机与设备之间的数据传输开销可能抵消加速效果;计时时应先等待 GPU 同步完成,并排除首次调用的初始化/编译开销。
- 适用场景:
- PyTorch:适合深度学习相关的图像处理(如 GAN、超分辨率)。
- OpenCV CUDA:适合传统图像处理加速(滤波、特征提取)。
- CuPy/Numba:适合自定义数值计算或科研算法。