自动化测试验证码处理不了?看这里 ->>> pytesseract库登录验证码识别

120 阅读2分钟

部分代码可以替换成自己的。除了比较扭曲的验证码识别不了,其他验证码都可以识别。

[点击查看详情获取验证码](https://github.crmeb.net/u/fei)

import re
from collections import Counter

from PIL import Image
from pytesseract import pytesseract

class ImageRecognize:
    """Capture a captcha image from a page and read its text with Tesseract.

    Intended pipeline: ``save_code_image`` (screenshot + crop) ->
    ``binarization_image`` (two-level black/white) -> ``delete_noisy_point``
    (remove isolated pixels) -> OCR. ``image_str`` runs the last three steps.
    """

    def __init__(self, driver):
        """Wrap ``driver`` in ``BasePages`` and build the screenshot path.

        :param driver: object handed to ``BasePages``; the methods below reach
            the browser through ``self.base.driver``.
        """
        self.base = BasePages(driver)
        # Screenshot file lives in the configured directory; the separator is
        # a literal backslash, so the path is Windows-style.
        self.imageSavePath = FilePathConfig.CODE_IMG_SAVE_PATH + "\\" + "shotCode.png"

    def save_code_image(self, elementPath, zoomNum=1.25):
        """Screenshot the page and crop it down to the captcha element.

        The screenshot is written to ``self.imageSavePath`` and then
        overwritten by the cropped captcha image at the same path.

        :param elementPath: locator of the captcha element, passed to
            ``self.base.driver.get_location``.
        :param zoomNum: display scaling factor of the screen (125% -> 1.25);
            element coordinates are multiplied by it to match screenshot
            pixels.
        :return: the cropped captcha image.
        """
        self.base.driver.origin_driver.get_screenshot_as_file(self.imageSavePath)
        # get_location is expected to return a mapping with the keys
        # "x", "y", "width" and "height" (those are the ones read below).
        imageData = self.base.driver.get_location(elementPath)
        # Element coordinates must be scaled by the screen zoom factor.
        left = imageData["x"] * zoomNum
        top = imageData["y"] * zoomNum
        right = left + imageData["width"] * zoomNum
        bottom = top + imageData["height"] * zoomNum
        self.imageObj = Image.open(self.imageSavePath)
        codeImage = self.imageObj.crop((left, top, right, bottom))
        codeImage.save(self.imageSavePath)
        return codeImage

    def binarization_image(self, image, threshold=150):
        """Convert the captcha image to a two-level black/white image.

        The image is first converted to greyscale (mode ``"L"``). Pixels
        brighter than ``threshold`` become 0 and all others become 255.

        :param image: image object to convert; it is not modified.
        :param threshold: grey level separating the two output values
            (default 150).
        :return: the converted greyscale image.
        """
        imageCode = image.convert("L")
        pixelData = imageCode.load()
        width, height = imageCode.size
        for x in range(width):
            for y in range(height):
                pixelData[x, y] = 0 if pixelData[x, y] > threshold else 255
        return imageCode

    def delete_noisy_point(self, image):
        """Remove isolated noise pixels and thin interference lines.

        The most frequent pixel value is taken as the background colour. Each
        pixel whose eight neighbours contain more than four background pixels
        is set to the background colour. Positions outside the image count as
        background, so border pixels are filtered without reading wrapped or
        out-of-range coordinates. The image is modified in place, in scan
        order, and saved next to the screenshot with ``shotCode`` replaced by
        ``ProcessedImage`` in the file name.

        :param image: image object to clean (modified in place).
        :return: the same image object.
        """
        pixelData = image.load()
        width, height = image.size
        # The more frequent colour is the background, the rarer one the text.
        counts = Counter(
            pixelData[x, y] for x in range(width) for y in range(height)
        )
        if counts:
            background = counts.most_common(1)[0][0]
            # The eight surrounding positions: left/right, up/down, diagonals.
            offsets = [
                (dx, dy)
                for dx in (-1, 0, 1)
                for dy in (-1, 0, 1)
                if (dx, dy) != (0, 0)
            ]
            for x in range(width):
                for y in range(height):
                    count = 0
                    for dx, dy in offsets:
                        nx, ny = x + dx, y + dy
                        inside = 0 <= nx < width and 0 <= ny < height
                        if not inside or pixelData[nx, ny] == background:
                            count += 1
                    # More than four background neighbours: treat as noise.
                    if count > 4:
                        pixelData[x, y] = background
        # Keep a copy of the processed captcha for inspection.
        image.save(self.imageSavePath.replace("shotCode", "ProcessedImage"))
        return image

    def image_str(self, image):
        """Binarize and denoise ``image``, then OCR it.

        :param image: captcha image object.
        :return: the text returned by ``pytesseract.image_to_string``,
            unmodified (no stripping is done here).
        """
        img = self.binarization_image(image)
        afterSpotImg = self.delete_noisy_point(img)
        # Point pytesseract at the configured Tesseract executable.
        pytesseract.tesseract_cmd = SC.PYTESSERACT_OCR
        # Image to text.
        result = pytesseract.image_to_string(afterSpotImg)
        return result

if __name__ == '__main__':
    # Manual smoke test: run OCR on a captcha image stored on disk.
    # The guard must compare the __name__ dunder with '__main__'; the bare
    # `name == 'main'` form raises NameError and can never match.
    from util.pySelenium import PySelenium

    p = PySelenium(openType="pc")
    # Live-browser variant: open the login page, then crop the captcha element.
    # p.open_url(url="xxx/admin/login?redirect=%2Fadmin%2Fdashboard")
    # p.sleep(2)
    # IM = ImageRecognize(p).save_code_image('xpath->//div[@class="imgs"]/img')
    code = ImageRecognize(p).image_str(image=Image.open(r"C:\Users\kk\Desktop\下载.jpg"))
    print(code)