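# Parts of this code can be swapped for your own implementation. Apart from
# heavily distorted captchas, which cannot be recognised, all other captchas
# can be recognised.
# Click for details on obtaining the captcha: github.crmeb.net/u/fei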
import os
import re
from collections import Counter

from PIL import Image
from pytesseract import pytesseract
class ImageRecognize:
    """Capture a captcha image, clean it up and read it with Tesseract.

    The pipeline is: screenshot and crop (``save_code_image``), reduce to two
    grey levels (``binarization_image``), erase isolated noise pixels
    (``delete_noisy_point``) and OCR the result (``image_str``).
    """

    def __init__(self, driver):
        """Wrap *driver* in ``BasePages`` and decide where screenshots go.

        :param driver: browser driver object handed to ``BasePages``.
        """
        self.base = BasePages(driver)
        # os.path.join rather than a literal "\\" so the path is also valid
        # on hosts whose separator is not a backslash.
        self.imageSavePath = os.path.join(
            FilePathConfig.CODE_IMG_SAVE_PATH, "shotCode.png"
        )

    def save_code_image(self, elementPath, zoomNum=1.25):
        """Screenshot the page and crop it down to the captcha element.

        The cropped image overwrites the full-page screenshot stored at
        ``self.imageSavePath``.

        :param elementPath: locator of the captcha element, passed unchanged
            to ``driver.get_location``.
        :param zoomNum: display scaling factor of the machine (125 % -> 1.25).
        :return: the cropped captcha image.
        """
        self.base.driver.origin_driver.get_screenshot_as_file(self.imageSavePath)
        imageData = self.base.driver.get_location(elementPath)
        # Element coordinates must be multiplied by the display scaling
        # factor to land on the right screenshot pixels.
        left = imageData["x"] * zoomNum
        top = imageData["y"] * zoomNum
        right = left + imageData["width"] * zoomNum
        bottom = top + imageData["height"] * zoomNum
        self.imageObj = Image.open(self.imageSavePath)
        codeImage = self.imageObj.crop((left, top, right, bottom))
        codeImage.save(self.imageSavePath)
        return codeImage

    def binarization_image(self, image, threshold=150):
        """Reduce *image* to a two-level (0 / 255) greyscale image.

        Pixels brighter than *threshold* become 0 and every other pixel
        becomes 255, so the polarity is inverted relative to the input.

        :param image: image object to convert.
        :param threshold: grey level (0-255) separating the two classes.
        :return: a new mode ``"L"`` image; *image* itself is left untouched.
        """
        grey = image.convert("L")
        # point() applies the mapping to all 256 grey levels at once instead
        # of visiting each pixel from Python.
        return grey.point(lambda level: 0 if level > threshold else 255)

    @staticmethod
    def _denoise_pixels(pixels, width, height):
        """Erase isolated foreground pixels of a pixel grid in place.

        The most frequent pixel value is taken as the background colour. A
        pixel is repainted with the background colour when more than four of
        its eight neighbours are background. Neighbours that fall outside the
        grid count as background. Pixels are updated while scanning, so an
        erased pixel already counts as background for the pixels visited
        after it.

        :param pixels: object readable and writable as ``pixels[x, y]``.
        :param width: number of columns (valid ``x`` is ``0 .. width - 1``).
        :param height: number of rows (valid ``y`` is ``0 .. height - 1``).
        :return: the detected background value, or ``None`` for an empty grid.
        """
        if width <= 0 or height <= 0:
            return None

        # One counting pass over every pixel, last row and column included.
        counts = Counter(
            pixels[x, y] for x in range(width) for y in range(height)
        )
        background = max(counts, key=counts.get)

        neighbourOffsets = (
            (0, -1), (0, 1), (-1, 0), (1, 0),
            (-1, -1), (-1, 1), (1, -1), (1, 1),
        )
        for x in range(width):
            for y in range(height):
                if pixels[x, y] == background:
                    continue
                backgroundNeighbours = 0
                for dx, dy in neighbourOffsets:
                    nx, ny = x + dx, y + dy
                    # Bounds are checked explicitly: a negative index would
                    # otherwise wrap around to the opposite edge in Pillow's
                    # pixel access object.
                    inside = 0 <= nx < width and 0 <= ny < height
                    if not inside or pixels[nx, ny] == background:
                        backgroundNeighbours += 1
                if backgroundNeighbours > 4:
                    pixels[x, y] = background
        return background

    def delete_noisy_point(self, image):
        """Remove stray noise / interference-line pixels from *image*.

        The image is modified in place and a copy is written next to the
        screenshot, with ``shotCode`` in the path replaced by
        ``ProcessedImage``.

        :param image: image object to clean (normally the output of
            ``binarization_image``).
        :return: the same *image* object after cleaning.
        """
        width, height = image.size
        self._denoise_pixels(image.load(), width, height)
        # Keep the processed captcha on disk for inspection.
        image.save(self.imageSavePath.replace("shotCode", "ProcessedImage"))
        return image

    def image_str(self, image):
        """Clean up a captcha image and return the text Tesseract reads.

        :param image: captcha image object.
        :return: recognised text with all whitespace removed.
        """
        binarized = self.binarization_image(image)
        cleaned = self.delete_noisy_point(binarized)
        pytesseract.tesseract_cmd = SC.PYTESSERACT_OCR
        # Image to text.
        result = pytesseract.image_to_string(cleaned)
        # Tesseract output usually carries trailing newline / form-feed
        # characters; a captcha code never contains whitespace, so drop it.
        return "".join(result.split())
if __name__ == '__main__':
    # Manual smoke test: OCR a captcha image stored on the local disk.
    from util.pySelenium import PySelenium

    p = PySelenium(openType="pc")
    # To capture the captcha from a live page instead, open the login page
    # and crop the captcha element first:
    # p.open_url(url="xxx/admin/login?redirect=%2Fadmin%2Fdashboard")
    # p.sleep(2)
    # IM = ImageRecognize(p).save_code_image('xpath->//div[@class="imgs"]/img')
    code = ImageRecognize(p).image_str(image=Image.open(r"C:\Users\kk\Desktop\下载.jpg"))
    print(code)