下载地址:m.pan38.com/download.ph… 提取码:6666
高级屏幕OCR自动点击系统包含完整的错误处理、日志记录和配置管理功能。主要特点包括:(1)智能图像预处理流水线;(2)多模式文本匹配(精确匹配与模糊匹配);(3)动作队列管理;(4)持续监控模式;(5)详细的日志记录系统。使用前需要先安装 Tesseract OCR 引擎(含 eng 和 chi_sim 语言包),并将其加入环境变量 PATH,或通过 --tesseract 参数指定可执行文件路径。
import argparse
import os
import platform
import re
import sys
import time
from datetime import datetime
from typing import List, Tuple, Optional, Union

import cv2
import numpy as np
import pyautogui
import pytesseract
from PIL import ImageGrab, Image


class ScreenOCRClicker:
    """Find text on screen with Tesseract OCR and click on it.

    Features:
        1. Text recognition on the full screen or a sub-region.
        2. Optional image preprocessing before OCR.
        3. Exact (case-insensitive equality) and fuzzy (substring) matching.
        4. A queue of click actions that can be executed in order.
        5. Logging to the console and to a timestamped log file.
    """

    VERSION = "2.1.0"
    DEFAULT_CONFIG = {
        'preprocess': True,
        'confidence': 0.85,
        'delay': 0.3,
        'retry': 3,
        'timeout': 30,
        'region': None,
        'exact': False,
        'log_level': 1,
    }

    def __init__(self, tesseract_path: Optional[str] = None,
                 config: Optional[dict] = None):
        """Create a clicker.

        Args:
            tesseract_path: Path to the Tesseract executable. When omitted,
                the default Windows install location is probed.
            config: Overrides merged on top of ``DEFAULT_CONFIG``.
        """
        # The config and logger must exist before _init_tesseract runs,
        # because its error handler calls self._log.
        self.config = {**self.DEFAULT_CONFIG, **(config or {})}
        self._setup_logger()
        self._init_tesseract(tesseract_path)
        self.screen_size = pyautogui.size()
        self.action_queue = []
        self.last_capture = None

    def _option(self, value, key: str):
        """Return *value* unless it is None, else the configured default.

        Comparing against None (rather than relying on truthiness) keeps
        explicit falsy values such as ``delay=0`` intact.
        """
        return self.config[key] if value is None else value

    def _init_tesseract(self, path: Optional[str]):
        """Point pytesseract at the Tesseract executable.

        Uses *path* when given; otherwise, on Windows, uses the default
        install location if that file exists. In every other case
        pytesseract's own default command is left untouched.
        """
        try:
            if path:
                pytesseract.pytesseract.tesseract_cmd = path
            elif platform.system() == 'Windows':
                default_path = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
                if os.path.exists(default_path):
                    pytesseract.pytesseract.tesseract_cmd = default_path
        except Exception as e:
            self._log(f"Tesseract初始化失败: {str(e)}", 3)
            raise

    def _setup_logger(self):
        """Choose the log file name and define the level-name table."""
        self.log_file = f"screen_clicker_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
        self.log_levels = {
            0: 'DEBUG',
            1: 'INFO',
            2: 'WARNING',
            3: 'ERROR',
        }

    def _log(self, message: str, level: int = 1):
        """Write *message* to the log file and, if severe enough, the console.

        Args:
            message: Text to log.
            level: Key into ``self.log_levels`` (0=DEBUG .. 3=ERROR). Unknown
                levels are labelled ``INFO``.
        """
        log_msg = f"[{datetime.now()}] [{self.log_levels.get(level, 'INFO')}] {message}"
        # The console is filtered by the configured level; the file gets
        # every message.
        if level >= self.config['log_level']:
            print(log_msg)
        # Explicit UTF-8: the messages contain Chinese text, which a
        # locale-dependent default encoding may be unable to represent.
        with open(self.log_file, 'a', encoding='utf-8') as f:
            f.write(log_msg + '\n')

    def _preprocess_image(self, image: np.ndarray) -> np.ndarray:
        """Run the preprocessing pipeline on a BGR image before OCR.

        Steps: grayscale -> adaptive Gaussian threshold -> morphological
        opening -> 3x3 sharpening filter.

        Returns:
            The processed single-channel image, or the unmodified input if
            any step raises (the failure is logged).
        """
        try:
            # Convert to grayscale.
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            # Binarize with an adaptive threshold.
            thresh = cv2.adaptiveThreshold(
                gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY, 11, 2)
            # Noise reduction via morphological opening.
            kernel = np.ones((1, 1), np.uint8)
            processed = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
            # Sharpen.
            kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
            processed = cv2.filter2D(processed, -1, kernel)
            return processed
        except Exception as e:
            self._log(f"图像预处理失败: {str(e)}", 3)
            return image

    def capture_screen(self, region: Tuple[int, int, int, int] = None) -> np.ndarray:
        """Grab the screen, or the ``(x1, y1, x2, y2)`` box given by *region*.

        The capture is converted from RGB to BGR, stored in
        ``self.last_capture`` and returned.

        Raises:
            Exception: Whatever the grab or conversion raised, after logging.
        """
        try:
            if region:
                x1, y1, x2, y2 = region
                screenshot = ImageGrab.grab(bbox=(x1, y1, x2, y2))
            else:
                screenshot = ImageGrab.grab()
            self.last_capture = cv2.cvtColor(np.array(screenshot), cv2.COLOR_RGB2BGR)
            return self.last_capture
        except Exception as e:
            self._log(f"屏幕捕获失败: {str(e)}", 3)
            raise

    def find_text_positions(
        self,
        text: str,
        region: Tuple[int, int, int, int] = None,
        exact: bool = None,
        save_image: bool = False
    ) -> List[Tuple[int, int]]:
        """Locate *text* on screen via OCR.

        Args:
            text: Text to look for (compared case-insensitively).
            region: ``(x1, y1, x2, y2)`` search box; falls back to
                ``config['region']`` when None.
            exact: True for whole-word equality, False for substring match;
                falls back to ``config['exact']`` when None.
            save_image: When True, write the image handed to Tesseract to
                ``debug_<unix-time>.png`` in the working directory.

        Returns:
            Screen coordinates of the centre of every matching word whose
            OCR confidence reaches ``config['confidence']``. Returns an
            empty list if capture or recognition fails (the failure is
            logged).
        """
        exact = self._option(exact, 'exact')
        # Honour the configured default region, as add_click_action does.
        region = self._option(region, 'region')
        positions = []

        try:
            screenshot = self.capture_screen(region)
            if self.config['preprocess']:
                screenshot = self._preprocess_image(screenshot)

            if save_image:
                cv2.imwrite(f"debug_{int(time.time())}.png", screenshot)

            # Recognize text with Tesseract.
            custom_config = r'--oem 3 --psm 6 -l eng+chi_sim'
            data = pytesseract.image_to_data(
                screenshot,
                config=custom_config,
                output_type=pytesseract.Output.DICT)

            needle = text.lower()
            threshold = self.config['confidence']

            # Walk the recognized words.
            for i, word in enumerate(data['text']):
                # The confidence may be an int, a float, or a string such as
                # '96.5' or '-1' depending on the pytesseract/Tesseract
                # version; float() accepts all of them, int() does not.
                try:
                    confidence = float(data['conf'][i]) / 100
                except (TypeError, ValueError):
                    continue
                if confidence < threshold:
                    continue

                candidate = word.lower()
                matched = candidate == needle if exact else needle in candidate
                if not matched:
                    continue

                x, y = data['left'][i], data['top'][i]
                w, h = data['width'][i], data['height'][i]
                center_x = x + w // 2
                center_y = y + h // 2
                if region:
                    # OCR coordinates are relative to the captured box.
                    center_x += region[0]
                    center_y += region[1]
                positions.append((center_x, center_y))

            return positions
        except Exception as e:
            self._log(f"文本识别失败: {str(e)}", 3)
            return []

    def add_click_action(
        self,
        text: str,
        region: Tuple[int, int, int, int] = None,
        exact: bool = None,
        click_count: int = 1,
        button: str = 'left',
        delay: float = None,
        retry: int = None,
        timeout: float = None
    ):
        """Append a click action to the queue for a later ``run_queue``.

        Any argument left as None is filled in from the configuration at
        the time of this call.
        """
        action = {
            'text': text,
            'region': self._option(region, 'region'),
            'exact': self._option(exact, 'exact'),
            'click_count': click_count,
            'button': button,
            'delay': self._option(delay, 'delay'),
            'retry': self._option(retry, 'retry'),
            'timeout': self._option(timeout, 'timeout'),
        }
        self.action_queue.append(action)
        self._log(f"已添加点击动作: {text}", 0)

    def execute_click(
        self,
        text: str,
        region: Tuple[int, int, int, int] = None,
        exact: bool = None,
        click_count: int = 1,
        button: str = 'left',
        delay: float = None,
        retry: int = None,
        timeout: float = None
    ) -> bool:
        """Find *text* and click its first match, retrying on failure.

        Args:
            text: Text to find and click.
            region: Search box; None uses ``config['region']``.
            exact: Matching mode; None uses ``config['exact']``.
            click_count: Number of clicks to perform.
            button: Mouse button name passed to pyautogui.
            delay: Seconds split between the mouse move and the pause before
                clicking, and also slept between attempts.
            retry: Maximum number of attempts.
            timeout: Overall time budget in seconds, checked before each
                attempt.

        Returns:
            True once a click succeeded, False when attempts or time ran out.

        Raises:
            pyautogui.FailSafeException: If the PyAutoGUI fail-safe is
                triggered; it is never retried.
        """
        exact = self._option(exact, 'exact')
        delay = self._option(delay, 'delay')
        retry = self._option(retry, 'retry')
        timeout = self._option(timeout, 'timeout')

        start_time = time.time()
        attempt = 0

        while attempt < retry and (time.time() - start_time) < timeout:
            attempt += 1
            positions = self.find_text_positions(text, region, exact)

            if positions:
                x, y = positions[0]
                try:
                    pyautogui.moveTo(x, y, duration=delay / 2)
                    time.sleep(delay / 2)
                    pyautogui.click(x, y, clicks=click_count, button=button)
                    self._log(f"成功点击: {text} 位置: ({x}, {y})", 1)
                    return True
                except pyautogui.FailSafeException:
                    # The fail-safe is the user's emergency stop; retrying
                    # would defeat it.
                    raise
                except Exception as e:
                    self._log(f"点击失败: {str(e)}", 2)
            else:
                self._log(f"未找到文本: {text} (尝试 {attempt}/{retry})", 0)

            time.sleep(delay)

        self._log(f"点击动作失败: {text} (超时)", 2)
        return False

    def run_queue(self):
        """Execute every queued action in order and clear the queue.

        Returns:
            A list with one boolean per action: the result of its
            ``execute_click`` call.
        """
        results = []
        for action in self.action_queue:
            result = self.execute_click(
                action['text'],
                action['region'],
                action['exact'],
                action['click_count'],
                action['button'],
                action['delay'],
                action['retry'],
                action['timeout']
            )
            results.append(result)
            time.sleep(action['delay'])

        self.action_queue = []
        return results

    def continuous_monitor(
        self,
        target_texts: List[str],
        interval: float = 5,
        max_attempts: int = 10,
        callback=None
    ):
        """Poll the screen until one of *target_texts* appears, then click it.

        Args:
            target_texts: Candidate texts, checked in order on every pass.
            interval: Seconds to sleep after an unsuccessful pass.
            max_attempts: Number of passes before giving up.
            callback: Optional callable invoked as ``callback(text, (x, y))``
                after the click.

        Returns:
            True as soon as one text was found and clicked, otherwise False.
        """
        attempt = 0
        while attempt < max_attempts:
            attempt += 1
            self._log(f"监控尝试 {attempt}/{max_attempts}", 0)

            for text in target_texts:
                positions = self.find_text_positions(text)
                if positions:
                    x, y = positions[0]
                    pyautogui.click(x, y)
                    self._log(f"监控模式下点击: {text}", 1)
                    if callback:
                        callback(text, (x, y))
                    return True

            time.sleep(interval)

        self._log("监控模式结束: 未找到目标文本", 2)
        return False


def main():
    """Command-line entry point: parse arguments and run the chosen mode.

    Modes, in priority order: ``--monitor`` (requires ``--list``), ``--list``
    (queue one click per text), a single positional text, or print help.
    Exits with status 1 on any unhandled error.
    """
    parser = argparse.ArgumentParser(
        description=f"屏幕OCR自动点击器 v{ScreenOCRClicker.VERSION}",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('text', nargs='?', help='要查找并点击的文本')
    parser.add_argument('--list', nargs='+', help='多个目标文本列表')
    parser.add_argument('--region', nargs=4, type=int,
                        help='搜索区域 (x1 y1 x2 y2)')
    parser.add_argument('--exact', action='store_true',
                        help='精确匹配模式')
    parser.add_argument('--preprocess', action='store_true',
                        help='启用图像预处理')
    parser.add_argument('--confidence', type=float, default=0.85,
                        help='OCR置信度阈值 (0-1)')
    parser.add_argument('--delay', type=float, default=0.3,
                        help='点击间隔时间(秒)')
    parser.add_argument('--retry', type=int, default=3,
                        help='重试次数')
    parser.add_argument('--timeout', type=float, default=30,
                        help='超时时间(秒)')
    parser.add_argument('--tesseract', help='Tesseract可执行文件路径')
    parser.add_argument('--monitor', action='store_true',
                        help='启用监控模式')
    parser.add_argument('--interval', type=float, default=5,
                        help='监控间隔时间(秒)')
    parser.add_argument('--attempts', type=int, default=10,
                        help='监控尝试次数')
    # NOTE(review): --debug only lowers the log level; no code path here
    # passes save_image=True, despite what the help text says.
    parser.add_argument('--debug', action='store_true',
                        help='保存调试图像')

    args = parser.parse_args()

    config = {
        'preprocess': args.preprocess,
        'confidence': args.confidence,
        'delay': args.delay,
        'retry': args.retry,
        'timeout': args.timeout,
        # argparse yields a list; the class is annotated for a tuple.
        'region': tuple(args.region) if args.region else None,
        'exact': args.exact,
        'log_level': 0 if args.debug else 1,
    }

    try:
        clicker = ScreenOCRClicker(args.tesseract, config)

        if args.monitor:
            if not args.list:
                print("监控模式需要指定--list参数")
                return
            clicker.continuous_monitor(
                args.list,
                args.interval,
                args.attempts
            )
        elif args.list:
            for text in args.list:
                clicker.add_click_action(text)
            clicker.run_queue()
        elif args.text:
            clicker.execute_click(args.text)
        else:
            parser.print_help()
    except Exception as e:
        print(f"程序错误: {str(e)}")
        sys.exit(1)


if __name__ == "__main__":
    main()