# pyautogui

import pyautogui
import time

import pytesseract
from PIL import Image
import cv2
import numpy as np
# pip install pyautogui pytesseract opencv-python pillow numpy

def find_text_on_screen(target_text, region=None, lang='eng'):
    """
    Locate a piece of text on the screen via OCR and return its bounding box.

    A screenshot is taken with pyautogui and passed to
    ``pytesseract.image_to_data``; the recognised entries are then scanned in
    order and the first one that contains ``target_text`` (case-insensitive
    substring match) wins.

    :param target_text: text to look for. Surrounding whitespace is ignored;
        an empty, whitespace-only or None value never matches.
    :param region: screenshot region (left, top, width, height); None means
        the whole screen.
    :param lang: OCR language (e.g. 'eng' for English, 'chi_sim' for
        Simplified Chinese).
    :return: (left, top, width, height) of the first matching entry, with the
        region's left/top offset added when a region was given, or None when
        nothing matches.
    """
    # An empty needle is a substring of every string, so it would "match" the
    # very first OCR entry. Reject it before doing any screen capture.
    needle = target_text.strip().lower() if target_text else ''
    if not needle:
        return None

    # 1. Grab the screen (full screen or the requested region).
    screenshot = pyautogui.screenshot(region=region)

    # 2. Convert the screenshot to an OpenCV-style array (hook for optional
    #    image preprocessing).
    # NOTE(review): pytesseract's README says numpy input is assumed to be
    # RGB, while this array is BGR - confirm the channel swap is intended.
    img_cv = cv2.cvtColor(np.array(screenshot), cv2.COLOR_RGB2BGR)

    # 3. Run OCR and collect the text plus position data.
    #    '--psm 6' tells Tesseract to assume a single uniform block of text.
    details = pytesseract.image_to_data(
        img_cv,
        lang=lang,
        output_type=pytesseract.Output.DICT,
        config='--psm 6'
    )

    # 4. Walk the recognised entries and return the first one that contains
    #    the needle.
    # NOTE(review): presumably each entry is a single OCR token, so a
    # multi-word target may never match - verify against real OCR output.
    entries = zip(
        details['text'],
        details['left'],
        details['top'],
        details['width'],
        details['height'],
    )
    for text, left, top, width, height in entries:
        if needle not in text.strip().lower():
            continue

        # OCR coordinates are relative to the screenshot; shift them back by
        # the region origin when only part of the screen was captured.
        if region:
            left += region[0]
            top += region[1]

        return (left, top, width, height)

    return None

def click_text_on_screen(target_text, region=None, lang='eng'):
    """
    Find a piece of text on the screen and click the centre of its box.

    :param target_text: text to look for (forwarded to find_text_on_screen).
    :param region: screenshot region (left, top, width, height); None means
        the whole screen.
    :param lang: OCR language forwarded to find_text_on_screen; defaults to
        'eng', which is what the lookup used before this parameter existed.
    :return: True if the text was found and clicked, False otherwise.
    """
    text_position = find_text_on_screen(target_text, region, lang)
    if not text_position:
        print(f"未找到文本 '{target_text}'")
        return False

    # Centre of the bounding box (integer division keeps pixel coordinates).
    left, top, width, height = text_position
    center_x = left + width // 2
    center_y = top + height // 2

    pyautogui.click(center_x, center_y)
    print(f"已点击文本 '{target_text}'，位置：({center_x}, {center_y})")
    return True


# Ordered automation steps. Each entry's 'action' key selects the handler in
# the loop below; the remaining keys are that handler's arguments and 'desc'
# is the label used in the progress messages.
# (Named `actions` rather than `list` so the builtin is not shadowed.)
actions = [
    {
        'action': 'click',
        'type': 'position',
        'position': [790, 900],
        'desc': 'click 1'
    },
    {
        'action': 'sleep',
        'duration': 3,
        'desc': 'sleep 3s'
    },
    {
        'action': 'click',
        'type': 'img',
        'path': './screen/a.png',
        'desc': 'click 2'
    },
    {
        'action': 'capture_full_screen',
        'path': '/Users/ii/screen/full.png',
        'desc': 'capture full screen'
    },
    {
        'action': 'click_text',
        'text': 'Allow',
        'desc': 'click Allow'
    },
    {
        'action': 'capture_region',
        'region': [100, 100, 200, 200],
        'path': './screen/region.png',
        'desc': 'capture region'
    }
]

# Run the steps one after another. Entries with an unknown 'action' (or an
# unknown click 'type') are skipped silently.
for item in actions:
    action = item['action']

    if action == 'click':
        if item['type'] == 'position':
            pyautogui.click(item['position'][0], item['position'][1])
            print(f'{item["desc"]} 点击成功')
        elif item['type'] == 'img':
            # Recent pyautogui versions raise ImageNotFoundException instead
            # of returning None when the image is not on screen; treat both
            # outcomes as "not found" so the failure branch is reachable.
            try:
                location = pyautogui.locateOnScreen(
                    item['path'], confidence=0.8, grayscale=True
                )
            except pyautogui.ImageNotFoundException:
                location = None

            if location:
                x, y = pyautogui.center(location)
                # NOTE(review): the match centre is halved before clicking -
                # presumably to map 2x (HiDPI) screenshot pixels to pointer
                # coordinates; confirm on the target display.
                x = x / 2
                y = y / 2
                pyautogui.click(x, y)
                print(f'{item["desc"]} 点击成功')
            else:
                print(f'{item["desc"]} 点击失败')

    elif action == 'sleep':
        time.sleep(item['duration'])
        print(f'{item["desc"]} 等待成功')

    elif action == 'capture_full_screen':
        # Take the screenshot object first, then save it explicitly.
        screenshot = pyautogui.screenshot()
        screenshot.save(item['path'])
        print(f'{item["desc"]} 截图成功')

    elif action == 'capture_region':
        region = item['region']
        # Take the screenshot object first, then save it explicitly.
        screenshot = pyautogui.screenshot(region=region)
        screenshot.save(item['path'])
        print(f'{item["desc"]} 截图成功')

    elif action == 'click_text':
        if click_text_on_screen(item['text']):
            print(f'{item["desc"]} 点击成功')
        else:
            print(f'{item["desc"]} 点击失败')


# pyautogui.locateOnWindow('./screen/chrome.png')

# image_path = 'screen/a.png'
# location = pyautogui.locateOnScreen(image=image_path, confidence=0.8, grayscale=True)
# time.sleep(3) 
# if location:
#     print("找到图片位置:", location)
#     x, y = pyautogui.center(location)

#     x = x/2
#     y= y/2
#     print(f"准备MoveTo坐标：({x}, {y})")
#     pyautogui.moveTo(x=x, y=y, duration=0.5)  # 缓慢移动，便于观察
#     time.sleep(0.2)  # 短暂停顿
#     x, y = pyautogui.position()
#     print(f"当前坐标：({x}, {y})")
#     pyautogui.click(x,y)  # 明确点击参数
#     x, y = pyautogui.position()
#     print("点击位置:", x, y)

#     pyautogui.click(x=355,y=101)
#     pyautogui.typewrite('baidu.com')
#     pyautogui.typewrite(['enter'])
# else:
#     print("未找到图片")
    
# print(pyautogui.position())
# print(pyautogui.resolution())
# posted @ 2025-11-30 22:46 zyip  Views(0)  Comments(0)  Bookmark  Report
# Refresh page / Back to top
# zyip
#
# pyautogui
#
# Announcement