import pyautogui
import time
import pytesseract
from PIL import Image
import cv2
import numpy as np
# pip install pyautogui pytesseract opencv-python pillow numpy
def find_text_on_screen(target_text, region=None, lang='eng'):
    """
    Locate text on the screen via OCR and return its bounding box.

    A screenshot (full screen or ``region``) is run through Tesseract and
    every recognized entry is compared, case-insensitively, against
    ``target_text``. The first entry that contains the target wins. Entries
    are checked one at a time, so a target that spans several OCR entries
    is not matched.

    :param target_text: Text to look for. Surrounding whitespace is ignored;
        an empty or blank value never matches.
    :param region: Screenshot area as (left, top, width, height), or None
        for the whole screen.
    :param lang: Tesseract language code (e.g. 'eng' for English,
        'chi_sim' for Simplified Chinese).
    :return: (left, top, width, height) of the first match, expressed in
        screenshot pixels with the region origin added when ``region`` is
        given, or None when nothing matched.
    """
    # An empty needle is a substring of every string, so without this guard
    # the very first OCR entry would be reported as a (bogus) match.
    needle = target_text.strip().lower() if target_text else ''
    if not needle:
        return None

    # 1. Grab the screen (whole screen or the requested region).
    screenshot = pyautogui.screenshot(region=region)

    # 2. Hand Tesseract an RGB array. pytesseract interprets ndarray input
    #    as RGB, so the channels must not be swapped to OpenCV's BGR order.
    image = np.array(screenshot.convert('RGB'))

    # 3. Run OCR and collect text plus bounding boxes.
    #    '--psm 6' tells Tesseract to assume a single uniform block of text.
    details = pytesseract.image_to_data(
        image,
        lang=lang,
        output_type=pytesseract.Output.DICT,
        config='--psm 6',
    )

    # OCR boxes are relative to the screenshot; shift them by the region
    # origin so the result is relative to the full screen.
    offset_x, offset_y = (region[0], region[1]) if region else (0, 0)

    # 4. Return the first entry whose text contains the target.
    entries = zip(
        details['text'],
        details['left'],
        details['top'],
        details['width'],
        details['height'],
    )
    for text, left, top, width, height in entries:
        if needle in text.lower():
            return (left + offset_x, top + offset_y, width, height)
    return None
def click_text_on_screen(target_text, region=None, lang='eng'):
    """
    Find text on the screen via OCR and click the center of its box.

    :param target_text: Text to look for (forwarded to
        ``find_text_on_screen``).
    :param region: Optional search area as (left, top, width, height);
        None searches the whole screen.
    :param lang: Tesseract language code forwarded to the OCR lookup, so
        non-English labels can be clicked as well. Defaults to 'eng'.
    :return: True if the text was found and clicked, False otherwise.
    """
    text_position = find_text_on_screen(target_text, region, lang)
    if text_position is None:
        print(f"未找到文本 '{target_text}'")
        return False

    # The click target is the center of the reported bounding box.
    left, top, width, height = text_position
    center_x = left + width // 2
    center_y = top + height // 2

    # NOTE(review): the image-based click path in this file halves located
    # coordinates before clicking (presumably for a HiDPI display); no such
    # correction is applied here -- confirm whether one is needed.
    pyautogui.click(center_x, center_y)
    print(f"已点击文本 '{target_text}',位置:({center_x}, {center_y})")
    return True
# Ordered automation steps. Every step has an 'action' and a 'desc' used in
# log output; the remaining keys depend on the action:
#   click + type 'position' -> 'position': [x, y]
#   click + type 'img'      -> 'path': template image to locate on screen
#   sleep                   -> 'duration': seconds
#   capture_full_screen     -> 'path': output file
#   capture_region          -> 'region': [left, top, width, height], 'path'
#   click_text              -> 'text': label to find via OCR
# (Named `actions` rather than `list` so the builtin is not shadowed.)
actions = [
    {
        'action': 'click',
        'type': 'position',
        'position': [790, 900],
        'desc': 'click 1'
    },
    {
        'action': 'sleep',
        'duration': 3,
        'desc': 'sleep 3s'
    },
    {
        'action': 'click',
        'type': 'img',
        'path': './screen/a.png',
        'desc': 'click 2'
    },
    {
        'action': 'capture_full_screen',
        'path': '/Users/ii/screen/full.png',
        'desc': 'capture full screen'
    },
    {
        'action': 'click_text',
        'text': 'Allow',
        'desc': 'click Allow'
    },
    {
        'action': 'capture_region',
        'region': [100, 100, 200, 200],
        'path': './screen/region.png',
        'desc': 'capture region'
    }
]


def _click_image(path):
    """Click the center of the on-screen match for the image at ``path``.

    Returns True when the image was located and clicked, False otherwise.
    """
    try:
        location = pyautogui.locateOnScreen(path, confidence=0.8, grayscale=True)
    except pyautogui.ImageNotFoundException:
        # Recent PyAutoGUI versions raise instead of returning None when the
        # image is not on screen; treat both the same way.
        location = None
    if location is None:
        return False

    x, y = pyautogui.center(location)
    # Located coordinates are halved before clicking -- presumably because
    # screenshots are 2x the click coordinate space on a HiDPI display.
    # TODO confirm on the target machine.
    pyautogui.click(x / 2, y / 2)
    return True


def _run_action(item):
    """Execute one step from the action table and print its outcome."""
    action = item['action']
    desc = item.get('desc', '')

    if action == 'click':
        click_type = item.get('type')
        if click_type == 'position':
            pyautogui.click(item['position'][0], item['position'][1])
            print(f'{desc} 点击成功')
        elif click_type == 'img':
            if _click_image(item['path']):
                print(f'{desc} 点击成功')
            else:
                print(f'{desc} 点击失败')
        else:
            # Report a misconfigured step instead of skipping it silently.
            print(f'{desc} 未知点击类型: {click_type}')
    elif action == 'sleep':
        time.sleep(item['duration'])
        print(f'{desc} 等待成功')
    elif action == 'capture_full_screen':
        # Take the screenshot object first, then save it explicitly.
        pyautogui.screenshot().save(item['path'])
        print(f'{desc} 截图成功')
    elif action == 'capture_region':
        pyautogui.screenshot(region=item['region']).save(item['path'])
        print(f'{desc} 截图成功')
    elif action == 'click_text':
        if click_text_on_screen(item['text']):
            print(f'{desc} 点击成功')
        else:
            print(f'{desc} 点击失败')
    else:
        # Report a misspelled/unsupported action instead of ignoring it.
        print(f'{desc} 未知操作: {action}')


# Run the steps in order, exactly as listed above.
for item in actions:
    _run_action(item)
# pyautogui.locateOnWindow('./screen/chrome.png')
# image_path = 'screen/a.png'
# location = pyautogui.locateOnScreen(image=image_path, confidence=0.8, grayscale=True)
# time.sleep(3)
# if location:
# print("找到图片位置:", location)
# x, y = pyautogui.center(location)
# x = x/2
# y= y/2
# print(f"准备MoveTo坐标:({x}, {y})")
# pyautogui.moveTo(x=x, y=y, duration=0.5) # 缓慢移动,便于观察
# time.sleep(0.2) # 短暂停顿
# x, y = pyautogui.position()
# print(f"当前坐标:({x}, {y})")
# pyautogui.click(x,y) # 明确点击参数
# x, y = pyautogui.position()
# print("点击位置:", x, y)
# pyautogui.click(x=355,y=101)
# pyautogui.typewrite('baidu.com')
# pyautogui.typewrite(['enter'])
# else:
# print("未找到图片")
# print(pyautogui.position())
# print(pyautogui.resolution())