本地运行Agent-S,替换多模态大模型为豆包。
- 设置环境变量
OPENAI_BASE_URL: https://ark.cn-beijing.volces.com/api/v3
OPENAI_API_KEY: xxxx-xxxx-xxxx-xxxx-xxxx
- 修改 embedding 模型
修改 gui_agents/s2/core/engine.py 中 OpenAIEmbeddingEngine 类的 self.model 变量,将其改为豆包的向量模型名称。
- 新建 main.py
import io import logging import os import platform import sys import time from datetime import datetime import pyautogui from PIL import Image from gui_agents.s2.agents.agent_s import AgentS2 from gui_agents.s2.agents.grounding import OSWorldACI def scale_screen_dimensions(width: int, height: int, max_dim_size: int): scale_factor = min(max_dim_size / width, max_dim_size / height, 1) safe_width = int(width * scale_factor) safe_height = int(height * scale_factor) return safe_width, safe_height def run_agent(agent, instruction: str, scaled_width: int, scaled_height: int): obs = {} traj = "Task:\n" + instruction subtask_traj = "" for _ in range(15): # Get screen shot using pyautogui screenshot = pyautogui.screenshot() screenshot = screenshot.resize((scaled_width, scaled_height), Image.LANCZOS) # Save the screenshot to a BytesIO object buffered = io.BytesIO() screenshot.save(buffered, format="PNG") # Get the byte value of the screenshot screenshot_bytes = buffered.getvalue() # Convert to base64 string. 
obs["screenshot"] = screenshot_bytes # Get next action code from the agent info, code = agent.predict(instruction=instruction, observation=obs) if "done" in code[0].lower() or "fail" in code[0].lower(): if platform.system() == "Darwin": os.system( f'osascript -e \'display dialog "Task Completed" with title "OpenACI Agent" buttons "OK" default button "OK"\'' ) elif platform.system() == "Linux": os.system( f'zenity --info --title="OpenACI Agent" --text="Task Completed" --width=200 --height=100' ) agent.update_narrative_memory(traj) break if "next" in code[0].lower(): continue if "wait" in code[0].lower(): time.sleep(5) continue else: time.sleep(1.0) print("EXECUTING CODE:", code[0]) # Ask for permission before executing exec(code[0]) time.sleep(1.0) # Update task and subtask trajectories and optionally the episodic memory traj += ( "\n\nReflection:\n" + str(info["reflection"]) + "\n\n----------------------\n\nPlan:\n" + info["executor_plan"] ) subtask_traj = agent.update_episodic_memory(info, subtask_traj) current_platform = platform.system().lower() screen_width, screen_height = pyautogui.size() scaled_width, scaled_height = scale_screen_dimensions( screen_width, screen_height, max_dim_size=2400 ) # grounding_width 参数以模型为准 engine_params = {"engine_type": 'openai', "model": "doubao-1.5-vision-pro-250328"} engine_params_for_grounding = { "engine_type": "openai", "model": "doubao-1.5-vision-pro-250328", "grounding_width": grounding_width, "grounding_height": screen_height * grounding_width / screen_width, } grounding_agent = OSWorldACI( platform=current_platform, engine_params_for_generation=engine_params, engine_params_for_grounding=engine_params_for_grounding, width=screen_width, height=screen_height, ) agent = AgentS2( engine_params, grounding_agent, platform=current_platform, action_space="pyautogui", observation_type="mixed", search_engine=None, ) if __name__ == '__main__': while True: query = input("Query: ") agent.reset() # Run the agent on your own device 
run_agent(agent, query, scaled_width, scaled_height) response = input("Would you like to provide another query? (y/n): ") if response.lower() != "y": break - 运行
main.py
当出现矩阵维度不兼容错误时,修改 gui_agents/s2/core/knowledge.py 中 KnowledgeBase 类的 retrieve_narrative_experience 与 retrieve_episodic_experience 方法,使向量维度对齐即可。以下为示例代码。
class KnowledgeBase:
def retrieve_narrative_experience(self, instruction: str) -> Tuple[str, str]:
"""Retrieve narrative experience using embeddings"""
knowledge_base = load_knowledge_base(self.narrative_memory_path)
if not knowledge_base:
return "None", "None"
embeddings = load_embeddings(self.embeddings_path)
# Get or create instruction embedding
instruction_embedding = embeddings.get(instruction)
if instruction_embedding is None:
instruction_embedding = self.embedding_engine.get_embeddings(instruction)
embeddings[instruction] = instruction_embedding
target_dim = instruction_embedding.shape[1]
# Get or create embeddings for knowledge base entries
candidate_embeddings = []
for key in knowledge_base:
candidate_embedding = embeddings.get(key)
if candidate_embedding is None:
candidate_embedding = self.embedding_engine.get_embeddings(key)
embeddings[key] = candidate_embedding
current_dim = candidate_embedding.shape[1]
if current_dim > target_dim:
# 如果当前维度大于目标维度,进行截断
candidate_embedding = candidate_embedding[:, :target_dim]
elif current_dim < target_dim:
# 如果当前维度小于目标维度,进行填充
padding = target_dim - current_dim
candidate_embedding = np.pad(candidate_embedding, ((0, 0), (0, padding)), mode='constant',
constant_values=0)
candidate_embeddings.append(candidate_embedding)
save_embeddings(self.embeddings_path, embeddings)
similarities = cosine_similarity(
instruction_embedding, np.vstack(candidate_embeddings)
)[0]
sorted_indices = np.argsort(similarities)[::-1]
keys = list(knowledge_base.keys())
idx = 1 if keys[sorted_indices[0]] == instruction else 0
return keys[sorted_indices[idx]], knowledge_base[keys[sorted_indices[idx]]]
def retrieve_episodic_experience(self, instruction: str) -> Tuple[str, str]:
"""Retrieve similar task experience using embeddings"""
knowledge_base = load_knowledge_base(self.episodic_memory_path)
if not knowledge_base:
return "None", "None"
embeddings = load_embeddings(self.embeddings_path)
# Get or create instruction embedding
instruction_embedding = embeddings.get(instruction)
if instruction_embedding is None:
instruction_embedding = self.embedding_engine.get_embeddings(instruction)
embeddings[instruction] = instruction_embedding
target_dim = instruction_embedding.shape[1]
# Get or create embeddings for knowledge base entries
candidate_embeddings = []
for key in knowledge_base:
candidate_embedding = embeddings.get(key)
if candidate_embedding is None:
candidate_embedding = self.embedding_engine.get_embeddings(key)
embeddings[key] = candidate_embedding
current_dim = candidate_embedding.shape[1]
if current_dim > target_dim:
# 如果当前维度大于目标维度,进行截断
candidate_embedding = candidate_embedding[:, :target_dim]
elif current_dim < target_dim:
# 如果当前维度小于目标维度,进行填充
padding = target_dim - current_dim
candidate_embedding = np.pad(candidate_embedding, ((0, 0), (0, padding)), mode='constant',
constant_values=0)
candidate_embeddings.append(candidate_embedding)
save_embeddings(self.embeddings_path, embeddings)
similarities = cosine_similarity(
instruction_embedding, np.vstack(candidate_embeddings)
)[0]
sorted_indices = np.argsort(similarities)[::-1]
keys = list(knowledge_base.keys())
idx = 1 if keys[sorted_indices[0]] == instruction else 0
return keys[sorted_indices[idx]], knowledge_base[keys[sorted_indices[idx]]]

浙公网安备 33010602011771号