本地视觉语言模型(VLMs)中的结构化输出
通过使用结构化输出,你可以将提示词转化为干净、易于维护的代码,同时利用 Pydantic 强大的验证功能。这种方法为构建 RAG 应用、错误管理以及实现 LLM 优雅重试机制提供了坚实基础。
from mlx_vlm import load, apply_chat_template, generate
from mlx_vlm.utils import load_image
import json
from pydantic import BaseModel, Field, ConfigDict
# One detected object: a name, a short description and an (x, y) position.
# NOTE: documented with comments rather than a class docstring on purpose --
# pydantic copies a model's docstring into the generated JSON schema, and that
# schema is embedded verbatim in the prompt sent to the model.
class Object(BaseModel):
    name: str
    description: str = Field(..., description="short description")
    x: float = Field(..., description="x coordinate of the object")
    y: float = Field(..., description="y coordinate of the object")

    # Attach a sample payload to the generated JSON schema so the prompt can
    # point the model at a concrete example of the expected format.
    model_config = ConfigDict(
        json_schema_extra={
            "example": [
                {
                    "name": "object 1",
                    "description": "object 1 description",
                    "x": 13.3,
                    "y": 59.6,
                }
            ]
        }
    )
# Top-level response container: the model is asked to return a JSON object
# with a single "object_list" key holding the detected objects.
# (Comment instead of docstring so the generated JSON schema is unchanged.)
class ObjectList(BaseModel):
    object_list: list[Object]
def _extract_json_block(text: str) -> str:
    """Return the JSON payload from a model reply, without Markdown fences.

    If the reply contains a fenced block (three backticks, optionally tagged
    ``json``), the content of the first such block is returned. An
    unterminated fence (e.g. output cut off by ``max_tokens``) yields
    everything after the opening fence. When no fence is present the stripped
    reply is returned unchanged.
    """
    import re

    fenced = re.search(r"```(?:json)?\s*(.*?)(?:```|\Z)", text, re.DOTALL)
    if fenced is not None:
        return fenced.group(1).strip()
    return text.strip()


# Load the quantized Molmo checkpoint and its processor; the processor config
# opts in to the custom code shipped with the checkpoint.
model, processor = load(
    "mlx-community/Molmo-7B-D-0924-4bit",
    processor_config={"trust_remote_code": True},
)
config = model.config

image_path = "test.jpg"
image = load_image(image_path)

# Single-turn user message. The JSON schema of ObjectList (including the
# example attached to Object) is embedded so the model knows the target shape.
messages = [
    {
        "role": "user",
        "content": f"""identify the main 3 objects in the image Your response should Return the correct JSON response within a ```json codeblock. not the JSON_SCHEMA".
JSON schema: {json.dumps(ObjectList.model_json_schema())}. Also use the provided example to format your json.
""",
    }
]

prompt = apply_chat_template(processor, config, messages)
output = generate(model, processor, image, prompt, max_tokens=1200, temperature=0.7)
print(output)

# The model is asked to answer inside a ```json fence; peel the fence off
# before validating. Removing the bare substrings "json" / "" (as the previous
# code did) left the fences in place and could corrupt the payload itself.
cleaned_json = _extract_json_block(output)
object_list = ObjectList.model_validate_json(cleaned_json)
print(object_list)

浙公网安备 33010602011771号