Image upscaling with AuraSR-v2 and olmOCR

from aura_sr import AuraSR
import requests
from io import BytesIO
from PIL import Image
import torch

def load_image_from_url(url):
    response = requests.get(url)
    image_data = BytesIO(response.content)
    return Image.open(image_data)
# Pick the GPU when available and make it the default device so AuraSR's tensors land there.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.set_default_device(DEVICE)
# Temporarily wrap torch.load so the AuraSR checkpoint is mapped onto DEVICE
# (handy on CPU-only machines when the weights were saved from a CUDA run).
original_load = torch.load
torch.load = lambda *args, **kwargs: original_load(
    *args, **{**kwargs, "map_location": DEVICE}
)
aura_sr = AuraSR.from_pretrained("fal/AuraSR-v2")
torch.load = original_load  # restore the untouched loader

# Upscale a local page image 4x; the overlapped variant processes the image in
# overlapping tiles to avoid visible seams on large inputs.
image = Image.open("746.jpg")
upscaled_image = aura_sr.upscale_4x_overlapped(image)
save_params = {"format": "jpeg"}
upscaled_image.save("./iguana_output2.jpeg", **save_params)
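
The load_image_from_url helper above is defined but never called in this run; as a minimal sketch (the URL is a placeholder, not from the original post), it could feed the upscaler instead of the local file:

# Hypothetical usage of the load_image_from_url helper defined above.
# The URL is a placeholder; substitute any reachable image.
remote_image = load_image_from_url("https://example.com/sample.jpg").convert("RGB")
remote_upscaled = aura_sr.upscale_4x_overlapped(remote_image)
remote_upscaled.save("./remote_output.jpeg", format="jpeg")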
REM Offline setup (Windows cmd): build the Hugging Face cache layout for fal/AuraSR-v2 by hand
cd C:\Users\admin\.cache\huggingface\hub\models--fal--AuraSR-v2
mkdir blobs
mkdir refs
mkdir snapshots
echo ff452185a7c8b51206dd62c21c292e7baad5c3a3 > refs/main
cd snapshots
mkdir ff452185a7c8b51206dd62c21c292e7baad5c3a3
cd ff452185a7c8b51206dd62c21c292e7baad5c3a3
REM Unpack the downloaded model files into this snapshot directory
REM The directory name is the commit hash, taken from https://huggingface.co/fal/AuraSR-v2/commit/ff452185a7c8b51206dd62c21c292e7baad5c3a3
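
If the machine has network access, the same cache layout can usually be produced automatically instead of by hand; a minimal sketch, assuming huggingface_hub is installed and the Hub is reachable:

# Minimal sketch: download the pinned AuraSR-v2 snapshot into the standard
# Hugging Face cache (the same layout the manual commands above create).
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="fal/AuraSR-v2",
    revision="ff452185a7c8b51206dd62c21c292e7baad5c3a3",  # commit written into refs/main above
)
print(local_dir)  # ends with snapshots/ff452185a7c8b51206dd62c21c292e7baad5c3a3

The second script runs olmOCR (allenai/olmOCR-7B-0225-preview) on the same page image: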
import torch

from PIL import Image
# from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
from modelscope import AutoModelForCausalLM, AutoTokenizer, Qwen2VLForConditionalGeneration, AutoProcessor

import os
import time
start_time = time.time()
# os.environ['MODELSCOPE_CACHE'] = 'your preferred download path'
# model = Qwen2VLForConditionalGeneration.from_pretrained("allenai/olmOCR-7B-0225-preview", torch_dtype=torch.bfloat16).eval()
model = Qwen2VLForConditionalGeneration.from_pretrained(
    r"C:\Users\admin\.cache\modelscope\hub\models\allenai\olmOCR-7B-0225-preview",
    torch_dtype=torch.bfloat16,
    local_files_only=True,
    force_download=False,
    resume_download=False,
).eval().half()  # .half() casts the weights to float16, overriding the bfloat16 requested above
processor = AutoProcessor.from_pretrained(
    r"C:\Users\admin\.cache\modelscope\hub\models\allenai\olmOCR-7B-0225-preview",
    use_fast=True,
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model_load_duration = time.time() - start_time  # measure how long loading the model took
print(f"Model load time: {model_load_duration:.2f} seconds\n")

# image_folder = "../Bilder/Diagramme und infografische Elemente" 
output_folder = "./Modell_Output/" 
durations_path = os.path.join(output_folder, "durations.txt")
with open(durations_path, "w", encoding="utf-8") as durations_file:
    durations_file.write(f"Model Load on: {model_load_duration:.2f} Seconds\n")

image_path = r"C:\Users\admin\Desktop\temp\746.jpg"
image = Image.open(image_path).convert('RGB')
start_time = time.time()
prompt = "Below is the image of one page of a document. Just return the plain text representation of this document as if you were reading it naturally.Do not hallucinate."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            # {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
            {"type": "text", "text": prompt},
        ],
    }
]
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(text)
print("==========")
inputs = processor(
    text=[text],
    images=[image],
    padding=True,
    return_tensors="pt",
)
inputs = {key: value.to(device) for (key, value) in inputs.items()}
output = model.generate(
    **inputs,
    temperature=0.1,  # ignored when do_sample=False; greedy decoding is already deterministic
    max_new_tokens=1024,  # allow space for longer pages of text
    num_return_sequences=1,
    do_sample=False,  # disable sampling for reproducible extraction
)
# Sampling variant (non-deterministic), kept for reference:
# output = model.generate(
#     **inputs,
#     temperature=0.8,
#     max_new_tokens=4096,
#     num_return_sequences=1,
#     do_sample=True,
# )
prompt_length = inputs["input_ids"].shape[1]
print(prompt_length)
print("==========")
new_tokens = output[:, prompt_length:]
print(new_tokens)
print("==========")
text_output = processor.tokenizer.batch_decode(new_tokens, skip_special_tokens=True, clean_up_tokenization_spaces=False)
print(text_output)
print("==========")
text_output = text_output[0].replace("\\n", "\n")  # turn literal "\n" escape sequences into real line breaks
print(text_output)
print("==========")
processing_duration = time.time() - start_time
print(f"processing_duration: {processing_duration:.2f} seconds\n")
image_output_file = os.path.join(output_folder, "22222_output.txt")
with open(image_output_file, "w", encoding="utf-8") as f:
    f.write(text_output)
with open(durations_path, "a", encoding="utf-8") as durations_file:
    durations_file.write(f"processing_duration: {processing_duration:.2f} seconds\n")
