from aura_sr import AuraSR
import requests
from io import BytesIO
from PIL import Image
import torch
def load_image_from_url(url, timeout=30):
    """Download an image over HTTP(S) and open it with Pillow.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The image object returned by ``Image.open`` for the downloaded bytes.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to Pillow.
    response.raise_for_status()
    image_data = BytesIO(response.content)
    return Image.open(image_data)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# DTYPE is read from the current default, so re-applying it below leaves the
# dtype unchanged; change DTYPE here to run with a different default dtype.
DTYPE = torch.get_default_dtype()
torch.set_default_dtype(DTYPE)
torch.set_default_device(DEVICE)

# Temporarily wrap torch.load so that every checkpoint read while the model is
# being constructed is mapped onto DEVICE (presumably so that the weights also
# load on CPU-only machines -- confirm against the AuraSR loader).
original_load = torch.load


def _load_on_device(*args, **kwargs):
    """Call the real ``torch.load`` with ``map_location`` forced to DEVICE."""
    # Override instead of adding the keyword a second time, which would raise
    # TypeError whenever the caller already supplies map_location.
    kwargs["map_location"] = DEVICE
    return original_load(*args, **kwargs)


torch.load = _load_on_device
try:
    aura_sr = AuraSR.from_pretrained("fal/AuraSR-v2")
finally:
    # Always undo the monkeypatch, even when loading the model fails.
    torch.load = original_load

# Upscale the input picture and store the result as a JPEG file.
image = Image.open("746.jpg")
upscaled_image = aura_sr.upscale_4x_overlapped(image)
save_params = {"format": "jpeg"}
upscaled_image.save("./iguana_output2.jpeg", **save_params)
# Manual setup of the Hugging Face hub cache entry for fal/AuraSR-v2. These are
# Windows shell commands, not Python: they create the blobs/, refs/ and
# snapshots/ folders, write a commit hash to refs/main and create the snapshot
# folder named after that hash.
cd C:\Users\admin\.cache\huggingface\hub\models--fal--AuraSR-v2
mkdir blobs
mkdir refs
mkdir snapshots
# NOTE(review): depending on the shell, echo with redirection may add a
# trailing space/newline or use a different text encoding -- confirm that
# refs/main ends up containing exactly the 40-character hash.
echo ff452185a7c8b51206dd62c21c292e7baad5c3a3 > refs/main
cd snapshots
mkdir ff452185a7c8b51206dd62c21c292e7baad5c3a3
cd ff452185a7c8b51206dd62c21c292e7baad5c3a3
# Extract the file contents (presumably the downloaded model files, into this snapshot folder).
# The hash (a commit hash, not an md5) comes from the commit https://huggingface.co/fal/AuraSR-v2/commit/ff452185a7c8b51206dd62c21c292e7baad5c3a3
import torch
from PIL import Image
# from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
from modelscope import AutoModelForCausalLM, AutoTokenizer,Qwen2VLForConditionalGeneration,AutoProcessor
import os
import time
# Time the whole model setup: loading model and processor plus the device move.
start_time = time.time()
# os.environ['MODELSCOPE_CACHE'] = 'your preferred download path'
# model = Qwen2VLForConditionalGeneration.from_pretrained("allenai/olmOCR-7B-0225-preview", torch_dtype=torch.bfloat16).eval()

# Local checkpoint directory shared by the model and its processor.
model_dir = rf"C:\Users\admin\.cache\modelscope\hub\models\allenai\olmOCR-7B-0225-preview"

# Load strictly from the local files (no download is attempted).
# NOTE(review): the weights are requested as bfloat16 and then cast to float16
# by .half() -- confirm that float16 is the intended inference dtype.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_dir,
    torch_dtype=torch.bfloat16,
    local_files_only=True,
    force_download=False,
    resume_download=False,
).eval().half()
processor = AutoProcessor.from_pretrained(model_dir, use_fast=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

model_load_duration = time.time() - start_time  # measure the model load time
print(f"Model Load on: {model_load_duration:.2f} Seconds\n")

# image_folder = "../Bilder/Diagramme und infografische Elemente"
output_folder = "./Modell_Output/"
# Create the output folder up front; opening a file inside a missing folder
# raises FileNotFoundError.
os.makedirs(output_folder, exist_ok=True)
durations_path = os.path.join(output_folder, "durations.txt")
# "w" starts a fresh timing log for this run; later steps append to it.
with open(durations_path, "w", encoding="utf-8") as durations_file:
    durations_file.write(f"Model Load on: {model_load_duration:.2f} Seconds\n")
# Open the page image and normalise it to RGB before it goes to the processor.
image_path = rf"C:\Users\admin\Desktop\temp\746.jpg"
image = Image.open(image_path).convert('RGB')

# Restart the timer: from here on the processing time for this image is measured.
start_time = time.time()

prompt = "Below is the image of one page of a document. Just return the plain text representation of this document as if you were reading it naturally.Do not hallucinate."

# One user turn made of the image followed by the instruction text.
image_part = {"type": "image", "image": image}
# Alternative image part kept for reference:
# {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
text_part = {"type": "text", "text": prompt}
messages = [{"role": "user", "content": [image_part, text_part]}]

# Render the chat template to a plain string (not token ids) and show it.
text = processor.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
print(text, "==========", sep="\n")

# Encode text and image together, then move every returned entry to the device.
batch = processor(text=[text], images=[image], padding=True, return_tensors="pt")
inputs = {name: tensor.to(device) for name, tensor in batch.items()}
output = model.generate(
**inputs,
temperature=0.1, # Set to 0 to make the output deterministic
max_new_tokens=1024, # Allow more space for longer text
num_return_sequences=1,
do_sample=False, # Disable randomness for precise extraction
)
# output = model.generate(
# **inputs,
# temperature=0.8, # Set to 0 to make the output deterministic
# max_new_tokens=4096, # Allow more space for longer text
# num_return_sequences=1,
# do_sample=True, # Disable randomness for precise extraction
# )
# Line printed after each debug value to keep the console output readable.
separator = "=========="

# The generated sequence starts with the prompt tokens; slice them off so only
# the newly generated tokens are decoded.
prompt_length = inputs["input_ids"].shape[1]
print(prompt_length, separator, sep="\n")

new_tokens = output[:, prompt_length:]
print(new_tokens, separator, sep="\n")

decoded = processor.tokenizer.batch_decode(
    new_tokens,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False,
)
print(decoded, separator, sep="\n")

# Take the first decoded sequence and turn literal backslash-n pairs into real
# line breaks.
text_output = decoded[0].replace("\\n", "\n")
print(text_output, separator, sep="\n")

processing_duration = time.time() - start_time
print(f"processing_duration: {processing_duration:.2f} seconds\n")

# Store the recognised text, then append the timing to the durations log.
image_output_file = os.path.join(output_folder, f"22222_output.txt")
with open(image_output_file, "w", encoding="utf-8") as out_file:
    out_file.write(text_output)
with open(durations_path, "a", encoding="utf-8") as log_file:
    log_file.write(f"processing_duration: {processing_duration:.2f} seconds\n")