Ghibli-style Diffusion AI Deployment

Text-to-Image version (T2I)

from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
import torch
import matplotlib.pyplot as plt

# --- Configuration ---
# Hugging Face model identifier passed to from_pretrained().
model_id = "nitrosocke/Ghibli-Diffusion"

# Output resolution in pixels and sampling parameters passed to the pipeline.
output_height = 800
output_width = 1600
num_steps = 50   # passed as num_inference_steps
cfg_scale = 7    # passed as guidance_scale

prompt = "ghibli style beautiful Caribbean beach tropical (sunset)"
negative_prompt = "soft blurry"

# Pick the GPU when one is present and fall back to the CPU otherwise, so the
# script does not fail on machines without CUDA. On a CUDA machine this
# resolves to "cuda", exactly as before.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Full precision is valid on both devices. torch.float16 can be used on CUDA
# to reduce memory use, at the cost of slightly different results.
dtype = torch.float32

# --- Load the text-to-image pipeline ---
# safety_checker=None loads the pipeline without a safety checker; the
# original author does this for faster inference.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id, torch_dtype=dtype, safety_checker=None
)
# Move the pipeline to the configured device.
pipe = pipe.to(device)

# Swap in a DPMSolverMultistepScheduler set up as "DPM++ 2M SDE Karras".
# The scheduler is rebuilt from the pipeline's current scheduler config with
# the overrides below applied on top.
sampler_overrides = {
    "algorithm_type": "sde-dpmsolver++",  # stochastic (SDE) DPM-Solver++ variant
    "solver_type": "midpoint",            # midpoint form of the second-order update
    "use_karras_sigmas": True,            # Karras noise schedule
    "solver_order": 2,                    # second-order solver
}
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config, **sampler_overrides
)

# --- Generate the image ---
print(f"Generating {output_width}x{output_height} image...")

# Seed a generator on the target device so repeated runs use the same seed.
generator = torch.Generator(device)
generator.manual_seed(1529856912)

result = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    height=output_height,
    width=output_width,
    num_inference_steps=num_steps,
    guidance_scale=cfg_scale,
    generator=generator,
)
# Keep the first image of the returned batch.
image = result.images[0]

# --- Write the result to disk ---
image.save("ghibli_ultra_hd.png")

Image-to-Image version (I2I)

from diffusers import StableDiffusionImg2ImgPipeline, DPMSolverMultistepScheduler # Changed Pipeline
from PIL import Image # Added for image loading
import torch
import matplotlib.pyplot as plt # Keep for potential display later

# --- Model and file locations ---
model_id = "nitrosocke/Ghibli-Diffusion"
input_image_path = "2.jpg"  # path of the source picture; set this before running
output_filename = "ghibli_i2i_output.png"

# --- Prompts ---
prompt = "ghibli style"
# A longer negative prompt than in the text-to-image script.
negative_prompt = "soft blurry, low quality, text, signature, watermark, deformed"

# --- Sampling parameters ---
# Image-to-image strength: 0.0 leaves the input unchanged, 1.0 changes it
# almost completely (close to text-to-image). Values between 0.5 and 0.85
# are a common starting range, depending on the desired effect.
denoising_strength = 0.6
num_steps = 50
cfg_scale = 7
# No explicit output size is configured here; the input image is resized
# before being passed to the pipeline.

# --- Hardware selection ---
# Use the GPU with half precision when CUDA is available; otherwise run on
# the CPU in full precision.
if torch.cuda.is_available():
    device = "cuda"
    dtype = torch.float16
else:
    device = "cpu"
    dtype = torch.float32

# --- Load Input Image ---
# Fixed size the input is resized to before inference. The resize does not
# preserve the source aspect ratio; adjust these values to match your image.
desired_width = 800
desired_height = 600

try:
    # Image.open() keeps the file open; the context manager closes it once
    # convert() has produced an independent in-memory RGB copy.
    with Image.open(input_image_path) as source_image:
        init_image = source_image.convert("RGB")
except FileNotFoundError:
    print(f"Error: Input image not found at '{input_image_path}'. Please set the correct path.")
    # Exit with a non-zero status so callers can detect the failure.
    raise SystemExit(1) from None
except Exception as e:
    # Top-level boundary of the script: report any other load/decode failure
    # and stop with a non-zero status.
    print(f"Error loading image: {e}")
    raise SystemExit(1) from None

print(f"Loaded input image from: {input_image_path} (Size: {init_image.size})")
print(f"Resizing input image to {desired_width}x{desired_height}")
init_image = init_image.resize((desired_width, desired_height))

# --- Load the image-to-image pipeline ---
print("Loading Image-to-Image pipeline...")
# safety_checker=None loads the pipeline without a safety checker; the
# original author does this for faster inference, so review outputs yourself.
pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    model_id, torch_dtype=dtype, safety_checker=None
)
# Move the pipeline to the selected device.
pipe = pipe.to(device)
print("Pipeline loaded.")

# Swap in a DPMSolverMultistepScheduler, rebuilt from the pipeline's current
# scheduler config with the overrides below (same settings as the
# text-to-image script).
print("Configuring scheduler...")
sampler_overrides = {
    "algorithm_type": "sde-dpmsolver++",  # stochastic (SDE) DPM-Solver++ variant
    "solver_type": "midpoint",            # midpoint form of the second-order update
    "use_karras_sigmas": True,            # Karras noise schedule
    "solver_order": 2,                    # second-order solver
}
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config, **sampler_overrides
)
print("Scheduler configured.")

# --- Run image-to-image generation ---
print(f"Generating image-to-image with strength {denoising_strength}...")

# Fixed seed so repeated runs use the same seed; change it to get variation.
generator = torch.Generator(device)
generator.manual_seed(1529856912)

# The input image and the strength are the image-to-image specific arguments;
# the rest mirror the text-to-image call.
result = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    image=init_image,
    strength=denoising_strength,
    num_inference_steps=num_steps,
    guidance_scale=cfg_scale,
    generator=generator,
)
# Keep the first image of the returned batch.
image = result.images[0]
print("Generation complete.")

# --- Write the result to disk ---
image.save(output_filename)
print(f"Saved output image to: {output_filename}")

# Optional: Display the image if matplotlib is available and in an interactive environment
# try:
#    plt.imshow(image)
#    plt.axis('off')
#    plt.show()
# except NameError:
#    pass # Handle case where plt might not be fully imported/available

Recommended VRAM: at least 8 GB

posted @ 2025-04-09 14:30  Eureka_Zang  阅读(90)  评论(0)    收藏  举报