# ⚡ Z-Image-Turbo

import os
# Default environment settings. setdefault semantics: a variable that is already
# present in the environment is left untouched. These sit ahead of the
# third-party imports below, presumably so those libraries see them at import
# time -- keep this block above the imports.
for _env_name, _env_default in (
    ("HF_HOME", "/data/.cache/huggingface"),
    ("HF_MODULES_CACHE", "/tmp/hf_modules"),
    ("MPLCONFIGDIR", "/tmp/matplotlib"),
    ("TOKENIZERS_PARALLELISM", "false"),
    ("GRADIO_SSR_MODE", "false"),
    ("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True"),
):
    os.environ.setdefault(_env_name, _env_default)
del _env_name, _env_default  # do not leave loop temporaries in the module namespace

import spaces
import torch
import gradio as gr
from diffusers import ZImagePipeline
import random
import warnings
import logging

# Suppress all Python warnings and limit the "transformers" logger to errors.
warnings.filterwarnings(action="ignore")
_transformers_logger = logging.getLogger("transformers")
_transformers_logger.setLevel(logging.ERROR)

# Model identifier handed to ZImagePipeline.from_pretrained in _load_pipeline().
MODEL_ID = "Tongyi-MAI/Z-Image-Turbo"

# Module-wide pipeline handle; None means "not loaded" (checked by health() and generate()).
_pipeline = None


def _load_pipeline():
    """Build the ZImagePipeline for ``MODEL_ID`` and move it to the "cuda" device.

    Loading is best-effort: any exception is printed together with its
    traceback and swallowed, so the caller gets ``None`` instead of an error.

    Returns:
        The loaded pipeline, or ``None`` when loading failed.
    """
    loaded = None
    print(f"Loading ZImagePipeline from {MODEL_ID}...")
    try:
        candidate = ZImagePipeline.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=False,
        )
        candidate.to("cuda")
        print("Model loaded successfully.")
        # Only publish the pipeline once every step above has succeeded.
        loaded = candidate
    except Exception as exc:
        print(f"Error loading model: {exc}")
        import traceback
        traceback.print_exc()
    return loaded


# Load at module level for ZeroGPU module-level CUDA emulation.
# _load_pipeline() returns None instead of raising when loading fails, so the
# module still imports; health() and generate() then report the missing model.
_pipeline = _load_pipeline()


def health():
    """Cheap health endpoint. Must not load weights, run GPU work, or download large files.

    Returns:
        A dict with ``status`` ("healthy" / "unhealthy"), ``model_loaded``
        (whether the module-level pipeline is set) and ``model_id``.
    """
    is_loaded = _pipeline is not None
    if is_loaded:
        status = "healthy"
    else:
        status = "unhealthy"
    return dict(status=status, model_loaded=is_loaded, model_id=MODEL_ID)


def _parse_resolution(resolution, default=(1024, 1024)):
    """Parse a ``"<width>x<height>"`` string into a ``(width, height)`` int pair.

    Returns *default* when the value is not a string, does not contain exactly
    two ``x``-separated parts, or the parts are not integers.
    """
    try:
        width, height = map(int, resolution.split("x"))
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: not a str; ValueError: wrong part count or
        # non-integer parts.
        return default
    return width, height


@spaces.GPU(duration=120)
def generate(
    prompt: str,
    resolution: str,
    seed: int,
    random_seed: bool,
    num_inference_steps: int,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image from a text prompt with the module-level pipeline.

    Args:
        prompt: Text description of the image; must contain non-whitespace text.
        resolution: ``"<width>x<height>"`` string; malformed values fall back
            to 1024x1024.
        seed: Seed for the CUDA random generator. Ignored when *random_seed*
            is true. A missing seed (``None``, e.g. an emptied number field)
            is treated like a request for a random seed.
        random_seed: When true, draw a fresh seed in ``[1, 1000000]``.
        num_inference_steps: Number of denoising steps (coerced to ``int``).
        progress: Gradio progress tracker; not used directly in the body.

    Returns:
        A ``(image, seed_text)`` tuple: the first generated image and the seed
        actually used, as a string.

    Raises:
        gr.Error: If the model failed to load or the prompt is empty.
    """
    if _pipeline is None:
        raise gr.Error("Model not loaded. Please check Space logs.")

    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt.")

    if random_seed or seed is None:
        seed = random.randint(1, 1000000)
    else:
        # Normalise once so the seed reported to the user is exactly the one
        # fed to the generator (e.g. 42.0 -> 42).
        seed = int(seed)

    width, height = _parse_resolution(resolution)

    generator = torch.Generator("cuda").manual_seed(seed)

    with torch.no_grad():
        image = _pipeline(
            prompt=prompt,
            height=height,
            width=width,
            num_inference_steps=int(num_inference_steps),
            guidance_scale=0.0,
            generator=generator,
        ).images[0]

    return image, str(seed)


# Selectable output sizes, each rendered as "<width>x<height>".
RESOLUTIONS = [
    f"{width}x{height}"
    for width, height in (
        (1024, 1024),
        (1152, 896),
        (896, 1152),
        (1280, 720),
        (720, 1280),
        (1536, 1024),
        (1024, 1536),
    )
]

# Example rows in the order [prompt, resolution, seed, random_seed, steps];
# every example shares the same settings and differs only in its prompt.
EXAMPLES = [
    [example_prompt, "1024x1024", 42, True, 9]
    for example_prompt in (
        "A serene mountain landscape at sunset",
        "A futuristic city with neon lights and flying cars",
        "Young Chinese woman in red Hanfu, intricate embroidery",
    )
]

# UI definition: the first column holds the generation controls and examples,
# the second column shows the result. Two API endpoints are registered:
# "generate" and a hidden "health".
with gr.Blocks(title="Z-Image-Turbo") as demo:
    gr.Markdown(
        value="""<div align="center">
        <h1>⚡ Z-Image-Turbo</h1>
        <p>Efficient 6B text-to-image generation with 8-step diffusion</p>
        </div>"""
    )

    with gr.Row():
        # Controls column.
        with gr.Column(scale=1):
            prompt_input = gr.Textbox(
                placeholder="Describe the image you want to generate...",
                lines=3,
                label="Prompt",
            )
            resolution_input = gr.Dropdown(
                label="Resolution",
                choices=RESOLUTIONS,
                value="1024x1024",
            )
            with gr.Row():
                seed_input = gr.Number(value=42, precision=0, label="Seed")
                random_seed_input = gr.Checkbox(value=True, label="Random Seed")
            steps_input = gr.Slider(
                minimum=1,
                maximum=50,
                step=1,
                value=9,
                label="Inference Steps",
                info="Turbo uses 8-9 steps for best speed/quality",
            )
            generate_btn = gr.Button(value="Generate", variant="primary")

            # Same components, in generate()'s parameter order, feed both the
            # examples table and the Generate button.
            _generation_inputs = [
                prompt_input,
                resolution_input,
                seed_input,
                random_seed_input,
                steps_input,
            ]

            gr.Examples(
                label="Example Prompts",
                examples=EXAMPLES,
                inputs=_generation_inputs,
                cache_examples=False,
            )

        # Results column.
        with gr.Column(scale=1):
            output_image = gr.Image(label="Generated Image", format="png")
            output_seed = gr.Textbox(label="Used Seed", interactive=False)

    generate_btn.click(
        fn=generate,
        inputs=_generation_inputs,
        outputs=[output_image, output_seed],
        api_name="generate",
    )

    # Hidden health endpoint (cheap, no GPU, no weights)
    with gr.Row(visible=False):
        _health_btn = gr.Button(value="Health")
        _health_out = gr.JSON()
    _health_btn.click(
        fn=health,
        inputs=[],
        outputs=_health_out,
        api_name="health",
    )

# Start the Gradio app only when this file is executed directly, not on import.
if __name__ == "__main__":
    demo.launch()