"""Gradio app that serves the Z-Image-Turbo text-to-image pipeline.

The pipeline is loaded once at import time and reused by every request.
Two API endpoints are exposed: ``generate`` (runs the model) and ``health``
(reports whether the pipeline loaded, without touching the model).
"""

import logging
import os
import random
import traceback
import warnings

# These defaults are applied before the third-party imports below so the
# libraries see them when they are first imported. ``setdefault`` keeps any
# value that the hosting environment has already provided.
os.environ.setdefault("HF_HOME", "/data/.cache/huggingface")
os.environ.setdefault("HF_MODULES_CACHE", "/tmp/hf_modules")
os.environ.setdefault("MPLCONFIGDIR", "/tmp/matplotlib")
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
os.environ.setdefault("GRADIO_SSR_MODE", "false")
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")

# Third-party imports intentionally follow the environment setup; their
# relative order (``spaces`` ahead of ``torch``) is kept as in the original.
import spaces  # noqa: E402
import torch  # noqa: E402
import gradio as gr  # noqa: E402
from diffusers import ZImagePipeline  # noqa: E402

warnings.filterwarnings("ignore")
logging.getLogger("transformers").setLevel(logging.ERROR)

MODEL_ID = "Tongyi-MAI/Z-Image-Turbo"


def _load_pipeline():
    """Load the pipeline for ``MODEL_ID`` in bfloat16 and move it to CUDA.

    Returns:
        The loaded ``ZImagePipeline``, or ``None`` if loading failed. Failures
        are printed (with traceback) rather than raised so the app can still
        start and report itself as unhealthy.
    """
    print(f"Loading ZImagePipeline from {MODEL_ID}...")
    try:
        pipe = ZImagePipeline.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=False,
        )
        pipe.to("cuda")
    except Exception as e:
        # Top-level boundary: log everything and degrade to "unhealthy".
        print(f"Error loading model: {e}")
        traceback.print_exc()
        return None
    print("Model loaded successfully.")
    return pipe


# Load at module level for ZeroGPU module-level CUDA emulation
_pipeline = _load_pipeline()


def health():
    """Cheap health endpoint. Must not load weights, run GPU work, or download large files."""
    loaded = _pipeline is not None
    return {
        "status": "healthy" if loaded else "unhealthy",
        "model_loaded": loaded,
        "model_id": MODEL_ID,
    }


@spaces.GPU(duration=120)
def generate(
    prompt: str,
    resolution: str,
    seed: int,
    random_seed: bool,
    num_inference_steps: int,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image for ``prompt`` and return it with the seed used.

    Args:
        prompt: Text description of the image; must not be blank.
        resolution: ``"<width>x<height>"`` string. Anything that cannot be
            parsed falls back to 1024x1024.
        seed: Seed for the CUDA generator. Ignored when ``random_seed`` is
            true; a missing value (``None``, e.g. an emptied number field)
            is treated like a request for a random seed.
        random_seed: When true, draw a fresh seed in ``[1, 1000000]``.
        num_inference_steps: Number of diffusion steps passed to the pipeline.
        progress: Gradio progress tracker (tqdm tracking enabled); not used
            directly in the body.

    Returns:
        A ``(image, seed_text)`` tuple where ``seed_text`` is the integer seed
        actually fed to the generator, as a string.

    Raises:
        gr.Error: If the pipeline failed to load or the prompt is blank.
    """
    if _pipeline is None:
        raise gr.Error("Model not loaded. Please check Space logs.")
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt.")

    # A cleared seed field arrives as None; pick a random seed instead of
    # failing later in int(None).
    if random_seed or seed is None:
        seed = random.randint(1, 1000000)
    # Normalise once so the reported seed is exactly the one that is used.
    seed = int(seed)

    try:
        width, height = map(int, resolution.split("x"))
    except (AttributeError, TypeError, ValueError):
        # Best-effort: malformed or missing resolution -> default square.
        width, height = 1024, 1024

    generator = torch.Generator("cuda").manual_seed(seed)

    with torch.no_grad():
        image = _pipeline(
            prompt=prompt,
            height=height,
            width=width,
            num_inference_steps=int(num_inference_steps),
            guidance_scale=0.0,
            generator=generator,
        ).images[0]

    return image, str(seed)


RESOLUTIONS = [
    "1024x1024",
    "1152x896",
    "896x1152",
    "1280x720",
    "720x1280",
    "1536x1024",
    "1024x1536",
]

# Each row: prompt, resolution, seed, random-seed flag, inference steps.
EXAMPLES = [
    ["A serene mountain landscape at sunset", "1024x1024", 42, True, 9],
    ["A futuristic city with neon lights and flying cars", "1024x1024", 42, True, 9],
    ["Young Chinese woman in red Hanfu, intricate embroidery", "1024x1024", 42, True, 9],
]

with gr.Blocks(title="Z-Image-Turbo") as demo:
    # The literal below is kept flush-left so the rendered text is unchanged.
    gr.Markdown(
        """

⚡ Z-Image-Turbo

Efficient 6B text-to-image generation with 8-step diffusion

"""
    )

    with gr.Row():
        with gr.Column(scale=1):
            prompt_input = gr.Textbox(
                label="Prompt",
                lines=3,
                placeholder="Describe the image you want to generate...",
            )
            resolution_input = gr.Dropdown(
                choices=RESOLUTIONS,
                value="1024x1024",
                label="Resolution",
            )
            with gr.Row():
                seed_input = gr.Number(label="Seed", value=42, precision=0)
                random_seed_input = gr.Checkbox(label="Random Seed", value=True)
            steps_input = gr.Slider(
                label="Inference Steps",
                minimum=1,
                maximum=50,
                value=9,
                step=1,
                info="Turbo uses 8-9 steps for best speed/quality",
            )
            generate_btn = gr.Button("Generate", variant="primary")
            gr.Examples(
                examples=EXAMPLES,
                inputs=[
                    prompt_input,
                    resolution_input,
                    seed_input,
                    random_seed_input,
                    steps_input,
                ],
                cache_examples=False,
                label="Example Prompts",
            )

        with gr.Column(scale=1):
            output_image = gr.Image(label="Generated Image", format="png")
            output_seed = gr.Textbox(label="Used Seed", interactive=False)

    generate_btn.click(
        generate,
        inputs=[
            prompt_input,
            resolution_input,
            seed_input,
            random_seed_input,
            steps_input,
        ],
        outputs=[output_image, output_seed],
        api_name="generate",
    )

    # Hidden health endpoint (cheap, no GPU, no weights)
    with gr.Row(visible=False):
        _health_btn = gr.Button("Health")
        _health_out = gr.JSON()
        _health_btn.click(
            health,
            inputs=[],
            outputs=_health_out,
            api_name="health",
        )

if __name__ == "__main__":
    demo.launch()