docs: add canonical tooling corpus (147 files) from Google/HF/frameworks

Five-lane parallel research pass. Each subdir under tooling/ has its own README indexing downloaded files with verified upstream sources. - google-official/: deepmind-gemma JAX examples, gemma_pytorch scripts, gemma.cpp API server docs, google-gemma/cookbook notebooks, ai.google.dev HTML snapshots, Gemma 3 tech report - huggingface/: 8 gemma-4-* model cards, chat-template .jinja files, tokenizer_config.json, transformers gemma4/ source, launch blog posts, official HF Spaces app.py - inference-frameworks/: vLLM/llama.cpp/MLX/Keras-hub/TGI/Gemini API/Vertex AI comparison, run_commands.sh with 8 working launches, 9 code snippets - gemma-family/: 12 per-variant briefs (ShieldGemma 2, CodeGemma, PaliGemma 2, Recurrent/Data/Med/TxGemma, Embedding/Translate/Function/Dolphin/SignGemma) - fine-tuning/: Unsloth Gemma 4 notebooks, Axolotl YAMLs (incl 26B-A4B MoE), TRL scripts, Google cookbook fine-tune notebooks, recipe-recommendation.md Findings that update earlier CORPUS_* docs are flagged in tooling/README.md (not applied) — notably the new <|turn>/<turn|> prompt format, gemma_pytorch abandonment, gemma.cpp Gemini-API server, transformers AutoModelForMultimodalLM, FA2 head_dim=512 break, 26B-A4B MoE quantization rules, no Gemma 4 tech report PDF yet, no Gemma-4-generation specialized siblings yet. Pre-commit secrets hook bypassed per user authorization — flagged "secrets" are base64 notebook cell outputs and example Ed25519 keys in the HDP agentic-security demo, not real credentials. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 12:24:48 -04:00
parent 5011059f5d
commit eecebe7ef5
149 changed files with 181297 additions and 0 deletions
@@ -0,0 +1,13 @@
+---
+title: Gemma 4 31B It
+emoji: 🚀
+colorFrom: blue
+colorTo: green
+sdk: gradio
+sdk_version: 6.12.0
+python_version: "3.12.12"
+app_file: app.py
+pinned: false
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
@@ -0,0 +1,303 @@
+import os
+from collections.abc import Iterator
+from threading import Thread
+
+import gradio as gr
+import spaces
+import torch
+from transformers import AutoModelForMultimodalLM, AutoProcessor, BatchFeature
+from transformers.generation.streamers import TextIteratorStreamer
+
+# Hub id of the checkpoint served by this Space.
+MODEL_ID = "google/gemma-4-31b-it"
+
+# Processor and model are loaded once, at import time, and shared by every request.
+processor = AutoProcessor.from_pretrained(MODEL_ID, use_fast=False)
+model = AutoModelForMultimodalLM.from_pretrained(MODEL_ID, device_map="auto", dtype=torch.bfloat16)
+
+# Lower-case suffix tuples; _classify_file matches them with str.endswith.
+IMAGE_FILE_TYPES = (".jpg", ".jpeg", ".png", ".webp")
+VIDEO_FILE_TYPES = (".mp4", ".mov", ".avi", ".webm")
+# Prompt-length cap checked in generate(); overridable through the environment.
+# int() accepts "_" digit separators, so the "10_000" default parses as 10000.
+MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "10_000"))
+
+# Delimiters around the model's reasoning; also handed to gr.Chatbot(reasoning_tags=...).
+THINKING_START = "<|channel>"
+THINKING_END = "<channel|>"
+
+# Special tokens to strip from decoded output (keeping thinking delimiters
+# so that Gradio's reasoning_tags can find them on the frontend).
+_KEEP_TOKENS = {THINKING_START, THINKING_END}
+_STRIP_TOKENS = sorted(
+    (t for t in processor.tokenizer.all_special_tokens if t not in _KEEP_TOKENS),
+    key=len,
+    reverse=True,  # longest first to avoid partial matches
+)
+
+
+def _strip_special_tokens(text: str) -> str:
+    """Return *text* with every token listed in ``_STRIP_TOKENS`` removed."""
+    cleaned = text
+    for special in _STRIP_TOKENS:
+        cleaned = cleaned.replace(special, "")
+    return cleaned
+
+
+def _classify_file(path: str) -> str | None:
+    """Map a path to ``"image"`` or ``"video"`` by its extension (case-insensitive).
+
+    Returns None when the extension is in neither suffix tuple.
+    """
+    name = path.lower()
+    for kind, suffixes in (("image", IMAGE_FILE_TYPES), ("video", VIDEO_FILE_TYPES)):
+        if name.endswith(suffixes):
+            return kind
+    return None
+
+
+def process_new_user_message(message: dict) -> list[dict]:
+    """Turn the submitted message into chat-template content parts.
+
+    Files with a recognised extension become ``{"type": kind, "url": path}``
+    entries (unrecognised files are dropped); a single text part built from
+    ``message["text"]`` (empty string if absent) always comes last.
+    """
+    classified = ((_classify_file(path), path) for path in message.get("files", []))
+    media_parts = [{"type": kind, "url": path} for kind, path in classified if kind]
+    text_part = {"type": "text", "text": message.get("text", "")}
+    return [*media_parts, text_part]
+
+
+def process_history(history: list[dict]) -> list[dict]:
+    """Convert Gradio chat history into the message list fed to the chat template.
+
+    Assistant turns are reduced to one text part (their text parts joined with a
+    space). Every other turn is treated as a user turn: text parts are copied,
+    file parts with a recognised extension become URL-based media entries, and
+    turns that end up with no content are skipped.
+    """
+    converted: list[dict] = []
+
+    for turn in history:
+        role = turn["role"]
+        parts = turn["content"]
+
+        if role == "assistant":
+            reply = " ".join(p["text"] for p in parts if p.get("type") == "text")
+            converted.append({"role": "assistant", "content": [{"type": "text", "text": reply}]})
+            continue
+
+        entries: list[dict] = []
+        for p in parts:
+            part_type = p.get("type")
+            if part_type == "text":
+                entries.append({"type": "text", "text": p["text"]})
+            elif part_type == "file":
+                location = p["file"]["path"]
+                media_kind = _classify_file(location)
+                if media_kind:
+                    entries.append({"type": media_kind, "url": location})
+        if entries:
+            converted.append({"role": "user", "content": entries})
+
+    return converted
+
+
+@spaces.GPU(duration=180)
+@torch.inference_mode()
+def _generate_on_gpu(inputs: BatchFeature, max_new_tokens: int, thinking: bool) -> Iterator[str]:
+    """Stream the model's reply for already-tokenized *inputs*.
+
+    ``model.generate`` runs on a worker thread and feeds a
+    ``TextIteratorStreamer``; this generator drains the streamer and, after
+    every chunk, yields the full text accumulated so far.
+
+    Args:
+        inputs: Result of ``processor.apply_chat_template``; moved to
+            ``model.device`` with ``dtype=torch.bfloat16`` before generation.
+        max_new_tokens: Forwarded to ``model.generate``.
+        thinking: When true the streamer keeps special tokens and the text is
+            passed through ``_strip_special_tokens`` before being yielded;
+            otherwise the streamer drops special tokens itself.
+
+    Yields:
+        The cumulative decoded reply after each streamed chunk.
+
+    Raises:
+        gr.Error: If ``model.generate`` raised on the worker thread.
+    """
+    inputs = inputs.to(device=model.device, dtype=torch.bfloat16)
+
+    streamer = TextIteratorStreamer(
+        processor,
+        timeout=30.0,
+        skip_prompt=True,
+        skip_special_tokens=not thinking,
+    )
+    generate_kwargs = {
+        **inputs,
+        "streamer": streamer,
+        "max_new_tokens": max_new_tokens,
+        "disable_compile": True,
+    }
+
+    exception_holder: list[Exception] = []
+
+    def _generate() -> None:
+        try:
+            model.generate(**generate_kwargs)
+        except Exception as e:  # noqa: BLE001
+            exception_holder.append(e)
+            # A failed generate() never signals end-of-stream. Without this the
+            # consumer loop below would block until the streamer timeout and
+            # die with queue.Empty, so the real error was never reported.
+            streamer.end()
+
+    thread = Thread(target=_generate)
+    thread.start()
+
+    chunks: list[str] = []
+    for text in streamer:
+        chunks.append(text)
+        accumulated = "".join(chunks)
+        if thinking:
+            yield _strip_special_tokens(accumulated)
+        else:
+            yield accumulated
+
+    thread.join()
+    if exception_holder:
+        msg = f"Generation failed: {exception_holder[0]}"
+        raise gr.Error(msg) from exception_holder[0]
+
+
+def validate_input(message: dict) -> dict:
+    """Validate a multimodal submission (used as the ChatInterface ``validator``).
+
+    Rejects a message that has neither non-blank text nor files, a mix of media
+    types, and more than one video. Files with unrecognised extensions are
+    ignored for these checks.
+
+    Args:
+        message: Dict with optional ``"text"`` and ``"files"`` entries.
+
+    Returns:
+        The result of ``gr.validate(is_valid, error_message)``.
+    """
+    has_text = bool(message.get("text", "").strip())
+    has_files = bool(message.get("files"))
+    if not (has_text or has_files):
+        # This branch is always a failure; pass the literal instead of has_text,
+        # which merely happens to be False here.
+        return gr.validate(False, "Please enter a message or upload a file.")
+
+    # "files" may be present but None; has_files above already tolerates that.
+    files = message.get("files") or []
+    kinds = [_classify_file(f) for f in files]
+    kinds = [k for k in kinds if k is not None]
+    unique_kinds = set(kinds)
+
+    if len(unique_kinds) > 1:
+        return gr.validate(False, "Please upload only one type of media (images or video) at a time.")
+    if kinds.count("video") > 1:
+        return gr.validate(False, "Only one video file can be uploaded at a time.")
+
+    return gr.validate(True, "")
+
+
+def _has_media_type(messages: list[dict], media_type: str) -> bool:
+    """Report whether any content part of any message has ``type == media_type``.
+
+    Messages whose ``content`` is not a list are skipped.
+    """
+    for msg in messages:
+        parts = msg["content"]
+        if not isinstance(parts, list):
+            continue
+        for part in parts:
+            if part.get("type") == media_type:
+                return True
+    return False
+
+
+def generate(
+    message: dict,
+    history: list[dict],
+    thinking: bool = False,
+    max_new_tokens: int = 1024,
+    max_soft_tokens: int = 280,
+    system_prompt: str = "",
+) -> Iterator[str]:
+    """Chat handler: build the prompt, enforce the token cap, stream the reply.
+
+    The conversation is the optional system prompt, the converted history and
+    the new user message. It is tokenized with the processor's chat template;
+    prompts longer than ``MAX_INPUT_TOKENS`` raise ``gr.Error``. Everything
+    yielded comes from ``_generate_on_gpu``.
+    """
+    system_turns = (
+        [{"role": "system", "content": [{"type": "text", "text": system_prompt}]}] if system_prompt else []
+    )
+    conversation: list[dict] = [
+        *system_turns,
+        *process_history(history),
+        {"role": "user", "content": process_new_user_message(message)},
+    ]
+
+    chat_template_options: dict = {
+        "tokenize": True,
+        "return_dict": True,
+        "return_tensors": "pt",
+        "add_generation_prompt": True,
+        "processor_kwargs": {"images_kwargs": {"max_soft_tokens": max_soft_tokens}},
+    }
+    # Only set when a video is present in the conversation.
+    if _has_media_type(conversation, "video"):
+        chat_template_options["load_audio_from_video"] = False
+    if thinking:
+        chat_template_options["enable_thinking"] = True
+
+    encoded = processor.apply_chat_template(conversation, **chat_template_options)
+
+    n_tokens = encoded["input_ids"].shape[1]
+    if n_tokens > MAX_INPUT_TOKENS:
+        msg = f"Input too long ({n_tokens} tokens). Maximum is {MAX_INPUT_TOKENS} tokens."
+        raise gr.Error(msg)
+
+    yield from _generate_on_gpu(inputs=encoded, max_new_tokens=max_new_tokens, thinking=thinking)
+
+
+# Example prompts passed to gr.ChatInterface(examples=...). Each entry is a
+# one-element list holding a multimodal message dict: "text" is the prompt and
+# "files" is a (possibly empty) list of remote media URLs.
+examples = [
+    # --- Text-only examples ---
+    [
+        {
+            "text": "What is the capital of France?",
+            "files": [],
+        }
+    ],
+    [
+        {
+            "text": "What is the water formula?",
+            "files": [],
+        }
+    ],
+    [
+        {
+            "text": "Explain quantum entanglement in simple terms.",
+            "files": [],
+        }
+    ],
+    [
+        {
+            "text": "I want to do a car wash that is 50 meters away, should I walk or drive?",
+            "files": [],
+        }
+    ],
+    [
+        {
+            "text": "Write a poem about beer with 4 stanzas. Format the title as an H2 markdown heading and bold the first line of each stanza.",
+            "files": [],
+        }
+    ],
+    # --- Single-image examples ---
+    [
+        {
+            "text": "Describe this image.",
+            "files": ["https://news.bbc.co.uk/media/images/38107000/jpg/_38107299_ronaldogoal_ap_300.jpg"],
+        }
+    ],
+    [
+        {
+            "text": "What is the city in this image? Describe what you see.",
+            "files": ["https://imgmd.net/images/v1/guia/1698673/rio-de-janeiro-4-c.jpg"],
+        }
+    ],
+    # --- Multi-image examples ---
+    [
+        {
+            "text": "What are the key similarities between these three images?",
+            "files": [
+                "https://news.bbc.co.uk/media/images/38107000/jpg/_38107299_ronaldogoal_ap_300.jpg",
+                "https://ogimg.infoglobo.com.br/in/12547538-502-0e0/FT1086A/94-8705-14.jpg",
+                "https://amazonasatual.com.br/wp-content/uploads/2021/01/Pele.jpg",
+            ],
+        }
+    ],
+    # --- Video examples ---
+    [
+        {
+            "text": "What is happening in this video?",
+            "files": ["https://huggingface.co/datasets/merve/vlm_test_images/resolve/main/concert.mp4"],
+        }
+    ],
+]
+
+# Chat UI wiring: generate() produces the streamed reply and validate_input()
+# gates each submission.
+demo = gr.ChatInterface(
+    fn=generate,
+    validator=validate_input,
+    chatbot=gr.Chatbot(
+        scale=1,
+        latex_delimiters=[
+            {"left": "$$", "right": "$$", "display": True},
+            {"left": "$", "right": "$", "display": False},
+            {"left": "\\(", "right": "\\)", "display": False},
+            {"left": "\\[", "right": "\\]", "display": True},
+        ],
+        # Same delimiters that _generate_on_gpu leaves in the text when thinking is on.
+        reasoning_tags=[(THINKING_START, THINKING_END)],
+    ),
+    textbox=gr.MultimodalTextbox(
+        sources=["upload"],
+        file_types=[*IMAGE_FILE_TYPES, *VIDEO_FILE_TYPES],
+        file_count="multiple",
+        autofocus=True,
+    ),
+    multimodal=True,
+    # Listed in the same order as generate()'s extra parameters:
+    # thinking, max_new_tokens, max_soft_tokens, system_prompt.
+    additional_inputs=[
+        gr.Checkbox(label="Thinking", value=False),
+        gr.Slider(label="Max New Tokens", minimum=100, maximum=4000, step=10, value=2000),
+        gr.Dropdown(
+            label="Image Token Budget",
+            info="Higher values preserve more visual detail (useful for OCR/documents). Lower values are faster.",
+            choices=[70, 140, 280, 560, 1120],
+            value=280,
+        ),
+        gr.Textbox(label="System Prompt", value=""),
+    ],
+    additional_inputs_accordion=gr.Accordion("Settings", open=True),
+    stop_btn=False,
+    title="Gemma 4 31B It",
+    examples=examples,
+    run_examples_on_click=False,
+    cache_examples=False,
+    delete_cache=(1800, 1800),
+)
+
+# Launch only when run as a script.
+if __name__ == "__main__":
+    demo.launch(css_paths="style.css", max_file_size="20mb")
@@ -0,0 +1,362 @@
+# This file was autogenerated by uv via the following command:
+#    uv export --no-hashes --no-dev --group hf-spaces --no-emit-package typer-slim --no-emit-package spaces -o requirements.txt
+accelerate==1.13.0
+    # via gemma-4-31b-it
+aiohappyeyeballs==2.6.1
+    # via aiohttp
+aiohttp==3.13.5
+    # via fsspec
+aiosignal==1.4.0
+    # via aiohttp
+annotated-doc==0.0.4
+    # via
+    #   fastapi
+    #   typer
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.13.0
+    # via
+    #   gradio
+    #   httpx
+    #   mcp
+    #   sse-starlette
+    #   starlette
+attrs==26.1.0
+    # via
+    #   aiohttp
+    #   jsonschema
+    #   referencing
+audioop-lts==0.2.2 ; python_full_version >= '3.13'
+    # via gradio
+brotli==1.2.0
+    # via gradio
+certifi==2026.2.25
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+cffi==2.0.0 ; platform_python_implementation != 'PyPy'
+    # via cryptography
+charset-normalizer==3.4.7
+    # via requests
+click==8.3.2
+    # via
+    #   typer
+    #   uvicorn
+colorama==0.4.6 ; sys_platform == 'win32'
+    # via
+    #   click
+    #   tqdm
+cryptography==46.0.7
+    # via pyjwt
+datasets==4.8.4
+dill==0.4.1
+    # via
+    #   datasets
+    #   multiprocess
+fastapi==0.136.0
+    # via gradio
+filelock==3.28.0
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   torch
+frozenlist==1.8.0
+    # via
+    #   aiohttp
+    #   aiosignal
+fsspec==2026.2.0
+    # via
+    #   datasets
+    #   gradio-client
+    #   huggingface-hub
+    #   torch
+gradio==6.12.0
+    # via
+    #   gemma-4-31b-it
+    #   spaces
+gradio-client==2.4.1
+    # via
+    #   gradio
+    #   hf-gradio
+groovy==0.1.2
+    # via gradio
+h11==0.16.0
+    # via
+    #   httpcore
+    #   uvicorn
+hf-gradio==0.4.0
+    # via gradio
+hf-xet==1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
+    # via huggingface-hub
+httpcore==1.0.9
+    # via httpx
+httpx==0.28.1
+    # via
+    #   datasets
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   mcp
+    #   safehttpx
+    #   spaces
+httpx-sse==0.4.3
+    # via mcp
+huggingface-hub==1.11.0
+    # via
+    #   accelerate
+    #   datasets
+    #   gradio
+    #   gradio-client
+    #   tokenizers
+    #   transformers
+idna==3.11
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+    #   yarl
+jinja2==3.1.6
+    # via
+    #   gradio
+    #   torch
+jsonschema==4.26.0
+    # via mcp
+jsonschema-specifications==2025.9.1
+    # via jsonschema
+markdown-it-py==4.0.0
+    # via rich
+markupsafe==3.0.3
+    # via
+    #   gradio
+    #   jinja2
+mcp==1.27.0
+    # via gradio
+mdurl==0.1.2
+    # via markdown-it-py
+mpmath==1.3.0
+    # via sympy
+multidict==6.7.1
+    # via
+    #   aiohttp
+    #   yarl
+multiprocess==0.70.19
+    # via datasets
+networkx==3.6.1
+    # via torch
+numpy==2.4.4
+    # via
+    #   accelerate
+    #   datasets
+    #   gradio
+    #   pandas
+    #   torchvision
+    #   transformers
+nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cufft-cu12
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+    #   torch
+nvidia-nvshmem-cu12==3.3.20 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+orjson==3.11.8
+    # via gradio
+packaging==26.1
+    # via
+    #   accelerate
+    #   datasets
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   spaces
+    #   transformers
+pandas==3.0.2
+    # via
+    #   datasets
+    #   gradio
+pillow==12.2.0
+    # via
+    #   gradio
+    #   torchvision
+propcache==0.4.1
+    # via
+    #   aiohttp
+    #   yarl
+psutil==5.9.8
+    # via
+    #   accelerate
+    #   spaces
+pyarrow==23.0.1
+    # via datasets
+pycparser==3.0 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy'
+    # via cffi
+pydantic==2.12.5
+    # via
+    #   fastapi
+    #   gradio
+    #   mcp
+    #   pydantic-settings
+    #   spaces
+pydantic-core==2.41.5
+    # via pydantic
+pydantic-settings==2.13.1
+    # via mcp
+pydub==0.25.1
+    # via gradio
+pygments==2.20.0
+    # via rich
+pyjwt==2.12.1
+    # via mcp
+python-dateutil==2.9.0.post0
+    # via pandas
+python-dotenv==1.2.2
+    # via pydantic-settings
+python-multipart==0.0.26
+    # via
+    #   gradio
+    #   mcp
+pytz==2026.1.post1
+    # via gradio
+pywin32==311 ; sys_platform == 'win32'
+    # via mcp
+pyyaml==6.0.3
+    # via
+    #   accelerate
+    #   datasets
+    #   gradio
+    #   huggingface-hub
+    #   transformers
+referencing==0.37.0
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+regex==2026.4.4
+    # via transformers
+requests==2.33.1
+    # via
+    #   datasets
+    #   spaces
+rich==15.0.0
+    # via typer
+rpds-py==0.30.0
+    # via
+    #   jsonschema
+    #   referencing
+safehttpx==0.1.7
+    # via gradio
+safetensors==0.7.0
+    # via
+    #   accelerate
+    #   transformers
+semantic-version==2.10.0
+    # via gradio
+setuptools==82.0.1
+    # via torch
+shellingham==1.5.4
+    # via typer
+six==1.17.0
+    # via python-dateutil
+sse-starlette==3.3.4
+    # via mcp
+starlette==1.0.0
+    # via
+    #   fastapi
+    #   gradio
+    #   mcp
+    #   sse-starlette
+sympy==1.14.0
+    # via torch
+tokenizers==0.22.2
+    # via transformers
+tomlkit==0.14.0
+    # via gradio
+torch==2.9.1
+    # via
+    #   accelerate
+    #   gemma-4-31b-it
+    #   torchvision
+torchcodec==0.9.1
+    # via gemma-4-31b-it
+torchvision==0.24.1
+    # via gemma-4-31b-it
+tqdm==4.67.3
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   transformers
+transformers==5.5.4
+    # via gemma-4-31b-it
+triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+typer==0.24.1
+    # via
+    #   gradio
+    #   hf-gradio
+    #   huggingface-hub
+    #   transformers
+typing-extensions==4.15.0
+    # via
+    #   aiosignal
+    #   anyio
+    #   fastapi
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   mcp
+    #   pydantic
+    #   pydantic-core
+    #   referencing
+    #   spaces
+    #   starlette
+    #   torch
+    #   typing-inspection
+typing-inspection==0.4.2
+    # via
+    #   fastapi
+    #   mcp
+    #   pydantic
+    #   pydantic-settings
+tzdata==2026.1 ; sys_platform == 'emscripten' or sys_platform == 'win32'
+    # via pandas
+urllib3==2.6.3
+    # via requests
+uvicorn==0.44.0
+    # via
+    #   gradio
+    #   mcp
+xxhash==3.6.0
+    # via datasets
+yarl==1.23.0
+    # via aiohttp
@@ -0,0 +1,13 @@
+---
+title: Gemma 4 E4B It
+emoji: 🚀
+colorFrom: blue
+colorTo: green
+sdk: gradio
+sdk_version: 6.12.0
+python_version: "3.12.12"
+app_file: app.py
+pinned: false
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
@@ -0,0 +1,322 @@
+import os
+from collections.abc import Iterator
+from threading import Thread
+
+import gradio as gr
+import spaces
+import torch
+from transformers import AutoModelForMultimodalLM, AutoProcessor, BatchFeature
+from transformers.generation.streamers import TextIteratorStreamer
+
+# Hub id of the checkpoint served by this Space.
+MODEL_ID = "google/gemma-4-e4b-it"
+
+# Processor and model are loaded once, at import time, and shared by every request.
+processor = AutoProcessor.from_pretrained(MODEL_ID, use_fast=False)
+model = AutoModelForMultimodalLM.from_pretrained(MODEL_ID, device_map="auto", dtype=torch.bfloat16)
+
+# Lower-case suffix tuples; _classify_file matches them with str.endswith.
+IMAGE_FILE_TYPES = (".jpg", ".jpeg", ".png", ".webp")
+AUDIO_FILE_TYPES = (".wav", ".mp3", ".flac", ".ogg")
+VIDEO_FILE_TYPES = (".mp4", ".mov", ".avi", ".webm")
+# Prompt-length cap checked in generate(); overridable through the environment.
+# int() accepts "_" digit separators, so the "10_000" default parses as 10000.
+MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "10_000"))
+
+# Delimiters around the model's reasoning; also handed to gr.Chatbot(reasoning_tags=...).
+THINKING_START = "<|channel>"
+THINKING_END = "<channel|>"
+
+# Special tokens to strip from decoded output (keeping thinking delimiters
+# so that Gradio's reasoning_tags can find them on the frontend).
+_KEEP_TOKENS = {THINKING_START, THINKING_END}
+_STRIP_TOKENS = sorted(
+    (t for t in processor.tokenizer.all_special_tokens if t not in _KEEP_TOKENS),
+    key=len,
+    reverse=True,  # longest first to avoid partial matches
+)
+
+
+def _strip_special_tokens(text: str) -> str:
+    """Delete each entry of ``_STRIP_TOKENS`` from *text*, in list order."""
+    result = text
+    for marker in _STRIP_TOKENS:
+        result = result.replace(marker, "")
+    return result
+
+
+def _classify_file(path: str) -> str | None:
+    """Map a path to ``"image"``, ``"audio"`` or ``"video"`` by extension (case-insensitive).
+
+    Returns None when the extension is in none of the suffix tuples.
+    """
+    name = path.lower()
+    suffix_table = (
+        ("image", IMAGE_FILE_TYPES),
+        ("audio", AUDIO_FILE_TYPES),
+        ("video", VIDEO_FILE_TYPES),
+    )
+    for kind, suffixes in suffix_table:
+        if name.endswith(suffixes):
+            return kind
+    return None
+
+
+def process_new_user_message(message: dict) -> list[dict]:
+    """Turn the submitted message into chat-template content parts.
+
+    Recognised files become ``{"type": kind, "url": path}`` entries, in upload
+    order; files with unknown extensions are dropped. One text part built from
+    ``message["text"]`` (empty string if absent) is always appended last.
+    """
+    parts: list[dict] = [
+        {"type": kind, "url": path}
+        for kind, path in ((_classify_file(p), p) for p in message.get("files", []))
+        if kind
+    ]
+    parts.append({"type": "text", "text": message.get("text", "")})
+    return parts
+
+
+def process_history(history: list[dict]) -> list[dict]:
+    """Convert Gradio chat history into the message list fed to the chat template.
+
+    Assistant turns collapse to a single text part (text parts joined with a
+    space). Any other turn is handled as a user turn: text parts are copied,
+    file parts with a recognised extension become URL-based media entries, and
+    a turn with no resulting content is omitted.
+    """
+    converted: list[dict] = []
+
+    for turn in history:
+        role = turn["role"]
+        parts = turn["content"]
+
+        if role == "assistant":
+            reply = " ".join(p["text"] for p in parts if p.get("type") == "text")
+            converted.append({"role": "assistant", "content": [{"type": "text", "text": reply}]})
+            continue
+
+        entries: list[dict] = []
+        for p in parts:
+            part_type = p.get("type")
+            if part_type == "text":
+                entries.append({"type": "text", "text": p["text"]})
+            elif part_type == "file":
+                location = p["file"]["path"]
+                media_kind = _classify_file(location)
+                if media_kind:
+                    entries.append({"type": media_kind, "url": location})
+        if entries:
+            converted.append({"role": "user", "content": entries})
+
+    return converted
+
+
+@spaces.GPU(duration=120)
+@torch.inference_mode()
+def _generate_on_gpu(inputs: BatchFeature, max_new_tokens: int, thinking: bool) -> Iterator[str]:
+    """Stream the model's reply for already-tokenized *inputs*.
+
+    ``model.generate`` runs on a worker thread and feeds a
+    ``TextIteratorStreamer``; this generator drains the streamer and, after
+    every chunk, yields the full text accumulated so far.
+
+    Args:
+        inputs: Result of ``processor.apply_chat_template``; moved to
+            ``model.device`` with ``dtype=torch.bfloat16`` before generation.
+        max_new_tokens: Forwarded to ``model.generate``.
+        thinking: When true the streamer keeps special tokens and the text is
+            passed through ``_strip_special_tokens`` before being yielded;
+            otherwise the streamer drops special tokens itself.
+
+    Yields:
+        The cumulative decoded reply after each streamed chunk.
+
+    Raises:
+        gr.Error: If ``model.generate`` raised on the worker thread.
+    """
+    inputs = inputs.to(device=model.device, dtype=torch.bfloat16)
+
+    streamer = TextIteratorStreamer(
+        processor,
+        timeout=30.0,
+        skip_prompt=True,
+        skip_special_tokens=not thinking,
+    )
+    generate_kwargs = {
+        **inputs,
+        "streamer": streamer,
+        "max_new_tokens": max_new_tokens,
+        "disable_compile": True,
+    }
+
+    exception_holder: list[Exception] = []
+
+    def _generate() -> None:
+        try:
+            model.generate(**generate_kwargs)
+        except Exception as e:  # noqa: BLE001
+            exception_holder.append(e)
+            # A failed generate() never signals end-of-stream. Without this the
+            # consumer loop below would block until the streamer timeout and
+            # die with queue.Empty, so the real error was never reported.
+            streamer.end()
+
+    thread = Thread(target=_generate)
+    thread.start()
+
+    chunks: list[str] = []
+    for text in streamer:
+        chunks.append(text)
+        accumulated = "".join(chunks)
+        if thinking:
+            yield _strip_special_tokens(accumulated)
+        else:
+            yield accumulated
+
+    thread.join()
+    if exception_holder:
+        msg = f"Generation failed: {exception_holder[0]}"
+        raise gr.Error(msg) from exception_holder[0]
+
+
+# FBT003 is suppressed below: gr.validate API takes bool as first positional arg.
+def validate_input(message: dict) -> dict:
+    """Validate a multimodal submission (used as the ChatInterface ``validator``).
+
+    Rejects a message that has neither non-blank text nor files, a mix of media
+    types, more than one audio file, and more than one video. Files with
+    unrecognised extensions are ignored for these checks.
+
+    Args:
+        message: Dict with optional ``"text"`` and ``"files"`` entries.
+
+    Returns:
+        The result of ``gr.validate(is_valid, error_message)``.
+    """
+    has_text = bool(message.get("text", "").strip())
+    has_files = bool(message.get("files"))
+    if not (has_text or has_files):
+        return gr.validate(False, "Please enter a message or upload a file.")  # noqa: FBT003
+
+    # "files" may be present but None; has_files above already tolerates that.
+    files = message.get("files") or []
+    kinds = [k for k in map(_classify_file, files) if k is not None]
+    unique_kinds = set(kinds)
+
+    if len(unique_kinds) > 1:
+        return gr.validate(False, "Please upload only one type of media (images, audio, or video) at a time.")  # noqa: FBT003
+    if kinds.count("audio") > 1:
+        return gr.validate(False, "Only one audio file can be uploaded at a time.")  # noqa: FBT003
+    if kinds.count("video") > 1:
+        return gr.validate(False, "Only one video file can be uploaded at a time.")  # noqa: FBT003
+
+    return gr.validate(True, "")  # noqa: FBT003
+
+
+def _has_media_type(messages: list[dict], media_type: str) -> bool:
+    """Check if any message contains a content entry of the given media type.
+
+    Messages whose ``content`` is not a list (e.g. a plain string) are skipped
+    instead of being iterated, which would fail on ``.get``.
+    """
+    return any(
+        c.get("type") == media_type for m in messages for c in (m["content"] if isinstance(m["content"], list) else [])
+    )
+
+
+def generate(
+    message: dict,
+    history: list[dict],
+    thinking: bool = False,
+    max_new_tokens: int = 1024,
+    max_soft_tokens: int = 280,
+    system_prompt: str = "",
+) -> Iterator[str]:
+    """Chat handler: build the prompt, enforce the token cap, stream the reply.
+
+    The conversation is the optional system prompt, the converted history and
+    the new user message. It is tokenized with the processor's chat template;
+    prompts longer than ``MAX_INPUT_TOKENS`` raise ``gr.Error``. Everything
+    yielded comes from ``_generate_on_gpu``.
+    """
+    system_turns = (
+        [{"role": "system", "content": [{"type": "text", "text": system_prompt}]}] if system_prompt else []
+    )
+    conversation: list[dict] = [
+        *system_turns,
+        *process_history(history),
+        {"role": "user", "content": process_new_user_message(message)},
+    ]
+
+    # True exactly when the conversation contains a video part.
+    contains_video = _has_media_type(conversation, "video")
+    chat_template_options: dict = {
+        "tokenize": True,
+        "return_dict": True,
+        "return_tensors": "pt",
+        "add_generation_prompt": True,
+        "load_audio_from_video": contains_video,
+        "processor_kwargs": {"images_kwargs": {"max_soft_tokens": max_soft_tokens}},
+    }
+    if thinking:
+        chat_template_options["enable_thinking"] = True
+
+    encoded = processor.apply_chat_template(conversation, **chat_template_options)
+
+    n_tokens = encoded["input_ids"].shape[1]
+    if n_tokens > MAX_INPUT_TOKENS:
+        msg = f"Input too long ({n_tokens} tokens). Maximum is {MAX_INPUT_TOKENS} tokens."
+        raise gr.Error(msg)
+
+    yield from _generate_on_gpu(inputs=encoded, max_new_tokens=max_new_tokens, thinking=thinking)
+
+
+# Example prompts passed to gr.ChatInterface(examples=...). Each entry is a
+# one-element list holding a multimodal message dict: "text" is the prompt and
+# "files" is a (possibly empty) list of remote media URLs.
+examples = [
+    # --- Text-only examples ---
+    [
+        {
+            "text": "What is the capital of France?",
+            "files": [],
+        }
+    ],
+    [
+        {
+            "text": "What is the water formula?",
+            "files": [],
+        }
+    ],
+    [
+        {
+            "text": "Explain quantum entanglement in simple terms.",
+            "files": [],
+        }
+    ],
+    [
+        {
+            "text": "I want to do a car wash that is 50 meters away, should I walk or drive?",
+            "files": [],
+        }
+    ],
+    [
+        {
+            "text": "Write a poem about beer with 4 stanzas. Format the title as an H2 markdown heading and bold the first line of each stanza.",
+            "files": [],
+        }
+    ],
+    # --- Single-image examples ---
+    [
+        {
+            "text": "Describe this image.",
+            "files": ["https://news.bbc.co.uk/media/images/38107000/jpg/_38107299_ronaldogoal_ap_300.jpg"],
+        }
+    ],
+    [
+        {
+            "text": "What is the city in this image? Describe what you see.",
+            "files": ["https://imgmd.net/images/v1/guia/1698673/rio-de-janeiro-4-c.jpg"],
+        }
+    ],
+    # --- Multi-image examples ---
+    [
+        {
+            "text": "What are the key similarities between these three images?",
+            "files": [
+                "https://news.bbc.co.uk/media/images/38107000/jpg/_38107299_ronaldogoal_ap_300.jpg",
+                "https://ogimg.infoglobo.com.br/in/12547538-502-0e0/FT1086A/94-8705-14.jpg",
+                "https://amazonasatual.com.br/wp-content/uploads/2021/01/Pele.jpg",
+            ],
+        }
+    ],
+    # --- Audio examples ---
+    [
+        {
+            "text": "Transcribe the audio.",
+            "files": [
+                "https://huggingface.co/datasets/hf-internal-testing/dummy-audio-samples/resolve/main/bcn_weather.mp3"
+            ],
+        }
+    ],
+    [
+        {
+            "text": "Translate to Dutch.",
+            "files": [
+                "https://huggingface.co/datasets/hf-internal-testing/dummy-audio-samples/resolve/main/bcn_weather.mp3"
+            ],
+        }
+    ],
+    # --- Video examples ---
+    [
+        {
+            "text": "What is happening in this video?",
+            "files": ["https://huggingface.co/datasets/merve/vlm_test_images/resolve/main/concert.mp4"],
+        }
+    ],
+]
+
+# Chat UI wiring: generate() produces the streamed reply and validate_input()
+# gates each submission.
+demo = gr.ChatInterface(
+    fn=generate,
+    validator=validate_input,
+    chatbot=gr.Chatbot(
+        scale=1,
+        latex_delimiters=[
+            {"left": "$$", "right": "$$", "display": True},
+            {"left": "$", "right": "$", "display": False},
+            {"left": "\\(", "right": "\\)", "display": False},
+            {"left": "\\[", "right": "\\]", "display": True},
+        ],
+        # Same delimiters that _generate_on_gpu leaves in the text when thinking is on.
+        reasoning_tags=[(THINKING_START, THINKING_END)],
+    ),
+    textbox=gr.MultimodalTextbox(
+        sources=["upload", "microphone"],
+        file_types=[*IMAGE_FILE_TYPES, *AUDIO_FILE_TYPES, *VIDEO_FILE_TYPES],
+        file_count="multiple",
+        autofocus=True,
+    ),
+    multimodal=True,
+    # Listed in the same order as generate()'s extra parameters:
+    # thinking, max_new_tokens, max_soft_tokens, system_prompt.
+    additional_inputs=[
+        gr.Checkbox(label="Thinking", value=False),
+        gr.Slider(label="Max New Tokens", minimum=100, maximum=4000, step=10, value=2000),
+        gr.Dropdown(
+            label="Image Token Budget",
+            info="Higher values preserve more visual detail (useful for OCR/documents). Lower values are faster.",
+            choices=[70, 140, 280, 560, 1120],
+            value=280,
+        ),
+        gr.Textbox(label="System Prompt", value=""),
+    ],
+    additional_inputs_accordion=gr.Accordion("Settings", open=True),
+    stop_btn=False,
+    title="Gemma 4 E4B It",
+    examples=examples,
+    run_examples_on_click=False,
+    cache_examples=False,
+    delete_cache=(1800, 1800),
+)
+
+# Launch only when run as a script.
+if __name__ == "__main__":
+    demo.launch(css_paths="style.css", max_file_size="20MB")
@@ -0,0 +1,362 @@
+# This file was autogenerated by uv via the following command:
+#    uv export --no-hashes --no-dev --group hf-spaces --no-emit-package typer-slim --no-emit-package spaces -o requirements.txt
+accelerate==1.13.0
+    # via gemma-4-e4b-it
+aiohappyeyeballs==2.6.1
+    # via aiohttp
+aiohttp==3.13.5
+    # via fsspec
+aiosignal==1.4.0
+    # via aiohttp
+annotated-doc==0.0.4
+    # via
+    #   fastapi
+    #   typer
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.13.0
+    # via
+    #   gradio
+    #   httpx
+    #   mcp
+    #   sse-starlette
+    #   starlette
+attrs==26.1.0
+    # via
+    #   aiohttp
+    #   jsonschema
+    #   referencing
+audioop-lts==0.2.2 ; python_full_version >= '3.13'
+    # via gradio
+brotli==1.2.0
+    # via gradio
+certifi==2026.2.25
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+cffi==2.0.0 ; platform_python_implementation != 'PyPy'
+    # via cryptography
+charset-normalizer==3.4.7
+    # via requests
+click==8.3.2
+    # via
+    #   typer
+    #   uvicorn
+colorama==0.4.6 ; sys_platform == 'win32'
+    # via
+    #   click
+    #   tqdm
+cryptography==46.0.7
+    # via pyjwt
+datasets==4.8.4
+dill==0.4.1
+    # via
+    #   datasets
+    #   multiprocess
+fastapi==0.136.0
+    # via gradio
+filelock==3.28.0
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   torch
+frozenlist==1.8.0
+    # via
+    #   aiohttp
+    #   aiosignal
+fsspec==2026.2.0
+    # via
+    #   datasets
+    #   gradio-client
+    #   huggingface-hub
+    #   torch
+gradio==6.12.0
+    # via
+    #   gemma-4-e4b-it
+    #   spaces
+gradio-client==2.4.1
+    # via
+    #   gradio
+    #   hf-gradio
+groovy==0.1.2
+    # via gradio
+h11==0.16.0
+    # via
+    #   httpcore
+    #   uvicorn
+hf-gradio==0.4.0
+    # via gradio
+hf-xet==1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
+    # via huggingface-hub
+httpcore==1.0.9
+    # via httpx
+httpx==0.28.1
+    # via
+    #   datasets
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   mcp
+    #   safehttpx
+    #   spaces
+httpx-sse==0.4.3
+    # via mcp
+huggingface-hub==1.11.0
+    # via
+    #   accelerate
+    #   datasets
+    #   gradio
+    #   gradio-client
+    #   tokenizers
+    #   transformers
+idna==3.11
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+    #   yarl
+jinja2==3.1.6
+    # via
+    #   gradio
+    #   torch
+jsonschema==4.26.0
+    # via mcp
+jsonschema-specifications==2025.9.1
+    # via jsonschema
+markdown-it-py==4.0.0
+    # via rich
+markupsafe==3.0.3
+    # via
+    #   gradio
+    #   jinja2
+mcp==1.27.0
+    # via gradio
+mdurl==0.1.2
+    # via markdown-it-py
+mpmath==1.3.0
+    # via sympy
+multidict==6.7.1
+    # via
+    #   aiohttp
+    #   yarl
+multiprocess==0.70.19
+    # via datasets
+networkx==3.6.1
+    # via torch
+numpy==2.4.4
+    # via
+    #   accelerate
+    #   datasets
+    #   gradio
+    #   pandas
+    #   torchvision
+    #   transformers
+nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cufft-cu12
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+    #   torch
+nvidia-nvshmem-cu12==3.3.20 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+orjson==3.11.8
+    # via gradio
+packaging==26.1
+    # via
+    #   accelerate
+    #   datasets
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   spaces
+    #   transformers
+pandas==3.0.2
+    # via
+    #   datasets
+    #   gradio
+pillow==12.2.0
+    # via
+    #   gradio
+    #   torchvision
+propcache==0.4.1
+    # via
+    #   aiohttp
+    #   yarl
+psutil==5.9.8
+    # via
+    #   accelerate
+    #   spaces
+pyarrow==23.0.1
+    # via datasets
+pycparser==3.0 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy'
+    # via cffi
+pydantic==2.12.5
+    # via
+    #   fastapi
+    #   gradio
+    #   mcp
+    #   pydantic-settings
+    #   spaces
+pydantic-core==2.41.5
+    # via pydantic
+pydantic-settings==2.13.1
+    # via mcp
+pydub==0.25.1
+    # via gradio
+pygments==2.20.0
+    # via rich
+pyjwt==2.12.1
+    # via mcp
+python-dateutil==2.9.0.post0
+    # via pandas
+python-dotenv==1.2.2
+    # via pydantic-settings
+python-multipart==0.0.26
+    # via
+    #   gradio
+    #   mcp
+pytz==2026.1.post1
+    # via gradio
+pywin32==311 ; sys_platform == 'win32'
+    # via mcp
+pyyaml==6.0.3
+    # via
+    #   accelerate
+    #   datasets
+    #   gradio
+    #   huggingface-hub
+    #   transformers
+referencing==0.37.0
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+regex==2026.4.4
+    # via transformers
+requests==2.33.1
+    # via
+    #   datasets
+    #   spaces
+rich==15.0.0
+    # via typer
+rpds-py==0.30.0
+    # via
+    #   jsonschema
+    #   referencing
+safehttpx==0.1.7
+    # via gradio
+safetensors==0.7.0
+    # via
+    #   accelerate
+    #   transformers
+semantic-version==2.10.0
+    # via gradio
+setuptools==82.0.1
+    # via torch
+shellingham==1.5.4
+    # via typer
+six==1.17.0
+    # via python-dateutil
+sse-starlette==3.3.4
+    # via mcp
+starlette==1.0.0
+    # via
+    #   fastapi
+    #   gradio
+    #   mcp
+    #   sse-starlette
+sympy==1.14.0
+    # via torch
+tokenizers==0.22.2
+    # via transformers
+tomlkit==0.14.0
+    # via gradio
+torch==2.9.1
+    # via
+    #   accelerate
+    #   gemma-4-e4b-it
+    #   torchvision
+torchcodec==0.9.1
+    # via gemma-4-e4b-it
+torchvision==0.24.1
+    # via gemma-4-e4b-it
+tqdm==4.67.3
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   transformers
+transformers==5.5.4
+    # via gemma-4-e4b-it
+triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+typer==0.24.1
+    # via
+    #   gradio
+    #   hf-gradio
+    #   huggingface-hub
+    #   transformers
+typing-extensions==4.15.0
+    # via
+    #   aiosignal
+    #   anyio
+    #   fastapi
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   mcp
+    #   pydantic
+    #   pydantic-core
+    #   referencing
+    #   spaces
+    #   starlette
+    #   torch
+    #   typing-inspection
+typing-inspection==0.4.2
+    # via
+    #   fastapi
+    #   mcp
+    #   pydantic
+    #   pydantic-settings
+tzdata==2026.1 ; sys_platform == 'emscripten' or sys_platform == 'win32'
+    # via pandas
+urllib3==2.6.3
+    # via requests
+uvicorn==0.44.0
+    # via
+    #   gradio
+    #   mcp
+xxhash==3.6.0
+    # via datasets
+yarl==1.23.0
+    # via aiohttp