docs: add canonical tooling corpus (147 files) from Google/HF/frameworks

Five-lane parallel research pass. Each subdir under tooling/ has its own
README indexing downloaded files with verified upstream sources.

- google-official/: deepmind-gemma JAX examples, gemma_pytorch scripts,
  gemma.cpp API server docs, google-gemma/cookbook notebooks, ai.google.dev
  HTML snapshots, Gemma 3 tech report
- huggingface/: 8 gemma-4-* model cards, chat-template .jinja files,
  tokenizer_config.json, transformers gemma4/ source, launch blog posts,
  official HF Spaces app.py
- inference-frameworks/: vLLM/llama.cpp/MLX/Keras-hub/TGI/Gemini API/Vertex AI
  comparison, run_commands.sh with 8 working launches, 9 code snippets
- gemma-family/: 12 per-variant briefs (ShieldGemma 2, CodeGemma, PaliGemma 2,
  Recurrent/Data/Med/TxGemma, Embedding/Translate/Function/Dolphin/SignGemma)
- fine-tuning/: Unsloth Gemma 4 notebooks, Axolotl YAMLs (incl 26B-A4B MoE),
  TRL scripts, Google cookbook fine-tune notebooks, recipe-recommendation.md

Findings that update earlier CORPUS_* docs are flagged in tooling/README.md
(not applied) — notably the new <|turn>/<turn|> prompt format, gemma_pytorch
abandonment, gemma.cpp Gemini-API server, transformers AutoModelForMultimodalLM,
FA2 head_dim=512 break, 26B-A4B MoE quantization rules, no Gemma 4 tech
report PDF yet, no Gemma-4-generation specialized siblings yet.

Pre-commit secrets hook bypassed per user authorization — flagged "secrets"
are base64 notebook cell outputs and example Ed25519 keys in the HDP
agentic-security demo, not real credentials.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mortdecai
2026-04-18 12:24:48 -04:00
parent 5011059f5d
commit eecebe7ef5
149 changed files with 181297 additions and 0 deletions
@@ -0,0 +1,13 @@
---
title: Gemma 4 31B It
emoji: 🚀
colorFrom: blue
colorTo: green
sdk: gradio
sdk_version: 6.12.0
python_version: "3.12.12"
app_file: app.py
pinned: false
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
@@ -0,0 +1,303 @@
import os
from collections.abc import Iterator
from threading import Thread
import gradio as gr
import spaces
import torch
from transformers import AutoModelForMultimodalLM, AutoProcessor, BatchFeature
from transformers.generation.streamers import TextIteratorStreamer
# Hugging Face Hub checkpoint served by this Space.
MODEL_ID = "google/gemma-4-31b-it"

# Processor and model are created once, at import time.
processor = AutoProcessor.from_pretrained(MODEL_ID, use_fast=False)
model = AutoModelForMultimodalLM.from_pretrained(MODEL_ID, device_map="auto", dtype=torch.bfloat16)

# Lower-case filename suffixes recognised for each media kind.
IMAGE_FILE_TYPES = (".jpg", ".jpeg", ".png", ".webp")
VIDEO_FILE_TYPES = (".mp4", ".mov", ".avi", ".webm")

# Upper bound on prompt length in tokens; overridable via the environment.
MAX_INPUT_TOKENS = int(os.environ.get("MAX_INPUT_TOKENS", "10_000"))

# Delimiters the model emits around its reasoning output.
THINKING_START = "<|channel>"
THINKING_END = "<channel|>"

# Every special token except the thinking delimiters is removed from decoded
# output; the delimiters stay so Gradio's reasoning_tags can find them on the
# frontend.
_KEEP_TOKENS = {THINKING_START, THINKING_END}
_STRIP_TOKENS = [tok for tok in processor.tokenizer.all_special_tokens if tok not in _KEEP_TOKENS]
# Longest first, so a short token never eats part of a longer one.
_STRIP_TOKENS.sort(key=len, reverse=True)
def _strip_special_tokens(text: str) -> str:
    """Return *text* with every token listed in ``_STRIP_TOKENS`` removed."""
    cleaned = text
    for special in _STRIP_TOKENS:
        if special in cleaned:
            cleaned = cleaned.replace(special, "")
    return cleaned
def _classify_file(path: str) -> str | None:
    """Map a file path to ``"image"`` or ``"video"`` by its suffix.

    The comparison is case-insensitive. ``None`` is returned when the suffix
    is in neither ``IMAGE_FILE_TYPES`` nor ``VIDEO_FILE_TYPES``.
    """
    name = path.lower()
    for kind, suffixes in (("image", IMAGE_FILE_TYPES), ("video", VIDEO_FILE_TYPES)):
        if name.endswith(suffixes):
            return kind
    return None
def process_new_user_message(message: dict) -> list[dict]:
    """Turn the incoming message into a chat-template content list.

    Each attached file with a recognised media type becomes a
    ``{"type": <kind>, "url": <path>}`` entry; unrecognised files are dropped.
    A text entry is always appended last, even when the text is empty.
    """
    media_parts = [
        {"type": kind, "url": path}
        for path in message.get("files", [])
        if (kind := _classify_file(path))
    ]
    return [*media_parts, {"type": "text", "text": message.get("text", "")}]
def process_history(history: list[dict]) -> list[dict]:
    """Convert Gradio chat history into chat-template messages.

    Assistant turns are collapsed into one text entry (their text parts joined
    with a single space). Every other turn is treated as a user turn: text
    parts are copied, file parts with a recognised media type become
    ``{"type": <kind>, "url": <path>}`` entries, and a turn that ends up with
    no content is skipped.
    """
    converted: list[dict] = []
    for turn in history:
        role = turn["role"]
        parts = turn["content"]

        if role == "assistant":
            reply = " ".join(p["text"] for p in parts if p.get("type") == "text")
            converted.append({"role": "assistant", "content": [{"type": "text", "text": reply}]})
            continue

        user_parts: list[dict] = []
        for part in parts:
            part_type = part.get("type")
            if part_type == "text":
                user_parts.append({"type": "text", "text": part["text"]})
            elif part_type == "file":
                location = part["file"]["path"]
                media_kind = _classify_file(location)
                if media_kind:
                    user_parts.append({"type": media_kind, "url": location})
        if user_parts:
            converted.append({"role": "user", "content": user_parts})
    return converted
@spaces.GPU(duration=180)
@torch.inference_mode()
def _generate_on_gpu(inputs: BatchFeature, max_new_tokens: int, thinking: bool) -> Iterator[str]:
    """Run generation in a worker thread and stream the reply as it grows.

    Args:
        inputs: Tokenized prompt produced by ``processor.apply_chat_template``.
        max_new_tokens: Upper bound on the number of generated tokens.
        thinking: When true, the streamer keeps special tokens so the thinking
            delimiters survive decoding; all other special tokens are then
            removed with ``_strip_special_tokens``.

    Yields:
        The full text generated so far (not just the newest chunk).

    Raises:
        gr.Error: If ``model.generate`` raised inside the worker thread.
    """
    inputs = inputs.to(device=model.device, dtype=torch.bfloat16)
    streamer = TextIteratorStreamer(
        processor,
        timeout=30.0,
        skip_prompt=True,
        skip_special_tokens=not thinking,
    )
    generate_kwargs = {
        **inputs,
        "streamer": streamer,
        "max_new_tokens": max_new_tokens,
        "disable_compile": True,
    }
    exception_holder: list[Exception] = []

    def _generate() -> None:
        try:
            model.generate(**generate_kwargs)
        except Exception as e:  # noqa: BLE001
            exception_holder.append(e)
            # generate() only signals end-of-stream when it completes. Without
            # this the consumer loop below would block until the streamer
            # timeout and surface queue.Empty instead of the real error.
            streamer.end()

    thread = Thread(target=_generate)
    thread.start()

    accumulated = ""
    for text in streamer:
        accumulated += text
        if thinking:
            yield _strip_special_tokens(accumulated)
        else:
            yield accumulated

    thread.join()
    if exception_holder:
        msg = f"Generation failed: {exception_holder[0]}"
        raise gr.Error(msg)
def validate_input(message: dict) -> dict:
    """Validate a multimodal message before it is submitted.

    Rejects a message with neither text nor files, a message mixing images
    with video, and a message with more than one video. Files with an
    unrecognised suffix are ignored by these checks.
    """
    text_present = bool(message.get("text", "").strip())
    files_present = bool(message.get("files"))
    if not text_present and not files_present:
        return gr.validate(False, "Please enter a message or upload a file.")

    media_kinds = [k for k in map(_classify_file, message.get("files", [])) if k is not None]
    if len(set(media_kinds)) > 1:
        return gr.validate(False, "Please upload only one type of media (images or video) at a time.")
    if media_kinds.count("video") > 1:
        return gr.validate(False, "Only one video file can be uploaded at a time.")
    return gr.validate(True, "")
def _has_media_type(messages: list[dict], media_type: str) -> bool:
"""Check if any message contains a content entry of the given media type."""
return any(
c.get("type") == media_type for m in messages for c in (m["content"] if isinstance(m["content"], list) else [])
)
def generate(
    message: dict,
    history: list[dict],
    thinking: bool = False,
    max_new_tokens: int = 1024,
    max_soft_tokens: int = 280,
    system_prompt: str = "",
) -> Iterator[str]:
    """Chat handler: build the prompt, check its size, and stream the reply.

    Args:
        message: New user message (``text`` plus optional ``files``).
        history: Previous turns, converted via ``process_history``.
        thinking: Adds ``enable_thinking=True`` to the chat-template call.
        max_new_tokens: Forwarded to ``_generate_on_gpu``.
        max_soft_tokens: Passed to the processor as
            ``images_kwargs["max_soft_tokens"]``.
        system_prompt: Optional system message placed first when non-empty.

    Yields:
        The accumulated reply text as produced by ``_generate_on_gpu``.

    Raises:
        gr.Error: If the tokenized prompt exceeds ``MAX_INPUT_TOKENS``.
    """
    conversation: list[dict] = (
        [{"role": "system", "content": [{"type": "text", "text": system_prompt}]}] if system_prompt else []
    )
    conversation += process_history(history)
    conversation.append({"role": "user", "content": process_new_user_message(message)})

    chat_kwargs: dict = dict(
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
        add_generation_prompt=True,
        processor_kwargs={"images_kwargs": {"max_soft_tokens": max_soft_tokens}},
    )
    # Only set when a video is present: its audio track is not loaded.
    if _has_media_type(conversation, "video"):
        chat_kwargs["load_audio_from_video"] = False
    if thinking:
        chat_kwargs["enable_thinking"] = True

    inputs = processor.apply_chat_template(conversation, **chat_kwargs)

    # Reject over-long prompts before any generation work starts.
    n_tokens = inputs["input_ids"].shape[1]
    if n_tokens > MAX_INPUT_TOKENS:
        raise gr.Error(f"Input too long ({n_tokens} tokens). Maximum is {MAX_INPUT_TOKENS} tokens.")

    yield from _generate_on_gpu(inputs=inputs, max_new_tokens=max_new_tokens, thinking=thinking)
# Example prompts shown in the UI. Each row holds a single multimodal message
# dict; the (prompt, urls) pairs below are expanded into that shape.
examples = [
    [{"text": prompt, "files": list(urls)}]
    for prompt, urls in (
        # --- Text-only examples ---
        ("What is the capital of France?", ()),
        ("What is the water formula?", ()),
        ("Explain quantum entanglement in simple terms.", ()),
        ("I want to do a car wash that is 50 meters away, should I walk or drive?", ()),
        (
            "Write a poem about beer with 4 stanzas. Format the title as an H2 markdown heading and bold the first line of each stanza.",
            (),
        ),
        # --- Single-image examples ---
        (
            "Describe this image.",
            ("https://news.bbc.co.uk/media/images/38107000/jpg/_38107299_ronaldogoal_ap_300.jpg",),
        ),
        (
            "What is the city in this image? Describe what you see.",
            ("https://imgmd.net/images/v1/guia/1698673/rio-de-janeiro-4-c.jpg",),
        ),
        # --- Multi-image examples ---
        (
            "What are the key similarities between these three images?",
            (
                "https://news.bbc.co.uk/media/images/38107000/jpg/_38107299_ronaldogoal_ap_300.jpg",
                "https://ogimg.infoglobo.com.br/in/12547538-502-0e0/FT1086A/94-8705-14.jpg",
                "https://amazonasatual.com.br/wp-content/uploads/2021/01/Pele.jpg",
            ),
        ),
        # --- Video examples ---
        (
            "What is happening in this video?",
            ("https://huggingface.co/datasets/merve/vlm_test_images/resolve/main/concert.mp4",),
        ),
    )
]
# Chat pane: renders LaTeX and folds the model's thinking section using the
# same delimiters the backend keeps in the streamed text.
_chatbot = gr.Chatbot(
    scale=1,
    latex_delimiters=[
        {"left": "$$", "right": "$$", "display": True},
        {"left": "$", "right": "$", "display": False},
        {"left": "\\(", "right": "\\)", "display": False},
        {"left": "\\[", "right": "\\]", "display": True},
    ],
    reasoning_tags=[(THINKING_START, THINKING_END)],
)

# Input box: uploads only, restricted to the supported image/video suffixes.
_textbox = gr.MultimodalTextbox(
    sources=["upload"],
    file_types=[*IMAGE_FILE_TYPES, *VIDEO_FILE_TYPES],
    file_count="multiple",
    autofocus=True,
)

# Listed in the same order as generate()'s parameters after (message, history):
# thinking, max_new_tokens, max_soft_tokens, system_prompt.
_settings = [
    gr.Checkbox(label="Thinking", value=False),
    gr.Slider(label="Max New Tokens", minimum=100, maximum=4000, step=10, value=2000),
    gr.Dropdown(
        label="Image Token Budget",
        info="Higher values preserve more visual detail (useful for OCR/documents). Lower values are faster.",
        choices=[70, 140, 280, 560, 1120],
        value=280,
    ),
    gr.Textbox(label="System Prompt", value=""),
]

demo = gr.ChatInterface(
    fn=generate,
    validator=validate_input,
    chatbot=_chatbot,
    textbox=_textbox,
    multimodal=True,
    additional_inputs=_settings,
    additional_inputs_accordion=gr.Accordion("Settings", open=True),
    stop_btn=False,
    title="Gemma 4 31B It",
    examples=examples,
    run_examples_on_click=False,
    cache_examples=False,
    delete_cache=(1800, 1800),
)

if __name__ == "__main__":
    demo.launch(css_paths="style.css", max_file_size="20mb")
@@ -0,0 +1,362 @@
# This file was autogenerated by uv via the following command:
# uv export --no-hashes --no-dev --group hf-spaces --no-emit-package typer-slim --no-emit-package spaces -o requirements.txt
accelerate==1.13.0
# via gemma-4-31b-it
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.13.5
# via fsspec
aiosignal==1.4.0
# via aiohttp
annotated-doc==0.0.4
# via
# fastapi
# typer
annotated-types==0.7.0
# via pydantic
anyio==4.13.0
# via
# gradio
# httpx
# mcp
# sse-starlette
# starlette
attrs==26.1.0
# via
# aiohttp
# jsonschema
# referencing
audioop-lts==0.2.2 ; python_full_version >= '3.13'
# via gradio
brotli==1.2.0
# via gradio
certifi==2026.2.25
# via
# httpcore
# httpx
# requests
cffi==2.0.0 ; platform_python_implementation != 'PyPy'
# via cryptography
charset-normalizer==3.4.7
# via requests
click==8.3.2
# via
# typer
# uvicorn
colorama==0.4.6 ; sys_platform == 'win32'
# via
# click
# tqdm
cryptography==46.0.7
# via pyjwt
datasets==4.8.4
dill==0.4.1
# via
# datasets
# multiprocess
fastapi==0.136.0
# via gradio
filelock==3.28.0
# via
# datasets
# huggingface-hub
# torch
frozenlist==1.8.0
# via
# aiohttp
# aiosignal
fsspec==2026.2.0
# via
# datasets
# gradio-client
# huggingface-hub
# torch
gradio==6.12.0
# via
# gemma-4-31b-it
# spaces
gradio-client==2.4.1
# via
# gradio
# hf-gradio
groovy==0.1.2
# via gradio
h11==0.16.0
# via
# httpcore
# uvicorn
hf-gradio==0.4.0
# via gradio
hf-xet==1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
# via huggingface-hub
httpcore==1.0.9
# via httpx
httpx==0.28.1
# via
# datasets
# gradio
# gradio-client
# huggingface-hub
# mcp
# safehttpx
# spaces
httpx-sse==0.4.3
# via mcp
huggingface-hub==1.11.0
# via
# accelerate
# datasets
# gradio
# gradio-client
# tokenizers
# transformers
idna==3.11
# via
# anyio
# httpx
# requests
# yarl
jinja2==3.1.6
# via
# gradio
# torch
jsonschema==4.26.0
# via mcp
jsonschema-specifications==2025.9.1
# via jsonschema
markdown-it-py==4.0.0
# via rich
markupsafe==3.0.3
# via
# gradio
# jinja2
mcp==1.27.0
# via gradio
mdurl==0.1.2
# via markdown-it-py
mpmath==1.3.0
# via sympy
multidict==6.7.1
# via
# aiohttp
# yarl
multiprocess==0.70.19
# via datasets
networkx==3.6.1
# via torch
numpy==2.4.4
# via
# accelerate
# datasets
# gradio
# pandas
# torchvision
# transformers
nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via
# nvidia-cudnn-cu12
# nvidia-cusolver-cu12
# torch
nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via
# nvidia-cusolver-cu12
# torch
nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via
# nvidia-cufft-cu12
# nvidia-cusolver-cu12
# nvidia-cusparse-cu12
# torch
nvidia-nvshmem-cu12==3.3.20 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
orjson==3.11.8
# via gradio
packaging==26.1
# via
# accelerate
# datasets
# gradio
# gradio-client
# huggingface-hub
# spaces
# transformers
pandas==3.0.2
# via
# datasets
# gradio
pillow==12.2.0
# via
# gradio
# torchvision
propcache==0.4.1
# via
# aiohttp
# yarl
psutil==5.9.8
# via
# accelerate
# spaces
pyarrow==23.0.1
# via datasets
pycparser==3.0 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy'
# via cffi
pydantic==2.12.5
# via
# fastapi
# gradio
# mcp
# pydantic-settings
# spaces
pydantic-core==2.41.5
# via pydantic
pydantic-settings==2.13.1
# via mcp
pydub==0.25.1
# via gradio
pygments==2.20.0
# via rich
pyjwt==2.12.1
# via mcp
python-dateutil==2.9.0.post0
# via pandas
python-dotenv==1.2.2
# via pydantic-settings
python-multipart==0.0.26
# via
# gradio
# mcp
pytz==2026.1.post1
# via gradio
pywin32==311 ; sys_platform == 'win32'
# via mcp
pyyaml==6.0.3
# via
# accelerate
# datasets
# gradio
# huggingface-hub
# transformers
referencing==0.37.0
# via
# jsonschema
# jsonschema-specifications
regex==2026.4.4
# via transformers
requests==2.33.1
# via
# datasets
# spaces
rich==15.0.0
# via typer
rpds-py==0.30.0
# via
# jsonschema
# referencing
safehttpx==0.1.7
# via gradio
safetensors==0.7.0
# via
# accelerate
# transformers
semantic-version==2.10.0
# via gradio
setuptools==82.0.1
# via torch
shellingham==1.5.4
# via typer
six==1.17.0
# via python-dateutil
sse-starlette==3.3.4
# via mcp
starlette==1.0.0
# via
# fastapi
# gradio
# mcp
# sse-starlette
sympy==1.14.0
# via torch
tokenizers==0.22.2
# via transformers
tomlkit==0.14.0
# via gradio
torch==2.9.1
# via
# accelerate
# gemma-4-31b-it
# torchvision
torchcodec==0.9.1
# via gemma-4-31b-it
torchvision==0.24.1
# via gemma-4-31b-it
tqdm==4.67.3
# via
# datasets
# huggingface-hub
# transformers
transformers==5.5.4
# via gemma-4-31b-it
triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
typer==0.24.1
# via
# gradio
# hf-gradio
# huggingface-hub
# transformers
typing-extensions==4.15.0
# via
# aiosignal
# anyio
# fastapi
# gradio
# gradio-client
# huggingface-hub
# mcp
# pydantic
# pydantic-core
# referencing
# spaces
# starlette
# torch
# typing-inspection
typing-inspection==0.4.2
# via
# fastapi
# mcp
# pydantic
# pydantic-settings
tzdata==2026.1 ; sys_platform == 'emscripten' or sys_platform == 'win32'
# via pandas
urllib3==2.6.3
# via requests
uvicorn==0.44.0
# via
# gradio
# mcp
xxhash==3.6.0
# via datasets
yarl==1.23.0
# via aiohttp
@@ -0,0 +1,13 @@
---
title: Gemma 4 E4B It
emoji: 🚀
colorFrom: blue
colorTo: green
sdk: gradio
sdk_version: 6.12.0
python_version: "3.12.12"
app_file: app.py
pinned: false
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
@@ -0,0 +1,322 @@
import os
from collections.abc import Iterator
from threading import Thread
import gradio as gr
import spaces
import torch
from transformers import AutoModelForMultimodalLM, AutoProcessor, BatchFeature
from transformers.generation.streamers import TextIteratorStreamer
# Hugging Face Hub checkpoint served by this Space.
MODEL_ID = "google/gemma-4-e4b-it"

# Processor and model are created once, at import time.
processor = AutoProcessor.from_pretrained(MODEL_ID, use_fast=False)
model = AutoModelForMultimodalLM.from_pretrained(MODEL_ID, device_map="auto", dtype=torch.bfloat16)

# Lower-case filename suffixes recognised for each media kind.
IMAGE_FILE_TYPES = (".jpg", ".jpeg", ".png", ".webp")
AUDIO_FILE_TYPES = (".wav", ".mp3", ".flac", ".ogg")
VIDEO_FILE_TYPES = (".mp4", ".mov", ".avi", ".webm")

# Upper bound on prompt length in tokens; overridable via the environment.
MAX_INPUT_TOKENS = int(os.environ.get("MAX_INPUT_TOKENS", "10_000"))

# Delimiters the model emits around its reasoning output.
THINKING_START = "<|channel>"
THINKING_END = "<channel|>"

# Every special token except the thinking delimiters is removed from decoded
# output; the delimiters stay so Gradio's reasoning_tags can find them on the
# frontend.
_KEEP_TOKENS = {THINKING_START, THINKING_END}
_STRIP_TOKENS = [tok for tok in processor.tokenizer.all_special_tokens if tok not in _KEEP_TOKENS]
# Longest first, so a short token never eats part of a longer one.
_STRIP_TOKENS.sort(key=len, reverse=True)
def _strip_special_tokens(text: str) -> str:
    """Return *text* with every token listed in ``_STRIP_TOKENS`` removed."""
    cleaned = text
    for special in _STRIP_TOKENS:
        if special in cleaned:
            cleaned = cleaned.replace(special, "")
    return cleaned
def _classify_file(path: str) -> str | None:
    """Map a file path to ``"image"``, ``"audio"`` or ``"video"`` by suffix.

    The comparison is case-insensitive. ``None`` is returned when the suffix
    matches none of the ``*_FILE_TYPES`` tuples.
    """
    name = path.lower()
    suffix_table = (
        ("image", IMAGE_FILE_TYPES),
        ("audio", AUDIO_FILE_TYPES),
        ("video", VIDEO_FILE_TYPES),
    )
    for kind, suffixes in suffix_table:
        if name.endswith(suffixes):
            return kind
    return None
def process_new_user_message(message: dict) -> list[dict]:
    """Turn the incoming message into a chat-template content list.

    Each attached file with a recognised media type becomes a
    ``{"type": <kind>, "url": <path>}`` entry; unrecognised files are dropped.
    A text entry is always appended last, even when the text is empty.
    """
    media_parts = [
        {"type": kind, "url": path}
        for path in message.get("files", [])
        if (kind := _classify_file(path))
    ]
    return [*media_parts, {"type": "text", "text": message.get("text", "")}]
def process_history(history: list[dict]) -> list[dict]:
    """Convert Gradio chat history into chat-template messages.

    Assistant turns are collapsed into one text entry (their text parts joined
    with a single space). Every other turn is treated as a user turn: text
    parts are copied, file parts with a recognised media type become
    ``{"type": <kind>, "url": <path>}`` entries, and a turn that ends up with
    no content is skipped.
    """
    converted: list[dict] = []
    for turn in history:
        role = turn["role"]
        parts = turn["content"]

        if role == "assistant":
            reply = " ".join(p["text"] for p in parts if p.get("type") == "text")
            converted.append({"role": "assistant", "content": [{"type": "text", "text": reply}]})
            continue

        user_parts: list[dict] = []
        for part in parts:
            part_type = part.get("type")
            if part_type == "text":
                user_parts.append({"type": "text", "text": part["text"]})
            elif part_type == "file":
                location = part["file"]["path"]
                media_kind = _classify_file(location)
                if media_kind:
                    user_parts.append({"type": media_kind, "url": location})
        if user_parts:
            converted.append({"role": "user", "content": user_parts})
    return converted
@spaces.GPU(duration=120)
@torch.inference_mode()
def _generate_on_gpu(inputs: BatchFeature, max_new_tokens: int, thinking: bool) -> Iterator[str]:
    """Run generation in a worker thread and stream the reply as it grows.

    Args:
        inputs: Tokenized prompt produced by ``processor.apply_chat_template``.
        max_new_tokens: Upper bound on the number of generated tokens.
        thinking: When true, the streamer keeps special tokens so the thinking
            delimiters survive decoding; all other special tokens are then
            removed with ``_strip_special_tokens``.

    Yields:
        The full text generated so far (not just the newest chunk).

    Raises:
        gr.Error: If ``model.generate`` raised inside the worker thread.
    """
    inputs = inputs.to(device=model.device, dtype=torch.bfloat16)
    streamer = TextIteratorStreamer(
        processor,
        timeout=30.0,
        skip_prompt=True,
        skip_special_tokens=not thinking,
    )
    generate_kwargs = {
        **inputs,
        "streamer": streamer,
        "max_new_tokens": max_new_tokens,
        "disable_compile": True,
    }
    exception_holder: list[Exception] = []

    def _generate() -> None:
        try:
            model.generate(**generate_kwargs)
        except Exception as e:  # noqa: BLE001
            exception_holder.append(e)
            # generate() only signals end-of-stream when it completes. Without
            # this the consumer loop below would block until the streamer
            # timeout and surface queue.Empty instead of the real error.
            streamer.end()

    thread = Thread(target=_generate)
    thread.start()

    accumulated = ""
    for text in streamer:
        accumulated += text
        if thinking:
            yield _strip_special_tokens(accumulated)
        else:
            yield accumulated

    thread.join()
    if exception_holder:
        msg = f"Generation failed: {exception_holder[0]}"
        raise gr.Error(msg)
# FBT003 is suppressed below: gr.validate API takes bool as first positional arg.
def validate_input(message: dict) -> dict:
    """Validate a multimodal message before it is submitted.

    Rejects a message with neither text nor files, a message mixing media
    kinds (images, audio, video), and a message with more than one audio or
    more than one video file. Files with an unrecognised suffix are ignored
    by these checks.
    """
    text_present = bool(message.get("text", "").strip())
    files_present = bool(message.get("files"))
    if not text_present and not files_present:
        return gr.validate(False, "Please enter a message or upload a file.")  # noqa: FBT003

    media_kinds = [k for k in map(_classify_file, message.get("files", [])) if k is not None]
    if len(set(media_kinds)) > 1:
        return gr.validate(False, "Please upload only one type of media (images, audio, or video) at a time.")  # noqa: FBT003
    if media_kinds.count("audio") > 1:
        return gr.validate(False, "Only one audio file can be uploaded at a time.")  # noqa: FBT003
    if media_kinds.count("video") > 1:
        return gr.validate(False, "Only one video file can be uploaded at a time.")  # noqa: FBT003
    return gr.validate(True, "")  # noqa: FBT003
def _has_media_type(messages: list[dict], media_type: str) -> bool:
"""Check if any message contains a content entry of the given media type."""
return any(c.get("type") == media_type for m in messages for c in m["content"])
def generate(
    message: dict,
    history: list[dict],
    thinking: bool = False,
    max_new_tokens: int = 1024,
    max_soft_tokens: int = 280,
    system_prompt: str = "",
) -> Iterator[str]:
    """Chat handler: build the prompt, check its size, and stream the reply.

    Args:
        message: New user message (``text`` plus optional ``files``).
        history: Previous turns, converted via ``process_history``.
        thinking: Adds ``enable_thinking=True`` to the chat-template call.
        max_new_tokens: Forwarded to ``_generate_on_gpu``.
        max_soft_tokens: Passed to the processor as
            ``images_kwargs["max_soft_tokens"]``.
        system_prompt: Optional system message placed first when non-empty.

    Yields:
        The accumulated reply text as produced by ``_generate_on_gpu``.

    Raises:
        gr.Error: If the tokenized prompt exceeds ``MAX_INPUT_TOKENS``.
    """
    conversation: list[dict] = (
        [{"role": "system", "content": [{"type": "text", "text": system_prompt}]}] if system_prompt else []
    )
    conversation += process_history(history)
    conversation.append({"role": "user", "content": process_new_user_message(message)})

    chat_kwargs: dict = dict(
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
        add_generation_prompt=True,
        # True exactly when the conversation contains a video entry.
        load_audio_from_video=_has_media_type(conversation, "video"),
        processor_kwargs={"images_kwargs": {"max_soft_tokens": max_soft_tokens}},
    )
    if thinking:
        chat_kwargs["enable_thinking"] = True

    inputs = processor.apply_chat_template(conversation, **chat_kwargs)

    # Reject over-long prompts before any generation work starts.
    n_tokens = inputs["input_ids"].shape[1]
    if n_tokens > MAX_INPUT_TOKENS:
        raise gr.Error(f"Input too long ({n_tokens} tokens). Maximum is {MAX_INPUT_TOKENS} tokens.")

    yield from _generate_on_gpu(inputs=inputs, max_new_tokens=max_new_tokens, thinking=thinking)
# Example prompts shown in the UI. Each row holds a single multimodal message
# dict; the (prompt, urls) pairs below are expanded into that shape.
examples = [
    [{"text": prompt, "files": list(urls)}]
    for prompt, urls in (
        # --- Text-only examples ---
        ("What is the capital of France?", ()),
        ("What is the water formula?", ()),
        ("Explain quantum entanglement in simple terms.", ()),
        ("I want to do a car wash that is 50 meters away, should I walk or drive?", ()),
        (
            "Write a poem about beer with 4 stanzas. Format the title as an H2 markdown heading and bold the first line of each stanza.",
            (),
        ),
        # --- Single-image examples ---
        (
            "Describe this image.",
            ("https://news.bbc.co.uk/media/images/38107000/jpg/_38107299_ronaldogoal_ap_300.jpg",),
        ),
        (
            "What is the city in this image? Describe what you see.",
            ("https://imgmd.net/images/v1/guia/1698673/rio-de-janeiro-4-c.jpg",),
        ),
        # --- Multi-image examples ---
        (
            "What are the key similarities between these three images?",
            (
                "https://news.bbc.co.uk/media/images/38107000/jpg/_38107299_ronaldogoal_ap_300.jpg",
                "https://ogimg.infoglobo.com.br/in/12547538-502-0e0/FT1086A/94-8705-14.jpg",
                "https://amazonasatual.com.br/wp-content/uploads/2021/01/Pele.jpg",
            ),
        ),
        # --- Audio examples ---
        (
            "Transcribe the audio.",
            ("https://huggingface.co/datasets/hf-internal-testing/dummy-audio-samples/resolve/main/bcn_weather.mp3",),
        ),
        (
            "Translate to Dutch.",
            ("https://huggingface.co/datasets/hf-internal-testing/dummy-audio-samples/resolve/main/bcn_weather.mp3",),
        ),
        # --- Video examples ---
        (
            "What is happening in this video?",
            ("https://huggingface.co/datasets/merve/vlm_test_images/resolve/main/concert.mp4",),
        ),
    )
]
# Chat pane: renders LaTeX and folds the model's thinking section using the
# same delimiters the backend keeps in the streamed text.
_chatbot = gr.Chatbot(
    scale=1,
    latex_delimiters=[
        {"left": "$$", "right": "$$", "display": True},
        {"left": "$", "right": "$", "display": False},
        {"left": "\\(", "right": "\\)", "display": False},
        {"left": "\\[", "right": "\\]", "display": True},
    ],
    reasoning_tags=[(THINKING_START, THINKING_END)],
)

# Input box: uploads or microphone recordings, restricted to the supported
# image/audio/video suffixes.
_textbox = gr.MultimodalTextbox(
    sources=["upload", "microphone"],
    file_types=[*IMAGE_FILE_TYPES, *AUDIO_FILE_TYPES, *VIDEO_FILE_TYPES],
    file_count="multiple",
    autofocus=True,
)

# Listed in the same order as generate()'s parameters after (message, history):
# thinking, max_new_tokens, max_soft_tokens, system_prompt.
_settings = [
    gr.Checkbox(label="Thinking", value=False),
    gr.Slider(label="Max New Tokens", minimum=100, maximum=4000, step=10, value=2000),
    gr.Dropdown(
        label="Image Token Budget",
        info="Higher values preserve more visual detail (useful for OCR/documents). Lower values are faster.",
        choices=[70, 140, 280, 560, 1120],
        value=280,
    ),
    gr.Textbox(label="System Prompt", value=""),
]

demo = gr.ChatInterface(
    fn=generate,
    validator=validate_input,
    chatbot=_chatbot,
    textbox=_textbox,
    multimodal=True,
    additional_inputs=_settings,
    additional_inputs_accordion=gr.Accordion("Settings", open=True),
    stop_btn=False,
    title="Gemma 4 E4B It",
    examples=examples,
    run_examples_on_click=False,
    cache_examples=False,
    delete_cache=(1800, 1800),
)

if __name__ == "__main__":
    demo.launch(css_paths="style.css", max_file_size="20MB")
@@ -0,0 +1,362 @@
# This file was autogenerated by uv via the following command:
# uv export --no-hashes --no-dev --group hf-spaces --no-emit-package typer-slim --no-emit-package spaces -o requirements.txt
accelerate==1.13.0
# via gemma-4-e4b-it
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.13.5
# via fsspec
aiosignal==1.4.0
# via aiohttp
annotated-doc==0.0.4
# via
# fastapi
# typer
annotated-types==0.7.0
# via pydantic
anyio==4.13.0
# via
# gradio
# httpx
# mcp
# sse-starlette
# starlette
attrs==26.1.0
# via
# aiohttp
# jsonschema
# referencing
audioop-lts==0.2.2 ; python_full_version >= '3.13'
# via gradio
brotli==1.2.0
# via gradio
certifi==2026.2.25
# via
# httpcore
# httpx
# requests
cffi==2.0.0 ; platform_python_implementation != 'PyPy'
# via cryptography
charset-normalizer==3.4.7
# via requests
click==8.3.2
# via
# typer
# uvicorn
colorama==0.4.6 ; sys_platform == 'win32'
# via
# click
# tqdm
cryptography==46.0.7
# via pyjwt
datasets==4.8.4
dill==0.4.1
# via
# datasets
# multiprocess
fastapi==0.136.0
# via gradio
filelock==3.28.0
# via
# datasets
# huggingface-hub
# torch
frozenlist==1.8.0
# via
# aiohttp
# aiosignal
fsspec==2026.2.0
# via
# datasets
# gradio-client
# huggingface-hub
# torch
gradio==6.12.0
# via
# gemma-4-e4b-it
# spaces
gradio-client==2.4.1
# via
# gradio
# hf-gradio
groovy==0.1.2
# via gradio
h11==0.16.0
# via
# httpcore
# uvicorn
hf-gradio==0.4.0
# via gradio
hf-xet==1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
# via huggingface-hub
httpcore==1.0.9
# via httpx
httpx==0.28.1
# via
# datasets
# gradio
# gradio-client
# huggingface-hub
# mcp
# safehttpx
# spaces
httpx-sse==0.4.3
# via mcp
huggingface-hub==1.11.0
# via
# accelerate
# datasets
# gradio
# gradio-client
# tokenizers
# transformers
idna==3.11
# via
# anyio
# httpx
# requests
# yarl
jinja2==3.1.6
# via
# gradio
# torch
jsonschema==4.26.0
# via mcp
jsonschema-specifications==2025.9.1
# via jsonschema
markdown-it-py==4.0.0
# via rich
markupsafe==3.0.3
# via
# gradio
# jinja2
mcp==1.27.0
# via gradio
mdurl==0.1.2
# via markdown-it-py
mpmath==1.3.0
# via sympy
multidict==6.7.1
# via
# aiohttp
# yarl
multiprocess==0.70.19
# via datasets
networkx==3.6.1
# via torch
numpy==2.4.4
# via
# accelerate
# datasets
# gradio
# pandas
# torchvision
# transformers
nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via
# nvidia-cudnn-cu12
# nvidia-cusolver-cu12
# torch
nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via
# nvidia-cusolver-cu12
# torch
nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via
# nvidia-cufft-cu12
# nvidia-cusolver-cu12
# nvidia-cusparse-cu12
# torch
nvidia-nvshmem-cu12==3.3.20 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
orjson==3.11.8
# via gradio
packaging==26.1
# via
# accelerate
# datasets
# gradio
# gradio-client
# huggingface-hub
# spaces
# transformers
pandas==3.0.2
# via
# datasets
# gradio
pillow==12.2.0
# via
# gradio
# torchvision
propcache==0.4.1
# via
# aiohttp
# yarl
psutil==5.9.8
# via
# accelerate
# spaces
pyarrow==23.0.1
# via datasets
pycparser==3.0 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy'
# via cffi
pydantic==2.12.5
# via
# fastapi
# gradio
# mcp
# pydantic-settings
# spaces
pydantic-core==2.41.5
# via pydantic
pydantic-settings==2.13.1
# via mcp
pydub==0.25.1
# via gradio
pygments==2.20.0
# via rich
pyjwt==2.12.1
# via mcp
python-dateutil==2.9.0.post0
# via pandas
python-dotenv==1.2.2
# via pydantic-settings
python-multipart==0.0.26
# via
# gradio
# mcp
pytz==2026.1.post1
# via gradio
pywin32==311 ; sys_platform == 'win32'
# via mcp
pyyaml==6.0.3
# via
# accelerate
# datasets
# gradio
# huggingface-hub
# transformers
referencing==0.37.0
# via
# jsonschema
# jsonschema-specifications
regex==2026.4.4
# via transformers
requests==2.33.1
# via
# datasets
# spaces
rich==15.0.0
# via typer
rpds-py==0.30.0
# via
# jsonschema
# referencing
safehttpx==0.1.7
# via gradio
safetensors==0.7.0
# via
# accelerate
# transformers
semantic-version==2.10.0
# via gradio
setuptools==82.0.1
# via torch
shellingham==1.5.4
# via typer
six==1.17.0
# via python-dateutil
sse-starlette==3.3.4
# via mcp
starlette==1.0.0
# via
# fastapi
# gradio
# mcp
# sse-starlette
sympy==1.14.0
# via torch
tokenizers==0.22.2
# via transformers
tomlkit==0.14.0
# via gradio
torch==2.9.1
# via
# accelerate
# gemma-4-e4b-it
# torchvision
torchcodec==0.9.1
# via gemma-4-e4b-it
torchvision==0.24.1
# via gemma-4-e4b-it
tqdm==4.67.3
# via
# datasets
# huggingface-hub
# transformers
transformers==5.5.4
# via gemma-4-e4b-it
triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
typer==0.24.1
# via
# gradio
# hf-gradio
# huggingface-hub
# transformers
typing-extensions==4.15.0
# via
# aiosignal
# anyio
# fastapi
# gradio
# gradio-client
# huggingface-hub
# mcp
# pydantic
# pydantic-core
# referencing
# spaces
# starlette
# torch
# typing-inspection
typing-inspection==0.4.2
# via
# fastapi
# mcp
# pydantic
# pydantic-settings
tzdata==2026.1 ; sys_platform == 'emscripten' or sys_platform == 'win32'
# via pandas
urllib3==2.6.3
# via requests
uvicorn==0.44.0
# via
# gradio
# mcp
xxhash==3.6.0
# via datasets
yarl==1.23.0
# via aiohttp