"""
title: Google Gemini 2.x Flash Image Generation (Streaming)
requirements: google-genai
"""
import base64
import mimetypes
# Open WebUI imports
from fastapi import Request
from pydantic import BaseModel, Field
from open_webui.routers.images import upload_image, load_b64_image_data
from open_webui.models.users import Users
# Google Gemini (banana-style) imports
from google import genai
from google.genai import types
class Tools:
"""Container class for Open WebUI tools."""
class Valves(BaseModel):
"""User-configurable settings for the tool."""
api_key: str = Field(default="", description="Your Google AI API key here")
        # Per the official "banana" example, default to the 2.5 Flash image preview model
model_name: str = Field(
default="gemini-2.5-flash-image-preview",
description="The Google AI model name for image+text generation (streaming)",
)
def __init__(self):
"""Initialize the Tool."""
self.valves = self.Valves()
async def gemini_generate_image(
self,
prompt: str,
__request__: Request,
__user__: dict,
__event_emitter__=None,
) -> str:
"""
        Generates image(s) and/or text from Gemini via the streaming API.
Streams TEXT chunks to UI and uploads IMAGE parts to Open WebUI storage.
Returns a short status message for the LLM.
"""
if not self.valves.api_key:
return (
"Error: API key is missing. Please configure it in the tool settings."
)
if not isinstance(prompt, str):
return "Error: The prompt must be a string."
# Start status
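        # (Open WebUI renders "status" events as a progress line in the chat
        # until a follow-up status event arrives with done=True.)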
if __event_emitter__:
await __event_emitter__(
{
"type": "status",
"data": {
"description": "Generating (streaming) with Gemini…",
"done": False,
},
}
)
try:
client = genai.Client(api_key=self.valves.api_key)
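            # NOTE: this synchronous client blocks the event loop while the
            # stream is consumed; google-genai also exposes an async surface
            # (client.aio), which this tool does not use.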
contents = [
types.Content(
role="user",
parts=[types.Part.from_text(text=prompt)],
)
]
generate_content_config = types.GenerateContentConfig(
response_modalities=["IMAGE", "TEXT"]
)
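            # Requesting both modalities lets the model interleave inline image
            # bytes (inline_data parts) with ordinary text in a single stream.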
# Track results
image_count = 0
text_seen = False
# Stream chunks
for chunk in client.models.generate_content_stream(
model=self.valves.model_name,
contents=contents,
config=generate_content_config,
):
                # Some chunks may be heartbeats/empty; guard before indexing
                if (
                    not chunk.candidates
                    or chunk.candidates[0].content is None
                    or chunk.candidates[0].content.parts is None
                ):
                    continue
parts = chunk.candidates[0].content.parts
                # 1) TEXT: the official example prints chunk.text; push any visible text to the UI
if getattr(chunk, "text", None):
text_seen = True
if __event_emitter__ and chunk.text.strip():
await __event_emitter__(
{
"type": "message",
"data": {"content": chunk.text},
}
)
                # 2) IMAGE: scan parts for inline_data and upload the bytes to Open WebUI storage
for part in parts:
inline = getattr(part, "inline_data", None)
if inline and inline.data:
mime_type: str = inline.mime_type or "image/png"
                        # Use the returned bytes as-is; no forced re-encode through Pillow
b64 = base64.b64encode(inline.data).decode("utf-8")
data_uri = f"data:{mime_type};base64,{b64}"
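                        # e.g. "data:image/png;base64,iVBORw0KGgo..." (truncated)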
                        # Hand off to Open WebUI to parse and persist the image
image_data, content_type = load_b64_image_data(data_uri)
url = upload_image(
__request__,
metadata={
"instances": {"prompt": prompt},
"parameters": {
"sampleCount": 1,
"outputOptions": {"mimeType": mime_type},
},
},
image_data=image_data,
content_type=content_type,
user=Users.get_user_by_id(__user__["id"]),
)
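                        # (Assumption: upload_image returns a URL the chat UI
                        # can embed directly in a markdown image tag.)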
image_count += 1
                        # Post the uploaded image back into the chat
if __event_emitter__:
await __event_emitter__(
{
"type": "message",
"data": {"content": f""},
}
)
# Done status
if __event_emitter__:
await __event_emitter__(
{
"type": "status",
"data": {
"description": f"Done. Images: {image_count} | Text: {'yes' if text_seen else 'no'}",
"done": True,
},
}
)
if image_count > 0:
return "Notify the user that the image has been successfully generated"
elif text_seen:
return "Notify the user that only text was generated"
else:
return "Notify the user that no output was generated"
except Exception as err:
if __event_emitter__:
await __event_emitter__(
{
"type": "status",
"data": {
"description": f"An error occurred: {err}",
"done": True,
},
}
)
return f"Tell the user: {err}"