"""
title: Google Gemini 2.x Flash Image Generation (Streaming)
requirements: google-genai
"""
import base64
import mimetypes
# Open WebUI imports
from fastapi import Request
from pydantic import BaseModel, Field
from open_webui.routers.images import upload_image, load_b64_image_data
from open_webui.models.users import Users
# Google Gemini (banana-style) imports
from google import genai
from google.genai import types
class Tools:
"""Container class for Open WebUI tools."""
class Valves(BaseModel):
"""User-configurable settings for the tool."""
api_key: str = Field(default="", description="Your Google AI API key here")
        # Per the official "banana" example, default to the 2.5 Flash image preview model
model_name: str = Field(
default="gemini-2.5-flash-image-preview",
description="The Google AI model name for image+text generation (streaming)",
)
def __init__(self):
"""Initialize the Tool."""
self.valves = self.Valves()
async def gemini_generate_image(
self,
prompt: str,
__request__: Request,
__user__: dict,
__event_emitter__=None,
) -> str:
"""
        Generates image(s) and/or text from Gemini via the streaming API.
Streams TEXT chunks to UI and uploads IMAGE parts to Open WebUI storage.
Returns a short status message for the LLM.
"""
if not self.valves.api_key:
return (
"Error: API key is missing. Please configure it in the tool settings."
)
if not isinstance(prompt, str):
return "Error: The prompt must be a string."
# Start status
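        # (Open WebUI renders "status" events as a progress line in the chat
        # until a follow-up status event arrives with done=True.)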
if __event_emitter__:
await __event_emitter__(
{
"type": "status",
"data": {
"description": "Generating (streaming) with Gemini…",
"done": False,
},
}
)
try:
client = genai.Client(api_key=self.valves.api_key)
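            # NOTE: this synchronous client blocks the event loop while the
            # stream is consumed; google-genai also exposes an async surface
            # (client.aio), which this tool does not use.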
contents = [
types.Content(
role="user",
parts=[types.Part.from_text(text=prompt)],
)
]
generate_content_config = types.GenerateContentConfig(
response_modalities=["IMAGE", "TEXT"]
)
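            # Requesting both modalities lets the model interleave inline image
            # bytes (inline_data parts) with ordinary text in a single stream.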
# Track results
image_count = 0
text_seen = False
# Stream chunks
for chunk in client.models.generate_content_stream(
model=self.valves.model_name,
contents=contents,
config=generate_content_config,
):
                # Some chunks may be heartbeats/empty; guard before indexing
                if (
                    not chunk.candidates
                    or chunk.candidates[0].content is None
                    or chunk.candidates[0].content.parts is None
                ):
                    continue
parts = chunk.candidates[0].content.parts
                # 1) TEXT: the official example prints chunk.text; push any visible text to the UI
if getattr(chunk, "text", None):
text_seen = True
if __event_emitter__ and chunk.text.strip():
await __event_emitter__(
{
"type": "message",
"data": {"content": chunk.text},
}
)
                # 2) IMAGE: scan parts for inline_data and upload the bytes to Open WebUI storage
for part in parts:
inline = getattr(part, "inline_data", None)
if inline and inline.data:
mime_type: str = inline.mime_type or "image/png"
                        # Use the returned bytes as-is; no forced re-encode through Pillow
b64 = base64.b64encode(inline.data).decode("utf-8")
data_uri = f"data:{mime_type};base64,{b64}"
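                        # e.g. "data:image/png;base64,iVBORw0KGgo..." (truncated)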
                        # Hand off to Open WebUI to parse and persist the image
image_data, content_type = load_b64_image_data(data_uri)
url = upload_image(
__request__,
metadata={
"instances": {"prompt": prompt},
"parameters": {
"sampleCount": 1,
"outputOptions": {"mimeType": mime_type},
},
},
image_data=image_data,
content_type=content_type,
user=Users.get_user_by_id(__user__["id"]),
)
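                        # (Assumption: upload_image returns a URL the chat UI
                        # can embed directly in a markdown image tag.)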
image_count += 1
                        # Post the uploaded image back into the chat
if __event_emitter__:
await __event_emitter__(
{
"type": "message",
"data": {"content": f""},
}
)
# Done status
if __event_emitter__:
await __event_emitter__(
{
"type": "status",
"data": {
"description": f"Done. Images: {image_count} | Text: {'yes' if text_seen else 'no'}",
"done": True,
},
}
)
if image_count > 0:
return "Notify the user that the image has been successfully generated"
elif text_seen:
return "Notify the user that only text was generated"
else:
return "Notify the user that no output was generated"
except Exception as err:
if __event_emitter__:
await __event_emitter__(
{
"type": "status",
"data": {
"description": f"An error occurred: {err}",
"done": True,
},
}
)
return f"Tell the user: {err}"