Tool
DeepResearch
Tool ID: deepresearch
Creator: @anonymush43ckr
Downloads: 979+
This tool performs local deep research in real time using a browser-use web-ui instance, similar to OpenAI Deep Research.
README
No README available
Tool Code
""" title: BrowserUI Deep Research author: JAYADEEP V author_url: https://github.com/jaideepv # Or your repo if applicable git_url: https://github.com/open-webui/open-webui # Or your repo if applicable description: Performs deep web research using a BrowserUI (e.g., running on localhost:7788) instance via its Gradio API, providing streaming status updates. required_open_webui_version: 0.4.0 # Requires event emitters and async tools requirements: requests version: 0.1.0 license: MIT """ import requests import json import logging import os from typing import Callable, Any, Optional from pydantic import BaseModel, Field from datetime import datetime # Needed for potential future citation use # Configure basic logging for the tool logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(levelname)s - [BrowserUITool] - %(message)s", ) class Tools: # Valves: Configurable by Admins in Open WebUI settings class Valves(BaseModel): BROWSERUI_URL: str = Field( default="http://localhost:7788", description="Base URL for the BrowserUI Gradio API (e.g., http://localhost:7788)", ) LLM_PROVIDER: str = Field( default="openai", description="Default LLM provider to use in BrowserUI (e.g., 'openai', 'ollama', 'anthropic'). Must be supported by BrowserUI.", ) LLM_MODEL_NAME: str = Field( default="gpt-4o", description="Default LLM model name to use in BrowserUI (e.g., 'gpt-4o', 'llama3'). Must be supported by BrowserUI and the provider.", ) # Optional API Key Valve - Admins can set this, otherwise environment variables are checked API_KEY: Optional[str] = Field( default=None, description="[Optional] API Key for the selected LLM Provider. If unset, the tool will try common environment variables (e.g., OPENAI_API_KEY).", ) MAX_SEARCH_ITERATIONS: int = Field( default=3, description="Default maximum search iterations for the deep research agent.", ) MAX_QUERY_PER_ITER: int = Field( default=1, description="Default maximum queries per search iteration." ) LLM_TEMPERATURE: float = Field( default=0.6, description="Default temperature for the LLM used by BrowserUI.", ) LLM_NUM_CTX: int = Field( default=16000, description="Default context length (primarily for Ollama models in BrowserUI).", ) LLM_BASE_URL: str = Field( default="", description="[Optional] Custom Base URL for the LLM API endpoint if needed by BrowserUI.", ) USE_VISION: bool = Field( default=True, description="Allow the BrowserUI agent to use vision capabilities (if model supports).", ) HEADLESS_BROWSER: bool = Field( default=False, # Default to non-headless for easier debugging if needed description="Run the BrowserUI browser in headless mode.", ) REQUEST_TIMEOUT: int = Field( default=60, description="Timeout in seconds for the initial request to start the research task.", ) def __init__(self): """Initialize the Tool and its valves.""" self.valves = self.Valves() # If you were implementing custom citations, you would set self.citation = False here # self.citation = False logging.info("BrowserUI Deep Research Tool initialized.") async def perform_deep_research( self, research_task: str, __event_emitter__: Callable[[dict], Any] ) -> str: """ Performs deep research on a given topic using an external BrowserUI instance. :param research_task: The detailed research task or question. :param __event_emitter__: Open WebUI event emitter for status updates. :return: The final research report string or an error message. 
""" logging.info(f"Starting deep research task: '{research_task[:100]}...'") await __event_emitter__( { "type": "status", "data": { "description": "Initiating deep research request to BrowserUI...", "done": False, "hidden": False, }, } ) # --- Get configuration from valves --- api_host = self.valves.BROWSERUI_URL.rstrip("/") llm_provider = self.valves.LLM_PROVIDER llm_model_name = self.valves.LLM_MODEL_NAME llm_temperature = self.valves.LLM_TEMPERATURE llm_num_ctx = self.valves.LLM_NUM_CTX llm_base_url = self.valves.LLM_BASE_URL use_vision = self.valves.USE_VISION headless = self.valves.HEADLESS_BROWSER max_search_iterations = self.valves.MAX_SEARCH_ITERATIONS max_query_per_iter = self.valves.MAX_QUERY_PER_ITER request_timeout = self.valves.REQUEST_TIMEOUT # --- API Key Handling: Prioritize Valve, then Env Vars --- effective_api_key = self.valves.API_KEY # Check valve first if not effective_api_key: # Try environment variables if valve not set env_var_map = { "openai": "OPENAI_API_KEY", "anthropic": "ANTHROPIC_API_KEY", "google": "GOOGLE_API_KEY", "azure_openai": "AZURE_OPENAI_API_KEY", "mistral": "MISTRAL_API_KEY", # Add others as needed } env_key = env_var_map.get(llm_provider) if env_key: effective_api_key = os.environ.get(env_key) if effective_api_key: logging.info(f"Using API key from environment variable {env_key}") else: logging.warning( f"API key for {llm_provider} not set in Valves or found in env var {env_key}. Proceeding without explicit key." ) effective_api_key = "" # Ensure it's an empty string if not found else: logging.info( f"No specific environment variable mapped for provider '{llm_provider}'. Proceeding without explicit API key." ) effective_api_key = "" else: logging.info("Using API key provided in Admin Valves.") # --- Prepare request data matching BrowserUI /run_deep_search API --- # Ensure parameter order and types match the API documentation provided earlier request_data_list = [ research_task, # research_task (str) float(max_search_iterations), # max_search_iteration_input (float) float(max_query_per_iter), # max_query_per_iter_input (float) llm_provider, # llm_provider (Literal) llm_model_name, # llm_model_name (Literal) float(llm_num_ctx), # llm_num_ctx (float) float(llm_temperature), # llm_temperature (float) llm_base_url, # llm_base_url (str) effective_api_key or "", # llm_api_key (str) - Ensure empty string if None use_vision, # use_vision (bool) False, # use_own_browser (bool) - Let's default this to False for simplicity headless, # headless (bool) "", # chrome_cdp (str) - Default to empty ] request_payload = {"data": request_data_list} # Use the Gradio API endpoint path start_url = f"{api_host}/gradio_api/call/run_deep_search" logging.info(f"Posting to start URL: {start_url}") # Log parameters excluding sensitive API key log_payload_data = { f"param_{i}": v for i, v in enumerate(request_data_list) if i != 8 } # Index 8 is the API key logging.debug(f"Request Payload Data (key omitted): {log_payload_data}") try: # --- Step 1: Initiate the task --- response = requests.post( start_url, json=request_payload, timeout=request_timeout ) response.raise_for_status() response_json = response.json() event_id = response_json.get("event_id") or response_json.get( "session_hash" ) # Handle both possibilities if not event_id: logging.error( f"Failed to get event_id/session_hash from BrowserUI response: {response_json}" ) error_msg = "Error: Could not get task identifier from BrowserUI." 
await __event_emitter__( { "type": "status", "data": { "description": error_msg, "done": True, "hidden": False, }, } ) return error_msg # Return error to LLM logging.info(f"Task initiated. Event ID/Session Hash: {event_id}") await __event_emitter__( { "type": "status", "data": { "description": "Task submitted to BrowserUI, waiting for progress...", "done": False, "hidden": False, }, } ) # --- Step 2: Connect to the streaming endpoint --- stream_url = f"{api_host}/gradio_api/queue/data?session_hash={event_id}" logging.info(f"Connecting to stream URL: {stream_url}") final_result = "" download_link = ( "" # Not strictly needed for return, but can track if available ) # Use a context manager for the streaming request with requests.get(stream_url, stream=True, timeout=None) as stream_response: stream_response.raise_for_status() logging.info("Connected to BrowserUI event stream.") for line in stream_response.iter_lines(): if not line: continue decoded_line = line.decode("utf-8").strip() if decoded_line.startswith("data: "): decoded_line = decoded_line[len("data: ") :].strip() if not decoded_line: continue try: msg = json.loads(decoded_line) msg_type = msg.get("msg") logging.debug(f"Received stream message type: {msg_type}") status_description = None is_complete = False # Handle various message types for status updates if msg_type == "estimation": status_description = "BrowserUI: Estimating queue time..." elif msg_type == "process_starts": status_description = ( "BrowserUI: Research process starting..." ) elif msg_type == "heartbeat": continue # Usually too noisy for status updates elif msg_type == "progress": status_description = ( "BrowserUI: Making progress..." # Generic progress ) elif msg_type == "process_generating": status_description = "BrowserUI: Generating final report..." elif msg_type == "log": # Optionally relay logs log_msg = msg.get("log", "Log message") log_level = msg.get("level", "info").lower() status_description = f"BrowserUI Log ({log_level}): {log_msg[:100]}{'...' if len(log_msg)>100 else ''}" # logging.info(f"BrowserUI Log: {log_msg}") # Log it internally too elif msg_type == "process_completed": is_complete = True status_description = "BrowserUI: Task completed." output_data = msg.get("output", {}).get("data", []) if ( output_data and isinstance(output_data, list) and len(output_data) > 0 ): final_result = output_data[0] if len(output_data) > 1 and isinstance( output_data[1], dict ): # Check for download link if needed in the future download_link = output_data[1].get("url", "") logging.info( f"Download link found: {download_link}" ) logging.info("Final report received from BrowserUI.") else: final_result = "Error: BrowserUI completed but returned empty or invalid data." 
logging.error( f"Unexpected output format: {output_data}" ) break # Exit loop on completion # Emit status update if description was set if status_description: await __event_emitter__( { "type": "status", "data": { "description": status_description, "done": False, "hidden": False, }, } ) except json.JSONDecodeError: logging.warning( f"Could not decode JSON from stream line: '{decoded_line}'" ) except Exception as e: logging.error( f"Error processing stream message: {msg if 'msg' in locals() else decoded_line} - Error: {e}", exc_info=True, ) final_result = f"Error processing BrowserUI stream: {e}" await __event_emitter__( { "type": "status", "data": { "description": f"Error: {final_result}", "done": True, "hidden": False, }, } ) is_complete = True # Treat as complete with error break # Exit loop on error # --- Final processing after stream closes or completes --- if not final_result: final_result = "Error: Task finished, but no final report was captured from BrowserUI." logging.warning(final_result) await __event_emitter__( { "type": "status", "data": { "description": final_result, "done": True, "hidden": False, }, } ) else: # Emit final "completed" status if not already done by error handling if ( not is_complete ): # Only if loop finished without break due to 'process_completed' or error await __event_emitter__( { "type": "status", "data": { "description": "BrowserUI: Task completed successfully.", "done": True, "hidden": True, }, # Hide final status } ) # --- Emit final message content (optional but good practice) --- # This appears as a separate message in the chat after the LLM processes the return value # You might choose *not* to emit a message here and just rely on the LLM using the return value. # Let's comment it out for now, as the primary goal is to return the report to the LLM. # await __event_emitter__({ # "type": "message", # "data": { "content": f"**BrowserUI Research Report:**\n\n{final_result}" } # }) # --- Return the final report string to the LLM --- logging.info( f"Returning final result to LLM (length: {len(final_result)})." ) return final_result except requests.exceptions.Timeout: error_message = f"Error: Timeout connecting to BrowserUI API at {start_url} after {request_timeout}s." logging.error(error_message) await __event_emitter__( { "type": "status", "data": { "description": error_message, "done": True, "hidden": False, }, } ) return error_message except requests.exceptions.RequestException as e: error_message = ( f"Error: Network error connecting to BrowserUI API at {api_host}: {e}" ) logging.error(error_message, exc_info=True) await __event_emitter__( { "type": "status", "data": { "description": error_message, "done": True, "hidden": False, }, } ) return error_message # Return error string to LLM except Exception as e: error_message = f"An unexpected error occurred in the BrowserUI tool: {e}" logging.error(error_message, exc_info=True) await __event_emitter__( { "type": "status", "data": { "description": error_message, "done": True, "hidden": False, }, } ) return error_message # Return error string to LLM # Example of how to potentially use citations if needed later: # async def emit_citation(self, content, title, url, __event_emitter__): # await __event_emitter__( # { # "type": "citation", # "data": { # "document": [content], # "metadata": [ # { # "date_accessed": datetime.now().isoformat(), # "source": title, # } # ], # "source": {"name": title, "url": url}, # }, # } # )
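For reference, the two-step Gradio protocol the tool depends on can be exercised outside Open WebUI. The sketch below is a minimal standalone example, assuming a BrowserUI instance is reachable at http://localhost:7788 and that its run_deep_search endpoint accepts the same 13-element parameter list the tool builds above; the research task, provider, and model name shown here are placeholder values, not part of the tool itself.

# Minimal standalone sketch of the start/stream pattern used by the tool:
# 1) POST the parameter list to /gradio_api/call/run_deep_search to queue a task,
# 2) read server-sent events from /gradio_api/queue/data until "process_completed".
import json
import requests

BROWSERUI_URL = "http://localhost:7788"  # assumed local BrowserUI instance

payload = {
    "data": [
        "Summarize recent developments in solid-state batteries.",  # research_task (placeholder)
        3.0,  # max_search_iteration_input
        1.0,  # max_query_per_iter_input
        "openai",  # llm_provider (placeholder)
        "gpt-4o",  # llm_model_name (placeholder)
        16000.0,  # llm_num_ctx
        0.6,  # llm_temperature
        "",  # llm_base_url
        "",  # llm_api_key (omitted here)
        True,  # use_vision
        False,  # use_own_browser
        True,  # headless
        "",  # chrome_cdp
    ]
}

# Step 1: queue the research task and grab the session identifier.
start = requests.post(
    f"{BROWSERUI_URL}/gradio_api/call/run_deep_search", json=payload, timeout=60
)
start.raise_for_status()
start_json = start.json()
session = start_json.get("event_id") or start_json.get("session_hash")
if not session:
    raise RuntimeError(f"No session identifier in response: {start_json}")

# Step 2: stream queue events until the run completes, then print the report.
with requests.get(
    f"{BROWSERUI_URL}/gradio_api/queue/data?session_hash={session}",
    stream=True,
    timeout=None,
) as stream:
    stream.raise_for_status()
    for line in stream.iter_lines():
        if not line:
            continue
        text = line.decode("utf-8").strip()
        if text.startswith("data: "):
            text = text[len("data: ") :].strip()
        if not text:
            continue
        try:
            msg = json.loads(text)
        except json.JSONDecodeError:
            continue  # skip keep-alives and non-JSON lines
        if msg.get("msg") == "process_completed":
            data = msg.get("output", {}).get("data", [])
            print(data[0] if data else "No report returned.")
            break

If a script like this prints a report, the tool's valves (BROWSERUI_URL, LLM_PROVIDER, LLM_MODEL_NAME, and optionally API_KEY) can be pointed at the same BrowserUI instance.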