Gets a comprehensive overview of the computer, including open applications, focused UI elements, window structures, and Chrome browser details (if available). This endpoint is the foundation for understanding the current state of the Windows environment and is essential for LLM-based agents to understand the context before taking actions.
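The endpoint utilizes Windows Accessibility Trees, Playwright, and various heuristics to provide detailed information about the current system state: open windows, active Chrome browser instances and their tabs, the currently focused UI element and its parent window, pinned taskbar icons, desktop icons, and commonly used installed programs.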
This is the root action for agentic computer control. The JSON data returned by this endpoint should be included in every LLM prompt to explain the current computer situation to the LLM.
Note: For a complete experience, use the Open Chrome action beforehand to get a browser instance that will be included in the overview.
Token Usage: Each invocation of this endpoint consumes 50 Smooth Operator API tokens.
This endpoint accepts a POST request with an empty JSON object. No request parameters are required.
{}
The response is a complex object containing multiple categories of information about the system state. The main components of the response include:
{ "windows": [], // List of open windows "chromeInstances": [], // List of Chrome browser instances "focusInfo": {}, // Information about focused elements "topPinnedTaskbarIcons": [], // Taskbar icons "topDesktopIcons": [], // Desktop icons "topInstalledPrograms": [], // Installed programs "importantNote": "" // Optional note regarding system status or token credits }
Field | Type | Description |
---|---|---|
windows | array | List of open windows with details about each window including title, process name, position, etc. |
chromeInstances | array | List of active Chrome browser instances with their tabs and details. |
focusInfo | object | Information about the currently focused UI element and its parent window. |
topPinnedTaskbarIcons | array | List of icons pinned to the Windows taskbar. |
topDesktopIcons | array | List of icons on the Windows desktop. |
topInstalledPrograms | array | List of commonly used installed programs. |
importantNote | string | Optional field that may contain important information about system status. If token credits are insufficient, this field will contain a call-to-action message. |
The `focusInfo` object is particularly important as it indicates what is currently in focus:
{ "focusedElement": {}, // Currently focused UI element "focusedElementParentWindow": {}, // Window containing the focused element "someOtherElementsInSameWindowThatMightBeRelevant": [], // Other UI elements "currentChromeTabMostRelevantElements": [], // Chrome elements (if applicable) "isChrome": boolean, // Whether Chrome is in focus "note": "string" // Additional context }
Field | Type | Description |
---|---|---|
focusedElement | object | Details about the currently focused UI element. |
focusedElementParentWindow | object | Details about the window containing the focused element. |
someOtherElementsInSameWindowThatMightBeRelevant | array | List of other potentially interactive UI elements in the same window. |
currentChromeTabMostRelevantElements | array | List of relevant UI elements in the current Chrome tab (if Chrome is in focus). |
isChrome | boolean | Indicates whether Chrome is currently in focus. |
note | string | Optional note with additional context about the focused application. |
{
"windows": [
{
"title": "Smooth Operator",
"processName": "smoothoperator",
"executablePath": "C:\\Program Files\\Smooth Operator\\smoothoperator.exe",
"position": {
"x": 0,
"y": 0,
"width": 1920,
"height": 1080
},
"windowId": "window_123",
"isFocused": true
},
// More windows...
],
"chromeInstances": [
{
"windowId": "chrome_window_456",
"title": "Google Chrome",
"tabs": [
{
"id": "tab_1",
"title": "Google",
"url": "https://www.google.com",
"isActive": true
},
// More tabs...
]
}
],
"focusInfo": {
"focusedElement": {
"name": "Search input",
"controlType": "Edit",
"elementId": "element_789"
},
"focusedElementParentWindow": {
"title": "Google Chrome",
"windowId": "chrome_window_456"
},
"someOtherElementsInSameWindowThatMightBeRelevant": [
// Relevant UI elements...
],
"currentChromeTabMostRelevantElements": [
{
"name": "Google Search",
"controlType": "Button",
"elementId": "button_012",
"cssSelector": "#search-form button[name='btnK']"
},
// More elements...
],
"isChrome": true,
"note": "The currently focused application is Google Chrome. Use chrome specific actions to control it."
},
"topPinnedTaskbarIcons": [
// Taskbar icons...
],
"topDesktopIcons": [
// Desktop icons...
],
"topInstalledPrograms": [
// Installed programs...
],
"importantNote": "Note: This endpoint requires token credits. To increase your token quota, please visit your account settings."
}
The `importantNote` field will contain a call-to-action message if your token credits are insufficient.
import requests
import json


def get_system_overview(api_key, timeout=60):
    """Fetch the system overview from the local tools server.

    Sends a POST request with an empty JSON object to the overview endpoint.

    Args:
        api_key: API key sent as a Bearer token in the Authorization header.
        timeout: Seconds to wait for the server before giving up. The default
            is deliberately generous; the endpoint's typical latency is not
            documented here, so tune it for your environment.

    Returns:
        The parsed JSON response (a dict) when the server answers with
        HTTP 200, otherwise None. Failures are reported on stdout.
    """
    url = "http://localhost:54321/tools-api/system/overview"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    try:
        # Without an explicit timeout, requests waits indefinitely on an
        # unresponsive server.
        response = requests.post(url, headers=headers, json={}, timeout=timeout)
    except requests.RequestException as exc:
        # Connection refused, timeout, etc.: report and return None, the same
        # way non-200 responses are handled below.
        print(f"Error: request failed: {exc}")
        return None

    if response.status_code == 200:
        return response.json()

    print(f"Error: {response.status_code}")
    print(response.text)
    return None


# Example usage
api_key = "your_api_key_here"
overview = get_system_overview(api_key)

if overview:
    # Access focused window information
    focus_info = overview.get("focusInfo", {})
    focused_window = focus_info.get("focusedElementParentWindow", {})
    print(f"Currently focused window: {focused_window.get('title', 'Unknown')}")

    # Check if Chrome is in focus
    if focus_info.get("isChrome", False):
        print("Chrome is in focus. Use chrome-specific actions.")

        # Access Chrome elements
        chrome_elements = focus_info.get("currentChromeTabMostRelevantElements", [])
        print(f"Found {len(chrome_elements)} relevant elements in the current Chrome tab")
/** Top-level shape of the system overview response. */
interface OverviewResponse {
  windows: WindowInfo[];
  chromeInstances: ChromeInstance[];
  focusInfo: FocusInformation;
  topPinnedTaskbarIcons: TaskbarIcon[];
  topDesktopIcons: DesktopIcon[];
  topInstalledPrograms: InstalledProgram[];
  importantNote: string;
}

/** Describes the currently focused UI element and its surroundings. */
interface FocusInformation {
  focusedElement: ControlInfo;
  focusedElementParentWindow: WindowInfo;
  someOtherElementsInSameWindowThatMightBeRelevant: ControlInfo[];
  currentChromeTabMostRelevantElements: ChromeElementInfo[];
  isChrome: boolean;
  note: string | null;
}

/**
 * Requests the system overview from the local tools server.
 *
 * Sends a POST with an empty JSON object body and a Bearer token.
 *
 * @param apiKey API key placed in the Authorization header.
 * @returns The parsed overview, or `null` when the server responds with a
 *          non-OK status or the request throws (both cases are logged).
 */
async function getSystemOverview(apiKey: string): Promise<OverviewResponse | null> {
  const url = "http://localhost:54321/tools-api/system/overview";

  try {
    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${apiKey}`
      },
      body: JSON.stringify({})
    });

    if (!response.ok) {
      console.error(`Error: ${response.status}`);
      console.error(await response.text());
      return null;
    }

    return (await response.json()) as OverviewResponse;
  } catch (error) {
    console.error("Failed to get system overview:", error);
    return null;
  }
}

// Example usage
async function example() {
  const apiKey = "your_api_key_here";
  const overview = await getSystemOverview(apiKey);

  if (overview) {
    // Access focused window information
    const focusInfo = overview.focusInfo;
    const focusedWindow = focusInfo.focusedElementParentWindow;
    console.log(`Currently focused window: ${focusedWindow?.title || 'Unknown'}`);

    // Check if Chrome is in focus
    if (focusInfo.isChrome) {
      console.log("Chrome is in focus. Use chrome-specific actions.");

      // Access Chrome elements
      const chromeElements = focusInfo.currentChromeTabMostRelevantElements || [];
      console.log(`Found ${chromeElements.length} relevant elements in the current Chrome tab`);
    }
  }
}
using System;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

/// <summary>
/// Minimal client for the local tools server's system overview endpoint.
/// </summary>
public class ToolsServerClient
{
    private readonly HttpClient _httpClient;
    private readonly string _apiKey;

    /// <summary>
    /// Creates a client bound to http://localhost:54321 that sends the given
    /// API key as a Bearer token on every request.
    /// </summary>
    /// <param name="apiKey">API key placed in the Authorization header.</param>
    public ToolsServerClient(string apiKey)
    {
        _httpClient = new HttpClient { BaseAddress = new Uri("http://localhost:54321") };
        _apiKey = apiKey;
        _httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {apiKey}");
    }

    /// <summary>
    /// Posts an empty JSON object to the overview endpoint and deserializes
    /// the response.
    /// </summary>
    /// <returns>The deserialized overview response.</returns>
    /// <exception cref="HttpRequestException">
    /// Thrown by <c>EnsureSuccessStatusCode</c> when the status is not successful.
    /// </exception>
    public async Task<OverviewResponse> GetSystemOverviewAsync()
    {
        var content = new StringContent("{}", Encoding.UTF8, "application/json");
        var response = await _httpClient.PostAsync("/tools-api/system/overview", content);
        response.EnsureSuccessStatusCode();

        var jsonResponse = await response.Content.ReadAsStringAsync();
        return JsonSerializer.Deserialize<OverviewResponse>(
            jsonResponse,
            new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
    }
}

// Response models

/// <summary>Top-level shape of the system overview response.</summary>
public class OverviewResponse
{
    public WindowInfo[] windows { get; set; }
    public ChromeInstance[] chromeInstances { get; set; }
    public FocusInformation focusInfo { get; set; }
    public TaskbarIcon[] topPinnedTaskbarIcons { get; set; }
    public DesktopIcon[] topDesktopIcons { get; set; }
    public InstalledProgram[] topInstalledPrograms { get; set; }
    public string importantNote { get; set; }
}

/// <summary>Describes the currently focused UI element and its surroundings.</summary>
public class FocusInformation
{
    public ControlInfo focusedElement { get; set; }
    public WindowInfo focusedElementParentWindow { get; set; }
    public ControlInfo[] someOtherElementsInSameWindowThatMightBeRelevant { get; set; }
    public ChromeElementInfo[] currentChromeTabMostRelevantElements { get; set; }
    public bool isChrome { get; set; }
    public string note { get; set; }
}

// Example usage
public class Example
{
    public static async Task Main()
    {
        var client = new ToolsServerClient("your_api_key_here");
        var overview = await client.GetSystemOverviewAsync();

        // Access focused window information
        var focusInfo = overview.focusInfo;
        var focusedWindow = focusInfo.focusedElementParentWindow;
        // Resolve the fallback first so the interpolated string stays on one line.
        var focusedTitle = focusedWindow?.title ?? "Unknown";
        Console.WriteLine($"Currently focused window: {focusedTitle}");

        // Check if Chrome is in focus
        if (focusInfo.isChrome)
        {
            Console.WriteLine("Chrome is in focus. Use chrome-specific actions.");

            // Access Chrome elements
            var chromeElements = focusInfo.currentChromeTabMostRelevantElements
                ?? Array.Empty<ChromeElementInfo>();
            Console.WriteLine($"Found {chromeElements.Length} relevant elements in the current Chrome tab");
        }
    }
}