Loading...
Loading...
AI-powered computer automation using MCP to control desktop apps, click elements, and interact with the UI on macOS, Linux, and Windows
npx skill4agent add aradotso/codex-skills open-computer-use-automation
Skill by ara.so — Codex Skills collection.
open-computer-usenpm i -g open-computer-useopen-computer-use# Codex
open-computer-use install-codex-mcp
# Claude Code
open-computer-use install-claude-mcp
# Gemini CLI (project scope)
open-computer-use install-gemini-mcp
# Gemini CLI (user scope)
open-computer-use install-gemini-mcp --scope user
# opencode
open-computer-use install-opencode-mcp
~/.codex/config.toml
~/.claude.json
{
"mcpServers": {
"open-computer-use": {
"command": "open-computer-use",
"args": ["mcp"]
}
}
}
# Install for Codex
npx skills add iFurySt/open-codex-computer-use -g -a codex --skill open-computer-use -y
# Install for Claude Code
npx skills add iFurySt/open-codex-computer-use -g -a claude-code --skill open-computer-use -y
# Update existing skill
npx skills update open-computer-use -g -y
# List installed skills
npx skills ls -g -a codex | rg 'open-computer-use'
# Check permissions and system readiness
open-computer-use doctor
# Call a single tool (returns MCP JSON)
open-computer-use call list_apps
# Call with arguments
open-computer-use call get_app_state --args '{"app":"TextEdit"}'
# Run a sequence of operations (maintains element_index state)
open-computer-use call --calls '[
{"tool":"get_app_state","args":{"app":"TextEdit"}},
{"tool":"press_key","args":{"app":"TextEdit","key":"Return"}}
]'
# Run sequence from file with custom sleep between operations
open-computer-use call --calls-file sequence.json --sleep 0.5
# Show help
open-computer-use -h
open-computer-use install-codex-plugin
{
"apps": ["Safari", "TextEdit", "Terminal"]
}appinclude_screenshot{
"app": "TextEdit",
"elements": [
{
"element_index": 0,
"role": "AXWindow",
"title": "Untitled",
"children": [...]
}
],
"screenshot": "data:image/png;base64,..."
}appelement_indexget_app_stateclick_type{
"success": true,
"element_index": 5
}apptextappkeyappapp// From an MCP client or AI agent
// 1. List running apps
const apps = await call_tool("list_apps");
// 2. Get app UI state
const state = await call_tool("get_app_state", {
app: "TextEdit",
include_screenshot: true
});
// 3. Find and click a button
// (element_index 3 might be a "Save" button from state.elements)
await call_tool("click_element", {
app: "TextEdit",
element_index: 3
});
// 4. Type text
await call_tool("type_text", {
app: "TextEdit",
text: "Hello, world!"
});
// 5. Save with keyboard shortcut
await call_tool("press_key", {
app: "TextEdit",
key: "Command+S"
});automation.json[
{
"tool": "activate_app",
"args": {"app": "TextEdit"}
},
{
"tool": "get_app_state",
"args": {"app": "TextEdit"}
},
{
"tool": "type_text",
"args": {
"app": "TextEdit",
"text": "This is automated text."
}
},
{
"tool": "press_key",
"args": {
"app": "TextEdit",
"key": "Return"
}
},
{
"tool": "take_screenshot",
"args": {"app": "TextEdit"}
}
]open-computer-use call --calls-file automation.json --sleep 1get_app_state{
"elements": [
{
"element_index": 0,
"role": "AXWindow",
"title": "Document",
"children": [
{
"element_index": 1,
"role": "AXButton",
"title": "Close",
"enabled": true
},
{
"element_index": 2,
"role": "AXTextArea",
"value": "Current text content"
}
]
}
]
}element_indexclick_element--sleepopen-computer-use call --calls-file seq.json --sleep 0.5{
"mcpServers": {
"open-computer-use": {
"command": "open-computer-use",
"args": ["mcp"],
"env": {}
}
}
}open-computer-use doctorlist_appsopen-computer-use call list_appsclick_elementget_app_stateopen-computer-use# Verify installation
which open-computer-use
# Test manual MCP mode
open-computer-use mcp
# Reinstall globally
npm i -g open-computer-use
# Check agent config file syntax
cat ~/.codex/config.toml # or relevant config
# Install AT-SPI dependencies (Ubuntu/Debian)
sudo apt-get install at-spi2-core
# Enable accessibility
gsettings set org.gnome.desktop.interface toolkit-accessibility true
import { exec } from 'child_process';
import { promisify } from 'util';
const execAsync = promisify(exec);
/**
 * Looks for a button titled "Submit" in the given app and clicks it.
 *
 * Runs `open-computer-use call get_app_state` for `appName`, parses the
 * command's stdout as JSON, and searches the direct children of the
 * top-level `elements` for the first node whose `role` is `AXButton` and
 * whose `title` is `Submit`. If one is found, its `element_index` is passed
 * to `open-computer-use call click_element`. Nested descendants below the
 * first child level are not searched.
 *
 * @param appName - Application name forwarded as the `app` argument.
 * @returns Resolves once the state query (and the click, if any) finished.
 * @throws Rejects if a CLI invocation fails or stdout is not valid JSON.
 */
async function automateApp(appName: string): Promise<void> {
  type UiNode = {
    role?: string;
    title?: string;
    element_index?: number;
    children?: UiNode[];
  };

  // Build the --args payload with JSON.stringify so quotes/backslashes in
  // appName cannot corrupt the JSON, then wrap it in single quotes with any
  // embedded single quote rewritten as '\'' so it cannot terminate the shell
  // string. NOTE(review): assumes a POSIX-style shell, as the original
  // single-quoted command already did.
  const toShellArg = (payload: Record<string, unknown>): string =>
    `'${JSON.stringify(payload).replace(/'/g, "'\\''")}'`;

  // Get app state
  const { stdout } = await execAsync(
    `open-computer-use call get_app_state --args ${toShellArg({ app: appName })}`
  );
  const state = JSON.parse(stdout) as { elements?: UiNode[] } | null;

  // A response without an `elements` array simply yields no match instead of
  // throwing a TypeError on `.flatMap`.
  const topLevel: UiNode[] = Array.isArray(state?.elements) ? state.elements : [];

  // Find button with specific title among the first-level children
  const button = topLevel
    .flatMap((node) => node?.children ?? [])
    .find((node) => node?.role === 'AXButton' && node?.title === 'Submit');

  // Only click when the index is a real integer; anything else would have
  // been spliced into the command verbatim.
  if (button && Number.isInteger(button.element_index)) {
    // Click it
    await execAsync(
      `open-computer-use call click_element --args ${toShellArg({
        app: appName,
        element_index: button.element_index,
      })}`
    );
  }
}
await automateApp('Safari');
open-computer-use
To interact with desktop apps:
1. Always list apps first to verify the target is running
2. Get app state to find element indices
3. Use element_index from state when clicking
4. Add small delays between operations (1s default)
5. Take screenshots to verify results
Example workflow:
- list_apps → verify "Safari" is running
- get_app_state(app="Safari") → find address bar element_index
- click_element(element_index=X) → focus address bar
- type_text(text="https://example.com") → enter URL
- press_key(key="Return") → navigate