Loading...
Loading...
Capture screenshots and extract text via OCR using MacPilot. Take full-screen, region, or window screenshots, and recognize text in images or screen areas with multi-language support.
npx skill4agent add adhikjoshi/macpilot-skills macpilot-screenshot-ocr

macpilot screenshot --json # Capture to temp file
macpilot screenshot ~/Desktop/screen.png --json # Capture to specific path
macpilot screenshot --with-permissions --json # Use CGWindowListCreateImage directly

macpilot screenshot --region 100,200,800,600 --json
# Region format: x,y,width,height (from top-left corner)

macpilot screenshot --window "Safari" --json # Capture Safari window
macpilot screenshot --window "Finder" --json # Capture Finder window

macpilot screenshot --all-windows --json # Each window separately

macpilot screenshot --display 1 --json # Second display (0-indexed)

macpilot screenshot --format png ~/Desktop/shot.png # PNG (default, lossless)
macpilot screenshot --format jpg ~/Desktop/shot.jpg # JPEG (smaller files)

macpilot ocr scan /path/to/image.png --json
macpilot ocr scan ~/Desktop/screenshot.png --json

macpilot ocr scan 100 200 800 600 --json
# Arguments: x y width height (captures region then OCRs it)

macpilot ocr scan image.png --language en-US --json # English
macpilot ocr scan image.png --language ja --json # Japanese
macpilot ocr scan image.png --language zh-Hans --json # Simplified Chinese
macpilot ocr scan image.png --language de --json # German
macpilot ocr scan image.png --language fr --json # French

macpilot ocr click "Submit" --json # Find text on screen and click it
macpilot ocr click "OK" --app Finder --json # Click text in specific app
macpilot ocr click "Accept" --timeout 10 --json # Retry until text appears (10s)
--timeout

macpilot screen record start --output ~/Desktop/recording.mov --json
macpilot screen record start --output rec.mov --region 0,0,1920,1080 --json # Region
macpilot screen record start --output rec.mov --window Safari --json # Window
macpilot screen record start --output rec.mov --display 1 --json # Display
macpilot screen record start --output rec.mov --audio --json # With audio
macpilot screen record start --output rec.mov --quality high --fps 60 --json # Quality

macpilot screen record stop --json # Stop and save
macpilot screen record status --json # Check if recording
macpilot screen record pause --json # Pause recording
macpilot screen record resume --json # Resume recording
low, medium, high

macpilot display-info --json
# Returns: all displays with resolution, position, scale factor

# Take screenshot of specific region
macpilot screenshot --region 0,0,1920,1080 ~/tmp/capture.png --json
# Extract text from it
macpilot ocr scan ~/tmp/capture.png --json

# Directly OCR a screen region without saving
macpilot ocr scan 200 100 600 400 --json

# Instead of screenshot > OCR > parse > click, just:
macpilot ocr click "Submit" --json
macpilot ocr click "Next" --timeout 5 --json # Wait up to 5s for text to appear

# Screenshot a window to see its current state
macpilot screenshot --window "Safari" ~/tmp/safari.png --json
# Read the image to verify content
macpilot ocr scan ~/tmp/safari.png --json

macpilot screen record start --output ~/Desktop/demo.mov
macpilot app open Safari
macpilot wait seconds 2
macpilot keyboard key cmd+l
macpilot keyboard type "https://example.com"
macpilot keyboard key enter
macpilot wait seconds 3
macpilot screen record stop
display-info