Loading...
Loading...
Adaptive multi-agent framework for automated data science tasks with planning, execution, and validation
npx skill4agent add aradotso/ai-agent-skills agentic-data-scientist
Skill by ara.so — AI Agent Skills collection.
# Install globally with uv
uv tool install agentic-data-scientist
# Or use directly with uvx (no installation)
uvx agentic-data-scientist --mode simple "your query"
npm install -g @anthropic-ai/claude-code
export OPENROUTER_API_KEY="your_openrouter_key" # For planning/review agents
export ANTHROPIC_API_KEY="your_anthropic_key" # For coding agent
# Disable network access (web search, URL fetching)
export DISABLE_NETWORK_ACCESS=true
# .env file
# Required
OPENROUTER_API_KEY=your_openrouter_key
ANTHROPIC_API_KEY=your_anthropic_key
# Optional
DISABLE_NETWORK_ACCESS=false # Set to true to disable web tools
# --mode
# Orchestrated mode: Full multi-agent workflow
agentic-data-scientist "Perform differential expression analysis" \
--mode orchestrated \
--files data.csv
# Simple mode: Direct coding, no planning
agentic-data-scientist "Write a CSV parser" \
--mode simple
# Single file
agentic-data-scientist "Analyze dataset" \
--mode orchestrated \
--files data.csv
# Multiple files
agentic-data-scientist "Compare datasets" \
--mode orchestrated \
-f data1.csv -f data2.csv -f metadata.json
# Directory upload (recursive)
agentic-data-scientist "Analyze all CSVs in folder" \
--mode orchestrated \
--files ./data_folder/
# Default: ./agentic_output/ (preserved after completion)
agentic-data-scientist "Analyze data" \
--mode orchestrated \
--files data.csv
# Custom working directory
agentic-data-scientist "Generate report" \
--mode orchestrated \
--files data.csv \
--working-dir ./my_analysis
# Temporary directory (auto-cleanup)
agentic-data-scientist "Quick exploration" \
--mode simple \
--files data.csv \
--temp-dir
# Force keep files (override temp-dir cleanup)
agentic-data-scientist "Analysis" \
--mode orchestrated \
--files data.csv \
--temp-dir \
--keep-files
# Custom log file location
agentic-data-scientist "Analyze" \
--mode orchestrated \
--files data.csv \
--log-file ./analysis.log
# Verbose logging
agentic-data-scientist "Debug issue" \
--mode simple \
--verbose
# Comprehensive analysis with multiple stages
agentic-data-scientist \
"Perform exploratory data analysis on sales data, \
identify trends, create visualizations, \
and build a predictive model for future sales" \
--mode orchestrated \
--files sales_2024.csv \
--working-dir ./sales_analysis \
--log-file analysis.log
# Fast coding without planning overhead
agentic-data-scientist \
"Write a Python script that reads multiple CSV files, \
merges them on a common ID column, \
and exports to Excel with formatting" \
--mode simple \
-f data1.csv -f data2.csv -f data3.csv \
--temp-dir
# Compare multiple datasets
agentic-data-scientist \
"Compare the distribution of features across treatment groups, \
perform statistical tests (t-test, ANOVA), \
and generate publication-ready plots" \
--mode orchestrated \
-f control.csv \
-f treatment_a.csv \
-f treatment_b.csv \
--working-dir ./stats_analysis
# Process all files in a directory
agentic-data-scientist \
"Analyze all patient data files in the folder, \
aggregate results, and create summary statistics" \
--mode orchestrated \
--files ./patient_data/ \
--working-dir ./patient_analysis
from agentic_data_scientist.cli import main
import sys
# Prepare arguments
sys.argv = [
'agentic-data-scientist',
'Perform clustering analysis on customer data',
'--mode', 'orchestrated',
'--files', 'customers.csv',
'--working-dir', './clustering_output'
]
# Run
main()
import asyncio
from pathlib import Path
from agentic_data_scientist.workflow import create_workflow
async def run_analysis():
    # Create workflow
    workflow = create_workflow(
        query="Analyze customer segments",
        mode="orchestrated",
        files=[Path("customers.csv")],
        working_dir=Path("./output"),
        disable_network=False
    )
    # Execute
    result = await workflow.execute()
    print(result)
asyncio.run(run_analysis())
# Start with simple mode for quick exploration
agentic-data-scientist \
"Load dataset and show basic statistics" \
--mode simple \
--files data.csv
# Then use orchestrated mode for deep analysis
agentic-data-scientist \
"Perform full statistical analysis including outlier detection, \
correlation analysis, and clustering" \
--mode orchestrated \
--files data.csv \
--working-dir ./deep_analysis
# Use orchestrated mode to develop a complete pipeline
agentic-data-scientist \
"Create a data processing pipeline that: \
1) Cleans and normalizes raw data \
2) Engineers new features \
3) Splits into train/test \
4) Trains multiple models \
5) Evaluates and selects best model \
6) Exports model and metrics" \
--mode orchestrated \
--files raw_data.csv \
--working-dir ./ml_pipeline
# Generate comprehensive reports
agentic-data-scientist \
"Analyze quarterly sales data and create an executive report \
with visualizations, key metrics, and recommendations" \
--mode orchestrated \
-f q1_sales.csv -f q2_sales.csv -f q3_sales.csv -f q4_sales.csv \
--working-dir ./quarterly_report
# Enable verbose logging for troubleshooting
agentic-data-scientist \
"Complex analysis task" \
--mode orchestrated \
--files data.csv \
--verbose \
--log-file debug.log \
--keep-files
User Query → Plan Maker → Plan Reviewer → Plan Parser → Structured Plan
Stage → Coding Agent → Review Agent → Criteria Checker → Stage Reflector
All Completed Stages → Summary Agent → Final Report
# Verify keys are set
echo $OPENROUTER_API_KEY
echo $ANTHROPIC_API_KEY
# Set them if missing
export OPENROUTER_API_KEY="your_key"
export ANTHROPIC_API_KEY="your_key"
# Install Claude Code CLI
npm install -g @anthropic-ai/claude-code
# Verify installation
claude --version
# Disable network tools if causing problems
export DISABLE_NETWORK_ACCESS=true
# Or in .env file
echo "DISABLE_NETWORK_ACCESS=true" >> .env
# Verify file exists
ls -la data.csv
# Use absolute paths
agentic-data-scientist "Analyze" \
--mode orchestrated \
--files /absolute/path/to/data.csv
# Check directory permissions for recursive upload
ls -la ./data_folder/
# Ensure directory is writable
mkdir -p ./output
chmod 755 ./output
# Use temp directory if permission issues
agentic-data-scientist "Analyze" \
--mode orchestrated \
--files data.csv \
--temp-dir
# Use verbose mode to see what's happening
agentic-data-scientist "Query" \
--mode orchestrated \
--files data.csv \
--verbose
# Try simple mode to isolate planning vs execution issues
agentic-data-scientist "Query" \
--mode simple \
--files data.csv
# Default behavior preserves files in ./agentic_output/
ls -la ./agentic_output/
# Explicitly set working directory
agentic-data-scientist "Analyze" \
--mode orchestrated \
--files data.csv \
--working-dir ./my_output
# Use --keep-files to override temp-dir cleanup
agentic-data-scientist "Analyze" \
--mode orchestrated \
--files data.csv \
--temp-dir \
--keep-files
from agentic_data_scientist.prompts import PLAN_MAKER_PROMPT
# Modify prompts for domain-specific needs
custom_prompt = PLAN_MAKER_PROMPT + """
Additional domain context:
- Focus on genomics data
- Use bioinformatics best practices
"""
# Configure MCP servers in your workflow
# Agents automatically gain access to tools