Loading...
Loading...
Context layer for AI data agents - query warehouses accurately with semantic layers, metrics, and wiki knowledge through MCP
npx skill4agent add aradotso/ai-agent-skills ktx-ai-data-agentsSkill by ara.so — AI Agent Skills collection.
# Install globally
npm install -g @kaelio/ktx
# Or use npx
npx @kaelio/ktx setup# Create or resume a ktx project in current directory
ktx setup
# Check project status
ktx statusktx setupktx.yamlmy-project/
├── ktx.yaml # Project configuration
├── semantic-layer/<connection-id>/ # YAML metric/dimension definitions
├── wiki/global/ # Shared business knowledge
├── wiki/user/<user-id>/ # User-scoped notes
├── raw-sources/<connection-id>/ # Ingest artifacts and reports
└── .ktx/ # Local state (git-ignored)ktx.yamlsemantic-layer/wiki/.ktx/version: 1.0
name: my-analytics-project
llm:
provider: anthropic
model: claude-sonnet-4-6
embeddings:
provider: openai
model: text-embedding-3-small
connections:
- id: warehouse
type: postgres
host: ${DATABASE_HOST}
port: 5432
database: analytics
user: ${DATABASE_USER}
password: ${DATABASE_PASSWORD}
ssl: true
context_sources:
- id: dbt_main
type: dbt
connection_id: warehouse
manifest_path: ./target/manifest.json
catalog_path: ./target/catalog.json
- id: notion_docs
type: notion
token: ${NOTION_TOKEN}
page_ids:
- 3fa85f64-5717-4562-b3fc-2c963f66afa6# LLM providers
export ANTHROPIC_API_KEY=your-key-here
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json
# Embeddings
export OPENAI_API_KEY=your-key-here
# Database credentials
export DATABASE_HOST=warehouse.example.com
export DATABASE_USER=readonly_user
export DATABASE_PASSWORD=secure-password
# Context sources
export NOTION_TOKEN=secret_notion_token
export LOOKER_API_TOKEN=looker-tokenconnections:
- id: postgres_warehouse
type: postgres
host: ${PG_HOST}
port: 5432
database: analytics
user: ${PG_USER}
password: ${PG_PASSWORD}
ssl: trueconnections:
- id: snowflake_warehouse
type: snowflake
account: ${SNOWFLAKE_ACCOUNT}
user: ${SNOWFLAKE_USER}
password: ${SNOWFLAKE_PASSWORD}
warehouse: COMPUTE_WH
database: ANALYTICS
schema: PUBLICconnections:
- id: bigquery_warehouse
type: bigquery
project_id: ${GCP_PROJECT_ID}
dataset: analytics
credentials_path: ${GOOGLE_APPLICATION_CREDENTIALS}# Build context from all configured sources
ktx ingest
# Build context for specific connection
ktx ingest --connection warehouse
# Force rebuild ignoring cache
ktx ingest --force
# Dry run to preview changes
ktx ingest --dry-run# Search semantic layer (metrics, dimensions)
ktx sl "revenue"
ktx sl "customer churn rate"
# Search wiki knowledge
ktx wiki "refund policy"
ktx wiki "data retention rules"
# Get detailed entity information
ktx describe metric monthly_recurring_revenue
ktx describe dimension customer_segment# Start MCP server for agent clients
ktx mcp start
# Start with specific project directory
ktx mcp start --project-dir /path/to/project
# Check MCP server status
ktx mcp status
# Stop MCP server
ktx mcp stop# Validate configuration
ktx validate
# Show project status
ktx status
# List all configured connections
ktx connections list
# Test connection
ktx connections test warehousesemantic-layer/warehouse/metrics.yamlmetrics:
- name: monthly_recurring_revenue
label: Monthly Recurring Revenue
description: Sum of all active subscription values normalized to monthly
type: sum
sql: |
CASE
WHEN billing_period = 'monthly' THEN amount
WHEN billing_period = 'annual' THEN amount / 12
END
table: subscriptions
filters:
- column: status
operator: equals
value: 'active'
dimensions:
- customer_segment
- plan_type
timestamp_column: created_at
- name: customer_count
label: Active Customers
description: Count of distinct active customer IDs
type: count_distinct
sql: customer_id
table: subscriptions
filters:
- column: status
operator: equals
value: 'active'semantic-layer/warehouse/dimensions.yamldimensions:
- name: customer_segment
label: Customer Segment
description: Business vs. enterprise customer classification
type: categorical
sql: |
CASE
WHEN annual_revenue > 100000 THEN 'Enterprise'
WHEN annual_revenue > 10000 THEN 'Business'
ELSE 'Startup'
END
table: customers
- name: signup_date
label: Signup Date
description: Date customer first signed up
type: time
sql: DATE(created_at)
table: customers
granularities:
- day
- week
- month
- quarter
- yearsemantic-layer/warehouse/joins.yamljoins:
- left_table: subscriptions
right_table: customers
type: left
conditions:
- left_column: customer_id
right_column: id
- left_table: subscriptions
right_table: plans
type: left
conditions:
- left_column: plan_id
right_column: id# Create global wiki page
cat > wiki/global/refund-policy.md << 'EOF'
# Refund Policy
## Overview
Customers can request refunds within 30 days of purchase.
## Rules
- Full refund: < 7 days
- Prorated refund: 7-30 days
- No refund: > 30 days
## Database Impact
Refunds update `transactions.status` to 'refunded' and create
negative entries in `revenue_events`.
EOF
# Create user-scoped note
mkdir -p wiki/user/$(whoami)
cat > wiki/user/$(whoami)/analysis-notes.md << 'EOF'
# Analysis Notes
## 2025-05 Revenue Analysis
Found discrepancy in EMEA revenue - missing Stripe events.
Tracked in JIRA-1234.
EOFcontext_sources:
- id: product_docs
type: notion
token: ${NOTION_TOKEN}
page_ids:
- 3fa85f64-5717-4562-b3fc-2c963f66afa6 # Product Roadmap
- 7c9e6679-7425-40de-944b-e07fc1f90ae7 # Data Dictionary
recursive: true # Include child pagesktx ingestwiki/global/# In your ktx project directory
ktx mcp start
# Or specify project location
ktx mcp start --project-dir ~/my-analytics~/Library/Application Support/Claude/claude_desktop_config.json{
"mcpServers": {
"ktx": {
"command": "ktx",
"args": ["mcp", "start", "--project-dir", "/absolute/path/to/project"]
}
}
}What was our MRR last month by customer segment?monthly_recurring_revenuecustomer_segmentktx_search_semantic_layerktx_search_wikiktx_describe_entityktx_list_connectionsktx_get_context_summary# 1. Navigate to analytics project
cd ~/projects/analytics
# 2. Run setup wizard
ktx setup
# Select: Anthropic Claude, OpenAI embeddings, configure Postgres connection
# 3. Add dbt context source
# Edit ktx.yaml to add dbt manifest/catalog paths
# 4. Build context
ktx ingest
# 5. Verify
ktx status
ktx sl "revenue"
# 6. Start MCP for agents
ktx mcp start# After dbt run or schema changes
dbt run
dbt docs generate
ktx ingest --connection warehouse
# After updating wiki pages
ktx ingest --source notion_docs
# Check for conflicts or issues
ktx validate# Find available metrics
ktx sl "churn"
# Output:
# Metrics:
# - customer_churn_rate (Monthly customer churn percentage)
# - mrr_churn (Monthly recurring revenue lost to churn)
#
# Dimensions:
# - churn_reason (Categorical reason for cancellation)
# Get metric details
ktx describe metric customer_churn_rate
# Now prompt agent with context:
# "Calculate customer_churn_rate for Q1 2025 by churn_reason"connections:
- id: production
type: snowflake
account: ${SNOWFLAKE_PROD_ACCOUNT}
# ... prod credentials
- id: staging
type: snowflake
account: ${SNOWFLAKE_STAGING_ACCOUNT}
# ... staging credentials
context_sources:
- id: dbt_prod
type: dbt
connection_id: production
manifest_path: ./prod/target/manifest.json
- id: dbt_staging
type: dbt
connection_id: staging
manifest_path: ./staging/target/manifest.json# Ingest specific warehouse
ktx ingest --connection production
# Search scoped to connection
ktx sl "revenue" --connection productionktx status
# If shows "ktx mcp start --project-dir ...", copy and run that commandls ktx.yaml
# If missing, run: ktx setupcat ~/Library/Application\ Support/Claude/claude_desktop_config.json
# Ensure path is absolute, not relative# Test connection directly
ktx connections test warehouse
# Common fixes:
# - Verify environment variables are set
# - Check firewall/VPN for warehouse access
# - Ensure user has SELECT permissions
# - For Snowflake, verify warehouse is running# Enable verbose logging
ktx ingest --verbose
# Check specific source
ktx ingest --source dbt_main --verbose
# Validate configuration
ktx validate
# Common issues:
# - dbt manifest/catalog paths incorrect
# - Missing environment variables
# - LLM API rate limits (retry with backoff)# List all metrics
ktx sl "*" --type metric
# Ensure YAML is valid
cat semantic-layer/warehouse/metrics.yaml
ktx validate# Check join definitions
cat semantic-layer/warehouse/joins.yaml
# Common issues:
# - Missing join between tables
# - Ambiguous join paths (fan trap)
# - Incorrect column namesktx mcp stop
ktx mcp startktx ingest --forcektx sl "your search term"
ktx wiki "your search term"
# If empty, context may not have been ingestedimport { KtxProject } from '@kaelio/ktx';
// Load project
const project = await KtxProject.load('/path/to/project');
// Search semantic layer
const metrics = await project.searchSemanticLayer('revenue', {
type: 'metric',
limit: 10
});
// Search wiki
const wikiPages = await project.searchWiki('refund policy', {
scope: 'global',
limit: 5
});
// Get entity details
const metric = await project.describeEntity('metric', 'monthly_recurring_revenue');
// Execute ingestion
await project.ingest({
connectionId: 'warehouse',
force: false
});semantic-layer/wiki/global/ktx.yamlktx ingestktx connections testwiki/user/llm:
provider: vertex
project_id: ${GCP_PROJECT_ID}
location: us-central1
model: claude-3-5-sonnet@20241022
max_tokens: 8192
temperature: 0.0embeddings:
provider: openai
model: text-embedding-3-large
dimensions: 1536
batch_size: 100# Cron example: Daily at 2 AM
0 2 * * * cd /path/to/project && ktx ingest --connection warehouse >> /var/log/ktx.log 2>&1from ktx_sl import SemanticLayer
# Load semantic layer
sl = SemanticLayer.from_project("/path/to/project")
# Build query
query = sl.query(
metrics=["monthly_recurring_revenue"],
dimensions=["customer_segment", "signup_month"],
filters=[
{"dimension": "signup_date", "operator": ">=", "value": "2025-01-01"}
],
order_by=[{"metric": "monthly_recurring_revenue", "desc": True}]
)
# Get SQL
sql = query.to_sql()
print(sql)