Specialized AI assistant for DSPy development with deep knowledge of predictors, optimizers, adapters, and GEPA integration. Provides session management, codebase indexing, and command-based workflows.
Install:

npx skill4agent add zpankz/mcp-skillset dspy-code

Supported predictors and modules: Predict, ChainOfThought, ChainOfThoughtWithHint, ProgramOfThought, ReAct, MultiChainComparison, Retrieve, TypedPredictor, Ensemble, majority

Supported optimizers: BootstrapFewShot, BootstrapFewShotWithRandomSearch, BootstrapFewShotWithOptuna, COPRO, MIPRO, MIPROv2, BetterTogether, Ensemble, KNNFewShot, LabeledFewShot, SignatureOptimizer

Supported adapters: ChatAdapter, JSONAdapter, FunctionAdapter, ImageAdapter
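Adapters are set on the global DSPy settings. A minimal sketch, assuming DSPy 2.5+ where dspy.LM, dspy.configure, and dspy.JSONAdapter are available (the model name is illustrative):

import dspy

# Configure the LM and swap the default ChatAdapter for JSON-structured outputs
lm = dspy.LM("openai/gpt-4o-mini")
dspy.configure(lm=lm, adapter=dspy.JSONAdapter())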
GEPA integration:

from dspy.gepa import GEPA

gepa = GEPA(
    metric=accuracy,
    population_size=10,
    generations=20,
    mutation_rate=0.3,
    crossover_rate=0.7
)
result = gepa.optimize(
    seed_prompt="question -> answer",
    training_examples=trainset,
    budget=100  # Max LLM calls
)

Session state:

session = {
    'id': 'session_123',
    'workspace': '/path/to/project',
    'created_at': '2024-01-15T10:30:00Z',
    'modules': [...],
    'optimizers': [...],
    'datasets': [...],
    'metrics': [...]
}

Codebase index schema:

interface CodebaseIndex {
  workspace: string;
  indexed_at: string;
  modules: Array<{
    path: string;
    name: string;
    signature?: string;
    type: string;
  }>;
  signatures: Array<{
    path: string;
    definition: string;
  }>;
  metrics: Array<{
    path: string;
    name: string;
    type: MetricType;
  }>;
}
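For illustration, a populated index for a small project might look like the following (paths, names, and the metric type value are hypothetical; the real index is produced by the codebase indexer):

indexed = {
    'workspace': '/path/to/project',
    'indexed_at': '2024-01-15T10:35:00Z',
    'modules': [
        {'path': 'modules/qa.py', 'name': 'QAModule', 'signature': 'question -> answer', 'type': 'ChainOfThought'},
    ],
    'signatures': [
        {'path': 'modules/qa.py', 'definition': 'question -> answer'},
    ],
    'metrics': [
        {'path': 'metrics/accuracy.py', 'name': 'accuracy', 'type': 'exact_match'},
    ],
}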
Module development workflow:

┌──────────────┐
│    /init     │  Initialize project structure
└──────┬───────┘
       │
       ▼
┌──────────────┐
│    Design    │  Define signatures and modules
└──────┬───────┘
       │
       ▼
┌──────────────┐
│  Implement   │  Write forward() methods
└──────┬───────┘
       │
       ▼
┌──────────────┐
│  /validate   │  Check correctness
└──────────────┘

Commands: /init <project_name>, /connect, /demo <template>, /validate <file>

Optimization workflow:

┌──────────────┐
│     Data     │  Prepare training/dev/test sets
└──────┬───────┘
       │
       ▼
┌──────────────┐
│    Metric    │  Define evaluation function
└──────┬───────┘
       │
       ▼
┌──────────────┐
│  /optimize   │  Compile with optimizer
└──────┬───────┘
       │
       ▼
┌──────────────┐
│   Evaluate   │  Test on dev/test sets
└──────┬───────┘
       │
       ▼
┌──────────────┐
│   /export    │  Save optimized program
└──────────────┘

Commands: /optimize <module>, /evaluate <module>, /export <format>

Command reference:

/init <project_name>
Creates the project structure:

project_name/
├── modules/      # DSPy modules
├── data/         # Training/dev/test datasets
├── metrics/      # Custom metrics
├── optimized/    # Saved optimized programs
├── tests/        # Unit tests
└── config.py     # Configuration

Options: --template <name>, --lm <model>, --retrieval

/connect

/demo <template>
Templates: simple-qa, rag, multi-hop, typed-output, classification, agent, ensemble, self-refining, hinted-qa, program-of-thought, chatbot, data-pipeline
Options: --with-optimization, --with-tests, --output <path>

/optimize <module>
Options: --optimizer <type>, --budget <N>, --metric <name>, --no-save, --use-gepa

/validate <file>
Returns a validation report:

{
  valid: boolean;
  errors: string[];        // Critical issues
  warnings: string[];      // Non-critical issues
  suggestions: string[];   // Improvement suggestions
}

/export <format>
Formats: python, json, yaml, markdown
Options: --include-examples, --include-tests, --minify
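A typical end-to-end session, using only the commands and options documented above (project, module, file, model, and optimizer values are illustrative):

/init rag_project --template rag --lm gpt-4o-mini
/demo rag --with-tests --output modules/rag.py
/validate modules/rag.py
/optimize rag --optimizer MIPROv2 --budget 100 --metric accuracy
/evaluate rag
/export python --include-examples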
Migration notes (DSPy 2.4 to 2.5+):

# DSPy 2.4 (old)
optimizer = dspy.BootstrapFewShot(
    metric=metric,
    max_bootstrapped_demos=4
)

# DSPy 2.5+ (new)
optimizer = dspy.BootstrapFewShot(
    metric=metric,
    max_bootstrapped_demos=4,
    max_labeled_demos=8  # New parameter
)

Module example:

import dspy

class QAModule(dspy.Module):
    """Question answering with chain of thought."""
    def __init__(self):
        super().__init__()
        self.qa = dspy.ChainOfThought("question -> answer")

    def forward(self, question):
        return self.qa(question=question)

# Usage
lm = dspy.OpenAI(model="gpt-3.5-turbo")
dspy.settings.configure(lm=lm)

qa = QAModule()
result = qa(question="What is DSPy?")
print(result.answer)

Check the module with /validate, then compile it with /optimize. A full optimization workflow looks like this:

# 1. Define metric
def accuracy(example, prediction, trace=None):
    return example.answer.lower() in prediction.answer.lower()

# 2. Prepare data
trainset = [
    dspy.Example(question="...", answer="...").with_inputs("question")
    for ... in data
]

# 3. Choose optimizer (MIPRO for 100+ examples)
optimizer = dspy.MIPRO(
    metric=accuracy,
    prompt_model=dspy.OpenAI("gpt-4"),
    task_model=dspy.OpenAI("gpt-3.5-turbo"),
    num_trials=20,
    minibatch_size=50
)

# 4. Compile
optimized_rag = optimizer.compile(rag_module, trainset=trainset)

# 5. Evaluate
from dspy.evaluate import Evaluate
evaluator = Evaluate(devset=devset, metric=accuracy)
score = evaluator(optimized_rag)
print(f"Accuracy: {score:.2%}")

# 6. Save
optimized_rag.save('rag_optimized.json')
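The saved program can be restored later with the module's load() method. A minimal sketch; RAGModule stands in for whatever class rag_module was built from (not defined in this document):

# Re-instantiate the program, then restore the optimized prompts and demos
rag_reloaded = RAGModule()
rag_reloaded.load('rag_optimized.json')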
GEPA optimization workflow:

from dspy.gepa import GEPA

# 1. Define task
seed_prompt = "question -> answer"
training_examples = trainset[:50]

# 2. Configure GEPA
gepa = GEPA(
    metric=accuracy,
    population_size=10,
    generations=20,
    mutation_rate=0.3,
    crossover_rate=0.7
)

# 3. Optimize
result = gepa.optimize(
    seed_prompt=seed_prompt,
    training_examples=training_examples,
    budget=100  # Max 100 LLM calls
)

# 4. Use optimized prompt
print(f"Best prompt: {result.best_prompt}")
print(f"Score: {result.best_score:.2%}")

# 5. Create module with optimized prompt
class OptimizedQA(dspy.Module):
    def __init__(self):
        super().__init__()
        self.qa = dspy.ChainOfThought(result.best_prompt)

    def forward(self, question):
        return self.qa(question=question)

Best practices

Start with the simplest module that could work:

# Good: Start simple
self.qa = dspy.ChainOfThought("question -> answer")
# Bad: Overengineering
self.qa = dspy.Ensemble([
dspy.ChainOfThought(...),
dspy.ProgramOfThought(...),
dspy.ReAct(...)
])  # Too complex for iteration

Iterate on a small training subset before scaling up:

# Iterate quickly with 10 examples
quick_optimizer = dspy.BootstrapFewShot(metric=accuracy)
quick_test = quick_optimizer.compile(module, trainset=trainset[:10])
# Then scale to full dataset
full_optimizer = dspy.MIPRO(metric=accuracy)
production = full_optimizer.compile(module, trainset=full_trainset)

Log every prediction in production:

# Log all predictions
from datetime import datetime

def predict_with_logging(module, input):
    prediction = module(input=input)
    # log_prediction is a user-supplied helper that persists the record (file, DB, etc.)
    log_prediction(input, prediction, timestamp=datetime.now())
    return prediction

Version each exported program and track its scores:

# Save with version
version = "v1.2.3"
optimized.save(f'models/qa_{version}.json')
# Track performance
performance_log = {
'version': version,
'dev_score': dev_score,
'test_score': test_score,
'optimizer': 'MIPRO',
'timestamp': datetime.now().isoformat()
}
save_performance_log(performance_log)  # user-supplied helper that records the entry

Keep each module focused on a single responsibility:

# Good: Single responsibility
class Retriever(dspy.Module):
    def forward(self, query):
        return self.retrieve(query)  # self.retrieve defined in __init__, e.g. dspy.Retrieve(k=3)

class Generator(dspy.Module):
    def forward(self, context, question):
        return self.generate(context=context, question=question)  # e.g. dspy.ChainOfThought("context, question -> answer")

class RAG(dspy.Module):
    def __init__(self):
        super().__init__()
        self.retriever = Retriever()
        self.generator = Generator()

    def forward(self, question):
        context = self.retriever(query=question)
        return self.generator(context=context, question=question)

Write unit tests for every module:

import unittest

class TestQAModule(unittest.TestCase):
    def setUp(self):
        self.qa = QAModule()

    def test_basic_question(self):
        result = self.qa(question="What is 2+2?")
        self.assertIsNotNone(result.answer)

    def test_complex_question(self):
        result = self.qa(question="Explain quantum computing")
        self.assertTrue(len(result.answer) > 50)

Quick reference:

Key parameters: max_bootstrapped_demos, num_trials, budget, num_threads=4, population_size, generations
Signature format: "input1, input2 -> output1, output2"
Module conventions: class MyModule(dspy.Module), __init__(), def forward(self, input: str) -> dspy.Prediction
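To illustrate the multi-field signature format from the quick reference, a small hypothetical module with two inputs and two outputs (the class and field names are illustrative, not part of the skill):

import dspy

class ContextQA(dspy.Module):
    """Answers a question from supplied context and reports a confidence label."""
    def __init__(self):
        super().__init__()
        # Two inputs, two outputs in "input1, input2 -> output1, output2" form
        self.qa = dspy.Predict("context, question -> answer, confidence")

    def forward(self, context: str, question: str) -> dspy.Prediction:
        return self.qa(context=context, question=question)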