DSPy Cost Optimization (ai-cutting-costs)
Reduce your AI API bill. Use when AI costs are too high, API calls are too expensive, you want to use cheaper models, optimize token usage, reduce LLM spending, route easy questions to cheap models, or make your AI feature more cost-effective. Covers DSPy cost optimization — cheaper models, smart routing, per-module LMs, fine-tuning, caching, and prompt reduction.
# Install the skill:
#   npx skill4agent add lebsral/dspy-programming-not-prompting-lms-skills ai-cutting-costs

import dspy

# Run your program and check token usage.
lm = dspy.LM("openai/gpt-4o-mini")
dspy.configure(lm=lm)
result = my_program(question="test")  # NOTE(review): `my_program` is defined elsewhere
dspy.inspect_history(n=3)  # Shows token counts per call
# Instead of GPT-4o (~$5/M input tokens):
lm = dspy.LM("openai/gpt-4o-mini")  # ~$0.15/M input tokens — 33x cheaper

# Or use an open-source model:
lm = dspy.LM("together_ai/meta-llama/Llama-3-70b-chat-hf")

# Related skills: /ai-improving-accuracy, /ai-switching-models
# Caching is ON by default — same inputs won't re-call the API.
lm = dspy.LM("openai/gpt-4o-mini")  # cached automatically

# To verify caching is working, run the same input twice
# and check that the second call is instant.
# Per-call LM overrides: dspy.context / set_lm (see below).

expensive_lm = dspy.LM("openai/gpt-4o")
cheap_lm = dspy.LM("openai/gpt-4o-mini")
dspy.configure(lm=expensive_lm)  # default for all modules


class MyPipeline(dspy.Module):
    """Classify with the cheap LM, generate with the default (expensive) LM."""

    def __init__(self):
        # Required: dspy.Module.__init__ must run so sub-modules are registered.
        super().__init__()
        self.classify = dspy.ChainOfThought(ClassifySignature)
        self.generate = dspy.ChainOfThought(GenerateSignature)

    def forward(self, text):
        # Use cheap model for simple classification.
        with dspy.context(lm=cheap_lm):
            category = self.classify(text=text)
        # Use expensive model only for complex generation.
        return self.generate(text=text, category=category.label)
# Set LM on specific modules permanently.
my_program.classify.lm = cheap_lm
my_program.generate.lm = expensive_lm


class ComplexityRouter(dspy.Module):
    """Route each question to a cheap or expensive model based on assessed complexity."""

    def __init__(self):
        # Required: dspy.Module.__init__ must run so sub-modules are registered.
        super().__init__()
        self.assess = dspy.Predict(AssessComplexity)
        self.simple_handler = dspy.Predict(AnswerQuestion)
        self.complex_handler = dspy.ChainOfThought(AnswerQuestion)

    def forward(self, question):
        # Use the cheap model to decide complexity.
        with dspy.context(lm=cheap_lm):
            assessment = self.assess(question=question)
        # Route to the right model.
        if assessment.complexity == "simple":
            with dspy.context(lm=cheap_lm):
                return self.simple_handler(question=question)
        else:
            with dspy.context(lm=expensive_lm):
                return self.complex_handler(question=question)
class AssessComplexity(dspy.Signature):
    """Assess if this question needs a powerful model or a simple one can handle it."""

    # NOTE(review): requires `from typing import Literal` at module top — confirm it is imported.
    question: str = dspy.InputField()
    complexity: Literal["simple", "complex"] = dspy.OutputField(
        desc="simple = factual/straightforward, complex = reasoning/nuanced"
    )
class CascadingPipeline(dspy.Module):
    """Answer with the cheap LM first; escalate to the expensive LM when not confident."""

    def __init__(self):
        # Required: dspy.Module.__init__ must run so sub-modules are registered.
        super().__init__()
        self.answer = dspy.ChainOfThought(AnswerQuestion)
        self.verify = dspy.Predict(CheckConfidence)

    def forward(self, question):
        # Try cheap model first; the confidence check also runs on the cheap model.
        with dspy.context(lm=cheap_lm):
            result = self.answer(question=question)
            check = self.verify(question=question, answer=result.answer)
        # If cheap model isn't confident, escalate to expensive.
        if not check.is_confident:
            with dspy.context(lm=expensive_lm):
                result = self.answer(question=question)
        return result
class CheckConfidence(dspy.Signature):
    """Is this answer confident and complete, or should we escalate to a better model?"""

    question: str = dspy.InputField()
    answer: str = dspy.InputField()
    is_confident: bool = dspy.OutputField()
# Fewer demos = shorter prompts = lower cost.
optimizer = dspy.BootstrapFewShot(
    metric=metric,  # NOTE(review): `metric` is defined elsewhere
    max_bootstrapped_demos=2,  # down from 4
    max_labeled_demos=2,  # down from 4
)
# Fewer passages = shorter context.
class DocSearch(dspy.Module):
    """Retrieve a small number of passages and answer from them."""

    def __init__(self):
        # Required: dspy.Module.__init__ must run so sub-modules are registered.
        super().__init__()
        self.retrieve = dspy.Retrieve(k=2)  # down from 5
        self.answer = dspy.ChainOfThought(AnswerSignature)
# Verbose — costs more tokens.
class Verbose(dspy.Signature):
    """Given the following text, carefully analyze the content and provide a detailed classification."""

    text: str = dspy.InputField(desc="The full text content to be analyzed and classified")
    label: str = dspy.OutputField(desc="The classification label for this text")
# Concise — same quality, fewer tokens.
class Concise(dspy.Signature):
    """Classify the text."""

    text: str = dspy.InputField()
    label: str = dspy.OutputField()
# Build and optimize with the expensive model, then fine-tune a cheap one.
optimizer = dspy.BootstrapFinetune(metric=metric, num_threads=24)
finetuned = optimizer.compile(my_program, trainset=trainset, teacher=teacher_optimized)

# Related skill: /ai-fine-tuning
# ChainOfThought — more tokens, better for complex tasks.
classifier = dspy.ChainOfThought(ClassifySignature)

# Predict — fewer tokens, fine for simple tasks.
classifier = dspy.Predict(ClassifySignature)

# Related skills: /ai-improving-accuracy, /ai-building-pipelines, /ai-fixing-errors