ai-ml-development

Compare original and translation side by side

🇺🇸

Original

English
🇨🇳

Translation

Chinese

AI & Machine Learning Development

人工智能与机器学习开发

Comprehensive guide for building AI/ML systems from prototyping to production.
从原型到生产构建AI/ML系统的综合指南。

Frameworks Overview

框架概述

Framework | Best For | Ecosystem
PyTorch | Research, flexibility | Hugging Face, Lightning
TensorFlow | Production, mobile | TFX, TF Lite, TF.js
JAX | High-performance, TPUs | Flax, Optax
scikit-learn | Classical ML | Simple, batteries-included

框架 | 适用场景 | 生态系统
PyTorch | 研究、灵活性优先 | Hugging Face, Lightning
TensorFlow | 生产环境、移动开发 | TFX, TF Lite, TF.js
JAX | 高性能、TPU适配 | Flax, Optax
scikit-learn | 经典机器学习 | 简洁易用、功能完备

PyTorch

PyTorch

Model Definition

模型定义

python
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvNet(nn.Module):
    """Two-block CNN for 32x32 RGB inputs (e.g. CIFAR-10).

    Each conv block halves spatial resolution via 2x2 max-pooling, so a
    32x32 input reaches the classifier head as 64 * 8 * 8 = 4096 features.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class logits of shape (batch, num_classes)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # flatten(1) keeps the batch dimension intact; the original
        # x.view(-1, 64 * 8 * 8) silently re-batches when the spatial
        # size does not match, instead of raising a clear error.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)
python
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvNet(nn.Module):
    """Two-block CNN for 32x32 RGB inputs (e.g. CIFAR-10).

    Each conv block halves spatial resolution via 2x2 max-pooling, so a
    32x32 input reaches the classifier head as 64 * 8 * 8 = 4096 features.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class logits of shape (batch, num_classes)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # flatten(1) keeps the batch dimension intact; the original
        # x.view(-1, 64 * 8 * 8) silently re-batches when the spatial
        # size does not match, instead of raising a clear error.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

Training Loop

训练循环

python
from torch.utils.data import DataLoader
from torch.optim import AdamW
from tqdm import tqdm

def train_model(
    model: nn.Module,
    train_loader: DataLoader,
    val_loader: DataLoader,
    epochs: int = 10,
    lr: float = 1e-3,
    device: str = "cuda"
) -> dict:
    """Train `model` with AdamW + cross-entropy and validate each epoch.

    Returns a history dict {"train_loss": [...], "val_acc": [...]} with one
    entry per epoch. (The original declared `-> dict` but returned None.)
    """
    model = model.to(device)
    optimizer = AdamW(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    history: dict[str, list[float]] = {"train_loss": [], "val_acc": []}

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0
        for batch in tqdm(train_loader):
            inputs, labels = batch[0].to(device), batch[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        # Mean loss over batches; max() guards an empty loader.
        history["train_loss"].append(epoch_loss / max(len(train_loader), 1))

        # Validation: gradients disabled, eval() turns off dropout/batchnorm updates.
        model.eval()
        correct = total = 0
        with torch.no_grad():
            for batch in val_loader:
                inputs, labels = batch[0].to(device), batch[1].to(device)
                outputs = model(inputs)
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

        history["val_acc"].append(100. * correct / total)
        print(f"Epoch {epoch+1}: Val Acc {100.*correct/total:.2f}%")

    return history
python
from torch.utils.data import DataLoader
from torch.optim import AdamW
from tqdm import tqdm

def train_model(
    model: nn.Module,
    train_loader: DataLoader,
    val_loader: DataLoader,
    epochs: int = 10,
    lr: float = 1e-3,
    device: str = "cuda"
) -> dict:
    """Train `model` with AdamW + cross-entropy and validate each epoch.

    Returns a history dict {"train_loss": [...], "val_acc": [...]} with one
    entry per epoch. (The original declared `-> dict` but returned None.)
    """
    model = model.to(device)
    optimizer = AdamW(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    history: dict[str, list[float]] = {"train_loss": [], "val_acc": []}

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0
        for batch in tqdm(train_loader):
            inputs, labels = batch[0].to(device), batch[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        # Mean loss over batches; max() guards an empty loader.
        history["train_loss"].append(epoch_loss / max(len(train_loader), 1))

        # Validation: gradients disabled, eval() turns off dropout/batchnorm updates.
        model.eval()
        correct = total = 0
        with torch.no_grad():
            for batch in val_loader:
                inputs, labels = batch[0].to(device), batch[1].to(device)
                outputs = model(inputs)
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

        history["val_acc"].append(100. * correct / total)
        print(f"第 {epoch+1} 轮训练: 验证准确率 {100.*correct/total:.2f}%")

    return history

PyTorch Lightning

PyTorch Lightning

python
import pytorch_lightning as pl
from torchmetrics import Accuracy

class LitModel(pl.LightningModule):
    """Lightning wrapper: cross-entropy training + multiclass-accuracy validation.

    NOTE(review): relies on `AdamW` being in scope at module level
    (torch.optim.AdamW in the companion snippet) — confirm the import.
    """

    def __init__(self, model: nn.Module, lr: float = 1e-3):
        super().__init__()
        self.model = model  # wrapped backbone network
        self.lr = lr  # learning rate handed to AdamW
        self.criterion = nn.CrossEntropyLoss()
        # Hard-coded to 10 classes; keep in sync with the dataset.
        self.accuracy = Accuracy(task="multiclass", num_classes=10)

    def training_step(self, batch, batch_idx):
        """One optimization step; Lightning backpropagates the returned loss."""
        x, y = batch
        logits = self.model(x)
        loss = self.criterion(logits, y)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Accumulate accuracy; logging the metric object defers aggregation."""
        x, y = batch
        logits = self.model(x)
        self.accuracy(logits, y)
        self.log("val_acc", self.accuracy)

    def configure_optimizers(self):
        # Lightning calls this once to build the optimizer.
        return AdamW(self.parameters(), lr=self.lr)

python
import pytorch_lightning as pl
from torchmetrics import Accuracy

class LitModel(pl.LightningModule):
    """Lightning wrapper: cross-entropy training + multiclass-accuracy validation.

    NOTE(review): relies on `AdamW` being in scope at module level
    (torch.optim.AdamW in the companion snippet) — confirm the import.
    """

    def __init__(self, model: nn.Module, lr: float = 1e-3):
        super().__init__()
        self.model = model  # wrapped backbone network
        self.lr = lr  # learning rate handed to AdamW
        self.criterion = nn.CrossEntropyLoss()
        # Hard-coded to 10 classes; keep in sync with the dataset.
        self.accuracy = Accuracy(task="multiclass", num_classes=10)

    def training_step(self, batch, batch_idx):
        """One optimization step; Lightning backpropagates the returned loss."""
        x, y = batch
        logits = self.model(x)
        loss = self.criterion(logits, y)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Accumulate accuracy; logging the metric object defers aggregation."""
        x, y = batch
        logits = self.model(x)
        self.accuracy(logits, y)
        self.log("val_acc", self.accuracy)

    def configure_optimizers(self):
        # Lightning calls this once to build the optimizer.
        return AdamW(self.parameters(), lr=self.lr)

Hugging Face Transformers

Hugging Face Transformers

Text Classification

文本分类

python
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
)
from datasets import load_dataset

# Fine-tune BERT for binary sequence classification on IMDB.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Downloads the IMDB dataset on first run (network side effect).
dataset = load_dataset("imdb")

def preprocess(examples):
    # Pads every example to the model's max length and truncates longer ones.
    return tokenizer(examples["text"], truncation=True, padding="max_length")

tokenized = dataset.map(preprocess, batched=True)

trainer = Trainer(
    model=model,
    args=TrainingArguments(
        output_dir="./results",
        num_train_epochs=3,
        per_device_train_batch_size=16,
        # NOTE(review): recent transformers versions renamed this to
        # `eval_strategy` — confirm against the pinned version.
        evaluation_strategy="epoch",
    ),
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
)

trainer.train()
python
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
)
from datasets import load_dataset

# Fine-tune BERT for binary sequence classification on IMDB.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Downloads the IMDB dataset on first run (network side effect).
dataset = load_dataset("imdb")

def preprocess(examples):
    # Pads every example to the model's max length and truncates longer ones.
    return tokenizer(examples["text"], truncation=True, padding="max_length")

tokenized = dataset.map(preprocess, batched=True)

trainer = Trainer(
    model=model,
    args=TrainingArguments(
        output_dir="./results",
        num_train_epochs=3,
        per_device_train_batch_size=16,
        # NOTE(review): recent transformers versions renamed this to
        # `eval_strategy` — confirm against the pinned version.
        evaluation_strategy="epoch",
    ),
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
)

trainer.train()

Fine-Tuning LLMs with LoRA

使用LoRA微调大语言模型

python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model, TaskType
from trl import SFTTrainer  # imported for the (not shown here) training step

# Load Llama-2-7B with 4-bit quantization so it fits on a single GPU.
# NOTE(review): `load_in_4bit=True` is deprecated in newer transformers in
# favor of `quantization_config=BitsAndBytesConfig(...)` — verify the version.
model_name = "meta-llama/Llama-2-7b-hf"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# LoRA: train small rank-16 adapters on the attention query/value
# projections instead of all base-model parameters.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model, TaskType
from trl import SFTTrainer  # imported for the (not shown here) training step

# Load Llama-2-7B with 4-bit quantization so it fits on a single GPU.
# NOTE(review): `load_in_4bit=True` is deprecated in newer transformers in
# favor of `quantization_config=BitsAndBytesConfig(...)` — verify the version.
model_name = "meta-llama/Llama-2-7b-hf"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# LoRA: train small rank-16 adapters on the attention query/value
# projections instead of all base-model parameters.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

LLM Integration

LLM集成

OpenAI API

OpenAI API

python
from openai import OpenAI

# Reads OPENAI_API_KEY from the environment by default.
client = OpenAI()

def chat_completion(messages: list[dict], model: str = "gpt-4") -> str:
    """Return the assistant text of the first completion choice.

    `messages` is a list of {"role": ..., "content": ...} dicts.
    """
    response = client.chat.completions.create(
        model=model,
        messages=messages,
    )
    return response.choices[0].message.content
python
from openai import OpenAI

# Reads OPENAI_API_KEY from the environment by default.
client = OpenAI()

def chat_completion(messages: list[dict], model: str = "gpt-4") -> str:
    """Return the assistant text of the first completion choice.

    `messages` is a list of {"role": ..., "content": ...} dicts.
    """
    response = client.chat.completions.create(
        model=model,
        messages=messages,
    )
    return response.choices[0].message.content

Function calling

函数调用

def extract_entities(text: str) -> dict: response = client.chat.completions.create( model="gpt-4", messages=[{"role": "user", "content": text}], tools=[{ "type": "function", "function": { "name": "extract_entities", "parameters": { "type": "object", "properties": { "people": {"type": "array", "items": {"type": "string"}}, "places": {"type": "array", "items": {"type": "string"}}, }, }, }, }], ) return response.choices[0].message.tool_calls[0].function.arguments
undefined
def extract_entities(text: str) -> dict: response = client.chat.completions.create( model="gpt-4", messages=[{"role": "user", "content": text}], tools=[{ "type": "function", "function": { "name": "extract_entities", "parameters": { "type": "object", "properties": { "people": {"type": "array", "items": {"type": "string"}}, "places": {"type": "array", "items": {"type": "string"}}, }, }, }, }], ) return response.choices[0].message.tool_calls[0].function.arguments
undefined

Anthropic Claude API

Anthropic Claude API

python
import anthropic

# Reads ANTHROPIC_API_KEY from the environment by default.
client = anthropic.Anthropic()

def claude_completion(prompt: str, model: str = "claude-3-sonnet-20240229") -> str:
    """Send a single-turn user prompt and return the first text block."""
    message = client.messages.create(
        model=model,
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}],
    )
    return message.content[0].text
python
import anthropic

# Reads ANTHROPIC_API_KEY from the environment by default.
client = anthropic.Anthropic()

def claude_completion(prompt: str, model: str = "claude-3-sonnet-20240229") -> str:
    """Send a single-turn user prompt and return the first text block."""
    message = client.messages.create(
        model=model,
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}],
    )
    return message.content[0].text

LangChain

LangChain

python
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

llm = ChatOpenAI(model="gpt-4")
prompt = ChatPromptTemplate.from_template("Summarize: {text}")
# LCEL pipe: prompt -> model -> plain-string parser.
chain = prompt | llm | StrOutputParser()

# invoke() fills the {text} placeholder and runs the whole chain.
result = chain.invoke({"text": "Long document here..."})

python
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

llm = ChatOpenAI(model="gpt-4")
prompt = ChatPromptTemplate.from_template("总结内容: {text}")
# LCEL pipe: prompt -> model -> plain-string parser.
chain = prompt | llm | StrOutputParser()

# invoke() fills the {text} placeholder and runs the whole chain.
result = chain.invoke({"text": "此处为长文档内容..."})

Vector Databases

向量数据库

Pinecone

Pinecone

python
from pinecone import Pinecone

# NOTE(review): placeholder API key — load from an env var, never hard-code.
pc = Pinecone(api_key="xxx")
index = pc.Index("my-index")
python
from pinecone import Pinecone

# NOTE(review): placeholder API key — load from an env var, never hard-code.
pc = Pinecone(api_key="xxx")
index = pc.Index("my-index")

Upsert vectors

上传向量

index.upsert(vectors=[ {"id": "1", "values": [0.1, 0.2], "metadata": {"text": "..."}}, ])
index.upsert(vectors=[ {"id": "1", "values": [0.1, 0.2], "metadata": {"text": "..."}}, ])

Query

查询向量

results = index.query(vector=[0.1, 0.2], top_k=5, include_metadata=True)
undefined
results = index.query(vector=[0.1, 0.2], top_k=5, include_metadata=True)
undefined

ChromaDB (Local)

ChromaDB(本地部署)

python
import chromadb

# PersistentClient keeps the collection on disk under ./chroma_db.
client = chromadb.PersistentClient(path="./chroma_db")
collection = client.get_or_create_collection(name="documents")

# Embeddings are computed by the collection's default embedding function.
collection.add(
    documents=["Doc 1", "Doc 2"],
    ids=["doc1", "doc2"],
)

results = collection.query(query_texts=["search query"], n_results=5)

python
import chromadb

# PersistentClient keeps the collection on disk under ./chroma_db.
client = chromadb.PersistentClient(path="./chroma_db")
collection = client.get_or_create_collection(name="documents")

# Embeddings are computed by the collection's default embedding function.
collection.add(
    documents=["文档1", "文档2"],
    ids=["doc1", "doc2"],
)

results = collection.query(query_texts=["搜索关键词"], n_results=5)

MLOps

MLOps

Model Registry (MLflow)

模型注册(MLflow)

python
import mlflow

# Groups subsequent runs under this experiment (created if absent).
mlflow.set_experiment("my-experiment")

# start_run() opens a tracked run; the context manager closes it cleanly.
# NOTE(review): `model` is assumed to be defined earlier in the session.
with mlflow.start_run():
    mlflow.log_params({"lr": 0.001, "epochs": 10})
    mlflow.log_metrics({"accuracy": 0.95})
    mlflow.pytorch.log_model(model, "model")
python
import mlflow

# Groups subsequent runs under this experiment (created if absent).
mlflow.set_experiment("我的实验项目")

# start_run() opens a tracked run; the context manager closes it cleanly.
# NOTE(review): MLflow restricts param/metric names to a limited character
# set — the non-ASCII keys below may be rejected by the tracking server; verify.
# NOTE(review): `model` is assumed to be defined earlier in the session.
with mlflow.start_run():
    mlflow.log_params({"学习率": 0.001, "训练轮数": 10})
    mlflow.log_metrics({"准确率": 0.95})
    mlflow.pytorch.log_model(model, "model")

Model Serving (FastAPI)

模型部署(FastAPI)

python
from fastapi import FastAPI
from pydantic import BaseModel
import torch

app = FastAPI()
# NOTE(review): torch.load unpickles arbitrary objects — only load trusted
# checkpoints. Newer torch defaults to weights_only=True, which would break
# loading a fully pickled model here; verify the pinned torch version.
model = torch.load("model.pt")
model.eval()

class PredictionRequest(BaseModel):
    # Flat numeric feature vector; length must match the model's input size.
    features: list[float]

@app.post("/predict")
async def predict(request: PredictionRequest):
    """Return the argmax class index for a single feature vector."""
    with torch.no_grad():
        tensor = torch.tensor([request.features])
        output = model(tensor)
        return {"prediction": output.argmax().item()}

python
from fastapi import FastAPI
from pydantic import BaseModel
import torch

app = FastAPI()
# NOTE(review): torch.load unpickles arbitrary objects — only load trusted
# checkpoints. Newer torch defaults to weights_only=True, which would break
# loading a fully pickled model here; verify the pinned torch version.
model = torch.load("model.pt")
model.eval()

class PredictionRequest(BaseModel):
    # Flat numeric feature vector; length must match the model's input size.
    features: list[float]

@app.post("/predict")
async def predict(request: PredictionRequest):
    """Return the argmax class index for a single feature vector."""
    with torch.no_grad():
        tensor = torch.tensor([request.features])
        output = model(tensor)
        return {"prediction": output.argmax().item()}

Best Practices

最佳实践

Training

训练环节

  • Experiment tracking (MLflow, W&B)
  • Mixed precision training
  • Gradient accumulation
  • Early stopping
  • Learning rate scheduling
  • 实验追踪(MLflow, W&B)
  • 混合精度训练
  • 梯度累积
  • 早停机制
  • 学习率调度

Deployment

部署环节

  • Model versioning
  • A/B testing
  • Monitoring for drift
  • Fallback mechanisms

  • 模型版本管理
  • A/B测试
  • 模型漂移监控
  • 降级回退(fallback)机制

LLM Application Development

LLM应用开发

RAG Architecture (Retrieval-Augmented Generation)

RAG架构(检索增强生成)

python
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
python
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

1. Load and chunk documents

1. 加载并切分文档

text_splitter = RecursiveCharacterTextSplitter( chunk_size=1000, chunk_overlap=200, separators=["\n\n", "\n", ". ", " "], ) chunks = text_splitter.split_documents(documents)
text_splitter = RecursiveCharacterTextSplitter( chunk_size=1000, chunk_overlap=200, separators=["\n\n", "\n", ". ", " "], ) chunks = text_splitter.split_documents(documents)

2. Embed and store in vector database

2. 生成嵌入并存储到向量数据库

embeddings = OpenAIEmbeddings(model="text-embedding-3-small") vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory="./db")
embeddings = OpenAIEmbeddings(model="text-embedding-3-small") vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory="./db")

3. Create retrieval chain

3. 创建检索链

retriever = vectorstore.as_retriever( search_type="mmr", # Maximal Marginal Relevance search_kwargs={"k": 5}, )
prompt = ChatPromptTemplate.from_template(""" Answer based on the following context. If the answer is not in the context, say so.
Context: {context}
Question: {question} """)
chain = ( {"context": retriever, "question": RunnablePassthrough()} | prompt | ChatOpenAI(model="gpt-4o") )
result = chain.invoke("What is the refund policy?")
undefined
retriever = vectorstore.as_retriever( search_type="mmr", # 最大边际相关性 search_kwargs={"k": 5}, )
prompt = ChatPromptTemplate.from_template(""" 基于以下上下文回答问题。如果答案不在上下文中,请直接说明。
上下文: {context}
问题: {question} """)
chain = ( {"context": retriever, "question": RunnablePassthrough()} | prompt | ChatOpenAI(model="gpt-4o") )
result = chain.invoke("退款政策是什么?")
undefined

Vector Databases

向量数据库对比

Database | Type | Best For
pgvector | PostgreSQL extension | Existing Postgres, hybrid queries
Pinecone | Managed cloud | Production scale, serverless
Chroma | Local/embedded | Prototyping, small-medium datasets
Weaviate | Self-hosted/cloud | Multimodal, GraphQL interface
Qdrant | Self-hosted/cloud | High performance, filtering
python
undefined
数据库 | 类型 | 适用场景
pgvector | PostgreSQL扩展 | 已有Postgres环境、混合查询场景
Pinecone | 托管云服务 | 生产级规模、无服务器架构
Chroma | 本地/嵌入式 | 原型开发、中小规模数据集
Weaviate | 自托管/云服务 | 多模态数据、GraphQL接口支持
Qdrant | 自托管/云服务 | 高性能、高级过滤需求
python
undefined

pgvector with SQLAlchemy

结合SQLAlchemy使用pgvector

from pgvector.sqlalchemy import Vector
class Document(Base): tablename = "documents" id = Column(Integer, primary_key=True) content = Column(Text) embedding = Column(Vector(1536)) # OpenAI embedding dimension
from pgvector.sqlalchemy import Vector
class Document(Base): tablename = "documents" id = Column(Integer, primary_key=True) content = Column(Text) embedding = Column(Vector(1536)) # OpenAI嵌入向量维度

Similarity search

相似度搜索

from sqlalchemy import text results = session.execute(text(""" SELECT content, embedding <=> :query_embedding AS distance FROM documents ORDER BY embedding <=> :query_embedding LIMIT 5 """), {"query_embedding": str(query_vector)})
undefined
from sqlalchemy import text results = session.execute(text(""" SELECT content, embedding <=> :query_embedding AS distance FROM documents ORDER BY embedding <=> :query_embedding LIMIT 5 """), {"query_embedding": str(query_vector)})
undefined

Prompt Engineering Patterns

提示词工程模式

python
undefined
python
undefined

System prompt pattern

系统提示词模式

SYSTEM_PROMPT = """You are a helpful assistant that answers questions about {domain}.
Rules:
  • Only answer based on provided context
  • If uncertain, say "I don't know"
  • Cite sources when possible
  • Be concise and factual """
SYSTEM_PROMPT = """你是一个专注于{domain}领域的助手。
规则:
  • 仅基于提供的上下文回答
  • 不确定时请回答“我不知道”
  • 尽可能引用来源
  • 回答简洁、准确 """

Few-shot prompting

少样本提示词

FEW_SHOT_PROMPT = """ Classify the sentiment of the following text.
Text: "The product arrived on time and works perfectly!" Sentiment: positive
Text: "Terrible customer service, waited 3 hours." Sentiment: negative
Text: "{user_input}" Sentiment:"""
FEW_SHOT_PROMPT = """ 对以下文本进行情感分类。
文本: "产品按时送达,使用效果完美!" 情感: 正面
文本: "客服态度极差,等待了3小时。" 情感: 负面
文本: "{user_input}" 情感:"""

Chain-of-thought prompting

思维链提示词

COT_PROMPT = """ Solve step by step:
  1. Identify the key information
  2. Break down the problem
  3. Work through each step
  4. Provide the final answer
Problem: {problem} """
undefined
COT_PROMPT = """ 请分步解决问题:
  1. 识别关键信息
  2. 拆解问题
  3. 逐步推导
  4. 给出最终答案
问题: {problem} """
undefined

Structured Outputs

结构化输出

python
undefined
python
undefined

Anthropic Claude structured output

Anthropic Claude结构化输出

import anthropic from pydantic import BaseModel
class ExtractedEntity(BaseModel): name: str type: str # person, org, location confidence: float
class ExtractionResult(BaseModel): entities: list[ExtractedEntity] summary: str
client = anthropic.Anthropic() message = client.messages.create( model="claude-sonnet-4-20250514", max_tokens=1024, messages=[{"role": "user", "content": f"Extract entities from: {text}"}], # Claude supports tool_use for structured output tools=[{ "name": "extract_entities", "description": "Extract named entities from text", "input_schema": ExtractionResult.model_json_schema(), }], tool_choice={"type": "tool", "name": "extract_entities"}, )
import anthropic from pydantic import BaseModel
class ExtractedEntity(BaseModel): name: str type: str # 人物、组织、地点 confidence: float
class ExtractionResult(BaseModel): entities: list[ExtractedEntity] summary: str
client = anthropic.Anthropic() message = client.messages.create( model="claude-sonnet-4-20250514", max_tokens=1024, messages=[{"role": "user", "content": f"从以下文本提取实体: {text}"}], # Claude支持通过tool_use实现结构化输出 tools=[{ "name": "extract_entities", "description": "从文本中提取命名实体", "input_schema": ExtractionResult.model_json_schema(), }], tool_choice={"type": "tool", "name": "extract_entities"}, )

OpenAI structured output

OpenAI结构化输出

from openai import OpenAI
client = OpenAI() response = client.beta.chat.completions.parse( model="gpt-4o", messages=[{"role": "user", "content": f"Extract entities from: {text}"}], response_format=ExtractionResult, ) result = response.choices[0].message.parsed
undefined
from openai import OpenAI
client = OpenAI() response = client.beta.chat.completions.parse( model="gpt-4o", messages=[{"role": "user", "content": f"从以下文本提取实体: {text}"}], response_format=ExtractionResult, ) result = response.choices[0].message.parsed
undefined

Tool Use / Function Calling

工具调用/函数调用

python
undefined
python
undefined

Claude tool use

Claude工具调用

tools = [ { "name": "search_database", "description": "Search the product database", "input_schema": { "type": "object", "properties": { "query": {"type": "string", "description": "Search query"}, "category": {"type": "string", "enum": ["electronics", "clothing", "books"]}, }, "required": ["query"], }, }, { "name": "get_weather", "description": "Get current weather for a location", "input_schema": { "type": "object", "properties": { "location": {"type": "string"}, }, "required": ["location"], }, }, ]
tools = [ { "name": "search_database", "description": "搜索产品数据库", "input_schema": { "type": "object", "properties": { "query": {"type": "string", "description": "搜索关键词"}, "category": {"type": "string", "enum": ["electronics", "clothing", "books"]}, }, "required": ["query"], }, }, { "name": "get_weather", "description": "获取指定地点的当前天气", "input_schema": { "type": "object", "properties": { "location": {"type": "string"}, }, "required": ["location"], }, }, ]

Agentic loop: call LLM, execute tools, feed results back

Agent循环: 调用LLM、执行工具、反馈结果

while True: response = client.messages.create( model="claude-sonnet-4-20250514", messages=messages, tools=tools, )
if response.stop_reason == "end_turn":
    break

# Execute tool calls
for block in response.content:
    if block.type == "tool_use":
        result = execute_tool(block.name, block.input)
        messages.append({"role": "assistant", "content": response.content})
        messages.append({
            "role": "user",
            "content": [{"type": "tool_result", "tool_use_id": block.id, "content": str(result)}],
        })
undefined
while True: response = client.messages.create( model="claude-sonnet-4-20250514", messages=messages, tools=tools, )
if response.stop_reason == "end_turn":
    break

# 执行工具调用
for block in response.content:
    if block.type == "tool_use":
        result = execute_tool(block.name, block.input)
        messages.append({"role": "assistant", "content": response.content})
        messages.append({
            "role": "user",
            "content": [{"type": "tool_result", "tool_use_id": block.id, "content": str(result)}],
        })
undefined

Claude API / Anthropic SDK Patterns

Claude API/Anthropic SDK使用模式

python
import anthropic

client = anthropic.Anthropic()
python
import anthropic

client = anthropic.Anthropic()

Basic message

基础消息调用

response = client.messages.create( model="claude-sonnet-4-20250514", max_tokens=4096, system="You are a helpful coding assistant.", messages=[ {"role": "user", "content": "Explain async/await in Python"}, ], )
response = client.messages.create( model="claude-sonnet-4-20250514", max_tokens=4096, system="你是一个乐于助人的编程助手。", messages=[ {"role": "user", "content": "解释Python中的async/await"}, ], )

Streaming

流式输出

with client.messages.stream( model="claude-sonnet-4-20250514", max_tokens=4096, messages=[{"role": "user", "content": prompt}], ) as stream: for text in stream.text_stream: print(text, end="", flush=True)
with client.messages.stream( model="claude-sonnet-4-20250514", max_tokens=4096, messages=[{"role": "user", "content": prompt}], ) as stream: for text in stream.text_stream: print(text, end="", flush=True)

Vision (image input)

视觉能力(图片输入)

import base64
with open("screenshot.png", "rb") as f: image_data = base64.standard_b64encode(f.read()).decode()
response = client.messages.create( model="claude-sonnet-4-20250514", max_tokens=1024, messages=[{ "role": "user", "content": [ {"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": image_data}}, {"type": "text", "text": "Describe this UI and suggest improvements"}, ], }], )
undefined
import base64
with open("screenshot.png", "rb") as f: image_data = base64.standard_b64encode(f.read()).decode()
response = client.messages.create( model="claude-sonnet-4-20250514", max_tokens=1024, messages=[{ "role": "user", "content": [ {"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": image_data}}, {"type": "text", "text": "描述这个UI并提出改进建议"}, ], }], )
undefined

LangChain / LlamaIndex

LangChain / LlamaIndex

python
undefined
python
undefined

LangChain LCEL (LangChain Expression Language)

LangChain LCEL(LangChain表达式语言)

from langchain_anthropic import ChatAnthropic from langchain_core.prompts import ChatPromptTemplate from langchain_core.output_parsers import StrOutputParser
llm = ChatAnthropic(model="claude-sonnet-4-20250514")
chain = ( ChatPromptTemplate.from_messages([ ("system", "You are a helpful assistant."), ("user", "{input}"), ]) | llm | StrOutputParser() )
from langchain_anthropic import ChatAnthropic from langchain_core.prompts import ChatPromptTemplate from langchain_core.output_parsers import StrOutputParser
llm = ChatAnthropic(model="claude-sonnet-4-20250514")
chain = ( ChatPromptTemplate.from_messages([ ("system", "你是一个乐于助人的助手。"), ("user", "{input}"), ]) | llm | StrOutputParser() )

LlamaIndex for document Q&A

使用LlamaIndex实现文档问答

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
documents = SimpleDirectoryReader("data/").load_data() index = VectorStoreIndex.from_documents(documents) query_engine = index.as_query_engine() response = query_engine.query("What are the key findings?")
undefined
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
documents = SimpleDirectoryReader("data/").load_data() index = VectorStoreIndex.from_documents(documents) query_engine = index.as_query_engine() response = query_engine.query("核心结论有哪些?")
undefined

Evaluation Frameworks

评估框架

python
undefined
python
undefined

RAGAS for RAG evaluation

使用RAGAS评估RAG系统

from ragas import evaluate from ragas.metrics import faithfulness, answer_relevancy, context_precision
result = evaluate( dataset=eval_dataset, metrics=[faithfulness, answer_relevancy, context_precision], ) print(result)
from ragas import evaluate from ragas.metrics import faithfulness, answer_relevancy, context_precision
result = evaluate( dataset=eval_dataset, metrics=[faithfulness, answer_relevancy, context_precision], ) print(result)

LangSmith for tracing and evaluation

使用LangSmith进行追踪和评估

import langsmith
client = langsmith.Client()
import langsmith
client = langsmith.Client()

Traces are automatically captured when LANGCHAIN_TRACING_V2=true

当LANGCHAIN_TRACING_V2=true时会自动捕获追踪数据

Custom evaluation

自定义评估函数

def evaluate_response(prediction: str, reference: str) -> dict: """Score response quality.""" # Use LLM-as-judge pattern judge_prompt = f"""Rate the following response on a scale of 1-5: Reference: {reference} Response: {prediction} Score (1-5):""" score = llm.invoke(judge_prompt) return {"score": int(score.content.strip())}
undefined
def evaluate_response(prediction: str, reference: str) -> dict: """评估回答质量。""" # 使用LLM作为评估者的模式 judge_prompt = f"""请对以下回答按1-5分评分: 参考回答: {reference} 模型回答: {prediction} 评分(1-5):""" score = llm.invoke(judge_prompt) return {"score": int(score.content.strip())}
undefined

LLM App Architecture Patterns

LLM应用架构模式

Pattern | Use Case
RAG | Q&A over documents, knowledge bases
Agent | Multi-step tasks requiring tool use
Chain-of-Thought | Complex reasoning, math, logic
Map-Reduce | Summarizing long documents
Router | Directing queries to specialized handlers
Reflection | Self-correcting outputs
Multi-Agent | Collaborative problem solving
模式 | 适用场景
RAG | 文档问答、知识库查询
Agent | 需要工具调用的多步骤任务
思维链 | 复杂推理、数学计算、逻辑问题
Map-Reduce | 长文档总结
路由模式 | 将查询导向专业处理模块
反思模式 | 自我修正输出结果
多Agent协作 | 协同解决复杂问题