ai-ml-development

Compare original and translation side by side

🇺🇸

Original

English
🇨🇳

Translation

Chinese

AI & Machine Learning Development

人工智能与机器学习开发

Comprehensive guide for building AI/ML systems from prototyping to production.
从原型到生产构建AI/ML系统的综合指南。

Frameworks Overview

框架概述

Framework | Best For | Ecosystem
PyTorch | Research, flexibility | Hugging Face, Lightning
TensorFlow | Production, mobile | TFX, TF Lite, TF.js
JAX | High-performance, TPUs | Flax, Optax
scikit-learn | Classical ML | Simple, batteries-included

框架 | 适用场景 | 生态系统
PyTorch | 研究、灵活性优先 | Hugging Face, Lightning
TensorFlow | 生产环境、移动开发 | TFX, TF Lite, TF.js
JAX | 高性能、TPU适配 | Flax, Optax
scikit-learn | 经典机器学习 | 简洁易用、功能完备

PyTorch

PyTorch

Model Definition

模型定义

python
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvNet(nn.Module):
    """Two-block CNN for 32x32 RGB inputs (e.g. CIFAR-10).

    Each conv block halves spatial resolution via 2x2 max-pooling, so a
    32x32 input reaches the classifier head as 64 * 8 * 8 = 4096 features.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class logits of shape (batch, num_classes)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # flatten(1) keeps the batch dimension intact; the original
        # x.view(-1, 64 * 8 * 8) silently re-batches when the spatial
        # size does not match, instead of raising a clear error.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)
python
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvNet(nn.Module):
    """Two-block CNN for 32x32 RGB inputs (e.g. CIFAR-10).

    Each conv block halves spatial resolution via 2x2 max-pooling, so a
    32x32 input reaches the classifier head as 64 * 8 * 8 = 4096 features.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class logits of shape (batch, num_classes)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # flatten(1) keeps the batch dimension intact; the original
        # x.view(-1, 64 * 8 * 8) silently re-batches when the spatial
        # size does not match, instead of raising a clear error.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

Training Loop

训练循环

python
from torch.utils.data import DataLoader
from torch.optim import AdamW
from tqdm import tqdm

def train_model(
    model: nn.Module,
    train_loader: DataLoader,
    val_loader: DataLoader,
    epochs: int = 10,
    lr: float = 1e-3,
    device: str = "cuda"
) -> dict:
    """Train `model` with AdamW + cross-entropy and validate each epoch.

    Returns a history dict {"train_loss": [...], "val_acc": [...]} with one
    entry per epoch. (The original declared `-> dict` but returned None.)
    """
    model = model.to(device)
    optimizer = AdamW(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    history: dict[str, list[float]] = {"train_loss": [], "val_acc": []}

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0
        for batch in tqdm(train_loader):
            inputs, labels = batch[0].to(device), batch[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        # Mean loss over batches; max() guards an empty loader.
        history["train_loss"].append(epoch_loss / max(len(train_loader), 1))

        # Validation: gradients disabled, eval() turns off dropout/batchnorm updates.
        model.eval()
        correct = total = 0
        with torch.no_grad():
            for batch in val_loader:
                inputs, labels = batch[0].to(device), batch[1].to(device)
                outputs = model(inputs)
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

        history["val_acc"].append(100. * correct / total)
        print(f"Epoch {epoch+1}: Val Acc {100.*correct/total:.2f}%")

    return history
python
from torch.utils.data import DataLoader
from torch.optim import AdamW
from tqdm import tqdm

def train_model(
    model: nn.Module,
    train_loader: DataLoader,
    val_loader: DataLoader,
    epochs: int = 10,
    lr: float = 1e-3,
    device: str = "cuda"
) -> dict:
    """Train `model` with AdamW + cross-entropy and validate each epoch.

    Returns a history dict {"train_loss": [...], "val_acc": [...]} with one
    entry per epoch. (The original declared `-> dict` but returned None.)
    """
    model = model.to(device)
    optimizer = AdamW(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    history: dict[str, list[float]] = {"train_loss": [], "val_acc": []}

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0
        for batch in tqdm(train_loader):
            inputs, labels = batch[0].to(device), batch[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        # Mean loss over batches; max() guards an empty loader.
        history["train_loss"].append(epoch_loss / max(len(train_loader), 1))

        # Validation: gradients disabled, eval() turns off dropout/batchnorm updates.
        model.eval()
        correct = total = 0
        with torch.no_grad():
            for batch in val_loader:
                inputs, labels = batch[0].to(device), batch[1].to(device)
                outputs = model(inputs)
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

        history["val_acc"].append(100. * correct / total)
        print(f"第 {epoch+1} 轮训练: 验证准确率 {100.*correct/total:.2f}%")

    return history

PyTorch Lightning

PyTorch Lightning

python
import pytorch_lightning as pl
from torchmetrics import Accuracy

class LitModel(pl.LightningModule):
    """Lightning wrapper: cross-entropy training + multiclass-accuracy validation.

    NOTE(review): relies on `AdamW` being in scope at module level
    (torch.optim.AdamW in the companion snippet) — confirm the import.
    """

    def __init__(self, model: nn.Module, lr: float = 1e-3):
        super().__init__()
        self.model = model  # wrapped backbone network
        self.lr = lr  # learning rate handed to AdamW
        self.criterion = nn.CrossEntropyLoss()
        # Hard-coded to 10 classes; keep in sync with the dataset.
        self.accuracy = Accuracy(task="multiclass", num_classes=10)

    def training_step(self, batch, batch_idx):
        """One optimization step; Lightning backpropagates the returned loss."""
        x, y = batch
        logits = self.model(x)
        loss = self.criterion(logits, y)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Accumulate accuracy; logging the metric object defers aggregation."""
        x, y = batch
        logits = self.model(x)
        self.accuracy(logits, y)
        self.log("val_acc", self.accuracy)

    def configure_optimizers(self):
        # Lightning calls this once to build the optimizer.
        return AdamW(self.parameters(), lr=self.lr)

python
import pytorch_lightning as pl
from torchmetrics import Accuracy

class LitModel(pl.LightningModule):
    """Lightning wrapper: cross-entropy training + multiclass-accuracy validation.

    NOTE(review): relies on `AdamW` being in scope at module level
    (torch.optim.AdamW in the companion snippet) — confirm the import.
    """

    def __init__(self, model: nn.Module, lr: float = 1e-3):
        super().__init__()
        self.model = model  # wrapped backbone network
        self.lr = lr  # learning rate handed to AdamW
        self.criterion = nn.CrossEntropyLoss()
        # Hard-coded to 10 classes; keep in sync with the dataset.
        self.accuracy = Accuracy(task="multiclass", num_classes=10)

    def training_step(self, batch, batch_idx):
        """One optimization step; Lightning backpropagates the returned loss."""
        x, y = batch
        logits = self.model(x)
        loss = self.criterion(logits, y)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Accumulate accuracy; logging the metric object defers aggregation."""
        x, y = batch
        logits = self.model(x)
        self.accuracy(logits, y)
        self.log("val_acc", self.accuracy)

    def configure_optimizers(self):
        # Lightning calls this once to build the optimizer.
        return AdamW(self.parameters(), lr=self.lr)

Hugging Face Transformers

Hugging Face Transformers

Text Classification

文本分类

python
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
)
from datasets import load_dataset

# Fine-tune BERT for binary sequence classification on IMDB.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Downloads the IMDB dataset on first run (network side effect).
dataset = load_dataset("imdb")

def preprocess(examples):
    # Pads every example to the model's max length and truncates longer ones.
    return tokenizer(examples["text"], truncation=True, padding="max_length")

tokenized = dataset.map(preprocess, batched=True)

trainer = Trainer(
    model=model,
    args=TrainingArguments(
        output_dir="./results",
        num_train_epochs=3,
        per_device_train_batch_size=16,
        # NOTE(review): recent transformers versions renamed this to
        # `eval_strategy` — confirm against the pinned version.
        evaluation_strategy="epoch",
    ),
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
)

trainer.train()
python
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
)
from datasets import load_dataset

# Fine-tune BERT for binary sequence classification on IMDB.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Downloads the IMDB dataset on first run (network side effect).
dataset = load_dataset("imdb")

def preprocess(examples):
    # Pads every example to the model's max length and truncates longer ones.
    return tokenizer(examples["text"], truncation=True, padding="max_length")

tokenized = dataset.map(preprocess, batched=True)

trainer = Trainer(
    model=model,
    args=TrainingArguments(
        output_dir="./results",
        num_train_epochs=3,
        per_device_train_batch_size=16,
        # NOTE(review): recent transformers versions renamed this to
        # `eval_strategy` — confirm against the pinned version.
        evaluation_strategy="epoch",
    ),
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
)

trainer.train()

Fine-Tuning LLMs with LoRA

使用LoRA微调大语言模型

python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model, TaskType
from trl import SFTTrainer  # imported for the (not shown here) training step

# Load Llama-2-7B with 4-bit quantization so it fits on a single GPU.
# NOTE(review): `load_in_4bit=True` is deprecated in newer transformers in
# favor of `quantization_config=BitsAndBytesConfig(...)` — verify the version.
model_name = "meta-llama/Llama-2-7b-hf"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# LoRA: train small rank-16 adapters on the attention query/value
# projections instead of all base-model parameters.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model, TaskType
from trl import SFTTrainer  # imported for the (not shown here) training step

# Load Llama-2-7B with 4-bit quantization so it fits on a single GPU.
# NOTE(review): `load_in_4bit=True` is deprecated in newer transformers in
# favor of `quantization_config=BitsAndBytesConfig(...)` — verify the version.
model_name = "meta-llama/Llama-2-7b-hf"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# LoRA: train small rank-16 adapters on the attention query/value
# projections instead of all base-model parameters.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

LLM Integration

LLM集成

OpenAI API

OpenAI API

python
from openai import OpenAI

# Reads OPENAI_API_KEY from the environment by default.
client = OpenAI()

def chat_completion(messages: list[dict], model: str = "gpt-4") -> str:
    """Return the assistant text of the first completion choice.

    `messages` is a list of {"role": ..., "content": ...} dicts.
    """
    response = client.chat.completions.create(
        model=model,
        messages=messages,
    )
    return response.choices[0].message.content
python
from openai import OpenAI

# Reads OPENAI_API_KEY from the environment by default.
client = OpenAI()

def chat_completion(messages: list[dict], model: str = "gpt-4") -> str:
    """Return the assistant text of the first completion choice.

    `messages` is a list of {"role": ..., "content": ...} dicts.
    """
    response = client.chat.completions.create(
        model=model,
        messages=messages,
    )
    return response.choices[0].message.content

Function calling

函数调用

def extract_entities(text: str) -> dict: response = client.chat.completions.create( model="gpt-4", messages=[{"role": "user", "content": text}], tools=[{ "type": "function", "function": { "name": "extract_entities", "parameters": { "type": "object", "properties": { "people": {"type": "array", "items": {"type": "string"}}, "places": {"type": "array", "items": {"type": "string"}}, }, }, }, }], ) return response.choices[0].message.tool_calls[0].function.arguments
undefined
def extract_entities(text: str) -> dict: response = client.chat.completions.create( model="gpt-4", messages=[{"role": "user", "content": text}], tools=[{ "type": "function", "function": { "name": "extract_entities", "parameters": { "type": "object", "properties": { "people": {"type": "array", "items": {"type": "string"}}, "places": {"type": "array", "items": {"type": "string"}}, }, }, }, }], ) return response.choices[0].message.tool_calls[0].function.arguments
undefined

Anthropic Claude API

Anthropic Claude API

python
import anthropic

# Reads ANTHROPIC_API_KEY from the environment by default.
client = anthropic.Anthropic()

def claude_completion(prompt: str, model: str = "claude-3-sonnet-20240229") -> str:
    """Send a single-turn user prompt and return the first text block."""
    message = client.messages.create(
        model=model,
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}],
    )
    return message.content[0].text
python
import anthropic

# Reads ANTHROPIC_API_KEY from the environment by default.
client = anthropic.Anthropic()

def claude_completion(prompt: str, model: str = "claude-3-sonnet-20240229") -> str:
    """Send a single-turn user prompt and return the first text block."""
    message = client.messages.create(
        model=model,
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}],
    )
    return message.content[0].text

LangChain

LangChain

python
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

llm = ChatOpenAI(model="gpt-4")
prompt = ChatPromptTemplate.from_template("Summarize: {text}")
# LCEL pipe: prompt -> model -> plain-string parser.
chain = prompt | llm | StrOutputParser()

# invoke() fills the {text} placeholder and runs the whole chain.
result = chain.invoke({"text": "Long document here..."})

python
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

llm = ChatOpenAI(model="gpt-4")
prompt = ChatPromptTemplate.from_template("总结内容: {text}")
# LCEL pipe: prompt -> model -> plain-string parser.
chain = prompt | llm | StrOutputParser()

# invoke() fills the {text} placeholder and runs the whole chain.
result = chain.invoke({"text": "此处为长文档内容..."})

Vector Databases

向量数据库

Pinecone

Pinecone

python
from pinecone import Pinecone

# NOTE(review): placeholder API key — load from an env var, never hard-code.
pc = Pinecone(api_key="xxx")
index = pc.Index("my-index")
python
from pinecone import Pinecone

# NOTE(review): placeholder API key — load from an env var, never hard-code.
pc = Pinecone(api_key="xxx")
index = pc.Index("my-index")

Upsert vectors

上传向量

index.upsert(vectors=[ {"id": "1", "values": [0.1, 0.2], "metadata": {"text": "..."}}, ])
index.upsert(vectors=[ {"id": "1", "values": [0.1, 0.2], "metadata": {"text": "..."}}, ])

Query

查询向量

results = index.query(vector=[0.1, 0.2], top_k=5, include_metadata=True)
undefined
results = index.query(vector=[0.1, 0.2], top_k=5, include_metadata=True)
undefined

ChromaDB (Local)

ChromaDB(本地部署)

python
import chromadb

# PersistentClient keeps the collection on disk under ./chroma_db.
client = chromadb.PersistentClient(path="./chroma_db")
collection = client.get_or_create_collection(name="documents")

# Embeddings are computed by the collection's default embedding function.
collection.add(
    documents=["Doc 1", "Doc 2"],
    ids=["doc1", "doc2"],
)

results = collection.query(query_texts=["search query"], n_results=5)

python
import chromadb

# PersistentClient keeps the collection on disk under ./chroma_db.
client = chromadb.PersistentClient(path="./chroma_db")
collection = client.get_or_create_collection(name="documents")

# Embeddings are computed by the collection's default embedding function.
collection.add(
    documents=["文档1", "文档2"],
    ids=["doc1", "doc2"],
)

results = collection.query(query_texts=["搜索关键词"], n_results=5)

MLOps

MLOps

Model Registry (MLflow)

模型注册(MLflow)

python
import mlflow

# Groups subsequent runs under this experiment (created if absent).
mlflow.set_experiment("my-experiment")

# start_run() opens a tracked run; the context manager closes it cleanly.
# NOTE(review): `model` is assumed to be defined earlier in the session.
with mlflow.start_run():
    mlflow.log_params({"lr": 0.001, "epochs": 10})
    mlflow.log_metrics({"accuracy": 0.95})
    mlflow.pytorch.log_model(model, "model")
python
import mlflow

# Groups subsequent runs under this experiment (created if absent).
mlflow.set_experiment("我的实验项目")

# start_run() opens a tracked run; the context manager closes it cleanly.
# NOTE(review): MLflow restricts param/metric names to a limited character
# set — the non-ASCII keys below may be rejected by the tracking server; verify.
# NOTE(review): `model` is assumed to be defined earlier in the session.
with mlflow.start_run():
    mlflow.log_params({"学习率": 0.001, "训练轮数": 10})
    mlflow.log_metrics({"准确率": 0.95})
    mlflow.pytorch.log_model(model, "model")

Model Serving (FastAPI)

模型部署(FastAPI)

python
from fastapi import FastAPI
from pydantic import BaseModel
import torch

app = FastAPI()
# NOTE(review): torch.load unpickles arbitrary objects — only load trusted
# checkpoints. Newer torch defaults to weights_only=True, which would break
# loading a fully pickled model here; verify the pinned torch version.
model = torch.load("model.pt")
model.eval()

class PredictionRequest(BaseModel):
    # Flat numeric feature vector; length must match the model's input size.
    features: list[float]

@app.post("/predict")
async def predict(request: PredictionRequest):
    """Return the argmax class index for a single feature vector."""
    with torch.no_grad():
        tensor = torch.tensor([request.features])
        output = model(tensor)
        return {"prediction": output.argmax().item()}

python
from fastapi import FastAPI
from pydantic import BaseModel
import torch

app = FastAPI()
# NOTE(review): torch.load unpickles arbitrary objects — only load trusted
# checkpoints. Newer torch defaults to weights_only=True, which would break
# loading a fully pickled model here; verify the pinned torch version.
model = torch.load("model.pt")
model.eval()

class PredictionRequest(BaseModel):
    # Flat numeric feature vector; length must match the model's input size.
    features: list[float]

@app.post("/predict")
async def predict(request: PredictionRequest):
    """Return the argmax class index for a single feature vector."""
    with torch.no_grad():
        tensor = torch.tensor([request.features])
        output = model(tensor)
        return {"prediction": output.argmax().item()}

Best Practices

最佳实践

Training

训练环节

  • Experiment tracking (MLflow, W&B)
  • Mixed precision training
  • Gradient accumulation
  • Early stopping
  • Learning rate scheduling
  • 实验追踪(MLflow, W&B)
  • 混合精度训练
  • 梯度累积
  • 早停机制
  • 学习率调度

Deployment

部署环节

  • Model versioning
  • A/B testing
  • Monitoring for drift
  • Fallback mechanisms

  • 模型版本管理
  • A/B测试
  • 模型漂移监控
  • 降级回退(fallback)机制

LLM Application Development

LLM应用开发

RAG Architecture (Retrieval-Augmented Generation)

RAG架构(检索增强生成)

python
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
python
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

1. Load and chunk documents

1. 加载并切分文档

text_splitter = RecursiveCharacterTextSplitter( chunk_size=1000, chunk_overlap=200, separators=["\n\n", "\n", ". ", " "], ) chunks = text_splitter.split_documents(documents)
text_splitter = RecursiveCharacterTextSplitter( chunk_size=1000, chunk_overlap=200, separators=["\n\n", "\n", ". ", " "], ) chunks = text_splitter.split_documents(documents)

2. Embed and store in vector database

2. 生成嵌入并存储到向量数据库

embeddings = OpenAIEmbeddings(model="text-embedding-3-small") vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory="./db")
embeddings = OpenAIEmbeddings(model="text-embedding-3-small") vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory="./db")

3. Create retrieval chain

3. 创建检索链

retriever = vectorstore.as_retriever( search_type="mmr", # Maximal Marginal Relevance search_kwargs={"k": 5}, )
prompt = ChatPromptTemplate.from_template(""" Answer based on the following context. If the answer is not in the context, say so.
Context: {context}
Question: {question} """)
chain = ( {"context": retriever, "question": RunnablePassthrough()} | prompt | ChatOpenAI(model="gpt-4o") )
result = chain.invoke("What is the refund policy?")
undefined
retriever = vectorstore.as_retriever( search_type="mmr", # 最大边际相关性 search_kwargs={"k": 5}, )
prompt = ChatPromptTemplate.from_template(""" 基于以下上下文回答问题。如果答案不在上下文中,请直接说明。
上下文: {context}
问题: {question} """)
chain = ( {"context": retriever, "question": RunnablePassthrough()} | prompt | ChatOpenAI(model="gpt-4o") )
result = chain.invoke("退款政策是什么?")
undefined

Vector Databases

向量数据库对比

Database | Type | Best For
pgvector | PostgreSQL extension | Existing Postgres, hybrid queries
Pinecone | Managed cloud | Production scale, serverless
Chroma | Local/embedded | Prototyping, small-medium datasets
Weaviate | Self-hosted/cloud | Multimodal, GraphQL interface
Qdrant | Self-hosted/cloud | High performance, filtering
python
undefined
数据库 | 类型 | 适用场景
pgvector | PostgreSQL扩展 | 已有Postgres环境、混合查询场景
Pinecone | 托管云服务 | 生产级规模、无服务器架构
Chroma | 本地/嵌入式 | 原型开发、中小规模数据集
Weaviate | 自托管/云服务 | 多模态数据、GraphQL接口支持
Qdrant | 自托管/云服务 | 高性能、高级过滤需求
python
undefined

pgvector with SQLAlchemy

结合SQLAlchemy使用pgvector

from pgvector.sqlalchemy import Vector
class Document(Base): tablename = "documents" id = Column(Integer, primary_key=True) content = Column(Text) embedding = Column(Vector(1536)) # OpenAI embedding dimension
from pgvector.sqlalchemy import Vector
class Document(Base): tablename = "documents" id = Column(Integer, primary_key=True) content = Column(Text) embedding = Column(Vector(1536)) # OpenAI嵌入向量维度

Similarity search

相似度搜索

from sqlalchemy import text results = session.execute(text(""" SELECT content, embedding <=> :query_embedding AS distance FROM documents ORDER BY embedding <=> :query_embedding LIMIT 5 """), {"query_embedding": str(query_vector)})
undefined
from sqlalchemy import text results = session.execute(text(""" SELECT content, embedding <=> :query_embedding AS distance FROM documents ORDER BY embedding <=> :query_embedding LIMIT 5 """), {"query_embedding": str(query_vector)})
undefined

Prompt Engineering Patterns

提示词工程模式

python
undefined
python
undefined

System prompt pattern

系统提示词模式

SYSTEM_PROMPT = """You are a helpful assistant that answers questions about {domain}.
Rules:
  • Only answer based on provided context
  • If uncertain, say "I don't know"
  • Cite sources when possible
  • Be concise and factual """
SYSTEM_PROMPT = """你是一个专注于{domain}领域的助手。
规则:
  • 仅基于提供的上下文回答
  • 不确定时请回答“我不知道”
  • 尽可能引用来源
  • 回答简洁、准确 """

Few-shot prompting

少样本提示词

FEW_SHOT_PROMPT = """ Classify the sentiment of the following text.
Text: "The product arrived on time and works perfectly!" Sentiment: positive
Text: "Terrible customer service, waited 3 hours." Sentiment: negative
Text: "{user_input}" Sentiment:"""
FEW_SHOT_PROMPT = """ 对以下文本进行情感分类。
文本: "产品按时送达,使用效果完美!" 情感: 正面
文本: "客服态度极差,等待了3小时。" 情感: 负面
文本: "{user_input}" 情感:"""

Chain-of-thought prompting

思维链提示词

COT_PROMPT = """ Solve step by step:
  1. Identify the key information
  2. Break down the problem
  3. Work through each step
  4. Provide the final answer
Problem: {problem} """
undefined
COT_PROMPT = """ 请分步解决问题:
  1. 识别关键信息
  2. 拆解问题
  3. 逐步推导
  4. 给出最终答案
问题: {problem} """
undefined

Structured Outputs

结构化输出

python
undefined
python
undefined

Anthropic Claude structured output

Anthropic Claude结构化输出

import anthropic from pydantic import BaseModel
class ExtractedEntity(BaseModel): name: str type: str # person, org, location confidence: float
class ExtractionResult(BaseModel): entities: list[ExtractedEntity] summary: str
client = anthropic.Anthropic() message = client.messages.create( model="claude-sonnet-4-20250514", max_tokens=1024, messages=[{"role": "user", "content": f"Extract entities from: {text}"}], # Claude supports tool_use for structured output tools=[{ "name": "extract_entities", "description": "Extract named entities from text", "input_schema": ExtractionResult.model_json_schema(), }], tool_choice={"type": "tool", "name": "extract_entities"}, )
import anthropic from pydantic import BaseModel
class ExtractedEntity(BaseModel): name: str type: str # 人物、组织、地点 confidence: float
class ExtractionResult(BaseModel): entities: list[ExtractedEntity] summary: str
client = anthropic.Anthropic() message = client.messages.create( model="claude-sonnet-4-20250514", max_tokens=1024, messages=[{"role": "user", "content": f"从以下文本提取实体: {text}"}], # Claude支持通过tool_use实现结构化输出 tools=[{ "name": "extract_entities", "description": "从文本中提取命名实体", "input_schema": ExtractionResult.model_json_schema(), }], tool_choice={"type": "tool", "name": "extract_entities"}, )

OpenAI structured output

OpenAI结构化输出

from openai import OpenAI
client = OpenAI() response = client.beta.chat.completions.parse( model="gpt-4o", messages=[{"role": "user", "content": f"Extract entities from: {text}"}], response_format=ExtractionResult, ) result = response.choices[0].message.parsed
undefined
from openai import OpenAI
client = OpenAI() response = client.beta.chat.completions.parse( model="gpt-4o", messages=[{"role": "user", "content": f"从以下文本提取实体: {text}"}], response_format=ExtractionResult, ) result = response.choices[0].message.parsed
undefined

Tool Use / Function Calling

工具调用/函数调用

python
undefined
python
undefined

Claude tool use

Claude工具调用

tools = [ { "name": "search_database", "description": "Search the product database", "input_schema": { "type": "object", "properties": { "query": {"type": "string", "description": "Search query"}, "category": {"type": "string", "enum": ["electronics", "clothing", "books"]}, }, "required": ["query"], }, }, { "name": "get_weather", "description": "Get current weather for a location", "input_schema": { "type": "object", "properties": { "location": {"type": "string"}, }, "required": ["location"], }, }, ]
tools = [ { "name": "search_database", "description": "搜索产品数据库", "input_schema": { "type": "object", "properties": { "query": {"type": "string", "description": "搜索关键词"}, "category": {"type": "string", "enum": ["electronics", "clothing", "books"]}, }, "required": ["query"], }, }, { "name": "get_weather", "description": "获取指定地点的当前天气", "input_schema": { "type": "object", "properties": { "location": {"type": "string"}, }, "required": ["location"], }, }, ]

Agentic loop: call LLM, execute tools, feed results back

Agent循环: 调用LLM、执行工具、反馈结果

while True: response = client.messages.create( model="claude-sonnet-4-20250514", messages=messages, tools=tools, )
if response.stop_reason == "end_turn":
    break

# Execute tool calls
for block in response.content:
    if block.type == "tool_use":
        result = execute_tool(block.name, block.input)
        messages.append({"role": "assistant", "content": response.content})
        messages.append({
            "role": "user",
            "content": [{"type": "tool_result", "tool_use_id": block.id, "content": str(result)}],
        })
undefined
while True: response = client.messages.create( model="claude-sonnet-4-20250514", messages=messages, tools=tools, )
if response.stop_reason == "end_turn":
    break

# 执行工具调用
for block in response.content:
    if block.type == "tool_use":
        result = execute_tool(block.name, block.input)
        messages.append({"role": "assistant", "content": response.content})
        messages.append({
            "role": "user",
            "content": [{"type": "tool_result", "tool_use_id": block.id, "content": str(result)}],
        })
undefined

Claude API / Anthropic SDK Patterns

Claude API/Anthropic SDK使用模式

python
import anthropic

client = anthropic.Anthropic()
python
import anthropic

client = anthropic.Anthropic()

Basic message

基础消息调用

response = client.messages.create( model="claude-sonnet-4-20250514", max_tokens=4096, system="You are a helpful coding assistant.", messages=[ {"role": "user", "content": "Explain async/await in Python"}, ], )
response = client.messages.create( model="claude-sonnet-4-20250514", max_tokens=4096, system="你是一个乐于助人的编程助手。", messages=[ {"role": "user", "content": "解释Python中的async/await"}, ], )

Streaming

流式输出

with client.messages.stream( model="claude-sonnet-4-20250514", max_tokens=4096, messages=[{"role": "user", "content": prompt}], ) as stream: for text in stream.text_stream: print(text, end="", flush=True)
with client.messages.stream( model="claude-sonnet-4-20250514", max_tokens=4096, messages=[{"role": "user", "content": prompt}], ) as stream: for text in stream.text_stream: print(text, end="", flush=True)

Vision (image input)

视觉能力(图片输入)

import base64
with open("screenshot.png", "rb") as f: image_data = base64.standard_b64encode(f.read()).decode()
response = client.messages.create( model="claude-sonnet-4-20250514", max_tokens=1024, messages=[{ "role": "user", "content": [ {"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": image_data}}, {"type": "text", "text": "Describe this UI and suggest improvements"}, ], }], )
undefined
import base64
with open("screenshot.png", "rb") as f: image_data = base64.standard_b64encode(f.read()).decode()
response = client.messages.create( model="claude-sonnet-4-20250514", max_tokens=1024, messages=[{ "role": "user", "content": [ {"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": image_data}}, {"type": "text", "text": "描述这个UI并提出改进建议"}, ], }], )
undefined

LangChain / LlamaIndex

LangChain / LlamaIndex

python
undefined
python
undefined

LangChain LCEL (LangChain Expression Language)

LangChain LCEL(LangChain表达式语言)

from langchain_anthropic import ChatAnthropic from langchain_core.prompts import ChatPromptTemplate from langchain_core.output_parsers import StrOutputParser
llm = ChatAnthropic(model="claude-sonnet-4-20250514")
chain = ( ChatPromptTemplate.from_messages([ ("system", "You are a helpful assistant."), ("user", "{input}"), ]) | llm | StrOutputParser() )
from langchain_anthropic import ChatAnthropic from langchain_core.prompts import ChatPromptTemplate from langchain_core.output_parsers import StrOutputParser
llm = ChatAnthropic(model="claude-sonnet-4-20250514")
chain = ( ChatPromptTemplate.from_messages([ ("system", "你是一个乐于助人的助手。"), ("user", "{input}"), ]) | llm | StrOutputParser() )

LlamaIndex for document Q&A

使用LlamaIndex实现文档问答

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
documents = SimpleDirectoryReader("data/").load_data() index = VectorStoreIndex.from_documents(documents) query_engine = index.as_query_engine() response = query_engine.query("What are the key findings?")
undefined
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
documents = SimpleDirectoryReader("data/").load_data() index = VectorStoreIndex.from_documents(documents) query_engine = index.as_query_engine() response = query_engine.query("核心结论有哪些?")
undefined

Evaluation Frameworks

评估框架

python
undefined
python
undefined

RAGAS for RAG evaluation

使用RAGAS评估RAG系统

from ragas import evaluate from ragas.metrics import faithfulness, answer_relevancy, context_precision
result = evaluate( dataset=eval_dataset, metrics=[faithfulness, answer_relevancy, context_precision], ) print(result)
from ragas import evaluate from ragas.metrics import faithfulness, answer_relevancy, context_precision
result = evaluate( dataset=eval_dataset, metrics=[faithfulness, answer_relevancy, context_precision], ) print(result)

LangSmith for tracing and evaluation

使用LangSmith进行追踪和评估

import langsmith
client = langsmith.Client()
import langsmith
client = langsmith.Client()

Traces are automatically captured when LANGCHAIN_TRACING_V2=true

当LANGCHAIN_TRACING_V2=true时会自动捕获追踪数据

Custom evaluation

自定义评估函数

def evaluate_response(prediction: str, reference: str) -> dict: """Score response quality.""" # Use LLM-as-judge pattern judge_prompt = f"""Rate the following response on a scale of 1-5: Reference: {reference} Response: {prediction} Score (1-5):""" score = llm.invoke(judge_prompt) return {"score": int(score.content.strip())}
undefined
def evaluate_response(prediction: str, reference: str) -> dict: """评估回答质量。""" # 使用LLM作为评估者的模式 judge_prompt = f"""请对以下回答按1-5分评分: 参考回答: {reference} 模型回答: {prediction} 评分(1-5):""" score = llm.invoke(judge_prompt) return {"score": int(score.content.strip())}
undefined

LLM App Architecture Patterns

LLM应用架构模式

Pattern | Use Case
RAG | Q&A over documents, knowledge bases
Agent | Multi-step tasks requiring tool use
Chain-of-Thought | Complex reasoning, math, logic
Map-Reduce | Summarizing long documents
Router | Directing queries to specialized handlers
Reflection | Self-correcting outputs
Multi-Agent | Collaborative problem solving
模式 | 适用场景
RAG | 文档问答、知识库查询
Agent | 需要工具调用的多步骤任务
思维链 | 复杂推理、数学计算、逻辑问题
Map-Reduce | 长文档总结
路由模式 | 将查询导向专业处理模块
反思模式 | 自我修正输出结果
多Agent协作 | 协同解决复杂问题