distributed-tracing
Compare original and translation side by side
🇺🇸
Original
English
🇨🇳
Translation
Chinese
Distributed Tracing
分布式追踪
Overview
概述
Set up distributed tracing infrastructure with Jaeger or Zipkin to track requests across microservices and identify performance bottlenecks.
使用Jaeger或Zipkin搭建分布式追踪基础设施,以跨微服务跟踪请求并识别性能瓶颈。
When to Use
适用场景
- Debugging microservice interactions
- Identifying performance bottlenecks
- Tracking request flows
- Analyzing service dependencies
- Root cause analysis
- 调试微服务间的交互
- 识别性能瓶颈
- 跟踪请求流
- 分析服务依赖关系
- 根因分析
Instructions
操作步骤
1. Jaeger Setup
1. Jaeger 部署
yaml
yaml
# docker-compose.yml
# docker-compose.yml
# Single-container Jaeger deployment for local tracing experiments.
version: '3.8'
services:
  jaeger:
    image: jaegertracing/all-in-one:latest
    ports:
      - "5775:5775/udp"   # NOTE(review): presumably a legacy agent port; confirm it is still needed
      - "6831:6831/udp"   # agent port the client snippets report spans to
      - "16686:16686"     # HTTP port queried by query-traces.py (/api/traces)
      - "14268:14268"     # NOTE(review): presumably the collector HTTP port; confirm
    networks:
      - tracing
networks:
  tracing:
# Single-container Jaeger deployment for local tracing experiments.
version: '3.8'
services:
  jaeger:
    image: jaegertracing/all-in-one:latest
    ports:
      - "5775:5775/udp"   # NOTE(review): presumably a legacy agent port; confirm it is still needed
      - "6831:6831/udp"   # agent port the client snippets report spans to
      - "16686:16686"     # HTTP port queried by query-traces.py (/api/traces)
      - "14268:14268"     # NOTE(review): presumably the collector HTTP port; confirm
    networks:
      - tracing
networks:
  tracing:
2. Node.js Jaeger Instrumentation
2. Node.js Jaeger 埋点
javascript
// tracing.js
const initTracer = require('jaeger-client').initTracer;
const opentracing = require('opentracing');
/**
 * Build a Jaeger tracer for the given service.
 *
 * The agent address is taken from JAEGER_AGENT_HOST / JAEGER_AGENT_PORT when
 * set, otherwise localhost:6831 is used.
 *
 * @param {string} serviceName - Name the service reports its spans under.
 * @returns {object} Whatever `initTracer(config, {})` returns.
 */
const initJaegerTracer = (serviceName) => {
  // Environment variables are always strings; hand the reporter a real,
  // valid port number and fall back to the default when the value is
  // missing or not a usable port.
  const parsedPort = Number.parseInt(process.env.JAEGER_AGENT_PORT, 10);
  const isValidPort =
    Number.isInteger(parsedPort) && parsedPort > 0 && parsedPort <= 65535;
  const agentPort = isValidPort ? parsedPort : 6831;

  const config = {
    serviceName: serviceName,
    // NOTE(review): `const` sampler with param 1 presumably samples every
    // trace; revisit before production use.
    sampler: {
      type: 'const',
      param: 1
    },
    reporter: {
      logSpans: true,
      agentHost: process.env.JAEGER_AGENT_HOST || 'localhost',
      agentPort: agentPort
    }
  };
  return initTracer(config, {});
};
const tracer = initJaegerTracer('api-service');
module.exports = { tracer };
javascript
// tracing.js
const initTracer = require('jaeger-client').initTracer;
const opentracing = require('opentracing');
/**
 * Build a Jaeger tracer for the given service.
 *
 * The agent address is taken from JAEGER_AGENT_HOST / JAEGER_AGENT_PORT when
 * set, otherwise localhost:6831 is used.
 *
 * @param {string} serviceName - Name the service reports its spans under.
 * @returns {object} Whatever `initTracer(config, {})` returns.
 */
const initJaegerTracer = (serviceName) => {
  // Environment variables are always strings; hand the reporter a real,
  // valid port number and fall back to the default when the value is
  // missing or not a usable port.
  const parsedPort = Number.parseInt(process.env.JAEGER_AGENT_PORT, 10);
  const isValidPort =
    Number.isInteger(parsedPort) && parsedPort > 0 && parsedPort <= 65535;
  const agentPort = isValidPort ? parsedPort : 6831;

  const config = {
    serviceName: serviceName,
    // NOTE(review): `const` sampler with param 1 presumably samples every
    // trace; revisit before production use.
    sampler: {
      type: 'const',
      param: 1
    },
    reporter: {
      logSpans: true,
      agentHost: process.env.JAEGER_AGENT_HOST || 'localhost',
      agentPort: agentPort
    }
  };
  return initTracer(config, {});
};
const tracer = initJaegerTracer('api-service');
module.exports = { tracer };
3. Express Tracing Middleware
3. Express 追踪中间件
javascript
// middleware.js
const { tracer } = require('./tracing');
const opentracing = require('opentracing');
/**
 * Express middleware that wraps every request in a server-side span.
 *
 * The span continues any trace context found in the incoming HTTP headers,
 * is exposed to downstream handlers as `req.span`, and is finished exactly
 * once when the response completes or the connection closes early.
 *
 * @param {object} req - Express request; receives a `span` property.
 * @param {object} res - Express response; its status code is tagged on the span.
 * @param {Function} next - Continues the middleware chain.
 */
const tracingMiddleware = (req, res, next) => {
  const wireCtx = tracer.extract(
    opentracing.FORMAT_HTTP_HEADERS,
    req.headers
  );
  const span = tracer.startSpan(req.path, {
    childOf: wireCtx,
    tags: {
      [opentracing.Tags.SPAN_KIND]: opentracing.Tags.SPAN_KIND_RPC_SERVER,
      [opentracing.Tags.HTTP_METHOD]: req.method,
      [opentracing.Tags.HTTP_URL]: req.url
    }
  });
  req.span = span;

  // 'finish' never fires when the client disconnects before the response is
  // sent, which would leave the span open forever. Listen to 'close' as well
  // and guard against finishing twice when both events are emitted.
  let spanFinished = false;
  const finishSpan = () => {
    if (spanFinished) {
      return;
    }
    spanFinished = true;
    span.setTag(opentracing.Tags.HTTP_STATUS_CODE, res.statusCode);
    span.finish();
  };
  res.on('finish', finishSpan);
  res.on('close', finishSpan);

  next();
};
module.exports = tracingMiddleware;
javascript
// middleware.js
const { tracer } = require('./tracing');
const opentracing = require('opentracing');
/**
 * Express middleware that wraps every request in a server-side span.
 *
 * The span continues any trace context found in the incoming HTTP headers,
 * is exposed to downstream handlers as `req.span`, and is finished exactly
 * once when the response completes or the connection closes early.
 *
 * @param {object} req - Express request; receives a `span` property.
 * @param {object} res - Express response; its status code is tagged on the span.
 * @param {Function} next - Continues the middleware chain.
 */
const tracingMiddleware = (req, res, next) => {
  const wireCtx = tracer.extract(
    opentracing.FORMAT_HTTP_HEADERS,
    req.headers
  );
  const span = tracer.startSpan(req.path, {
    childOf: wireCtx,
    tags: {
      [opentracing.Tags.SPAN_KIND]: opentracing.Tags.SPAN_KIND_RPC_SERVER,
      [opentracing.Tags.HTTP_METHOD]: req.method,
      [opentracing.Tags.HTTP_URL]: req.url
    }
  });
  req.span = span;

  // 'finish' never fires when the client disconnects before the response is
  // sent, which would leave the span open forever. Listen to 'close' as well
  // and guard against finishing twice when both events are emitted.
  let spanFinished = false;
  const finishSpan = () => {
    if (spanFinished) {
      return;
    }
    spanFinished = true;
    span.setTag(opentracing.Tags.HTTP_STATUS_CODE, res.statusCode);
    span.finish();
  };
  res.on('finish', finishSpan);
  res.on('close', finishSpan);

  next();
};
module.exports = tracingMiddleware;
4. Python Jaeger Integration
4. Python Jaeger 集成
python
python
# tracing.py
# tracing.py
from jaeger_client import Config
from opentracing.propagation import Format
def init_jaeger_tracer(service_name):
    """Create a Jaeger tracer for ``service_name``.

    The agent address comes from the ``JAEGER_AGENT_HOST`` and
    ``JAEGER_AGENT_PORT`` environment variables when they are set, and
    defaults to ``localhost:6831`` otherwise, matching the Node.js setup.

    Args:
        service_name: Name the service reports its spans under.

    Returns:
        The result of ``Config.initialize_tracer()``.

    Raises:
        ValueError: If ``JAEGER_AGENT_PORT`` is set but is not an integer.
    """
    import os  # local import keeps the snippet self-contained

    # `or` (not a .get default) so an empty variable also falls back.
    agent_host = os.environ.get('JAEGER_AGENT_HOST') or 'localhost'
    agent_port = int(os.environ.get('JAEGER_AGENT_PORT') or 6831)

    config = Config(
        config={
            # NOTE(review): `const` sampler with param 1 presumably samples
            # every trace; revisit before production use.
            'sampler': {'type': 'const', 'param': 1},
            'local_agent': {
                'reporting_host': agent_host,
                'reporting_port': agent_port,
            },
            'logging': True,
        },
        service_name=service_name,
    )
    return config.initialize_tracer()
from jaeger_client import Config
from opentracing.propagation import Format
def init_jaeger_tracer(service_name):
    """Create a Jaeger tracer for ``service_name``.

    The agent address comes from the ``JAEGER_AGENT_HOST`` and
    ``JAEGER_AGENT_PORT`` environment variables when they are set, and
    defaults to ``localhost:6831`` otherwise, matching the Node.js setup.

    Args:
        service_name: Name the service reports its spans under.

    Returns:
        The result of ``Config.initialize_tracer()``.

    Raises:
        ValueError: If ``JAEGER_AGENT_PORT`` is set but is not an integer.
    """
    import os  # local import keeps the snippet self-contained

    # `or` (not a .get default) so an empty variable also falls back.
    agent_host = os.environ.get('JAEGER_AGENT_HOST') or 'localhost'
    agent_port = int(os.environ.get('JAEGER_AGENT_PORT') or 6831)

    config = Config(
        config={
            # NOTE(review): `const` sampler with param 1 presumably samples
            # every trace; revisit before production use.
            'sampler': {'type': 'const', 'param': 1},
            'local_agent': {
                'reporting_host': agent_host,
                'reporting_port': agent_port,
            },
            'logging': True,
        },
        service_name=service_name,
    )
    return config.initialize_tracer()
# Flask integration
# Flask integration
from flask import Flask, request

# Flask expects the importing module's name here; the bare `name` in the
# rendered snippet is `__name__` with its underscores eaten by the markup.
app = Flask(__name__)
tracer = init_jaeger_tracer('api-service')


@app.before_request
def before_request():
    """Open a span for the incoming request and store it on ``request.span``.

    The span continues any trace context extracted from the request headers.
    """
    ctx = tracer.extract(Format.HTTP_HEADERS, request.headers)
    request.span = tracer.start_span(
        request.path,
        child_of=ctx,
        tags={
            'http.method': request.method,
            'http.url': request.url,
        },
    )


@app.after_request
def after_request(response):
    """Tag the request span with the response status code and finish it."""
    request.span.set_tag('http.status_code', response.status_code)
    request.span.finish()
    return response


@app.route('/api/users/<user_id>')
def get_user(user_id):
    """Return a user payload, wrapping the lookup in a ``fetch-user`` child span."""
    with tracer.start_span('fetch-user', child_of=request.span) as span:
        span.set_tag('user.id', user_id)
        # Fetch user from database
        return {'user': {'id': user_id}}
from flask import Flask, request

# Flask expects the importing module's name here; the bare `name` in the
# rendered snippet is `__name__` with its underscores eaten by the markup.
app = Flask(__name__)
tracer = init_jaeger_tracer('api-service')


@app.before_request
def before_request():
    """Open a span for the incoming request and store it on ``request.span``.

    The span continues any trace context extracted from the request headers.
    """
    ctx = tracer.extract(Format.HTTP_HEADERS, request.headers)
    request.span = tracer.start_span(
        request.path,
        child_of=ctx,
        tags={
            'http.method': request.method,
            'http.url': request.url,
        },
    )


@app.after_request
def after_request(response):
    """Tag the request span with the response status code and finish it."""
    request.span.set_tag('http.status_code', response.status_code)
    request.span.finish()
    return response


@app.route('/api/users/<user_id>')
def get_user(user_id):
    """Return a user payload, wrapping the lookup in a ``fetch-user`` child span."""
    with tracer.start_span('fetch-user', child_of=request.span) as span:
        span.set_tag('user.id', user_id)
        # Fetch user from database
        return {'user': {'id': user_id}}
5. Distributed Context Propagation
5. 分布式上下文传递
javascript
// propagation.js
const axios = require('axios');
const { tracer } = require('./tracing');
const opentracing = require('opentracing');
/**
 * POST `data` to a downstream service while carrying the current trace along.
 *
 * The span's context is injected into the outgoing HTTP headers. The span is
 * tagged with the outcome; on failure the error message is logged on the span
 * and the error is rethrown unchanged.
 *
 * @param {object} span - Active span whose context is propagated.
 * @param {string} url - Downstream endpoint.
 * @param {*} data - Request body.
 * @returns {Promise<*>} The `data` field of the downstream response.
 */
async function callDownstreamService(span, url, data) {
  // Carrier object the tracer fills with the trace-context headers.
  const carrier = {};
  tracer.inject(span, opentracing.FORMAT_HTTP_HEADERS, carrier);

  let payload;
  try {
    const reply = await axios.post(url, data, { headers: carrier });
    span.setTag('downstream.success', true);
    payload = reply.data;
  } catch (failure) {
    span.setTag(opentracing.Tags.ERROR, true);
    span.log({ event: 'error', message: failure.message });
    throw failure;
  }
  return payload;
}
module.exports = { callDownstreamService };
javascript
// propagation.js
const axios = require('axios');
const { tracer } = require('./tracing');
const opentracing = require('opentracing');
/**
 * POST `data` to a downstream service while carrying the current trace along.
 *
 * The span's context is injected into the outgoing HTTP headers. The span is
 * tagged with the outcome; on failure the error message is logged on the span
 * and the error is rethrown unchanged.
 *
 * @param {object} span - Active span whose context is propagated.
 * @param {string} url - Downstream endpoint.
 * @param {*} data - Request body.
 * @returns {Promise<*>} The `data` field of the downstream response.
 */
async function callDownstreamService(span, url, data) {
  // Carrier object the tracer fills with the trace-context headers.
  const carrier = {};
  tracer.inject(span, opentracing.FORMAT_HTTP_HEADERS, carrier);

  let payload;
  try {
    const reply = await axios.post(url, data, { headers: carrier });
    span.setTag('downstream.success', true);
    payload = reply.data;
  } catch (failure) {
    span.setTag(opentracing.Tags.ERROR, true);
    span.log({ event: 'error', message: failure.message });
    throw failure;
  }
  return payload;
}
module.exports = { callDownstreamService };
6. Zipkin Integration
6. Zipkin 集成
javascript
// zipkin-setup.js
const CLSContext = require('zipkin-context-cls');
const { Tracer, BatchRecorder, HttpLogger } = require('zipkin');
const zipkinMiddleware = require('zipkin-instrumentation-express').expressMiddleware;
// Logger that ships spans as JSON over HTTP to the Zipkin v2 spans endpoint.
const spanLogger = new HttpLogger({
  endpoint: 'http://localhost:9411/api/v2/spans',
  headers: { 'Content-Type': 'application/json' },
});

// The recorder hands spans to the HTTP logger above.
const recorder = new BatchRecorder({ logger: spanLogger });

// Context implementation created under the 'zipkin' name.
const ctxImpl = new CLSContext('zipkin');

// Tracer wired to the recorder and context implementation.
const tracer = new Tracer({ recorder, ctxImpl });
module.exports = {
tracer,
zipkinMiddleware: zipkinMiddleware({
tracer,
serviceName: 'api-service'
})
};
javascript
// zipkin-setup.js
const CLSContext = require('zipkin-context-cls');
const { Tracer, BatchRecorder, HttpLogger } = require('zipkin');
const zipkinMiddleware = require('zipkin-instrumentation-express').expressMiddleware;
// Logger that ships spans as JSON over HTTP to the Zipkin v2 spans endpoint.
const spanLogger = new HttpLogger({
  endpoint: 'http://localhost:9411/api/v2/spans',
  headers: { 'Content-Type': 'application/json' },
});

// The recorder hands spans to the HTTP logger above.
const recorder = new BatchRecorder({ logger: spanLogger });

// Context implementation created under the 'zipkin' name.
const ctxImpl = new CLSContext('zipkin');

// Tracer wired to the recorder and context implementation.
const tracer = new Tracer({ recorder, ctxImpl });
module.exports = {
tracer,
zipkinMiddleware: zipkinMiddleware({
tracer,
serviceName: 'api-service'
})
};
7. Trace Analysis
7. 追踪分析
python
python
# query-traces.py
# query-traces.py
import requests
def query_traces(service_name, operation=None, limit=20,
                 base_url='http://localhost:16686', timeout=10):
    """Fetch recent traces for a service from the Jaeger query HTTP API.

    Args:
        service_name: Service whose traces are requested.
        operation: Optional operation name to filter by.
        limit: Maximum number of traces to request.
        base_url: Root URL of the Jaeger query service.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``data`` list from the JSON response.

    Raises:
        requests.HTTPError: If the query service answers with an error status.
    """
    params = {
        'service': service_name,
        'limit': limit
    }
    if operation:
        params['operation'] = operation
    response = requests.get(
        '{}/api/traces'.format(base_url.rstrip('/')),
        params=params,
        # Without a timeout an unreachable Jaeger would hang the caller.
        timeout=timeout,
    )
    # Surface HTTP errors instead of failing later on a missing 'data' key.
    response.raise_for_status()
    return response.json()['data']


def _trace_duration_us(trace):
    """Return the wall-clock duration of a trace in microseconds.

    Uses a top-level ``duration`` when the trace has one; otherwise derives it
    from the spans as latest span end minus earliest span start.
    NOTE(review): assumes each span carries ``startTime`` and ``duration`` in
    microseconds, as in the Jaeger JSON model; confirm against your version.
    """
    if 'duration' in trace:
        return trace['duration']
    spans = trace.get('spans') or []
    if not spans:
        return 0
    earliest_start = min(span['startTime'] for span in spans)
    latest_end = max(span['startTime'] + span['duration'] for span in spans)
    return latest_end - earliest_start


def find_slow_traces(service_name, min_duration_ms=1000):
    """Return traces slower than ``min_duration_ms``, slowest first.

    Args:
        service_name: Service whose traces are inspected (up to 100 fetched).
        min_duration_ms: Threshold in milliseconds; only strictly slower
            traces are returned.

    Returns:
        List of trace dicts sorted by descending duration.
    """
    traces = query_traces(service_name, limit=100)
    threshold_us = min_duration_ms * 1000  # trace durations are in microseconds
    timed = [(_trace_duration_us(trace), trace) for trace in traces]
    slow = [pair for pair in timed if pair[0] > threshold_us]
    slow.sort(key=lambda pair: pair[0], reverse=True)
    return [trace for _, trace in slow]
import requests
def query_traces(service_name, operation=None, limit=20,
                 base_url='http://localhost:16686', timeout=10):
    """Fetch recent traces for a service from the Jaeger query HTTP API.

    Args:
        service_name: Service whose traces are requested.
        operation: Optional operation name to filter by.
        limit: Maximum number of traces to request.
        base_url: Root URL of the Jaeger query service.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``data`` list from the JSON response.

    Raises:
        requests.HTTPError: If the query service answers with an error status.
    """
    params = {
        'service': service_name,
        'limit': limit
    }
    if operation:
        params['operation'] = operation
    response = requests.get(
        '{}/api/traces'.format(base_url.rstrip('/')),
        params=params,
        # Without a timeout an unreachable Jaeger would hang the caller.
        timeout=timeout,
    )
    # Surface HTTP errors instead of failing later on a missing 'data' key.
    response.raise_for_status()
    return response.json()['data']


def _trace_duration_us(trace):
    """Return the wall-clock duration of a trace in microseconds.

    Uses a top-level ``duration`` when the trace has one; otherwise derives it
    from the spans as latest span end minus earliest span start.
    NOTE(review): assumes each span carries ``startTime`` and ``duration`` in
    microseconds, as in the Jaeger JSON model; confirm against your version.
    """
    if 'duration' in trace:
        return trace['duration']
    spans = trace.get('spans') or []
    if not spans:
        return 0
    earliest_start = min(span['startTime'] for span in spans)
    latest_end = max(span['startTime'] + span['duration'] for span in spans)
    return latest_end - earliest_start


def find_slow_traces(service_name, min_duration_ms=1000):
    """Return traces slower than ``min_duration_ms``, slowest first.

    Args:
        service_name: Service whose traces are inspected (up to 100 fetched).
        min_duration_ms: Threshold in milliseconds; only strictly slower
            traces are returned.

    Returns:
        List of trace dicts sorted by descending duration.
    """
    traces = query_traces(service_name, limit=100)
    threshold_us = min_duration_ms * 1000  # trace durations are in microseconds
    timed = [(_trace_duration_us(trace), trace) for trace in traces]
    slow = [pair for pair in timed if pair[0] > threshold_us]
    slow.sort(key=lambda pair: pair[0], reverse=True)
    return [trace for _, trace in slow]
Best Practices
最佳实践
✅ DO
✅ 建议
- Sample appropriately for your traffic volume
- Propagate trace context across services
- Add meaningful span tags
- Log errors with spans
- Use consistent service naming
- Monitor trace latency
- Document trace format
- Keep instrumentation lightweight
- 根据流量规模合理设置采样率
- 在服务间传递追踪上下文
- 为Span添加有意义的标签
- 结合Span记录错误信息
- 使用一致的服务命名规则
- 监控追踪延迟
- 文档化追踪格式
- 保持埋点轻量化
❌ DON'T
❌ 避免
- Sample 100% in production
- Skip trace context propagation
- Log sensitive data in spans
- Create excessive spans
- Ignore sampling configuration
- Use unbounded cardinality tags
- Deploy without testing collection
- 生产环境中设置100%采样率
- 跳过追踪上下文传递
- 在Span中记录敏感数据
- 创建过多的Span
- 忽略采样配置
- 使用无界基数的标签
- 未测试采集功能就部署
Key Concepts
核心概念
- Trace: Complete request flow across services
- Span: Single operation within a trace
- Tag: Metadata attached to spans
- Log: Timestamped events within spans
- Context: Trace information propagated between services
- Trace:跨服务的完整请求流
- Span:追踪中的单个操作
- Tag:附加到Span的元数据
- Log:Span内的时间戳事件
- Context:在服务间传递的追踪信息