Loading...
Loading...
Instrumenting Go applications with OpenTelemetry for distributed tracing, Prometheus for metrics, and structured logging with slog
npx skill4agent add bobmatnyc/claude-mpm-skills golang-observability-opentelemetrygo get go.opentelemetry.io/otel
go get go.opentelemetry.io/otel/sdk
go get go.opentelemetry.io/otel/exporters/jaeger
go get go.opentelemetry.io/contrib/instrumentation/net/http/otelhttppackage main
import (
"context"
"log"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/jaeger"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
)
func initTracer(serviceName string) (*sdktrace.TracerProvider, error) {
// Create Jaeger exporter
exporter, err := jaeger.New(jaeger.WithCollectorEndpoint(
jaeger.WithEndpoint("http://localhost:14268/api/traces"),
))
if err != nil {
return nil, err
}
// Create resource with service name
res, err := resource.Merge(
resource.Default(),
resource.NewWithAttributes(
semconv.SchemaURL,
semconv.ServiceName(serviceName),
semconv.ServiceVersion("1.0.0"),
),
)
if err != nil {
return nil, err
}
// Create tracer provider
tp := sdktrace.NewTracerProvider(
sdktrace.WithBatcher(exporter),
sdktrace.WithResource(res),
sdktrace.WithSampler(sdktrace.AlwaysSample()), // Use probability sampler in production
)
otel.SetTracerProvider(tp)
return tp, nil
}
func main() {
tp, err := initTracer("order-service")
if err != nil {
log.Fatal(err)
}
defer func() {
if err := tp.Shutdown(context.Background()); err != nil {
log.Printf("Error shutting down tracer: %v", err)
}
}()
// Application code...
}import (
"context"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/trace"
)
func ProcessOrder(ctx context.Context, order Order) error {
tracer := otel.Tracer("order-service")
ctx, span := tracer.Start(ctx, "ProcessOrder")
defer span.End()
// Add attributes
span.SetAttributes(
attribute.String("order.id", order.ID),
attribute.Int("order.items", len(order.Items)),
attribute.Float64("order.total", order.Total),
)
// Validate order (creates child span)
if err := validateOrder(ctx, order); err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, "validation failed")
return err
}
// Fulfill order
if err := fulfillOrder(ctx, order); err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, "fulfillment failed")
return err
}
span.SetStatus(codes.Ok, "order processed successfully")
return nil
}
func validateOrder(ctx context.Context, order Order) error {
_, span := otel.Tracer("order-service").Start(ctx, "validateOrder")
defer span.End()
// Validation logic...
return nil
}import (
"net/http"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
)
func main() {
// Wrap handler with automatic tracing
handler := http.HandlerFunc(orderHandler)
wrappedHandler := otelhttp.NewHandler(handler, "order-handler")
http.Handle("/orders", wrappedHandler)
http.ListenAndServe(":8080", nil)
}
// Manual instrumentation for more control
func orderHandler(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
tracer := otel.Tracer("order-service")
ctx, span := tracer.Start(ctx, "orderHandler")
defer span.End()
// Extract order ID from request
orderID := r.URL.Query().Get("id")
span.SetAttributes(attribute.String("order.id", orderID))
// Process order with propagated context
order, err := fetchOrder(ctx, orderID)
if err != nil {
span.RecordError(err)
http.Error(w, "Order not found", http.StatusNotFound)
return
}
// ... handle response
}go get github.com/prometheus/client_golang/prometheus
go get github.com/prometheus/client_golang/prometheus/promhttppackage metrics
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
var (
// Counter: Monotonically increasing value
httpRequestsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "http_requests_total",
Help: "Total number of HTTP requests",
},
[]string{"method", "path", "status"},
)
// Gauge: Value that can go up or down
activeConnections = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "active_connections",
Help: "Number of active connections",
},
)
// Histogram: Observations bucketed by value
httpRequestDuration = promauto.NewHistogramVec(
prometheus.HistogramOpts{
Name: "http_request_duration_seconds",
Help: "HTTP request duration in seconds",
Buckets: prometheus.DefBuckets, // [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]
},
[]string{"method", "path"},
)
// Summary: Similar to histogram but calculates quantiles
dbQueryDuration = promauto.NewSummaryVec(
prometheus.SummaryOpts{
Name: "db_query_duration_seconds",
Help: "Database query duration",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
},
[]string{"query_type"},
)
)import (
"net/http"
"strconv"
"time"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// Metrics middleware that instruments all HTTP handlers
func MetricsMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
start := time.Now()
// Track active connections
activeConnections.Inc()
defer activeConnections.Dec()
// Wrap response writer to capture status code
rw := &responseWriter{ResponseWriter: w, statusCode: http.StatusOK}
// Call next handler
next.ServeHTTP(rw, r)
// Record metrics
duration := time.Since(start).Seconds()
httpRequestDuration.WithLabelValues(r.Method, r.URL.Path).Observe(duration)
httpRequestsTotal.WithLabelValues(r.Method, r.URL.Path, strconv.Itoa(rw.statusCode)).Inc()
})
}
type responseWriter struct {
http.ResponseWriter
statusCode int
}
func (rw *responseWriter) WriteHeader(code int) {
rw.statusCode = code
rw.ResponseWriter.WriteHeader(code)
}
// Expose metrics endpoint
func main() {
http.Handle("/metrics", promhttp.Handler())
handler := MetricsMiddleware(http.HandlerFunc(orderHandler))
http.Handle("/orders", handler)
http.ListenAndServe(":8080", nil)
}func ProcessPayment(ctx context.Context, payment Payment) error {
timer := prometheus.NewTimer(dbQueryDuration.WithLabelValues("payment_insert"))
defer timer.ObserveDuration()
// Process payment
if err := db.InsertPayment(payment); err != nil {
httpRequestsTotal.WithLabelValues("POST", "/payments", "500").Inc()
return err
}
httpRequestsTotal.WithLabelValues("POST", "/payments", "200").Inc()
return nil
}package main
import (
"context"
"log/slog"
"os"
)
func initLogger() *slog.Logger {
// JSON logger for production
handler := slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
Level: slog.LevelInfo,
AddSource: true, // Include file:line information
})
logger := slog.New(handler)
slog.SetDefault(logger) // Set as default logger
return logger
}
func main() {
logger := initLogger()
logger.Info("service starting",
"service", "order-service",
"version", "1.0.0",
"port", 8080,
)
}import (
"context"
"log/slog"
"go.opentelemetry.io/otel/trace"
)
// Add trace context to logger
func LoggerWithTrace(ctx context.Context) *slog.Logger {
span := trace.SpanFromContext(ctx)
spanCtx := span.SpanContext()
return slog.With(
"trace_id", spanCtx.TraceID().String(),
"span_id", spanCtx.SpanID().String(),
)
}
func HandleRequest(ctx context.Context, req Request) error {
logger := LoggerWithTrace(ctx)
logger.Info("processing request",
"request_id", req.ID,
"method", req.Method,
"path", req.Path,
)
if err := processRequest(ctx, req); err != nil {
logger.Error("request failed",
"error", err,
"duration_ms", time.Since(req.StartTime).Milliseconds(),
)
return err
}
logger.Info("request completed successfully",
"duration_ms", time.Since(req.StartTime).Milliseconds(),
)
return nil
}func ProcessOrder(ctx context.Context, order Order) error {
logger := LoggerWithTrace(ctx).With(
"order_id", order.ID,
"user_id", order.UserID,
)
logger.Debug("validating order", "items", len(order.Items))
if len(order.Items) == 0 {
logger.Warn("empty order received")
return ErrEmptyOrder
}
logger.Info("order validation passed")
if err := fulfillOrder(ctx, order); err != nil {
logger.Error("fulfillment failed",
"error", err,
slog.Group("order_details",
"total", order.Total,
"items", len(order.Items),
),
)
return err
}
logger.Info("order processed successfully",
"total", order.Total,
)
return nil
}import (
"context"
"database/sql"
"encoding/json"
"net/http"
"time"
)
type HealthChecker struct {
db *sql.DB
// Add other dependencies
}
type HealthStatus struct {
Status string `json:"status"`
Version string `json:"version"`
Checks map[string]string `json:"checks"`
Timestamp time.Time `json:"timestamp"`
}
// Liveness probe - is the app running?
func (hc *HealthChecker) LivenessHandler(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
json.NewEncoder(w).Encode(map[string]string{
"status": "alive",
})
}
// Readiness probe - is the app ready to serve traffic?
func (hc *HealthChecker) ReadinessHandler(w http.ResponseWriter, r *http.Request) {
ctx, cancel := context.WithTimeout(r.Context(), 5*time.Second)
defer cancel()
status := HealthStatus{
Status: "ready",
Version: "1.0.0",
Checks: make(map[string]string),
Timestamp: time.Now(),
}
// Check database
if err := hc.db.PingContext(ctx); err != nil {
status.Status = "not_ready"
status.Checks["database"] = "unhealthy: " + err.Error()
w.WriteHeader(http.StatusServiceUnavailable)
} else {
status.Checks["database"] = "healthy"
}
// Add more dependency checks (Redis, external APIs, etc.)
w.Header().Set("Content-Type", "application/json")
if status.Status == "ready" {
w.WriteHeader(http.StatusOK)
}
json.NewEncoder(w).Encode(status)
}import (
"context"
"net/http"
"os"
"os/signal"
"syscall"
"time"
)
func main() {
// Initialize tracer
tp, err := initTracer("order-service")
if err != nil {
log.Fatal(err)
}
// Setup HTTP server
server := &http.Server{
Addr: ":8080",
Handler: setupRoutes(),
}
// Channel for shutdown signals
shutdown := make(chan os.Signal, 1)
signal.Notify(shutdown, os.Interrupt, syscall.SIGTERM)
// Start server in goroutine
go func() {
slog.Info("server starting", "port", 8080)
if err := server.ListenAndServe(); err != http.ErrServerClosed {
log.Fatal(err)
}
}()
// Wait for shutdown signal
<-shutdown
slog.Info("shutdown signal received")
// Create shutdown context with timeout
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
// Shutdown HTTP server
slog.Info("shutting down HTTP server")
if err := server.Shutdown(ctx); err != nil {
slog.Error("HTTP server shutdown error", "error", err)
}
// Shutdown tracer provider (flush spans)
slog.Info("shutting down tracer")
if err := tp.Shutdown(ctx); err != nil {
slog.Error("tracer shutdown error", "error", err)
}
slog.Info("shutdown complete")
}package main
import (
"context"
"database/sql"
"log/slog"
"net/http"
"os"
"time"
"github.com/prometheus/client_golang/prometheus/promhttp"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
)
type Server struct {
db *sql.DB
logger *slog.Logger
}
func (s *Server) orderHandler(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
// Get tracer and create span
tracer := otel.Tracer("order-service")
ctx, span := tracer.Start(ctx, "orderHandler")
defer span.End()
// Create context-aware logger with trace ID
logger := s.logger.With(
"trace_id", span.SpanContext().TraceID().String(),
"request_id", r.Header.Get("X-Request-ID"),
)
orderID := r.URL.Query().Get("id")
span.SetAttributes(attribute.String("order.id", orderID))
logger.Info("fetching order", "order_id", orderID)
// Fetch order from database
order, err := s.fetchOrder(ctx, orderID)
if err != nil {
span.RecordError(err)
logger.Error("failed to fetch order", "error", err)
http.Error(w, "Order not found", http.StatusNotFound)
return
}
logger.Info("order fetched successfully",
"order_id", orderID,
"items", len(order.Items),
)
// Return order as JSON
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(order)
}
func (s *Server) fetchOrder(ctx context.Context, orderID string) (*Order, error) {
_, span := otel.Tracer("order-service").Start(ctx, "fetchOrder")
defer span.End()
// Time database query
start := time.Now()
var order Order
err := s.db.QueryRowContext(ctx, "SELECT * FROM orders WHERE id = ?", orderID).Scan(&order)
duration := time.Since(start).Seconds()
dbQueryDuration.WithLabelValues("select_order").Observe(duration)
return &order, err
}
func setupRoutes(s *Server, hc *HealthChecker) http.Handler {
mux := http.NewServeMux()
// Health endpoints (no tracing needed)
mux.HandleFunc("/health", hc.LivenessHandler)
mux.HandleFunc("/ready", hc.ReadinessHandler)
mux.Handle("/metrics", promhttp.Handler())
// Business endpoints (with tracing)
orderHandler := http.HandlerFunc(s.orderHandler)
mux.Handle("/orders", otelhttp.NewHandler(orderHandler, "orders"))
// Wrap everything with metrics middleware
return MetricsMiddleware(mux)
}func processOrder(order Order) error {
// Creates new context, loses trace!
ctx := context.Background()
return validateOrder(ctx, order)
}func processOrder(ctx context.Context, order Order) error {
// Propagates trace context
return validateOrder(ctx, order)
}// user_id can have millions of values!
httpRequests.WithLabelValues(r.Method, r.URL.Path, userID).Inc()// Only method and path (bounded values)
httpRequests.WithLabelValues(r.Method, r.URL.Path).Inc()
// Track user-specific metrics separately if neededlogger.Info("user login",
"email", user.Email, // PII!
"password", user.Password, // CRITICAL!
"token", authToken, // SECRET!
)logger.Info("user login",
"user_id", user.ID, // Safe identifier
"method", "password",
)func processOrder(ctx context.Context) error {
ctx, span := tracer.Start(ctx, "processOrder")
// Missing defer span.End()!
if err := validate(); err != nil {
return err // Span never closed!
}
return nil
}func processOrder(ctx context.Context) error {
ctx, span := tracer.Start(ctx, "processOrder")
defer span.End() // Always runs
if err := validate(); err != nil {
span.RecordError(err)
return err
}
return nil
}// Synchronous export blocks HTTP handler
exporter := jaeger.New(jaeger.WithCollectorEndpoint(...))
tp := sdktrace.NewTracerProvider(
sdktrace.WithSyncer(exporter), // BAD: Synchronous!
)// Batching exports asynchronously
tp := sdktrace.NewTracerProvider(
sdktrace.WithBatcher(exporter), // GOOD: Async batching
)func main() {
tp, _ := initTracer("service")
// Missing shutdown - spans lost!
http.ListenAndServe(":8080", nil)
}func main() {
tp, _ := initTracer("service")
defer tp.Shutdown(context.Background())
// Handle signals and graceful shutdown
server.ListenAndServe()
}context.Context/health/readyspan.RecordError()_total_secondshttp_requests_totalhttp_requestspayment_transactions_totalerrors_totalactive_connectionsqueue_sizememory_usage_byteshttp_request_duration_seconds_millisecondsdb_query_duration_secondsresponse_size_bytesmethodhttp_methodstatusstatus_codehttp_statustp, _ := initTracer("service-name")
defer tp.Shutdown(context.Background())ctx, span := otel.Tracer("name").Start(ctx, "operation")
defer span.End()
span.SetAttributes(attribute.String("key", "value"))counter := promauto.NewCounterVec(opts, []string{"label"})
histogram := promauto.NewHistogramVec(opts, []string{"label"})logger := slog.With("trace_id", traceID)
logger.Info("message", "key", value)http.HandleFunc("/health", livenessHandler)
http.HandleFunc("/ready", readinessHandler)