OpenTelemetry 的基本使用
启动 OpenTelemetry Collector 及其相关组件
# docker compose up -d
[+] Running 4/4
✔ Container prometheus Started 0.9s
✔ Container trace-jaeger-all-in-one-1 Started 0.9s
✔ Container trace-zipkin-all-in-one-1 Started 0.9s
✔ Container trace-otel-collector-1 Started
对接 Zipkin
Zipkin 是一个分布式跟踪系统。它有助于收集排查服务架构中延迟问题所需的计时数据,功能包括这些数据的收集和查询。
示例代码:
// logger is the standard-library logger passed to the Zipkin exporter in
// initTracer. It writes to stderr, prefixes each line with "zipkin-example",
// and includes the date, time, and full file name with line number.
var logger = log.New(os.Stderr, "zipkin-example", log.Ldate|log.Ltime|log.Llongfile)
// initTracer builds a TracerProvider that exports spans to the Zipkin endpoint
// at url and registers it as the global trace provider.
//
// It returns the provider's Shutdown method so the caller can stop the
// provider on exit, or a non-nil error if the exporter could not be created.
func initTracer(url string) (func(context.Context) error, error) {
	// The exporter reports through the package-level logger.
	zipkinExporter, err := zipkin.New(url, zipkin.WithLogger(logger))
	if err != nil {
		return nil, err
	}

	// Resource describing this service; spans are reported under "zipkin-test".
	res := resource.NewWithAttributes(
		semconv.SchemaURL,
		semconv.ServiceName("zipkin-test"),
	)

	// Spans reach the exporter through a batch span processor.
	provider := sdktrace.NewTracerProvider(
		sdktrace.WithSpanProcessor(sdktrace.NewBatchSpanProcessor(zipkinExporter)),
		sdktrace.WithResource(res),
	)

	// Use the Zipkin-backed provider as the global trace provider.
	otel.SetTracerProvider(provider)
	return provider.Shutdown, nil
}
// main runs the Zipkin example. It reads the collector URL from the -zipkin
// flag, installs the tracer provider via initTracer, then records a root span
// "foo" that wraps a call to bar. The provider is shut down when main returns.
func main() {
	url := flag.String("zipkin", "http://localhost:9411/api/v2/spans", "zipkin url")
	flag.Parse()

	// ctx is cancelled when the process receives an interrupt signal.
	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
	defer cancel()

	shutdown, err := initTracer(*url)
	if err != nil {
		log.Fatal(err)
	}
	defer func() {
		if err := shutdown(ctx); err != nil {
			// log.Fatalf, not log.Fatal: Fatal does not interpret format
			// verbs. %v is used because %w is only meaningful to fmt.Errorf.
			log.Fatalf("failed to shutdown TracerProvider: %v", err)
		}
	}()

	tr := otel.GetTracerProvider().Tracer("component-main")
	// Create the root span, named foo.
	ctx, span := tr.Start(ctx, "foo", trace.WithSpanKind(trace.SpanKindServer))
	<-time.After(6 * time.Millisecond)
	bar(ctx)
	<-time.After(6 * time.Millisecond)
	span.End()
}
// bar starts a span named "bar" from ctx using the global tracer provider,
// pauses for 6ms, and then ends the span.
func bar(ctx context.Context) {
	tracer := otel.GetTracerProvider().Tracer("component-bar")

	// Create the child span; its parent is the foo span carried in ctx.
	_, child := tracer.Start(ctx, "bar")
	defer child.End()

	time.Sleep(6 * time.Millisecond)
}
以上代码会创建两个 Span:父 Span 为 foo,子 Span 为 bar。执行程序后,会向 Zipkin 上报 Trace 数据。
打开 Zipkin(http://127.0.0.1:9411/zipkin/),可以看到刚刚的 Trace:
点开 show 可以查看 Span 详情,可以看到刚刚创建的两个 Span:
对接 Jaeger
Jaeger 受到Dapper和OpenZipkin的启发,是由Uber Technologies作为开源发布的分布式跟踪系统。它用于监控和故障排除基于微服务的分布式系统,包括:
- 分布式上下文传播
- 分布式事务监控
- 根本原因分析
- 服务依赖分析
- 性能/延迟优化
示例代码:
// Values recorded in the Resource that tracerProvider attaches to the
// TracerProvider.
const (
// service is passed to semconv.ServiceName.
service = "trace-demo"
// environment is the value of the "environment" attribute.
environment = "production"
// id is the value of the "ID" attribute.
id = 1
)
// tracerProvider creates an OpenTelemetry TracerProvider that batches spans
// and sends them through a Jaeger exporter to the collector endpoint at url.
// The provider carries a Resource holding the service name, environment, and
// ID declared in this file's constants.
//
// It returns a non-nil error if the Jaeger exporter cannot be created.
func tracerProvider(url string) (*tracesdk.TracerProvider, error) {
	// Build the Jaeger exporter for the given collector endpoint.
	endpoint := jaeger.WithCollectorEndpoint(jaeger.WithEndpoint(url))
	exporter, err := jaeger.New(endpoint)
	if err != nil {
		return nil, err
	}

	// Information about this application, recorded in a Resource.
	appResource := resource.NewWithAttributes(
		semconv.SchemaURL,
		semconv.ServiceName(service),
		attribute.String("environment", environment),
		attribute.Int64("ID", id),
	)

	return tracesdk.NewTracerProvider(
		// Always batch in production.
		tracesdk.WithBatcher(exporter),
		tracesdk.WithResource(appResource),
	), nil
}
// main runs the Jaeger example. It builds the TracerProvider, registers it
// globally, records a span "foo" that wraps a call to bar, and shuts the
// provider down with a 5-second timeout on exit.
func main() {
	tp, err := tracerProvider("http://localhost:14268/api/traces")
	if err != nil {
		log.Fatal(err)
	}

	// Make tp the global provider so instrumentation that uses the global
	// API (such as bar) picks it up.
	otel.SetTracerProvider(tp)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Shut the provider down when the application exits. The timeout keeps
	// shutdown from hanging the process.
	defer func(parent context.Context) {
		shutdownCtx, stop := context.WithTimeout(parent, 5*time.Second)
		defer stop()
		if shutdownErr := tp.Shutdown(shutdownCtx); shutdownErr != nil {
			log.Fatal(shutdownErr)
		}
	}(ctx)

	tracer := tp.Tracer("component-main")

	// Create a span.
	ctx, span := tracer.Start(ctx, "foo")
	defer span.End()

	bar(ctx)
}
// bar starts a span named "bar" from ctx using the global TracerProvider,
// sets the attribute testset="value" on it, and ends the span on return.
func bar(ctx context.Context) {
	// Child span.
	_, child := otel.Tracer("component-bar").Start(ctx, "bar")
	defer child.End()

	attr := attribute.Key("testset").String("value")
	child.SetAttributes(attr)

	// Do bar...
}
跟 Zipkin 一样,以上代码会创建两个 Span:父 Span 为 foo,子 Span 为 bar。执行程序后,会向 Jaeger 上报 Trace 数据。
打开 Jaeger UI(http://127.0.0.1:16686/),可以看到刚刚的 Trace:
点开 Span 详情,可以看到刚刚创建的两个 Span:
对接 Prometheus
Prometheus是一个开源系统监控和警报工具包,最初由 SoundCloud构建。自 2012 年启动以来,许多公司和组织都采用了 Prometheus,该项目拥有非常活跃的开发者和用户社区。它现在是一个独立的开源项目,独立于任何公司进行维护。为了强调这一点,并明确项目的治理结构,Prometheus 于 2016 年作为继Kubernetes之后的第二个托管项目加入了云原生计算基金会。
Prometheus 将其指标收集并存储为时间序列数据,即指标信息与记录时的时间戳以及称为标签的可选键值对一起存储。
示例代码:
package main
import (
"context"
"fmt"
"log"
"math/rand"
"net/http"
"os"
"os/signal"
"time"
"github.com/prometheus/client_golang/prometheus/promhttp"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/prometheus"
api "go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/sdk/metric"
)
// main runs the Prometheus example. It creates a MeterProvider backed by the
// Prometheus exporter, starts the metrics HTTP server in the background,
// records a counter ("foo"), an observable gauge ("bar") and a histogram
// ("baz"), then blocks until the process is interrupted.
func main() {
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
	ctx := context.Background()

	// The exporter embeds a default OpenTelemetry Reader and
	// implements prometheus.Collector, allowing it to be used as
	// both a Reader and Collector.
	exporter, err := prometheus.New()
	if err != nil {
		log.Fatal(err)
	}
	provider := metric.NewMeterProvider(metric.WithReader(exporter))
	meter := provider.Meter("github.com/xuqil/observability/metrics/prometheus")

	// Start the prometheus HTTP server and pass the exporter Collector to it
	go serveMetrics()

	// Attributes attached to every measurement recorded below.
	opt := api.WithAttributes(
		attribute.Key("A").String("B"),
		attribute.Key("C").String("D"),
	)

	// This is the equivalent of prometheus.NewCounterVec
	counter, err := meter.Float64Counter("foo", api.WithDescription("a simple counter"))
	if err != nil {
		log.Fatal(err)
	}
	counter.Add(ctx, 5, opt)

	gauge, err := meter.Float64ObservableGauge("bar", api.WithDescription("a fun little gauge"))
	if err != nil {
		log.Fatal(err)
	}
	// The callback observes a fresh random value for the gauge each time it runs.
	_, err = meter.RegisterCallback(func(_ context.Context, o api.Observer) error {
		n := -10. + rng.Float64()*(90.) // [-10, 80)
		o.ObserveFloat64(gauge, n, opt)
		return nil
	}, gauge)
	if err != nil {
		log.Fatal(err)
	}

	// This is the equivalent of prometheus.NewHistogramVec
	histogram, err := meter.Float64Histogram("baz", api.WithDescription("a very nice histogram"))
	if err != nil {
		log.Fatal(err)
	}
	histogram.Record(ctx, 23, opt)
	histogram.Record(ctx, 7, opt)
	histogram.Record(ctx, 101, opt)
	histogram.Record(ctx, 105, opt)

	// Block until an interrupt signal arrives. The stop function is kept and
	// deferred so the signal registration is released instead of discarded.
	ctx, stop := signal.NotifyContext(ctx, os.Interrupt)
	defer stop()
	<-ctx.Done()
}
// serveMetrics registers promhttp.Handler() at /metrics on the default HTTP
// mux and serves it on port 2223. It blocks until the server stops, then
// prints the error that ended it.
func serveMetrics() {
	log.Printf("serving metrics at localhost:2223/metrics")
	http.Handle("/metrics", promhttp.Handler())

	if serveErr := http.ListenAndServe(":2223", nil); serveErr != nil {
		fmt.Printf("error serving http: %v", serveErr)
	}
}
执行程序后,访问 exporter 地址:http://localhost:2223/metrics
# curl localhost:2223/metrics
# HELP bar a fun little gauge
# TYPE bar gauge
bar{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version=""} 6.947346382686447
# HELP baz a very nice histogram
# TYPE baz histogram
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="0"} 0
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="5"} 0
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="10"} 1
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="25"} 2
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="50"} 2
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="75"} 2
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="100"} 2
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="250"} 4
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="500"} 4
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="750"} 4
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="1000"} 4
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="2500"} 4
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="5000"} 4
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="7500"} 4
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="10000"} 4
baz_bucket{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version="",le="+Inf"} 4
baz_sum{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version=""} 236
baz_count{A="B",C="D",otel_scope_name="github.com/xuqil/observability/metrics/prometheus",otel_scope_version=""} 4
从结果可以看到,OpenTelemetry 生成了跟 Prometheus exporter 一致的数据。