From eedd80cf5799aec294db04aeaf4a253eeb57f968 Mon Sep 17 00:00:00 2001 From: Ron Federman Date: Sun, 29 Dec 2024 15:47:14 +0200 Subject: [PATCH 1/3] Add gRPC error message for client spans --- Makefile | 4 +- internal/pkg/inject/offset_results.json | 217 ++++++++++++++++++ .../grpc/client/bpf/probe.bpf.c | 9 + .../grpc/client/bpf_arm64_bpfel.go | 3 + .../grpc/client/bpf_x86_bpfel.go | 3 + .../google.golang.org/grpc/client/probe.go | 12 + .../grpc/server/bpf/probe.bpf.c | 11 +- internal/test/e2e/grpc/traces.json | 6 +- internal/test/e2e/grpc/verify.bats | 33 ++- internal/tools/inspect/cmd/offsetgen/main.go | 1 + 10 files changed, 283 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 4094fb139..69f328b2c 100644 --- a/Makefile +++ b/Makefile @@ -126,8 +126,8 @@ offsets: | $(OFFSETGEN) $(OFFSETGEN) -output=$(OFFSETS_OUTPUT_FILE) -cache=$(OFFSETS_OUTPUT_FILE) .PHONY: docker-offsets -docker-offsets: - docker run -e DOCKER_USERNAME=$(DOCKER_USERNAME) -e DOCKER_PASSWORD=$(DOCKER_PASSWORD) --rm -v /tmp:/tmp -v /var/run/docker.sock:/var/run/docker.sock -v $(shell pwd):/app golang:1.22 /bin/sh -c "cd ../app && make offsets" +docker-offsets: docker-build-base + docker run -e DOCKER_USERNAME=$(DOCKER_USERNAME) -e DOCKER_PASSWORD=$(DOCKER_PASSWORD) --rm -v /tmp:/tmp -v /var/run/docker.sock:/var/run/docker.sock -v $(shell pwd):/app $(IMG_NAME_BASE) /bin/sh -c "cd ../app && make offsets" .PHONY: update-licenses update-licenses: generate $(GOLICENSES) diff --git a/internal/pkg/inject/offset_results.json b/internal/pkg/inject/offset_results.json index 684c9c090..b607d418b 100644 --- a/internal/pkg/inject/offset_results.json +++ b/internal/pkg/inject/offset_results.json @@ -1878,6 +1878,223 @@ ] } ] + }, + { + "field": "Message", + "offsets": [ + { + "offset": null, + "versions": [ + "1.0.0", + "1.0.1-GA", + "1.0.2", + "1.0.3", + "1.0.4", + "1.0.5", + "1.2.0", + "1.2.1", + "1.3.0", + "1.4.0", + "1.4.1", + "1.4.2", + "1.5.0", + "1.5.1", + "1.5.2", + "1.6.0", + "1.7.0", + "1.7.1", + "1.7.2", + "1.7.3", + "1.7.4", + "1.7.5", + "1.64.0" + ] + }, + { + "offset": 8, + "versions": [ + "1.15.0", + "1.16.0", + "1.17.0", + "1.18.0", + "1.18.1", + "1.19.0", + "1.19.1", + "1.20.0", + "1.20.1", + "1.21.0", + "1.21.1", + "1.21.2", + "1.21.3", + "1.21.4", + "1.22.0", + "1.22.1", + "1.22.2", + "1.22.3", + "1.23.0", + "1.23.1", + "1.24.0", + "1.25.0", + "1.25.1", + "1.26.0", + "1.27.0-pre", + "1.27.0", + "1.27.1", + "1.28.0-pre", + "1.28.0", + "1.28.1", + "1.29.0-dev", + "1.29.0", + "1.29.1", + "1.30.0-dev", + "1.30.0-dev.1", + "1.30.0", + "1.30.1", + "1.31.0-dev", + "1.31.0", + "1.31.1", + "1.32.0-dev", + "1.32.0", + "1.33.0-dev", + "1.33.0", + "1.33.1", + "1.34.0-dev" + ] + }, + { + "offset": 48, + "versions": [ + "1.8.0", + "1.8.2", + "1.9.0", + "1.9.1", + "1.9.2", + "1.10.0", + "1.10.1", + "1.11.0", + "1.11.1", + "1.11.2", + "1.11.3", + "1.12.0", + "1.12.1", + "1.12.2", + "1.13.0", + "1.14.0", + "1.33.2", + "1.33.3", + "1.34.0", + "1.34.1", + "1.34.2", + "1.35.0-dev", + "1.35.0", + "1.35.1", + "1.36.0-dev", + "1.36.0", + "1.36.1", + "1.37.0-dev", + "1.37.0", + "1.37.1", + "1.38.0-dev", + "1.38.0", + "1.38.1", + "1.39.0-dev", + "1.39.0", + "1.39.1", + "1.40.0-dev", + "1.40.0", + "1.40.1", + "1.41.0-dev", + "1.41.0", + "1.41.1", + "1.42.0-dev", + "1.42.0", + "1.43.0-dev", + "1.43.0", + "1.44.0-dev", + "1.44.0", + "1.45.0-dev", + "1.45.0", + "1.46.0-dev", + "1.46.0", + "1.46.1", + "1.46.2", + "1.47.0-dev", + "1.47.0", + "1.48.0-dev", + "1.48.0", + "1.49.0-dev", + "1.49.0", + "1.50.0-dev", + "1.50.0", + "1.50.1", + "1.51.0-dev", + "1.51.0", + "1.52.0-dev", + "1.52.0", + "1.52.1", + "1.52.3", + "1.53.0-dev", + "1.53.0", + "1.54.0", + "1.54.1", + "1.55.0-dev", + "1.55.0", + "1.55.1", + "1.56.0-dev", + "1.56.0", + "1.56.1", + "1.56.2", + "1.56.3", + "1.57.0-dev", + "1.57.0", + "1.57.1", + "1.57.2", + "1.58.0-dev", + "1.58.0", + "1.58.1", + "1.58.2", + "1.58.3", + "1.59.0-dev", + "1.59.0", + "1.60.0-dev", + "1.60.0", + "1.60.1", + "1.61.0-dev", + "1.61.0", + "1.61.1", + "1.61.2", + "1.62.0", + "1.62.1", + "1.62.2", + "1.63.0", + "1.63.1", + "1.63.2", + "1.63.3", + "1.64.1", + "1.65.0-dev", + "1.65.0", + "1.65.1", + "1.66.0-dev", + "1.66.0", + "1.66.1", + "1.66.2", + "1.66.3", + "1.67.0-dev", + "1.67.0", + "1.67.1", + "1.67.2", + "1.67.3", + "1.68.0-dev", + "1.68.0", + "1.68.1", + "1.68.2", + "1.69.0-dev", + "1.69.0", + "1.69.2", + "1.70.0-dev" + ] + } + ] } ] } diff --git a/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf/probe.bpf.c b/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf/probe.bpf.c index 8d8c5acec..d3b06433e 100644 --- a/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf/probe.bpf.c +++ b/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf/probe.bpf.c @@ -12,10 +12,12 @@ char __license[] SEC("license") = "Dual MIT/GPL"; #define MAX_SIZE 50 #define MAX_CONCURRENT 50 +#define MAX_ERROR_LEN 128 struct grpc_request_t { BASE_SPAN_PROPERTIES + char err_msg[MAX_ERROR_LEN]; char method[MAX_SIZE]; char target[MAX_SIZE]; u32 status_code; @@ -51,6 +53,7 @@ volatile const u64 headerFrame_streamid_pos; volatile const u64 headerFrame_hf_pos; volatile const u64 error_status_pos; volatile const u64 status_s_pos; +volatile const u64 status_message_pos; volatile const u64 status_code_pos; volatile const bool write_status_supported; @@ -139,6 +142,11 @@ int uprobe_ClientConn_Invoke_Returns(struct pt_regs *ctx) { // s *Status // } // The `Status` proto object contains a `Code` int32 field, which is what we want + // type Status struct { + // Code int32 + // Message string + // Details []*anypb.Any + // } void *resp_ptr = get_argument(ctx, 2); if(resp_ptr == 0) { // err == nil @@ -152,6 +160,7 @@ int uprobe_ClientConn_Invoke_Returns(struct pt_regs *ctx) { bpf_probe_read_user(&s_ptr, sizeof(s_ptr), (void *)(status_ptr + status_s_pos)); // Get status code from Status.s pointer bpf_probe_read_user(&grpc_span->status_code, sizeof(grpc_span->status_code), (void *)(s_ptr + status_code_pos)); + get_go_string_from_user_ptr((void *)(s_ptr + status_message_pos), grpc_span->err_msg, sizeof(grpc_span->err_msg)); done: grpc_span->end_time = bpf_ktime_get_ns(); diff --git a/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf_arm64_bpfel.go b/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf_arm64_bpfel.go index 20f55781f..0fd79ae6e 100644 --- a/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf_arm64_bpfel.go +++ b/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf_arm64_bpfel.go @@ -17,6 +17,7 @@ type bpfGrpcRequestT struct { EndTime uint64 Sc bpfSpanContext Psc bpfSpanContext + ErrMsg [128]int8 Method [50]int8 Target [50]int8 StatusCode uint32 @@ -108,6 +109,7 @@ type bpfVariableSpecs struct { IsRegistersAbi *ebpf.VariableSpec `ebpf:"is_registers_abi"` StartAddr *ebpf.VariableSpec `ebpf:"start_addr"` StatusCodePos *ebpf.VariableSpec `ebpf:"status_code_pos"` + StatusMessagePos *ebpf.VariableSpec `ebpf:"status_message_pos"` StatusS_pos *ebpf.VariableSpec `ebpf:"status_s_pos"` TotalCpus *ebpf.VariableSpec `ebpf:"total_cpus"` WriteStatusSupported *ebpf.VariableSpec `ebpf:"write_status_supported"` @@ -172,6 +174,7 @@ type bpfVariables struct { IsRegistersAbi *ebpf.Variable `ebpf:"is_registers_abi"` StartAddr *ebpf.Variable `ebpf:"start_addr"` StatusCodePos *ebpf.Variable `ebpf:"status_code_pos"` + StatusMessagePos *ebpf.Variable `ebpf:"status_message_pos"` StatusS_pos *ebpf.Variable `ebpf:"status_s_pos"` TotalCpus *ebpf.Variable `ebpf:"total_cpus"` WriteStatusSupported *ebpf.Variable `ebpf:"write_status_supported"` diff --git a/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf_x86_bpfel.go b/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf_x86_bpfel.go index dac396828..a50b98736 100644 --- a/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf_x86_bpfel.go +++ b/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf_x86_bpfel.go @@ -17,6 +17,7 @@ type bpfGrpcRequestT struct { EndTime uint64 Sc bpfSpanContext Psc bpfSpanContext + ErrMsg [128]int8 Method [50]int8 Target [50]int8 StatusCode uint32 @@ -108,6 +109,7 @@ type bpfVariableSpecs struct { IsRegistersAbi *ebpf.VariableSpec `ebpf:"is_registers_abi"` StartAddr *ebpf.VariableSpec `ebpf:"start_addr"` StatusCodePos *ebpf.VariableSpec `ebpf:"status_code_pos"` + StatusMessagePos *ebpf.VariableSpec `ebpf:"status_message_pos"` StatusS_pos *ebpf.VariableSpec `ebpf:"status_s_pos"` TotalCpus *ebpf.VariableSpec `ebpf:"total_cpus"` WriteStatusSupported *ebpf.VariableSpec `ebpf:"write_status_supported"` @@ -172,6 +174,7 @@ type bpfVariables struct { IsRegistersAbi *ebpf.Variable `ebpf:"is_registers_abi"` StartAddr *ebpf.Variable `ebpf:"start_addr"` StatusCodePos *ebpf.Variable `ebpf:"status_code_pos"` + StatusMessagePos *ebpf.Variable `ebpf:"status_message_pos"` StatusS_pos *ebpf.Variable `ebpf:"status_s_pos"` TotalCpus *ebpf.Variable `ebpf:"total_cpus"` WriteStatusSupported *ebpf.Variable `ebpf:"write_status_supported"` diff --git a/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/probe.go b/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/probe.go index 680f30eb2..90bbeb7bb 100644 --- a/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/probe.go +++ b/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/probe.go @@ -102,6 +102,13 @@ func New(logger *slog.Logger, version string) probe.Probe { }, MinVersion: writeStatusMinVersion, }, + probe.StructFieldConstMinVersion{ + StructField: probe.StructFieldConst{ + Key: "status_message_pos", + Val: structfield.NewID("google.golang.org/grpc", "google.golang.org/genproto/googleapis/rpc/status", "Status", "Message"), + }, + MinVersion: writeStatusMinVersion, + }, }, Uprobes: []probe.Uprobe{ { @@ -137,6 +144,7 @@ func verifyAndLoadBpf() (*ebpf.CollectionSpec, error) { // event represents an event in the gRPC client during a gRPC request. type event struct { context.BaseSpanProperties + ErrMsg [128]byte Method [50]byte Target [50]byte StatusCode int32 @@ -184,6 +192,10 @@ func processFn(e *event) ptrace.SpanSlice { if writeStatus && e.StatusCode > 0 { span.Status().SetCode(ptrace.StatusCodeError) + errMsg := unix.ByteSliceToString(e.ErrMsg[:]) + if errMsg != "" { + span.Status().SetMessage(errMsg) + } } return spans diff --git a/internal/pkg/instrumentation/bpf/google.golang.org/grpc/server/bpf/probe.bpf.c b/internal/pkg/instrumentation/bpf/google.golang.org/grpc/server/bpf/probe.bpf.c index 8aa593da2..3f105ef99 100644 --- a/internal/pkg/instrumentation/bpf/google.golang.org/grpc/server/bpf/probe.bpf.c +++ b/internal/pkg/instrumentation/bpf/google.golang.org/grpc/server/bpf/probe.bpf.c @@ -273,9 +273,12 @@ SEC("uprobe/server_handleStream2") int uprobe_server_handleStream2_Returns(struct pt_regs *ctx) { u64 server_stream_pos = 4; void *server_stream_ptr = get_argument(ctx, server_stream_pos); + struct go_iface go_context = {0}; + void *key = NULL; if (server_stream_ptr == NULL) { - bpf_printk("grpc:server:uprobe/server_handleStream2Return: failed to get ServerStream arg"); - return -1; + // we might fail get the pointer for versions of go which use register ABI, as this function does not return anything + // this is not an error in that case so we can just go to the lookup which will happen by goroutine + goto lookup; } void *stream_ptr; @@ -285,7 +288,6 @@ int uprobe_server_handleStream2_Returns(struct pt_regs *ctx) { return -2; } - struct go_iface go_context = {0}; rc = bpf_probe_read_user(&go_context.type, sizeof(go_context.type), (void *)(stream_ptr + stream_ctx_pos)); if (rc != 0) { bpf_printk("grpc:server:uprobe/server_handleStream2Return: failed to read context type"); @@ -298,7 +300,8 @@ int uprobe_server_handleStream2_Returns(struct pt_regs *ctx) { return -4; } - void *key = get_consistent_key(ctx, go_context.data); +lookup: + key = get_consistent_key(ctx, go_context.data); struct grpc_request_t *event = bpf_map_lookup_elem(&grpc_events, &key); if (event == NULL) { bpf_printk("grpc:server:uprobe/server_handleStream2Return: event is NULL"); diff --git a/internal/test/e2e/grpc/traces.json b/internal/test/e2e/grpc/traces.json index 9aefd373d..82bdb6e71 100644 --- a/internal/test/e2e/grpc/traces.json +++ b/internal/test/e2e/grpc/traces.json @@ -232,7 +232,8 @@ "parentSpanId": "", "spanId": "xxxxx", "status": { - "code": 2 + "code": 2, + "message": "unimplmented" }, "traceId": "xxxxx" }, @@ -281,7 +282,8 @@ "parentSpanId": "", "spanId": "xxxxx", "status": { - "code": 2 + "code": 2, + "message": "connection error: desc = \"transport: Error while dialing: dial tcp 127.0.0.1:1701: connect: connection refused\"" }, "traceId": "xxxxx" } diff --git a/internal/test/e2e/grpc/verify.bats b/internal/test/e2e/grpc/verify.bats index 12b724844..0eca9048b 100644 --- a/internal/test/e2e/grpc/verify.bats +++ b/internal/test/e2e/grpc/verify.bats @@ -112,11 +112,6 @@ SCOPE="go.opentelemetry.io/auto/google.golang.org/grpc" assert_equal "$client_span_id" "$server_parent_span_id" } -@test "server :: expected (redacted) trace output" { - redact_json - assert_equal "$(git --no-pager diff ${BATS_TEST_DIRNAME}/traces.json)" "" -} - @test "client :: emits a span name 'SayHello'" { result=$(client_span_names_for ${SCOPE} | uniq) assert_equal "$result" '"/helloworld.Greeter/SayHello"' @@ -151,8 +146,30 @@ SCOPE="go.opentelemetry.io/auto/google.golang.org/grpc" assert_regex "$span_id" ${MATCH_A_SPAN_ID} } -@test "client :: error code is present for unsuccessful span" { +@test "client :: error code is present" { + result=$(client_span_attributes_for ${SCOPE} | jq -r "select(.key == \"rpc.grpc.status_code\").value.intValue" | sort -u) + # gRPC error code 0 - OK + # gRPC error code 12 - Unimplemented # gRPC error code 14 - Unavailable - result=$(client_span_attributes_for ${SCOPE} | jq 'select(.key == "rpc.grpc.status_code" and .value.intValue == "14")') - assert_not_empty "$result" + + expected_result=$(printf "%s\n" "0" "12" "14") + assert_equal "$result" "$expected_result" +} + +@test "client :: includes status message" { + result=$(client_spans_from_scope_named ${SCOPE} | jq -r ' + select(.status.message != null) + | .status.message + ' | sort -u) + + expected_result=$(printf "%s\n" \ + "connection error: desc = \"transport: Error while dialing: dial tcp 127.0.0.1:1701: connect: connection refused\"" \ + "unimplmented") + + assert_equal "$result" "$expected_result" +} + +@test "server :: expected (redacted) trace output" { + redact_json + assert_equal "$(git --no-pager diff ${BATS_TEST_DIRNAME}/traces.json)" "" } diff --git a/internal/tools/inspect/cmd/offsetgen/main.go b/internal/tools/inspect/cmd/offsetgen/main.go index 7b919474b..17f841f8a 100644 --- a/internal/tools/inspect/cmd/offsetgen/main.go +++ b/internal/tools/inspect/cmd/offsetgen/main.go @@ -144,6 +144,7 @@ func manifests() ([]inspect.Manifest, error) { structfield.NewID("google.golang.org/grpc", "google.golang.org/grpc/internal/status", "Error", "s"), structfield.NewID("google.golang.org/grpc", "google.golang.org/grpc/internal/status", "Status", "s"), structfield.NewID("google.golang.org/grpc", "google.golang.org/genproto/googleapis/rpc/status", "Status", "Code"), + structfield.NewID("google.golang.org/grpc", "google.golang.org/genproto/googleapis/rpc/status", "Status", "Message"), structfield.NewID("google.golang.org/grpc", "google.golang.org/grpc/internal/transport", "http2Server", "peer"), structfield.NewID("google.golang.org/grpc", "google.golang.org/grpc/peer", "Peer", "LocalAddr"), }, From 16e579c87f140d807fea32ec6952d932b28adc93 Mon Sep 17 00:00:00 2001 From: Ron Federman Date: Fri, 3 Jan 2025 17:18:06 +0200 Subject: [PATCH 2/3] Add changelog entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b45aad49..ccdadceb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ OpenTelemetry Go Automatic Instrumentation adheres to [Semantic Versioning](http - Support `google.golang.org/grpc` `1.68.2`. ([#1462](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/1462)) - Support `google.golang.org/grpc` `1.69.2`. ([#1467](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/1467)) - Support `golang.org/x/net` `0.33.0`. ([#1471](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/1471)) +- Include gRPC error message for client spans. ([#1528](https://github.com/open-telemetry/opentelemetry-go-instrumentation/pull/1528)) ## [v0.19.0-alpha] - 2024-12-05 From 22961401aa0c5cc4fbf53a9ba9d624af315db8cb Mon Sep 17 00:00:00 2001 From: Ron Federman <73110295+RonFed@users.noreply.github.com> Date: Mon, 6 Jan 2025 09:52:28 +0200 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Tyler Yahn --- .../bpf/google.golang.org/grpc/client/bpf/probe.bpf.c | 6 +++--- .../bpf/google.golang.org/grpc/server/bpf/probe.bpf.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf/probe.bpf.c b/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf/probe.bpf.c index d3b06433e..39284e01d 100644 --- a/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf/probe.bpf.c +++ b/internal/pkg/instrumentation/bpf/google.golang.org/grpc/client/bpf/probe.bpf.c @@ -143,9 +143,9 @@ int uprobe_ClientConn_Invoke_Returns(struct pt_regs *ctx) { // } // The `Status` proto object contains a `Code` int32 field, which is what we want // type Status struct { - // Code int32 - // Message string - // Details []*anypb.Any + // Code int32 + // Message string + // Details []*anypb.Any // } void *resp_ptr = get_argument(ctx, 2); if(resp_ptr == 0) { diff --git a/internal/pkg/instrumentation/bpf/google.golang.org/grpc/server/bpf/probe.bpf.c b/internal/pkg/instrumentation/bpf/google.golang.org/grpc/server/bpf/probe.bpf.c index 3f105ef99..d59f392a8 100644 --- a/internal/pkg/instrumentation/bpf/google.golang.org/grpc/server/bpf/probe.bpf.c +++ b/internal/pkg/instrumentation/bpf/google.golang.org/grpc/server/bpf/probe.bpf.c @@ -276,8 +276,8 @@ int uprobe_server_handleStream2_Returns(struct pt_regs *ctx) { struct go_iface go_context = {0}; void *key = NULL; if (server_stream_ptr == NULL) { - // we might fail get the pointer for versions of go which use register ABI, as this function does not return anything - // this is not an error in that case so we can just go to the lookup which will happen by goroutine + // We might fail to get the pointer for versions of Go which use register ABI, as this function does not return anything. + // This is not an error in that case so we can just go to the lookup which will happen by goroutine. goto lookup; }