From 9f269ed825b7602025ce6dcce709cf577949a806 Mon Sep 17 00:00:00 2001 From: Matthew Johnson Date: Thu, 1 Feb 2024 18:29:14 -0500 Subject: [PATCH] add net.perf.conntrackAllowanceAvailable metric In a recent incident, the net.perf.conntrackAllowanceExceeded metric was a strong signal of the cause. Adding the conntrackAllowanceAvailable metric will allow the remaining allowance to be used as an input to auto-scaling policies. --- lib/ethtool.h | 13 +++++++++++++ lib/ethtool_test.cc | 8 ++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/lib/ethtool.h b/lib/ethtool.h index f6b3f81..049a1a9 100644 --- a/lib/ethtool.h +++ b/lib/ethtool.h @@ -89,6 +89,19 @@ class Ethtool { continue; } + found = stat_line.find("conntrack_allowance_available:"); + if (found != std::string::npos) { + auto metric = registry_->GetGauge(id_for("net.perf.conntrackAllowanceAvailable", iface, nullptr, net_tags_)); + std::vector stat_fields = absl::StrSplit(stat_line, ':'); + try { + auto number = std::stoll(stat_fields[1]); + metric->Set(number); + } catch (const std::invalid_argument& e) { + atlasagent::Logger()->error("Unable to parse {} as a number: {}", stat_fields[1], e.what()); + } + continue; + } + found = stat_line.find("linklocal_allowance_exceeded:"); if (found != std::string::npos) { auto metric = registry_->GetMonotonicCounter(id_for("net.perf.linklocalAllowanceExceeded", iface, nullptr, net_tags_)); diff --git a/lib/ethtool_test.cc b/lib/ethtool_test.cc index 01d2994..5b088be 100644 --- a/lib/ethtool_test.cc +++ b/lib/ethtool_test.cc @@ -35,13 +35,15 @@ TEST(Ethtool, Stats) { " bw_out_allowance_exceeded: 0\n", " pps_allowance_exceeded: 0\n", " conntrack_allowance_exceeded: 0\n", + " conntrack_allowance_available: 100\n", " linklocal_allowance_exceeded: 0\n", " queue_0_tx_cnt: 368940\n", " queue_0_tx_bytes: 126196057\n"}; ethtool.stats(first_sample, "eth0"); auto ms = registry.Measurements(); - EXPECT_EQ(ms.size(), 0); + // one gauge, the rest mono counters + EXPECT_EQ(ms.size(), 1); 
std::vector second_sample = { "NIC statistics:\n", @@ -50,6 +52,7 @@ TEST(Ethtool, Stats) { " bw_in_allowance_exceeded: 5\n", " bw_out_allowance_exceeded: 10\n", " conntrack_allowance_exceeded: 15\n", + " conntrack_allowance_available: 110\n", " linklocal_allowance_exceeded: 20\n", " pps_allowance_exceeded: 25\n", " queue_0_tx_cnt: 368940\n", @@ -58,13 +61,14 @@ TEST(Ethtool, Stats) { // we need two samples, because these are all monotonic counters ethtool.stats(second_sample, "eth0"); ms = registry.Measurements(); - EXPECT_EQ(ms.size(), 5); + EXPECT_EQ(ms.size(), 6); auto map = measurements_to_map(ms, ""); std::unordered_map expected = { {"net.perf.bwAllowanceExceeded|count|in", 5}, {"net.perf.bwAllowanceExceeded|count|out", 10}, {"net.perf.conntrackAllowanceExceeded|count", 15}, + {"net.perf.conntrackAllowanceAvailable|gauge", 110}, {"net.perf.linklocalAllowanceExceeded|count", 20}, {"net.perf.ppsAllowanceExceeded|count", 25}}; EXPECT_EQ(map, expected);