diff --git a/src/VirtualClient/VirtualClient.Main/profiles/MONITORS-GPU-AMD.json b/src/VirtualClient/VirtualClient.Main/profiles/MONITORS-GPU-AMD.json index 628acf1569..ae40144fcf 100644 --- a/src/VirtualClient/VirtualClient.Main/profiles/MONITORS-GPU-AMD.json +++ b/src/VirtualClient/VirtualClient.Main/profiles/MONITORS-GPU-AMD.json @@ -1,7 +1,7 @@ { "Description": "Default Monitors for AMD GPU systems.", "Metadata": { - "SupportedPlatforms": "linux-x64,win-x64", + "SupportedPlatforms": "linux-x64, win-x64", "SupportedOperatingSystems": "CBL-Mariner,CentOS,Debian,RedHat,Suse,Ubuntu,Windows" }, "Parameters": { @@ -12,9 +12,11 @@ { "Type": "AmdSmiMonitor", "Parameters": { - "Scenario": "AmdGpuCounters", - "MonitorFrequency": "$.Parameters.MonitorFrequency", - "MonitorWarmupPeriod": "$.Parameters.MonitorWarmupPeriod" + "Scenario": "AmdGpuCounters", + "SubsystemMetric": true, + "SubsystemXgmi": true, + "MonitorFrequency": "$.Parameters.MonitorFrequency", + "MonitorWarmupPeriod": "$.Parameters.MonitorWarmupPeriod" } }, { diff --git a/src/VirtualClient/VirtualClient.Monitors.UnitTests/Amd-Smi/AmdSmiMetricsParserTests.cs b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Amd-Smi/AmdSmiMetricsParserTests.cs new file mode 100644 index 0000000000..4955359eeb --- /dev/null +++ b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Amd-Smi/AmdSmiMetricsParserTests.cs @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +namespace VirtualClient.Monitors.UnitTests.Amd_Smi +{ + using System.Collections.Generic; + using System.IO; + using System.Reflection; + using NUnit.Framework; + using VirtualClient.Contracts; + using VirtualClient.Monitors.Amd_Smi; + + [TestFixture] + [Category("Unit")] + public class AmdSmiMetricsParserTests + { + [Test] + public void AmdSmiMetricsParserTest() + { + string workingDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); + string outputPath = Path.Combine(workingDirectory, "Examples", "amd-smi", "metrics.txt"); + string rawText = File.ReadAllText(outputPath); + string gpuId = "0"; + + AmdSmiMetricsParser testParser = new AmdSmiMetricsParser(rawText); + IList metrics = testParser.Parse(); + + MetricAssert.Exists(metrics, $"GFX_ACTIVITY_GPU{gpuId}", 0, "%"); + MetricAssert.Exists(metrics, $"UMC_ACTIVITY_GPU{gpuId}", 0, "%"); + MetricAssert.Exists(metrics, $"MM_ACTIVITY_GPU{gpuId}", -1, ""); // N/A → -1 + MetricAssert.Exists(metrics, $"SOCKET_POWER_GPU{gpuId}", 137, "W"); + MetricAssert.Exists(metrics, $"GFX_VOLTAGE_GPU{gpuId}", -1, "V"); // N/A → -1 + MetricAssert.Exists(metrics, $"SOC_VOLTAGE_GPU{gpuId}", -1, "V"); // N/A → -1 + MetricAssert.Exists(metrics, $"MEM_VOLTAGE_GPU{gpuId}", -1, "V"); // N/A → -1 + MetricAssert.Exists(metrics, $"POWER_MANAGEMENT_GPU{gpuId}", -1, ""); // ENABLED → -1 as asserted (NOTE(review): this comment previously said 1 — confirm the parser's intended mapping) + MetricAssert.Exists(metrics, $"TEMPERATURE_EDGE_GPU{gpuId}", -1, "C"); + MetricAssert.Exists(metrics, $"TEMPERATURE_HOTSPOT_GPU{gpuId}", 38, "C"); + MetricAssert.Exists(metrics, $"TEMPERATURE_MEM_GPU{gpuId}", 31, "C"); + } + } +} \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Monitors.UnitTests/Amd-Smi/AmdSmiXGMIQueryGpuParserUnitTests.cs b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Amd-Smi/AmdSmiXGMIQueryGpuParserUnitTests.cs new file mode 100644 index 0000000000..b5d94ca668 --- /dev/null +++ b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Amd-Smi/AmdSmiXGMIQueryGpuParserUnitTests.cs @@ -0,0 +1,40 
@@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +namespace VirtualClient.Monitors +{ + using System.Collections.Generic; + using System.Diagnostics; + using System.IO; + using System.Linq; + using System.Reflection; + using System.Text; + using System.Threading.Tasks; + using NUnit.Framework; + using VirtualClient.Common; + using VirtualClient.Contracts; + + [TestFixture] + [Category("Unit")] + public class AmdSmiXGMIQueryGpuParserUnitTests + { + [Test] + public void AmdSmiXGMIQueryGpuParserParsesMetricsCorrectly() + { + string workingDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); + string outputPath = Path.Combine(workingDirectory, "Examples", "amd-smi", "xgmi-8xMI300X.json"); + string rawText = File.ReadAllText(outputPath); + AmdSmiXGMIQueryGpuParser testParser = new AmdSmiXGMIQueryGpuParser(rawText); + IList metrics = testParser.Parse(); + Assert.AreEqual(8, metrics.Count); + MetricAssert.Exists(metrics, "xgmi_0_data", 14, "KB"); + MetricAssert.Exists(metrics, "xgmi_1_data", 12, "KB"); + MetricAssert.Exists(metrics, "xgmi_2_data", 10, "KB"); + MetricAssert.Exists(metrics, "xgmi_3_data", 9, "KB"); + MetricAssert.Exists(metrics, "xgmi_4_data", 9, "KB"); + MetricAssert.Exists(metrics, "xgmi_5_data", 8, "KB"); + MetricAssert.Exists(metrics, "xgmi_6_data", 6, "KB"); + MetricAssert.Exists(metrics, "xgmi_7_data", 6, "KB"); + } + } +} \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Monitors.UnitTests/AmdSmiQueryGpuParserUnitTests.cs b/src/VirtualClient/VirtualClient.Monitors.UnitTests/AmdSmiQueryGpuParserUnitTests.cs deleted file mode 100644 index bba3396138..0000000000 --- a/src/VirtualClient/VirtualClient.Monitors.UnitTests/AmdSmiQueryGpuParserUnitTests.cs +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
- -namespace VirtualClient.Monitors -{ - using System.Collections.Generic; - using System.Diagnostics; - using System.IO; - using System.Linq; - using System.Reflection; - using System.Text; - using System.Threading.Tasks; - using NUnit.Framework; - using VirtualClient.Common; - using VirtualClient.Contracts; - - [TestFixture] - [Category("Unit")] - public class AmdSmiQueryGpuParserUnitTests - { - [Test] - public void AmdSmiQueryGpuParserParsesMetricsCorrectly() - { - string workingDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location); - string outputPath = Path.Combine(workingDirectory, "Examples", "amd-smi", "result.txt"); - string rawText = File.ReadAllText(outputPath); - - AmdSmiQueryGpuParser testParser = new AmdSmiQueryGpuParser(rawText); - IList metrics = testParser.Parse(); - - Assert.AreEqual(3, metrics.Count); - MetricAssert.Exists(metrics, "utilization.gpu [%]", 98, "%"); - MetricAssert.Exists(metrics, "framebuffer.total [MB]", 14928, "MB"); - MetricAssert.Exists(metrics, "framebuffer.used [MB]", 363, "MB"); - } - } -} \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/UsageMetrics.csv b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/UsageMetrics.csv new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/metric-8xMI300X.csv b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/metric-8xMI300X.csv new file mode 100644 index 0000000000..9b4dc629c0 --- /dev/null +++ b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/metric-8xMI300X.csv @@ -0,0 +1,9 @@ 
+gpu,gfx_activity,umc_activity,mm_activity,vcn_activity,jpeg_activity,socket_power,gfx_voltage,soc_voltage,mem_voltage,power_management,throttle_status,gfx_0_clk,gfx_0_min_clk,gfx_0_max_clk,gfx_0_clk_locked,gfx_0_deep_sleep,gfx_1_clk,gfx_1_min_clk,gfx_1_max_clk,gfx_1_clk_locked,gfx_1_deep_sleep,gfx_2_clk,gfx_2_min_clk,gfx_2_max_clk,gfx_2_clk_locked,gfx_2_deep_sleep,gfx_3_clk,gfx_3_min_clk,gfx_3_max_clk,gfx_3_clk_locked,gfx_3_deep_sleep,gfx_4_clk,gfx_4_min_clk,gfx_4_max_clk,gfx_4_clk_locked,gfx_4_deep_sleep,gfx_5_clk,gfx_5_min_clk,gfx_5_max_clk,gfx_5_clk_locked,gfx_5_deep_sleep,gfx_6_clk,gfx_6_min_clk,gfx_6_max_clk,gfx_6_clk_locked,gfx_6_deep_sleep,gfx_7_clk,gfx_7_min_clk,gfx_7_max_clk,gfx_7_clk_locked,gfx_7_deep_sleep,mem_0_clk,mem_0_min_clk,mem_0_max_clk,mem_0_clk_locked,mem_0_deep_sleep,vclk_0_clk,vclk_0_min_clk,vclk_0_max_clk,vclk_0_clk_locked,vclk_0_deep_sleep,vclk_1_clk,vclk_1_min_clk,vclk_1_max_clk,vclk_1_clk_locked,vclk_1_deep_sleep,vclk_2_clk,vclk_2_min_clk,vclk_2_max_clk,vclk_2_clk_locked,vclk_2_deep_sleep,vclk_3_clk,vclk_3_min_clk,vclk_3_max_clk,vclk_3_clk_locked,vclk_3_deep_sleep,dclk_0_clk,dclk_0_min_clk,dclk_0_max_clk,dclk_0_clk_locked,dclk_0_deep_sleep,dclk_1_clk,dclk_1_min_clk,dclk_1_max_clk,dclk_1_clk_locked,dclk_1_deep_sleep,dclk_2_clk,dclk_2_min_clk,dclk_2_max_clk,dclk_2_clk_locked,dclk_2_deep_sleep,dclk_3_clk,dclk_3_min_clk,dclk_3_max_clk,dclk_3_clk_locked,dclk_3_deep_sleep,edge,hotspot,mem,width,speed,bandwidth,replay_count,l0_to_recovery_count,replay_roll_over_count,nak_sent_count,nak_received_count,current_bandwidth_sent,current_bandwidth_received,max_packet_size,total_correctable_count,total_uncorrectable_count,total_deferred_count,cache_correctable_count,cache_uncorrectable_count,UMC_correctable_count,UMC_uncorrectable_count,UMC_deferred_count,SDMA_correctable_count,SDMA_uncorrectable_count,SDMA_deferred_count,GFX_correctable_count,GFX_uncorrectable_count,GFX_deferred_count,MMHUB_correctable_count,MMHUB_uncorrectable_count,MMHUB_deferred_coun
t,PCIE_BIF_correctable_count,PCIE_BIF_uncorrectable_count,PCIE_BIF_deferred_count,HDP_correctable_count,HDP_uncorrectable_count,HDP_deferred_count,XGMI_WAFL_correctable_count,XGMI_WAFL_uncorrectable_count,XGMI_WAFL_deferred_count,max,rpm,usage,point_0_frequency,point_0_voltage,point_1_frequency,point_1_voltage,point_2_frequency,point_2_voltage,overdrive,perf_level,xgmi_err,total_energy_consumption,total_vram,used_vram,free_vram,total_visible_vram,used_visible_vram,free_visible_vram,total_gtt,used_gtt,free_gtt +0,0,0,N/A,"[0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",133,N/A,N/A,N/A,ENABLED,UNTHROTTLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,133,500,2100,DISABLED,ENABLED,900,900,1300,N/A,DISABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,N/A,36,30,16,N/A,192,0,1,0,0,0,N/A,N/A,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,AMDSMI_DEV_PERF_LEVEL_AUTO,N/A,12119300.381,196592,283,196309,196592,283,196309,1031932,20,1031912 +1,0,0,N/A,"[0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0]",139,N/A,N/A,N/A,ENABLED,UNTHROTTLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,133,500,2100,DISABLED,ENABLED,133,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,133,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,900,900,1300,N/A,DISABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,N/A,35,29,16,N/A,157,0,1,0,0,0,N/A,N/A,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,AMDSMI_DEV_PERF_LEVEL_AUTO,N/A,12648636.191,196592,283,196309,196592,283,196309,1031932,20,1031912 +2,0,0,N/A,"[0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",133,N/A,N/A,N/A,ENABLED,UNTHROTTLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,900,900,1300,N/A,DISABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,N/A,32,29,16,N/A,106,0,1,0,0,0,N/A,N/A,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,AMDSMI_DEV_PERF_LEVEL_AUTO,N/A,12142639.892,196592,283,196309,196592,283,196309,1031932,20,1031912 +3,0,0,N/A,"[0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0]",132,N/A,N/A,N/A,ENABLED,UNTHROTTLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,900,900,1300,N/A,DISABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,N/A,36,28,16,N/A,192,0,1,0,0,0,N/A,N/A,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,AMDSMI_DEV_PERF_LEVEL_AUTO,N/A,12086086.983,196592,283,196309,196592,283,196309,1031932,20,1031912 +4,0,0,N/A,"[0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",136,N/A,N/A,N/A,ENABLED,UNTHROTTLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,133,500,2100,DISABLED,ENABLED,133,500,2100,DISABLED,ENABLED,134,500,2100,DISABLED,ENABLED,900,900,1300,N/A,DISABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,N/A,36,29,16,N/A,145,0,1,0,0,0,N/A,N/A,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,AMDSMI_DEV_PERF_LEVEL_AUTO,N/A,12403534.5,196592,283,196309,196592,283,196309,1031932,20,1031912 +5,0,0,N/A,"[0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0]",132,N/A,N/A,N/A,ENABLED,UNTHROTTLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,900,900,1300,N/A,DISABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,N/A,35,29,16,N/A,107,0,1,0,0,0,N/A,N/A,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,AMDSMI_DEV_PERF_LEVEL_AUTO,N/A,12011883.234,196592,283,196309,196592,283,196309,1031932,20,1031912 +6,0,0,N/A,"[0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",132,N/A,N/A,N/A,ENABLED,UNTHROTTLED,132,500,2100,DISABLED,ENABLED,131,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,900,900,1300,N/A,DISABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,N/A,36,29,16,N/A,94,0,1,0,0,0,N/A,N/A,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,AMDSMI_DEV_PERF_LEVEL_AUTO,N/A,11987029.516,196592,283,196309,196592,283,196309,1031932,20,1031912 +7,0,0,N/A,"[0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0]",134,N/A,N/A,N/A,ENABLED,UNTHROTTLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,132,500,2100,DISABLED,ENABLED,900,900,1300,N/A,DISABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,29,914,1333,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,22,711,1143,N/A,ENABLED,N/A,37,31,16,N/A,90,0,1,0,0,0,N/A,N/A,N/A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,N/A,AMDSMI_DEV_PERF_LEVEL_AUTO,N/A,12193331.537,196592,283,196309,196592,283,196309,1031932,20,1031912 diff --git a/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/metrics.txt b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/metrics.txt new file mode 100644 index 0000000000..820d9b5231 --- /dev/null +++ b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/metrics.txt @@ -0,0 +1,1167 @@ +GPU: 0 + USAGE: + GFX_ACTIVITY: 0 % + UMC_ACTIVITY: 0 % + MM_ACTIVITY: N/A + VCN_ACTIVITY: [0 %, 0 %, 0 %, 0 %] + JPEG_ACTIVITY: [N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A] + POWER: + SOCKET_POWER: 137 W + GFX_VOLTAGE: N/A mV + SOC_VOLTAGE: N/A mV + MEM_VOLTAGE: N/A mV + THROTTLE_STATUS: UNTHROTTLED + POWER_MANAGEMENT: ENABLED + CLOCK: + GFX_0: + CLK: 245 MHz + MIN_CLK: 263 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: DISABLED + GFX_1: + CLK: 132 MHz + MIN_CLK: 263 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_2: + CLK: 131 MHz + MIN_CLK: 263 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_3: + CLK: 131 MHz + MIN_CLK: 263 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_4: + CLK: 132 MHz + 
MIN_CLK: 263 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_5: + CLK: 132 MHz + MIN_CLK: 263 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_6: + CLK: 132 MHz + MIN_CLK: 263 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_7: + CLK: 132 MHz + MIN_CLK: 263 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + MEM_0: + CLK: 900 MHz + MIN_CLK: 900 MHz + MAX_CLK: 1300 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: DISABLED + VCLK_0: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_1: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_2: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_3: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_0: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_1: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_2: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_3: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + TEMPERATURE: + EDGE: N/A + HOTSPOT: 38 °C + MEM: 31 °C + PCIE: + WIDTH: 16 + SPEED: 32 GT/s + BANDWIDTH: 4915 Mb/s + REPLAY_COUNT: 0 + L0_TO_RECOVERY_COUNT: 1 + REPLAY_ROLL_OVER_COUNT: 0 + NAK_SENT_COUNT: 0 + NAK_RECEIVED_COUNT: 0 + CURRENT_BANDWIDTH_SENT: N/A + CURRENT_BANDWIDTH_RECEIVED: N/A + MAX_PACKET_SIZE: N/A + MEM_USAGE: + TOTAL_VRAM: 196048 MB + USED_VRAM: 282 MB + FREE_VRAM: 195766 MB + TOTAL_VISIBLE_VRAM: 196048 MB + USED_VISIBLE_VRAM: 282 MB + FREE_VISIBLE_VRAM: 195766 MB + TOTAL_GTT: 885866 MB + USED_GTT: 17 MB + FREE_GTT: 885849 MB + +GPU: 1 + USAGE: + GFX_ACTIVITY: 0 % + UMC_ACTIVITY: 0 % + MM_ACTIVITY: N/A + VCN_ACTIVITY: [0 %, 0 %, 0 %, 0 
%] + JPEG_ACTIVITY: [N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A] + POWER: + SOCKET_POWER: 137 W + GFX_VOLTAGE: N/A mV + SOC_VOLTAGE: N/A mV + MEM_VOLTAGE: N/A mV + THROTTLE_STATUS: UNTHROTTLED + POWER_MANAGEMENT: ENABLED + CLOCK: + GFX_0: + CLK: 232 MHz + MIN_CLK: 251 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: DISABLED + GFX_1: + CLK: 132 MHz + MIN_CLK: 251 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_2: + CLK: 132 MHz + MIN_CLK: 251 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_3: + CLK: 132 MHz + MIN_CLK: 251 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_4: + CLK: 132 MHz + MIN_CLK: 251 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_5: + CLK: 132 MHz + MIN_CLK: 251 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_6: + CLK: 132 MHz + MIN_CLK: 251 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_7: + CLK: 132 MHz + MIN_CLK: 251 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + MEM_0: + CLK: 900 MHz + MIN_CLK: 900 MHz + MAX_CLK: 1300 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: DISABLED + VCLK_0: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_1: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_2: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_3: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_0: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_1: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_2: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz 
+ CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_3: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + TEMPERATURE: + EDGE: N/A + HOTSPOT: 39 °C + MEM: 33 °C + PCIE: + WIDTH: 16 + SPEED: 32 GT/s + BANDWIDTH: 431 Mb/s + REPLAY_COUNT: 0 + L0_TO_RECOVERY_COUNT: 1 + REPLAY_ROLL_OVER_COUNT: 0 + NAK_SENT_COUNT: 0 + NAK_RECEIVED_COUNT: 0 + CURRENT_BANDWIDTH_SENT: N/A + CURRENT_BANDWIDTH_RECEIVED: N/A + MAX_PACKET_SIZE: N/A + MEM_USAGE: + TOTAL_VRAM: 196048 MB + USED_VRAM: 282 MB + FREE_VRAM: 195766 MB + TOTAL_VISIBLE_VRAM: 196048 MB + USED_VISIBLE_VRAM: 282 MB + FREE_VISIBLE_VRAM: 195766 MB + TOTAL_GTT: 885866 MB + USED_GTT: 17 MB + FREE_GTT: 885849 MB + +GPU: 2 + USAGE: + GFX_ACTIVITY: 0 % + UMC_ACTIVITY: 0 % + MM_ACTIVITY: N/A + VCN_ACTIVITY: [0 %, 0 %, 0 %, 0 %] + JPEG_ACTIVITY: [N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A] + POWER: + SOCKET_POWER: 138 W + GFX_VOLTAGE: N/A mV + SOC_VOLTAGE: N/A mV + MEM_VOLTAGE: N/A mV + THROTTLE_STATUS: UNTHROTTLED + POWER_MANAGEMENT: ENABLED + CLOCK: + GFX_0: + CLK: 247 MHz + MIN_CLK: 265 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: DISABLED + GFX_1: + CLK: 132 MHz + MIN_CLK: 265 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_2: + CLK: 132 MHz + MIN_CLK: 265 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_3: + CLK: 132 MHz + MIN_CLK: 265 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_4: + CLK: 132 MHz + MIN_CLK: 265 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_5: + CLK: 132 MHz + MIN_CLK: 265 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_6: + CLK: 132 MHz + MIN_CLK: 265 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_7: + CLK: 133 MHz + MIN_CLK: 265 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: 
DISABLED + DEEP_SLEEP: ENABLED + MEM_0: + CLK: 900 MHz + MIN_CLK: 900 MHz + MAX_CLK: 1300 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: DISABLED + VCLK_0: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_1: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_2: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_3: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_0: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_1: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_2: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_3: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + TEMPERATURE: + EDGE: N/A + HOTSPOT: 38 °C + MEM: 31 °C + PCIE: + WIDTH: 16 + SPEED: 32 GT/s + BANDWIDTH: 2196 Mb/s + REPLAY_COUNT: 0 + L0_TO_RECOVERY_COUNT: 1 + REPLAY_ROLL_OVER_COUNT: 0 + NAK_SENT_COUNT: 0 + NAK_RECEIVED_COUNT: 0 + CURRENT_BANDWIDTH_SENT: N/A + CURRENT_BANDWIDTH_RECEIVED: N/A + MAX_PACKET_SIZE: N/A + MEM_USAGE: + TOTAL_VRAM: 196048 MB + USED_VRAM: 282 MB + FREE_VRAM: 195766 MB + TOTAL_VISIBLE_VRAM: 196048 MB + USED_VISIBLE_VRAM: 282 MB + FREE_VISIBLE_VRAM: 195766 MB + TOTAL_GTT: 885866 MB + USED_GTT: 17 MB + FREE_GTT: 885849 MB + +GPU: 3 + USAGE: + GFX_ACTIVITY: 0 % + UMC_ACTIVITY: 0 % + MM_ACTIVITY: N/A + VCN_ACTIVITY: [0 %, 0 %, 0 %, 0 %] + JPEG_ACTIVITY: [N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A] + POWER: + SOCKET_POWER: 135 W + GFX_VOLTAGE: N/A mV + SOC_VOLTAGE: N/A mV + MEM_VOLTAGE: N/A mV + THROTTLE_STATUS: UNTHROTTLED + POWER_MANAGEMENT: ENABLED + CLOCK: + GFX_0: + CLK: 253 MHz 
+ MIN_CLK: 271 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: DISABLED + GFX_1: + CLK: 132 MHz + MIN_CLK: 271 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_2: + CLK: 131 MHz + MIN_CLK: 271 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_3: + CLK: 132 MHz + MIN_CLK: 271 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_4: + CLK: 132 MHz + MIN_CLK: 271 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_5: + CLK: 132 MHz + MIN_CLK: 271 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_6: + CLK: 132 MHz + MIN_CLK: 271 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_7: + CLK: 132 MHz + MIN_CLK: 271 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + MEM_0: + CLK: 900 MHz + MIN_CLK: 900 MHz + MAX_CLK: 1300 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: DISABLED + VCLK_0: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_1: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_2: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_3: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_0: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_1: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_2: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_3: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + TEMPERATURE: + EDGE: N/A + HOTSPOT: 36 °C + MEM: 31 °C + PCIE: + WIDTH: 16 + SPEED: 32 GT/s + BANDWIDTH: 1027 Mb/s + REPLAY_COUNT: 0 + L0_TO_RECOVERY_COUNT: 1 + REPLAY_ROLL_OVER_COUNT: 0 + NAK_SENT_COUNT: 0 + NAK_RECEIVED_COUNT: 
0 + CURRENT_BANDWIDTH_SENT: N/A + CURRENT_BANDWIDTH_RECEIVED: N/A + MAX_PACKET_SIZE: N/A + MEM_USAGE: + TOTAL_VRAM: 196048 MB + USED_VRAM: 282 MB + FREE_VRAM: 195766 MB + TOTAL_VISIBLE_VRAM: 196048 MB + USED_VISIBLE_VRAM: 282 MB + FREE_VISIBLE_VRAM: 195766 MB + TOTAL_GTT: 885866 MB + USED_GTT: 17 MB + FREE_GTT: 885849 MB + +GPU: 4 + USAGE: + GFX_ACTIVITY: 0 % + UMC_ACTIVITY: 0 % + MM_ACTIVITY: N/A + VCN_ACTIVITY: [0 %, 0 %, 0 %, 0 %] + JPEG_ACTIVITY: [N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A] + POWER: + SOCKET_POWER: 140 W + GFX_VOLTAGE: N/A mV + SOC_VOLTAGE: N/A mV + MEM_VOLTAGE: N/A mV + THROTTLE_STATUS: UNTHROTTLED + POWER_MANAGEMENT: ENABLED + CLOCK: + GFX_0: + CLK: 232 MHz + MIN_CLK: 250 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: DISABLED + GFX_1: + CLK: 132 MHz + MIN_CLK: 250 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_2: + CLK: 132 MHz + MIN_CLK: 250 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_3: + CLK: 132 MHz + MIN_CLK: 250 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_4: + CLK: 132 MHz + MIN_CLK: 250 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_5: + CLK: 132 MHz + MIN_CLK: 250 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_6: + CLK: 132 MHz + MIN_CLK: 250 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_7: + CLK: 132 MHz + MIN_CLK: 250 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + MEM_0: + CLK: 900 MHz + MIN_CLK: 900 MHz + MAX_CLK: 1300 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: DISABLED + VCLK_0: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_1: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_2: + CLK: 29 MHz + MIN_CLK: 914 
MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_3: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_0: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_1: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_2: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_3: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + TEMPERATURE: + EDGE: N/A + HOTSPOT: 37 °C + MEM: 29 °C + PCIE: + WIDTH: 16 + SPEED: 32 GT/s + BANDWIDTH: 2657 Mb/s + REPLAY_COUNT: 0 + L0_TO_RECOVERY_COUNT: 1 + REPLAY_ROLL_OVER_COUNT: 0 + NAK_SENT_COUNT: 0 + NAK_RECEIVED_COUNT: 0 + CURRENT_BANDWIDTH_SENT: N/A + CURRENT_BANDWIDTH_RECEIVED: N/A + MAX_PACKET_SIZE: N/A + MEM_USAGE: + TOTAL_VRAM: 196048 MB + USED_VRAM: 282 MB + FREE_VRAM: 195766 MB + TOTAL_VISIBLE_VRAM: 196048 MB + USED_VISIBLE_VRAM: 282 MB + FREE_VISIBLE_VRAM: 195766 MB + TOTAL_GTT: 885866 MB + USED_GTT: 17 MB + FREE_GTT: 885849 MB + +GPU: 5 + USAGE: + GFX_ACTIVITY: 0 % + UMC_ACTIVITY: 0 % + MM_ACTIVITY: N/A + VCN_ACTIVITY: [0 %, 0 %, 0 %, 0 %] + JPEG_ACTIVITY: [N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A] + POWER: + SOCKET_POWER: 138 W + GFX_VOLTAGE: N/A mV + SOC_VOLTAGE: N/A mV + MEM_VOLTAGE: N/A mV + THROTTLE_STATUS: UNTHROTTLED + POWER_MANAGEMENT: ENABLED + CLOCK: + GFX_0: + CLK: 237 MHz + MIN_CLK: 255 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: DISABLED + GFX_1: + CLK: 132 MHz + MIN_CLK: 255 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_2: + CLK: 132 MHz + MIN_CLK: 255 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_3: + CLK: 132 MHz + MIN_CLK: 255 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: 
DISABLED + DEEP_SLEEP: ENABLED + GFX_4: + CLK: 132 MHz + MIN_CLK: 255 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_5: + CLK: 132 MHz + MIN_CLK: 255 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_6: + CLK: 131 MHz + MIN_CLK: 255 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_7: + CLK: 132 MHz + MIN_CLK: 255 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + MEM_0: + CLK: 900 MHz + MIN_CLK: 900 MHz + MAX_CLK: 1300 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: DISABLED + VCLK_0: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_1: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_2: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_3: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_0: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_1: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_2: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_3: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + TEMPERATURE: + EDGE: N/A + HOTSPOT: 38 °C + MEM: 34 °C + PCIE: + WIDTH: 16 + SPEED: 32 GT/s + BANDWIDTH: 18 Mb/s + REPLAY_COUNT: 0 + L0_TO_RECOVERY_COUNT: 1 + REPLAY_ROLL_OVER_COUNT: 0 + NAK_SENT_COUNT: 0 + NAK_RECEIVED_COUNT: 0 + CURRENT_BANDWIDTH_SENT: N/A + CURRENT_BANDWIDTH_RECEIVED: N/A + MAX_PACKET_SIZE: N/A + MEM_USAGE: + TOTAL_VRAM: 196048 MB + USED_VRAM: 282 MB + FREE_VRAM: 195766 MB + TOTAL_VISIBLE_VRAM: 196048 MB + USED_VISIBLE_VRAM: 282 MB + FREE_VISIBLE_VRAM: 195766 MB + TOTAL_GTT: 885866 MB + USED_GTT: 17 MB + FREE_GTT: 885849 MB + +GPU: 6 + USAGE: + GFX_ACTIVITY: 0 % + UMC_ACTIVITY: 0 
% + MM_ACTIVITY: N/A + VCN_ACTIVITY: [0 %, 0 %, 0 %, 0 %] + JPEG_ACTIVITY: [N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A] + POWER: + SOCKET_POWER: 135 W + GFX_VOLTAGE: N/A mV + SOC_VOLTAGE: N/A mV + MEM_VOLTAGE: N/A mV + THROTTLE_STATUS: UNTHROTTLED + POWER_MANAGEMENT: ENABLED + CLOCK: + GFX_0: + CLK: 262 MHz + MIN_CLK: 262 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: DISABLED + GFX_1: + CLK: 132 MHz + MIN_CLK: 262 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_2: + CLK: 132 MHz + MIN_CLK: 262 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_3: + CLK: 132 MHz + MIN_CLK: 262 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_4: + CLK: 132 MHz + MIN_CLK: 262 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_5: + CLK: 132 MHz + MIN_CLK: 262 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_6: + CLK: 132 MHz + MIN_CLK: 262 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_7: + CLK: 132 MHz + MIN_CLK: 262 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + MEM_0: + CLK: 902 MHz + MIN_CLK: 900 MHz + MAX_CLK: 1300 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: DISABLED + VCLK_0: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_1: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_2: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_3: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_0: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_1: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + 
DCLK_2: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_3: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + TEMPERATURE: + EDGE: N/A + HOTSPOT: 36 °C + MEM: 30 °C + PCIE: + WIDTH: 16 + SPEED: 32 GT/s + BANDWIDTH: 993 Mb/s + REPLAY_COUNT: 0 + L0_TO_RECOVERY_COUNT: 1 + REPLAY_ROLL_OVER_COUNT: 0 + NAK_SENT_COUNT: 0 + NAK_RECEIVED_COUNT: 0 + CURRENT_BANDWIDTH_SENT: N/A + CURRENT_BANDWIDTH_RECEIVED: N/A + MAX_PACKET_SIZE: N/A + MEM_USAGE: + TOTAL_VRAM: 196048 MB + USED_VRAM: 282 MB + FREE_VRAM: 195766 MB + TOTAL_VISIBLE_VRAM: 196048 MB + USED_VISIBLE_VRAM: 282 MB + FREE_VISIBLE_VRAM: 195766 MB + TOTAL_GTT: 885866 MB + USED_GTT: 17 MB + FREE_GTT: 885849 MB + +GPU: 7 + USAGE: + GFX_ACTIVITY: 0 % + UMC_ACTIVITY: 0 % + MM_ACTIVITY: N/A + VCN_ACTIVITY: [0 %, 0 %, 0 %, 0 %] + JPEG_ACTIVITY: [N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A, + N/A, N/A, N/A] + POWER: + SOCKET_POWER: 139 W + GFX_VOLTAGE: N/A mV + SOC_VOLTAGE: N/A mV + MEM_VOLTAGE: N/A mV + THROTTLE_STATUS: UNTHROTTLED + POWER_MANAGEMENT: ENABLED + CLOCK: + GFX_0: + CLK: 219 MHz + MIN_CLK: 238 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: DISABLED + GFX_1: + CLK: 132 MHz + MIN_CLK: 238 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_2: + CLK: 131 MHz + MIN_CLK: 238 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_3: + CLK: 132 MHz + MIN_CLK: 238 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_4: + CLK: 132 MHz + MIN_CLK: 238 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_5: + CLK: 132 MHz + MIN_CLK: 238 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_6: + CLK: 132 MHz + MIN_CLK: 238 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + GFX_7: + CLK: 132 MHz 
+ MIN_CLK: 238 MHz + MAX_CLK: 2100 MHz + CLK_LOCKED: DISABLED + DEEP_SLEEP: ENABLED + MEM_0: + CLK: 900 MHz + MIN_CLK: 900 MHz + MAX_CLK: 1300 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: DISABLED + VCLK_0: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_1: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_2: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + VCLK_3: + CLK: 29 MHz + MIN_CLK: 914 MHz + MAX_CLK: 1333 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_0: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_1: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_2: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + DCLK_3: + CLK: 22 MHz + MIN_CLK: 711 MHz + MAX_CLK: 1143 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: ENABLED + TEMPERATURE: + EDGE: N/A + HOTSPOT: 37 °C + MEM: 31 °C + PCIE: + WIDTH: 16 + SPEED: 32 GT/s + BANDWIDTH: 3091 Mb/s + REPLAY_COUNT: 0 + L0_TO_RECOVERY_COUNT: 1 + REPLAY_ROLL_OVER_COUNT: 0 + NAK_SENT_COUNT: 0 + NAK_RECEIVED_COUNT: 0 + CURRENT_BANDWIDTH_SENT: N/A + CURRENT_BANDWIDTH_RECEIVED: N/A + MAX_PACKET_SIZE: N/A + MEM_USAGE: + TOTAL_VRAM: 196048 MB + USED_VRAM: 282 MB + FREE_VRAM: 195766 MB + TOTAL_VISIBLE_VRAM: 196048 MB + USED_VISIBLE_VRAM: 282 MB + FREE_VISIBLE_VRAM: 195766 MB + TOTAL_GTT: 885866 MB + USED_GTT: 17 MB + FREE_GTT: 885849 MB \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/powerMetrics.csv b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/powerMetrics.csv new file mode 100644 index 0000000000..aa6efc72b3 --- /dev/null +++ b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/powerMetrics.csv @@ -0,0 +1,9 @@ 
+gpu,socket_power,gfx_voltage,soc_voltage,mem_voltage,throttle_status,power_management +0,136,N/A,N/A,N/A,UNTHROTTLED,ENABLED +1,135,N/A,N/A,N/A,UNTHROTTLED,ENABLED +2,137,N/A,N/A,N/A,UNTHROTTLED,ENABLED +3,133,N/A,N/A,N/A,UNTHROTTLED,ENABLED +4,138,N/A,N/A,N/A,UNTHROTTLED,ENABLED +5,137,N/A,N/A,N/A,UNTHROTTLED,ENABLED +6,135,N/A,N/A,N/A,UNTHROTTLED,ENABLED +7,137,N/A,N/A,N/A,UNTHROTTLED,ENABLED \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/result.txt b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/result.txt deleted file mode 100644 index 73551c460e..0000000000 Binary files a/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/result.txt and /dev/null differ diff --git a/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/temperatureMetrics.csv b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/temperatureMetrics.csv new file mode 100644 index 0000000000..a838c61828 --- /dev/null +++ b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/temperatureMetrics.csv @@ -0,0 +1,9 @@ +gpu,edge,hotspot,mem +0,N/A,36,29 +1,N/A,36,31 +2,N/A,35,29 +3,N/A,34,29 +4,N/A,35,28 +5,N/A,36,32 +6,N/A,34,29 +7,N/A,36,30 \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/xgmi-8xMI300X.json b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/xgmi-8xMI300X.json new file mode 100644 index 0000000000..eaff221e17 --- /dev/null +++ b/src/VirtualClient/VirtualClient.Monitors.UnitTests/Examples/amd-smi/xgmi-8xMI300X.json @@ -0,0 +1,858 @@ +[ + { + "gpu": 0, + "bdf": "0000:0c:00.0", + "link_metrics": { + "bit_rate": { + "value": 32, + "unit": "Gb/s" + }, + "max_bandwidth": { + "value": 512, + "unit": "Gb/s" + }, + "link_type": "XGMI", + "links": [ + { + "gpu": 0, + "bdf": "0000:0c:00.0", + "read": "N/A", + "write": "N/A" + }, + { + "gpu": 1, + "bdf": "0000:22:00.0", + 
"read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 2, + "bdf": "0000:38:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 3, + "bdf": "0000:5c:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 4, + "bdf": "0000:9f:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 5, + "bdf": "0000:af:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 6, + "bdf": "0000:bf:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 7, + "bdf": "0000:df:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + } + ] + } + }, + { + "gpu": 1, + "bdf": "0000:22:00.0", + "link_metrics": { + "bit_rate": { + "value": 32, + "unit": "Gb/s" + }, + "max_bandwidth": { + "value": 512, + "unit": "Gb/s" + }, + "link_type": "XGMI", + "links": [ + { + "gpu": 0, + "bdf": "0000:0c:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 0, + "unit": "KB" + } + }, + { + "gpu": 1, + "bdf": "0000:22:00.0", + "read": "N/A", + "write": "N/A" + }, + { + "gpu": 2, + "bdf": "0000:38:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 3, + "bdf": "0000:5c:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 4, + "bdf": "0000:9f:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 5, + "bdf": "0000:af:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 6, + "bdf": "0000:bf:00.0", + "read": { + "value": 1, + "unit": "KB" + 
}, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 7, + "bdf": "0000:df:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + } + ] + } + }, + { + "gpu": 2, + "bdf": "0000:38:00.0", + "link_metrics": { + "bit_rate": { + "value": 32, + "unit": "Gb/s" + }, + "max_bandwidth": { + "value": 512, + "unit": "Gb/s" + }, + "link_type": "XGMI", + "links": [ + { + "gpu": 0, + "bdf": "0000:0c:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 0, + "unit": "KB" + } + }, + { + "gpu": 1, + "bdf": "0000:22:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 2, + "bdf": "0000:38:00.0", + "read": "N/A", + "write": "N/A" + }, + { + "gpu": 3, + "bdf": "0000:5c:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 4, + "bdf": "0000:9f:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 5, + "bdf": "0000:af:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 6, + "bdf": "0000:bf:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 7, + "bdf": "0000:df:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + } + ] + } + }, + { + "gpu": 3, + "bdf": "0000:5c:00.0", + "link_metrics": { + "bit_rate": { + "value": 32, + "unit": "Gb/s" + }, + "max_bandwidth": { + "value": 512, + "unit": "Gb/s" + }, + "link_type": "XGMI", + "links": [ + { + "gpu": 0, + "bdf": "0000:0c:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 0, + "unit": "KB" + } + }, + { + "gpu": 1, + "bdf": "0000:22:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 2, + "bdf": "0000:38:00.0", + 
"read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 3, + "bdf": "0000:5c:00.0", + "read": "N/A", + "write": "N/A" + }, + { + "gpu": 4, + "bdf": "0000:9f:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 5, + "bdf": "0000:af:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 6, + "bdf": "0000:bf:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 7, + "bdf": "0000:df:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + } + ] + } + }, + { + "gpu": 4, + "bdf": "0000:9f:00.0", + "link_metrics": { + "bit_rate": { + "value": 32, + "unit": "Gb/s" + }, + "max_bandwidth": { + "value": 512, + "unit": "Gb/s" + }, + "link_type": "XGMI", + "links": [ + { + "gpu": 0, + "bdf": "0000:0c:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 0, + "unit": "KB" + } + }, + { + "gpu": 1, + "bdf": "0000:22:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 2, + "bdf": "0000:38:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 3, + "bdf": "0000:5c:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 4, + "bdf": "0000:9f:00.0", + "read": "N/A", + "write": "N/A" + }, + { + "gpu": 5, + "bdf": "0000:af:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 6, + "bdf": "0000:bf:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 7, + "bdf": "0000:df:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + } + ] + 
} + }, + { + "gpu": 5, + "bdf": "0000:af:00.0", + "link_metrics": { + "bit_rate": { + "value": 32, + "unit": "Gb/s" + }, + "max_bandwidth": { + "value": 512, + "unit": "Gb/s" + }, + "link_type": "XGMI", + "links": [ + { + "gpu": 0, + "bdf": "0000:0c:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 0, + "unit": "KB" + } + }, + { + "gpu": 1, + "bdf": "0000:22:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 2, + "bdf": "0000:38:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 3, + "bdf": "0000:5c:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 4, + "bdf": "0000:9f:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 5, + "bdf": "0000:af:00.0", + "read": "N/A", + "write": "N/A" + }, + { + "gpu": 6, + "bdf": "0000:bf:00.0", + "read": { + "value": 1, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 7, + "bdf": "0000:df:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + } + ] + } + }, + { + "gpu": 6, + "bdf": "0000:bf:00.0", + "link_metrics": { + "bit_rate": { + "value": 32, + "unit": "Gb/s" + }, + "max_bandwidth": { + "value": 512, + "unit": "Gb/s" + }, + "link_type": "XGMI", + "links": [ + { + "gpu": 0, + "bdf": "0000:0c:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 0, + "unit": "KB" + } + }, + { + "gpu": 1, + "bdf": "0000:22:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 2, + "bdf": "0000:38:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 3, + "bdf": "0000:5c:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + 
"value": 1, + "unit": "KB" + } + }, + { + "gpu": 4, + "bdf": "0000:9f:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 5, + "bdf": "0000:af:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 6, + "bdf": "0000:bf:00.0", + "read": "N/A", + "write": "N/A" + }, + { + "gpu": 7, + "bdf": "0000:df:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + } + ] + } + }, + { + "gpu": 7, + "bdf": "0000:df:00.0", + "link_metrics": { + "bit_rate": { + "value": 32, + "unit": "Gb/s" + }, + "max_bandwidth": { + "value": 512, + "unit": "Gb/s" + }, + "link_type": "XGMI", + "links": [ + { + "gpu": 0, + "bdf": "0000:0c:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 0, + "unit": "KB" + } + }, + { + "gpu": 1, + "bdf": "0000:22:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 2, + "bdf": "0000:38:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 3, + "bdf": "0000:5c:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 4, + "bdf": "0000:9f:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 5, + "bdf": "0000:af:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 6, + "bdf": "0000:bf:00.0", + "read": { + "value": 0, + "unit": "KB" + }, + "write": { + "value": 1, + "unit": "KB" + } + }, + { + "gpu": 7, + "bdf": "0000:df:00.0", + "read": "N/A", + "write": "N/A" + } + ] + } + } +] \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Monitors/Amd-Smi/AmdSmiMetricsParser.cs b/src/VirtualClient/VirtualClient.Monitors/Amd-Smi/AmdSmiMetricsParser.cs 
// ===== File: src/VirtualClient/VirtualClient.Monitors/Amd-Smi/AmdSmiMetricsParser.cs (new file) =====
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

namespace VirtualClient.Monitors.Amd_Smi
{
    using System;
    using System.Collections.Generic;
    using System.Globalization;
    using System.Linq;
    using System.Text.RegularExpressions;
    using VirtualClient.Contracts;

    /// <summary>
    /// Parser for AMD SMI power and usage metrics, supporting multiple GPUs.
    /// </summary>
    /// <remarks>
    /// The raw text is split on blank lines into one section per GPU. Each section is then
    /// scanned with the regular expressions in <see cref="MetricDefinitions"/>. Values that are not
    /// numeric (for example "N/A" or "ENABLED") are reported as -1.
    /// </remarks>
    public class AmdSmiMetricsParser : MetricsParser
    {
        // Every pattern must expose a named group "value"; AddMetric reads match.Groups["value"].
        // Built once instead of once per GPU section.
        // NOTE(review): the voltage metrics are emitted with unit "V" although the sample output
        // prints "mV" after the value - confirm the intended unit.
        private static readonly (string Name, string Pattern, string Unit, double DivideBy)[] MetricDefinitions =
        {
            ("GFX_ACTIVITY", "GFX_ACTIVITY:\\s+(?<value>\\d+) %", "%", 1),
            ("UMC_ACTIVITY", "UMC_ACTIVITY:\\s+(?<value>\\d+) %", "%", 1),
            ("MM_ACTIVITY", "MM_ACTIVITY:\\s+(?<value>\\w+)", string.Empty, 1),
            ("SOCKET_POWER", "SOCKET_POWER:\\s+(?<value>\\d+) W", "W", 1),
            ("GFX_VOLTAGE", "GFX_VOLTAGE:\\s+(?<value>N/A|\\d+(\\.\\d+)?)", "V", 1),
            ("SOC_VOLTAGE", "SOC_VOLTAGE:\\s+(?<value>N/A|\\d+(\\.\\d+)?)", "V", 1),
            ("MEM_VOLTAGE", "MEM_VOLTAGE:\\s+(?<value>N/A|\\d+(\\.\\d+)?)", "V", 1),
            ("POWER_MANAGEMENT", "POWER_MANAGEMENT:\\s+(?<value>\\w+)", string.Empty, 1),
            ("TEMPERATURE_EDGE", "EDGE:\\s+(?<value>N/A|\\d+(\\.\\d+)?)(?:\\s+°C)?", "C", 1),
            ("TEMPERATURE_HOTSPOT", "HOTSPOT:\\s+(?<value>N/A|\\d+(\\.\\d+)?)(?:\\s+°C)?", "C", 1),
            ("TEMPERATURE_MEM", "MEM:\\s+(?<value>N/A|\\d+(\\.\\d+)?)(?:\\s+°C)?", "C", 1)
        };

        /// <summary>
        /// Initializes a new instance of the <see cref="AmdSmiMetricsParser"/> class.
        /// </summary>
        /// <param name="rawText">Raw text to parse.</param>
        public AmdSmiMetricsParser(string rawText)
            : base(rawText)
        {
        }

        /// <summary>
        /// Parses the raw text into metrics. The first metric is always TOTAL_GPUS (the number of
        /// sections found); it is followed by the per-GPU metrics named "{METRIC}_GPU{index}".
        /// </summary>
        /// <returns>The list of parsed metrics.</returns>
        public override IList<Metric> Parse()
        {
            this.Preprocess();

            List<Metric> metrics = new List<Metric>();
            List<(string GpuId, string Section)> gpuSections = this.ExtractGpuSections(this.PreprocessedText);

            metrics.Add(new Metric("TOTAL_GPUS", gpuSections.Count, "count"));

            foreach ((string gpuId, string section) in gpuSections)
            {
                this.ExtractMetrics(metrics, section, gpuId);
            }

            return metrics;
        }

        /// <summary>
        /// Trims leading/trailing whitespace from the raw text.
        /// </summary>
        protected override void Preprocess()
        {
            this.PreprocessedText = this.RawText.Trim();
        }

        /// <summary>
        /// Extracts GPU sections from the raw text based on whitespace separation.
        /// </summary>
        /// <param name="rawText">The preprocessed command output.</param>
        /// <returns>One (id, text) pair per non-empty section, in order of appearance.</returns>
        private List<(string GpuId, string Section)> ExtractGpuSections(string rawText)
        {
            var gpuSections = new List<(string, string)>();

            // A blank line (optionally containing whitespace, which also covers "\r\n") separates GPUs.
            List<string> sections = Regex.Split(rawText, "\n\\s*\n")
                .Where(s => !string.IsNullOrWhiteSpace(s))
                .ToList();

            for (int i = 0; i < sections.Count; i++)
            {
                // Assigning zero-based IDs to GPUs by position; the "GPU: N" header is not read.
                string gpuId = i.ToString(CultureInfo.InvariantCulture);
                gpuSections.Add((gpuId, sections[i].Trim()));
            }

            return gpuSections;
        }

        /// <summary>
        /// Extracts and adds metrics from the section for a specific GPU.
        /// </summary>
        private void ExtractMetrics(List<Metric> metrics, string section, string gpuId)
        {
            foreach (var metric in MetricDefinitions)
            {
                this.AddMetric(metrics, section, $"{metric.Name}_GPU{gpuId}", metric.Pattern, metric.Unit, metric.DivideBy);
            }
        }

        /// <summary>
        /// Adds a metric to the list if a match is found in the section, attaching GPU ID.
        /// Only the first match of the pattern within the section is used.
        /// </summary>
        private void AddMetric(List<Metric> metrics, string section, string name, string pattern, string unit, double divideBy = 1)
        {
            Match match = Regex.Match(section, pattern);
            if (match.Success)
            {
                double value = ParseDoubleSafely(match.Groups["value"].Value) / divideBy;
                metrics.Add(new Metric(name, value, unit));
            }
        }

        /// <summary>
        /// Converts a value to double safely, replacing non-numeric values with -1.
        /// </summary>
        private static double ParseDoubleSafely(string value)
        {
            // Tool output is machine-formatted, so parse with the invariant culture rather than
            // whatever culture the host process happens to run under.
            return double.TryParse(value, NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out double result)
                ? result
                : -1;
        }
    }
}

// ===== File: src/VirtualClient/VirtualClient.Monitors/Amd-Smi/AmdSmiMonitor.cs (new file) =====
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

namespace VirtualClient.Monitors
{
    using System;
    using System.Collections.Generic;
    using System.Diagnostics;
    using System.Globalization;
    using System.IO.Abstractions;
    using System.Linq;
    using System.Threading;
    using System.Threading.Tasks;
    using global::VirtualClient;
    using global::VirtualClient.Contracts;
    using Microsoft.Extensions.DependencyInjection;
    using Microsoft.Extensions.Logging;
    using VirtualClient.Common;
    using VirtualClient.Common.Extensions;
    using VirtualClient.Common.Telemetry;
    using VirtualClient.Monitors.Amd_Smi;

    /// <summary>
    /// Monitor that runs the 'amd-smi' tool to capture AMD GPU metrics and, on Unix,
    /// XGMI link bandwidth.
    /// </summary>
    public class AmdSmiMonitor : VirtualClientIntervalBasedMonitor
    {
        private const string AmdSmiCommand = "amd-smi";
        private const string GpuIdMetadataKey = "gpu.id";

        // Pause between the two XGMI counter samples used to compute a bandwidth.
        private static readonly TimeSpan XgmiSampleInterval = TimeSpan.FromMilliseconds(500);

        private ISystemManagement systemManagement;
        private IFileSystem fileSystem;

        /// <summary>
        /// Initializes a new instance of the <see cref="AmdSmiMonitor"/> class.
        /// </summary>
        /// <param name="dependencies">Provides the services required by the monitor.</param>
        /// <param name="parameters">Parameters defined in the profile for the monitor.</param>
        public AmdSmiMonitor(IServiceCollection dependencies, IDictionary<string, IConvertible> parameters)
            : base(dependencies, parameters)
        {
            this.systemManagement = this.Dependencies.GetService<ISystemManagement>();
            this.fileSystem = this.systemManagement.FileSystem;
        }

        /// <summary>
        /// Runs the GPU metric query on Windows and Unix, followed by the XGMI query on Unix only.
        /// Failures are logged as warnings and never propagate to the caller.
        /// </summary>
        /// <param name="telemetryContext">Provides context information that will be captured with telemetry events.</param>
        /// <param name="cancellationToken">A token that can be used to cancel the operation.</param>
        protected override async Task ExecuteAsync(EventContext telemetryContext, CancellationToken cancellationToken)
        {
            try
            {
                switch (this.Platform)
                {
                    case PlatformID.Win32NT:
                        await this.QueryGpuMetricAsync(telemetryContext, cancellationToken).ConfigureAwait(false);
                        break;

                    case PlatformID.Unix:
                        await this.QueryGpuMetricAsync(telemetryContext, cancellationToken).ConfigureAwait(false);

                        if (!cancellationToken.IsCancellationRequested)
                        {
                            await this.QueryGpuXGMIAsync(telemetryContext, cancellationToken).ConfigureAwait(false);
                        }

                        break;
                }
            }
            catch (OperationCanceledException)
            {
                // Expected whenever ctrl-C is used (e.g. while waiting out the warm-up period).
            }
            catch (Exception exc)
            {
                // Monitors are best-effort: report through the logger (not the console) and keep going.
                this.Logger.LogErrorMessage(exc, telemetryContext, LogLevel.Warning);
            }
        }

        /// <summary>
        /// Validates that the monitor frequency is greater than zero.
        /// </summary>
        /// <exception cref="MonitorException">The monitor frequency is zero or negative.</exception>
        protected void ValidateParameters()
        {
            if (this.MonitorFrequency <= TimeSpan.Zero)
            {
                throw new MonitorException(
                    $"The monitor frequency defined/provided for the '{this.TypeName}' component '{this.MonitorFrequency}' is not valid. " +
                    $"The frequency must be greater than zero.",
                    ErrorReason.InvalidProfileDefinition);
            }
        }

        /// <summary>
        /// Runs 'amd-smi metric', parses the output and logs the resulting counters.
        /// </summary>
        private async Task QueryGpuMetricAsync(EventContext telemetryContext, CancellationToken cancellationToken)
        {
            string commandArguments = "metric";

            await Task.Delay(this.MonitorWarmupPeriod, cancellationToken).ConfigureAwait(false);
            int i = 0;

            // NOTE(review): the 'i < 1' bound means this loop body runs at most once per call -
            // confirm whether continuous sampling until cancellation was intended.
            while (!cancellationToken.IsCancellationRequested && i < 1)
            {
                try
                {
                    i++;
                    using (IProcessProxy process = this.systemManagement.ProcessManager.CreateElevatedProcess(this.Platform, AmdSmiCommand, commandArguments, Environment.CurrentDirectory))
                    {
                        this.CleanupTasks.Add(() => process.SafeKill());
                        DateTime startTime = DateTime.UtcNow;
                        await process.StartAndWaitAsync(cancellationToken).ConfigureAwait(false);
                        DateTime endTime = DateTime.UtcNow;

                        if (!cancellationToken.IsCancellationRequested)
                        {
                            process.ThrowIfErrored<MonitorException>(ProcessProxy.DefaultSuccessCodes, errorReason: ErrorReason.MonitorFailed);

                            if (process.StandardOutput.Length > 0)
                            {
                                AmdSmiMetricsParser parser = new AmdSmiMetricsParser(process.StandardOutput.ToString());
                                IList<Metric> metrics = parser.Parse();

                                if (metrics?.Any() == true)
                                {
                                    this.Logger.LogPerformanceCounters("amd", metrics, startTime, endTime, telemetryContext);
                                }
                            }
                        }
                    }

                    // Pass the token so a cancellation request does not have to wait out the full interval.
                    await Task.Delay(this.MonitorFrequency, cancellationToken).ConfigureAwait(false);
                }
                catch (OperationCanceledException)
                {
                    // Expected whenever ctrl-C is used.
                }
                catch (Exception exc)
                {
                    this.Logger.LogErrorMessage(exc, telemetryContext, LogLevel.Warning);
                }
            }
        }

        /// <summary>
        /// Samples the XGMI counters twice ('amd-smi xgmi -m --json'), converts the difference
        /// into a per-GPU bandwidth and logs the result.
        /// </summary>
        private async Task QueryGpuXGMIAsync(EventContext telemetryContext, CancellationToken cancellationToken)
        {
            string commandArguments = "xgmi -m --json";

            await Task.Delay(this.MonitorWarmupPeriod, cancellationToken).ConfigureAwait(false);
            int i = 0;

            // NOTE(review): as in QueryGpuMetricAsync, this loop body runs at most once per call.
            while (!cancellationToken.IsCancellationRequested && i < 1)
            {
                i++;
                try
                {
                    // NOTE(review): the stopwatch window also covers the run time of the first
                    // command, so it is somewhat longer than the gap between the two counter
                    // reads - confirm this is the intended denominator.
                    Stopwatch stopwatch = Stopwatch.StartNew();

                    var (metrics1, startTime1, _) = await this.ExecuteXGMICommand(commandArguments, cancellationToken).ConfigureAwait(false);
                    await Task.Delay(XgmiSampleInterval, cancellationToken).ConfigureAwait(false);
                    var (metrics2, _, endTime2) = await this.ExecuteXGMICommand(commandArguments, cancellationToken).ConfigureAwait(false);

                    stopwatch.Stop();

                    IList<Metric> aggregatedMetrics = this.AmdSmiXGMIBandwidthAggregator(metrics1, metrics2, stopwatch.ElapsedMilliseconds);

                    if (aggregatedMetrics?.Any() == true)
                    {
                        this.Logger.LogPerformanceCounters("amd", aggregatedMetrics, startTime1, endTime2, telemetryContext);
                    }

                    await Task.Delay(this.MonitorFrequency, cancellationToken).ConfigureAwait(false);
                }
                catch (OperationCanceledException)
                {
                    // Expected whenever ctrl-C is used.
                }
                catch (Exception exc)
                {
                    this.Logger.LogErrorMessage(exc, telemetryContext, LogLevel.Warning);
                }
            }
        }

        /// <summary>
        /// Runs 'amd-smi' with the given arguments and parses the JSON output into XGMI metrics.
        /// </summary>
        /// <returns>The parsed metrics together with the UTC start and end time of the process run.</returns>
        private async Task<(IList<Metric>, DateTime, DateTime)> ExecuteXGMICommand(string commandArguments, CancellationToken cancellationToken)
        {
            using (IProcessProxy process = this.systemManagement.ProcessManager.CreateElevatedProcess(this.Platform, AmdSmiCommand, commandArguments, Environment.CurrentDirectory))
            {
                this.CleanupTasks.Add(() => process.SafeKill());
                DateTime startTime = DateTime.UtcNow;
                await process.StartAndWaitAsync(cancellationToken).ConfigureAwait(false);
                DateTime endTime = DateTime.UtcNow;

                // Do not try to parse the (possibly partial) output of a cancelled or failed run;
                // mirrors the checks performed on the metric path.
                cancellationToken.ThrowIfCancellationRequested();
                process.ThrowIfErrored<MonitorException>(ProcessProxy.DefaultSuccessCodes, errorReason: ErrorReason.MonitorFailed);

                AmdSmiXGMIQueryGpuParser parser = new AmdSmiXGMIQueryGpuParser(process.StandardOutput.ToString());
                return (parser.Parse(), startTime, endTime);
            }
        }

        /// <summary>
        /// Pairs the two samples by their "gpu.id" metadata and converts the counter difference
        /// into a bandwidth metric named "xgmi.bw".
        /// </summary>
        /// <param name="metrics1">First sample (counter values, treated as KB).</param>
        /// <param name="metrics2">Second sample (counter values, treated as KB).</param>
        /// <param name="time">Elapsed time in milliseconds.</param>
        /// <returns>One metric per matching GPU pair; empty when a sample is missing or the elapsed time is not positive.</returns>
        private IList<Metric> AmdSmiXGMIBandwidthAggregator(IList<Metric> metrics1, IList<Metric> metrics2, long time)
        {
            List<Metric> aggregatedMetrics = new List<Metric>();

            if (metrics1?.Any() != true || metrics2?.Any() != true || time <= 0)
            {
                return aggregatedMetrics;
            }

            double elapsedSeconds = time / 1000.0;

            // Index the second sample by GPU id. The ids are compared as ordinal strings: the
            // metadata values are interface-typed, so '==' on them would compare references.
            ILookup<string, Metric> secondSampleByGpu = metrics2
                .Where(metric => GetGpuId(metric) != null)
                .ToLookup(metric => GetGpuId(metric), StringComparer.Ordinal);

            foreach (Metric counter1 in metrics1)
            {
                string gpuId = GetGpuId(counter1);
                if (gpuId == null)
                {
                    continue;
                }

                foreach (Metric counter2 in secondSampleByGpu[gpuId])
                {
                    double bandwidth = (counter2.Value - counter1.Value) / elapsedSeconds;
                    aggregatedMetrics.Add(new Metric($"xgmi.bw", (bandwidth / 1024), unit: "MB/s", metadata: counter1.Metadata));
                }
            }

            return aggregatedMetrics;
        }

        /// <summary>
        /// Returns the "gpu.id" metadata value of the metric as a string, or null when it is absent.
        /// </summary>
        private static string GetGpuId(Metric metric)
        {
            if (metric?.Metadata != null && metric.Metadata.TryGetValue(GpuIdMetadataKey, out var gpuId) && gpuId != null)
            {
                return Convert.ToString(gpuId, CultureInfo.InvariantCulture);
            }

            return null;
        }
    }
}

// ===== File: src/VirtualClient/VirtualClient.Monitors/Amd-Smi/AmdSmiXGMIQueryGpuParser.cs (new file) =====
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

namespace VirtualClient.Monitors
{
    using System;
    using System.Collections.Generic;
    using System.Data;
    using System.Globalization;
    using System.IO;
    using System.Linq;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Threading;
    using Newtonsoft.Json;
    using VirtualClient.Contracts;
    using DataTableExtensions = VirtualClient.Contracts.DataTableExtensions;

    /// <summary>
    /// Parser for the JSON document produced by 'amd-smi xgmi -m --json'.
    /// </summary>
    public class AmdSmiXGMIQueryGpuParser : MetricsParser
    {
        // Matches the quoted placeholder the tool emits where no counter is available.
        private static readonly Regex NotAvailablePattern = new Regex("\"N/A\"", RegexOptions.Compiled);

        /// <summary>
        /// Constructor for <see cref="AmdSmiXGMIQueryGpuParser"/>
        /// </summary>
        /// <param name="rawText">Raw text to parse.</param>
        public AmdSmiXGMIQueryGpuParser(string rawText)
            : base(rawText)
        {
        }

        /// <summary>
        /// Produces one metric per GPU entry, named "xgmi_{index}_data" (index = position in the
        /// document), whose value is the sum of the read and write values of all of its links
        /// (unit "KB"). The entry's "gpu" value is attached as "gpu.id" metadata.
        /// </summary>
        /// <returns>The parsed metrics; empty when the document contains no GPU entries.</returns>
        public override IList<Metric> Parse()
        {
            this.Preprocess();

            List<Metric> metrics = new List<Metric>();
            List<dynamic> gpuDataList = JsonConvert.DeserializeObject<List<dynamic>>(this.PreprocessedText);

            if (gpuDataList == null)
            {
                // Empty/whitespace output deserializes to null; there is nothing to report.
                return metrics;
            }

            int index = 0;
            foreach (dynamic gpuData in gpuDataList)
            {
                double data = 0;
                foreach (var link in gpuData.link_metrics.links)
                {
                    // After Preprocess() every read/write entry is an object with a numeric "value".
                    data += (link.read.value.Value + link.write.value.Value);
                }

                int gpuId = (int)gpuData.gpu;
                Dictionary<string, IConvertible> metadata = new Dictionary<string, IConvertible>()
                {
                    { "gpu.id", gpuId.ToString(CultureInfo.InvariantCulture) },
                };

                metrics.Add(new Metric($"xgmi_{index}_data", data, unit: "KB", metadata: metadata));
                index++;
            }

            return metrics;
        }

        /// <summary>
        /// Replaces every quoted "N/A" with a zero-valued KB object so that all read/write
        /// entries have the same shape.
        /// </summary>
        protected override void Preprocess()
        {
            this.PreprocessedText = NotAvailablePattern.Replace(this.RawText, "{\r\n\"value\": 0,\r\n\"unit\": \"KB\"\r\n}");
        }
    }
}

// ===== File: src/VirtualClient/VirtualClient.Monitors/AmdSmiMonitor.cs (deleted by this change; its removed lines follow) =====
- -namespace VirtualClient.Monitors -{ - using System; - using System.Collections.Generic; - using System.IO.Abstractions; - using System.Linq; - using System.Threading; - using System.Threading.Tasks; - using global::VirtualClient; - using global::VirtualClient.Contracts; - using Microsoft.Extensions.DependencyInjection; - using Microsoft.Extensions.Logging; - using VirtualClient.Common; - using VirtualClient.Common.Extensions; - using VirtualClient.Common.Telemetry; - - /// - /// The Performance Counter Monitor for Virtual Client - /// - public class AmdSmiMonitor : VirtualClientIntervalBasedMonitor - { - /// - /// Initializes a new instance of the class. - /// - public AmdSmiMonitor(IServiceCollection dependencies, IDictionary parameters) - : base(dependencies, parameters) - { - } - - /// - protected override async Task ExecuteAsync(EventContext telemetryContext, CancellationToken cancellationToken) - { - switch (this.Platform) - { - case PlatformID.Win32NT: - await this.QueryGpuAsync(telemetryContext, cancellationToken) - .ConfigureAwait(false); - break; - - case PlatformID.Unix: - // not supported at the moment - break; - } - } - - /// - protected void ValidateParameters() - { - if (this.MonitorFrequency <= TimeSpan.Zero) - { - throw new MonitorException( - $"The monitor frequency defined/provided for the '{this.TypeName}' component '{this.MonitorFrequency}' is not valid. " + - $"The frequency must be greater than zero.", - ErrorReason.InvalidProfileDefinition); - } - } - - /// - /// Query the gpu for utilization information - /// - /// Provides context information that will be captured with telemetry events. - /// A token that can be used to cancel the operation. 
- /// - private async Task QueryGpuAsync(EventContext telemetryContext, CancellationToken cancellationToken) - { - ISystemManagement systemManagement = this.Dependencies.GetService(); - IFileSystem fileSystem = systemManagement.FileSystem; - - int totalSamples = (int)this.MonitorFrequency.TotalSeconds; - string command = "amdsmi"; - string commandArguments = "metric --csv"; - - await Task.Delay(this.MonitorWarmupPeriod, cancellationToken) - .ConfigureAwait(false); - - while (!cancellationToken.IsCancellationRequested) - { - try - { - using (IProcessProxy process = systemManagement.ProcessManager.CreateElevatedProcess(this.Platform, command, $"{commandArguments}", Environment.CurrentDirectory)) - { - this.CleanupTasks.Add(() => process.SafeKill()); - - DateTime startTime = DateTime.UtcNow; - await process.StartAndWaitAsync(cancellationToken) - .ConfigureAwait(false); - - DateTime endTime = DateTime.UtcNow; - - if (!cancellationToken.IsCancellationRequested) - { - try - { - // We cannot log the process details here. The output is too large. - process.ThrowIfErrored(ProcessProxy.DefaultSuccessCodes, errorReason: ErrorReason.MonitorFailed); - - if (process.StandardOutput.Length > 0) - { - AmdSmiQueryGpuParser parser = new AmdSmiQueryGpuParser(process.StandardOutput.ToString()); - IList metrics = parser.Parse(); - - if (metrics?.Any() == true) - { - this.Logger.LogPerformanceCounters("amd", metrics, startTime, endTime, telemetryContext); - } - } - } - catch - { - await this.LogProcessDetailsAsync(process, EventContext.Persisted()); - throw; - } - } - - await Task.Delay(this.MonitorFrequency).ConfigureAwait(false); - } - } - catch (OperationCanceledException) - { - // Expected whenever ctrl-C is used. 
- } - catch (Exception exc) - { - this.Logger.LogErrorMessage(exc, telemetryContext, LogLevel.Warning); - } - } - } - } -} \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Monitors/AmdSmiQueryGpuParser.cs b/src/VirtualClient/VirtualClient.Monitors/AmdSmiQueryGpuParser.cs deleted file mode 100644 index 24220e17b7..0000000000 --- a/src/VirtualClient/VirtualClient.Monitors/AmdSmiQueryGpuParser.cs +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -namespace VirtualClient.Monitors -{ - using System; - using System.Collections.Generic; - using System.Data; - using System.Linq; - using VirtualClient.Contracts; - using DataTableExtensions = VirtualClient.Contracts.DataTableExtensions; - - /// - /// Parser for AmdSmi output document. - /// - public class AmdSmiQueryGpuParser : MetricsParser - { - /// - /// Constructor for - /// - /// Raw text to parse. - public AmdSmiQueryGpuParser(string rawText) - : base(rawText) - { - } - - /// - public override IList Parse() - { - this.Preprocess(); - - // Sanatize non-standard csv tokens in output - string replacedText = this.PreprocessedText.Replace("[0, 0]", "0"); - - List metrics = new List(); - DataTable dataTable = DataTableExtensions.DataTableFromCsv(replacedText); - - foreach (DataRow row in dataTable.Rows) - { - Dictionary metadata = new Dictionary() - { - { "gpu.id", Convert.ToString(row[0]) }, - }; - - // Ingest only the metrics which are exposed at the guest level - metrics.Add(new Metric("utilization.gpu [%]", Convert.ToDouble(row[1]), unit: "%", metadata: metadata)); - metrics.Add(new Metric("framebuffer.total [MB]", Convert.ToDouble(row[4]), unit: "MB", metadata: metadata)); - metrics.Add(new Metric("framebuffer.used [MB]", Convert.ToDouble(row[5]), unit: "MB", metadata: metadata)); - } - - return metrics; - } - - /// - protected override void Preprocess() - { - this.PreprocessedText = this.RawText.Replace("\r\n", Environment.NewLine); - } 
- } -} diff --git a/website/docs/monitors/0200-monitor-profiles.md b/website/docs/monitors/0200-monitor-profiles.md index d3214e1334..6c8edbde0f 100644 --- a/website/docs/monitors/0200-monitor-profiles.md +++ b/website/docs/monitors/0200-monitor-profiles.md @@ -114,3 +114,44 @@ The monitor profile designed for Nvidia GPU systems. The profile captures counte ./VirtualClient --profile=PERF-GPU-MLPERF.json --profile=MONITORS-GPU-NVIDIA.json --system=Demo --timeout=1440 --packageStore="{BlobConnectionString|SAS Uri}" ``` + +## MONITORS-GPU-AMD.json +The monitor profile designed for AMD GPU systems. The profile captures metrics on systems with AMD GPUs using amd-smi. + +* **Supported Platform/Architectures** + * linux-x64 + * win-x64 + +* **Supported Operating Systems** + * Ubuntu 18 + * Ubuntu 20 + * Ubuntu 22 + +* **Dependencies** + * The system needs to have an AMD GPU with ROCm installed. + +* **Scenarios** + * Captures metrics on systems using [amd-smi](./0500-amd-smi.md) + +* **Profile Parameters** + The following parameters can be optionally supplied on the command line to change this default behavior. + + | Parameter | Purpose | Default value | + |---------------------------|---------------------------------------------------------------------------------|---------------| + | Scenario | Optional. A description of the purpose of the monitor within the overall profile workflow. | | + | MonitorFrequency | Optional. Defines the frequency (timespan) at which performance counters will be captured/emitted (e.g. 00:01:00). | 00:05:00 | + | MonitorWarmupPeriod | Optional. Defines a period of time (timespan) to wait before starting to track/capture performance counters (e.g. 00:03:00). This allows the system to get to a more typical operational state and generally results in a better representation of the counters captured. | 00:05:00 | + | MetricFilter | Optional. A comma-delimited list of performance counter names to capture. 
The default behavior is to capture/emit all performance counters (e.g. \Processor Information(_Total)\% System Time,\Processor Information(_Total)\% User Time). This allows the profile author to focus on a smaller/specific subset of the counters. This is typically used when a lower monitor frequency is required for higher sample precision to keep the size of the data sets emitted by the Virtual Client to a minimum. | | + +* **Usage Examples** + The following section provides a few basic examples of how to use the monitor profile. Additional usage examples can be found in the + 'Usage Scenarios/Examples' link at the top. + + ``` bash + # Run the monitoring facilities only. + ./VirtualClient --profile=MONITORS-GPU-AMD.json + + # Monitor profile explicitly defined. + ./VirtualClient --profile=PERF-GPU-3DMARK-AMD.json --profile=MONITORS-GPU-AMD.json --system=Demo --timeout=1440 --packageStore="{BlobConnectionString|SAS Uri}" + + ```