From 3ce14c78f0f1742bc15eb5b61838479cc3d76ddc Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Wed, 31 Jan 2024 14:17:20 -0800 Subject: [PATCH 01/11] FO 3.2.15 dev --- Build-SFPkgs.ps1 | 8 +- .../PackageRoot/Config/Settings.xml | 8 +- .../PackageRoot/Data/Plugins/Readme.txt | 2 +- .../ApplicationManifest.xml | 2 +- .../Deployment/service-fabric-observer.json | 6 +- ...e-fabric-observer.v3.2.15.parameters.json} | 2 +- Documentation/OperationalTelemetry.md | 4 +- Documentation/Plugins.md | 2 +- Documentation/Using.md | 4 +- FabricObserver.Extensibility.nuspec.template | 4 +- .../FabricObserver.Extensibility.csproj | 4 +- FabricObserver.nuspec.template | 9 +- FabricObserver.sln | 4 +- FabricObserver/FabricObserver.csproj | 4 +- FabricObserver/Observers/AppObserver.cs | 90 ++----------------- FabricObserver/Observers/ObserverManager.cs | 3 +- .../PackageRoot/Config/Settings.xml | 8 +- .../PackageRoot/Data/Plugins/Readme.txt | 6 +- .../PackageRoot/ServiceManifest.xml | 8 +- .../PackageRoot/ServiceManifest_linux.xml | 8 +- .../ApplicationManifest.xml | 24 ++--- README.md | 12 +-- .../SampleObserverPlugin.csproj | 6 +- XmlDiffPatchSF/Program.cs | 2 +- foextlib.md | 2 +- fonuget.md | 2 +- 26 files changed, 76 insertions(+), 158 deletions(-) rename Documentation/Deployment/{service-fabric-observer.v3.2.13.parameters.json => service-fabric-observer.v3.2.15.parameters.json} (93%) diff --git a/Build-SFPkgs.ps1 b/Build-SFPkgs.ps1 index 6bd4a717..c15cda10 100644 --- a/Build-SFPkgs.ps1 +++ b/Build-SFPkgs.ps1 @@ -23,11 +23,11 @@ function Build-SFPkg { try { Push-Location $scriptPath - Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Linux.SelfContained.3.2.14" "$scriptPath\bin\release\FabricObserver\linux-x64\self-contained\FabricObserverType" - Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Linux.FrameworkDependent.3.2.14" "$scriptPath\bin\release\FabricObserver\linux-x64\framework-dependent\FabricObserverType" + Build-SFPkg 
"Microsoft.ServiceFabricApps.FabricObserver.Linux.SelfContained.3.2.15" "$scriptPath\bin\release\FabricObserver\linux-x64\self-contained\FabricObserverType" + Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Linux.FrameworkDependent.3.2.15" "$scriptPath\bin\release\FabricObserver\linux-x64\framework-dependent\FabricObserverType" - Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Windows.SelfContained.3.2.14" "$scriptPath\bin\release\FabricObserver\win-x64\self-contained\FabricObserverType" - Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Windows.FrameworkDependent.3.2.14" "$scriptPath\bin\release\FabricObserver\win-x64\framework-dependent\FabricObserverType" + Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Windows.SelfContained.3.2.15" "$scriptPath\bin\release\FabricObserver\win-x64\self-contained\FabricObserverType" + Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Windows.FrameworkDependent.3.2.15" "$scriptPath\bin\release\FabricObserver\win-x64\framework-dependent\FabricObserverType" } finally { Pop-Location diff --git a/ClusterObserver/PackageRoot/Config/Settings.xml b/ClusterObserver/PackageRoot/Config/Settings.xml index dbb6d99f..8688518e 100644 --- a/ClusterObserver/PackageRoot/Config/Settings.xml +++ b/ClusterObserver/PackageRoot/Config/Settings.xml @@ -23,15 +23,15 @@ - + + NOTE: Value must now be set in ApplicationManifest.xml. This is a *breaking change* in version 3.2.15. --> + NOTE: Value must now be set in ApplicationManifest.xml. This is a *breaking change* in version 3.2.15.--> @@ -39,7 +39,7 @@ + NOTE: Values must now be set in ApplicationManifest.xml. This is a *breaking change* in version 3.2.15. 
--> diff --git a/ClusterObserver/PackageRoot/Data/Plugins/Readme.txt b/ClusterObserver/PackageRoot/Data/Plugins/Readme.txt index 4ad27483..461cf7e0 100644 --- a/ClusterObserver/PackageRoot/Data/Plugins/Readme.txt +++ b/ClusterObserver/PackageRoot/Data/Plugins/Readme.txt @@ -68,5 +68,5 @@ cd C:\Users\me\source\repos\service-fabric-observer ./Build-FabricObserver ./Build-NugetPackages -The output from the above commands, FabricObserver platform-specific nupkgs and a package you have to use for plugin authoring named Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.14.nupkg, would be located in +The output from the above commands, FabricObserver platform-specific nupkgs and a package you have to use for plugin authoring named Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.15.nupkg, would be located in C:\Users\me\source\repos\service-fabric-observer\bin\release\FabricObserver\Nugets. \ No newline at end of file diff --git a/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index 1bca1d7c..41897bbd 100644 --- a/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -11,8 +11,8 @@ - + diff --git a/Documentation/Deployment/service-fabric-observer.json b/Documentation/Deployment/service-fabric-observer.json index 546bddc3..5e5e6b8e 100644 --- a/Documentation/Deployment/service-fabric-observer.json +++ b/Documentation/Deployment/service-fabric-observer.json @@ -11,16 +11,16 @@ }, "applicationTypeVersionFabricObserver": { "type": "string", - "defaultValue": "3.2.14", + "defaultValue": "3.2.15", "metadata": { - "description": "Provide the app version number of FabricObserver. This must be identical to the version, 3.2.14, in the referenced sfpkg specified in packageUrlFabricObserver." + "description": "Provide the app version number of FabricObserver. 
This must be identical to the version, 3.2.15, in the referenced sfpkg specified in packageUrlFabricObserver." } }, "packageUrlFabricObserver": { "type": "string", "defaultValue": "", "metadata": { - "description": "This has to be a public accessible URL for the sfpkg file which contains the FabricObserver app package. Example: https://github.com/microsoft/service-fabric-observer/releases/download/[xxxxxxxx]/Microsoft.ServiceFabricApps.FabricObserver.Windows.SelfContained.3.2.14.sfpkg" + "description": "This has to be a public accessible URL for the sfpkg file which contains the FabricObserver app package. Example: https://github.com/microsoft/service-fabric-observer/releases/download/[xxxxxxxx]/Microsoft.ServiceFabricApps.FabricObserver.Windows.SelfContained.3.2.15.sfpkg" } } }, diff --git a/Documentation/Deployment/service-fabric-observer.v3.2.13.parameters.json b/Documentation/Deployment/service-fabric-observer.v3.2.15.parameters.json similarity index 93% rename from Documentation/Deployment/service-fabric-observer.v3.2.13.parameters.json rename to Documentation/Deployment/service-fabric-observer.v3.2.15.parameters.json index 8ad6fdc6..6fbb487d 100644 --- a/Documentation/Deployment/service-fabric-observer.v3.2.13.parameters.json +++ b/Documentation/Deployment/service-fabric-observer.v3.2.15.parameters.json @@ -6,7 +6,7 @@ "value": "" }, "applicationTypeVersionFabricObserver": { - "value": "3.2.14" + "value": "3.2.15" }, "packageUrlFabricObserver": { "value": "" diff --git a/Documentation/OperationalTelemetry.md b/Documentation/OperationalTelemetry.md index 4cc237e9..1ec0f62b 100644 --- a/Documentation/OperationalTelemetry.md +++ b/Documentation/OperationalTelemetry.md @@ -18,7 +18,7 @@ As with most of FabricObserver's application settings, you can also do this with Connect-ServiceFabricCluster ... 
$appParams = @{ "ObserverManagerEnableOperationalFOTelemetry" = "false"; } -Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationParameter $appParams -ApplicationTypeVersion 3.2.14 -UnMonitoredAuto +Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationParameter $appParams -ApplicationTypeVersion 3.2.15 -UnMonitoredAuto ``` @@ -44,7 +44,7 @@ Here is a full example of exactly what is sent in one of these telemetry events, "ClusterId": "00000000-1111-1111-0000-00f00d000d", "ClusterType": "SFRP", "NodeNameHash": "3e83569d4c6aad78083cd081215dafc81e5218556b6a46cb8dd2b183ed0095ad", - "FOVersion": "3.2.14", + "FOVersion": "3.2.15", "HasPlugins": "False", "SFRuntimeVersion":"9.0.1028.9590" "UpTime": "1.00:30:18.8058379", diff --git a/Documentation/Plugins.md b/Documentation/Plugins.md index a3d0c3ab..660202df 100644 --- a/Documentation/Plugins.md +++ b/Documentation/Plugins.md @@ -72,5 +72,5 @@ cd C:\Users\me\source\repos\service-fabric-observer ./Build-FabricObserver ./Build-NugetPackages ``` -The output from the above commands contains FabricObserver platform-specific nupkgs and a nupkg you have to use for plugin authoring named Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.14.nupkg. Nuget packages will be located in +The output from the above commands contains FabricObserver platform-specific nupkgs and a nupkg you have to use for plugin authoring named Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.15.nupkg. Nuget packages will be located in C:\Users\me\source\repos\service-fabric-observer\bin\release\FabricObserver\Nugets. 
\ No newline at end of file diff --git a/Documentation/Using.md b/Documentation/Using.md index 067cfb8b..3f114937 100644 --- a/Documentation/Using.md +++ b/Documentation/Using.md @@ -710,7 +710,7 @@ $appParams = @{ "FabricSystemObserverEnabled" = "true"; "FabricSystemObserverMem Then execute the application upgrade with ```Powershell -Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationTypeVersion 3.2.14 -ApplicationParameter $appParams -Monitored -FailureAction rollback +Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationTypeVersion 3.2.15 -ApplicationParameter $appParams -Monitored -FailureAction rollback ``` **Important**: This action will overwrite previous app paramemter changes that were made in an earlier application upgrade, for example. If you want to preserve any earlier changes, then you will need to @@ -718,7 +718,7 @@ supply those parameter values again along with the new ones. You do this in the ```PowerShell $appName = "fabric:/FabricObserver" -$appVersion = "3.2.14" +$appVersion = "3.2.15" $application = Get-ServiceFabricApplication -ApplicationName $appName $appParamCollection = $application.ApplicationParameters $applicationParameterMap = @{} diff --git a/FabricObserver.Extensibility.nuspec.template b/FabricObserver.Extensibility.nuspec.template index 64388e55..484c7a70 100644 --- a/FabricObserver.Extensibility.nuspec.template +++ b/FabricObserver.Extensibility.nuspec.template @@ -2,9 +2,9 @@ %PACKAGE_ID% - 3.2.14 + 3.2.15 -This is version is required for observer plugins that target FabricObserver 3.2.14 or ClusterObserver 2.2.8. +This is version is required for observer plugins that target FabricObserver 3.2.15 or ClusterObserver 2.2.8. #### Changes - Updated Logger implementatio to fix app param update bug related to log path and max archive lifetime settings not being honored. 
- Updated ObserverBase to include guard rails to prevent high CPU consumption if an observer's MonitorDuration or MonitorSleepTimeMilliseconds settings are misconfigured (to high or too low, respectively). diff --git a/FabricObserver.Extensibility/FabricObserver.Extensibility.csproj b/FabricObserver.Extensibility/FabricObserver.Extensibility.csproj index 07d9f72a..c43ed403 100644 --- a/FabricObserver.Extensibility/FabricObserver.Extensibility.csproj +++ b/FabricObserver.Extensibility/FabricObserver.Extensibility.csproj @@ -5,8 +5,8 @@ FabricObserver Copyright © 2023 FabricObserver - 3.2.14 - 3.2.14 + 3.2.15 + 3.2.15 CA1416 diff --git a/FabricObserver.nuspec.template b/FabricObserver.nuspec.template index d0fdd23a..f34be7b5 100644 --- a/FabricObserver.nuspec.template +++ b/FabricObserver.nuspec.template @@ -2,12 +2,9 @@ %PACKAGE_ID% - 3.2.14 + 3.2.15 -- *Breaking Change*: Telemetry configuration settings are now required to be overridden in ApplicationManifest.xml to support versionless, parameter-only application upgrades for telemetry settings. See [Issue 292](https://github.com/microsoft/service-fabric-observer/issues/292) for details. Just move your related settings' Value strings from Settings.xml to ApplicationManifest.xml app parameter (the names of these settings are the same). -- Bug fixes and performance improvements to address AppObserver (in concurrency mode) consuming high CPU if misconfigured. There are now throttles and guardrails in place that will override any related user configuration that is deemed "dangerous" with respect to CPU consumption. -- Bug fix in app param update for log path and max archive lifetime settings. -- Updated nuget package dependencies to latest versions. +- Code cleanup and refactoring. Microsoft MIT @@ -16,7 +13,7 @@ icon.png fonuget.md en-US - This package contains the FabricObserver(FO) Application - built for .NET 6.0 and SF Runtime 9.x. 
FO a highly configurable and extensible resource usage watchdog service that is designed to be run in Azure Service Fabric Windows and Linux clusters. This package contains the entire application and can be used to build .NET Standard 2.0 observer plugins. NOTE: If you want to target .NET 6 for your plugins, then you must use Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.14 nuget package to build them. + This package contains the FabricObserver(FO) Application - built for .NET 6.0 and SF Runtime 9.x. FO a highly configurable and extensible resource usage watchdog service that is designed to be run in Azure Service Fabric Windows and Linux clusters. This package contains the entire application and can be used to build .NET Standard 2.0 observer plugins. NOTE: If you want to target .NET 6 for your plugins, then you must use Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.15 nuget package to build them. diff --git a/FabricObserver.sln b/FabricObserver.sln index 9374ff23..2b2a0050 100644 --- a/FabricObserver.sln +++ b/FabricObserver.sln @@ -35,10 +35,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution README.md = README.md SECURITY.md = SECURITY.md Documentation\Deployment\service-fabric-cluster-observer.json = Documentation\Deployment\service-fabric-cluster-observer.json - Documentation\Deployment\service-fabric-observer.json = Documentation\Deployment\service-fabric-observer.json Documentation\Deployment\service-fabric-cluster-observer.v2.2.8.parameters.json = Documentation\Deployment\service-fabric-cluster-observer.v2.2.8.parameters.json - Documentation\Deployment\service-fabric-observer.v3.2.13.parameters.json = Documentation\Deployment\service-fabric-observer.v3.2.13.parameters.json + Documentation\Deployment\service-fabric-observer.json = Documentation\Deployment\service-fabric-observer.json Documentation\Using.md = Documentation\Using.md + 
Documentation\Deployment\service-fabric-observer.v3.2.15.parameters.json = Documentation\Deployment\service-fabric-observer.v3.2.15.parameters.json EndProjectSection EndProject Project("{A07B5EB6-E848-4116-A8D0-A826331D98C6}") = "ClusterObserverApp", "ClusterObserverApp\ClusterObserverApp.sfproj", "{BD5D216F-5F89-4CC4-92FD-D6FDEC5A19AD}" diff --git a/FabricObserver/FabricObserver.csproj b/FabricObserver/FabricObserver.csproj index 2d0dca0e..2e762ee8 100644 --- a/FabricObserver/FabricObserver.csproj +++ b/FabricObserver/FabricObserver.csproj @@ -11,8 +11,8 @@ True Copyright © 2022 FabricObserver - 3.2.14 - 3.2.14 + 3.2.15 + 3.2.15 true true FabricObserver.Program diff --git a/FabricObserver/Observers/AppObserver.cs b/FabricObserver/Observers/AppObserver.cs index 35fe0d72..ee53df2d 100644 --- a/FabricObserver/Observers/AppObserver.cs +++ b/FabricObserver/Observers/AppObserver.cs @@ -395,12 +395,6 @@ public override Task ReportAsync(CancellationToken token) } childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); - - // Remove children from resource metric dictionary (we don't want to report on the child procs individually). - foreach (var item in childProcDictionary) - { - _ = AllAppCpuData.TryRemove(item); - } } // Parent's and aggregated (summed) descendant process data (if any). @@ -441,12 +435,6 @@ public override Task ReportAsync(CancellationToken token) } childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); - - // Remove children from resource metric dictionary (we don't want to report on the child procs individually). 
- foreach (var item in childProcDictionary) - { - _ = AllAppMemDataMb.TryRemove(item); - } } ProcessResourceDataReportHealth( @@ -486,12 +474,6 @@ public override Task ReportAsync(CancellationToken token) } childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); - - // Remove children from resource metric dictionary (we don't want to report on the child procs individually). - foreach (var item in childProcDictionary) - { - _ = AllAppMemDataPercent.TryRemove(item); - } } ProcessResourceDataReportHealth( @@ -533,12 +515,6 @@ public override Task ReportAsync(CancellationToken token) } childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); - - // Remove children from resource metric dictionary (we don't want to report on the child procs individually). - foreach (var item in childProcDictionary) - { - _ = AllAppPrivateBytesDataMb.TryRemove(item); - } } ProcessResourceDataReportHealth( @@ -581,12 +557,6 @@ public override Task ReportAsync(CancellationToken token) } childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); - - // Remove children from resource metric dictionary (we don't want to report on the child procs individually). - foreach (var item in childProcDictionary) - { - _ = AllAppPrivateBytesDataPercent.TryRemove(item); - } } ProcessResourceDataReportHealth( @@ -630,12 +600,6 @@ public override Task ReportAsync(CancellationToken token) } childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); - - // Remove children from resource metric dictionary (we don't want to report on the child procs individually). 
- foreach (var item in childProcDictionary) - { - _ = AllAppRGMemoryUsagePercent.TryRemove(item); - } } ProcessResourceDataReportHealth( @@ -675,12 +639,6 @@ public override Task ReportAsync(CancellationToken token) } childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); - - // Remove children from resource metric dictionary (we don't want to report on the child procs individually). - foreach (var item in childProcDictionary) - { - _ = AllAppRGMemoryUsagePercent.TryRemove(item); - } } ProcessResourceDataReportHealth( @@ -720,13 +678,7 @@ public override Task ReportAsync(CancellationToken token) _ = childProcDictionary.TryAdd(frud.Key, frud.Value); } - childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); - - // Remove children from resource metric dictionary (we don't want to report on the child procs individually). - foreach (var item in childProcDictionary) - { - _ = AllAppTotalActivePortsData.TryRemove(item); - } + childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); } ProcessResourceDataReportHealth( @@ -765,13 +717,7 @@ public override Task ReportAsync(CancellationToken token) _ = childProcDictionary.TryAdd(frud.Key, frud.Value); } - childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); - - // Remove children from resource metric dictionary (we don't want to report on the child procs individually). 
- foreach (var item in childProcDictionary) - { - _ = AllAppEphemeralPortsData.TryRemove(item); - } + childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); } ProcessResourceDataReportHealth( @@ -810,13 +756,7 @@ public override Task ReportAsync(CancellationToken token) _ = childProcDictionary.TryAdd(frud.Key, frud.Value); } - childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); - - // Remove children from resource metric dictionary (we don't want to report on the child procs individually). - foreach (var item in childProcDictionary) - { - _ = AllAppEphemeralPortsDataPercent.TryRemove(item); - } + childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); } ProcessResourceDataReportHealth( @@ -855,13 +795,7 @@ public override Task ReportAsync(CancellationToken token) _ = childProcDictionary.TryAdd(frud.Key, frud.Value); } - childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); - - // Remove children from resource metric dictionary (we don't want to report on the child procs individually). - foreach (var item in childProcDictionary) - { - _ = AllAppHandlesData.TryRemove(item); - } + childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); } ProcessResourceDataReportHealth( @@ -900,13 +834,7 @@ public override Task ReportAsync(CancellationToken token) _ = childProcDictionary.TryAdd(frud.Key, frud.Value); } - childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); - - // Remove children from resource metric dictionary (we don't want to report on the child procs individually). 
- foreach (var item in childProcDictionary) - { - _ = AllAppThreadsData.TryRemove(item); - } + childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); } ProcessResourceDataReportHealth( @@ -945,13 +873,7 @@ public override Task ReportAsync(CancellationToken token) _ = childProcDictionary.TryAdd(frud.Key, frud.Value); } - childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); - - // Remove children from resource metric dictionary (we don't want to report on the child procs individually). - foreach (var item in childProcDictionary) - { - _ = AllAppKvsLvidsData.TryRemove(item); - } + childProcCount = ProcessChildProcs(ref childProcDictionary, ref childProcessTelemetryDataList, repOrInst, app, ref parentFrud, token); } // FO will warn if the stateful (Actor, for example) service process has used 75% or greater of available LVIDs. This is not configurable (and a temporary feature). diff --git a/FabricObserver/Observers/ObserverManager.cs b/FabricObserver/Observers/ObserverManager.cs index 7ccd4a2f..88ef7d99 100644 --- a/FabricObserver/Observers/ObserverManager.cs +++ b/FabricObserver/Observers/ObserverManager.cs @@ -23,7 +23,6 @@ using System.Runtime; using FabricObserver.Utilities.ServiceFabric; using ConfigurationSettings = System.Fabric.Description.ConfigurationSettings; -using Microsoft.VisualBasic; namespace FabricObserver.Observers { @@ -54,7 +53,7 @@ private List Observers private CancellationTokenSource linkedSFRuntimeObserverTokenSource; // Folks often use their own version numbers. This is for internal diagnostic telemetry. 
- private const string InternalVersionNumber = "3.2.14"; + private const string InternalVersionNumber = "3.2.15"; private static FabricClient FabricClientInstance => FabricClientUtilities.FabricClientSingleton; diff --git a/FabricObserver/PackageRoot/Config/Settings.xml b/FabricObserver/PackageRoot/Config/Settings.xml index 52c01e9a..26abbf3d 100644 --- a/FabricObserver/PackageRoot/Config/Settings.xml +++ b/FabricObserver/PackageRoot/Config/Settings.xml @@ -50,15 +50,15 @@ but you can implement whatever provider you want. See IObserverTelemetry interface. --> - + + NOTE: Value must now be set in ApplicationManifest.xml. This is a *breaking change* in version 3.2.15. --> + NOTE: Value must now be set in ApplicationManifest.xml. This is a *breaking change* in version 3.2.15.--> @@ -66,7 +66,7 @@ + NOTE: Values must now be set in ApplicationManifest.xml. This is a *breaking change* in version 3.2.15. --> diff --git a/FabricObserver/PackageRoot/Data/Plugins/Readme.txt b/FabricObserver/PackageRoot/Data/Plugins/Readme.txt index e61ec389..b087d9aa 100644 --- a/FabricObserver/PackageRoot/Data/Plugins/Readme.txt +++ b/FabricObserver/PackageRoot/Data/Plugins/Readme.txt @@ -7,8 +7,8 @@ Note that the observer API lives in its own library, FabricObserver.Extensibilit 1. Create a new .NET 6 Library project. 2. Install the same version of the Microsoft.ServiceFabricApps.FabricObserver.Extensibility nupkg from https://www.nuget.org/profiles/ServiceFabricApps as the version of FabricObserver you are deploying. - E.g., 3.2.14 if you are going to deploy FO 3.2.14. - NOTE: You can also consume the entire FabricObserver 3.2.14 nupkg to build your plugin. Please see the SampleObserverPlugin project's csproj file for more information. + E.g., 3.2.15 if you are going to deploy FO 3.2.15. + NOTE: You can also consume the entire FabricObserver 3.2.15 nupkg to build your plugin. Please see the SampleObserverPlugin project's csproj file for more information. 3. Write an observer! 
@@ -68,5 +68,5 @@ cd C:\Users\me\source\repos\service-fabric-observer ./Build-FabricObserver ./Build-NugetPackages -The output from the above commands contains FabricObserver platform-specific nupkgs and a package you have to use for plugin authoring named Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.14.nupkg. Nupkg files from above command would be located in +The output from the above commands contains FabricObserver platform-specific nupkgs and a package you have to use for plugin authoring named Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.15.nupkg. Nupkg files from above command would be located in C:\Users\me\source\repos\service-fabric-observer\bin\release\FabricObserver\Nugets. \ No newline at end of file diff --git a/FabricObserver/PackageRoot/ServiceManifest.xml b/FabricObserver/PackageRoot/ServiceManifest.xml index 6f62098b..06043154 100644 --- a/FabricObserver/PackageRoot/ServiceManifest.xml +++ b/FabricObserver/PackageRoot/ServiceManifest.xml @@ -1,6 +1,6 @@  @@ -9,7 +9,7 @@ This name must match the string used in RegisterServiceType call in Program.cs. 
--> - + install_lvid_perfcounter.bat @@ -25,10 +25,10 @@ - + - + \ No newline at end of file diff --git a/FabricObserver/PackageRoot/ServiceManifest_linux.xml b/FabricObserver/PackageRoot/ServiceManifest_linux.xml index 445d1db7..93a813d0 100644 --- a/FabricObserver/PackageRoot/ServiceManifest_linux.xml +++ b/FabricObserver/PackageRoot/ServiceManifest_linux.xml @@ -1,6 +1,6 @@  @@ -11,7 +11,7 @@ - + setcaps.sh @@ -27,10 +27,10 @@ - + - + \ No newline at end of file diff --git a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index ed202917..3f0ff899 100644 --- a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -1,9 +1,9 @@  - + - + @@ -22,8 +22,8 @@ This is primarily useful for scenarios where you have multiple instances of FO running on a node and you want to ensure that each instance writes ETW event data to different named sinks. This is more of an advanced scenario. In general, just leave this blank. --> - + @@ -152,7 +152,7 @@ - + @@ -241,15 +241,15 @@ - + + + - + @@ -279,8 +279,8 @@ - + +
@@ -447,9 +447,9 @@ - - --> + diff --git a/README.md b/README.md index 4c1a347a..c4023b06 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -## FabricObserver 3.2.14 +## FabricObserver 3.2.15 [![Deploy to Azure](https://aka.ms/deploytoazurebutton)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmicrosoft%2Fservice-fabric-observer%2Fmain%2FDocumentation%2FDeployment%2Fservice-fabric-observer.json) @@ -87,7 +87,7 @@ see [FOAzurePipeline.yaml](/FOAzurePipeline.yaml) for msazure devops build tasks .net6 installed (if you deploy VM images from Azure gallery, then they will not have .net6 installed), then you must deploy the SelfContained package. ### Deploy FabricObserver -**Note: You must deploy this version (3.2.14) to clusters that are running SF 9.0 and above. This version also requires .NET 6.** +**Note: You must deploy this version (3.2.15) to clusters that are running SF 9.0 and above. This version also requires .NET 6.** You can deploy FabricObserver (and ClusterObserver) using Visual Studio (if you build the sources yourself), PowerShell or ARM. Please note that this version of FabricObserver no longer supports the DefaultServices node in ApplicationManifest.xml. This means that should you deploy using PowerShell, you must create an instance of the service as the last command in your script. This was done to support ARM deployment, specifically. The StartupServices.xml file you see in the FabricHealerApp project now contains the service information once held in ApplicationManifest's DefaultServices node. Note that this information is primarily useful for deploying from Visual Studio. 
@@ -127,15 +127,15 @@ Connect-ServiceFabricCluster -ConnectionEndpoint @('sf-win-cluster.westus2.cloud #Copy $path contents (FO app package) to server: -Copy-ServiceFabricApplicationPackage -ApplicationPackagePath $path -CompressPackage -ApplicationPackagePathInImageStore FO3214 -TimeoutSec 1800 +Copy-ServiceFabricApplicationPackage -ApplicationPackagePath $path -CompressPackage -ApplicationPackagePathInImageStore FO3215 -TimeoutSec 1800 #Register FO ApplicationType: -Register-ServiceFabricApplicationType -ApplicationPathInImageStore FO3214 +Register-ServiceFabricApplicationType -ApplicationPathInImageStore FO3215 #Create FO application (if not already deployed at lesser version): -New-ServiceFabricApplication -ApplicationName fabric:/FabricObserver -ApplicationTypeName FabricObserverType -ApplicationTypeVersion 3.2.14 +New-ServiceFabricApplication -ApplicationName fabric:/FabricObserver -ApplicationTypeName FabricObserverType -ApplicationTypeVersion 3.2.15 #Create the Service instances (-1 means all nodes, which is what is required for FO): @@ -143,7 +143,7 @@ New-ServiceFabricService -Stateless -PartitionSchemeSingleton -ApplicationName f #OR if updating existing version: -Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationTypeVersion 3.2.14 -Monitored -FailureAction rollback +Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationTypeVersion 3.2.15 -Monitored -FailureAction rollback ``` ## Observer Model diff --git a/SampleObserverPlugin/SampleObserverPlugin.csproj b/SampleObserverPlugin/SampleObserverPlugin.csproj index 634d0fd3..23326888 100644 --- a/SampleObserverPlugin/SampleObserverPlugin.csproj +++ b/SampleObserverPlugin/SampleObserverPlugin.csproj @@ -19,17 +19,17 @@ - - - + diff --git a/XmlDiffPatchSF/Program.cs b/XmlDiffPatchSF/Program.cs index 0a5343e7..4f6ddf18 100644 --- a/XmlDiffPatchSF/Program.cs +++ b/XmlDiffPatchSF/Program.cs @@ -28,7 +28,7 @@ private static void 
Main(string[] args) "preceding the file extension.\n\n" + "**Note, if you have observer plugins, then you must supply true for [mergeExistingNodes] as the last argument to pull over your plugin settings as part of the merge.**.\n\n" + "Example:\n\n" + - "DiffPatchXml \"C:\\repos\\FO\\3.1.26\\configs\\ApplicationManifest.xml\" \"C:\\repos\\FO\\3.2.14\\configs\\ApplicationManifest.xml\"\n"); + "DiffPatchXml \"C:\\repos\\FO\\3.1.26\\configs\\ApplicationManifest.xml\" \"C:\\repos\\FO\\3.2.15\\configs\\ApplicationManifest.xml\"\n"); return; } diff --git a/foextlib.md b/foextlib.md index acef99fc..d3cbd958 100644 --- a/foextlib.md +++ b/foextlib.md @@ -1,4 +1,4 @@ -## FabricObserver Extensibility Library 3.2.14 +## FabricObserver Extensibility Library 3.2.15 FabricObserver.Extensibility is a .NET 6 library for building custom observers that extend FabricObserver's capabilities to match your needs. A custom observer is managed just like a built-in observer. diff --git a/fonuget.md b/fonuget.md index 9311e288..bd1f40f3 100644 --- a/fonuget.md +++ b/fonuget.md @@ -1,4 +1,4 @@ -## FabricObserver 3.2.14 +## FabricObserver 3.2.15 [**FabricObserver (FO)**](https://github.com/microsoft/service-fabric-observer) is a production-ready watchdog service with an easy-to-use extensibility model, written as a stateless, singleton Service Fabric **.NET 6** application that by default From 4cf1b7d28782993ce714a64acffebcab0f3cf884 Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Wed, 31 Jan 2024 15:23:19 -0800 Subject: [PATCH 02/11] Fixed RG and typos. 
--- FabricObserver/PackageRoot/Config/Settings.xml | 4 ++-- .../ApplicationPackageRoot/ApplicationManifest.xml | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/FabricObserver/PackageRoot/Config/Settings.xml b/FabricObserver/PackageRoot/Config/Settings.xml index 26abbf3d..d4ba5ff0 100644 --- a/FabricObserver/PackageRoot/Config/Settings.xml +++ b/FabricObserver/PackageRoot/Config/Settings.xml @@ -74,9 +74,9 @@ - - --> + diff --git a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index 3f0ff899..51d33994 100644 --- a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -2,8 +2,8 @@ - + @@ -22,8 +22,8 @@ This is primarily useful for scenarios where you have multiple instances of FO running on a node and you want to ensure that each instance writes ETW event data to different named sinks. This is more of an advanced scenario. In general, just leave this blank. --> - + @@ -75,7 +75,7 @@ - + @@ -278,9 +278,6 @@ - - -
@@ -439,6 +436,9 @@ + + +
From 5e783d3e63a5e415364a12fd51d7debeb8626c13 Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Fri, 2 Feb 2024 16:19:26 -0800 Subject: [PATCH 03/11] dev --- ClusterObserver.nuspec.template | 2 +- .../ApplicationManifest.xml | 2 +- FabricObserver.Extensibility.nuspec.template | 25 ++++++++----------- FabricObserver.Extensibility/ObserverBase.cs | 8 +++--- .../SampleObserverPlugin.csproj | 8 +++--- 5 files changed, 20 insertions(+), 25 deletions(-) diff --git a/ClusterObserver.nuspec.template b/ClusterObserver.nuspec.template index ee38a3b6..3ab3aea6 100644 --- a/ClusterObserver.nuspec.template +++ b/ClusterObserver.nuspec.template @@ -22,7 +22,7 @@ - + https://aka.ms/sf/FabricObserver diff --git a/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index 41897bbd..1bca1d7c 100644 --- a/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -11,8 +11,8 @@ - + diff --git a/FabricObserver.Extensibility.nuspec.template b/FabricObserver.Extensibility.nuspec.template index 484c7a70..43a7e162 100644 --- a/FabricObserver.Extensibility.nuspec.template +++ b/FabricObserver.Extensibility.nuspec.template @@ -4,12 +4,7 @@ %PACKAGE_ID% 3.2.15 -This is version is required for observer plugins that target FabricObserver 3.2.15 or ClusterObserver 2.2.8. -#### Changes -- Updated Logger implementatio to fix app param update bug related to log path and max archive lifetime settings not being honored. -- Updated ObserverBase to include guard rails to prevent high CPU consumption if an observer's MonitorDuration or MonitorSleepTimeMilliseconds settings are misconfigured (to high or too low, respectively). -- Performance improvements. -- Updated nuget package dependencies to latest versions. + Microsoft MIT @@ -18,21 +13,21 @@ This is version is required for observer plugins that target FabricObserver 3.2. 
icon.png foextlib.md en-US - This package contains the FabricObserver Extensibility library (.NET 6) for use in building FabricObserver and ClusterObserver observers and observer plugins. You need to ensure you use the same versions of this library and CO/FO if you are building observer plugins. + This package contains the FabricObserver Extensibility library (.NET 6) for use in building FabricObserver and ClusterObserver observer plugins in Visual Studio. Each time a new version of FabricObserver is released, a new version of this library (same as FabricObserver's version) will be released to the nuget.org gallery. Unless specified as part of a release with breaking changes that would impact building a plugin (exceedingly rare), you can use earlier versions of this library to build your plugins and they will successfully work with higher versions of FabricObserver/ClusterObserver. In other words, you do not need to rebuild your plugins each time a new version of FabricObserver or ClusterObserver is released unless you are required to, which will be made clear. - - - - - - - - + + + + + + + + https://aka.ms/sf/FabricObserver diff --git a/FabricObserver.Extensibility/ObserverBase.cs b/FabricObserver.Extensibility/ObserverBase.cs index d6e5fb6e..052fa3d9 100644 --- a/FabricObserver.Extensibility/ObserverBase.cs +++ b/FabricObserver.Extensibility/ObserverBase.cs @@ -576,11 +576,11 @@ public bool DumpWindowsServiceProcess(int processId, string procName, string met if (Directory.Exists(DumpsPath) && Directory.GetFiles(DumpsPath, $"{dumpKey}*.dmp", SearchOption.AllDirectories).Length >= MaxDumps) { ObserverLogger.LogWarning($"Reached maximum number({MaxDumps}) of {dumpKey} dmp files stored on local disk. Will not create dmp file. " + - $"If enabled, please make sure that AzureStorageObserver is configured correctly. " + - $"Will attempt to delete old (>= 1 day) local files now."); + $"If enabled, please make sure that AzureStorageUploadObserver is configured correctly. 
" + + $"Will attempt to delete old (>= 1 day) local {dumpKey} files now."); // Clean out old dmp files, if any. Generally, there will only be some dmp files remaining on disk if customer has not configured - // AzureStorageObserver correctly or some error occurred during some stage of the upload process. + // AzureStorageUploadObserver correctly or some error occurred during some stage of the upload process. Logger.TryCleanFolder(DumpsPath, $"{dumpKey}*.dmp", TimeSpan.FromDays(1)); return false; } @@ -656,7 +656,7 @@ public bool DumpWindowsServiceProcess(int processId, string procName, string met dumpFileName += $"_{DateTime.Now:ddMMyyyyHHmmssFFF}.dmp"; // Check disk space availability before writing dump file. - string driveName = DumpsPath.Substring(0, 2); + string driveName = DumpsPath[..2]; if (DiskUsage.GetCurrentDiskSpaceUsedPercent(driveName) > 90) { diff --git a/SampleObserverPlugin/SampleObserverPlugin.csproj b/SampleObserverPlugin/SampleObserverPlugin.csproj index 23326888..7d977fa6 100644 --- a/SampleObserverPlugin/SampleObserverPlugin.csproj +++ b/SampleObserverPlugin/SampleObserverPlugin.csproj @@ -14,18 +14,18 @@ - + - - - + From e783b8a8c5125cee8a30010e7c66f9a656c73e8d Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Thu, 15 Feb 2024 15:43:07 -0800 Subject: [PATCH 04/11] 3.2.15: Telemetry config update, updated tests, code improvements. 
--- .../ApplicationManifest.xml | 2 +- FabricObserver.Extensibility/ObserverBase.cs | 14 +- .../Utilities/ConfigSettings.cs | 14 + .../Utilities/NativeMethods.cs | 11 +- .../Utilities/ObserverConstants.cs | 1 + .../ServiceFabric/FabricClientUtilities.cs | 9 +- FabricObserver/Observers/AppObserver.cs | 47 ++- FabricObserver/Observers/ObserverManager.cs | 3 +- .../PackageRoot/Config/Settings.xml | 25 +- .../PackageRoot/ServiceManifest.xml | 6 - .../ApplicationManifest.xml | 57 +-- FabricObserverTests/ObserverTests.cs | 340 ++++-------------- .../PackageRoot/Config/Settings.xml | 17 +- 13 files changed, 222 insertions(+), 324 deletions(-) diff --git a/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index 1bca1d7c..41897bbd 100644 --- a/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -11,8 +11,8 @@ - + diff --git a/FabricObserver.Extensibility/ObserverBase.cs b/FabricObserver.Extensibility/ObserverBase.cs index 052fa3d9..bb37f5fc 100644 --- a/FabricObserver.Extensibility/ObserverBase.cs +++ b/FabricObserver.Extensibility/ObserverBase.cs @@ -360,6 +360,8 @@ public CsvFileWriteFormat CsvWriteFormat get; set; } + public bool EmitRawMetricTelemetry => ConfigurationSettings?.EmitRawMetricTelemetry ?? false; + /// /// Base type constructor for all observers (both built-in and plugin impls). /// @@ -909,7 +911,7 @@ public void ProcessResourceDataReportHealth( // Telemetry - This is informational, per reading telemetry, healthstate is irrelevant here. If the process has children, then don't emit this raw data since it will already // be contained in the ChildProcessTelemetry data instances and AppObserver will have already emitted it. // Enable this for your observer if you want to send data to ApplicationInsights or LogAnalytics for each resource usage observation it makes per specified metric. 
- if (IsTelemetryEnabled && replicaOrInstance?.ChildProcesses == null) + if (IsTelemetryEnabled && replicaOrInstance?.ChildProcesses == null && EmitRawMetricTelemetry) { _ = TelemetryClient?.ReportMetricAsync(telemetryData, Token); } @@ -960,14 +962,14 @@ public void ProcessResourceDataReportHealth( }; } - if (IsTelemetryEnabled) + if (IsEtwEnabled) { - _ = TelemetryClient?.ReportMetricAsync(telemetryData, Token); + ObserverLogger.LogEtw(ObserverConstants.FabricObserverETWEventName, telemetryData); } - if (IsEtwEnabled) + if (IsTelemetryEnabled && EmitRawMetricTelemetry) { - ObserverLogger.LogEtw(ObserverConstants.FabricObserverETWEventName, telemetryData); + _ = TelemetryClient?.ReportMetricAsync(telemetryData, Token); } } @@ -1453,7 +1455,7 @@ public DateTime GetProcessStartTime(int processId) } catch (Exception e) when (e is Win32Exception or ArgumentException or InvalidOperationException) { - ObserverLogger.LogInfo($"Unable to get process start time: {e.Message}. This means process {processId} is no longer running or FO can't access it due to access rights."); + ObserverLogger.LogInfo($"Unable to get start time for process {processId}: {e.Message}."); } return DateTime.MinValue; diff --git a/FabricObserver.Extensibility/Utilities/ConfigSettings.cs b/FabricObserver.Extensibility/Utilities/ConfigSettings.cs index 8b2f6245..3ec66af7 100644 --- a/FabricObserver.Extensibility/Utilities/ConfigSettings.cs +++ b/FabricObserver.Extensibility/Utilities/ConfigSettings.cs @@ -52,6 +52,11 @@ public bool IsObserverTelemetryEnabled get; set; } + public bool EmitRawMetricTelemetry + { + get; set; + } + public TimeSpan AsyncTimeout { get; @@ -109,6 +114,15 @@ private void SetConfigSettings() IsObserverTelemetryEnabled = telemetryEnabled; } + // Emit raw metric telemetry per run? 
+ if (bool.TryParse( + GetConfigSettingValue( + ObserverConstants.EmitRawMetricTelemetry), + out bool emitRawObserverTelemetry)) + { + EmitRawMetricTelemetry = emitRawObserverTelemetry; + } + // Observer etw enabled? if (bool.TryParse( GetConfigSettingValue( diff --git a/FabricObserver.Extensibility/Utilities/NativeMethods.cs b/FabricObserver.Extensibility/Utilities/NativeMethods.cs index b788e0b1..2ba81778 100644 --- a/FabricObserver.Extensibility/Utilities/NativeMethods.cs +++ b/FabricObserver.Extensibility/Utilities/NativeMethods.cs @@ -54,6 +54,7 @@ public static class NativeMethods private static Dictionary> descendantsDictionary; private static Dictionary currentSFServiceProcCache; + public const int ERROR_ACCESS_DENIED = 5; [Flags] public enum CreateToolhelp32SnapshotFlags : uint @@ -1809,7 +1810,6 @@ public static int GetProcessIdFromName(string procName) /// /// The id of the process. /// The start time of the process. - /// A Win32Exception exception will be thrown if this specified process id is not found or if it is non-accessible due to its access control level. public static DateTime GetProcessStartTime(int procId) { SafeProcessHandle procHandle = null; @@ -1818,14 +1818,11 @@ public static DateTime GetProcessStartTime(int procId) { procHandle = GetSafeProcessHandle(procId); - if (procHandle.IsInvalid || procHandle.IsClosed) - { - throw new Win32Exception(Marshal.GetLastWin32Error()); - } - if (!GetProcessTimes(procHandle, out FILETIME ftCreation, out _, out _, out _)) { - throw new Win32Exception(Marshal.GetLastWin32Error()); + // DEBUG. 
+ logger.LogInfo("GetProcessStartTime failed with Win32 error code " + Marshal.GetLastWin32Error() + " for process id " + procId + "."); + return DateTime.MinValue; } try diff --git a/FabricObserver.Extensibility/Utilities/ObserverConstants.cs b/FabricObserver.Extensibility/Utilities/ObserverConstants.cs index ef7af357..4020187b 100644 --- a/FabricObserver.Extensibility/Utilities/ObserverConstants.cs +++ b/FabricObserver.Extensibility/Utilities/ObserverConstants.cs @@ -166,6 +166,7 @@ public sealed class ObserverConstants public const string AppInsightsConnectionString = "AppInsightsConnectionString"; public const string TelemetryProviderEnabled = "EnableTelemetryProvider"; public const string TelemetryProviderType = "TelemetryProvider"; + public const string EmitRawMetricTelemetry = "EmitRawMetricTelemetry"; public const string LogAnalyticsLogTypeParameter = "LogAnalyticsLogType"; public const string LogAnalyticsSharedKeyParameter = "LogAnalyticsSharedKey"; public const string LogAnalyticsWorkspaceIdParameter = "LogAnalyticsWorkspaceId"; diff --git a/FabricObserver.Extensibility/Utilities/ServiceFabric/FabricClientUtilities.cs b/FabricObserver.Extensibility/Utilities/ServiceFabric/FabricClientUtilities.cs index 3453164d..a00dbcbb 100644 --- a/FabricObserver.Extensibility/Utilities/ServiceFabric/FabricClientUtilities.cs +++ b/FabricObserver.Extensibility/Utilities/ServiceFabric/FabricClientUtilities.cs @@ -592,6 +592,12 @@ any processes (children) that the service process (parent) created/spawned. */ if (isWindows) { replicaInfo.HostProcessName = NativeMethods.GetProcessNameFromId((int)replicaInfo.HostProcessId); + + if (replicaInfo.HostProcessName == null) + { + // Process no longer running or access denied. + return; + } } else { @@ -604,7 +610,8 @@ any processes (children) that the service process (parent) created/spawned. 
*/ } catch (Exception e) when (e is ArgumentException or InvalidOperationException or NotSupportedException) { - + // Process no longer running. + return; } } diff --git a/FabricObserver/Observers/AppObserver.cs b/FabricObserver/Observers/AppObserver.cs index ee53df2d..7b5fd060 100644 --- a/FabricObserver/Observers/AppObserver.cs +++ b/FabricObserver/Observers/AppObserver.cs @@ -901,7 +901,7 @@ public override Task ReportAsync(CancellationToken token) ObserverLogger.LogEtw(ObserverConstants.FabricObserverETWEventName, childProcessTelemetryDataList.ToList()); } - if (IsTelemetryEnabled) + if (IsTelemetryEnabled && EmitRawMetricTelemetry) { _ = TelemetryClient?.ReportMetricAsync(childProcessTelemetryDataList.ToList(), token); } @@ -3517,17 +3517,24 @@ any processes (children) that the service process (parent) created/spawned. */ if (replicaInfo != null && replicaInfo.HostProcessId > 0 && !ReplicaOrInstanceList.Any(r => r.HostProcessId == replicaInfo.HostProcessId)) { - // This will be DateTime.MinValue when the target process is inaccessible due to user privilege. - replicaInfo.HostProcessStartTime = GetProcessStartTime((int)replicaInfo.HostProcessId); - if (IsWindows) { - // This will be null if GetProcessNameFromId fails. It will fail when the target process is inaccessible due to user privilege. + // This will be null if GetProcessNameFromId fails. It will fail when the target process is inaccessible due to user privilege, for example. replicaInfo.HostProcessName = NativeMethods.GetProcessNameFromId((int)replicaInfo.HostProcessId); + int errorCode = Marshal.GetLastWin32Error(); - if (replicaInfo.HostProcessName == null || replicaInfo.HostProcessStartTime == DateTime.MinValue) + if (replicaInfo.HostProcessName == null) { - SendServiceProcessElevatedWarning(replicaInfo.ApplicationName.OriginalString, replicaInfo.ServiceName.OriginalString); + // DEBUG (When AppObserverEnableVerboseLogging is true). 
+ ObserverLogger.LogInfo($"Unable to get information for process {replicaInfo.HostProcessId} ({replicaInfo.ServiceName.OriginalString}). Win32 Error Code: {errorCode}."); + + // Make sure the issue is a user privilege problem before emitting the related warning. + if (errorCode == NativeMethods.ERROR_ACCESS_DENIED) + { + SendServiceProcessElevatedWarning(replicaInfo.ApplicationName?.OriginalString, replicaInfo.ServiceName?.OriginalString); + } + + // Do not add replica to repOrInst list. return; } } @@ -3542,11 +3549,32 @@ any processes (children) that the service process (parent) created/spawned. */ } catch (Exception e) when (e is ArgumentException or InvalidOperationException or NotSupportedException) { - // Do not add to repOrInst list.. + // Do not add replica to repOrInst list. return; } } + // This will be DateTime.MinValue when the target process is inaccessible due to user privilege, for example. + // We shouldn't get here given the code above if some process is running at a higher priv than FO.. but if the process *is* inaccessible for some reason at this point, + // then we'll not add the replica to the list. + replicaInfo.HostProcessStartTime = GetProcessStartTime((int)replicaInfo.HostProcessId); + + if (IsWindows) + { + int errorCode = Marshal.GetLastWin32Error(); + + if (errorCode == NativeMethods.ERROR_ACCESS_DENIED) + { + SendServiceProcessElevatedWarning(replicaInfo.ApplicationName?.OriginalString, replicaInfo.ServiceName?.OriginalString); + } + } + + if (replicaInfo.HostProcessStartTime == DateTime.MinValue) + { + // Do not add replica to repOrInst list. + return; + } + ProcessServiceConfiguration(appTypeName, deployedReplica.CodePackageName, replicaInfo); // null HostProcessName means the service process can't be monitored. If Fabric is the hosting process, then this is a Guest Executable or helper code package. @@ -3572,6 +3600,9 @@ private void SendServiceProcessElevatedWarning(string appName, string serviceNam return; } + // DEBUG. 
+ ObserverLogger.LogInfo($"{serviceName} is running as Admin or System user on Windows and can't be monitored by FabricObserver, which is running as Network Service."); + if (ObserverManager.ObserverFailureHealthStateLevel != HealthState.Unknown) { string message = $"{serviceName} is running as Admin or System user on Windows and can't be monitored by FabricObserver, which is running as Network Service. " + diff --git a/FabricObserver/Observers/ObserverManager.cs b/FabricObserver/Observers/ObserverManager.cs index 88ef7d99..73805163 100644 --- a/FabricObserver/Observers/ObserverManager.cs +++ b/FabricObserver/Observers/ObserverManager.cs @@ -42,6 +42,7 @@ private List Observers private const string LVIDCounterName = "Long-Value Maximum LID"; private readonly string nodeName; private readonly TimeSpan OperationalTelemetryRunInterval = TimeSpan.FromDays(1); + private readonly TimeSpan NewReleaseCheckInterval = TimeSpan.FromDays(7); private readonly CancellationToken runAsyncToken; private readonly string sfVersion; private readonly bool isWindows; @@ -271,7 +272,7 @@ public async Task StartObserversAsync() } // Check for new version once a day. - if (!(shutdownSignaled || runAsyncToken.IsCancellationRequested) && DateTime.UtcNow.Subtract(LastVersionCheckDateTime) >= OperationalTelemetryRunInterval) + if (!(shutdownSignaled || runAsyncToken.IsCancellationRequested) && DateTime.UtcNow.Subtract(LastVersionCheckDateTime) >= NewReleaseCheckInterval) { await CheckGithubForNewVersionAsync(); LastVersionCheckDateTime = DateTime.UtcNow; diff --git a/FabricObserver/PackageRoot/Config/Settings.xml b/FabricObserver/PackageRoot/Config/Settings.xml index d4ba5ff0..e7e30e94 100644 --- a/FabricObserver/PackageRoot/Config/Settings.xml +++ b/FabricObserver/PackageRoot/Config/Settings.xml @@ -55,8 +55,8 @@ - - + + @@ -74,9 +74,9 @@ - + @@ -140,6 +140,10 @@ + + + @@ -239,6 +243,9 @@
+ + @@ -257,6 +264,9 @@ Enabling this will put fabric:/System into Warning when either Fabric or FabricRM have consumed 75% of Maximum number of LVIDs (which is int.MaxValue per process). --> + + @@ -309,6 +319,9 @@
+ + @@ -335,7 +348,6 @@ - @@ -388,6 +400,9 @@ + + diff --git a/FabricObserver/PackageRoot/ServiceManifest.xml b/FabricObserver/PackageRoot/ServiceManifest.xml index 06043154..ced20ed0 100644 --- a/FabricObserver/PackageRoot/ServiceManifest.xml +++ b/FabricObserver/PackageRoot/ServiceManifest.xml @@ -10,12 +10,6 @@ - - - install_lvid_perfcounter.bat - CodePackage - - FabricObserver diff --git a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index 51d33994..1040c0af 100644 --- a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -2,9 +2,10 @@ - + + - + - + - - + + + + @@ -57,6 +61,13 @@ + + + + + + @@ -75,7 +86,7 @@ - + @@ -142,8 +153,7 @@ Default value is 25. Please read the related documentation for more information on MaxDegreeOfParallelism. This setting is provided here so you can dial thread usage up and down based on your needs, if you need to. See https://learn.microsoft.com/dotnet/api/system.threading.tasks.paralleloptions.maxdegreeofparallelism?view=net-6.0 --> - + - + @@ -241,9 +250,9 @@ - +
@@ -294,6 +307,7 @@
+ @@ -311,6 +325,7 @@ + @@ -347,6 +362,7 @@
+ @@ -413,6 +429,7 @@ + @@ -436,20 +453,20 @@ - - -
- - - + + + diff --git a/FabricObserverTests/ObserverTests.cs b/FabricObserverTests/ObserverTests.cs index 071ed74a..0dfc7f6d 100644 --- a/FabricObserverTests/ObserverTests.cs +++ b/FabricObserverTests/ObserverTests.cs @@ -101,6 +101,8 @@ public static async Task TestClassStartUp(TestContext testContext) Guid.NewGuid(), long.MaxValue); + ObserverManager.FabricServiceContext = TestServiceContext; + // Ensure clean test app state. await RemoveTestApplicationsAsync(); @@ -238,7 +240,7 @@ private static async Task DeployHealthMetricsAppAsync() await FabricClientSingleton.ApplicationManager.ProvisionApplicationAsync(packagePathInImageStore); // Create HealthMetrics app instance. - ApplicationDescription appDesc = new(new Uri(appName), appType, appVersion/*, nameValueCollection */); + ApplicationDescription appDesc = new(new Uri(appName), appType, appVersion); await FabricClientSingleton.ApplicationManager.CreateApplicationAsync(appDesc); // Create the HealthMetrics service descriptions. @@ -267,15 +269,15 @@ private static async Task DeployHealthMetricsAppAsync() await FabricClientSingleton.ServiceManager.CreateServiceAsync(serviceDescription1); await FabricClientSingleton.ServiceManager.CreateServiceAsync(serviceDescription2); - // This is a hack. Withouth this timeout, the deployed test services may not have populated the FC cache? + // This is a hack. Without this timeout, the deployed test services may not have populated the FC cache? // You may need to increase this value depending upon your dev machine? You'll find out.. 
- await Task.Delay(TimeSpan.FromSeconds(10)); + await Task.Delay(TimeSpan.FromSeconds(15)); } catch (FabricException fe) { if (fe.ErrorCode == FabricErrorCode.ApplicationAlreadyExists) { - await FabricClientSingleton.ApplicationManager.DeleteApplicationAsync(new DeleteApplicationDescription(new Uri(appName)) { ForceDelete = true}); + await FabricClientSingleton.ApplicationManager.DeleteApplicationAsync(new DeleteApplicationDescription(new Uri(appName)) { ForceDelete = true }); await DeployHealthMetricsAppAsync(); } else if (fe.ErrorCode == FabricErrorCode.ApplicationTypeAlreadyExists) @@ -723,9 +725,9 @@ private static async Task EnsureTestServicesExistAsync(string appName, int [TestMethod] public void AppObserver_Constructor_Test() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; + + + using var obs = new AppObserver(TestServiceContext); @@ -737,9 +739,9 @@ public void AppObserver_Constructor_Test() [TestMethod] public void AzureStorageUploadObserver_Constructor_Test() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; + + + using var obs = new AzureStorageUploadObserver(TestServiceContext); @@ -751,9 +753,9 @@ public void AzureStorageUploadObserver_Constructor_Test() [TestMethod] public void CertificateObserver_Constructor_test() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; + + + using var obs = new CertificateObserver(TestServiceContext); @@ -765,9 +767,9 @@ public void CertificateObserver_Constructor_test() [TestMethod] public void ContainerObserver_Constructor_test() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; + + + using var obs = new 
ContainerObserver(TestServiceContext); @@ -779,9 +781,9 @@ public void ContainerObserver_Constructor_test() [TestMethod] public void DiskObserver_Constructor_Test() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; + + + ObserverManager.ObserverWebAppDeployed = true; using var obs = new DiskObserver(TestServiceContext); @@ -794,9 +796,9 @@ public void DiskObserver_Constructor_Test() [TestMethod] public void FabricSystemObserver_Constructor_Test() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; + + + using var obs = new FabricSystemObserver(TestServiceContext); @@ -808,9 +810,9 @@ public void FabricSystemObserver_Constructor_Test() [TestMethod] public void NetworkObserver_Constructor_Test() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; + + + ObserverManager.ObserverWebAppDeployed = true; using var obs = new NetworkObserver(TestServiceContext); @@ -823,9 +825,9 @@ public void NetworkObserver_Constructor_Test() [TestMethod] public void NodeObserver_Constructor_Test() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; + + + using var obs = new NodeObserver(TestServiceContext); @@ -837,9 +839,9 @@ public void NodeObserver_Constructor_Test() [TestMethod] public void OSObserver_Constructor_Test() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; + + + using var obs = new OSObserver(TestServiceContext); @@ -853,10 +855,10 @@ public void SFConfigurationObserver_Constructor_Test() { using var client = new FabricClient(); - ObserverManager.FabricServiceContext = 
TestServiceContext; + + + - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; ObserverManager.ObserverWebAppDeployed = true; using var obs = new SFConfigurationObserver(TestServiceContext); @@ -874,9 +876,9 @@ public void SFConfigurationObserver_Constructor_Test() [TestMethod] public async Task AppObserver_InitializeAsync_MalformedTargetAppValue_GeneratesWarning() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; + + + using var obs = new AppObserver(TestServiceContext) { @@ -892,9 +894,9 @@ public async Task AppObserver_InitializeAsync_MalformedTargetAppValue_GeneratesW [TestMethod] public async Task AppObserver_InitializeAsync_InvalidJson_GeneratesWarning() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; + + + using var obs = new AppObserver(TestServiceContext) { @@ -910,9 +912,9 @@ public async Task AppObserver_InitializeAsync_InvalidJson_GeneratesWarning() [TestMethod] public async Task AppObserver_InitializeAsync_NoConfigFound_GeneratesWarning() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; + + + using var obs = new AppObserver(TestServiceContext) { @@ -930,9 +932,9 @@ public async Task AppObserver_InitializeAsync_NoConfigFound_GeneratesWarning() [TestMethod] public async Task AppObserver_InitializeAsync_TargetAppType_ServiceExcludeList_EnsureExcluded() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; + + + using var obs = new AppObserver(TestServiceContext) { @@ -948,9 +950,9 @@ public async Task AppObserver_InitializeAsync_TargetAppType_ServiceExcludeList_E [TestMethod] public async Task 
AppObserver_InitializeAsync_TargetApp_ServiceExcludeList_EnsureExcluded() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; + + + using var obs = new AppObserver(TestServiceContext) { @@ -966,10 +968,6 @@ public async Task AppObserver_InitializeAsync_TargetApp_ServiceExcludeList_Ensur [TestMethod] public async Task AppObserver_InitializeAsync_TargetAppType_ServiceIncludeList_EnsureIncluded() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver.config.apptype.include.json") @@ -984,10 +982,6 @@ public async Task AppObserver_InitializeAsync_TargetAppType_ServiceIncludeList_E [TestMethod] public async Task AppObserver_InitializeAsync_TargetApp_ServiceIncludeList_EnsureIncluded() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver.config.app.include.json") @@ -1004,10 +998,6 @@ public async Task AppObserver_InitializeAsync_TargetApp_ServiceIncludeList_Ensur [TestMethod] public async Task AppObserver_InitializeAsync_TargetAppType_MultiServiceExcludeList_EnsureNotExcluded() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver.config.apptype.multi-exclude.json") @@ -1027,10 +1017,6 @@ public async Task 
AppObserver_InitializeAsync_TargetAppType_MultiServiceExcludeL [TestMethod] public async Task AppObserver_InitializeAsync_TargetApp_MultiServiceExcludeList_EnsureNotExcluded() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver.config.app.multi-exclude.json") @@ -1050,10 +1036,6 @@ public async Task AppObserver_InitializeAsync_TargetApp_MultiServiceExcludeList_ [TestMethod] public async Task AppObserver_InitializeAsync_TargetAppType_MultiServiceIncludeList_EnsureIncluded() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver.config.apptype.multi-include.json") @@ -1068,10 +1050,6 @@ public async Task AppObserver_InitializeAsync_TargetAppType_MultiServiceIncludeL [TestMethod] public async Task AppObserver_InitializeAsync_TargetApp_MultiServiceIncludeList_EnsureIncluded() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver.config.app.multi-include.json") @@ -1096,10 +1074,6 @@ public async Task AppObserver_ObserveAsync_Successful_IsHealthy() { var startDateTime = DateTime.Now; - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = true; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = 
Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver.config.json") @@ -1122,10 +1096,6 @@ public async Task AppObserver_ObserveAsync_Successful_WarningsGenerated() { var startDateTime = DateTime.Now; - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = true; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver_warnings.config.json") @@ -1148,10 +1118,6 @@ public async Task AppObserver_ObserveAsync_PrivateBytes_Successful_WarningsGener { var startDateTime = DateTime.Now; - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = true; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver_PrivateBytes_warning.config.json") @@ -1176,10 +1142,6 @@ public async Task AppObserver_ObserveAsync_Successful_RGMemoryLimitWarningGenera { var startDateTime = DateTime.Now; - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver_rg_memory_warning.config.json"), @@ -1226,7 +1188,7 @@ await FabricClientSingleton.ApplicationManager.UpgradeApplicationAsync( await obs.ObserveAsync(Token); // Revert previous upgrade. 
- appUpgradeDescription.ApplicationParameters[ObserverConstants.AppManifestWarningRGMemoryLimitPercent] = "0"; + appUpgradeDescription.ApplicationParameters[ObserverConstants.AppManifestWarningRGMemoryLimitPercent] = "0"; try { @@ -1271,10 +1233,6 @@ public async Task AppObserver_ObserveAsync_Successful_RGCpuLimitWarningGenerated { var startDateTime = DateTime.Now; - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = true; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver_rg_cpu_warning.config.json"), @@ -1298,11 +1256,6 @@ public async Task AppObserver_ObserveAsync_Successful_RGCpuLimitWarningGenerated public async Task AppObserver_ObserveAsync_Successful_RGLimit_Validate_Multiple_Memory_Cpu_Specification() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new AppObserver(TestServiceContext); await obs.ObserveAsync(Token); @@ -1429,11 +1382,6 @@ await FabricClientSingleton.ApplicationManager.UpgradeApplicationAsync( public async Task AppObserver_ObserveAsync_OldConfigStyle_Successful_WarningsGenerated() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver.config.oldstyle_warnings.json") @@ -1455,11 +1403,6 @@ public async Task AppObserver_ObserveAsync_OldConfigStyle_Successful_WarningsGen public async Task AppObserver_ObserveAsync_OldConfigStyle_Successful_NoWarningsGenerated() { var startDateTime = DateTime.Now; - - 
ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver.config.oldstyle_nowarnings.json") @@ -1497,7 +1440,7 @@ public async Task Ensure_ConcurrentQueue_Collection_Has_Data_CPU_Win32Impl() TaskScheduler = TaskScheduler.Default }; - _ = Parallel.For (0, services.Count, parallelOptions, (i, state) => + _ = Parallel.For(0, services.Count, parallelOptions, (i, state) => { var service = services[i]; string procName = NativeMethods.GetProcessNameFromId((int)service.HostProcessId); @@ -1532,7 +1475,7 @@ public async Task Ensure_ConcurrentQueue_Collection_Has_Data_CPU_Win32Impl() TimeSpan duration = TimeSpan.FromSeconds(3); - _ = Parallel.For (0, serviceProcs.Count, parallelOptions, (i, state) => + _ = Parallel.For(0, serviceProcs.Count, parallelOptions, (i, state) => { Stopwatch sw = Stopwatch.StartNew(); int procId = serviceProcs.ElementAt(i); @@ -1575,7 +1518,7 @@ public async Task Ensure_ConcurrentQueue_Collection_Has_Data_CPU_NET6ProcessImpl TaskScheduler = TaskScheduler.Default }; - _ = Parallel.For (0, services.Count, parallelOptions, (i, state) => + _ = Parallel.For(0, services.Count, parallelOptions, (i, state) => { var service = services[i]; string procName = NativeMethods.GetProcessNameFromId((int)service.HostProcessId); @@ -1799,11 +1742,6 @@ public async Task Ensure_CircularBuffer_Collection_Has_Data_CPU_NET6ProcessImpl( public async Task AppObserver_DumpProcessOnWarning_SuccessfulDumpCreation() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", 
"Config", "AppObserver_warnings_dmps.config.json"), @@ -1840,11 +1778,6 @@ public async Task AppObserver_DumpProcessOnWarning_SuccessfulDumpCreation() public async Task AppObserver_DumpProcessOnError_SuccessfulDumpCreation() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver_errors_dmps.config.json"), @@ -1856,7 +1789,7 @@ public async Task AppObserver_DumpProcessOnError_SuccessfulDumpCreation() Assert.IsTrue(Directory.Exists(obs.DumpsPath)); var dmps = Directory.GetFiles(obs.DumpsPath, "*.dmp"); - + Assert.IsTrue(dmps != null && dmps.Any()); // VotingData service, and two helper codepackage binaries. @@ -1882,11 +1815,6 @@ public async Task AppObserver_DumpProcessOnError_SuccessfulDumpCreation() public async Task ContainerObserver_ObserveAsync_Successful_IsHealthy() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new ContainerObserver(TestServiceContext) { ConfigurationFilePath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "ContainerObserver.config.json") @@ -2078,11 +2006,6 @@ public async Task CertificateObserver_validCerts() try { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new CertificateObserver(TestServiceContext); var commonNamesToObserve = new List @@ -2127,11 +2050,6 @@ public async Task CertificateObserver_validCerts() public async Task CertificateObserver_expiredAndexpiringCerts() { var startDateTime = DateTime.Now; - - 
ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new CertificateObserver(TestServiceContext); IServiceCollection services = new ServiceCollection(); services.AddScoped(typeof(ObserverBase), s => obs); @@ -2183,11 +2101,6 @@ public async Task CertificateObserver_expiredAndexpiringCerts() public async Task NodeObserver_Integer_Greater_Than_100_CPU_Warn_Threshold_No_Fail() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new NodeObserver(TestServiceContext) { DataCapacity = 2, @@ -2211,11 +2124,6 @@ public async Task NodeObserver_Integer_Greater_Than_100_CPU_Warn_Threshold_No_Fa public async Task NodeObserver_Negative_Integer_CPU_Mem_Ports_Firewalls_Values_No_Exceptions_In_Intialize() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new NodeObserver(TestServiceContext) { DataCapacity = 2, @@ -2240,11 +2148,6 @@ public async Task NodeObserver_Negative_Integer_CPU_Mem_Ports_Firewalls_Values_N public async Task NodeObserver_Negative_Integer_Thresholds_CPU_Mem_Ports_Firewalls_All_Data_Containers_Are_Null() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new NodeObserver(TestServiceContext) { DataCapacity = 2, @@ -2276,11 +2179,6 @@ public async Task NodeObserver_Negative_Integer_Thresholds_CPU_Mem_Ports_Firewal public async Task OSObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrErrors() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; 
- ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = true; - using var obs = new OSObserver(TestServiceContext) { ClusterManifestPath = Path.Combine(Environment.CurrentDirectory, "clusterManifest.xml"), @@ -2316,11 +2214,6 @@ public async Task OSObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrError public async Task OSObserver_IsWindowsDevCluster_True() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new OSObserver(TestServiceContext) { ClusterManifestPath = Path.Combine(Environment.CurrentDirectory, "clusterManifest.xml"), @@ -2348,11 +2241,6 @@ public async Task OSObserver_IsWindowsDevCluster_True() public async Task DiskObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrErrors() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = true; - var warningDictionary = new Dictionary { { @"C:\SFDevCluster\Log\Traces", 50000 } @@ -2394,11 +2282,6 @@ public async Task DiskObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrErr public async Task DiskObserver_ObserveAsync_Successful_IsHealthy_WarningsOrErrors() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = true; - var warningDictionary = new Dictionary { /* Windows paths.. 
*/ @@ -2465,9 +2348,6 @@ public async Task DiskObserver_ObserveAsync_Successful_IsHealthy_WarningsOrError public async Task NetworkObserver_ObserveAsync_Successful_Warnings() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - using var obs = new NetworkObserver(TestServiceContext); await obs.ObserveAsync(Token); @@ -2483,9 +2363,6 @@ public async Task NetworkObserver_ObserveAsync_Successful_Warnings() public async Task NetworkObserver_ObserveAsync_Successful_WritesLocalFile_ObsWebDeployed() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - using var obs = new NetworkObserver(TestServiceContext) { // This is required since output files are only created if fo api app is also deployed to cluster.. @@ -2513,11 +2390,6 @@ public async Task NetworkObserver_ObserveAsync_Successful_WritesLocalFile_ObsWeb public async Task NodeObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrErrorsDetected() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = true; - using var obs = new NodeObserver(TestServiceContext) { IsEnabled = true, @@ -2545,11 +2417,6 @@ public async Task NodeObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrErr public async Task NodeObserver_ObserveAsync_Successful_IsHealthy_WarningsOrErrorsDetected() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = true; - using var obs = new NodeObserver(TestServiceContext) { CpuMonitorDuration = TimeSpan.FromSeconds(1), @@ -2576,11 +2443,6 @@ public async Task NodeObserver_ObserveAsync_Successful_IsHealthy_WarningsOrError public async Task SFConfigurationObserver_ObserveAsync_Successful_IsHealthy() { var startDateTime = DateTime.Now; - - 
ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new SFConfigurationObserver(TestServiceContext) { IsEnabled = true, @@ -2616,11 +2478,6 @@ public async Task SFConfigurationObserver_ObserveAsync_Successful_IsHealthy() public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrErrors() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = true; - using var obs = new FabricSystemObserver(TestServiceContext) { MemWarnUsageThresholdMb = 10000, @@ -2646,11 +2503,6 @@ public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_NoWarni public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_MemoryWarningsOrErrorsDetected() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = true; - using var obs = new FabricSystemObserver(TestServiceContext) { MemWarnUsageThresholdMb = 1 @@ -2672,11 +2524,6 @@ public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_MemoryW public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_ActiveTcpPortsWarningsOrErrorsDetected() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new FabricSystemObserver(TestServiceContext) { CpuMonitorDuration = TimeSpan.FromSeconds(1), @@ -2700,11 +2547,6 @@ public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_ActiveT public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_EphemeralPortsWarningsOrErrorsDetected() { var startDateTime = DateTime.Now; - - 
ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new FabricSystemObserver(TestServiceContext) { CpuMonitorDuration = TimeSpan.FromSeconds(1), @@ -2728,11 +2570,6 @@ public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_Ephemer public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_HandlesWarningsOrErrorsDetected() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new FabricSystemObserver(TestServiceContext) { CpuMonitorDuration = TimeSpan.FromSeconds(1), @@ -2756,12 +2593,6 @@ public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_Handles public async Task FabricSystemObserver_Negative_Integer_CPU_Warn_Threshold_No_Unhandled_Exception() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - - using var obs = new FabricSystemObserver(TestServiceContext) { CpuMonitorDuration = TimeSpan.FromSeconds(1), @@ -2784,11 +2615,6 @@ public async Task FabricSystemObserver_Negative_Integer_CPU_Warn_Threshold_No_Un public async Task FabricSystemObserver_Integer_Greater_Than_100_CPU_Warn_Threshold_No_Unhandled_Exception() { var startDateTime = DateTime.Now; - - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new FabricSystemObserver(TestServiceContext) { CpuMonitorDuration = TimeSpan.FromSeconds(1), @@ -2859,7 +2685,7 @@ public void Active_TCP_Ports_Machine_Greater_Than_Active_Ephemeral_Ports_Machine { int total_tcp_ports = OSInfoProvider.Instance.GetActiveTcpPortCount(); int ephemeral_tcp_ports = 
OSInfoProvider.Instance.GetActiveEphemeralPortCount(); - + Assert.IsTrue(total_tcp_ports > 0 && ephemeral_tcp_ports > 0); Assert.IsTrue(total_tcp_ports > ephemeral_tcp_ports); } @@ -2874,7 +2700,7 @@ public async Task AppObserver_ETW_EventData_IsChildProcessTelemetryData() using var foEtwListener = new FabricObserverEtwListener(_logger); await AppObserver_ObserveAsync_Successful_IsHealthy(); List> childProcessTelemetryData = foEtwListener.foEtwConverter.ChildProcessTelemetry; - + Assert.IsNotNull(childProcessTelemetryData); Assert.IsTrue(childProcessTelemetryData.Count > 0); @@ -2898,9 +2724,9 @@ public async Task AppObserver_ETW_EventData_IsChildProcessTelemetryData() foreach (var c in x.ChildProcessInfo) { Assert.IsFalse(string.IsNullOrWhiteSpace(c.ProcessName)); - + Assert.IsTrue( - !string.IsNullOrWhiteSpace(c.ProcessStartTime) + !string.IsNullOrWhiteSpace(c.ProcessStartTime) && DateTime.TryParse(c.ProcessStartTime, out DateTime startTime) && startTime > DateTime.MinValue); Assert.IsTrue(c.Value > -1); Assert.IsTrue(c.ProcessId > 0); @@ -2919,7 +2745,7 @@ public async Task AppObserver_ETW_EventData_IsTelemetryData() await AppObserver_ObserveAsync_Successful_IsHealthy(); List telemData = foEtwListener.foEtwConverter.ServiceTelemetryData; - + Assert.IsNotNull(telemData); Assert.IsTrue(telemData.Count > 0); @@ -2960,7 +2786,7 @@ public async Task AppObserver_ETW_EventData_IsTelemetryData_HealthWarnings() await AppObserver_ObserveAsync_Successful_WarningsGenerated(); List telemData = foEtwListener.foEtwConverter.ServiceTelemetryData; - + Assert.IsNotNull(telemData); Assert.IsTrue(telemData.Count > 0); @@ -3037,15 +2863,15 @@ public async Task AppObserver_ETW_EventData_RG_ValuesAreNonZero() Assert.IsTrue(data.Code == null); Assert.IsTrue(data.Description == null); Assert.IsTrue(data.Source == ObserverConstants.FabricObserverName); - + // RG Memory if (data.ProcessName is "VotingData" or "VotingWeb" or "ConsoleApp6" or "ConsoleApp7") { - 
Assert.IsTrue(data.RGMemoryEnabled && data.RGAppliedMemoryLimitMb > 0); + Assert.IsTrue(data.RGMemoryEnabled && data.RGAppliedMemoryLimitMb > 0); } if (data.ProcessName is "VotingData" or "VotingWeb" or "ConsoleApp6" or "ConsoleApp7") - { + { Assert.IsTrue(data.RGCpuEnabled && data.RGAppliedCpuLimitCores > 0); } @@ -3092,7 +2918,7 @@ public async Task AppObserver_ETW_PrivateBytes_Warning_ChildProcesses() childProcessTelemetryData = childProcessTelemetryData.Where( c => c.Find(cti => cti.ApplicationName == "fabric:/TestApp42").Metric == ErrorWarningProperty.PrivateBytesMb).ToList(); - + // Ensure parent service is put into warning. telemData = telemData.Where( t => t.ApplicationName == "fabric:/TestApp42" && t.HealthState == HealthState.Warning).ToList(); @@ -3153,7 +2979,7 @@ public async Task DiskObserver_ETW_EventData_IsTelemetryData() await DiskObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrErrors(); List telemData = foEtwListener.foEtwConverter.DiskTelemetryData; - + Assert.IsNotNull(telemData); Assert.IsTrue(telemData.Count > 0); @@ -3187,7 +3013,7 @@ public async Task DiskObserver_ETW_EventData_IsTelemetryData_Warnings() await DiskObserver_ObserveAsync_Successful_IsHealthy_WarningsOrErrors(); List telemData = foEtwListener.foEtwConverter.DiskTelemetryData; - + Assert.IsNotNull(telemData); Assert.IsTrue(telemData.Count > 0); @@ -3228,7 +3054,7 @@ public async Task FabricSystemObserver_ETW_EventData_Is_SystemServiceTelemetryDa await FabricSystemObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrErrors(); List telemData = foEtwListener.foEtwConverter.SystemServiceTelemetryData; - + Assert.IsNotNull(telemData); Assert.IsTrue(telemData.Count > 0); @@ -3266,7 +3092,7 @@ public async Task FabricSystemObserver_ETW_EventData_Is_SystemServiceTelemetryDa await FabricSystemObserver_ObserveAsync_Successful_IsHealthy_MemoryWarningsOrErrorsDetected(); List telemData = foEtwListener.foEtwConverter.SystemServiceTelemetryData; - + Assert.IsNotNull(telemData); 
Assert.IsTrue(telemData.Count > 0); @@ -3340,7 +3166,7 @@ public async Task NodeObserver_ETW_EventData_IsNodeTelemetryData() await NodeObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrErrorsDetected(); List telemData = foEtwListener.foEtwConverter.NodeTelemetryData; - + Assert.IsNotNull(telemData); Assert.IsTrue(telemData.Count > 0); @@ -3371,7 +3197,7 @@ public async Task NodeObserver_ETW_EventData_IsNodeTelemetryData_Warnings() await NodeObserver_ObserveAsync_Successful_IsHealthy_WarningsOrErrorsDetected(); List telemData = foEtwListener.foEtwConverter.NodeTelemetryData; - + Assert.IsNotNull(telemData); Assert.IsTrue(telemData.Count > 0); @@ -3406,8 +3232,8 @@ public async Task NodeObserver_ETW_EventData_IsNodeSnapshotTelemetryData() { using var foEtwListener = new FabricObserverEtwListener(_logger); var startDateTime = DateTime.Now; - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; + + ObserverManager.EtwEnabled = true; using var obs = new NodeObserver(TestServiceContext) @@ -3454,7 +3280,7 @@ public async Task OSObserver_ETW_EventData_IsMachineTelemetryData() using var foEtwListener = new FabricObserverEtwListener(_logger); await OSObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrErrors(); MachineTelemetryData machineTelemetryData = foEtwListener.foEtwConverter.MachineTelemetryData; - + Assert.IsNotNull(machineTelemetryData); Assert.IsFalse(string.IsNullOrWhiteSpace(machineTelemetryData.DriveInfo)); @@ -3491,17 +3317,13 @@ public async Task OSObserver_ETW_EventData_IsMachineTelemetryData() [TestMethod] public async Task AppObserver_Detects_Monitors_Multiple_Helper_CodePackages() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", 
"AppObserver.config.json") }; var startDateTime = DateTime.Now; - + await obs.InitializeAsync(); // fabric:/Voting application has 2 default services (that create service types) and 2 extra CodePackages (specified in VotingData manifest) @@ -3509,7 +3331,7 @@ public async Task AppObserver_Detects_Monitors_Multiple_Helper_CodePackages() // and therefore will be treated like any process that AppObserver monitors. Assert.IsTrue(obs.ReplicaOrInstanceList.Any(r => r.HostProcessName == "ConsoleApp6")); Assert.IsTrue(obs.ReplicaOrInstanceList.Any(r => r.HostProcessName == "ConsoleApp7")); - + await obs.ObserveAsync(Token); // observer ran to completion with no errors. @@ -3527,10 +3349,6 @@ public async Task AppObserver_Detects_Monitors_Multiple_Helper_CodePackages() [TestMethod] public async Task AppObserver_ChildProcs_Detects_SingleDescendant_Ports_Warning() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver.config.single-app-target-warning-ports.json") @@ -3556,10 +3374,6 @@ public async Task AppObserver_ChildProcs_Detects_SingleDescendant_Ports_Warning( [TestMethod] public async Task AppObserver_ChildProcs_Detects_SingleDescendant_Ports_NoWarning() { - ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryProviderEnabled = false; - ObserverManager.EtwEnabled = false; - using var obs = new AppObserver(TestServiceContext) { JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver.config.single-app-target.json"), diff --git a/FabricObserverTests/PackageRoot/Config/Settings.xml b/FabricObserverTests/PackageRoot/Config/Settings.xml index b53d7694..9c5ef125 100644 --- a/FabricObserverTests/PackageRoot/Config/Settings.xml +++ 
b/FabricObserverTests/PackageRoot/Config/Settings.xml @@ -8,17 +8,17 @@ + - - - + + @@ -30,7 +30,8 @@ - + + @@ -63,6 +64,7 @@
+ @@ -78,6 +80,7 @@ + @@ -112,7 +115,8 @@
- + + @@ -177,8 +181,9 @@ - + + From 321aada2ec6769eabe66990769e1dad304bb91aa Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Thu, 15 Feb 2024 19:09:43 -0800 Subject: [PATCH 05/11] Update settings --- .../ApplicationPackageRoot/ApplicationManifest.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index 1040c0af..c74126dc 100644 --- a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -16,7 +16,7 @@ - + - - + + @@ -162,7 +162,7 @@ - + From 4a0df827c2f16cf1da0a2d98cb7d05fbef13687e Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Thu, 15 Feb 2024 19:11:41 -0800 Subject: [PATCH 06/11] Remove dev param (never shipped). --- .../ApplicationPackageRoot/ApplicationManifest.xml | 3 --- 1 file changed, 3 deletions(-) diff --git a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index c74126dc..d09199f9 100644 --- a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -28,9 +28,6 @@ - - From c8b8dc3d17a1aacd19bdc405c03d7f261f392468 Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Wed, 21 Feb 2024 11:45:46 -0800 Subject: [PATCH 07/11] 3.2.15- Code update: health report clearing. 
--- .../ServiceFabric/FabricClientUtilities.cs | 52 ++++++++++--------- FabricObserver/Observers/AppObserver.cs | 7 +++ FabricObserver/Observers/ContainerObserver.cs | 1 - FabricObserver/Observers/ObserverManager.cs | 20 +++++-- .../ApplicationManifest.xml | 4 +- 5 files changed, 51 insertions(+), 33 deletions(-) diff --git a/FabricObserver.Extensibility/Utilities/ServiceFabric/FabricClientUtilities.cs b/FabricObserver.Extensibility/Utilities/ServiceFabric/FabricClientUtilities.cs index a00dbcbb..a615f999 100644 --- a/FabricObserver.Extensibility/Utilities/ServiceFabric/FabricClientUtilities.cs +++ b/FabricObserver.Extensibility/Utilities/ServiceFabric/FabricClientUtilities.cs @@ -360,7 +360,7 @@ private void ProcessMultipleHelperCodePackages( { DeployedCodePackageList codepackages = FabricClientSingleton.QueryManager.GetDeployedCodePackageListAsync( - this.nodeName, + nodeName, appName, deployedReplica.ServiceManifestName, null, @@ -1116,36 +1116,38 @@ await FabricClientSingleton.HealthManager.GetClusterHealthAsync( { try { + // FabricSystemObserver if (app.ApplicationName.OriginalString == ObserverConstants.SystemAppName) { await RemoveApplicationHealthReportsAsync(app, ignoreDefaultQueryTimeout, cancellationToken); } - - var appHealth = - await FabricClientSingleton.HealthManager.GetApplicationHealthAsync( - app.ApplicationName, - TimeSpan.FromSeconds(90), - cancellationToken); - - - if (appHealth.ServiceHealthStates != null && - appHealth.ServiceHealthStates.Any( - s => s.AggregatedHealthState == HealthState.Error || s.AggregatedHealthState == HealthState.Warning)) + else { - foreach (var service in appHealth.ServiceHealthStates) + var appHealth = + await FabricClientSingleton.HealthManager.GetApplicationHealthAsync( + app.ApplicationName, + TimeSpan.FromSeconds(90), + cancellationToken); + + // AppObserver, ContainerObserver. 
+ if (appHealth.ServiceHealthStates != null && + appHealth.ServiceHealthStates.Any( + s => s.AggregatedHealthState == HealthState.Error || s.AggregatedHealthState == HealthState.Warning)) { - if (service.AggregatedHealthState == HealthState.Ok) + foreach (var service in appHealth.ServiceHealthStates) { - continue; - } + if (service.AggregatedHealthState == HealthState.Ok) + { + continue; + } - await RemoveServiceHealthReportsAsync(service, ignoreDefaultQueryTimeout, cancellationToken); + await RemoveServiceHealthReportsAsync(service, ignoreDefaultQueryTimeout, cancellationToken); + } } + + // NetworkObserver/FSO. + await RemoveApplicationHealthReportsAsync(app, ignoreDefaultQueryTimeout, cancellationToken); } - - // NetworkObserver/FSO. - await RemoveApplicationHealthReportsAsync(app, ignoreDefaultQueryTimeout, cancellationToken); - } catch (Exception e) when (e is FabricException or TimeoutException) { @@ -1203,7 +1205,7 @@ private async Task RemoveServiceHealthReportsAsync(ServiceHealthState service, b serviceHealth.HealthEvents.Where( e => JsonHelper.TryDeserializeObject(e.HealthInformation.Description, out TelemetryDataBase telemetryDataBase) - && telemetryDataBase.NodeName == this.nodeName + && telemetryDataBase.NodeName == nodeName && (e.HealthInformation.SourceId.StartsWith(ObserverConstants.AppObserverName) || e.HealthInformation.SourceId.StartsWith(ObserverConstants.ContainerObserverName))).ToList(); @@ -1255,7 +1257,7 @@ private async Task RemoveApplicationHealthReportsAsync(ApplicationHealthState ap appHealth.HealthEvents.Where( e => JsonHelper.TryDeserializeObject(e.HealthInformation.Description, out TelemetryDataBase telemetryDataBase) - && telemetryDataBase.NodeName == this.nodeName + && telemetryDataBase.NodeName == nodeName && (e.HealthInformation.SourceId.StartsWith(ObserverConstants.AppObserverName) || e.HealthInformation.SourceId.StartsWith(ObserverConstants.FabricSystemObserverName) || 
e.HealthInformation.SourceId.StartsWith(ObserverConstants.NetworkObserverName))).ToList(); @@ -1295,7 +1297,7 @@ private async Task RemoveApplicationHealthReportsAsync(ApplicationHealthState ap private async Task RemoveNodeHealthReportsAsync(IEnumerable nodeHealthStates, bool ignoreDefaultQueryTimeout, CancellationToken cancellationToken) { // Scope to node where this FO instance is running. - nodeHealthStates = nodeHealthStates.Where(n => n.NodeName == this.nodeName); + nodeHealthStates = nodeHealthStates.Where(n => n.NodeName == nodeName); foreach (var nodeHealthState in nodeHealthStates) { @@ -1327,7 +1329,7 @@ private async Task RemoveNodeHealthReportsAsync(IEnumerable nod Code = FOErrorWarningCodes.Ok, HealthMessage = $"Clearing existing FabricObserver Health Reports as the service is stopping or starting.", State = HealthState.Ok, - NodeName = this.nodeName, + NodeName = nodeName, EntityType = EntityType.Machine, HealthReportTimeToLive = TimeSpan.FromSeconds(1) }; diff --git a/FabricObserver/Observers/AppObserver.cs b/FabricObserver/Observers/AppObserver.cs index 7b5fd060..a94ed4ab 100644 --- a/FabricObserver/Observers/AppObserver.cs +++ b/FabricObserver/Observers/AppObserver.cs @@ -3656,6 +3656,13 @@ private void SendServiceProcessElevatedWarning(string appName, string serviceNam ServiceName = serviceName }); } + + // This is used for managing health reports (clearing reports on start and on graceful close). + if (!ServiceNames.Any(a => a == FabricServiceContext.ServiceName.OriginalString)) + { + // Volatile state. This is used to clear health reports during a config upgrade or graceful close. 
+ ServiceNames.Enqueue(FabricServiceContext.ServiceName.OriginalString); + } } } diff --git a/FabricObserver/Observers/ContainerObserver.cs b/FabricObserver/Observers/ContainerObserver.cs index 029ca6f3..70e8abfe 100644 --- a/FabricObserver/Observers/ContainerObserver.cs +++ b/FabricObserver/Observers/ContainerObserver.cs @@ -176,7 +176,6 @@ public override Task ReportAsync(CancellationToken token) EntityType.Service, null, repOrInst); - }); return Task.CompletedTask; diff --git a/FabricObserver/Observers/ObserverManager.cs b/FabricObserver/Observers/ObserverManager.cs index 73805163..9587a5f8 100644 --- a/FabricObserver/Observers/ObserverManager.cs +++ b/FabricObserver/Observers/ObserverManager.cs @@ -216,7 +216,7 @@ public async Task StartObserversAsync() try { - // Clear out any orphaned health reports left behind when FO ungracefully exits. + // Clear out any orphaned Error or Warning health reports left behind when FO ungracefully exited. FabricClientUtilities fabricClientUtilities = new(nodeName); await fabricClientUtilities.ClearFabricObserverHealthReportsAsync(true, CancellationToken.None); @@ -427,7 +427,7 @@ public async Task ClearHealthReportsAsync(string configUpdateLinux) HealthReportTimeToLive = TimeSpan.FromSeconds(1) }; - foreach (var observer in Observers) + foreach (ObserverBase observer in Observers) { try { @@ -458,13 +458,19 @@ public async Task ClearHealthReportsAsync(string configUpdateLinux) } } } - else if (observer.ObserverName == ObserverConstants.AppObserverName || observer.ObserverName == ObserverConstants.NetworkObserverName) + + if (observer.ObserverName == ObserverConstants.AppObserverName || observer.ObserverName == ObserverConstants.NetworkObserverName) { // Service Health reports. 
if (observer.ServiceNames.Any(a => !string.IsNullOrWhiteSpace(a) && a.Contains("fabric:/"))) { - foreach (var service in observer.ServiceNames) + foreach (string service in observer.ServiceNames) { + if (string.IsNullOrWhiteSpace(service)) + { + continue; + } + try { // App Health reports. NetworkObserver only generates App health reports and stores app name in ServiceNames field (TODO: Change that). @@ -532,8 +538,9 @@ public async Task ClearHealthReportsAsync(string configUpdateLinux) } } } + // System reports (fabric:/System). - else if (observer.ObserverName == ObserverConstants.FabricSystemObserverName) + if (observer.ObserverName == ObserverConstants.FabricSystemObserverName) { try { @@ -614,6 +621,9 @@ public async Task ClearHealthReportsAsync(string configUpdateLinux) { } + + // This only applies to AppObs, ContainerObs, NetworkObs, and FabricSystemObs. + observer.ServiceNames.Clear(); } } diff --git a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index d09199f9..3264dada 100644 --- a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -41,8 +41,8 @@ - - + + From 458d9c7f24ab3ccb55309ab199ad281d1684336f Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Sat, 24 Feb 2024 12:47:09 -0800 Subject: [PATCH 08/11] Removed redundant process privilege check. --- FabricObserver/Observers/AppObserver.cs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/FabricObserver/Observers/AppObserver.cs b/FabricObserver/Observers/AppObserver.cs index a94ed4ab..a1e0c50d 100644 --- a/FabricObserver/Observers/AppObserver.cs +++ b/FabricObserver/Observers/AppObserver.cs @@ -3554,21 +3554,8 @@ any processes (children) that the service process (parent) created/spawned. */ } } - // This will be DateTime.MinValue when the target process is inaccessible due to user privilege, for example. 
- // We shouldn't get here given the code above if some process is running at a higher priv than FO.. but if the process *is* inaccessible for some reason at this point, - // then we'll not add the replica to the list. replicaInfo.HostProcessStartTime = GetProcessStartTime((int)replicaInfo.HostProcessId); - if (IsWindows) - { - int errorCode = Marshal.GetLastWin32Error(); - - if (errorCode == NativeMethods.ERROR_ACCESS_DENIED) - { - SendServiceProcessElevatedWarning(replicaInfo.ApplicationName?.OriginalString, replicaInfo.ServiceName?.OriginalString); - } - } - if (replicaInfo.HostProcessStartTime == DateTime.MinValue) { // Do not add replica to repOrInst list. From 927f5144c5ab6773b42ae868a6aa91a35ac8e547 Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Sat, 24 Feb 2024 13:02:49 -0800 Subject: [PATCH 09/11] Updated nuget dep packages. --- .../ApplicationPackageRoot/ApplicationManifest.xml | 2 +- FabricObserver.nuspec.template | 3 ++- FabricObserver/FabricObserver.csproj | 2 +- .../ApplicationPackageRoot/ApplicationManifest.xml | 4 ++-- FabricObserverTests/FabricObserverTests.csproj | 10 +++++----- SampleObserverPlugin/SampleObserverPlugin.csproj | 4 ++-- 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index 41897bbd..1bca1d7c 100644 --- a/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -11,8 +11,8 @@ - + diff --git a/FabricObserver.nuspec.template b/FabricObserver.nuspec.template index f34be7b5..88a02911 100644 --- a/FabricObserver.nuspec.template +++ b/FabricObserver.nuspec.template @@ -4,7 +4,8 @@ %PACKAGE_ID% 3.2.15 -- Code cleanup and refactoring. +- Added support for Observer raw metric telemetry disablement via configuration. This enables you to disable raw metric telemetry for any supported observer. 
This is useful if you are running FabricObserver in a non-production environment and do not want to send raw telemetry data to Application Insights or Log Analytics. Raw metric telemetry is not suppressed by default. To suppress it for an observer, set the [Observer name]EmitRawMetricTelemetry parameter to false in ApplicationManifest.xml. +- Fixed a bug in AppObserver that can lead to FabricObserver incorrectly reporting that a target service process couldn't be monitored because it was running at a higher privilege level than FabricObserver. Microsoft MIT diff --git a/FabricObserver/FabricObserver.csproj b/FabricObserver/FabricObserver.csproj index 2e762ee8..3e8e0e4c 100644 --- a/FabricObserver/FabricObserver.csproj +++ b/FabricObserver/FabricObserver.csproj @@ -31,7 +31,7 @@ - + diff --git a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index 3264dada..59862877 100644 --- a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -2,8 +2,8 @@ - + @@ -23,8 +23,8 @@ This is primarily useful for scenarios where you have multiple instances of FO running on a node and you want to ensure that each instance writes ETW event data to different named sinks. This is more of an advanced scenario. In general, just leave this blank. 
--> - + diff --git a/FabricObserverTests/FabricObserverTests.csproj b/FabricObserverTests/FabricObserverTests.csproj index b7d3eeaf..de2530c9 100644 --- a/FabricObserverTests/FabricObserverTests.csproj +++ b/FabricObserverTests/FabricObserverTests.csproj @@ -13,14 +13,14 @@ - - - - + + + + all runtime; build; native; contentfiles; analyzers; buildtransitive - + diff --git a/SampleObserverPlugin/SampleObserverPlugin.csproj b/SampleObserverPlugin/SampleObserverPlugin.csproj index 7d977fa6..7348e884 100644 --- a/SampleObserverPlugin/SampleObserverPlugin.csproj +++ b/SampleObserverPlugin/SampleObserverPlugin.csproj @@ -14,7 +14,7 @@ - + @@ -32,6 +32,6 @@ - + \ No newline at end of file From 8543fbc68a404149047cef39bfc390391bdc5767 Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Mon, 26 Feb 2024 16:53:46 -0800 Subject: [PATCH 10/11] Removed PackageReference typos in csproj. --- FabricObserverTests/FabricObserverTests.csproj | 2 -- 1 file changed, 2 deletions(-) diff --git a/FabricObserverTests/FabricObserverTests.csproj b/FabricObserverTests/FabricObserverTests.csproj index de2530c9..3badb7b7 100644 --- a/FabricObserverTests/FabricObserverTests.csproj +++ b/FabricObserverTests/FabricObserverTests.csproj @@ -11,8 +11,6 @@ x64 - - From 9b5877209fd07bbf74ac81ec2345fb6cb52edcc5 Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Thu, 14 Mar 2024 10:44:30 -0700 Subject: [PATCH 11/11] Updated sample plugin (version). --- SampleObserverPlugin/SampleObserverPlugin.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SampleObserverPlugin/SampleObserverPlugin.csproj b/SampleObserverPlugin/SampleObserverPlugin.csproj index 7348e884..34f1407e 100644 --- a/SampleObserverPlugin/SampleObserverPlugin.csproj +++ b/SampleObserverPlugin/SampleObserverPlugin.csproj @@ -22,7 +22,7 @@ -