From dc2216e91bc65df8ad65b161081d56e41aa79904 Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Wed, 11 Oct 2023 11:49:44 -0700 Subject: [PATCH 01/10] Fix for LVID db change (SF 10+) --- FabricObserver/Observers/ObserverManager.cs | 8 +++++++- .../ApplicationPackageRoot/ApplicationManifest.xml | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/FabricObserver/Observers/ObserverManager.cs b/FabricObserver/Observers/ObserverManager.cs index 9eed658e..e9694c19 100644 --- a/FabricObserver/Observers/ObserverManager.cs +++ b/FabricObserver/Observers/ObserverManager.cs @@ -1387,7 +1387,13 @@ private bool IsLVIDPerfCounterEnabled(ConfigurationSettings settings = null) // DEBUG Logger.LogInfo("IsLVIDPerfCounterEnabled: Running check since a supported observer is enabled for LVID monitoring."); - const string categoryName = "Windows Fabric Database"; + string categoryName = "Windows Fabric Database"; + + if (sfVersion.StartsWith("10")) + { + categoryName = "MSExchange Database"; + } + const string counterName = "Long-Value Maximum LID"; // If there is corrupted state on the machine with respect to performance counters, an AV can occur (in native code, then wrapped in AccessViolationException) diff --git a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index ea47e0a5..f19a469c 100644 --- a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -131,7 +131,7 @@ - + - + install_lvid_perfcounter.bat @@ -25,10 +25,10 @@ - + - + \ No newline at end of file diff --git a/FabricObserver/PackageRoot/ServiceManifest_linux.xml b/FabricObserver/PackageRoot/ServiceManifest_linux.xml index d252968e..65b54348 100644 --- a/FabricObserver/PackageRoot/ServiceManifest_linux.xml +++ b/FabricObserver/PackageRoot/ServiceManifest_linux.xml @@ -1,6 +1,6 @@  @@ -11,7 +11,7 @@ - + setcaps.sh @@ -27,10 +27,10 @@ - + - + \ No newline at end of file diff --git a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index f19a469c..755308d5 100644 --- a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -1,6 +1,6 @@  - + @@ -20,7 +20,7 @@ for example, then this setting will put FO into Warning or Error state or just create a an Ok (info) health message. Supported values are Error, Warning, Ok or None (which means don't put FO into Warning or Error state or create an Ok health event (info)). --> - + - + - + diff --git a/FabricObserverApp/StartupServices.xml b/FabricObserverApp/StartupServices.xml index ee13e966..b03bf8d0 100644 --- a/FabricObserverApp/StartupServices.xml +++ b/FabricObserverApp/StartupServices.xml @@ -1,7 +1,7 @@  - + diff --git a/FabricObserverTests/FabricObserverTests.csproj b/FabricObserverTests/FabricObserverTests.csproj index 337f4f75..b7d3eeaf 100644 --- a/FabricObserverTests/FabricObserverTests.csproj +++ b/FabricObserverTests/FabricObserverTests.csproj @@ -20,7 +20,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + diff --git a/FabricObserverTests/ObserverTests.cs b/FabricObserverTests/ObserverTests.cs index 305b683f..fb9eecc5 100644 --- a/FabricObserverTests/ObserverTests.cs +++ b/FabricObserverTests/ObserverTests.cs @@ -215,7 +215,6 @@ await FabricClientSingleton.QueryManager.GetDeployedApplicationListAsync( { return; } - string appType = "HealthMetricsType"; string appVersion = "1.0.0.0"; diff --git a/README.md b/README.md index cbef1d96..09a481d5 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -## FabricObserver 3.2.11 +## FabricObserver 3.2.12 [![Deploy to Azure](https://aka.ms/deploytoazurebutton)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmicrosoft%2Fservice-fabric-observer%2Fmain%2FDocumentation%2FDeployment%2Fservice-fabric-observer.json) @@ -87,7 +87,7 @@ see [FOAzurePipeline.yaml](/FOAzurePipeline.yaml) for msazure devops build tasks .net6 installed (if you deploy VM images from Azure gallery, then they will not have .net6 installed), then you must deploy the SelfContained package. ### Deploy FabricObserver -**Note: You must deploy this version (3.2.11) to clusters that are running SF 9.0 and above. This version also requires .NET 6.** +**Note: You must deploy this version (3.2.12) to clusters that are running SF 9.0 and above. This version also requires .NET 6.** You can deploy FabricObserver (and ClusterObserver) using Visual Studio (if you build the sources yourself), PowerShell or ARM. Please note that this version of FabricObserver no longer supports the DefaultServices node in ApplicationManifest.xml. This means that should you deploy using PowerShell, you must create an instance of the service as the last command in your script. This was done to support ARM deployment, specifically. The StartupServices.xml file you see in the FabricHealerApp project now contains the service information once held in ApplicationManifest's DefaultServices node. Note that this information is primarily useful for deploying from Visual Studio. @@ -135,7 +135,7 @@ Register-ServiceFabricApplicationType -ApplicationPathInImageStore FO3211 #Create FO application (if not already deployed at lesser version): -New-ServiceFabricApplication -ApplicationName fabric:/FabricObserver -ApplicationTypeName FabricObserverType -ApplicationTypeVersion 3.2.11 +New-ServiceFabricApplication -ApplicationName fabric:/FabricObserver -ApplicationTypeName FabricObserverType -ApplicationTypeVersion 3.2.12 #Create the Service instances (-1 means all nodes, which is what is required for FO): @@ -143,7 +143,7 @@ New-ServiceFabricService -Stateless -PartitionSchemeSingleton -ApplicationName f #OR if updating existing version: -Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationTypeVersion 3.2.11 -Monitored -FailureAction rollback +Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationTypeVersion 3.2.12 -Monitored -FailureAction rollback ``` ## Observer Model diff --git a/SampleObserverPlugin/SampleObserverPlugin.csproj b/SampleObserverPlugin/SampleObserverPlugin.csproj index 6ea3828d..9b87c8af 100644 --- a/SampleObserverPlugin/SampleObserverPlugin.csproj +++ b/SampleObserverPlugin/SampleObserverPlugin.csproj @@ -13,13 +13,13 @@ - - + + - - + - + \ No newline at end of file diff --git a/XmlDiffPatchSF/Program.cs b/XmlDiffPatchSF/Program.cs index 870469ba..07cc0b59 100644 --- a/XmlDiffPatchSF/Program.cs +++ b/XmlDiffPatchSF/Program.cs @@ -28,7 +28,7 @@ private static void Main(string[] args) "preceding the file extension.\n\n" + "**Note, if you have observer plugins, then you must supply true for [mergeExistingNodes] as the last argument to pull over your plugin settings as part of the merge.**.\n\n" + "Example:\n\n" + - "DiffPatchXml \"C:\\repos\\FO\\3.1.26\\configs\\ApplicationManifest.xml\" \"C:\\repos\\FO\\3.2.11\\configs\\ApplicationManifest.xml\"\n"); + "DiffPatchXml \"C:\\repos\\FO\\3.1.26\\configs\\ApplicationManifest.xml\" \"C:\\repos\\FO\\3.2.12\\configs\\ApplicationManifest.xml\"\n"); return; } diff --git a/foextlib.md b/foextlib.md index f139fa47..43585505 100644 --- a/foextlib.md +++ b/foextlib.md @@ -1,4 +1,4 @@ -## FabricObserver Extensibility Library 3.2.11 +## FabricObserver Extensibility Library 3.2.12 FabricObserver.Extensibility is a .NET 6 library for building custom observers that extend FabricObserver's capabilities to match your needs. A custom observer is managed just like a built-in observer. diff --git a/fonuget.md b/fonuget.md index 24a548ea..177e6620 100644 --- a/fonuget.md +++ b/fonuget.md @@ -1,4 +1,4 @@ -## FabricObserver 3.2.11 +## FabricObserver 3.2.12 [**FabricObserver (FO)**](https://github.com/microsoft/service-fabric-observer) is a production-ready watchdog service with an easy-to-use extensibility model, written as a stateless, singleton Service Fabric **.NET 6** application that by default From 02b39ed63455f07a852e7b189379ce726c198b64 Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Tue, 17 Oct 2023 09:41:14 -0700 Subject: [PATCH 03/10] Added net-tools install to setcaps.sh. --- FabricObserver/setcaps.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/FabricObserver/setcaps.sh b/FabricObserver/setcaps.sh index e2bb7b96..d89920e9 100644 --- a/FabricObserver/setcaps.sh +++ b/FabricObserver/setcaps.sh @@ -1,4 +1,6 @@ #!/bin/bash sudo setcap CAP_DAC_READ_SEARCH,CAP_SYS_PTRACE+p ./elevated_netstat sudo setcap CAP_DAC_READ_SEARCH,CAP_SYS_PTRACE+p ./elevated_proc_fd -sudo setcap CAP_DAC_READ_SEARCH,CAP_DAC_OVERRIDE+p ./elevated_docker_stats \ No newline at end of file +sudo setcap CAP_DAC_READ_SEARCH,CAP_DAC_OVERRIDE+p ./elevated_docker_stats + +sudo apt install net-tools \ No newline at end of file From cbbdbe92e3998bf55cbf24f83b71cd2fc54cd97a Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Wed, 18 Oct 2023 13:45:18 -0700 Subject: [PATCH 04/10] + support for LVID monitoring on SF 10+. --- .../ProcessInfo/WindowsProcessInfoProvider.cs | 25 +++++++++++++++---- FabricObserver/Observers/ObserverManager.cs | 8 +++--- FabricObserverTests/ObserverTests.cs | 5 ++++ 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/FabricObserver.Extensibility/Utilities/ProcessInfo/WindowsProcessInfoProvider.cs b/FabricObserver.Extensibility/Utilities/ProcessInfo/WindowsProcessInfoProvider.cs index af45faed..1ac15418 100644 --- a/FabricObserver.Extensibility/Utilities/ProcessInfo/WindowsProcessInfoProvider.cs +++ b/FabricObserver.Extensibility/Utilities/ProcessInfo/WindowsProcessInfoProvider.cs @@ -25,13 +25,28 @@ public class WindowsProcessInfoProvider : ProcessInfoProvider private const string ProcessCategoryName = "Process"; private const string ProcessMemoryCounterName = "Working Set - Private"; private const string ProcessIDCounterName = "ID Process"; - private const string WinFabDbCategoryName = "Windows Fabric Database"; - private const string LvidCounterName = "Long-Value Maximum LID"; + private const string LVIDCounterName = "Long-Value Maximum LID"; private static readonly object lockObj = new(); - private volatile bool hasWarnedProcessNameLength = false; private static PerformanceCounterCategory performanceCounterCategory = null; + private volatile bool hasWarnedProcessNameLength = false; public readonly static ConcurrentDictionary InstanceNameDictionary = new(); + private static string WinFabDbCategoryName + { + get + { + try + { + return ServiceFabricConfiguration.Instance.FabricVersion.StartsWith("1") ? "MSExchange Database" : "Windows Fabric Database"; + } + catch (ArgumentException ae) + { + ProcessInfoLogger.LogWarning("WinFabDbCategoryName property failure: " + ae.Message); + return null; + } + } + } + private static PerformanceCounterCategory PerfCounterProcessCategory { get @@ -290,7 +305,7 @@ public override double GetProcessKvsLvidsUsagePercentage(string procName, Cancel /* Check to see if the supplied instance (process) exists in the category. */ - if (!PerformanceCounterCategory.InstanceExists(internalProcName, WinFabDbCategoryName)) + if (!string.IsNullOrEmpty(WinFabDbCategoryName) && !PerformanceCounterCategory.InstanceExists(internalProcName, WinFabDbCategoryName)) { return -1; } @@ -302,7 +317,7 @@ The target counter is accessible to processes running as Network User (so, no Un categoryName and counterName are never null (they are const strings). Only two possible exceptions can happen here: IOE and Win32Exception. */ - using PerformanceCounter LvidCounter = new(WinFabDbCategoryName, LvidCounterName, internalProcName, true); + using PerformanceCounter LvidCounter = new(WinFabDbCategoryName, LVIDCounterName, internalProcName, true); float result = LvidCounter.NextValue(); double usedPct = (double)(result * 100) / int.MaxValue; return usedPct; diff --git a/FabricObserver/Observers/ObserverManager.cs b/FabricObserver/Observers/ObserverManager.cs index 6e078b6e..51fbcd08 100644 --- a/FabricObserver/Observers/ObserverManager.cs +++ b/FabricObserver/Observers/ObserverManager.cs @@ -39,6 +39,7 @@ private List Observers get; set; } + private const string LVIDCounterName = "Long-Value Maximum LID"; private readonly string nodeName; private readonly TimeSpan OperationalTelemetryRunInterval = TimeSpan.FromDays(1); private readonly CancellationToken runAsyncToken; @@ -1413,14 +1414,13 @@ private bool IsLVIDPerfCounterEnabled(ConfigurationSettings settings = null) // DEBUG Logger.LogInfo("IsLVIDPerfCounterEnabled: Running check since a supported observer is enabled for LVID monitoring."); string categoryName = "Windows Fabric Database"; + - if (sfVersion.StartsWith("10")) + if (sfVersion.StartsWith("1")) { categoryName = "MSExchange Database"; } - const string counterName = "Long-Value Maximum LID"; - // If there is corrupted state on the machine with respect to performance counters, an AV can occur (in native code, then wrapped in AccessViolationException) // when calling PerformanceCounterCategory.Exists below. This is actually a symptom of a problem that extends beyond just this counter category.. // *Do not catch AV exception*. FO will crash, of course, but that is safer than pretending nothing is wrong. @@ -1428,7 +1428,7 @@ private bool IsLVIDPerfCounterEnabled(ConfigurationSettings settings = null) // cause issues (not FO crashes necessarily, but inaccurate data related to the metrics they represent (like, you will always see 0 or -1 measurement values)). try { - return PerformanceCounterCategory.CounterExists(counterName, categoryName); + return PerformanceCounterCategory.CounterExists(LVIDCounterName, categoryName); } catch (Exception e) when (e is ArgumentException or InvalidOperationException or UnauthorizedAccessException or Win32Exception) { diff --git a/FabricObserverTests/ObserverTests.cs b/FabricObserverTests/ObserverTests.cs index fb9eecc5..d751b829 100644 --- a/FabricObserverTests/ObserverTests.cs +++ b/FabricObserverTests/ObserverTests.cs @@ -102,6 +102,11 @@ public static async Task TestClassStartUp(TestContext testContext) long.MaxValue); // Install required SF test applications. + await DeployTestAppsAppsAsync(); + } + + private static async Task DeployTestAppsAppsAsync() + { await DeployHealthMetricsAppAsync(); await DeployTestApp42Async(); await DeployVotingAppAsync(); From 2c4a106a74e937b4a61c9c66b93f698ddf35f32d Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Wed, 18 Oct 2023 13:51:58 -0700 Subject: [PATCH 05/10] FO nuspec. --- FabricObserver.nuspec.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FabricObserver.nuspec.template b/FabricObserver.nuspec.template index f9f9a520..66b09711 100644 --- a/FabricObserver.nuspec.template +++ b/FabricObserver.nuspec.template @@ -4,7 +4,7 @@ %PACKAGE_ID% 3.2.12 -- ESE LVID count monitor logic updated to support ESE changes in SF 10.x. +- ESE LVID count monitor updated to support ESE database performance category change in SF 10.x versions. Microsoft MIT From b9399e8d99cabe0c00c2c4e482478a35c03c4089 Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Thu, 19 Oct 2023 10:21:27 -0700 Subject: [PATCH 06/10] default log path --- .../ApplicationPackageRoot/ApplicationManifest.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index 755308d5..96048f7f 100644 --- a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -20,7 +20,7 @@ for example, then this setting will put FO into Warning or Error state or just create a an Ok (info) health message. Supported values are Error, Warning, Ok or None (which means don't put FO into Warning or Error state or create an Ok health event (info)). --> - + + setting is not greater than 0. --> + then the offending observer will be marked as broken and will not run again. + Below setting represents 60 minutes. --> + ClusterObserver will write to its own directory on this path. + **NOTE: For Linux runtime target, just supply the name of the directory (not a path with drive letter like you for Windows).** --> + will be locally logged. This is the recommended setting. Note that file logging is generally + only useful for FabricObserverWebApi, which is an optional log reader service that ships in this repo. --> @@ -46,27 +46,27 @@ + The Values for these will be overriden by ApplicationManifest Parameter settings. Set DefaultValue for each + overridable parameter in that file, not here, as the parameter DefaultValues in ApplicationManifest.xml will be used, by default. + This design is to enable unversioned application-parameter-only updates. This means you will be able to change + any of the MustOverride parameters below at runtime by doing an ApplicationUpdate with ApplicationParameters flag. + See: https://docs.microsoft.com/en-us/azure/service-fabric/service-fabric-application-upgrade-advanced#upgrade-application-parameters-independently-of-version -->
+ will be locally logged. This is the recommended setting. Note that file logging is generally + only useful for FabricObserverWebApi, which is an optional log reader service that ships in this repo. --> + Aggregated Error evaluations will always be transmitted regardless of this setting. --> + emitting a Warning signal.--> @@ -76,12 +76,13 @@
- + ClusterObserver @@ -21,11 +21,11 @@ - + - + diff --git a/ClusterObserver/Readme.md b/ClusterObserver/Readme.md index fc3908bc..de11e459 100644 --- a/ClusterObserver/Readme.md +++ b/ClusterObserver/Readme.md @@ -1,4 +1,4 @@ -### ClusterObserver 2.2.6 +### ClusterObserver 2.2.7 #### This version - and all subsequent versions - requires SF Runtime >= 9.0 and targets .NET 6 ClusterObserver (CO) is a stateless singleton Service Fabric .NET 6 service that runs on one node in a cluster. CO observes cluster health (aggregated) @@ -30,7 +30,7 @@ Application Parameter Upgrade Example: ```Powershell $appName = "fabric:/ClusterObserver" -$appVersion = "2.2.6" +$appVersion = "2.2.7" $application = Get-ServiceFabricApplication -ApplicationName $appName @@ -161,7 +161,7 @@ Start-ServiceFabricApplicationUpgrade -ApplicationName $appName -ApplicationType ``` XML - + @@ -190,7 +190,7 @@ Start-ServiceFabricApplicationUpgrade -ApplicationName $appName -ApplicationType should match the Name and Version attributes of the ServiceManifest element defined in the ServiceManifest.xml file. --> - + diff --git a/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index a8b0bf2e..f7b9f90f 100644 --- a/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -1,5 +1,5 @@  - + @@ -10,7 +10,7 @@ - + @@ -20,15 +20,15 @@ - - + + - + diff --git a/Documentation/Deployment/service-fabric-cluster-observer.json b/Documentation/Deployment/service-fabric-cluster-observer.json index a83e7a43..b05dbab9 100644 --- a/Documentation/Deployment/service-fabric-cluster-observer.json +++ b/Documentation/Deployment/service-fabric-cluster-observer.json @@ -11,7 +11,7 @@ }, "applicationTypeVersionClusterObserver": { "type": "string", - "defaultValue": "2.2.6", + "defaultValue": "2.2.7", "metadata": { "description": "Provide the app version number of ClusterObserver. This must be identical to the version specified in the corresponding sfpkg." } diff --git a/Documentation/Deployment/service-fabric-cluster-observer.v2.2.6.parameters.json b/Documentation/Deployment/service-fabric-cluster-observer.v2.2.7.parameters.json similarity index 90% rename from Documentation/Deployment/service-fabric-cluster-observer.v2.2.6.parameters.json rename to Documentation/Deployment/service-fabric-cluster-observer.v2.2.7.parameters.json index 8693d46c..b0d523c0 100644 --- a/Documentation/Deployment/service-fabric-cluster-observer.v2.2.6.parameters.json +++ b/Documentation/Deployment/service-fabric-cluster-observer.v2.2.7.parameters.json @@ -6,7 +6,7 @@ "value": "" }, "applicationTypeVersionClusterObserver": { - "value": "2.2.6" + "value": "2.2.7" }, "packageUrlClusterObserver": { "value": "" diff --git a/FabricObserver.sln b/FabricObserver.sln index 5e07c25b..447d6a31 100644 --- a/FabricObserver.sln +++ b/FabricObserver.sln @@ -35,10 +35,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution README.md = README.md SECURITY.md = SECURITY.md Documentation\Deployment\service-fabric-cluster-observer.json = Documentation\Deployment\service-fabric-cluster-observer.json - Documentation\Deployment\service-fabric-cluster-observer.v2.2.6.parameters.json = Documentation\Deployment\service-fabric-cluster-observer.v2.2.6.parameters.json Documentation\Deployment\service-fabric-observer.json = Documentation\Deployment\service-fabric-observer.json - Documentation\Using.md = Documentation\Using.md + Documentation\Deployment\service-fabric-cluster-observer.v2.2.7.parameters.json = Documentation\Deployment\service-fabric-cluster-observer.v2.2.7.parameters.json Documentation\Deployment\service-fabric-observer.v3.2.12.parameters.json = Documentation\Deployment\service-fabric-observer.v3.2.12.parameters.json + Documentation\Using.md = Documentation\Using.md EndProjectSection EndProject Project("{A07B5EB6-E848-4116-A8D0-A826331D98C6}") = "ClusterObserverApp", "ClusterObserverApp\ClusterObserverApp.sfproj", "{BD5D216F-5F89-4CC4-92FD-D6FDEC5A19AD}" diff --git a/FabricObserver/FabricObserver.cs b/FabricObserver/FabricObserver.cs index e669e27a..6afbdde8 100644 --- a/FabricObserver/FabricObserver.cs +++ b/FabricObserver/FabricObserver.cs @@ -70,7 +70,7 @@ private void ConfigureServices(IServiceCollection services) LoadObserversFromPlugins(services); } - // When deleting a stateless instance (like the FabricObserver instance), the SF runtime will call this override. + // Stateless instance restarted (via Remove-ServiceFabricReplica or RemoveReplicaAsync called *without* ForceRemove flag). // This ensures that any health report that FO created will be cleared. protected override void OnAbort() { @@ -78,9 +78,9 @@ protected override void OnAbort() { observerManager.ShutDownAsync().GetAwaiter().GetResult(); } - catch (Exception e) when (e is AggregateException or ObjectDisposedException) + catch (Exception e) when (e is not OutOfMemoryException) { - + // Don't crash in Abort unless it's OOM.. } base.OnAbort(); diff --git a/conuget.md b/conuget.md index ac09fa9a..39a7bdda 100644 --- a/conuget.md +++ b/conuget.md @@ -1,4 +1,4 @@ -### ClusterObserver 2.2.6 +### ClusterObserver 2.2.7 #### This version requires SF Runtime >= 9.0 and targets .NET 6. .NET Core 3.1 is no longer supported. [ClusterObserver (CO)](https://github.com/microsoft/service-fabric-observer/tree/main/ClusterObserver) is a stateless singleton Service Fabric .NET 6 service that runs on one node in a cluster. CO observes cluster health (aggregated) From 04a4bcc7cc640f11915de21b973dee189081a6dc Mon Sep 17 00:00:00 2001 From: Sidhant Bhatia Date: Wed, 20 Dec 2023 18:36:20 -0500 Subject: [PATCH 08/10] adding warning health event for wrong plugin --- Documentation/Plugins.md | 2 +- FabricObserver.Extensibility.nuspec.template | 2 ++ FabricObserver.nuspec.template | 1 + FabricObserver/FabricObserver.cs | 32 ++++++++++++++++++-- 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/Documentation/Plugins.md b/Documentation/Plugins.md index 7d13ed1d..b3c4577d 100644 --- a/Documentation/Plugins.md +++ b/Documentation/Plugins.md @@ -52,7 +52,7 @@ You must implement ObserverBase's two abstract functions: } ``` -5. Build your observer project, drop the output dll and *ALL* of its dependencies, both managed and native (this is *very* important), into the Config/Data/Plugins folder in FabricObserver/PackageRoot. +5. Build your observer project, drop the output dll and *ALL* of its dependencies, both managed and native (this is *very* important), into the Data/Plugins folder in FabricObserver/PackageRoot. You can place your plugin dll and all of its dependencies in its own (*same*) folder under the Plugins directory (useful if you have multiple plugins). Again, ALL plugin dll dependencies (and their dependencies, if any) need to live in the *same* folder as the plugin dll. diff --git a/FabricObserver.Extensibility.nuspec.template b/FabricObserver.Extensibility.nuspec.template index 18785c3a..c829ffe8 100644 --- a/FabricObserver.Extensibility.nuspec.template +++ b/FabricObserver.Extensibility.nuspec.template @@ -5,6 +5,8 @@ 3.2.12 Note: This is library is required for observer plugins that target FabricObserver 3.2.12. +- Updated LVID count monitor to support ESE database performance category change in SF 10.x versions. +- Adding a warning health event to inform customer if there is an issue with loading plugins. Microsoft MIT diff --git a/FabricObserver.nuspec.template b/FabricObserver.nuspec.template index 66b09711..d6697888 100644 --- a/FabricObserver.nuspec.template +++ b/FabricObserver.nuspec.template @@ -5,6 +5,7 @@ 3.2.12 - ESE LVID count monitor updated to support ESE database performance category change in SF 10.x versions. +- Adding a warning health event to inform customer if there is an issue with loading plugins. Microsoft MIT diff --git a/FabricObserver/FabricObserver.cs b/FabricObserver/FabricObserver.cs index 6afbdde8..05a103bc 100644 --- a/FabricObserver/FabricObserver.cs +++ b/FabricObserver/FabricObserver.cs @@ -11,6 +11,7 @@ using System.Threading; using System.Threading.Tasks; using FabricObserver.Observers; +using FabricObserver.Observers.Utilities; using FabricObserver.Utilities; using McMaster.NETCore.Plugins; using Microsoft.Extensions.DependencyInjection; @@ -24,6 +25,7 @@ namespace FabricObserver internal sealed class FabricObserverService : StatelessService { private ObserverManager observerManager; + readonly Logger logger; /// /// Initializes a new instance of the type. @@ -31,7 +33,7 @@ internal sealed class FabricObserverService : StatelessService /// StatelessServiceContext instance. public FabricObserverService(StatelessServiceContext context) : base(context) { - + logger = new Logger("FabricObserverService"); } /// @@ -108,10 +110,11 @@ private void LoadObserversFromPlugins(IServiceCollection services) PluginLoader[] pluginLoaders = new PluginLoader[pluginDlls.Length]; Type[] sharedTypes = { typeof(FabricObserverStartupAttribute), typeof(IFabricObserverStartup), typeof(IServiceCollection) }; + string dll = ""; for (int i = 0; i < pluginDlls.Length; ++i) { - string dll = pluginDlls[i]; + dll = pluginDlls[i]; PluginLoader loader = PluginLoader.CreateFromAssemblyFile(dll, sharedTypes, a => a.IsUnloadable = false); pluginLoaders[i] = loader; } @@ -148,8 +151,33 @@ private void LoadObserversFromPlugins(IServiceCollection services) } catch (Exception e) when (e is ArgumentException or BadImageFormatException or IOException) { + if(e is IOException) + { + string error = $"Plugin dll {dll} could not be loaded. {e.Message}"; + HealthReport healthReport = new() + { + AppName = new Uri($"{Context.CodePackageActivationContext.ApplicationName}"), + EmitLogEvent = true, + HealthMessage = error, + EntityType = Observers.Utilities.Telemetry.EntityType.Application, + HealthReportTimeToLive = TimeSpan.FromMinutes(10), + State = System.Fabric.Health.HealthState.Warning, + Property = "FabricObserverPluginLoadError", + SourceId = $"FabricObserverService-{Context.NodeContext.NodeName}", + NodeName = Context.NodeContext.NodeName, + }; + + ObserverHealthReporter observerHealth = new(logger); + observerHealth.ReportHealthToServiceFabric(healthReport); + } + continue; } + catch (Exception e) when (e is not OutOfMemoryException) + { + logger.LogError($"Unhandled exception in FabricObserverService Instance: {e.Message}"); + throw; + } } } } From 56be930a6c6355a7188ed4d40e704ae5030d2ad1 Mon Sep 17 00:00:00 2001 From: Sidhant Bhatia Date: Thu, 21 Dec 2023 14:49:06 -0500 Subject: [PATCH 09/10] added missing access modifier --- FabricObserver/FabricObserver.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FabricObserver/FabricObserver.cs b/FabricObserver/FabricObserver.cs index 05a103bc..511e460c 100644 --- a/FabricObserver/FabricObserver.cs +++ b/FabricObserver/FabricObserver.cs @@ -25,7 +25,7 @@ namespace FabricObserver internal sealed class FabricObserverService : StatelessService { private ObserverManager observerManager; - readonly Logger logger; + private readonly Logger logger; /// /// Initializes a new instance of the type. From 1625b852cfaa20d781cf2dd0f21dd382ceca40a7 Mon Sep 17 00:00:00 2001 From: Charles Torre Date: Thu, 21 Dec 2023 14:08:40 -0800 Subject: [PATCH 10/10] Formatting --- FabricObserver/FabricObserver.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FabricObserver/FabricObserver.cs b/FabricObserver/FabricObserver.cs index 511e460c..6906356b 100644 --- a/FabricObserver/FabricObserver.cs +++ b/FabricObserver/FabricObserver.cs @@ -151,7 +151,7 @@ private void LoadObserversFromPlugins(IServiceCollection services) } catch (Exception e) when (e is ArgumentException or BadImageFormatException or IOException) { - if(e is IOException) + if (e is IOException) { string error = $"Plugin dll {dll} could not be loaded. {e.Message}"; HealthReport healthReport = new()