From c54c61e5a1f7cfbd21e43846d07aae12eab4f8c7 Mon Sep 17 00:00:00 2001
From: Justyna Betkier <jbetkier@google.com>
Date: Mon, 30 Dec 2024 14:13:36 +0100
Subject: [PATCH] Improve logging when the cluster reaches max nodes total.

- add autoscaling status to reflect that
- change the log severity to warning as this means that autoscaler will
  not be fully functional (in particular, scaling up will not work)
---
 cluster-autoscaler/core/static_autoscaler.go                  | 4 ++--
 .../processors/status/scale_up_status_processor.go            | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go
index 0b075640b22f..e8452a8c6522 100644
--- a/cluster-autoscaler/core/static_autoscaler.go
+++ b/cluster-autoscaler/core/static_autoscaler.go
@@ -524,8 +524,8 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr
 		scaleUpStatus.Result = status.ScaleUpNotNeeded
 		klog.V(1).Info("No unschedulable pods")
 	} else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal {
-		scaleUpStatus.Result = status.ScaleUpNoOptionsAvailable
-		klog.V(1).Infof("Max total nodes in cluster reached: %v. Current number of ready nodes: %v", a.MaxNodesTotal, len(readyNodes))
+		scaleUpStatus.Result = status.ScaleUpLimitedByMaxNodesTotal
+		klog.Warningf("Max total nodes in cluster reached: %v. Current number of ready nodes: %v", a.MaxNodesTotal, len(readyNodes))
 	} else if len(a.BypassedSchedulers) == 0 && allPodsAreNew(unschedulablePodsToHelp, currentTime) {
 		// The assumption here is that these pods have been created very recently and probably there
 		// is more pods to come. In theory we could check the newest pod time but then if pod were created
diff --git a/cluster-autoscaler/processors/status/scale_up_status_processor.go b/cluster-autoscaler/processors/status/scale_up_status_processor.go
index 2bd48ba1ce45..708bb0e232ba 100644
--- a/cluster-autoscaler/processors/status/scale_up_status_processor.go
+++ b/cluster-autoscaler/processors/status/scale_up_status_processor.go
@@ -66,6 +66,8 @@ const (
 	ScaleUpNotTried
 	// ScaleUpInCooldown - the scale up wasn't even attempted, because it's in a cooldown state (it's suspended for a scheduled period of time).
 	ScaleUpInCooldown
+	// ScaleUpLimitedByMaxNodesTotal - the scale up wasn't attempted, because the cluster reached max nodes total
+	ScaleUpLimitedByMaxNodesTotal
 )
 
 // WasSuccessful returns true if the scale-up was successful.