From 7a3ad825c0c6c03db6d4cf989b16810ff68d2745 Mon Sep 17 00:00:00 2001
From: GowthamShanmugasundaram
Date: Wed, 28 Mar 2018 20:04:22 +0530
Subject: [PATCH] Calculating and updating cluster and volume alert_count in
 each cluster sync

tendrl-bug-id: Tendrl/gluster-integration#598
Signed-off-by: GowthamShanmugasundaram
---
 .../objects/definition/gluster.yaml          |  12 +++
 .../gluster_integration/sds_sync/__init__.py | 102 ++++++++++--------
 2 files changed, 70 insertions(+), 44 deletions(-)

diff --git a/tendrl/gluster_integration/objects/definition/gluster.yaml b/tendrl/gluster_integration/objects/definition/gluster.yaml
index 888b4c2..13fc213 100644
--- a/tendrl/gluster_integration/objects/definition/gluster.yaml
+++ b/tendrl/gluster_integration/objects/definition/gluster.yaml
@@ -135,6 +135,18 @@ namespace.gluster:
         volume_id:
           help: "Id of the volume"
           type: String
+      relationship:
+        utilization:
+          - volume_utilization
+        status:
+          - volume_status
+          - volume_state
+          - brick_status
+          - quorum
+          - ec_min_bricks_up
+          - afr_quorum_state
+          - afr_subvol_state
+          - georep_status
       value: /clusters/{0}/Volumes/{1}
       list: /clusters/{0}/Volumes/{1}
       help: "Volume Alert Counter"
diff --git a/tendrl/gluster_integration/sds_sync/__init__.py b/tendrl/gluster_integration/sds_sync/__init__.py
index b34d751..2016065 100644
--- a/tendrl/gluster_integration/sds_sync/__init__.py
+++ b/tendrl/gluster_integration/sds_sync/__init__.py
@@ -9,8 +9,6 @@
 from tendrl.commons.event import Event
 from tendrl.commons.message import ExceptionMessage
-from tendrl.commons.objects.cluster_alert_counters import \
-    ClusterAlertCounters
 from tendrl.commons import sds_sync
 from tendrl.commons.utils import cmd_utils
 from tendrl.commons.utils import etcd_utils
@@ -78,21 +76,6 @@ def run(self):
                 NS.publisher_id,
                 {"message": "Failed to sync cluster network details"}
             )
-
-        if NS.tendrl_context.integration_id:
-            # Initialize alert node alert count
-            try:
-                key = 'clusters/%s/nodes/%s/alert_counters' % (
-                    NS.tendrl_context.integration_id,
-                    NS.node_context.node_id
-                )
-                etcd_utils.read(key)
-            except(etcd.EtcdException)as ex:
-                if type(ex) == etcd.EtcdKeyNotFound:
-                    NS.tendrl.objects.ClusterNodeAlertCounters(
-                        node_id=NS.node_context.node_id,
-                        integration_id=NS.tendrl_context.integration_id
-                    ).save()
         _sleep = 0
         while not self._complete.is_set():
             # To detect out of band deletes
@@ -295,7 +278,8 @@ def run(self):
                         "sync_interval", 10
                     )) + len(volumes) * 4
                 )
-
+                # update alert count
+                update_cluster_alert_count()
                 # check and enable volume profiling
                 if "provisioner/%s" % NS.tendrl_context.integration_id in \
                         NS.node_context.tags:
@@ -319,17 +303,6 @@ def run(self):
                 ) in ['', 'finished', 'failed'] and \
                         _cluster.status in [None, ""]:
                     _cluster.save()
-                # Initialize alert count
-                try:
-                    alerts_count_key = '/clusters/%s/alert_counters' % (
-                        NS.tendrl_context.integration_id)
-                    etcd_utils.read(alerts_count_key)
-                except(etcd.EtcdException)as ex:
-                    if type(ex) == etcd.EtcdKeyNotFound:
-                        ClusterAlertCounters(
-                            integration_id=NS.tendrl_context.integration_id
-                        ).save()
-
         except Exception as ex:
             Event(
                 ExceptionMessage(
@@ -543,21 +516,6 @@ def sync_volumes(volumes, index, vol_options, sync_ttl):
             }
         )
         volume.save(ttl=sync_ttl)
-
-        # Initialize volume alert count
-        try:
-            volume_alert_count_key = '/clusters/%s/Volumes/%s/'\
-                'alert_counters' % (
-                    NS.tendrl_context.integration_id,
-                    volumes['volume%s.id' % index]
-                )
-            etcd_utils.read(volume_alert_count_key)
-        except(etcd.EtcdException)as ex:
-            if type(ex) == etcd.EtcdKeyNotFound:
-                NS.gluster.objects.VolumeAlertCounters(
-                    integration_id=NS.tendrl_context.integration_id,
-                    volume_id=volumes['volume%s.id' % index]
-                ).save()
         # Save the default values of volume options
         vol_opt_dict = {}
         for opt_count in \
@@ -862,3 +820,59 @@ def brick_status_alert(hostname):
     finally:
         if isinstance(lock, etcd.lock.Lock) and lock.is_acquired:
             lock.release()
+
+
+def update_cluster_alert_count():
+    cluster_alert_count = 0
+    severity = ["WARNING", "CRITICAL"]
+    try:
+        alert_counts = find_volume_id()
+        alerts_arr = NS.tendrl.objects.ClusterAlert(
+            tags={'integration_id': NS.tendrl_context.integration_id}
+        ).load_all()
+        for alert in alerts_arr:
+            alert.tags = json.loads(alert.tags)
+            if alert.severity in severity:
+                cluster_alert_count += 1
+                if alert.resource in NS.gluster.objects.VolumeAlertCounters(
+                )._defs['relationship'][alert.alert_type.lower()]:
+                    vol_name = alert.tags.get('volume_name', None)
+                    if vol_name:
+                        if vol_name in alert_counts.keys():
+                            alert_counts[vol_name]['alert_count'] += 1
+        # Update cluster alert count
+        NS.tendrl.objects.ClusterAlertCounters(
+            integration_id=NS.tendrl_context.integration_id,
+            alert_count=cluster_alert_count
+        ).save()
+        # Update volume alert count
+        for volume in alert_counts:
+            NS.gluster.objects.VolumeAlertCounters(
+                integration_id=NS.tendrl_context.integration_id,
+                alert_count=alert_counts[volume]['alert_count'],
+                volume_id=alert_counts[volume]['vol_id']
+            ).save()
+    except etcd.EtcdException as ex:
+        logger.log(
+            "debug",
+            NS.publisher_id,
+            {"message": "Unable to update alert count.err: %s" % ex}
+        )
+
+
+def find_volume_id():
+    alert_counts = {}
+    volumes = etcd_utils.read(
+        "clusters/%s/Volumes" % NS.tendrl_context.integration_id
+    )
+    for volume in volumes.leaves:
+        try:
+            volume_id = volume.key.split("/")[-1]
+            key = volume.key + "/name"
+            vol_name = etcd_utils.read(key).value
+            alert_counts[vol_name] = {}
+            alert_counts[vol_name]['vol_id'] = volume_id
+            alert_counts[vol_name]['alert_count'] = 0
+        except etcd.EtcdKeyNotFound:
+            continue
+    return alert_counts
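
A minimal standalone sketch of the counting rules this patch introduces, for illustration only: the RELATIONSHIP map mirrors the relationship block added to gluster.yaml, while count_alerts() and the plain-dict alerts are hypothetical stand-ins for NS.tendrl.objects.ClusterAlert and the loop inside update_cluster_alert_count(); none of these names exist in Tendrl itself.

    # Standalone sketch of the alert-counting rules in this patch.
    # RELATIONSHIP mirrors the 'relationship' block added to gluster.yaml;
    # the alert dicts stand in for NS.tendrl.objects.ClusterAlert objects.
    RELATIONSHIP = {
        "utilization": ["volume_utilization"],
        "status": [
            "volume_status", "volume_state", "brick_status", "quorum",
            "ec_min_bricks_up", "afr_quorum_state", "afr_subvol_state",
            "georep_status",
        ],
    }


    def count_alerts(alerts, volume_names):
        """Return (cluster_alert_count, {volume_name: alert_count})."""
        cluster_count = 0
        volume_counts = {name: 0 for name in volume_names}
        for alert in alerts:
            # Only WARNING/CRITICAL alerts are counted, as in the patch
            if alert["severity"] not in ("WARNING", "CRITICAL"):
                continue
            cluster_count += 1
            # An alert is attributed to a volume when its resource appears
            # in the relationship list for its alert_type
            # (utilization or status)
            resources = RELATIONSHIP.get(alert["alert_type"].lower(), [])
            if alert["resource"] in resources:
                vol_name = alert.get("volume_name")
                if vol_name in volume_counts:
                    volume_counts[vol_name] += 1
        return cluster_count, volume_counts


    if __name__ == "__main__":
        alerts = [
            {"severity": "CRITICAL", "alert_type": "status",
             "resource": "brick_status", "volume_name": "vol1"},
            {"severity": "WARNING", "alert_type": "utilization",
             "resource": "volume_utilization", "volume_name": "vol1"},
            {"severity": "INFO", "alert_type": "status",
             "resource": "volume_status", "volume_name": "vol2"},
        ]
        # INFO is ignored, so the cluster count is 2 and both counted
        # alerts map to vol1: prints (2, {'vol1': 2, 'vol2': 0})
        print(count_alerts(alerts, ["vol1", "vol2"]))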