From c1fda5181ba74d819f6f729a8eb776d5f5d6fe04 Mon Sep 17 00:00:00 2001 From: GowthamShanmugasundaram Date: Wed, 28 Mar 2018 20:04:22 +0530 Subject: [PATCH 1/3] Calculating and updating cluster and volume alert_count in each cluster sync tendrl-bug-id: Tendrl/gluster-integration#598 Signed-off-by: GowthamShanmugasundaram --- .../objects/definition/gluster.yaml | 12 +++ .../gluster_integration/sds_sync/__init__.py | 100 ++++++++++-------- 2 files changed, 70 insertions(+), 42 deletions(-) diff --git a/tendrl/gluster_integration/objects/definition/gluster.yaml b/tendrl/gluster_integration/objects/definition/gluster.yaml index 5fee319..02ca5d8 100644 --- a/tendrl/gluster_integration/objects/definition/gluster.yaml +++ b/tendrl/gluster_integration/objects/definition/gluster.yaml @@ -108,6 +108,18 @@ namespace.gluster: volume_id: help: "Id of the volume" type: String + relationship: + utilization: + - volume_utilization + status: + - volume_status + - volume_state + - brick_status + - quorum + - ec_min_bricks_up + - afr_quorum_state + - afr_subvol_state + - georep_status value: /clusters/{0}/Volumes/{1} list: /clusters/{0}/Volumes/{1} help: "Volume Alert Counter" diff --git a/tendrl/gluster_integration/sds_sync/__init__.py b/tendrl/gluster_integration/sds_sync/__init__.py index 8c87ef2..db11da9 100644 --- a/tendrl/gluster_integration/sds_sync/__init__.py +++ b/tendrl/gluster_integration/sds_sync/__init__.py @@ -76,21 +76,6 @@ def run(self): NS.publisher_id, {"message": "Failed to sync cluster network details"} ) - - if NS.tendrl_context.integration_id: - # Initialize alert node alert count - try: - key = 'clusters/%s/nodes/%s/alert_counters' % ( - NS.tendrl_context.integration_id, - NS.node_context.node_id - ) - etcd_utils.read(key) - except(etcd.EtcdException)as ex: - if type(ex) == etcd.EtcdKeyNotFound: - NS.tendrl.objects.ClusterNodeAlertCounters( - node_id=NS.node_context.node_id, - integration_id=NS.tendrl_context.integration_id - ).save() _sleep = 0 while not 
self._complete.is_set(): # To detect out of band deletes @@ -294,7 +279,8 @@ def run(self): "sync_interval", 10 )) + len(volumes) * 4 ) - + # update alert count + update_cluster_alert_count() # check and enable volume profiling if "provisioner/%s" % NS.tendrl_context.integration_id in \ NS.node_context.tags: @@ -318,17 +304,6 @@ def run(self): ) in ['', 'finished', 'failed'] and \ _cluster.status in [None, ""]: _cluster.save() - # Initialize alert count - try: - alerts_count_key = '/clusters/%s/alert_counters' % ( - NS.tendrl_context.integration_id) - etcd_utils.read(alerts_count_key) - except(etcd.EtcdException)as ex: - if type(ex) == etcd.EtcdKeyNotFound: - NS.tendrl.objects.ClusterAlertCounters( - integration_id=NS.tendrl_context.integration_id - ).save() - except Exception as ex: Event( ExceptionMessage( @@ -542,21 +517,6 @@ def sync_volumes(volumes, index, vol_options, sync_ttl): } ) volume.save(ttl=sync_ttl) - - # Initialize volume alert count - try: - volume_alert_count_key = '/clusters/%s/Volumes/%s/'\ - 'alert_counters' % ( - NS.tendrl_context.integration_id, - volumes['volume%s.id' % index] - ) - etcd_utils.read(volume_alert_count_key) - except(etcd.EtcdException)as ex: - if type(ex) == etcd.EtcdKeyNotFound: - NS.gluster.objects.VolumeAlertCounters( - integration_id=NS.tendrl_context.integration_id, - volume_id=volumes['volume%s.id' % index] - ).save() # Save the default values of volume options vol_opt_dict = {} for opt_count in \ @@ -861,3 +821,59 @@ def brick_status_alert(hostname): finally: if isinstance(lock, etcd.lock.Lock) and lock.is_acquired: lock.release() + + +def update_cluster_alert_count(): + cluster_alert_count = 0 + severity = ["WARNING", "CRITICAL"] + try: + alert_counts = find_volume_id() + alerts_arr = NS.tendrl.objects.ClusterAlert( + tags={'integration_id': NS.tendrl_context.integration_id} + ).load_all() + for alert in alerts_arr: + alert.tags = json.loads(alert.tags) + if alert.severity in severity: + cluster_alert_count += 1 + if 
alert.resource in NS.gluster.objects.VolumeAlertCounters( + )._defs['relationship'][alert.alert_type.lower()]: + vol_name = alert.tags.get('volume_name', None) + if vol_name: + if vol_name in alert_counts.keys(): + alert_counts[vol_name]['alert_count'] += 1 + # Update cluster alert count + NS.tendrl.objects.ClusterAlertCounters( + integration_id=NS.tendrl_context.integration_id, + alert_count=cluster_alert_count + ).save() + # Update volume alert count + for volume in alert_counts: + NS.gluster.objects.VolumeAlertCounters( + integration_id=NS.tendrl_context.integration_id, + alert_count=alert_counts[volume]['alert_count'], + volume_id=alert_counts[volume]['vol_id'] + ).save() + except etcd.EtcdException as ex: + logger.log( + "debug", + NS.publisher_id, + {"message": "Unable to update alert count.err: %s" % ex} + ) + + +def find_volume_id(): + alert_counts = {} + volumes = etcd_utils.read( + "clusters/%s/Volumes" % NS.tendrl_context.integration_id + ) + for volume in volumes.leaves: + try: + volume_id = volume.key.split("/")[-1] + key = volume.key + "/name" + vol_name = etcd_utils.read(key).value + alert_counts[vol_name] = {} + alert_counts[vol_name]['vol_id'] = volume_id + alert_counts[vol_name]['alert_count'] = 0 + except etcd.EtcdKeyNotFound: + continue + return alert_counts From 2c91b7e97ff6ef74c37ffa628acd859a45530f1d Mon Sep 17 00:00:00 2001 From: GowthamShanmugasundaram Date: Thu, 29 Mar 2018 13:43:05 +0530 Subject: [PATCH 2/3] Modified code as per review comment tendrl-bug-id: Tendrl/gluster-integration#598 Signed-off-by: GowthamShanmugasundaram --- .../gluster_integration/sds_sync/__init__.py | 35 +++++++------------ 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/tendrl/gluster_integration/sds_sync/__init__.py b/tendrl/gluster_integration/sds_sync/__init__.py index db11da9..1acce57 100644 --- a/tendrl/gluster_integration/sds_sync/__init__.py +++ b/tendrl/gluster_integration/sds_sync/__init__.py @@ -827,7 +827,7 @@ def 
update_cluster_alert_count(): cluster_alert_count = 0 severity = ["WARNING", "CRITICAL"] try: - alert_counts = find_volume_id() + alert_counts = get_volume_alert_counts() alerts_arr = NS.tendrl.objects.ClusterAlert( tags={'integration_id': NS.tendrl_context.integration_id} ).load_all() @@ -838,22 +838,21 @@ def update_cluster_alert_count(): if alert.resource in NS.gluster.objects.VolumeAlertCounters( )._defs['relationship'][alert.alert_type.lower()]: vol_name = alert.tags.get('volume_name', None) - if vol_name: - if vol_name in alert_counts.keys(): - alert_counts[vol_name]['alert_count'] += 1 + if vol_name and vol_name in alert_counts.keys(): + alert_counts[vol_name]['alert_count'] += 1 # Update cluster alert count NS.tendrl.objects.ClusterAlertCounters( integration_id=NS.tendrl_context.integration_id, alert_count=cluster_alert_count ).save() # Update volume alert count - for volume in alert_counts: + for volume, vol_dict in alert_counts.iteritems(): NS.gluster.objects.VolumeAlertCounters( integration_id=NS.tendrl_context.integration_id, - alert_count=alert_counts[volume]['alert_count'], - volume_id=alert_counts[volume]['vol_id'] + alert_count=vol_dict['alert_count'], + volume_id=vol_dict['vol_id'] ).save() - except etcd.EtcdException as ex: + except (etcd.EtcdException, AttributeError) as ex: logger.log( "debug", NS.publisher_id, @@ -861,19 +860,11 @@ def update_cluster_alert_count(): ) -def find_volume_id(): +def get_volume_alert_counts(): alert_counts = {} - volumes = etcd_utils.read( - "clusters/%s/Volumes" % NS.tendrl_context.integration_id - ) - for volume in volumes.leaves: - try: - volume_id = volume.key.split("/")[-1] - key = volume.key + "/name" - vol_name = etcd_utils.read(key).value - alert_counts[vol_name] = {} - alert_counts[vol_name]['vol_id'] = volume_id - alert_counts[vol_name]['alert_count'] = 0 - except etcd.EtcdKeyNotFound: - continue + volumes = NS.gluster.objects.Volume().load_all() + for volume in volumes: + alert_counts[volume.name] = 
{'vol_id': volume.vol_id, + 'alert_count': 0 + } return alert_counts From 4787a924f117e8f0c2fcbd56be0fbb04ddee3c97 Mon Sep 17 00:00:00 2001 From: GowthamShanmugasundaram Date: Wed, 4 Apr 2018 11:00:08 +0530 Subject: [PATCH 3/3] Modified code as per review comment tendrl-bug-id: Tendrl/gluster-integration#598 Signed-off-by: GowthamShanmugasundaram --- tendrl/gluster_integration/sds_sync/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tendrl/gluster_integration/sds_sync/__init__.py b/tendrl/gluster_integration/sds_sync/__init__.py index 1acce57..b63b6b6 100644 --- a/tendrl/gluster_integration/sds_sync/__init__.py +++ b/tendrl/gluster_integration/sds_sync/__init__.py @@ -828,10 +828,10 @@ def update_cluster_alert_count(): severity = ["WARNING", "CRITICAL"] try: alert_counts = get_volume_alert_counts() - alerts_arr = NS.tendrl.objects.ClusterAlert( + alerts = NS.tendrl.objects.ClusterAlert( tags={'integration_id': NS.tendrl_context.integration_id} ).load_all() - for alert in alerts_arr: + for alert in alerts: alert.tags = json.loads(alert.tags) if alert.severity in severity: cluster_alert_count += 1