diff --git a/tendrl/gluster_integration/objects/definition/gluster.yaml b/tendrl/gluster_integration/objects/definition/gluster.yaml index 5fee319..02ca5d8 100644 --- a/tendrl/gluster_integration/objects/definition/gluster.yaml +++ b/tendrl/gluster_integration/objects/definition/gluster.yaml @@ -108,6 +108,18 @@ namespace.gluster: volume_id: help: "Id of the volume" type: String + relationship: + utilization: + - volume_utilization + status: + - volume_status + - volume_state + - brick_status + - quorum + - ec_min_bricks_up + - afr_quorum_state + - afr_subvol_state + - georep_status value: /clusters/{0}/Volumes/{1} list: /clusters/{0}/Volumes/{1} help: "Volume Alert Counter" diff --git a/tendrl/gluster_integration/sds_sync/__init__.py b/tendrl/gluster_integration/sds_sync/__init__.py index 8c87ef2..b63b6b6 100644 --- a/tendrl/gluster_integration/sds_sync/__init__.py +++ b/tendrl/gluster_integration/sds_sync/__init__.py @@ -76,21 +76,6 @@ def run(self): NS.publisher_id, {"message": "Failed to sync cluster network details"} ) - - if NS.tendrl_context.integration_id: - # Initialize alert node alert count - try: - key = 'clusters/%s/nodes/%s/alert_counters' % ( - NS.tendrl_context.integration_id, - NS.node_context.node_id - ) - etcd_utils.read(key) - except(etcd.EtcdException)as ex: - if type(ex) == etcd.EtcdKeyNotFound: - NS.tendrl.objects.ClusterNodeAlertCounters( - node_id=NS.node_context.node_id, - integration_id=NS.tendrl_context.integration_id - ).save() _sleep = 0 while not self._complete.is_set(): # To detect out of band deletes @@ -294,7 +279,8 @@ def run(self): "sync_interval", 10 )) + len(volumes) * 4 ) - + # update alert count + update_cluster_alert_count() # check and enable volume profiling if "provisioner/%s" % NS.tendrl_context.integration_id in \ NS.node_context.tags: @@ -318,17 +304,6 @@ def run(self): ) in ['', 'finished', 'failed'] and \ _cluster.status in [None, ""]: _cluster.save() - # Initialize alert count - try: - alerts_count_key = '/clusters/%s/alert_counters' % ( - NS.tendrl_context.integration_id) - etcd_utils.read(alerts_count_key) - except(etcd.EtcdException)as ex: - if type(ex) == etcd.EtcdKeyNotFound: - NS.tendrl.objects.ClusterAlertCounters( - integration_id=NS.tendrl_context.integration_id - ).save() - except Exception as ex: Event( ExceptionMessage( @@ -542,21 +517,6 @@ def sync_volumes(volumes, index, vol_options, sync_ttl): } ) volume.save(ttl=sync_ttl) - - # Initialize volume alert count - try: - volume_alert_count_key = '/clusters/%s/Volumes/%s/'\ - 'alert_counters' % ( - NS.tendrl_context.integration_id, - volumes['volume%s.id' % index] - ) - etcd_utils.read(volume_alert_count_key) - except(etcd.EtcdException)as ex: - if type(ex) == etcd.EtcdKeyNotFound: - NS.gluster.objects.VolumeAlertCounters( - integration_id=NS.tendrl_context.integration_id, - volume_id=volumes['volume%s.id' % index] - ).save() # Save the default values of volume options vol_opt_dict = {} for opt_count in \ @@ -861,3 +821,50 @@ def brick_status_alert(hostname): finally: if isinstance(lock, etcd.lock.Lock) and lock.is_acquired: lock.release() + + +def update_cluster_alert_count(): + cluster_alert_count = 0 + severity = ["WARNING", "CRITICAL"] + try: + alert_counts = get_volume_alert_counts() + alerts = NS.tendrl.objects.ClusterAlert( + tags={'integration_id': NS.tendrl_context.integration_id} + ).load_all() + for alert in alerts: + alert.tags = json.loads(alert.tags) + if alert.severity in severity: + cluster_alert_count += 1 + if alert.resource in NS.gluster.objects.VolumeAlertCounters( + )._defs['relationship'][alert.alert_type.lower()]: + vol_name = alert.tags.get('volume_name', None) + if vol_name and vol_name in alert_counts.keys(): + alert_counts[vol_name]['alert_count'] += 1 + # Update cluster alert count + NS.tendrl.objects.ClusterAlertCounters( + integration_id=NS.tendrl_context.integration_id, + alert_count=cluster_alert_count + ).save() + # Update volume alert count + for volume, vol_dict in alert_counts.iteritems(): + NS.gluster.objects.VolumeAlertCounters( + integration_id=NS.tendrl_context.integration_id, + alert_count=vol_dict['alert_count'], + volume_id=vol_dict['vol_id'] + ).save() + except (etcd.EtcdException, AttributeError) as ex: + logger.log( + "debug", + NS.publisher_id, + {"message": "Unable to update alert count.err: %s" % ex} + ) + + +def get_volume_alert_counts(): + alert_counts = {} + volumes = NS.gluster.objects.Volume().load_all() + for volume in volumes: + alert_counts[volume.name] = {'vol_id': volume.vol_id, + 'alert_count': 0 + } + return alert_counts