Skip to content

Commit

Permalink
Merge pull request #599 from GowthamShanmugam/volume_alert_count_fix
Browse files Browse the repository at this point in the history
Calculating and updating cluster and volume alert_count in each cluster sync
  • Loading branch information
r0h4n authored Apr 9, 2018
2 parents 2abb431 + 4787a92 commit 209923a
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 42 deletions.
12 changes: 12 additions & 0 deletions tendrl/gluster_integration/objects/definition/gluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,18 @@ namespace.gluster:
volume_id:
help: "Id of the volume"
type: String
relationship:
utilization:
- volume_utilization
status:
- volume_status
- volume_state
- brick_status
- quorum
- ec_min_bricks_up
- afr_quorum_state
- afr_subvol_state
- georep_status
value: /clusters/{0}/Volumes/{1}
list: /clusters/{0}/Volumes/{1}
help: "Volume Alert Counter"
Expand Down
91 changes: 49 additions & 42 deletions tendrl/gluster_integration/sds_sync/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,21 +76,6 @@ def run(self):
NS.publisher_id,
{"message": "Failed to sync cluster network details"}
)

if NS.tendrl_context.integration_id:
# Initialize alert node alert count
try:
key = 'clusters/%s/nodes/%s/alert_counters' % (
NS.tendrl_context.integration_id,
NS.node_context.node_id
)
etcd_utils.read(key)
except(etcd.EtcdException)as ex:
if type(ex) == etcd.EtcdKeyNotFound:
NS.tendrl.objects.ClusterNodeAlertCounters(
node_id=NS.node_context.node_id,
integration_id=NS.tendrl_context.integration_id
).save()
_sleep = 0
while not self._complete.is_set():
# To detect out of band deletes
Expand Down Expand Up @@ -294,7 +279,8 @@ def run(self):
"sync_interval", 10
)) + len(volumes) * 4
)

# update alert count
update_cluster_alert_count()
# check and enable volume profiling
if "provisioner/%s" % NS.tendrl_context.integration_id in \
NS.node_context.tags:
Expand All @@ -318,17 +304,6 @@ def run(self):
) in ['', 'finished', 'failed'] and \
_cluster.status in [None, ""]:
_cluster.save()
# Initialize alert count
try:
alerts_count_key = '/clusters/%s/alert_counters' % (
NS.tendrl_context.integration_id)
etcd_utils.read(alerts_count_key)
except(etcd.EtcdException)as ex:
if type(ex) == etcd.EtcdKeyNotFound:
NS.tendrl.objects.ClusterAlertCounters(
integration_id=NS.tendrl_context.integration_id
).save()

except Exception as ex:
Event(
ExceptionMessage(
Expand Down Expand Up @@ -542,21 +517,6 @@ def sync_volumes(volumes, index, vol_options, sync_ttl):
}
)
volume.save(ttl=sync_ttl)

# Initialize volume alert count
try:
volume_alert_count_key = '/clusters/%s/Volumes/%s/'\
'alert_counters' % (
NS.tendrl_context.integration_id,
volumes['volume%s.id' % index]
)
etcd_utils.read(volume_alert_count_key)
except(etcd.EtcdException)as ex:
if type(ex) == etcd.EtcdKeyNotFound:
NS.gluster.objects.VolumeAlertCounters(
integration_id=NS.tendrl_context.integration_id,
volume_id=volumes['volume%s.id' % index]
).save()
# Save the default values of volume options
vol_opt_dict = {}
for opt_count in \
Expand Down Expand Up @@ -861,3 +821,50 @@ def brick_status_alert(hostname):
finally:
if isinstance(lock, etcd.lock.Lock) and lock.is_acquired:
lock.release()


def update_cluster_alert_count():
    """Recalculate and save the cluster and per-volume alert counters.

    Loads every ``ClusterAlert`` tagged with the current integration id.
    Each alert whose severity is WARNING or CRITICAL adds one to the
    cluster count. If its resource is also listed in the
    ``VolumeAlertCounters`` ``relationship`` definition for its alert
    type, and its ``volume_name`` tag names a known volume, it adds one
    to that volume's count as well. The totals are then saved through
    ``ClusterAlertCounters`` and ``VolumeAlertCounters``.

    ``etcd.EtcdException`` and ``AttributeError`` raised along the way are
    logged at debug level and not re-raised.
    """
    cluster_alert_count = 0
    severity = ("WARNING", "CRITICAL")
    try:
        alert_counts = get_volume_alert_counts()
        alerts = NS.tendrl.objects.ClusterAlert(
            tags={'integration_id': NS.tendrl_context.integration_id}
        ).load_all()
        # The resource lists per alert type do not change between alerts,
        # so look them up once instead of once per alert.
        relationship = NS.gluster.objects.VolumeAlertCounters(
        )._defs['relationship']
        for alert in alerts:
            alert.tags = json.loads(alert.tags)
            if alert.severity not in severity:
                continue
            cluster_alert_count += 1
            # NOTE(review): only WARNING/CRITICAL alerts are counted
            # against a volume (this lookup sits under the severity
            # check) - confirm this is the intended nesting.
            try:
                volume_resources = relationship[alert.alert_type.lower()]
            except KeyError:
                # No volume resources are defined for this alert type, so
                # the alert only counts towards the cluster total.
                continue
            if alert.resource not in volume_resources:
                continue
            vol_name = alert.tags.get('volume_name', None)
            # Direct dict membership; avoids building a key list.
            if vol_name and vol_name in alert_counts:
                alert_counts[vol_name]['alert_count'] += 1
        # Update cluster alert count
        NS.tendrl.objects.ClusterAlertCounters(
            integration_id=NS.tendrl_context.integration_id,
            alert_count=cluster_alert_count
        ).save()
        # Update volume alert count
        for vol_dict in alert_counts.values():
            NS.gluster.objects.VolumeAlertCounters(
                integration_id=NS.tendrl_context.integration_id,
                alert_count=vol_dict['alert_count'],
                volume_id=vol_dict['vol_id']
            ).save()
    except (etcd.EtcdException, AttributeError) as ex:
        logger.log(
            "debug",
            NS.publisher_id,
            {"message": "Unable to update alert count.err: %s" % ex}
        )


def get_volume_alert_counts():
    """Return a zeroed alert counter entry for every loaded volume.

    The result maps each volume's ``name`` to a dict holding that
    volume's ``vol_id`` under ``'vol_id'`` and an ``'alert_count'``
    starting at 0, for all objects returned by
    ``NS.gluster.objects.Volume().load_all()``.
    """
    loaded_volumes = NS.gluster.objects.Volume().load_all()
    return {
        vol.name: {'vol_id': vol.vol_id, 'alert_count': 0}
        for vol in loaded_volumes
    }

0 comments on commit 209923a

Please sign in to comment.