Skip to content

Commit

Permalink
Calculating and updating cluster and volume alert_count in each clus…
Browse files Browse the repository at this point in the history
…ter sync

tendrl-bug-id: #598

Signed-off-by: GowthamShanmugasundaram <[email protected]>
  • Loading branch information
GowthamShanmugam committed Apr 3, 2018
1 parent deb9c4c commit 7a3ad82
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 44 deletions.
12 changes: 12 additions & 0 deletions tendrl/gluster_integration/objects/definition/gluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,18 @@ namespace.gluster:
volume_id:
help: "Id of the volume"
type: String
relationship:
utilization:
- volume_utilization
status:
- volume_status
- volume_state
- brick_status
- quorum
- ec_min_bricks_up
- afr_quorum_state
- afr_subvol_state
- georep_status
value: /clusters/{0}/Volumes/{1}
list: /clusters/{0}/Volumes/{1}
help: "Volume Alert Counter"
Expand Down
102 changes: 58 additions & 44 deletions tendrl/gluster_integration/sds_sync/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@

from tendrl.commons.event import Event
from tendrl.commons.message import ExceptionMessage
from tendrl.commons.objects.cluster_alert_counters import \
ClusterAlertCounters
from tendrl.commons import sds_sync
from tendrl.commons.utils import cmd_utils
from tendrl.commons.utils import etcd_utils
Expand Down Expand Up @@ -78,21 +76,6 @@ def run(self):
NS.publisher_id,
{"message": "Failed to sync cluster network details"}
)

if NS.tendrl_context.integration_id:
# Initialize alert node alert count
try:
key = 'clusters/%s/nodes/%s/alert_counters' % (
NS.tendrl_context.integration_id,
NS.node_context.node_id
)
etcd_utils.read(key)
except(etcd.EtcdException)as ex:
if type(ex) == etcd.EtcdKeyNotFound:
NS.tendrl.objects.ClusterNodeAlertCounters(
node_id=NS.node_context.node_id,
integration_id=NS.tendrl_context.integration_id
).save()
_sleep = 0
while not self._complete.is_set():
# To detect out of band deletes
Expand Down Expand Up @@ -295,7 +278,8 @@ def run(self):
"sync_interval", 10
)) + len(volumes) * 4
)

# update alert count
update_cluster_alert_count()
# check and enable volume profiling
if "provisioner/%s" % NS.tendrl_context.integration_id in \
NS.node_context.tags:
Expand All @@ -319,17 +303,6 @@ def run(self):
) in ['', 'finished', 'failed'] and \
_cluster.status in [None, ""]:
_cluster.save()
# Initialize alert count
try:
alerts_count_key = '/clusters/%s/alert_counters' % (
NS.tendrl_context.integration_id)
etcd_utils.read(alerts_count_key)
except(etcd.EtcdException)as ex:
if type(ex) == etcd.EtcdKeyNotFound:
ClusterAlertCounters(
integration_id=NS.tendrl_context.integration_id
).save()

except Exception as ex:
Event(
ExceptionMessage(
Expand Down Expand Up @@ -543,21 +516,6 @@ def sync_volumes(volumes, index, vol_options, sync_ttl):
}
)
volume.save(ttl=sync_ttl)

# Initialize volume alert count
try:
volume_alert_count_key = '/clusters/%s/Volumes/%s/'\
'alert_counters' % (
NS.tendrl_context.integration_id,
volumes['volume%s.id' % index]
)
etcd_utils.read(volume_alert_count_key)
except(etcd.EtcdException)as ex:
if type(ex) == etcd.EtcdKeyNotFound:
NS.gluster.objects.VolumeAlertCounters(
integration_id=NS.tendrl_context.integration_id,
volume_id=volumes['volume%s.id' % index]
).save()
# Save the default values of volume options
vol_opt_dict = {}
for opt_count in \
Expand Down Expand Up @@ -862,3 +820,59 @@ def brick_status_alert(hostname):
finally:
if isinstance(lock, etcd.lock.Lock) and lock.is_acquired:
lock.release()


def update_cluster_alert_count():
    """Recompute and persist cluster-level and per-volume alert counters.

    Counts all WARNING/CRITICAL alerts tagged with this cluster's
    integration id, attributes volume-related alerts to their volume
    (using the ``relationship`` map declared on VolumeAlertCounters),
    then saves the resulting ClusterAlertCounters and one
    VolumeAlertCounters object per known volume.
    """
    cluster_alert_count = 0
    # Only alerts at these severities contribute to the counters.
    severities = ("WARNING", "CRITICAL")
    try:
        # volume name -> {'vol_id': ..., 'alert_count': 0} for every
        # volume currently present in etcd.
        alert_counts = find_volume_id()
        # Hoist the relationship map lookup out of the loop; it is
        # loop-invariant and constructing the object is not free.
        volume_alert_resources = NS.gluster.objects.VolumeAlertCounters(
        )._defs['relationship']
        alerts_arr = NS.tendrl.objects.ClusterAlert(
            tags={'integration_id': NS.tendrl_context.integration_id}
        ).load_all()
        for alert in alerts_arr:
            alert.tags = json.loads(alert.tags)
            if alert.severity not in severities:
                continue
            cluster_alert_count += 1
            # Use .get() with a default so an alert_type that has no
            # relationship entry (anything other than 'utilization' or
            # 'status') cannot raise KeyError and abort the recount —
            # that KeyError would not be caught by the etcd handler
            # below.
            if alert.resource in volume_alert_resources.get(
                    alert.alert_type.lower(), []):
                vol_name = alert.tags.get('volume_name', None)
                if vol_name and vol_name in alert_counts:
                    alert_counts[vol_name]['alert_count'] += 1
        # Update cluster alert count
        NS.tendrl.objects.ClusterAlertCounters(
            integration_id=NS.tendrl_context.integration_id,
            alert_count=cluster_alert_count
        ).save()
        # Update volume alert count
        for volume in alert_counts:
            NS.gluster.objects.VolumeAlertCounters(
                integration_id=NS.tendrl_context.integration_id,
                alert_count=alert_counts[volume]['alert_count'],
                volume_id=alert_counts[volume]['vol_id']
            ).save()
    except etcd.EtcdException as ex:
        # Best-effort: a failed recount is logged, not fatal to sync.
        logger.log(
            "debug",
            NS.publisher_id,
            {"message": "Unable to update alert count.err: %s" % ex}
        )


def find_volume_id():
    """Build a name-keyed table of this cluster's volumes.

    Reads ``clusters/<integration_id>/Volumes`` from etcd and returns a
    dict mapping volume name -> {'vol_id': <id>, 'alert_count': 0}.
    Volumes whose ``name`` key is missing from etcd are skipped.
    """
    counters = {}
    volumes_dir = etcd_utils.read(
        "clusters/%s/Volumes" % NS.tendrl_context.integration_id
    )
    for node in volumes_dir.leaves:
        # The volume id is the last path component of the etcd key.
        vol_id = node.key.split("/")[-1]
        try:
            vol_name = etcd_utils.read(node.key + "/name").value
        except etcd.EtcdKeyNotFound:
            # Partially-synced volume: no name yet, leave it out.
            continue
        counters[vol_name] = {'vol_id': vol_id, 'alert_count': 0}
    return counters

0 comments on commit 7a3ad82

Please sign in to comment.