1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
|
# -*- coding: utf-8 -*-
# Description: zscores netdata python.d module
# Author: andrewm4894
# SPDX-License-Identifier: GPL-3.0-or-later
from datetime import datetime
import re
import requests
import numpy as np
import pandas as pd
from bases.FrameworkServices.SimpleService import SimpleService
from netdata_pandas.data import get_data, get_allmetrics
# Module-level collector settings.
# NOTE(review): these names follow the netdata python.d convention and are
# presumably read by the plugin framework rather than by this file - confirm.
priority = 60000
update_every = 5
disabled_by_default = True

# Chart ids in the order they are handed to the service (see Service.order).
ORDER = [
    'z',
    '3stddev',
]

# Chart definitions keyed by chart id. 'lines' starts empty because the
# dimensions are added at runtime by Service.validate_charts.
# NOTE(review): 'options' presumably follows the python.d layout
# [name, title, units, family, context, chart type] - confirm against the
# netdata collector documentation.
CHARTS = {
    'z': {
        'options': ['z', 'Z Score', 'z', 'Z Score', 'zscores.z', 'line'],
        'lines': [],
    },
    '3stddev': {
        'options': ['3stddev', 'Z Score >3', 'count', '3 Stddev', 'zscores.3stddev', 'stacked'],
        'lines': [],
    },
}
class Service(SimpleService):
    """Collector that turns the latest metric values into smoothed z-scores.

    For every chart in scope the service periodically stores a mean and a
    standard deviation (``train_model``), scores the most recent values
    against them (``create_data``) and publishes two charts: ``z`` with the
    smoothed scores and ``3stddev`` with a 0/1 flag per dimension.

    Configuration keys read in ``__init__`` (with their defaults): ``host``,
    ``charts_regex``, ``charts_to_exclude``, ``train_secs``, ``offset_secs``,
    ``train_every_n``, ``z_smooth_n``, ``z_clip``, ``z_abs``, ``burn_in``,
    ``mode`` and ``per_chart_agg``.
    """

    # Scores are multiplied by this factor before being returned; the 'z'
    # chart dimensions are registered with the same value as divisor.
    _Z_SCALE = 100
    # Scaled |z| above which a dimension is flagged on the '3stddev' chart.
    _FLAG_THRESHOLD = 3 * _Z_SCALE
    # Suffix that distinguishes 'z' chart dimension ids from '3stddev' ones.
    _Z_SUFFIX = '_z'

    def __init__(self, configuration=None, name=None):
        """Read the job configuration and resolve the list of charts to score.

        Performs an HTTP GET against ``http://<host>/api/v1/charts`` to
        discover the available chart ids, so constructing the service needs
        the configured host to be reachable.

        :param configuration <dict>: job configuration passed on to SimpleService.
        :param name <str>: job name passed on to SimpleService.
        """
        SimpleService.__init__(self, configuration=configuration, name=name)
        self.host = self.configuration.get('host', '127.0.0.1:19999')
        self.charts_regex = re.compile(self.configuration.get('charts_regex', 'system.*'))
        # Strip whitespace and drop empty entries so that "a, b" excludes both
        # charts and an unset option yields an empty list.
        self.charts_to_exclude = [
            chart.strip()
            for chart in self.configuration.get('charts_to_exclude', '').split(',')
            if chart.strip()
        ]
        available_charts = requests.get(f'http://{self.host}/api/v1/charts').json()['charts']
        self.charts_in_scope = [
            chart for chart in available_charts
            if self.charts_regex.match(chart) and chart not in self.charts_to_exclude
        ]
        self.train_secs = self.configuration.get('train_secs', 14400)
        self.offset_secs = self.configuration.get('offset_secs', 300)
        self.train_every_n = self.configuration.get('train_every_n', 900)
        self.z_smooth_n = self.configuration.get('z_smooth_n', 15)
        self.z_clip = self.configuration.get('z_clip', 10)
        self.z_abs = bool(self.configuration.get('z_abs', True))
        self.burn_in = self.configuration.get('burn_in', 2)
        self.mode = self.configuration.get('mode', 'per_chart')
        self.per_chart_agg = self.configuration.get('per_chart_agg', 'mean')
        self.order = ORDER
        self.definitions = CHARTS
        # dimension ids currently registered on each chart
        self.collected_dims = {'z': set(), '3stddev': set()}
        self.df_mean = pd.DataFrame()
        self.df_std = pd.DataFrame()
        # one row per run, one column per metric; trimmed to z_smooth_n rows
        self.df_z_history = pd.DataFrame()

    def check(self):
        """Probe the allmetrics endpoint once for the charts in scope.

        The response is discarded; any exception raised by the request
        propagates to the caller.

        :return: <bool> always True when the request did not raise.
        """
        get_allmetrics(self.host, self.charts_in_scope, wide=True, col_sep='.')
        return True

    def validate_charts(self, chart, data, algorithm='absolute', multiplier=1, divisor=1):
        """Keep the dimensions of ``chart`` in sync with the keys of ``data``.

        Keys not seen before are added as dimensions; dimensions that are no
        longer present in ``data`` are deleted from the chart.

        :param chart <str>: chart id, a key of ``self.collected_dims``.
        :param data <dict>: latest values keyed by dimension id.
        :param algorithm <str>: algorithm used for newly added dimensions.
        :param multiplier <int>: multiplier used for newly added dimensions.
        :param divisor <int>: divisor used for newly added dimensions.
        """
        known_dims = self.collected_dims[chart]
        for dim in data:
            if dim not in known_dims:
                known_dims.add(dim)
                self.charts[chart].add_dimension([dim, dim, algorithm, multiplier, divisor])

        # iterate over a copy because the set is modified inside the loop
        for dim in list(known_dims):
            if dim not in data:
                known_dims.remove(dim)
                self.charts[chart].del_dimension(dim, hide=False)

    def train_model(self):
        """Calculate the mean and stddev for all relevant metrics and store them for use in calculating
        the zscore at each timestep.

        The training window ends ``offset_secs`` before now and spans
        ``train_secs`` seconds. Results are stored in ``self.df_mean`` (column
        ``mean``) and ``self.df_std`` (column ``std``), one row per metric.
        """
        before = int(datetime.now().timestamp()) - self.offset_secs
        after = before - self.train_secs

        self.df_mean = get_data(
            self.host, self.charts_in_scope, after, before, points=10, group='average', col_sep='.'
        ).mean().to_frame().rename(columns={0: "mean"})

        self.df_std = get_data(
            self.host, self.charts_in_scope, after, before, points=10, group='stddev', col_sep='.'
        ).mean().to_frame().rename(columns={0: "std"})

    def create_data(self, df_allmetrics):
        """Use x, mean, stddev to generate z scores and 3stddev flags via some pandas manipulation.

        Returning two dictionaries of dimensions and measures, one for each chart.
        Scores are clipped to ``[-z_clip, z_clip]``, multiplied by 100, made
        absolute when ``z_abs`` is set, and averaged over the last
        ``z_smooth_n`` calls. With ``mode == 'per_chart'`` they are further
        aggregated per chart using ``per_chart_agg`` (``'absmax'`` picks the
        value with the largest magnitude, anything else is passed to pandas
        as an aggregation name).

        :param df_allmetrics <pd.DataFrame>: pandas dataframe with latest data from api/v1/allmetrics,
            indexed by metric and holding the latest reading in a ``value`` column.
        :return: (<dict>,<dict>) tuple of dictionaries, one for zscores and the other for a flag if abs(z)>3.
        """
        # calculate clipped z score for each metric that has mean, std and a latest value
        df_z = pd.concat([self.df_mean, self.df_std, df_allmetrics], axis=1, join='inner')
        df_z['z'] = (
            ((df_z['value'] - df_z['mean']) / df_z['std'])
            .clip(-self.z_clip, self.z_clip)
            .fillna(0) * self._Z_SCALE
        )
        if self.z_abs:
            df_z['z'] = df_z['z'].abs()

        # append the latest scores as one wide row and keep the last z_smooth_n rows;
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed
        latest_row = df_z[['z']].reset_index().pivot_table(values='z', columns='index')
        if self.df_z_history.empty:
            frames = [latest_row]
        else:
            frames = [self.df_z_history, latest_row]
        self.df_z_history = pd.concat(frames, sort=True).tail(self.z_smooth_n)

        # get average zscore for last z_smooth_n for each metric; var_name is explicit so the
        # groupby key does not depend on the columns name surviving concatenation
        df_z_smooth = (
            self.df_z_history
            .melt(var_name='index', value_name='z')
            .groupby('index')['z']
            .mean()
            .to_frame()
        )
        data_z = df_z_smooth['z'].add_suffix(self._Z_SUFFIX).to_dict()

        # aggregate to chart level if specified
        if self.mode == 'per_chart':
            # chart id is taken to be the first two dot-separated parts of the metric name
            df_z_smooth['chart'] = [
                '.'.join(parts[0:2]) + self._Z_SUFFIX
                for parts in df_z_smooth.index.str.split('.').to_list()
            ]
            z_by_chart = df_z_smooth.groupby('chart')['z']
            if self.per_chart_agg == 'absmax':
                # aggregate the column directly so the result is a {chart: z} mapping
                data_z = z_by_chart.agg(lambda x: max(x, key=abs)).to_dict()
            else:
                data_z = z_by_chart.agg(self.per_chart_agg).to_dict()

        # flag dimensions whose smoothed |z| exceeds 3 (scores are scaled by 100)
        data_3stddev = {}
        for key, z in data_z.items():
            # remove only the trailing suffix; ids may contain '_z' elsewhere
            dim = key[:-len(self._Z_SUFFIX)] if key.endswith(self._Z_SUFFIX) else key
            data_3stddev[dim] = 1 if abs(z) > self._FLAG_THRESHOLD else 0

        return data_z, data_3stddev

    def get_data(self):
        """Collect the latest metrics and return the values for both charts.

        Retrains while ``runs_counter`` is within the first ``burn_in`` runs
        and then on every ``train_every_n``-th run.

        :return: <dict> merged mapping of 'z' and '3stddev' dimension ids to values.
        """
        needs_training = (
            self.runs_counter <= self.burn_in
            or self.runs_counter % self.train_every_n == 0
        )
        if needs_training:
            self.train_model()

        df_latest = get_allmetrics(self.host, self.charts_in_scope, wide=True, col_sep='.').transpose()
        data_z, data_3stddev = self.create_data(df_latest)
        data = {**data_z, **data_3stddev}

        self.validate_charts('z', data_z, divisor=self._Z_SCALE)
        self.validate_charts('3stddev', data_3stddev)

        return data
|