"""
algorithm_scores_plot.py
"""
import logging
import traceback
from os import path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
[docs]def get_algorithm_scores_plot(
current_skyline_app, output_file, timeseries, algorithm, anomalous,
anomalies, scores, anomaly_window=1, anomalies_in_window=None,
unreliable=False, low_entropy_value=None):
"""
Creates a png graph image using the vortex results data.
:param current_skyline_app: the Skyline app name calling the function
:param output_file: full path and filename to output where the png image is
to be saved to
:param timeseries: the time series
:param algorithm: the algorithm
:param anomalous: anomalous
:param anomalies: the anomalies dict
:param scores: the scores list
:param anomaly_window: the anomaly window
:param anomaly_in_window: the number of anomalies in the anomaly_window
:param unreliable: unreliable
:param low_entropy_value: the spectral_entropy low_entropy_value if there is one
:type current_skyline_app: str
:type output_file: str
:type timeseries: list
:type algorithm: str
:type anomalous: boolean
:type anomalies: dict
:type scores: list
:type anomaly_window: int
:type anomaly_in_window: int
:type unreliable: boolean
:type low_entropy_value: float
:return: file
:rtype: boolean|str
"""
current_skyline_app_logger = current_skyline_app + 'Log'
current_logger = logging.getLogger(current_skyline_app_logger)
if path.isfile(output_file):
current_logger.info('get_algorithm_scores_plot - graph image already exists - %s' % output_file)
return output_file
try:
current_logger.info('get_algorithm_scores_plot - creating graph image - %s' % output_file)
params = {
'axes.labelsize': 10,
'axes.titlesize': 10,
'xtick.labelsize': 10,
}
plt.rcParams.update(params)
plt.rcParams['xtick.labelsize'] = 8
plt.rcParams['ytick.labelsize'] = 8
anomalies_indices = []
data = []
for index, item in enumerate(timeseries):
score = 0
anomaly_value = np.nan
try:
score = scores[index]
except:
score = np.nan
try:
ts = int(item[0])
except:
continue
try:
if anomalies[ts]:
anomalies_indices.append(index)
anomaly_value = item[1]
except:
# Try a string as it is coerced by JSON
try:
ts_str = str(ts)
if anomalies[ts_str]:
anomalies_indices.append(index)
anomaly_value = item[1]
except:
pass
data.append([item[0], item[1], score, anomaly_value])
current_logger.info('get_algorithm_scores_plot - %s - anomalous: %s, total anomalies: %s, requested anomaly window: last %s data points, anomalies in window: %s, anomalies_indices: %s' % (
algorithm, str(anomalous), str(len(anomalies)), str(anomaly_window),
str(anomalies_in_window), str(len(anomalies_indices))))
ylabel = '%s score' % algorithm
df = pd.DataFrame(data, columns=['date', 'value', 'score', 'anomaly'])
df['date'] = pd.to_datetime(df['date'], unit='s')
datetime_index = pd.DatetimeIndex(df['date'].values)
df = df.set_index(datetime_index)
df.drop('date', axis=1, inplace=True)
fig, ax1 = plt.subplots(1, 1, figsize=(8, 4))
title = '%s - anomalous: %s, total anomalies: %s\nrequested anomaly window: last %s data points, anomalies in window: %s' % (
algorithm, str(anomalous), str(len(anomalies)), str(anomaly_window),
str(anomalies_in_window))
if algorithm == 'sigma':
title = '%s - anomalous: %s, total anomalies: %s\nrequested anomaly window: last %s data points, anomalies in window: %s\nONLY THE LAST %s DATA POINT ARE ANALYSED FOR ANOMALIES WITH sigma NOT ALL' % (
algorithm, str(anomalous), str(len(anomalies)), str(anomaly_window),
str(anomalies_in_window), str(anomaly_window))
if algorithm == 'spectral_entropy':
if low_entropy_value or low_entropy_value == 0.0:
title = '%s (low_entropy_value: %s) - anomalous: %s, total anomalies: %s\nrequested anomaly window: last %s data points, anomalies in window: %s' % (
algorithm, str(round(low_entropy_value, 3)), str(anomalous), str(len(anomalies)), str(anomaly_window),
str(anomalies_in_window))
if unreliable:
title = '%s\nUNRELIABLE RESULTS' % title
plt.title(title)
ax1b = ax1.twinx()
plot1a, = ax1.plot(df.index, df.value, lw=0.4, alpha=1.0, zorder=1)
plot1b, = ax1b.plot(df.index, df.score, color='orange', lw=0.4, alpha=0.7, zorder=3)
s = [5 for item in df['anomaly'].tolist()]
c = ['red' for item in df['anomaly'].tolist()]
plot1c = ax1.scatter(df.index, df.anomaly, s=s, c=c)
ax1.set_ylabel('value', fontsize='small')
ax1b.set_ylabel(ylabel, fontsize='small')
ax1.set_xlabel('Date', fontsize='small')
ax1.xaxis.set_tick_params(labelsize='small')
plt.legend([plot1a, plot1b, plot1c], df.columns, loc='best')
# defining display layout
plt.tight_layout()
plt.savefig(output_file, format='png')
fig.clf()
plt.close(fig)
current_logger.info('get_algorithm_scores_plot - created graph image - %s' % output_file)
except Exception as err:
current_logger.error(traceback.format_exc())
current_logger.error('error :: get_algorithm_scores_plot :: failed to create %s - %s' % (output_file, err))
return False
return output_file