Source code for algorithm_scores_plot

"""
algorithm_scores_plot.py
"""
import logging
import traceback
from os import path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def get_algorithm_scores_plot(
    current_skyline_app, output_file, timeseries, algorithm, anomalous,
        anomalies, scores, anomaly_window=1, anomalies_in_window=None,
        unreliable=False, low_entropy_value=None):
    """
    Creates a png graph image using the vortex results data.

    The time series values, the per data point algorithm scores (on a second
    y axis) and the data points flagged in ``anomalies`` are plotted and saved
    to ``output_file``.  If ``output_file`` already exists nothing is plotted
    and the existing path is returned.

    Note that this function updates the global matplotlib ``rcParams`` (label,
    title and tick label sizes).

    :param current_skyline_app: the Skyline app name calling the function,
        ``'Log'`` is appended to it to determine the logger name
    :param output_file: full path and filename to output where the png image is
        to be saved to
    :param timeseries: the time series, items are indexed as
        ``[timestamp, value]``
    :param algorithm: the algorithm
    :param anomalous: anomalous
    :param anomalies: the anomalies dict, keyed by timestamp (int keys are
        tried first, then str keys as they are coerced by JSON)
    :param scores: the scores list, looked up by the index of the data point
        in the time series
    :param anomaly_window: the anomaly window
    :param anomalies_in_window: the number of anomalies in the anomaly_window
    :param unreliable: unreliable
    :param low_entropy_value: the spectral_entropy low_entropy_value if there is one
    :type current_skyline_app: str
    :type output_file: str
    :type timeseries: list
    :type algorithm: str
    :type anomalous: boolean
    :type anomalies: dict
    :type scores: list
    :type anomaly_window: int
    :type anomalies_in_window: int
    :type unreliable: boolean
    :type low_entropy_value: float
    :return: the output_file path if the image exists or was created, False
        if the image could not be created
    :rtype: boolean|str

    """

    current_logger = logging.getLogger(current_skyline_app + 'Log')

    if path.isfile(output_file):
        current_logger.info('get_algorithm_scores_plot - graph image already exists - %s', output_file)
        return output_file

    # Track the figure so that it is always released, even when plotting or
    # saving fails part way through.
    fig = None
    try:
        current_logger.info('get_algorithm_scores_plot - creating graph image - %s', output_file)

        plt.rcParams.update({
            'axes.labelsize': 10,
            'axes.titlesize': 10,
            'xtick.labelsize': 8,
            'ytick.labelsize': 8,
        })

        data, anomalies_indices = _build_algorithm_scores_plot_rows(
            timeseries, scores, anomalies)

        current_logger.info(
            'get_algorithm_scores_plot - %s - anomalous: %s, total anomalies: %s, requested anomaly window: last %s data points, anomalies in window: %s, anomalies_indices: %s',
            algorithm, str(anomalous), str(len(anomalies)), str(anomaly_window),
            str(anomalies_in_window), str(len(anomalies_indices)))

        title = _build_algorithm_scores_plot_title(
            algorithm, anomalous, anomalies, anomaly_window,
            anomalies_in_window, unreliable, low_entropy_value)

        ylabel = '%s score' % algorithm
        df = pd.DataFrame(data, columns=['date', 'value', 'score', 'anomaly'])
        df['date'] = pd.to_datetime(df['date'], unit='s')
        datetime_index = pd.DatetimeIndex(df['date'].values)
        df = df.set_index(datetime_index)
        df.drop('date', axis=1, inplace=True)

        fig, ax1 = plt.subplots(1, 1, figsize=(8, 4))
        plt.title(title)
        # The scores are drawn against their own y axis sharing the x axis
        ax1b = ax1.twinx()
        plot1a, = ax1.plot(df.index, df.value, lw=0.4, alpha=1.0, zorder=1)
        plot1b, = ax1b.plot(df.index, df.score, color='orange', lw=0.4, alpha=0.7, zorder=3)
        # The anomaly column is NaN for every data point that is not flagged,
        # so only the flagged data points have a value to draw a marker at.
        plot1c = ax1.scatter(df.index, df.anomaly, s=5, c='red')
        ax1.set_ylabel('value', fontsize='small')
        ax1b.set_ylabel(ylabel, fontsize='small')
        ax1.set_xlabel('Date', fontsize='small')
        ax1.xaxis.set_tick_params(labelsize='small')
        # The remaining DataFrame columns (value, score, anomaly) are used as
        # the legend labels, in the same order as the handles.
        plt.legend([plot1a, plot1b, plot1c], df.columns, loc='best')
        # defining display layout
        fig.tight_layout()
        fig.savefig(output_file, format='png')
    except Exception as err:
        current_logger.error(traceback.format_exc())
        current_logger.error('error :: get_algorithm_scores_plot :: failed to create %s - %s', output_file, err)
        return False
    finally:
        if fig is not None:
            fig.clf()
            plt.close(fig)

    current_logger.info('get_algorithm_scores_plot - created graph image - %s', output_file)
    return output_file


def _is_flagged_anomaly(anomalies, ts):
    """
    Report whether the timestamp has a truthy entry in anomalies.

    The int key is tried first and then the str key, as keys are coerced to
    strings by JSON.  The first key that can be looked up decides the result.
    """
    for key in (ts, str(ts)):
        try:
            return bool(anomalies[key])
        except (KeyError, IndexError, TypeError, ValueError):
            continue
    return False


def _build_algorithm_scores_plot_rows(timeseries, scores, anomalies):
    """
    Build the [date, value, score, anomaly] rows and the flagged indices.

    Items whose timestamp cannot be converted to an int are skipped, a missing
    score becomes NaN and the anomaly column is NaN unless the data point is
    flagged in anomalies, in which case it holds the data point value.
    """
    rows = []
    anomalies_indices = []
    for index, item in enumerate(timeseries):
        try:
            score = scores[index]
        except (IndexError, KeyError, TypeError):
            score = np.nan
        try:
            ts = int(item[0])
        except (IndexError, KeyError, TypeError, ValueError, OverflowError):
            continue
        anomaly_value = np.nan
        if _is_flagged_anomaly(anomalies, ts):
            anomalies_indices.append(index)
            anomaly_value = item[1]
        rows.append([item[0], item[1], score, anomaly_value])
    return rows, anomalies_indices


def _build_algorithm_scores_plot_title(
        algorithm, anomalous, anomalies, anomaly_window, anomalies_in_window,
        unreliable, low_entropy_value):
    """
    Build the graph title, including the algorithm specific additions.
    """
    heading = algorithm
    if algorithm == 'spectral_entropy':
        # 0.0 is a valid low_entropy_value and must still be reported
        if low_entropy_value or low_entropy_value == 0.0:
            heading = '%s (low_entropy_value: %s)' % (
                algorithm, str(round(low_entropy_value, 3)))
    title = '%s - anomalous: %s, total anomalies: %s\nrequested anomaly window: last %s data points, anomalies in window: %s' % (
        heading, str(anomalous), str(len(anomalies)), str(anomaly_window),
        str(anomalies_in_window))
    if algorithm == 'sigma':
        title = '%s\nONLY THE LAST %s DATA POINT ARE ANALYSED FOR ANOMALIES WITH sigma NOT ALL' % (
            title, str(anomaly_window))
    if unreliable:
        title = '%s\nUNRELIABLE RESULTS' % title
    return title