"""
features_profile.py
"""
import logging
import os
# import sys
# import datetime
# import csv
import time
from ast import literal_eval
import traceback
# import json
from timeit import default_timer as timer
import numpy as np
import pandas as pd
from tsfresh.feature_extraction import (
# @modified 20210101 - Task #3928: Update Skyline to use new tsfresh feature extraction method
# extract_features, ReasonableFeatureExtractionSettings)
extract_features, EfficientFCParameters)
from tsfresh import __version__ as tsfresh_version
import settings
import skyline_version
from skyline_functions import write_data_to_file
# @added 20200813 - Feature #3670: IONOSPHERE_CUSTOM_KEEP_TRAINING_TIMESERIES_FOR
from skyline_functions import historical_data_dir_exists
# TSFRESH_VERSION below was only added to a single log output as it was unused
# however it may be used in one of the tests in some way, I shall have to search
from tsfresh_feature_names import TSFRESH_FEATURES, TSFRESH_VERSION
# @added 20220731 - Task #2732: Prometheus to Skyline
# Branch #4300: prometheus
from functions.metrics.get_base_name_from_labelled_metrics_name import get_base_name_from_labelled_metrics_name
from functions.metrics.get_metric_id_from_base_name import get_metric_id_from_base_name
# Rebind the imported skyline_version module name to its version string
skyline_version = skyline_version.__absolute_version__

# Each optional setting below falls back to a default when it is not declared
# in settings.py.  Only the errors that a missing or malformed setting can
# raise are caught, so that unrelated failures (and KeyboardInterrupt or
# SystemExit) are no longer silently swallowed at import time.

# @added 20200813 - Feature #3670: IONOSPHERE_CUSTOM_KEEP_TRAINING_TIMESERIES_FOR
try:
    IONOSPHERE_HISTORICAL_DATA_FOLDER = settings.IONOSPHERE_HISTORICAL_DATA_FOLDER
except AttributeError:
    IONOSPHERE_HISTORICAL_DATA_FOLDER = '/opt/skyline/ionosphere/historical_data'
try:
    IONOSPHERE_CUSTOM_KEEP_TRAINING_TIMESERIES_FOR = settings.IONOSPHERE_CUSTOM_KEEP_TRAINING_TIMESERIES_FOR
except AttributeError:
    IONOSPHERE_CUSTOM_KEEP_TRAINING_TIMESERIES_FOR = []

# @added 20220915 - Feature #4658: ionosphere.learn_repetitive_patterns
try:
    IONOSPHERE_REPETITIVE_PATTERNS_MINMAX_AVG_VALUE = float(settings.IONOSPHERE_REPETITIVE_PATTERNS_MINMAX_AVG_VALUE)
except (AttributeError, TypeError, ValueError):
    # The setting is missing or cannot be converted to a float
    IONOSPHERE_REPETITIVE_PATTERNS_MINMAX_AVG_VALUE = 100.0
def feature_name_id(current_skyline_app, feature_name):
    """
    Determine the Skyline id of a tsfresh feature name

    :param current_skyline_app: the Skyline app calling the function, it is
        not used by the lookup itself
    :param feature_name: the tsfresh feature name
    :type feature_name: str
    :return: the id of the feature as a string if the name is found in
        TSFRESH_FEATURES, otherwise the integer 0
    :rtype: str or int

    """
    # e.g. f_name = 'value__spkt_welch_density__coeff_8'
    f_name = str(feature_name)
    # Each TSFRESH_FEATURES item is indexed as [id, name].  No del of the loop
    # variable is done after the loop, as it is not bound when
    # TSFRESH_FEATURES is empty and the del would raise a NameError
    for feature in TSFRESH_FEATURES:
        if str(feature[1]) == f_name:
            return str(feature[0])
    return 0
def _remove_tsfresh_input_csv(current_logger, log_context, ts_csv, log_format='%s :: removed %s'):
    """
    Remove the temporary tsfresh input csv, if it exists, and log the removal

    :param current_logger: the logger to report the removal to
    :param log_context: the context label that prefixes the log line
    :param ts_csv: the path of the tsfresh input csv file
    :param log_format: a % format string that takes (log_context, ts_csv)
    :return: None

    """
    if os.path.isfile(ts_csv):
        os.remove(ts_csv)
        current_logger.info(log_format % (log_context, ts_csv))


def calculate_features_profile(current_skyline_app, timestamp, metric, context):
    """
    Calculates a tsfresh features profile from a training data set

    :param current_skyline_app: the Skyline app calling the function, the
        logger used is named current_skyline_app + 'Log'
    :param timestamp: the timestamp of metric anomaly with training data
    :param metric: the base_name of the metric
    :param context: the context which determines the data directory that is
        used, one of training_data, ionosphere, features_profiles,
        ionosphere_learn, ionosphere_echo, ionosphere_echo_check, adhoc,
        learn_repetitive_patterns or find_repetitive_patterns
    :type current_skyline_app: str
    :type timestamp: str
    :type metric: str
    :type context: str
    :return: (features_profile_csv_file_path, successful, fp_created, fp_id,
        fail_msg, traceback_format_exc, calc_time).  On failure the first
        element is the string 'error' and successful is False.  fp_created is
        True and fp_id is set only if an existing fp.created.txt file was
        read, otherwise both are None.  fail_msg and traceback_format_exc are
        the string 'none' on success.  calc_time is the feature extraction
        time in seconds as a string when features were calculated, otherwise
        it is the previously calculated description or 'unknown'.
    :rtype: (str, boolean, boolean, object, str, str, str)
    :raises: in the training_data context any exception raised while
        determining whether historical training data exists is logged and
        re-raised.

    """
    current_logger = logging.getLogger(current_skyline_app + 'Log')

    base_name = str(metric)

    # @added 20190413 - Bug #2934: Ionosphere - no mirage.redis.24h.json file
    # Any context that is not listed is reported in the logs as unknown
    # TODO - @added 20190314 - Feature #2484: FULL_DURATION feature profiles
    # The ionosphere_echo contexts relate to creating a features profile at
    # FULL_DURATION for Mirage metrics, which needs to tie up with this Feature
    # in skyline/ionosphere/ionosphere.py and skyline/webapp/webapp.py
    log_contexts = {
        'training_data': 'training data',
        'features_profiles': 'features profile data',
        'ionosphere': 'ionosphere',
        'ionosphere_learn': 'ionosphere :: learn',
        'ionosphere_echo': 'ionosphere :: echo',
        'ionosphere_echo_check': 'ionosphere :: echo check',
        'adhoc': 'ionosphere :: adhoc',
        'learn_repetitive_patterns': 'ionosphere :: learn_repetitive_patterns',
        'find_repetitive_patterns': 'ionosphere :: find_repetitive_patterns',
    }
    log_context = log_contexts.get(context, 'unknown')

    current_logger.info('%s feature profile creation requested for %s at %s' % (
        log_context, base_name, timestamp))

    # @added 20220731 - Task #2732: Prometheus to Skyline
    #                   Branch #4300: prometheus
    # Handle labelled_metric name.  Files and directories for a labelled
    # metric are named with the labelled_metrics.<metric_id> name
    use_base_name = str(base_name)
    labelled_metric_name = None
    if '{' in base_name and '}' in base_name and '_tenant_id="' in base_name:
        metric_id = 0
        try:
            metric_id = get_metric_id_from_base_name(current_skyline_app, base_name)
        except Exception as err:
            current_logger.error('error :: get_metric_id_from_base_name failed with base_name: %s - %s' % (str(base_name), err))
        if metric_id:
            labelled_metric_name = 'labelled_metrics.%s' % str(metric_id)
    if base_name.startswith('labelled_metrics.'):
        labelled_metric_name = str(base_name)
        current_logger.info('calculate_features_profile :: labelled_metric_name: %s' % labelled_metric_name)
        try:
            base_name = get_base_name_from_labelled_metrics_name(current_skyline_app, labelled_metric_name)
            if base_name:
                base_name = str(base_name)
        except Exception as err:
            current_logger.error('error :: calculate_features_profile :: get_base_name_from_labelled_metrics_name failed for %s - %s' % (
                base_name, err))
    if labelled_metric_name:
        use_base_name = str(labelled_metric_name)

    timeseries_dir = use_base_name.replace('.', '/')

    # Determine the data directory for the context.
    # NOTE(review): a context that matches none of the branches leaves
    # metric_data_dir unbound, so the first use below raises
    # UnboundLocalError - confirm whether callers can pass any other context
    if context in ['training_data', 'ionosphere']:
        metric_data_dir = '%s/%s/%s' % (
            settings.IONOSPHERE_DATA_FOLDER, timestamp, timeseries_dir)
        # @added 20200813 - Feature #3670: IONOSPHERE_CUSTOM_KEEP_TRAINING_TIMESERIES_FOR
        if context == 'training_data':
            metric_data_dir_does_not_exist = not os.path.exists(metric_data_dir)
            if IONOSPHERE_CUSTOM_KEEP_TRAINING_TIMESERIES_FOR and metric_data_dir_does_not_exist:
                try:
                    historical_data, metric_data_dir = historical_data_dir_exists(current_skyline_app, metric_data_dir)
                    if historical_data:
                        current_logger.info('create_features_profile :: using historical training data - %s' % metric_data_dir)
                except Exception:
                    trace = traceback.format_exc()
                    current_logger.error(trace)
                    fail_msg = 'error :: create_features_profile :: failed to determine whether this is historical training data'
                    current_logger.error('%s' % fail_msg)
                    # This branch is only reached in the training_data
                    # context, so always raise (to the webapp, presumably to
                    # provide the traceback to the user in the UI)
                    raise
    elif context == 'features_profiles':
        metric_data_dir = '%s/%s/%s' % (
            settings.IONOSPHERE_PROFILES_FOLDER, timeseries_dir, timestamp)
    # @added 20170113 - Feature #1854: Ionosphere learn
    elif context == 'ionosphere_learn':
        metric_data_dir = '%s/%s/%s' % (
            settings.IONOSPHERE_LEARN_FOLDER, timestamp, timeseries_dir)
    # @added 20190327 - Feature #2484: FULL_DURATION feature profiles
    elif context in ['ionosphere_echo', 'ionosphere_echo_check']:
        metric_data_dir = '%s/%s/%s' % (
            settings.IONOSPHERE_DATA_FOLDER, timestamp, timeseries_dir)
    # @added 20220910 - Feature #4658: ionosphere.learn_repetitive_patterns
    elif context in ['adhoc', 'learn_repetitive_patterns', 'find_repetitive_patterns']:
        metric_data_dir = '%s/%s/%s' % (
            settings.SKYLINE_TMP_DIR, timestamp, timeseries_dir)

    # @added 20190327 - Feature #2484: FULL_DURATION feature profiles
    # The echo check uses its own echo named files
    if context == 'ionosphere_echo_check':
        features_profile_created_file = '%s/%s.%s.echo.fp.created.txt' % (
            metric_data_dir, str(timestamp), use_base_name)
        features_profile_details_file = '%s/%s.%s.echo.fp.details.txt' % (
            metric_data_dir, str(timestamp), use_base_name)
        ts_csv = '%s/%s.echo.tsfresh.input.csv' % (metric_data_dir, use_base_name)
    else:
        features_profile_created_file = '%s/%s.%s.fp.created.txt' % (
            metric_data_dir, str(timestamp), use_base_name)
        features_profile_details_file = '%s/%s.%s.fp.details.txt' % (
            metric_data_dir, str(timestamp), use_base_name)
        ts_csv = '%s/%s.tsfresh.input.csv' % (metric_data_dir, use_base_name)

    # @added 20170108 - Feature #1842: Ionosphere - Graphite now graphs
    # Determine the full_duration of the timeseries, which defaults to
    # FULL_DURATION unless the metric check file declares a full_duration.
    # ts_full_duration is always held as an int
    anomaly_check_file = '%s/%s.txt' % (metric_data_dir, str(use_base_name))
    ts_full_duration = int(settings.FULL_DURATION)
    if os.path.isfile(anomaly_check_file):
        with open(anomaly_check_file, 'r') as f:
            anomaly_details = f.readlines()
        for line in anomaly_details:
            if 'full_duration' in line:
                # The value is the text between the first pair of single
                # quotes on the line
                ts_full_duration = int(line.split("'", 2)[1])

    anomaly_json = '%s/%s.json' % (metric_data_dir, use_base_name)
    # @added 20190327 - Feature #2484: FULL_DURATION feature profiles
    if context in ['ionosphere_echo', 'ionosphere_echo_check']:
        ts_full_duration = int(settings.FULL_DURATION)
        full_duration_in_hours = int(settings.FULL_DURATION / 60 / 60)
        anomaly_json = '%s/%s.mirage.redis.%sh.json' % (metric_data_dir, use_base_name, str(full_duration_in_hours))

    # This is simply to stay in line with tsfresh naming conventions in their
    # docs and examples
    fname_in = ts_csv
    t_fname_out = fname_in + '.features.transposed.csv'

    fp_id = None
    f_calc = 'unknown'
    if os.path.isfile(features_profile_details_file):
        current_logger.info('%s :: features profile details file exist - %s' % (
            log_context, features_profile_details_file))
        try:
            with open(features_profile_details_file, 'r') as f:
                fp_details_str = f.read()
            fp_details_array = literal_eval(fp_details_str)
            f_calc = ' (previously calculated by Ionosphere) - %s' % str(fp_details_array[2])
        except Exception:
            trace = traceback.format_exc()
            current_logger.error(trace)
            current_logger.error(
                'error: %s :: failed to read from %s' % (log_context, features_profile_details_file))
    else:
        current_logger.info('%s - OK no features profile details file exists - %s' % (
            log_context, features_profile_details_file))

    fp_created = None
    if os.path.isfile(features_profile_created_file):
        current_logger.info('%s :: features profile created file exist - %s' % (
            log_context, features_profile_created_file))
        try:
            with open(features_profile_created_file, 'r') as f:
                fp_created_str = f.read()
            fp_created_array = literal_eval(fp_created_str)
            fp_id = fp_created_array[0]
            fp_created = True
        except Exception:
            trace = traceback.format_exc()
            current_logger.error(trace)
            current_logger.error(
                'error: %s :: failed to read fp_id from %s' % (log_context, features_profile_created_file))
    else:
        current_logger.info('%s :: OK no features profile created file exists - %s' % (
            log_context, features_profile_created_file))

    # If the features have already been calculated there is nothing to do
    if os.path.isfile(t_fname_out):
        current_logger.info('%s :: transposed features already exist - %s' % (
            log_context, t_fname_out))
        return str(t_fname_out), True, fp_created, fp_id, 'none', 'none', f_calc

    start = timer()

    if not os.path.isfile(anomaly_json):
        trace = 'none'
        fail_msg = 'error :: %s :: file not found - %s' % (log_context, anomaly_json)
        current_logger.error(fail_msg)
        return 'error', False, fp_created, fp_id, fail_msg, trace, f_calc

    try:
        # Read the timeseries json file
        with open(anomaly_json, 'r') as f:
            raw_timeseries = f.read()
    except Exception:
        trace = traceback.format_exc()
        current_logger.error(trace)
        fail_msg = 'error: %s :: failed to read timeseries data from %s' % (log_context, anomaly_json)
        current_logger.error(fail_msg)
        return 'error', False, fp_created, fp_id, fail_msg, trace, f_calc

    # Convert the timeseries to a list of [timestamp, value] floats, the
    # parentheses are replaced so that tuples are evaluated as lists
    timeseries_array_str = str(raw_timeseries).replace('(', '[').replace(')', ']')
    del raw_timeseries
    datapoints = literal_eval(timeseries_array_str)
    converted = []
    for datapoint in datapoints:
        try:
            converted.append([float(datapoint[0]), float(datapoint[1])])
        # @modified 20170913 - Task #2160: Test skyline with bandit
        # Added nosec to exclude from bandit tests
        # Datapoints that cannot be converted are deliberately skipped
        except Exception:  # nosec
            continue
    del datapoints

    # Without any valid datapoints the average and the first and last
    # timestamps below cannot be determined, so fail in the same manner as the
    # other data errors rather than raising ZeroDivisionError or IndexError
    if not converted:
        trace = 'none'
        fail_msg = 'error :: %s :: no valid datapoints found in %s' % (log_context, anomaly_json)
        current_logger.error(fail_msg)
        return 'error', False, fp_created, fp_id, fail_msg, trace, f_calc

    # @added 20220910 - Feature #4658: ionosphere.learn_repetitive_patterns
    # In the adhoc context a timeseries with a high average value is min-max
    # scaled
    if context == 'adhoc':
        values_list = [item[1] for item in converted]
        avg_value = sum(values_list) / len(values_list)
        # if avg_value >= IONOSPHERE_REPETITIVE_PATTERNS_MINMAX_AVG_VALUE:
        if avg_value >= 100000:
            x_np = np.asarray(values_list)
            # Min-Max scaling
            # NOTE(review): if all the values are equal this divides by zero
            # and results in nan values - confirm how that should be handled
            np_minmax = (x_np - x_np.min()) / (x_np.max() - x_np.min())
            converted = [[item[0], v] for item, v in zip(converted, np_minmax)]
        del values_list

    # @added 20220822 - Task #2732: Prometheus to Skyline
    #                   Branch #4300: prometheus
    # Only allow for fp creation if there is sufficient data
    last_timestamp = int(converted[-1][0])
    first_timestamp = int(converted[0][0])
    offset = 7200
    if ts_full_duration > 86400:
        offset = 14400
    timestamp_limit = last_timestamp - (ts_full_duration - offset)
    if first_timestamp > timestamp_limit:
        trace = 'None'
        fail_msg = 'warning :: %s :: insufficient data to create profile, last_timestamp: %s, ts_full_duration: %s, first_timestamp: %s, timestamp_limit: %s' % (
            log_context, str(last_timestamp), str(ts_full_duration),
            str(first_timestamp), str(timestamp_limit))
        current_logger.warning('%s' % fail_msg)
        return 'error', False, fp_created, fp_id, fail_msg, trace, f_calc

    # Write the tsfresh input csv as metric,timestamp,value lines.  The file
    # is opened once rather than being reopened for every datapoint
    if os.path.isfile(ts_csv):
        os.remove(ts_csv)
    with open(ts_csv, 'w') as fh:
        for ts, value in converted:
            fh.write('%s,%s,%s\n' % (use_base_name, str(int(ts)), str(value)))
    del converted

    try:
        df = pd.read_csv(ts_csv, delimiter=',', header=None, names=['metric', 'timestamp', 'value'])
        current_logger.info('%s :: DataFrame created with %s' % (
            log_context, ts_csv))
    except Exception:
        trace = traceback.format_exc()
        current_logger.error(trace)
        fail_msg = 'error: %s :: failed to create a pandas DataFrame with %s' % (log_context, ts_csv)
        current_logger.error('%s' % fail_msg)
        _remove_tsfresh_input_csv(current_logger, log_context, ts_csv)
        return 'error', False, fp_created, fp_id, fail_msg, trace, f_calc

    # Test the DataFrame
    try:
        df_created = df.head()
        del df_created
    except Exception:
        trace = traceback.format_exc()
        current_logger.debug(trace)
        fail_msg = 'error: %s :: failed to read the pandas DataFrame created with %s' % (log_context, ts_csv)
        current_logger.error('%s' % fail_msg)
        _remove_tsfresh_input_csv(current_logger, log_context, ts_csv)
        return 'error', False, fp_created, fp_id, fail_msg, trace, f_calc

    df.columns = ['metric', 'timestamp', 'value']

    start_feature_extraction = timer()
    current_logger.info('%s :: starting extract_features with %s' % (
        log_context, str(TSFRESH_VERSION)))
    try:
        # @modified 20210101 - Task #3928: Update Skyline to use new tsfresh feature extraction method
        # EfficientFCParameters are used as the feature calculation parameters
        # and the tqdm progress bar is disabled
        df_features = extract_features(
            df, default_fc_parameters=EfficientFCParameters(),
            column_id='metric', column_sort='timestamp', column_kind=None,
            column_value=None, disable_progressbar=True)
        current_logger.info('%s :: features extracted from %s data' % (
            log_context, ts_csv))
    except Exception:
        trace = traceback.format_exc()
        current_logger.debug(trace)
        fail_msg = 'error: %s :: extracting features with tsfresh from - %s' % (log_context, ts_csv)
        current_logger.error('%s' % fail_msg)
        end_feature_extraction = timer()
        current_logger.info(
            '%s :: feature extraction failed in %.6f seconds' % (
                log_context, (end_feature_extraction - start_feature_extraction)))
        _remove_tsfresh_input_csv(current_logger, log_context, ts_csv)
        return 'error', False, fp_created, fp_id, fail_msg, trace, f_calc

    end_feature_extraction = timer()
    feature_extraction_time = end_feature_extraction - start_feature_extraction
    current_logger.info(
        '%s :: feature extraction took %.6f seconds' % (log_context, feature_extraction_time))
    del df

    # NOTE: only the transposed features csv is written to disk, the
    # fname_out file is not written but it is still reported in the log
    # line further below
    fname_out = fname_in + '.features.csv'

    # Transpose
    try:
        df_t = df_features.transpose()
        current_logger.info('%s :: features transposed' % log_context)
    except Exception:
        trace = traceback.format_exc()
        current_logger.debug(trace)
        fail_msg = 'error :: %s :: transposing tsfresh features from - %s' % (log_context, ts_csv)
        current_logger.error('%s' % fail_msg)
        _remove_tsfresh_input_csv(current_logger, log_context, ts_csv)
        return 'error', False, fp_created, fp_id, fail_msg, trace, f_calc
    del df_features

    # Create transposed features csv
    try:
        df_t.to_csv(t_fname_out)
    except Exception:
        trace = traceback.format_exc()
        current_logger.debug(trace)
        fail_msg = 'error :: %s :: saving transposed tsfresh features from - %s' % (log_context, ts_csv)
        current_logger.error('%s' % fail_msg)
        _remove_tsfresh_input_csv(current_logger, log_context, ts_csv)
        return 'error', False, fp_created, fp_id, fail_msg, trace, f_calc
    del df_t

    # Calculate the count and sum of the features values.  A failure here is
    # logged but is not fatal, the count and sum are then reported as 0
    df_sum = False
    try:
        df_sum = pd.read_csv(
            t_fname_out, delimiter=',', header=0,
            names=['feature_name', 'value'])
        df_sum.columns = ['feature_name', 'value']
        df_sum['feature_name'] = df_sum['feature_name'].astype(str)
        df_sum['value'] = df_sum['value'].astype(float)
    except Exception:
        trace = traceback.format_exc()
        current_logger.error(trace)
        current_logger.error('error :: %s :: failed to create Dataframe to sum' % log_context)
    try:
        features_count = len(df_sum['value'])
    except Exception:
        trace = traceback.format_exc()
        current_logger.debug(trace)
        current_logger.error('error :: %s :: failed to count number of features, set to 0' % log_context)
        features_count = 0
    try:
        features_sum = df_sum['value'].sum()
    except Exception:
        trace = traceback.format_exc()
        current_logger.debug(trace)
        current_logger.error('error :: %s :: failed to sum feature values, set to 0' % log_context)
        features_sum = 0

    end = timer()

    current_logger.info('%s :: features saved to %s' % (log_context, fname_out))
    current_logger.info('%s :: transposed features saved to %s' % (
        log_context, t_fname_out))
    total_calc_time = '%.6f' % (end - start)
    calc_time = '%.6f' % (feature_extraction_time)
    current_logger.info('%s :: total feature profile completed in %s seconds' % (
        log_context, str(total_calc_time)))

    # Create a features profile details file.  A failure to write the file is
    # logged but the calculated features profile is still returned
    try:
        # @modified 20170108 - Feature #1842: Ionosphere - Graphite now graphs
        # Added the ts_full_duration
        data = '[%s, \'%s\', %s, %s, %s, %s]' % (
            str(int(time.time())), str(tsfresh_version), str(calc_time),
            str(features_count), str(features_sum), str(ts_full_duration))
        write_data_to_file(current_skyline_app, features_profile_details_file, 'w', data)
    except Exception:
        trace = traceback.format_exc()
        current_logger.error('%s' % trace)
        fail_msg = 'error :: %s :: failed to write %s' % (log_context, features_profile_details_file)
        current_logger.error('%s' % fail_msg)

    del df_sum
    _remove_tsfresh_input_csv(
        current_logger, log_context, ts_csv,
        log_format='%s :: removed the created csv - %s')

    # @added 20170112 - Feature #1854: Ionosphere learn - Redis ionosphere.learn.work namespace
    # Ionosphere learn needs Redis works sets, but this was moved to
    # ionosphere_backend.py and learn.py not done here
    return str(t_fname_out), True, fp_created, fp_id, 'none', 'none', str(calc_time)