diff --git a/FCRtraining/requirements.txt b/FCRtraining/requirements.txt deleted file mode 100644 index 99b31b969bd9ce3f128a13ec954f75326a81a1ec..0000000000000000000000000000000000000000 --- a/FCRtraining/requirements.txt +++ /dev/null @@ -1,48 +0,0 @@ -absl-py~=0.12.0 -aiohttp~=3.7.4.post0 -async-timeout~=3.0.1 -attrs~=20.3.0 -cachetools~=4.2.1 -certifi~=2020.12.5 -chardet~=4.0.0 -cycler~=0.10.0 -Cython~=0.29.22 -fsspec~=0.8.7 -future~=0.18.2 -google-auth~=1.28.0 -google-auth-oauthlib~=0.4.3 -grpcio~=1.36.1 -idna~=2.10 -joblib~=1.0.1 -kiwisolver~=1.3.1 -lxml~=4.6.3 -Markdown~=3.3.4 -matplotlib~=3.3.4 -multidict~=5.1.0 -numpy~=1.20.1 -oauthlib~=3.1.0 -Pillow~=8.1.2 -protobuf~=3.15.6 -pyasn1~=0.4.8 -pyasn1-modules~=0.2.8 -pyjnius~=1.3.0 -pyparsing~=2.4.7 -python-dateutil~=2.8.1 -pytorch-lightning~=1.2.4 -PyYAML~=5.3.1 -requests~=2.25.1 -requests-oauthlib~=1.3.0 -rsa~=4.7.2 -scikit-learn~=0.24.1 -scipy~=1.6.1 -six~=1.15.0 -sklearn~=0.0 -tensorboard~=2.4.1 -tensorboard-plugin-wit~=1.8.0 -threadpoolctl~=2.1.0 -torch~=1.8.0 -tqdm~=4.59.0 -typing-extensions~=3.7.4.3 -urllib3~=1.26.4 -Werkzeug~=1.0.1 -yarl~=1.6.3 diff --git a/FCRtraining/src/metrics/ASUtilityGenerator/ASUtilityGenerator.py b/FCRtraining/src/metrics/ASUtilityGenerator/ASUtilityGenerator.py deleted file mode 100644 index 84378614b8e16a0352de8ff6d08b84728acc4e08..0000000000000000000000000000000000000000 --- a/FCRtraining/src/metrics/ASUtilityGenerator/ASUtilityGenerator.py +++ /dev/null @@ -1,92 +0,0 @@ -import jnius_config -# the jar should contain the (upperware) utility generator library and the ASUtilityGeneratorApplication from java-src (with dependencies) -jnius_config.set_classpath("") - -from jnius import autoclass -from jnius import JavaClass, MetaJavaClass -from pathlib import Path -from tempfile import NamedTemporaryFile -from typing import List, Dict -from xml.etree.ElementTree import ElementTree, Element -from lxml import etree -from os import unlink - -ASUtilityGeneratorApplication = autoclass("eu.melodic.upperware.utilitygenerator.ASUtilityGeneratorApplication") -IntVariableValueDTO = autoclass("eu.melodic.upperware.utilitygenerator.cdo.cp_model.DTO.IntVariableValueDTO") -ArrayList = autoclass("java.util.ArrayList") - -class ASUtilityGenerator: - """ - Class used for utilizing the "utility generator" java library to calculate the utility function value - for given constraint problem and camel model. - - Requires java library with utility generator (and its dependencies) and ASUtilityGeneratorApplication from java-src. - The .jar containing the library should be included in the classpath declared at the beginning of this file. 
- """ - _cp_model: ElementTree - _xml_fillings: Dict[str, Element] - - _cp_model_path: str - _camel_model_path: str - _node_candidates_path: str - - def __init__(self, - cp_model_path: str, - camel_model_path: str, - node_candidates_path: str, - metric_names: List[str]): - """ - :param cp_model_path: path to the file containing the constraint problem model - :param camel_model_path: path to the file containing the camel model - :param node_candidates_path: path to the file containing node candidates - :param metric_names: names of the metrics that might be changed (exactly as they appear in the cp model file) - """ - - self._cp_model_path = Path(cp_model_path) - self._cp_model = etree.parse(str(Path(self._cp_model_path))) - self._camel_model_path = camel_model_path - self._node_candidates_path = node_candidates_path - - self._xml_fillings = {} - for name in metric_names: - self._xml_fillings[name] = self._cp_model.find( - f"cpMetrics[@id='{name}']")[0] - - def _add_metrics(self, filename: str, metrics: Dict[str, int]): - """ - Adds metrics to the constraint problem model. - :param filename: name of the file containing the constraint problem model - :param metrics: dictionary with pairs (arg_name, arg_value) describing the metrics to be added. If metrics are - empty, then no value wil be changed - """ - for arg_name, arg_value in metrics.items(): - arg_loc = self._xml_fillings[arg_name] - arg_loc.set('value', str(arg_value)) - - self._cp_model.write(filename, - xml_declaration=True, - encoding="ASCII") - - def evaluate(self, configuration: Dict[str, int], metrics: Dict[str, int]) -> float: - """ - Creates java objects based on the parameters and a tmeporary file with an updated contraint problem model. - Then it calculates the utility function value using the java's ASUtilityGeneratorApplication. 
- :param configuration: dictionary with pairs (arg_name, arg_value) describing the configuration - :param metrics: dictionary with pairs (arg_name, arg_value) describing the metrics - :return: the utility function value for the given parameters - """ - tempfile = NamedTemporaryFile(delete=False) - - self._add_metrics(filename = tempfile.name, metrics=metrics) - variable_list = ArrayList() - for (name, value) in configuration.items(): - variable_list.add(IntVariableValueDTO(name, round(value))) - - utility_generator = ASUtilityGeneratorApplication(self._camel_model_path, tempfile.name, - self._node_candidates_path) - utility_value = utility_generator.evaluate(variable_list) - - tempfile.close() - unlink(tempfile.name) - - return utility_value diff --git a/FCRtraining/src/models/lstm_net/dataloader.py b/FCRtraining/src/models/lstm_net/dataloader.py deleted file mode 100644 index adec2211b7f8fccfffab75ed8a18a9a2e69fd5c7..0000000000000000000000000000000000000000 --- a/FCRtraining/src/models/lstm_net/dataloader.py +++ /dev/null @@ -1,29 +0,0 @@ -from FCRdataLoader.src.fcrdataloader.dataset import SequenceForecastMultiDistributionDatasetFactory as SFMDD -from typing import Callable -from pathlib import Path - - -class FCRDataSetV2Factory(SFMDD): - ''' - data set factory for secure-document data version 2, localy data from set2 - ''' - - def __init__( - self, - seq_len: int, - horizon: int, - x_transforms=None - ): - super().__init__( - seq_len=seq_len, - pred_step=horizon, - x_transforms=x_transforms, - file=Path(""), - usecols=[0]+list(range(2, 17)), - experiment_id_col=0, - x_y_split=3, - x_predictions_cols=[2], - ) - - - diff --git a/FCRtraining/src/models/lstm_net/train.py b/FCRtraining/src/models/lstm_net/train.py deleted file mode 100644 index 553025a924a1dc6639522bd4cf0cb59d57cf015f..0000000000000000000000000000000000000000 --- a/FCRtraining/src/models/lstm_net/train.py +++ /dev/null @@ -1,62 +0,0 @@ -from pytorch_lightning.loggers import TensorBoardLogger -from torch.utils.data import DataLoader -from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler -import pytorch_lightning as pl -import torch - -from .network import Encoder -from .dataloader import FCRDataSetV2Factory - - -HORIZON = 5 -SEQ_LEN = 30 -BATCH_SIZE = 256 -FEATURES_IN = 3 -FEATURES_OUT = 12 - - -EXTRA_HPARAMS = { - 'gradient_clip_val': 1, - 'max_epochs': 120 -} - -dataset_factory = FCRDataSetV2Factory(seq_len=SEQ_LEN, horizon=HORIZON) - -train = DataLoader( - dataset_factory.get_train_dataset(), - batch_size=BATCH_SIZE, - num_workers=4, - sampler=dataset_factory.get_uniform_dist_y_sampler() -) - -val = DataLoader( - dataset_factory.get_validation_dataset(), - batch_size=BATCH_SIZE, - num_workers=4 -) - -test = DataLoader( - dataset_factory.get_test_dataset(), - batch_size=BATCH_SIZE, - num_workers=4 -) - -model = Encoder( # currently no hparams to log - input_features=FEATURES_IN, - output_features=FEATURES_OUT, - horizon=HORIZON, - hidden_size=50, - hparams_to_log={ - 'HORIZON': HORIZON, - 'gradient_clip_val': EXTRA_HPARAMS['gradient_clip_val'], - 'max_epochs': EXTRA_HPARAMS['max_epochs'], - 'hidden_size': 40 - } - ) - -logger = TensorBoardLogger('tb_log', name='lstm_no_scaling') -logger.experiment.add_graph(model, torch.rand(model.input_features, model.seq_len, model.input_features)) - -trainer = pl.Trainer(check_val_every_n_epoch=5, logger=logger, **EXTRA_HPARAMS) -trainer.fit(model, train, val) -trainer.test(model, test) diff --git a/FCRtraining/src/models/lstm_scale/LitFCRtestBase.py 
b/FCRtraining/src/models/lstm_scale/LitFCRtestBase.py deleted file mode 100644 index bdf6d31a81df0842b7ca7d693ff31183a58f6d49..0000000000000000000000000000000000000000 --- a/FCRtraining/src/models/lstm_scale/LitFCRtestBase.py +++ /dev/null @@ -1,207 +0,0 @@ -from typing import Dict, Any, Callable, List, Optional -import pytorch_lightning as pl -import torch.nn as nn -import torch - -from FCRtraining.src.utils.colAvgError import AvgColL1Error -from FCRtraining.src.utils.colAvgRelError import AvgColRelativeError -from FCRtraining.src.utils.bestWorstTestPreds import BestWorstPredictions -from FCRtraining.src.metrics.UtilityFunctionMAE import UtilityFunctionMAE -from FCRtraining.src.metrics.MeanUtilityFunctionValue import MeanUtilityFunctionValue -from FCRtraining.src.utils.roundToDomain import FCRDomain - - -fcr_dom_rounder = FCRDomain() - - -class BaseTestEncoder(pl.LightningModule): - """ - abstract base class for LightningModule, - implements validation and test loops including logging - subclass must implement criterion as loss function - - input_features: - number of elements of input vectors - output_features: - number of elements of output vectors - criterion: - loss function - util_intervals: - number of bins for util based metrics - (currently ignored) - utility_fx: - utility function for elements from - network output domain. (currently - ignored) - hparams_to_log: - all haprams that will be logged - - If you want to utilize the utility function metrics then you must pass the following parameters - AND configure your environment as described in README.md: - target_labels: - labels for target value variables. - If not given defaults to "v1", "v2", ... - metric_labels_and_positions: - mapping of metric names to their respective col indexes in input tensor - cp_model_file_path: - path to the file containing the constraint problem model - camel_model_file_path: - path to the file containing the camel model - node_candidates_file_path: - path to the file containing node candidates - """ - - def __init__( - self, - input_features: int, - output_features: int, - criterion: Callable[[torch.Tensor], float], - util_intervals: int = 100, - utility_fx: Optional[Callable[[Any], float]] = None, - target_labels: Optional[List[str]] = None, - hparams_to_log: Dict[str, Any] = {}, - metric_labels_and_positions: Optional[Dict[str, int]] = None, - cp_model_file_path: Optional[str] = None, - camel_model_file_path: Optional[str] = None, - node_candidates_file_path: Optional[str] = None, - scaler = None - ): - ''' - creates train, val and test metrics which then - can be inspected (for ex: tensorboard) - ''' - - if (target_labels is not None - and metric_labels_and_positions is not None - and cp_model_file_path is not None - and camel_model_file_path is not None - and node_candidates_file_path is not None): - self.canCalculateUtilityFunction = True - else: - self.canCalculateUtilityFunction = False - - if target_labels is not None: - assert len( - target_labels) == output_features, f"Length of target labels ({len(target_labels)}) must eq output_features ({output_features})" - else: - target_labels = [f"v{i}" for i in range(output_features)] - - super(BaseTestEncoder, self).__init__() - - self.target_labels = target_labels - self.input_features = input_features - self.output_features = output_features - self.criterion = criterion - self.scaler = scaler - - # log chosen hparams - self.log_hparams(hparams_to_log) - - # train metrics - self.train_mae = pl.metrics.MeanAbsoluteError() - - # test metrics - 
self.test_mae = pl.metrics.MeanAbsoluteError() - - - # utility function metrics based on Melodic's utility generator - if self.canCalculateUtilityFunction: - self.utility_function_MAE = UtilityFunctionMAE(metrics_labels_and_positions=metric_labels_and_positions, - target_labels=target_labels, - cp_model_file_path=cp_model_file_path, - camel_model_file_path=camel_model_file_path, - node_candidates_file_path=node_candidates_file_path) - self.mean_target_utility_function_value = MeanUtilityFunctionValue(metrics_labels_and_positions=metric_labels_and_positions, - target_labels=target_labels, - cp_model_file_path=cp_model_file_path, - camel_model_file_path=camel_model_file_path, - node_candidates_file_path=node_candidates_file_path) - - # col avg errors - self.colAvgL1Err = AvgColL1Error(target_labels) - self.colAvgRelErr = AvgColRelativeError(target_labels) - - # best worst prediction examples - self.bwPredExamples = BestWorstPredictions(self.criterion) - - def __unscaled_target(self, y: torch.Tensor) -> torch.Tensor: - return torch.from_numpy(self.scaler.inverse_transform(y)).float() - - def log_hparams(self, hparams: Dict[str, Any]): - self.hparams = hparams - - def validation_step(self, batch, batch_nb): - """ - predicts y, and calculates loss in training - """ - - x, y = batch - preds = self(x) - - preds_us = self.__unscaled_target(preds) - loss = self.criterion(preds_us, y) # might not be necessary - - return {'loss': loss, 'preds': preds_us, 'target': y} - - def validation_step_end(self, outputs): - ''' - update and log validation metrics - ''' - - self.train_mae(outputs['preds'], outputs['target']) - - self.log('validation_mae', self.train_mae, - on_step=False, on_epoch=True) - - def test_step(self, batch, batch_idx): - """ - predicts y, and calculates loss in testing - """ - - x, y = batch - preds = self(x) - preds_us = self.__unscaled_target(preds) - preds_us = fcr_dom_rounder.round(preds_us) - loss = self.criterion(preds, y) # might not be necessary - self.bwPredExamples.update_on_test_step(x, preds_us, y) - if self.canCalculateUtilityFunction: - self.utility_function_MAE.update(x=x, preds=preds_us, target=y) - self.mean_target_utility_function_value.update(x=x, configuration=y) - - return {'loss': loss, 'preds': preds_us, 'target': y} - - def test_step_end(self, outputs): - ''' - update and log test metrics - ''' - - self.test_mae(outputs['preds'], outputs['target']) - - self.log('test_mae', self.test_mae, on_step=False, on_epoch=True) - if self.canCalculateUtilityFunction: - self.log('test_utility_function_MAE', self.utility_function_MAE, on_step=False, on_epoch=True) - self.log('test_mean_target_utility_function_value', self.mean_target_utility_function_value, on_step=False, on_epoch=True) - - # update col avg error - self.colAvgL1Err.update_after_test_step( - outputs['preds'], outputs['target']) - self.colAvgRelErr.update_after_test_step( - outputs['preds'], outputs['target']) - - def on_test_end(self): - writer = self.logger.experiment - - # show col avg error figure - writer.add_figure("avg L1 col error", self.colAvgL1Err.get_fig) - writer.add_figure("avg rel col error", - self.colAvgRelErr.get_fig) - writer.add_text( - f"best {self.bwPredExamples.get_keeped_best} data examples", - self.bwPredExamples.get_best_examples_str - ) - - writer.add_text( - f"worst {self.bwPredExamples.get_keeped_worst} data examples", - self.bwPredExamples.get_worst_examples_str - ) - diff --git a/FCRtraining/src/models/lstm_scale/network.py b/FCRtraining/src/models/lstm_scale/network.py deleted file 
mode 100644 index ef0c328660b9071b8b66fe9ce0a94c2f64f9a1f1..0000000000000000000000000000000000000000 --- a/FCRtraining/src/models/lstm_scale/network.py +++ /dev/null @@ -1,120 +0,0 @@ -''' - lstm network trained with data from set1 -''' - -from torch.optim.lr_scheduler import ReduceLROnPlateau -from inspect import signature -from typing import Dict, Any, Callable, Optional -import torch.nn.functional as F -import torch.nn as nn -import torch - -from .LitFCRtestBase import BaseTestEncoder - -''' - Dont touch great performance -''' - -HIDDEN_SIZE = 30 -BATCH_SIZE = 256 -SEQ_LEN = 15 -HORIZON = 5 -LSTM_LAYERS = 1 -INIT_LR = 0.01 - -FEATURES = 3 -OUTPUT = 12 - - -labels = ("LBStorage,DBCardinality,LBCardinality," - "LBRam,provider_Component_LB,AppStorage," - "AppCardinality,AppCores,provider_Component_App," - "DBCores,DBStorage,provider_Component_DB").split(',') - - -class Encoder(BaseTestEncoder): - def __init__( - self, - input_features=FEATURES, - output_features=OUTPUT, - criterion=nn.SmoothL1Loss(), - init_lr=INIT_LR, - batch_size=BATCH_SIZE, - seq_len=SEQ_LEN, - horizon=HORIZON, - hidden_size=HIDDEN_SIZE, - lstm_layers=LSTM_LAYERS, - utility_fx: Optional[Callable[[Any], float]] = None, - util_intervals: int = 100, - target_labels=labels, - hparams_to_log: Dict[str, Any] = {}, - metric_labels_and_positions: Optional[Dict[str, int]] = None, - cp_model_file_path: Optional[str] = None, - camel_model_file_path: Optional[str] = None, - node_candidates_file_path: Optional[str] = None, - scaler=None - ): - super(Encoder, self).__init__( - utility_fx=utility_fx, - util_intervals=util_intervals, - criterion=criterion, - input_features=input_features, - output_features=output_features, - target_labels=target_labels, - hparams_to_log=hparams_to_log, - metric_labels_and_positions=metric_labels_and_positions, - cp_model_file_path=cp_model_file_path, - camel_model_file_path=camel_model_file_path, - node_candidates_file_path=node_candidates_file_path, - scaler=scaler - ) - - self.seq_len = seq_len - self.horizon = horizon - self.batch_size = batch_size - self.lstm_layers = lstm_layers - self.hidden_size = hidden_size - self.fc2_size = hidden_size - - self.init_lr = init_lr - - self.relu = nn.LeakyReLU() - self.lstm = nn.LSTM(input_features, hidden_size, num_layers=self.lstm_layers, - bidirectional=True, batch_first=True) - self.fc1 = nn.Linear(hidden_size * 2, self.fc2_size) - self.fc2 = nn.Linear(self.fc2_size, self.output_features) - self.lstm_drop = nn.Dropout(p=0.3) - self.fc1_drop = nn.Dropout(p=0.6) - - def forward(self, x): - out, _ = self.lstm(x) - # out: (batch, input_features, hidden_size * directions) - out = self.lstm_drop(out[:, -1, :]) - # out: (batch, hidden_size * directions) - out = self.relu(out) - out = self.fc1(out) - out = self.relu(out) - out = self.fc1_drop(out) - #out = self.fc1_drop(out) - out = self.fc2(out) - return out - - def training_step(self, batch, batch_idx): - x, y = batch - scaled_y = torch.from_numpy(self.scaler.transform(y)).float() - prediction = self(x) - #print(f"x = {x[0]}") - #print(f"pred = {torch.round(prediction[0])}") - #print(f"y = {y[0]}") - loss = self.criterion(prediction, scaled_y) - self.log('train_loss', loss, on_step=False, on_epoch=True) - return loss - - def configure_optimizers(self): - optimizer = torch.optim.Adam(self.parameters(), lr=self.init_lr) - scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5, verbose=True) - return { - 'optimizer': optimizer, - 'lr_scheduler': scheduler, - 'monitor': 'train_loss' - } diff --git 
a/FCRtraining/src/utils/LitFCRtestBase.py b/FCRtraining/src/utils/LitFCRtestBase.py deleted file mode 100644 index ea04736b67d6e7e8727ab23ea289c175402f4fed..0000000000000000000000000000000000000000 --- a/FCRtraining/src/utils/LitFCRtestBase.py +++ /dev/null @@ -1,210 +0,0 @@ -from typing import Dict, Any, Callable, List, Optional -import pytorch_lightning as pl -import torch.nn as nn -import torch - -from .colAvgError import AvgColL1Error -from .colAvgRelError import AvgColRelativeError -from .bestWorstTestPreds import BestWorstPredictions -from FCRtraining.src.metrics.UtilityFunctionMAE import UtilityFunctionMAE -from FCRtraining.src.metrics.MeanUtilityFunctionValue import MeanUtilityFunctionValue -from .roundToDomain import FCRDomain - - -class BaseTestEncoder(pl.LightningModule): - """ - abstract base class for LightningModule, - implements validation and test loops including logging - subclass must implement criterion as loss function - - input_features: - number of elements of input vectors - output_features: - number of elements of output vectors - criterion: - loss function - util_intervals: - number of bins for util based metrics - (currently ignored) - utility_fx: - utility function for elements from - network output domain. (currently - ignored) - hparams_to_log: - all haprams that will be logged - - If you want to utilize the utility function metrics then you must pass the following parameters - AND configure your environment as described in README.md: - target_labels: - labels for target value variables. - If not given defaults to "v1", "v2", ... - metric_labels_and_positions: - mapping of metric names to their respective col indexes in input tensor - cp_model_file_path: - path to the file containing the constraint problem model - camel_model_file_path: - path to the file containing the camel model - node_candidates_file_path: - path to the file containing node candidates - """ - - def __init__( - self, - input_features: int, - output_features: int, - criterion: Callable[[torch.Tensor], float], - util_intervals: int = 100, - utility_fx: Optional[Callable[[Any], float]] = None, - target_labels: Optional[List[str]] = None, - hparams_to_log: Dict[str, Any] = {}, - metric_labels_and_positions: Optional[Dict[str, int]] = None, - cp_model_file_path: Optional[str] = None, - camel_model_file_path: Optional[str] = None, - node_candidates_file_path: Optional[str] = None, - ): - ''' - creates train, val and test metrics which then - can be inspected (for ex: tensorboard) - ''' - - if (target_labels is not None - and metric_labels_and_positions is not None - and cp_model_file_path is not None - and camel_model_file_path is not None - and node_candidates_file_path is not None): - self.canCalculateUtilityFunction = True - else: - self.canCalculateUtilityFunction = False - - if target_labels is not None: - assert len( - target_labels) == output_features, f"Length of target labels ({len(target_labels)}) must eq output_features ({output_features})" - else: - target_labels = [f"v{i}" for i in range(output_features)] - - super(BaseTestEncoder, self).__init__() - - self.target_labels = target_labels - self.input_features = input_features - self.output_features = output_features - self.criterion = criterion - - # log chosen hparams - self.log_hparams(hparams_to_log) - - # train metrics - self.train_mse = pl.metrics.MeanSquaredError() - self.train_mae = pl.metrics.MeanAbsoluteError() - - # test metrics - self.test_mse = pl.metrics.MeanSquaredError() - self.test_mae = pl.metrics.MeanAbsoluteError() - 
- # utility function - self.utility_fx = utility_fx - self.util_intervals = util_intervals - - # utility function metrics based on Melodic's utility generator - if self.canCalculateUtilityFunction: - self.utility_function_MAE = UtilityFunctionMAE(metrics_labels_and_positions=metric_labels_and_positions, - target_labels=target_labels, - cp_model_file_path=cp_model_file_path, - camel_model_file_path=camel_model_file_path, - node_candidates_file_path=node_candidates_file_path) - self.mean_target_utility_function_value = MeanUtilityFunctionValue(metrics_labels_and_positions=metric_labels_and_positions, - target_labels=target_labels, - cp_model_file_path=cp_model_file_path, - camel_model_file_path=camel_model_file_path, - node_candidates_file_path=node_candidates_file_path) - - # col avg errors - self.colAvgL1Err = AvgColL1Error(target_labels) - self.colAvgRelErr = AvgColRelativeError(target_labels) - - # best worst prediction examples - self.bwPredExamples = BestWorstPredictions(self.criterion) - - # round network results to domain - self.rounder = FCRDomain() - - def log_hparams(self, hparams: Dict[str, Any]): - self.hparams = hparams - - def validation_step(self, batch, batch_nb): - """ - predicts y, and calculates loss in training - """ - - x, y = batch - preds = self(x) - preds = self.rounder.round(preds) - loss = self.criterion(preds, y) # might not be necessary - - return {'loss': loss, 'preds': preds, 'target': y} - - def validation_step_end(self, outputs): - ''' - update and log validation metrics - ''' - - self.train_mse(outputs['preds'], outputs['target']) - self.train_mae(outputs['preds'], outputs['target']) - - self.log('validation_mse', self.train_mse, - on_step=False, on_epoch=True) - self.log('validation_mae', self.train_mae, - on_step=False, on_epoch=True) - - def test_step(self, batch, batch_idx): - """ - predicts y, and calculates loss in testing - """ - - x, y = batch - preds = self(x) - preds = self.rounder.round(preds) - loss = self.criterion(preds, y) # might not be necessary - self.bwPredExamples.update_on_test_step(x, preds, y) - if self.canCalculateUtilityFunction: - self.utility_function_MAE.update(x=x, preds=preds, target=y) - self.mean_target_utility_function_value.update(x=x, configuration=y) - - return {'loss': loss, 'preds': preds, 'target': y} - - def test_step_end(self, outputs): - ''' - update and log test metrics - ''' - - self.test_mse(outputs['preds'], outputs['target']) - self.test_mae(outputs['preds'], outputs['target']) - - self.log('test_mse', self.test_mse, on_step=False, on_epoch=True) - self.log('test_mae', self.test_mae, on_step=False, on_epoch=True) - if self.canCalculateUtilityFunction: - self.log('test_utility_function_MAE', self.utility_function_MAE, on_step=False, on_epoch=True) - self.log('test_mean_target_utility_function_value', self.mean_target_utility_function_value, on_step=False, on_epoch=True) - - # update col avg error - self.colAvgL1Err.update_after_test_step( - outputs['preds'], outputs['target']) - self.colAvgRelErr.update_after_test_step( - outputs['preds'], outputs['target']) - - def on_test_end(self): - writer = self.logger.experiment - - # show col avg error figure - writer.add_figure("avg L1 col error", self.colAvgL1Err.get_fig) - writer.add_figure("avg rel col error", - self.colAvgRelErr.get_fig) - writer.add_text( - f"best {self.bwPredExamples.get_keeped_best} data examples", - self.bwPredExamples.get_best_examples_str - ) - - writer.add_text( - f"worst {self.bwPredExamples.get_keeped_worst} data examples", - 
self.bwPredExamples.get_worst_examples_str - ) - diff --git a/FCRtraining/src/utils/__init__.py b/FCRtraining/src/utils/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/FCRtraining/README.md b/experiments/README.md similarity index 100% rename from FCRtraining/README.md rename to experiments/README.md diff --git a/FCRtraining/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.client.properties b/experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.client.properties similarity index 100% rename from FCRtraining/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.client.properties rename to experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.client.properties diff --git a/FCRtraining/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties b/experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties similarity index 97% rename from FCRtraining/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties rename to experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties index 21ea32dd275261d784524bef3970424b8d523c8b..2ae04d74feef64bc679c5816fbf8d2bdbd71bd95 100644 --- a/FCRtraining/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties +++ b/experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties @@ -20,7 +20,7 @@ storetype=db port=2036 #logging should be set on or off - default is off -logging=on +logging=off #security should be set on or off - default is off security=off diff --git a/FCRtraining/src/metrics/ASUtilityGenerator/java-src/ASUtilityGeneratorApplication.java b/experiments/src/metrics/ASUtilityGenerator/java-src/ASUtilityGeneratorApplication.java similarity index 100% rename from FCRtraining/src/metrics/ASUtilityGenerator/java-src/ASUtilityGeneratorApplication.java rename to experiments/src/metrics/ASUtilityGenerator/java-src/ASUtilityGeneratorApplication.java diff --git a/FCRtraining/src/metrics/MeanUtilityFunctionValue.py b/experiments/src/metrics/MeanUtilityFunctionValue.py similarity index 88% rename from FCRtraining/src/metrics/MeanUtilityFunctionValue.py rename to experiments/src/metrics/MeanUtilityFunctionValue.py index 1619bea63c6aaa6231ce8ae07612485008557335..13a24735ef4219ab7ecfead76464dbd87b30e998 100644 --- a/FCRtraining/src/metrics/MeanUtilityFunctionValue.py +++ b/experiments/src/metrics/MeanUtilityFunctionValue.py @@ -1,7 +1,7 @@ import torch from pytorch_lightning.metrics import Metric from typing import List, Dict -from FCRtraining.src.metrics.ASUtilityGenerator.ASUtilityGenerator import ASUtilityGenerator +from experiments.src.metrics.ASUtilityGenerator.ASUtilityGenerator import ASUtilityGenerator class MeanUtilityFunctionValue(Metric): @@ -9,8 +9,15 @@ class MeanUtilityFunctionValue(Metric): Metric which uses ASUtilityGenerator to calculate utility function values of given configurations and return the mean value. 
""" - def __init__(self, metrics_labels_and_positions: Dict[str, int], target_labels: List[str], cp_model_file_path: str, - camel_model_file_path: str, node_candidates_file_path: str, dist_sync_on_step=False): + def __init__( + self, + metrics_labels_and_positions: Dict[str, int], + target_labels: List[str], + cp_model_file_path: str, + camel_model_file_path: str, + node_candidates_file_path: str, + dist_sync_on_step=False + ): super().__init__(dist_sync_on_step=dist_sync_on_step) self.add_state("total_utility_value", default=torch.tensor(0.0), dist_reduce_fx="sum") diff --git a/FCRtraining/src/metrics/RowAccuracy.py b/experiments/src/metrics/RowAccuracy.py similarity index 100% rename from FCRtraining/src/metrics/RowAccuracy.py rename to experiments/src/metrics/RowAccuracy.py diff --git a/FCRtraining/src/metrics/UtilityAccuracy.py b/experiments/src/metrics/UtilityAccuracy.py similarity index 100% rename from FCRtraining/src/metrics/UtilityAccuracy.py rename to experiments/src/metrics/UtilityAccuracy.py diff --git a/FCRtraining/src/metrics/UtilityFunctionMAE.py b/experiments/src/metrics/UtilityFunctionMAE.py similarity index 97% rename from FCRtraining/src/metrics/UtilityFunctionMAE.py rename to experiments/src/metrics/UtilityFunctionMAE.py index 005cc96b66e00086f28715de4c4a4973b394855a..6edd940c46e2186b097da44425f6ebdec315f594 100644 --- a/FCRtraining/src/metrics/UtilityFunctionMAE.py +++ b/experiments/src/metrics/UtilityFunctionMAE.py @@ -1,7 +1,7 @@ import torch from pytorch_lightning.metrics import Metric from typing import List, Dict -from FCRtraining.src.metrics.ASUtilityGenerator.ASUtilityGenerator import ASUtilityGenerator +from experiments.src.metrics.ASUtilityGenerator.ASUtilityGenerator import ASUtilityGenerator class UtilityFunctionMAE(Metric): diff --git a/FCRtraining/src/metrics/__init__.py b/experiments/src/metrics/__init__.py similarity index 100% rename from FCRtraining/src/metrics/__init__.py rename to experiments/src/metrics/__init__.py diff --git a/FCRtraining/src/models/__init__.py b/experiments/src/models/__init__.py similarity index 100% rename from FCRtraining/src/models/__init__.py rename to experiments/src/models/__init__.py diff --git a/FCRtraining/src/models/lstm_net/__init__.py b/experiments/src/models/lstm_scale/__init__.py similarity index 100% rename from FCRtraining/src/models/lstm_net/__init__.py rename to experiments/src/models/lstm_scale/__init__.py diff --git a/FCRtraining/src/models/lstm_scale/dataloader.py b/experiments/src/models/lstm_scale/dataloader.py similarity index 100% rename from FCRtraining/src/models/lstm_scale/dataloader.py rename to experiments/src/models/lstm_scale/dataloader.py diff --git a/FCRtraining/src/models/lstm_net/network.py b/experiments/src/models/lstm_scale/network.py similarity index 54% rename from FCRtraining/src/models/lstm_net/network.py rename to experiments/src/models/lstm_scale/network.py index 72de2ba753ae1f426d5626fa2a7f56c185936ee8..bbfbfe1ac326b0ee7177a6de11c918893c722d35 100644 --- a/FCRtraining/src/models/lstm_net/network.py +++ b/experiments/src/models/lstm_scale/network.py @@ -1,78 +1,66 @@ -''' - lstm network trained with data from set1 -''' - from torch.optim.lr_scheduler import ReduceLROnPlateau from inspect import signature -from typing import Dict, Any, Callable, Optional +from typing import Dict, Any, Callable, Optional, List import torch.nn.functional as F import torch.nn as nn import torch -from FCRtraining.src.utils.LitFCRtestBase import BaseTestEncoder - -''' - Dont touch great performance -''' - 
-HIDDEN_SIZE = 40 -BATCH_SIZE = 256 -SEQ_LEN = 30 -HORIZON = 5 -LSTM_LAYERS = 1 -INIT_LR = 0.01 - -FEATURES = 3 -OUTPUT = 12 - - -labels = ("LBStorage,DBCardinality,LBCardinality," - "LBRam,provider_Component_LB,AppStorage," - "AppCardinality,AppCores,provider_Component_App," - "DBCores,DBStorage,provider_Component_DB").split(',') +from experiments.src.utils.LitFCRtestBase import BaseTestEncoder, UtilityData class Encoder(BaseTestEncoder): def __init__( self, - input_features=FEATURES, - output_features=OUTPUT, - criterion=nn.L1Loss(), - init_lr=INIT_LR, - batch_size=BATCH_SIZE, - seq_len=SEQ_LEN, - horizon=HORIZON, - hidden_size=HIDDEN_SIZE, - lstm_layers=LSTM_LAYERS, - utility_fx: Optional[Callable[[Any], float]] = None, - util_intervals: int = 100, - target_labels=labels, - hparams_to_log: Dict[str, Any] = {} + target_labels: List[str], + util_value_data: UtilityData, + input_features: int, + output_features: int, + criterion, + init_lr: float, + batch_size: int, + seq_len: int, + horizon: int, + hidden_size: int, + lstm_layers: int, + y_scaler=None, + test_x_transform=None ): + + if test_x_transform is None: + test_x_transform = lambda x: x + super(Encoder, self).__init__( + util_value_data=util_value_data, criterion=criterion, - input_features=input_features, - output_features=output_features, target_labels=target_labels, - hparams_to_log=hparams_to_log + best_worst_examples=10, + best_worst_metric=nn.L1Loss(), + x_transform=test_x_transform ) + self.output_features = output_features + self.input_features = input_features + self.seq_len = seq_len self.horizon = horizon self.batch_size = batch_size self.lstm_layers = lstm_layers self.hidden_size = hidden_size self.fc2_size = hidden_size + self.y_scaler = y_scaler self.init_lr = init_lr self.relu = nn.LeakyReLU() - self.lstm = nn.LSTM(input_features, hidden_size, num_layers=self.lstm_layers, + self.lstm = nn.LSTM(self.input_features, hidden_size, num_layers=self.lstm_layers, bidirectional=True, batch_first=True) self.fc1 = nn.Linear(hidden_size * 2, self.fc2_size) self.fc2 = nn.Linear(self.fc2_size, self.output_features) - self.lstm_drop = nn.Dropout(p=0.2) - self.fc1_drop = nn.Dropout(p=0.5) + self.lstm_drop = nn.Dropout(p=0.3) + self.fc1_drop = nn.Dropout(p=0.6) + + def transform_y(self, y: torch.Tensor) -> torch.Tensor: + return torch.from_numpy(self.y_scaler.transform(y)).float() def forward(self, x): out, _ = self.lstm(x) @@ -89,17 +77,15 @@ class Encoder(BaseTestEncoder): def training_step(self, batch, batch_idx): x, y = batch + scaled_y = self.transform_y(y) prediction = self(x) - #print(f"x = {x[0]}") - #print(f"pred = {torch.round(prediction[0])}") - #print(f"y = {y[0]}") - loss = self.criterion(prediction, y) + loss = self.criterion(prediction, scaled_y) self.log('train_loss', loss, on_step=False, on_epoch=True) return loss def configure_optimizers(self): optimizer = torch.optim.Adam(self.parameters(), lr=self.init_lr) - scheduler = ReduceLROnPlateau(optimizer, 'min', patience=10, verbose=True) + scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5, verbose=True) return { 'optimizer': optimizer, 'lr_scheduler': scheduler, diff --git a/FCRtraining/src/models/lstm_scale/train.py b/experiments/src/models/lstm_scale/train.py similarity index 73% rename from FCRtraining/src/models/lstm_scale/train.py rename to experiments/src/models/lstm_scale/train.py index 556a7e7117ab2a03bdcd3d18ea0f9a11b5712f34..db6b8df8500367635b421aa4cee6580f9e272536 100644 --- a/FCRtraining/src/models/lstm_scale/train.py +++ 
b/experiments/src/models/lstm_scale/train.py @@ -3,9 +3,13 @@ from torch.utils.data import DataLoader from sklearn.preprocessing import MaxAbsScaler, StandardScaler import pytorch_lightning as pl import torch +import torch.nn as nn from .network import Encoder from .dataloader import FCRDataSetV2Factory +from experiments.src.utils.roundToDomain import FCRDomain +from .util_data import data as util_data + HORIZON = 5 @@ -14,17 +18,22 @@ BATCH_SIZE = 256 FEATURES_IN = 3 FEATURES_OUT = 12 +fcr_dom_rounder = FCRDomain() + TARGET_LABELS = ("AppCardinality,provider_Component_App,AppCores,AppStorage," "LBCardinality,provider_Component_LB,LBRam,LBStorage," "DBCardinality,provider_Component_DB,DBCores,DBStorage").split(',') EXTRA_HPARAMS = { 'gradient_clip_val': 0, - 'max_epochs': 100 + 'max_epochs': 50 } x_scaler = None y_scaler = MaxAbsScaler() +def x_rescale(x: torch.Tensor) -> torch.Tensor: + return fcr_dom_rounder.round(torch.from_numpy(y_scaler.inverse_transform(x)).float()) + dataset_factory = FCRDataSetV2Factory(seq_len=SEQ_LEN, horizon=HORIZON, x_transforms=x_scaler) y_scaler.fit(dataset_factory.get_train_target_vals()) @@ -48,28 +57,23 @@ test = DataLoader( num_workers=4 ) -model = Encoder( # currently no hparams to log +model = Encoder( + init_lr=0.01, + seq_len=30, + lstm_layers=1, + criterion=nn.SmoothL1Loss(), input_features=FEATURES_IN, output_features=FEATURES_OUT, horizon=HORIZON, batch_size=256, - hidden_size=40, - scaler=y_scaler, + hidden_size=30, + y_scaler=y_scaler, target_labels=TARGET_LABELS, - metric_labels_and_positions=None, - cp_model_file_path=None, - camel_model_file_path=None. - node_candidates_file_path=None, - hparams_to_log={ - 'HORIZON': HORIZON, - 'x_scaler': x_scaler, - 'y_scaler': y_scaler, - 'gradient_clip_val': EXTRA_HPARAMS['gradient_clip_val'], - 'hidden_size': 30 - } + util_value_data=util_data, + test_x_transform=x_rescale ) -logger = TensorBoardLogger('test', name='scaled_lstm') +logger = TensorBoardLogger('', name='') logger.experiment.add_graph(model, torch.rand(model.input_features, model.seq_len, model.input_features)) trainer = pl.Trainer(fast_dev_run=False, check_val_every_n_epoch=5, logger=logger, **EXTRA_HPARAMS) diff --git a/experiments/src/models/lstm_scale/util_data.py b/experiments/src/models/lstm_scale/util_data.py new file mode 100644 index 0000000000000000000000000000000000000000..bd3796f4c13b79cf47a6bc6c0820f318c3eb9914 --- /dev/null +++ b/experiments/src/models/lstm_scale/util_data.py @@ -0,0 +1,8 @@ +from experiments.src.utils.LitFCRtestBase import UtilityData + +data = UtilityData( + metric_labels_and_positions={"AverageResponseTime": 1}, + cp_model_file_path="", + camel_model_file_path="", + node_candidates_file_path="" +) \ No newline at end of file diff --git a/experiments/src/utils/LitFCRtestBase.py b/experiments/src/utils/LitFCRtestBase.py new file mode 100644 index 0000000000000000000000000000000000000000..9276e08777306cd05a22cf8d5abebe6d2cc45a60 --- /dev/null +++ b/experiments/src/utils/LitFCRtestBase.py @@ -0,0 +1,172 @@ +from typing import Dict, Any, Callable, List, Optional +from dataclasses import dataclass +import pytorch_lightning as pl +import torch.nn as nn +import torch + +from .colAvgError import AvgColL1Error +from .colAvgRelError import AvgColRelativeError +from .bestWorstTestPreds import BestWorstPredictions +from experiments.src.metrics.UtilityFunctionMAE import UtilityFunctionMAE +from experiments.src.metrics.MeanUtilityFunctionValue import MeanUtilityFunctionValue +from .roundToDomain import FCRDomain + + + +@dataclass
+class UtilityData: + """Basic container for data associated + with utility value + """ + + metric_labels_and_positions: Dict[str, int] + cp_model_file_path: str + camel_model_file_path: str + node_candidates_file_path: str + + +class BaseTestEncoder(pl.LightningModule): + """Base class for experimenting with FCR and Genom apps + :param criterion: loss function used to calculate test and + validation loss + :type criterion: Callable[[torch.Tensor], float] + + :param target_labels: List of names of output features used for + generating post-training logs and the utility function value + :type target_labels: List[str] + + :param best_worst_examples: number of best and worst prediction examples + remembered from test set. This amount will be logged to TensorBoard + :type best_worst_examples: int + + :param best_worst_metric: function used to generate score of prediction + used for ranking best_worst_examples + :type best_worst_metric: Callable[[torch.Tensor, torch.Tensor], float] + + :param x_transform: function used to transform network predictions + only for testing and validation. + :type x_transform: Optional[Callable[[torch.Tensor], torch.Tensor]] + """ + + def __init__( + self, + criterion: Callable[[torch.Tensor], float], + target_labels: List[str], + util_value_data: Optional[UtilityData] = None, + best_worst_examples: int = 10, + best_worst_metric: Callable[[torch.Tensor, torch.Tensor], float] = nn.L1Loss(), + x_transform: Optional[Callable[[torch.Tensor], torch.Tensor]] = None + ): + super(BaseTestEncoder, self).__init__() + + self.criterion = criterion + self.best_worst_examples = best_worst_examples + self.best_worst_metric = best_worst_metric + self.util_value_data = util_value_data + self.x_transform = x_transform + + # test metrics + self.test_mae = pl.metrics.MeanAbsoluteError() + + self.val_mae = pl.metrics.MeanAbsoluteError() + + # utility function metrics based on Melodic's utility generator + if self.util_value_data is not None: + util_data = { + "metrics_labels_and_positions": self.util_value_data.metric_labels_and_positions, + "target_labels": target_labels, + "cp_model_file_path": self.util_value_data.cp_model_file_path, + "camel_model_file_path": self.util_value_data.camel_model_file_path, + "node_candidates_file_path": self.util_value_data.node_candidates_file_path + } + + self.utility_function_MAE = UtilityFunctionMAE(**util_data) + self.mean_target_utility_function_value = MeanUtilityFunctionValue(**util_data) + + # col avg errors + self.colAvgL1Err = AvgColL1Error(target_labels) + self.colAvgRelErr = AvgColRelativeError(target_labels) + + # best worst prediction examples + self.bwPredExamples = BestWorstPredictions( + self.best_worst_metric, + keep_best=self.best_worst_examples, + keep_worst=self.best_worst_examples + ) + + def get_y(self, x: torch.Tensor) -> torch.Tensor: + preds = self(x) + if self.x_transform is not None: + preds = self.x_transform(preds) + return preds + + def validation_step(self, batch, batch_nb): + """ + predicts y, and calculates loss in validation + """ + + x, y = batch + preds = self.get_y(x) + loss = self.criterion(preds, y) + + return {'loss': loss, 'preds': preds, 'target': y} + + def validation_step_end(self, outputs): + ''' + update and log validation metrics + ''' + + self.val_mae(outputs['preds'], outputs['target']) + self.log('validation_mae', self.val_mae, + on_step=False, on_epoch=True) + + def test_step(self, batch, batch_idx): + """ + predicts y, and calculates loss in testing + """ + + x, y = batch + preds = self.get_y(x) + loss
= self.criterion(preds, y) + self.bwPredExamples.update_on_test_step(x, preds, y) + if self.util_value_data is not None: + self.utility_function_MAE.update(x=x, preds=preds, target=y) + self.mean_target_utility_function_value.update(x=x, configuration=y) + + return {'loss': loss, 'preds': preds, 'target': y} + + def test_step_end(self, outputs): + ''' + update and log test metrics + ''' + + self.test_mae(outputs['preds'], outputs['target']) + + self.log('test_mae', self.test_mae, on_step=False, on_epoch=True) + if self.util_value_data is not None: + self.log('test_utility_function_MAE', self.utility_function_MAE, on_step=False, on_epoch=True) + self.log('test_mean_target_utility_function_value', self.mean_target_utility_function_value, on_step=False, on_epoch=True) + + # update col avg error + self.colAvgL1Err.update_after_test_step( + outputs['preds'], outputs['target']) + self.colAvgRelErr.update_after_test_step( + outputs['preds'], outputs['target']) + + def on_test_end(self): + writer = self.logger.experiment + + # show col avg error figure + writer.add_figure("avg L1 col error", self.colAvgL1Err.get_fig) + writer.add_figure("avg rel col error", + self.colAvgRelErr.get_fig) + writer.add_text( + f"best {self.bwPredExamples.get_keeped_best} data examples", + self.bwPredExamples.get_best_examples_str + ) + + writer.add_text( + f"worst {self.bwPredExamples.get_keeped_worst} data examples", + self.bwPredExamples.get_worst_examples_str + ) + diff --git a/FCRtraining/src/models/lstm_scale/__init__.py b/experiments/src/utils/__init__.py similarity index 100% rename from FCRtraining/src/models/lstm_scale/__init__.py rename to experiments/src/utils/__init__.py diff --git a/FCRtraining/src/utils/bestWorstTestPreds.py b/experiments/src/utils/bestWorstTestPreds.py similarity index 100% rename from FCRtraining/src/utils/bestWorstTestPreds.py rename to experiments/src/utils/bestWorstTestPreds.py diff --git a/FCRtraining/src/utils/colAvgError.py b/experiments/src/utils/colAvgError.py similarity index 100% rename from FCRtraining/src/utils/colAvgError.py rename to experiments/src/utils/colAvgError.py diff --git a/FCRtraining/src/utils/colAvgRelError.py b/experiments/src/utils/colAvgRelError.py similarity index 100% rename from FCRtraining/src/utils/colAvgRelError.py rename to experiments/src/utils/colAvgRelError.py diff --git a/FCRtraining/src/utils/rarityUtility.py b/experiments/src/utils/rarityUtility.py similarity index 100% rename from FCRtraining/src/utils/rarityUtility.py rename to experiments/src/utils/rarityUtility.py diff --git a/FCRtraining/src/utils/roundToDomain.py b/experiments/src/utils/roundToDomain.py similarity index 100% rename from FCRtraining/src/utils/roundToDomain.py rename to experiments/src/utils/roundToDomain.py
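A minimal wiring sketch of the restructured modules, assuming the repository root is on PYTHONPATH: the hyperparameter values and the empty model-file paths simply mirror experiments/src/models/lstm_scale/train.py and util_data.py above, and the dummy scaler fit stands in for y_scaler.fit(dataset_factory.get_train_target_vals()).

import numpy as np
import torch.nn as nn
from sklearn.preprocessing import MaxAbsScaler

from experiments.src.models.lstm_scale.network import Encoder
from experiments.src.utils.LitFCRtestBase import UtilityData

# Single container replacing the old cp_model/camel_model/node_candidates keyword
# arguments of BaseTestEncoder; paths are left empty exactly as in util_data.py.
util_data = UtilityData(
    metric_labels_and_positions={"AverageResponseTime": 1},
    cp_model_file_path="",
    camel_model_file_path="",
    node_candidates_file_path="",
)

# Stand-in for fitting the target scaler on the real training targets (train.py).
y_scaler = MaxAbsScaler().fit(np.ones((1, 12)))

target_labels = ("AppCardinality,provider_Component_App,AppCores,AppStorage,"
                 "LBCardinality,provider_Component_LB,LBRam,LBStorage,"
                 "DBCardinality,provider_Component_DB,DBCores,DBStorage").split(',')

model = Encoder(
    target_labels=target_labels,
    util_value_data=util_data,   # passing None skips the utility-generator metrics
    input_features=3,
    output_features=12,
    criterion=nn.SmoothL1Loss(),
    init_lr=0.01,
    batch_size=256,
    seq_len=30,
    horizon=5,
    hidden_size=30,
    lstm_layers=1,
    y_scaler=y_scaler,
)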