From ae253dc2b0e758de0fa59f898653513745145e91 Mon Sep 17 00:00:00 2001 From: szysad Date: Mon, 29 Mar 2021 23:06:24 +0200 Subject: [PATCH 1/3] major FCRtraining refreactor --- experiments/README.md | 116 ++++++++++++ .../eu.paasage.mddb.cdo.client.properties | 18 ++ .../eu.paasage.mddb.cdo.server.properties | 29 +++ .../ASUtilityGeneratorApplication.java | 35 ++++ .../src/metrics/MeanUtilityFunctionValue.py | 66 +++++++ experiments/src/metrics/RowAccuracy.py | 29 +++ experiments/src/metrics/UtilityAccuracy.py | 36 ++++ experiments/src/metrics/UtilityFunctionMAE.py | 60 ++++++ experiments/src/metrics/__init__.py | 0 experiments/src/models/__init__.py | 0 experiments/src/models/lstm_scale/__init__.py | 0 .../src/models/lstm_scale/dataloader.py | 26 +++ experiments/src/models/lstm_scale/network.py | 93 ++++++++++ experiments/src/models/lstm_scale/train.py | 81 +++++++++ .../src/models/lstm_scale/util_data.py | 8 + experiments/src/utils/LitFCRtestBase.py | 172 ++++++++++++++++++ experiments/src/utils/__init__.py | 0 experiments/src/utils/bestWorstTestPreds.py | 145 +++++++++++++++ experiments/src/utils/colAvgError.py | 95 ++++++++++ experiments/src/utils/colAvgRelError.py | 74 ++++++++ experiments/src/utils/rarityUtility.py | 37 ++++ experiments/src/utils/roundToDomain.py | 72 ++++++++ 22 files changed, 1192 insertions(+) create mode 100644 experiments/README.md create mode 100644 experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.client.properties create mode 100644 experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties create mode 100644 experiments/src/metrics/ASUtilityGenerator/java-src/ASUtilityGeneratorApplication.java create mode 100644 experiments/src/metrics/MeanUtilityFunctionValue.py create mode 100644 experiments/src/metrics/RowAccuracy.py create mode 100644 experiments/src/metrics/UtilityAccuracy.py create mode 100644 experiments/src/metrics/UtilityFunctionMAE.py create mode 100644 experiments/src/metrics/__init__.py create mode 100644 experiments/src/models/__init__.py create mode 100644 experiments/src/models/lstm_scale/__init__.py create mode 100644 experiments/src/models/lstm_scale/dataloader.py create mode 100644 experiments/src/models/lstm_scale/network.py create mode 100644 experiments/src/models/lstm_scale/train.py create mode 100644 experiments/src/models/lstm_scale/util_data.py create mode 100644 experiments/src/utils/LitFCRtestBase.py create mode 100644 experiments/src/utils/__init__.py create mode 100644 experiments/src/utils/bestWorstTestPreds.py create mode 100644 experiments/src/utils/colAvgError.py create mode 100644 experiments/src/utils/colAvgRelError.py create mode 100644 experiments/src/utils/rarityUtility.py create mode 100644 experiments/src/utils/roundToDomain.py diff --git a/experiments/README.md b/experiments/README.md new file mode 100644 index 0000000..30bf7ab --- /dev/null +++ b/experiments/README.md @@ -0,0 +1,116 @@ +# pytorch lightning training setup for FCR app +# How to install dependencies + - make sure you have latest version of pytorch and pytorch-lightning installed + +
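For reference, a minimal dependency install might look like the sketch below. The pinned versions are taken from the requirements.txt of the original FCRtraining tree (removed later in this series, PATCH 3/3); newer releases may also work but are untested here:

```sh
$ pip install "torch~=1.8.0" "pytorch-lightning~=1.2.4"
```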
+
+# Contents
+## experiments.src.utils.LitFCRtestBase.BaseTestEncoder
+ - a subclass of pytorch_lightning.LightningModule
+ - implements logging of test and training metrics
+ - use it as the base of your LightningModule class
+ - details about LightningModule: https://pytorch-lightning.readthedocs.io/en/stable/lightning_module.html
+
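Beyond the loss and MAE logging, `BaseTestEncoder` can also track utility-function metrics when it is given a `UtilityData` container (defined in experiments/src/utils/LitFCRtestBase.py). Below is a minimal sketch of that wiring; the file paths and the subset of target labels are placeholders, and the utility metrics additionally require the ASUtilityGenerator setup described under "Utility Function Metrics" further down:

```python
import torch.nn as nn

from experiments.src.utils.LitFCRtestBase import BaseTestEncoder, UtilityData

# UtilityData bundles what the utility-function metrics need: which input
# columns carry metric values, plus the paths to the CP model, camel model
# and node-candidates files (all paths below are hypothetical placeholders).
util_data = UtilityData(
    metric_labels_and_positions={"AverageResponseTime": 1},
    cp_model_file_path="/path/to/cp_model",
    camel_model_file_path="/path/to/camel_model",
    node_candidates_file_path="/path/to/node_candidates",
)


class MyEncoder(BaseTestEncoder):
    def __init__(self):
        super().__init__(
            criterion=nn.MSELoss(),
            target_labels=["AppCardinality", "AppCores"],  # illustrative labels
            util_value_data=util_data,  # pass None to skip the utility metrics
        )
        # ...define layers, forward(), training_step() and dataloaders here,
        # as in the full example at the end of this README.
```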
+
+## Metrics logging
+ - after running an experiment, a `lightning_logs` directory should appear
+ - you can inspect the logs with `tensorboard` by running:
+```sh
+$ tensorboard --logdir=lightning_logs
+```
+### Utility Function Metrics
+To use metrics that calculate the utility function value with the help of the ASUtilityGenerator (e.g. UtilityFunctionMAE), you should:
+- install Java and set JAVA_HOME to the JDK folder (e.g. "C:/Program Files/Java/jdk-16")
+- copy the ".paasage" directory from experiments/src/metrics/ASUtilityGenerator/ to your home directory
+- set the path in experiments/src/metrics/ASUtilityGenerator/ASUtilityGenerator.py to the .jar containing the
+utility generator library and the ASUtilityGeneratorApplication from java-src
+
+# Example of BaseTestEncoder usage
+```python
+from FCRdataLoader.fcrdataloader.dataset import FCRtrainDataSet, FCRtestDataSet
+from torch.utils.data import DataLoader
+from torch.optim.lr_scheduler import ReduceLROnPlateau
+from .LitFCRtestBase import BaseTestEncoder
+import torch.nn as nn
+import torch
+
+
+HIDDEN_SIZE = 40
+BATCH_SIZE = 32
+SEQ_LEN = 10
+HORIZON = 0
+LR = 0.01
+
+FEATURES = 3
+OUTPUT = 6
+
+
+class Encoder(BaseTestEncoder):
+    def __init__(
+            self,
+            features=FEATURES,
+            output=OUTPUT,
+            learning_rate=LR,
+            batch_size=BATCH_SIZE,
+            seq_len=SEQ_LEN,
+            horizon=HORIZON,
+            hidden_size=HIDDEN_SIZE,
+    ):
+        super(Encoder, self).__init__()
+
+        self.seq_len = seq_len
+        self.horizon = horizon
+        self.batch_size = batch_size
+
+        self.criterion = nn.MSELoss()
+        self.lr = learning_rate
+
+        self.lstm = nn.LSTM(features, hidden_size, num_layers=2,
+                            bidirectional=True, batch_first=True)
+        self.fc = nn.Linear(hidden_size * 2, output)
+
+    def forward(self, x):
+        out, _ = self.lstm(x)
+        # out: (batch, seq_len, hidden_size * directions)
+        out = out[:, -1, :]
+        # out: (batch, hidden_size * directions)
+        out = self.fc(out)
+        return out
+
+    def training_step(self, batch, batch_idx):
+        x, y = batch
+        prediction = self(x)
+        loss = self.criterion(prediction, y)
+
+        self.log('train_loss', loss, on_step=False, on_epoch=True)
+
+        return loss
+
+    def val_dataloader(self):
+        return self.test_dataloader()
+
+    def train_dataloader(self):
+        train_data = FCRtrainDataSet(self.seq_len, self.horizon)
+        loader = DataLoader(train_data, batch_size=self.batch_size,
+                            num_workers=4)  # , sampler=train_data.get_weighted_rnd_sampler()
+        return loader
+
+    def test_dataloader(self):
+        test_data = FCRtestDataSet(self.seq_len, self.horizon)
+        loader = DataLoader(test_data, batch_size=self.batch_size,
+                            num_workers=4)  # , sampler=test_data.get_weighted_rnd_sampler()
+        return loader
+
+    def configure_optimizers(self):
+        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
+        scheduler = ReduceLROnPlateau(
+            optimizer, 'min', patience=10, verbose=True)
+        return {
+            'optimizer': optimizer,
+            'lr_scheduler': scheduler,
+            'monitor': 'train_loss'
+        }
+```
\ No newline at end of file
diff --git a/experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.client.properties b/experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.client.properties
new file mode 100644
index 0000000..5096ddf
--- /dev/null
+++ b/experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.client.properties
+#
+# Copyright (C) 2017 7bulls.com
+#
+# This Source Code Form is subject to the terms of the
+# Mozilla Public License, v. 2.0. If a copy of the MPL
+# was not distributed with this file, You can obtain one at
+# http://mozilla.org/MPL/2.0/.
+# + +#hostname where CDO Server resides +host=3.6.116.29 +#host=cdoserver +#port on which CDO Server listens +port=2036 +#the name of the CDO repository of the server +repository=repo1 +#logging to be set off or on - default is off +logging=off diff --git a/experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties b/experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties new file mode 100644 index 0000000..21ea32d --- /dev/null +++ b/experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties @@ -0,0 +1,29 @@ +#mysql connection properties +dbtype=mysql +dburl=jdbc:mysql://localhost:3306/repo1 +username=root +password=admin + +#hsqldb connection properties +#dbtype=hsqldb +#dburl=jdbc:hsqldb:file:cdohibernate +#username=admin +#password=admin + +#name of the repository to be created +repository=repo1 + +#type of store to be created +storetype=db + +#number of port to listen +port=2036 + +#logging should be set on or off - default is off +logging=on + +#security should be set on or off - default is off +security=off + +### logback configuration ### +logging.config=file:${MELODIC_CONFIG_DIR}/logback-conf/logback-spring.xml \ No newline at end of file diff --git a/experiments/src/metrics/ASUtilityGenerator/java-src/ASUtilityGeneratorApplication.java b/experiments/src/metrics/ASUtilityGenerator/java-src/ASUtilityGeneratorApplication.java new file mode 100644 index 0000000..cd5a8f3 --- /dev/null +++ b/experiments/src/metrics/ASUtilityGenerator/java-src/ASUtilityGeneratorApplication.java @@ -0,0 +1,35 @@ +package eu.melodic.upperware.utilitygenerator; + +import eu.melodic.cache.NodeCandidates; +import eu.melodic.cache.impl.FilecacheService; +import eu.melodic.upperware.penaltycalculator.PenaltyFunctionProperties; +import eu.melodic.upperware.utilitygenerator.cdo.cp_model.DTO.VariableValueDTO; +import eu.melodic.upperware.utilitygenerator.properties.UtilityGeneratorProperties; +import eu.paasage.upperware.security.authapi.properties.MelodicSecurityProperties; +import eu.paasage.upperware.security.authapi.token.JWTServiceImpl; + +import java.util.Collection; + +public class ASUtilityGeneratorApplication{ + private UtilityGeneratorApplication utilityGeneratorApplication; + + public ASUtilityGeneratorApplication(String camelModelFilePath, String cpModelFilePath, String NODE_CANDIDATES_FILE_PATH){ + utilityGeneratorApplication = createUtilityGeneratorApplication(camelModelFilePath, cpModelFilePath, NODE_CANDIDATES_FILE_PATH); + } + + public double evaluate(Collection solution) { + return this.utilityGeneratorApplication.evaluate(solution); + } + + private static UtilityGeneratorApplication createUtilityGeneratorApplication(String camelModelFilePath, String cpModelFilePath, String NODE_CANDIDATES_FILE_PATH) { + boolean readFromFile = true; + NodeCandidates nodeCandidates = new FilecacheService().load(NODE_CANDIDATES_FILE_PATH); + UtilityGeneratorProperties utilityGeneratorProperties = new UtilityGeneratorProperties(); + utilityGeneratorProperties.setUtilityGenerator(new UtilityGeneratorProperties.UtilityGenerator()); + utilityGeneratorProperties.getUtilityGenerator().setDlmsControllerUrl(""); + MelodicSecurityProperties melodicSecurityProperties = new MelodicSecurityProperties(); + JWTServiceImpl jWTServiceImpl = new JWTServiceImpl(melodicSecurityProperties); + PenaltyFunctionProperties penaltyFunctionProperties = new PenaltyFunctionProperties(); + return new UtilityGeneratorApplication(camelModelFilePath, 
cpModelFilePath, readFromFile, nodeCandidates, utilityGeneratorProperties, melodicSecurityProperties, jWTServiceImpl, penaltyFunctionProperties); + } +} diff --git a/experiments/src/metrics/MeanUtilityFunctionValue.py b/experiments/src/metrics/MeanUtilityFunctionValue.py new file mode 100644 index 0000000..13a2473 --- /dev/null +++ b/experiments/src/metrics/MeanUtilityFunctionValue.py @@ -0,0 +1,66 @@ +import torch +from pytorch_lightning.metrics import Metric +from typing import List, Dict +from experiments.src.metrics.ASUtilityGenerator.ASUtilityGenerator import ASUtilityGenerator + + +class MeanUtilityFunctionValue(Metric): + """ + Metric which uses ASUtilityGenerator to calculate utility function values + of given configurations and return the mean value. + """ + def __init__( + self, + metrics_labels_and_positions: Dict[str, int], + target_labels: List[str], + cp_model_file_path: str, + camel_model_file_path: str, + node_candidates_file_path: str, + dist_sync_on_step=False + ): + super().__init__(dist_sync_on_step=dist_sync_on_step) + + self.add_state("total_utility_value", default=torch.tensor(0.0), dist_reduce_fx="sum") + self.add_state("number_of_configurations", default=torch.tensor(0), dist_reduce_fx="sum") + + self.metrics_labels_and_positions = metrics_labels_and_positions + self.target_labels = target_labels + self.AS_utility_generator = ASUtilityGenerator(cp_model_path=cp_model_file_path, + camel_model_path=camel_model_file_path, + node_candidates_path=node_candidates_file_path, + metric_names=metrics_labels_and_positions.keys()) + + def update(self, x: torch.Tensor, configuration: torch.Tensor): + for i in range(len(configuration)): + conf = self._get_configuration(configuration[i]) + metr = self._get_metrics(x[i]) + self.total_utility_value += self.AS_utility_generator.evaluate(configuration=conf, metrics=metr) + + self.number_of_configurations += len(configuration) + + def compute(self): + + return self.total_utility_value.float() / self.number_of_configurations + + def _get_metrics(self, x: torch.Tensor) -> Dict[str, int]: + """ + Prepare a dictionary describing metrics based on the given tensor and self.metrics_labels_and_positions + :param x: input tensor to the network + :return: dictionary with pairs (metric_name: metric_value) + """ + last_row_of_x = x[-1] + metrics: Dict[str, int] = {} + for label, position in self.metrics_labels_and_positions.items(): + metrics[label] = last_row_of_x[position].item() + return metrics + + def _get_configuration(self, solve: torch.Tensor) -> Dict[str, int]: + """ + Prepare a dictionary describing the configuration based on the given tensor and self.target_labels + :param solve: tensor contatining the configuration + :return: dictionary with pairs (variable_name: variable_value) + """ + configuration: Dict[str, int] = {} + for position, label in enumerate(self.target_labels): + configuration[label] = solve[position].item() + return configuration diff --git a/experiments/src/metrics/RowAccuracy.py b/experiments/src/metrics/RowAccuracy.py new file mode 100644 index 0000000..ca37397 --- /dev/null +++ b/experiments/src/metrics/RowAccuracy.py @@ -0,0 +1,29 @@ +from pytorch_lightning.metrics import Metric +import torch + + +class RowAccuracy(Metric): + """ + Represents Accuracy of matches in dim=1 in givens tensors + + implemented as in: + https://pytorch-lightning.readthedocs.io/en/latest/metrics.html + """ + + def __init__(self, dist_sync_on_step=False): + super().__init__(dist_sync_on_step=dist_sync_on_step) + + self.cnt = 0 + 
self.add_state("correct", default=torch.tensor(0), dist_reduce_fx="sum")
+        self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum")
+
+    def update(self, preds: torch.Tensor, target: torch.Tensor):
+        self.cnt += 1
+        assert preds.shape == target.shape
+        self.correct += (preds == target).all(dim=1).sum()  # count all row matches
+        self.total += target.shape[0]  # add batch size
+
+    def compute(self):
+        acc = self.correct.float() / self.total
+        return acc
diff --git a/experiments/src/metrics/UtilityAccuracy.py b/experiments/src/metrics/UtilityAccuracy.py
new file mode 100644
index 0000000..4a3fceb
--- /dev/null
+++ b/experiments/src/metrics/UtilityAccuracy.py
+from pytorch_lightning.metrics import Metric
+from typing import Callable, Any
+import torch
+
+from ..utils.rarityUtility import RarityUtility
+
+
+class AvgUtilityAccuracy(Metric):
+    """
+    NOTE: this feature might not be possible to implement this way
+
+    Represents average accuracy over values with the same utility
+
+    implemented as in:
+    https://pytorch-lightning.readthedocs.io/en/latest/metrics.html
+    """
+
+    def __init__(self, util_func: Callable[[Any], float], intervals=100, dist_sync_on_step=False):
+        super().__init__(dist_sync_on_step=dist_sync_on_step)
+
+        self.util_func = util_func
+        self.add_state("embed", default=torch.linspace(0, 1, intervals), dist_reduce_fx=None)
+        self.add_state("correct", default=torch.zeros(intervals), dist_reduce_fx="sum")
+        self.add_state("total", default=torch.zeros(intervals), dist_reduce_fx="sum")
+
+    def update(self, preds: torch.Tensor, target: torch.Tensor):
+        assert preds.shape == target.shape
+
+        for p, t in zip(preds, target):
+            # searchsorted expects a tensor; bucket each sample by its utility
+            embed = torch.searchsorted(self.embed, torch.tensor(self.util_func(t)))
+            if (p == t).all():
+                self.correct[embed] += 1
+            self.total[embed] += 1  # every sample counts toward its bucket
+
+    def compute(self):
+        return self.correct.float() / self.total
diff --git a/experiments/src/metrics/UtilityFunctionMAE.py b/experiments/src/metrics/UtilityFunctionMAE.py
new file mode 100644
index 0000000..6edd940
--- /dev/null
+++ b/experiments/src/metrics/UtilityFunctionMAE.py
+import torch
+from pytorch_lightning.metrics import Metric
+from typing import List, Dict
+from experiments.src.metrics.ASUtilityGenerator.ASUtilityGenerator import ASUtilityGenerator
+
+
+class UtilityFunctionMAE(Metric):
+    """
+    Metric which uses ASUtilityGenerator to calculate the mean absolute error between
+    the utility function values of predicted and target configurations.
+ """ + def __init__(self, metrics_labels_and_positions: Dict[str, int], target_labels: List[str], cp_model_file_path: str, + camel_model_file_path: str, node_candidates_file_path: str, dist_sync_on_step=False): + super().__init__(dist_sync_on_step=dist_sync_on_step) + + self.add_state("absolute_difference_in_utility_value", default=torch.tensor(0.0), dist_reduce_fx="sum") + self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum") + + self.metrics_labels_and_positions = metrics_labels_and_positions + self.target_labels = target_labels + self.AS_utility_generator = ASUtilityGenerator(cp_model_path=cp_model_file_path, + camel_model_path=camel_model_file_path, + node_candidates_path=node_candidates_file_path, + metric_names=metrics_labels_and_positions.keys()) + + def update(self, x: torch.Tensor, preds: torch.Tensor, target: torch.Tensor): + assert preds.shape == target.shape + + for i in range(len(target)): + target_utility_value = self.AS_utility_generator.evaluate(configuration=self._get_configuration(target[i]), metrics=self._get_metrics(x[i])) + preds_utility_value = self.AS_utility_generator.evaluate(configuration=self._get_configuration(preds[i]), metrics=self._get_metrics(x[i])) + self.absolute_difference_in_utility_value += abs(target_utility_value - preds_utility_value) + + self.total += len(target) + + def compute(self): + return self.absolute_difference_in_utility_value.float() / self.total + + def _get_metrics(self, x: torch.Tensor) -> Dict[str, int]: + """ + Prepare a dictionary describing metrics based on the given tensor and self.metrics_labels_and_positions + :param x: input tensor to the network + :return: dictionary with pairs (metric_name: metric_value) + """ + last_row_of_x = x[-1] + metrics: Dict[str, int] = {} + for label, position in self.metrics_labels_and_positions.items(): + metrics[label] = last_row_of_x[position].item() + return metrics + + def _get_configuration(self, solve: torch.Tensor) -> Dict[str, int]: + """ + Prepare a dictionary describing the configuration based on the given tensor and self.target_labels + :param solve: tensor contatining the configuration + :return: dictionary with pairs (variable_name: variable_value) + """ + configuration: Dict[str, int] = {} + for position, label in enumerate(self.target_labels): + configuration[label] = solve[position].item() + return configuration diff --git a/experiments/src/metrics/__init__.py b/experiments/src/metrics/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/experiments/src/models/__init__.py b/experiments/src/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/experiments/src/models/lstm_scale/__init__.py b/experiments/src/models/lstm_scale/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/experiments/src/models/lstm_scale/dataloader.py b/experiments/src/models/lstm_scale/dataloader.py new file mode 100644 index 0000000..e38b4c7 --- /dev/null +++ b/experiments/src/models/lstm_scale/dataloader.py @@ -0,0 +1,26 @@ +from FCRdataLoader.src.fcrdataloader.dataset import SequenceForecastMultiDistributionDatasetFactory as SFMDD +from typing import Callable +from pathlib import Path + + +class FCRDataSetV2Factory(SFMDD): + ''' + data set factory for secure-document data version 2, localy data from set2 + ''' + + def __init__( + self, + seq_len: int, + horizon: int, + x_transforms=None + ): + super().__init__( + seq_len=seq_len, + pred_step=horizon, + x_transforms=x_transforms, + 
file=Path("/home/szysad/mimuw/3rok/ZPP/training-data/FCR-data/set1/combined.csv"), + usecols=[0]+list(range(2, 17)), + experiment_id_col=0, + x_y_split=3, + x_predictions_cols=[2], + ) diff --git a/experiments/src/models/lstm_scale/network.py b/experiments/src/models/lstm_scale/network.py new file mode 100644 index 0000000..bbfbfe1 --- /dev/null +++ b/experiments/src/models/lstm_scale/network.py @@ -0,0 +1,93 @@ +from torch.optim.lr_scheduler import ReduceLROnPlateau +from inspect import signature +from typing import Dict, Any, Callable, Optional, List +import torch.nn.functional as F +import torch.nn as nn +import torch + +from experiments.src.utils.LitFCRtestBase import BaseTestEncoder, UtilityData + + +class Encoder(BaseTestEncoder): + def __init__( + self, + target_labels: List[str], + util_value_data: UtilityData, + input_features: int, + output_features: int, + criterion, + init_lr: float, + batch_size: int, + seq_len: int, + horizon: int, + hidden_size: int, + lstm_layers: int, + y_scaler=None, + test_x_transform=None + ): + + if test_x_transform is None: + test_x_transform = lambda x: x + + super(Encoder, self).__init__( + util_value_data=util_value_data, + criterion=criterion, + target_labels=target_labels, + best_worst_examples=10, + best_worst_metric=nn.L1Loss(), + x_transform=test_x_transform + ) + + self.output_features = output_features + self.input_features = input_features + + self.seq_len = seq_len + self.horizon = horizon + self.batch_size = batch_size + self.lstm_layers = lstm_layers + self.hidden_size = hidden_size + self.fc2_size = hidden_size + self.y_scaler = y_scaler + + self.init_lr = init_lr + + self.relu = nn.LeakyReLU() + self.lstm = nn.LSTM(self.input_features, hidden_size, num_layers=self.lstm_layers, + bidirectional=True, batch_first=True) + self.fc1 = nn.Linear(hidden_size * 2, self.fc2_size) + self.fc2 = nn.Linear(self.fc2_size, self.output_features) + self.lstm_drop = nn.Dropout(p=0.3) + self.fc1_drop = nn.Dropout(p=0.6) + + def transform_y(self, y: torch.Tensor) -> torch.Tensor: + return torch.from_numpy(self.y_scaler.transform(y)).float() + + def forward(self, x): + out, _ = self.lstm(x) + # out: (batch, input_features, hidden_size * directions) + out = self.lstm_drop(out[:, -1, :]) + # out: (batch, hidden_size * directions) + out = self.relu(out) + out = self.fc1(out) + out = self.relu(out) + out = self.fc1_drop(out) + #out = self.fc1_drop(out) + out = self.fc2(out) + return out + + def training_step(self, batch, batch_idx): + x, y = batch + scaled_y = self.transform_y(y) + prediction = self(x) + loss = self.criterion(prediction, scaled_y) + self.log('train_loss', loss, on_step=False, on_epoch=True) + return loss + + def configure_optimizers(self): + optimizer = torch.optim.Adam(self.parameters(), lr=self.init_lr) + scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5, verbose=True) + return { + 'optimizer': optimizer, + 'lr_scheduler': scheduler, + 'monitor': 'train_loss' + } diff --git a/experiments/src/models/lstm_scale/train.py b/experiments/src/models/lstm_scale/train.py new file mode 100644 index 0000000..db6b8df --- /dev/null +++ b/experiments/src/models/lstm_scale/train.py @@ -0,0 +1,81 @@ +from pytorch_lightning.loggers import TensorBoardLogger +from torch.utils.data import DataLoader +from sklearn.preprocessing import MaxAbsScaler, StandardScaler +import pytorch_lightning as pl +import torch +import torch.nn as nn + +from .network import Encoder +from .dataloader import FCRDataSetV2Factory +from experiments.src.utils.roundToDomain import 
FCRDomain
+from .util_data import data as util_data
+
+
+HORIZON = 5
+SEQ_LEN = 30
+BATCH_SIZE = 256
+FEATURES_IN = 3
+FEATURES_OUT = 12
+
+fcr_dom_rounder = FCRDomain()
+
+TARGET_LABELS = ("AppCardinality,provider_Component_App,AppCores,AppStorage,"
+                 "LBCardinality,provider_Component_LB,LBRam,LBStorage,"
+                 "DBCardinality,provider_Component_DB,DBCores,DBStorage").split(',')
+EXTRA_HPARAMS = {
+    'gradient_clip_val': 0,
+    'max_epochs': 50
+}
+
+x_scaler = None
+y_scaler = MaxAbsScaler()
+
+
+def x_rescale(x: torch.Tensor) -> torch.Tensor:
+    # undo the target scaling, then snap predictions to the FCR domain
+    return fcr_dom_rounder.round(torch.from_numpy(y_scaler.inverse_transform(x)).float())
+
+
+dataset_factory = FCRDataSetV2Factory(seq_len=SEQ_LEN, horizon=HORIZON, x_transforms=x_scaler)
+
+y_scaler.fit(dataset_factory.get_train_target_vals())
+
+train = DataLoader(
+    dataset_factory.get_train_dataset(),
+    batch_size=BATCH_SIZE,
+    num_workers=4,
+    sampler=dataset_factory.get_uniform_dist_y_sampler()
+)
+
+val = DataLoader(
+    dataset_factory.get_validation_dataset(),
+    batch_size=BATCH_SIZE,
+    num_workers=4
+)
+
+test = DataLoader(
+    dataset_factory.get_test_dataset(),
+    batch_size=BATCH_SIZE,
+    num_workers=4
+)
+
+model = Encoder(
+    init_lr=0.01,
+    seq_len=SEQ_LEN,
+    lstm_layers=1,
+    criterion=nn.SmoothL1Loss(),
+    input_features=FEATURES_IN,
+    output_features=FEATURES_OUT,
+    horizon=HORIZON,
+    batch_size=BATCH_SIZE,
+    hidden_size=30,
+    y_scaler=y_scaler,
+    target_labels=TARGET_LABELS,
+    util_value_data=util_data,
+    test_x_transform=x_rescale
+)
+
+logger = TensorBoardLogger('', name='')
+# dummy input of shape (batch, seq_len, features), used only to trace the graph
+logger.experiment.add_graph(model, torch.rand(1, model.seq_len, model.input_features))
+
+trainer = pl.Trainer(fast_dev_run=False, check_val_every_n_epoch=5, logger=logger, **EXTRA_HPARAMS)
+trainer.fit(model, train, val)
+trainer.test(model, test)
diff --git a/experiments/src/models/lstm_scale/util_data.py b/experiments/src/models/lstm_scale/util_data.py
new file mode 100644
index 0000000..bd3796f
--- /dev/null
+++ b/experiments/src/models/lstm_scale/util_data.py
+from experiments.src.utils.LitFCRtestBase import UtilityData
+
+data = UtilityData(
+    metric_labels_and_positions={"AverageResponseTime": 1},
+    cp_model_file_path="",
+    camel_model_file_path="",
+    node_candidates_file_path=""
+)
\ No newline at end of file
diff --git a/experiments/src/utils/LitFCRtestBase.py b/experiments/src/utils/LitFCRtestBase.py
new file mode 100644
index 0000000..9276e08
--- /dev/null
+++ b/experiments/src/utils/LitFCRtestBase.py
+from typing import Dict, Any, Callable, List, Optional
+from dataclasses import dataclass
+import pytorch_lightning as pl
+import torch.nn as nn
+import torch
+
+from .colAvgError import AvgColL1Error
+from .colAvgRelError import AvgColRelativeError
+from .bestWorstTestPreds import BestWorstPredictions
+from experiments.src.metrics.UtilityFunctionMAE import UtilityFunctionMAE
+from experiments.src.metrics.MeanUtilityFunctionValue import MeanUtilityFunctionValue
+from .roundToDomain import FCRDomain
+
+
+@dataclass
+class UtilityData:
+    """Basic container for the data associated
+    with the utility value metrics
+    """
+
+    metric_labels_and_positions: Dict[str, int]
+    cp_model_file_path: str
+    camel_model_file_path: str
+    node_candidates_file_path: str
+
+
+class BaseTestEncoder(pl.LightningModule):
+    """Base class for experimenting with FCR and Genom apps
+
+    :param criterion: loss function used to calculate test and
+        validation loss
+    :type criterion: Callable[[torch.Tensor, torch.Tensor], float]
+
+    :param target_labels: List of names of output features, used for
+        generating post-training logs and the utility function value
+    :type target_labels: List[str]
+
+    :param best_worst_examples: number of best and worst prediction examples
+        remembered from the test set. This amount will be logged to tensorboard
+    :type best_worst_examples: int
+
+    :param best_worst_metric: function used to score a prediction,
+        used for ranking best_worst_examples
+    :type best_worst_metric: Callable[[torch.Tensor, torch.Tensor], float]
+
+    :param x_transform: function applied to network predictions,
+        only during testing and validation
+    :type x_transform: Optional[Callable[[torch.Tensor], torch.Tensor]]
+    """
+
+    def __init__(
+            self,
+            criterion: Callable[[torch.Tensor, torch.Tensor], float],
+            target_labels: List[str],
+            util_value_data: Optional[UtilityData] = None,
+            best_worst_examples: int = 10,
+            best_worst_metric: Callable[[torch.Tensor, torch.Tensor], float] = nn.L1Loss(),
+            x_transform: Optional[Callable[[torch.Tensor], torch.Tensor]] = None
+    ):
+        super(BaseTestEncoder, self).__init__()
+
+        self.criterion = criterion
+        self.best_worst_examples = best_worst_examples
+        self.best_worst_metric = best_worst_metric
+        self.util_value_data = util_value_data
+        self.x_transform = x_transform
+
+        # test metrics
+        self.test_mae = pl.metrics.MeanAbsoluteError()
+
+        # validation metrics
+        self.val_mae = pl.metrics.MeanAbsoluteError()
+
+        # utility function metrics based on Melodic's utility generator
+        if self.util_value_data is not None:
+            util_data = {
+                "metrics_labels_and_positions": self.util_value_data.metric_labels_and_positions,
+                "target_labels": target_labels,
+                "cp_model_file_path": self.util_value_data.cp_model_file_path,
+                "camel_model_file_path": self.util_value_data.camel_model_file_path,
+                "node_candidates_file_path": self.util_value_data.node_candidates_file_path
+            }
+
+            self.utility_function_MAE = UtilityFunctionMAE(**util_data)
+            self.mean_target_utility_function_value = MeanUtilityFunctionValue(**util_data)
+
+        # col avg errors
+        self.colAvgL1Err = AvgColL1Error(target_labels)
+        self.colAvgRelErr = AvgColRelativeError(target_labels)
+
+        # best worst prediction examples
+        self.bwPredExamples = BestWorstPredictions(
+            self.best_worst_metric,
+            keep_best=self.best_worst_examples,
+            keep_worst=self.best_worst_examples
+        )
+
+    def get_y(self, x: torch.Tensor) -> torch.Tensor:
+        preds = self(x)
+        if self.x_transform is not None:
+            preds = self.x_transform(preds)
+        return preds
+
+    def validation_step(self, batch, batch_nb):
+        """
+        predicts y and calculates the loss on a validation batch
+        """
+
+        x, y = batch
+        preds = self.get_y(x)
+        loss = self.criterion(preds, y)
+
+        return {'loss': loss, 'preds': preds, 'target': y}
+
+    def validation_step_end(self, outputs):
+        '''
+        update and log validation metrics
+        '''
+
+        self.val_mae(outputs['preds'], outputs['target'])
+        self.log('validation_mae', self.val_mae,
+                 on_step=False, on_epoch=True)
+
+    def test_step(self, batch, batch_idx):
+        """
+        predicts y and calculates the loss on a test batch
+        """
+
+        x, y = batch
+        preds = self.get_y(x)
+        loss = self.criterion(preds, y)
+        self.bwPredExamples.update_on_test_step(x, preds, y)
+        if self.util_value_data is not None:
+            self.utility_function_MAE.update(x=x, preds=preds, target=y)
+            self.mean_target_utility_function_value.update(x=x, configuration=y)
+
+        return {'loss': loss, 'preds': preds, 'target': y}
+
+    def test_step_end(self, outputs):
+        '''
+        update and log test metrics
+        '''
+
+        self.test_mae(outputs['preds'], outputs['target'])
+
+        self.log('test_mae', self.test_mae, on_step=False, on_epoch=True)
+        if
self.util_value_data is not None: + self.log('test_utility_function_MAE', self.utility_function_MAE, on_step=False, on_epoch=True) + self.log('test_mean_target_utility_function_value', self.mean_target_utility_function_value, on_step=False, on_epoch=True) + + # update col avg error + self.colAvgL1Err.update_after_test_step( + outputs['preds'], outputs['target']) + self.colAvgRelErr.update_after_test_step( + outputs['preds'], outputs['target']) + + def on_test_end(self): + writer = self.logger.experiment + + # show col avg error figure + writer.add_figure("avg L1 col error", self.colAvgL1Err.get_fig) + writer.add_figure("avg rel col error", + self.colAvgRelErr.get_fig) + writer.add_text( + f"best {self.bwPredExamples.get_keeped_best} data examples", + self.bwPredExamples.get_best_examples_str + ) + + writer.add_text( + f"worst {self.bwPredExamples.get_keeped_worst} data examples", + self.bwPredExamples.get_worst_examples_str + ) + diff --git a/experiments/src/utils/__init__.py b/experiments/src/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/experiments/src/utils/bestWorstTestPreds.py b/experiments/src/utils/bestWorstTestPreds.py new file mode 100644 index 0000000..922f23d --- /dev/null +++ b/experiments/src/utils/bestWorstTestPreds.py @@ -0,0 +1,145 @@ +import torch +import heapq +from typing import Callable + + +class InputTargetPred: + ''' + simple wrapper for: + input data for single network prediction + single network prediction for that data + target value for that prediction + error of target value and prediction + + also provides less then relation based + on target value & prediction error + + can be reversed to for symmetrical flip of less then relation + (for example when used in max heap) + ''' + + flipped: bool + error: float + prediction: torch.Tensor + target: torch.Tensor + + def __init__( + self, + inputdata: torch.Tensor, # of shape [in_features, seq_len] + prediciton: torch.Tensor, # of shape [out_features,] + target: torch.Tensor, # of shape [out_features,] + error: float, + flipped: bool = False, + precision: int = 3 # decimal precision for all values when printing + ): + self.inputdata = inputdata + self.prediciton = prediciton + self.target = target + self.error = error + self.flipped = flipped + self.precision = precision + + def __lt__(self, other): + lt = self.error < other.error + return lt if not self.flipped else not lt + + def __format_float(self, numb: float) -> str: + return format(numb, f'.{self.precision}f') + + def __str_tensor(self, tens: torch.Tensor) -> str: + ''' string repr of torch tensor of shape (n,) ''' + return "[" + ', '.join(map(self.__format_float, tens)) + "]" + + def __str__(self): + outstr = 'input:\n\n' + outstr += '\n\n'.join(map(lambda r: "\t\t" + self.__str_tensor(r), self.inputdata)) + outstr += '\n\n' + outstr += f'target:\n\n\t\t{self.__str_tensor(self.target)}\n\n' + outstr += f'prediciton:\n\n\t\t{self.__str_tensor(self.prediciton)}\n\n' + outstr += f'error: {self.__format_float(self.error)}\n\n' + return outstr + + +class BestWorstPredictions: + ''' + collects predictions and target values + and keeps in memory some of best and worst + predictions, which then it logs in decreasingly + sorted by given criterion order + ''' + + __best: heapq + __worst: heapq + + def __init__( + self, + criterion: Callable[[torch.Tensor, torch.Tensor], float], + keep_best=3, + keep_worst=3 + ): + self.__best = [] # min heap + self.__worst = [] # min heap + self.__criterion = criterion + self.__keep_best = keep_best + 
self.__keep_worst = keep_worst + + def update_on_test_step( + self, + inputdata: torch.Tensor, + prediciton: torch.Tensor, + target: torch.Tensor + ): + + for indata, pred, targ in zip(inputdata, prediciton, target): + targPred = InputTargetPred( + inputdata=indata, + prediciton=pred, + target=targ, + error=self.__criterion(pred, targ) + ) + + targPredFlipped = InputTargetPred( + inputdata=indata, + prediciton=pred, + target=targ, + error=self.__criterion(pred, targ), + flipped=True + ) + + if len(self.__best) < self.__keep_best: + heapq.heappush(self.__best, targPredFlipped) + else: + heapq.heappushpop(self.__best, targPredFlipped) + + if len(self.__worst) < self.__keep_worst: + heapq.heappush(self.__worst, targPred) + else: + heapq.heappushpop(self.__worst, targPred) + + @property + def get_best_examples_str(self) -> str: + ''' + returns summary of best prediction & target pairs + in decreasing order of prediction accuracy + ''' + + title = lambda i: "\n\n" + f"EXAMPLE {i+1}".center(20, "=") + "\n\n" + return ('\n\n\n\n').join(map(lambda i: title(i[0]) + str(i[1]), enumerate(self.__best))) + + @property + def get_worst_examples_str(self) -> str: + ''' + returns summary of worst prediction & target pairs + in increasing order of prediction accuracy + ''' + + title = lambda i: "\n\n" + f"EXAMPLE {i+1}".center(20, "=") + "\n\n" + return ('\n\n\n\n').join(map(lambda i: title(i[0]) + str(i[1]), enumerate(self.__worst))) + + @property + def get_keeped_worst(self) -> int: + return self.__keep_worst + + @property + def get_keeped_best(self) -> int: + return self.__keep_best diff --git a/experiments/src/utils/colAvgError.py b/experiments/src/utils/colAvgError.py new file mode 100644 index 0000000..a9fd7fa --- /dev/null +++ b/experiments/src/utils/colAvgError.py @@ -0,0 +1,95 @@ +from typing import List +import matplotlib.pyplot as plt +import torch + + +def autolabel(rects): + for rect in rects: + # Get X and Y placement of label from rect. + x_value = rect.get_width() + y_value = rect.get_y() + rect.get_height() / 2 + + # Number of points between bar and label. Change to your liking. + space = 5 + # Vertical alignment for positive values + ha = 'left' + + # If value of bar is negative: Place label left of bar + if x_value < 0: + # Invert space to place label to the left + space *= -1 + # Horizontally align label at right + ha = 'right' + + # Use X value as label and format number with one decimal place + label = "{:.1f}".format(x_value) + + # Create annotation + plt.annotate( + label, # Use `label` as label + (x_value, y_value), # Place label at end of the bar + xytext=(space, 0), # Horizontally shift label by `space` + textcoords="offset points", # Interpret `xytext` as offset in points + va='center', # Vertically center label + ha=ha) # Horizontally align label differently for + # positive and negative values. 
+ + +class AvgColL1Error: + ''' + based on collected network prediction and + target values returns pyplot fig representing + average L1 error for each target variable + ''' + + __avg_errors: torch.Tensor + __min_errors: torch.Tensor + __max_errors: torch.Tensor + __labels: List[str] + __bar_width: float + + def __init__( + self, + labels: List[str], + bar_width=1 + ): + self.row_numb = 1 + self.__labels = labels + self.__avg_errors = torch.zeros(len(labels)) + self.__max_errors = torch.ones(len(labels)) * float('-inf') + self.__min_errors = torch.ones(len(labels)) * float('inf') + + # chart display conf + self.__bar_width = bar_width + + def update_after_test_step( + self, + predictions: torch.Tensor, + target: torch.Tensor + ): + + err = torch.abs(predictions - target) + for row in err: + self.__avg_errors += (row - self.__avg_errors) / self.row_numb + self.__max_errors = torch.max(self.__max_errors, row) + self.__min_errors = torch.min(self.__min_errors, row) + self.row_numb += 1 + + @property + def get_fig(self): + w = self.__bar_width + n_labels = len(self.__labels) + x = torch.arange(n_labels) * 4 * max(1, w) + fig, ax = plt.subplots(figsize=(n_labels, n_labels)) + b1 = ax.barh(x-w, self.__max_errors, w, label='max') + b2 = ax.barh(x, self.__avg_errors, w, label='avg') + b3 = ax.barh(x+w, self.__min_errors, w, label='min') + ax.set_yticks(x) + ax.set_yticklabels(self.__labels) + ax.legend() + + autolabel(b1) + autolabel(b2) + autolabel(b3) + + return fig diff --git a/experiments/src/utils/colAvgRelError.py b/experiments/src/utils/colAvgRelError.py new file mode 100644 index 0000000..e756601 --- /dev/null +++ b/experiments/src/utils/colAvgRelError.py @@ -0,0 +1,74 @@ +from typing import List +import matplotlib.pyplot as plt +import torch + +from .colAvgError import autolabel + + +class AvgColRelativeError: + ''' + based on collected network prediction and + target values returns pyplot fig representing + relative error for each target variable + ''' + + __avg_errors: torch.Tensor + __min_errors: torch.Tensor + __max_errors: torch.Tensor + __labels: List[str] + + def __init__(self, + labels: List[str], + bar_width = 1 + ): + self.row_numb = 1 + self.__labels = labels + self.__avg_errors = torch.zeros(len(labels)) + self.__max_errors = torch.ones(len(labels)) * float('-inf') + self.__min_errors = torch.ones(len(labels)) * float('inf') + + # chart display conf + self.__bar_width = bar_width + + # for torch reasons + self.__one = torch.ones(1, dtype=torch.float32) + + def err( + self, + prediction: torch.Tensor, + target: torch.Tensor + ): + return torch.abs(prediction - target) / torch.where(target == 0, self.__one, target) + + + def update_after_test_step( + self, + predictions: torch.Tensor, + target: torch.Tensor + ): + + err = self.err(predictions, target) + for row in err: + self.__avg_errors += (row - self.__avg_errors) / self.row_numb + self.__min_errors = torch.min(self.__min_errors, row) + self.__max_errors = torch.max(self.__max_errors, row) + self.row_numb += 1 + + @property + def get_fig(self): + w = self.__bar_width + n_labels = len(self.__labels) + x = torch.arange(n_labels) * 4 * max(1, w) + fig, ax = plt.subplots(figsize=(n_labels, n_labels)) + b1 = ax.barh(x-w, self.__max_errors, w, label='max') + b2 = ax.barh(x, self.__avg_errors, w, label='avg') + b3 = ax.barh(x+w, self.__min_errors, w, label='min') + ax.set_yticks(x) + ax.set_yticklabels(self.__labels) + ax.legend() + + autolabel(b1) + autolabel(b2) + autolabel(b3) + + return fig diff --git 
a/experiments/src/utils/rarityUtility.py b/experiments/src/utils/rarityUtility.py new file mode 100644 index 0000000..8f05f33 --- /dev/null +++ b/experiments/src/utils/rarityUtility.py @@ -0,0 +1,37 @@ +from typing import Dict, Hashable, Iterator +from collections import Counter +import torch + + +class RarityUtility: + ''' + returns utility (value from range [0, 1]) + of element based on its rarity + ''' + + __occurs: Dict[Hashable, int] + + def __init__(self, data: Iterator[Hashable] = []): + self.__occurs = {} + self.__min_occurs = 0 + self.fit_new(data) + + + def fit_new(self, data: Iterator[Hashable]): + for _el in data: + el = tuple(v.item() for v in _el) + if el in self.__occurs: + self.__occurs[el] += 1 + else: + self.__occurs[el] = 1 + self.__min_occurs = min(self.__occurs.values()) + + def __call__(self, elem) -> float: + ''' + returns utility value of + given element based on + given data + ''' + + el = tuple(v.item() for v in elem) + return self.__min_occurs / self.__occurs[el] diff --git a/experiments/src/utils/roundToDomain.py b/experiments/src/utils/roundToDomain.py new file mode 100644 index 0000000..3dc66dd --- /dev/null +++ b/experiments/src/utils/roundToDomain.py @@ -0,0 +1,72 @@ +from typing import List +import torch +import numpy as np + +class RoundToDomain: + """ + Rounds output to nearest value in domain. + Domain format: list of domains of variables. + Domain of each variable: nonempty sorted list of possible values. + Order of variable domains in domain must match order of variables in model's output. + + >>> x = torch.rand(batch_size, output_size) + >>> x_rounded = rounder.round(x) + """ + + def __init__(self, domain: List[List[int]]): + self.domain = [np.array(x) for x in domain] + + def round(self, x: torch.Tensor) -> torch.Tensor: + """ + Returns x rounded to domain. + x must be of shape (batch_size, number_of_variables). + """ + res = np.zeros_like(x, dtype=int) + for i in range(len(self.domain)): + res[:, i] = np.minimum(np.searchsorted(self.domain[i], x[:, i]), len(self.domain[i]) - 1) + res[:, i] = np.where(np.logical_and(res[:, i] > 0, + np.logical_or(res[:, i] == len(self.domain[i]), + np.fabs(x[:, i] - self.domain[i][res[:, i] - 1]) < + np.fabs(x[:, i] - self.domain[i][res[:, i]]))), + self.domain[i][res[:, i] - 1], self.domain[i][res[:, i]]) + return torch.tensor(res).float() + +class FCRDomain(RoundToDomain): + """ + Rounds output to nearest value in FCR domain. 
+ Order of variables: + AppCardinality, + provider_Component_App, + AppCores, + AppStorage, + LBCardinality, + provider_Component_LB, + LBRam, + LBStorage, + DBCardinality, + provider_Component_DB, + DBCores, + DBStorage + How to use: + + >>> rounder = FCRDomain() + >>> x = torch.rand(batch_size, 12) + >>> x_rounded = rounder.round(x) + """ + def __init__(self): + super().__init__([ + range(1, 31), # AppCardinality + [0], # provider_Component_App + [2, 4, 8, 16, 32, 36, 40, 48], # AppCores + [0, 10, 420, 1024, 2097152], # AppStorage + range(1, 2), # LBCardinality + [0], # provider_Component_LB + [1024, 1740, 2048, 3750, 3840, 4096, 7168, 7680, + 8192, 15360, 15616, 16384, 17510, 22528, 23552, + 30720, 31232, 32768, 35020, 62464, 70041], # LBRam + [0, 10, 420], # LBStorage + range(1, 2), # DBCardinality + [0], # provider_Component_DB + [1, 2, 4, 8], # DBCores + [0, 10, 420] # DBStorage + ]) -- GitLab From be2011c3a9d9fdeda8173a3812404bf15bd797c4 Mon Sep 17 00:00:00 2001 From: szysad Date: Tue, 30 Mar 2021 10:48:52 +0200 Subject: [PATCH 2/3] changed util value logging property to off, still loggs tho --- .../.paasage/eu.paasage.mddb.cdo.server.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties b/experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties index 21ea32d..2ae04d7 100644 --- a/experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties +++ b/experiments/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties @@ -20,7 +20,7 @@ storetype=db port=2036 #logging should be set on or off - default is off -logging=on +logging=off #security should be set on or off - default is off security=off -- GitLab From 33b4c32bbe19c7db42dbb4a41052942e0ccee5d2 Mon Sep 17 00:00:00 2001 From: szysad Date: Tue, 30 Mar 2021 10:50:28 +0200 Subject: [PATCH 3/3] renamed FCRtraining root dir to experiments --- FCRtraining/README.md | 116 ---------- FCRtraining/requirements.txt | 48 ---- .../eu.paasage.mddb.cdo.client.properties | 18 -- .../eu.paasage.mddb.cdo.server.properties | 29 --- .../ASUtilityGenerator/ASUtilityGenerator.py | 92 -------- .../ASUtilityGeneratorApplication.java | 35 --- .../src/metrics/MeanUtilityFunctionValue.py | 59 ----- FCRtraining/src/metrics/RowAccuracy.py | 29 --- FCRtraining/src/metrics/UtilityAccuracy.py | 36 --- FCRtraining/src/metrics/UtilityFunctionMAE.py | 60 ----- FCRtraining/src/metrics/__init__.py | 0 FCRtraining/src/models/__init__.py | 0 FCRtraining/src/models/lstm_net/__init__.py | 0 FCRtraining/src/models/lstm_net/dataloader.py | 29 --- FCRtraining/src/models/lstm_net/network.py | 107 --------- FCRtraining/src/models/lstm_net/train.py | 62 ------ .../src/models/lstm_scale/LitFCRtestBase.py | 207 ----------------- FCRtraining/src/models/lstm_scale/__init__.py | 0 .../src/models/lstm_scale/dataloader.py | 26 --- FCRtraining/src/models/lstm_scale/network.py | 120 ---------- FCRtraining/src/models/lstm_scale/train.py | 77 ------- FCRtraining/src/utils/LitFCRtestBase.py | 210 ------------------ FCRtraining/src/utils/__init__.py | 0 FCRtraining/src/utils/bestWorstTestPreds.py | 145 ------------ FCRtraining/src/utils/colAvgError.py | 95 -------- FCRtraining/src/utils/colAvgRelError.py | 74 ------ FCRtraining/src/utils/rarityUtility.py | 37 --- FCRtraining/src/utils/roundToDomain.py | 72 ------ 28 files changed, 1783 deletions(-) delete mode 100644 FCRtraining/README.md delete 
mode 100644 FCRtraining/requirements.txt delete mode 100644 FCRtraining/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.client.properties delete mode 100644 FCRtraining/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties delete mode 100644 FCRtraining/src/metrics/ASUtilityGenerator/ASUtilityGenerator.py delete mode 100644 FCRtraining/src/metrics/ASUtilityGenerator/java-src/ASUtilityGeneratorApplication.java delete mode 100644 FCRtraining/src/metrics/MeanUtilityFunctionValue.py delete mode 100644 FCRtraining/src/metrics/RowAccuracy.py delete mode 100644 FCRtraining/src/metrics/UtilityAccuracy.py delete mode 100644 FCRtraining/src/metrics/UtilityFunctionMAE.py delete mode 100644 FCRtraining/src/metrics/__init__.py delete mode 100644 FCRtraining/src/models/__init__.py delete mode 100644 FCRtraining/src/models/lstm_net/__init__.py delete mode 100644 FCRtraining/src/models/lstm_net/dataloader.py delete mode 100644 FCRtraining/src/models/lstm_net/network.py delete mode 100644 FCRtraining/src/models/lstm_net/train.py delete mode 100644 FCRtraining/src/models/lstm_scale/LitFCRtestBase.py delete mode 100644 FCRtraining/src/models/lstm_scale/__init__.py delete mode 100644 FCRtraining/src/models/lstm_scale/dataloader.py delete mode 100644 FCRtraining/src/models/lstm_scale/network.py delete mode 100644 FCRtraining/src/models/lstm_scale/train.py delete mode 100644 FCRtraining/src/utils/LitFCRtestBase.py delete mode 100644 FCRtraining/src/utils/__init__.py delete mode 100644 FCRtraining/src/utils/bestWorstTestPreds.py delete mode 100644 FCRtraining/src/utils/colAvgError.py delete mode 100644 FCRtraining/src/utils/colAvgRelError.py delete mode 100644 FCRtraining/src/utils/rarityUtility.py delete mode 100644 FCRtraining/src/utils/roundToDomain.py diff --git a/FCRtraining/README.md b/FCRtraining/README.md deleted file mode 100644 index 30bf7ab..0000000 --- a/FCRtraining/README.md +++ /dev/null @@ -1,116 +0,0 @@ -# pytorch lightning training setup for FCR app -# How to install dependencies - - make sure you have latest version of pytorch and pytorch-lightning installed - -
- -# Contents -## FCRtraining.networks.LitFCRtestBase.BaseTestEncoder - - subclass of pytorch_lightning.LightningModule - - implements testing and training metrics logging - - use it as base of your LightningModule class - - details about LightningModule: https://pytorch-lightning.readthedocs.io/en/stable/lightning_module.html - -
- -## Metrics logging - - after running experiment directory `lightning_logs` should appear - - you can inspect logs with `tensorboard` by running: -```sh -$ tensorboard --logdir= -``` -### Utility Function Metrics -To utilize metrics calculating the utility function value with the help of the ASUtilityGenerator (f.e. UtilityFunctionMAE), you should: -- install java and set JAVA_HOME to the jdk folder (e.g. "C:/Program Files/Java/jdk-16") -- copy catalog ".paasage" from FCRtraining/src/metrics/ASUtilityGenerator/ to your home directory -- set the path in FCRtraining/src/metrics/ASUtilityGenerator/ASUtilityGenerator.py to the .jar containing the -utility generator library and the ASUtilityGeneratorApplication from java-src - -# Example of BaseTestEncoder usage -```Python -from FCRdataLoader.fcrdataloader.dataset import FCRtrainDataSet, FCRtestDataSet -from torch.utils.data import DataLoader -from torch.optim.lr_scheduler import ReduceLROnPlateau -from .LitFCRtestBase import BaseTestEncoder -import torch.nn as nn -import torch - - -HIDDEN_SIZE = 40 -BATCH_SIZE = 32 -SEQ_LEN = 10 -HORIZON = 0 -LR = 0.01 - -FEATURES = 3 -OUTPUT = 6 - - -class Encoder(BaseTestEncoder): - def __init__( - self, - features=FEATURES, - output=OUTPUT, - learning_rate=LR, - batch_size=BATCH_SIZE, - seq_len=SEQ_LEN, - horizon=HORIZON, - hidden_size=HIDDEN_SIZE, - - ): - super(Encoder, self).__init__() - - self.seq_len = seq_len - self.horizon = horizon - self.batch_size = batch_size - - self.criterion = nn.MSELoss() - self.lr = learning_rate - - self.lstm = nn.LSTM(features, hidden_size, num_layers=2, - bidirectional=True, batch_first=True) - self.fc = nn.Linear(hidden_size * 2, output) - - def forward(self, x): - out, _ = self.lstm(x) - # out: (batch, features, hidden_size * directions) - out = out[:, -1, :] - # out: (batch, hidden_size * directions) - out = self.fc(out) - return out - - def training_step(self, batch, batch_idx): - x, y = batch - prediction = self(x) - loss = self.criterion(prediction, y) - - self.log('train_loss', loss, on_step=False, on_epoch=True) - - return loss - - def val_dataloader(self): - return self.test_dataloader() - - def train_dataloader(self): - train_data = FCRtrainDataSet(self.seq_len, self.horizon) - loader = DataLoader(train_data, batch_size=self.batch_size, - num_workers=4)#, sampler=train_data.get_weighted_rnd_sampler()) - return loader - - def test_dataloader(self): - test_data = FCRtestDataSet(self.seq_len, self.horizon) - loader = DataLoader(test_data, batch_size=self.batch_size, - num_workers=4)#, sampler=test_data.get_weighted_rnd_sampler()) - return loader - - def configure_optimizers(self): - optimizer = torch.optim.Adam(self.parameters(), lr=self.lr) - scheduler = ReduceLROnPlateau( - optimizer, 'min', patience=10, verbose=True) - return { - 'optimizer': optimizer, - 'lr_scheduler': scheduler, - 'monitor': 'train_loss' - } - -``` \ No newline at end of file diff --git a/FCRtraining/requirements.txt b/FCRtraining/requirements.txt deleted file mode 100644 index 99b31b9..0000000 --- a/FCRtraining/requirements.txt +++ /dev/null @@ -1,48 +0,0 @@ -absl-py~=0.12.0 -aiohttp~=3.7.4.post0 -async-timeout~=3.0.1 -attrs~=20.3.0 -cachetools~=4.2.1 -certifi~=2020.12.5 -chardet~=4.0.0 -cycler~=0.10.0 -Cython~=0.29.22 -fsspec~=0.8.7 -future~=0.18.2 -google-auth~=1.28.0 -google-auth-oauthlib~=0.4.3 -grpcio~=1.36.1 -idna~=2.10 -joblib~=1.0.1 -kiwisolver~=1.3.1 -lxml~=4.6.3 -Markdown~=3.3.4 -matplotlib~=3.3.4 -multidict~=5.1.0 -numpy~=1.20.1 -oauthlib~=3.1.0 -Pillow~=8.1.2 -protobuf~=3.15.6 
-pyasn1~=0.4.8 -pyasn1-modules~=0.2.8 -pyjnius~=1.3.0 -pyparsing~=2.4.7 -python-dateutil~=2.8.1 -pytorch-lightning~=1.2.4 -PyYAML~=5.3.1 -requests~=2.25.1 -requests-oauthlib~=1.3.0 -rsa~=4.7.2 -scikit-learn~=0.24.1 -scipy~=1.6.1 -six~=1.15.0 -sklearn~=0.0 -tensorboard~=2.4.1 -tensorboard-plugin-wit~=1.8.0 -threadpoolctl~=2.1.0 -torch~=1.8.0 -tqdm~=4.59.0 -typing-extensions~=3.7.4.3 -urllib3~=1.26.4 -Werkzeug~=1.0.1 -yarl~=1.6.3 diff --git a/FCRtraining/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.client.properties b/FCRtraining/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.client.properties deleted file mode 100644 index 5096ddf..0000000 --- a/FCRtraining/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.client.properties +++ /dev/null @@ -1,18 +0,0 @@ -# -# Copyright (C) 2017 7bulls.com -# -# This Source Code Form is subject to the terms of the -# Mozilla Public License, v. 2.0. If a copy of the MPL -# was not distributed with this file, You can obtain one at -# http://mozilla.org/MPL/2.0/. -# - -#hostname where CDO Server resides -host=3.6.116.29 -#host=cdoserver -#port on which CDO Server listens -port=2036 -#the name of the CDO repository of the server -repository=repo1 -#logging to be set off or on - default is off -logging=off diff --git a/FCRtraining/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties b/FCRtraining/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties deleted file mode 100644 index 21ea32d..0000000 --- a/FCRtraining/src/metrics/ASUtilityGenerator/.paasage/eu.paasage.mddb.cdo.server.properties +++ /dev/null @@ -1,29 +0,0 @@ -#mysql connection properties -dbtype=mysql -dburl=jdbc:mysql://localhost:3306/repo1 -username=root -password=admin - -#hsqldb connection properties -#dbtype=hsqldb -#dburl=jdbc:hsqldb:file:cdohibernate -#username=admin -#password=admin - -#name of the repository to be created -repository=repo1 - -#type of store to be created -storetype=db - -#number of port to listen -port=2036 - -#logging should be set on or off - default is off -logging=on - -#security should be set on or off - default is off -security=off - -### logback configuration ### -logging.config=file:${MELODIC_CONFIG_DIR}/logback-conf/logback-spring.xml \ No newline at end of file diff --git a/FCRtraining/src/metrics/ASUtilityGenerator/ASUtilityGenerator.py b/FCRtraining/src/metrics/ASUtilityGenerator/ASUtilityGenerator.py deleted file mode 100644 index 8437861..0000000 --- a/FCRtraining/src/metrics/ASUtilityGenerator/ASUtilityGenerator.py +++ /dev/null @@ -1,92 +0,0 @@ -import jnius_config -# the jar should contain the (upperware) utility generator library and the ASUtilityGeneratorApplication from java-src (with dependencies) -jnius_config.set_classpath("") - -from jnius import autoclass -from jnius import JavaClass, MetaJavaClass -from pathlib import Path -from tempfile import NamedTemporaryFile -from typing import List, Dict -from xml.etree.ElementTree import ElementTree, Element -from lxml import etree -from os import unlink - -ASUtilityGeneratorApplication = autoclass("eu.melodic.upperware.utilitygenerator.ASUtilityGeneratorApplication") -IntVariableValueDTO = autoclass("eu.melodic.upperware.utilitygenerator.cdo.cp_model.DTO.IntVariableValueDTO") -ArrayList = autoclass("java.util.ArrayList") - -class ASUtilityGenerator: - """ - Class used for utilizing the "utility generator" java library to calculate the utility function value - for given constraint problem and camel model. 
- - Requires java library with utility generator (and its dependencies) and ASUtilityGeneratorApplication from java-src. - The .jar containing the library should be included in the classpath declared at the beginning of this file. - """ - _cp_model: ElementTree - _xml_fillings: Dict[str, Element] - - _cp_model_path: str - _camel_model_path: str - _node_candidates_path: str - - def __init__(self, - cp_model_path: str, - camel_model_path: str, - node_candidates_path: str, - metric_names: List[str]): - """ - :param cp_model_path: path to the file containing the constraint problem model - :param camel_model_path: path to the file containing the camel model - :param node_candidates_path: path to the file containing node candidates - :param metric_names: names of the metrics that might be changed (exactly as they appear in the cp model file) - """ - - self._cp_model_path = Path(cp_model_path) - self._cp_model = etree.parse(str(Path(self._cp_model_path))) - self._camel_model_path = camel_model_path - self._node_candidates_path = node_candidates_path - - self._xml_fillings = {} - for name in metric_names: - self._xml_fillings[name] = self._cp_model.find( - f"cpMetrics[@id='{name}']")[0] - - def _add_metrics(self, filename: str, metrics: Dict[str, int]): - """ - Adds metrics to the constraint problem model. - :param filename: name of the file containing the constraint problem model - :param metrics: dictionary with pairs (arg_name, arg_value) describing the metrics to be added. If metrics are - empty, then no value wil be changed - """ - for arg_name, arg_value in metrics.items(): - arg_loc = self._xml_fillings[arg_name] - arg_loc.set('value', str(arg_value)) - - self._cp_model.write(filename, - xml_declaration=True, - encoding="ASCII") - - def evaluate(self, configuration: Dict[str, int], metrics: Dict[str, int]) -> float: - """ - Creates java objects based on the parameters and a tmeporary file with an updated contraint problem model. - Then it calculates the utility function value using the java's ASUtilityGeneratorApplication. 
- :param configuration: dictionary with pairs (arg_name, arg_value) describing the configuration - :param metrics: dictionary with pairs (arg_name, arg_value) describing the metrics - :return: the utility function value for the given parameters - """ - tempfile = NamedTemporaryFile(delete=False) - - self._add_metrics(filename = tempfile.name, metrics=metrics) - variable_list = ArrayList() - for (name, value) in configuration.items(): - variable_list.add(IntVariableValueDTO(name, round(value))) - - utility_generator = ASUtilityGeneratorApplication(self._camel_model_path, tempfile.name, - self._node_candidates_path) - utility_value = utility_generator.evaluate(variable_list) - - tempfile.close() - unlink(tempfile.name) - - return utility_value diff --git a/FCRtraining/src/metrics/ASUtilityGenerator/java-src/ASUtilityGeneratorApplication.java b/FCRtraining/src/metrics/ASUtilityGenerator/java-src/ASUtilityGeneratorApplication.java deleted file mode 100644 index cd5a8f3..0000000 --- a/FCRtraining/src/metrics/ASUtilityGenerator/java-src/ASUtilityGeneratorApplication.java +++ /dev/null @@ -1,35 +0,0 @@ -package eu.melodic.upperware.utilitygenerator; - -import eu.melodic.cache.NodeCandidates; -import eu.melodic.cache.impl.FilecacheService; -import eu.melodic.upperware.penaltycalculator.PenaltyFunctionProperties; -import eu.melodic.upperware.utilitygenerator.cdo.cp_model.DTO.VariableValueDTO; -import eu.melodic.upperware.utilitygenerator.properties.UtilityGeneratorProperties; -import eu.paasage.upperware.security.authapi.properties.MelodicSecurityProperties; -import eu.paasage.upperware.security.authapi.token.JWTServiceImpl; - -import java.util.Collection; - -public class ASUtilityGeneratorApplication{ - private UtilityGeneratorApplication utilityGeneratorApplication; - - public ASUtilityGeneratorApplication(String camelModelFilePath, String cpModelFilePath, String NODE_CANDIDATES_FILE_PATH){ - utilityGeneratorApplication = createUtilityGeneratorApplication(camelModelFilePath, cpModelFilePath, NODE_CANDIDATES_FILE_PATH); - } - - public double evaluate(Collection solution) { - return this.utilityGeneratorApplication.evaluate(solution); - } - - private static UtilityGeneratorApplication createUtilityGeneratorApplication(String camelModelFilePath, String cpModelFilePath, String NODE_CANDIDATES_FILE_PATH) { - boolean readFromFile = true; - NodeCandidates nodeCandidates = new FilecacheService().load(NODE_CANDIDATES_FILE_PATH); - UtilityGeneratorProperties utilityGeneratorProperties = new UtilityGeneratorProperties(); - utilityGeneratorProperties.setUtilityGenerator(new UtilityGeneratorProperties.UtilityGenerator()); - utilityGeneratorProperties.getUtilityGenerator().setDlmsControllerUrl(""); - MelodicSecurityProperties melodicSecurityProperties = new MelodicSecurityProperties(); - JWTServiceImpl jWTServiceImpl = new JWTServiceImpl(melodicSecurityProperties); - PenaltyFunctionProperties penaltyFunctionProperties = new PenaltyFunctionProperties(); - return new UtilityGeneratorApplication(camelModelFilePath, cpModelFilePath, readFromFile, nodeCandidates, utilityGeneratorProperties, melodicSecurityProperties, jWTServiceImpl, penaltyFunctionProperties); - } -} diff --git a/FCRtraining/src/metrics/MeanUtilityFunctionValue.py b/FCRtraining/src/metrics/MeanUtilityFunctionValue.py deleted file mode 100644 index 1619bea..0000000 --- a/FCRtraining/src/metrics/MeanUtilityFunctionValue.py +++ /dev/null @@ -1,59 +0,0 @@ -import torch -from pytorch_lightning.metrics import Metric -from typing import List, Dict -from 
FCRtraining.src.metrics.ASUtilityGenerator.ASUtilityGenerator import ASUtilityGenerator
-
-
-class MeanUtilityFunctionValue(Metric):
-    """
-    Metric which uses ASUtilityGenerator to calculate utility function values
-    of given configurations and return the mean value.
-    """
-    def __init__(self, metrics_labels_and_positions: Dict[str, int], target_labels: List[str], cp_model_file_path: str,
-                 camel_model_file_path: str, node_candidates_file_path: str, dist_sync_on_step=False):
-        super().__init__(dist_sync_on_step=dist_sync_on_step)
-
-        self.add_state("total_utility_value", default=torch.tensor(0.0), dist_reduce_fx="sum")
-        self.add_state("number_of_configurations", default=torch.tensor(0), dist_reduce_fx="sum")
-
-        self.metrics_labels_and_positions = metrics_labels_and_positions
-        self.target_labels = target_labels
-        self.AS_utility_generator = ASUtilityGenerator(cp_model_path=cp_model_file_path,
-                                                       camel_model_path=camel_model_file_path,
-                                                       node_candidates_path=node_candidates_file_path,
-                                                       metric_names=metrics_labels_and_positions.keys())
-
-    def update(self, x: torch.Tensor, configuration: torch.Tensor):
-        for i in range(len(configuration)):
-            conf = self._get_configuration(configuration[i])
-            metr = self._get_metrics(x[i])
-            self.total_utility_value += self.AS_utility_generator.evaluate(configuration=conf, metrics=metr)
-
-        self.number_of_configurations += len(configuration)
-
-    def compute(self):
-
-        return self.total_utility_value.float() / self.number_of_configurations
-
-    def _get_metrics(self, x: torch.Tensor) -> Dict[str, int]:
-        """
-        Prepare a dictionary describing metrics based on the given tensor and self.metrics_labels_and_positions
-        :param x: input tensor to the network
-        :return: dictionary with pairs (metric_name: metric_value)
-        """
-        last_row_of_x = x[-1]
-        metrics: Dict[str, int] = {}
-        for label, position in self.metrics_labels_and_positions.items():
-            metrics[label] = last_row_of_x[position].item()
-        return metrics
-
-    def _get_configuration(self, solve: torch.Tensor) -> Dict[str, int]:
-        """
-        Prepare a dictionary describing the configuration based on the given tensor and self.target_labels
-        :param solve: tensor containing the configuration
-        :return: dictionary with pairs (variable_name: variable_value)
-        """
-        configuration: Dict[str, int] = {}
-        for position, label in enumerate(self.target_labels):
-            configuration[label] = solve[position].item()
-        return configuration
diff --git a/FCRtraining/src/metrics/RowAccuracy.py b/FCRtraining/src/metrics/RowAccuracy.py
deleted file mode 100644
index ca37397..0000000
--- a/FCRtraining/src/metrics/RowAccuracy.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from pytorch_lightning.metrics import Metric
-import torch
-
-
-class RowAccuracy(Metric):
-    """
-    Represents accuracy of full row matches (dim=1) between the given tensors
-
-    implemented as in:
-    https://pytorch-lightning.readthedocs.io/en/latest/metrics.html
-    """
-
-    def __init__(self, dist_sync_on_step=False):
-        super().__init__(dist_sync_on_step=dist_sync_on_step)
-
-        self.cnt = 0
-        self.add_state("correct", default=torch.tensor(0), dist_reduce_fx="sum")
-        self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum")
-
-    def update(self, preds: torch.Tensor, target: torch.Tensor):
-        self.cnt += 1
-        assert preds.shape == target.shape
-        self.correct += (preds == target).all(dim=1).sum()  # count all row matches
-        self.total += target.shape[0]  # add batch size
-
-    def compute(self):
-        acc = self.correct.float() / self.total
-        return acc
diff --git a/FCRtraining/src/metrics/UtilityAccuracy.py b/FCRtraining/src/metrics/UtilityAccuracy.py
deleted file mode 100644
index 4a3fceb..0000000
--- a/FCRtraining/src/metrics/UtilityAccuracy.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from pytorch_lightning.metrics import Metric
-from typing import Callable, Any
-import torch
-
-from ..utils.rarityUtility import RarityUtility
-
-
-class AvgUtilityAccuracy(Metric):
-    """
-    NOTE: this feature might not be possible to implement this way
-
-    Represents average accuracy across values with the same utility
-
-    implemented as in:
-    https://pytorch-lightning.readthedocs.io/en/latest/metrics.html
-    """
-
-    def __init__(self, util_func: Callable[[Any], float], intervals=100, dist_sync_on_step=False):
-        super().__init__(dist_sync_on_step=dist_sync_on_step)
-
-        self.util_func = util_func
-        self.add_state("embed", default=torch.linspace(0, 1, intervals), dist_reduce_fx=None)
-        self.add_state("correct", default=torch.zeros(intervals), dist_reduce_fx="sum")
-        self.add_state("total", default=torch.zeros(intervals), dist_reduce_fx="sum")
-
-    def update(self, preds: torch.Tensor, target: torch.Tensor):
-        assert preds.shape == target.shape
-
-        for p, t in zip(preds, target):
-            embed = torch.searchsorted(self.embed, torch.tensor([self.util_func(t)]))
-            if (p == t).all():
-                self.correct[embed] += 1
-            self.total[embed] += 1
-
-    def compute(self):
-        return self.correct.float() / self.total
diff --git a/FCRtraining/src/metrics/UtilityFunctionMAE.py b/FCRtraining/src/metrics/UtilityFunctionMAE.py
deleted file mode 100644
index 005cc96..0000000
--- a/FCRtraining/src/metrics/UtilityFunctionMAE.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import torch
-from pytorch_lightning.metrics import Metric
-from typing import List, Dict
-from FCRtraining.src.metrics.ASUtilityGenerator.ASUtilityGenerator import ASUtilityGenerator
-
-
-class UtilityFunctionMAE(Metric):
-    """
-    Metric which uses ASUtilityGenerator to calculate mean absolute error between
-    utility function values of predicted and target configurations.
-    """
-    def __init__(self, metrics_labels_and_positions: Dict[str, int], target_labels: List[str], cp_model_file_path: str,
-                 camel_model_file_path: str, node_candidates_file_path: str, dist_sync_on_step=False):
-        super().__init__(dist_sync_on_step=dist_sync_on_step)
-
-        self.add_state("absolute_difference_in_utility_value", default=torch.tensor(0.0), dist_reduce_fx="sum")
-        self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum")
-
-        self.metrics_labels_and_positions = metrics_labels_and_positions
-        self.target_labels = target_labels
-        self.AS_utility_generator = ASUtilityGenerator(cp_model_path=cp_model_file_path,
-                                                       camel_model_path=camel_model_file_path,
-                                                       node_candidates_path=node_candidates_file_path,
-                                                       metric_names=metrics_labels_and_positions.keys())
-
-    def update(self, x: torch.Tensor, preds: torch.Tensor, target: torch.Tensor):
-        assert preds.shape == target.shape
-
-        for i in range(len(target)):
-            target_utility_value = self.AS_utility_generator.evaluate(configuration=self._get_configuration(target[i]), metrics=self._get_metrics(x[i]))
-            preds_utility_value = self.AS_utility_generator.evaluate(configuration=self._get_configuration(preds[i]), metrics=self._get_metrics(x[i]))
-            self.absolute_difference_in_utility_value += abs(target_utility_value - preds_utility_value)
-
-        self.total += len(target)
-
-    def compute(self):
-        return self.absolute_difference_in_utility_value.float() / self.total
-
-    def _get_metrics(self, x: torch.Tensor) -> Dict[str, int]:
-        """
-        Prepare a dictionary describing metrics based on the given tensor and self.metrics_labels_and_positions
-        :param x: input tensor to the network
-        :return: dictionary with pairs (metric_name: metric_value)
-        """
-        last_row_of_x = x[-1]
-        metrics: Dict[str, int] = {}
-        for label, position in self.metrics_labels_and_positions.items():
-            metrics[label] = last_row_of_x[position].item()
-        return metrics
-
-    def _get_configuration(self, solve: torch.Tensor) -> Dict[str, int]:
-        """
-        Prepare a dictionary describing the configuration based on the given tensor and self.target_labels
-        :param solve: tensor containing the configuration
-        :return: dictionary with pairs (variable_name: variable_value)
-        """
-        configuration: Dict[str, int] = {}
-        for position, label in enumerate(self.target_labels):
-            configuration[label] = solve[position].item()
-        return configuration
diff --git a/FCRtraining/src/metrics/__init__.py b/FCRtraining/src/metrics/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/FCRtraining/src/models/__init__.py b/FCRtraining/src/models/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/FCRtraining/src/models/lstm_net/__init__.py b/FCRtraining/src/models/lstm_net/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/FCRtraining/src/models/lstm_net/dataloader.py b/FCRtraining/src/models/lstm_net/dataloader.py
deleted file mode 100644
index adec221..0000000
--- a/FCRtraining/src/models/lstm_net/dataloader.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from FCRdataLoader.src.fcrdataloader.dataset import SequenceForecastMultiDistributionDatasetFactory as SFMDD
-from typing import Callable
-from pathlib import Path
-
-
-class FCRDataSetV2Factory(SFMDD):
-    '''
-    data set factory for secure-document data version 2; local data from set2
-    '''
-
-    def __init__(
-        self,
-        seq_len: int,
-        horizon: int,
-        x_transforms=None
-    ):
-        super().__init__(
-            seq_len=seq_len,
-            pred_step=horizon,
-            x_transforms=x_transforms,
-            file=Path(""),
-            usecols=[0]+list(range(2, 17)),
-            
experiment_id_col=0, - x_y_split=3, - x_predictions_cols=[2], - ) - - - diff --git a/FCRtraining/src/models/lstm_net/network.py b/FCRtraining/src/models/lstm_net/network.py deleted file mode 100644 index 72de2ba..0000000 --- a/FCRtraining/src/models/lstm_net/network.py +++ /dev/null @@ -1,107 +0,0 @@ -''' - lstm network trained with data from set1 -''' - -from torch.optim.lr_scheduler import ReduceLROnPlateau -from inspect import signature -from typing import Dict, Any, Callable, Optional -import torch.nn.functional as F -import torch.nn as nn -import torch - -from FCRtraining.src.utils.LitFCRtestBase import BaseTestEncoder - -''' - Dont touch great performance -''' - -HIDDEN_SIZE = 40 -BATCH_SIZE = 256 -SEQ_LEN = 30 -HORIZON = 5 -LSTM_LAYERS = 1 -INIT_LR = 0.01 - -FEATURES = 3 -OUTPUT = 12 - - -labels = ("LBStorage,DBCardinality,LBCardinality," - "LBRam,provider_Component_LB,AppStorage," - "AppCardinality,AppCores,provider_Component_App," - "DBCores,DBStorage,provider_Component_DB").split(',') - - -class Encoder(BaseTestEncoder): - def __init__( - self, - input_features=FEATURES, - output_features=OUTPUT, - criterion=nn.L1Loss(), - init_lr=INIT_LR, - batch_size=BATCH_SIZE, - seq_len=SEQ_LEN, - horizon=HORIZON, - hidden_size=HIDDEN_SIZE, - lstm_layers=LSTM_LAYERS, - utility_fx: Optional[Callable[[Any], float]] = None, - util_intervals: int = 100, - target_labels=labels, - hparams_to_log: Dict[str, Any] = {} - ): - super(Encoder, self).__init__( - criterion=criterion, - input_features=input_features, - output_features=output_features, - target_labels=target_labels, - hparams_to_log=hparams_to_log - ) - - self.seq_len = seq_len - self.horizon = horizon - self.batch_size = batch_size - self.lstm_layers = lstm_layers - self.hidden_size = hidden_size - self.fc2_size = hidden_size - - self.init_lr = init_lr - - self.relu = nn.LeakyReLU() - self.lstm = nn.LSTM(input_features, hidden_size, num_layers=self.lstm_layers, - bidirectional=True, batch_first=True) - self.fc1 = nn.Linear(hidden_size * 2, self.fc2_size) - self.fc2 = nn.Linear(self.fc2_size, self.output_features) - self.lstm_drop = nn.Dropout(p=0.2) - self.fc1_drop = nn.Dropout(p=0.5) - - def forward(self, x): - out, _ = self.lstm(x) - # out: (batch, input_features, hidden_size * directions) - out = self.lstm_drop(out[:, -1, :]) - # out: (batch, hidden_size * directions) - out = self.relu(out) - out = self.fc1(out) - out = self.relu(out) - out = self.fc1_drop(out) - #out = self.fc1_drop(out) - out = self.fc2(out) - return out - - def training_step(self, batch, batch_idx): - x, y = batch - prediction = self(x) - #print(f"x = {x[0]}") - #print(f"pred = {torch.round(prediction[0])}") - #print(f"y = {y[0]}") - loss = self.criterion(prediction, y) - self.log('train_loss', loss, on_step=False, on_epoch=True) - return loss - - def configure_optimizers(self): - optimizer = torch.optim.Adam(self.parameters(), lr=self.init_lr) - scheduler = ReduceLROnPlateau(optimizer, 'min', patience=10, verbose=True) - return { - 'optimizer': optimizer, - 'lr_scheduler': scheduler, - 'monitor': 'train_loss' - } diff --git a/FCRtraining/src/models/lstm_net/train.py b/FCRtraining/src/models/lstm_net/train.py deleted file mode 100644 index 553025a..0000000 --- a/FCRtraining/src/models/lstm_net/train.py +++ /dev/null @@ -1,62 +0,0 @@ -from pytorch_lightning.loggers import TensorBoardLogger -from torch.utils.data import DataLoader -from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler -import pytorch_lightning as pl -import torch - -from .network import Encoder 
-from .dataloader import FCRDataSetV2Factory
-
-
-HORIZON = 5
-SEQ_LEN = 30
-BATCH_SIZE = 256
-FEATURES_IN = 3
-FEATURES_OUT = 12
-
-
-EXTRA_HPARAMS = {
-    'gradient_clip_val': 1,
-    'max_epochs': 120
-}
-
-dataset_factory = FCRDataSetV2Factory(seq_len=SEQ_LEN, horizon=HORIZON)
-
-train = DataLoader(
-    dataset_factory.get_train_dataset(),
-    batch_size=BATCH_SIZE,
-    num_workers=4,
-    sampler=dataset_factory.get_uniform_dist_y_sampler()
-)
-
-val = DataLoader(
-    dataset_factory.get_validation_dataset(),
-    batch_size=BATCH_SIZE,
-    num_workers=4
-)
-
-test = DataLoader(
-    dataset_factory.get_test_dataset(),
-    batch_size=BATCH_SIZE,
-    num_workers=4
-)
-
-model = Encoder(  # currently no hparams to log
-        input_features=FEATURES_IN,
-        output_features=FEATURES_OUT,
-        horizon=HORIZON,
-        hidden_size=50,
-        hparams_to_log={
-            'HORIZON': HORIZON,
-            'gradient_clip_val': EXTRA_HPARAMS['gradient_clip_val'],
-            'max_epochs': EXTRA_HPARAMS['max_epochs'],
-            'hidden_size': 50
-        }
-    )
-
-logger = TensorBoardLogger('tb_log', name='lstm_no_scaling')
-logger.experiment.add_graph(model, torch.rand(model.input_features, model.seq_len, model.input_features))
-
-trainer = pl.Trainer(check_val_every_n_epoch=5, logger=logger, **EXTRA_HPARAMS)
-trainer.fit(model, train, val)
-trainer.test(model, test)
diff --git a/FCRtraining/src/models/lstm_scale/LitFCRtestBase.py b/FCRtraining/src/models/lstm_scale/LitFCRtestBase.py
deleted file mode 100644
index bdf6d31..0000000
--- a/FCRtraining/src/models/lstm_scale/LitFCRtestBase.py
+++ /dev/null
@@ -1,207 +0,0 @@
-from typing import Dict, Any, Callable, List, Optional
-import pytorch_lightning as pl
-import torch.nn as nn
-import torch
-
-from FCRtraining.src.utils.colAvgError import AvgColL1Error
-from FCRtraining.src.utils.colAvgRelError import AvgColRelativeError
-from FCRtraining.src.utils.bestWorstTestPreds import BestWorstPredictions
-from FCRtraining.src.metrics.UtilityFunctionMAE import UtilityFunctionMAE
-from FCRtraining.src.metrics.MeanUtilityFunctionValue import MeanUtilityFunctionValue
-from FCRtraining.src.utils.roundToDomain import FCRDomain
-
-
-fcr_dom_rounder = FCRDomain()
-
-
-class BaseTestEncoder(pl.LightningModule):
-    """
-    abstract base class for LightningModule;
-    implements validation and test loops, including logging.
-    Subclasses must provide a criterion as the loss function.
-
-    input_features:
-        number of elements of input vectors
-    output_features:
-        number of elements of output vectors
-    criterion:
-        loss function
-    util_intervals:
-        number of bins for util based metrics
-        (currently ignored)
-    utility_fx:
-        utility function for elements from
-        network output domain. (currently
-        ignored)
-    hparams_to_log:
-        all hparams that will be logged
-
-    If you want to utilize the utility function metrics then you must pass the following parameters
-    AND configure your environment as described in README.md:
-    target_labels:
-        labels for target value variables.
-        If not given defaults to "v1", "v2", ...
- metric_labels_and_positions: - mapping of metric names to their respective col indexes in input tensor - cp_model_file_path: - path to the file containing the constraint problem model - camel_model_file_path: - path to the file containing the camel model - node_candidates_file_path: - path to the file containing node candidates - """ - - def __init__( - self, - input_features: int, - output_features: int, - criterion: Callable[[torch.Tensor], float], - util_intervals: int = 100, - utility_fx: Optional[Callable[[Any], float]] = None, - target_labels: Optional[List[str]] = None, - hparams_to_log: Dict[str, Any] = {}, - metric_labels_and_positions: Optional[Dict[str, int]] = None, - cp_model_file_path: Optional[str] = None, - camel_model_file_path: Optional[str] = None, - node_candidates_file_path: Optional[str] = None, - scaler = None - ): - ''' - creates train, val and test metrics which then - can be inspected (for ex: tensorboard) - ''' - - if (target_labels is not None - and metric_labels_and_positions is not None - and cp_model_file_path is not None - and camel_model_file_path is not None - and node_candidates_file_path is not None): - self.canCalculateUtilityFunction = True - else: - self.canCalculateUtilityFunction = False - - if target_labels is not None: - assert len( - target_labels) == output_features, f"Length of target labels ({len(target_labels)}) must eq output_features ({output_features})" - else: - target_labels = [f"v{i}" for i in range(output_features)] - - super(BaseTestEncoder, self).__init__() - - self.target_labels = target_labels - self.input_features = input_features - self.output_features = output_features - self.criterion = criterion - self.scaler = scaler - - # log chosen hparams - self.log_hparams(hparams_to_log) - - # train metrics - self.train_mae = pl.metrics.MeanAbsoluteError() - - # test metrics - self.test_mae = pl.metrics.MeanAbsoluteError() - - - # utility function metrics based on Melodic's utility generator - if self.canCalculateUtilityFunction: - self.utility_function_MAE = UtilityFunctionMAE(metrics_labels_and_positions=metric_labels_and_positions, - target_labels=target_labels, - cp_model_file_path=cp_model_file_path, - camel_model_file_path=camel_model_file_path, - node_candidates_file_path=node_candidates_file_path) - self.mean_target_utility_function_value = MeanUtilityFunctionValue(metrics_labels_and_positions=metric_labels_and_positions, - target_labels=target_labels, - cp_model_file_path=cp_model_file_path, - camel_model_file_path=camel_model_file_path, - node_candidates_file_path=node_candidates_file_path) - - # col avg errors - self.colAvgL1Err = AvgColL1Error(target_labels) - self.colAvgRelErr = AvgColRelativeError(target_labels) - - # best worst prediction examples - self.bwPredExamples = BestWorstPredictions(self.criterion) - - def __unscaled_target(self, y: torch.Tensor) -> torch.Tensor: - return torch.from_numpy(self.scaler.inverse_transform(y)).float() - - def log_hparams(self, hparams: Dict[str, Any]): - self.hparams = hparams - - def validation_step(self, batch, batch_nb): - """ - predicts y, and calculates loss in training - """ - - x, y = batch - preds = self(x) - - preds_us = self.__unscaled_target(preds) - loss = self.criterion(preds_us, y) # might not be necessary - - return {'loss': loss, 'preds': preds_us, 'target': y} - - def validation_step_end(self, outputs): - ''' - update and log validation metrics - ''' - - self.train_mae(outputs['preds'], outputs['target']) - - self.log('validation_mae', self.train_mae, - 
on_step=False, on_epoch=True)
-
-    def test_step(self, batch, batch_idx):
-        """
-        predicts y, and calculates loss in testing
-        """
-
-        x, y = batch
-        preds = self(x)
-        preds_us = self.__unscaled_target(preds)
-        preds_us = fcr_dom_rounder.round(preds_us)
-        loss = self.criterion(preds_us, y)  # compare in unscaled units; might not be necessary
-        self.bwPredExamples.update_on_test_step(x, preds_us, y)
-        if self.canCalculateUtilityFunction:
-            self.utility_function_MAE.update(x=x, preds=preds_us, target=y)
-            self.mean_target_utility_function_value.update(x=x, configuration=y)
-
-        return {'loss': loss, 'preds': preds_us, 'target': y}
-
-    def test_step_end(self, outputs):
-        '''
-        update and log test metrics
-        '''
-
-        self.test_mae(outputs['preds'], outputs['target'])
-
-        self.log('test_mae', self.test_mae, on_step=False, on_epoch=True)
-        if self.canCalculateUtilityFunction:
-            self.log('test_utility_function_MAE', self.utility_function_MAE, on_step=False, on_epoch=True)
-            self.log('test_mean_target_utility_function_value', self.mean_target_utility_function_value, on_step=False, on_epoch=True)
-
-        # update col avg error
-        self.colAvgL1Err.update_after_test_step(
-            outputs['preds'], outputs['target'])
-        self.colAvgRelErr.update_after_test_step(
-            outputs['preds'], outputs['target'])
-
-    def on_test_end(self):
-        writer = self.logger.experiment
-
-        # show col avg error figure
-        writer.add_figure("avg L1 col error", self.colAvgL1Err.get_fig)
-        writer.add_figure("avg rel col error",
-                          self.colAvgRelErr.get_fig)
-        writer.add_text(
-            f"best {self.bwPredExamples.get_kept_best} data examples",
-            self.bwPredExamples.get_best_examples_str
-        )
-
-        writer.add_text(
-            f"worst {self.bwPredExamples.get_kept_worst} data examples",
-            self.bwPredExamples.get_worst_examples_str
-        )
-
diff --git a/FCRtraining/src/models/lstm_scale/__init__.py b/FCRtraining/src/models/lstm_scale/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/FCRtraining/src/models/lstm_scale/dataloader.py b/FCRtraining/src/models/lstm_scale/dataloader.py
deleted file mode 100644
index e38b4c7..0000000
--- a/FCRtraining/src/models/lstm_scale/dataloader.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from FCRdataLoader.src.fcrdataloader.dataset import SequenceForecastMultiDistributionDatasetFactory as SFMDD
-from typing import Callable
-from pathlib import Path
-
-
-class FCRDataSetV2Factory(SFMDD):
-    '''
-    data set factory for secure-document data version 2; local data from set2
-    '''
-
-    def __init__(
-        self,
-        seq_len: int,
-        horizon: int,
-        x_transforms=None
-    ):
-        super().__init__(
-            seq_len=seq_len,
-            pred_step=horizon,
-            x_transforms=x_transforms,
-            file=Path("/home/szysad/mimuw/3rok/ZPP/training-data/FCR-data/set1/combined.csv"),
-            usecols=[0]+list(range(2, 17)),
-            experiment_id_col=0,
-            x_y_split=3,
-            x_predictions_cols=[2],
-        )
diff --git a/FCRtraining/src/models/lstm_scale/network.py b/FCRtraining/src/models/lstm_scale/network.py
deleted file mode 100644
index ef0c328..0000000
--- a/FCRtraining/src/models/lstm_scale/network.py
+++ /dev/null
@@ -1,120 +0,0 @@
-'''
-    lstm network trained with data from set1
-'''
-
-from torch.optim.lr_scheduler import ReduceLROnPlateau
-from inspect import signature
-from typing import Dict, Any, Callable, Optional
-import torch.nn.functional as F
-import torch.nn as nn
-import torch
-
-from .LitFCRtestBase import BaseTestEncoder
-
-'''
-    Dont touch great performance
-'''
-
-HIDDEN_SIZE = 30
-BATCH_SIZE = 256
-SEQ_LEN = 15
-HORIZON = 5
-LSTM_LAYERS = 1
-INIT_LR = 0.01
-
-FEATURES = 3
-OUTPUT = 12
-
-
-labels = 
("LBStorage,DBCardinality,LBCardinality," - "LBRam,provider_Component_LB,AppStorage," - "AppCardinality,AppCores,provider_Component_App," - "DBCores,DBStorage,provider_Component_DB").split(',') - - -class Encoder(BaseTestEncoder): - def __init__( - self, - input_features=FEATURES, - output_features=OUTPUT, - criterion=nn.SmoothL1Loss(), - init_lr=INIT_LR, - batch_size=BATCH_SIZE, - seq_len=SEQ_LEN, - horizon=HORIZON, - hidden_size=HIDDEN_SIZE, - lstm_layers=LSTM_LAYERS, - utility_fx: Optional[Callable[[Any], float]] = None, - util_intervals: int = 100, - target_labels=labels, - hparams_to_log: Dict[str, Any] = {}, - metric_labels_and_positions: Optional[Dict[str, int]] = None, - cp_model_file_path: Optional[str] = None, - camel_model_file_path: Optional[str] = None, - node_candidates_file_path: Optional[str] = None, - scaler=None - ): - super(Encoder, self).__init__( - utility_fx=utility_fx, - util_intervals=util_intervals, - criterion=criterion, - input_features=input_features, - output_features=output_features, - target_labels=target_labels, - hparams_to_log=hparams_to_log, - metric_labels_and_positions=metric_labels_and_positions, - cp_model_file_path=cp_model_file_path, - camel_model_file_path=camel_model_file_path, - node_candidates_file_path=node_candidates_file_path, - scaler=scaler - ) - - self.seq_len = seq_len - self.horizon = horizon - self.batch_size = batch_size - self.lstm_layers = lstm_layers - self.hidden_size = hidden_size - self.fc2_size = hidden_size - - self.init_lr = init_lr - - self.relu = nn.LeakyReLU() - self.lstm = nn.LSTM(input_features, hidden_size, num_layers=self.lstm_layers, - bidirectional=True, batch_first=True) - self.fc1 = nn.Linear(hidden_size * 2, self.fc2_size) - self.fc2 = nn.Linear(self.fc2_size, self.output_features) - self.lstm_drop = nn.Dropout(p=0.3) - self.fc1_drop = nn.Dropout(p=0.6) - - def forward(self, x): - out, _ = self.lstm(x) - # out: (batch, input_features, hidden_size * directions) - out = self.lstm_drop(out[:, -1, :]) - # out: (batch, hidden_size * directions) - out = self.relu(out) - out = self.fc1(out) - out = self.relu(out) - out = self.fc1_drop(out) - #out = self.fc1_drop(out) - out = self.fc2(out) - return out - - def training_step(self, batch, batch_idx): - x, y = batch - scaled_y = torch.from_numpy(self.scaler.transform(y)).float() - prediction = self(x) - #print(f"x = {x[0]}") - #print(f"pred = {torch.round(prediction[0])}") - #print(f"y = {y[0]}") - loss = self.criterion(prediction, scaled_y) - self.log('train_loss', loss, on_step=False, on_epoch=True) - return loss - - def configure_optimizers(self): - optimizer = torch.optim.Adam(self.parameters(), lr=self.init_lr) - scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5, verbose=True) - return { - 'optimizer': optimizer, - 'lr_scheduler': scheduler, - 'monitor': 'train_loss' - } diff --git a/FCRtraining/src/models/lstm_scale/train.py b/FCRtraining/src/models/lstm_scale/train.py deleted file mode 100644 index 556a7e7..0000000 --- a/FCRtraining/src/models/lstm_scale/train.py +++ /dev/null @@ -1,77 +0,0 @@ -from pytorch_lightning.loggers import TensorBoardLogger -from torch.utils.data import DataLoader -from sklearn.preprocessing import MaxAbsScaler, StandardScaler -import pytorch_lightning as pl -import torch - -from .network import Encoder -from .dataloader import FCRDataSetV2Factory - - -HORIZON = 5 -SEQ_LEN = 30 -BATCH_SIZE = 256 -FEATURES_IN = 3 -FEATURES_OUT = 12 - -TARGET_LABELS = ("AppCardinality,provider_Component_App,AppCores,AppStorage," - 
"LBCardinality,provider_Component_LB,LBRam,LBStorage," - "DBCardinality,provider_Component_DB,DBCores,DBStorage").split(',') -EXTRA_HPARAMS = { - 'gradient_clip_val': 0, - 'max_epochs': 100 -} - -x_scaler = None -y_scaler = MaxAbsScaler() - -dataset_factory = FCRDataSetV2Factory(seq_len=SEQ_LEN, horizon=HORIZON, x_transforms=x_scaler) - -y_scaler.fit(dataset_factory.get_train_target_vals()) - -train = DataLoader( - dataset_factory.get_train_dataset(), - batch_size=BATCH_SIZE, - num_workers=4, - sampler=dataset_factory.get_uniform_dist_y_sampler() -) - -val = DataLoader( - dataset_factory.get_validation_dataset(), - batch_size=BATCH_SIZE, - num_workers=4 -) - -test = DataLoader( - dataset_factory.get_test_dataset(), - batch_size=BATCH_SIZE, - num_workers=4 -) - -model = Encoder( # currently no hparams to log - input_features=FEATURES_IN, - output_features=FEATURES_OUT, - horizon=HORIZON, - batch_size=256, - hidden_size=40, - scaler=y_scaler, - target_labels=TARGET_LABELS, - metric_labels_and_positions=None, - cp_model_file_path=None, - camel_model_file_path=None. - node_candidates_file_path=None, - hparams_to_log={ - 'HORIZON': HORIZON, - 'x_scaler': x_scaler, - 'y_scaler': y_scaler, - 'gradient_clip_val': EXTRA_HPARAMS['gradient_clip_val'], - 'hidden_size': 30 - } - ) - -logger = TensorBoardLogger('test', name='scaled_lstm') -logger.experiment.add_graph(model, torch.rand(model.input_features, model.seq_len, model.input_features)) - -trainer = pl.Trainer(fast_dev_run=False, check_val_every_n_epoch=5, logger=logger, **EXTRA_HPARAMS) -trainer.fit(model, train, val) -trainer.test(model, test) diff --git a/FCRtraining/src/utils/LitFCRtestBase.py b/FCRtraining/src/utils/LitFCRtestBase.py deleted file mode 100644 index ea04736..0000000 --- a/FCRtraining/src/utils/LitFCRtestBase.py +++ /dev/null @@ -1,210 +0,0 @@ -from typing import Dict, Any, Callable, List, Optional -import pytorch_lightning as pl -import torch.nn as nn -import torch - -from .colAvgError import AvgColL1Error -from .colAvgRelError import AvgColRelativeError -from .bestWorstTestPreds import BestWorstPredictions -from FCRtraining.src.metrics.UtilityFunctionMAE import UtilityFunctionMAE -from FCRtraining.src.metrics.MeanUtilityFunctionValue import MeanUtilityFunctionValue -from .roundToDomain import FCRDomain - - -class BaseTestEncoder(pl.LightningModule): - """ - abstract base class for LightningModule, - implements validation and test loops including logging - subclass must implement criterion as loss function - - input_features: - number of elements of input vectors - output_features: - number of elements of output vectors - criterion: - loss function - util_intervals: - number of bins for util based metrics - (currently ignored) - utility_fx: - utility function for elements from - network output domain. (currently - ignored) - hparams_to_log: - all haprams that will be logged - - If you want to utilize the utility function metrics then you must pass the following parameters - AND configure your environment as described in README.md: - target_labels: - labels for target value variables. - If not given defaults to "v1", "v2", ... 
- metric_labels_and_positions: - mapping of metric names to their respective col indexes in input tensor - cp_model_file_path: - path to the file containing the constraint problem model - camel_model_file_path: - path to the file containing the camel model - node_candidates_file_path: - path to the file containing node candidates - """ - - def __init__( - self, - input_features: int, - output_features: int, - criterion: Callable[[torch.Tensor], float], - util_intervals: int = 100, - utility_fx: Optional[Callable[[Any], float]] = None, - target_labels: Optional[List[str]] = None, - hparams_to_log: Dict[str, Any] = {}, - metric_labels_and_positions: Optional[Dict[str, int]] = None, - cp_model_file_path: Optional[str] = None, - camel_model_file_path: Optional[str] = None, - node_candidates_file_path: Optional[str] = None, - ): - ''' - creates train, val and test metrics which then - can be inspected (for ex: tensorboard) - ''' - - if (target_labels is not None - and metric_labels_and_positions is not None - and cp_model_file_path is not None - and camel_model_file_path is not None - and node_candidates_file_path is not None): - self.canCalculateUtilityFunction = True - else: - self.canCalculateUtilityFunction = False - - if target_labels is not None: - assert len( - target_labels) == output_features, f"Length of target labels ({len(target_labels)}) must eq output_features ({output_features})" - else: - target_labels = [f"v{i}" for i in range(output_features)] - - super(BaseTestEncoder, self).__init__() - - self.target_labels = target_labels - self.input_features = input_features - self.output_features = output_features - self.criterion = criterion - - # log chosen hparams - self.log_hparams(hparams_to_log) - - # train metrics - self.train_mse = pl.metrics.MeanSquaredError() - self.train_mae = pl.metrics.MeanAbsoluteError() - - # test metrics - self.test_mse = pl.metrics.MeanSquaredError() - self.test_mae = pl.metrics.MeanAbsoluteError() - - # utility function - self.utility_fx = utility_fx - self.util_intervals = util_intervals - - # utility function metrics based on Melodic's utility generator - if self.canCalculateUtilityFunction: - self.utility_function_MAE = UtilityFunctionMAE(metrics_labels_and_positions=metric_labels_and_positions, - target_labels=target_labels, - cp_model_file_path=cp_model_file_path, - camel_model_file_path=camel_model_file_path, - node_candidates_file_path=node_candidates_file_path) - self.mean_target_utility_function_value = MeanUtilityFunctionValue(metrics_labels_and_positions=metric_labels_and_positions, - target_labels=target_labels, - cp_model_file_path=cp_model_file_path, - camel_model_file_path=camel_model_file_path, - node_candidates_file_path=node_candidates_file_path) - - # col avg errors - self.colAvgL1Err = AvgColL1Error(target_labels) - self.colAvgRelErr = AvgColRelativeError(target_labels) - - # best worst prediction examples - self.bwPredExamples = BestWorstPredictions(self.criterion) - - # round network results to domain - self.rounder = FCRDomain() - - def log_hparams(self, hparams: Dict[str, Any]): - self.hparams = hparams - - def validation_step(self, batch, batch_nb): - """ - predicts y, and calculates loss in training - """ - - x, y = batch - preds = self(x) - preds = self.rounder.round(preds) - loss = self.criterion(preds, y) # might not be necessary - - return {'loss': loss, 'preds': preds, 'target': y} - - def validation_step_end(self, outputs): - ''' - update and log validation metrics - ''' - - self.train_mse(outputs['preds'], 
outputs['target'])
-        self.train_mae(outputs['preds'], outputs['target'])
-
-        self.log('validation_mse', self.train_mse,
-                 on_step=False, on_epoch=True)
-        self.log('validation_mae', self.train_mae,
-                 on_step=False, on_epoch=True)
-
-    def test_step(self, batch, batch_idx):
-        """
-        predicts y, and calculates loss in testing
-        """
-
-        x, y = batch
-        preds = self(x)
-        preds = self.rounder.round(preds)
-        loss = self.criterion(preds, y)  # might not be necessary
-        self.bwPredExamples.update_on_test_step(x, preds, y)
-        if self.canCalculateUtilityFunction:
-            self.utility_function_MAE.update(x=x, preds=preds, target=y)
-            self.mean_target_utility_function_value.update(x=x, configuration=y)
-
-        return {'loss': loss, 'preds': preds, 'target': y}
-
-    def test_step_end(self, outputs):
-        '''
-        update and log test metrics
-        '''
-
-        self.test_mse(outputs['preds'], outputs['target'])
-        self.test_mae(outputs['preds'], outputs['target'])
-
-        self.log('test_mse', self.test_mse, on_step=False, on_epoch=True)
-        self.log('test_mae', self.test_mae, on_step=False, on_epoch=True)
-        if self.canCalculateUtilityFunction:
-            self.log('test_utility_function_MAE', self.utility_function_MAE, on_step=False, on_epoch=True)
-            self.log('test_mean_target_utility_function_value', self.mean_target_utility_function_value, on_step=False, on_epoch=True)
-
-        # update col avg error
-        self.colAvgL1Err.update_after_test_step(
-            outputs['preds'], outputs['target'])
-        self.colAvgRelErr.update_after_test_step(
-            outputs['preds'], outputs['target'])
-
-    def on_test_end(self):
-        writer = self.logger.experiment
-
-        # show col avg error figure
-        writer.add_figure("avg L1 col error", self.colAvgL1Err.get_fig)
-        writer.add_figure("avg rel col error",
-                          self.colAvgRelErr.get_fig)
-        writer.add_text(
-            f"best {self.bwPredExamples.get_kept_best} data examples",
-            self.bwPredExamples.get_best_examples_str
-        )
-
-        writer.add_text(
-            f"worst {self.bwPredExamples.get_kept_worst} data examples",
-            self.bwPredExamples.get_worst_examples_str
-        )
-
diff --git a/FCRtraining/src/utils/__init__.py b/FCRtraining/src/utils/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/FCRtraining/src/utils/bestWorstTestPreds.py b/FCRtraining/src/utils/bestWorstTestPreds.py
deleted file mode 100644
index 922f23d..0000000
--- a/FCRtraining/src/utils/bestWorstTestPreds.py
+++ /dev/null
@@ -1,145 +0,0 @@
-import torch
-import heapq
-from typing import Callable
-
-
-class InputTargetPred:
-    '''
-    simple wrapper for:
-        input data for single network prediction
-        single network prediction for that data
-        target value for that prediction
-        error of target value and prediction
-
-    also provides a less-than relation based
-    on target value & prediction error
-
-    can be reversed for a symmetrical flip of the less-than relation
-    (for example when used in a max heap)
-    '''
-
-    flipped: bool
-    error: float
-    prediction: torch.Tensor
-    target: torch.Tensor
-
-    def __init__(
-        self,
-        inputdata: torch.Tensor,  # of shape [in_features, seq_len]
-        prediction: torch.Tensor,  # of shape [out_features,]
-        target: torch.Tensor,  # of shape [out_features,]
-        error: float,
-        flipped: bool = False,
-        precision: int = 3  # decimal precision for all values when printing
-    ):
-        self.inputdata = inputdata
-        self.prediction = prediction
-        self.target = target
-        self.error = error
-        self.flipped = flipped
-        self.precision = precision
-
-    def __lt__(self, other):
-        lt = self.error < other.error
-        return lt if not self.flipped else not lt
-
-    def __format_float(self, numb: float) -> str:
-        return format(numb, f'.{self.precision}f')
-
-    def __str_tensor(self, tens: torch.Tensor) -> str:
-        ''' string repr of torch tensor of shape (n,) '''
-        return "[" + ', '.join(map(self.__format_float, tens)) + "]"
-
-    def __str__(self):
-        outstr = 'input:\n\n'
-        outstr += '\n\n'.join(map(lambda r: "\t\t" + self.__str_tensor(r), self.inputdata))
-        outstr += '\n\n'
-        outstr += f'target:\n\n\t\t{self.__str_tensor(self.target)}\n\n'
-        outstr += f'prediction:\n\n\t\t{self.__str_tensor(self.prediction)}\n\n'
-        outstr += f'error: {self.__format_float(self.error)}\n\n'
-        return outstr
-
-
-class BestWorstPredictions:
-    '''
-    collects predictions and target values
-    and keeps in memory some of the best and worst
-    predictions, which it then logs sorted in
-    decreasing order by the given criterion
-    '''
-
-    __best: list
-    __worst: list
-
-    def __init__(
-        self,
-        criterion: Callable[[torch.Tensor, torch.Tensor], float],
-        keep_best=3,
-        keep_worst=3
-    ):
-        self.__best = []  # min heap
-        self.__worst = []  # min heap
-        self.__criterion = criterion
-        self.__keep_best = keep_best
-        self.__keep_worst = keep_worst
-
-    def update_on_test_step(
-        self,
-        inputdata: torch.Tensor,
-        prediction: torch.Tensor,
-        target: torch.Tensor
-    ):
-
-        for indata, pred, targ in zip(inputdata, prediction, target):
-            targPred = InputTargetPred(
-                inputdata=indata,
-                prediction=pred,
-                target=targ,
-                error=self.__criterion(pred, targ)
-            )
-
-            targPredFlipped = InputTargetPred(
-                inputdata=indata,
-                prediction=pred,
-                target=targ,
-                error=self.__criterion(pred, targ),
-                flipped=True
-            )
-
-            if len(self.__best) < self.__keep_best:
-                heapq.heappush(self.__best, targPredFlipped)
-            else:
-                heapq.heappushpop(self.__best, targPredFlipped)
-
-            if len(self.__worst) < self.__keep_worst:
-                heapq.heappush(self.__worst, targPred)
-            else:
-                heapq.heappushpop(self.__worst, targPred)
-
-    @property
-    def get_best_examples_str(self) -> str:
-        '''
-        returns summary of best prediction & target pairs
-        in decreasing order of prediction accuracy
-        '''
-
-        title = lambda i: "\n\n" + f"EXAMPLE {i+1}".center(20, "=") + "\n\n"
-        return ('\n\n\n\n').join(map(lambda i: title(i[0]) + str(i[1]), enumerate(self.__best)))
-
-    @property
-    def get_worst_examples_str(self) -> str:
-        '''
-        returns summary of worst prediction & target pairs
-        in increasing order of prediction accuracy
-        '''
-
-        title = lambda i: "\n\n" + f"EXAMPLE {i+1}".center(20, "=") + "\n\n"
-        return ('\n\n\n\n').join(map(lambda i: title(i[0]) + str(i[1]), enumerate(self.__worst)))
-
-    @property
-    def get_kept_worst(self) -> int:
-        return self.__keep_worst
-
-    @property
-    def get_kept_best(self) -> int:
-        return self.__keep_best
diff --git a/FCRtraining/src/utils/colAvgError.py b/FCRtraining/src/utils/colAvgError.py
deleted file mode 100644
index a9fd7fa..0000000
--- a/FCRtraining/src/utils/colAvgError.py
+++ /dev/null
@@ -1,95 +0,0 @@
-from typing import List
-import matplotlib.pyplot as plt
-import torch
-
-
-def autolabel(rects):
-    for rect in rects:
-        # Get X and Y placement of label from rect.
-        x_value = rect.get_width()
-        y_value = rect.get_y() + rect.get_height() / 2
-
-        # Number of points between bar and label. Change to your liking.
-        space = 5
-        # Vertical alignment for positive values
-        ha = 'left'
-
-        # If value of bar is negative: Place label left of bar
-        if x_value < 0:
-            # Invert space to place label to the left
-            space *= -1
-            # Horizontally align label at right
-            ha = 'right'
-
-        # Use X value as label and format number with one decimal place
-        label = "{:.1f}".format(x_value)
-
-        # Create annotation
-        plt.annotate(
-            label,                       # Use `label` as label
-            (x_value, y_value),          # Place label at end of the bar
-            xytext=(space, 0),           # Horizontally shift label by `space`
-            textcoords="offset points",  # Interpret `xytext` as offset in points
-            va='center',                 # Vertically center label
-            ha=ha)                       # Horizontally align label differently for
-                                         # positive and negative values.
-
-
-class AvgColL1Error:
-    '''
-    based on collected network predictions and
-    target values, returns a pyplot fig representing
-    the average L1 error for each target variable
-    '''
-
-    __avg_errors: torch.Tensor
-    __min_errors: torch.Tensor
-    __max_errors: torch.Tensor
-    __labels: List[str]
-    __bar_width: float
-
-    def __init__(
-        self,
-        labels: List[str],
-        bar_width=1
-    ):
-        self.row_numb = 1
-        self.__labels = labels
-        self.__avg_errors = torch.zeros(len(labels))
-        self.__max_errors = torch.ones(len(labels)) * float('-inf')
-        self.__min_errors = torch.ones(len(labels)) * float('inf')
-
-        # chart display conf
-        self.__bar_width = bar_width
-
-    def update_after_test_step(
-        self,
-        predictions: torch.Tensor,
-        target: torch.Tensor
-    ):
-
-        err = torch.abs(predictions - target)
-        for row in err:
-            self.__avg_errors += (row - self.__avg_errors) / self.row_numb
-            self.__max_errors = torch.max(self.__max_errors, row)
-            self.__min_errors = torch.min(self.__min_errors, row)
-            self.row_numb += 1
-
-    @property
-    def get_fig(self):
-        w = self.__bar_width
-        n_labels = len(self.__labels)
-        x = torch.arange(n_labels) * 4 * max(1, w)
-        fig, ax = plt.subplots(figsize=(n_labels, n_labels))
-        b1 = ax.barh(x-w, self.__max_errors, w, label='max')
-        b2 = ax.barh(x, self.__avg_errors, w, label='avg')
-        b3 = ax.barh(x+w, self.__min_errors, w, label='min')
-        ax.set_yticks(x)
-        ax.set_yticklabels(self.__labels)
-        ax.legend()
-
-        autolabel(b1)
-        autolabel(b2)
-        autolabel(b3)
-
-        return fig
diff --git a/FCRtraining/src/utils/colAvgRelError.py b/FCRtraining/src/utils/colAvgRelError.py
deleted file mode 100644
index e756601..0000000
--- a/FCRtraining/src/utils/colAvgRelError.py
+++ /dev/null
@@ -1,74 +0,0 @@
-from typing import List
-import matplotlib.pyplot as plt
-import torch
-
-from .colAvgError import autolabel
-
-
-class AvgColRelativeError:
-    '''
-    based on collected network predictions and
-    target values, returns a pyplot fig representing
-    the relative error for each target variable
-    '''
-
-    __avg_errors: torch.Tensor
-    __min_errors: torch.Tensor
-    __max_errors: torch.Tensor
-    __labels: List[str]
-
-    def __init__(self,
-                 labels: List[str],
-                 bar_width=1
-                 ):
-        self.row_numb = 1
-        self.__labels = labels
-        self.__avg_errors = torch.zeros(len(labels))
-        self.__max_errors = torch.ones(len(labels)) * float('-inf')
-        self.__min_errors = torch.ones(len(labels)) * float('inf')
-
-        # chart display conf
-        self.__bar_width = bar_width
-
-        # for torch reasons
-        self.__one = torch.ones(1, dtype=torch.float32)
-
-    def err(
-        self,
-        prediction: torch.Tensor,
-        target: torch.Tensor
-    ):
-        return torch.abs(prediction - target) / torch.where(target == 0, self.__one, target)
-
-
-    def update_after_test_step(
-        self,
-        predictions: torch.Tensor,
-        target: torch.Tensor
-    ):
-
-        err = self.err(predictions, target)
-        for row in err:
-            self.__avg_errors += (row - self.__avg_errors) / self.row_numb
-            self.__min_errors = torch.min(self.__min_errors, row)
-            self.__max_errors = torch.max(self.__max_errors, row)
-            self.row_numb += 1
-
-    @property
-    def get_fig(self):
-        w = self.__bar_width
-        n_labels = len(self.__labels)
-        x = torch.arange(n_labels) * 4 * max(1, w)
-        fig, ax = plt.subplots(figsize=(n_labels, n_labels))
-        b1 = ax.barh(x-w, self.__max_errors, w, label='max')
-        b2 = ax.barh(x, self.__avg_errors, w, label='avg')
-        b3 = ax.barh(x+w, self.__min_errors, w, label='min')
-        ax.set_yticks(x)
-        ax.set_yticklabels(self.__labels)
-        ax.legend()
-
-        autolabel(b1)
-        autolabel(b2)
-        autolabel(b3)
-
-        return fig
diff --git a/FCRtraining/src/utils/rarityUtility.py b/FCRtraining/src/utils/rarityUtility.py
deleted file mode 100644
index 8f05f33..0000000
--- a/FCRtraining/src/utils/rarityUtility.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from typing import Dict, Hashable, Iterator
-from collections import Counter
-import torch
-
-
-class RarityUtility:
-    '''
-    returns utility (value from range [0, 1])
-    of an element based on its rarity
-    '''
-
-    __occurs: Dict[Hashable, int]
-
-    def __init__(self, data: Iterator[Hashable] = []):
-        self.__occurs = {}
-        self.__min_occurs = 0
-        self.fit_new(data)
-
-
-    def fit_new(self, data: Iterator[Hashable]):
-        for _el in data:
-            el = tuple(v.item() for v in _el)
-            if el in self.__occurs:
-                self.__occurs[el] += 1
-            else:
-                self.__occurs[el] = 1
-        # default=0 keeps an empty fit (e.g. the default constructor) from raising ValueError
-        self.__min_occurs = min(self.__occurs.values(), default=0)
-
-    def __call__(self, elem) -> float:
-        '''
-        returns utility value of
-        given element based on
-        given data
-        '''
-
-        el = tuple(v.item() for v in elem)
-        return self.__min_occurs / self.__occurs[el]
diff --git a/FCRtraining/src/utils/roundToDomain.py b/FCRtraining/src/utils/roundToDomain.py
deleted file mode 100644
index 3dc66dd..0000000
--- a/FCRtraining/src/utils/roundToDomain.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from typing import List
-import torch
-import numpy as np
-
-class RoundToDomain:
-    """
-    Rounds output to nearest value in domain.
-    Domain format: list of domains of variables.
-    Domain of each variable: nonempty sorted list of possible values.
-    Order of variable domains in domain must match order of variables in model's output.
-
-    >>> x = torch.rand(batch_size, output_size)
-    >>> x_rounded = rounder.round(x)
-    """
-
-    def __init__(self, domain: List[List[int]]):
-        self.domain = [np.array(x) for x in domain]
-
-    def round(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Returns x rounded to domain.
-        x must be of shape (batch_size, number_of_variables).
-        """
-        res = np.zeros_like(x, dtype=int)
-        for i in range(len(self.domain)):
-            res[:, i] = np.minimum(np.searchsorted(self.domain[i], x[:, i]), len(self.domain[i]) - 1)
-            res[:, i] = np.where(np.logical_and(res[:, i] > 0,
-                                                np.logical_or(res[:, i] == len(self.domain[i]),
-                                                              np.fabs(x[:, i] - self.domain[i][res[:, i] - 1]) <
-                                                              np.fabs(x[:, i] - self.domain[i][res[:, i]]))),
-                                 self.domain[i][res[:, i] - 1], self.domain[i][res[:, i]])
        return torch.tensor(res).float()
-
-class FCRDomain(RoundToDomain):
-    """
-    Rounds output to nearest value in FCR domain.
- Order of variables: - AppCardinality, - provider_Component_App, - AppCores, - AppStorage, - LBCardinality, - provider_Component_LB, - LBRam, - LBStorage, - DBCardinality, - provider_Component_DB, - DBCores, - DBStorage - How to use: - - >>> rounder = FCRDomain() - >>> x = torch.rand(batch_size, 12) - >>> x_rounded = rounder.round(x) - """ - def __init__(self): - super().__init__([ - range(1, 31), # AppCardinality - [0], # provider_Component_App - [2, 4, 8, 16, 32, 36, 40, 48], # AppCores - [0, 10, 420, 1024, 2097152], # AppStorage - range(1, 2), # LBCardinality - [0], # provider_Component_LB - [1024, 1740, 2048, 3750, 3840, 4096, 7168, 7680, - 8192, 15360, 15616, 16384, 17510, 22528, 23552, - 30720, 31232, 32768, 35020, 62464, 70041], # LBRam - [0, 10, 420], # LBStorage - range(1, 2), # DBCardinality - [0], # provider_Component_DB - [1, 2, 4, 8], # DBCores - [0, 10, 420] # DBStorage - ]) -- GitLab
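For reference, a minimal usage sketch of the domain rounding deleted above (it assumes `RoundToDomain` from `roundToDomain.py` is importable; the two-variable domain below is made up for illustration, not taken from the FCR configuration):

```Python
import torch

# Hypothetical two-variable domain: cores in {1, 2, 4, 8}, storage in {0, 10, 420}.
rounder = RoundToDomain([[1, 2, 4, 8], [0, 10, 420]])

# Raw network outputs of shape (batch_size, number_of_variables).
x = torch.tensor([[3.2, 100.0],
                  [7.9, 500.0]])

print(rounder.round(x))
# expected: tensor([[  4.,  10.],
#                   [  8., 420.]])
```

Each column snaps to the nearest admissible value of its variable (values above the maximum clamp to the maximum), which is what `FCRDomain` relies on when test predictions are rounded before being scored by the utility metrics.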