diff --git a/FCRtraining/src/networks/__init__.py b/FCRtraining/src/models/__init__.py
similarity index 100%
rename from FCRtraining/src/networks/__init__.py
rename to FCRtraining/src/models/__init__.py
diff --git a/FCRtraining/src/models/lstm_net/__init__.py b/FCRtraining/src/models/lstm_net/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/FCRtraining/src/models/lstm_net/dataloader.py b/FCRtraining/src/models/lstm_net/dataloader.py
new file mode 100644
index 0000000000000000000000000000000000000000..52d82f48e8f53b8ca8073f9630f28107204b90fd
--- /dev/null
+++ b/FCRtraining/src/models/lstm_net/dataloader.py
@@ -0,0 +1,30 @@
+from FCRdataLoader.src.fcrdataloader.dataset import SequenceForecastMultiDistributionDatasetFactory as SFMDD
+from typing import Callable
+
+
+class FCRDataSetV2Factory(SFMDD):
+    '''
+    dataset factory for secure-document data version 2; local data from set2
+    '''
+
+    def __init__(
+        self,
+        seq_len: int,
+        horizon: int,
+        x_transforms=None,
+        y_transforms=None
+    ):
+        super().__init__(
+            seq_len=seq_len,
+            pred_step=horizon,
+            x_transforms=x_transforms,
+            y_transforms=y_transforms,
+            file="",
+            usecols=[0]+list(range(2, 17)),
+            experiment_id_col=0,
+            x_y_split=3,
+            x_predictions_cols=[2],
+        )
+
+
+
diff --git a/FCRtraining/src/models/lstm_net/network.py b/FCRtraining/src/models/lstm_net/network.py
new file mode 100644
index 0000000000000000000000000000000000000000..49788e8a460a421086ab96a2e0c924fc4a8f88ad
--- /dev/null
+++ b/FCRtraining/src/models/lstm_net/network.py
@@ -0,0 +1,108 @@
+'''
+    lstm network trained with data from set1
+'''
+
+from torch.optim.lr_scheduler import ReduceLROnPlateau
+from inspect import signature
+from typing import Dict, Any, Callable, Optional
+import torch.nn.functional as F
+import torch.nn as nn
+import torch
+
+from FCRtraining.src.utils.LitFCRtestBase import BaseTestEncoder
+
+'''
+    Don't touch: great performance
+'''
+
+HIDDEN_SIZE = 30
+BATCH_SIZE = 256
+SEQ_LEN = 30
+HORIZON = 6
+LSTM_LAYERS = 1
+INIT_LR = 0.01
+
+FEATURES = 3
+OUTPUT = 12
+
+
+labels = ("LBStorage,DBCardinality,LBCardinality,"
+          "LBRam,provider_Component_LB,AppStorage,"
+          "AppCardinality,AppCores,provider_Component_App,"
+          "DBCores,DBStorage,provider_Component_DB").split(',')
+
+
+class Encoder(BaseTestEncoder):
+    def __init__(
+        self,
+        input_features=FEATURES,
+        output_features=OUTPUT,
+        criterion=nn.MSELoss(),
+        init_lr=INIT_LR,
+        batch_size=BATCH_SIZE,
+        seq_len=SEQ_LEN,
+        horizon=HORIZON,
+        hidden_size=HIDDEN_SIZE,
+        lstm_layers=LSTM_LAYERS,
+        utility_fx: Optional[Callable[[Any], float]] = None,
+        util_intervals: int = 100,
+        target_labels=labels,
+        hparams_to_log: Dict[str, Any] = {}
+    ):
+        super(Encoder, self).__init__(
+            utility_fx=utility_fx,
+            util_intervals=util_intervals,
+            criterion=criterion,
+            input_features=input_features,
+            output_features=output_features,
+            target_labels=target_labels,
+            hparams_to_log=hparams_to_log
+        )
+
+        self.seq_len = seq_len
+        self.horizon = horizon
+        self.batch_size = batch_size
+        self.lstm_layers = lstm_layers
+        self.hidden_size = hidden_size
+        self.fc2_size = hidden_size
+
+        self.init_lr = init_lr
+
+        self.relu = nn.LeakyReLU()
+        self.lstm = nn.LSTM(input_features, hidden_size, num_layers=self.lstm_layers,
+                            bidirectional=True, batch_first=True)
+        self.fc1 = nn.Linear(hidden_size * 2, self.fc2_size)
+        self.fc2 = nn.Linear(self.fc2_size, self.output_features)
+        self.lstm_drop = nn.Dropout(p=0.2)
+        self.fc1_drop = nn.Dropout(p=0.5)
+
+    def forward(self, x):
+        out, _ = self.lstm(x)
+        # out: (batch, seq_len, hidden_size * directions)
+        out = self.lstm_drop(out[:, -1, :])
+        # out: (batch, hidden_size * directions)
+        out = self.relu(out)
+        out = self.fc1(out)
+        out = self.relu(out)
+        out = self.fc1_drop(out)
+        out = self.fc2(out)
+        return out
+
+    def training_step(self, batch, batch_idx):
+        x, y = batch
+        prediction = self(x)
+        #print(f"x = {x[0]}")
+        #print(f"pred = {torch.round(prediction[0])}")
+        #print(f"y = {y[0]}")
+        loss = self.criterion(prediction, y)
+        self.log('train_loss', loss, on_step=False, on_epoch=True)
+        return loss
+
+    def configure_optimizers(self):
+        optimizer = torch.optim.Adam(self.parameters(), lr=self.init_lr)
+        scheduler = ReduceLROnPlateau(optimizer, 'min', patience=10, verbose=True)
+        return {
+            'optimizer': optimizer,
+            'lr_scheduler': scheduler,
+            'monitor': 'train_loss'
+        }
diff --git a/FCRtraining/src/models/lstm_net/train.py b/FCRtraining/src/models/lstm_net/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..54c5c86ff77bef2841a2e6c212a04818bf961b0c
--- /dev/null
+++ b/FCRtraining/src/models/lstm_net/train.py
@@ -0,0 +1,55 @@
+from pytorch_lightning.loggers import TensorBoardLogger
+from torch.utils.data import DataLoader
+from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler
+import pytorch_lightning as pl
+import torch
+
+from .network import Encoder
+from .dataloader import FCRDataSetV2Factory
+
+
+HORIZON = 5
+SEQ_LEN = 30
+BATCH_SIZE = 256
+FEATURES_IN = 3
+FEATURES_OUT = 12
+
+
+EXTRA_HPARAMS = {
+    'gradient_clip_val': 1,
+    'max_epochs': 100
+}
+
+dataset_factory = FCRDataSetV2Factory(seq_len=SEQ_LEN, horizon=HORIZON)
+
+train = DataLoader(
+    dataset_factory.get_train_dataset(),
+    batch_size=BATCH_SIZE,
+    num_workers=4,
+    sampler=dataset_factory.get_uniform_dist_y_sampler()
+)
+
+val = DataLoader(
+    dataset_factory.get_validation_dataset(),
+    batch_size=BATCH_SIZE,
+    num_workers=4
+)
+
+test = DataLoader(
+    dataset_factory.get_test_dataset(),
+    batch_size=BATCH_SIZE,
+    num_workers=4
+)
+
+model = Encoder(  # currently no hparams to log
+    input_features=FEATURES_IN,
+    output_features=FEATURES_OUT,
+    horizon=HORIZON
+)
+
+logger = TensorBoardLogger('tb_log', name='test')
+logger.experiment.add_graph(model, torch.rand(1, model.seq_len, model.input_features))  # dummy (batch, seq_len, features) input
+
+trainer = pl.Trainer(check_val_every_n_epoch=5, logger=logger, **EXTRA_HPARAMS)
+trainer.fit(model, train, val)
+trainer.test(model, test)
diff --git a/FCRtraining/src/networks/LitFCRtestBase.py b/FCRtraining/src/networks/LitFCRtestBase.py
deleted file mode 100644
index e5706e9d626d5e64468245f73bd8f9ad37339309..0000000000000000000000000000000000000000
--- a/FCRtraining/src/networks/LitFCRtestBase.py
+++ /dev/null
@@ -1,126 +0,0 @@
-from typing import Dict, Any, Callable
-import pytorch_lightning as pl
-import torch.nn as nn
-import torch
-import numpy as np
-
-from FCRtraining.src.metrics.RowAccuracy import RowAccuracy
-
-
-class BaseTestEncoder(pl.LightningModule):
-    """
-    abstract base class for LightningModule,
-    implements validation and test loops including logging
-    subclass must implement criterion as loss function
-    """
-
-    def __init__(
-            self,
-            utility_fx: Callable[[Any], float] = None,
-            intervals=100):
-        '''
-        creates train, and test metrics
-        TODO: add metric from smoteR paper
-        '''
-
-        super(BaseTestEncoder, self).__init__()
-
-        # train metrics
-        self.train_rounded_accuracy = RowAccuracy()
-        self.train_rounded_mse = pl.metrics.MeanSquaredError()
-        self.train_rounded_mae = pl.metrics.MeanAbsoluteError()
-        self.train_mse = pl.metrics.MeanSquaredError()
-        self.train_mae = pl.metrics.MeanAbsoluteError()
-
-        # test metrics
-        self.test_rounded_accuracy = RowAccuracy()
-        self.test_rounded_mse = pl.metrics.MeanSquaredError()
-        self.test_rounded_mae = pl.metrics.MeanAbsoluteError()
-        self.test_mse = pl.metrics.MeanSquaredError()
-        self.test_mae = pl.metrics.MeanAbsoluteError()
-
-        # utility function
-        self.utility_fx = utility_fx
-        self.intervals = intervals
-
-    def log_hparams(self, hparams: Dict[str, Any]):
-        self.hparams = hparams
-
-    def validation_step(self, batch, batch_nb):
-        """
-        predicts y, and calculates loss in training
-        """
-
-        x, y = batch
-        preds = self(x)
-        loss = self.criterion(preds, y)  # might not be necessary
-
-        return {'loss': loss, 'preds': preds, 'target': y}
-
-    def validation_step_end(self, outputs):
-        '''
-        update and log validation metrics
-        '''
-
-        rounded_preds = torch.round(outputs['preds'])
-        self.train_rounded_accuracy(rounded_preds, outputs['target'])
-        self.train_rounded_mse(rounded_preds, outputs['target'])
-        self.train_rounded_mae(rounded_preds, outputs['target'])
-        self.train_mse(outputs['preds'], outputs['target'])
-        self.train_mae(outputs['preds'], outputs['target'])
-
-        self.log('validation_rounded_accuracy', self.train_rounded_accuracy, on_step=False, on_epoch=True)
-        self.log('validation_rounded_mse', self.train_rounded_mse, on_step=False, on_epoch=True)
-        self.log('validation_rounded_mae', self.train_rounded_mae, on_step=False, on_epoch=True)
-        self.log('validation_mse', self.train_mse, on_step=False, on_epoch=True)
-        self.log('validation_mae', self.train_mae, on_step=False, on_epoch=True)
-
-    def test_step(self, batch, batch_idx):
-        """
-        predicts y, and calculates loss in testing
-        """
-
-        x, y = batch
-        preds = self(x)
-        loss = self.criterion(preds, y)  # might not be necessary
-
-        return {'loss': loss, 'preds': preds, 'target': y}
-
-    def test_step_end(self, outputs):
-        '''
-        update and log test metrics
-        '''
-
-        rounded_preds = torch.round(outputs['preds'])
-        self.test_rounded_accuracy(rounded_preds, outputs['target'])
-        self.test_rounded_mse(rounded_preds, outputs['target'])
-        self.test_rounded_mae(rounded_preds, outputs['target'])
-        self.test_mse(outputs['preds'], outputs['target'])
-        self.test_mae(outputs['preds'], outputs['target'])
-
-        self.log('test_rounded_accuracy', self.test_rounded_accuracy, on_step=False, on_epoch=True)
-        self.log('test_rounded_mse', self.test_rounded_mse, on_step=False, on_epoch=True)
-        self.log('test_rounded_mae', self.test_rounded_mae, on_step=False, on_epoch=True)
-        self.log('test_mse', self.test_mse, on_step=False, on_epoch=True)
-        self.log('test_mae', self.test_mae, on_step=False, on_epoch=True)
-
-        # update util graph
-        for pred, targ in zip(rounded_preds, outputs['target']):
-            embed = torch.searchsorted(self.interval_embed, self.utility_fx(targ))
-            if (pred == targ).all():
-                self.util_correct[embed] += 1
-            self.util_total[embed] += 1
-
-    def on_test_start(self):
-        self.interval_embed = torch.linspace(0, 1, self.intervals)
-        self.util_correct = torch.zeros(self.intervals)
-        self.util_total = torch.zeros(self.intervals)
-
-    def on_test_end(self):
-        self.util_total = torch.max(self.util_total, torch.ones(len(self.util_total)))
-        avg_util_accur = self.util_correct / self.util_total
-        writer = self.logger.experiment
-        for i in range(self.intervals):
-            writer.add_scalar('avg utility accuracy', avg_util_accur[i], i)
-        print(avg_util_accur)
diff --git a/FCRtraining/src/networks/lstm_net1.py b/FCRtraining/src/networks/lstm_net1.py
deleted file mode 100644
index 450244a4d36653a58b8d751ca14bd1637350f91c..0000000000000000000000000000000000000000
--- a/FCRtraining/src/networks/lstm_net1.py
+++ /dev/null
@@ -1,115 +0,0 @@
-from torch.utils.data import DataLoader
-from torch.optim.lr_scheduler import ReduceLROnPlateau
-from sklearn.preprocessing import MinMaxScaler, StandardScaler
-from inspect import signature
-from typing import Dict, Any, Callable
-import torch.nn.functional as F
-import torch.nn as nn
-import torch
-
-from FCRtraining.src.networks.LitFCRtestBase import BaseTestEncoder
-from FCRdataLoader.src.fcrdataloader.dataset import FCRdatasetFactory
-
-
-'''
-    Dont touch great performance
-'''
-
-
-HIDDEN_SIZE = 30
-BATCH_SIZE = 128
-SEQ_LEN = 30
-HORIZON = 5
-LSTM_LAYERS = 2
-INIT_LR = 0.1
-
-FEATURES = 3
-OUTPUT = 6
-
-class Encoder(BaseTestEncoder):
-    def __init__(
-            self,
-            features=FEATURES,
-            output=OUTPUT,
-            init_lr=INIT_LR,
-            batch_size=BATCH_SIZE,
-            seq_len=SEQ_LEN,
-            horizon=HORIZON,
-            hidden_size=HIDDEN_SIZE,
-            lstm_layers=LSTM_LAYERS,
-            utility_fx: Callable[[Any], float] = None,
-            intervals: int = 100,
-            extra_hparams: Dict[str, Any] = {}
-    ):
-        super(Encoder, self).__init__(
-            utility_fx=utility_fx,
-            intervals=intervals
-        )
-
-        self.output = output
-        self.features = features
-        self.seq_len = seq_len
-        self.horizon = horizon
-        self.batch_size = batch_size
-        self.lstm_layers = lstm_layers
-        self.hidden_size = hidden_size
-        self.extra_hparams = extra_hparams
-
-        self.criterion = nn.MSELoss()
-        self.init_lr = init_lr
-
-        self.lstm = nn.LSTM(features, hidden_size, num_layers=self.lstm_layers,
-                            bidirectional=True, batch_first=True)
-        self.fc1 = nn.Linear(hidden_size * 2, output)
-        self.lstm_drop = nn.Dropout(p=0.3)
-
-        # data transformation
-        self.data_set_factory = FCRdatasetFactory(SEQ_LEN, HORIZON)
-
-        # log hparams
-        hparams = {hkey: getattr(self, hkey) for hkey in signature(self.__init__).parameters}
-        hparams['loss_function'] = str(self.criterion)
-        hparams['optimizers'] = list(map(str, self.configure_optimizers()))
-        self.log_hparams(hparams)
-
-    def forward(self, x):
-        out, _ = self.lstm(x)
-        out = self.lstm_drop(out)
-        # out: (batch, features, hidden_size * directions)
-        out = out[:, -1, :]
-        # out: (batch, hidden_size * directions)
-        #out = self.fc1_drop(out)
-        out = self.fc1(out)
-        return out
-
-    def training_step(self, batch, batch_idx):
-        x, y = batch
-        prediction = self(x)
-        #print(f"x = {x[0]}")
-        #print(f"pred = {torch.round(prediction[0])}")
-        #print(f"y = {y[0]}")
-        loss = self.criterion(prediction, y)
-        self.log('train_loss', loss, on_step=False, on_epoch=True)
-        return loss
-
-    def configure_optimizers(self):
-        optimizer = torch.optim.Adam(self.parameters(), lr=self.init_lr)
-        scheduler = ReduceLROnPlateau(
-            optimizer, 'min', patience=20, verbose=True)
-        return {
-            'optimizer': optimizer,
-            'lr_scheduler': scheduler,
-            'monitor': 'train_loss'
-        }
-
-    def __get_scaler(self, train_dataset):
-        scaler_loader = DataLoader(
-            train_dataset,
-            batch_size=len(train_dataset)
-        )
-
-        scaler = MinMaxScaler(feature_range=(-10, 10))
-        batch = next(iter(scaler_loader))[0].reshape(-1, 3)  # fixed for fcr data
-        scaler.fit(batch)
-
-        return scaler
\ No newline at end of file
diff --git a/FCRtraining/src/train.py b/FCRtraining/src/train.py
deleted file mode 100644
index b70006b2f32d24b0bcba5f195c49e7dd998f154d..0000000000000000000000000000000000000000
--- a/FCRtraining/src/train.py
+++ /dev/null
@@ -1,62 +0,0 @@
-from pytorch_lightning.loggers import TensorBoardLogger
-from torch.utils.data import DataLoader
-import pytorch_lightning as pl
-import torch
-
-from FCRtraining.src.utils.utility import RarityUtility
-from FCRtraining.src.networks.lstm_net1 import Encoder
-from FCRdataLoader.src.fcrdataloader.dataset import FCRdatasetFactory
-
-
-HORIZON = 5
-SEQ_LEN = 6
-BATCH_SIZE = 256
-
-
-EXTRA_HPARAMS = {
-    'gradient_clip_val': 0.8,
-    'max_epochs': 10
-}
-
-dataset_factory = FCRdatasetFactory(seq_len=SEQ_LEN, pred_step=HORIZON)
-
-train = DataLoader(
-    dataset_factory.get_train_dataset(),
-    batch_size=BATCH_SIZE,
-    num_workers=4,
-    sampler=dataset_factory.get_uniform_dist_y_sampler()
-)
-
-val = DataLoader(
-    dataset_factory.get_validation_dataset(),
-    batch_size=BATCH_SIZE,
-    num_workers=4
-)
-
-test = DataLoader(
-    dataset_factory.get_test_dataset(),
-    batch_size=BATCH_SIZE,
-    num_workers=4
-)
-
-def test_data_iter():
-    dataset = dataset_factory.get_test_dataset()
-    for i in range(len(dataset)):
-        yield dataset[i][1]
-
-utility_fx = RarityUtility(data=test_data_iter())
-
-model = Encoder(
-    horizon=HORIZON,
-    extra_hparams=EXTRA_HPARAMS,
-    batch_size=100,
-    utility_fx=utility_fx,
-    intervals=10
-    )
-
-logger = TensorBoardLogger('tb_log', name='lstm1')
-logger.experiment.add_graph(model, torch.rand(model.features, model.seq_len, 3))
-
-trainer = pl.Trainer(logger=logger, **EXTRA_HPARAMS)
-trainer.fit(model, train, val)
-trainer.test(model, test)
diff --git a/FCRtraining/src/utils/LitFCRtestBase.py b/FCRtraining/src/utils/LitFCRtestBase.py
new file mode 100644
index 0000000000000000000000000000000000000000..3aabd22b8de5607451fdd501b2cd8443d4a1d01e
--- /dev/null
+++ b/FCRtraining/src/utils/LitFCRtestBase.py
@@ -0,0 +1,159 @@
+from typing import Dict, Any, Callable, List, Optional
+import pytorch_lightning as pl
+import torch.nn as nn
+import torch
+
+from .colAvgError import AvgColL1Error
+from .colAvgRelError import AvgColRelativeError
+from .bestWorstTestPreds import BestWorstPredictions
+
+
+class BaseTestEncoder(pl.LightningModule):
+    """
+    abstract base class for LightningModule,
+    implements validation and test loops including logging
+    subclass must implement criterion as loss function
+
+    input_features:
+        number of elements of input vectors
+    output_features:
+        number of elements of output vectors
+    criterion:
+        loss function
+    util_intervals:
+        number of bins for util based metrics
+        (currently ignored)
+    utility_fx:
+        utility function for elements from
+        network output domain. (currently
+        ignored)
+    target_labels:
+        labels for target value variables.
+        If not given defaults to "v1", "v2", ...
+    hparams_to_log:
+        all hparams that will be logged
+    """
+
+    def __init__(
+        self,
+        input_features: int,
+        output_features: int,
+        criterion: Callable[[torch.Tensor, torch.Tensor], float],
+        util_intervals=100,
+        utility_fx: Optional[Callable[[Any], float]] = None,
+        target_labels: Optional[List[str]] = None,
+        hparams_to_log: Dict[str, Any] = {}
+    ):
+        '''
+        creates train, val and test metrics which can
+        then be inspected (e.g. in TensorBoard)
+        '''
+
+        if target_labels is not None:
+            assert len(target_labels) == output_features, \
+                f"Length of target labels ({len(target_labels)}) must equal output_features ({output_features})"
+        else:
+            target_labels = [f"v{i}" for i in range(output_features)]
+
+        super(BaseTestEncoder, self).__init__()
+
+        self.labels = target_labels
+        self.input_features = input_features
+        self.output_features = output_features
+        self.criterion = criterion
+
+        # log chosen hparams
+        self.log_hparams(hparams_to_log)
+
+        # train metrics
+        self.train_mse = pl.metrics.MeanSquaredError()
+        self.train_mae = pl.metrics.MeanAbsoluteError()
+
+        # test metrics
+        self.test_mse = pl.metrics.MeanSquaredError()
+        self.test_mae = pl.metrics.MeanAbsoluteError()
+
+        # utility function
+        self.utility_fx = utility_fx
+        self.util_intervals = util_intervals
+
+        # col avg errors
+        self.colAvgL1Err = AvgColL1Error(target_labels)
+        self.colAvgRelErr = AvgColRelativeError(target_labels)
+
+        # best worst prediction examples
+        self.bwPredExamples = BestWorstPredictions(self.criterion)
+
+    def log_hparams(self, hparams: Dict[str, Any]):
+        self.hparams = hparams
+
+    def validation_step(self, batch, batch_nb):
+        """
+        predicts y, and calculates loss in training
+        """
+
+        x, y = batch
+        preds = self(x)
+        loss = self.criterion(preds, y)  # might not be necessary
+
+        return {'loss': loss, 'preds': preds, 'target': y}
+
+    def validation_step_end(self, outputs):
+        '''
+        update and log validation metrics
+        '''
+
+        self.train_mse(outputs['preds'], outputs['target'])
+        self.train_mae(outputs['preds'], outputs['target'])
+
+        self.log('validation_mse', self.train_mse,
+                 on_step=False, on_epoch=True)
+        self.log('validation_mae', self.train_mae,
+                 on_step=False, on_epoch=True)
+
+    def test_step(self, batch, batch_idx):
+        """
+        predicts y, and calculates loss in testing
+        """
+
+        x, y = batch
+        preds = self(x)
+        loss = self.criterion(preds, y)  # might not be necessary
+        self.bwPredExamples.update_on_test_step(x, preds, y)
+
+        return {'loss': loss, 'preds': preds, 'target': y}
+
+    def test_step_end(self, outputs):
+        '''
+        update and log test metrics
+        '''
+
+        self.test_mse(outputs['preds'], outputs['target'])
+        self.test_mae(outputs['preds'], outputs['target'])
+
+        self.log('test_mse', self.test_mse, on_step=False, on_epoch=True)
+        self.log('test_mae', self.test_mae, on_step=False, on_epoch=True)
+
+        # update col avg error
+        self.colAvgL1Err.update_after_test_step(
+            outputs['preds'], outputs['target'])
+        self.colAvgRelErr.update_after_test_step(
+            outputs['preds'], outputs['target'])
+
+    def on_test_end(self):
+        writer = self.logger.experiment
+
+        # show col avg error figure
+        writer.add_figure("avg L1 col error", self.colAvgL1Err.get_fig)
+        writer.add_figure("avg rel col error",
+                          self.colAvgRelErr.get_fig)
+        writer.add_text(
+            f"best {self.bwPredExamples.get_kept_best} data examples",
+            self.bwPredExamples.get_best_examples_str
+        )
+
+        writer.add_text(
+            f"worst {self.bwPredExamples.get_kept_worst} data examples",
+            self.bwPredExamples.get_worst_examples_str
+        )
+
diff --git a/FCRtraining/src/utils/bestWorstTestPreds.py b/FCRtraining/src/utils/bestWorstTestPreds.py
new file mode 100644
index 0000000000000000000000000000000000000000..78fe541f5793d4f2b6e63fe40cf6e93edd382636
--- /dev/null
+++ b/FCRtraining/src/utils/bestWorstTestPreds.py
@@ -0,0 +1,144 @@
+import torch
+import heapq
+from typing import Callable, List
+
+
+class InputTargetPred:
+    '''
+    simple wrapper for:
+        input data for single network prediction
+        single network prediction for that data
+        target value for that prediction
+        error of target value and prediction
+
+    also provides a less-than relation based
+    on target value & prediction error
+
+    can be reversed for a symmetrical flip of the less-than relation
+    (for example when used in a max heap)
+    '''
+
+    flipped: bool
+    error: float
+    prediction: torch.Tensor
+    target: torch.Tensor
+
+    def __init__(
+        self,
+        inputdata: torch.Tensor,   # of shape [seq_len, in_features]
+        prediction: torch.Tensor,  # of shape [out_features,]
+        target: torch.Tensor,      # of shape [out_features,]
+        error: float,
+        flipped: bool = False,
+        precision: int = 3  # decimal precision for all values when printing
+    ):
+        self.inputdata = inputdata
+        self.prediction = prediction
+        self.target = target
+        self.error = error
+        self.flipped = flipped
+        self.precision = precision
+
+    def __lt__(self, other):
+        lt = self.error < other.error
+        return lt if not self.flipped else not lt
+
+    def __format_float(self, numb: float) -> str:
+        return format(numb, f'.{self.precision}f')
+
+    def __str_tensor(self, tens: torch.Tensor) -> str:
+        ''' string repr of torch tensor of shape (n,) '''
+        return "[" + ', '.join(map(self.__format_float, tens)) + "]"
+
+    def __str__(self):
+        outstr = 'input:\n\n'
+        outstr += '\n\n'.join(map(lambda r: "\t\t" + self.__str_tensor(r), self.inputdata))
+        outstr += '\n\n'
+        outstr += f'target:\n\n\t\t{self.__str_tensor(self.target)}\n\n'
+        outstr += f'prediction:\n\n\t\t{self.__str_tensor(self.prediction)}\n\n'
+        outstr += f'error: {self.__format_float(self.error)}\n\n'
+        return outstr
+
+
+class BestWorstPredictions:
+    '''
+    collects predictions and target values and
+    keeps some of the best and worst predictions
+    in memory, which it then logs sorted
+    by the given criterion
+    '''
+
+    __best: List[InputTargetPred]
+    __worst: List[InputTargetPred]
+
+    def __init__(
+        self,
+        criterion: Callable[[torch.Tensor, torch.Tensor], float],
+        keep_best=3,
+        keep_worst=3
+    ):
+        self.__best = []   # max-heap on error: keeps the lowest-error examples
+        self.__worst = []  # min-heap on error: keeps the highest-error examples
+        self.__criterion = criterion
+        self.__keep_best = keep_best
+        self.__keep_worst = keep_worst
+
+    def update_on_test_step(
+        self,
+        inputdata: torch.Tensor,
+        prediction: torch.Tensor,
+        target: torch.Tensor
+    ):
+        for indata, pred, targ in zip(inputdata, prediction, target):
+            targPred = InputTargetPred(
+                inputdata=indata,
+                prediction=pred,
+                target=targ,
+                error=self.__criterion(pred, targ),
+            )
+
+            targPredFlipped = InputTargetPred(
+                inputdata=indata,
+                prediction=pred,
+                target=targ,
+                error=self.__criterion(pred, targ),
+                flipped=True
+            )
+
+            if len(self.__best) < self.__keep_best:
+                heapq.heappush(self.__best, targPredFlipped)
+            else:
+                heapq.heappushpop(self.__best, targPredFlipped)
+
+            if len(self.__worst) < self.__keep_worst:
+                heapq.heappush(self.__worst, targPred)
+            else:
+                heapq.heappushpop(self.__worst, targPred)
+
+    @property
+    def get_best_examples_str(self) -> str:
+        '''
+        returns summary of best prediction & target pairs
+        in decreasing order of prediction accuracy
+        '''
+
+        title = lambda i: "\n\n" + f"EXAMPLE {i+1}".center(20, "=") + "\n\n"
+        return '\n\n\n\n'.join(map(lambda i: title(i[0]) + str(i[1]), enumerate(sorted(self.__best, key=lambda p: p.error))))
+
+    @property
+    def get_worst_examples_str(self) -> str:
+        '''
+        returns summary of worst prediction & target pairs
+        in increasing order of prediction accuracy
+        '''
+
+        title = lambda i: "\n\n" + f"EXAMPLE {i+1}".center(20, "=") + "\n\n"
+        return '\n\n\n\n'.join(map(lambda i: title(i[0]) + str(i[1]), enumerate(sorted(self.__worst, key=lambda p: p.error, reverse=True))))
+
+    @property
+    def get_kept_worst(self) -> int:
+        return self.__keep_worst
+
+    @property
+    def get_kept_best(self) -> int:
+        return self.__keep_best
diff --git a/FCRtraining/src/utils/colAvgError.py b/FCRtraining/src/utils/colAvgError.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9fd7fa7740e24f545758066a0629598df6ca72b
--- /dev/null
+++ b/FCRtraining/src/utils/colAvgError.py
@@ -0,0 +1,95 @@
+from typing import List
+import matplotlib.pyplot as plt
+import torch
+
+
+def autolabel(rects):
+    for rect in rects:
+        # Get X and Y placement of label from rect.
+        x_value = rect.get_width()
+        y_value = rect.get_y() + rect.get_height() / 2
+
+        # Number of points between bar and label. Change to your liking.
+        space = 5
+        # Horizontal alignment for positive values
+        ha = 'left'
+
+        # If value of bar is negative: Place label left of bar
+        if x_value < 0:
+            # Invert space to place label to the left
+            space *= -1
+            # Horizontally align label at right
+            ha = 'right'
+
+        # Use X value as label and format number with one decimal place
+        label = "{:.1f}".format(x_value)
+
+        # Create annotation
+        plt.annotate(
+            label,                       # Use `label` as label
+            (x_value, y_value),          # Place label at end of the bar
+            xytext=(space, 0),           # Horizontally shift label by `space`
+            textcoords="offset points",  # Interpret `xytext` as offset in points
+            va='center',                 # Vertically center label
+            ha=ha)                       # Horizontally align label differently for
+                                         # positive and negative values.
+
+
+class AvgColL1Error:
+    '''
+    based on collected network prediction and
+    target values returns pyplot fig representing
+    average L1 error for each target variable
+    '''
+
+    __avg_errors: torch.Tensor
+    __min_errors: torch.Tensor
+    __max_errors: torch.Tensor
+    __labels: List[str]
+    __bar_width: float
+
+    def __init__(
+        self,
+        labels: List[str],
+        bar_width=1
+    ):
+        self.row_numb = 1
+        self.__labels = labels
+        self.__avg_errors = torch.zeros(len(labels))
+        self.__max_errors = torch.ones(len(labels)) * float('-inf')
+        self.__min_errors = torch.ones(len(labels)) * float('inf')
+
+        # chart display conf
+        self.__bar_width = bar_width
+
+    def update_after_test_step(
+        self,
+        predictions: torch.Tensor,
+        target: torch.Tensor
+    ):
+
+        err = torch.abs(predictions - target)
+        for row in err:
+            self.__avg_errors += (row - self.__avg_errors) / self.row_numb
+            self.__max_errors = torch.max(self.__max_errors, row)
+            self.__min_errors = torch.min(self.__min_errors, row)
+            self.row_numb += 1
+
+    @property
+    def get_fig(self):
+        w = self.__bar_width
+        n_labels = len(self.__labels)
+        x = torch.arange(n_labels) * 4 * max(1, w)
+        fig, ax = plt.subplots(figsize=(n_labels, n_labels))
+        b1 = ax.barh(x-w, self.__max_errors, w, label='max')
+        b2 = ax.barh(x, self.__avg_errors, w, label='avg')
+        b3 = ax.barh(x+w, self.__min_errors, w, label='min')
+        ax.set_yticks(x)
+        ax.set_yticklabels(self.__labels)
+        ax.legend()
+
+        autolabel(b1)
+        autolabel(b2)
+        autolabel(b3)
+
+        return fig
diff --git a/FCRtraining/src/utils/colAvgRelError.py b/FCRtraining/src/utils/colAvgRelError.py
new file mode 100644
index 0000000000000000000000000000000000000000..e756601f00dc91e38eba71d9be20a8bba789d464
--- /dev/null
+++ b/FCRtraining/src/utils/colAvgRelError.py
@@ -0,0 +1,74 @@
+from typing import List
+import matplotlib.pyplot as plt
+import torch
+
+from .colAvgError import autolabel
+
+
+class AvgColRelativeError:
+    '''
+    based on collected network prediction and
+    target values returns pyplot fig representing
+    relative error for each target variable
+    '''
+
+    __avg_errors: torch.Tensor
+    __min_errors: torch.Tensor
+    __max_errors: torch.Tensor
+    __labels: List[str]
+
+    def __init__(self,
+                 labels: List[str],
+                 bar_width=1
+                 ):
+        self.row_numb = 1
+        self.__labels = labels
+        self.__avg_errors = torch.zeros(len(labels))
+        self.__max_errors = torch.ones(len(labels)) * float('-inf')
+        self.__min_errors = torch.ones(len(labels)) * float('inf')
+
+        # chart display conf
+        self.__bar_width = bar_width
+
+        # for torch reasons
+        self.__one = torch.ones(1, dtype=torch.float32)
+
+    def err(
+        self,
+        prediction: torch.Tensor,
+        target: torch.Tensor
+    ):
+        return torch.abs(prediction - target) / torch.where(target == 0, self.__one, target)
+
+
+    def update_after_test_step(
+        self,
+        predictions: torch.Tensor,
+        target: torch.Tensor
+    ):
+
+        err = self.err(predictions, target)
+        for row in err:
+            self.__avg_errors += (row - self.__avg_errors) / self.row_numb
+            self.__min_errors = torch.min(self.__min_errors, row)
+            self.__max_errors = torch.max(self.__max_errors, row)
+            self.row_numb += 1
+
+    @property
+    def get_fig(self):
+        w = self.__bar_width
+        n_labels = len(self.__labels)
+        x = torch.arange(n_labels) * 4 * max(1, w)
+        fig, ax = plt.subplots(figsize=(n_labels, n_labels))
+        b1 = ax.barh(x-w, self.__max_errors, w, label='max')
+        b2 = ax.barh(x, self.__avg_errors, w, label='avg')
+        b3 = ax.barh(x+w, self.__min_errors, w, label='min')
+        ax.set_yticks(x)
+        ax.set_yticklabels(self.__labels)
+        ax.legend()
+
+        autolabel(b1)
+        autolabel(b2)
+        autolabel(b3)
+
+        return fig
diff --git a/FCRtraining/src/utils/utility.py b/FCRtraining/src/utils/rarityUtility.py
similarity index 100%
rename from FCRtraining/src/utils/utility.py
rename to FCRtraining/src/utils/rarityUtility.py
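
Reviewer note: a minimal usage sketch of the new BestWorstPredictions helper, outside the Lightning loop. This is illustrative only and not part of the patch; the `tracker` name, the criterion lambda, and the tensor shapes (8 windows of 30 steps x 3 input features, 12 output variables) are assumptions for the example, mirroring how BaseTestEncoder passes its criterion in.

    import torch
    import torch.nn.functional as F

    from FCRtraining.src.utils.bestWorstTestPreds import BestWorstPredictions

    # criterion maps a (prediction, target) pair to a float error
    tracker = BestWorstPredictions(
        criterion=lambda p, t: F.mse_loss(p, t).item(),
        keep_best=2,
        keep_worst=2,
    )

    # one simulated test batch
    x = torch.rand(8, 30, 3)        # (batch, seq_len, in_features)
    preds = torch.rand(8, 12)       # (batch, out_features)
    targets = torch.rand(8, 12)     # (batch, out_features)
    tracker.update_on_test_step(x, preds, targets)

    print(tracker.get_best_examples_str)   # the 2 lowest-error examples, best first
    print(tracker.get_worst_examples_str)  # the 2 highest-error examples, worst first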