diff --git a/FCRtraining/src/models/lstm_net/dataloader.py b/FCRtraining/src/models/lstm_net/dataloader.py
index 31aa0d5b24a2a20df548c5c678c7f5dc002a6d19..adec2211b7f8fccfffab75ed8a18a9a2e69fd5c7 100644
--- a/FCRtraining/src/models/lstm_net/dataloader.py
+++ b/FCRtraining/src/models/lstm_net/dataloader.py
@@ -1,5 +1,6 @@
 from FCRdataLoader.src.fcrdataloader.dataset import SequenceForecastMultiDistributionDatasetFactory as SFMDD
 from typing import Callable
+from pathlib import Path
 
 
 class FCRDataSetV2Factory(SFMDD):
@@ -16,8 +17,8 @@ class FCRDataSetV2Factory(SFMDD):
         super().__init__(
             seq_len=seq_len,
             pred_step=horizon,
-            transforms=x_transforms,
-            file="",
+            x_transforms=x_transforms,
+            file=Path(""),
             usecols=[0]+list(range(2, 17)),
             experiment_id_col=0,
             x_y_split=3,
diff --git a/FCRtraining/src/models/lstm_net/network.py b/FCRtraining/src/models/lstm_net/network.py
index 46de52837b019bac4f864f7d191df898c60b4b97..72de2ba753ae1f426d5626fa2a7f56c185936ee8 100644
--- a/FCRtraining/src/models/lstm_net/network.py
+++ b/FCRtraining/src/models/lstm_net/network.py
@@ -15,10 +15,10 @@ from FCRtraining.src.utils.LitFCRtestBase import BaseTestEncoder
     Dont touch great performance
 '''
 
-HIDDEN_SIZE = 30
+HIDDEN_SIZE = 40
 BATCH_SIZE = 256
 SEQ_LEN = 30
-HORIZON = 6
+HORIZON = 5
 LSTM_LAYERS = 1
 INIT_LR = 0.01
 
@@ -37,13 +37,15 @@ class Encoder(BaseTestEncoder):
     def __init__(
         self,
         input_features=FEATURES,
         output_features=OUTPUT,
-        criterion=nn.MSELoss(),
+        criterion=nn.L1Loss(),
         init_lr=INIT_LR,
         batch_size=BATCH_SIZE,
         seq_len=SEQ_LEN,
         horizon=HORIZON,
         hidden_size=HIDDEN_SIZE,
         lstm_layers=LSTM_LAYERS,
+        utility_fx: Optional[Callable[[Any], float]] = None,
+        util_intervals: int = 100,
         target_labels=labels,
         hparams_to_log: Dict[str, Any] = {}
     ):
diff --git a/FCRtraining/src/models/lstm_net/train.py b/FCRtraining/src/models/lstm_net/train.py
index 54c5c86ff77bef2841a2e6c212a04818bf961b0c..553025a924a1dc6639522bd4cf0cb59d57cf015f 100644
--- a/FCRtraining/src/models/lstm_net/train.py
+++ b/FCRtraining/src/models/lstm_net/train.py
@@ -17,7 +17,7 @@ FEATURES_OUT = 12
 
 EXTRA_HPARAMS = {
     'gradient_clip_val': 1,
-    'max_epochs': 100
+    'max_epochs': 120
 }
 
 dataset_factory = FCRDataSetV2Factory(seq_len=SEQ_LEN, horizon=HORIZON)
@@ -44,10 +44,17 @@ test = DataLoader(
 model = Encoder( # currently no hparams to log
     input_features=FEATURES_IN,
     output_features=FEATURES_OUT,
-    horizon=HORIZON
+    horizon=HORIZON,
+    hidden_size=50,
+    hparams_to_log={
+        'HORIZON': HORIZON,
+        'gradient_clip_val': EXTRA_HPARAMS['gradient_clip_val'],
+        'max_epochs': EXTRA_HPARAMS['max_epochs'],
+        'hidden_size': 50
+    }
 )
 
-logger = TensorBoardLogger('tb_log', name='test')
+logger = TensorBoardLogger('tb_log', name='lstm_no_scaling')
 logger.experiment.add_graph(model, torch.rand(model.input_features, model.seq_len, model.input_features))
 
 trainer = pl.Trainer(check_val_every_n_epoch=5, logger=logger, **EXTRA_HPARAMS)
diff --git a/FCRtraining/src/models/lstm_scale/LitFCRtestBase.py b/FCRtraining/src/models/lstm_scale/LitFCRtestBase.py
new file mode 100644
index 0000000000000000000000000000000000000000..44d4c60c8591ed5ef01fa568e0c19b9b914970d0
--- /dev/null
+++ b/FCRtraining/src/models/lstm_scale/LitFCRtestBase.py
@@ -0,0 +1,167 @@
+from typing import Dict, Any, Callable, List, Optional
+import pytorch_lightning as pl
+import torch.nn as nn
+import torch
+
+from FCRtraining.src.utils.colAvgError import AvgColL1Error
+from FCRtraining.src.utils.colAvgRelError import AvgColRelativeError
+from FCRtraining.src.utils.bestWorstTestPreds import BestWorstPredictions
+
+class BaseTestEncoder(pl.LightningModule):
+    """
+    Abstract base class for LightningModules;
+    implements the validation and test loops, including logging.
+    Subclasses provide the loss function via ``criterion``.
+
+    input_features:
+        number of elements of input vectors
+    output_features:
+        number of elements of output vectors
+    criterion:
+        loss function
+    util_intervals:
+        number of bins for util based metrics
+        (currently ignored)
+    utility_fx:
+        utility function for elements from
+        network output domain (currently
+        ignored)
+    target_labels:
+        labels for target value variables.
+        If not given, defaults to "v1", "v2", ...
+    hparams_to_log:
+        all hparams that will be logged
+    scaler:
+        optional sklearn-style scaler; network outputs are
+        inverse-transformed with it before metrics are computed
+    """
+
+    def __init__(
+        self,
+        input_features: int,
+        output_features: int,
+        criterion: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
+        util_intervals=100,
+        utility_fx: Optional[Callable[[Any], float]] = None,
+        target_labels: Optional[List[str]] = None,
+        hparams_to_log: Dict[str, Any] = {},
+        scaler=None
+    ):
+        '''
+        creates validation and test metrics which can then
+        be inspected (e.g. in TensorBoard)
+        '''
+
+        if target_labels is not None:
+            assert len(target_labels) == output_features, \
+                f"Length of target_labels ({len(target_labels)}) must equal output_features ({output_features})"
+        else:
+            target_labels = [f"v{i}" for i in range(output_features)]
+
+        super(BaseTestEncoder, self).__init__()
+
+        self.labels = target_labels
+        self.input_features = input_features
+        self.output_features = output_features
+        self.criterion = criterion
+        self.scaler = scaler
+
+        # log chosen hparams
+        self.log_hparams(hparams_to_log)
+
+        # validation metrics
+        self.val_mse = pl.metrics.MeanSquaredError()
+        self.val_mae = pl.metrics.MeanAbsoluteError()
+
+        # test metrics
+        self.test_mse = pl.metrics.MeanSquaredError()
+        self.test_mae = pl.metrics.MeanAbsoluteError()
+
+        # utility function
+        self.utility_fx = utility_fx
+        self.util_intervals = util_intervals
+
+        # col avg errors
+        self.colAvgL1Err = AvgColL1Error(target_labels)
+        self.colAvgRelErr = AvgColRelativeError(target_labels)
+
+        # best worst prediction examples
+        self.bwPredExamples = BestWorstPredictions(nn.functional.l1_loss)
+
+    def __unscaled_target(self, y: torch.Tensor) -> torch.Tensor:
+        return torch.from_numpy(self.scaler.inverse_transform(y)).float()
+
+    def log_hparams(self, hparams: Dict[str, Any]):
+        self.hparams = hparams
+
+    def validation_step(self, batch, batch_nb):
+        """
+        predicts y and calculates the loss for a validation batch
+        """
+
+        x, y = batch
+        preds = self(x)
+
+        preds_us = self.__unscaled_target(preds)
+        loss = self.criterion(preds_us, y)  # might not be necessary
+
+        return {'loss': loss, 'preds': preds_us, 'target': y}
+
+    def validation_step_end(self, outputs):
+        '''
+        update and log validation metrics
+        '''
+
+        self.val_mse(outputs['preds'], outputs['target'])
+        self.val_mae(outputs['preds'], outputs['target'])
+
+        self.log('validation_mse', self.val_mse, on_step=False, on_epoch=True)
+        self.log('validation_mae', self.val_mae, on_step=False, on_epoch=True)
+
+    def test_step(self, batch, batch_idx):
+        """
+        predicts y and calculates the loss for a test batch
+        """
+
+        x, y = batch
+        preds = self(x)
+        preds_us = self.__unscaled_target(preds)
+
+        loss = self.criterion(preds_us, y)  # might not be necessary
+        self.bwPredExamples.update_on_test_step(x, preds_us, y)
+
+        return {'loss': loss, 'preds': preds_us, 'target': y}
+
+    def test_step_end(self, outputs):
+        '''
+        update and log test metrics
+        '''
+
+        self.test_mse(outputs['preds'], outputs['target'])
+        self.test_mae(outputs['preds'], outputs['target'])
+
+        self.log('test_mse', self.test_mse, on_step=False, on_epoch=True)
+        self.log('test_mae', self.test_mae, on_step=False, on_epoch=True)
+
+        # update col avg error
+        self.colAvgL1Err.update_after_test_step(outputs['preds'], outputs['target'])
+        self.colAvgRelErr.update_after_test_step(outputs['preds'], outputs['target'])
+
+    def on_test_end(self):
+        writer = self.logger.experiment
+
+        # show col avg error figure
+        writer.add_figure("avg L1 col error", self.colAvgL1Err.get_fig)
+        writer.add_figure("avg rel col error", self.colAvgRelErr.get_fig)
+
+        writer.add_text(
+            f"best {self.bwPredExamples.get_keeped_best} data examples",
+            self.bwPredExamples.get_best_examples_str
+        )
+
+        writer.add_text(
+            f"worst {self.bwPredExamples.get_keeped_worst} data examples",
+            self.bwPredExamples.get_worst_examples_str
+        )
diff --git a/FCRtraining/src/models/lstm_scale/__init__.py b/FCRtraining/src/models/lstm_scale/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/FCRtraining/src/models/lstm_scale/dataloader.py b/FCRtraining/src/models/lstm_scale/dataloader.py
new file mode 100644
index 0000000000000000000000000000000000000000..e38b4c7d9a209aef6e4cb4e10488f16e386739ea
--- /dev/null
+++ b/FCRtraining/src/models/lstm_scale/dataloader.py
@@ -0,0 +1,26 @@
+from FCRdataLoader.src.fcrdataloader.dataset import SequenceForecastMultiDistributionDatasetFactory as SFMDD
+from typing import Callable
+from pathlib import Path
+
+
+class FCRDataSetV2Factory(SFMDD):
+    '''
+    dataset factory for secure-document data version 2; local data from set1
+    '''
+
+    def __init__(
+        self,
+        seq_len: int,
+        horizon: int,
+        x_transforms=None
+    ):
+        super().__init__(
+            seq_len=seq_len,
+            pred_step=horizon,
+            x_transforms=x_transforms,
+            file=Path("/home/szysad/mimuw/3rok/ZPP/training-data/FCR-data/set1/combined.csv"),
+            usecols=[0]+list(range(2, 17)),
+            experiment_id_col=0,
+            x_y_split=3,
+            x_predictions_cols=[2],
+        )
diff --git a/FCRtraining/src/models/lstm_scale/network.py b/FCRtraining/src/models/lstm_scale/network.py
new file mode 100644
index 0000000000000000000000000000000000000000..c621ff37356efaef842fa501d6cafe3e7e852aef
--- /dev/null
+++ b/FCRtraining/src/models/lstm_scale/network.py
@@ -0,0 +1,112 @@
+'''
+    lstm network trained with data from set1
+'''
+
+from torch.optim.lr_scheduler import ReduceLROnPlateau
+from inspect import signature
+from typing import Dict, Any, Callable, Optional
+import torch.nn.functional as F
+import torch.nn as nn
+import torch
+
+from .LitFCRtestBase import BaseTestEncoder
+
+'''
+    Dont touch great performance
+'''
+
+HIDDEN_SIZE = 30
+BATCH_SIZE = 256
+SEQ_LEN = 30
+HORIZON = 5
+LSTM_LAYERS = 1
+INIT_LR = 0.01
+
+FEATURES = 3
+OUTPUT = 12
+
+
+labels = ("LBStorage,DBCardinality,LBCardinality,"
+          "LBRam,provider_Component_LB,AppStorage,"
+          "AppCardinality,AppCores,provider_Component_App,"
+          "DBCores,DBStorage,provider_Component_DB").split(',')
+
+
+class Encoder(BaseTestEncoder):
+    def __init__(
+        self,
+        input_features=FEATURES,
+        output_features=OUTPUT,
+        criterion=nn.L1Loss(),
+        init_lr=INIT_LR,
+        batch_size=BATCH_SIZE,
+        seq_len=SEQ_LEN,
+        horizon=HORIZON,
+        hidden_size=HIDDEN_SIZE,
+        lstm_layers=LSTM_LAYERS,
+        utility_fx: Optional[Callable[[Any], float]] = None,
+        util_intervals: int = 100,
+        target_labels=labels,
+        hparams_to_log: Dict[str, Any] = {},
+        scaler=None
+    ):
+        super(Encoder, self).__init__(
+            utility_fx=utility_fx,
+            util_intervals=util_intervals,
+            criterion=criterion,
+            input_features=input_features,
+            output_features=output_features,
+            target_labels=target_labels,
+            hparams_to_log=hparams_to_log,
+            scaler=scaler
+        )
+
+        self.seq_len = seq_len
+        self.horizon = horizon
+        self.batch_size = batch_size
+        self.lstm_layers = lstm_layers
+        self.hidden_size = hidden_size
+        self.fc2_size = hidden_size
+
+        self.init_lr = init_lr
+
+        self.relu = nn.LeakyReLU()
+        self.lstm = nn.LSTM(input_features, hidden_size, num_layers=self.lstm_layers,
+                            bidirectional=True, batch_first=True)
+        self.fc1 = nn.Linear(hidden_size * 2, self.fc2_size)
+        self.fc2 = nn.Linear(self.fc2_size, self.output_features)
+        self.lstm_drop = nn.Dropout(p=0.2)
+        self.fc1_drop = nn.Dropout(p=0.5)
+
+    def forward(self, x):
+        out, _ = self.lstm(x)
+        # out: (batch, seq_len, hidden_size * directions)
+        out = self.lstm_drop(out[:, -1, :])
+        # out: (batch, hidden_size * directions)
+        out = self.relu(out)
+        out = self.fc1(out)
+        out = self.relu(out)
+        out = self.fc1_drop(out)
+        out = self.fc2(out)
+        return out
+
+    def training_step(self, batch, batch_idx):
+        x, y = batch
+        scaled_y = torch.from_numpy(self.scaler.transform(y)).float()
+        prediction = self(x)
+        # print(f"x = {x[0]}")
+        # print(f"pred = {torch.round(prediction[0])}")
+        # print(f"y = {y[0]}")
+        loss = self.criterion(prediction, scaled_y)
+        self.log('train_loss', loss, on_step=False, on_epoch=True)
+        return loss
+
+    def configure_optimizers(self):
+        optimizer = torch.optim.Adam(self.parameters(), lr=self.init_lr)
+        scheduler = ReduceLROnPlateau(optimizer, 'min', patience=10, verbose=True)
+        return {
+            'optimizer': optimizer,
+            'lr_scheduler': scheduler,
+            'monitor': 'train_loss'
+        }
diff --git a/FCRtraining/src/models/lstm_scale/train.py b/FCRtraining/src/models/lstm_scale/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..69d3f621804cf4fb4c1322ad3e7cc274e7f0def8
--- /dev/null
+++ b/FCRtraining/src/models/lstm_scale/train.py
@@ -0,0 +1,70 @@
+from pytorch_lightning.loggers import TensorBoardLogger
+from torch.utils.data import DataLoader
+from sklearn.preprocessing import MaxAbsScaler, RobustScaler
+import pytorch_lightning as pl
+import torch
+
+from .network import Encoder
+from .dataloader import FCRDataSetV2Factory
+
+
+HORIZON = 5
+SEQ_LEN = 30
+BATCH_SIZE = 256
+FEATURES_IN = 3
+FEATURES_OUT = 12
+
+
+EXTRA_HPARAMS = {
+    'gradient_clip_val': 0,
+    'max_epochs': 100
+}
+
+x_scaler = None
+y_scaler = MaxAbsScaler()
+
+dataset_factory = FCRDataSetV2Factory(seq_len=SEQ_LEN, horizon=HORIZON, x_transforms=x_scaler)
+
+y_scaler.fit(dataset_factory.get_train_target_vals())
+
+train = DataLoader(
+    dataset_factory.get_train_dataset(),
+    batch_size=BATCH_SIZE,
+    num_workers=4,
+    sampler=dataset_factory.get_uniform_dist_y_sampler()
+)
+
+val = DataLoader(
+    dataset_factory.get_validation_dataset(),
+    batch_size=BATCH_SIZE,
+    num_workers=4
+)
+
+test = DataLoader(
+    dataset_factory.get_test_dataset(),
+    batch_size=BATCH_SIZE,
+    num_workers=4
+)
+
+model = Encoder(
+    input_features=FEATURES_IN,
+    output_features=FEATURES_OUT,
+    horizon=HORIZON,
+    batch_size=BATCH_SIZE,
+    hidden_size=40,
+    scaler=y_scaler,
+    hparams_to_log={
+        'HORIZON': HORIZON,
+        'x_scaler': x_scaler,
+        'y_scaler': y_scaler,
+        'gradient_clip_val': EXTRA_HPARAMS['gradient_clip_val'],
+        'hidden_size': 40
+    }
+)
+
+logger = TensorBoardLogger('tb_log', name='scaled_lstm')
+logger.experiment.add_graph(model, torch.rand(model.input_features, model.seq_len, model.input_features))
+
+trainer = pl.Trainer(fast_dev_run=False, check_val_every_n_epoch=5, logger=logger, **EXTRA_HPARAMS)
+trainer.fit(model, train, val)
+trainer.test(model, test)
diff --git a/FCRtraining/src/utils/bestWorstTestPreds.py b/FCRtraining/src/utils/bestWorstTestPreds.py
index 78fe541f5793d4f2b6e63fe40cf6e93edd382636..922f23dbfa9a10493741de093c6bb52554a6d3a1 100644
--- a/FCRtraining/src/utils/bestWorstTestPreds.py
+++ b/FCRtraining/src/utils/bestWorstTestPreds.py
@@ -89,12 +89,13 @@ class BestWorstPredictions:
         prediciton: torch.Tensor,
         target: torch.Tensor
     ):
+
         for indata, pred, targ in zip(inputdata, prediciton, target):
             targPred = InputTargetPred(
                 inputdata=indata,
                 prediciton=pred,
                 target=targ,
-                error=self.__criterion(pred, targ),
+                error=self.__criterion(pred, targ)
             )
 
             targPredFlipped = InputTargetPred(
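Not part of the patch, but for orientation: a minimal, self-contained sketch of the target-scaling scheme the new lstm_scale variant introduces — fit a MaxAbsScaler on the training targets only, compute the training loss in scaled space, and inverse-transform predictions back to the original units before computing MAE/MSE. The data, batch, and "network output" below are synthetic placeholders (assumptions for illustration), not the repository's FCRDataSetV2Factory or Encoder.

```python
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import MaxAbsScaler

# Synthetic stand-in (assumption): 200 training targets with 12 output
# features, mirroring FEATURES_OUT = 12 in train.py.
rng = np.random.default_rng(0)
y_train = rng.uniform(0, 500, size=(200, 12)).astype(np.float32)

# Fit the scaler on training targets only, as train.py does via
# y_scaler.fit(dataset_factory.get_train_target_vals()).
y_scaler = MaxAbsScaler().fit(y_train)

criterion = nn.L1Loss()

# Training side: the loss is computed in scaled space (cf. Encoder.training_step).
pred_scaled = torch.rand(8, 12)  # placeholder for the network output
y_batch = torch.from_numpy(y_train[:8])
y_scaled = torch.from_numpy(y_scaler.transform(y_train[:8])).float()
train_loss = criterion(pred_scaled, y_scaled)

# Evaluation side: predictions are mapped back to original units before
# metrics (cf. BaseTestEncoder.__unscaled_target), so MAE/MSE are reported
# in the same units as the raw targets.
pred_unscaled = torch.from_numpy(y_scaler.inverse_transform(pred_scaled.numpy())).float()
val_mae = nn.functional.l1_loss(pred_unscaled, y_batch)

print(f"scaled train loss: {train_loss.item():.4f}, unscaled val MAE: {val_mae.item():.4f}")
```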