From 4504817700d806b4b2961e5e9d12698be291c64e Mon Sep 17 00:00:00 2001
From: jkk <jk394387@students.mimuw.edu.pl>
Date: Sat, 27 Feb 2021 22:16:33 +0100
Subject: [PATCH 1/8] Data aggregator prototype

---
 FCRgendata/config.json                        |  17 ---
 FCRgendata/cpsolver_config.json               |  15 +++
 FCRgendata/data_config.json                   |  26 ++++
 FCRgendata/src/FCRGenData/config_schemas.py   |  83 +++++++++++++
 FCRgendata/src/FCRGenData/data_aggregator.py  | 111 ++++++++++++++++++
 FCRgendata/src/FCRGenData/interpolator.py     |  13 ++
 .../FCRGenData/rawDataReader/csv_reader.py    |  10 +-
 .../rawDataReader/raw_data_reader.py          |  12 +-
 FCRgendata/src/FCRGenData/validate_config.py  |  38 +-----
 9 files changed, 273 insertions(+), 52 deletions(-)
 delete mode 100644 FCRgendata/config.json
 create mode 100644 FCRgendata/cpsolver_config.json
 create mode 100644 FCRgendata/data_config.json
 create mode 100644 FCRgendata/src/FCRGenData/config_schemas.py
 create mode 100644 FCRgendata/src/FCRGenData/data_aggregator.py
 create mode 100644 FCRgendata/src/FCRGenData/interpolator.py

diff --git a/FCRgendata/config.json b/FCRgendata/config.json
deleted file mode 100644
index c9a0b27..0000000
--- a/FCRgendata/config.json
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-    "request": {
-        "applicationId": "FCRwithDLMSApp",
-        "camelModelFilePath" : "/home/szysad/mimuw/3rok/ZPP/FCR-problems/FRC-dane-treningowe/FCR-model.xmi",
-        "cpProblemFilePath": "/home/szysad/mimuw/3rok/ZPP/FCR-problems/FRC-dane-treningowe/FCR-CP.xmi",
-        "nodeCandidatesFilePath": "/home/szysad/mimuw/3rok/ZPP/FCR-problems/FRC-dane-treningowe/FCR-NodeCandidates",
-        "watermark": {
-            "user": "mrozanska",
-            "system": "UI",
-            "date": "2017-11-23T16: 41: 41+0000",
-            "uuid": "fb6280ec-1ab8-11e7-93ae-92361f002671"
-            }
-        },
-    "cpSolverHost": "localhost:8080",
-    "outpath": "generated_data.csv",
-    "AvgResponseTimeTableFilePath": "/home/szysad/mimuw/3rok/ZPP/time-series-data/time-series-data/secure-document​/deployment-reconfiguration-range-2-to-2/2020-10-30 to 2020-10-30/V 1.0 - raw data/AvgResponseTimeTable.csv"
-}
\ No newline at end of file
diff --git a/FCRgendata/cpsolver_config.json b/FCRgendata/cpsolver_config.json
new file mode 100644
index 0000000..890e098
--- /dev/null
+++ b/FCRgendata/cpsolver_config.json
@@ -0,0 +1,15 @@
+{
+  "request": {
+    "applicationId": "FCRwithDLMSApp",
+    "camelModelFilePath": "/home/szysad/mimuw/3rok/ZPP/FCR-problems/FRC-dane-treningowe/FCR-model.xmi",
+    "cpProblemFilePath": "/home/szysad/mimuw/3rok/ZPP/FCR-problems/FRC-dane-treningowe/FCR-CP.xmi",
+    "nodeCandidatesFilePath": "/home/szysad/mimuw/3rok/ZPP/FCR-problems/FRC-dane-treningowe/FCR-NodeCandidates",
+    "watermark": {
+      "user": "mrozanska",
+      "system": "UI",
+      "date": "2017-11-23T16: 41: 41+0000",
+      "uuid": "fb6280ec-1ab8-11e7-93ae-92361f002671"
+    }
+  },
+  "cpSolverHost": "localhost:8080"
+}
\ No newline at end of file
diff --git a/FCRgendata/data_config.json b/FCRgendata/data_config.json
new file mode 100644
index 0000000..f563394
--- /dev/null
+++ b/FCRgendata/data_config.json
@@ -0,0 +1,26 @@
+{
+    "outpath": "/home/szysad/mimuw/3rok/ZPP/my-time-series/FCR-time-series/output/generated-data4.csv",
+    "datasources": [
+        {
+            "desc": "desc <optional>",
+            "files": [
+                {
+                    "desc": "desc",
+                    "path": "<path>"
+                }
+            ],
+            "timestamp_col": "timestamp",
+            "values": [
+                {
+                    "colname": "value",
+                    "alias": "value1",
+                    "cumulation_period": 60
+                }
+            ],
+            "time_root": true
+        }
+    ],
+    "aggregated_columns": ["value1", "value2", "value3"],
+    "timedelta": 10,
+    "max_time_gap": 100
+}
\ No newline at end of file
diff --git a/FCRgendata/src/FCRGenData/config_schemas.py b/FCRgendata/src/FCRGenData/config_schemas.py
new file mode 100644
index 0000000..9dfcdd5
--- /dev/null
+++ b/FCRgendata/src/FCRGenData/config_schemas.py
@@ -0,0 +1,83 @@
+
+CPSOLVER_CONFIG_SCHEMA = {  # schema for the CP-solver request config (same keys as cpsolver_config.json)
+    "type": "object",
+    "properties": {
+        "request": {
+            "type": "object",
+            "properties": {
+                "applicationId": {"type": "string"},
+                "camelModelFilePath": {"type": "filepath"},  # "filepath" is a custom type: the path must be an existing file
+                "cpProblemFilePath":  {"type": "filepath"},
+                "nodeCandidatesFilePath":  {"type": "filepath"},
+                "watermark": {
+                    "type": "object",
+                    "properties": {
+                        "user":  {"type": "string"},
+                        "system": {"type": "string"},
+                        "date":  {"type": "string"},
+                        "uuid":  {"type": "string"},
+                    },
+                    "minProperties": 4  # NOTE(review): draft-4 keyword; validate_config builds a Draft3Validator - confirm it is enforced
+                }
+            },
+            "minProperties": 5  # intended to force all five request keys to be present (see note above)
+        },
+        "cpSolverHost": {"type": "string"},  # for example "localhost:8080"
+    },
+    "required": ["request", "cpSolverHost"]
+}
+
+
+DATA_CONFIG_SCHEMA = {  # schema for the data-aggregator config that AggregatedData loads via validate_config
+    "type": "object",
+    "properties": {
+        "outpath": {"type": "dirpath"},  # custom type: parent directory of the path must exist
+        "datasources": {
+            "type": "array",
+            "minItems": 1,
+            "items": {
+                "type": "object",
+                "properties": {
+                    "desc": {"type": "string"},
+                    "source":
+                        {
+                            "type": "object",
+                            "properties": {
+                                "type": {"enum": ["csv"]},
+                                "path": {"type": "filepath"}
+                            },
+                            "required": ["type", "path"]
+                        },
+                    "timestamp_column": {"type": "string"},
+                    "values": {
+                        "type": "array",
+                        "minItems": 1,
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "column_name": {"type": "string"},  # NOTE(review): AggregatedData.row_generator reads value_info['colname'] - reconcile the key name
+                                "alias": {"type": "string"},
+                                "cumulation_period": {"type": "integer"}  # JSON Schema spells it "integer"; "int" is not a known type
+                            },
+                            "required": ["column_name"]
+                        }
+                    },
+                    "time_root":
+                        {"type": "boolean"}  # likewise "boolean", not "bool"
+                },
+                "required": ["desc", "source", "values"]
+            }
+        },
+        "aggregated_columns":
+            {
+                "type": "array",
+                "minItems": 1,
+                "items": {"type": "string"}
+            },
+        "timedelta":
+            {"type": "integer"},
+        "max_time_gap":
+            {"type": "integer"}
+    },
+    "required": ["outpath", "datasources", "aggregated_columns", "timedelta", "max_time_gap"]  # names must match "properties"
+}
diff --git a/FCRgendata/src/FCRGenData/data_aggregator.py b/FCRgendata/src/FCRGenData/data_aggregator.py
new file mode 100644
index 0000000..2f2a7e8
--- /dev/null
+++ b/FCRgendata/src/FCRGenData/data_aggregator.py
@@ -0,0 +1,111 @@
+import datetime as dt
+import json
+import logging
+from pathlib import Path
+from typing import Union, Optional, Dict, List, Tuple, Mapping, Generator
+
+from FCRGenData.config_schemas import DATA_CONFIG_SCHEMA
+from FCRGenData.interpolator import InterpolatedDataStream
+from FCRGenData.rawDataReader import RawCSVReader, IRawDataProvider
+from FCRGenData.validate_config import validate_config
+
+
+def _timestamp_generator(t0: dt.datetime, tstep: dt.timedelta) -> Generator[dt.datetime, None, None]:
+    t = t0
+    while True:
+        yield t
+        t += tstep
+
+
+class AggregatedData:
+    __conf: Dict
+    __datasources: List[Tuple[Dict, IRawDataProvider]]
+
+    __header: List[str]
+
+    __roottime_reader: IRawDataProvider
+    __roottime_datasource: Mapping
+
+    __delta: dt.timedelta
+
+    def __init__(self, json_path: Union[str, Path]):
+        if isinstance(json_path, str):
+            json_path = Path(json_path)
+
+        self.__conf = validate_config(json_path, DATA_CONFIG_SCHEMA)
+        self.__datasources = []
+        roottime_data = None
+        # Parse data sources
+        for datasource in self.__conf["datasources"]:
+            desc = datasource['desc']
+            source = datasource['source']
+            timestamp_column: Optional[str] = datasource[
+                'timestamp_column'] if 'timestamp_column' in datasource else None
+            logging.debug(f'Found datasource {desc}')
+
+            if source['type'] == 'csv':
+                csv_path = Path(source['path'])
+                reader = RawCSVReader(csv_path, timestamp_column_name=timestamp_column)
+                self.__datasources.append((datasource, reader))
+            else:
+                raise NotImplementedError("Unsupported source type")
+
+            if "time_root" in datasource:
+                if datasource["time_root"]:
+                    if roottime_data is None:
+                        roottime_data = (datasource, reader)
+                    else:
+                        raise AttributeError("Specified more than 1 time root")
+
+            self.__header.extend(reader.column_names)
+
+        if roottime_data is None:
+            roottime_data = self.__datasources[0]
+
+        self.__t0 = roottime_data[1].peek_t0()
+        self.__roottime_reader = roottime_data[1]
+        self.__roottime_datasource = roottime_data[0]
+
+    def row_generator(self):
+        interpolated_streams = []
+        for datasource, reader in self.__datasources:
+            interpolated_streams.append(
+                (datasource, InterpolatedDataStream(datasource, reader, _timestamp_generator(self.__t0, self.__delta))))
+
+        while True:
+            grouped_values = []
+            timestamp = None
+            # Get columns values
+            for datasource, stream in interpolated_streams:
+                interpolated_rowdict = next(stream)
+                grouped_values.append((datasource, interpolated_rowdict))
+                if datasource == self.__roottime_datasource:
+                    timestamp = interpolated_rowdict[self.__roottime_reader.timestamp_column_name]
+
+            # Check if timespamps do not exceed maxdiff
+
+            # TODO
+
+            # Filter
+            filtered_row = {
+                self.__roottime_reader.timestamp_column_name: timestamp
+            }
+
+            for value_name in self.__conf["aggregated_columns"]:
+                for datasource, rowdict in grouped_values:
+                    for value_info in datasource["values"]:
+                        # if alias present
+                        if 'alias' in value_info:
+                            if value_info['alias'] == value_name:
+                                filtered_row[value_name] = rowdict[value_info['colname']]
+                                break
+                        else:
+                            if value_info['colname'] == value_name:
+                                filtered_row[value_name] = rowdict[value_info['colname']]
+                                break
+                    if value_name in filtered_row:
+                        break
+                if value_name not in filtered_row:
+                    raise KeyError("Value specified in aggregated_values not found among provided datasources")
+
+            yield filtered_row
\ No newline at end of file
diff --git a/FCRgendata/src/FCRGenData/interpolator.py b/FCRgendata/src/FCRGenData/interpolator.py
new file mode 100644
index 0000000..2a0a93a
--- /dev/null
+++ b/FCRgendata/src/FCRGenData/interpolator.py
@@ -0,0 +1,13 @@
+import datetime as dt
+from typing import Dict, Generator
+
+from FCRGenData.rawDataReader import IRawDataProvider
+
+
+class InterpolatedDataStream:
+
+    def __init__(self, datasource: Dict, reader: IRawDataProvider, timestamp_generator: Generator[dt.datetime, None, None]):
+        pass
+
+    def __next__(self) -> Dict:
+        pass
\ No newline at end of file
diff --git a/FCRgendata/src/FCRGenData/rawDataReader/csv_reader.py b/FCRgendata/src/FCRGenData/rawDataReader/csv_reader.py
index 20fe224..7701333 100644
--- a/FCRgendata/src/FCRGenData/rawDataReader/csv_reader.py
+++ b/FCRgendata/src/FCRGenData/rawDataReader/csv_reader.py
@@ -18,9 +18,10 @@ def _match_columns(column: List[str], keys: List[str]) -> List[str]:
 
 class RawCSVReader(IRawDataProvider):
     """CSV data reader implementation."""
+
     __path: Path
-    __delimiter: str
 
+    __delimiter: str
     __arr: pd.DataFrame
 
     __lines: List[str]
@@ -28,6 +29,8 @@ class RawCSVReader(IRawDataProvider):
     __column_names: Tuple[str]
     __column_types: Dict[str, type]
 
+    __timestamp_column_name: str
+
     def __init__(self,
                  path: Union[Path, str],
                  delimiter: str = ',',
@@ -70,6 +73,7 @@ class RawCSVReader(IRawDataProvider):
         assert self.__arr[timestamp_column_name].is_monotonic_increasing, 'Timestamps in column are not increasing'
         assert self.__arr[timestamp_column_name].is_unique, 'Found >=2 equal timestamps'
 
+        self.__timestamp_column_name = timestamp_column_name
         self.__column_types = {}
 
         for key, name in self.__arr.dtypes.apply(lambda x: x.name).to_dict().items():
@@ -87,6 +91,10 @@ class RawCSVReader(IRawDataProvider):
                 raise NotImplementedError("Unknown datatype format")
             self.__column_types[key] = new_value
 
+    @property
+    def timestamp_column_name(self) -> str:
+        return self.__timestamp_column_name
+
     @property
     def column_names(self) -> Tuple[str]:
         """Column names"""
diff --git a/FCRgendata/src/FCRGenData/rawDataReader/raw_data_reader.py b/FCRgendata/src/FCRGenData/rawDataReader/raw_data_reader.py
index def04a1..e73e162 100644
--- a/FCRgendata/src/FCRGenData/rawDataReader/raw_data_reader.py
+++ b/FCRgendata/src/FCRGenData/rawDataReader/raw_data_reader.py
@@ -20,6 +20,16 @@ class IRawDataProvider(abc.ABC):
         else:
             self._timestamp_column_names = TIMESTAMP_KEY_COLUMN_NAMES
 
+    @property
+    @abc.abstractmethod
+    def timestamp_column_name(self) -> str:
+        """Timestamp column name"""
+        pass
+
+    @abc.abstractmethod
+    def peek_t0(self):
+        pass
+
     @property
     @abc.abstractmethod
     def column_names(self) -> Tuple[str]:
@@ -40,7 +50,7 @@ class IRawDataProvider(abc.ABC):
         pass
 
     @abc.abstractmethod
-    def reader_annotated(self) -> Generator[Dict[str, type], None, None]:
+    def reader_annotated(self) -> Generator[Dict[str, any], None, None]:
         """Generator over raw data, provides rows of data in increasing order (by timestamp).
         Returns dict with column names mapping to current values.
         """
diff --git a/FCRgendata/src/FCRGenData/validate_config.py b/FCRgendata/src/FCRGenData/validate_config.py
index 0972428..f7b5a6b 100644
--- a/FCRgendata/src/FCRGenData/validate_config.py
+++ b/FCRgendata/src/FCRGenData/validate_config.py
@@ -7,53 +7,24 @@ it's structure and content
 from pathlib import Path
 import logging
 import json
+from typing import Dict
+
 import jsonschema
 import sys
 
 
-config_schema = {
-    "type": "object",
-    "properties": {
-        "request": {
-            "type": "object",
-            "properties": {
-                "applicationId": {"type": "string"},
-                "camelModelFilePath": {"type": "filepath"},
-                "cpProblemFilePath":  {"type": "filepath"},
-                "nodeCandidatesFilePath":  {"type": "filepath"},
-                "watermark": {
-                    "type": "object",
-                    "properties": {
-                        "user":  {"type": "string"},
-                        "system": {"type": "string"},
-                        "date":  {"type": "string"},
-                        "uuid":  {"type": "string"},
-                    },
-                    "minProperties": 4
-                }
-            },
-            "minProperties": 5
-        },
-        "AvgResponseTimeTableFilePath":  {"type": "filepath"},
-        "predictionsFilePath": {"type": "filepath"},
-        "cpSolverHost": {"type": "string"},  # for example "localhost:8080"
-        "outpath":  {"type": "creatablepath"},
-    },
-    "required": ["request", "AvgResponseTimeTableFilePath", "cpSolverHost", "outpath", "predictionsFilePath"]
-}
 
 # create two custom types: "filepath" (path to exsisting file)
 # and "creatablepath" (path in which new file can be created)
 type_checker = jsonschema.Draft3Validator.TYPE_CHECKER.redefine_many({
     "filepath": lambda checker, path: Path(path).is_file(),
-    "creatablepath": lambda checker, path: Path(path).parent.is_dir()
+    "dirpath": lambda checker, path: Path(path).parent.is_dir()
 })
 customValidator = jsonschema.validators.extend(
     jsonschema.Draft3Validator, type_checker=type_checker)
-validator = customValidator(schema=config_schema)
 
 
-def validate_config(conf_p: Path):
+def validate_config(conf_p: Path, schema: Dict):
     """ validates given config path, config path structure and it content,
         returns validated json object
     """
@@ -71,6 +42,7 @@ def validate_config(conf_p: Path):
             sys.exit(1)
 
     try:
+        validator = customValidator(schema=schema)
         validator.validate(j_conf)
     except jsonschema.exceptions.ValidationError as e:
         logging.critical(f'json validation error: {e.message}')
-- 
GitLab


From 1c725e6a68f1a8d92040d2a59f37092006093443 Mon Sep 17 00:00:00 2001
From: jkk <jk394387@students.mimuw.edu.pl>
Date: Mon, 1 Mar 2021 00:50:56 +0100
Subject: [PATCH 2/8] Added comments & exceptions raising

---
 FCRgendata/src/FCRGenData/data_aggregator.py  | 43 +++++++++++++------
 FCRgendata/src/FCRGenData/interpolator.py     |  8 +++-
 .../src/FCRGenData/rawDataReader/__init__.py  |  4 +-
 .../FCRGenData/rawDataReader/csv_reader.py    | 23 +++++++---
 ...{raw_data_reader.py => row_data_reader.py} | 24 ++++++++++-
 .../tests/data_reader/data_reader_template.py |  8 ++--
 .../tests/data_reader/test_csv_reader.py      | 10 ++---
 7 files changed, 85 insertions(+), 35 deletions(-)
 rename FCRgendata/src/FCRGenData/rawDataReader/{raw_data_reader.py => row_data_reader.py} (65%)

diff --git a/FCRgendata/src/FCRGenData/data_aggregator.py b/FCRgendata/src/FCRGenData/data_aggregator.py
index 2f2a7e8..784f46d 100644
--- a/FCRgendata/src/FCRGenData/data_aggregator.py
+++ b/FCRgendata/src/FCRGenData/data_aggregator.py
@@ -1,12 +1,11 @@
 import datetime as dt
-import json
 import logging
 from pathlib import Path
 from typing import Union, Optional, Dict, List, Tuple, Mapping, Generator
 
 from FCRGenData.config_schemas import DATA_CONFIG_SCHEMA
 from FCRGenData.interpolator import InterpolatedDataStream
-from FCRGenData.rawDataReader import RawCSVReader, IRawDataProvider
+from FCRGenData.rawDataReader import RowCSVReader, IRowDataProvider
 from FCRGenData.validate_config import validate_config
 
 
@@ -18,15 +17,21 @@ def _timestamp_generator(t0: dt.datetime, tstep: dt.timedelta) -> Generator[dt.d
 
 
 class AggregatedData:
+    """Aggregates data from multiple sources and uneven time periods
+
+    Args:
+        json_path (Union[str, Path]): path to the configuration json file.
+    """
     __conf: Dict
-    __datasources: List[Tuple[Dict, IRawDataProvider]]
+    __datasources: List[Tuple[Dict, IRowDataProvider]]
 
     __header: List[str]
 
-    __roottime_reader: IRawDataProvider
+    __roottime_reader: IRowDataProvider
     __roottime_datasource: Mapping
 
     __delta: dt.timedelta
+    __max_time_gap: dt.timedelta
 
     def __init__(self, json_path: Union[str, Path]):
         if isinstance(json_path, str):
@@ -34,7 +39,10 @@ class AggregatedData:
 
         self.__conf = validate_config(json_path, DATA_CONFIG_SCHEMA)
         self.__datasources = []
+
+        # Roottime data source
         roottime_data = None
+
         # Parse data sources
         for datasource in self.__conf["datasources"]:
             desc = datasource['desc']
@@ -45,11 +53,12 @@ class AggregatedData:
 
             if source['type'] == 'csv':
                 csv_path = Path(source['path'])
-                reader = RawCSVReader(csv_path, timestamp_column_name=timestamp_column)
+                reader = RowCSVReader(csv_path, max_time_difference=dt.timedelta(seconds=self.__conf['max_time_gap']), timestamp_column_name=timestamp_column)  # max_time_difference is now a required argument
                 self.__datasources.append((datasource, reader))
             else:
                 raise NotImplementedError("Unsupported source type")
 
+            # if datasource specified as root time - save it
             if "time_root" in datasource:
                 if datasource["time_root"]:
                     if roottime_data is None:
@@ -59,6 +68,10 @@ class AggregatedData:
 
             self.__header.extend(reader.column_names)
 
+        self.__delta = dt.timedelta(seconds=self.__conf['timedelta'])
+        self.__max_time_gap = dt.timedelta(seconds=self.__conf['max_time_gap'])
+
+        # if no roottime datasource specified - get first
         if roottime_data is None:
             roottime_data = self.__datasources[0]
 
@@ -66,26 +79,30 @@ class AggregatedData:
         self.__roottime_reader = roottime_data[1]
         self.__roottime_datasource = roottime_data[0]
 
-    def row_generator(self):
+    def row_generator(self) -> Generator[Mapping[str, any], None, None]:
+        """Generates values mapping with correct and even time periods.
+        Combines all values from different sources and unify their timestamps.
+
+        Yields:
+            (Mapping[str, any]) column_alias -> value mapping
+        """
+        # Interpolate each stream
         interpolated_streams = []
         for datasource, reader in self.__datasources:
             interpolated_streams.append(
-                (datasource, InterpolatedDataStream(datasource, reader, _timestamp_generator(self.__t0, self.__delta))))
+                (datasource,  # one (config, stream) pair per source; list.append accepts exactly one argument
+                 InterpolatedDataStream(datasource, reader, _timestamp_generator(self.__t0, self.__delta))))
 
         while True:
             grouped_values = []
             timestamp = None
-            # Get columns values
+            # Get columns values and save their mappings
             for datasource, stream in interpolated_streams:
                 interpolated_rowdict = next(stream)
                 grouped_values.append((datasource, interpolated_rowdict))
                 if datasource == self.__roottime_datasource:
                     timestamp = interpolated_rowdict[self.__roottime_reader.timestamp_column_name]
 
-            # Check if timespamps do not exceed maxdiff
-
-            # TODO
-
             # Filter
             filtered_row = {
                 self.__roottime_reader.timestamp_column_name: timestamp
@@ -108,4 +125,4 @@ class AggregatedData:
                 if value_name not in filtered_row:
                     raise KeyError("Value specified in aggregated_values not found among provided datasources")
 
-            yield filtered_row
\ No newline at end of file
+            yield filtered_row
diff --git a/FCRgendata/src/FCRGenData/interpolator.py b/FCRgendata/src/FCRGenData/interpolator.py
index 2a0a93a..448fefc 100644
--- a/FCRgendata/src/FCRGenData/interpolator.py
+++ b/FCRgendata/src/FCRGenData/interpolator.py
@@ -1,12 +1,16 @@
 import datetime as dt
 from typing import Dict, Generator
 
-from FCRGenData.rawDataReader import IRawDataProvider
+from FCRGenData.rawDataReader import IRowDataProvider
 
 
 class InterpolatedDataStream:
 
-    def __init__(self, datasource: Dict, reader: IRawDataProvider, timestamp_generator: Generator[dt.datetime, None, None]):
+    def __init__(self,
+                 datasource: Dict,
+                 reader: IRowDataProvider,
+                 timestamp_generator: Generator[dt.datetime, None, None]):
+
         pass
 
     def __next__(self) -> Dict:
diff --git a/FCRgendata/src/FCRGenData/rawDataReader/__init__.py b/FCRgendata/src/FCRGenData/rawDataReader/__init__.py
index 60afb7f..5eca6a7 100644
--- a/FCRgendata/src/FCRGenData/rawDataReader/__init__.py
+++ b/FCRgendata/src/FCRGenData/rawDataReader/__init__.py
@@ -1,2 +1,2 @@
-from .raw_data_reader import IRawDataProvider
-from .csv_reader import RawCSVReader
\ No newline at end of file
+from .row_data_reader import IRowDataProvider
+from .csv_reader import RowCSVReader
\ No newline at end of file
diff --git a/FCRgendata/src/FCRGenData/rawDataReader/csv_reader.py b/FCRgendata/src/FCRGenData/rawDataReader/csv_reader.py
index 7701333..48c6212 100644
--- a/FCRgendata/src/FCRGenData/rawDataReader/csv_reader.py
+++ b/FCRgendata/src/FCRGenData/rawDataReader/csv_reader.py
@@ -6,7 +6,7 @@ from typing import Generator, Dict, Iterable, Tuple, Union, List, Optional
 
 import pandas as pd
 
-from .raw_data_reader import IRawDataProvider, TIME_COLUMN_NAMES
+from .row_data_reader import IRowDataProvider, TIME_COLUMN_NAMES
 
 logger = logging.getLogger(__name__)
 
@@ -16,7 +16,7 @@ def _match_columns(column: List[str], keys: List[str]) -> List[str]:
     return list(filter(lambda itername: any(map(lambda keyname: keyname in itername, keys)), column))
 
 
-class RawCSVReader(IRawDataProvider):
+class RowCSVReader(IRowDataProvider):
     """CSV data reader implementation."""
 
     __path: Path
@@ -33,9 +33,10 @@ class RawCSVReader(IRawDataProvider):
 
     def __init__(self,
                  path: Union[Path, str],
+                 max_time_difference: dt.timedelta,
                  delimiter: str = ',',
                  timestamp_column_name: Optional[str] = None):
-        super().__init__(timestamp_column_name=timestamp_column_name)
+        super().__init__(timestamp_column_name=timestamp_column_name, max_time_difference=max_time_difference)
 
         """
         Args:
@@ -117,20 +118,28 @@ class RawCSVReader(IRawDataProvider):
 
         Yields:
             Iterable[type]: values in the next row (order is the same as in column_names).
+
+        Raises:
+            TooBigTimeDifference
         """
         for index, row in self.__arr.iterrows():
             values = row.values.tolist()
-            values = map(RawCSVReader._convert_to_pytype, values)
+            values = map(RowCSVReader._convert_to_pytype, values)
+            self._check_time_difference(RowCSVReader._convert_to_pytype(row[self.__timestamp_column_name]))  # read the timestamp from the row: the lazy `map` has no column attributes
             yield list(values)
 
-    def reader_annotated(self) -> Generator[Dict[str, type], None, None]:
+    def reader_annotated(self) -> Generator[Dict[str, any], None, None]:
         """Returns dict iterator over rows.
 
         Yields:
-            Dicttype[str, ]: name of the columns mapping to its values in the current row.
+            Dicttype[str, any]: name of the columns mapping to its values in the current row.
+
+        Raises:
+            TooBigTimeDifference
         """
         for index, row in self.__arr.iterrows():
             mapped_values = dict(row.to_dict())
             for key, val in mapped_values.items():
-                mapped_values[key] = RawCSVReader._convert_to_pytype(val)
+                mapped_values[key] = RowCSVReader._convert_to_pytype(val)
+            self._check_time_difference(mapped_values[self.__timestamp_column_name])
             yield mapped_values
diff --git a/FCRgendata/src/FCRGenData/rawDataReader/raw_data_reader.py b/FCRgendata/src/FCRGenData/rawDataReader/row_data_reader.py
similarity index 65%
rename from FCRgendata/src/FCRGenData/rawDataReader/raw_data_reader.py
rename to FCRgendata/src/FCRGenData/rawDataReader/row_data_reader.py
index e73e162..ff7d436 100644
--- a/FCRgendata/src/FCRGenData/rawDataReader/raw_data_reader.py
+++ b/FCRgendata/src/FCRGenData/rawDataReader/row_data_reader.py
@@ -1,25 +1,44 @@
 import abc
+import datetime as dt
+
 from typing import Tuple, Dict, Generator, Iterable, Optional, List
 
 TIME_COLUMN_NAMES = ['time', 'date', 'timestamp', 'datetime']
 TIMESTAMP_KEY_COLUMN_NAMES = ['timestamp', 'datetime']
 
 
-class IRawDataProvider(abc.ABC):
+class TooBigTimeDifference(Exception):
+    """Raised when the timestamps of two consecutive rows are further apart than the allowed maximum."""
+    pass
+
+
+class IRowDataProvider(abc.ABC):
     """Data provider interface specification. Supplied with
     time-series data generators, which provides data measured in
     increasing, non repetitive timestamps. Time between timestamps *may*
     vary."""
 
     _timestamp_column_names: List[str]
+    __previous_timestamp: Optional[dt.datetime]
 
     def __init__(self,
+                 max_time_difference: dt.timedelta,
                  timestamp_column_name: Optional[str] = None):
+        self.__max_time_difference = max_time_difference
+        self.__previous_timestamp = None
         if timestamp_column_name:
             self._timestamp_column_names = [timestamp_column_name]
         else:
             self._timestamp_column_names = TIMESTAMP_KEY_COLUMN_NAMES
 
+    def _check_time_difference(self, timestamp: dt.datetime):
+        """Raise TooBigTimeDifference if `timestamp` lies more than the allowed gap after the previously checked row."""
+        # Always advance the reference point; it used to stay pinned to the first row, so gaps were measured from row 0.
+        previous, self.__previous_timestamp = self.__previous_timestamp, timestamp
+        if previous is not None and previous + self.__max_time_difference < timestamp:
+            raise TooBigTimeDifference(
+                f'Previous timestamp: {previous}, actual timestamp: {timestamp}, max difference: {self.__max_time_difference}')
+
     @property
     @abc.abstractmethod
     def timestamp_column_name(self) -> str:
@@ -27,7 +46,8 @@ class IRawDataProvider(abc.ABC):
         pass
 
     @abc.abstractmethod
-    def peek_t0(self):
+    def peek_t0(self) -> dt.datetime:
+        """Returns timestamp of the first row"""
         pass
 
     @property
diff --git a/FCRgendata/tests/data_reader/data_reader_template.py b/FCRgendata/tests/data_reader/data_reader_template.py
index 6b9b501..9c6b997 100644
--- a/FCRgendata/tests/data_reader/data_reader_template.py
+++ b/FCRgendata/tests/data_reader/data_reader_template.py
@@ -6,13 +6,13 @@ from typing import Callable, Optional
 
 import pytest
 
-from FCRGenData.rawDataReader import IRawDataProvider
+from FCRGenData.rawDataReader import IRowDataProvider
 
 
 class RawDataProviderTestTemplate(abc.ABC):
     @abc.abstractmethod
     @pytest.yield_fixture
-    def reader_factory(self) -> Callable[[str, Optional[str]], IRawDataProvider]:
+    def reader_factory(self) -> Callable[[str, Optional[str]], IRowDataProvider]:
         pass
 
     def test_import_basic(self, reader_factory):
@@ -85,7 +85,7 @@ class RawDataProviderTestTemplate(abc.ABC):
 
         s.seek(0)
 
-        reader: IRawDataProvider = reader_factory(s.read(), timestamp_column_name='time')
+        reader: IRowDataProvider = reader_factory(s.read(), timestamp_column_name='time')
 
         assert len(reader.column_names) == len(header)
         for reader_col, header_col in zip(reader.column_names, header):
@@ -112,7 +112,7 @@ class RawDataProviderTestTemplate(abc.ABC):
 
         s.seek(0)
 
-        reader: IRawDataProvider = reader_factory(s.read(), timestamp_column_name='time')
+        reader: IRowDataProvider = reader_factory(s.read(), timestamp_column_name='time')
 
         row_gen = reader.reader()
         for row, grow in zip(content, row_gen):
diff --git a/FCRgendata/tests/data_reader/test_csv_reader.py b/FCRgendata/tests/data_reader/test_csv_reader.py
index 6f6436a..dd02d0b 100644
--- a/FCRgendata/tests/data_reader/test_csv_reader.py
+++ b/FCRgendata/tests/data_reader/test_csv_reader.py
@@ -3,26 +3,26 @@ from typing import Callable
 
 import pytest
 
-from FCRGenData.rawDataReader import RawCSVReader
+from FCRGenData.rawDataReader import RowCSVReader
 from FCRgendata.tests.data_reader.data_reader_template import RawDataProviderTestTemplate
 
 
 class TestCSVReader(RawDataProviderTestTemplate):
 
     @pytest.yield_fixture
-    def empty_reader(self) -> RawCSVReader:
+    def empty_reader(self) -> RowCSVReader:
         tfile = tempfile.NamedTemporaryFile(suffix='.csv')
-        yield RawCSVReader(tfile.name)
+        yield RowCSVReader(tfile.name)
         tfile.close()
 
     @pytest.yield_fixture
-    def reader_factory(self) -> Callable[[str], RawCSVReader]:
+    def reader_factory(self) -> Callable[[str], RowCSVReader]:
         tfile = tempfile.NamedTemporaryFile(suffix='.csv', mode='w')
         path = tfile.name
 
         def _write(s, *args, **kwargs):
             tfile.write(s)
             tfile.flush()
-            return RawCSVReader(path, *args, **kwargs)
+            return RowCSVReader(path, *args, **kwargs)
 
         yield _write
-- 
GitLab


From 55b2605a26aefd2dcf0fd4aea09e4f1d52616b28 Mon Sep 17 00:00:00 2001
From: jkk <jk394387@students.mimuw.edu.pl>
Date: Mon, 1 Mar 2021 18:13:29 +0100
Subject: [PATCH 3/8] peek_t0

---
 FCRgendata/src/FCRGenData/rawDataReader/csv_reader.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/FCRgendata/src/FCRGenData/rawDataReader/csv_reader.py b/FCRgendata/src/FCRGenData/rawDataReader/csv_reader.py
index 48c6212..66d526e 100644
--- a/FCRgendata/src/FCRGenData/rawDataReader/csv_reader.py
+++ b/FCRgendata/src/FCRGenData/rawDataReader/csv_reader.py
@@ -96,6 +96,14 @@ class RowCSVReader(IRowDataProvider):
     def timestamp_column_name(self) -> str:
         return self.__timestamp_column_name
 
+    def peek_t0(self) -> dt.datetime:
+        """Peeks first timestamp from file
+
+        Returns:
+            (dt.datetime) first timestamp in file
+            """
+        return RowCSVReader._convert_to_pytype(getattr(self.__arr.iloc[0], self.__timestamp_column_name))
+
     @property
     def column_names(self) -> Tuple[str]:
         """Column names"""
-- 
GitLab


From 9e563bb0f8f76a1f5e0200adb9e73b7cfa44af40 Mon Sep 17 00:00:00 2001
From: jkk <jk394387@students.mimuw.edu.pl>
Date: Tue, 2 Mar 2021 14:08:58 +0100
Subject: [PATCH 4/8] last test still needs fixing

---
 FCRgendata/tests/data_reader/__init__.py             |  0
 FCRgendata/tests/data_reader/data_reader_template.py | 12 ++++++------
 FCRgendata/tests/data_reader/test_csv_reader.py      |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)
 create mode 100644 FCRgendata/tests/data_reader/__init__.py

diff --git a/FCRgendata/tests/data_reader/__init__.py b/FCRgendata/tests/data_reader/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/FCRgendata/tests/data_reader/data_reader_template.py b/FCRgendata/tests/data_reader/data_reader_template.py
index 9c6b997..58734e8 100644
--- a/FCRgendata/tests/data_reader/data_reader_template.py
+++ b/FCRgendata/tests/data_reader/data_reader_template.py
@@ -29,11 +29,11 @@ class RawDataProviderTestTemplate(abc.ABC):
         s.seek(0)
         # unknown timestamp column
         with pytest.raises(AssertionError) as exc:
-            reader = reader_factory(s.read())
+            reader = reader_factory(s.read(), max_time_difference=dt.timedelta(days=1))
 
         assert 'Cannot specify timestamp column' in exc.value.args[0]
 
-        reader_factory(s.read(), timestamp_column_name='time')
+        reader_factory(s.read(), timestamp_column_name='time', max_time_difference=dt.timedelta(days=1))
 
     def test_time_desc(self, reader_factory):
         header = ['time', 'int', 'str', 'float', 'bool']
@@ -49,7 +49,7 @@ class RawDataProviderTestTemplate(abc.ABC):
         s.seek(0)
         # unknown timestamp column
         with pytest.raises(AssertionError) as exc:
-            reader = reader_factory(s.read(), timestamp_column_name='time')
+            reader = reader_factory(s.read(), timestamp_column_name='time', max_time_difference=dt.timedelta(days=1))
 
         assert 'Timestamps in column are not increasing' in exc.value.args[0]
 
@@ -68,7 +68,7 @@ class RawDataProviderTestTemplate(abc.ABC):
         s.seek(0)
         # unknown timestamp column
         with pytest.raises(AssertionError) as exc:
-            reader = reader_factory(s.read(), timestamp_column_name='time')
+            reader = reader_factory(s.read(), timestamp_column_name='time', max_time_difference=dt.timedelta(days=1))
 
         assert 'Found >=2 equal timestamps' in exc.value.args[0]
 
@@ -85,7 +85,7 @@ class RawDataProviderTestTemplate(abc.ABC):
 
         s.seek(0)
 
-        reader: IRowDataProvider = reader_factory(s.read(), timestamp_column_name='time')
+        reader: IRowDataProvider = reader_factory(s.read(), timestamp_column_name='time', max_time_difference=dt.timedelta(days=1))
 
         assert len(reader.column_names) == len(header)
         for reader_col, header_col in zip(reader.column_names, header):
@@ -112,7 +112,7 @@ class RawDataProviderTestTemplate(abc.ABC):
 
         s.seek(0)
 
-        reader: IRowDataProvider = reader_factory(s.read(), timestamp_column_name='time')
+        reader: IRowDataProvider = reader_factory(s.read(), timestamp_column_name='time', max_time_difference=dt.timedelta(days=1))
 
         row_gen = reader.reader()
         for row, grow in zip(content, row_gen):
diff --git a/FCRgendata/tests/data_reader/test_csv_reader.py b/FCRgendata/tests/data_reader/test_csv_reader.py
index dd02d0b..ae77e13 100644
--- a/FCRgendata/tests/data_reader/test_csv_reader.py
+++ b/FCRgendata/tests/data_reader/test_csv_reader.py
@@ -4,7 +4,7 @@ from typing import Callable
 import pytest
 
 from FCRGenData.rawDataReader import RowCSVReader
-from FCRgendata.tests.data_reader.data_reader_template import RawDataProviderTestTemplate
+from .data_reader_template import RawDataProviderTestTemplate
 
 
 class TestCSVReader(RawDataProviderTestTemplate):
-- 
GitLab


From 11f846f4003300820ef0281a33d25f726541d32d Mon Sep 17 00:00:00 2001
From: szymon <szysad108@gmail.com>
Date: Tue, 2 Mar 2021 14:25:49 +0100
Subject: [PATCH 5/8] added interpolation - not tested

---
 FCRgendata/.env                           |   1 +
 FCRgendata/Pipfile                        |   3 +
 FCRgendata/Pipfile.lock                   | 223 +++++++++++++++-------
 FCRgendata/src/FCRGenData/interpolator.py |  84 +++++++-
 FCRgendata/tests/__init__.py              |   0
 FCRgendata/tests/test_interpolator.py     |   6 +
 6 files changed, 239 insertions(+), 78 deletions(-)
 create mode 100644 FCRgendata/.env
 create mode 100644 FCRgendata/tests/__init__.py
 create mode 100644 FCRgendata/tests/test_interpolator.py

diff --git a/FCRgendata/.env b/FCRgendata/.env
new file mode 100644
index 0000000..a328b41
--- /dev/null
+++ b/FCRgendata/.env
@@ -0,0 +1 @@
+PYTHONPATH=${PWD}/src
\ No newline at end of file
diff --git a/FCRgendata/Pipfile b/FCRgendata/Pipfile
index 782f714..2718649 100644
--- a/FCRgendata/Pipfile
+++ b/FCRgendata/Pipfile
@@ -12,6 +12,9 @@ pipenv-setup = "*"
 numpy = "*"
 lxml = "*"
 progress = "*"
+scipy = "*"
+pytest = "*"
+pandas = "*"
 
 [requires]
 python_version = "3.8"
diff --git a/FCRgendata/Pipfile.lock b/FCRgendata/Pipfile.lock
index 590980c..6314fb7 100644
--- a/FCRgendata/Pipfile.lock
+++ b/FCRgendata/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "3ceafb48a7bc77840c0a82fa99004106b69e316ec11f6ba956adb538a5b1bd4c"
+            "sha256": "e2059ff01c7a225559e985927f443dcb77812363ddb24e1150218c97e67c0743"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -98,6 +98,13 @@
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
             "version": "==2.10"
         },
+        "iniconfig": {
+            "hashes": [
+                "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
+                "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"
+            ],
+            "version": "==1.1.1"
+        },
         "jsonschema": {
             "hashes": [
                 "sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163",
@@ -151,43 +158,33 @@
         },
         "numpy": {
             "hashes": [
-                "sha256:08308c38e44cc926bdfce99498b21eec1f848d24c302519e64203a8da99a97db",
-                "sha256:09c12096d843b90eafd01ea1b3307e78ddd47a55855ad402b157b6c4862197ce",
-                "sha256:13d166f77d6dc02c0a73c1101dd87fdf01339febec1030bd810dcd53fff3b0f1",
-                "sha256:141ec3a3300ab89c7f2b0775289954d193cc8edb621ea05f99db9cb181530512",
-                "sha256:16c1b388cc31a9baa06d91a19366fb99ddbe1c7b205293ed072211ee5bac1ed2",
-                "sha256:18bed2bcb39e3f758296584337966e68d2d5ba6aab7e038688ad53c8f889f757",
-                "sha256:1aeef46a13e51931c0b1cf8ae1168b4a55ecd282e6688fdb0a948cc5a1d5afb9",
-                "sha256:27d3f3b9e3406579a8af3a9f262f5339005dd25e0ecf3cf1559ff8a49ed5cbf2",
-                "sha256:2a2740aa9733d2e5b2dfb33639d98a64c3b0f24765fed86b0fd2aec07f6a0a08",
-                "sha256:4377e10b874e653fe96985c05feed2225c912e328c8a26541f7fc600fb9c637b",
-                "sha256:448ebb1b3bf64c0267d6b09a7cba26b5ae61b6d2dbabff7c91b660c7eccf2bdb",
-                "sha256:50e86c076611212ca62e5a59f518edafe0c0730f7d9195fec718da1a5c2bb1fc",
-                "sha256:5734bdc0342aba9dfc6f04920988140fb41234db42381cf7ccba64169f9fe7ac",
-                "sha256:64324f64f90a9e4ef732be0928be853eee378fd6a01be21a0a8469c4f2682c83",
-                "sha256:6ae6c680f3ebf1cf7ad1d7748868b39d9f900836df774c453c11c5440bc15b36",
-                "sha256:6d7593a705d662be5bfe24111af14763016765f43cb6923ed86223f965f52387",
-                "sha256:8cac8790a6b1ddf88640a9267ee67b1aee7a57dfa2d2dd33999d080bc8ee3a0f",
-                "sha256:8ece138c3a16db8c1ad38f52eb32be6086cc72f403150a79336eb2045723a1ad",
-                "sha256:9eeb7d1d04b117ac0d38719915ae169aa6b61fca227b0b7d198d43728f0c879c",
-                "sha256:a09f98011236a419ee3f49cedc9ef27d7a1651df07810ae430a6b06576e0b414",
-                "sha256:a5d897c14513590a85774180be713f692df6fa8ecf6483e561a6d47309566f37",
-                "sha256:ad6f2ff5b1989a4899bf89800a671d71b1612e5ff40866d1f4d8bcf48d4e5764",
-                "sha256:c42c4b73121caf0ed6cd795512c9c09c52a7287b04d105d112068c1736d7c753",
-                "sha256:cb1017eec5257e9ac6209ac172058c430e834d5d2bc21961dceeb79d111e5909",
-                "sha256:d6c7bb82883680e168b55b49c70af29b84b84abb161cbac2800e8fcb6f2109b6",
-                "sha256:e452dc66e08a4ce642a961f134814258a082832c78c90351b75c41ad16f79f63",
-                "sha256:e5b6ed0f0b42317050c88022349d994fe72bfe35f5908617512cd8c8ef9da2a9",
-                "sha256:e9b30d4bd69498fc0c3fe9db5f62fffbb06b8eb9321f92cc970f2969be5e3949",
-                "sha256:ec149b90019852266fec2341ce1db513b843e496d5a8e8cdb5ced1923a92faab",
-                "sha256:edb01671b3caae1ca00881686003d16c2209e07b7ef8b7639f1867852b948f7c",
-                "sha256:f0d3929fe88ee1c155129ecd82f981b8856c5d97bcb0d5f23e9b4242e79d1de3",
-                "sha256:f29454410db6ef8126c83bd3c968d143304633d45dc57b51252afbd79d700893",
-                "sha256:fe45becb4c2f72a0907c1d0246ea6449fe7a9e2293bb0e11c4e9a32bb0930a15",
-                "sha256:fedbd128668ead37f33917820b704784aff695e0019309ad446a6d0b065b57e4"
+                "sha256:032be656d89bbf786d743fee11d01ef318b0781281241997558fa7950028dd29",
+                "sha256:104f5e90b143dbf298361a99ac1af4cf59131218a045ebf4ee5990b83cff5fab",
+                "sha256:125a0e10ddd99a874fd357bfa1b636cd58deb78ba4a30b5ddb09f645c3512e04",
+                "sha256:12e4ba5c6420917571f1a5becc9338abbde71dd811ce40b37ba62dec7b39af6d",
+                "sha256:13adf545732bb23a796914fe5f891a12bd74cf3d2986eed7b7eba2941eea1590",
+                "sha256:2d7e27442599104ee08f4faed56bb87c55f8b10a5494ac2ead5c98a4b289e61f",
+                "sha256:3bc63486a870294683980d76ec1e3efc786295ae00128f9ea38e2c6e74d5a60a",
+                "sha256:3d3087e24e354c18fb35c454026af3ed8997cfd4997765266897c68d724e4845",
+                "sha256:4ed8e96dc146e12c1c5cdd6fb9fd0757f2ba66048bf94c5126b7efebd12d0090",
+                "sha256:60759ab15c94dd0e1ed88241fd4fa3312db4e91d2c8f5a2d4cf3863fad83d65b",
+                "sha256:65410c7f4398a0047eea5cca9b74009ea61178efd78d1be9847fac1d6716ec1e",
+                "sha256:66b467adfcf628f66ea4ac6430ded0614f5cc06ba530d09571ea404789064adc",
+                "sha256:7199109fa46277be503393be9250b983f325880766f847885607d9b13848f257",
+                "sha256:72251e43ac426ff98ea802a931922c79b8d7596480300eb9f1b1e45e0543571e",
+                "sha256:89e5336f2bec0c726ac7e7cdae181b325a9c0ee24e604704ed830d241c5e47ff",
+                "sha256:89f937b13b8dd17b0099c7c2e22066883c86ca1575a975f754babc8fbf8d69a9",
+                "sha256:9c94cab5054bad82a70b2e77741271790304651d584e2cdfe2041488e753863b",
+                "sha256:9eb551d122fadca7774b97db8a112b77231dcccda8e91a5bc99e79890797175e",
+                "sha256:a1d7995d1023335e67fb070b2fae6f5968f5be3802b15ad6d79d81ecaa014fe0",
+                "sha256:ae61f02b84a0211abb56462a3b6cd1e7ec39d466d3160eb4e1da8bf6717cdbeb",
+                "sha256:b9410c0b6fed4a22554f072a86c361e417f0258838957b78bd063bde2c7f841f",
+                "sha256:c26287dfc888cf1e65181f39ea75e11f42ffc4f4529e5bd19add57ad458996e2",
+                "sha256:c91ec9569facd4757ade0888371eced2ecf49e7982ce5634cc2cf4e7331a4b14",
+                "sha256:ecb5b74c702358cdc21268ff4c37f7466357871f53a30e6f84c686952bef16a9"
             ],
             "index": "pypi",
-            "version": "==1.19.4"
+            "version": "==1.20.1"
         },
         "orderedmultidict": {
             "hashes": [
@@ -198,11 +195,33 @@
         },
         "packaging": {
             "hashes": [
-                "sha256:24e0da08660a87484d1602c30bb4902d74816b6985b93de36926f5bc95741858",
-                "sha256:78598185a7008a470d64526a8059de9aaa449238f280fc9eb6b13ba6c4109093"
+                "sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5",
+                "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a"
             ],
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==20.8"
+            "version": "==20.9"
+        },
+        "pandas": {
+            "hashes": [
+                "sha256:09761bf5f8c741d47d4b8b9073288de1be39bbfccc281d70b889ade12b2aad29",
+                "sha256:0f27fd1adfa256388dc34895ca5437eaf254832223812afd817a6f73127f969c",
+                "sha256:43e00770552595c2250d8d712ec8b6e08ca73089ac823122344f023efa4abea3",
+                "sha256:46fc671c542a8392a4f4c13edc8527e3a10f6cb62912d856f82248feb747f06e",
+                "sha256:475b7772b6e18a93a43ea83517932deff33954a10d4fbae18d0c1aba4182310f",
+                "sha256:4d821b9b911fc1b7d428978d04ace33f0af32bb7549525c8a7b08444bce46b74",
+                "sha256:5e3c8c60541396110586bcbe6eccdc335a38e7de8c217060edaf4722260b158f",
+                "sha256:621c044a1b5e535cf7dcb3ab39fca6f867095c3ef223a524f18f60c7fee028ea",
+                "sha256:72ffcea00ae8ffcdbdefff800284311e155fbb5ed6758f1a6110fc1f8f8f0c1c",
+                "sha256:8a051e957c5206f722e83f295f95a2cf053e890f9a1fba0065780a8c2d045f5d",
+                "sha256:97b1954533b2a74c7e20d1342c4f01311d3203b48f2ebf651891e6a6eaf01104",
+                "sha256:9f5829e64507ad10e2561b60baf285c470f3c4454b007c860e77849b88865ae7",
+                "sha256:a93e34f10f67d81de706ce00bf8bb3798403cabce4ccb2de10c61b5ae8786ab5",
+                "sha256:d59842a5aa89ca03c2099312163ffdd06f56486050e641a45d926a072f04d994",
+                "sha256:dbb255975eb94143f2e6ec7dadda671d25147939047839cd6b8a4aff0379bb9b",
+                "sha256:df6f10b85aef7a5bb25259ad651ad1cc1d6bb09000595cab47e718cbac250b1d"
+            ],
+            "index": "pypi",
+            "version": "==1.2.3"
         },
         "pathspec": {
             "hashes": [
@@ -251,6 +270,14 @@
             "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
             "version": "==0.2.3"
         },
+        "pluggy": {
+            "hashes": [
+                "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0",
+                "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+            "version": "==0.13.1"
+        },
         "progress": {
             "hashes": [
                 "sha256:69ecedd1d1bbe71bf6313d88d1e6c4d2957b7f1d4f71312c211257f7dae64372"
@@ -258,6 +285,14 @@
             "index": "pypi",
             "version": "==1.5"
         },
+        "py": {
+            "hashes": [
+                "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3",
+                "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+            "version": "==1.10.0"
+        },
         "pyparsing": {
             "hashes": [
                 "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1",
@@ -273,6 +308,14 @@
             "markers": "python_version >= '3.5'",
             "version": "==0.17.3"
         },
+        "pytest": {
+            "hashes": [
+                "sha256:9d1edf9e7d0b84d72ea3dbcdfd22b35fb543a5e8f2a60092dd578936bf63d7f9",
+                "sha256:b574b57423e818210672e07ca1fa90aaf194a4f63f3ab909a2c67ebb22913839"
+            ],
+            "index": "pypi",
+            "version": "==6.2.2"
+        },
         "python-dateutil": {
             "hashes": [
                 "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c",
@@ -281,6 +324,13 @@
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
             "version": "==2.8.1"
         },
+        "pytz": {
+            "hashes": [
+                "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da",
+                "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"
+            ],
+            "version": "==2021.1"
+        },
         "regex": {
             "hashes": [
                 "sha256:02951b7dacb123d8ea6da44fe45ddd084aa6777d4b2454fa0da61d569c6fa538",
@@ -343,6 +393,31 @@
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
             "version": "==1.5.16"
         },
+        "scipy": {
+            "hashes": [
+                "sha256:0c8a51d33556bf70367452d4d601d1742c0e806cd0194785914daf19775f0e67",
+                "sha256:0e5b0ccf63155d90da576edd2768b66fb276446c371b73841e3503be1d63fb5d",
+                "sha256:2481efbb3740977e3c831edfd0bd9867be26387cacf24eb5e366a6a374d3d00d",
+                "sha256:33d6b7df40d197bdd3049d64e8e680227151673465e5d85723b3b8f6b15a6ced",
+                "sha256:5da5471aed911fe7e52b86bf9ea32fb55ae93e2f0fac66c32e58897cfb02fa07",
+                "sha256:5f331eeed0297232d2e6eea51b54e8278ed8bb10b099f69c44e2558c090d06bf",
+                "sha256:5fa9c6530b1661f1370bcd332a1e62ca7881785cc0f80c0d559b636567fab63c",
+                "sha256:6725e3fbb47da428794f243864f2297462e9ee448297c93ed1dcbc44335feb78",
+                "sha256:68cb4c424112cd4be886b4d979c5497fba190714085f46b8ae67a5e4416c32b4",
+                "sha256:794e768cc5f779736593046c9714e0f3a5940bc6dcc1dba885ad64cbfb28e9f0",
+                "sha256:83bf7c16245c15bc58ee76c5418e46ea1811edcc2e2b03041b804e46084ab627",
+                "sha256:8e403a337749ed40af60e537cc4d4c03febddcc56cd26e774c9b1b600a70d3e4",
+                "sha256:a15a1f3fc0abff33e792d6049161b7795909b40b97c6cc2934ed54384017ab76",
+                "sha256:a423533c55fec61456dedee7b6ee7dce0bb6bfa395424ea374d25afa262be261",
+                "sha256:a5193a098ae9f29af283dcf0041f762601faf2e595c0db1da929875b7570353f",
+                "sha256:bd50daf727f7c195e26f27467c85ce653d41df4358a25b32434a50d8870fc519",
+                "sha256:c4fceb864890b6168e79b0e714c585dbe2fd4222768ee90bc1aa0f8218691b11",
+                "sha256:e79570979ccdc3d165456dd62041d9556fb9733b86b4b6d818af7a0afc15f092",
+                "sha256:f46dd15335e8a320b0fb4685f58b7471702234cba8bb3442b69a3e1dc329c345"
+            ],
+            "index": "pypi",
+            "version": "==1.6.1"
+        },
         "six": {
             "hashes": [
                 "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
@@ -369,46 +444,46 @@
         },
         "typed-ast": {
             "hashes": [
-                "sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355",
-                "sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919",
-                "sha256:0d8110d78a5736e16e26213114a38ca35cb15b6515d535413b090bd50951556d",
-                "sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa",
-                "sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652",
-                "sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75",
-                "sha256:3742b32cf1c6ef124d57f95be609c473d7ec4c14d0090e5a5e05a15269fb4d0c",
-                "sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01",
-                "sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d",
-                "sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1",
-                "sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907",
-                "sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c",
-                "sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3",
-                "sha256:7e4c9d7658aaa1fc80018593abdf8598bf91325af6af5cce4ce7c73bc45ea53d",
-                "sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b",
-                "sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614",
-                "sha256:92c325624e304ebf0e025d1224b77dd4e6393f18aab8d829b5b7e04afe9b7a2c",
-                "sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb",
-                "sha256:b52ccf7cfe4ce2a1064b18594381bccf4179c2ecf7f513134ec2f993dd4ab395",
-                "sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b",
-                "sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41",
-                "sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6",
-                "sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34",
-                "sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe",
-                "sha256:d648b8e3bf2fe648745c8ffcee3db3ff903d0817a01a12dd6a6ea7a8f4889072",
-                "sha256:f208eb7aff048f6bea9586e61af041ddf7f9ade7caed625742af423f6bae3298",
-                "sha256:fac11badff8313e23717f3dada86a15389d0708275bddf766cca67a84ead3e91",
-                "sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4",
-                "sha256:fcf135e17cc74dbfbc05894ebca928ffeb23d9790b3167a674921db19082401f",
-                "sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7"
-            ],
-            "version": "==1.4.1"
+                "sha256:07d49388d5bf7e863f7fa2f124b1b1d89d8aa0e2f7812faff0a5658c01c59aa1",
+                "sha256:14bf1522cdee369e8f5581238edac09150c765ec1cb33615855889cf33dcb92d",
+                "sha256:240296b27397e4e37874abb1df2a608a92df85cf3e2a04d0d4d61055c8305ba6",
+                "sha256:36d829b31ab67d6fcb30e185ec996e1f72b892255a745d3a82138c97d21ed1cd",
+                "sha256:37f48d46d733d57cc70fd5f30572d11ab8ed92da6e6b28e024e4a3edfb456e37",
+                "sha256:4c790331247081ea7c632a76d5b2a265e6d325ecd3179d06e9cf8d46d90dd151",
+                "sha256:5dcfc2e264bd8a1db8b11a892bd1647154ce03eeba94b461effe68790d8b8e07",
+                "sha256:7147e2a76c75f0f64c4319886e7639e490fee87c9d25cb1d4faef1d8cf83a440",
+                "sha256:7703620125e4fb79b64aa52427ec192822e9f45d37d4b6625ab37ef403e1df70",
+                "sha256:8368f83e93c7156ccd40e49a783a6a6850ca25b556c0fa0240ed0f659d2fe496",
+                "sha256:84aa6223d71012c68d577c83f4e7db50d11d6b1399a9c779046d75e24bed74ea",
+                "sha256:85f95aa97a35bdb2f2f7d10ec5bbdac0aeb9dafdaf88e17492da0504de2e6400",
+                "sha256:8db0e856712f79c45956da0c9a40ca4246abc3485ae0d7ecc86a20f5e4c09abc",
+                "sha256:9044ef2df88d7f33692ae3f18d3be63dec69c4fb1b5a4a9ac950f9b4ba571606",
+                "sha256:963c80b583b0661918718b095e02303d8078950b26cc00b5e5ea9ababe0de1fc",
+                "sha256:987f15737aba2ab5f3928c617ccf1ce412e2e321c77ab16ca5a293e7bbffd581",
+                "sha256:9ec45db0c766f196ae629e509f059ff05fc3148f9ffd28f3cfe75d4afb485412",
+                "sha256:9fc0b3cb5d1720e7141d103cf4819aea239f7d136acf9ee4a69b047b7986175a",
+                "sha256:a2c927c49f2029291fbabd673d51a2180038f8cd5a5b2f290f78c4516be48be2",
+                "sha256:a38878a223bdd37c9709d07cd357bb79f4c760b29210e14ad0fb395294583787",
+                "sha256:b4fcdcfa302538f70929eb7b392f536a237cbe2ed9cba88e3bf5027b39f5f77f",
+                "sha256:c0c74e5579af4b977c8b932f40a5464764b2f86681327410aa028a22d2f54937",
+                "sha256:c1c876fd795b36126f773db9cbb393f19808edd2637e00fd6caba0e25f2c7b64",
+                "sha256:c9aadc4924d4b5799112837b226160428524a9a45f830e0d0f184b19e4090487",
+                "sha256:cc7b98bf58167b7f2db91a4327da24fb93368838eb84a44c472283778fc2446b",
+                "sha256:cf54cfa843f297991b7388c281cb3855d911137223c6b6d2dd82a47ae5125a41",
+                "sha256:d003156bb6a59cda9050e983441b7fa2487f7800d76bdc065566b7d728b4581a",
+                "sha256:d175297e9533d8d37437abc14e8a83cbc68af93cc9c1c59c2c292ec59a0697a3",
+                "sha256:d746a437cdbca200622385305aedd9aef68e8a645e385cc483bdc5e488f07166",
+                "sha256:e683e409e5c45d5c9082dc1daf13f6374300806240719f95dc783d1fc942af10"
+            ],
+            "version": "==1.4.2"
         },
         "urllib3": {
             "hashes": [
-                "sha256:19188f96923873c92ccb987120ec4acaa12f0461fa9ce5d3d0772bc965a39e08",
-                "sha256:d8ff90d979214d7b4f8ce956e80f4028fc6860e4431f731ea4a8c08f23f99473"
+                "sha256:1b465e494e3e0d8939b50680403e3aedaa2bc434b7d5af64dfd3c958d7f5ae80",
+                "sha256:de3eedaad74a2683334e282005cd8d7f22f4d55fa690a2a1020a416cb0a47e73"
             ],
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
-            "version": "==1.26.2"
+            "version": "==1.26.3"
         },
         "vistir": {
             "hashes": [
diff --git a/FCRgendata/src/FCRGenData/interpolator.py b/FCRgendata/src/FCRGenData/interpolator.py
index 448fefc..d6d7b37 100644
--- a/FCRgendata/src/FCRGenData/interpolator.py
+++ b/FCRgendata/src/FCRGenData/interpolator.py
@@ -1,17 +1,93 @@
 import datetime as dt
 from typing import Dict, Generator
+from numbers import Number
+from scipy import interpolate
 
 from FCRGenData.rawDataReader import IRowDataProvider
 
 
 class InterpolatedDataStream:
+    '''
+        generator that interpolates data given by
+        reader in points given by timestamp_generator
+        with additional config from datasource.
+
+        Interpolator returns dict which includes:
+            1. Mapping column names given by reader
+                (only numerical types) to its
+                interpolated values in points returned
+                by timestamp_generator.
+            2. Mapping from reader column name that
+                contains data timestamp to timestamp
+                given by timestamp_generator
+
+        NOTE:
+            timestamps are of type datetime.datetime
+
+        If value is set to None interpolator ignores
+        this value.
+
+        Interpolation stops after last numerical value
+        other columns are extrapolated if not given
+        to that point.
+
+
+        NOTE:
+            currently interpolates all of the data
+            if dataset will grow it might have some
+            performance issues.
+    '''
+
+    def __ts_embedding(ts: dt.datetime, ts0: dt.datatime) -> float:
+        return (ts - ts0).seconds
 
     def __init__(self,
-                 datasource: Dict,
                  reader: IRowDataProvider,
-                 timestamp_generator: Generator[dt.datetime, None, None]):
+                 timestamp_generator: Generator[dt.datetime, None, None],
+                 datasource: Dict,
+                 ):
+        self.ts_gen = timestamp_generator
+        self.t0 = next(iter(self.ts_gen))
+        self.last_ts = t0
+        self.ts_col_name = reader.timestamp_column_name
+        ts_col_embed = reader.columns.index(self.ts_col_name)
+        self.intp_col_embed = dict([cname: i for i, cname, ctype in enumerate(reader.columns) if isinstance(cname, Number)])
+
+        # col name -> (timestamp embedding, given value)
+        self.raw_data = dict(cname: ([], []) for cname in intp_cnames)
+        row_generator = reader.reader()
+
+        # populate self.inp_data
+        for row in row_generator:
+            for cname, col_idx in self.intp_col_embed:
+                if row[col_idx] is None:
+                    continue
+
+                row_ts = row[ts_col_embed]
+                self.last_ts = max(self.last_ts, row_ts)
+                tdelta_embed = self.__ts_embedding(row_ts, self.t0)
+                intp_t = self.raw_data[cname]
+                intp_t[0].append(tdelta_embed)
+                intp_t[1].append(row[col_idx])
 
-        pass
+        # interpolate aggregated data
+        self.intp_data = dict()
+        for cname, cdata in self.raw_data:
+            self.inp_data[cname] = interpolate.interp1d(*cdata, kind='cubic')
+
+    def __iter__(self):
+        self.ts_gen_iter = iter(self.ts_gen)
+        return self
 
     def __next__(self) -> Dict:
-        pass
\ No newline at end of file
+        ts = next(self.ts_gen_iter)
+        if ts > self.last_ts:
+            raise StopIteration
+
+        row_dict = {self.ts_col_name: ts}
+        ts_embed = self.__ts_embedding(ts, self.t0)
+
+        for cname in self.intp_data:
+            row_dict[cname] = self.inp_data[cname](ts_embed)
+
+        return row_dict
diff --git a/FCRgendata/tests/__init__.py b/FCRgendata/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/FCRgendata/tests/test_interpolator.py b/FCRgendata/tests/test_interpolator.py
new file mode 100644
index 0000000..bd7e54d
--- /dev/null
+++ b/FCRgendata/tests/test_interpolator.py
@@ -0,0 +1,6 @@
+import pytest
+
+
+@pytest.mark.interpolation
+def test_true():
+    assert True
-- 
GitLab


From 99f7dc84b55015126c891a3aae53f786c26f1e8b Mon Sep 17 00:00:00 2001
From: Szymon <szysad108@gmail.com>
Date: Tue, 2 Mar 2021 15:05:01 +0100
Subject: [PATCH 6/8] removed .env

---
 FCRgendata/.env | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 FCRgendata/.env

diff --git a/FCRgendata/.env b/FCRgendata/.env
deleted file mode 100644
index a328b41..0000000
--- a/FCRgendata/.env
+++ /dev/null
@@ -1 +0,0 @@
-PYTHONPATH=${PWD}/src
\ No newline at end of file
-- 
GitLab


From 09f76bc520814436f42b32b8c58e7b93e957a06a Mon Sep 17 00:00:00 2001
From: Szymon <szysad108@gmail.com>
Date: Tue, 2 Mar 2021 22:14:15 +0100
Subject: [PATCH 7/8] modified interpolator, added __init__.py files, added
 first simple test for interpolator (works), fixed deprecated decorators in
 data_reader tests

---
 FCRgendata/src/FCRGenData/interpolator.py     | 25 ++++----
 .../src/FCRGenData/rawDataReader/__init__.py  |  2 +-
 .../tests/data_reader/data_reader_template.py |  2 +-
 .../tests/data_reader/test_csv_reader.py      |  4 +-
 FCRgendata/tests/iterpolator/__init__.py      |  0
 FCRgendata/tests/iterpolator/mock_data.py     | 37 ++++++++++++
 .../tests/iterpolator/mock_row_provider.py    | 44 ++++++++++++++
 .../tests/iterpolator/test_interpolator.py    | 57 +++++++++++++++++++
 FCRgendata/tests/test_interpolator.py         |  6 --
 9 files changed, 155 insertions(+), 22 deletions(-)
 create mode 100644 FCRgendata/tests/iterpolator/__init__.py
 create mode 100644 FCRgendata/tests/iterpolator/mock_data.py
 create mode 100644 FCRgendata/tests/iterpolator/mock_row_provider.py
 create mode 100644 FCRgendata/tests/iterpolator/test_interpolator.py
 delete mode 100644 FCRgendata/tests/test_interpolator.py

diff --git a/FCRgendata/src/FCRGenData/interpolator.py b/FCRgendata/src/FCRGenData/interpolator.py
index d6d7b37..2f9c285 100644
--- a/FCRgendata/src/FCRGenData/interpolator.py
+++ b/FCRgendata/src/FCRGenData/interpolator.py
@@ -38,28 +38,29 @@ class InterpolatedDataStream:
             performance issues.
     '''
 
-    def __ts_embedding(ts: dt.datetime, ts0: dt.datatime) -> float:
+    def __ts_embedding(self, ts: dt.datetime, ts0: dt.datetime) -> int:
         return (ts - ts0).seconds
 
     def __init__(self,
                  reader: IRowDataProvider,
                  timestamp_generator: Generator[dt.datetime, None, None],
-                 datasource: Dict,
+                 datasource=None,
                  ):
         self.ts_gen = timestamp_generator
         self.t0 = next(iter(self.ts_gen))
-        self.last_ts = t0
+        self.last_ts = self.t0
         self.ts_col_name = reader.timestamp_column_name
-        ts_col_embed = reader.columns.index(self.ts_col_name)
-        self.intp_col_embed = dict([cname: i for i, cname, ctype in enumerate(reader.columns) if isinstance(cname, Number)])
+        ts_col_embed = reader.column_names.index(self.ts_col_name)
+        col_types = reader.columns
+        self.intp_col_embed = {cname: i for i, cname in enumerate(col_types) if issubclass(col_types[cname], Number)}
 
         # col name -> (timestamp embedding, given value)
-        self.raw_data = dict(cname: ([], []) for cname in intp_cnames)
+        self.raw_data = {cname: ([], []) for cname in self.intp_col_embed}
         row_generator = reader.reader()
 
         # populate self.inp_data
         for row in row_generator:
-            for cname, col_idx in self.intp_col_embed:
+            for cname, col_idx in self.intp_col_embed.items():
                 if row[col_idx] is None:
                     continue
 
@@ -71,9 +72,9 @@ class InterpolatedDataStream:
                 intp_t[1].append(row[col_idx])
 
         # interpolate aggregated data
-        self.intp_data = dict()
-        for cname, cdata in self.raw_data:
-            self.inp_data[cname] = interpolate.interp1d(*cdata, kind='cubic')
+        self.interpolants = dict()
+        for cname, cdata in self.raw_data.items():
+            self.interpolants[cname] = interpolate.interp1d(x=cdata[0], y=cdata[1], kind='linear')
 
     def __iter__(self):
         self.ts_gen_iter = iter(self.ts_gen)
@@ -87,7 +88,7 @@ class InterpolatedDataStream:
         row_dict = {self.ts_col_name: ts}
         ts_embed = self.__ts_embedding(ts, self.t0)
 
-        for cname in self.intp_data:
-            row_dict[cname] = self.inp_data[cname](ts_embed)
+        for cname in self.interpolants:
+            row_dict[cname] = self.interpolants[cname](ts_embed).flat[0]
 
         return row_dict
diff --git a/FCRgendata/src/FCRGenData/rawDataReader/__init__.py b/FCRgendata/src/FCRGenData/rawDataReader/__init__.py
index 5eca6a7..08778ba 100644
--- a/FCRgendata/src/FCRGenData/rawDataReader/__init__.py
+++ b/FCRgendata/src/FCRGenData/rawDataReader/__init__.py
@@ -1,2 +1,2 @@
 from .row_data_reader import IRowDataProvider
-from .csv_reader import RowCSVReader
\ No newline at end of file
+from .csv_reader import RowCSVReader
diff --git a/FCRgendata/tests/data_reader/data_reader_template.py b/FCRgendata/tests/data_reader/data_reader_template.py
index 58734e8..e984732 100644
--- a/FCRgendata/tests/data_reader/data_reader_template.py
+++ b/FCRgendata/tests/data_reader/data_reader_template.py
@@ -11,7 +11,7 @@ from FCRGenData.rawDataReader import IRowDataProvider
 
 class RawDataProviderTestTemplate(abc.ABC):
     @abc.abstractmethod
-    @pytest.yield_fixture
+    @pytest.fixture
     def reader_factory(self) -> Callable[[str, Optional[str]], IRowDataProvider]:
         pass
 
diff --git a/FCRgendata/tests/data_reader/test_csv_reader.py b/FCRgendata/tests/data_reader/test_csv_reader.py
index ae77e13..a0f6ed6 100644
--- a/FCRgendata/tests/data_reader/test_csv_reader.py
+++ b/FCRgendata/tests/data_reader/test_csv_reader.py
@@ -9,13 +9,13 @@ from .data_reader_template import RawDataProviderTestTemplate
 
 class TestCSVReader(RawDataProviderTestTemplate):
 
-    @pytest.yield_fixture
+    @pytest.fixture
     def empty_reader(self) -> RowCSVReader:
         tfile = tempfile.NamedTemporaryFile(suffix='.csv')
         yield RowCSVReader(tfile.name)
         tfile.close()
 
-    @pytest.yield_fixture
+    @pytest.fixture
     def reader_factory(self) -> Callable[[str], RowCSVReader]:
         tfile = tempfile.NamedTemporaryFile(suffix='.csv', mode='w')
         path = tfile.name
diff --git a/FCRgendata/tests/iterpolator/__init__.py b/FCRgendata/tests/iterpolator/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/FCRgendata/tests/iterpolator/mock_data.py b/FCRgendata/tests/iterpolator/mock_data.py
new file mode 100644
index 0000000..e61fee1
--- /dev/null
+++ b/FCRgendata/tests/iterpolator/mock_data.py
@@ -0,0 +1,37 @@
+import datetime as dt
+from typing import TypedDict, List, Any, Iterator, Tuple
+
+
+class MockData(TypedDict):
+    '''
+        Dict representing mock data
+        for IRowDataProvider.
+
+        `data` is a list of rows.
+        Every row should contain a timestamp in the form
+        of a dt.datetime.
+    '''
+    headers: List[str]
+    types: List[type]
+    ts_col_name: str
+    data: List[Tuple[Any]]
+
+
+def mock_data_factory(
+            header_names: List[str],
+            ts_col_name: str,
+            col_types: List[type],
+            col_gens: List[Iterator[Any]]
+        ):
+    '''
+        generates mock data dict based on
+        data headers, timestamp column name,
+        column types and column generators
+    '''
+
+    return MockData(
+        headers=header_names,
+        types=col_types,
+        ts_col_name=ts_col_name,
+        data=[row for row in zip(*col_gens)]
+    )
diff --git a/FCRgendata/tests/iterpolator/mock_row_provider.py b/FCRgendata/tests/iterpolator/mock_row_provider.py
new file mode 100644
index 0000000..0f694bd
--- /dev/null
+++ b/FCRgendata/tests/iterpolator/mock_row_provider.py
@@ -0,0 +1,44 @@
+import datetime as dt
+from typing import Tuple, Dict, Generator, Iterable, List
+
+from FCRGenData.rawDataReader import IRowDataProvider
+from .mock_data import MockData
+
+
+class MockRowDataProvider(IRowDataProvider):
+    '''
+        Mock IRowDataProvider implementation
+        for testing purposes. Returns data
+        based on given MockData
+    '''
+    def __init__(self,
+                 data: MockData,
+                 max_time_difference: dt.timedelta):
+        super().__init__(timestamp_column_name=data['ts_col_name'],
+                         max_time_difference=max_time_difference)
+        self.data = data['data']
+        self.headers = data['headers']
+        self.ts_col_name = data['ts_col_name']
+        self.types = data['types']
+
+    @property
+    def timestamp_column_name(self) -> str:
+        return self.ts_col_name
+
+    def peek_t0(self) -> dt.datetime:
+        ts_col_idx = self.headers.index(self.ts_col_name)
+        return self.data[0][ts_col_idx]
+
+    @property
+    def column_names(self) -> Tuple[str]:
+        return self.headers
+
+    @property
+    def columns(self) -> Dict[str, type]:
+        return {cname: ctype for cname, ctype in zip(self.headers, self.types)}
+
+    def reader_annotated(self) -> Generator[Dict[str, any], None, None]:
+        return map(lambda row: dict(zip(self.headers, row)), self.data)
+
+    def reader(self) -> Generator[List[type], None, None]:
+        yield from self.data
diff --git a/FCRgendata/tests/iterpolator/test_interpolator.py b/FCRgendata/tests/iterpolator/test_interpolator.py
new file mode 100644
index 0000000..d19bfe7
--- /dev/null
+++ b/FCRgendata/tests/iterpolator/test_interpolator.py
@@ -0,0 +1,57 @@
+import pytest
+import datetime as dt
+from typing import Dict, Any
+from numbers import Number
+
+from FCRGenData.interpolator import InterpolatedDataStream
+from .mock_row_provider import MockRowDataProvider
+from .mock_data import mock_data_factory
+
+
+def dict_eq_w_margin(d1: Dict[str, Any], d2: Dict[str, Any], margin: float) -> bool:
+    '''
+        Checks whether both dicts are equal within a margin,
+        meaning that both dicts have the same keys with the same values,
+        but numerical values may differ by no more than the given margin.
+    '''
+    if d1.keys() != d2.keys():
+        return False
+    
+    for key in d1:
+        if isinstance(d1[key], Number) and isinstance(d2[key], Number):
+            if abs(d1[key] - d2[key]) > margin:
+                return False
+        else:
+            if d1[key] != d2[key]:
+                return False
+
+    return True
+
+
+@pytest.mark.interpolation
+def test_same_vals_at_interpolation_nodes():
+    MAX_ERR = 1e-4
+    ROWS = 10
+    t0 = dt.datetime(year=2000, month=1, day=1)
+    ts_gen = [t0 + dt.timedelta(minutes=m) for m in range(ROWS)]
+    timestamp_cname = 'ts'
+
+    data = mock_data_factory(
+        header_names=[timestamp_cname, 'const1'],
+        ts_col_name=timestamp_cname,
+        col_types=[dt.datetime, int],
+        col_gens=[ts_gen, [1.] * ROWS]
+    )
+
+    rowProvider = MockRowDataProvider(
+        data=data,
+        max_time_difference=dt.timedelta(days=1)
+    )
+    interp = InterpolatedDataStream(
+        reader=rowProvider,
+        timestamp_generator=ts_gen,
+    )
+
+    rowProvGen = rowProvider.reader_annotated()
+    for d1, d2 in zip(interp, rowProvGen):
+        assert dict_eq_w_margin(d1, d2, MAX_ERR)
diff --git a/FCRgendata/tests/test_interpolator.py b/FCRgendata/tests/test_interpolator.py
deleted file mode 100644
index bd7e54d..0000000
--- a/FCRgendata/tests/test_interpolator.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import pytest
-
-
-@pytest.mark.interpolation
-def test_true():
-    assert True
-- 
GitLab


From 096b1b38e00516784519a0667a6d646e9f260cba Mon Sep 17 00:00:00 2001
From: Mateusz Mazur <m.mazur32@student.uw.edu.pl>
Date: Wed, 3 Mar 2021 15:46:16 +0100
Subject: [PATCH 8/8] added more tests to dataloader

---
 FCRdataLoader/data/data2.csv                  | 218 ------------------
 FCRdataLoader/data/data4.csv                  | 116 ----------
 FCRdataLoader/src/fcrdataloader/dataset.py    |   8 +-
 FCRdataLoader/tests/dataloader_test.py        | 131 +++++++++++
 FCRdataLoader/tests/seq1pred0_test.py         |  35 ---
 FCRdataLoader/tests/test_data/fcrdata10.csv   |  12 -
 FCRdataLoader/tests/test_data/fcrdata2.csv    |  11 -
 .../tests/test_data/fcrdata2and10.csv         |  22 ++
 8 files changed, 157 insertions(+), 396 deletions(-)
 delete mode 100644 FCRdataLoader/data/data2.csv
 delete mode 100644 FCRdataLoader/data/data4.csv
 create mode 100644 FCRdataLoader/tests/dataloader_test.py
 delete mode 100644 FCRdataLoader/tests/seq1pred0_test.py
 delete mode 100644 FCRdataLoader/tests/test_data/fcrdata10.csv
 delete mode 100644 FCRdataLoader/tests/test_data/fcrdata2.csv
 create mode 100644 FCRdataLoader/tests/test_data/fcrdata2and10.csv

diff --git a/FCRdataLoader/data/data2.csv b/FCRdataLoader/data/data2.csv
deleted file mode 100644
index aa2ee65..0000000
--- a/FCRdataLoader/data/data2.csv
+++ /dev/null
@@ -1,218 +0,0 @@
-timestamp,AvgResponseTime,AvgResponseTimePrediction,split,cardinality_Component_LB,provider_Component_LB,AppCardinality,cardinality_Component_DB,provider_Component_App,provider_Component_DB
-2020-11-27T12:44:35.519,6.0,9.435382843017578,0,1,0,1,0,1,0
-2020-11-27T12:45:35.519,17.0,8.48339557647705,0,1,0,1,0,1,0
-2020-11-27T12:46:35.519,10.0,23.755578994750977,0,1,0,1,0,1,0
-2020-11-27T12:47:35.519,5.0,11.223828315734863,0,1,0,1,0,1,0
-2020-11-27T12:48:35.519,43.0,7.788514137268066,0,1,0,1,0,1,0
-2020-11-27T12:49:35.519,10.0,44.337806701660156,0,1,0,1,0,1,0
-2020-11-27T12:50:35.519,4.0,8.92234992980957,0,1,0,1,0,1,0
-2020-11-27T12:51:35.519,5.0,7.031393051147461,0,1,0,1,0,1,0
-2020-11-27T12:52:35.519,6.0,7.168320655822754,0,1,0,1,0,1,0
-2020-11-27T12:53:35.519,8.0,7.513263702392578,0,1,0,1,0,1,0
-2020-11-27T12:54:35.519,4.0,9.204913139343262,0,1,0,1,0,1,0
-2020-11-27T12:55:35.519,14.0,6.85783052444458,0,1,0,1,0,1,0
-2020-11-27T12:56:35.519,17.33333333333333,20.1556453704834,0,1,0,1,0,1,0
-2020-11-27T12:57:35.519,12.0,15.963403701782227,0,1,0,1,0,1,0
-2020-11-27T12:58:35.519,4.0,12.1525297164917,0,1,0,1,0,1,0
-2020-11-27T12:59:35.519,4.0,6.540548324584961,0,1,0,1,0,1,0
-2020-11-27T13:00:35.519,6.0,5.795986652374268,0,1,0,1,0,1,0
-2020-11-27T13:01:35.519,7.0,6.7676005363464355,0,1,0,1,0,1,0
-2020-11-27T13:02:35.519,13.0,8.009368896484375,0,1,0,1,0,1,0
-2020-11-27T13:03:35.519,6.0,16.15487289428711,0,1,0,1,0,1,0
-2020-11-27T13:04:35.519,12.0,7.246679306030273,0,1,0,1,0,1,0
-2020-11-27T13:05:35.519,6.0,16.62491798400879,0,1,0,1,0,1,0
-2020-11-27T13:06:35.519,5.0,7.308323860168457,0,1,0,1,0,1,0
-2020-11-27T13:07:35.519,31.5,6.282835483551025,0,1,0,1,0,1,0
-2020-11-27T13:08:35.519,4.0,19.173612594604492,0,1,0,1,0,1,0
-2020-11-27T13:09:35.519,8.0,6.749720573425293,0,1,0,1,0,1,0
-2020-11-27T13:10:35.519,15.0,9.082990646362305,0,1,0,1,0,1,0
-2020-11-27T13:11:35.519,6.0,18.638320922851562,0,1,0,1,0,1,0
-2020-11-27T13:12:35.519,5.0,8.155741691589355,0,1,0,1,0,1,0
-2020-11-27T13:13:35.520,11.0,7.225281715393066,0,1,0,1,0,1,0
-2020-11-27T13:14:35.519,15.0,13.923819541931152,0,1,0,1,0,1,0
-2020-11-27T13:15:35.519,5.0,17.63955307006836,0,1,0,1,0,1,0
-2020-11-27T13:16:35.519,4.0,7.650225639343262,0,1,0,1,0,1,0
-2020-11-27T13:17:35.519,0.0,6.992892742156982,0,1,0,1,0,1,0
-2020-11-27T13:18:35.519,4.0,4.934108257293701,0,1,0,1,0,1,0
-2020-11-27T13:19:35.519,7.0,4.724740505218506,0,1,0,1,0,1,0
-2020-11-27T13:20:35.519,15.0,7.635990142822266,0,1,0,1,0,1,0
-2020-11-27T13:21:35.519,160.0,9.775514602661133,0,3,0,1,0,1,0
-2020-11-27T13:22:35.519,50.0,63.986534118652344,0,1,0,1,0,1,0
-2020-11-27T13:23:35.519,7.0,11.747467994689941,0,1,0,1,0,1,0
-2020-11-27T13:24:35.519,6.0,9.03959846496582,0,1,0,1,0,1,0
-2020-11-27T13:25:35.519,96.0,7.851625442504883,0,2,0,1,0,1,0
-2020-11-27T13:26:35.519,4.0,39.70072555541992,0,1,0,1,0,1,0
-2020-11-27T13:27:35.519,4.0,9.647521018981934,0,1,0,1,0,1,0
-2020-11-27T13:28:35.519,5.0,9.178874015808105,0,1,0,1,0,1,0
-2020-11-27T13:29:35.519,6.0,10.066906929016113,0,1,0,1,0,1,0
-2020-11-27T13:30:35.519,81.0,10.560870170593262,0,2,0,1,0,1,0
-2020-11-27T13:31:35.519,988.0,104.04701232910156,0,20,0,1,0,1,0
-2020-11-27T13:32:35.519,6.0,380.4679870605469,0,1,0,1,0,1,0
-2020-11-27T13:33:35.519,6.0,14.513042449951172,0,1,0,1,0,1,0
-2020-11-27T13:34:35.519,5.0,10.953410148620605,0,1,0,1,0,1,0
-2020-11-27T13:35:35.519,14.0,9.55089282989502,0,1,0,1,0,1,0
-2020-11-27T13:36:35.519,5.0,12.438122749328613,0,1,0,1,0,1,0
-2020-11-27T13:37:35.519,6.0,7.341762542724609,0,1,0,1,0,1,0
-2020-11-27T13:38:35.519,6.0,6.660945892333984,0,1,0,1,0,1,0
-2020-11-27T13:39:35.519,6.0,6.816884517669678,0,1,0,1,0,1,0
-2020-11-27T13:40:35.519,7.0,6.6493377685546875,0,1,0,1,0,1,0
-2020-11-27T13:41:35.519,8.0,5.92299222946167,0,1,0,1,0,1,0
-2020-11-27T13:42:35.519,6.0,5.889991283416748,0,1,0,1,0,1,0
-2020-11-27T13:43:35.519,5.0,6.110908508300781,0,1,0,1,0,1,0
-2020-11-27T13:44:35.519,4.0,5.786072254180908,0,1,0,1,0,1,0
-2020-11-27T13:45:35.519,0.0,5.522675514221191,0,1,0,1,0,1,0
-2020-11-27T13:46:35.519,8.0,4.02607536315918,0,1,0,1,0,1,0
-2020-11-27T13:47:35.519,6.0,11.456418991088867,0,1,0,1,0,1,0
-2020-11-27T13:48:35.519,0.0,5.000259876251221,0,1,0,1,0,1,0
-2020-11-27T13:49:35.519,43.0,3.127312660217285,0,1,0,1,0,1,0
-2020-11-27T13:50:35.519,0.0,30.338163375854492,0,1,0,1,0,1,0
-2020-11-27T13:51:35.521,5.0,2.966379165649414,0,1,0,1,0,1,0
-2020-11-27T13:52:35.519,5.0,3.754279613494873,0,1,0,1,0,1,0
-2020-11-27T13:53:35.519,0.0,3.5321502685546875,0,1,0,1,0,1,0
-2020-11-27T13:54:35.519,6.0,2.0308921337127686,0,1,0,1,0,1,0
-2020-11-27T13:55:35.519,6.0,4.462838172912598,0,1,0,1,0,1,0
-2020-11-27T13:56:35.519,0.0,4.041638374328613,0,1,0,1,0,1,0
-2020-11-27T13:57:35.519,7.0,2.098271369934082,0,1,0,1,0,1,0
-2020-11-27T13:58:35.519,105.5,5.880876064300537,0,2,0,1,0,1,0
-2020-11-27T13:59:35.519,0.0,110.12590026855469,0,1,0,1,0,1,0
-2020-11-27T14:00:35.519,287.0,2.413583278656006,0,6,0,1,0,1,0
-2020-11-27T14:01:35.519,14.0,335.0638732910156,0,1,0,1,0,1,0
-2020-11-27T14:02:35.519,5.0,5.429197311401367,0,1,0,1,0,1,0
-2020-11-27T14:03:35.519,8.0,4.946113109588623,0,1,0,1,0,1,0
-2020-11-27T14:04:35.519,5.0,6.548556327819824,0,1,0,1,0,1,0
-2020-11-27T14:05:35.519,6.0,5.965693950653076,0,1,0,1,0,1,0
-2020-11-27T14:06:35.519,726.0,6.134999752044678,0,14,0,1,0,1,0
-2020-11-27T14:07:35.519,4665.0,1021.2295532226562,0,47,0,1,0,1,0
-2020-11-27T14:08:35.519,0.0,3070.738037109375,0,1,0,1,0,1,0
-2020-11-27T14:09:35.520,9.0,14.447136878967285,0,1,0,1,0,1,0
-2020-11-27T14:10:35.519,5.0,10.972953796386719,0,1,0,1,0,1,0
-2020-11-27T14:11:35.519,4.0,7.169851303100586,0,1,0,1,0,1,0
-2020-11-27T14:12:35.519,7.0,5.326818943023682,0,1,0,1,0,1,0
-2020-11-27T14:13:35.519,6.0,5.767587661743164,0,1,0,1,0,1,0
-2020-11-27T14:14:35.519,6.0,5.402779579162598,0,1,0,1,0,1,0
-2020-11-27T14:15:35.519,6.0,5.32569694519043,0,1,0,1,0,1,0
-2020-11-27T14:16:35.519,18.0,4.896035671234131,0,1,0,1,0,1,0
-2020-11-27T14:17:35.519,3209.0,6.206113815307617,0,47,0,1,0,1,0
-2020-11-27T14:18:35.519,5.0,591.1841430664062,0,1,0,1,0,1,0
-2020-11-27T14:19:35.519,4.0,5.264420986175537,0,1,0,1,0,1,0
-2020-11-27T14:20:35.519,4.0,3.9137449264526367,0,1,0,1,0,1,0
-2020-11-27T14:21:35.519,6.0,3.76383638381958,0,1,0,1,0,1,0
-2020-11-27T14:22:35.519,6.0,4.28965950012207,0,1,0,1,0,1,0
-2020-11-27T14:23:35.519,5.0,4.339838027954102,0,1,0,1,0,1,0
-2020-11-27T14:24:35.519,15.5,4.126605033874512,0,1,0,1,0,1,0
-2020-11-27T14:25:35.519,16.0,6.732624530792236,0,1,0,1,0,1,0
-2020-11-27T14:26:35.519,7.0,7.165763854980469,0,1,0,1,0,1,0
-2020-11-27T14:27:35.519,5.0,5.099540710449219,0,1,0,1,0,1,0
-2020-11-27T14:28:35.519,4.0,4.222059726715088,0,1,0,1,0,1,0
-2020-11-27T14:29:35.519,4.0,4.934439659118652,0,1,0,1,0,1,0
-2020-11-27T14:30:35.519,5.0,4.902071475982666,0,1,0,1,0,1,0
-2020-11-27T14:31:35.519,7.0,5.144791603088379,0,1,0,1,0,1,0
-2020-11-27T14:32:35.519,26.0,6.1177077293396,0,1,0,1,0,1,0
-2020-11-27T14:33:35.519,2638.5,28.131595611572266,0,47,0,1,0,1,0
-2020-11-27T14:34:35.519,1067.0,616.666748046875,0,21,0,1,0,1,0
-2020-11-27T14:35:35.519,21.0,101.96915435791016,0,1,0,1,0,1,0
-2020-11-27T14:36:35.519,5.0,8.676255226135254,0,1,0,1,0,1,0
-2020-11-27T14:37:35.519,115.0,6.9384965896606445,0,2,0,1,0,1,0
-2020-11-27T14:38:35.519,5.0,57.02287292480469,0,1,0,1,0,1,0
-2020-11-27T14:39:35.519,7.0,11.969968795776367,0,1,0,1,0,1,0
-2020-11-27T14:40:35.519,6.0,12.453564643859863,0,1,0,1,0,1,0
-2020-11-27T14:41:35.519,7.0,12.684252738952637,0,1,0,1,0,1,0
-2020-11-27T14:42:35.519,7.0,12.762392044067383,0,1,0,1,0,1,0
-2020-11-27T14:43:35.519,6.0,10.544450759887695,0,1,0,1,0,1,0
-2020-11-27T14:44:35.519,16.0,7.521728515625,0,1,0,1,0,1,0
-2020-11-27T14:45:35.519,5.0,9.791584014892578,0,1,0,1,0,1,0
-2020-11-27T14:46:35.519,8.0,6.312222480773926,0,1,0,1,0,1,0
-2020-11-27T14:47:35.519,4.0,6.674623966217041,0,1,0,1,0,1,0
-2020-11-27T14:48:35.519,5.0,5.500234127044678,0,1,0,1,0,1,0
-2020-11-27T14:49:35.519,7.0,5.7142205238342285,0,1,0,1,0,1,0
-2020-11-27T14:50:35.519,5.0,6.574368953704834,0,1,0,1,0,1,0
-2020-11-27T14:51:35.519,4.0,5.641943454742432,0,1,0,1,0,1,0
-2020-11-27T14:52:35.519,48.0,5.242447853088379,0,1,0,1,0,1,0
-2020-11-27T14:53:35.519,10.0,31.18132781982422,0,1,0,1,0,1,0
-2020-11-27T14:54:35.519,17.0,6.470431327819824,0,1,0,1,0,1,0
-2020-11-27T14:55:35.519,92.0,17.268766403198242,0,2,0,1,0,1,0
-2020-11-27T14:56:35.519,5.0,94.94540405273438,0,1,0,1,0,1,0
-2020-11-27T14:57:35.519,0.0,6.798692226409912,0,1,0,1,0,1,0
-2020-11-27T14:58:35.519,5.0,5.002079963684082,0,1,0,1,0,1,0
-2020-11-27T14:59:35.519,5.0,5.192873477935791,0,1,0,1,0,1,0
-2020-11-27T15:00:35.519,47.0,5.093393325805664,0,1,0,1,0,1,0
-2020-11-27T15:01:35.519,6.0,66.24467468261719,0,1,0,1,0,1,0
-2020-11-27T15:02:35.519,106.5,6.989488124847412,0,2,0,1,0,1,0
-2020-11-27T15:03:35.519,18.0,114.1700668334961,0,1,0,1,0,1,0
-2020-11-27T15:04:35.519,4.0,9.336390495300293,0,1,0,1,0,1,0
-2020-11-27T15:05:35.519,5.0,6.51580810546875,0,1,0,1,0,1,0
-2020-11-27T15:06:35.519,8.0,5.369061470031738,0,1,0,1,0,1,0
-2020-11-27T15:07:35.519,7.0,5.92508602142334,0,1,0,1,0,1,0
-2020-11-27T15:08:35.519,6.0,6.801459312438965,0,1,0,1,0,1,0
-2020-11-27T15:09:35.519,5.0,7.037836074829102,0,1,0,1,0,1,0
-2020-11-27T15:10:35.519,6.5,6.7276225090026855,0,1,0,1,0,1,0
-2020-11-27T15:11:35.519,6.0,6.155050754547119,0,1,0,1,0,1,0
-2020-11-27T15:12:35.519,65.0,5.615631103515625,0,1,0,1,0,1,0
-2020-11-27T15:13:35.519,5.0,59.99169921875,0,1,0,1,0,1,0
-2020-11-27T15:14:35.519,0.0,5.508193492889404,0,1,0,1,0,1,0
-2020-11-27T15:15:35.519,7.0,3.9370083808898926,1,1,0,1,0,1,0
-2020-11-27T15:16:35.519,6.0,5.107851505279541,1,1,0,1,0,1,0
-2020-11-27T15:17:35.519,0.0,4.678167343139648,1,1,0,1,0,1,0
-2020-11-27T15:18:35.519,6.0,2.993084669113159,1,1,0,1,0,1,0
-2020-11-27T15:19:35.519,0.0,4.081082344055176,1,1,0,1,0,1,0
-2020-11-27T15:20:35.519,0.0,2.3085193634033203,1,1,0,1,0,1,0
-2020-11-27T15:21:35.519,0.0,1.4350025653839111,1,1,0,1,0,1,0
-2020-11-27T15:22:35.519,0.0,0.8783214092254639,1,1,0,1,0,1,0
-2020-11-27T15:23:35.519,0.0,0.5170314311981201,1,1,0,1,0,1,0
-2020-11-27T15:24:35.519,0.0,0.30115222930908203,1,1,0,1,0,1,0
-2020-11-27T15:25:35.519,0.0,0.19430434703826904,1,1,0,1,0,1,0
-2020-11-27T15:26:35.519,0.0,0.06121242046356201,1,1,0,1,0,1,0
-2020-11-27T15:27:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:28:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:29:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:30:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:31:35.520,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:32:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:33:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:34:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:35:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:36:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:37:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:38:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:39:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:40:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:41:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:42:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:43:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:44:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:45:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:46:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:47:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:48:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:49:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:50:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:51:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:52:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:53:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:54:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:55:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:56:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:57:35.519,0.0,0.0,1,1,0,1,0,1,0
-2020-11-27T15:58:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T15:59:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:00:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:01:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:02:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:03:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:04:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:05:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:06:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:07:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:08:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:09:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:10:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:11:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:12:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:13:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:14:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:15:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:16:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:17:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:18:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:19:35.519,0.0,0.0,-1,1,0,1,0,1,0
-2020-11-27T16:20:35.519,0.0,0.0,-1,1,0,1,0,1,0
diff --git a/FCRdataLoader/data/data4.csv b/FCRdataLoader/data/data4.csv
deleted file mode 100644
index 0d25180..0000000
--- a/FCRdataLoader/data/data4.csv
+++ /dev/null
@@ -1,116 +0,0 @@
-timestamp,AvgResponseTime,AvgResponseTimePrediction,split,cardinality_Component_LB,provider_Component_LB,AppCardinality,cardinality_Component_DB,provider_Component_App,provider_Component_DB
-2020-12-15T11:49:00,0.0,2.880878210067749,0,1,0,1,0,1,0
-2020-12-15T11:50:00,0.0,1.698030710220337,0,1,0,1,0,1,0
-2020-12-15T11:51:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T11:52:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T11:53:00,0.0,0.3707929849624634,0,1,0,1,0,1,0
-2020-12-15T11:54:00,0.0,0.45026612281799316,0,1,0,1,0,1,0
-2020-12-15T11:55:00,0.0,0.41281116008758545,0,1,0,1,0,1,0
-2020-12-15T11:56:00,0.0,0.3778296709060669,0,1,0,1,0,1,0
-2020-12-15T11:57:00,0.0,0.42775797843933105,0,1,0,1,0,1,0
-2020-12-15T11:58:00,0.0,52.15424346923828,0,1,0,1,0,1,0
-2020-12-15T11:59:00,0.0,1.3606882095336914,0,1,0,1,0,1,0
-2020-12-15T12:00:00,0.0,1.7871627807617188,0,1,0,1,0,1,0
-2020-12-15T12:01:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T12:02:00,0.0,0.29342174530029297,0,1,0,1,0,1,0
-2020-12-15T12:03:00,0.0,0.22346079349517822,0,1,0,1,0,1,0
-2020-12-15T12:04:00,0.0,0.20592844486236572,0,1,0,1,0,1,0
-2020-12-15T12:05:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T12:06:00,0.0,0.2854079008102417,0,1,0,1,0,1,0
-2020-12-15T12:07:00,0.0,2.0263261795043945,0,1,0,1,0,1,0
-2020-12-15T12:08:00,0.0,1.193394660949707,0,1,0,1,0,1,0
-2020-12-15T12:09:00,0.0,0.6382132768630981,0,1,0,1,0,1,0
-2020-12-15T12:10:00,0.0,14.562797546386719,0,1,0,1,0,1,0
-2020-12-15T12:11:00,0.0,0.7512111663818359,0,1,0,1,0,1,0
-2020-12-15T12:12:00,0.0,0.8914639949798584,0,1,0,1,0,1,0
-2020-12-15T12:13:00,0.0,0.7688208818435669,0,1,0,1,0,1,0
-2020-12-15T12:14:00,0.0,0.7653945684432983,0,1,0,1,0,1,0
-2020-12-15T12:15:00,0.0,1.1794123649597168,0,1,0,1,0,1,0
-2020-12-15T12:16:00,0.0,1.1221184730529785,0,1,0,1,0,1,0
-2020-12-15T12:17:00,0.0,1.2044415473937988,0,1,0,1,0,1,0
-2020-12-15T12:18:00,0.0,1.167661190032959,0,1,0,1,0,1,0
-2020-12-15T12:19:00,0.0,29.40463638305664,0,1,0,1,0,1,0
-2020-12-15T12:20:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T12:21:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T12:22:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T12:23:00,0.0,0.26250433921813965,0,1,0,1,0,1,0
-2020-12-15T12:24:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T12:25:00,0.0,0.01861274242401123,0,1,0,1,0,1,0
-2020-12-15T12:26:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T12:27:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T12:28:00,0.0,1.031090259552002,0,1,0,1,0,1,0
-2020-12-15T12:29:00,0.0,0.1976233720779419,0,1,0,1,0,1,0
-2020-12-15T12:30:00,0.0,0.16222071647644043,0,1,0,1,0,1,0
-2020-12-15T12:31:00,0.0,0.45725953578948975,0,1,0,1,0,1,0
-2020-12-15T12:32:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T12:33:00,0.0,0.6110225915908813,0,1,0,1,0,1,0
-2020-12-15T12:34:00,0.0,0.5869172811508179,0,1,0,1,0,1,0
-2020-12-15T12:35:00,0.0,824.6790161132812,0,1,0,1,0,1,0
-2020-12-15T12:36:00,0.0,143.26446533203125,0,1,0,1,0,1,0
-2020-12-15T12:37:00,0.0,0.9533004760742188,0,1,0,1,0,1,0
-2020-12-15T12:38:00,0.0,6.044524192810059,0,1,0,1,0,1,0
-2020-12-15T12:39:00,0.0,2.429046630859375,0,1,0,1,0,1,0
-2020-12-15T12:40:00,0.0,2.7120466232299805,0,1,0,1,0,1,0
-2020-12-15T12:41:00,0.0,2.1224734783172607,0,1,0,1,0,1,0
-2020-12-15T12:42:00,0.0,1.8528714179992676,0,1,0,1,0,1,0
-2020-12-15T12:43:00,0.0,1.8902008533477783,0,1,0,1,0,1,0
-2020-12-15T12:44:00,0.0,1.9152851104736328,0,1,0,1,0,1,0
-2020-12-15T12:45:00,0.0,1.8422601222991943,0,1,0,1,0,1,0
-2020-12-15T12:46:00,0.0,32.213531494140625,0,1,0,1,0,1,0
-2020-12-15T12:47:00,0.0,4.091504096984863,0,1,0,1,0,1,0
-2020-12-15T12:48:00,0.0,10.508217811584473,0,1,0,1,0,1,0
-2020-12-15T12:49:00,0.0,2.708798885345459,0,1,0,1,0,1,0
-2020-12-15T12:50:00,0.0,2.3304004669189453,0,1,0,1,0,1,0
-2020-12-15T12:51:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T12:52:00,0.0,1.0770142078399658,0,1,0,1,0,1,0
-2020-12-15T12:53:00,0.0,0.8451101779937744,0,1,0,1,0,1,0
-2020-12-15T12:54:00,0.0,1.1430389881134033,0,1,0,1,0,1,0
-2020-12-15T12:55:00,0.0,1.2068374156951904,0,1,0,1,0,1,0
-2020-12-15T12:56:00,0.0,728.1805419921875,0,1,0,1,0,1,0
-2020-12-15T12:57:00,0.0,15.266721725463867,0,1,0,1,0,1,0
-2020-12-15T12:58:00,0.0,2.8867130279541016,0,1,0,1,0,1,0
-2020-12-15T12:59:00,0.0,2.095475196838379,0,1,0,1,0,1,0
-2020-12-15T13:00:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T13:01:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T13:02:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T13:03:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T13:04:00,0.0,0.0,0,1,0,1,0,1,0
-2020-12-15T13:05:00,0.0,0.0,1,1,0,1,0,1,0
-2020-12-15T13:06:00,0.0,204.33526611328125,1,1,0,1,0,1,0
-2020-12-15T13:07:00,0.0,2.2900924682617188,1,1,0,1,0,1,0
-2020-12-15T13:08:00,0.0,4.229484558105469,1,1,0,1,0,1,0
-2020-12-15T13:09:00,0.0,2.979954957962036,1,1,0,1,0,1,0
-2020-12-15T13:10:00,0.0,0.8080476522445679,1,1,0,1,0,1,0
-2020-12-15T13:11:00,0.0,2.4772040843963623,1,1,0,1,0,1,0
-2020-12-15T13:12:00,0.0,2.2220327854156494,1,1,0,1,0,1,0
-2020-12-15T13:13:00,0.0,365.6095275878906,1,1,0,1,0,1,0
-2020-12-15T13:14:00,0.0,2.6103553771972656,1,1,0,1,0,1,0
-2020-12-15T13:15:00,0.0,0.0,1,1,0,1,0,1,0
-2020-12-15T13:16:00,0.0,0.0,1,1,0,1,0,1,0
-2020-12-15T13:17:00,0.0,730.0831909179688,1,1,0,1,0,1,0
-2020-12-15T13:18:00,0.0,5.1552734375,1,1,0,1,0,1,0
-2020-12-15T13:19:00,0.0,5.315293312072754,1,1,0,1,0,1,0
-2020-12-15T13:20:00,0.0,3.747960090637207,1,1,0,1,0,1,0
-2020-12-15T13:21:00,0.0,3.213507652282715,1,1,0,1,0,1,0
-2020-12-15T13:22:00,0.0,3.209949493408203,1,1,0,1,0,1,0
-2020-12-15T13:23:00,0.0,12.009992599487305,1,1,0,1,0,1,0
-2020-12-15T13:24:00,0.0,68.0241928100586,1,1,0,1,0,1,0
-2020-12-15T13:25:00,0.0,5.560791492462158,1,1,0,1,0,1,0
-2020-12-15T13:26:00,0.0,4.588584899902344,1,1,0,1,0,1,0
-2020-12-15T13:27:00,0.0,2.873000144958496,1,1,0,1,0,1,0
-2020-12-15T13:28:00,0.0,2.874795436859131,1,1,0,1,0,1,0
-2020-12-15T13:29:00,0.0,2.6174628734588623,1,1,0,1,0,1,0
-2020-12-15T13:30:00,0.0,0.0,1,1,0,1,0,1,0
-2020-12-15T13:31:00,0.0,0.7121289968490601,-1,1,0,1,0,1,0
-2020-12-15T13:32:00,0.0,0.5510646104812622,-1,1,0,1,0,1,0
-2020-12-15T13:33:00,0.0,0.3965524435043335,-1,1,0,1,0,1,0
-2020-12-15T13:34:00,0.0,0.26016998291015625,-1,1,0,1,0,1,0
-2020-12-15T13:35:00,0.0,0.129294753074646,-1,1,0,1,0,1,0
-2020-12-15T13:36:00,0.0,0.0,-1,1,0,1,0,1,0
-2020-12-15T13:37:00,0.0,0.0,-1,1,0,1,0,1,0
-2020-12-15T13:38:00,0.0,0.0,-1,1,0,1,0,1,0
-2020-12-15T13:39:00,0.0,0.0,-1,1,0,1,0,1,0
-2020-12-15T13:40:00,0.0,0.0,-1,1,0,1,0,1,0
-2020-12-15T13:41:00,0.0,0.0,-1,1,0,1,0,1,0
-2020-12-15T13:42:00,0.0,0.0,-1,1,0,1,0,1,0
-2020-12-15T13:43:00,0.0,0.0,-1,1,0,1,0,1,0
diff --git a/FCRdataLoader/src/fcrdataloader/dataset.py b/FCRdataLoader/src/fcrdataloader/dataset.py
index 9c6a2a5..70d7ef8 100644
--- a/FCRdataLoader/src/fcrdataloader/dataset.py
+++ b/FCRdataLoader/src/fcrdataloader/dataset.py
@@ -147,7 +147,7 @@ class SequenceForecastMultiDistributionDatasetFactory:
 
     def __preproces_data(self, seq_len, pred_step):
         series = self.load_series(self.file)
-        sizes = np.zeros(len(series), dtype=np.int)
+        sizes = np.zeros(len(series), dtype=np.int32)
         self.x = [None] * len(series)
         self.y = [None] * len(series)
 
@@ -167,10 +167,10 @@ class SequenceForecastMultiDistributionDatasetFactory:
                 raise ValueError(f"Error with given seq_len: {seq_len} and pred_step: {pred_step} "
                                  f"dataset can't return any data from file {self.file} (they are probably too big)")
 
-        cumsizes = np.cumsum(sizes, dtype=np.int)
+        cumsizes = np.cumsum(sizes, dtype=np.int32)
         self.size = cumsizes[-1].item()
-        self.idx_file_map = np.searchsorted(cumsizes, np.arange(self.size) + 1).astype(np.int)
-        self.idx_file_idx_map = np.zeros(self.size).astype(np.int)
+        self.idx_file_map = np.searchsorted(cumsizes, np.arange(self.size) + 1).astype(np.int32)
+        self.idx_file_idx_map = np.zeros(self.size).astype(np.int32)
 
         self.idx_file_idx_map[0] = 0
         cnt = 1
diff --git a/FCRdataLoader/tests/dataloader_test.py b/FCRdataLoader/tests/dataloader_test.py
new file mode 100644
index 0000000..d96da10
--- /dev/null
+++ b/FCRdataLoader/tests/dataloader_test.py
@@ -0,0 +1,131 @@
+import pytest
+from fcrdataloader.dataset import SequenceForecastMultiDistributionDatasetFactory as SFMDDF
+from torch.utils.data import DataLoader
+from pathlib import Path
+
+TEST_FILE_DIR = Path(__file__).resolve().parent / 'test_data'  # resolved next to this file, not the working directory
+ROWS_IN_FIRST_EXPERIMENT = 10  # number of rows with Experiment_Id == 2 in fcrdata2and10.csv
+
+@pytest.mark.parametrize("pred_step", [0, 1, 2, 5])
+def test_prediction(pred_step):
+    """With seq_len=1, x[0] comes from the sample's row; x[1:3] and y come from `pred_step` rows later."""
+    factory = SFMDDF(
+        seq_len=1,
+        pred_step=pred_step,
+        file=Path(TEST_FILE_DIR).joinpath("fcrdata2and10.csv"),
+        x_y_split=3,
+        usecols=range(1, 11),
+        experiment_id_col=9,
+        x_predictions_cols=range(1, 3)
+    )
+    loader = DataLoader(factory.get_train_dataset(), batch_size=1)
+
+    for row, (x, y) in enumerate(loader):
+        # Once the samples switch from the first experiment to the second, the
+        # expected source row runs `pred_step` ahead of the sample index.
+        if row >= ROWS_IN_FIRST_EXPERIMENT - pred_step:
+            row += pred_step
+        x = x.reshape(-1)
+        y = y.reshape(-1)
+
+        assert x[0] == float(f"{row}.0")
+        for col in range(1, 3):
+            assert x[col] == float(f"{row + pred_step}.{col}")
+        for col in range(0, 6):
+            assert y[col] == float(f"{row + pred_step}.{col + 3}")
+
+
+@pytest.mark.parametrize("seq_len", [1, 2, 3, 5])
+def test_sequences(seq_len):
+    """With pred_step=0, x holds `seq_len` consecutive rows and y matches the last of them."""
+    factory = SFMDDF(
+        seq_len=seq_len,
+        pred_step=0,
+        file=Path(TEST_FILE_DIR).joinpath("fcrdata2and10.csv"),
+        x_y_split=3,
+        usecols=range(1, 11),
+        experiment_id_col=9,
+        x_predictions_cols=range(1, 3)
+    )
+    loader = DataLoader(factory.get_train_dataset(), batch_size=1)
+
+    for row, (x, y) in enumerate(loader):
+        # Once the samples switch from the first experiment to the second, the
+        # expected first row runs `seq_len - 1` ahead of the sample index.
+        if row > ROWS_IN_FIRST_EXPERIMENT - seq_len:
+            row += seq_len - 1
+        x = x.reshape(-1)
+        y = y.reshape(-1)
+        for sequence_num in range(seq_len):
+            for col in range(0, 3):
+                assert x[3 * sequence_num + col] == float(f"{row + sequence_num}.{col}")
+        for col in range(0, 6):
+            assert y[col] == float(f"{row + seq_len - 1}.{col + 3}")
+
+
+@pytest.mark.parametrize("pred_step", [0, 1, 2, 5])
+@pytest.mark.parametrize("seq_len", [1, 2, 3, 5])
+def test_sequences_and_predictions(seq_len, pred_step):
+    """Check every seq_len / pred_step pairing: column 0 is unshifted, the rest are `pred_step` ahead."""
+    factory = SFMDDF(
+        seq_len=seq_len,
+        pred_step=pred_step,
+        file=Path(TEST_FILE_DIR).joinpath("fcrdata2and10.csv"),
+        x_y_split=3,
+        usecols=range(1, 11),
+        experiment_id_col=9,
+        x_predictions_cols=range(1, 3)
+    )
+    train_set = factory.get_train_dataset()
+    loader = DataLoader(train_set, batch_size=1)
+
+    for row, (x, y) in enumerate(loader):
+        # Once the samples switch from the first experiment to the second, the
+        # expected first row runs `seq_len - 1 + pred_step` ahead of the sample index.
+        wanted_value = row
+        if row > ROWS_IN_FIRST_EXPERIMENT - seq_len - pred_step:
+            wanted_value += seq_len - 1 + pred_step
+
+        x = x.reshape(-1)
+        y = y.reshape(-1)
+        for sequence_num in range(seq_len):
+            source_row = wanted_value + sequence_num
+            assert x[3 * sequence_num] == float(f"{source_row}.0")
+            for col in range(1, 3):
+                assert x[3 * sequence_num + col] == float(f"{source_row + pred_step}.{col}")
+
+        target_row = wanted_value + seq_len - 1 + pred_step
+        for col in range(0, 6):
+            assert y[col] == float(f"{target_row}.{col + 3}")
+
+def test_wrong_input():
+    """Check that the factory rejects seq_len / pred_step values too large for the first experiment.
+
+    Both constructions are expected to raise ValueError; pytest.raises reports
+    each failure on its own instead of tracking boolean flags by hand.
+    """
+    data_file = Path(TEST_FILE_DIR).joinpath("fcrdata2and10.csv")
+
+    # A sequence one row longer than the first experiment.
+    with pytest.raises(ValueError):
+        SFMDDF(
+            seq_len=ROWS_IN_FIRST_EXPERIMENT + 1,
+            pred_step=0,
+            file=data_file,
+            x_y_split=3,
+            usecols=range(1, 11),
+            experiment_id_col=9,
+            x_predictions_cols=range(1, 3)
+        )
+
+    # A prediction step as large as the first experiment's row count.
+    with pytest.raises(ValueError):
+        SFMDDF(
+            seq_len=1,
+            pred_step=ROWS_IN_FIRST_EXPERIMENT,
+            file=data_file,
+            x_y_split=3,
+            usecols=range(1, 11),
+            experiment_id_col=9,
+            x_predictions_cols=range(1, 3)
+        )
diff --git a/FCRdataLoader/tests/seq1pred0_test.py b/FCRdataLoader/tests/seq1pred0_test.py
deleted file mode 100644
index 9617f12..0000000
--- a/FCRdataLoader/tests/seq1pred0_test.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import unittest
-from fcrdataloader.dataset import SequenceForecastMultiDistributionDatasetFactory as SFMDDF
-from torch.utils.data import DataLoader
-from pathlib import Path
-import os
-
-
-TEST_FILE_DIR = 'tests/test_data'
-
-
-class FCRCorrectDataTest(unittest.TestCase):
-
-    def test_seq1_pred0(self):
-        factory = SFMDDF(
-            seq_len=1,
-            pred_step=0,
-            files=list(Path(TEST_FILE_DIR).glob("*")),
-            x_y_split=3,
-            usecols=range(1, 10),
-            x_predictions_cols=range(1, 3)
-        )
-        train_set = factory.get_train_dataset()
-        loader = DataLoader(train_set, batch_size=1)
-
-        for row, (x, y) in enumerate(loader):
-            x = x.reshape(-1)
-            y = y.reshape(-1)
-            for col in range(0, 3):
-                self.assertEqual(x[col], float(f"{row}.{col}"))
-            for col in range(0, 6):
-                self.assertEqual(y[col], float(f"{row}.{col + 3}"))
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/FCRdataLoader/tests/test_data/fcrdata10.csv b/FCRdataLoader/tests/test_data/fcrdata10.csv
deleted file mode 100644
index 285e8c5..0000000
--- a/FCRdataLoader/tests/test_data/fcrdata10.csv
+++ /dev/null
@@ -1,12 +0,0 @@
-timestamp,AvgResponseTime,AvgResponseTimePrediction,split,cardinality_Component_LB,provider_Component_LB,AppCardinality,cardinality_Component_DB,provider_Component_App,provider_Component_DB
-2020-11-27T12:44:35.519,10.0,10.1,10.2,10.3,10.4,10.5,10.6,10.7,10.8,10.9
-2020-11-27T12:44:35.519,11.0,11.1,11.2,11.3,11.4,11.5,11.6,11.7,11.8,11.9
-2020-11-27T12:44:35.519,12.0,12.1,12.2,12.3,12.4,12.5,12.6,12.7,12.8,12.9
-2020-11-27T12:44:35.519,13.0,13.1,13.2,13.3,13.4,13.5,13.6,13.7,13.8,13.9
-2020-11-27T12:44:35.519,14.0,14.1,14.2,14.3,14.4,14.5,14.6,14.7,14.8,14.9
-2020-11-27T12:44:35.519,15.0,15.1,15.2,15.3,15.4,15.5,15.6,15.7,15.8,15.9
-2020-11-27T12:44:35.519,16.0,16.1,16.2,16.3,16.4,16.5,16.6,16.7,16.8,16.9
-2020-11-27T12:44:35.519,17.0,17.1,17.2,17.3,17.4,17.5,17.6,17.7,17.8,17.9
-2020-11-27T12:44:35.519,18.0,18.1,18.2,18.3,18.4,18.5,18.6,18.7,18.8,18.9
-2020-11-27T12:44:35.519,18.0,18.1,18.2,18.3,18.4,18.5,18.6,18.7,18.8,18.9
-2020-11-27T12:44:35.519,20.0,20.1,20.2,20.3,20.4,20.5,20.6,20.7,20.8,20.9
diff --git a/FCRdataLoader/tests/test_data/fcrdata2.csv b/FCRdataLoader/tests/test_data/fcrdata2.csv
deleted file mode 100644
index 7ef612e..0000000
--- a/FCRdataLoader/tests/test_data/fcrdata2.csv
+++ /dev/null
@@ -1,11 +0,0 @@
-timestamp,AvgResponseTime,AvgResponseTimePrediction,split,cardinality_Component_LB,provider_Component_LB,AppCardinality,cardinality_Component_DB,provider_Component_App,provider_Component_DB
-2020-11-27T12:44:35.519,0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
-2020-11-27T12:44:35.519,1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9
-2020-11-27T12:44:35.519,2.0,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8,2.9
-2020-11-27T12:44:35.519,3.0,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8,3.9
-2020-11-27T12:44:35.519,4.0,4.1,4.2,4.3,4.4,4.5,4.6,4.7,4.8,4.9
-2020-11-27T12:44:35.519,5.0,5.1,5.2,5.3,5.4,5.5,5.6,5.7,5.8,5.9
-2020-11-27T12:44:35.519,6.0,6.1,6.2,6.3,6.4,6.5,6.6,6.7,6.8,6.9
-2020-11-27T12:44:35.519,7.0,7.1,7.2,7.3,7.4,7.5,7.6,7.7,7.8,7.9
-2020-11-27T12:44:35.519,8.0,8.1,8.2,8.3,8.4,8.5,8.6,8.7,8.8,8.9
-2020-11-27T12:44:35.519,9.0,9.1,9.2,9.3,9.4,9.5,9.6,9.7,9.8,9.9
diff --git a/FCRdataLoader/tests/test_data/fcrdata2and10.csv b/FCRdataLoader/tests/test_data/fcrdata2and10.csv
new file mode 100644
index 0000000..448baa1
--- /dev/null
+++ b/FCRdataLoader/tests/test_data/fcrdata2and10.csv
@@ -0,0 +1,22 @@
+timestamp,AvgResponseTime,AvgResponseTimePrediction,split,cardinality_Component_LB,provider_Component_LB,AppCardinality,cardinality_Component_DB,provider_Component_App,provider_Component_DB,Experiment_Id
+2020-11-27T12:44:35.519,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,2
+2020-11-27T12:44:35.519,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,2
+2020-11-27T12:44:35.519,2,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8,2
+2020-11-27T12:44:35.519,3,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8,2
+2020-11-27T12:44:35.519,4,4.1,4.2,4.3,4.4,4.5,4.6,4.7,4.8,2
+2020-11-27T12:44:35.519,5,5.1,5.2,5.3,5.4,5.5,5.6,5.7,5.8,2
+2020-11-27T12:44:35.519,6,6.1,6.2,6.3,6.4,6.5,6.6,6.7,6.8,2
+2020-11-27T12:44:35.519,7,7.1,7.2,7.3,7.4,7.5,7.6,7.7,7.8,2
+2020-11-27T12:44:35.519,8,8.1,8.2,8.3,8.4,8.5,8.6,8.7,8.8,2
+2020-11-27T12:44:35.519,9,9.1,9.2,9.3,9.4,9.5,9.6,9.7,9.8,2
+2020-11-27T12:44:35.519,10,10.1,10.2,10.3,10.4,10.5,10.6,10.7,10.8,10
+2020-11-27T12:44:35.519,11,11.1,11.2,11.3,11.4,11.5,11.6,11.7,11.8,10
+2020-11-27T12:44:35.519,12,12.1,12.2,12.3,12.4,12.5,12.6,12.7,12.8,10
+2020-11-27T12:44:35.519,13,13.1,13.2,13.3,13.4,13.5,13.6,13.7,13.8,10
+2020-11-27T12:44:35.519,14,14.1,14.2,14.3,14.4,14.5,14.6,14.7,14.8,10
+2020-11-27T12:44:35.519,15,15.1,15.2,15.3,15.4,15.5,15.6,15.7,15.8,10
+2020-11-27T12:44:35.519,16,16.1,16.2,16.3,16.4,16.5,16.6,16.7,16.8,10
+2020-11-27T12:44:35.519,17,17.1,17.2,17.3,17.4,17.5,17.6,17.7,17.8,10
+2020-11-27T12:44:35.519,18,18.1,18.2,18.3,18.4,18.5,18.6,18.7,18.8,10
+2020-11-27T12:44:35.519,19,19.1,19.2,19.3,19.4,19.5,19.6,19.7,19.8,10
+2020-11-27T12:44:35.519,20,20.1,20.2,20.3,20.4,20.5,20.6,20.7,20.8,10
-- 
GitLab