diff --git a/deployment/nbeats/Dockerfile b/deployment/nbeats/Dockerfile
index 704d9f032eb177bc33e1a28aac788e8e7285d72f..5ed0defaa88ce434b7f87acb2baa9ca0bc59a95f 100644
--- a/deployment/nbeats/Dockerfile
+++ b/deployment/nbeats/Dockerfile
@@ -2,32 +2,23 @@ FROM python:3.8-slim-buster
 
 # Install Python dependencies.
 WORKDIR /wd
+COPY deployment/nbeats/requirements.txt .
+RUN pip3 install --no-cache-dir -r requirements.txt && mkdir models
 
-COPY deployment/nbeats/poetry.lock deployment/nbeats/pyproject.toml /wd/
-
-# Install and setup poetry
-RUN pip install -U pip \
-    && apt-get update \
-    && apt install -y curl netcat \
-    && curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python -
-ENV PATH="${PATH}:/root/.poetry/bin"
-
-RUN poetry config virtualenvs.create false \
-    && poetry install --no-interaction --no-ansi --no-dev
-
-ADD https://gitlab.ow2.org/melodic/morphemic-preprocessor/-/archive/morphemic-rc2.0/morphemic-preprocessor-morphemic-rc2.0.tar.gz /var/lib/morphemic/
+ADD https://gitlab.ow2.org/melodic/morphemic-preprocessor/-/archive/morphemic-rc2.5/morphemic-preprocessor-morphemic-rc2.5.tar.gz /var/lib/morphemic/
 
 # Copy the rest of the codebase into the image
-COPY deployment/nbeats ./
+COPY deployment/nbeats/ ./
 
 RUN cd /var/lib/morphemic/ \
-    && tar -zxf morphemic-preprocessor-morphemic-rc2.0.tar.gz \
-    && cd morphemic-preprocessor-morphemic-rc2.0 \
-    && cd morphemic-datasetmaker && python3 setup.py install \
+    && tar -zxf morphemic-preprocessor-morphemic-rc2.5.tar.gz \
+    && cd morphemic-preprocessor-morphemic-rc2.5 \
+    && cd morphemic-datasetmaker && python3 setup.py install \
     && cd ../.. \
-    && cp -R /var/lib/morphemic/morphemic-preprocessor-morphemic-rc2.0/amq-message-python-library /wd/amq_message_python_library \
+    && cp -R /var/lib/morphemic/morphemic-preprocessor-morphemic-rc2.5/amq-message-python-library /wd/amq_message_python_library \
     && rm -rf /var/lib/morphemic \
-    && mkdir -p /wd/logs \
-    && mkdir -p /wd/models
+    && mkdir -p /wd/logs
+
+
+CMD ["python3", "main.py"]
 
-CMD ["python3", "main.py"]
\ No newline at end of file
diff --git a/deployment/nbeats/nbeats/model_train.py b/deployment/nbeats/nbeats/model_train.py
index 1aa0873e4adce2de5c5c38ac259787ff8b12e0af..0f5801ab0d3f9985d5bc7a54ae4b5c27678f7209 100644
--- a/deployment/nbeats/nbeats/model_train.py
+++ b/deployment/nbeats/nbeats/model_train.py
@@ -127,8 +127,8 @@ def train(target_column, prediction_length, yaml_file="model.yaml", publish_rate
 
     trainer.fit(
         model,
-        train_dataloaders=train_dataloader,
-        val_dataloaders=val_dataloader,
+        train_dataloader,
+        val_dataloader,
     )
     if os.path.isfile(lockfile):
         with lock:
diff --git a/deployment/nbeats/tests/model_train_test.py b/deployment/nbeats/tests/model_train_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ac5538479ad54725ef84235558c0cc0a2316f69
--- /dev/null
+++ b/deployment/nbeats/tests/model_train_test.py
@@ -0,0 +1,202 @@
+import sys
+
+sys.path.append(".")
+
+import pytest
+from nbeats.model_train import train
+import pandas as pd
+import numpy as np
+import random
+
+
+def _timestamps(n):
+    """Return the first ``n`` 10-second timestamps from 2016-01-01 as ints.
+
+    Using ``periods=n`` gives the same leading values as slicing a
+    2016-2020 range, without building millions of unused timestamps.
+    """
+    stamps = pd.date_range(start="2016-01-01", periods=n, freq="10S").values
+    return [int(x) for x in stamps]
+
+
+@pytest.fixture
+def df_1():
+    # Header-only frame with no rows.
+    df = pd.DataFrame({"ems_time": [], "metric_0": []})
+    return df
+
+
+@pytest.fixture
+def df_2():
+    # 1000 rows, one metric that is NaN everywhere.
+    df = pd.DataFrame()
+    df["ems_time"] = _timestamps(1000)
+    df["metric_0"] = np.nan
+    return df
+
+
+@pytest.fixture
+def df_3():
+    # 1000 rows, five metrics that are NaN everywhere.
+    df = pd.DataFrame()
+    df["ems_time"] = np.array(range(0, 1000)) * 1e9
+    for i in range(5):
+        df[f"metric_{i}"] = np.nan
+    return df
+
+
+@pytest.fixture
+def df_4():
+    # Only three rows of constant metrics (date-based timestamps).
+    df = pd.DataFrame()
+    df["ems_time"] = _timestamps(3)
+    for i in range(5):
+        df[f"metric_{i}"] = 1
+    return df
+
+
+@pytest.fixture
+def df_5():
+    # Only three rows of constant metrics (numeric timestamps).
+    df = pd.DataFrame()
+    df["ems_time"] = np.array(range(0, 3)) * 1e9
+    for i in range(5):
+        df[f"metric_{i}"] = 1
+    return df
+
+
+@pytest.fixture
+def df_6():
+    # 1000 rows of fully populated random metrics.
+    df = pd.DataFrame()
+    df["ems_time"] = np.array(range(1, 1001)) * 1e9
+    for i in range(5):
+        df[f"metric_{i}"] = np.random.rand(1000)
+    return df
+
+
+@pytest.fixture
+def df_7():
+    # Random gaps in every metric, filled with the string "None".
+    df = pd.DataFrame()
+    df["ems_time"] = _timestamps(1000)
+    for i in range(5):
+        df[f"metric_{i}"] = np.random.rand(1000)
+        df.loc[
+            np.random.randint(0, df.shape[0] - 1, 990),
+            f"metric_{i}",
+        ] = np.nan
+        df[f"metric_{i}"] = df[f"metric_{i}"].fillna("None")
+    return df
+
+
+@pytest.fixture
+def df_8():
+    # Random gaps in every metric, filled with +inf.
+    df = pd.DataFrame()
+    df["ems_time"] = _timestamps(1000)
+    for i in range(5):
+        df[f"metric_{i}"] = np.random.rand(1000)
+        df.loc[
+            np.random.randint(0, df.shape[0] - 1, 990),
+            f"metric_{i}",
+        ] = np.nan
+        df[f"metric_{i}"] = df[f"metric_{i}"].fillna(np.inf)
+    return df
+
+
+@pytest.fixture
+def df_9():
+    # Random gaps in every metric, left as NaN.
+    df = pd.DataFrame()
+    df["ems_time"] = _timestamps(1000)
+    for i in range(5):
+        df[f"metric_{i}"] = np.random.rand(1000)
+        df.loc[
+            np.random.randint(0, df.shape[0] - 1, 990),
+            f"metric_{i}",
+        ] = np.nan
+    return df
+
+
+@pytest.fixture
+def df_10():
+    # A contiguous block of +inf (rows 20-299) in every metric.
+    df = pd.DataFrame()
+    df["ems_time"] = _timestamps(1000)
+    for i in range(5):
+        df[f"metric_{i}"] = np.random.rand(1000)
+        df.loc[
+            list(range(20, 300)),
+            f"metric_{i}",
+        ] = np.inf
+    return df
+
+
+@pytest.fixture
+def df_11():
+    # A contiguous NaN block (rows 20-299) in the even-numbered metrics only.
+    df = pd.DataFrame()
+    df["ems_time"] = _timestamps(1000)
+    for i in range(5):
+        df[f"metric_{i}"] = np.random.rand(1000)
+        if i % 2 == 0:
+            df.loc[
+                list(range(20, 300)),
+                f"metric_{i}",
+            ] = np.nan
+    return df
+
+
+@pytest.fixture
+def df_12():
+    # 6000 rows where every other row (even index) is NaN in every metric.
+    df = pd.DataFrame()
+    df["ems_time"] = _timestamps(6000)
+    for i in range(5):
+        # ``row`` is deliberately distinct from the column index ``i``.
+        df[f"metric_{i}"] = [
+            np.nan if row % 2 == 0 else random.random() for row in range(6000)
+        ]
+    return df
+
+
+@pytest.fixture
+def df(request):
+    # Resolve the fixture whose name was supplied through parametrize.
+    return request.getfixturevalue(request.param)
+
+
+@pytest.fixture
+def metric():
+    return "metric_0"
+
+
+@pytest.fixture
+def prediction_length():
+    return 60
+
+
+@pytest.mark.parametrize(
+    "df",
+    [
+        "df_1",
+        "df_2",
+        "df_3",
+        "df_4",
+        "df_5",
+        "df_6",
+        "df_7",
+        "df_8",
+        "df_9",
+        "df_10",
+        "df_11",
+        "df_12",
+    ],
+    indirect=True,
+)
+def test_predict(df, metric, prediction_length):
+    """Smoke test: ``train`` must run without raising on each fixture frame."""
+    # presumably the file train() loads via its default config -- verify
+    df.to_csv("demo.csv")
+    train(metric, prediction_length)
diff --git a/deployment/nbeats/tests/preprocess_dataset_test.py b/deployment/nbeats/tests/preprocess_dataset_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..a90bdfe39c333d6f10c71f05ebf3c04087057dc7
--- /dev/null
+++ b/deployment/nbeats/tests/preprocess_dataset_test.py
@@ -0,0 +1,237 @@
+import sys
+
+sys.path.append(".")
+
+import pytest
+from nbeats.preprocess_dataset import Dataset
+import pandas as pd
+import numpy as np
+import random
+
+
+@pytest.fixture
+def df_1():
+    # Header-only frame with no rows.
+    df = pd.DataFrame({"ems_time": [], "metric_0": []})
+    return df
+
+
+@pytest.fixture
+def df_2():
+    # 1000 rows, one metric that is NaN everywhere.
+    df = pd.DataFrame()
+    df["ems_time"] = np.array(range(0, 1000)) * 1e9
+    df["metric_0"] = np.nan
+    return df
+
+
+@pytest.fixture
+def df_3():
+    # 1000 rows, five metrics that are NaN everywhere.
+    df = pd.DataFrame()
+    df["ems_time"] = np.array(range(0, 1000)) * 1e9
+    for i in range(5):
+        df[f"metric_{i}"] = np.nan
+    return df
+
+
+@pytest.fixture
+def df_4():
+    # Only three rows of constant metrics.
+    df = pd.DataFrame()
+    df["ems_time"] = np.array(range(0, 3)) * 1e9
+    for i in range(5):
+        df[f"metric_{i}"] = 1
+    return df
+
+
+@pytest.fixture
+def df_5():
+    # NOTE(review): currently identical to df_4 -- confirm whether a
+    # different edge case was intended here.
+    df = pd.DataFrame()
+    df["ems_time"] = np.array(range(0, 3)) * 1e9
+    for i in range(5):
+        df[f"metric_{i}"] = 1
+    return df
+
+
+@pytest.fixture
+def df_6():
+    # 1000 rows of fully populated random metrics.
+    df = pd.DataFrame()
+    df["ems_time"] = np.array(range(0, 1000)) * 1e9
+    for i in range(5):
+        df[f"metric_{i}"] = np.random.rand(1000)
+    return df
+
+
+@pytest.fixture
+def df_7():
+    # Random gaps in every metric, filled with the string "None".
+    df = pd.DataFrame()
+    df["ems_time"] = np.array(range(0, 1000)) * 1e9
+    for i in range(5):
+        df[f"metric_{i}"] = np.random.rand(1000)
+        df.loc[
+            np.random.randint(0, df.shape[0] - 1, 990),
+            f"metric_{i}",
+        ] = np.nan
+        df[f"metric_{i}"] = df[f"metric_{i}"].fillna("None")
+    return df
+
+
+@pytest.fixture
+def df_8():
+    # Random gaps in every metric, filled with +inf.
+    df = pd.DataFrame()
+    df["ems_time"] = np.array(range(0, 1000)) * 1e9
+    for i in range(5):
+        df[f"metric_{i}"] = np.random.rand(1000)
+        df.loc[
+            np.random.randint(0, df.shape[0] - 1, 990),
+            f"metric_{i}",
+        ] = np.nan
+        df[f"metric_{i}"] = df[f"metric_{i}"].fillna(np.inf)
+    return df
+
+
+@pytest.fixture
+def df_9():
+    # Random gaps in every metric, left as NaN.
+    df = pd.DataFrame()
+    df["ems_time"] = np.array(range(0, 1000)) * 1e9
+    for i in range(5):
+        df[f"metric_{i}"] = np.random.rand(1000)
+        df.loc[
+            np.random.randint(0, df.shape[0] - 1, 990),
+            f"metric_{i}",
+        ] = np.nan
+    return df
+
+
+@pytest.fixture
+def df_10():
+    # A contiguous block of +inf (rows 20-299) in every metric.
+    df = pd.DataFrame()
+    df["ems_time"] = np.array(range(0, 1000)) * 1e9
+    for i in range(5):
+        df[f"metric_{i}"] = np.random.rand(1000)
+        df.loc[
+            list(range(20, 300)),
+            f"metric_{i}",
+        ] = np.inf
+    return df
+
+
+@pytest.fixture
+def df_11():
+    # A contiguous NaN block (rows 20-299) in the even-numbered metrics only.
+    df = pd.DataFrame()
+    df["ems_time"] = np.array(range(0, 1000)) * 1e9
+    for i in range(5):
+        df[f"metric_{i}"] = np.random.rand(1000)
+        if i % 2 == 0:
+            df.loc[
+                list(range(20, 300)),
+                f"metric_{i}",
+            ] = np.nan
+    return df
+
+
+@pytest.fixture
+def df_12():
+    # Every other row (even index) is NaN in every metric.
+    df = pd.DataFrame()
+    df["ems_time"] = np.array(range(0, 1000)) * 1e9
+    for i in range(5):
+        # ``row`` is deliberately distinct from the column index ``i``.
+        df[f"metric_{i}"] = [
+            np.nan if row % 2 == 0 else random.random() for row in range(1000)
+        ]
+    return df
+
+
+@pytest.fixture
+def df_13():
+    # NOTE(review): the scalar is assigned while the frame has no rows, so
+    # ems_time likely ends up NaN rather than 1 -- confirm the intent.
+    df = pd.DataFrame()
+    df["ems_time"] = 1
+    for i in range(5):
+        df[f"metric_{i}"] = [random.random() for _ in range(1000)]
+    return df
+
+
+@pytest.fixture
+def df_14():
+    # NOTE(review): ems_time is assigned on the still-empty frame, so it
+    # likely ends up NaN rather than 10 -- confirm the intent.
+    df = pd.DataFrame()
+    df["ems_time"] = 10
+    for i in range(5):
+        df[f"metric_{i}"] = [np.nan for _ in range(1000)]
+    return df
+
+
+@pytest.fixture
+def df_15():
+    # First half NaN, second half constant; the second half of ems_time
+    # repeats the first half's values shifted by 10000.
+    df = pd.DataFrame()
+    df["ems_time"] = [i * 30 * 1e5 for i in range(500)] + [
+        i * 30 * 1e5 + 10000 for i in range(500)
+    ]
+    for i in range(5):
+        df[f"metric_{i}"] = [np.nan for _ in range(500)] + [2 for _ in range(500)]
+    return df
+
+
+@pytest.fixture
+def df_16():
+    # Same metric layout as df_15, with small integer timestamps.
+    df = pd.DataFrame()
+    df["ems_time"] = [i for i in range(500)] + [i + 10000 for i in range(500)]
+    for i in range(5):
+        df[f"metric_{i}"] = [np.nan for _ in range(500)] + [2 for _ in range(500)]
+    return df
+
+
+@pytest.fixture
+def df(request):
+    # Resolve the fixture whose name was supplied through parametrize.
+    return request.getfixturevalue(request.param)
+
+
+@pytest.fixture
+def metric():
+    return "metric_0"
+
+
+class TestDataset:
+    @pytest.mark.parametrize(
+        "df",
+        [
+            "df_1",
+            "df_2",
+            "df_3",
+            "df_4",
+            "df_5",
+            "df_6",
+            "df_7",
+            "df_8",
+            "df_9",
+            "df_10",
+            "df_11",
+            "df_12",
+            "df_13",
+            "df_14",
+            "df_15",
+            "df_16",
+        ],
+        indirect=True,
+    )
+    def test_init(self, df, metric):
+        """Smoke test: ``Dataset`` must construct from every fixture frame."""
+        preprocessed_dataset = Dataset(df, metric)
+        assert isinstance(preprocessed_dataset, Dataset)