diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index d449d82e7c9e46b43cb0564fd5fc2c7b9f4e4ac4..f62e9dc9b7afd331767b7e264d9bcc3dd227a3c2 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -2,7 +2,7 @@ variables:
DOCKER_PUSH: "false"
LOCAL_REPO: "127.0.0.1:5000"
DOCKER_REPO: "gitlab.ow2.org:4567"
- MAVEN_IMAGE: "maven:3.5.2-jdk-8"
+ MAVEN_IMAGE: "maven:3.6.3-jdk-8"
DOCKER_DIND_IMAGE: "docker:19.03.1"
DOCKER_DIND_SERVICE: "$DOCKER_DIND_IMAGE-dind"
DOCKER_DRIVER: overlay
@@ -34,21 +34,14 @@ variables:
MCTS_SOLVER_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f zpp-solver/mcts-solver/pom.xml"
EMS_CLI: "mvn --batch-mode -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/pom.xml"
- #
- EMS_UTIL_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/util/pom.xml"
- #
- EMS_BROKER_CLIENT_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/broker-client/pom.xml"
- #
- EMS_BROKER_CEP_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/broker-cep/pom.xml"
- #
- EMS_BAGUETTE_CLIENT_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/baguette-client/pom.xml"
- #
- EMS_BAGUETTE_CLIENT_INSTALL_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/baguette-client-install/pom.xml"
- #
- EMS_BAGUETTE_SERVER_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/baguette-server/pom.xml"
- #
- EMS_TRANSLATOR_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/translator/pom.xml"
- EMS_SERVER_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/control-service/pom.xml"
+ #EMS_UTIL_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/util/pom.xml"
+ #EMS_BROKER_CLIENT_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/broker-client/pom.xml"
+ #EMS_BROKER_CEP_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/broker-cep/pom.xml"
+ #EMS_BAGUETTE_CLIENT_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/baguette-client/pom.xml"
+ #EMS_BAGUETTE_CLIENT_INSTALL_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/baguette-client-install/pom.xml"
+ #EMS_BAGUETTE_SERVER_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/baguette-server/pom.xml"
+ #EMS_TRANSLATOR_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/translator/pom.xml"
+ #EMS_SERVER_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f event-management/control-service/pom.xml"
METASOLVER_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f meta_solver/pom.xml"
MQ_ADAPTER_CLI: "mvn --batch-mode -N -Dmaven.test.skip=$SKIP_TESTS -Ddocker.push=false -f mq-http-adapter/pom.xml"
diff --git a/event-management/README-for-TESTING.md b/event-management/README-for-TESTING.md
new file mode 100644
index 0000000000000000000000000000000000000000..ac63e6ecec8082652ef29542aaf5fa5081c84c4e
--- /dev/null
+++ b/event-management/README-for-TESTING.md
@@ -0,0 +1,1353 @@
+# Testing of New EMS Features
+
+
+## New features of EMS
+
+- Support for **Resource-Limited (RL)** nodes, like edge devices or small VMs
+- Support for **Self-Healing** monitoring topology (partially implemented)
+
+
+## Definitions
+We distinguish between ***Resource-Limited (RL)*** nodes and ***Normal or Non-RL*** nodes.
+
+- **Normal nodes** are VMs that have enough resources, where an EMS client will be installed, along with JRE8 and Netdata.
+- **RL nodes** are VMs with few resources, where only Netdata will be installed.
+- Currently, EMS will classify a VM as an RL node if:
+ * it has 1 or 2 cores, or
+ * it has 2GB of RAM or less, or
+ * it has Total Disk space 1GB or less, or
+ * its architecture name starts with `ARM` (it will normally be `x86_64`).
+ * Thresholds can be changed in `eu.melodic.event.baguette-client-install.properties` file.
+
+
+We also distinguish between ***Monitoring Topologies***:
+
+- **2-LEVEL Monitoring Topology**: Nodes send their metrics directly to EMS server.
+
+ * Includes an EMS server, and any number of Normal and/or RL nodes.
+ * No clustering occurs in 2-LEVEL topologies, hence Aggregator role is not used.
+ * CAMEL Metric Models will only use `GLOBAL` and `PER_INSTANCE` groupings or no groupings at all (`GLOBAL` and `PER_INSTANCE` are then implied).
+
+- **3-LEVEL Monitoring Topology**: Nodes send their metrics to cluster-wide Aggregators, then Aggregators send (composite) metrics to EMS server.
+
+ * Includes an EMS server, Aggregators (one per cluster), and Normal and/or RL nodes.
+ * Nodes are grouped into clusters. Each cluster has a node with the Aggregator role.
+ * Only Normal nodes can be Aggregators.
+ * There must be exactly one Aggregator per cluster.
+ * Each cluster must have at least one Normal node (in order to become Aggregator).
+ * CAMEL Metric Model will use `GLOBAL`, `PER_ZONE` / `PER_REGION` / `PER_CLOUD`, and `PER_INSTANCE` groupings.
+
+ Clustering of nodes is used for faster failure detection, as well as distribution of load:
+ - Only 3-LEVEL topologies are clustered.
+ - 2-LEVEL topologies are not clustered.
+
+ Currently, nodes are either:
+ - clustered based on their Availability Zone, Region or Cloud Service Provider, or
+ - assigned to a default cluster.
+
+
+------
+
+
+## A) Support for Resource-Limited nodes
+> Feature Quick Notes:
+> - EMS server will NOT install EMS client and JRE8 in RL nodes.
+> - EMS server will install Netdata in RL nodes.
+> - EMS server or an Aggregator will periodically query Netdata agents of RL nodes for metrics.
+> - Normal nodes will periodically query their Local Netdata agent for metrics.
+
+
+
+### Test Cases
+
+**A.1) Metrics collection from RL nodes in a 2-LEVEL topology**
+
+> Test Case Quick Notes:
+> - EMS server MUST log when it collects metrics from RL nodes.
+> - EMS server MUST *NOT* log or collect metrics from Normal (Non-RL) nodes.
+> - Normal nodes MUST log when they collect metrics from their Local Netdata agents. (The Log records are slightly different).
+
+**You need a CAMEL model:**
+
+* with two Requirement Sets:
+ - for Normal nodes: 4 cores, 4GB RAM, >1 GB Disk, and
+ - for RL nodes: 1-2 cores, or <2GB RAM, or <1GB Disk
+* with 1-2 COMPONENTS using Requirement Set #1 (Normal nodes)
+* with 1-2 COMPONENTS with Requirement Set #2 (RL nodes)
+* with no Groupings in Metric Model
+
+**After Application deployment you need to check the logs of:**
+
+* ***EMS server***, for log messages about collecting metrics from RL-nodes' Netdata agents. E.g.
+
+ ```
+ e.m.e.c.c.netdata.NetdataCollector : Collectors::Netdata: Collecting metrics from remote nodes (without EMS client): [192.168.32.2, 192.168.32.4]
+ e.m.e.c.c.netdata.NetdataCollector : Collectors::Netdata: Collecting data from url: http://192.168.32.2:19999/api/v1/allmetrics?format=json
+ e.m.e.c.c.netdata.NetdataCollector : Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ e.m.e.c.c.netdata.NetdataCollector : Collectors::Netdata: Collecting data from url: http://192.168.32.4:19999/api/v1/allmetrics?format=json
+ e.m.e.c.c.netdata.NetdataCollector : Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ ```
+
+* ***Normal nodes***, for log messages about collecting metrics from their Local Netdata agent
+
+ ```
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ ```
+
+
+
+**A.2) Metrics collection from RL nodes in a 3-LEVEL topology**
+
+> Test Case Quick Notes:
+> - The Aggregator (it is a Normal node) MUST log each time it collects metrics from RL nodes in its cluster.
+> - The Aggregator MUST *NOT* log or collect metrics from Normal (Non-RL) nodes in its cluster.
+> - Normal nodes (including Aggregator) MUST log each time they collect metrics from their Local Netdata agents. (The Log records are slightly different).
+
+**You need a CAMEL model:**
+
+* with two Requirement Sets:
+ - for Normal nodes: 4 cores, 4GB RAM, >1 GB Disk, and
+ - for RL nodes: 1-2 cores, or <2GB RAM, or <1GB Disk
+* with 1-2 COMPONENTS with Requirement Set #1 (Normal nodes)
+* with 1-2 COMPONENTS with Requirement Set #2 (RL nodes)
+* with three (3) Groupings used in the Metric Model (`GLOBAL`, `PER_ZONE`, `PER_INSTANCE`)
+
+**After Application deployment you need to check the logs of:**
+
+* ***EMS server***, for NO logs related to collecting metrics from any Netdata agent
+* ***Aggregator node(s)***, for logs about collecting metrics from the Netdata agents of RL nodes, in the same cluster. E.g.
+
+ ```
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ Collectors::Netdata: Collecting metrics from remote nodes (without EMS client): [192.168.96.2, 192.168.96.5]
+ Collectors::Netdata: Collecting data from url: http://192.168.96.2:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ Collectors::Netdata: Collecting data from url: http://192.168.96.5:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ ```
+
+* ***Normal nodes*** (including Aggregator node), for logs about collecting metrics from their Local Netdata agents. E.g.
+
+ ```
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ ```
+
+
+
+------
+
+## B) Support for Monitoring Self-Healing
+> Feature Quick Notes:
+> - Self-Healing refers to recovering the monitoring software running at the nodes.
+> - In Normal nodes, it specifically refers to recovering the EMS client and/or Netdata agent.
+> - In RL nodes, refers to recovering Netdata agent only.
+
+
+
+#### Design Choices
+
+1. Each EMS client (in a Normal node) is responsible for recovering the Local Netdata agent, collocated with it.
+2. When clustering is used (i.e. in a 3-level topology), Aggregator is responsible for recovering other nodes in its cluster, both Normal and RL.
+3. When clustering is not used (i.e. in a 2-level topology), EMS server is responsible for recovering nodes (both Normal and RL).
+
+
+
+#### Self-Healing actions
+
+We distinguish between monitoring topologies:
+
+* **2-LEVEL Monitoring topology:** Only EMS server and nodes (Normal & RL) are used. No Aggregators or clustering.
+
+ * EMS server will try to recover any *Normal node* that disconnects and does not reconnect within a configured period of time.
+
+ ***Condition:***
+
+ * EMS client disconnects and does not reconnect within X seconds
+
+ ***Recovery steps taken by EMS server:***
+
+ * SSH to node (assuming it is a VM)
+ * Kill EMS client (if it is still running)
+ * Launch EMS client
+ * Close SSH connection
+ * Wait for a configured period of time for recovered EMS client to reconnect to EMS server
+ * After that period of time, the process is repeated (up to a configured number of retries, and then gives up).
+
+ * EMS server will try to recover any *RL node* with inaccessible Netdata agent.
+
+ ***Condition:***
+
+ * X consecutive connection failures to Netdata agent occur.
+
+ ***Recovery steps taken by EMS server:***
+
+ * SSH to node (assuming it is a VM)
+ * Kill Netdata (if it is still running)
+ * Launch Netdata
+ * Close SSH connection
+ * Reset the consecutive failures counter.
+
+
+* **3-LEVEL Monitoring topology:** EMS server, Aggregators (one per cluster), and Nodes in clusters exist. Use of clustering.
+
+ * Aggregator will try to recover any *Normal node* that leaves the cluster and does not join back within a configured period of time.
+
+ ***Condition:***
+
+ * EMS client leaves the cluster and does not join back within X seconds
+
+ ***Recovery steps taken by Aggregators:***
+
+ * Contact EMS server to get node's credentials
+ * SSH to node (assuming it is a VM)
+ * Kill EMS client (if it is still running)
+ * Launch EMS client
+ * Close SSH connection
+ * Wait for a configured period of time for EMS client to join back to cluster
+ * After that period of time the process is repeated (up to a configured number of retries, and then it gives up and notifies EMS server)
+ * When EMS client joins back to the cluster, or in case of giving up, the node credentials are cleared from Aggregator's cache.
+
+ * Aggregator will try to recover any *RL node* with inaccessible Netdata agent.
+
+ ***Condition:***
+
+ * X consecutive connection failures to Netdata agent occur.
+
+ ***Recovery steps taken by Aggregators:***
+
+ * Contact EMS server to get node's credentials
+ * SSH to node (assuming it is a VM)
+ * Kill Netdata agent (if it is still running)
+ * Launch Netdata agent
+ * Close SSH connection
+ * Reset the consecutive failures counter
+ * On successful connection to Netdata agent the node credentials are cleared from Aggregator cache.
+
+
+* **2-LEVEL or 3-LEVEL Monitoring topology**
+
+ * Any Normal node will try to recover its Local Netdata agent, if it becomes inaccessible.
+
+ ***Condition:***
+
+ * X consecutive connection failures to Local Netdata agent occur.
+
+ ***Recovery steps (taken by NORMAL node):***
+
+ * Kill Netdata agent (if it is still running)
+ * Launch Netdata agent
+ * Reset the consecutive failures counter
+
+
+
+### Test Cases for 2-LEVEL topology
+
+> ***PREREQUISITE:***
+>
+> You need a CAMEL model with a 2-LEVEL monitoring topology:
+>
+> * with two Requirement Sets:
+> - for Normal nodes: 4 cores, 4GB RAM, >1 GB Disk, and
+> - for RL nodes: 1-2 cores, or <2GB RAM, or <1GB Disk
+> * with 1-2 components with Requirement Set #1 (Normal nodes)
+> * with 1-2 components with Requirement Set #2 (RL nodes)
+> * with no Groupings used in Metric Model.
+>
+> This CAMEL model is ***common*** to the following test cases, unless another CAMEL model is specified.
+>
+> CAMEL model MUST be re-deployed after each test case execution.
+
+
+
+**B.1.a) Successful recovery of an EMS client in a Normal node**
+
+> Test Case Quick Notes:
+> - Kill EMS client of any Normal node.
+> - The EMS server will recover the killed EMS client after a configured period of time.
+> - Check EMS server logs for disconnection, recovery actions and re-connection messages.
+
+**After Application deployment...**
+
+ * Connect to a Normal node and ***kill*** EMS client
+
+**Next, check the logs of:**
+
+ * ***EMS server***, for messages reporting an EMS client disconnection, the recovery attempt(s) and EMS client re-connection.
+
+ *EMS server log: An EMS client disconnected*
+ ```
+ e.m.e.b.server.ClientShellCommand : #00000==> Signaling client to exit
+ e.m.e.b.server.ClientShellCommand : #00000--> Thread stops
+ e.m.e.b.s.coordinator.NoopCoordinator : TwoLevelCoordinator: unregister(): Method invoked. CSC: ClientShellCommand_#00000
+ e.m.e.b.s.c.TwoLevelCoordinator : TwoLevelCoordinator: --------------------------------------------------
+ e.m.e.b.s.c.TwoLevelCoordinator : TwoLevelCoordinator: Client unregistered: #00000 @ 172.29.0.3
+ e.m.e.b.c.s.ClientRecoveryPlugin : ClientRecoveryPlugin: processExitEvent(): client-id=#00000, client-address=172.29.0.3
+ ```
+ *EMS server log: EMS client recovery actions*
+ ```
+ e.m.e.b.c.s.ClientRecoveryPlugin : ClientRecoveryPlugin: runClientRecovery(): Starting client recovery: node-info=NodeRegistryEntry(ipAddress=172.29.0.3, clientId=VM-UBUNTU-vm1-vm1-AWS-vm1-85499eeb-14bc-481d-9c42-eac879845450, baguetteServer=eu.melodi
+ o.a.s.c.k.AcceptAllServerKeyVerifier : Server at /172.29.0.3:22 presented unverified EC key: SHA256:gNU4ScwysUpv050SaorPj7zlZrkiyGq4YSsOGBl+DCk
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Task #0: Session will be recorded in file: /logs/172.29.0.3-22-2022.02.16.09.33.31.121-0.txt
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Connected to remote host: task #0: host: 172.29.0.3:22
+ e.m.e.b.c.install.SshClientInstaller :
+ ----------------------------------------------------------------------
+ Task #0 : Instruction Set: Restarting Baguette agent at VM node
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Task #0: Executing installation instructions set: Restarting Baguette agent at VM node
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Task #0: Executing instruction 1/2: Killing previous EMS client process
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Task #0: EXEC: /opt/baguette-client/bin/kill.sh
+ o.a.s.c.session.ClientConnectionService : globalRequest(ClientConnectionService[ClientSessionImpl[ubuntu@/172.29.0.3:22]])[hostkeys-00@openssh.com, want-reply=false] failed (SshException) to process: EdDSA provider not supported
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Task #0: EXEC: exit-status=0
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Task #0: Executing instruction 2/2: Starting new EMS client process
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Task #0: EXEC: /opt/baguette-client/bin/run.sh
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Task #0: EXEC: exit-status=0
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Task #0: Installation Instructions set succeeded: Restarting Baguette agent at VM node
+ e.m.e.b.c.install.SshClientInstaller :
+ -------------------------------------------------------------------------
+ Task #0 : Instruction sets processed: successful=1, failed=0, exit-result=SUCCESS
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Disconnected from remote host: task #0: host: 172.29.0.3:22
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Task completed successfully #0
+ e.m.e.b.c.s.ClientRecoveryPlugin : ClientRecoveryPlugin: runClientRecovery(): Client recovery completed: result=true, node-info=NodeRegistryEntry(ipAddress=172.29.0.3, clientId=VM-UBUNTU-vm1-vm1-AWS-vm1-85499eeb-14bc-481d-9c42-eac879845450, baguetteSe
+ ```
+ *EMS server log: EMS client reconnected*
+ ```
+ o.a.s.s.session.ServerUserAuthService : Session user-bbb5b809-3296-485c-a605-cc8bae646bbb@/172.29.0.3:39696 authenticated
+ e.m.e.b.server.ClientShellCommand : #00001--> Got session : ServerSessionImpl[user-bbb5b809-3296-485c-a605-cc8bae646bbb@/172.29.0.3:39696]
+ e.m.e.b.server.ClientShellCommand : #00001==> Thread started
+ e.m.e.b.server.ClientShellCommand : #00001--> Client Id: VM-UBUNTU-vm1-vm1-AWS-vm1-85499eeb-14bc-481d-9c42-eac879845450
+ e.m.e.b.server.ClientShellCommand : #00001--> Broker URL: ssl://172.29.0.3:61617?daemon=true&trace=false&useInactivityMonitor=false&connectionTimeout=0&keepAlive=true
+ e.m.e.b.server.ClientShellCommand : #00001--> Broker Username: user-local-Q1mnKfNgzM
+ e.m.e.b.server.ClientShellCommand : #00001--> Broker Password: xityAHGDhIiVeAxJdfax
+ e.m.e.b.server.ClientShellCommand : #00001--> Broker Cert.: -----BEGIN CERTIFICATE-----
+ .........................
+ -----END CERTIFICATE-----
+ e.m.e.b.server.ClientShellCommand : #00001--> Adding/Replacing client certificate in Truststore: alias=172.29.0.3
+ e.m.e.b.server.ClientShellCommand : #00001--> Added/Replaced client certificate in Truststore: alias=172.29.0.3, CN=C=GR, ST=Attika, L=Athens, O=Institute of Communication and Computer Systems (ICCS), OU=Information Management Unit (IMU), CN=172.29.0.3, certificate-na
+ e.m.e.b.s.coordinator.NoopCoordinator : TwoLevelCoordinator: register(): Method invoked. CSC: ClientShellCommand_#00001
+ e.m.e.b.s.c.TwoLevelCoordinator : TwoLevelCoordinator: --------------------------------------------------
+ e.m.e.b.s.c.TwoLevelCoordinator : TwoLevelCoordinator: Sending grouping configurations to client #00001...
+ .........................
+ e.m.e.b.server.ClientShellCommand : sendGroupingConfiguration: Serialization of Grouping configuration for PER_INSTANCE: rO0ABXNyACt.........................
+ e.m.e.b.server.ClientShellCommand : #00001==> PUSH : SET-GROUPING-CONFIG rO0ABXNyACt.........................
+ e.m.e.b.s.c.TwoLevelCoordinator : TwoLevelCoordinator: Sending grouping configurations to client #00001... done
+ e.m.e.b.s.c.TwoLevelCoordinator : TwoLevelCoordinator: --------------------------------------------------
+ e.m.e.b.s.c.TwoLevelCoordinator : TwoLevelCoordinator: Setting active grouping of client #00001: PER_INSTANCE
+ e.m.e.b.server.ClientShellCommand : #00001==> PUSH : SET-ACTIVE-GROUPING PER_INSTANCE
+ e.m.e.b.s.c.TwoLevelCoordinator : TwoLevelCoordinator: --------------------------------------------------
+ e.m.e.b.server.ClientShellCommand : #00001--> Client grouping changed: null --> PER_INSTANCE
+ ```
+ * ***Normal node where EMS client was killed***, for EMS client's logs indicating its restart.
+ *Normal node: EMS client restarts*
+ ```
+ Starting baguette client...
+ MELODIC_CONFIG_DIR=/opt/baguette-client/conf
+ LOG_FILE=/opt/baguette-client/logs/output.txt
+ ____ _ _ _____ _ _ _
+ | _ \ | | | | / ____| (_) | |
+ | |_) | __ _ __ _ _ _ ___| |_| |_ ___ | | | |_ ___ _ __ | |_
+ | _ < / _` |/ _` | | | |/ _ \ __| __/ _ \ | | | | |/ _ \ '_ \| __|
+ | |_) | (_| | (_| | |_| | __/ |_| || __/ | |____| | | __/ | | | |_
+ |____/ \__,_|\__, |\__,_|\___|\__|\__\___| \_____|_|_|\___|_| |_|\__|
+ __/ |
+ |___/
+ Starting BaguetteClient v4.5.0-SNAPSHOT on 21845bcaf772 with PID 779 (/opt/baguette-client/jars/baguette-client-4.5.0-SNAPSHOT.jar started by ubuntu in /opt/baguette-client)
+ No active profile set, falling back to default profiles: default
+ loadCachedClientId: Used cached Client Id: null
+ Password encoder class name is empty. Default instance of PasswordEncoder will be created
+ .........................
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ .........................
+ ```
+ * ***Other Normal nodes***, for NO logs indicating failure or recovery attempts.
+
+
+
+**B.1.b) Failed recovery of EMS client in a Normal node**
+
+> Test Case Quick Notes:
+> - Kill the VM of any Normal node.
+> - The EMS server will try to connect to the affected VM but fail.
+> - After a configured number of retries EMS server will give up.
+
+**After Application deployment...**
+
+ * Terminate the VM of a Normal node
+
+**Next, check the logs of:**
+
+ * ***EMS server***, for messages reporting an EMS client disconnection, failed recovery attempts and giving up recovery
+
+ *EMS server log: An EMS client disconnected*
+ ```
+ e.m.e.b.server.ClientShellCommand : #00001==> Signaling client to exit
+ e.m.e.b.server.ClientShellCommand : #00001--> Thread stops
+ e.m.e.b.s.coordinator.NoopCoordinator : TwoLevelCoordinator: unregister(): Method invoked. CSC: ClientShellCommand_#00001
+ e.m.e.b.s.c.TwoLevelCoordinator : TwoLevelCoordinator: --------------------------------------------------
+ e.m.e.b.s.c.TwoLevelCoordinator : TwoLevelCoordinator: Client unregistered: #00001 @ 172.29.0.3
+ e.m.e.b.c.s.ClientRecoveryPlugin : ClientRecoveryPlugin: processExitEvent(): client-id=#00001, client-address=172.29.0.3
+ ```
+ *EMS server log: EMS client recovery actions and give up message*
+ ```
+ e.m.e.b.c.s.ClientRecoveryPlugin : ClientRecoveryPlugin: runClientRecovery(): Starting client recovery: node-info=NodeRegistryEntry(ipAddress=172.29.0.3, clientId=VM-UBUNTU-vm1-vm1-AWS-vm1-85499eeb-14bc-481d-9c42-eac879845450, baguetteServer=eu.melodi
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Error while connecting to remote host: task #0:
+ java.net.NoRouteToHostException: No route to host
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.checkConnect(Native Method)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finishConnect(UnixAsynchronousSocketChannelImpl.java:252)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finish(UnixAsynchronousSocketChannelImpl.java:198)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.onEvent(UnixAsynchronousSocketChannelImpl.java:213)
+ at sun.nio.ch.EPollPort$EventHandlerTask.run(EPollPort.java:293)
+ at java.lang.Thread.run(Thread.java:748)
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Failed executing task #0, Exception:
+ java.net.NoRouteToHostException: No route to host
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.checkConnect(Native Method)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finishConnect(UnixAsynchronousSocketChannelImpl.java:252)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finish(UnixAsynchronousSocketChannelImpl.java:198)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.onEvent(UnixAsynchronousSocketChannelImpl.java:213)
+ at sun.nio.ch.EPollPort$EventHandlerTask.run(EPollPort.java:293)
+ at java.lang.Thread.run(Thread.java:748)
+ .........................
+ .........................
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Retry 5/5 executing task #0
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Error while connecting to remote host: task #0:
+ java.net.NoRouteToHostException: No route to host
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.checkConnect(Native Method)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finishConnect(UnixAsynchronousSocketChannelImpl.java:252)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finish(UnixAsynchronousSocketChannelImpl.java:198)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.onEvent(UnixAsynchronousSocketChannelImpl.java:213)
+ at sun.nio.ch.EPollPort$EventHandlerTask.run(EPollPort.java:293)
+ at java.lang.Thread.run(Thread.java:748)
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Failed executing task #0, Exception:
+ java.net.NoRouteToHostException: No route to host
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.checkConnect(Native Method)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finishConnect(UnixAsynchronousSocketChannelImpl.java:252)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finish(UnixAsynchronousSocketChannelImpl.java:198)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.onEvent(UnixAsynchronousSocketChannelImpl.java:213)
+ at sun.nio.ch.EPollPort$EventHandlerTask.run(EPollPort.java:293)
+ at java.lang.Thread.run(Thread.java:748)
+
+ e.m.e.b.c.install.SshClientInstaller : SshClientInstaller: Giving up executing task #0 after 5 retries
+ e.m.e.b.c.s.ClientRecoveryPlugin : ClientRecoveryPlugin: runClientRecovery(): Client recovery completed: result=false, node-info=NodeRegistryEntry(ipAddress=172.29.0.3, clientId=VM-UBUNTU-vm1-vm1-AWS-vm1-85499eeb-14bc-481d-9c42-eac879845450, baguetteS
+ ```
+ * ***Normal nodes that operate***, for NO logs indicating any failure or recovery attempts
+
+
+
+**B.2.a) Successful recovery of a Netdata agent in a RL node**
+
+> Test Case Quick Notes:
+> - Kill Netdata agent of any RL node.
+> - The EMS server will recover the killed Netdata agent after a configured period of time.
+> - Check EMS server log messages reporting failures to collect metrics, recovery actions, and successful metrics collection.
+
+**After Application deployment...**
+
+ * Connect to a RL node and kill Netdata agent.
+
+ *EMS server log: Failed metric collection attempts from a Netdata agent*
+ ```
+ ......................... Not yet implemented
+ ```
+
+**Next, check the logs of:**
+
+ * ***EMS server***, for logs reporting connection failure to a Netdata agent, and recovery actions.
+
+ *EMS server log: Netdata agent recovery actions*
+ ```
+ ......................... Not yet implemented
+ ```
+ * ***RL node with killed Netdata***, check if the Netdata processes have started again.
+ *RL node shell: Recovered Netdata agent process*
+ ```
+ ......................... Not yet implemented
+ ```
+ * ***Normal nodes (that operate)***, for NO Logs indicating failure or recovery attempts.
+
+
+
+**B.2.b) Failed recovery of a Netdata agent in a RL node**
+
+> Test Case Quick Notes:
+> - Kill the VM of any RL node.
+> - The EMS server will try to connect to the affected VM but fail.
+> - After a configured number of retries EMS server will give up.
+
+**After Application deployment...**
+
+ * Terminate the VM of a RL node
+
+**You need to check the logs of:**
+
+ * ***EMS server***, for logs reporting connection failure to a Netdata agent, and then a number of failed attempts to connect to VM.
+
+ *EMS server log: Failed metric collection attempts from a Netdata agent*
+ ```
+ ......................... Not yet implemented
+ ```
+ *EMS server log: Failed Netdata agent recovery actions and give up message*
+ ```
+ ......................... Not yet implemented
+ ```
+ * ***Normal nodes (that operate)***, for NO logs indicating connection failures or recovery actions.
+
+
+
+**B.3) Successful recovery of a Netdata agent in a Normal node**
+
+> Test Case Quick Notes:
+> - Kill Netdata agent of any Normal node.
+> - The EMS client of the node will recover the killed Netdata agent after a configured period of time.
+> - Check EMS client's logs for messages reporting failures to collect metrics, recovery actions, and successful metrics collection.
+
+**After Application deployment...**
+
+ * Connect to a Normal node and kill Netdata agent.
+
+**Next, check the logs of:**
+
+ * ***EMS server***, for No log messages indicating connection failures to Netdata, or recovery actions.
+ * ***Normal node with killed Netdata***, check if the Netdata processes have started again. Also check EMS client's log messages reporting failed metric collections, recovery actions, and successful metric collection.
+
+ *Normal node - EMS client log: Failed attempts to collect metrics from Local Netdata agent*
+ ```
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Exception while collecting metrics from node: , #errors=1, exception: org.springframework.web.client.ResourceAccessException: I/O error on GET request for "http://127.0.0.1:19999/api/v1/allmetrics": Connection refused (Connection refused); nested exception is java.net.ConnectException: Connection refused (Connection refused) -> java.net.ConnectException: Connection refused (Connection refused)
+
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Exception while collecting metrics from node: , #errors=2, exception: org.springframework.web.client.ResourceAccessException: I/O error on GET request for "http://127.0.0.1:19999/api/v1/allmetrics": Connection refused (Connection refused); nested exception is java.net.ConnectException: Connection refused (Connection refused) -> java.net.ConnectException: Connection refused (Connection refused)
+
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Exception while collecting metrics from node: , #errors=3, exception: org.springframework.web.client.ResourceAccessException: I/O error on GET request for "http://127.0.0.1:19999/api/v1/allmetrics": Connection refused (Connection refused); nested exception is java.net.ConnectException: Connection refused (Connection refused) -> java.net.ConnectException: Connection refused (Connection refused)
+ Collectors::Netdata: Too many consecutive errors occurred while attempting to collect metrics from node: , num-of-errors=3
+ Collectors::Netdata: Will pause metrics collection from node for 60 seconds:
+ SelfHealingPlugin: createRecoveryTask(): Created recovery task for Node: id=null, address=
+ ```
+ *Normal node - EMS client log: Local Netdata agent recovery actions*
+ ```
+ SelfHealingPlugin: Retry #0: Recovering node: id=null, address=
+ ShellRecoveryTask: runNodeRecovery(): Executing 3 recovery commands
+ ############## Initial wait......
+ ############## Waiting for 5000ms after Initial wait......
+ ############## Sending Netdata agent kill command......
+ ############## Waiting for 2000ms after Sending Netdata agent kill command......
+ ############## Sending Netdata agent start command......
+ ############## Waiting for 10000ms after Sending Netdata agent start command......
+ ShellRecoveryTask: runNodeRecovery(): Executed 3 recovery commands
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Node is in ignore list:
+ OUT> /opt/baguette-client
+ ERR> -U: 1: -U: Syntax error: Unterminated quoted string
+ ERR> 2022-02-16 10:23:29: netdata INFO : MAIN : CONFIG: cannot load cloud config '/var/lib/netdata/cloud.d/cloud.conf'. Running with internal defaults.
+ ```
+ *Normal node - EMS client log: Successful metrics collection from Local Netdata agent*
+ ```
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Node is in ignore list:
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Node is in ignore list:
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Node is in ignore list:
+
+ Collectors::Netdata: Resumed metrics collection from node:
+ SelfHealingPlugin: cancelRecoveryTask(): Cancelled recovery task for Node: id=null, address=
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ ```
+ * ***Normal nodes (that operate)***, for NO logs indicating connection failures or recovery actions.
+
+
+
+### Test Cases for 3-LEVEL topology
+
+> ***PREREQUISITE:***
+>
+> You need a CAMEL model for 3-LEVEL topology:
+>
+> * with two Requirement Sets:
+> - for Normal nodes: 4 cores, 4GB RAM, >1 GB Disk, and
+> - for RL nodes: 1-2 cores, or <2GB RAM, or <1GB Disk,
+> * with 1-2 COMPONENTS with Requirement Set #1 (Normal nodes)
+> * with 1-2 COMPONENTS with Requirement Set #2 (RL nodes)
+> * with three (3) Groupings used in the Metric Model (`GLOBAL`, `PER_ZONE`, `PER_INSTANCE`).
+>
+> This CAMEL model is ***common*** to the following test cases, unless another CAMEL model is specified.
+>
+> CAMEL model MUST be re-deployed after each test case execution.
+
+
+
+**B.4.a) Successful recovery of an EMS client in a clustered Normal node**
+
+> Test Case Quick Notes:
+> - Kill EMS client of any Normal node except the Aggregator.
+> - The Aggregator will recover the killed EMS client after a configured period of time.
+> - Check Aggregator log messages for node leaving cluster, recovery actions, and node joining back.
+
+**After Application deployment...**
+
+ * Connect to a Normal node, except Aggregator, and ***kill*** EMS client
+
+**Next, check the logs of:**
+
+ * ***EMS server***, for Aggregator's query for node credentials.
+ *EMS server log: Aggregator queries for node's credentials*
+ ```
+ e.m.e.b.server.ClientShellCommand : #00000==> PUSH : {"random":"cecab3d4-4c09-43b1-b6fa-3534d37bbc8f","zone-id":"IMU-ZONE","address":"192.168.16.4","provider":"AWS","name":"vm2","ssh.port":"22","ssh.username":"ubuntu","ssh.password":"ubuntu","id":"vm2","type":"VM","operatingSystem":"UBUNTU","CLIENT_ID":"VM-UBUNTU-vm2-vm2-AWS-vm2-cecab3d4-4c09-43b1-b6fa-3534d37bbc8f",.........................
+ ```
+ Note: EMS client disconnection from EMS server will also be logged in EMS server logs, but no recovery action will be taken by EMS server.
+
+ * ***Aggregator***, for log messages about, (i) EMS client leaving cluster, (ii) recovery actions, and (iii) EMS client joining back to the cluster.
+ *Aggregator log: An EMS client left cluster*
+ ```
+ CLM: MEMBER_REMOVED: node=node_3866738cb0f4_2002
+ BRU: Brokers after cluster change: [Member{id=node_581d745be52c_2001, address=192.168.16.3:2001, properties={aggregator-connection-configuration=eyJncm91cGluZyI6I.........................
+ SEND: SERVER-GET-NODE-SSH-CREDENTIALS 192.168.16.4
+ SelfHealingPlugin: createRecoveryTask(): Created recovery task for Node: id=node_3866738cb0f4_2002, address=192.168.16.4
+ ```
+ *Aggregator log: EMS client recovery actions*
+ ```
+ SelfHealingPlugin: Retry #0: Recovering node: id=node_3866738cb0f4_2002, address=192.168.16.4
+ VmNodeRecoveryTask: connectToNode(): Connecting to node using SSH: address=192.168.16.4, port=22, username=ubuntu
+ Connecting to server...
+ SSH client is ready
+ VmNodeRecoveryTask: runNodeRecovery(): Executing 3 recovery commands
+ ############## Initial wait......
+ ############## Waiting for 5000ms after Initial wait......
+ ############## Sending baguette client kill command......
+ ############## Waiting for 2000ms after Sending baguette client kill command......
+ ############## Sending baguette client start command......
+ ############## Waiting for 10000ms after Sending baguette client start command......
+ SET-CLIENT-CONFIG rO0ABXNyAClldS5tZWxvZGljLmV2ZW50LnV0aWwuQ2xpZW50Q29uZmlndXJhdGlvbiAe4raCjfZzAgABTAASbm9kZXNXaXRob3V0Q2xpZW50dAAPTGphdmEvdXRpbC9TZXQ7eHBzcgARamF2YS51dGlsLkhhc2hTZXS6RIWVlri3NAMAAHhwdwwAAAAQP0AAAAAAAAB4
+ New client config.: ClientConfiguration(nodesWithoutClient=[])
+ VmNodeRecoveryTask: runNodeRecovery(): Executed 3 recovery commands
+ VmNodeRecoveryTask: disconnectFromNode(): Disconnecting from node: address=192.168.16.4, port=22, username=ubuntu
+ Stopping SSH client...
+ SSH client stopped
+ OUT> Last login: Sat Feb 12 10:40:09 2022 from 172.29.0.4
+ OUT>
+ OUT> pwd
+ OUT> ubuntu@3866738cb0f4:~$ pwd
+ OUT> /home/ubuntu
+ OUT> ubuntu@3866738cb0f4:~$ /opt/baguette-client/bin/kill.sh
+ OUT> Baguette client is not running
+ OUT> ubuntu@3866738cb0f4:~$ /opt/baguette-client/bin/run.sh
+ OUT> Starting baguette client...
+ OUT> MELODIC_CONFIG_DIR=/opt/baguette-client/conf
+ OUT> LOG_FILE=/opt/baguette-client/logs/output.txt
+ OUT> Baguette client PID: 973
+ VmNodeRecoveryTask: redirectSshOutput(): Connection closed: id=OUT
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ ```
+ *Aggregator log: EMS client joined back to cluster*
+ ```
+ CLM: MEMBER_ADDED: node=node_3866738cb0f4_2002
+ BRU: Brokers after cluster change: [Member{id=node_581d745be52c_2001, address=192.168.16.3:2001, properties={aggregator-connection-configuration=eyJncm91cGluZyI6I.........................
+ SelfHealingPlugin: cancelRecoveryTask(): Cancelled recovery task for Node: id=node_3866738cb0f4_2002, address=192.168.16.4
+ ```
+ * ***Normal node whose EMS client was killed***, for EMS client's logs indicating its restart.
+ *Normal node: EMS client restarts*
+ ```
+ Starting baguette client...
+ MELODIC_CONFIG_DIR=/opt/baguette-client/conf
+ LOG_FILE=/opt/baguette-client/logs/output.txt
+ ____ _ _ _____ _ _ _
+ | _ \ | | | | / ____| (_) | |
+ | |_) | __ _ __ _ _ _ ___| |_| |_ ___ | | | |_ ___ _ __ | |_
+ | _ < / _` |/ _` | | | |/ _ \ __| __/ _ \ | | | | |/ _ \ '_ \| __|
+ | |_) | (_| | (_| | |_| | __/ |_| || __/ | |____| | | __/ | | | |_
+ |____/ \__,_|\__, |\__,_|\___|\__|\__\___| \_____|_|_|\___|_| |_|\__|
+ __/ |
+ |___/
+ Starting BaguetteClient v4.5.0-SNAPSHOT on 3866738cb0f4 with PID 973 (/opt/baguette-client/jars/baguette-client-4.5.0-SNAPSHOT.jar started by ubuntu in /opt/baguette-client)
+ No active profile set, falling back to default profiles: default
+ loadCachedClientId: Used cached Client Id: null
+ Password encoder class name is empty. Default instance of PasswordEncoder will be created
+ PasswordUtil.setPasswordEncoder(): PasswordEncoder set to: eu.melodic.event.util.password.AsterisksPasswordEncoder
+ PasswordUtil: Initialized default Password Encoder: eu.melodic.event.util.password.AsterisksPasswordEncoder
+ BrokerConfig.initializeKeyAndCert(): Initializing keystore, truststore and certificate for Broker-SSL...
+ KeystoreUtil.initializeKeystoresAndCertificate(): Initializing keystores and certificate
+ BrokerConfig.initializeKeyAndCert(): Initializing keystore, truststore and certificate for Broker-SSL... done
+ BrokerConfig: Creating new Broker Service instance: url=ssl://0.0.0.0:61617
+ .........................
+ .........................
+ CLUSTER-JOIN IMU-ZONE GLOBAL:PER_ZONE:PER_INSTANCE start-election=true 192.168.16.4:2002 192.168.16.3:2001
+ CLUSTER-JOIN ARGS: cluster-id=IMU-ZONE, groupings=GLOBAL:PER_ZONE:PER_INSTANCE, local-node=192.168.16.4:2002, other-nodes=[192.168.16.3:2001]
+ CLUSTER-JOIN ARGS: Groupings: global=GLOBAL, aggregator=PER_ZONE, node=PER_INSTANCE
+ CLM: Local address used for building Atomix: 192.168.16.4:2002
+ CLM: Building Atomix: Other members: [Node{id=node_3866738cb0f4_2001, address=192.168.16.3:2001}]
+ .........................
+ .........................
+ CLUSTER-EXEC broker list
+ Cluster executes command: broker list
+ CLI: Node status and scores:
+ CLI: node_581d745be52c_2001 [AGGREGATOR, 0.6640625, 9e790362-704c-4d9e-aa74-77f76e297816]
+ CLI: node_3866738cb0f4_2002 [CANDIDATE, 0.6640625, 44a5afb7-890a-4090-9f80-c65f046aeddd]
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ ```
+ * ***Other Normal nodes***, for logs about, (i) EMS client leaving cluster, (ii) EMS client joining to cluster, but NO logs about recovery actions.
+
+
+
+**B.4.b) Failed recovery of an EMS client in a clustered Normal node**
+
+> Test Case Quick Notes:
+> - Kill the VM of any Normal node, except Aggregator.
+> - The Aggregator will try to connect to the affected VM but fail.
+> - After a configured number of retries Aggregator will give up.
+
+**After Application deployment...**
+
+ * Terminate the VM of a Normal node, except the Aggregator's
+
+**Next, check the logs of:**
+
+ * ***EMS server***, for a recovery Give up message from Aggregator
+ *EMS server log: Aggregator queries for node's credentials*
+ ```
+ e.m.e.b.server.ClientShellCommand : #00000==> PUSH : {"random":"cecab3d4-4c09-43b1-b6fa-3534d37bbc8f","zone-id":"IMU-ZONE","address":"192.168.16.4","provider":"AWS","name":"vm2","ssh.port":"22","ssh.username":"ubuntu","ssh.password":"ubuntu","id":"vm2","type":"VM","operatingSystem":"UBUNTU","CLIENT_ID":"VM-UBUNTU-vm2-vm2-AWS-vm2-cecab3d4-4c09-43b1-b6fa-3534d37bbc8f",.........................
+ ```
+ *EMS server log: Aggregator give up message*
+ ```
+ ......................... BUG: No Give up message
+ ```
+ Note: EMS client disconnection from EMS server will also be logged in EMS server logs, but no recovery action will be taken by EMS server.
+
+ * ***Aggregator***, for messages reporting, (i) an EMS client left cluster, (ii) a number of failed connection attempts to the VM, and (iii) a recovery give up message.
+ *Aggregator log: An EMS client left cluster*
+ ```
+ CLM: MEMBER_REMOVED: node=node_3866738cb0f4_2002
+ BRU: Brokers after cluster change: [Member{id=node_581d745be52c_2001, address=192.168.16.3:2001, properties={aggregator-connection-configuration=eyJncm91cGluZyI6I.........................
+ SEND: SERVER-GET-NODE-SSH-CREDENTIALS 192.168.16.4
+ SelfHealingPlugin: createRecoveryTask(): Created recovery task for Node: id=node_3866738cb0f4_2002, address=192.168.16.4
+ ```
+ *Aggregator log: EMS client recovery actions and give up message*
+ ```
+ SelfHealingPlugin: Retry #0: Recovering node: id=node_3866738cb0f4_2002, address=192.168.16.4
+ VmNodeRecoveryTask: connectToNode(): Connecting to node using SSH: address=192.168.16.4, port=22, username=ubuntu
+ Connecting to server...
+ SelfHealingPlugin: EXCEPTION while recovering node: node-info={random=cecab3d4-4c09-43b1-b6fa-3534d37bbc8f, zone-id=IMU-ZONE, address=192.168.16.4,.........................
+ java.net.NoRouteToHostException: No route to host
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.checkConnect(Native Method)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finishConnect(UnixAsynchronousSocketChannelImpl.java:252)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finish(UnixAsynchronousSocketChannelImpl.java:198)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.onEvent(UnixAsynchronousSocketChannelImpl.java:213)
+ at sun.nio.ch.EPollPort$EventHandlerTask.run(EPollPort.java:293)
+ at java.lang.Thread.run(Thread.java:748)
+
+ SelfHealingPlugin: Retry #0: Recovering node: id=node_3866738cb0f4_2002, address=192.168.16.4
+ VmNodeRecoveryTask: connectToNode(): Connecting to node using SSH: address=192.168.16.4, port=22, username=ubuntu
+ Connecting to server...
+ SelfHealingPlugin: EXCEPTION while recovering node: node-info={random=cecab3d4-4c09-43b1-b6fa-3534d37bbc8f, zone-id=IMU-ZONE, address=192.168.16.4,.........................
+ java.net.NoRouteToHostException: No route to host
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.checkConnect(Native Method)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finishConnect(UnixAsynchronousSocketChannelImpl.java:252)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finish(UnixAsynchronousSocketChannelImpl.java:198)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.onEvent(UnixAsynchronousSocketChannelImpl.java:213)
+ at sun.nio.ch.EPollPort$EventHandlerTask.run(EPollPort.java:293)
+ at java.lang.Thread.run(Thread.java:748)
+ ```
+ ```
+ ......................... BUG: No Give up message
+ ```
+ * ***Normal nodes that operate***, for logs about EMS client leaving cluster, and NO logs about recovery actions or EMS client joining back.
+
+
+
+**B.5.a) Successful recovery of EMS client of the cluster Aggregator**
+
+> Test Case Quick Notes:
+> - Kill EMS client of the Aggregator.
+> - The cluster nodes will elect a new Aggregator. Check logs of any cluster node.
+> - The new Aggregator will recover the killed EMS client after a configured period of time.
+> - Check new Aggregator log messages for node leaving cluster, being elected as Aggregator, recovery actions, and node joining back.
+> - Old Aggregator will join back as a Normal node.
+
+**After Application deployment...**
+
+ * Connect to the Aggregator node, and ***kill*** EMS client.
+
+**Next, check the logs of:**
+
+ * ***EMS server***, for message about Aggregator change.
+ *EMS server log: A new Aggregator initialized*
+ ```
+ e.m.e.b.server.ClientShellCommand : #00003--> Client status changed: CANDIDATE --> INITIALIZING
+ e.m.e.b.server.ClientShellCommand : #00003--> Client grouping changed: PER_INSTANCE --> PER_ZONE
+ e.m.e.b.s.c.c.ClusteringCoordinator : Updated aggregator of zone: IMU-ZONE -- New aggregator: #00003 @ 192.168.16.4 (VM-UBUNTU-vm2-vm2-AWS-vm2-cecab3d4-4c09-43b1-b6fa-3534d37bbc8f)
+ e.m.e.b.server.ClientShellCommand : #00003--> Client status changed: INITIALIZING --> AGGREGATOR
+ ```
+ *EMS server log: Aggregator queries for node's credentials*
+ ```
+ e.m.e.b.server.ClientShellCommand : #00003==> PUSH : {"random":"8a20f11c-eaf2-4b6e-b827-d8a25a57cb0a","zone-id":"IMU-ZONE","address":"192.168.16.3","provider":"AWS",.........................
+ ```
+ Note: Aggregator disconnection from EMS server will also be logged in EMS server logs, but no recovery action will be taken by EMS server.
+
+ * ***New Aggregator***, for log messages about, (i) EMS client leaving cluster, (ii) being elected as Aggregator, (iii) recovery actions, and (iv) EMS client joining to cluster.
+ *New Aggregator log: Old Aggregator left cluster - New Aggregator election*
+ ```
+ CLM: MEMBER_REMOVED: node=node_581d745be52c_2001
+ BRU: Brokers after cluster change: []
+
+ BRU: Broker election requested: broadcasting election message...
+ BRU: **** Broker message received: election
+ BRU: **** BROKER: Starting Broker election:
+ BRU: Member-Score: node_3866738cb0f4_2002 => 0.6640625 d4f2eb55-c355-4715-8a27-9f7c12c32924
+ BRU: Broker: node_3866738cb0f4_2002
+ ```
+ *New Aggregator log: Initializing to become the new Aggregator*
+ ```
+ BRU: Node will become Broker. Initializing...
+ NOTIFY-STATUS-CHANGE: INITIALIZING
+ initialize(): Node starts initializing as Aggregator...
+ .........................
+ .........................
+ Notifying Baguette Server i am the new aggregator
+ .........................
+ .........................
+ BRU: Node is ready to act as Aggregator. Ready
+ BRU: **** Broker message received: ready node_3866738cb0f4_2002 New config: eyJncm91cGluZyI6IlBFUl9aT05FIiwidXJsIjoic3NsOi8vMTkyLjE2OC4xNi40OjYxNjE3P2RhZW1vbj10cn.........................
+ BRU: **** BROKER: New Broker is ready: node_3866738cb0f4_2002, New config: eyJncm91cGluZyI6IlBFUl9aT05FIiwidXJsIjoic3NsOi8vMTkyLjE2OC4xNi40OjYxNjE3P2RhZW1vbj10cn.........................
+ BRU: Node configuration updated: eyJncm91cGluZyI6IlBFUl9aT05FIiwidXJsIjoic3NsOi8vMTkyLjE2OC4xNi40OjYxNjE3P2RhZW1vbj10cn.........................
+ ```
+ *New Aggregator log: Requesting old Aggregator node's credentials*
+ ```
+ SEND: SERVER-GET-NODE-SSH-CREDENTIALS 192.168.16.3
+ SelfHealingPlugin: createRecoveryTask(): Created recovery task for Node: id=node_581d745be52c_2001, address=192.168.16.3
+ ```
+ *New Aggregator log: Recovery actions of old Aggregator*
+ ```
+ SelfHealingPlugin: Retry #0: Recovering node: id=node_581d745be52c_2001, address=192.168.16.3
+ VmNodeRecoveryTask: connectToNode(): Connecting to node using SSH: address=192.168.16.3, port=22, username=ubuntu
+ Connecting to server...
+ SSH client is ready
+ VmNodeRecoveryTask: runNodeRecovery(): Executing 3 recovery commands
+ ############## Initial wait......
+ ############## Waiting for 5000ms after Initial wait......
+ ############## Sending baguette client kill command......
+ ############## Waiting for 2000ms after Sending baguette client kill command......
+ ############## Sending baguette client start command......
+ ############## Waiting for 10000ms after Sending baguette client start command......
+ SET-CLIENT-CONFIG rO0ABXNyAClldS5tZWxvZGljLmV2ZW50LnV0aWwuQ2xpZW50Q29uZmlndXJhdGlvbiAe4raCjfZzAgABTAASbm9kZXNXaXRob3V0Q2xpZW50dAAPTGphdmEvdXRpbC9TZXQ7eHBzcgARamF2YS51dGlsLkhhc2hTZXS6RIWVlri3NAMAAHhwdwwAAAAQP0AAAAAAAAB4
+ New client config.: ClientConfiguration(nodesWithoutClient=[])
+ VmNodeRecoveryTask: runNodeRecovery(): Executed 3 recovery commands
+ VmNodeRecoveryTask: disconnectFromNode(): Disconnecting from node: address=192.168.16.3, port=22, username=ubuntu
+ Stopping SSH client...
+ SSH client stopped
+ OUT> Last login: Sat Feb 12 10:40:09 2022 from 172.29.0.4
+ OUT>
+ OUT> pwd
+ OUT> ubuntu@581d745be52c:~$ pwd
+ OUT> /home/ubuntu
+ OUT> ubuntu@581d745be52c:~$ /opt/baguette-client/bin/kill.sh
+ OUT> Baguette client is not running
+ OUT> ubuntu@581d745be52c:~$ /opt/baguette-client/bin/run.sh
+ OUT> Starting baguette client...
+ OUT> MELODIC_CONFIG_DIR=/opt/baguette-client/conf
+ OUT> LOG_FILE=/opt/baguette-client/logs/output.txt
+ OUT> Baguette client PID: 1242
+ VmNodeRecoveryTask: redirectSshOutput(): Connection closed: id=OUT
+ ```
+ *New Aggregator log: Old Aggregator joins back to cluster as plain node*
+ ```
+ CLM: MEMBER_ADDED: node=node_581d745be52c_2001
+ BRU: Brokers after cluster change: [Member{id=node_581d745be52c_2001, address=192.168.16.3:2001, properties={aggregator-connection-configuration=eyJncm91cGluZyI6I.........................
+ SelfHealingPlugin: cancelRecoveryTask(): Cancelled recovery task for Node: id=node_581d745be52c_2001, address=192.168.16.3
+ ```
+ * ***Old Aggregator node whose EMS client was killed***, for EMS client's logs indicating its restart (as a `PER_INSTANCE` node).
+ *Normal node: Old Aggregator restarts as a plain Normal node*
+ ```
+ Starting baguette client...
+ MELODIC_CONFIG_DIR=/opt/baguette-client/conf
+ LOG_FILE=/opt/baguette-client/logs/output.txt
+ ____ _ _ _____ _ _ _
+ | _ \ | | | | / ____| (_) | |
+ | |_) | __ _ __ _ _ _ ___| |_| |_ ___ | | | |_ ___ _ __ | |_
+ | _ < / _` |/ _` | | | |/ _ \ __| __/ _ \ | | | | |/ _ \ '_ \| __|
+ | |_) | (_| | (_| | |_| | __/ |_| || __/ | |____| | | __/ | | | |_
+ |____/ \__,_|\__, |\__,_|\___|\__|\__\___| \_____|_|_|\___|_| |_|\__|
+ __/ |
+ |___/
+ Starting BaguetteClient v4.5.0-SNAPSHOT on 581d745be52c with PID 1242 (/opt/baguette-client/jars/baguette-client-4.5.0-SNAPSHOT.jar started by ubuntu in /opt/baguette-client)
+ No active profile set, falling back to default profiles: default
+ loadCachedClientId: Used cached Client Id: null
+ Password encoder class name is empty. Default instance of PasswordEncoder will be created
+ PasswordUtil.setPasswordEncoder(): PasswordEncoder set to: eu.melodic.event.util.password.AsterisksPasswordEncoder
+ PasswordUtil: Initialized default Password Encoder: eu.melodic.event.util.password.AsterisksPasswordEncoder
+ BrokerConfig.initializeKeyAndCert(): Initializing keystore, truststore and certificate for Broker-SSL...
+ KeystoreUtil.initializeKeystoresAndCertificate(): Initializing keystores and certificate
+ BrokerConfig.initializeKeyAndCert(): Initializing keystore, truststore and certificate for Broker-SSL... done
+ .........................
+ .........................
+ CLM: Joining cluster...
+ NOTIFY-STATUS-CHANGE: CANDIDATE
+ .........................
+ .........................
+ Joined to cluster
+ .........................
+ .........................
+ CLUSTER-EXEC broker list
+ Cluster executes command: broker list
+ CLI: Node status and scores:
+ CLI: node_3866738cb0f4_2002 [AGGREGATOR, 0.6640625, d4f2eb55-c355-4715-8a27-9f7c12c32924]
+ CLI: node_581d745be52c_2001 [CANDIDATE, 0.6640625, e974ebcd-e11e-4baa-b3cb-fa34242705ff]
+ ```
+ * ***Other Normal nodes***, for log messages about, (i) EMS client leaving cluster, (ii) Aggregator election, (iii) EMS client joining to cluster, but NO logs about recovery actions.
+
+
+
+**B.5.b) Failed recovery of EMS client of the cluster Aggregator**
+
+> Test Case Quick Notes:
+> - Kill the VM of the Aggregator.
+> - The cluster nodes will elect a new Aggregator. Check logs of any cluster node.
+> - The new Aggregator will try to connect to the affected VM but fail.
+> - After a configured number of retries new Aggregator will give up.
+
+**After Application deployment...**
+
+ * Terminate the Aggregator's VM
+
+**Next, check the logs of:**
+
+ * ***EMS server***, for one message about Aggregator change, and one about new Aggregator giving up recovery.
+ *EMS server log: A new Aggregator initialized*
+ ```
+ e.m.e.b.server.ClientShellCommand : #00004--> Client status changed: CANDIDATE --> INITIALIZING
+ e.m.e.b.server.ClientShellCommand : #00004--> Client grouping changed: PER_INSTANCE --> PER_ZONE
+ e.m.e.b.s.c.c.ClusteringCoordinator : Updated aggregator of zone: IMU-ZONE -- New aggregator: #00004 @ 192.168.16.3 (VM-UBUNTU-vm1-vm1-AWS-vm1-8a20f11c-eaf2-4b6e-b827-d8a25a57cb0a)
+ e.m.e.b.server.ClientShellCommand : #00004--> Client status changed: INITIALIZING --> AGGREGATOR
+ ```
+ *EMS server log: New Aggregator queries for node's credentials*
+ ```
+ e.m.e.b.server.ClientShellCommand : #00004==> PUSH : {"random":"4abf9ae2-b7fc-4e8c-b6d9-464623d1b05f","zone-id":"IMU-ZONE","address":"192.168.16.4","provider":"AWS","name":"vm2","ssh.port":"22","ssh.username":"ubuntu","ssh.password":"ubuntu",.........................
+ ```
+ *EMS server log: New Aggregator give up message*
+ ```
+ ......................... BUG: No give up message
+ ```
+ Note: Aggregator disconnection from EMS server will also be logged in EMS server logs, but no recovery action will be taken by EMS server.
+
+ * ***New Aggregator***, for messages reporting, (i) an EMS client left cluster, (ii) being elected as Aggregator, (iii) a number of failed connection attempts to the VM, and (iv) a recovery give up message.
+ *New Aggregator log: Old Aggregator left cluster - New Aggregator election*
+ ```
+ CLM: MEMBER_REMOVED: node=node_3866738cb0f4_2002
+ BRU: Brokers after cluster change: []
+ BRU: Broker election requested: broadcasting election message...
+ BRU: **** Broker message received: election
+ BRU: **** BROKER: Starting Broker election:
+ BRU: Member-Score: node_581d745be52c_2001 => 0.6640625 e974ebcd-e11e-4baa-b3cb-fa34242705ff
+ BRU: Broker: node_581d745be52c_2001
+ ```
+ *New Aggregator log: Initializing to become the new Aggregator*
+ ```
+ CLM: MEMBER_REMOVED: node=node_3866738cb0f4_2002
+ BRU: Brokers after cluster change: []
+ BRU: Broker election requested: broadcasting election message...
+ BRU: **** Broker message received: election
+ BRU: **** BROKER: Starting Broker election:
+ BRU: Member-Score: node_581d745be52c_2001 => 0.6640625 e974ebcd-e11e-4baa-b3cb-fa34242705ff
+ BRU: Broker: node_581d745be52c_2001
+
+ BRU: Node will become Broker. Initializing...
+ 2022-02-16 12:01:34.448 [INFO ] NOTIFY-STATUS-CHANGE: INITIALIZING
+ initialize(): Node starts initializing as Aggregator...
+ .........................
+ .........................
+ Notifying Baguette Server i am the new aggregator
+ .........................
+ .........................
+ BRU: Node is ready to act as Aggregator. Ready
+ BRU: **** Broker message received: ready node_581d745be52c_2001 New config: eyJncm91cGluZyI6IlBFUl9aT05FIiwidXJsIjoic3NsOi8vMTkyLjE2OC4xNi4zOjYxNjE3P2RhZW1vbj10cn.........................
+ BRU: **** BROKER: New Broker is ready: node_581d745be52c_2001, New config: eyJncm91cGluZyI6IlBFUl9aT05FIiwidXJsIjoic3NsOi8vMTkyLjE2OC4xNi4zOjYxNjE3P2RhZW1vbj10cn.........................
+ BRU: Node configuration updated: eyJncm91cGluZyI6IlBFUl9aT05FIiwidXJsIjoic3NsOi8vMTkyLjE2OC4xNi4zOjYxNjE3P2RhZW1vbj10cn.........................
+ ```
+ *New Aggregator log: Requesting old Aggregator node's credentials*
+ ```
+ SEND: SERVER-GET-NODE-SSH-CREDENTIALS 192.168.16.4
+ SelfHealingPlugin: createRecoveryTask(): Created recovery task for Node: id=node_3866738cb0f4_2002, address=192.168.16.4
+ ```
+ *New Aggregator log: Failing recovery actions of old Aggregator and give up message*
+ ```
+ SelfHealingPlugin: Retry #0: Recovering node: id=node_3866738cb0f4_2002, address=192.168.16.4
+ VmNodeRecoveryTask: connectToNode(): Connecting to node using SSH: address=192.168.16.4, port=22, username=ubuntu
+ Connecting to server...
+ SelfHealingPlugin: EXCEPTION while recovering node: node-info={random=4abf9ae2-b7fc-4e8c-b6d9-464623d1b05f, zone-id=IMU-ZONE, address=192.168.16.4,.........................
+ java.net.NoRouteToHostException: No route to host
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.checkConnect(Native Method)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finishConnect(UnixAsynchronousSocketChannelImpl.java:252)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finish(UnixAsynchronousSocketChannelImpl.java:198)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.onEvent(UnixAsynchronousSocketChannelImpl.java:213)
+ at sun.nio.ch.EPollPort$EventHandlerTask.run(EPollPort.java:293)
+ at java.lang.Thread.run(Thread.java:748)
+
+ SelfHealingPlugin: Retry #0: Recovering node: id=node_3866738cb0f4_2002, address=192.168.16.4
+ VmNodeRecoveryTask: connectToNode(): Connecting to node using SSH: address=192.168.16.4, port=22, username=ubuntu
+ Connecting to server...
+ SelfHealingPlugin: EXCEPTION while recovering node: node-info={random=4abf9ae2-b7fc-4e8c-b6d9-464623d1b05f, zone-id=IMU-ZONE, address=192.168.16.4,.........................
+ java.net.NoRouteToHostException: No route to host
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.checkConnect(Native Method)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finishConnect(UnixAsynchronousSocketChannelImpl.java:252)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finish(UnixAsynchronousSocketChannelImpl.java:198)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.onEvent(UnixAsynchronousSocketChannelImpl.java:213)
+ at sun.nio.ch.EPollPort$EventHandlerTask.run(EPollPort.java:293)
+ at java.lang.Thread.run(Thread.java:748)
+ ```
+ ```
+ ......................... BUG: No give up message
+ ```
+ * ***Normal nodes that operate***, for log messages about, (i) EMS client leaving cluster, (ii) Aggregator election, but NO logs about recovery actions, or EMS client joining back to cluster.
+
+
+
+**B.6.a) Successful recovery of Netdata agent in a clustered RL node**
+
+> Test Case Quick Notes:
+> - Kill Netdata agent of any RL node.
+> - The Aggregator will recover the killed Netdata agent after a configured period of time.
+> - Check Aggregator log messages reporting failures to collect metrics, recovery actions, and successful metrics collection.
+
+**After Application deployment...**
+
+ * Connect to an RL node and ***kill*** Netdata agent.
+
+**Next, check the logs of:**
+
+ * ***EMS server***, for NO logs indicating a Netdata failure and recovery.
+ *EMS server log: Aggregator queries for RL node's credentials*
+ ```
+ e.m.e.b.server.ClientShellCommand : #00000==> PUSH : {"random":"4b676a58-e00e-4ddf-a21e-b1c0d1382cd6","zone-id":"IMU-ZONE","address":"192.168.96.2","provider":"AWS",.........................
+ ```
+ * ***Aggregator***, for logs reporting, (i) connection failures to a Netdata agent, (ii) recovery actions, and (iii) successful connection to Netdata agent and collection of metrics.
+ *Aggregator log: Failed metric collection attempts from an RL node's Netdata agent*
+ ```
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ Collectors::Netdata: Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Collectors::Netdata: Collecting data from url: http://192.168.96.2:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Exception while collecting metrics from node: 192.168.96.2, #errors=1, exception: org.springframework.web.client.ResourceAccessException: I/O error on GET request for "http://192.168.96.2:19999/api/v1/allmetrics": Connection refused (Connection refused); nested exception is java.net.ConnectException: Connection refused (Connection refused) -> java.net.ConnectException: Connection refused (Connection refused)
+
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ Collectors::Netdata: Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Collectors::Netdata: Collecting data from url: http://192.168.96.2:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Exception while collecting metrics from node: 192.168.96.2, #errors=2, exception: org.springframework.web.client.ResourceAccessException: I/O error on GET request for "http://192.168.96.2:19999/api/v1/allmetrics": Connection refused (Connection refused); nested exception is java.net.ConnectException: Connection refused (Connection refused) -> java.net.ConnectException: Connection refused (Connection refused)
+
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ Collectors::Netdata: Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Collectors::Netdata: Collecting data from url: http://192.168.96.2:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Exception while collecting metrics from node: 192.168.96.2, #errors=3, exception: org.springframework.web.client.ResourceAccessException: I/O error on GET request for "http://192.168.96.2:19999/api/v1/allmetrics": Connection refused (Connection refused); nested exception is java.net.ConnectException: Connection refused (Connection refused) -> java.net.ConnectException: Connection refused (Connection refused)
+ Collectors::Netdata: Too many consecutive errors occurred while attempting to collect metrics from node: 192.168.96.2, num-of-errors=3
+ Collectors::Netdata: Will pause metrics collection from node for 60 seconds: 192.168.96.2
+ ```
+ *Aggregator log: Requesting RL node's credentials*
+ ```
+ SEND: SERVER-GET-NODE-SSH-CREDENTIALS 192.168.96.2
+ SelfHealingPlugin: createRecoveryTask(): Created recovery task for Node: id=null, address=192.168.96.2
+ ```
+ *Aggregator log: Netdata agent recovery actions*
+ ```
+ SelfHealingPlugin: Retry #0: Recovering node: id=null, address=192.168.96.2
+ VmNodeRecoveryTask: connectToNode(): Connecting to node using SSH: address=192.168.96.2, port=22, username=ubuntu
+ Connecting to server...
+ SSH client is ready
+ VmNodeRecoveryTask: runNodeRecovery(): Executing 3 recovery commands
+ ############## Initial wait......
+ ############## Waiting for 5000ms after Initial wait......
+ ############## Sending Netdata agent kill command......
+ ############## Waiting for 2000ms after Sending Netdata agent kill command......
+ ############## Sending Netdata agent start command......
+ ############## Waiting for 10000ms after Sending Netdata agent start command......
+ VmNodeRecoveryTask: runNodeRecovery(): Executed 3 recovery commands
+ VmNodeRecoveryTask: disconnectFromNode(): Disconnecting from node: address=192.168.96.2, port=22, username=ubuntu
+ Stopping SSH client...
+ SSH client stopped
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ Collectors::Netdata: Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Collectors::Netdata: Node is in ignore list: 192.168.96.2
+ OUT> Last login: Sat Feb 12 10:40:09 2022 from 172.29.0.4
+ OUT>
+ OUT> pwd
+ OUT> ubuntu@ec17d3e87fb4:~$ pwd
+ OUT> /home/ubuntu
+ OUT> ubuntu@ec17d3e87fb4:~$
+ OUT> < -U netdata -o "pid" --no-headers | xargs kill -9'
+ OUT>
+ OUT> Usage:
+ OUT> kill [options] <pid> [...]
+ OUT>
+ OUT> Options:
+ OUT> <pid> [...] send signal to every <pid> listed
+ OUT> -<signal>, -s, --signal <signal>
+ OUT> specify the <signal> to be sent
+ OUT> -l, --list=[<signal>] list all signal names, or convert one to a name
+ OUT> -L, --table list all signal names in a nice table
+ OUT>
+ OUT> -h, --help display this help and exit
+ OUT> -V, --version output version information and exit
+ OUT>
+ OUT> For more details see kill(1).
+ OUT> ubuntu@ec17d3e87fb4:~$ sudo netdata
+ OUT> 2022-02-16 12:27:55: netdata INFO : MAIN : CONFIG: cannot load cloud config '/var/lib/netdata/cloud.d/cloud.conf'. Running with internal defaults.
+ VmNodeRecoveryTask: redirectSshOutput(): Connection closed: id=OUT
+ ```
+ *Aggregator log: Successful metrics collection from RL node's Netdata agent*
+ ```
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ Collectors::Netdata: Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Collectors::Netdata: Node is in ignore list: 192.168.96.2
+
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ Collectors::Netdata: Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Collectors::Netdata: Node is in ignore list: 192.168.96.2
+
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ Collectors::Netdata: Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Collectors::Netdata: Node is in ignore list: 192.168.96.2
+
+ Collectors::Netdata: Resumed metrics collection from node: 192.168.96.2
+ SelfHealingPlugin: cancelRecoveryTask(): Cancelled recovery task for Node: id=null, address=192.168.96.2
+
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ Collectors::Netdata: Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Collectors::Netdata: Collecting data from url: http://192.168.96.2:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ ```
+ * ***RL node with killed Netdata***, check if the Netdata processes have started again.
+ *RL node shell: Recovered Netdata agent process*
+ ```sh
+ # ps -ef |grep netdata
+ root 610 29 0 12:27 pts/0 00:00:00 grep --color=auto netd
+ .........................
+ .........................
+ # ps -ef |grep netdata
+ netdata 623 1 5 12:27 ? 00:00:51 netdata
+ netdata 625 623 0 12:27 ? 00:00:02 /usr/sbin/netdata --special-spawn-server
+ root 894 623 0 12:28 ? 00:00:05 /usr/libexec/netdata/plugins.d/apps.plugin 1
+ netdata 1050 623 0 12:28 ? 00:00:04 /usr/libexec/netdata/plugins.d/go.d.plugin 1
+ root 1105 29 0 12:45 pts/0 00:00:00 grep --color=auto netd
+ ```
+ * ***Normal nodes (that operate)***, for NO logs indicating connection failures or recovery actions.
+
+
+
+**B.6.b) Failed recovery of Netdata agent in a clustered RL node**
+
+> Test Case Quick Notes:
+> - Kill the VM of any RL node.
+> - The EMS server will try to connect to the affected VM but fail.
+> - After a configured number of retries, the EMS server will give up.
+
+**After Application deployment...**
+
+ * Terminate the VM of an RL node.
+
+**You need to check the logs of:**
+
+ * ***EMS server***, for NO logs indicating a Netdata failure and recovery, BUT reporting a recovery give up from Aggregator.
+ *EMS server log: Aggregator queries for RL node's credentials*
+ ```
+ e.m.e.b.server.ClientShellCommand : #00000==> PUSH : {"random":"4b676a58-e00e-4ddf-a21e-b1c0d1382cd6","zone-id":"IMU-ZONE","address":"192.168.96.2","provider":"AWS",.........................
+ ```
+ *EMS server log: Aggregator give up message*
+ ```
+ ......................... BUG: No Give up message
+ ```
+ * ***Aggregator***, for logs reporting (i) connection failures to a Netdata agent, (ii) a number of failed attempts to connect to VM, and (iii) a recovery give up message.
+ *Aggregator log: Failed metric collection attempts from a RL node's Netdata agent*
+ ```
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ Collectors::Netdata: Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Collectors::Netdata: Collecting data from url: http://192.168.96.2:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Exception while collecting metrics from node: 192.168.96.2, #errors=1, exception: org.springframework.web.client.ResourceAccessException: I/O error on GET request for "http://192.168.96.2:19999/api/v1/allmetrics": connect timed out; nested exception is java.net.SocketTimeoutException: connect timed out -> java.net.SocketTimeoutException: connect timed out
+
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ Collectors::Netdata: Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Collectors::Netdata: Collecting data from url: http://192.168.96.2:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Exception while collecting metrics from node: 192.168.96.2, #errors=2, exception: org.springframework.web.client.ResourceAccessException: I/O error on GET request for "http://192.168.96.2:19999/api/v1/allmetrics": connect timed out; nested exception is java.net.SocketTimeoutException: connect timed out -> java.net.SocketTimeoutException: connect timed out
+
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ Collectors::Netdata: Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Collectors::Netdata: Collecting data from url: http://192.168.96.2:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Exception while collecting metrics from node: 192.168.96.2, #errors=3, exception: org.springframework.web.client.ResourceAccessException: I/O error on GET request for "http://192.168.96.2:19999/api/v1/allmetrics": connect timed out; nested exception is java.net.SocketTimeoutException: connect timed out -> java.net.SocketTimeoutException: connect timed out
+ Collectors::Netdata: Too many consecutive errors occurred while attempting to collect metrics from node: 192.168.96.2, num-of-errors=3
+ Collectors::Netdata: Will pause metrics collection from node for 60 seconds: 192.168.96.2
+ ```
+ *Aggregator log: Requesting RL node's credentials*
+ ```
+ SEND: SERVER-GET-NODE-SSH-CREDENTIALS 192.168.96.2
+ SelfHealingPlugin: createRecoveryTask(): Created recovery task for Node: id=null, address=192.168.96.2
+ ```
+ *Aggregator log: Netdata agent recovery actions*
+ ```
+ SelfHealingPlugin: Retry #0: Recovering node: id=null, address=192.168.96.2
+ VmNodeRecoveryTask: connectToNode(): Connecting to node using SSH: address=192.168.96.2, port=22, username=ubuntu
+ Connecting to server...
+ Heartbeat 1645015873205
+ SelfHealingPlugin: EXCEPTION while recovering node: node-info={random=4b676a58-e00e-4ddf-a21e-b1c0d1382cd6, zone-id=IMU-ZONE, address=192.168.96.2, provider=AWS,.........................
+ java.net.NoRouteToHostException: No route to host
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.checkConnect(Native Method)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finishConnect(UnixAsynchronousSocketChannelImpl.java:252)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.finish(UnixAsynchronousSocketChannelImpl.java:198)
+ at sun.nio.ch.UnixAsynchronousSocketChannelImpl.onEvent(UnixAsynchronousSocketChannelImpl.java:213)
+ at sun.nio.ch.EPollPort$EventHandlerTask.run(EPollPort.java:293)
+ at java.lang.Thread.run(Thread.java:748)
+
+ Collecting metrics from local node...
+ Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Metrics: extracted=0, published=0, failed=0
+ Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Node is in ignore list: 192.168.96.2
+ .........................
+ Collecting metrics from local node...
+ Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Metrics: extracted=0, published=0, failed=0
+ Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Node is in ignore list: 192.168.96.2
+
+ Resumed metrics collection from node: 192.168.96.2
+ SelfHealingPlugin: cancelRecoveryTask(): Cancelled recovery task for Node: id=null, address=192.168.96.2
+
+ Collecting metrics from local node...
+ Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Metrics: extracted=0, published=0, failed=0
+ Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Collecting data from url: http://192.168.96.2:19999/api/v1/allmetrics?format=json
+ Exception while collecting metrics from node: 192.168.96.2, #errors=1, exception: org.springframework.web.client.ResourceAccessException: I/O error on GET request for "http://192.168.96.2:19999/api/v1/allmetrics": No route to host (Host unreachable); nested exception is java.net.NoRouteToHostException: No route to host (Host unreachable) -> java.net.NoRouteToHostException: No route to host (Host unreachable)
+
+ Collecting metrics from local node...
+ Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Metrics: extracted=0, published=0, failed=0
+ Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Collecting data from url: http://192.168.96.2:19999/api/v1/allmetrics?format=json
+ Exception while collecting metrics from node: 192.168.96.2, #errors=2, exception: org.springframework.web.client.ResourceAccessException: I/O error on GET request for "http://192.168.96.2:19999/api/v1/allmetrics": No route to host (Host unreachable); nested exception is java.net.NoRouteToHostException: No route to host (Host unreachable) -> java.net.NoRouteToHostException: No route to host (Host unreachable)
+
+ Collecting metrics from local node...
+ Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Metrics: extracted=0, published=0, failed=0
+ Collecting metrics from remote nodes (without EMS client): [192.168.96.2]
+ Collecting data from url: http://192.168.96.2:19999/api/v1/allmetrics?format=json
+ Exception while collecting metrics from node: 192.168.96.2, #errors=3, exception: org.springframework.web.client.ResourceAccessException: I/O error on GET request for "http://192.168.96.2:19999/api/v1/allmetrics": No route to host (Host unreachable); nested exception is java.net.NoRouteToHostException: No route to host (Host unreachable) -> java.net.NoRouteToHostException: No route to host (Host unreachable)
+ Too many consecutive errors occurred while attempting to collect metrics from node: 192.168.96.2, num-of-errors=3
+ Will pause metrics collection from node for 60 seconds: 192.168.96.2
+ .........................
+ ```
+ ```
+ ......................... BUG: No Give up message
+ ```
+ * ***Normal nodes (that operate)***, for NO logs indicating connection failures or recovery actions.
+
+
+
+**B.7) Successful recovery of Netdata agent in a clustered Normal node (including Aggregator)**
+
+> Test Case Quick Notes:
+> - Kill Netdata agent of any Normal node.
+> - The EMS client of the affected node will recover the killed Netdata agent after a configured period of time.
+> - Check EMS client's log for messages reporting failures to collect metrics, recovery actions, and successful metrics collection.
+
+**After Application deployment...**
+
+ * Connect to a Normal node and ***kill*** Netdata agent.
+
+**Next, check the logs of:**
+
+ * ***EMS server***, for NO log messages indicating connection failures to a Netdata agent or recovery actions.
+ * ***Aggregator***, for NO log messages indicating connection failures to a Netdata agent or recovery actions.
+ * ***Normal node with killed Netdata***, check if the Netdata processes have started again. Also check EMS client's log messages reporting failed metric collection attempts, recovery actions, and successful metric collection.
+ *Normal node - EMS client log: Failed attempts to collect metrics from Local Netdata agent*
+ ```
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Exception while collecting metrics from node: , #errors=1, exception: org.springframework.web.client.ResourceAccessException: I/O error on GET request for "http://127.0.0.1:19999/api/v1/allmetrics": Connection refused (Connection refused); nested exception is java.net.ConnectException: Connection refused (Connection refused) -> java.net.ConnectException: Connection refused (Connection refused)
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Exception while collecting metrics from node: , #errors=2, exception: org.springframework.web.client.ResourceAccessException: I/O error on GET request for "http://127.0.0.1:19999/api/v1/allmetrics": Connection refused (Connection refused); nested exception is java.net.ConnectException: Connection refused (Connection refused) -> java.net.ConnectException: Connection refused (Connection refused)
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Exception while collecting metrics from node: , #errors=3, exception: org.springframework.web.client.ResourceAccessException: I/O error on GET request for "http://127.0.0.1:19999/api/v1/allmetrics": Connection refused (Connection refused); nested exception is java.net.ConnectException: Connection refused (Connection refused) -> java.net.ConnectException: Connection refused (Connection refused)
+ Collectors::Netdata: Too many consecutive errors occurred while attempting to collect metrics from node: , num-of-errors=3
+ Collectors::Netdata: Will pause metrics collection from node for 60 seconds:
+ SelfHealingPlugin: createRecoveryTask(): Created recovery task for Node: id=null, address=
+ ```
+ *Normal node - EMS client log: Local Netdata agent recovery actions*
+ ```
+ SelfHealingPlugin: Retry #0: Recovering node: id=null, address=
+ ShellRecoveryTask: runNodeRecovery(): Executing 3 recovery commands
+ ############## Initial wait......
+ ############## Waiting for 5000ms after Initial wait......
+ ############## Sending Netdata agent kill command......
+ ############## Waiting for 2000ms after Sending Netdata agent kill command......
+ ############## Sending Netdata agent start command......
+ ############## Waiting for 10000ms after Sending Netdata agent start command......
+ ShellRecoveryTask: runNodeRecovery(): Executed 3 recovery commands
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Node is in ignore list:
+ OUT> /opt/baguette-client
+ ERR> -U: 1: -U: Syntax error: Unterminated quoted string
+ ERR> 2022-02-16 13:21:52: netdata INFO : MAIN : CONFIG: cannot load cloud config '/var/lib/netdata/cloud.d/cloud.conf'. Running with internal defaults.
+ ```
+ *Normal node - EMS client log: Successful metrics collection from Local Netdata agent*
+ ```
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Node is in ignore list:
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Node is in ignore list:
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Node is in ignore list:
+
+ Collectors::Netdata: Resumed metrics collection from node:
+ SelfHealingPlugin: cancelRecoveryTask(): Cancelled recovery task for Node: id=null, address=
+
+ Collectors::Netdata: Collecting metrics from local node...
+ Collectors::Netdata: Collecting data from url: http://127.0.0.1:19999/api/v1/allmetrics?format=json
+ Collectors::Netdata: Metrics: extracted=0, published=0, failed=0
+ ```
+ * ***Other Normal nodes (that operate)***, for NO logs indicating connection failures or recovery actions.
+
+
+
+------
+
+## Limitations and Bugs
+
+* Clustering is never used for 2-level monitoring topologies.
+* ***Bug:*** EMS clients do not give up after many recovery failures. -- No message is sent to EMS server for failed recoveries.
+* When no Normal nodes (and hence no Aggregator) exist in a cluster, no one will collect metrics from the (orphan) RL nodes.
+* When no Normal nodes (and hence no Aggregator) exist in a cluster, no one will recover the (orphan) RL nodes.
+* If the EMS server fails, no one will recover it.
+* Metric messages are not cached/redirected, if the next node has failed.
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstallationProperties.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstallationProperties.java
index eb96902c0ba19e9fff92652f3949c36126a9e6fc..c0a9318621aac59f62a56c74825d5d275469c2dc 100644
--- a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstallationProperties.java
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstallationProperties.java
@@ -10,6 +10,7 @@
package eu.melodic.event.baguette.client.install;
import lombok.Data;
+import lombok.ToString;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
@@ -18,6 +19,7 @@ import org.springframework.context.annotation.PropertySource;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.regex.Pattern;
@Data
@Configuration
@@ -35,6 +37,7 @@ public class ClientInstallationProperties {
private String checkInstalledFile;
private String downloadUrl;
+ @ToString.Exclude
private String apiKey;
private String installScriptUrl;
private String installScriptFile;
@@ -66,7 +69,24 @@ public class ClientInstallationProperties {
private long commandExecutionTimeout = 60000;
private final Map> instructions = new HashMap<>();
+ private final Map parameters = new HashMap<>();
private boolean continueOnFail = false;
private String sessionRecordingDir = "logs";
+
+ // ----------------------------------------------------
+
+ private String clientInstallVarName = "__EMS_CLIENT_INSTALL__";
+ private Pattern clientInstallSuccessPattern = Pattern.compile("^INSTALLED($|[\\s:=])", Pattern.CASE_INSENSITIVE);
+ private Pattern clientInstallErrorPattern = Pattern.compile("^ERROR($|[\\s:=])", Pattern.CASE_INSENSITIVE);
+ private boolean clientInstallSuccessIfVarIsMissing = false;
+ private boolean clientInstallErrorIfVarIsMissing = true;
+
+ private String skipInstallVarName = "__EMS_CLIENT_INSTALL__";
+ private Pattern skipInstallPattern = Pattern.compile("^SKIPPED($|[\\s:=])", Pattern.CASE_INSENSITIVE);
+ private boolean skipInstallIfVarIsMissing = false;
+
+ private String ignoreNodeVarName = "__EMS_IGNORE_NODE__";
+ private Pattern ignoreNodePattern = Pattern.compile("^IGNORED($|[\\s:=])", Pattern.CASE_INSENSITIVE);
+ private boolean ignoreNodeIfVarIsMissing = false;
}
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstallationTask.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstallationTask.java
index f0270dcd49cea30b43c3ae64bf5e84be67fe122a..96bf004630b13149458aedf198d4642a58dbf2a0 100644
--- a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstallationTask.java
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstallationTask.java
@@ -9,7 +9,8 @@
package eu.melodic.event.baguette.client.install;
-import eu.melodic.event.baguette.client.install.instruction.InstallationInstructions;
+import eu.melodic.event.baguette.client.install.instruction.InstructionsSet;
+import eu.melodic.event.baguette.server.NodeRegistryEntry;
import lombok.Builder;
import lombok.Data;
@@ -29,5 +30,6 @@ public class ClientInstallationTask {
private final String type;
private final String provider;
private final SshConfig ssh;
- private final List installationInstructions;
+ private final NodeRegistryEntry nodeRegistryEntry;
+ private final List instructionSets;
}
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstaller.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstaller.java
index 1302d5d847282bf70c8942d9f9c0a72a1bd045a6..77179a2879c605c115ccbad407bd6c3b40cbac61 100644
--- a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstaller.java
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstaller.java
@@ -9,6 +9,8 @@
package eu.melodic.event.baguette.client.install;
+import eu.melodic.event.baguette.server.BaguetteServer;
+import eu.melodic.event.baguette.server.NodeRegistryEntry;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.InitializingBean;
@@ -31,6 +33,8 @@ public class ClientInstaller implements InitializingBean {
@Autowired
private ClientInstallationProperties properties;
+ @Autowired
+ private BaguetteServer baguetteServer;
private final AtomicLong taskCounter = new AtomicLong();
private ExecutorService executorService;
@@ -57,8 +61,28 @@ public class ClientInstaller implements InitializingBean {
}
private boolean executeTask(ClientInstallationTask task, long taskCounter) {
+ if (baguetteServer.getNodeRegistry().getCoordinator()==null)
+ throw new IllegalStateException("Baguette Server Coordinator has not yet been initialized");
+
if ("VM".equalsIgnoreCase(task.getType())) {
- return executeVmTask(task, taskCounter);
+ NodeRegistryEntry entry = baguetteServer.getNodeRegistry().getNodeByAddress(task.getAddress());
+ if (entry==null)
+ throw new IllegalStateException("Node entry has been removed from Node Registry before installation: Node IP address: "+task.getAddress());
+ //baguetteServer.handleNodeSituation(task.getAddress(), INTERNAL_ERROR);
+ entry.nodeInstalling(task);
+
+ boolean success = executeVmTask(task, taskCounter);
+ log.debug("ClientInstaller: NODE_REGISTRY_ENTRY after installation execution: \n{}", task.getNodeRegistryEntry());
+
+ if (entry.getState()==NodeRegistryEntry.STATE.INSTALLING) {
+ log.warn("ClientInstaller: NODE_REGISTRY_ENTRY status is still INSTALLING after executing client installation. Changing to INSTALL_ERROR");
+ entry.nodeInstallationError(null);
+ }
+
+ // Pre-register Node to baguette Server Coordinator
+ baguetteServer.getNodeRegistry().getCoordinator().preregister(entry);
+
+ return success;
} else {
log.error("ClientInstaller: UNSUPPORTED TASK TYPE: {}", task.getType());
}
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstallerPlugin.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstallerPlugin.java
index ff2b5514e852ad63d669bceae1065ca30e28d148..4e6fa33073e79dd1fe180951c4662c67adb6c0e2 100644
--- a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstallerPlugin.java
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/ClientInstallerPlugin.java
@@ -10,5 +10,14 @@
package eu.melodic.event.baguette.client.install;
public interface ClientInstallerPlugin {
- boolean execute();
+ default boolean execute() {
+ preProcessTask();
+ boolean result = executeTask();
+ result = result && postProcessTask();
+ return result;
+ }
+
+ void preProcessTask(); // Throw exception to block task execution
+ boolean executeTask();
+ boolean postProcessTask();
}
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/SshClientInstaller.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/SshClientInstaller.java
index 4350cd1a85d5ea4cb4964dbfe2a2c5887fbcd622..3578a3c1e6b468ba6648ff5611a5dbb2cfcfd3bf 100644
--- a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/SshClientInstaller.java
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/SshClientInstaller.java
@@ -9,9 +9,12 @@
package eu.melodic.event.baguette.client.install;
-import eu.melodic.event.baguette.client.install.instruction.InstallationInstructions;
+import eu.melodic.event.baguette.client.install.instruction.INSTRUCTION_RESULT;
import eu.melodic.event.baguette.client.install.instruction.Instruction;
+import eu.melodic.event.baguette.client.install.instruction.InstructionsService;
+import eu.melodic.event.baguette.client.install.instruction.InstructionsSet;
import lombok.Builder;
+import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringSubstitutor;
@@ -26,7 +29,6 @@ import org.apache.sshd.client.scp.ScpClient;
import org.apache.sshd.client.session.ClientSession;
import org.apache.sshd.common.PropertyResolverUtils;
import org.apache.sshd.common.keyprovider.KeyPairProvider;
-import org.apache.sshd.common.scp.ScpTimestamp;
import org.apache.sshd.common.util.io.NoCloseInputStream;
import org.apache.sshd.common.util.io.NoCloseOutputStream;
import org.bouncycastle.jcajce.provider.asymmetric.rsa.BCRSAPrivateCrtKey;
@@ -40,7 +42,6 @@ import java.io.StringReader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
-import java.nio.file.attribute.PosixFilePermission;
import java.security.*;
import java.security.spec.InvalidKeySpecException;
import java.security.spec.PKCS8EncodedKeySpec;
@@ -48,6 +49,10 @@ import java.security.spec.RSAPublicKeySpec;
import java.security.spec.X509EncodedKeySpec;
import java.text.SimpleDateFormat;
import java.util.*;
+import java.util.function.Function;
+import java.util.function.Supplier;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -95,7 +100,7 @@ public class SshClientInstaller implements ClientInstallerPlugin {
@Builder
public SshClientInstaller(ClientInstallationTask task, long taskCounter, ClientInstallationProperties properties) {
- this.task= task;
+ this.task = task;
this.taskCounter = taskCounter;
this.maxRetries = properties.getMaxRetries()>0 ? properties.getMaxRetries() : 5;
@@ -111,9 +116,8 @@ public class SshClientInstaller implements ClientInstallerPlugin {
}
@Override
- public boolean execute() { return executeTask(); }
-
- private boolean executeTask(/*int retries*/) {
+ public boolean executeTask(/*int retries*/) {
+ task.getNodeRegistryEntry().nodeInstalling(task.getNodeRegistryEntry().getPreregistration());
boolean success = false;
int retries = 0;
while (!success && retries<=maxRetries) {
@@ -134,7 +138,8 @@ public class SshClientInstaller implements ClientInstallerPlugin {
}
try {
- success = executeInstructionsList();
+ INSTRUCTION_RESULT exitResult = executeInstructionSets();
+ success = exitResult != INSTRUCTION_RESULT.FAIL;
} catch (Exception ex) {
log.error("SshClientInstaller: Failed executing installation instructions for task #{}, Exception: ", taskCounter, ex);
success = false;
@@ -493,54 +498,104 @@ public class SshClientInstaller implements ClientInstallerPlugin {
return true;
}
- private boolean executeInstructionsList() throws IOException {
- List installationInstructionsList = task.getInstallationInstructions();
+ private INSTRUCTION_RESULT executeInstructionSets() throws IOException { // Runs the task's instruction sets in order and reports SUCCESS, FAIL or EXIT
+ List instructionsSetList = task.getInstructionSets();
+ INSTRUCTION_RESULT exitResult = INSTRUCTION_RESULT.SUCCESS; // default outcome; only overwritten on the FAIL/EXIT paths below
int cntSuccess = 0;
int cntFail = 0;
- for (InstallationInstructions installationInstructions : installationInstructionsList) {
- log.info("----------------------------------------------------------------------");
- log.info("SshClientInstaller: Task #{}: Executing installation instructions set: {}", taskCounter, installationInstructions.getDescription());
+ for (InstructionsSet instructionsSet : instructionsSetList) {
+ log.info("\n ----------------------------------------------------------------------\n Task #{} : Instruction Set: {}", taskCounter, instructionsSet.getDescription());
+
+ // Check installation instructions condition: a set whose condition is false is skipped, or aborts the run with FAIL when stopOnConditionFail is set
+ try {
+ if (! InstructionsService.getInstance().checkCondition(instructionsSet, task.getNodeRegistryEntry().getPreregistration())) {
+ log.info("SshClientInstaller: Task #{}: Installation Instructions set is skipped due to failed condition: {}", taskCounter, instructionsSet.getDescription());
+ if (instructionsSet.isStopOnConditionFail()) {
+ log.info("SshClientInstaller: Task #{}: No further installation instructions sets will be executed due to stopOnConditionFail: {}", taskCounter, instructionsSet.getDescription());
+ exitResult = INSTRUCTION_RESULT.FAIL;
+ break;
+ }
+ continue;
+ }
+ log.debug("SshClientInstaller: Task #{}: Condition evaluation for Installation Instructions Set succeeded: {}", taskCounter, instructionsSet.getDescription());
+ } catch (Exception e) { // any error while evaluating the condition aborts the whole run with FAIL
+ log.error("sshClientInstaller: Task #{}: Installation Instructions Set Condition evaluation error. Will not process remaining installation instructions sets: {}\n", taskCounter, instructionsSet.getDescription(), e);
+ exitResult = INSTRUCTION_RESULT.FAIL;
+ break;
+ }
+
+ // Execute installation instructions (the banner is written both to the log and to streamLogger)
+ log.info("SshClientInstaller: Task #{}: Executing installation instructions set: {}", taskCounter, instructionsSet.getDescription());
streamLogger.logMessage(
- String.format("----------------------------------------------------------------------\nExecuting instruction set: %s\n",
- installationInstructions.getDescription()));
- boolean result = executeInstructions(installationInstructions);
- if (!result) {
- log.error("SshClientInstaller: Task #{}: Installation Instructions failed: {}", taskCounter, installationInstructions.getDescription());
+ String.format("\n ----------------------------------------------------------------------\n Task #%d : Executing instruction set: %s\n",
+ taskCounter, instructionsSet.getDescription()));
+ INSTRUCTION_RESULT result = executeInstructions(instructionsSet);
+ if (result==INSTRUCTION_RESULT.FAIL) {
+ log.error("SshClientInstaller: Task #{}: Installation Instructions set failed: {}", taskCounter, instructionsSet.getDescription());
cntFail++;
- if (!continueOnFail)
- return false;
+ if (!continueOnFail) { // with continueOnFail the failure is only counted and the next set runs; exitResult is not changed
+ exitResult = INSTRUCTION_RESULT.FAIL;
+ break;
+ }
+ } else
+ if (result==INSTRUCTION_RESULT.EXIT) { // EXIT is counted as a success but stops processing of the remaining sets
+ log.info("SshClientInstaller: Task #{}: Instruction set processing exits", taskCounter);
+ cntSuccess++;
+ exitResult = INSTRUCTION_RESULT.EXIT;
+ break;
} else {
- log.info("SshClientInstaller: Task #{}: Installation Instructions succeeded: {}", taskCounter, installationInstructions.getDescription());
+ log.info("SshClientInstaller: Task #{}: Installation Instructions set succeeded: {}", taskCounter, instructionsSet.getDescription());
cntSuccess++;
}
}
- log.info("-------------------------------------------------------------------------");
- log.info("SshClientInstaller: Task #{}: Instruction sets processed: successful={}, failed={}", taskCounter, cntSuccess, cntFail);
- return true;
+ log.info("\n -------------------------------------------------------------------------\n Task #{} : Instruction sets processed: successful={}, failed={}, exit-result={}", taskCounter, cntSuccess, cntFail, exitResult);
+ return exitResult;
}
- private boolean executeInstructions(InstallationInstructions installationInstructions) throws IOException {
- Map valueMap = installationInstructions.getValueMap();
- int numOfInstructions = installationInstructions.getInstructions().size();
+ private INSTRUCTION_RESULT executeInstructions(InstructionsSet instructionsSet) throws IOException {
+ Map valueMap = task.getNodeRegistryEntry().getPreregistration();
+ int numOfInstructions = instructionsSet.getInstructions().size();
int cnt = 0;
- int insCount = installationInstructions.getInstructions().size();
- for (Instruction ins : installationInstructions.getInstructions()) {
+ int insCount = instructionsSet.getInstructions().size();
+ for (Instruction ins : instructionsSet.getInstructions()) {
+ if (ins==null) continue;
cnt++;
+
+ // Check instruction condition
+ try {
+ if (! InstructionsService.getInstance().checkCondition(ins, valueMap)) {
+ log.info("SshClientInstaller: Task #{}: Instruction is skipped due to failed condition {}/{}: {}", taskCounter, cnt, numOfInstructions, ins.description());
+ if (ins.isStopOnConditionFail()) {
+ log.info("SshClientInstaller: Task #{}: No further instructions will be executed due to stopOnConditionFail: {}/{}: {}", taskCounter, cnt, numOfInstructions, ins.description());
+ return INSTRUCTION_RESULT.FAIL;
+ }
+ continue;
+ }
+ log.debug("SshClientInstaller: Task #{}: Condition evaluation for instruction succeeded: {}/{}: {}", taskCounter, cnt, numOfInstructions, ins.description());
+ } catch (Exception e) {
+ log.error("sshClientInstaller: Task #{}: Instruction Condition evaluation error. Will not process remaining instructions: {}/{}: {}\n", taskCounter, cnt, numOfInstructions, ins.description(), e);
+ return INSTRUCTION_RESULT.FAIL;
+ }
+
+ // Execute instruction
+ ins = InstructionsService
+ .getInstance()
+ .resolvePlaceholders(ins, valueMap);
log.trace("SshClientInstaller: Task #{}: Executing instruction {}/{}: {}", taskCounter, cnt, numOfInstructions, ins);
- log.info("SshClientInstaller: Task #{}: Executing instruction {}/{}: {}", taskCounter, cnt, numOfInstructions, ins.getDescription());
+ log.info("SshClientInstaller: Task #{}: Executing instruction {}/{}: {}", taskCounter, cnt, numOfInstructions, ins.description());
Integer exitStatus;
boolean result = true;
- switch (ins.getTaskType()) {
+ switch (ins.taskType()) {
case LOG:
- log.info("SshClientInstaller: Task #{}: LOG: {}", taskCounter, ins.getMessage());
+ log.info("SshClientInstaller: Task #{}: LOG: {}", taskCounter, ins.message());
break;
case CMD:
- log.info("SshClientInstaller: Task #{}: EXEC: {}", taskCounter, ins.getCommand());
+ log.info("SshClientInstaller: Task #{}: EXEC: {}", taskCounter, ins.command());
int retries = 0;
- int maxRetries = ins.getRetries();
+ int maxRetries = ins.retries();
while (true) {
try {
- exitStatus = sshExecCmd(ins.getCommand(), ins.getExecutionTimeout());
+ exitStatus = sshExecCmd(ins.command(), ins.executionTimeout());
result = (exitStatus!=null);
//result = (exitStatus==0);
log.info("SshClientInstaller: Task #{}: EXEC: exit-status={}", taskCounter, exitStatus);
@@ -555,7 +610,7 @@ public class SshClientInstaller implements ClientInstallerPlugin {
retries++;
if (retries<=maxRetries) {
log.info("SshClientInstaller: Task #{}: Retry {}/{} for instruction {}/{}: {}",
- taskCounter, retries, maxRetries, cnt, numOfInstructions, ins.getDescription());
+ taskCounter, retries, maxRetries, cnt, numOfInstructions, ins.description());
} else {
if (maxRetries>0)
log.error("sshClientInstaller: Task #{}: Last instruction failed {} times. Giving up", taskCounter, maxRetries);
@@ -594,43 +649,100 @@ public class SshClientInstaller implements ClientInstallerPlugin {
break;*/
case FILE:
//log.info("SshClientInstaller: Task #{}: FILE: {}, content-length={}", taskCounter, ins.getFileName(), ins.getContents().length());
- if (Paths.get(ins.getLocalFileName()).toFile().isDirectory()) {
- log.info("SshClientInstaller: Task #{}: FILE: COPY-PROCESS DIR: {} -> {}", taskCounter, ins.getLocalFileName(), ins.getFileName());
- result = copyDir(ins.getLocalFileName(), ins.getFileName(), valueMap);
+ if (Paths.get(ins.localFileName()).toFile().isDirectory()) {
+ log.info("SshClientInstaller: Task #{}: FILE: COPY-PROCESS DIR: {} -> {}", taskCounter, ins.localFileName(), ins.fileName());
+ result = copyDir(ins.localFileName(), ins.fileName(), valueMap);
} else
- if (Paths.get(ins.getLocalFileName()).toFile().isFile()) {
- log.info("SshClientInstaller: Task #{}: FILE: COPY-PROCESS FILE: {} -> {}", taskCounter, ins.getLocalFileName(), ins.getFileName());
- Path sourceFile = Paths.get(ins.getLocalFileName());
- Path sourceBaseDir = Paths.get(ins.getLocalFileName()).getParent();
- result = copyFile(sourceFile, sourceBaseDir, ins.getFileName(), valueMap, ins.isExecutable());
+ if (Paths.get(ins.localFileName()).toFile().isFile()) {
+ log.info("SshClientInstaller: Task #{}: FILE: COPY-PROCESS FILE: {} -> {}", taskCounter, ins.localFileName(), ins.fileName());
+ Path sourceFile = Paths.get(ins.localFileName());
+ Path sourceBaseDir = Paths.get(ins.localFileName()).getParent();
+ result = copyFile(sourceFile, sourceBaseDir, ins.fileName(), valueMap, ins.executable());
} else {
- log.error("SshClientInstaller: Task #{}: FILE: ERROR: Local file is not directory or normal file: {}", taskCounter, ins.getLocalFileName());
+ log.error("SshClientInstaller: Task #{}: FILE: ERROR: Local file is not directory or normal file: {}", taskCounter, ins.localFileName());
result = false;
}
break;
case COPY:
- log.info("SshClientInstaller: Task #{}: UPLOAD: {} -> {}", taskCounter, ins.getLocalFileName(), ins.getFileName());
- result = sshFileUpload(ins.getLocalFileName(), ins.getFileName());
+ case UPLOAD:
+ log.info("SshClientInstaller: Task #{}: UPLOAD: {} -> {}", taskCounter, ins.localFileName(), ins.fileName());
+ result = sshFileUpload(ins.localFileName(), ins.fileName());
+ break;
+ case DOWNLOAD:
+ log.info("SshClientInstaller: Task #{}: DOWNLOAD: {} -> {}", taskCounter, ins.fileName(), ins.localFileName());
+ result = sshFileDownload(ins.fileName(), ins.localFileName());
+ if (result)
+ result = processPatterns(ins, valueMap);
break;
case CHECK:
- log.info("SshClientInstaller: Task #{}: CHECK: {}", taskCounter, ins.getCommand());
- exitStatus = sshExecCmd(ins.getCommand());
+ log.info("SshClientInstaller: Task #{}: CHECK: {}", taskCounter, ins.command());
+ exitStatus = sshExecCmd(ins.command());
+ log.info("SshClientInstaller: Task #{}: CHECK: exit-status={}", taskCounter, exitStatus);
log.debug("SshClientInstaller: Task #{}: CHECK: Result: match={}, match-status={}, exec-status={}",
- taskCounter, ins.isMatch(), ins.getExitCode(), exitStatus);
- if (ins.isMatch() && exitStatus==ins.getExitCode()
- || !ins.isMatch() && exitStatus!=ins.getExitCode())
+ taskCounter, ins.match(), ins.exitCode(), exitStatus);
+ if (ins.match() && exitStatus==ins.exitCode()
+ || !ins.match() && exitStatus!=ins.exitCode())
{
- log.info("SshClientInstaller: Task #{}: CHECK: MATCH: {}", taskCounter, ins.getMessage());
+ log.info("SshClientInstaller: Task #{}: CHECK: MATCH: {}", taskCounter, ins.message());
log.info("SshClientInstaller: Task #{}: CHECK: MATCH: Will not process more instructions", taskCounter);
- return true;
+ return INSTRUCTION_RESULT.SUCCESS;
}
break;
+
+ case SET_VARS:
+ log.info("SshClientInstaller: Task #{}: SET_VARS:", taskCounter);
+ if (ins.variables()!=null && ins.variables().size()>0) {
+ ins.variables().forEach((varName, varExpression) -> {
+ try {
+ String varValue = InstructionsService.getInstance().processPlaceholders(varExpression, valueMap);
+ log.info("SshClientInstaller: Task #{}: Setting VAR: {} = {}", taskCounter, varName, varValue);
+ valueMap.put(varName, varValue);
+ } catch (Exception e) {
+ log.error("SshClientInstaller: Task #{}: ERROR while Setting VAR: {}: {}\n", taskCounter, varName, varExpression, e);
+ }
+ });
+ } else
+ log.warn("SshClientInstaller: Task #{}: SET_VARS: No variables specified", taskCounter);
+ break;
+ case UNSET_VARS:
+ log.info("SshClientInstaller: Task #{}: UNSET_VARS:", taskCounter);
+ if (ins.variables()!=null && ins.variables().size()>0) {
+ Set vars = ins.variables().keySet();
+ log.info("SshClientInstaller: Task #{}: Unsetting VAR: {}", taskCounter, vars);
+ valueMap.keySet().removeAll(vars);
+ } else
+ log.warn("SshClientInstaller: Task #{}: UNSET_VARS: No variables specified", taskCounter);
+ break;
+ case PRINT_VARS:
+ //log.info("SshClientInstaller: Task #{}: PRINT_VARS:", taskCounter);
+ String output = valueMap.entrySet().stream()
+ .map(e -> " VAR: "+e.getKey()+" = "+e.getValue())
+ .collect(Collectors.joining("\n"));
+ log.info("SshClientInstaller: Task #{}: PRINT_VARS:\n{}", taskCounter, output);
+ break;
+ case EXIT_SET:
+ log.info("SshClientInstaller: Task #{}: EXIT_SET: Stop this instruction set processing", taskCounter);
+ try {
+ if (StringUtils.isNotBlank(ins.command())) {
+ String exitResult = ins.command().trim().toUpperCase();
+ log.info("SshClientInstaller: Task #{}: EXIT_SET: Result={}", taskCounter, exitResult);
+ return INSTRUCTION_RESULT.valueOf(exitResult);
+ }
+ } catch (Exception e) {
+ log.error("SshClientInstaller: Task #{}: EXIT_SET: Invalid EXIT_SET result: {}. Will return FAIL", taskCounter, ins.command());
+ return INSTRUCTION_RESULT.FAIL;
+ }
+ log.info("SshClientInstaller: Task #{}: EXIT_SET: Result={}", taskCounter, INSTRUCTION_RESULT.SUCCESS);
+ return INSTRUCTION_RESULT.SUCCESS;
+ case EXIT:
+ log.info("SshClientInstaller: Task #{}: EXIT: Stop any further instruction processing", taskCounter);
+ return INSTRUCTION_RESULT.EXIT;
default:
log.error("sshClientInstaller: Unknown instruction type. Ignoring it: {}", ins);
}
if (!result) {
log.error("sshClientInstaller: Last instruction failed. Will not process remaining instructions");
- return false;
+ return INSTRUCTION_RESULT.FAIL;
}
if (cnt valueMap) throws IOException {
@@ -674,4 +786,140 @@ public class SshClientInstaller implements ClientInstallerPlugin {
return sshFileWrite(contents, targetFile, isExecutable);
}
+
+ private boolean processPatterns(Instruction ins, Map valueMap) {
+ Map patterns = ins.patterns();
+ if (patterns==null || patterns.size()==0) {
+ log.info("SshClientInstaller: processPatterns: No patterns to process");
+ return true;
+ }
+
+ // Read local file
+ String[] linesArr;
+ try (Stream lines = Files.lines(Paths.get(ins.localFileName()))) {
+ linesArr = lines.toArray(String[]::new);
+ } catch (IOException e) {
+ log.error("SshClientInstaller: processPatterns: Error while reading local file: {} -- Exception: ", ins.localFileName(), e);
+ return false;
+ }
+
+ // Process file lines against instruction patterns
+ patterns.forEach((varName,pattern) -> {
+ Matcher matcher = null;
+ for (String line : linesArr) {
+ Matcher m = pattern.matcher(line);
+ if (m.matches()) {
+ matcher = m;
+ //break; // Uncomment to return the first match. Comment to return the last match.
+ }
+ }
+ if (matcher!=null && matcher.matches()) {
+ String varValue = matcher.group( matcher.groupCount()>0 ? 1 : 0 );
+ log.info("SshClientInstaller: processPatterns: Setting variable '{}' to: {}", varName, varValue);
+ valueMap.put(varName, varValue);
+ } else {
+ log.info("SshClientInstaller: processPatterns: No match for variable '{}' with pattern: {}", varName, pattern);
+ }
+ });
+
+ return true;
+ }
+
+ @Override
+ public void preProcessTask() {
+ // Intentionally empty in this installer. Hook intent per the original note: throw an exception here to prevent task execution when the task data have a problem. NOTE(review): the original wording said "task exception"; confirm "execution" was meant.
+ }
+
+    /**
+     * Updates the node's registry entry according to the variables found in the
+     * node's pre-registration map.
+     * <p>
+     * The checks are tried in this order and the first one that matches wins:
+     * client installed, client installation failed, installation skipped, node ignored.
+     * If none matches, the first enabled "...IfVarIsMissing" default is applied
+     * (ignore node, skip install, install success, install error); if no default
+     * is enabled the entry is left untouched.
+     *
+     * @return always {@code true}
+     */
+    @Override
+    public boolean postProcessTask() {
+        log.trace("SshClientInstaller: postProcessTask: BEGIN:\n{}", task.getNodeRegistryEntry().getPreregistration());
+
+        // Check if Baguette client has been installed successfully
+        log.trace("SshClientInstaller: postProcessTask: CLIENT INSTALLATION....");
+        boolean result = postProcessVariable(
+                properties.getClientInstallVarName(),
+                properties.getClientInstallSuccessPattern(),
+                value -> { task.getNodeRegistryEntry().nodeInstallationComplete(value); return true; },
+                null, null);
+        log.trace("SshClientInstaller: postProcessTask: CLIENT INSTALLATION.... result: {}", result);
+        if (result) return true;
+
+        // Check if Baguette client installation has failed.
+        // A value matching the error pattern must be recorded as an installation error
+        // (as the defaults section below does), not as a completed installation.
+        log.trace("SshClientInstaller: postProcessTask: CLIENT INSTALLATION FAILED....");
+        result = postProcessVariable(
+                properties.getClientInstallVarName(),
+                properties.getClientInstallErrorPattern(),
+                value -> { task.getNodeRegistryEntry().nodeInstallationError(value); return true; },
+                null, null);
+        log.trace("SshClientInstaller: postProcessTask: CLIENT INSTALLATION FAILED.... result: {}", result);
+        if (result) return true;
+
+        // Check if Baguette client installation has been skipped (not attempted at all)
+        log.trace("SshClientInstaller: postProcessTask: CLIENT INSTALLATION SKIP....");
+        result = postProcessVariable(
+                properties.getSkipInstallVarName(),
+                properties.getSkipInstallPattern(),
+                value -> { task.getNodeRegistryEntry().nodeNotInstalled(value); return true; },
+                null, null);
+        log.trace("SshClientInstaller: postProcessTask: CLIENT INSTALLATION SKIP.... result: {}", result);
+        if (result) return true;
+
+        // Check if the Node must be ignored by EMS
+        log.trace("SshClientInstaller: postProcessTask: NODE IGNORE....");
+        result = postProcessVariable(
+                properties.getIgnoreNodeVarName(),
+                properties.getIgnoreNodePattern(),
+                value -> { task.getNodeRegistryEntry().nodeIgnore(value); return true; },
+                null, null);
+        log.trace("SshClientInstaller: postProcessTask: NODE IGNORE.... result: {}", result);
+        if (result) return true;
+
+        // Process defaults, if variables are missing or inconclusive.
+        // Only the first enabled default is applied.
+        log.trace("SshClientInstaller: postProcessTask: DEFAULTS....");
+        if (properties.isIgnoreNodeIfVarIsMissing()) {
+            log.trace("SshClientInstaller: postProcessTask: DEFAULTS.... NODE IGNORED");
+            task.getNodeRegistryEntry().nodeIgnore(null);
+        } else
+        if (properties.isSkipInstallIfVarIsMissing()) {
+            log.trace("SshClientInstaller: postProcessTask: DEFAULTS.... CLIENT INSTALLATION SKIPPED");
+            task.getNodeRegistryEntry().nodeNotInstalled(null);
+        } else
+        if (properties.isClientInstallSuccessIfVarIsMissing()) {
+            log.trace("SshClientInstaller: postProcessTask: DEFAULTS.... CLIENT INSTALLED");
+            task.getNodeRegistryEntry().nodeInstallationComplete(null);
+        } else
+        if (properties.isClientInstallErrorIfVarIsMissing()) {
+            log.trace("SshClientInstaller: postProcessTask: DEFAULTS.... CLIENT INSTALLATION ERROR");
+            task.getNodeRegistryEntry().nodeInstallationError(null);
+        } else
+            log.trace("SshClientInstaller: postProcessTask: DEFAULTS.... NO DEFAULT");
+        log.trace("SshClientInstaller: postProcessTask: END");
+        return true;
+    }
+
+ private boolean postProcessVariable(String varName, Pattern pattern, @NonNull Function match, Function notMatch, Supplier missing) {
+ log.trace("SshClientInstaller: postProcessVariable: var={}, pattern={}", varName, pattern);
+ if (StringUtils.isNotBlank(varName) && pattern!=null) {
+ String value = task.getNodeRegistryEntry().getPreregistration().get(varName);
+ log.trace("SshClientInstaller: postProcessVariable: var={}, value={}", varName, value);
+ if (value!=null) {
+ if (pattern.matcher(value).matches()) {
+ log.trace("SshClientInstaller: postProcessVariable: MATCH-END: var={}, value={}, pattern={}", varName, value, pattern);
+ return match.apply(value);
+ } else {
+ log.trace("SshClientInstaller: postProcessVariable: NO MATCH: var={}, value={}, pattern={}", varName, value, pattern);
+ if (notMatch!=null) {
+ log.trace("SshClientInstaller: postProcessVariable: NO MATCH-END: var={}, value={}, pattern={}", varName, value, pattern);
+ return notMatch.apply(value);
+ }
+ }
+ }
+ }
+ if (missing!=null) {
+ log.trace("SshClientInstaller: postProcessVariable: DEFAULT-END: var={}", varName);
+ return missing.get();
+ }
+ log.trace("SshClientInstaller: postProcessVariable: False-END: var={}", varName);
+ return false;
+ }
}
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/AbstractInstallationHelper.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/AbstractInstallationHelper.java
index 14fbd79ff2f7b5075edae121f856f4450d90e715..3f332ae021c9e37e971aba77605c61e1b702510c 100644
--- a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/AbstractInstallationHelper.java
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/AbstractInstallationHelper.java
@@ -9,11 +9,13 @@
package eu.melodic.event.baguette.client.install.helper;
+import com.google.gson.Gson;
import eu.melodic.event.baguette.client.install.ClientInstallationProperties;
-import eu.melodic.event.baguette.client.install.instruction.InstallationInstructions;
-import eu.melodic.event.baguette.server.BaguetteServer;
+import eu.melodic.event.baguette.client.install.instruction.InstructionsSet;
+import eu.melodic.event.baguette.server.NodeRegistryEntry;
import eu.melodic.event.util.KeystoreUtil;
import eu.melodic.event.util.NetUtil;
+import eu.melodic.event.util.PasswordUtil;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
@@ -53,16 +55,20 @@ public abstract class AbstractInstallationHelper implements InitializingBean, Ap
@Autowired
@Getter @Setter
protected ClientInstallationProperties properties;
+ @Autowired
+ protected PasswordUtil passwordUtil;
protected String archiveBase64;
protected boolean isServerSecure;
protected String serverCert;
- public synchronized static AbstractInstallationHelper getInstance() { return instance; }
+    /**
+     * Gives access to the helper instance recorded by {@link #afterPropertiesSet()}.
+     *
+     * @return whatever the static {@code instance} field currently holds
+     */
+    public static synchronized AbstractInstallationHelper getInstance() {
+        return instance;
+    }
@Override
public void afterPropertiesSet() {
- log.info("AbstractInstallationHelper.afterPropertiesSet(): configuration: {}", properties);
+ log.debug("AbstractInstallationHelper.afterPropertiesSet(): class={}: configuration: {}", getClass().getName(), properties);
AbstractInstallationHelper.instance = this;
LINUX_OS_FAMILIES = properties.getOsFamilies().get("LINUX");
WINDOWS_OS_FAMILIES = properties.getOsFamilies().get("WINDOWS");
@@ -109,6 +115,7 @@ public abstract class AbstractInstallationHelper implements InitializingBean, Ap
log.debug("AbstractInstallationHelper.initServerCertificate(): Exporting server certificate to file: {}", certFileName);
KeystoreUtil
.getKeystore(keystoreFile, keystoreType, keystorePassword)
+ .passwordUtil(passwordUtil)
.exportCertToFile(keyAlias, certFileName);
log.debug("AbstractInstallationHelper.initServerCertificate(): Server certificate exported");
@@ -119,6 +126,7 @@ public abstract class AbstractInstallationHelper implements InitializingBean, Ap
} else {
this.serverCert = KeystoreUtil
.getKeystore(keystoreFile, keystoreType, keystorePassword)
+ .passwordUtil(passwordUtil)
.getEntryCertificateAsPEM(keyAlias);
}
@@ -189,27 +197,43 @@ public abstract class AbstractInstallationHelper implements InitializingBean, Ap
}
}
- public List prepareInstallationInstructionsForOs(Map nodeMap, Map contextMap, BaguetteServer baguette) throws IOException {
- if (! baguette.isServerRunning()) throw new RuntimeException("Baguette Server is not running");
+ public Optional> getInstallationInstructionsForOs(NodeRegistryEntry entry) throws IOException { // Returns the node's instruction sets serialized to JSON strings, or an empty Optional when none are available
+ if (! entry.getBaguetteServer().isServerRunning()) throw new RuntimeException("Baguette Server is not running");
+
+ List instructionsSets = prepareInstallationInstructionsForOs(entry);
+ if (instructionsSets==null) { // null is what prepareInstallationInstructionsForOs() returns for an unsupported OS family
+ String nodeOs = entry.getPreregistration().get("operatingSystem");
+ log.warn("AbstractInstallationHelper.getInstallationInstructionsForOs(): ERROR: Unknown node OS: {}: node-map={}", nodeOs, entry.getPreregistration());
+ return Optional.empty();
+ }
+
+ List jsonSets = null; // stays null when the list is empty, so an empty list also yields Optional.empty()
+ if (instructionsSets.size()>0) {
+ // Convert each InstructionsSet into its own JSON string
+ Gson gson = new Gson();
+ jsonSets = instructionsSets.stream().map(instructionsSet -> gson.toJson(instructionsSet, InstructionsSet.class)).collect(Collectors.toList());
+ }
+ log.trace("AbstractInstallationHelper.getInstallationInstructionsForOs(): JSON instruction sets for node: node-map={}\n{}", entry.getPreregistration(), jsonSets);
+ return Optional.ofNullable(jsonSets);
+ }
- String baseUrl = contextMap.get("BASE_URL");
- String clientId = contextMap.get("CLIENT_ID");
- String ipSetting = contextMap.get("IP_SETTING");
- log.trace("AbstractInstallationHelper.prepareInstallationInstructionsForOs(): node-map={}, base-url={}, client-id={}", nodeMap, baseUrl, clientId);
+ public List prepareInstallationInstructionsForOs(NodeRegistryEntry entry) throws IOException { // Picks the Linux or Windows preparation routine from the node's "operatingSystem" value; returns null for any other OS family
+ if (! entry.getBaguetteServer().isServerRunning()) throw new RuntimeException("Baguette Server is not running");
+ log.trace("AbstractInstallationHelper.prepareInstallationInstructionsForOs(): node-map={}", entry.getPreregistration());
- String osFamily = (String) nodeMap.get("operatingSystem");
- List installationInstructionsList = null;
+ String osFamily = entry.getPreregistration().get("operatingSystem"); // NOTE(review): if "operatingSystem" is absent this is presumably null, and osFamily.toUpperCase() below would then throw NullPointerException; confirm callers guarantee the key
+ List instructionsSetList = null; // remains null when neither OS family list contains the value
if (LINUX_OS_FAMILIES.contains(osFamily.toUpperCase()))
- installationInstructionsList = prepareInstallationInstructionsForLinux(nodeMap, contextMap, baguette);
+ instructionsSetList = prepareInstallationInstructionsForLinux(entry);
else if (WINDOWS_OS_FAMILIES.contains(osFamily.toUpperCase()))
- installationInstructionsList = prepareInstallationInstructionsForWin(nodeMap, contextMap, baguette);
+ instructionsSetList = prepareInstallationInstructionsForWin(entry);
else
log.warn("AbstractInstallationHelper.prepareInstallationInstructionsForOs(): Unsupported OS family: {}", osFamily);
- return installationInstructionsList;
+ return instructionsSetList;
}
- protected InstallationInstructions _appendCopyInstructions(
- InstallationInstructions installationInstructions,
+ protected InstructionsSet _appendCopyInstructions(
+ InstructionsSet instructionsSet,
Path p,
Path startDir,
String copyToClientDir,
@@ -223,13 +247,13 @@ public abstract class AbstractInstallationHelper implements InitializingBean, Ap
String contents = new String(Files.readAllBytes(p));
contents = StringSubstitutor.replace(contents, valueMap);
String tmpFile = clientTmpDir+"/installEMS_"+System.currentTimeMillis();
- installationInstructions
+ instructionsSet
.appendLog(String.format("Copy file from server to temp to client: %s -> %s -> %s", p.toString(), tmpFile, targetFile));
- return _appendCopyInstructions(installationInstructions, targetFile, tmpFile, contents, clientTmpDir);
+ return _appendCopyInstructions(instructionsSet, targetFile, tmpFile, contents, clientTmpDir);
}
- protected InstallationInstructions _appendCopyInstructions(
- InstallationInstructions installationInstructions,
+ protected InstructionsSet _appendCopyInstructions(
+ InstructionsSet instructionsSet,
String targetFile,
String tmpFile,
String contents,
@@ -238,16 +262,16 @@ public abstract class AbstractInstallationHelper implements InitializingBean, Ap
{
if (StringUtils.isEmpty(tmpFile))
tmpFile = clientTmpDir+"/installEMS_"+System.currentTimeMillis();
- installationInstructions
+ instructionsSet
.appendWriteFile(tmpFile, contents, false)
.appendExec("sudo mv " + tmpFile + " " + targetFile)
.appendExec("sudo chmod u+rw,og-rwx " + targetFile);
- return installationInstructions;
+ return instructionsSet;
}
protected String _prepareUrl(String urlTemplate, String baseUrl) {
return urlTemplate
- .replace("%{BASE_URL}%", baseUrl)
+ .replace("%{BASE_URL}%", Optional.ofNullable(baseUrl).orElse("")) // a null baseUrl is substituted as an empty string, like the two IP placeholders below
.replace("%{PUBLIC_IP}%", Optional.ofNullable(NetUtil.getPublicIpAddress()).orElse(""))
.replace("%{DEFAULT_IP}%", Optional.ofNullable(NetUtil.getDefaultIpAddress()).orElse(""));
}
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/InstallationHelper.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/InstallationHelper.java
index 32bfde969ca6a4f92227ac132356ec8d066447b5..a9c06324701c6d00bb72ff400b623c931be9746d 100644
--- a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/InstallationHelper.java
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/InstallationHelper.java
@@ -10,17 +10,19 @@
package eu.melodic.event.baguette.client.install.helper;
import eu.melodic.event.baguette.client.install.ClientInstallationTask;
-import eu.melodic.event.baguette.client.install.instruction.InstallationInstructions;
-import eu.melodic.event.baguette.server.BaguetteServer;
+import eu.melodic.event.baguette.client.install.instruction.InstructionsSet;
+import eu.melodic.event.baguette.server.NodeRegistryEntry;
import java.io.IOException;
import java.util.List;
-import java.util.Map;
+import java.util.Optional;
public interface InstallationHelper {
- List prepareInstallationInstructionsForOs(Map nodeMap, Map contextMap, BaguetteServer baguette) throws IOException;
- List prepareInstallationInstructionsForWin(Map nodeMap, Map contextMap, BaguetteServer baguette);
- List prepareInstallationInstructionsForLinux(Map nodeMap, Map contextMap, BaguetteServer baguette) throws IOException;
+ Optional> getInstallationInstructionsForOs(NodeRegistryEntry entry) throws IOException; // Optional result: may be empty when no instructions are available for the node
- ClientInstallationTask createClientInstallationTask(Map nodeMap, Map contextMap, BaguetteServer baguette) throws Exception;
+ List prepareInstallationInstructionsForOs(NodeRegistryEntry entry) throws IOException; // takes the node's registry entry instead of the former nodeMap/contextMap/BaguetteServer arguments
+ List prepareInstallationInstructionsForWin(NodeRegistryEntry entry); // Windows-specific variant
+ List prepareInstallationInstructionsForLinux(NodeRegistryEntry entry) throws IOException; // Linux-specific variant
+
+ ClientInstallationTask createClientInstallationTask(NodeRegistryEntry entry) throws Exception; // builds the installation task for the given registry entry
}
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/InstallationHelperFactory.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/InstallationHelperFactory.java
index 3cadb50e4fc5960ac00bb1239ba319512d0bffaf..f7f5f0f6b4b4ec3f7b0d35ae4e7c3fa5d07387c3 100644
--- a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/InstallationHelperFactory.java
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/InstallationHelperFactory.java
@@ -9,6 +9,7 @@
package eu.melodic.event.baguette.client.install.helper;
+import eu.melodic.event.baguette.server.NodeRegistryEntry;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Autowired;
@@ -36,15 +37,15 @@ public class InstallationHelperFactory implements InitializingBean {
InstallationHelperFactory.instance = this;
}
- public InstallationHelper createInstallationHelper(Map nodeMap) {
- String nodeType = (String) nodeMap.get("type");
+ public InstallationHelper createInstallationHelper(NodeRegistryEntry entry) { // Chooses the helper from the node's "type"; only "VM" (case-insensitive) is supported, anything else throws IllegalArgumentException
+ String nodeType = entry.getPreregistration().get("type"); // read from the entry's pre-registration data
if ("VM".equalsIgnoreCase(nodeType)) {
- return createVmInstallationHelper(nodeMap);
+ return createVmInstallationHelper(entry);
}
throw new IllegalArgumentException("Unsupported or missing Node type: "+nodeType);
}
- public InstallationHelper createInstallationHelperBean(String className, Map nodeMap) throws ClassNotFoundException {
+ public InstallationHelper createInstallationHelperBean(String className, NodeRegistryEntry entry) throws ClassNotFoundException { // Loads the class by name and returns the matching bean from applicationContext; 'entry' is not used in this body
Class> clzz = Class.forName(className);
return (InstallationHelper) applicationContext.getBean(clzz);
}
@@ -56,7 +57,7 @@ public class InstallationHelperFactory implements InitializingBean {
return (InstallationHelper) clzz.getDeclaredMethod("getInstance").invoke(null);
}
- private InstallationHelper createVmInstallationHelper(Map nodeMap) {
+ private InstallationHelper createVmInstallationHelper(NodeRegistryEntry entry) {
return VmInstallationHelper.getInstance();
}
}
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/VmInstallationHelper.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/VmInstallationHelper.java
index 31f5a2286f8dfbc108180189313f7daf18a0146d..d72f9ab26fa0845576a5d2f487b67e317844e894 100644
--- a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/VmInstallationHelper.java
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/helper/VmInstallationHelper.java
@@ -11,17 +11,18 @@ package eu.melodic.event.baguette.client.install.helper;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
+import eu.melodic.event.baguette.client.install.ClientInstallationProperties;
import eu.melodic.event.baguette.client.install.ClientInstallationTask;
import eu.melodic.event.baguette.client.install.SshConfig;
-import eu.melodic.event.baguette.client.install.instruction.InstallationInstructions;
+import eu.melodic.event.baguette.client.install.instruction.InstructionsSet;
import eu.melodic.event.baguette.client.install.instruction.Instruction;
import eu.melodic.event.baguette.server.BaguetteServer;
+import eu.melodic.event.baguette.server.NodeRegistryEntry;
import eu.melodic.event.util.CredentialsMap;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringSubstitutor;
import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.core.env.Environment;
import org.springframework.core.io.ResourceLoader;
import org.springframework.stereotype.Service;
import org.springframework.util.FileCopyUtils;
@@ -32,6 +33,7 @@ import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.text.SimpleDateFormat;
import java.util.*;
import java.util.stream.Collectors;
@@ -41,25 +43,35 @@ import java.util.stream.Collectors;
@Slf4j
@Service
public class VmInstallationHelper extends AbstractInstallationHelper {
+ // Timestamp formatters used to fill the TIMESTAMP-W3C / TIMESTAMP-UTC / TIMESTAMP-FILE placeholder values.
+ // NOTE(review): java.text.SimpleDateFormat is documented as not synchronized, and these instances are static
+ // and therefore shared by every caller - confirm whether installation instructions can be prepared
+ // concurrently; if so, external locking or java.time formatters would be needed.
+ // Date-time with milliseconds and a zone offset suffix (XXX), rendered in the JVM default time zone
+ private final static SimpleDateFormat tsW3C = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX");
+ // Same layout without any zone designator, rendered in UTC
+ private final static SimpleDateFormat tsUTC = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");
+ // Dot-separated layout rendered in the JVM default time zone
+ private final static SimpleDateFormat tsFile = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss.SSS");
+ static {
+ // Pin each formatter's time zone once, at class initialisation
+ tsW3C.setTimeZone(TimeZone.getDefault());
+ tsUTC.setTimeZone(TimeZone.getTimeZone("UTC"));
+ tsFile.setTimeZone(TimeZone.getDefault());
+ }
+
@Autowired
private ResourceLoader resourceLoader;
-
@Autowired
- private Environment environment;
+ private ClientInstallationProperties clientInstallationProperties;
@Override
- public ClientInstallationTask createClientInstallationTask(Map nodeMap, Map contextMap, BaguetteServer baguette) throws IOException {
- String baseUrl = contextMap.get("BASE_URL");
- String clientId = contextMap.get("CLIENT_ID");
- String ipSetting = contextMap.get("IP_SETTING");
+ public ClientInstallationTask createClientInstallationTask(NodeRegistryEntry entry) throws IOException {
+ Map nodeMap = entry.getPreregistration();
+
+ String baseUrl = nodeMap.get("BASE_URL");
+ String clientId = nodeMap.get("CLIENT_ID");
+ String ipSetting = nodeMap.get("IP_SETTING");
// Extract node identification and type information
- String nodeId = (String) nodeMap.get("id");
- String nodeOs = (String) nodeMap.get("operatingSystem");
- String nodeAddress = (String) nodeMap.get("address");
- String nodeType = (String) nodeMap.get("type");
- String nodeName = (String) nodeMap.get("name");
- String nodeProvider = (String) nodeMap.get("provider");
+ String nodeId = nodeMap.get("id");
+ String nodeOs = nodeMap.get("operatingSystem");
+ String nodeAddress = nodeMap.get("address");
+ String nodeType = nodeMap.get("type");
+ String nodeName = nodeMap.get("name");
+ String nodeProvider = nodeMap.get("provider");
if (StringUtils.isBlank(nodeType)) nodeType = "VM";
@@ -67,26 +79,23 @@ public class VmInstallationHelper extends AbstractInstallationHelper {
if (StringUtils.isBlank(nodeAddress)) throw new IllegalArgumentException("Missing Address for Node");
// Extract node SSH information
- Object sshObj = nodeMap.get("ssh");
- if (sshObj==null) throw new IllegalArgumentException("Missing SSH info for Node");
- if (!(sshObj instanceof Map)) throw new IllegalArgumentException("SSH info for Node is *not* a Map");
-
- Map nodeSsh = (Map) nodeMap.get("ssh");
- int port = (int) Double.parseDouble(Objects.toString(nodeSsh.get("port"), "22"));
- String username = (String) nodeSsh.get("username");
- String password = (String) nodeSsh.get("password");
- String privateKey = (String) nodeSsh.get("key");
- String fingerprint = (String) nodeSsh.get("fingerprint");
-
+ int port = (int) Double.parseDouble(Objects.toString(nodeMap.get("ssh.port"), "22"));
if (port<1) port = 22;
-
- if (StringUtils.isBlank(username)) throw new IllegalArgumentException("Missing username for SSH");
- if ((password == null || password.isEmpty()) && StringUtils.isBlank(privateKey))
+ String username = nodeMap.get("ssh.username");
+ String password = nodeMap.get("ssh.password");
+ String privateKey = nodeMap.get("ssh.key");
+ String fingerprint = nodeMap.get("ssh.fingerprint");
+
+ if (port>65535)
+ throw new IllegalArgumentException("Invalid SSH port for Node: " + port);
+ if (StringUtils.isBlank(username))
+ throw new IllegalArgumentException("Missing SSH username for Node");
+ if (StringUtils.isEmpty(password) && StringUtils.isBlank(privateKey))
throw new IllegalArgumentException("Missing SSH password or private key for Node");
// Get EMS client installation instructions for VM node
- List installationInstructionsList =
- prepareInstallationInstructionsForOs(nodeMap, contextMap, baguette);
+ List instructionsSetList =
+ prepareInstallationInstructionsForOs(entry);
// Create Installation Task for VM node
ClientInstallationTask installationTask = ClientInstallationTask.builder()
@@ -105,23 +114,28 @@ public class VmInstallationHelper extends AbstractInstallationHelper {
.build())
.type(nodeType)
.provider(nodeProvider)
- .installationInstructions(installationInstructionsList)
+ .instructionSets(instructionsSetList)
+ .nodeRegistryEntry(entry)
.build();
+ log.debug("VmInstallationHelper.createClientInstallationTask(): Created client installation task: {}", installationTask);
return installationTask;
}
@Override
- public List prepareInstallationInstructionsForWin(Map nodeMap, Map contextMap, BaguetteServer baguette) {
+ public List prepareInstallationInstructionsForWin(NodeRegistryEntry entry) {
log.warn("VmInstallationHelper.prepareInstallationInstructionsForWin(): NOT YET IMPLEMENTED");
throw new IllegalArgumentException("VmInstallationHelper.prepareInstallationInstructionsForWin(): NOT YET IMPLEMENTED");
}
@Override
- public List prepareInstallationInstructionsForLinux(Map nodeMap, Map contextMap, BaguetteServer baguette) throws IOException {
- String baseUrl = contextMap.get("BASE_URL");
- String clientId = contextMap.get("CLIENT_ID");
- String ipSetting = contextMap.get("IP_SETTING");
+ public List prepareInstallationInstructionsForLinux(NodeRegistryEntry entry) throws IOException {
+ Map nodeMap = entry.getPreregistration();
+ BaguetteServer baguette = entry.getBaguetteServer();
+
+ String baseUrl = nodeMap.get("BASE_URL");
+ String clientId = nodeMap.get("CLIENT_ID");
+ String ipSetting = nodeMap.get("IP_SETTING");
log.debug("VmInstallationHelper.prepareInstallationInstructionsForLinux(): Invoked: base-url={}", baseUrl);
// Get parameters
@@ -141,38 +155,48 @@ public class VmInstallationHelper extends AbstractInstallationHelper {
String clientTmpDir = StringUtils.firstNonBlank(properties.getClientTmpDir(), "/tmp");
- // Initialize values map with nodeMap (from request)
- Map valueMap = new HashMap<>(nodeMap.entrySet().stream()
- .filter(e -> e.getValue() instanceof String)
- .collect(Collectors.toMap(e -> "NODE_"+e.getKey().toUpperCase(), e -> (String)e.getValue())));
- valueMap.putAll( ((Map)nodeMap.get("ssh")).entrySet().stream()
- .filter(e -> e.getValue() instanceof String)
- .collect(Collectors.toMap(e -> "NODE_SSH_"+e.getKey().toUpperCase(), e -> (String)e.getValue())));
+ // Create additional keys (with NODE_ prefix) for node map values (as aliases to the already existing keys)
+ Map additionalKeysMap = nodeMap.entrySet().stream()
+ .collect(Collectors.toMap(
+ e -> e.getKey().startsWith("ssh.")
+ ? "NODE_SSH_" + e.getKey().substring(4).toUpperCase()
+ : "NODE_" + e.getKey().toUpperCase(),
+ Map.Entry::getValue));
+ nodeMap.putAll(additionalKeysMap);
// Load client config. template and prepare configuration
- valueMap.put("BAGUETTE_CLIENT_ID", clientId);
- valueMap.put("BAGUETTE_SERVER_ADDRESS", baguette.getConfiguration().getServerAddress());
- valueMap.put("BAGUETTE_SERVER_HOSTNAME", baguette.getConfiguration().getServerHostname());
- valueMap.put("BAGUETTE_SERVER_PORT", ""+baguette.getConfiguration().getServerPort());
- valueMap.put("BAGUETTE_SERVER_PUBKEY", baguette.getServerPubkey());
- valueMap.put("BAGUETTE_SERVER_PUBKEY_FINGERPRINT", baguette.getServerPubkeyFingerprint());
+ nodeMap.put("BAGUETTE_CLIENT_ID", clientId);
+ nodeMap.put("BAGUETTE_SERVER_ADDRESS", baguette.getConfiguration().getServerAddress());
+ nodeMap.put("BAGUETTE_SERVER_HOSTNAME", baguette.getConfiguration().getServerHostname());
+ nodeMap.put("BAGUETTE_SERVER_PORT", ""+baguette.getConfiguration().getServerPort());
+ nodeMap.put("BAGUETTE_SERVER_PUBKEY", baguette.getServerPubkey());
+ nodeMap.put("BAGUETTE_SERVER_PUBKEY_FINGERPRINT", baguette.getServerPubkeyFingerprint());
CredentialsMap.Entry pair =
- baguette.getConfiguration().getCredentials().entrySet().iterator().next();
- valueMap.put("BAGUETTE_SERVER_USERNAME", pair.getKey());
- valueMap.put("BAGUETTE_SERVER_PASSWORD", pair.getValue());
+ baguette.getConfiguration().getCredentials().hasPreferredPair()
+ ? baguette.getConfiguration().getCredentials().getPreferredPair()
+ : baguette.getConfiguration().getCredentials().entrySet().iterator().next();
+ nodeMap.put("BAGUETTE_SERVER_USERNAME", pair.getKey());
+ nodeMap.put("BAGUETTE_SERVER_PASSWORD", pair.getValue());
if (StringUtils.isEmpty(ipSetting)) throw new IllegalArgumentException("IP_SETTING must have a value");
- valueMap.put("IP_SETTING", ipSetting);
+ nodeMap.put("IP_SETTING", ipSetting);
// Misc. installation property values
- valueMap.put("BASE_URL", baseUrl);
- valueMap.put("DOWNLOAD_URL", baseDownloadUrl);
- valueMap.put("API_KEY", apiKey);
- valueMap.put("SERVER_CERT_FILE", serverCertFile);
- valueMap.put("REMOTE_TMP_DIR", clientTmpDir);
-
- valueMap.put("EMS_PUBLIC_DIR", System.getProperty("PUBLIC_DIR", System.getenv("PUBLIC_DIR")));
- log.trace("VmInstallationHelper.prepareInstallationInstructionsForLinux: value-map: {}", valueMap);
+ nodeMap.put("BASE_URL", baseUrl);
+ nodeMap.put("DOWNLOAD_URL", baseDownloadUrl);
+ nodeMap.put("API_KEY", apiKey);
+ nodeMap.put("SERVER_CERT_FILE", serverCertFile);
+ nodeMap.put("REMOTE_TMP_DIR", clientTmpDir);
+
+ Date ts = new Date();
+ nodeMap.put("TIMESTAMP", Long.toString(ts.getTime()));
+ nodeMap.put("TIMESTAMP-W3C", tsW3C.format(ts));
+ nodeMap.put("TIMESTAMP-UTC", tsUTC.format(ts));
+ nodeMap.put("TIMESTAMP-FILE", tsFile.format(ts));
+
+ nodeMap.putAll(clientInstallationProperties.getParameters());
+ nodeMap.put("EMS_PUBLIC_DIR", System.getProperty("PUBLIC_DIR", System.getenv("PUBLIC_DIR")));
+ log.trace("VmInstallationHelper.prepareInstallationInstructionsForLinux: value-map: {}", nodeMap);
/* // Clear EMS server certificate (PEM) file, if not secure
if (!isServerSecure) {
@@ -189,58 +213,60 @@ public class VmInstallationHelper extends AbstractInstallationHelper {
.sorted()
.collect(Collectors.toList());
for (Path p : paths) {
- _appendCopyInstructions(installationInstructions, p, startDir, copyToClientDir, clientTmpDir, valueMap);
+ _appendCopyInstructions(instructionSets, p, startDir, copyToClientDir, clientTmpDir, valueMap);
}
}
}*/
- List installationInstructionsList = new ArrayList<>();
+ List instructionsSetList = new ArrayList<>();
try {
// Read installation instructions from JSON file
- List jsonFiles = properties.getInstructions().get("LINUX");
+ List jsonFiles = null;
+ if (nodeMap.containsKey("instruction-files")) {
+ jsonFiles = Arrays.stream(nodeMap.getOrDefault("instruction-files", "").toString().split(","))
+ .filter(StringUtils::isNotBlank)
+ .map(String::trim)
+ .collect(Collectors.toList());
+ if (jsonFiles.size()==0)
+ log.warn("VmInstallationHelper.prepareInstallationInstructionsForLinux: Context map contains 'instruction-files' entry with no contents");
+ } else {
+ jsonFiles = properties.getInstructions().get("LINUX");
+ }
for (String jsonFile : jsonFiles) {
log.debug("VmInstallationHelper.prepareInstallationInstructionsForLinux: Installation instructions file for LINUX: {}", jsonFile);
byte[] bdata = FileCopyUtils.copyToByteArray(resourceLoader.getResource(jsonFile).getInputStream());
String json = new String(bdata, StandardCharsets.UTF_8);
log.trace("VmInstallationHelper.prepareInstallationInstructionsForLinux: Template installation instructions for LINUX: json:\n{}", json);
- // Process placeholders
- json = StringSubstitutor.replace(json, valueMap);
- json = environment.resolvePlaceholders(json);
- //json = environment.resolveRequiredPlaceholders(json);
- json = json.replace('\\', '/');
- log.trace("VmInstallationHelper.prepareInstallationInstructionsForLinux: Installation instructions for LINUX after placeholder processing: json:\n{}", json);
-
- // Create InstallationInstructions object from JSON
- InstallationInstructions installationInstructions =
- new Gson().fromJson(json, InstallationInstructions.class);
- installationInstructions.setValueMap(valueMap);
- installationInstructions.setFileName(jsonFile);
- log.trace("VmInstallationHelper.prepareInstallationInstructionsForLinux: Installation instructions for LINUX: object:\n{}", installationInstructions);
-
- // Pretty print installationInstructions JSON
+ // Create InstructionsSet object from JSON
+ InstructionsSet instructionsSet =
+ new Gson().fromJson(json, InstructionsSet.class);
+ instructionsSet.setFileName(jsonFile);
+ log.trace("VmInstallationHelper.prepareInstallationInstructionsForLinux: Installation instructions for LINUX: object:\n{}", instructionsSet);
+
+ // Pretty print instructionsSet JSON
if (log.isTraceEnabled()) {
Gson gson = new GsonBuilder().setPrettyPrinting().create();
StringWriter sw = new StringWriter();
try (PrintWriter writer = new PrintWriter(sw)) {
- gson.toJson(installationInstructions, writer);
+ gson.toJson(instructionsSet, writer);
}
log.trace("VmInstallationHelper.prepareInstallationInstructionsForLinux: Installation instructions for LINUX: json:\n{}", sw.toString());
}
- installationInstructionsList.add(installationInstructions);
+ instructionsSetList.add(instructionsSet);
}
- return installationInstructionsList;
+ return instructionsSetList;
} catch (Exception ex) {
log.error("VmInstallationHelper.prepareInstallationInstructionsForLinux: Exception while reading Installation instructions for LINUX: ", ex);
throw ex;
}
}
- private InstallationInstructions _appendCopyInstructions(
- InstallationInstructions installationInstructions,
+ private InstructionsSet _appendCopyInstructions(
+ InstructionsSet instructionsSet,
Path path,
Path localBaseDir,
String remoteTargetDir,
@@ -253,18 +279,18 @@ public class VmInstallationHelper extends AbstractInstallationHelper {
String contents = new String(Files.readAllBytes(path));
contents = StringSubstitutor.replace(contents, valueMap);
String description = String.format("Copy file from server to temp to client: %s -> %s", path.toString(), targetFile);
- return _appendCopyInstructions(installationInstructions, targetFile, description, contents);
+ return _appendCopyInstructions(instructionsSet, targetFile, description, contents);
}
- private InstallationInstructions _appendCopyInstructions(
- InstallationInstructions installationInstructions,
+ private InstructionsSet _appendCopyInstructions(
+ InstructionsSet instructionsSet,
String targetFile,
String description,
String contents)
{
- installationInstructions
+ instructionsSet
.appendInstruction(Instruction.createWriteFile(targetFile, contents, false).description(description))
.appendExec("sudo chmod u+rw,og-rwx " + targetFile);
- return installationInstructions;
+ return instructionsSet;
}
}
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/AbstractInstructionsBase.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/AbstractInstructionsBase.java
new file mode 100644
index 0000000000000000000000000000000000000000..8765864050ba2904ac0b8125d267ef607711bfe4
--- /dev/null
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/AbstractInstructionsBase.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.client.install.instruction;
+
+import lombok.Data;
+
+/**
+ * Common base for instruction elements that can be guarded by a condition.
+ * Extended by Instruction and InstructionsSet.
+ */
+@Data
+public abstract class AbstractInstructionsBase {
+ // Optional guard expression. InstructionsService.checkCondition treats a blank value as satisfied; otherwise
+ // it resolves placeholders in the text and evaluates it as a SpEL expression that must yield a Boolean.
+ private String condition;
+ // NOTE(review): presumably tells the executor to stop when the condition evaluates to false;
+ // the consumer is not visible here - confirm.
+ private boolean stopOnConditionFail;
+}
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/INSTRUCTION_RESULT.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/INSTRUCTION_RESULT.java
new file mode 100644
index 0000000000000000000000000000000000000000..c60d1d24fddd45fecd6e76b25db40afc854d86a9
--- /dev/null
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/INSTRUCTION_RESULT.java
@@ -0,0 +1,12 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.client.install.instruction;
+
+// NOTE(review): presumably the outcome reported after processing an instruction or instructions set;
+// the consumers are not visible here - confirm what distinguishes EXIT from FAIL.
+public enum INSTRUCTION_RESULT { SUCCESS, FAIL, EXIT }
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/INSTRUCTION_TYPE.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/INSTRUCTION_TYPE.java
index bdc63c2a7026f96b6cad951a248c3359943bbf7e..3a5418d756655728becfa2bcb2e649b4c60d3947 100644
--- a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/INSTRUCTION_TYPE.java
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/INSTRUCTION_TYPE.java
@@ -9,4 +9,7 @@
package eu.melodic.event.baguette.client.install.instruction;
-public enum INSTRUCTION_TYPE { LOG, CHECK, CMD, SHELL, FILE, COPY }
+/**
+ * Kinds of task an Instruction can carry (stored in Instruction.taskType).
+ * DOWNLOAD is assigned by Instruction.createDownloadFile and CHECK by Instruction.createCheck.
+ * NOTE(review): how the variable-related values and EXIT / EXIT_SET are executed is not visible
+ * here - confirm against the instruction executor.
+ */
+public enum INSTRUCTION_TYPE {
+ LOG, CHECK, CMD, SHELL, FILE, COPY, UPLOAD, DOWNLOAD,
+ SET_VARS, UNSET_VARS, PRINT_VARS, EXIT, EXIT_SET
+}
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/Instruction.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/Instruction.java
index 8eb91d855dcafba1da3bcb073b6c5c1acd40ca21..b70324176ead9cb92cdfe93f00a65aa63cf28a70 100644
--- a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/Instruction.java
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/Instruction.java
@@ -11,12 +11,16 @@ package eu.melodic.event.baguette.client.install.instruction;
import lombok.Builder;
import lombok.Data;
+import lombok.experimental.Accessors;
import javax.validation.constraints.NotNull;
+import java.util.Map;
+import java.util.regex.Pattern;
@Data
-@Builder
-public class Instruction {
+@Accessors(chain = true, fluent = true)
+@Builder(toBuilder = true)
+public class Instruction extends AbstractInstructionsBase {
private INSTRUCTION_TYPE taskType;
private String description;
private String message;
@@ -30,19 +34,11 @@ public class Instruction {
private long executionTimeout;
private int retries;
- // Fluent API
- public Instruction taskType(INSTRUCTION_TYPE taskType) { this.taskType = taskType; return this; }
- public Instruction description(String description) { this.description = description; return this; }
- public Instruction message(String message) { this.message = message; return this; }
- public Instruction command(String command) { this.command = command; return this; }
- public Instruction fileName(String fileName) { this.fileName = fileName; return this; }
- public Instruction localFileName(String localFileName) { this.localFileName = localFileName; return this; }
- public Instruction contents(String contents) { this.contents = contents; return this; }
- public Instruction executable(boolean executable) { this.executable = executable; return this; }
- public Instruction exitCode(int exitCode) { this.exitCode = exitCode; return this; }
- public Instruction match(boolean match) { this.match = match; return this; }
- public Instruction executionTimeout(long executionTimeout) { this.executionTimeout = executionTimeout; return this; }
- public Instruction retries(int retries) { this.retries = retries; return this; }
+ private Map patterns;
+ private Map variables;
+
+ // Fluent API addition
+    /**
+     * Registers {@code pattern} under {@code varName} and returns this instruction for chaining.
+     * The patterns map has no initialiser and is not a builder default, so it is created on first
+     * use; otherwise the first call on a freshly built instruction would throw NullPointerException.
+     *
+     * @param varName name under which the pattern is stored
+     * @param pattern pattern to store
+     * @return this instruction
+     */
+    public Instruction pattern(String varName, Pattern pattern) {
+        if (this.patterns == null) {
+            // Fully qualified to avoid touching the file's import block
+            this.patterns = new java.util.HashMap<>();
+        }
+        this.patterns.put(varName, pattern);
+        return this;
+    }
// Creators API
public static Instruction createLog(@NotNull String message) {
@@ -76,6 +72,14 @@ public class Instruction {
.build();
}
+    /**
+     * Builds a DOWNLOAD instruction.
+     *
+     * @param remoteFile stored as the instruction's fileName
+     * @param localFile  stored as the instruction's localFileName
+     * @return the new instruction
+     */
+    public static Instruction createDownloadFile(@NotNull String remoteFile, @NotNull String localFile) {
+        // Builder setters are independent of one another, so their order is irrelevant
+        return Instruction.builder()
+                .localFileName(localFile)
+                .fileName(remoteFile)
+                .taskType(INSTRUCTION_TYPE.DOWNLOAD)
+                .build();
+    }
+
public static Instruction createCheck(@NotNull String command, @NotNull int exitCode, boolean match, String message) {
return Instruction.builder()
.taskType(INSTRUCTION_TYPE.CHECK)
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/InstructionsService.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/InstructionsService.java
new file mode 100644
index 0000000000000000000000000000000000000000..f7c479921b79f31b5c63ed138aed239e8bc93d73
--- /dev/null
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/InstructionsService.java
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.client.install.instruction;
+
+import lombok.NonNull;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.text.StringSubstitutor;
+import org.springframework.context.EnvironmentAware;
+import org.springframework.core.env.Environment;
+import org.springframework.expression.ExpressionParser;
+import org.springframework.expression.spel.standard.SpelExpressionParser;
+import org.springframework.stereotype.Service;
+
+import java.util.Map;
+
+/**
+ * Resolves placeholders in instruction texts and evaluates instruction conditions.
+ * The bean registers itself as a static singleton at the moment Spring supplies the Environment.
+ */
+@Slf4j
+@Service
+public class InstructionsService implements EnvironmentAware {
+    // SpelExpressionParser instances are documented as reusable and thread-safe, so one is shared
+    private static final ExpressionParser CONDITION_PARSER = new SpelExpressionParser();
+
+    private Environment environment;
+    private static InstructionsService INSTANCE;
+
+    /**
+     * Returns the singleton instance.
+     *
+     * @throws IllegalStateException if {@link #setEnvironment(Environment)} has not been called yet
+     */
+    public static InstructionsService getInstance() {
+        if (INSTANCE==null) throw new IllegalStateException("InstructionsService singleton instance has not yet been initialized");
+        return INSTANCE;
+    }
+
+    /** Stores the environment used for placeholder resolution and publishes this bean as the singleton. */
+    @Override
+    public void setEnvironment(Environment environment) {
+        this.environment = environment;
+        INSTANCE = this;
+    }
+
+    /**
+     * Evaluates the condition attached to an instruction or instructions set.
+     * A blank condition counts as satisfied. Otherwise placeholders are resolved and the resulting
+     * text is evaluated as a SpEL expression.
+     *
+     * @param i        element whose condition is checked
+     * @param valueMap values used for placeholder substitution
+     * @return the boolean result of the condition
+     * @throws IllegalArgumentException if the expression evaluates to null or to a non-boolean value
+     */
+    public boolean checkCondition(@NonNull AbstractInstructionsBase i, Map valueMap) {
+        String condition = i.getCondition();
+        if (StringUtils.isBlank(condition)) return true;
+        String conditionResolved = processPlaceholders(condition, valueMap);
+        Object result = CONDITION_PARSER.parseExpression(conditionResolved).getValue();
+        if (result==null)
+            throw new IllegalArgumentException("Condition evaluation returned null: " + condition);
+        if (result instanceof Boolean)
+            return (Boolean)result;
+        throw new IllegalArgumentException("Condition evaluation returned a non-boolean value: " + result + ", condition: " + condition+", resolved condition: "+ conditionResolved);
+    }
+
+    /**
+     * Returns a copy of the instruction with placeholders resolved in its description, message,
+     * command, fileName, localFileName and contents. The original instruction is not modified.
+     *
+     * @param instruction instruction to copy
+     * @param valueMap    values used for placeholder substitution
+     * @return the resolved copy
+     */
+    public Instruction resolvePlaceholders(Instruction instruction, Map valueMap) {
+        Instruction resolved = instruction.toBuilder()
+                .description(processPlaceholders(instruction.description(), valueMap))
+                .message(processPlaceholders(instruction.message(), valueMap))
+                .command(processPlaceholders(instruction.command(), valueMap))
+                .fileName(processPlaceholders(instruction.fileName(), valueMap))
+                .localFileName(processPlaceholders(instruction.localFileName(), valueMap))
+                .contents(processPlaceholders(instruction.contents(), valueMap))
+                .build();
+        // Lombok's @Builder/toBuilder only covers fields declared on Instruction itself, so the
+        // settings inherited from AbstractInstructionsBase must be carried over by hand; without
+        // this the copy silently loses its condition. The condition is copied unresolved because
+        // checkCondition resolves placeholders itself.
+        resolved.setCondition(instruction.getCondition());
+        resolved.setStopOnConditionFail(instruction.isStopOnConditionFail());
+        return resolved;
+    }
+
+    /**
+     * Resolves placeholders in a string: first against {@code valueMap} (StringSubstitutor), then
+     * against the Spring environment, and finally replaces every backslash with a forward slash.
+     * Blank input (including null) is returned unchanged.
+     *
+     * @param s        text to process
+     * @param valueMap values used for placeholder substitution
+     * @return the processed text
+     */
+    public String processPlaceholders(String s, Map valueMap) {
+        if (StringUtils.isBlank(s)) return s;
+        s = StringSubstitutor.replace(s, valueMap);
+        s = environment.resolvePlaceholders(s);
+        //s = environment.resolveRequiredPlaceholders(s);
+        s = s.replace('\\', '/');
+        return s;
+    }
+}
\ No newline at end of file
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/InstallationInstructions.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/InstructionsSet.java
similarity index 68%
rename from event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/InstallationInstructions.java
rename to event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/InstructionsSet.java
index ff1d059aa2267778d0b1139df88457157c8d44c1..d173c013187e3e441324bb152cce63d5fcd6a31e 100644
--- a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/InstallationInstructions.java
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/install/instruction/InstructionsSet.java
@@ -14,49 +14,53 @@ import lombok.Data;
import java.util.*;
@Data
-public class InstallationInstructions {
+public class InstructionsSet extends AbstractInstructionsBase {
private String os;
private String description;
private String fileName;
- private Map valueMap = new HashMap<>();
private List instructions = new ArrayList<>();
- public Map getValueMap() {
+ /*public Map getValueMap() {
return Collections.unmodifiableMap(valueMap);
}
public void setValueMap(Map valueMap) {
this.valueMap = new HashMap<>(valueMap);
- }
+ }*/
public List getInstructions() { return Collections.unmodifiableList(instructions); }
public void setInstructions(List ni) { instructions = new ArrayList<>(ni); }
- public InstallationInstructions appendInstruction(Instruction i) {
+ public InstructionsSet appendInstruction(Instruction i) {
instructions.add(i);
return this;
}
- public InstallationInstructions appendLog(String message) {
+ public InstructionsSet appendLog(String message) {
instructions.add(Instruction.createLog(message));
return this;
}
- public InstallationInstructions appendExec(String command) {
+ public InstructionsSet appendExec(String command) {
instructions.add(Instruction.createShellCommand(command));
return this;
}
- public InstallationInstructions appendWriteFile(String file, String contents, boolean executable) {
+ public InstructionsSet appendWriteFile(String file, String contents, boolean executable) {
instructions.add(Instruction.createWriteFile(file, contents, executable));
return this;
}
- public InstallationInstructions appendUploadFile(String localFile, String remoteFile) {
+ public InstructionsSet appendUploadFile(String localFile, String remoteFile) {
instructions.add(Instruction.createUploadFile(localFile, remoteFile));
return this;
}
- public InstallationInstructions appendCheck(String command, int exitCode, boolean match, String message) {
+ public InstructionsSet appendDownloadFile(String remoteFile, String localFile) {
+ instructions.add(Instruction.createDownloadFile(remoteFile, localFile));
+ return this;
+ }
+
+ public InstructionsSet appendCheck(String command, int exitCode, boolean match, String message) {
instructions.add(Instruction.createCheck(command, exitCode, match, message));
return this;
}
diff --git a/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/selfhealing/ClientRecoveryPlugin.java b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/selfhealing/ClientRecoveryPlugin.java
new file mode 100644
index 0000000000000000000000000000000000000000..0e7ad43b6b5a5126fb7f3d11e46cf958f816da63
--- /dev/null
+++ b/event-management/baguette-client-install/src/main/java/eu/melodic/event/baguette/client/selfhealing/ClientRecoveryPlugin.java
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.client.selfhealing;
+
+import eu.melodic.event.baguette.client.install.ClientInstallationProperties;
+import eu.melodic.event.baguette.client.install.ClientInstallationTask;
+import eu.melodic.event.baguette.client.install.SshClientInstaller;
+import eu.melodic.event.baguette.client.install.helper.InstallationHelperFactory;
+import eu.melodic.event.baguette.server.BaguetteServer;
+import eu.melodic.event.baguette.server.ClientShellCommand;
+import eu.melodic.event.baguette.server.NodeRegistry;
+import eu.melodic.event.baguette.server.NodeRegistryEntry;
+import eu.melodic.event.util.EventBus;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.InitializingBean;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
+import org.springframework.scheduling.TaskScheduler;
+import org.springframework.stereotype.Service;
+
+import java.time.Instant;
+
+/**
+ * Server-side plugin that tries to recover a Baguette client after it has exited.
+ *
+ * On initialization it subscribes to {@code BAGUETTE_SERVER_CLIENT_EXITED} events on the
+ * event bus. When such an event carries a {@link ClientShellCommand}, the plugin looks up
+ * the node registry entry by the client's IP address and schedules a recovery task after
+ * {@code CLIENT_RECOVERY_DELAY} milliseconds. The recovery task runs the configured
+ * recovery instructions file through an {@link SshClientInstaller}.
+ */
+@Slf4j
+@Service
+@ConditionalOnProperty(name = "CLIENT_RECOVERY_ENABLED", havingValue = "true", matchIfMissing = true)
+@RequiredArgsConstructor
+public class ClientRecoveryPlugin implements InitializingBean, EventBus.EventConsumer {
+    /** Event bus topic on which client exit events are received. */
+    private static final String CLIENT_EXIT_TOPIC = "BAGUETTE_SERVER_CLIENT_EXITED";
+
+    private final EventBus eventBus;
+    private final NodeRegistry nodeRegistry;
+    private final TaskScheduler taskScheduler;
+    private final ClientInstallationProperties clientInstallationProperties;
+    private final BaguetteServer baguetteServer;
+
+    /** Delay, in milliseconds, between the exit event and the start of the recovery task. */
+    @Value("${CLIENT_RECOVERY_DELAY:10000}")
+    private long clientRecoveryDelay;
+    /** Value stored under the "instruction-files" preregistration key before recovery. */
+    @Value("${CLIENT_RECOVERY_INSTRUCTIONS_FILES:file:${MELODIC_CONFIG_DIR}/baguette-client-install/linux/recover-baguette.json}")
+    private String recoveryInstructionsFile;
+
+    /**
+     * Subscribes this plugin to client exit events and logs its configuration.
+     */
+    @Override
+    public void afterPropertiesSet() throws Exception {
+        eventBus.subscribe(CLIENT_EXIT_TOPIC, this);
+        // Log the topic from the constant so the message cannot drift from the subscription
+        log.info("ClientRecoveryPlugin: Subscribed for {} events", CLIENT_EXIT_TOPIC);
+
+        log.trace("ClientRecoveryPlugin: clientInstallationProperties: {}", clientInstallationProperties);
+        log.trace("ClientRecoveryPlugin: baguetteServer: {}", baguetteServer);
+
+        log.debug("ClientRecoveryPlugin: Recovery Delay: {}", clientRecoveryDelay);
+        log.debug("ClientRecoveryPlugin: Recovery Instructions File: {}", recoveryInstructionsFile);
+    }
+
+    /**
+     * Event bus callback. Only messages on the client exit topic are processed.
+     *
+     * @param topic   the topic the message was published on
+     * @param message the event payload
+     * @param sender  the object that published the event
+     */
+    @Override
+    public void onMessage(String topic, Object message, Object sender) {
+        log.debug("ClientRecoveryPlugin: onMessage(): BEGIN: topic={}, message={}, sender={}", topic, message, sender);
+        if (CLIENT_EXIT_TOPIC.equals(topic)) {
+            log.debug("ClientRecoveryPlugin: onMessage(): CLIENT EXITED: message={}", message);
+            processExitEvent(message, sender);
+        }
+    }
+
+    /**
+     * Handles a client exit event: resolves the node registry entry of the exited client
+     * and schedules its recovery. Messages that are not {@link ClientShellCommand}
+     * instances are ignored with a warning.
+     */
+    private void processExitEvent(Object message, Object sender) {
+        log.debug("ClientRecoveryPlugin: processExitEvent(): BEGIN: message={}", message);
+        if (!(message instanceof ClientShellCommand)) {
+            log.warn("ClientRecoveryPlugin: processExitEvent(): Message is not a {} object. Will ignore it.", ClientShellCommand.class.getSimpleName());
+            return;
+        }
+
+        ClientShellCommand csc = (ClientShellCommand) message;
+        String clientId = csc.getId();
+        String address = csc.getClientIpAddress();
+        log.warn("ClientRecoveryPlugin: processExitEvent(): client-id={}, client-address={}", clientId, address);
+        NodeRegistryEntry nodeInfo = nodeRegistry.getNodeByAddress(address);
+        log.debug("ClientRecoveryPlugin: processExitEvent(): client-node-info={}", nodeInfo);
+        log.trace("ClientRecoveryPlugin: processExitEvent(): node-registry.node-addresses={}", nodeRegistry.getNodeAddresses());
+        log.trace("ClientRecoveryPlugin: processExitEvent(): node-registry.nodes={}", nodeRegistry.getNodes());
+
+        // Without a registry entry the recovery task would be a silent no-op,
+        // so report the problem now instead of scheduling it
+        if (nodeInfo == null) {
+            log.warn("ClientRecoveryPlugin: processExitEvent(): No node registry entry found for exited client. Recovery will not be scheduled: client-id={}, client-address={}", clientId, address);
+            return;
+        }
+
+        taskScheduler.schedule(() -> {
+            try {
+                runClientRecovery(nodeInfo);
+            } catch (Exception e) {
+                log.error("ClientRecoveryPlugin: processExitEvent(): EXCEPTION: while recovering node: node-info={} -- Exception: ", nodeInfo, e);
+            }
+        }, Instant.now().plusMillis(clientRecoveryDelay));
+    }
+
+    /**
+     * Runs the recovery instructions against the given node.
+     *
+     * Note: this stores the recovery instructions file under the "instruction-files"
+     * key of the entry's preregistration data, i.e. it modifies the passed entry.
+     *
+     * @param entry the node registry entry of the node to recover; if {@code null}
+     *              nothing is done
+     * @throws Exception if creating or executing the recovery task fails
+     */
+    public void runClientRecovery(NodeRegistryEntry entry) throws Exception {
+        log.debug("ClientRecoveryPlugin: runClientRecovery(): node-info={}", entry);
+        if (entry == null) {
+            log.warn("ClientRecoveryPlugin: runClientRecovery(): No node info provided. Client recovery skipped");
+            return;
+        }
+
+        entry.getPreregistration().put("instruction-files", recoveryInstructionsFile);
+
+        ClientInstallationTask task = InstallationHelperFactory.getInstance()
+                .createInstallationHelper(entry)
+                .createClientInstallationTask(entry);
+        log.debug("ClientRecoveryPlugin: runClientRecovery(): Client recovery task: {}", task);
+        SshClientInstaller installer = SshClientInstaller.builder()
+                .task(task)
+                .properties(clientInstallationProperties)
+                .build();
+        log.warn("ClientRecoveryPlugin: runClientRecovery(): Starting client recovery: node-info={}", entry);
+        boolean result = installer.execute();
+        log.warn("ClientRecoveryPlugin: runClientRecovery(): Client recovery completed: result={}, node-info={}", result, entry);
+    }
+}
diff --git a/event-management/baguette-client/bin/kill.sh b/event-management/baguette-client/bin/kill.sh
index 6baf2d68558df2631b266cd526c25cb04d8d22a6..f5446b54a55ae8e7874024a953eab2219893a330 100755
--- a/event-management/baguette-client/bin/kill.sh
+++ b/event-management/baguette-client/bin/kill.sh
@@ -16,7 +16,7 @@ BASEDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && cd .. && pwd )
# Kill Baguette client
#PID=`jps | grep BaguetteClient | cut -d " " -f 1`
-PID=`ps -ef |grep java |grep BaguetteClient | cut -c 10-14`
+PID=`ps -ef |grep java |grep BaguetteClient | cut -c 10-20`
if [ "$PID" != "" ]
then
echo "Killing baguette client (pid: $PID)"
diff --git a/event-management/baguette-client/conf/baguette-client.properties b/event-management/baguette-client/conf/baguette-client.properties
index e87ed088fd17c714e2969d2e35ce059ee734bc4e..dded9204152a395d3efaf9e9a1472708d0afd321 100644
--- a/event-management/baguette-client/conf/baguette-client.properties
+++ b/event-management/baguette-client/conf/baguette-client.properties
@@ -32,6 +32,17 @@ server-fingerprint = ${BAGUETTE_SERVER_PUBKEY_FINGERPRINT}
server-username = ${BAGUETTE_SERVER_USERNAME}
server-password = ${BAGUETTE_SERVER_PASSWORD}
+# -----------------------------------------------------------------------------
+# Client-side Self-healing settings
+# -----------------------------------------------------------------------------
+
+#self.healing.enabled=true
+#self.healing.recovery.file.baguette=conf/baguette.json
+#self.healing.recovery.file.netdata=conf/netdata.json
+#self.healing.recovery.delay=10000
+#self.healing.recovery.retry.wait=60000
+#self.healing.recovery.max.retries=3
+
# -----------------------------------------------------------------------------
# Collectors settings
# -----------------------------------------------------------------------------
@@ -41,8 +52,11 @@ server-password = ${BAGUETTE_SERVER_PASSWORD}
collector.netdata.enable = true
collector.netdata.delay = 10000
collector.netdata.url = http://127.0.0.1:19999/api/v1/allmetrics?format=json
+collector.netdata.urlOfNodesWithoutClient = http://%s:19999/api/v1/allmetrics?format=json
#collector.netdata.create-topic = true
#collector.netdata.allowed-topics = netdata__system__cpu__user:an_alias
+collector.netdata.error-limit = 3
+collector.netdata.pause-period = 60
# -----------------------------------------------------------------------------
# Cluster settings
@@ -58,7 +72,7 @@ collector.netdata.url = http://127.0.0.1:19999/api/v1/allmetrics?format=json
#cluster.failureTimeout=5000
cluster.testInterval=5000
-cluster.log-enabled=false
+cluster.log-enabled=true
cluster.out-enabled=true
cluster.join-on-init=true
diff --git a/event-management/baguette-client/conf/baguette.json b/event-management/baguette-client/conf/baguette.json
new file mode 100644
index 0000000000000000000000000000000000000000..cdd4ab4aa6c12b51c21128fba64dfb86e5db0dff
--- /dev/null
+++ b/event-management/baguette-client/conf/baguette.json
@@ -0,0 +1,16 @@
+[{
+ "name": "Initial wait...",
+ "command": "pwd",
+ "waitBefore": 0,
+ "waitAfter": 5000
+}, {
+ "name": "Sending baguette client kill command...",
+ "command": "/opt/baguette-client/bin/kill.sh",
+ "waitBefore": 0,
+ "waitAfter": 2000
+}, {
+ "name": "Sending baguette client start command...",
+ "command": "/opt/baguette-client/bin/run.sh",
+ "waitBefore": 0,
+ "waitAfter": 10000
+}]
diff --git a/event-management/baguette-client/conf/eu.melodic.event.brokercep.properties b/event-management/baguette-client/conf/eu.melodic.event.brokercep.properties
index ca522075077176d1078de8de3e834f238fc8f5b8..f6f46317cb7c151611a73b8a4ded970da62c366d 100644
--- a/event-management/baguette-client/conf/eu.melodic.event.brokercep.properties
+++ b/event-management/baguette-client/conf/eu.melodic.event.brokercep.properties
@@ -7,7 +7,9 @@
# https://www.mozilla.org/en-US/MPL/2.0/
#
-password-encoder-class = eu.melodic.event.util.password.IdentityPasswordEncoder
+#password-encoder-class = eu.melodic.event.util.password.AsterisksPasswordEncoder
+#password-encoder-class = eu.melodic.event.util.password.IdentityPasswordEncoder
+#password-encoder-class = eu.melodic.event.util.password.PresentPasswordEncoder
# Broker ports and protocol
brokercep.broker-name = broker
@@ -52,8 +54,8 @@ brokercep.ssl.key-entry-ext-san = dns:localhost,ip:127.0.0.1,ip:%{DEFAULT_IP}%,i
# Authentication and Authorization settings
brokercep.authentication-enabled = true
-#brokercep.additional-broker-credentials = aaa/111, bbb/222
-brokercep.additional-broker-credentials = ENC(KYZnHeuoJ0NsE1OuIdDKWIHv8shUdcxXZmNtXjXJZdw=)
+#brokercep.additional-broker-credentials = aaa/111, bbb/222, morphemic/morphemic
+brokercep.additional-broker-credentials = ENC(axeJUxNHajYfBffUwvuT3kwTgLTpRliDMz/ZQ9hROZ3BNOv0Idw72NJsawzIZRuZ)
brokercep.authorization-enabled = false
# Broker instance settings
diff --git a/event-management/baguette-client/conf/logback-spring.xml b/event-management/baguette-client/conf/logback-spring.xml
index 437c93a14e4f47f0566b9e8e6001bfbb350e9ee3..f3a0d569d3c842d9652801c55bcfd4e5ad98d0fa 100644
--- a/event-management/baguette-client/conf/logback-spring.xml
+++ b/event-management/baguette-client/conf/logback-spring.xml
@@ -32,7 +32,7 @@
-
+
diff --git a/event-management/baguette-client/conf/netdata.json b/event-management/baguette-client/conf/netdata.json
new file mode 100644
index 0000000000000000000000000000000000000000..ed40f8260940dd4eeffdfcbd4266f8fcf8c61de2
--- /dev/null
+++ b/event-management/baguette-client/conf/netdata.json
@@ -0,0 +1,16 @@
+[{
+ "name": "Initial wait...",
+ "command": "pwd",
+ "waitBefore": 0,
+ "waitAfter": 5000
+}, {
+ "name": "Sending Netdata agent kill command...",
+ "command": "sudo sh -c 'ps -U netdata -o \"pid\" --no-headers | xargs kill -9' ",
+ "waitBefore": 0,
+ "waitAfter": 2000
+}, {
+ "name": "Sending Netdata agent start command...",
+ "command": "sudo netdata",
+ "waitBefore": 0,
+ "waitAfter": 10000
+}]
diff --git a/event-management/baguette-client/pom.xml b/event-management/baguette-client/pom.xml
index 35f49c3922f4494ffac8965153ad32f2983854a4..56dbe0a24bde3bf57f4074afe03b942a8a67cb3d 100644
--- a/event-management/baguette-client/pom.xml
+++ b/event-management/baguette-client/pom.xml
@@ -34,6 +34,11 @@
broker-client
${project.version}
+
+ eu.melodic.event
+ common
+ ${project.version}
+
diff --git a/event-management/baguette-client/src/main/assembly/baguette-client-installation-package.xml b/event-management/baguette-client/src/main/assembly/baguette-client-installation-package.xml
index f3491d5dfc22fad3204dab24fa63eb3997565a8b..9c70619122114a20086d8cffcbb9ce4cfebe7890 100644
--- a/event-management/baguette-client/src/main/assembly/baguette-client-installation-package.xml
+++ b/event-management/baguette-client/src/main/assembly/baguette-client-installation-package.xml
@@ -61,6 +61,22 @@
*.jar
+
+ target
+ ${project.parent.basedir}/broker-client/target
+
+ broker-client-jar-with-dependencies.jar
+
+
+
+ bin
+ ${project.parent.basedir}/broker-client
+
+ client.*
+
+ unix
+ 0755
+
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/BaguetteClient.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/BaguetteClient.java
index 7603bd1f691a88f92f920169d2af4a36992ecd8f..a121b5b089a1678e7126de9fa1e95e3b2fb2ae20 100644
--- a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/BaguetteClient.java
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/BaguetteClient.java
@@ -12,16 +12,21 @@ package eu.melodic.event.baguette.client;
import edu.emory.mathcs.backport.java.util.Collections;
import eu.melodic.event.baguette.client.cluster.ClusterManagerProperties;
import eu.melodic.event.baguette.client.collector.netdata.NetdataCollector;
+import eu.melodic.event.baguette.client.plugin.recovery.SelfHealingPlugin;
+import eu.melodic.event.util.EventBus;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.NoSuchBeanDefinitionException;
+import org.springframework.beans.factory.config.ConfigurableBeanFactory;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
-import org.springframework.context.ApplicationContext;
import org.springframework.context.ConfigurableApplicationContext;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Scope;
+import org.springframework.scheduling.annotation.EnableScheduling;
import java.io.IOException;
import java.util.ArrayList;
@@ -31,8 +36,9 @@ import java.util.List;
* Baguette client
*/
@Slf4j
+@EnableScheduling
@SpringBootApplication(scanBasePackages = {
- "eu.melodic.event.baguette.client", "eu.melodic.event.brokercep",
+ "eu.melodic.event.baguette.client", "eu.melodic.event.brokercep", "eu.melodic.event.common",
"eu.melodic.event.brokerclient", "eu.melodic.event.util"})
@RequiredArgsConstructor
public class BaguetteClient implements ApplicationRunner {
@@ -45,12 +51,21 @@ public class BaguetteClient implements ApplicationRunner {
private static int killDelay;
+ @Getter
+ private Sshc client;
+
public static void main(String[] args) {
SpringApplication.run(BaguetteClient.class, args);
forceExit();
}
+    /**
+     * Creates the event bus bean of the Baguette client application.
+     *
+     * @return a new event bus built with the builder's default settings
+     */
+    @Bean
+    @Scope(ConfigurableBeanFactory.SCOPE_SINGLETON)
+    public EventBus eventBus() {
+        final EventBus bus = EventBus.builder().build();
+        return bus;
+    }
+
@Override
public void run(ApplicationArguments args) throws IOException {
log.debug("BaguetteClient: Starting");
@@ -66,21 +81,23 @@ public class BaguetteClient implements ApplicationRunner {
// Start measurement collectors (but not in interactive mode)
if (!interactiveMode) {
startCollectors();
+ applicationContext.getBean(SelfHealingPlugin.class).start();
}
if (interactiveMode) {
// Run CLI
log.debug("BaguetteClient: Enters interactive mode");
- runCli(applicationContext);
+ runCli();
} else {
// Run SSH client
log.debug("BaguetteClient: Enters SSH mode");
- runSshClient(applicationContext);
+ runSshClient();
}
log.debug("BaguetteClient: Exiting");
// Stop measurement collectors
if (!interactiveMode) {
+ applicationContext.getBean(SelfHealingPlugin.class).stop();
stopCollectors();
}
@@ -142,20 +159,16 @@ public class BaguetteClient implements ApplicationRunner {
collectorsList.clear();
}
- protected void runSshClient(ApplicationContext appCtx) {
+ protected void runSshClient() {
boolean retry = true;
while (true) {
try {
- log.trace("BaguetteClient: spring-boot application-context: {}", appCtx);
- Sshc client = appCtx.getBean(Sshc.class);
- client.setConfiguration(baguetteClientProperties);
- log.trace("BaguetteClient: Sshc instance from application-context: {}", client);
- log.trace("BaguetteClient: Calling SSHC start()");
- client.start(retry);
+ startSshClient(retry);
+
log.trace("BaguetteClient: Calling SSHC run()");
client.run();
- log.trace("BaguetteClient: Calling SSHC stop()");
- client.stop();
+
+ stopSshClient();
} catch (Exception ex) {
log.error("BaguetteClient: EXCEPTION: ", ex);
}
@@ -164,12 +177,30 @@ public class BaguetteClient implements ApplicationRunner {
}
}
- protected void runCli(ApplicationContext appCtx) throws IOException {
- BaguetteClientCLI cli = appCtx.getBean(BaguetteClientCLI.class);
+ protected void runCli() throws IOException {
+ BaguetteClientCLI cli = applicationContext.getBean(BaguetteClientCLI.class);
cli.setConfiguration(baguetteClientProperties);
cli.run();
}
+    /**
+     * Obtains the {@link Sshc} bean from the application context, configures it with the
+     * Baguette client properties, starts it and sends the greeting to the server.
+     * The instance is kept in the {@code client} field.
+     *
+     * @param retry passed through to {@link Sshc#start(boolean)}
+     * @throws IOException if configuring or starting the SSH client fails
+     */
+    public synchronized void startSshClient(boolean retry) throws IOException {
+        log.trace("BaguetteClient: spring-boot application-context: {}", applicationContext);
+        final Sshc sshc = applicationContext.getBean(Sshc.class);
+        // Publish the instance before configuring it, exactly as callers of getClient() expect
+        this.client = sshc;
+        sshc.setConfiguration(baguetteClientProperties);
+
+        log.trace("BaguetteClient: Sshc instance from application-context: {}", sshc);
+        log.trace("BaguetteClient: Calling SSHC start()");
+        sshc.start(retry);
+        sshc.greeting();
+    }
+
+    /**
+     * Stops the current SSH client, if any, and clears the {@code client} field.
+     * The field is cleared before the client is stopped, so {@code getClient()}
+     * already returns {@code null} while the client shuts down.
+     * Calling this method when no client is active is a no-op.
+     *
+     * @throws IOException if stopping the SSH client fails
+     */
+    public synchronized void stopSshClient() throws IOException {
+        log.trace("BaguetteClient: Calling SSHC stop()");
+        Sshc tmp = client;
+        client = null;
+        if (tmp == null) {
+            // Nothing to stop (never started, start failed, or already stopped)
+            log.debug("BaguetteClient: SSH client is not active. Nothing to stop");
+            return;
+        }
+        tmp.stop();
+    }
+
/*protected static Properties loadConfig(String configFile) throws IOException {
Properties config = new Properties();
try {
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/Collector.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/Collector.java
index 2a5629054e9e0df8c7ac24a4f8dcc521c6b1be3b..123b36904d0f81ea7aa015deb6f581c5a558eba5 100644
--- a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/Collector.java
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/Collector.java
@@ -9,8 +9,8 @@
package eu.melodic.event.baguette.client;
-public interface Collector {
- void start();
- void stop();
+import eu.melodic.event.util.Plugin;
+
+public interface Collector extends Plugin {
void activeGroupingChanged(String oldGrouping, String newGrouping);
}
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/CommandExecutor.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/CommandExecutor.java
index eaf545625b5ca8b3ea4861b11f24ccfac7ddceac..dc6ffde6a5988e70c9265fbd49996545f02fd006 100644
--- a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/CommandExecutor.java
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/CommandExecutor.java
@@ -20,12 +20,14 @@ import eu.melodic.event.brokerclient.BrokerClient;
import eu.melodic.event.brokerclient.event.EventGenerator;
import eu.melodic.event.brokerclient.properties.BrokerClientProperties;
import eu.melodic.event.util.*;
+import io.atomix.cluster.ClusterMembershipEvent;
import io.atomix.cluster.Member;
import lombok.*;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.context.ApplicationContext;
import org.springframework.stereotype.Service;
import java.io.*;
@@ -57,6 +59,11 @@ public class CommandExecutor {
private static final int DEFAULT_ID_LENGTH = 32;
private final static String DEFAULT_KEYSTORE_DIR = DEFAULT_CONF_DIR;
+ public final static String EVENT_CLUSTER_NODE_ADDED = "CLUSTER_NODE_ADDED";
+ public final static String EVENT_CLUSTER_NODE_REMOVED = "CLUSTER_NODE_REMOVED";
+
+ @Autowired
+ private ApplicationContext applicationContext;
@Autowired
private BaguetteClient baguetteClient;
@Autowired
@@ -65,6 +72,9 @@ public class CommandExecutor {
private BrokerClientProperties brokerClientProperties;
@Autowired
private PasswordUtil passwordUtil;
+ @Autowired
+ @Getter
+ private EventBus eventBus;
private BaguetteClientProperties config;
private String idFile;
@@ -74,6 +84,8 @@ public class CommandExecutor {
private PrintStream err;
private String clientId;
+ @Getter
+ private ClientConfiguration clientConfiguration;
@Getter
private final Map groupings = new LinkedHashMap<>();
private GroupingConfiguration activeGrouping;
@@ -97,6 +109,10 @@ public class CommandExecutor {
@Getter private String aggregatorGrouping;
@Getter private String nodeGrouping;
+ private Thread serverWatcherThread;
+ private boolean captureInputLine;
+ @Getter private String lastInputLine;
+
public CommandExecutor() {
initializeClientId();
@@ -122,6 +138,46 @@ public class CommandExecutor {
}
}
+ void communicateWithServer(InputStream in, PrintStream out, PrintStream err) throws IOException {
+ BufferedReader reader = new BufferedReader(new InputStreamReader(in));
+ String line;
+ while ((line = reader.readLine()) != null) {
+ if (captureInputLine) {
+ lastInputLine = line;
+ captureInputLine = false;
+ continue;
+ }
+ line = line.trim();
+ if (StringUtils.startsWithIgnoreCase(line, "CLUSTER-KEY")) {
+ String[] s = line.split(" ", 2);
+ log.info("{} {}", s[0], s.length>1 ? passwordUtil.encodePassword(s[1]) : "");
+ } else
+ log.info(line);
+ try {
+ boolean exit = execCmd(line.split("[ \t]+"), in, out, err);
+ if (exit) break;
+ } catch (Exception ex) {
+ log.error("", ex);
+ // Report exception back to server
+ err.println(ex);
+ ex.printStackTrace(err);
+ err.flush();
+ }
+ }
+ }
+
+ public void executeCommand(String command) throws IOException, InterruptedException {
+ String[] args = command.split(" ");
+ execCmd(args, baguetteClient.getClient().getIn(), baguetteClient.getClient().getOut(), baguetteClient.getClient().getOut());
+
+ // Wait for server response/input if needed
+ while (captureInputLine) {
+ log.trace("Waiting for server input...");
+ try { Thread.sleep(100); } catch (InterruptedException e) {}
+ }
+ log.trace("Server input: {}", lastInputLine);
+ }
+
boolean executeCommand(String line, InputStream in, PrintStream out, PrintStream err) throws IOException, InterruptedException {
return execCmd(line.split("[ \t]+"), in, out, err);
}
@@ -146,6 +202,46 @@ public class CommandExecutor {
log.warn(mesg);
out.println(mesg);
}
+ } else if ("CONNECT".equals(cmd)) {
+ if (serverWatcherThread!=null) {
+ log.warn("Already connected");
+ return false;
+ }
+ baguetteClient.startSshClient(false);
+ serverWatcherThread = new Thread(() -> {
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new BufferedInputStream(baguetteClient.getClient().getIn())));
+ String line;
+ try {
+ while ((line = reader.readLine()) != null) {
+ log.info(line);
+ }
+ } catch (Exception ex) {
+ if (baguetteClient.getClient()!=null)
+ log.warn("Exception in serverWatcherThread: ", ex);
+ else
+ log.debug("serverWatcherThread has exited");
+ }
+ serverWatcherThread = null;
+ });
+ serverWatcherThread.start();
+ } else if ("DISCONNECT".equals(cmd)) {
+ if (serverWatcherThread==null) {
+ log.warn("Not connected");
+ return false;
+ }
+ baguetteClient.stopSshClient();
+ serverWatcherThread = null;
+
+ } else if ("SEND".equals(cmd)) {
+ StringBuilder sb = new StringBuilder();
+ for (int i=1; i statsMap = brokerCepService.getBrokerCepStatistics();
- log.info("Statistics: {}", statsMap);
+ log.debug("Statistics: {}", statsMap);
if (out!=null) out.println("-INPUT:"+inputUuid+":"+serializeToString(statsMap));
}
@@ -1091,6 +1235,14 @@ public class CommandExecutor {
if (out!=null) out.println("STATISTICS CLEARED");
}
+    /**
+     * Tells whether the active grouping is the aggregator grouping.
+     *
+     * @return {@code true} if both the active grouping and the aggregator grouping are
+     *         set and the aggregator grouping equals the active grouping's name
+     */
+    public boolean isAggregator() {
+        if (activeGrouping == null || aggregatorGrouping == null) {
+            return false;
+        }
+        return aggregatorGrouping.equals(activeGrouping.getName());
+    }
+
+    /**
+     * Tells whether this client is a plain node, i.e. not an aggregator.
+     *
+     * @return the negation of {@link #isAggregator()}
+     */
+    public boolean isNode() {
+        final boolean aggregator = isAggregator();
+        return !aggregator;
+    }
+
/*private static class StreamGobbler implements Runnable {
private InputStream inputStream1;
private InputStream inputStream2;
@@ -1130,6 +1282,19 @@ public class CommandExecutor {
log.trace("{}(): Back-off flag: {}", methodName, commandExecutor.getClusterManager().getBrokerUtil().isBackOffSet());
}
+        /**
+         * Called when the local node has joined the cluster. Sends the id of the
+         * local cluster member as the "node-id" client property.
+         */
+        @Override
+        public void joinedCluster() {
+            final String localNodeId = commandExecutor.getClusterManager().getLocalMember().id().id();
+            log.info("joinedCluster(): Node joined cluster: {}", localNodeId);
+            commandExecutor.sendClientProperty("node-id", localNodeId);
+        }
+
+        /**
+         * Called when the local node has left the cluster. Clears the "node-id"
+         * client property by sending an empty value.
+         */
+        @Override
+        public void leftCluster() {
+            // Log prefix names this method (it was copied from joinedCluster())
+            log.info("leftCluster(): Node left cluster");
+            commandExecutor.sendClientProperty("node-id", "");
+        }
+
@Override
public void initialize() {
printInfo("initialize", "INITIALIZE");
@@ -1151,6 +1316,22 @@ public class CommandExecutor {
@Override
public void statusChanged(NODE_STATUS oldStatus, NODE_STATUS newStatus) {
log.debug("statusChanged(): Status changed: {} --> {}", oldStatus, newStatus);
+ commandExecutor.nodeStatusChanged(oldStatus, newStatus);
+ }
+
+        /**
+         * Called on cluster membership changes. When the local node's status is
+         * AGGREGATOR, member additions and removals are forwarded to the event bus
+         * under the CLUSTER_NODE_ADDED / CLUSTER_NODE_REMOVED topics; other event
+         * types, and all events on non-aggregator nodes, are only logged.
+         *
+         * @param event the cluster membership event
+         */
+        @Override
+        public void clusterChanged(ClusterMembershipEvent event) {
+            final ClusterMembershipEvent.Type eventType = event.type();
+            log.debug("clusterChanged(): Cluster changed: {} --> {}", eventType, event.subject().id().id());
+
+            // Only the aggregator broadcasts membership changes
+            if (commandExecutor.getClusterManager().getBrokerUtil().getLocalStatus() != NODE_STATUS.AGGREGATOR) {
+                return;
+            }
+
+            if (eventType == ClusterMembershipEvent.Type.MEMBER_ADDED) {
+                log.debug("clusterChanged(): Broadcast MEMBER_ADDED in event bus: {}", event.subject().id().id());
+                commandExecutor.getEventBus().send(EVENT_CLUSTER_NODE_ADDED, event);
+                return;
+            }
+            if (eventType == ClusterMembershipEvent.Type.MEMBER_REMOVED) {
+                log.debug("clusterChanged(): Broadcast MEMBER_REMOVED in event bus: {}", event.subject().id().id());
+                commandExecutor.getEventBus().send(EVENT_CLUSTER_NODE_REMOVED, event);
+            }
         }
@Override
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/Sshc.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/Sshc.java
index 236c95ccfae647575ecd66226c9928ea42847bb4..3ed61686a4b5eeed297398f64c6182b760227124 100644
--- a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/Sshc.java
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/Sshc.java
@@ -10,6 +10,8 @@
package eu.melodic.event.baguette.client;
import eu.melodic.event.brokercep.BrokerCepService;
+import lombok.Getter;
+import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.sshd.client.ClientFactoryManager;
@@ -42,8 +44,8 @@ import java.util.Optional;
/**
* Custom SSH client
*/
-@Service
@Slf4j
+@Service
public class Sshc {
private BaguetteClientProperties config;
private SshClient client;
@@ -56,16 +58,23 @@ public class Sshc {
@Autowired
private BrokerCepService brokerCepService;
+ @Getter
private InputStream in;
+ @Getter
private PrintStream out;
+ //@Getter
//private PrintStream err;
+ @Getter
private String clientId;
+ @Getter @Setter
+ private boolean useServerKeyVerifier = true;
+
public void setConfiguration(BaguetteClientProperties config) throws IOException {
this.config = config;
this.clientId = config.getClientId();
log.trace("Sshc: cmd-exec: {}", commandExecutor);
- this.commandExecutor.setConfiguration(config);
+ if (this.commandExecutor!=null) this.commandExecutor.setConfiguration(config);
}
public synchronized void start(boolean retry) throws IOException {
@@ -112,46 +121,47 @@ public class Sshc {
//client.setServerKeyVerifier(AcceptAllServerKeyVerifier.INSTANCE);
//client.setServerKeyVerifier(new RequiredServerKeyVerifier(....));
- client.setServerKeyVerifier(new ServerKeyVerifier()
- {
- private String serverFingerprint;
- private String serverPubKey;
-
- public boolean verifyServerKey(ClientSession sshClientSession, SocketAddress remoteAddress, PublicKey serverKey) {
-
- // Print server address info
- log.info("verifyServerKey(): remoteAddress: {}", remoteAddress.toString());
-
- // Check that server public key fingerprint matches with the one in configuration
- String fingerprint = KeyUtils.getFingerPrint(serverKey);
- log.info("verifyServerKey(): serverKey: fingerprint: {}", fingerprint);
- //if ( fingerprint!=null && KeyUtils.checkFingerPrint(serverFingerprint, serverKey).getFirst() ) log.info("verifyServerKey(): serverKey: fingerprint: MATCH");
- //else log.warn("verifyServerKey(): serverKey: fingerprint: NO MATCH");
-
- // Check that server public key matches with the one in configuration
- try {
- log.debug("verifyServerKey(): serverKey: decoder: {}", KeyUtils.getPublicKeyEntryDecoder(serverKey).getClass());
- java.io.ByteArrayOutputStream baos = new java.io.ByteArrayOutputStream();
- ((RSAPublicKeyDecoder) KeyUtils.getPublicKeyEntryDecoder(serverKey)).encodePublicKey(baos, (RSAPublicKey) serverKey);
- String keyStr = new String(Base64.getEncoder().encode(baos.toByteArray()));
- log.debug("verifyServerKey(): serverKey: server public key: \n{}", keyStr);
-
- return keyStr.equalsIgnoreCase(serverPubKey);
-
- } catch (Exception ex) {
- log.error("verifyServerKey(): serverKey: EXCEPTION: ", ex);
- return false;
+ if (useServerKeyVerifier) {
+ client.setServerKeyVerifier(new ServerKeyVerifier() {
+ private String serverFingerprint;
+ private String serverPubKey;
+
+ public boolean verifyServerKey(ClientSession sshClientSession, SocketAddress remoteAddress, PublicKey serverKey) {
+
+ // Print server address info
+ log.info("verifyServerKey(): remoteAddress: {}", remoteAddress.toString());
+
+ // Check that server public key fingerprint matches with the one in configuration
+ String fingerprint = KeyUtils.getFingerPrint(serverKey);
+ log.info("verifyServerKey(): serverKey: fingerprint: {}", fingerprint);
+ //if ( fingerprint!=null && KeyUtils.checkFingerPrint(serverFingerprint, serverKey).getFirst() ) log.info("verifyServerKey(): serverKey: fingerprint: MATCH");
+ //else log.warn("verifyServerKey(): serverKey: fingerprint: NO MATCH");
+
+ // Check that server public key matches with the one in configuration
+ try {
+ log.debug("verifyServerKey(): serverKey: decoder: {}", KeyUtils.getPublicKeyEntryDecoder(serverKey).getClass());
+ java.io.ByteArrayOutputStream baos = new java.io.ByteArrayOutputStream();
+ ((RSAPublicKeyDecoder) KeyUtils.getPublicKeyEntryDecoder(serverKey)).encodePublicKey(baos, (RSAPublicKey) serverKey);
+ String keyStr = new String(Base64.getEncoder().encode(baos.toByteArray()));
+ log.debug("verifyServerKey(): serverKey: server public key: \n{}", keyStr);
+
+ return keyStr.equalsIgnoreCase(serverPubKey);
+
+ } catch (Exception ex) {
+ log.error("verifyServerKey(): serverKey: EXCEPTION: ", ex);
+ return false;
+ }
}
- }
- public ServerKeyVerifier setServerPubKey(String pubkey, String fingerprint) {
- this.serverFingerprint = fingerprint;
- this.serverPubKey = pubkey;
- return this;
+ public ServerKeyVerifier setServerPubKey(String pubkey, String fingerprint) {
+ this.serverFingerprint = fingerprint;
+ this.serverPubKey = pubkey;
+ return this;
+ }
}
- }
- .setServerPubKey(serverPubKey, serverFingerprint)
- );
+ .setServerPubKey(serverPubKey, serverFingerprint)
+ );
+ }
this.simple = SshClient.wrapAsSimpleClient(client);
//simple.setConnectTimeout(...CONNECT_TIMEOUT...);
@@ -204,17 +214,8 @@ public class Sshc {
log.info("SSH client stopped");
}
- public void run() throws IOException {
+ public synchronized void greeting() {
if (!started) return;
-
- // Start communication protocol with Server
- // Execution waits here until connection is closed
- log.trace("run(): Calling communicateWithServer()...");
- communicateWithServer(in, out, out);
- }
-
- protected void communicateWithServer(InputStream in, PrintStream out, PrintStream err) throws IOException {
- BufferedReader reader = new BufferedReader(new InputStreamReader(in));
String certOneLine = Optional
.ofNullable(brokerCepService.getBrokerCertificate())
.orElse("")
@@ -232,21 +233,15 @@ public class Sshc {
brokerCepService.getBrokerPassword(),
certOneLine);
out.flush();
- String line;
- while ((line = reader.readLine()) != null) {
- line = line.trim();
- log.info(line);
- try {
- boolean exit = commandExecutor.execCmd(line.split("[ \t]+"), in, out, err);
- if (exit) break;
- } catch (Exception ex) {
- log.error("", ex);
- // Report exception back to server
- err.println(ex);
- ex.printStackTrace(err);
- err.flush();
- }
- }
+ }
+
+ public void run() throws IOException { // Runs the server dialogue through commandExecutor, then writes the farewell line
+ if (!started) return; // no-op while the 'started' flag is false
+
+ // Start communication protocol with Server
+ // Execution waits here until connection is closed
+ log.trace("run(): Calling communicateWithServer()...");
+ commandExecutor.communicateWithServer(in, out, out); // 'out' is passed for both the second and third argument
out.printf("-BYE FROM CLIENT: %s%n", clientId); // written only after communicateWithServer() has returned
}
}
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/BrokerUtil.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/BrokerUtil.java
index 3b1041e7fed8d9c0e9c2b228613a2399cf603a9b..144548a4d6156f7bf2c65fd5015016a3eef74f10 100644
--- a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/BrokerUtil.java
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/BrokerUtil.java
@@ -9,6 +9,7 @@
package eu.melodic.event.baguette.client.cluster;
+import io.atomix.cluster.ClusterMembershipEvent;
import io.atomix.cluster.Member;
import io.atomix.core.Atomix;
import lombok.Getter;
@@ -397,17 +398,39 @@ public class BrokerUtil extends AbstractLogBase {
// Check if any node is initializing as broker (then don't start election)
if (getActiveNodes().stream()
- .map(MemberWithScore::getMember).map(this::getNodeStatus)
- .noneMatch(s -> INITIALIZING==s || AGGREGATOR ==s))
+ .map(MemberWithScore::getMember)
+ .map(this::getNodeStatus)
+ .noneMatch(s -> INITIALIZING==s || AGGREGATOR==s))
{
startElection();
}
}
+ public void checkBrokerNumber() { // Starts a new election when more than one active node has AGGREGATOR status
+ List brokers = getBrokers(); // only used in the debug log on the next line
+ log_debug("BRU: Check number of Brokers in cluster: {}", brokers);
+
+ // Count the active nodes whose status is AGGREGATOR; more than one means there are multiple brokers in the cluster
+ long numOfBrokers = getActiveNodes().stream()
+ .map(MemberWithScore::getMember)
+ .map(this::getNodeStatus)
+ .filter(s -> AGGREGATOR==s)
+ .count();
+ log_info("BRU: Number of Brokers in cluster: {}", numOfBrokers);
+ if (numOfBrokers>1) {
+ log_warn("BRU: {} brokers found in the cluster. Starting election...", numOfBrokers);
+ startElection();
+ }
+ }
+
public interface NodeCallback { // Hooks that BrokerUtil/ClusterManager invoke on the hosting node
+ void joinedCluster(); // called by ClusterManager.waitToJoin(..) after its wait loop ends; NOTE(review): the call is unconditional, so it also fires when the loop was left via InterruptedException
+ void leftCluster(); // called at the end of ClusterManager.leaveCluster()
+
void initialize();
void stepDown();
void statusChanged(NODE_STATUS oldStatus, NODE_STATUS newStatus);
+ void clusterChanged(ClusterMembershipEvent event); // called from ClusterManager's membership event handler with the received event
String getConfiguration(Member local);
void setConfiguration(String newConfig);
}
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/ClusterManager.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/ClusterManager.java
index 9958b2744e127613a0bf2881772fcd8f999839cf..6080184df81d2aa77d763bb012189c6d1f4295f3 100644
--- a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/ClusterManager.java
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/ClusterManager.java
@@ -25,15 +25,20 @@ import io.atomix.protocols.raft.partition.RaftPartitionGroup;
import io.atomix.utils.net.Address;
import lombok.*;
import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.scheduling.TaskScheduler;
+import org.springframework.stereotype.Component;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.time.Duration;
import java.util.*;
import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ScheduledFuture;
import java.util.stream.Collectors;
@Data
+@Component
@EqualsAndHashCode(callSuper = true)
public class ClusterManager extends AbstractLogBase {
@@ -54,6 +59,11 @@ public class ClusterManager extends AbstractLogBase {
@Setter(AccessLevel.NONE)
private BrokerUtil brokerUtil = null;
+ @Autowired
+ private TaskScheduler taskScheduler;
+ @Getter(AccessLevel.NONE) @Setter(AccessLevel.NONE)
+ private ScheduledFuture> checkerTask;
+
// ------------------------------------------------------------------------
public synchronized ClusterCLI getCli() {
@@ -184,6 +194,8 @@ public class ClusterManager extends AbstractLogBase {
log_info("CLM: {}: node={}", event.type(), event.subject().id().id());
brokerUtil.checkBroker();
}
+ if (callback!=null)
+ callback.clusterChanged(event);
}
});
@@ -197,6 +209,20 @@ public class ClusterManager extends AbstractLogBase {
if (startElection) {
brokerUtil.checkBroker();
}
+
+ // Start cluster checker
+ if (properties.isClusterCheckerEnabled()) {
+ long delay = Math.max(properties.getClusterCheckerDelay(), 10000L);
+ log_info("CLM: Starting cluster checker (delay: {})...", delay);
+ checkerTask = taskScheduler.scheduleWithFixedDelay(() -> {
+ if (brokerUtil != null)
+ brokerUtil.checkBrokerNumber();
+ else
+ log_warn("CLM: Cluster checker: BrokerUtil is NULL (is it a BUG?)");
+ }, delay);
+ } else {
+ log_warn("CLM: Cluster checker is DISABLED");
+ }
}
public void waitToJoin() {
@@ -204,6 +230,8 @@ public class ClusterManager extends AbstractLogBase {
if (isInitialized() && isRunning()) break;
try { Thread.sleep(500); } catch (InterruptedException e) { break; }
}
+ if (callback!=null)
+ callback.joinedCluster();
}
public void waitToJoin(long waitForMillis) {
@@ -214,9 +242,18 @@ public class ClusterManager extends AbstractLogBase {
long waitFor = Math.min(500, endTm-System.currentTimeMillis());
try { Thread.sleep(waitFor); } catch (InterruptedException e) { break; }
}
+ if (callback!=null)
+ callback.joinedCluster();
}
public void leaveCluster() {
+ // Stop cluster checker
+ if (checkerTask!=null && !checkerTask.isCancelled()) {
+ log_info("CLM: Stopping cluster checker...");
+ checkerTask.cancel(true);
+ checkerTask = null;
+ }
+
// Leave cluster
log_info("CLM: Leaving cluster...");
long startTm = System.currentTimeMillis();
@@ -226,6 +263,8 @@ public class ClusterManager extends AbstractLogBase {
log_debug("CLM: Left cluster in {}ms", endTm-startTm);
atomix = null;
brokerUtil = null;
+ if (callback!=null)
+ callback.leftCluster();
}
// ------------------------------------------------------------------------
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/ClusterManagerProperties.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/ClusterManagerProperties.java
index 3c9972274df7fd5091097e0c1745bb1d12aafe2f..f1915f6cea799e7941c6ceea6982124560d24bda 100644
--- a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/ClusterManagerProperties.java
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/ClusterManagerProperties.java
@@ -38,6 +38,9 @@ public class ClusterManagerProperties {
private boolean joinOnInit = true;
private boolean electionOnJoin;
+ private boolean clusterCheckerEnabled = true;
+ private long clusterCheckerDelay = 30000L;
+
private boolean usePBInMg = true;
private boolean usePBInPg = true;
private String mgName = "system";
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/TestCallback.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/TestCallback.java
index 90063cdc86613db405b2aba93ea1561d8f455a5e..016f363feae779836b0ec7a25f926890526e2ada 100644
--- a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/TestCallback.java
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/cluster/TestCallback.java
@@ -9,6 +9,7 @@
package eu.melodic.event.baguette.client.cluster;
+import io.atomix.cluster.ClusterMembershipEvent;
import io.atomix.cluster.Member;
import io.atomix.utils.net.Address;
@@ -20,6 +21,9 @@ public class TestCallback extends AbstractLogBase implements BrokerUtil.NodeCall
address = localAddress.toString();
}
+ public void joinedCluster() { } // no-op in this test callback
+ public void leftCluster() { } // no-op in this test callback
+
public void initialize() {
if ("L2".equals(state)) {
log_warn("__TestNode at {}: Already initialized: {}", address, state);
@@ -66,6 +70,10 @@ public class TestCallback extends AbstractLogBase implements BrokerUtil.NodeCall
log_info("__TestNode at {}: Status changed: {} --> {}", address, oldStatus, newStatus);
}
+ public void clusterChanged(ClusterMembershipEvent event) { // Only logs the event type and the id of the event's subject
+ log_info("__TestNode at {}: Cluster changed: {}: {}", address, event.type(), event.subject().id().id());
+ }
+
public String getConfiguration(Member local) {
return String.format("ssl://%s:61617", local.address().host());
}
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/collector/ClientCollectorContext.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/collector/ClientCollectorContext.java
new file mode 100644
index 0000000000000000000000000000000000000000..6a92a4b105d2191b30ec225e05c90acd67f20e7e
--- /dev/null
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/collector/ClientCollectorContext.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.client.collector;
+
+import eu.melodic.event.baguette.client.CommandExecutor;
+import eu.melodic.event.brokercep.event.EventMap;
+import eu.melodic.event.common.collector.CollectorContext;
+import eu.melodic.event.util.ClientConfiguration;
+import eu.melodic.event.util.GroupingConfiguration;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Component;
+
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+@Slf4j
+@Component
+@RequiredArgsConstructor
+public class ClientCollectorContext implements CollectorContext { // CollectorContext that delegates every call to the client's CommandExecutor
+ private final CommandExecutor commandExecutor; // supplied through the Lombok-generated constructor
+
+ public Map getGroupings() { // not annotated @Override, unlike the methods below
+ return commandExecutor.getGroupings();
+ }
+
+ @Override
+ public List getNodeConfigurations() { // always a one-element list holding this client's configuration
+ return Collections.singletonList(commandExecutor.getClientConfiguration()); // NOTE(review): the element may be null; getNodesWithoutClient() guards against a null configuration
+ }
+
+ @Override
+ public Set getNodesWithoutClient() { // returns null when there is no client configuration
+ return commandExecutor.getClientConfiguration()!=null
+ ? commandExecutor.getClientConfiguration().getNodesWithoutClient() : null;
+ }
+
+ @Override
+ public boolean isAggregator() {
+ return commandExecutor.isAggregator();
+ }
+
+ @Override
+ public boolean sendEvent(String connectionString, String destinationName, EventMap event, boolean createDestination) {
+ return commandExecutor.sendEvent(connectionString, destinationName, event, createDestination);
+ }
+}
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/collector/netdata/NetdataCollector.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/collector/netdata/NetdataCollector.java
index 6471af590e33d9c4184761b1257745b5b11c5ffb..f8eb53e5fab7c4071f613f6ff3b65f463988b974 100644
--- a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/collector/netdata/NetdataCollector.java
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/collector/netdata/NetdataCollector.java
@@ -10,59 +10,43 @@
package eu.melodic.event.baguette.client.collector.netdata;
import eu.melodic.event.baguette.client.Collector;
-import eu.melodic.event.baguette.client.CommandExecutor;
-import eu.melodic.event.brokercep.event.EventMap;
+import eu.melodic.event.baguette.client.collector.ClientCollectorContext;
+import eu.melodic.event.common.collector.CollectorContext;
+import eu.melodic.event.common.collector.netdata.NetdataCollectorProperties;
+import eu.melodic.event.util.EventBus;
import eu.melodic.event.util.GROUPING;
import eu.melodic.event.util.GroupingConfiguration;
-import lombok.RequiredArgsConstructor;
+import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.lang3.StringUtils;
-import org.springframework.beans.factory.InitializingBean;
-import org.springframework.http.HttpStatus;
-import org.springframework.http.ResponseEntity;
+import org.springframework.scheduling.TaskScheduler;
import org.springframework.stereotype.Component;
-import org.springframework.web.client.RestTemplate;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
import java.util.stream.Collectors;
/**
- * Collects measurements from Netdata server
+ * Collects measurements from Netdata http server
*/
@Slf4j
@Component
-@RequiredArgsConstructor
-public class NetdataCollector implements Collector, InitializingBean, Runnable {
- private final NetdataCollectorProperties properties;
- private final CommandExecutor commandExecutor;
-
- private RestTemplate restTemplate = new RestTemplate();
- private boolean started;
- private Thread runner;
- private boolean running;
- private List allowedTopics;
- private Map topicMap;
-
- @Override
- public void afterPropertiesSet() {
- log.debug("Collectors::Netdata: properties: {}", properties);
- this.allowedTopics = properties.getAllowedTopics()==null
- ? null
- : properties.getAllowedTopics().stream()
- .map(s -> s.split(":")[0])
- .collect(Collectors.toList());
- this.topicMap = properties.getAllowedTopics()==null
- ? null
- : properties.getAllowedTopics().stream()
- .map(s -> s.split(":", 2))
- .collect(Collectors.toMap(a -> a[0], a -> a.length>1 ? a[1]: ""));
-
+public class NetdataCollector extends eu.melodic.event.common.collector.netdata.NetdataCollector implements Collector {
+ public NetdataCollector(@NonNull NetdataCollectorProperties properties, // all four arguments are forwarded unchanged to the common NetdataCollector superclass
+ @NonNull CollectorContext collectorContext,
+ @NonNull TaskScheduler taskScheduler,
+ @NonNull EventBus eventBus)
+ {
+ super(properties, collectorContext, taskScheduler, eventBus);
+ if (!(collectorContext instanceof ClientCollectorContext)) // activeGroupingChanged() casts collectorContext to ClientCollectorContext, so any other type is rejected here
+ throw new IllegalArgumentException("Invalid CollectorContext provided. Expected: ClientCollectorContext, but got "+collectorContext.getClass().getName());
}
public synchronized void activeGroupingChanged(String oldGrouping, String newGrouping) {
HashSet topics = new HashSet<>();
for (String g : GROUPING.getNames()) {
- GroupingConfiguration grp = commandExecutor.getGroupings().get(g);
+ GroupingConfiguration grp = ((ClientCollectorContext)collectorContext).getGroupings().get(g);
if (grp!=null)
topics.addAll(grp.getEventTypeNames());
}
@@ -81,131 +65,4 @@ public class NetdataCollector implements Collector, InitializingBean, Runnable {
}
}
- public synchronized void start() {
- // check if already running
- if (started) {
- log.warn("Collectors::Netdata: Already started");
- return;
- }
-
- // check parameters
- if (properties==null || !properties.isEnable()) {
- log.warn("Collectors::Netdata: Collector not enabled");
- return;
- }
- if (properties.getDelay()<0) properties.setDelay(0);
- if (StringUtils.isBlank(properties.getUrl())) {
- String url = "http://127.0.0.1:19999/api/v1/allmetrics?format=json";
- log.debug("Collectors::Netdata: URL not specified. Assuming {}", url);
- properties.setUrl(url);
- }
-
- log.info("Collectors::Netdata: configuration: {}", properties);
-
- // start thread
- runner = new Thread(this, "baguette-client-collector-netdata-thread");
- runner.setDaemon(true);
- started = true;
- running = true;
- runner.start();
-
- log.info("Collectors::Netdata: Started");
- }
-
- public synchronized void stop() {
- if (!started) {
- log.warn("Collectors::Netdata: Not started");
- return;
- }
- running = false;
- // interrupt sleep
- runner.interrupt();
- }
-
- public void run() {
- if (!started) return;
-
- while (running && !Thread.currentThread().isInterrupted()) {
- try {
- // collect data
- collectAndPublishData();
-
- // sleep for 'delay' millis
- Thread.sleep(properties.getDelay());
- } catch (InterruptedException e) {
- log.warn("Collectors::Netdata: Interrupted");
- } catch (Throwable t) {
- log.warn("Collectors::Netdata: Exception: {}", t);
- }
- }
-
- synchronized (this) {
- log.info("Collectors::Netdata: Stopped");
- started = false;
- running = false;
- }
- }
-
- private void collectAndPublishData() {
- log.info("Collectors::Netdata: Collecting data: {}...", properties.getUrl());
- long startTm = System.currentTimeMillis();
- ResponseEntity response = restTemplate.getForEntity(properties.getUrl(), HashMap.class);
- long callEndTm = System.currentTimeMillis();
- log.trace("Collectors::Netdata: ...response: {}", response);
- if (response.getStatusCode()==HttpStatus.OK) {
- Map dataMap = response.getBody();
- boolean createTopic = properties.isCreateTopic();
- int countSuccess = 0;
- int countErrors = 0;
- log.trace("Collectors::Netdata: ...keys: {}", dataMap.keySet());
- for (Object key : dataMap.keySet()) {
- log.trace("Collectors::Netdata: ...Loop-1: key={}", key);
- if (key==null) continue;
- Map keyData = (Map)dataMap.get(key);
- log.trace("Collectors::Netdata: ...Loop-1: key-data={}", keyData);
- long timestamp = Long.parseLong( keyData.get("last_updated").toString() );
- Map dimensionsMap = (Map)keyData.get("dimensions");
-
- log.trace("Collectors::Netdata: ...Loop-1: ...dimensions-keys: {}", dimensionsMap.keySet());
- for (Object dimKey : dimensionsMap.keySet()) {
- log.trace("Collectors::Netdata: ...Loop-1: ...dimensions-key: {}", dimKey);
- if (dimKey==null) continue;
- String metricName = ("netdata."+key.toString()+"."+dimKey.toString()).replace(".", "__");
- log.trace("Collectors::Netdata: ...Loop-1: ...metric-name: {}", metricName);
- Map dimData = (Map)dimensionsMap.get(dimKey);
- Object valObj = dimData.get("value");
- log.trace("Collectors::Netdata: ...Loop-1: ...metric-value: {}", valObj);
- if (valObj!=null) {
- double metricValue = Double.parseDouble(valObj.toString());
- log.trace("Collectors::Netdata: {} = {}", metricName, metricValue);
- try {
- boolean createDestination = (createTopic || allowedTopics!=null && allowedTopics.contains(metricName));
- if (topicMap!=null) {
- String targetTopic = topicMap.get(metricName);
- if (targetTopic!=null && !targetTopic.isEmpty())
- metricName = targetTopic;
- }
- EventMap event = new EventMap(metricValue, 1, timestamp);
- log.debug("Collectors::Netdata: {}: {}", metricName, metricValue);
- if (commandExecutor.sendEvent(null, metricName, event, createDestination))
- countSuccess++;
- } catch (Exception e) {
- log.warn("Collectors::Netdata: Publishing netdata metric failed: ", e);
- countErrors++;
- }
- }
- }
-
- if (Thread.currentThread().isInterrupted()) break;
- }
- long endTm = System.currentTimeMillis();
- log.info("Collectors::Netdata: Collecting data...ok");
- log.info("Collectors::Netdata: Metrics: extracted={}, published={}, failed={}",
- countSuccess+countErrors, countSuccess, countErrors);
- log.info("Collectors::Netdata: Durations: rest-call={}, extract+publish={}, total={}",
- callEndTm-startTm, endTm-callEndTm, endTm-startTm);
- } else {
- log.warn("Collectors::Netdata: Collecting data...failed: Http Status: {}", response.getStatusCode());
- }
- }
}
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/EmsClientRecoveryTask.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/EmsClientRecoveryTask.java
new file mode 100644
index 0000000000000000000000000000000000000000..8a7cacbe763b260a5301968a937f014db05473c7
--- /dev/null
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/EmsClientRecoveryTask.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.client.plugin.recovery;
+
+import eu.melodic.event.util.EventBus;
+import eu.melodic.event.util.PasswordUtil;
+import lombok.Getter;
+import lombok.NonNull;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.scheduling.TaskScheduler;
+import org.springframework.stereotype.Component;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * EMS client (client-side) Self-Healing.
+ * Recovery commands are read from the file named by the
+ * 'self.healing.recovery.file.baguette' property when it is not blank;
+ * otherwise the built-in command list declared in this class is used.
+ */
+@Slf4j
+@Component
+public class EmsClientRecoveryTask extends VmNodeRecoveryTask {
+ @Getter
+ private final List recoveryCommands = Collections.unmodifiableList(Arrays.asList( // built-in fallback commands (read-only list)
+ new RECOVERY_COMMAND("Initial wait...",
+ "pwd",0, 10000), // arguments: name, command, waitBefore, waitAfter (waits presumably in milliseconds — TODO confirm)
+ new RECOVERY_COMMAND("Sending baguette client kill command...",
+ "/opt/baguette-client/bin/kill.sh",0, 2000),
+ new RECOVERY_COMMAND("Sending baguette client start command...",
+ "/opt/baguette-client/bin/run.sh",0, 10000)
+ ));
+
+ @Value("${self.healing.recovery.file.baguette:}") // the trailing ':' supplies an empty default when the property is not set
+ private String emsRecoveryFile;
+
+ public EmsClientRecoveryTask(@NonNull EventBus eventBus, @NonNull PasswordUtil passwordUtil, @NonNull TaskScheduler taskScheduler) {
+ super(eventBus, passwordUtil, taskScheduler);
+ }
+
+ public void runNodeRecovery() throws Exception { // a configured commands file takes precedence over the built-in list
+ if (StringUtils.isNotBlank(emsRecoveryFile))
+ runNodeRecovery(emsRecoveryFile);
+ else
+ runNodeRecovery(recoveryCommands);
+ }
+}
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/NetdataAgentLocalRecoveryTask.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/NetdataAgentLocalRecoveryTask.java
new file mode 100644
index 0000000000000000000000000000000000000000..5ef1e707df689fc0dd917544baaae211c494ff2d
--- /dev/null
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/NetdataAgentLocalRecoveryTask.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.client.plugin.recovery;
+
+import eu.melodic.event.util.EventBus;
+import eu.melodic.event.util.PasswordUtil;
+import lombok.Getter;
+import lombok.NonNull;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.scheduling.TaskScheduler;
+import org.springframework.stereotype.Component;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Netdata agent (client-side) Self-Healing.
+ * This variant extends ShellRecoveryTask.
+ * Recovery commands are read from the file named by the
+ * 'self.healing.recovery.file.netdata' property when it is not blank;
+ * otherwise the built-in command list declared in this class is used.
+ */
+@Slf4j
+@Component
+public class NetdataAgentLocalRecoveryTask extends ShellRecoveryTask {
+ @Getter
+ private final List recoveryCommands = Collections.unmodifiableList(Arrays.asList( // built-in fallback commands (read-only list)
+ new RECOVERY_COMMAND("Initial wait...",
+ "pwd",0, 5000), // arguments: name, command, waitBefore, waitAfter (waits presumably in milliseconds — TODO confirm)
+ new RECOVERY_COMMAND("Sending Netdata agent kill command...",
+ "sudo sh -c 'ps -U netdata -o \"pid\" --no-headers | xargs kill -9' ",0, 2000), // lists the PIDs of processes owned by user 'netdata' and sends them SIGKILL
+ new RECOVERY_COMMAND("Sending Netdata agent start command...",
+ "sudo netdata",0, 10000)
+ ));
+
+ @Value("${self.healing.recovery.file.netdata:}") // the trailing ':' supplies an empty default when the property is not set
+ private String netdataRecoveryFile;
+
+ public NetdataAgentLocalRecoveryTask(@NonNull EventBus eventBus, @NonNull PasswordUtil passwordUtil, @NonNull TaskScheduler taskScheduler) {
+ super(eventBus, taskScheduler); // passwordUtil is accepted but not forwarded to the superclass
+ }
+
+ public void runNodeRecovery() throws Exception { // a configured commands file takes precedence over the built-in list
+ if (StringUtils.isNotBlank(netdataRecoveryFile))
+ runNodeRecovery(netdataRecoveryFile);
+ else
+ runNodeRecovery(recoveryCommands);
+ }
+}
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/NetdataAgentRecoveryTask.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/NetdataAgentRecoveryTask.java
new file mode 100644
index 0000000000000000000000000000000000000000..1b986d6c56c99687e132d50b4fc163476753de11
--- /dev/null
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/NetdataAgentRecoveryTask.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.client.plugin.recovery;
+
+import eu.melodic.event.util.EventBus;
+import eu.melodic.event.util.PasswordUtil;
+import lombok.Getter;
+import lombok.NonNull;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.scheduling.TaskScheduler;
+import org.springframework.stereotype.Component;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Netdata agent (client-side) Self-Healing.
+ * This variant extends VmNodeRecoveryTask.
+ * Recovery commands are read from the file named by the
+ * 'self.healing.recovery.file.netdata' property when it is not blank;
+ * otherwise the built-in command list declared in this class is used.
+ */
+@Slf4j
+@Component
+public class NetdataAgentRecoveryTask extends VmNodeRecoveryTask {
+ @Getter
+ private final List recoveryCommands = Collections.unmodifiableList(Arrays.asList( // built-in fallback commands (read-only list)
+ new RECOVERY_COMMAND("Initial wait...",
+ "pwd",0, 5000), // arguments: name, command, waitBefore, waitAfter (waits presumably in milliseconds — TODO confirm)
+ new RECOVERY_COMMAND("Sending Netdata agent kill command...",
+ "sudo sh -c 'ps -U netdata -o \"pid\" --no-headers | xargs kill -9' ",0, 2000), // lists the PIDs of processes owned by user 'netdata' and sends them SIGKILL
+ new RECOVERY_COMMAND("Sending Netdata agent start command...",
+ "sudo netdata",0, 10000)
+ ));
+
+ @Value("${self.healing.recovery.file.netdata:}") // the trailing ':' supplies an empty default when the property is not set
+ private String netdataRecoveryFile;
+
+ public NetdataAgentRecoveryTask(@NonNull EventBus eventBus, @NonNull PasswordUtil passwordUtil, @NonNull TaskScheduler taskScheduler) {
+ super(eventBus, passwordUtil, taskScheduler);
+ }
+
+ public void runNodeRecovery() throws Exception { // a configured commands file takes precedence over the built-in list
+ if (StringUtils.isNotBlank(netdataRecoveryFile))
+ runNodeRecovery(netdataRecoveryFile);
+ else
+ runNodeRecovery(recoveryCommands);
+ }
+}
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/NodeInfoHelper.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/NodeInfoHelper.java
new file mode 100644
index 0000000000000000000000000000000000000000..dd7071c8b0a231205c54148d11ce90af6ee056b9
--- /dev/null
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/NodeInfoHelper.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.client.plugin.recovery;
+
+import com.google.gson.Gson;
+import eu.melodic.event.baguette.client.BaguetteClientProperties;
+import eu.melodic.event.baguette.client.CommandExecutor;
+import eu.melodic.event.util.EventBus;
+import eu.melodic.event.util.PasswordUtil;
+import lombok.Data;
+import lombok.NonNull;
+import lombok.RequiredArgsConstructor;
+import lombok.SneakyThrows;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.scheduling.TaskScheduler;
+import org.springframework.stereotype.Component;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ScheduledFuture;
+
+/**
+ * Node Info helper -- Retrieves node info from EMS server and caches it.
+ * The cache is keyed by node address; the node id is only used in log messages.
+ */
+@Slf4j
+@Component
+@RequiredArgsConstructor
+public class NodeInfoHelper {
+ private final CommandExecutor commandExecutor;
+ private final HashMap nodeInfoCache = new HashMap<>(); // node address -> node info; NOTE(review): plain HashMap with no synchronization around it
+ private final Gson gson = new Gson();
+
+ @SneakyThrows
+ public Map getNodeInfo(String nodeId, @NonNull String nodeAddress) { // returns the cached entry, querying the EMS server on a cache miss
+ log.debug("NodeInfoHelper: getNodeInfo(): BEGIN: node-id={}, node-address={}", nodeId, nodeAddress);
+
+ // Get cached node info
+ Map nodeInfo = nodeInfoCache.get(nodeAddress);
+
+ if (nodeInfo==null) {
+ // Get node info from EMS server
+ try {
+ log.debug("NodeInfoHelper: getNodeInfo(): Querying EMS server for Node Info: id={}, address={}", nodeId, nodeAddress);
+ commandExecutor.executeCommand("SEND SERVER-GET-NODE-SSH-CREDENTIALS " + nodeAddress);
+ String response = commandExecutor.getLastInputLine(); // NOTE(review): assumes this line is the server's reply to the command above — confirm
+ log.debug("NodeInfoHelper: getNodeInfo(): Node Info from EMS server: id={}, address={}\n{}", nodeId, nodeAddress, response);
+ if (StringUtils.isNotBlank(response)) {
+ nodeInfo = gson.fromJson(response, Map.class); // the response text is parsed as JSON into a Map
+ }
+ nodeInfoCache.put(nodeAddress, nodeInfo); // stored even when nodeInfo is still null (blank response), so the next call queries again
+ } catch (Exception ex) {
+ log.error("NodeInfoHelper: getNodeInfo(): Exception while querying for node info: node-id={}, node-address={}\n", nodeId, nodeAddress, ex);
+ throw ex; // rethrown after logging; @SneakyThrows lets it propagate without a throws clause
+ }
+ }
+ log.debug("NodeInfoHelper: getNodeInfo(): Node info: {}", nodeInfo);
+ return nodeInfo;
+ }
+
+ public void remove(String nodeId, @NonNull String nodeAddress) { // drops the cached entry for nodeAddress, if any
+ log.debug("NodeInfoHelper: remove(): node-id={}, node-address={}", nodeId, nodeAddress);
+ Map nodeInfo = nodeInfoCache.remove(nodeAddress);
+ log.trace("NodeInfoHelper: remove(): Removed: node-id={}, node-address={}, node-info={}", nodeId, nodeAddress, nodeInfo);
+ }
+}
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/RECOVERY_COMMAND.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/RECOVERY_COMMAND.java
new file mode 100644
index 0000000000000000000000000000000000000000..727499505c8968e29b2a7b0d3f42cb4b84fd739f
--- /dev/null
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/RECOVERY_COMMAND.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.client.plugin.recovery;
+
+import lombok.Data;
+
+@Data
+class RECOVERY_COMMAND { // One entry of a recovery command list: a labelled command plus two wait values
+ private final String name; // label; the built-in lists in this package use progress messages such as "Initial wait..."
+ private final String command; // command line text, e.g. "pwd" or "sudo netdata"
+ private final long waitBefore; // presumably a delay before running the command, in milliseconds — TODO confirm
+ private final long waitAfter; // presumably a delay after running the command, in milliseconds — TODO confirm
+}
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/RecoveryTask.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/RecoveryTask.java
new file mode 100644
index 0000000000000000000000000000000000000000..87e5dc1cb0da1235262f0a20efbf09869a5ecbbb
--- /dev/null
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/RecoveryTask.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.client.plugin.recovery;
+
+import com.google.gson.Gson;
+import com.google.gson.reflect.TypeToken;
+
+import java.io.FileReader;
+import java.lang.reflect.Type;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Client-side Self-Healing task
+ */
+public interface RecoveryTask {
+ Map getNodeInfo();
+ void setNodeInfo(Map nodeInfo);
+
+ List getRecoveryCommands();
+
+ void runNodeRecovery() throws Exception;
+
+ void runNodeRecovery(List recoveryCommandsList) throws Exception;
+
+ default void runNodeRecovery(String recoveryCommandsFile) throws Exception {
+ try (FileReader reader = new FileReader(Paths.get(recoveryCommandsFile).toFile())) {
+ Type listType = new TypeToken>(){}.getType();
+ List recoveryCommandsList = new Gson().fromJson(reader, listType);
+ runNodeRecovery(recoveryCommandsList);
+ }
+ }
+}
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/SelfHealingPlugin.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/SelfHealingPlugin.java
new file mode 100644
index 0000000000000000000000000000000000000000..33be7c8fe12bd951e16edd4bf43d65f0aa78a7ab
--- /dev/null
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/SelfHealingPlugin.java
@@ -0,0 +1,302 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.client.plugin.recovery;
+
+import eu.melodic.event.baguette.client.BaguetteClientProperties;
+import eu.melodic.event.baguette.client.CommandExecutor;
+import eu.melodic.event.baguette.client.collector.netdata.NetdataCollector;
+import eu.melodic.event.util.EventBus;
+import eu.melodic.event.util.PasswordUtil;
+import eu.melodic.event.util.Plugin;
+import io.atomix.cluster.ClusterMembershipEvent;
+import lombok.NonNull;
+import lombok.RequiredArgsConstructor;
+import lombok.SneakyThrows;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.InitializingBean;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.ApplicationContext;
+import org.springframework.scheduling.TaskScheduler;
+import org.springframework.stereotype.Component;
+
+import java.net.InetAddress;
+import java.net.NetworkInterface;
+import java.net.SocketException;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Client-side Self-Healing plugin
+ */
+@Slf4j
+@Component
+@RequiredArgsConstructor
+public class SelfHealingPlugin implements Plugin, InitializingBean, EventBus.EventConsumer {
+ // Collaborators held in final fields; the class is annotated with Lombok's
+ // @RequiredArgsConstructor, which generates the constructor that receives them.
+ // Used to look up RecoveryTask beans by class
+ private final ApplicationContext applicationContext;
+ // Only logged in afterPropertiesSet()
+ private final BaguetteClientProperties properties;
+ private final CommandExecutor commandExecutor;
+ // Source of the cluster/Netdata events and sink for the recovery-failed event
+ private final EventBus eventBus;
+ private final PasswordUtil passwordUtil;
+ // Provides (and caches) the info of the node that must be recovered
+ private final NodeInfoHelper nodeInfoHelper;
+
+ // Event-bus topics announcing the outcome of a recovery attempt
+ final static String SELF_HEALING_RECOVERY_FAILED = "SELF_HEALING_RECOVERY_FAILED";
+ final static String SELF_HEALING_RECOVERY_COMPLETED = "SELF_HEALING_RECOVERY_COMPLETED";
+
+ // Read by start() and stop() to detect repeated calls
+ private boolean started;
+
+ // Recovery tasks keyed by node address. A null value marks an address that is reserved
+ // (task being created) or retained after its task was cancelled. Accessed under
+ // synchronized (waitingTasks).
+ private final HashMap> waitingTasks = new HashMap<>();
+ // Runs the periodic recovery attempts
+ private final TaskScheduler taskScheduler;
+
+ // When false, onMessage() ignores every event
+ @Value("${self.healing.enabled:true}")
+ private boolean enabled;
+ // Delay, in milliseconds, between the triggering event and the first recovery attempt
+ @Value("${self.healing.recovery.delay:10000}")
+ private long clientRecoveryDelay;
+ // Delay, in milliseconds, between the end of one recovery attempt and the start of the next
+ @Value("${self.healing.recovery.retry.wait:60000}")
+ private long clientRecoveryRetryDelay;
+ // Attempt-counter threshold above which the recovery task cancels itself
+ @Value("${self.healing.recovery.max.retries:3}")
+ private int clientRecoveryMaxRetries;
+
+ /**
+  * Spring initialization callback; it only logs the injected client properties at debug level.
+  */
+ @Override
+ public void afterPropertiesSet() {
+     log.debug("SelfHealingPlugin: properties: {}", properties);
+ }
+
+ /**
+  * Subscribes the plugin to the cluster-membership and Netdata events it reacts to.
+  * Calling it while the plugin is already started only logs a warning.
+  */
+ public synchronized void start() {
+     // check if already running
+     if (started) {
+         log.warn("SelfHealingPlugin: Already started");
+         return;
+     }
+
+     eventBus.subscribe(CommandExecutor.EVENT_CLUSTER_NODE_ADDED, this);
+     eventBus.subscribe(CommandExecutor.EVENT_CLUSTER_NODE_REMOVED, this);
+     eventBus.subscribe(NetdataCollector.NETDATA_NODE_PAUSED, this);
+     eventBus.subscribe(NetdataCollector.NETDATA_NODE_RESUMED, this);
+
+     // Record the state change: without it the guard above never fires and stop()
+     // always bails out with "Not started"
+     started = true;
+     log.info("SelfHealingPlugin: Started");
+ }
+
+ /**
+  * Unsubscribes from all events and cancels every recovery task that is still scheduled.
+  * Calling it while the plugin is not started only logs a warning.
+  */
+ public synchronized void stop() {
+     if (!started) {
+         log.warn("SelfHealingPlugin: Not started");
+         return;
+     }
+
+     eventBus.unsubscribe(CommandExecutor.EVENT_CLUSTER_NODE_ADDED, this);
+     eventBus.unsubscribe(CommandExecutor.EVENT_CLUSTER_NODE_REMOVED, this);
+     eventBus.unsubscribe(NetdataCollector.NETDATA_NODE_PAUSED, this);
+     eventBus.unsubscribe(NetdataCollector.NETDATA_NODE_RESUMED, this);
+     started = false;
+
+     // Cancel all waiting recovery tasks. The map is guarded by its own monitor everywhere
+     // else in this class, so take the same lock here.
+     synchronized (waitingTasks) {
+         waitingTasks.forEach((nodeAddress, future) -> {
+             // null values are placeholders (reserved or retained addresses): nothing to cancel
+             if (future != null) {
+                 future.cancel(true);
+             }
+         });
+         waitingTasks.clear();
+     }
+     log.info("SelfHealingPlugin: Stopped");
+ }
+
+ /**
+  * Event-bus callback: routes an event to the handler matching its topic.
+  * Nothing is processed while self-healing is disabled; events on other topics are only logged.
+  *
+  * @param topic   topic the event was published on
+  * @param message event payload, handed unchanged to the topic's handler
+  * @param sender  publisher of the event; only logged
+  */
+ @Override
+ public void onMessage(String topic, Object message, Object sender) {
+     log.debug("SelfHealingPlugin: onMessage(): BEGIN: topic={}, message={}, sender={}", topic, message, sender);
+     if (!enabled) {
+         return;
+     }
+
+     // Self-Healing for EMS clients
+     if (CommandExecutor.EVENT_CLUSTER_NODE_REMOVED.equals(topic)) {
+         log.debug("SelfHealingPlugin: onMessage(): CLUSTER NODE REMOVED: message={}", message);
+         processClusterNodeRemovedEvent(message);
+         return;
+     }
+     if (CommandExecutor.EVENT_CLUSTER_NODE_ADDED.equals(topic)) {
+         log.debug("SelfHealingPlugin: onMessage(): CLUSTER NODE ADDED: message={}", message);
+         processClusterNodeAddedEvent(message);
+         return;
+     }
+
+     // Self-healing for Netdata agents
+     if (NetdataCollector.NETDATA_NODE_PAUSED.equals(topic)) {
+         log.debug("SelfHealingPlugin: onMessage(): NETDATA NODE PAUSED: message={}", message);
+         processNetdataNodePausedEvent(message);
+         return;
+     }
+     if (NetdataCollector.NETDATA_NODE_RESUMED.equals(topic)) {
+         log.debug("SelfHealingPlugin: onMessage(): NETDATA NODE RESUMED: message={}", message);
+         processNetdataNodeResumedEvent(message);
+         return;
+     }
+
+     // Unsupported message
+     log.debug("SelfHealingPlugin: onMessage(): Unsupported message: topic={}, message={}, sender={}",
+             topic, message, sender);
+ }
+
+ // ------------------------------------------------------------------------
+
+ private void processClusterNodeRemovedEvent(Object message) {
+ log.debug("SelfHealingPlugin: processClusterNodeRemovedEvent(): BEGIN: message={}", message);
+ if (message instanceof ClusterMembershipEvent) {
+ // Get removed node id and address
+ ClusterMembershipEvent event = (ClusterMembershipEvent)message;
+ String nodeId = event.subject().id().id();
+ String nodeAddress = event.subject().address().host();
+ log.debug("SelfHealingPlugin: processClusterNodeRemovedEvent(): node-id={}, node-address={}", nodeId, nodeAddress);
+ if (StringUtils.isBlank(nodeAddress)) {
+ log.warn("SelfHealingPlugin: processClusterNodeRemovedEvent(): Node address is missing. Cannot recover node. Initial message: {}", event);
+ return;
+ }
+
+ createRecoveryTask(nodeId, nodeAddress, EmsClientRecoveryTask.class);
+ } else {
+ log.warn("SelfHealingPlugin: processClusterNodeRemovedEvent(): Message is not a {} object. Will ignore it.", ClusterMembershipEvent.class.getSimpleName());
+ }
+ }
+
+ private void processClusterNodeAddedEvent(Object message) {
+ log.debug("SelfHealingPlugin: processClusterNodeAddedEvent(): BEGIN: message={}", message);
+ if (message instanceof ClusterMembershipEvent) {
+ // Get added node id and address
+ ClusterMembershipEvent event = (ClusterMembershipEvent)message;
+ String nodeId = event.subject().id().id();
+ String nodeAddress = event.subject().address().host();
+ log.debug("SelfHealingPlugin: processClusterNodeAddedEvent(): node-id={}, node-address={}", nodeId, nodeAddress);
+ if (StringUtils.isBlank(nodeAddress)) {
+ log.warn("SelfHealingPlugin: processClusterNodeAddedEvent(): Node address is missing. Initial message: {}", event);
+ return;
+ }
+
+ // Cancel any waiting recovery task
+ cancelRecoveryTask(nodeId, nodeAddress, false);
+ } else {
+ log.warn("SelfHealingPlugin: processClusterNodeAddedEvent(): Message is not a {} object. Will ignore it.", ClusterMembershipEvent.class.getSimpleName());
+ }
+ }
+
+ // ------------------------------------------------------------------------
+
+ private void processNetdataNodePausedEvent(Object message) {
+ log.debug("SelfHealingPlugin: processNetdataNodePausedEvent(): BEGIN: message={}", message);
+ if (!(message instanceof Map)) {
+ log.warn("SelfHealingPlugin: processNetdataNodePausedEvent(): Message is not a {} object. Will ignore it.", Map.class.getSimpleName());
+ return;
+ }
+
+ // Get paused node address
+ Object addressValue = ((Map) message).getOrDefault("address", null);
+ log.debug("SelfHealingPlugin: processNetdataNodePausedEvent(): node-address={}", addressValue);
+ if (addressValue==null) {
+ log.warn("SelfHealingPlugin: processNetdataNodePausedEvent(): Node address is missing. Cannot recover node. Initial message: {}", message);
+ return;
+ }
+ String nodeAddress = addressValue.toString();
+
+ if (isLocalAddress(nodeAddress)) {
+ // We are responsible for recovering our local Netdata agent
+ createRecoveryTask(null, "", NetdataAgentLocalRecoveryTask.class);
+ } else {
+ // Aggregator is responsible for recovering remote Netdata agents
+ createRecoveryTask(null, nodeAddress, NetdataAgentRecoveryTask.class);
+ }
+ }
+
+ @SneakyThrows
+ private boolean isLocalAddress(String address) {
+ if (address.isEmpty()) return true;
+ if ("127.0.0.1".equals(address)) return true;
+ if ("::1".equals(address)) return true;
+ if ("0:0:0:0:0:0:0:1".equals(address)) return true;
+ InetAddress ia = InetAddress.getByName(address);
+ if (ia.isAnyLocalAddress() || ia.isLoopbackAddress()) return true;
+ try {
+ return NetworkInterface.getByInetAddress(ia) != null;
+ } catch (SocketException se) {
+ return false;
+ }
+ }
+
+ /**
+  * Handles a resumed Netdata agent by cancelling the recovery task pending for it.
+  * Payloads that are not a {@code Map} are logged and ignored. A missing or null address is
+  * treated as the empty address, which denotes the local agent.
+  *
+  * @param message event payload
+  */
+ private void processNetdataNodeResumedEvent(Object message) {
+     log.debug("SelfHealingPlugin: processNetdataNodeResumedEvent(): BEGIN: message={}", message);
+     if (!(message instanceof Map)) {
+         log.warn("SelfHealingPlugin: processNetdataNodeResumedEvent(): Message is not a {} object. Will ignore it.", Map.class.getSimpleName());
+         return;
+     }
+
+     // Get resumed node address. The key may be absent or mapped to null; both mean "no address".
+     Object addressValue = ((Map) message).get("address");
+     String nodeAddress = addressValue != null ? addressValue.toString() : "";
+     log.debug("SelfHealingPlugin: processNetdataNodeResumedEvent(): node-address={}", nodeAddress);
+
+     // The recovery task of the local agent is registered under the empty address (see
+     // processNetdataNodePausedEvent), so a local address must be mapped to that key or the
+     // task would never be found. A failed locality check leaves the address unchanged.
+     boolean local;
+     try {
+         local = isLocalAddress(nodeAddress);
+     } catch (Exception e) {
+         log.debug("SelfHealingPlugin: processNetdataNodeResumedEvent(): Cannot tell if address is local: {}", nodeAddress, e);
+         local = false;
+     }
+     if (local) {
+         nodeAddress = "";
+     }
+
+     // Cancel any waiting recovery task
+     cancelRecoveryTask(null, nodeAddress, false);
+ }
+
+ // ------------------------------------------------------------------------
+
+ /**
+  * Schedules a periodic recovery task for a node, unless one is already registered for the
+  * node's address.
+  * <p>
+  * The address is reserved in {@code waitingTasks} first. If the task cannot be scheduled
+  * (node info unavailable, bean lookup or scheduling failure) the reservation is released, so
+  * a later event can trigger recovery again.
+  *
+  * @param nodeId            node identifier, may be null; used for node-info lookup and logging
+  * @param nodeAddress       node address and key of the task; blank means no node info is fetched
+  * @param recoveryTaskClass bean class of the recovery task to run
+  */
+ private void createRecoveryTask(String nodeId, @NonNull String nodeAddress, @NonNull Class<? extends RecoveryTask> recoveryTaskClass) {
+     // Check if a recovery task has already been scheduled, and reserve the address if not
+     synchronized (waitingTasks) {
+         if (waitingTasks.containsKey(nodeAddress)) {
+             log.warn("SelfHealingPlugin: createRecoveryTask(): Recovery has already been scheduled for Node: id={}, address={}", nodeId, nodeAddress);
+             return;
+         }
+         waitingTasks.put(nodeAddress, null);
+     }
+
+     boolean scheduled = false;
+     try {
+         // Get node info and credentials from EMS server
+         Map nodeInfo = null;
+         if (StringUtils.isNotBlank(nodeAddress)) {
+             nodeInfo = nodeInfoHelper.getNodeInfo(nodeId, nodeAddress);
+             if (nodeInfo == null || nodeInfo.isEmpty()) {
+                 log.warn("SelfHealingPlugin: createRecoveryTask(): Node info is null or empty. Cannot recover node.");
+                 return;
+             }
+             log.trace("SelfHealingPlugin: createRecoveryTask(): Node info retrieved for node: id={}, address={}, node-info:\n{}", nodeId, nodeAddress, nodeInfo);
+         } else {
+             log.debug("SelfHealingPlugin: createRecoveryTask(): Node address is blank. Node info will not be retrieved: id={}, address={}", nodeId, nodeAddress);
+         }
+
+         // Schedule node recovery task
+         final RecoveryTask recoveryTask = applicationContext.getBean(recoveryTaskClass);
+         if (nodeInfo != null)
+             recoveryTask.setNodeInfo(nodeInfo);
+         final AtomicInteger retries = new AtomicInteger(0);
+         ScheduledFuture<?> future = taskScheduler.scheduleWithFixedDelay(() -> {
+             // Count the attempt up front so that attempts ending with an exception are counted
+             // too; otherwise a recovery that always fails would be retried forever
+             final int attempt = retries.getAndIncrement();
+             try {
+                 log.info("SelfHealingPlugin: Retry #{}: Recovering node: id={}, address={}", attempt, nodeId, nodeAddress);
+                 recoveryTask.runNodeRecovery();
+                 //NOTE: 'recoveryTask.runNodeRecovery()' must send SELF_HEALING_RECOVERY_COMPLETED or _FAILED event
+             } catch (Exception e) {
+                 log.error("SelfHealingPlugin: EXCEPTION while recovering node: node-info={} -- Exception: ", recoveryTask.getNodeInfo(), e);
+                 eventBus.send(SELF_HEALING_RECOVERY_FAILED, nodeAddress);
+             }
+             // NOTE(review): attempts are numbered from 0 and the comparison is strict, so the
+             // task runs clientRecoveryMaxRetries+2 times before it stops -- confirm this is intended
+             if (attempt > clientRecoveryMaxRetries) {
+                 log.warn("SelfHealingPlugin: Max retries reached. No more recovery retries for node: id={}, address={}", nodeId, nodeAddress);
+                 cancelRecoveryTask(nodeId, nodeAddress, true);
+             }
+         }, Instant.now().plusMillis(clientRecoveryDelay), Duration.ofMillis(clientRecoveryRetryDelay));
+
+         synchronized (waitingTasks) {
+             if (waitingTasks.containsKey(nodeAddress)) {
+                 waitingTasks.put(nodeAddress, future);
+                 scheduled = true;
+             } else {
+                 // The reservation was withdrawn in the meantime (node is back, or plugin stopped)
+                 future.cancel(true);
+             }
+         }
+         if (scheduled)
+             log.info("SelfHealingPlugin: createRecoveryTask(): Created recovery task for Node: id={}, address={}", nodeId, nodeAddress);
+         else
+             log.info("SelfHealingPlugin: createRecoveryTask(): Recovery was cancelled while being scheduled for Node: id={}, address={}", nodeId, nodeAddress);
+     } finally {
+         if (!scheduled) {
+             // Release the reservation made above, but only while it is still a placeholder
+             synchronized (waitingTasks) {
+                 if (waitingTasks.get(nodeAddress) == null)
+                     waitingTasks.remove(nodeAddress);
+             }
+         }
+     }
+ }
+
+ private void cancelRecoveryTask(String nodeId, @NonNull String nodeAddress, boolean retainAddress) {
+ synchronized (waitingTasks) {
+ ScheduledFuture> future = retainAddress ? waitingTasks.put(nodeAddress, null) : waitingTasks.remove(nodeAddress);
+ if (future != null) {
+ future.cancel(true);
+ nodeInfoHelper.remove(nodeId, nodeAddress);
+ log.info("SelfHealingPlugin: cancelRecoveryTask(): Cancelled recovery task for Node: id={}, address={}", nodeId, nodeAddress);
+ } else
+ log.warn("SelfHealingPlugin: cancelRecoveryTask(): No recovery task is scheduled for Node: id={}, address={}", nodeId, nodeAddress);
+ }
+ }
+}
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/ShellRecoveryTask.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/ShellRecoveryTask.java
new file mode 100644
index 0000000000000000000000000000000000000000..05cae3af2b992d3bc6dc53fc9eb3e15685649ad6
--- /dev/null
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/ShellRecoveryTask.java
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.client.plugin.recovery;
+
+import eu.melodic.event.util.EventBus;
+import lombok.*;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+import org.springframework.scheduling.TaskScheduler;
+import org.springframework.stereotype.Component;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.time.Instant;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import static eu.melodic.event.baguette.client.plugin.recovery.SelfHealingPlugin.SELF_HEALING_RECOVERY_COMPLETED;
+
+/**
+ * Client-side, Local-node Self-Healing
+ */
+@Slf4j
+@Component
+@RequiredArgsConstructor
+public class ShellRecoveryTask implements RecoveryTask {
+ @NonNull private final EventBus eventBus;
+ @NonNull private final TaskScheduler taskScheduler;
+
+ @Getter @Setter
+ private Map nodeInfo;
+
+ public void setNodeInfo(@NonNull Map nodeInfo) {
+ this.nodeInfo = nodeInfo;
+ }
+
+ @SneakyThrows
+ public List getRecoveryCommands() {
+ throw new Exception("Method not implemented. Use 'runNodeRecovery(List)' instead");
+ }
+
+ public void runNodeRecovery() throws Exception {
+ throw new Exception("Method not implemented. Use 'runNodeRecovery(List)' instead");
+ }
+
+ public void runNodeRecovery(List recoveryCommands) throws Exception {
+ log.debug("ShellRecoveryTask: runNodeRecovery(): node-info={}", nodeInfo);
+
+ // Carrying out recovery commands
+ log.info("ShellRecoveryTask: runNodeRecovery(): Executing {} recovery commands", recoveryCommands.size());
+ for (RECOVERY_COMMAND command : recoveryCommands) {
+ if (command==null || StringUtils.isBlank(command.getCommand())) continue;
+
+ waitFor(command.getWaitBefore(), command.getName());
+
+ // Run command as a local process
+ log.warn("############## {}...", command.getName());
+ Process process = Runtime.getRuntime().exec(command.getCommand());
+
+ // Redirect SSH output to standard output
+ final AtomicBoolean closed = new AtomicBoolean(false);
+ redirectShellOutput(process.getInputStream(), "OUT", closed);
+ redirectShellOutput(process.getErrorStream(), "ERR", closed);
+
+ waitFor(command.getWaitAfter(), command.getName());
+
+ closed.set(true);
+ //if (process.isAlive()) process.destroyForcibly();
+ }
+ log.info("ShellRecoveryTask: runNodeRecovery(): Executed {} recovery commands", recoveryCommands.size());
+
+ // Send recovery complete event
+ eventBus.send(SELF_HEALING_RECOVERY_COMPLETED, "");
+ }
+
+ private void waitFor(long millis, String description) {
+ if (millis>0) {
+ log.warn("############## Waiting for {}ms after {}...", millis, description);
+ try { Thread.sleep(millis); } catch (InterruptedException e) { }
+ }
+ }
+
+ private void redirectShellOutput(InputStream in, String id, AtomicBoolean closed) {
+ taskScheduler.schedule(() -> {
+ try {
+ //IoUtils.copy(in, System.out);
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
+ while (reader.ready()) {
+ log.info(" {}> {}", id, reader.readLine());
+ }
+ }
+ } catch (IOException e) {
+ if (closed.get()) {
+ log.info("ShellRecoveryTask: redirectShellOutput(): Connection closed: id={}", id);
+ } else {
+ log.error("ShellRecoveryTask: redirectShellOutput(): Exception while copying Process IN stream: id={}\n", id, e);
+ }
+ }
+ },
+ Instant.now()
+ );
+ }
+}
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/VmNodeRecoveryTask.java b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/VmNodeRecoveryTask.java
new file mode 100644
index 0000000000000000000000000000000000000000..4ab05b5ca3dd5632881e087cec679995c51049d9
--- /dev/null
+++ b/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/plugin/recovery/VmNodeRecoveryTask.java
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.client.plugin.recovery;
+
+import eu.melodic.event.baguette.client.BaguetteClientProperties;
+import eu.melodic.event.baguette.client.Sshc;
+import eu.melodic.event.util.EventBus;
+import eu.melodic.event.util.PasswordUtil;
+import lombok.*;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+import org.springframework.scheduling.TaskScheduler;
+import org.springframework.stereotype.Component;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.time.Instant;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import static eu.melodic.event.baguette.client.plugin.recovery.SelfHealingPlugin.SELF_HEALING_RECOVERY_COMPLETED;
+
+/**
+ * Client-side, VM-node Self-Healing
+ */
+@Slf4j
+@Component
+@RequiredArgsConstructor
+public class VmNodeRecoveryTask implements RecoveryTask {
+ @NonNull private final EventBus eventBus;
+ @NonNull private final PasswordUtil passwordUtil;
+ @NonNull private final TaskScheduler taskScheduler;
+
+ @Getter @Setter
+ private Map nodeInfo;
+
+ private BaguetteClientProperties baguetteClientProperties;
+
+ public void setNodeInfo(@NonNull Map nodeInfo) {
+ this.nodeInfo = nodeInfo;
+ this.baguetteClientProperties = createBaguetteClientProperties();
+ }
+
+ @SneakyThrows
+ public List getRecoveryCommands() {
+ throw new Exception("Method not implemented. Use 'runNodeRecovery(List)' instead");
+ }
+
+ public void runNodeRecovery() throws Exception {
+ throw new Exception("Method not implemented. Use 'runNodeRecovery(List)' instead");
+ }
+
+ public void runNodeRecovery(List recoveryCommands) throws Exception {
+ log.debug("VmNodeRecoveryTask: runNodeRecovery(): node-info={}", nodeInfo);
+
+ // Connect to Node (VM)
+ Sshc sshc = connectToNode();
+
+ // Redirect SSH output to standard output
+ final AtomicBoolean closed = new AtomicBoolean(false);
+ redirectSshOutput(sshc.getIn(), "OUT", closed);
+
+ // Carrying out recovery commands
+ log.info("VmNodeRecoveryTask: runNodeRecovery(): Executing {} recovery commands", recoveryCommands.size());
+ for (RECOVERY_COMMAND command : recoveryCommands) {
+ if (command==null || StringUtils.isBlank(command.getCommand())) continue;
+
+ waitFor(command.getWaitBefore(), command.getName());
+ log.warn("############## {}...", command.getName());
+ sshc.getOut().println(command.getCommand());
+ waitFor(command.getWaitAfter(), command.getName());
+ }
+ log.info("VmNodeRecoveryTask: runNodeRecovery(): Executed {} recovery commands", recoveryCommands.size());
+
+ // Disconnect from node
+ disconnectFromNode(sshc, closed);
+
+ // Send recovery complete event
+ eventBus.send(SELF_HEALING_RECOVERY_COMPLETED, baguetteClientProperties.getServerAddress());
+ }
+
+ private String str(Object o) {
+ if (o==null) return "";
+ return o.toString();
+ }
+
+ private void waitFor(long millis, String description) {
+ if (millis>0) {
+ log.warn("############## Waiting for {}ms after {}...", millis, description);
+ try { Thread.sleep(millis); } catch (InterruptedException e) { }
+ }
+ }
+
+ private BaguetteClientProperties createBaguetteClientProperties() {
+ log.debug("VmNodeRecoveryTask: createBaguetteClientProperties(): node-info={}", nodeInfo);
+
+ // Extract connection info and credentials
+ String os = str(nodeInfo.get("operatingSystem"));
+ String address = str(nodeInfo.get("address"));
+ String type = str(nodeInfo.get("type"));
+ String portStr = str(nodeInfo.get("ssh.port"));
+ String username = str(nodeInfo.get("ssh.username"));
+ String password = str(nodeInfo.get("ssh.password"));
+ String key = str(nodeInfo.get("ssh.key"));
+ String fingerprint = str(nodeInfo.get("ssh.fingerprint"));
+ int port = 22;
+ try {
+ if (StringUtils.isNotBlank(portStr))
+ port = Integer.parseInt(portStr);
+ if (port<1 || port>65535)
+ port = 22;
+ } catch (Exception e) {}
+
+ log.debug("VmNodeRecoveryTask: createBaguetteClientProperties(): os={}, address={}, type={}", os, address, type);
+ log.debug("VmNodeRecoveryTask: createBaguetteClientProperties(): username={}, password={}", username, passwordUtil.encodePassword(password));
+ log.debug("VmNodeRecoveryTask: createBaguetteClientProperties(): fingerprint={}, key={}", fingerprint, passwordUtil.encodePassword(key));
+
+ // Connect to node and restart Baguette Client
+ BaguetteClientProperties config = new BaguetteClientProperties();
+ config.setServerAddress(address);
+ config.setServerPort(port);
+ config.setServerUsername(username);
+ if (!password.isEmpty()) {
+ config.setServerPassword(password);
+ }
+ if (!key.isEmpty()) {
+ config.setServerPubkey(key);
+ config.setServerFingerprint(fingerprint);
+ }
+
+ //XXX:TODO: Make recovery authTimeout configurable
+ config.setAuthTimeout(60000);
+
+ return config;
+ }
+
+ private Sshc connectToNode() throws IOException {
+ Sshc sshc = new Sshc();
+ sshc.setConfiguration(baguetteClientProperties);
+ //XXX:TODO: Try enabling server key verification
+ sshc.setUseServerKeyVerifier(false);
+ log.info("VmNodeRecoveryTask: connectToNode(): Connecting to node using SSH: address={}, port={}, username={}",
+ baguetteClientProperties.getServerAddress(), baguetteClientProperties.getServerPort(), baguetteClientProperties.getServerUsername());
+ sshc.start();
+ log.debug("VmNodeRecoveryTask: connectToNode(): Connected to node: address={}, port={}, username={}",
+ baguetteClientProperties.getServerAddress(), baguetteClientProperties.getServerPort(), baguetteClientProperties.getServerUsername());
+ return sshc;
+ }
+
+ private void disconnectFromNode(Sshc sshc, AtomicBoolean closed) throws IOException {
+ log.info("VmNodeRecoveryTask: disconnectFromNode(): Disconnecting from node: address={}, port={}, username={}",
+ baguetteClientProperties.getServerAddress(), baguetteClientProperties.getServerPort(), baguetteClientProperties.getServerUsername());
+ closed.set(true);
+ sshc.stop();
+ log.debug("VmNodeRecoveryTask: disconnectFromNode(): Disconnected from node: address={}, port={}, username={}",
+ baguetteClientProperties.getServerAddress(), baguetteClientProperties.getServerPort(), baguetteClientProperties.getServerUsername());
+ }
+
+ private void redirectSshOutput(InputStream in, String id, AtomicBoolean closed) {
+ taskScheduler.schedule(() -> {
+ try {
+ //IoUtils.copy(sshc.getIn(), System.out);
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
+ while (reader.ready()) {
+ log.info(" {}> {}", id, reader.readLine());
+ }
+ }
+ } catch (IOException e) {
+ if (closed.get()) {
+ log.info("VmNodeRecoveryTask: redirectSshOutput(): Connection closed: id={}", id);
+ } else {
+ log.error("VmNodeRecoveryTask: redirectSshOutput(): Exception while copying SSH IN stream: id={}\n", id, e);
+ }
+ }
+ },
+ Instant.now()
+ );
+ }
+}
diff --git a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/BaguetteServer.java b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/BaguetteServer.java
index b96b75e878eafa8b7af6ffdf551ffb9b72b640da..28cee7c6675ce66364c56cc7a48e296bf7627cb6 100644
--- a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/BaguetteServer.java
+++ b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/BaguetteServer.java
@@ -12,30 +12,37 @@ package eu.melodic.event.baguette.server;
import eu.melodic.event.baguette.server.properties.BaguetteServerProperties;
import eu.melodic.event.brokercep.BrokerCepService;
import eu.melodic.event.translate.TranslationContext;
-import eu.melodic.event.util.FunctionDefinition;
-import eu.melodic.event.util.PasswordUtil;
+import eu.melodic.event.util.*;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringSubstitutor;
+import org.slf4j.event.Level;
+import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
import java.util.*;
+import java.util.stream.Collectors;
/**
* Baguette Server
*/
@Slf4j
@Service
-public class BaguetteServer {
+public class BaguetteServer implements InitializingBean {
@Autowired
private BaguetteServerProperties config;
@Autowired
private PasswordUtil passwordUtil;
@Autowired
private NodeRegistry nodeRegistry;
+ @Autowired
+ private EventBus eventBus;
private Sshd server;
@@ -48,17 +55,56 @@ public class BaguetteServer {
private String upperwareBrokerUrl;
private BrokerCepService brokerCepService;
+ /**
+  * Spring initialization callback: adds a freshly generated username/password pair to the
+  * configured credentials.
+  */
+ @Override
+ public void afterPropertiesSet() {
+     // Generate a new, random username/password pair and add it to provided credentials
+     generateUsernamePassword();
+ }
+
+ /**
+  * Generates a username of the form {@code user-<uuid>} and an alphanumeric password of 32 to
+  * 63 characters, and stores the pair in the configured credentials. The log shows the password
+  * in encoded form, or masked when no password encoder is configured.
+  */
+ private void generateUsernamePassword() {
+     String genUsername = "user-"+UUID.randomUUID();
+     // The password is a secret: draw both its length and its characters from a
+     // cryptographically strong source instead of RandomStringUtils' default generator
+     java.security.SecureRandom secureRandom = new java.security.SecureRandom();
+     int genPasswordLength = 32 + secureRandom.nextInt(32);    // same range as randomAlphanumeric(32, 64)
+     String genPassword = RandomStringUtils.random(genPasswordLength, 0, 0, true, true, null, secureRandom);
+     CredentialsMap credentials = config.getCredentials();
+     credentials.put(genUsername, genPassword, true);
+     log.info("BaguetteServer.afterPropertiesSet(): Generated new Baguette Server username/password: username={}, password={}", genUsername,
+             credentials.getPasswordEncoder()!=null ? credentials.getPasswordEncoder().encode(genPassword) : "****");
+ }
+
// Configuration getter methods
public Set getGroupingNames() {
+ return getGroupingNames(true);
+ }
+
+ public Set getGroupingNames(boolean removeUpperware) {
Set groupings = new HashSet<>();
groupings.addAll(groupingTopicsMap.keySet());
groupings.addAll(groupingRulesMap.keySet());
groupings.addAll(topicConnections.keySet());
// remove upperware grouping (i.e. GLOBAL)
- groupings.remove(upperwareGrouping);
+ if (removeUpperware) groupings.remove(upperwareGrouping);
return groupings;
}
+ // Returns the known groupings as GROUPING values, sorted in their natural order and then
+ // reversed when 'ascending' is true.
+ // removeUpperware: passed on to getGroupingNames(boolean).
+ // GROUPING.valueOf is applied to every grouping name.
+ // NOTE(review): reversing the naturally sorted list for ascending=true implies that GROUPING's
+ // natural order runs from the highest-level grouping to the lowest -- confirm against GROUPING
+ private List getGroupingsSorted(boolean removeUpperware, boolean ascending) {
+ List list = getGroupingNames(removeUpperware).stream()
+ .map(GROUPING::valueOf)
+ .sorted()
+ .collect(Collectors.toList());
+ if (ascending) Collections.reverse(list);
+ return list;
+ }
+
+ // Same as getGroupingsSorted(boolean, boolean), with every GROUPING converted to its name()
+ private List getGroupingNamesSorted(boolean removeUpperware, boolean ascending) {
+ return getGroupingsSorted(removeUpperware, ascending).stream()
+ .map(GROUPING::name)
+ .collect(Collectors.toList());
+ }
+
+ // Returns the first name of the 'ascending' grouping list (upperware grouping included),
+ // or null when there are no groupings at all
+ private String getLowestLevelGroupingName() {
+ List list = getGroupingNamesSorted(false, true);
+ return list.size()>0 ? list.get(0) : null;
+ }
+
public BaguetteServerProperties getConfiguration() {
return config;
}
@@ -105,7 +151,8 @@ public class BaguetteServer {
log.info("BaguetteServer.startServer(): Starting SSH server instance...");
nodeRegistry.setCoordinator(coordinator);
Sshd server = new Sshd();
- server.start(config, coordinator);
+ server.start(config, coordinator, eventBus, nodeRegistry);
+ server.setNodeRegistry(getNodeRegistry());
this.server = server;
log.info("BaguetteServer.startServer(): Starting SSH server instance... done");
} else {
@@ -264,24 +311,155 @@ public class BaguetteServer {
server.sendToClient(clientId, command);
}
- public Object readFromClient(String clientId, String command) {
- return server.readFromClient(clientId, command);
+ // Delegates to the SSH server's sendToActiveClusters(command).
+ // NOTE(review): 'server' is assigned in startServer(); presumably this must not be called before -- confirm
+ public void sendToActiveClusters(String command) {
+ server.sendToActiveClusters(command);
+ }
+
+ // Delegates to the SSH server's sendToCluster(clusterId, command).
+ // NOTE(review): 'server' is assigned in startServer(); presumably this must not be called before -- confirm
+ public void sendToCluster(String clusterId, String command) {
+ server.sendToCluster(clusterId, command);
+ }
+
+ public Object readFromClient(String clientId, String command, Level logLevel) {
+ return server.readFromClient(clientId, command, logLevel);
}
public List getActiveClients() {
- return server.getActiveClients();
+ return ClientShellCommand.getActive().stream()
+ .map(c -> {
+ NodeRegistryEntry entry = getNodeRegistryEntryFromClientShellCommand(c);
+ return formatClientList(c, entry);
+ })
+ .sorted()
+ .collect(Collectors.toList());
}
public Map> getActiveClientsMap() {
- return server.getActiveClientsMap();
+ return ClientShellCommand.getActive().stream()
+ .map(c -> {
+ NodeRegistryEntry entry = getNodeRegistryEntryFromClientShellCommand(c);
+ return prepareClientMap(c, entry);
+ })
+ .sorted(Comparator.comparing(m -> m.get("id")))
+ .collect(Collectors.toMap(m -> m.get("id"), m -> m,
+ (u,v) -> { throw new IllegalStateException(String.format("Duplicate key %s", u)); },
+ LinkedHashMap::new));
+ }
+
+ private NodeRegistryEntry getNodeRegistryEntryFromClientShellCommand(ClientShellCommand c) {
+ NodeRegistryEntry entry = c.getNodeRegistryEntry();
+ if (entry==null)
+ entry = getNodeRegistry().getNodeByAddress(c.getClientIpAddress());
+ log.debug("getNodeRegistryEntryFromClientShellCommand: CSC ip-address: {}", c.getClientIpAddress());
+ log.debug("getNodeRegistryEntryFromClientShellCommand: CSC NR entry: {}", entry!=null ? entry.getPreregistration() : null);
+ /*if (entry==null) {
+ log.warn("getNodeRegistryEntryFromClientShellCommand: WARN: ** NOT SECURE ** CSC client-id: {}", c.getClientId());
+ entry = getNodeRegistry().getNodeByClientId(c.getClientId());
+ log.debug("getNodeRegistryEntryFromClientShellCommand: WARN: ** NOT SECURE ** CSC NR entry: {}", entry!=null ? entry.getPreregistration() : null);
+ }*/
+ return entry;
+ }
+
+ public List getNodesWithoutClient() {
+ return createClientList(new HashSet<>(Collections.singletonList(NodeRegistryEntry.STATE.NOT_INSTALLED)));
+ }
+
+ public Map> getNodesWithoutClientMap() {
+ return createClientMap(new HashSet<>(Collections.singletonList(NodeRegistryEntry.STATE.NOT_INSTALLED)));
+ }
+
+ public List getIgnoredNodes() {
+ return createClientList(new HashSet<>(Collections.singletonList(NodeRegistryEntry.STATE.IGNORE_NODE)));
+ }
+
+ public Map> getIgnoredNodesMap() {
+ return createClientMap(new HashSet<>(Collections.singletonList(NodeRegistryEntry.STATE.IGNORE_NODE)));
+ }
+
+ public List getPassiveNodes() {
+ return createClientList(new HashSet<>(Arrays.asList(NodeRegistryEntry.STATE.NOT_INSTALLED, NodeRegistryEntry.STATE.IGNORE_NODE)));
+ }
+
+ public Map> getPassiveNodesMap() {
+ return createClientMap(new HashSet<>(Arrays.asList(NodeRegistryEntry.STATE.NOT_INSTALLED, NodeRegistryEntry.STATE.IGNORE_NODE)));
+ }
+
+ private List createClientList(Set states) {
+ return nodeRegistry.getNodes().stream()
+ .filter(entry->states.contains(entry.getState()))
+ .map(entry -> {
+ log.debug("createClientList: Node ip-address: {}", entry.getIpAddress());
+ log.debug("createClientList: Node preregistration info: {}", entry.getPreregistration());
+ ClientShellCommand c = getClientShellCommandFromNodeRegistryEntry(entry);
+ return formatClientList(c, entry);
+ })
+ .sorted()
+ .collect(Collectors.toList());
+ }
+
+ private Map> createClientMap(Set states) {
+ return nodeRegistry.getNodes().stream()
+ .filter(entry -> states.contains(entry.getState()))
+ .sorted(Comparator.comparing(NodeRegistryEntry::getClientId))
+ .collect(Collectors.toMap(NodeRegistryEntry::getClientId, entry -> {
+ log.debug("createClientMap: Node ip-address: {}", entry.getIpAddress());
+ log.debug("createClientMap: Node preregistration info: {}", entry.getPreregistration());
+ ClientShellCommand c = getClientShellCommandFromNodeRegistryEntry(entry);
+ return prepareClientMap(c, entry);
+ }, (u,v) -> { throw new IllegalStateException(String.format("Duplicate key %s", u)); }, LinkedHashMap::new));
+ }
+
+ private ClientShellCommand getClientShellCommandFromNodeRegistryEntry(NodeRegistryEntry entry) {
+ return StringUtils.isNotBlank(entry.getIpAddress())
+ ? ClientShellCommand.getActiveByIpAddress(entry.getIpAddress()) : null;
+ }
+
+ private String formatClientList(ClientShellCommand c, NodeRegistryEntry entry) {
+ final StringBuilder sb = new StringBuilder();
+ prepareClientMap(c, entry).forEach((k,v)->{
+ if ("id".equals(k)) sb.append(v);
+ else if ("node-port".equals(k)) sb.append(":").append(v);
+ else sb.append(" ").append(v);
+ });
+ return sb.toString();
+ }
+
+ private Map prepareClientMap(ClientShellCommand c, NodeRegistryEntry entry) {
+ String address = entry!=null ? entry.getIpAddress() : c.getClientIpAddress();
+ String hostname = entry!=null ? entry.getHostname() : null;
+ if (StringUtils.isBlank(hostname)) {
+ if (c!=null)
+ hostname = c.getClientClusterNodeHostname();
+ if (StringUtils.isBlank(hostname)) {
+ try {
+ hostname = InetAddress.getByName(address).getHostName();
+ } catch (Exception e) {
+ log.warn("Failed to resolve client hostname from IP address: {}\n", address, e);
+ }
+ }
+ if (StringUtils.isNotBlank(hostname)) {
+ if (c!=null) c.setClientClusterNodeHostname(hostname);
+ if (entry!=null) entry.setHostname(hostname);
+ }
+ }
+ Map properties = new LinkedHashMap<>();
+ properties.put("id", c!=null ? c.getId() : entry.getClientId());
+ properties.put("ip-address", address);
+ properties.put("node-hostname", c!=null && StringUtils.isNotBlank(c.getClientClusterNodeHostname()) ? c.getClientClusterNodeHostname() : hostname); // fall back to the registry/resolved hostname when the client reports none
+ properties.put("node-port", Integer.toString(c!=null ? c.getClientClusterNodePort() : -1));
+ properties.put("node-status", c!=null ? c.getClientNodeStatus() : null);
+ properties.put("node-zone", (entry!=null && entry.getClusterZone()!=null) ? entry.getClusterZone().getId() : null); //c.getClientZone()!=null ? c.getClientZone().getId() : null
+ properties.put("grouping", c!=null ? c.getClientGrouping() : (entry.getState()==NodeRegistryEntry.STATE.NOT_INSTALLED ? getLowestLevelGroupingName() : null));
+ properties.put("reference", entry!=null ? entry.getReference() : null);
+ properties.put("node-id", c!=null ? c.getClientProperty("node-id") : null);
+ properties.put("node-state", entry!=null && entry.getState()!=null ? entry.getState().toString() : null);
+ return properties;
}
public void sendConstants(Map constants) {
server.sendConstants(constants);
}
- //XXX: TODO: do actual node registration with Server coordinator. More information might be needed or returned.
- public String registerClient(Map nodeInfoMap) {
+ public NodeRegistryEntry registerClient(Map nodeInfoMap) throws UnknownHostException {
log.debug("BaguetteServer.registerClient(): node-info={}", nodeInfoMap);
Map nodeInfo = new HashMap<>(nodeInfoMap);
@@ -302,9 +480,6 @@ public class BaguetteServer {
log.debug("BaguetteServer.registerClient(): client-id={}", clientId);
// Add node info into node registry
- nodeInfo.put("baguette-client-id", clientId);
- nodeRegistry.addNode(nodeInfo);
-
- return clientId;
+ return nodeRegistry.addNode(nodeInfo, clientId);
}
}
diff --git a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/ClientShellCommand.java b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/ClientShellCommand.java
index 34368618d0d65365bd8007bd957e78a8ba958052..7d26f99b5a09d8c48d4d39850860a38ca1db301e 100644
--- a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/ClientShellCommand.java
+++ b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/ClientShellCommand.java
@@ -9,8 +9,13 @@
package eu.melodic.event.baguette.server;
+import com.google.gson.Gson;
+import eu.melodic.event.util.ClientConfiguration;
+import eu.melodic.event.util.EventBus;
+import eu.melodic.event.baguette.server.coordinator.cluster.IClusterZone;
import eu.melodic.event.util.GroupingConfiguration;
import lombok.Getter;
+import lombok.NonNull;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
@@ -20,6 +25,7 @@ import org.apache.sshd.server.ExitCallback;
import org.apache.sshd.server.SessionAware;
import org.apache.sshd.server.session.ServerSession;
import org.cryptacular.util.CertUtil;
+import org.slf4j.event.Level;
import java.io.*;
import java.net.InetSocketAddress;
@@ -28,6 +34,7 @@ import java.security.cert.X509Certificate;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
+import java.util.stream.Collectors;
@Slf4j
public class ClientShellCommand implements Command, Runnable, SessionAware {
@@ -35,12 +42,21 @@ public class ClientShellCommand implements Command, Runnable, SessionAware {
private final static Object LOCK = new Object();
private final static AtomicLong counter = new AtomicLong(0);
private final static Set activeCmdList = new HashSet<>();
+ private final static Map activeCmdMap = new HashMap<>();
private final static long INPUT_CHECK_DELAY = 100;
public static Set getActive() {
return Collections.unmodifiableSet(activeCmdList);
}
+ public static Set getActiveIds() {
+ return Collections.unmodifiableSet(activeCmdMap.keySet()); // NOTE: keys are the client IP addresses put into activeCmdMap by run(), not the '#nnnnn' command ids
+ }
+
+ public static ClientShellCommand getActiveByIpAddress(String address) {
+ return activeCmdMap.get(address);
+ }
+
private InputStream in;
private PrintStream out;
private PrintStream err;
@@ -65,7 +81,10 @@ public class ClientShellCommand implements Command, Runnable, SessionAware {
@Getter @Setter private int clientClusterNodePort;
@Getter @Setter private String clientClusterNodeAddress;
@Getter @Setter private String clientClusterNodeHostname;
- @Getter @Setter private Object clientZone;
+ @Getter @Setter private IClusterZone clientZone;
+ @Getter private String clientNodeStatus;
+ @Getter private String clientGrouping;
+ private final Properties clientProperties = new Properties();
private final ServerCoordinator coordinator;
private final boolean clientAddressOverrideAllowed;
@@ -75,19 +94,29 @@ public class ClientShellCommand implements Command, Runnable, SessionAware {
private boolean closeConnection = false;
private Map inputsMap = new HashMap<>();
+ private EventBus eventBus;
+ @Getter
+ private Exception lastException;
+ @Getter
+ private NodeRegistry nodeRegistry;
+ @Getter @Setter
+ private NodeRegistryEntry nodeRegistryEntry;
- public ClientShellCommand(ServerCoordinator coordinator, boolean allowClientOverrideItsAddress) {
+ public ClientShellCommand(ServerCoordinator coordinator, boolean allowClientOverrideItsAddress, EventBus eventBus, NodeRegistry registry) {
synchronized (LOCK) {
id = String.format("#%05d", counter.getAndIncrement());
}
this.coordinator = coordinator;
this.clientAddressOverrideAllowed = allowClientOverrideItsAddress;
+ this.eventBus = eventBus;
+ this.nodeRegistry = registry;
}
public void setSession(ServerSession session) {
log.info("{}--> Got session : {}", id, session);
this.session = session;
-
+ eventBus.send("BAGUETTE_SERVER_CLIENT_SESSION_STARTED", this);
+
/*try {
String clientIpAddr = ((InetSocketAddress)session.getIoSession().getRemoteAddress()).getAddress().getHostAddress();
int clientPort = ((InetSocketAddress)session.getIoSession().getRemoteAddress()).getPort();
@@ -123,6 +152,7 @@ public class ClientShellCommand implements Command, Runnable, SessionAware {
public void run() {
if (closeConnection) {
log.warn("{}--> Exiting immediately because 'closeConnection' flag is set", id);
+ eventBus.send("BAGUETTE_SERVER_CLIENT_SESSION_CLOSING_IMMEDIATELY", this);
coordinator.unregister(this);
if (this.session!=null && this.session.isOpen()) {
try {
@@ -136,12 +166,17 @@ public class ClientShellCommand implements Command, Runnable, SessionAware {
callback.onExit(2);
}
log.info("{}--> Thread stopped immediately", id);
+ eventBus.send("BAGUETTE_SERVER_CLIENT_SESSION_CLOSED_IMMEDIATELY", this);
return;
}
synchronized (activeCmdList) {
+ if (activeCmdMap.containsKey(getClientIpAddress()) || activeCmdMap.containsValue(this))
+ throw new IllegalArgumentException("ClientShellCommand has already been registered");
activeCmdList.add(this);
+ activeCmdMap.put(getClientIpAddress(), this);
}
+ eventBus.send("BAGUETTE_SERVER_CLIENT_STARTING", this);
try {
log.info("{}==> Thread started", id);
@@ -153,7 +188,7 @@ public class ClientShellCommand implements Command, Runnable, SessionAware {
String line;
while ((line = reader.readLine()) != null) {
line = line.trim();
- log.info("{}--> {}", id, line);
+ log.debug("{}--> {}", id, line);
//if (echoOn) out.printf("CLIENT (%s) : ECHO : %s\n", id, line);
if (echoOn) out.printf("ECHO %s\n", line);
@@ -162,32 +197,84 @@ public class ClientShellCommand implements Command, Runnable, SessionAware {
if (line.startsWith("-HELLO FROM CLIENT:")) {
getClientInfoFromGreeting(line.substring("-HELLO FROM CLIENT:".length()));
coordinator.register(this);
+ eventBus.send("BAGUETTE_SERVER_CLIENT_REGISTERED", this);
} else if (line.startsWith("-INPUT:")) {
String input = line.substring("-INPUT:".length());
String[] part = input.split(":",2 );
inputsMap.put(part[0].trim(), deserializeFromString(part[1]));
+ } else if (StringUtils.startsWithIgnoreCase(line, "SERVER-")) {
+ String[] lineArgs = line.split(" ", 2);
+ if ("SERVER-GET-NODE-SSH-CREDENTIALS".equalsIgnoreCase(lineArgs[0].trim()) && lineArgs.length>1) {
+ String nodeAddress = lineArgs[1].trim();
+ if (!nodeAddress.isEmpty()) {
+ NodeRegistryEntry entry = nodeRegistry.getNodeByAddress(nodeAddress);
+ if (entry!=null) {
+ Map preregInfo = entry.getPreregistration();
+ log.debug("{}--> NODE PRE-REGISTRATION INFO: address={}\n{}", getId(), nodeAddress, preregInfo);
+
+ if (preregInfo!=null) {
+ String preregInfoStr = new Gson().toJson(preregInfo);
+ log.trace("{}--> NODE PRE-REGISTRATION INFO STRING: STR={}\n{}", getId(), nodeAddress, preregInfoStr);
+ sendToClient(preregInfoStr);
+ } else {
+ log.warn("{}--> NO PRE-REGISTRATION INFO FOR NODE: {}", getId(), nodeAddress);
+ sendToClient("{}");
+ }
+ } else {
+ log.warn("{}--> UNKNOWN NODE: {}", getId(), nodeAddress);
+ sendToClient("{}");
+ }
+ }
+ }
+ } else if (line.startsWith("-NOTIFY-GROUPING-CHANGE:")) {
+ String newGrouping = line.substring("-NOTIFY-GROUPING-CHANGE:".length()).trim();
+ log.info("{}--> Client grouping changed: {} --> {}", getId(), clientGrouping, newGrouping);
+ if (StringUtils.isNotBlank(newGrouping) && ! StringUtils.equals(clientGrouping, newGrouping))
+ this.clientGrouping = newGrouping;
+ } else if (line.startsWith("-NOTIFY-STATUS-CHANGE:")) {
+ String newNodeStatus = line.substring("-NOTIFY-STATUS-CHANGE:".length()).trim();
+ log.info("{}--> Client status changed: {} --> {}", getId(), clientNodeStatus, newNodeStatus);
+ if (StringUtils.isNotBlank(newNodeStatus) && ! StringUtils.equals(clientNodeStatus, newNodeStatus))
+ this.clientNodeStatus = newNodeStatus;
+ } else if (line.startsWith("-CLIENT-PROPERTY-CHANGE:")) {
+ String[] part = line.substring("-CLIENT-PROPERTY-CHANGE:".length()).trim().split(" ", 2);
+ String propertyName = part[0].trim();
+ String propertyValue = part.length>1 ? part[1] : null;
+ String oldValue = clientProperties.getProperty(propertyName);
+ if (StringUtils.isNotBlank(propertyName)) {
+ log.info("{}--> Client property changed: {} = {} --> {}", getId(), propertyName, oldValue, propertyValue);
+ if (propertyValue!=null) clientProperties.put(propertyName, propertyValue); else clientProperties.remove(propertyName); // Properties (a Hashtable) rejects null values: a name sent without a value clears the property
+ } else {
+ log.warn("{}--> Invalid Client property: input line: {}", getId(), line);
+ }
} else if (line.equalsIgnoreCase("READY")) {
coordinator.clientReady(this);
} else {
coordinator.processClientInput(this, line);
}
}
+ eventBus.send("BAGUETTE_SERVER_CLIENT_EXITING", this);
log.info("{}==> Signaling client to exit", id);
out.println("EXIT");
- } catch (IOException ex) {
- log.warn("{}==> EXCEPTION : {}", id, ex);
+ } catch (Exception ex) {
+ log.warn("{}==> EXCEPTION : ", id, ex);
out.printf("EXCEPTION %s\n", ex);
+ this.lastException = ex;
+ eventBus.send("BAGUETTE_SERVER_CLIENT_EXCEPTION", this);
} finally {
synchronized (activeCmdList) {
activeCmdList.remove(this);
+ activeCmdMap.values().remove(this); // remove by value so the entry is dropped even if getClientIpAddress() no longer returns the key used at registration
}
log.info("{}--> Thread stops", id);
coordinator.unregister(this);
+ eventBus.send("BAGUETTE_SERVER_CLIENT_UNREGISTERED", this);
if (!callbackCalled.getAndSet(true)) {
callback.onExit(0);
}
+ eventBus.send("BAGUETTE_SERVER_CLIENT_EXITED", this);
}
}
@@ -319,9 +406,34 @@ public class ClientShellCommand implements Command, Runnable, SessionAware {
return clientPort;
}
+ public String getClientProperty(@NonNull String propertyName) { return clientProperties.getProperty(propertyName); }
+ public String getClientProperty(@NonNull String propertyName, String defaultValue) { return clientProperties.getProperty(propertyName, defaultValue); }
+
+ public NodeRegistryEntry getNodeRegistryEntry() {
+ if (nodeRegistryEntry!=null)
+ return nodeRegistryEntry;
+
+ //XXX:BUG: Following code seems not working...
+ String clientId = getClientId();
+ if (StringUtils.isNotBlank(clientId)) {
+ return nodeRegistry.getNodeByClientId(clientId);
+ }
+ return null;
+ }
+
public void sendToClient(String msg) {
+ sendToClient(msg, Level.INFO);
+ }
+
+ public void sendToClient(String msg, Level logLevel) {
if (msg == null || (msg = msg.trim()).isEmpty()) return;
- log.info("{}==> PUSH : {}", id, msg);
+ switch (logLevel!=null ? logLevel : Level.INFO) { // a null level falls back to INFO instead of throwing NullPointerException
+ case TRACE: log.trace("{}==> PUSH : {}", id, msg); break;
+ case DEBUG: log.debug("{}==> PUSH : {}", id, msg); break;
+ case WARN: log.warn("{}==> PUSH : {}", id, msg); break;
+ case ERROR: log.error("{}==> PUSH : {}", id, msg); break;
+ default: log.info("{}==> PUSH : {}", id, msg);
+ }
out.println(msg);
}
@@ -329,17 +441,25 @@ public class ClientShellCommand implements Command, Runnable, SessionAware {
sendToClient(cmd);
}
+ public void sendCommand(String cmd, Level logLevel) {
+ sendToClient(cmd, logLevel);
+ }
+
public void sendCommand(String[] cmd) {
sendToClient(String.join(" ", cmd));
}
- public Object readFromClient(String cmd) {
+ public void sendCommand(String[] cmd, Level logLevel) {
+ sendToClient(String.join(" ", cmd), logLevel);
+ }
+
+ public Object readFromClient(String cmd, Level logLevel) {
String uuid = UUID.randomUUID().toString();
log.trace("ClientShellCommand.readFromClient: uuid={}, cmd={}", uuid, cmd);
Object oldValue = inputsMap.remove(uuid);
log.trace("ClientShellCommand.readFromClient: uuid={}, old-inputMap-value={}", uuid, oldValue);
log.trace("ClientShellCommand.readFromClient: uuid={}, inputMap-BEFORE={}", uuid, inputsMap);
- sendCommand(cmd+" "+uuid);
+ sendCommand(cmd+" "+uuid, logLevel);
log.trace("ClientShellCommand.readFromClient: uuid={}, Command sent to client", uuid);
while (!inputsMap.containsKey(uuid)) {
log.trace("ClientShellCommand.readFromClient: uuid={}, No input, waiting 500ms", uuid);
@@ -376,7 +496,7 @@ public class ClientShellCommand implements Command, Runnable, SessionAware {
/**
* Write an object to a Base64 string.
*/
- protected String serializeToString(Serializable o) throws IOException {
+ public static String serializeToString(Serializable o) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ObjectOutputStream oos = new ObjectOutputStream(baos);
oos.writeObject(o);
@@ -387,7 +507,7 @@ public class ClientShellCommand implements Command, Runnable, SessionAware {
/**
* Read the object from Base64 string.
*/
- protected Object unserializeFromString(String s) throws IOException, ClassNotFoundException {
+ public static Object unserializeFromString(String s) throws IOException, ClassNotFoundException {
byte[] data = Base64.getDecoder().decode(s);
ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(data));
Object o = ois.readObject();
@@ -395,6 +515,36 @@ public class ClientShellCommand implements Command, Runnable, SessionAware {
return o;
}
+ public static void sendClientConfigurationToClients(@NonNull ClientConfiguration cc, @NonNull List clients) {
+ List clientIds = clients.stream().map(ClientShellCommand::getClientId).collect(Collectors.toList());
+ log.debug("sendClientConfigurationToClients: clients={}, client-config={}", clientIds, cc);
+ try {
+ String ccStr = serializeToString(cc);
+ log.debug("sendClientConfigurationToClients: Serialization of Client configuration: {}", ccStr);
+ ccStr = "SET-CLIENT-CONFIG " + ccStr;
+ for (ClientShellCommand csc : clients) {
+ log.info("sendClientConfigurationToClients: Sending Client configuration to client: {}", csc.getClientId());
+ csc.sendToClient(ccStr);
+ }
+ log.info("sendClientConfigurationToClients: Client configuration sent to clients: {}", clientIds);
+ } catch (IOException ex) {
+ log.error("sendClientConfigurationToClients: Exception while serializing Client configuration: ", ex);
+ log.error("sendClientConfigurationToClients: SET-CLIENT-CONFIG command *NOT* sent to clients");
+ }
+ }
+
+ public void sendClientConfiguration(ClientConfiguration cc) {
+ log.debug("sendClientConfiguration: id={}, client-config={}", id, cc);
+ try {
+ String ccStr = serializeToString(cc);
+ log.info("sendClientConfiguration: Serialization of Client configuration: {}", ccStr);
+ sendToClient("SET-CLIENT-CONFIG " + ccStr);
+ } catch (IOException ex) {
+ log.error("sendClientConfiguration: Exception while serializing Client configuration: ", ex);
+ log.error("sendClientConfiguration: SET-CLIENT-CONFIG command *NOT* sent to client");
+ }
+ }
+
public void sendGroupingConfiguration(String grouping, Map connectionConfigs, BaguetteServer server) {
GroupingConfiguration gc = GroupingConfigurationHelper.newGroupingConfiguration(grouping, connectionConfigs, server);
sendGroupingConfiguration(gc);
diff --git a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/NodeRegistry.java b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/NodeRegistry.java
index 3c4537b133e531713f2c25a1b21861f815ca84e8..f703130b3517d253b3f107ea186bf97cb9d1968e 100644
--- a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/NodeRegistry.java
+++ b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/NodeRegistry.java
@@ -15,9 +15,12 @@ import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;
+import java.util.stream.Collectors;
/**
* Node Registry
@@ -29,9 +32,26 @@ public class NodeRegistry {
@Getter @Setter
private ServerCoordinator coordinator;
- public synchronized void addNode(Map nodeInfo) {
- String ipAddress = getIpAddressFromNodeInfo(nodeInfo);
+ public synchronized NodeRegistryEntry addNode(Map nodeInfo, String clientId) throws UnknownHostException {
+ String hostnameOrAddress = getIpAddressFromNodeInfo(nodeInfo);
+ String ipAddress = hostnameOrAddress;
+
+ // Get IP address from provided hostname or address
+ try {
+ log.debug("NodeRegistry.addNode(): Resolving IP address from provided hostname/address: {}", hostnameOrAddress);
+ InetAddress host = InetAddress.getByName(hostnameOrAddress);
+ log.trace("NodeRegistry.addNode(): InetAddress for provided hostname/address: {}, InetAddress: {}", hostnameOrAddress, host);
+ String resolvedIpAddress = host.getHostAddress();
+ log.info("NodeRegistry.addNode(): Provided-Address={}, Resolved-IP-Address={}", hostnameOrAddress, resolvedIpAddress);
+ ipAddress = resolvedIpAddress;
+ nodeInfo.put("original-address", nodeInfo.get("address"));
+ nodeInfo.put("address", ipAddress);
+ } catch (UnknownHostException e) {
+ log.error("NodeRegistry.addNode(): EXCEPTION while resolving IP address from provided hostname/address: {}\n", ipAddress, e);
+ throw e;
+ }
+ // Check if an entry with the same IP address is already registered
NodeRegistryEntry entry = registry.get(ipAddress);
if (entry!=null) {
log.debug("NodeRegistry.addNode(): Node already pre-registered: ip-address={}\nOld Node Info: {}\nNew Node Info: {}",
@@ -46,9 +66,12 @@ public class NodeRegistry {
}
}
- entry = new NodeRegistryEntry(ipAddress).nodePreregistration(nodeInfo);
+ // Create and register node registry entry
+ entry = new NodeRegistryEntry(ipAddress, clientId, coordinator.getServer()).nodePreregistration(nodeInfo);
+ nodeInfo.put("baguette-client-id", clientId);
registry.put(ipAddress, entry);
log.debug("NodeRegistry.addNode(): Added info for node at address: {}\nNode info: {}", ipAddress, nodeInfo);
+ return entry;
}
public synchronized void removeNode(NodeRegistryEntry nodeEntry) {
@@ -85,6 +108,18 @@ public class NodeRegistry {
return entry;
}
+ public NodeRegistryEntry getNodeByReference(String ref) {
+ return registry.values().stream()
+ .filter(n->ref!=null && ref.equals(n.getReference())) // null-safe: no match (rather than an exception) when either side is null
+ .findAny().orElse(null);
+ }
+
+ public NodeRegistryEntry getNodeByClientId(String clientId) {
+ return registry.values().stream()
+ .filter(n->clientId!=null && clientId.equals(n.getClientId())) // null-safe: an entry's clientId is not declared @NonNull
+ .findAny().orElse(null);
+ }
+
public Collection getNodeAddresses() {
return registry.keySet();
}
@@ -92,4 +127,6 @@ public class NodeRegistry {
public Collection getNodes() {
return registry.values();
}
+
+ public Collection getNodeReferences() { return registry.values().stream().map(NodeRegistryEntry::getReference).collect(Collectors.toList()); }
}
diff --git a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/NodeRegistryEntry.java b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/NodeRegistryEntry.java
index 2b649212b66ae6796070ba52d18dfd5e606f3f41..b9d572d03bf9734d58345737c30d57ed88872744 100644
--- a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/NodeRegistryEntry.java
+++ b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/NodeRegistryEntry.java
@@ -9,44 +9,108 @@
package eu.melodic.event.baguette.server;
-import lombok.AllArgsConstructor;
-import lombok.Data;
-import lombok.Getter;
-import lombok.RequiredArgsConstructor;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import eu.melodic.event.baguette.server.coordinator.cluster.IClusterZone;
+import lombok.*;
+import org.apache.commons.lang3.StringUtils;
+import java.util.Date;
import java.util.LinkedHashMap;
import java.util.Map;
+import java.util.UUID;
@Data
@RequiredArgsConstructor
@AllArgsConstructor
public class NodeRegistryEntry {
- public enum STATE { PREREGISTERED, INSTALLED, REGISTERED };
- private final String ipAddress;
+ public enum STATE { PREREGISTERED, IGNORE_NODE, INSTALLING, NOT_INSTALLED, INSTALLED, INSTALL_ERROR,
+ WAITING_REGISTRATION, REGISTERED, NOT_REGISTERED, REGISTRATION_ERROR, DISCONNECTED
+ };
+ @Getter private final String ipAddress;
+ @Getter private final String clientId;
+ @JsonIgnore
+ @Getter private final transient BaguetteServer baguetteServer;
+ @Getter private String hostname;
@Getter private STATE state = null;
- @Getter private Map preregistration = new LinkedHashMap<>();
- @Getter private Map installation = new LinkedHashMap<>();
- @Getter private Map registration = new LinkedHashMap<>();
+ @Getter private Date stateLastUpdate;
+ @Getter private String reference = UUID.randomUUID().toString();
+ @JsonIgnore
+ @Getter private transient Map preregistration = new LinkedHashMap<>();
+ @JsonIgnore
+ @Getter private transient Map installation = new LinkedHashMap<>();
+ @JsonIgnore
+ @Getter private transient Map registration = new LinkedHashMap<>();
+ @JsonIgnore
+ @Getter @Setter private transient IClusterZone clusterZone;
+
+ public String getNodeId() {
+ return getPreregistration().get("id");
+ }
+
+ public String getNodeAddress() {
+ return ipAddress!=null ? ipAddress : getPreregistration().get("address");
+ }
+
+ public String getNodeIdOrAddress() {
+ return StringUtils.isNotBlank(getNodeId()) ? getNodeId() : getNodeAddress();
+ }
+
+ public String getNodeIdAndAddress() {
+ return getNodeId()+" @ "+getNodeAddress();
+ }
+
+ private void setState(@NonNull STATE s) {
+ state = s;
+ stateLastUpdate = new Date();
+ }
+
+ public void refreshReference() { reference = UUID.randomUUID().toString(); }
public NodeRegistryEntry nodePreregistration(Map nodeInfo) {
preregistration.clear();
preregistration.putAll(processMap("", nodeInfo));
// preregistration.putAll((Map)processMap(nodeInfo));
- state = STATE.PREREGISTERED;
+ setState(STATE.PREREGISTERED);
return this;
}
- public NodeRegistryEntry nodeInstallation(Map nodeInfo) {
+ public NodeRegistryEntry nodeIgnore(Object nodeInfo) {
installation.clear();
- installation.putAll(processMap("", nodeInfo));
- state = STATE.INSTALLED;
+ installation.put("ignore-node", nodeInfo!=null ? nodeInfo.toString() : null);
+ setState(STATE.IGNORE_NODE);
+ return this;
+ }
+
+ public NodeRegistryEntry nodeInstalling(Object nodeInfo) {
+ installation.clear();
+ installation.put("installation-task", nodeInfo!=null ? nodeInfo.toString() : "INSTALLING");
+ setState(STATE.INSTALLING);
+ return this;
+ }
+
+ public NodeRegistryEntry nodeNotInstalled(Object nodeInfo) {
+ installation.clear();
+ installation.put("installation-task-result", nodeInfo!=null ? nodeInfo.toString() : "NOT_INSTALLED");
+ setState(STATE.NOT_INSTALLED);
+ return this;
+ }
+
+ public NodeRegistryEntry nodeInstallationComplete(Object nodeInfo) {
+ installation.put("installation-task-result", nodeInfo!=null ? nodeInfo.toString() : "SUCCESS");
+ setState(STATE.INSTALLED);
+ return this;
+ }
+
+ public NodeRegistryEntry nodeInstallationError(Object nodeInfo) {
+ installation.put("installation-task-result", nodeInfo!=null ? nodeInfo.toString() : "ERROR");
+ setState(STATE.INSTALL_ERROR);
return this;
}
public NodeRegistryEntry nodeRegistration(Map nodeInfo) {
registration.clear();
registration.putAll(processMap("", nodeInfo));
- state = STATE.REGISTERED;
+ setState(STATE.REGISTERED);
return this;
}
diff --git a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/ServerCoordinator.java b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/ServerCoordinator.java
index c017bb71cc83a44039432ab78754b6ea6001d148..37dc67a508ef658dbb90671c4bb051af0085fd43 100644
--- a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/ServerCoordinator.java
+++ b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/ServerCoordinator.java
@@ -19,6 +19,8 @@ import static eu.melodic.event.util.GroupingConfiguration.BrokerConnectionConfig
public interface ServerCoordinator {
default boolean isSupported(TranslationContext tc) { return true; }
+ default boolean supportsAggregators() { return false; }
+
void initialize(TranslationContext tc, String upperwareGrouping, BaguetteServer server, Runnable callback);
default void setProperties(Map p) { }
@@ -35,6 +37,8 @@ public interface ServerCoordinator {
default boolean allowNotPreregisteredNode(ClientShellCommand csc) { return true; }
+ default void preregister(NodeRegistryEntry entry) { }
+
void register(ClientShellCommand c);
void unregister(ClientShellCommand c);
diff --git a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/Sshd.java b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/Sshd.java
index 2b9ef45b6ee96ea7f005b4fd7882baad6e615c63..30bd4c8fc40490e894e312d636f9a7b75622128b 100644
--- a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/Sshd.java
+++ b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/Sshd.java
@@ -9,7 +9,11 @@
package eu.melodic.event.baguette.server;
+import eu.melodic.event.baguette.server.coordinator.cluster.ClusteringCoordinator;
import eu.melodic.event.baguette.server.properties.BaguetteServerProperties;
+import eu.melodic.event.util.EventBus;
+import lombok.Getter;
+import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.apache.sshd.common.Factory;
import org.apache.sshd.common.PropertyResolverUtils;
@@ -21,6 +25,7 @@ import org.apache.sshd.server.SshServer;
import org.apache.sshd.server.auth.password.PasswordAuthenticator;
import org.apache.sshd.server.keyprovider.SimpleGeneratorHostKeyProvider;
import org.apache.sshd.server.session.ServerSession;
+import org.slf4j.event.Level;
import java.io.File;
import java.io.IOException;
@@ -34,7 +39,7 @@ import java.util.stream.Collectors;
*/
@Slf4j
public class Sshd {
- private ServerCoordinator coordinator;
+ @Getter private ServerCoordinator coordinator;
private BaguetteServerProperties configuration;
private SshServer sshd;
private String serverPubkey;
@@ -43,10 +48,16 @@ public class Sshd {
private boolean heartbeatOn;
private long heartbeatPeriod;
- public void start(BaguetteServerProperties configuration, ServerCoordinator coordinator) throws IOException {
+ private EventBus eventBus;
+ @Getter @Setter
+ private NodeRegistry nodeRegistry;
+
+ public void start(BaguetteServerProperties configuration, ServerCoordinator coordinator, EventBus eventBus, NodeRegistry registry) throws IOException {
log.info("** SSH server **");
this.coordinator = coordinator;
this.configuration = configuration;
+ this.eventBus = eventBus;
+ this.nodeRegistry = registry;
// Configure SSH server
int port = configuration.getServerPort();
@@ -64,12 +75,13 @@ public class Sshd {
sshd.setShellFactory(
new Factory() {
private ServerCoordinator coordinator;
+ private NodeRegistry nodeRegistry;
public Command create() {
- ClientShellCommand msc = new ClientShellCommand(this.coordinator, configuration.isClientAddressOverrideAllowed());
- //msc.setId( "#-"+System.currentTimeMillis() );
- log.debug("SSH server: Shell Factory: create invoked : New ClientShellCommand id: {}", msc.getId());
- return msc;
+ ClientShellCommand csc = new ClientShellCommand(this.coordinator, configuration.isClientAddressOverrideAllowed(), eventBus, nodeRegistry);
+ //csc.setId( "#-"+System.currentTimeMillis() );
+ log.debug("SSH server: Shell Factory: create invoked : New ClientShellCommand id: {}", csc.getId());
+ return csc;
}
public Command get() {
@@ -77,12 +89,13 @@ public class Sshd {
return null;
}
- public Factory setCoordinator(ServerCoordinator coordinator) {
+ public Factory setCoordinatorAndNodeRegistry(ServerCoordinator coordinator, NodeRegistry nodeRegistry) {
this.coordinator = coordinator;
+ this.nodeRegistry = nodeRegistry;
return this;
}
}
- .setCoordinator(coordinator)
+ .setCoordinatorAndNodeRegistry(coordinator, nodeRegistry)
);
sshd.setPasswordAuthenticator(
@@ -157,7 +170,7 @@ public class Sshd {
String msg = String.format("Heartbeat %d", System.currentTimeMillis());
log.debug("--> Heartbeat: {}", msg);
for (ClientShellCommand csc : ClientShellCommand.getActive()) {
- csc.sendToClient(msg);
+ csc.sendToClient(msg, Level.DEBUG);
}
}
log.info("--> Heartbeat: Stopped");
@@ -201,13 +214,29 @@ public class Sshd {
}
}
- public Object readFromClient(String clientId, String command) {
+ public void sendToActiveClusters(String command) {
+ if (!(coordinator instanceof ClusteringCoordinator)) return;
+ ((ClusteringCoordinator)coordinator).getClusters().forEach(cluster -> {
+ log.info("SSH server: Sending to cluster {} : {}", cluster.getId(), command);
+ sendToCluster(cluster.getId(), command);
+ });
+ }
+
+ public void sendToCluster(String clusterId, String command) { // Sends 'command' to every client of the given cluster
+ if (!(coordinator instanceof ClusteringCoordinator) || ((ClusteringCoordinator)coordinator).getCluster(clusterId)==null) return; // no-op without clustering or for an unknown cluster id
+ ((ClusteringCoordinator)coordinator).getCluster(clusterId).getNodes().forEach(csc -> {
+ log.info("SSH server: Sending to client {} : {}", csc.getId(), command);
+ csc.sendToClient(command);
+ });
+ }
+
+ public Object readFromClient(String clientId, String command, Level logLevel) {
log.trace("SSH server: Sending and Reading to/from client {}: {}", clientId, command);
for (ClientShellCommand csc : ClientShellCommand.getActive()) {
log.trace("SSH server: Check CSC: csc-id={}, client={}", csc.getId(), clientId);
if (csc.getId().equals(clientId)) {
- log.info("SSH server: Sending and Reading to/from client {} : {}", csc.getId(), command);
- return csc.readFromClient(command);
+ log.debug("SSH server: Sending and Reading to/from client {} : {}", csc.getId(), command);
+ return csc.readFromClient(command, logLevel);
}
}
return null;
@@ -257,8 +286,8 @@ public class Sshd {
String serverKeyFilePath = configuration.getServerKeyFile();
log.debug("_loadPubkeyAndFingerprint(): Server Key file: {}", serverKeyFilePath);
File serverKeyFile = new File(serverKeyFilePath);
- SimpleGeneratorHostKeyProvider z = new SimpleGeneratorHostKeyProvider(serverKeyFile);
- z.loadKeys().forEach(kp -> {
+ SimpleGeneratorHostKeyProvider simpleGeneratorHostKeyProvider = new SimpleGeneratorHostKeyProvider(serverKeyFile);
+ simpleGeneratorHostKeyProvider.loadKeys().forEach(kp -> {
log.debug("_loadPubkeyAndFingerprint(): KeyPair found: {}", kp.toString());
PublicKey serverKey = kp.getPublic();
log.debug("_loadPubkeyAndFingerprint(): Pubkey: {}", kp.toString());
@@ -274,7 +303,7 @@ public class Sshd {
log.debug("_loadPubkeyAndFingerprint(): Fingerprint: {}", serverPubkeyFingerprint);
} catch (Exception ex) {
- log.error("_loadPubkeyAndFingerprint(): EXCEPTION: {}", ex);
+ log.error("_loadPubkeyAndFingerprint(): EXCEPTION: ", ex);
}
});
}
diff --git a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/NoopCoordinator.java b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/NoopCoordinator.java
index 0dff0791d31e4c950d4a6377c142c45a2454d3d9..5bdb0466c06329f68835eca7ed329facd1b18872 100644
--- a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/NoopCoordinator.java
+++ b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/NoopCoordinator.java
@@ -11,6 +11,7 @@ package eu.melodic.event.baguette.server.coordinator;
import eu.melodic.event.baguette.server.BaguetteServer;
import eu.melodic.event.baguette.server.ClientShellCommand;
+import eu.melodic.event.baguette.server.NodeRegistryEntry;
import eu.melodic.event.baguette.server.ServerCoordinator;
import eu.melodic.event.baguette.server.properties.BaguetteServerProperties;
import eu.melodic.event.translate.TranslationContext;
@@ -62,6 +63,11 @@ public class NoopCoordinator implements ServerCoordinator {
return -1;
}
+ @Override
+ public synchronized void preregister(NodeRegistryEntry entry) {
+ _logInvocation("preregister", entry, true);
+ }
+
@Override
public synchronized void register(ClientShellCommand c) {
_logInvocation("register", c, true);
@@ -77,16 +83,21 @@ public class NoopCoordinator implements ServerCoordinator {
_logInvocation("clientReady", c, true);
}
- protected boolean _logInvocation(String methodName, ClientShellCommand c, boolean checkStarted) {
+ protected boolean _logInvocation(String methodName, Object o, boolean checkStarted) {
String className = getClass().getSimpleName();
- String cscStr = (c!=null) ? String.format(". CSC: %s", c.toString()) : "";
+ String str = (o==null) ? "" : (
+ o instanceof ClientShellCommand ? String.format(". CSC: %s", o) : (
+ o instanceof NodeRegistryEntry ? String.format(". NRE: %s", o) :
+ String.format(". Object: %s", o)
+ )
+ );
if (checkStarted && !started) {
- log.warn("{}: {}(): Coordinator has not been started{}", className, methodName, cscStr);
+ log.warn("{}: {}(): Coordinator has not been started{}", className, methodName, str);
} else
if (!checkStarted && started) {
- log.warn("{}: {}(): Coordinator is already running{}", className, methodName, cscStr);
+ log.warn("{}: {}(): Coordinator is already running{}", className, methodName, str);
} else {
- log.info("{}: {}(): Method invoked{}", className, methodName, cscStr);
+ log.info("{}: {}(): Method invoked{}", className, methodName, str);
}
return started;
}
diff --git a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/AtLeastTwoZoneManagementStrategy.java b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/AtLeastTwoZoneManagementStrategy.java
index 91e820b379dafca7dfedd1ed4012d10d4d12d250..1406f0e82a5c14269e7924fe33c9bceb74706514 100644
--- a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/AtLeastTwoZoneManagementStrategy.java
+++ b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/AtLeastTwoZoneManagementStrategy.java
@@ -10,10 +10,13 @@
package eu.melodic.event.baguette.server.coordinator.cluster;
import eu.melodic.event.baguette.server.ClientShellCommand;
+import eu.melodic.event.baguette.server.NodeRegistryEntry;
+import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.glassfish.jersey.internal.guava.InetAddresses;
+import java.util.Map;
import java.util.UUID;
/**
@@ -37,30 +40,7 @@ public class AtLeastTwoZoneManagementStrategy implements IZoneManagementStrategy
}
@Override
- public String getZoneIdFor(ClientShellCommand c) {
- String nodeAddress = c.getClientIpAddress();
- String hostname = c.getClientHostname();
- log.debug("getZoneIdFor: {}: address: {}", c.getId(), nodeAddress);
- log.debug("getZoneIdFor: {}: hostname: {}", c.getId(), hostname);
- String zoneName = null;
- if (StringUtils.isNotBlank(hostname) && !InetAddresses.isUriInetAddress(hostname)) {
- int p = hostname.indexOf(".");
- if (p>0)
- zoneName = hostname.substring(p+1);
- }
- if (StringUtils.isBlank(zoneName) && StringUtils.isNotBlank(nodeAddress)) {
- int p = nodeAddress.lastIndexOf(".");
- if (p<0) p = nodeAddress.lastIndexOf(":");
- if (p>0)
- zoneName = nodeAddress.substring(0, p);
- }
- return StringUtils.isBlank(zoneName)
- ? UUID.randomUUID().toString()
- : zoneName.replaceAll("[^A-Za-z0-9_]","_");
- }
-
- @Override
- public synchronized void nodeAdded(ClientShellCommand csc, ClusteringCoordinator coordinator, ClusterZone zone) {
+ public synchronized void nodeAdded(ClientShellCommand csc, ClusteringCoordinator coordinator, IClusterZone zone) {
if (zone.getNodes().size() < 2)
return;
@@ -83,18 +63,18 @@ public class AtLeastTwoZoneManagementStrategy implements IZoneManagementStrategy
}
}
- private void joinToCluster(ClientShellCommand csc, ClusteringCoordinator coordinator, ClusterZone zone) {
+ private void joinToCluster(ClientShellCommand csc, ClusteringCoordinator coordinator, IClusterZone zone) {
coordinator.sendClusterKey(csc, zone);
coordinator.instructClusterJoin(csc, zone, false);
coordinator.sleep(1000);
csc.sendCommand("CLUSTER-EXEC broker list");
- coordinator.sleep(1000);
- csc.sendCommand("CLUSTER-TEST");
+ //coordinator.sleep(1000);
+ //csc.sendCommand("CLUSTER-TEST");
}
@Override
- public synchronized void nodeRemoved(ClientShellCommand csc, ClusteringCoordinator coordinator, ClusterZone zone) {
+ public synchronized void nodeRemoved(ClientShellCommand csc, ClusteringCoordinator coordinator, IClusterZone zone) {
// Instruct node to leave cluster
log.info("AtLeastTwoZoneManagementStrategy: Node to leave cluster: client={}, zone={}", csc.getId(), zone.getId());
coordinator.instructClusterLeave(csc, zone);
diff --git a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/ClusterZone.java b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/ClusterZone.java
index 455450fbeb3e72238e4e6eec694688db129ffa93..2db0c3e273672fe9bdd0e26d46f392173722d07b 100644
--- a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/ClusterZone.java
+++ b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/ClusterZone.java
@@ -10,6 +10,8 @@
package eu.melodic.event.baguette.server.coordinator.cluster;
import eu.melodic.event.baguette.server.ClientShellCommand;
+import eu.melodic.event.baguette.server.NodeRegistryEntry;
+import eu.melodic.event.util.ClientConfiguration;
import eu.melodic.event.util.KeystoreUtil;
import lombok.*;
import lombok.extern.slf4j.Slf4j;
@@ -23,7 +25,7 @@ import java.util.concurrent.atomic.AtomicInteger;
@Slf4j
@Data
-public class ClusterZone {
+public class ClusterZone implements IClusterZone {
private final String id;
private final int startPort;
private final int endPort;
@@ -34,6 +36,8 @@ public class ClusterZone {
private final Map nodes = new LinkedHashMap<>();
@Getter(AccessLevel.NONE)
private final Map addressPortCache = new HashMap<>();
+ @Getter(AccessLevel.NONE)
+ private final Map nodesWithoutClient = new LinkedHashMap<>();
private final String clusterId;
private final String clusterKeystoreBase64;
@@ -44,7 +48,7 @@ public class ClusterZone {
private ClientShellCommand aggregator;
@SneakyThrows
- public ClusterZone(@NotBlank String id, int startPort, int endPort) {
+ public ClusterZone(@NotBlank String id, int startPort, int endPort, String keystoreFileName) {
checkArgs(id, startPort, endPort);
this.id = id;
this.startPort = startPort;
@@ -52,8 +56,7 @@ public class ClusterZone {
currentPort.set(startPort);
this.clusterId = RandomStringUtils.randomAlphanumeric(64);
- String fileName = String.format("logs/cluster_%d_%s.p12", System.currentTimeMillis(), id);
- this.clusterKeystoreFile = new File(fileName);
+ this.clusterKeystoreFile = new File(keystoreFileName);
this.clusterKeystoreType = "JKS";
this.clusterKeystorePassword = RandomStringUtils.randomAlphanumeric(64);
log.info("New ClusterZone: zone: {}", id);
@@ -66,7 +69,8 @@ public class ClusterZone {
.createIfNotExist()
.createKeyAndCert(clusterId, "CN=" + clusterId, "")
.readFileAsBase64();
- log.debug(" Base64 content: {}", clusterKeystoreBase64);
+ log.debug(" Base64 content: {}",
+ StringUtils.isNotBlank(clusterKeystoreBase64) ? "Not empty" : "!!! Empty !!!");
}
private void checkArgs(String id, int startPort, int endPort) {
@@ -92,10 +96,12 @@ public class ClusterZone {
addressPortCache.clear();
}
+ // Nodes management
public void addNode(@NonNull ClientShellCommand csc) {
synchronized (Objects.requireNonNull(csc)) {
nodes.put(csc.getClientIpAddress(), csc);
csc.setClientZone(this);
+ if (csc.getNodeRegistryEntry()!=null) csc.getNodeRegistryEntry().setClusterZone(this); // entry may be unset for clients without preregistration; same guard as removeNode
}
}
@@ -104,9 +110,15 @@ public class ClusterZone {
nodes.remove(csc.getClientIpAddress());
if (csc.getClientZone()==this)
csc.setClientZone(null);
+ if (csc.getNodeRegistryEntry()!=null && csc.getNodeRegistryEntry().getClusterZone()==this)
+ csc.getNodeRegistryEntry().setClusterZone(null);
}
}
+ public Set getNodeAddresses() {
+ return new HashSet<>(nodes.keySet());
+ }
+
public List getNodes() {
return new ArrayList<>(nodes.values());
}
@@ -114,4 +126,54 @@ public class ClusterZone {
public ClientShellCommand getNodeByAddress(String address) {
return nodes.get(address);
}
+
+ // Nodes-without-Clients management
+ public void addNodeWithoutClient(@NonNull NodeRegistryEntry entry) {
+ synchronized (Objects.requireNonNull(entry)) {
+ String address = entry.getIpAddress();
+ if (address == null) address = entry.getNodeAddress();
+ if (address == null) throw new IllegalArgumentException("Node address not found in Preregistration info");
+ nodesWithoutClient.put(address, entry);
+ entry.setClusterZone(this);
+ sendClientConfigurationToZoneClients();
+ }
+ }
+
+ public void removeNodeWithoutClient(@NonNull NodeRegistryEntry entry) {
+ synchronized (Objects.requireNonNull(entry)) {
+ String address = entry.getIpAddress();
+ if (address == null) address = entry.getNodeAddress();
+ if (address == null) throw new IllegalArgumentException("Node address not found in Preregistration info");
+ nodesWithoutClient.remove(address);
+ if (entry.getClusterZone() == this)
+ entry.setClusterZone(null);
+ sendClientConfigurationToZoneClients();
+ }
+ }
+
+ public Set getNodeWithoutClientAddresses() {
+ return new HashSet<>(nodesWithoutClient.keySet());
+ }
+
+ public List getNodesWithoutClient() {
+ return new ArrayList<>(nodesWithoutClient.values());
+ }
+
+ public NodeRegistryEntry getNodeWithoutClientByAddress(String address) {
+ return nodesWithoutClient.get(address);
+ }
+
+ public ClientConfiguration getClientConfiguration() {
+ return ClientConfiguration.builder()
+ .nodesWithoutClient(new HashSet<>(nodesWithoutClient.keySet()))
+ .build();
+ }
+
+ public ClientConfiguration sendClientConfigurationToZoneClients() {
+ ClientConfiguration cc = ClientConfiguration.builder()
+ .nodesWithoutClient(new HashSet<>(nodesWithoutClient.keySet()))
+ .build();
+ ClientShellCommand.sendClientConfigurationToClients(cc , getNodes());
+ return cc;
+ }
}
diff --git a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/ClusterZoneDetector.java b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/ClusterZoneDetector.java
new file mode 100644
index 0000000000000000000000000000000000000000..bb0388745709746f39f273e2764cf15da4341ea4
--- /dev/null
+++ b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/ClusterZoneDetector.java
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.server.coordinator.cluster;
+
+import eu.melodic.event.baguette.server.ClientShellCommand;
+import eu.melodic.event.baguette.server.NodeRegistryEntry;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.text.StringSubstitutor;
+import org.springframework.context.expression.MapAccessor;
+import org.springframework.expression.spel.standard.SpelExpressionParser;
+import org.springframework.expression.spel.support.StandardEvaluationContext;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+/**
+ * Detects the Cluster/Zone the given node must be added,
+ * using node's pre-registration info and a set of configured rules
+ */
+@Slf4j
+public class ClusterZoneDetector implements IClusterZoneDetector {
+ private final static List DEFAULT_ZONE_DETECTION_RULES = Arrays.asList(
+ "'${zone:-}'",
+ "'${zone-id:-}'",
+ "'${region:-}'",
+ "'${region-id:-}'",
+ "'${cloud:-}'",
+ "'${cloud-id:-}'",
+ "'${provider:-}'",
+ "'${provider-id:-}'",
+ "T(java.time.OffsetDateTime).now().toString()",
+// "'Cluster_'+T(java.lang.System).currentTimeMillis()",
+// "'Cluster_'+T(java.util.UUID).randomUUID()",
+ ""
+ );
+ private final static RULE_TYPE DEFAULT_RULES_TYPE = RULE_TYPE.SPEL;
+ private final static List DEFAULT_ZONES = Collections.singletonList("DEFAULT_CLUSTER");
+ private final static ASSIGNMENT_TO_DEFAULT_CLUSTERS DEFAULT_ASSIGNMENT_TO_DEFAULT_CLUSTERS = ASSIGNMENT_TO_DEFAULT_CLUSTERS.RANDOM;
+
+ enum RULE_TYPE { SPEL, MAP }
+ enum ASSIGNMENT_TO_DEFAULT_CLUSTERS { RANDOM, SEQUENTIAL }
+
+ private RULE_TYPE clusterDetectionRulesType = DEFAULT_RULES_TYPE;
+ private List clusterDetectionRules = DEFAULT_ZONE_DETECTION_RULES;
+ private List defaultClusters = DEFAULT_ZONES;
+ private ASSIGNMENT_TO_DEFAULT_CLUSTERS assignmentToDefaultClusters = DEFAULT_ASSIGNMENT_TO_DEFAULT_CLUSTERS;
+
+ private SpelExpressionParser parser = new SpelExpressionParser();
+ private AtomicInteger currentDefaultCluster = new AtomicInteger(0);
+
+ /** Configures rule type, detection rules, default clusters and the default-cluster assignment method from the given zone settings. */
+ @Override
+ public void setProperties(Map zoneConfig) {
+ log.debug("ClusterZoneDetector: setProperties: BEGIN: config: {}", zoneConfig);
+ // Get rules type (Map keys or SpEL expressions); the configured value is matched case-insensitively
+ RULE_TYPE rulesType = RULE_TYPE.valueOf(
+ zoneConfig.getOrDefault("cluster-detector-rules-type", DEFAULT_RULES_TYPE.toString()).toUpperCase());
+
+ // Get rules texts and separator (the separator is handed to String.split(), i.e. it is interpreted as a regex)
+ String separator = zoneConfig.getOrDefault("cluster-detector-rules-separator", ",");
+ String rulesStr = zoneConfig.getOrDefault("cluster-detector-rules", null);
+ if (StringUtils.isNotBlank(rulesStr)) {
+ // Configured rules (and their type) are used only if at least one non-blank rule remains
+ List rulesList = Arrays.stream(rulesStr.split(separator))
+ .filter(StringUtils::isNotBlank)
+ .map(String::trim)
+ .collect(Collectors.toList());
+ clusterDetectionRules = (rulesList.size()>0) ? rulesList : DEFAULT_ZONE_DETECTION_RULES;
+ clusterDetectionRulesType = (rulesList.size()>0) ? rulesType : DEFAULT_RULES_TYPE;
+ }
+
+ // Get the default cluster(s)
+ List defaultsList = Arrays.stream(zoneConfig.getOrDefault("default-clusters", "").split(","))
+ .filter(StringUtils::isNotBlank)
+ .map(String::trim)
+ .collect(Collectors.toList());
+ defaultClusters = (defaultsList.size()>0) ? defaultsList : DEFAULT_ZONES;
+
+ // Get assignment method to default clusters (upper-case the configured value, not just the default, before valueOf)
+ assignmentToDefaultClusters = ASSIGNMENT_TO_DEFAULT_CLUSTERS.valueOf(
+ zoneConfig.getOrDefault("assignment-to-default-clusters", DEFAULT_ASSIGNMENT_TO_DEFAULT_CLUSTERS.toString()).toUpperCase());
+
+ log.debug("ClusterZoneDetector: setProperties: clusterDetectionRulesType: {}", clusterDetectionRulesType);
+ log.debug("ClusterZoneDetector: setProperties: clusterDetectionRules: {}", clusterDetectionRules);
+ log.debug("ClusterZoneDetector: setProperties: defaultClusters: {}", defaultClusters);
+ log.debug("ClusterZoneDetector: setProperties: assignmentToDefaultClusters: {}", assignmentToDefaultClusters);
+ }
+
+ @Override
+ public String getZoneIdFor(ClientShellCommand csc) {
+ log.trace("ClusterZoneDetector: getZoneIdFor: BEGIN: CSC: {}", csc);
+ return csc.getClientZone()==null || StringUtils.isBlank(csc.getClientZone().getId())
+ ? getZoneIdFor(csc.getNodeRegistryEntry())
+ : csc.getClientZone().getId();
+ }
+
+ @Override
+ public String getZoneIdFor(NodeRegistryEntry entry) { // Returns the first non-blank rule result for this entry, else one of the default clusters
+ log.trace("ClusterZoneDetector: getZoneIdFor: BEGIN: NRE: {}", entry);
+ final Map info = entry.getPreregistration();
+
+ // Select and initialize the right valueMapper based on rules type
+ log.trace("ClusterZoneDetector: getZoneIdFor: PREREGISTRATION-INFO: {}", info);
+ Function valueMapper;
+ switch (clusterDetectionRulesType) {
+ case SPEL:
+ StandardEvaluationContext context = new StandardEvaluationContext(info);
+ context.addPropertyAccessor(new MapAccessor());
+ valueMapper = expression -> {
+ log.trace("ClusterZoneDetector: getZoneIdFor: Expression: {}", expression);
+ expression = StringSubstitutor.replace(expression, info); // NOTE(review): preregistration values become part of the SpEL source before parsing; if they can be attacker-controlled this allows expression injection -- confirm the input is trusted
+ expression = StringSubstitutor.replaceSystemProperties(expression);
+ log.trace("ClusterZoneDetector: getZoneIdFor: SpEL expr.: {}", expression);
+ String result = parser.parseRaw(expression).getValue(context, String.class);
+ log.trace("ClusterZoneDetector: getZoneIdFor: Result: {}", result);
+ return StringUtils.isBlank(result) ? null : result.trim();
+ };
+ break;
+ case MAP:
+ valueMapper = info::get; // each rule is used as a key into the preregistration info
+ break;
+ default:
+ throw new IllegalArgumentException("Unsupported RULE_TYPE: "+ clusterDetectionRulesType);
+ }
+
+ // Process rules one-by-one, using valueMapper, until one rule yields a non-blank value
+ String zoneId = clusterDetectionRules.stream()
+ .filter(StringUtils::isNotBlank)
+ .peek(s -> log.trace("ClusterZoneDetector: getZoneIdFor: RULE: {}", s))
+ .map(valueMapper)
+ .peek(s -> log.trace("ClusterZoneDetector: getZoneIdFor: RESULT: {}", s))
+ .filter(StringUtils::isNotBlank)
+ .findFirst()
+ .orElse(null);
+ log.debug("ClusterZoneDetector: getZoneIdFor: Intermediate: zoneId: {}", zoneId);
+
+ // If all rules yielded blank values then a default cluster id will be selected, using the assignment method
+ if (StringUtils.isBlank(zoneId)) {
+ switch (assignmentToDefaultClusters) {
+ case RANDOM:
+ zoneId = defaultClusters.get((int) (Math.random() * defaultClusters.size()));
+ break;
+ case SEQUENTIAL:
+ zoneId = defaultClusters.get(currentDefaultCluster.getAndUpdate(operand -> (operand + 1) % defaultClusters.size())); // uses the current index, then advances it modulo the list size
+ break;
+ default:
+ throw new IllegalArgumentException("Unsupported ASSIGNMENT_TO_DEFAULT_CLUSTERS: "+assignmentToDefaultClusters);
+ }
+ }
+ log.debug("ClusterZoneDetector: getZoneIdFor: END: zoneId: {}", zoneId);
+ return zoneId;
+ }
+}
diff --git a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/ClusteringCoordinator.java b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/ClusteringCoordinator.java
index c449859fba1dfe6ade407b2ceda28f8a1440d016..ef8bb32bb3b8d0225d483dbc76e275c38a562ee8 100644
--- a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/ClusteringCoordinator.java
+++ b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/ClusteringCoordinator.java
@@ -14,10 +14,13 @@ import eu.melodic.event.baguette.server.ClientShellCommand;
import eu.melodic.event.baguette.server.NodeRegistryEntry;
import eu.melodic.event.baguette.server.coordinator.NoopCoordinator;
import eu.melodic.event.translate.TranslationContext;
+import eu.melodic.event.util.ClientConfiguration;
import eu.melodic.event.util.GROUPING;
+import lombok.NonNull;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.text.StringSubstitutor;
import java.util.*;
import java.util.stream.Collectors;
@@ -30,22 +33,41 @@ public class ClusteringCoordinator extends NoopCoordinator {
private final Map topologyMap = new HashMap<>();
+ private IClusterZoneDetector clusterZoneDetector;
private IZoneManagementStrategy zoneManagementStrategy;
private int zoneStartPort = 1200;
private int zoneEndPort = 65535;
+ private String zoneKeystoreFileNameFormatter = "logs/cluster_${TIMESTAMP}_${ZONE_ID}.p12";
private GROUPING topLevelGrouping;
private GROUPING aggregatorGrouping;
private GROUPING lastLevelGrouping;
+ private final Map ignoredNodes = new LinkedHashMap<>();
+
+ public Collection getClusterIdSet() { return topologyMap.keySet(); }
+ public Collection getClusters() { return topologyMap.values().stream().map(c->(IClusterZone)c).collect(Collectors.toList()); }
+ public IClusterZone getCluster(String id) { return topologyMap.get(id); }
+
@Override
public boolean isSupported(final TranslationContext _TC) {
+ log.trace("ClusteringCoordinator.isSupported: TC: {}", _TC);
+
// Check if it is a 3-level architecture
Set groupings = _TC.getG2R().keySet();
+ log.trace("ClusteringCoordinator.isSupported: Groupings: {}", groupings);
+ log.trace("ClusteringCoordinator.isSupported: Contains GLOBAL: {}", groupings.contains("GLOBAL"));
+ log.trace("ClusteringCoordinator.isSupported: Num of Levels: {}", groupings.size());
+
if (!groupings.contains("GLOBAL")) return false;
return groupings.size()==3;
}
+ @Override
+ public boolean supportsAggregators() {
+ return true;
+ }
+
@Override
public void initialize(final TranslationContext TC, String upperwareGrouping, BaguetteServer server, Runnable callback) {
if (!isSupported(TC))
@@ -74,6 +96,22 @@ public class ClusteringCoordinator extends NoopCoordinator {
? Integer.parseInt(zoneConfig.get("zone-port-start")) : zoneStartPort;
zoneEndPort = zoneConfig.containsKey("zone-port-end")
? Integer.parseInt(zoneConfig.get("zone-port-end")) : zoneEndPort;
+ zoneKeystoreFileNameFormatter = zoneConfig.containsKey("zone-keystore-file-name-formatter")
+ ? zoneConfig.get("zone-keystore-file-name-formatter") : zoneKeystoreFileNameFormatter;
+
+ // Initialize Cluster Detector: custom class if 'cluster-detector-class' is configured, otherwise the built-in ClusterZoneDetector
+ String clusterDetectorClass = zoneConfig.get("cluster-detector-class");
+ if (StringUtils.isNotBlank(clusterDetectorClass)) {
+ Class<?> clazz = Class.forName(clusterDetectorClass);
+ if (IClusterZoneDetector.class.isAssignableFrom(clazz)) // true when the configured class implements IClusterZoneDetector
+ clusterZoneDetector = (IClusterZoneDetector) clazz.newInstance();
+ else
+ throw new IllegalArgumentException("Invalid Cluster Detector class. Not implementing IClusterZoneDetector interface: "+clazz.getName());
+ } else {
+ clusterZoneDetector = new ClusterZoneDetector();
+ }
+ clusterZoneDetector.setProperties(zoneConfig);
+ log.info("Cluster Detector class: {}", clusterZoneDetector.getClass().getName());
}
@Override
@@ -85,7 +123,10 @@ public class ClusteringCoordinator extends NoopCoordinator {
String clientId1 = csc.getId();
String clientId2 = csc.getClientId();
String clientId3 = args[2];
+ log.trace("processClientInput: csc.zone: {}", csc.getClientZone()!=null ? csc.getClientZone().getId() : null);
+ log.trace("processClientInput: topology-map: {}", topologyMap.keySet());
ClusterZone zone = findZone(csc);
+ log.trace("processClientInput: zone={}", zone);
zone.setAggregator(csc);
log.info("Updated aggregator of zone: {} -- New aggregator: {} @ {} ({})",
zone.getId(), clientId1, csc.getClientIpAddress(), clientId2);
@@ -94,7 +135,7 @@ public class ClusteringCoordinator extends NoopCoordinator {
}
private ClusterZone findZone(ClientShellCommand csc) {
- String zoneId = zoneManagementStrategy.getZoneIdFor(csc);
+ String zoneId = clusterZoneDetector.getZoneIdFor(csc);
return topologyMap.get(zoneId);
}
@@ -113,26 +154,84 @@ public class ClusteringCoordinator extends NoopCoordinator {
return zoneManagementStrategy.allowNotPreregisteredNode(csc);
}
+ @Override
+ public synchronized void preregister(@NonNull NodeRegistryEntry entry) {
+ log.debug("ClusteringCoordinator: preregister: BEGIN: NRE:\n{}", entry);
+
+ if (!_logInvocation("preregister", entry.getNodeIdAndAddress(), true)) return;
+
+ // Check if client has been preregistered (or connected without being expected)
+ /*if (zoneManagementStrategy.allowNotPreregisteredNode(entry)) {
+ log.warn("Non-Preregistered node will be preregistered: {} @ {}", entry.getClientId(), entry.getIpAddress());
+ zoneManagementStrategy.notPreregisteredNode(entry);
+ }*/
+
+ log.debug("ClusteringCoordinator: preregister: Checking node State: node={}, state={}", entry.getNodeIdAndAddress(), entry.getState());
+ if (entry.getState()==NodeRegistryEntry.STATE.IGNORE_NODE) {
+ // Add in ignored nodes list
+ log.info("ClusteringCoordinator: preregister: Ignoring node: node={}, state={}", entry.getNodeIdAndAddress(), entry.getState());
+ ignoredNodes.put(entry.getIpAddress(), entry);
+ } else
+ if (entry.getState()==NodeRegistryEntry.STATE.NOT_INSTALLED) {
+ // Append to Nodes without EMS client (e.g. Edge devices, resource-limited VM's)
+ log.debug("ClusteringCoordinator: preregister: Adding node without EMS client: node={}, state={}", entry.getNodeIdAndAddress(), entry.getState());
+
+ // Assign node-without-client in a zone
+ String zoneId = clusterZoneDetector.getZoneIdFor(entry);
+ log.debug("ClusteringCoordinator: preregister: New entry: node={}, zone-id={}", entry.getNodeIdAndAddress(), zoneId);
+ if (log.isTraceEnabled()) {
+ log.trace("preregister: topologyMap: BEFORE: keys={}", topologyMap.keySet());
+ log.trace("preregister: topologyMap: containsKey: key={}, result={}", zoneId, topologyMap.containsKey(zoneId));
+ }
+ ClusterZone zone = topologyMap.computeIfAbsent(zoneId, this::createClusterZone);
+ log.trace("ClusteringCoordinator: preregister: Zone members without client: BEFORE: {}", zone.getNodesWithoutClient());
+ zone.addNodeWithoutClient(entry);
+ log.trace("ClusteringCoordinator: preregister: Zone members without client: AFTER: {}", zone.getNodesWithoutClient());
+ } else
+ if (entry.getState()==NodeRegistryEntry.STATE.INSTALLED) {
+ // Append to normal Node with EMS client
+ log.debug("ClusteringCoordinator: preregister: Node with EMS client: node={}, state={}", entry.getNodeIdAndAddress(), entry.getState());
+ // No need to do something
+ } else {
+ // Other states are ignored
+ log.warn("ClusteringCoordinator: preregister: No preregistration due to node state: node={}, state={}", entry.getNodeIdAndAddress(), entry.getState());
+ }
+ }
+
+ private ClusterZone createClusterZone(@NonNull String id) {
+ Map values = new HashMap<>();
+ values.put("TIMESTAMP", ""+System.currentTimeMillis());
+ values.put("ZONE_ID", id.replaceAll("[^A-Za-z0-9_]", "_"));
+ String keystoreFile = StringSubstitutor.replace(zoneKeystoreFileNameFormatter, values);
+ return new ClusterZone(id, zoneStartPort, zoneEndPort, keystoreFile);
+ }
+
@Override
public synchronized void register(ClientShellCommand csc) {
if (!_logInvocation("register", csc, true)) return;
// Check if client has been preregistered (or connected without being expected)
NodeRegistryEntry preregEntry = server.getNodeRegistry().getNodeByAddress(csc.getClientIpAddress());
+ log.debug("Preregistered info for node: {} @ {}:\n{}", csc.getId(), csc.getClientIpAddress(), preregEntry);
if (preregEntry==null && zoneManagementStrategy.allowNotPreregisteredNode(csc)) {
log.warn("Non Preregistered node connected: {} @ {}", csc.getId(), csc.getClientIpAddress());
+ log.warn("Preregistered nodes: {}", server.getNodeRegistry().getNodes().stream()
+ .map(entry->entry.getState()+"/"+entry.getIpAddress()+"/"+entry.getNodeIdAndAddress()+"/"+entry.getClientId())
+ .collect(Collectors.toList()));
zoneManagementStrategy.notPreregisteredNode(csc);
} else if (preregEntry==null) {
log.warn("Non Preregistered node is refused connection: {} @ {}", csc.getId(), csc.getClientIpAddress());
csc.setCloseConnection(true);
return;
}
+ if (preregEntry!=null) csc.setNodeRegistryEntry(preregEntry);
// Check if client has already been registered (i.e. is still connected)
ClientShellCommand regEntry = topologyMap.values().stream()
.map(zone->zone.getNodeByAddress(csc.getClientIpAddress()))
.filter(Objects::nonNull)
.findAny().orElse(null);
+ log.debug("Registered CSC for node: {} @ {}:\n{}", csc.getId(), csc.getClientIpAddress(), regEntry);
if (regEntry!=null && allowAlreadyRegisteredNode(csc)) {
log.warn("Already Registered node connected: {} @ {}", csc.getId(), csc.getClientIpAddress());
zoneManagementStrategy.alreadyRegisteredNode(csc);
@@ -153,6 +252,12 @@ public class ClusteringCoordinator extends NoopCoordinator {
}
protected synchronized void _do_register(ClientShellCommand csc) {
+ // Add registered node in topology map
+ addNodeInTopology(csc);
+
+ // collect client configuration
+ ClientConfiguration clientConfig = csc.getClientZone().getClientConfiguration();
+
// prepare configuration
Map connCfgMap = new LinkedHashMap<>();
BrokerConnectionConfig groupingConn = getUpperwareBrokerConfig(server);
@@ -166,6 +271,13 @@ public class ClusteringCoordinator extends NoopCoordinator {
log.trace("ClusteringCoordinator: {} broker config.: {}", groupingName, groupingConn);
}
+ // send client configuration to client
+ log.info("ClusteringCoordinator: --------------------------------------------------");
+ log.info("ClusteringCoordinator: Sending client configuration to client {}...\n{}", csc.getId(), clientConfig);
+ csc.getClientZone().sendClientConfigurationToZoneClients();
+ log.info("ClusteringCoordinator: Sending client configuration to client {}... done", csc.getId());
+ sleep(500);
+
// send grouping configurations to client
log.info("ClusteringCoordinator: --------------------------------------------------");
log.info("ClusteringCoordinator: Sending grouping configurations to client {}...\n{}", csc.getId(), connCfgMap);
@@ -181,15 +293,15 @@ public class ClusteringCoordinator extends NoopCoordinator {
log.info("ClusteringCoordinator: --------------------------------------------------");
sleep(500);
- // Add registered node in topology map
- addNodeInTopology(csc);
+ // Registered node added in topology map - Notify ZoneManagementStrategy
+ addedNodeInTopology(csc);
}
private synchronized void addNodeInTopology(ClientShellCommand csc) {
// Assign client in a zone
- String zoneId = zoneManagementStrategy.getZoneIdFor(csc);
+ String zoneId = clusterZoneDetector.getZoneIdFor(csc);
log.debug("addNodeInTopology: New client: id={}, address={}, zone-id={}", csc.getId(), csc.getClientIpAddress(), zoneId);
- ClusterZone zone = topologyMap.computeIfAbsent(zoneId, id -> new ClusterZone(id, zoneStartPort, zoneEndPort));
+ ClusterZone zone = topologyMap.computeIfAbsent(zoneId, this::createClusterZone);
log.trace("addNodeInTopology: Zone members: BEFORE: {}", zone.getNodes());
zone.addNode(csc);
log.trace("addNodeInTopology: Zone members: AFTER: {}", zone.getNodes());
@@ -205,9 +317,11 @@ public class ClusteringCoordinator extends NoopCoordinator {
//csc.setClientClusterNodeHostname(nodeCanonical);
log.debug("addNodeInTopology: New client: Cluster node: address={}, hostname={} // {}, port={}",
nodeAddress, nodeHostname, nodeCanonical, nodePort);
+ }
+ /**
+  * Notifies the zone management strategy that a client has been added, passing the zone
+  * returned by {@code csc.getClientZone()}, and then logs the addition.
+  * NOTE(review): the log message below still carries the "addNodeInTopology:" prefix although
+  * it is emitted from this method -- confirm whether the prefix should follow the method name.
+  */
+ private synchronized void addedNodeInTopology(ClientShellCommand csc) {
// Signal Zone Management Strategy for new client registration
- zoneManagementStrategy.nodeAdded(csc, this, zone);
+ zoneManagementStrategy.nodeAdded(csc, this, csc.getClientZone());
log.info("addNodeInTopology: Client added in topology: client={}, address={}", csc.getId(), csc.getClientIpAddress());
}
@@ -218,7 +332,7 @@ public class ClusteringCoordinator extends NoopCoordinator {
private synchronized void removeNodeFromTopology(ClientShellCommand csc) {
// Assign client in a zone
- String zoneId = zoneManagementStrategy.getZoneIdFor(csc);
+ String zoneId = clusterZoneDetector.getZoneIdFor(csc);
ClusterZone zone = topologyMap.get(zoneId);
if (zone == null) {
log.warn("removeNodeFromTopology: Not Registered client removed: client={}, address={}", csc.getId(), csc.getClientIpAddress());
@@ -235,7 +349,7 @@ public class ClusteringCoordinator extends NoopCoordinator {
// Methods to be used by Zone Management Strategies
// ------------------------------------------------------------------------
- void sendClusterKey(ClientShellCommand csc, ClusterZone zoneInfo) {
+ void sendClusterKey(ClientShellCommand csc, IClusterZone zoneInfo) {
csc.sendCommand(String.format("CLUSTER-KEY %s %s %s %s",
zoneInfo.getClusterKeystoreFile().getName(), zoneInfo.getClusterKeystoreType(),
zoneInfo.getClusterKeystorePassword(), zoneInfo.getClusterKeystoreBase64()));
@@ -247,7 +361,7 @@ public class ClusteringCoordinator extends NoopCoordinator {
zoneNodes.forEach(c -> c.sendCommand(command));
}
- void instructClusterJoin(ClientShellCommand csc, ClusterZone zone, boolean startElection) {
+ void instructClusterJoin(ClientShellCommand csc, IClusterZone zone, boolean startElection) {
List zoneNodes = zone.getNodes();
log.debug("instructClusterJoin: Zone members: {}", zoneNodes);
@@ -282,7 +396,7 @@ public class ClusteringCoordinator extends NoopCoordinator {
csc.sendCommand("CLUSTER-JOIN "+command);
}
- void instructClusterLeave(ClientShellCommand csc, ClusterZone zone) {
+ void instructClusterLeave(ClientShellCommand csc, IClusterZone zone) {
// Send cluster leave command
log.debug("instructClusterLeave: Client {} @ {} leaves cluster: CLUSTER-LEAVE", csc.getId(), csc.getClientIpAddress());
try {
@@ -293,7 +407,7 @@ public class ClusteringCoordinator extends NoopCoordinator {
}
}
- void electAggregator(ClusterZone zone) {
+ /** Sends the "CLUSTER-EXEC broker elect" command to the nodes returned by {@code zone.getNodes()}. */
+ void electAggregator(IClusterZone zone) {
sendCommandToZone("CLUSTER-EXEC broker elect", zone.getNodes());
}
}
diff --git a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/DefaultZoneManagementStrategy.java b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/DefaultZoneManagementStrategy.java
index 2acf7e5c1585ffa3cdcdf471e2f1e0b99512eb50..06af812998b3fbe7094561fcd3d290bf1c1fe671 100644
--- a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/DefaultZoneManagementStrategy.java
+++ b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/DefaultZoneManagementStrategy.java
@@ -10,6 +10,7 @@
package eu.melodic.event.baguette.server.coordinator.cluster;
import eu.melodic.event.baguette.server.ClientShellCommand;
+import eu.melodic.event.baguette.server.NodeRegistryEntry;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.glassfish.jersey.internal.guava.InetAddresses;
@@ -36,47 +37,24 @@ public class DefaultZoneManagementStrategy implements IZoneManagementStrategy {
}
@Override
- public String getZoneIdFor(ClientShellCommand c) {
- String nodeAddress = c.getClientIpAddress();
- String hostname = c.getClientHostname();
- log.debug("getZoneIdFor: {}: address: {}", c.getId(), nodeAddress);
- log.debug("getZoneIdFor: {}: hostname: {}", c.getId(), hostname);
- String zoneName = null;
- if (StringUtils.isNotBlank(hostname) && !InetAddresses.isUriInetAddress(hostname)) {
- int p = hostname.indexOf(".");
- if (p>0)
- zoneName = hostname.substring(p+1);
- }
- if (StringUtils.isBlank(zoneName) && StringUtils.isNotBlank(nodeAddress)) {
- int p = nodeAddress.lastIndexOf(".");
- if (p<0) p = nodeAddress.lastIndexOf(":");
- if (p>0)
- zoneName = nodeAddress.substring(0, p);
- }
- return StringUtils.isBlank(zoneName)
- ? UUID.randomUUID().toString()
- : zoneName.replaceAll("[^A-Za-z0-9_]","_");
- }
-
- @Override
- public synchronized void nodeAdded(ClientShellCommand csc, ClusteringCoordinator coordinator, ClusterZone zone) {
+ public synchronized void nodeAdded(ClientShellCommand csc, ClusteringCoordinator coordinator, IClusterZone zone) {
+ // Strategy hook for a newly added client: logs the client and zone ids, then delegates to joinToCluster
// Instruct new node to join cluster
log.info("DefaultZoneManagementStrategy: Node to join cluster: client={}, zone={}", csc.getId(), zone.getId());
joinToCluster(csc, coordinator, zone);
}
- private void joinToCluster(ClientShellCommand csc, ClusteringCoordinator coordinator, ClusterZone zone) {
+ /**
+  * Runs the join sequence for a client: sends it the cluster key of the zone, instructs it to
+  * join the cluster (with the startElection flag set to true), pauses via
+  * {@code coordinator.sleep(1000)} (presumably milliseconds -- confirm) and finally sends the
+  * "CLUSTER-EXEC broker list" command. The trailing CLUSTER-TEST step is currently disabled.
+  */
+ private void joinToCluster(ClientShellCommand csc, ClusteringCoordinator coordinator, IClusterZone zone) {
coordinator.sendClusterKey(csc, zone);
coordinator.instructClusterJoin(csc, zone, true);
coordinator.sleep(1000);
csc.sendCommand("CLUSTER-EXEC broker list");
- coordinator.sleep(1000);
- csc.sendCommand("CLUSTER-TEST");
+ //coordinator.sleep(1000);
+ //csc.sendCommand("CLUSTER-TEST");
}
@Override
- public synchronized void nodeRemoved(ClientShellCommand csc, ClusteringCoordinator coordinator, ClusterZone zone) {
+ public synchronized void nodeRemoved(ClientShellCommand csc, ClusteringCoordinator coordinator, IClusterZone zone) {
// Instruct node to leave cluster
log.info("DefaultZoneManagementStrategy: Node to leave cluster: client={}, zone={}", csc.getId(), zone.getId());
coordinator.instructClusterLeave(csc, zone);
diff --git a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/IClusterZone.java b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/IClusterZone.java
new file mode 100644
index 0000000000000000000000000000000000000000..d2edcb2c5c368b35e39438dd730e9c74e02839a5
--- /dev/null
+++ b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/IClusterZone.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.server.coordinator.cluster;
+
+import eu.melodic.event.baguette.server.ClientShellCommand;
+import eu.melodic.event.baguette.server.NodeRegistryEntry;
+import eu.melodic.event.util.ClientConfiguration;
+import lombok.NonNull;
+
+import java.io.File;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * View of a cluster zone as consumed by ClusteringCoordinator and by
+ * IZoneManagementStrategy implementations: zone id, member nodes (with and without a client),
+ * client configuration and cluster keystore details.
+ */
+public interface IClusterZone {
+ String getId();
+ // Members that are represented by a ClientShellCommand
+ void addNode(@NonNull ClientShellCommand csc);
+ void removeNode(@NonNull ClientShellCommand csc);
+ Set getNodeAddresses();
+ List getNodes();
+ ClientShellCommand getNodeByAddress(String address);
+
+ // Members that are represented only by their NodeRegistryEntry (nodes without a client)
+ void addNodeWithoutClient(@NonNull NodeRegistryEntry entry);
+ void removeNodeWithoutClient(@NonNull NodeRegistryEntry entry);
+ Set getNodeWithoutClientAddresses();
+ List getNodesWithoutClient();
+ NodeRegistryEntry getNodeWithoutClientByAddress(String address);
+
+ // Client configuration of the zone
+ ClientConfiguration getClientConfiguration();
+ ClientConfiguration sendClientConfigurationToZoneClients();
+
+ // Cluster keystore details; ClusteringCoordinator.sendClusterKey puts them in the CLUSTER-KEY command
+ File getClusterKeystoreFile();
+ String getClusterKeystoreType();
+ String getClusterKeystorePassword();
+ String getClusterKeystoreBase64();
+}
diff --git a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/IClusterZoneDetector.java b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/IClusterZoneDetector.java
new file mode 100644
index 0000000000000000000000000000000000000000..5a1eaa237cdf846dcfc9182af18bc4dfcddc29e9
--- /dev/null
+++ b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/IClusterZoneDetector.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.baguette.server.coordinator.cluster;
+
+import eu.melodic.event.baguette.server.ClientShellCommand;
+import eu.melodic.event.baguette.server.NodeRegistryEntry;
+
+import java.util.Map;
+
+/**
+ * Resolves the id of the zone a node is assigned to. ClusteringCoordinator uses the returned
+ * id as the key of its topology map, both for connected clients and for node registry entries.
+ */
+public interface IClusterZoneDetector {
+ /** Returns the zone id for a connected client. */
+ String getZoneIdFor(ClientShellCommand csc);
+ /** Returns the zone id for a node known only through its registry entry. */
+ String getZoneIdFor(NodeRegistryEntry entry);
+ /** Supplies the detector configuration (presumably the zone-related settings -- confirm against implementations). */
+ void setProperties(Map zoneConfig);
+}
diff --git a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/IZoneManagementStrategy.java b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/IZoneManagementStrategy.java
index 5b3d814fe21a25ad5bbaa7a564098decc595e05f..413548a3102fb5d12082fab976d5aebb91f411ef 100644
--- a/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/IZoneManagementStrategy.java
+++ b/event-management/baguette-server/src/main/java/eu/melodic/event/baguette/server/coordinator/cluster/IZoneManagementStrategy.java
@@ -10,16 +10,22 @@
package eu.melodic.event.baguette.server.coordinator.cluster;
import eu.melodic.event.baguette.server.ClientShellCommand;
+import eu.melodic.event.baguette.server.NodeRegistryEntry;
import java.util.Map;
+/**
+ * Hooks through which a strategy can influence how nodes are admitted to and tracked in zones.
+ * Every method has a default implementation: the allow* checks return true and the
+ * notification hooks do nothing, so implementations override only what they need.
+ */
public interface IZoneManagementStrategy {
- String getZoneIdFor(ClientShellCommand csc);
default boolean allowAlreadyPreregisteredNode(Map nodeInfo) { return true; }
+ default boolean allowAlreadyPreregisteredNode(NodeRegistryEntry entry) { return true; }
default boolean allowAlreadyRegisteredNode(ClientShellCommand csc) { return true; }
+ default boolean allowAlreadyRegisteredNode(NodeRegistryEntry entry) { return true; }
default boolean allowNotPreregisteredNode(ClientShellCommand csc) { return true; }
+ default boolean allowNotPreregisteredNode(NodeRegistryEntry entry) { return true; }
default void notPreregisteredNode(ClientShellCommand csc) { }
+ default void notPreregisteredNode(NodeRegistryEntry entry) { }
default void alreadyRegisteredNode(ClientShellCommand csc) { }
- default void nodeAdded(ClientShellCommand csc, ClusteringCoordinator coordinator, ClusterZone zoneInfo) { }
- default void nodeRemoved(ClientShellCommand csc, ClusteringCoordinator coordinator, ClusterZone zoneInfo) { }
+ default void alreadyRegisteredNode(NodeRegistryEntry entry) { }
+
+ // Notifications about topology changes; the zone is passed through the IClusterZone interface
+ default void nodeAdded(ClientShellCommand csc, ClusteringCoordinator coordinator, IClusterZone zoneInfo) { }
+ default void nodeRemoved(ClientShellCommand csc, ClusteringCoordinator coordinator, IClusterZone zoneInfo) { }
}
diff --git a/event-management/bin/detect.sh b/event-management/bin/detect.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6c0209268eb5e4e66c7456ef2e0d6bdfd700c350
--- /dev/null
+++ b/event-management/bin/detect.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+#
+# This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+# Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+# If a copy of the MPL was not distributed with this file, you can obtain one at
+# https://www.mozilla.org/en-US/MPL/2.0/
+#
+
+#Required utilities: cat,grep,tr,cut,df,uname,lscpu,sort,wc,awk. The commented-out commands additionally use uniq.
+
+# Optional prefix placed in front of the basic utilities below.
+# NOTE(review): 'args' is not assigned anywhere in this script, so in plain bash this expands to an
+# empty prefix -- presumably the placeholder is filled in before the script is run; confirm.
+BUSYBOX_PREFIX="${args[0]}"
+
+#TMP_NUM_CPUS=$($BUSYBOX_PREFIX grep 'physical id' /proc/cpuinfo | $BUSYBOX_PREFIX sort | $BUSYBOX_PREFIX uniq | $BUSYBOX_PREFIX wc -l)
+#TMP_NUM_CORES=$($BUSYBOX_PREFIX grep 'cpu cores' /proc/cpuinfo | $BUSYBOX_PREFIX sort | $BUSYBOX_PREFIX uniq | $BUSYBOX_PREFIX cut -d ' ' -f 3)
+#TMP_NUM_PROCESSORS=$($BUSYBOX_PREFIX grep -c ^processor /proc/cpuinfo)
+# Memory figures: second field of the matching /proc/meminfo line
+TMP_RAM_TOTAL_KB=$($BUSYBOX_PREFIX cat /proc/meminfo | $BUSYBOX_PREFIX grep MemTotal | $BUSYBOX_PREFIX tr -s ' ' | $BUSYBOX_PREFIX cut -d ' ' -f 2)
+TMP_RAM_AVAILABLE_KB=$($BUSYBOX_PREFIX cat /proc/meminfo | $BUSYBOX_PREFIX grep MemAvailable | $BUSYBOX_PREFIX tr -s ' ' | $BUSYBOX_PREFIX cut -d ' ' -f 2)
+TMP_RAM_FREE_KB=$($BUSYBOX_PREFIX cat /proc/meminfo | $BUSYBOX_PREFIX grep MemFree | $BUSYBOX_PREFIX tr -s ' ' | $BUSYBOX_PREFIX cut -d ' ' -f 2)
+# Disk figures: fields 2 and 4 of the 'df -k' line that ends with '/'
+TMP_DISK_TOTAL_KB=$($BUSYBOX_PREFIX df -k | $BUSYBOX_PREFIX grep /$ | $BUSYBOX_PREFIX tr -s ' ' | $BUSYBOX_PREFIX cut -d ' ' -f 2)
+TMP_DISK_FREE_KB=$($BUSYBOX_PREFIX df -k | $BUSYBOX_PREFIX grep /$ | $BUSYBOX_PREFIX tr -s ' ' | $BUSYBOX_PREFIX cut -d ' ' -f 4)
+TMP_ARCHITECTURE=$($BUSYBOX_PREFIX uname -m) #x86_64 GNU/Linux indicates that you've a 64bit Linux kernel running. If you see i386/i486/i586/i686 it is a 32-bit architecture. armv7l, armv8 etc. signal a 32-bit arm version of the library while aarch64 indicates a 64-bit arm version of the library
+TMP_KERNEL=$($BUSYBOX_PREFIX uname -s)
+TMP_KERNEL_RELEASE=$($BUSYBOX_PREFIX uname -r)
+
+#NUM_CORES_ALT=$BUSYBOX_PREFIX grep ^cpu\\scores /proc/cpuinfo | $BUSYBOX_PREFIX uniq | $BUSYBOX_PREFIX awk '{print $4}'
+#CAN_RUN_x64 = grep flags /proc/cpuinfo | grep " lm" | wc | tr -s ' ' | cut -d ' ' -f 2 #1 means that it can run x64, 0 that it can't, although that possibly also depends on the kernel installed
+
+# CPU counts: number of distinct values in columns 3, 2 and 1 of 'lscpu -p' (lines containing '#' are dropped).
+# NOTE: lscpu, grep, cut, sort, wc and awk below are called directly, not through $BUSYBOX_PREFIX.
+TMP_NUM_CPUS=$(lscpu -p | grep -v '#' | cut -d ',' -f 3 | sort -u | wc -l)
+TMP_NUM_CORES=$(lscpu -p | grep -v '#' | cut -d ',' -f 2 | sort -u | wc -l)
+TMP_NUM_PROCESSORS=$(lscpu -p | grep -v '#' | cut -d ',' -f 1 | sort -u | wc -l)
+# Used = Total - Free; utilization = 100 * Used / Total (MemAvailable is reported but not used here)
+TMP_RAM_USED_KB=$(echo $TMP_RAM_TOTAL_KB $TMP_RAM_FREE_KB | awk '{print $1 - $2}')
+TMP_RAM_UTILIZATION=$(echo $TMP_RAM_USED_KB $TMP_RAM_TOTAL_KB | awk '{print 100 * $1 / $2}')
+TMP_DISK_USED_KB=$(echo $TMP_DISK_TOTAL_KB $TMP_DISK_FREE_KB | awk '{print $1 - $2}')
+TMP_DISK_UTILIZATION=$(echo $TMP_DISK_USED_KB $TMP_DISK_TOTAL_KB | awk '{print 100 * $1 / $2}')
+
+
+# Emit the results as KEY=VALUE lines, one per line
+echo CPU_SOCKETS=$TMP_NUM_CPUS
+echo CPU_CORES=$TMP_NUM_CORES
+echo CPU_PROCESSORS=$TMP_NUM_PROCESSORS
+echo RAM_TOTAL_KB=$TMP_RAM_TOTAL_KB
+echo RAM_AVAILABLE_KB=$TMP_RAM_AVAILABLE_KB
+echo RAM_FREE_KB=$TMP_RAM_FREE_KB
+echo RAM_USED_KB=$TMP_RAM_USED_KB
+echo RAM_UTILIZATION=$TMP_RAM_UTILIZATION
+echo DISK_TOTAL_KB=$TMP_DISK_TOTAL_KB
+echo DISK_FREE_KB=$TMP_DISK_FREE_KB
+echo DISK_USED_KB=$TMP_DISK_USED_KB
+echo DISK_UTILIZATION=$TMP_DISK_UTILIZATION
+echo OS_ARCHITECTURE=$TMP_ARCHITECTURE
+echo OS_KERNEL=$TMP_KERNEL
+echo OS_KERNEL_RELEASE=$TMP_KERNEL_RELEASE
diff --git a/event-management/bin/initialize-MELODIC-keystores.sh b/event-management/bin/initialize-MELODIC-keystores.sh
index dca266bbadd5c5007eefae23caad582b0d738011..7e92769ab7416d9677b656c61ce9e316509783fc 100755
--- a/event-management/bin/initialize-MELODIC-keystores.sh
+++ b/event-management/bin/initialize-MELODIC-keystores.sh
@@ -23,12 +23,24 @@ echo Resolving Public IP addresses...
PUBLIC_IP=`curl https://diagnostic.opendns.com/myip 2> /dev/null`
#PUBLIC_IP=`curl http://checkip.amazonaws.com 2> /dev/null`
+# or get IP address with 'hostname'
+if [[ "${PUBLIC_IP}" == "" ]]; then
+ PUBLIC_IP=`hostname --all-ip-addresses`
+ echo "PUBLIC_IP (hostname -I): $PUBLIC_IP"
+fi
+
# or set IP address manually
-#PUBLIC_IP='1.2.3.4'
+# NOTE: keep this override commented out until the placeholder is replaced with a real address.
+# If left active, the placeholder is used whenever the lookups above return nothing: the loopback
+# fallback below is then never reached and the placeholder ends up in the certificate SAN.
+#if [[ "${PUBLIC_IP}" == "" ]]; then
+# PUBLIC_IP=1.2.3.4
+# echo "PUBLIC_IP (manually): $PUBLIC_IP"
+#fi
+# or use loopback
if [[ "${PUBLIC_IP}" == "" ]]; then
PUBLIC_IP=127.0.0.1
+ echo "PUBLIC_IP (loopback): $PUBLIC_IP"
fi
+PUBLIC_IP=`echo ${PUBLIC_IP} | sed 's/ *$//g'`
echo PUBLIC_IP=${PUBLIC_IP}
@@ -68,7 +80,17 @@ KEY_SIZE=2048
START_DATE=-1d
VALIDITY=3650
DN_FMT="CN=%s,OU=Information Management Unit (IMU),O=Institute of Communication and Computer Systems (ICCS),L=Athens,ST=Attika,C=GR"
-EXT_SAN_FMT="SAN=dns:%s,dns:localhost,ip:127.0.0.1,ip:${PUBLIC_IP}"
+if [[ "${PUBLIC_IP}" != "" ]]; then
+ PUBLIC_IP_FOR_SAN=${PUBLIC_IP// /,ip:}
+ PUBLIC_IP_FOR_SAN="ip:${PUBLIC_IP_FOR_SAN}"
+fi
+if [[ "${EXTRA_IPS_FOR_SAN}" != "" ]]; then
+ EXTRA_IPS_FOR_SAN=",${EXTRA_IPS_FOR_SAN}"
+ EXTRA_IPS_FOR_SAN=`echo ${EXTRA_IPS_FOR_SAN} | sed -e 's/,/,ip:/g'`
+ EXTRA_IPS_FOR_SAN=`echo ${EXTRA_IPS_FOR_SAN} | sed -e 's/[ \t]//g'`
+fi
+EXT_SAN_FMT="SAN=dns:%s,dns:localhost,ip:127.0.0.1,${PUBLIC_IP_FOR_SAN}${EXTRA_IPS_FOR_SAN}"
+
KEYSTORE_TYPE=PKCS12
KEYSTORE_PASS=melodic
diff --git a/event-management/bin/run.bat b/event-management/bin/run.bat
index d156520fad98eabfc4f85a72cf894f8fe479135f..1deef354d906ff27b5c0e5f0742ec055dff88bc7 100644
--- a/event-management/bin/run.bat
+++ b/event-management/bin/run.bat
@@ -51,7 +51,7 @@ if "%LOG_FILE%"=="" (
)
:: Waiting CDO to come up...
-if exist %MELODIC_CONFIG_DIR%\wait-for-cdo.bat (
+IF NOT DEFINED EMS_SKIP_WAIT_CDO IF EXIST %MELODIC_CONFIG_DIR%\wait-for-cdo.bat (
echo "Waiting CDO server to start..."
%MELODIC_CONFIG_DIR%\wait-for-cdo.bat
)
@@ -66,10 +66,10 @@ IF NOT DEFINED RESTART_EXIT_CODE set RESTART_EXIT_CODE=99
:_restart_ems
rem Use when Esper is packaged in control-service.jar
-rem java %JAVA_OPTS% -Djasypt.encryptor.password=%JASYPT_PASSWORD% -Duser.timezone=Europe/Warsaw -Djava.security.egd=file:/dev/urandom -jar %JARS_DIR%\control-service.jar --logging.config=file:%LOG_CONFIG_FILE%
+rem java %JAVA_OPTS% -Djasypt.encryptor.password=%JASYPT_PASSWORD% -Duser.timezone=Europe/Athens -Djava.security.egd=file:/dev/urandom -jar %JARS_DIR%\control-service.jar --logging.config=file:%LOG_CONFIG_FILE%
rem Use when Esper is NOT packaged in control-service.jar
-java %JAVA_OPTS% -Djasypt.encryptor.password=%JASYPT_PASSWORD% -Duser.timezone=Europe/Warsaw -Djava.security.egd=file:/dev/urandom -cp %JARS_DIR%\control-service.jar -Dloader.path=%JARS_DIR%\esper-7.1.0.jar org.springframework.boot.loader.PropertiesLauncher -nolog --logging.config=file:%LOG_CONFIG_FILE% %*
+java %JAVA_OPTS% -Djasypt.encryptor.password=%JASYPT_PASSWORD% -Djava.security.egd=file:/dev/urandom -cp %JARS_DIR%\control-service.jar -Dloader.path=%JARS_DIR%\esper-7.1.0.jar org.springframework.boot.loader.PropertiesLauncher -nolog --logging.config=file:%LOG_CONFIG_FILE% %*
if errorlevel %RESTART_EXIT_CODE% (
echo Restarting EMS server...
diff --git a/event-management/bin/run.sh b/event-management/bin/run.sh
index 7aef86f03a72baca86ba9eb3273fdb40507b3cc9..939001435b95ce675d8c32710c8e221703eb2f9b 100755
--- a/event-management/bin/run.sh
+++ b/event-management/bin/run.sh
@@ -30,7 +30,7 @@ if [[ -z $PUBLIC_DIR ]]; then PUBLIC_DIR=$BASEDIR/public_resources; export PUBLI
# Initialize keystores and certificate
# Uncomment next line to generate BrokerCEP keystore, truststore and certificate before EMS server launch
-# Modifying 'initialize-keystores.bat' script you can customize the certificate generation
+# Modifying 'initialize-keystores.sh' script you can customize the certificate generation
#./bin/initialize-keystores.sh
# Read JASYPT password (decrypts encrypted configuration settings)
@@ -54,7 +54,7 @@ if [[ -z "$LOG_FILE" ]]; then
fi
# Waiting CDO to come up...
-if [[ -f $MELODIC_CONFIG_DIR/wait-for-cdo.sh ]]; then
+if [[ -z ${EMS_SKIP_WAIT_CDO+x} ]] && [[ -f $MELODIC_CONFIG_DIR/wait-for-cdo.sh ]]; then
echo "Waiting CDO server to start..."
$MELODIC_CONFIG_DIR/wait-for-cdo.sh
fi
@@ -70,10 +70,10 @@ if [[ -z $RESTART_EXIT_CODE ]]; then RESTART_EXIT_CODE=99; export RESTART_EXIT_C
retCode=$RESTART_EXIT_CODE
while :; do
# Use when Esper is packaged in control-service.jar
- # java $JAVA_OPTS -Djasypt.encryptor.password=$JASYPT_PASSWORD -Duser.timezone=Europe/Warsaw -Djava.security.egd=file:/dev/urandom -jar $JARS_DIR/control-service/target/control-service.jar --logging.config=file:$LOG_CONFIG_FILE
+ # java $JAVA_OPTS -Djasypt.encryptor.password=$JASYPT_PASSWORD -Duser.timezone=Europe/Athens -Djava.security.egd=file:/dev/urandom -jar $JARS_DIR/control-service/target/control-service.jar --logging.config=file:$LOG_CONFIG_FILE
# Use when Esper is NOT packaged in control-service.jar
- java $JAVA_OPTS -Djasypt.encryptor.password=$JASYPT_PASSWORD -Duser.timezone=Europe/Warsaw -Djava.security.egd=file:/dev/urandom -cp ${JARS_DIR}/control-service.jar -Dloader.path=${JARS_DIR}/esper-7.1.0.jar org.springframework.boot.loader.PropertiesLauncher --logging.config=file:$LOG_CONFIG_FILE $*
+ java $JAVA_OPTS -Djasypt.encryptor.password=$JASYPT_PASSWORD -Djava.security.egd=file:/dev/urandom -cp ${JARS_DIR}/control-service.jar -Dloader.path=${JARS_DIR}/esper-7.1.0.jar org.springframework.boot.loader.PropertiesLauncher --logging.config=file:$LOG_CONFIG_FILE $*
retCode=$?
if [[ $retCode -eq $RESTART_EXIT_CODE ]]; then echo "Restarting EMS server..."; else break; fi
diff --git a/event-management/bin/sysmon.sh b/event-management/bin/sysmon.sh
new file mode 100644
index 0000000000000000000000000000000000000000..9d15d4a640c0e657f828ea13b336098e1585ebf6
--- /dev/null
+++ b/event-management/bin/sysmon.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+#
+# This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+# Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+# If a copy of the MPL was not distributed with this file, you can obtain one at
+# https://www.mozilla.org/en-US/MPL/2.0/
+#
+
+# Report CPU usage (%) -- sum of the 2nd and 4th fields of the "Cpu(s)" line printed by top
+echo CPU: `top -b -n1 | grep "Cpu(s)" | awk '{print $2 + $4}'`
+
+# Report Memory usage (%) -- 100 * field 3 / field 2 of the "Mem" line printed by 'free -m'
+FREE_DATA=`free -m | grep Mem`
+CURRENT=`echo $FREE_DATA | cut -f3 -d' '`
+TOTAL=`echo $FREE_DATA | cut -f2 -d' '`
+echo RAM: $(echo "$CURRENT $TOTAL" | awk '{print 100 * $1 / $2}' )
+
+# Report Disk usage (%) -- '/' partition only
+#echo DISK: `df -lh | awk '{if ($6 == "/") { print $5 }}' | head -1 | cut -d'%' -f1`
+# Sizes are requested in 1K blocks (-k) so that Used ($3) and Size ($2) are plain numbers in the
+# same unit; with 'df -h' awk would divide values such as "900M" by "50G" and report a bogus ratio.
+# -P (POSIX format) keeps every filesystem on a single line, so the field positions are stable.
+echo DISK: `df -lkP | awk '{if ($6 == "/") { print 100 * $3 / $2 }}'`
+
+# Report Network RX/TX usage (B/s) -- only interfaces whose name contains "eth" are measured
+ARR=($(ls -1 /sys/class/net/ | grep eth))
+
+# Print "<rx> <tx>": the rx_bytes and tx_bytes counters summed over the interfaces listed in ARR
+function measure_ifs() {
+    local rx_total=0 tx_total=0
+    for IF in "${ARR[@]}"; do
+        let rx_total=rx_total+$(cat /sys/class/net/${IF}/statistics/rx_bytes)
+        let tx_total=tx_total+$(cat /sys/class/net/${IF}/statistics/tx_bytes)
+    done
+    echo $rx_total $tx_total
+}
+
+# Sample the cumulative byte counters twice, one second apart
+START=($(measure_ifs))
+sleep 1
+END=($(measure_ifs))
+
+# Index 0 holds the RX total and index 1 the TX total, so each difference is the number of
+# bytes transferred during the one-second pause
+RX=$(( END[0] - START[0] ))
+TX=$(( END[1] - START[1] ))
+echo RX: $RX
+echo TX: $TX
diff --git a/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/BrokerCepService.java b/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/BrokerCepService.java
index b77d45d600f73f062db4c100d7ad0177f224e4e6..4466b0116ad24ea59a65eec268a969c647158c69 100644
--- a/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/BrokerCepService.java
+++ b/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/BrokerCepService.java
@@ -203,7 +203,7 @@ public class BrokerCepService {
+ /**
+  * Publishes an event to the given destination. When the bypass-local-broker property is set and
+  * the local publish succeeds, the method returns without contacting the broker; otherwise the
+  * event is sent through {@code _publishEvent}.
+  * NOTE(review): the local-bypass branch wraps the map with {@code new EventMap(...)} while the
+  * broker path uses {@code EventMap.toEventMap(...)} -- confirm that the difference is intended.
+  */
public synchronized void publishEvent(String connectionString, String destinationName, Map eventMap) throws JMSException {
if (properties.isBypassLocalBroker() && _publishLocalEvent(connectionString, destinationName, new EventMap(eventMap)))
return;
- _publishEvent(connectionString, destinationName, new EventMap(eventMap));
+ _publishEvent(connectionString, destinationName, EventMap.toEventMap(eventMap));
}
public synchronized void publishEvent(String connectionString, String username, String password, String destinationName, Map eventMap) throws JMSException {
@@ -308,12 +308,15 @@ public class BrokerCepService {
MessageProducer producer = session.createProducer(destination);
producer.setDeliveryMode(javax.jms.DeliveryMode.NON_PERSISTENT);
- // Create a messages
+ // Create a message
//ObjectMessage message = session.createObjectMessage(event);
String payload = gson.toJson(event);
log.trace("BrokerCepService.publishEvent(): Message payload: topic={}, payload={}", destination, payload);
TextMessage message = session.createTextMessage(payload);
+ // Set message properties
+ addEventPropertiesToMessage(event, message);
+
// Tell the producer to send the message
long hash = message.hashCode();
//log.info("BrokerCepService.publishEvent(): Sending message: connection={}, username={}, destination={}, hash={}, payload={}", connectionString, username, destinationName, hash, event);
@@ -323,6 +326,22 @@ public class BrokerCepService {
log.debug("BrokerCepService.publishEvent(): Message sent: destination={}, hash={}, payload={}", destinationName, hash, event);
}
+ /**
+  * Copies the event properties of an EventMap onto the JMS message as string properties.
+  * Events that are not EventMap instances, or whose property map is null, leave the message
+  * untouched. Null property values are set as null; other values are converted with toString().
+  * A JMSException raised for a single property is logged and that property is skipped, so the
+  * remaining properties are still processed.
+  *
+  * @param event the event being published
+  * @param message the JMS message that receives the properties
+  */
+ private void addEventPropertiesToMessage(Serializable event, Message message) {
+ if (event instanceof EventMap) {
+ Map eventProperties = ((EventMap) event).getEventProperties();
+ if (eventProperties!=null) {
+ eventProperties.forEach((pName,pValue)->{
+ try {
+ message.setStringProperty(pName, pValue!=null ? pValue.toString() : null);
+ } catch (JMSException e) {
+ // Best effort: report the failure and carry on with the next property
+ log.warn("BrokerCepService.publishEvent(): Exception while setting event property. Skipping it: name={}, value={}", pName, pValue);
+ log.debug("BrokerCepService.publishEvent(): Exception while setting event property. Skipping it: name={}, value={}, EXCEPTION:\n", pName, pValue, e);
+ }
+ });
+ }
+ }
+ }
+
private String getAddressFromBrokerUrl(String url) {
return StringUtils.substringBetween(url, "://",":");
}
@@ -401,7 +420,7 @@ public class BrokerCepService {
}
public Map getBrokerCepStatistics() {
- Map bcepStats = new HashMap<>();
+ Map bcepStats = new HashMap<>();
bcepStats.put("count-event-local-publish-success", BrokerCepStatementSubscriber.getLocalPublishSuccessCounter());
bcepStats.put("count-event-local-publish-failure", BrokerCepStatementSubscriber.getLocalPublishFailureCounter());
bcepStats.put("count-event-forwards-success", BrokerCepStatementSubscriber.getForwardSuccessCounter());
@@ -412,9 +431,7 @@ public class BrokerCepService {
bcepStats.put("count-total-events-other", BrokerCepConsumer.getOtherEventCounter());
bcepStats.put("count-total-events-failures", BrokerCepConsumer.getEventFailuresCounter());
- Map statsMap = new HashMap<>();
- statsMap.put("broker-cep", bcepStats);
- return statsMap;
+ return bcepStats;
}
public void clearBrokerCepStatistics() {
diff --git a/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/BrokerCepStatementSubscriber.java b/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/BrokerCepStatementSubscriber.java
index 395f633decd43e6919a6586b5dabf7e8aa0a501e..651836e5c9cec0642f16e87827b785069725fb18 100644
--- a/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/BrokerCepStatementSubscriber.java
+++ b/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/BrokerCepStatementSubscriber.java
@@ -50,19 +50,20 @@ public class BrokerCepStatementSubscriber implements StatementSubscriber {
log.info("- New event received: subscriber={}, topic={}, payload={}", name, topic, eventMap);
String localBrokerUrl = brokerCep.getBrokerCepProperties().getBrokerUrlForConsumer();
String username = brokerCep.getBrokerUsername();
- String password = passwordUtil.getPasswordEncoder().encode(brokerCep.getBrokerPassword());
+ String password = brokerCep.getBrokerPassword();
+ String passwordEncoded = passwordUtil.encodePassword(password);
try {
// Publish new event to Local Broker topic
log.trace("- Publishing event to local broker: subscriber={}, local-broker={}, username={}, password={}, topic={}, payload={}",
- name, localBrokerUrl, username, password, topic, eventMap);
+ name, localBrokerUrl, username, passwordEncoded, topic, eventMap);
brokerCep.publishEvent(localBrokerUrl, username, password, topic, eventMap);
log.debug("- Event published to local broker: subscriber={}, local-broker={}, username={}, password={}, topic={}, payload={}",
- name, localBrokerUrl, username, password, topic, eventMap);
+ name, localBrokerUrl, username, passwordEncoded, topic, eventMap);
countLocalPublish(true);
} catch (Exception ex) {
log.error("- New event: ERROR while publishing to local broker: subscriber={}, local-broker={}, username={}, password={}, topic={}, exception=",
- name, localBrokerUrl, username, password, topic, ex);
+ name, localBrokerUrl, username, passwordEncoded, topic, ex);
countLocalPublish(false);
}
}
diff --git a/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/broker/BrokerAdvisoryWatcher.java b/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/broker/BrokerAdvisoryWatcher.java
index b7a5d3a0d69ad0bdc1dac928a7cd68f3e5f2acc0..faf6cd3586bba57ab5ee3e0cc4fba17b05816c25 100644
--- a/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/broker/BrokerAdvisoryWatcher.java
+++ b/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/broker/BrokerAdvisoryWatcher.java
@@ -20,9 +20,11 @@ import org.apache.activemq.command.DestinationInfo;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
+import org.springframework.scheduling.TaskScheduler;
import org.springframework.stereotype.Service;
import javax.jms.*;
+import java.time.Instant;
@ConditionalOnProperty(name="brokercep.enable-advisory-watcher", matchIfMissing = true)
@Service
@@ -36,12 +38,17 @@ public class BrokerAdvisoryWatcher implements MessageListener, InitializingBean
private BrokerCepService brokerCerService;
@Autowired
private PasswordUtil passwordUtil;
+ @Autowired
+ private TaskScheduler taskScheduler;
+
+ private final int initRetryDelay = 5; // in seconds
private Connection connection;
private Session session;
@Override
public void afterPropertiesSet() {
+ log.debug("BrokerAdvisoryWatcher: afterPropertiesSet: BrokerCepProperties: {}", brokerCerService.getBrokerCepProperties());
initialize();
}
@@ -72,7 +79,9 @@ public class BrokerAdvisoryWatcher implements MessageListener, InitializingBean
consumer.setMessageListener( this );
log.debug("BrokerAdvisoryWatcher.init(): Initializing instance... done");
} catch (Exception ex) {
- log.error("BrokerAdvisoryWatcher.init(): EXCEPTION: ", ex);
+ log.error("BrokerAdvisoryWatcher.init(): EXCEPTION: will retry in {} seconds:", initRetryDelay, ex);
+ final BrokerAdvisoryWatcher _this = this;
+ taskScheduler.schedule(_this::initialize, Instant.now().plusSeconds(initRetryDelay)); // re-run initialize() once the retry delay has elapsed
}
}
diff --git a/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/broker/BrokerConfig.java b/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/broker/BrokerConfig.java
index 9f12991e57e6baecbbe7c5d8bc400a99c15a490a..4b10425f12a68eaabaa2678886818284542288ed 100644
--- a/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/broker/BrokerConfig.java
+++ b/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/broker/BrokerConfig.java
@@ -13,6 +13,7 @@ import eu.melodic.event.brokercep.broker.interceptor.AbstractMessageInterceptor;
import eu.melodic.event.brokercep.properties.BrokerCepProperties;
import eu.melodic.event.util.KeystoreUtil;
import eu.melodic.event.util.PasswordUtil;
+import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.activemq.ActiveMQConnectionFactory;
import org.apache.activemq.ActiveMQSslConnectionFactory;
@@ -29,7 +30,6 @@ import org.apache.activemq.usage.SystemUsage;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.InitializingBean;
-import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.Bean;
@@ -51,10 +51,11 @@ import java.util.stream.Collectors;
//import org.apache.activemq.security.JaasAuthenticationPlugin;
+@Slf4j
@Service
-@Configuration
@EnableJms
-@Slf4j
+@Configuration
+@RequiredArgsConstructor
public class BrokerConfig implements InitializingBean {
private final static int LOCAL_ADMIN_INDEX = 0;
@@ -64,12 +65,9 @@ public class BrokerConfig implements InitializingBean {
private final static int USERNAME_RANDOM_PART_LENGTH = 10;
private final static int PASSWORD_LENGTH = 20;
- @Autowired
- private BrokerCepProperties properties;
- @Autowired
- private PasswordUtil passwordUtil;
- @Autowired
- private ApplicationContext applicationContext;
+ private final BrokerCepProperties properties;
+ private final PasswordUtil passwordUtil;
+ private final ApplicationContext applicationContext;
private SimpleAuthenticationPlugin brokerAuthenticationPlugin;
private SimpleBrokerAuthorizationPlugin brokerAuthorizationPlugin;
@@ -133,10 +131,8 @@ public class BrokerConfig implements InitializingBean {
brokerAuthenticationPlugin = sap;
if (log.isDebugEnabled()) {
- log.debug("BrokerConfig._initializeSecurity(): Initialized broker authentication plugin: anonymous-access={}, user-credentials={}",
- sap.isAnonymousAccessAllowed(),
- sap.getUserPasswords().entrySet().stream()
- .collect(Collectors.toMap(Map.Entry::getKey, e -> passwordUtil.encodePassword(e.getValue())))
+ log.debug("BrokerConfig._initializeSecurity(): Initialized broker authentication plugin: anonymous-access={}, user-list={}",
+ sap.isAnonymousAccessAllowed(), sap.getUserPasswords().keySet()
);
}
}
@@ -170,9 +166,11 @@ public class BrokerConfig implements InitializingBean {
log.trace("BrokerConfig.initializeKeyAndCert(): Retrieving certificate for Broker-SSL...");
this.brokerCert = KeystoreUtil
.getKeystore(properties.getSsl().getKeystoreFile(), properties.getSsl().getKeystoreType(), properties.getSsl().getKeystorePassword())
+ .passwordUtil(passwordUtil)
.getEntryCertificateAsPEM(properties.getSsl().getKeyEntryNameValue());
log.trace("BrokerConfig.initializeKeyAndCert(): Retrieving certificate for Broker-SSL: file={}, type={}, password={}, alias={}, cert=\n{}",
- properties.getSsl().getKeystoreFile(), properties.getSsl().getKeystoreType(), properties.getSsl().getKeystorePassword(),
+ properties.getSsl().getKeystoreFile(), properties.getSsl().getKeystoreType(),
+ passwordUtil.encodePassword(properties.getSsl().getKeystorePassword()),
properties.getSsl().getKeyEntryNameValue(), this.brokerCert);
log.info("BrokerConfig.initializeKeyAndCert(): Initializing keystore, truststore and certificate for Broker-SSL... done");
}
@@ -349,6 +347,11 @@ public class BrokerConfig implements InitializingBean {
final MessageInterceptorRegistry registry = MessageInterceptorRegistry.getInstance().get(brokerService); // or ...get(BrokerRegistry.getInstance().findFirst());
log.trace("BrokerConfig: Message interceptor registry: {}", registry);
+ if (properties.getMessageInterceptors()==null) {
+ log.warn("BrokerConfig: No message interceptors configured");
+ return;
+ }
+
log.info("BrokerConfig: Message interceptors initializing...");
List interceptorSpecs = properties.getMessageInterceptors()
.stream()
diff --git a/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/broker/interceptor/SourceAddressMessageUpdateInterceptor.java b/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/broker/interceptor/SourceAddressMessageUpdateInterceptor.java
index 7e8a3625d6bb44b0bce080050d17b7961057f38e..2642a006d92b2a532ff566e70fb53ba5a720802d 100644
--- a/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/broker/interceptor/SourceAddressMessageUpdateInterceptor.java
+++ b/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/broker/interceptor/SourceAddressMessageUpdateInterceptor.java
@@ -9,6 +9,7 @@
package eu.melodic.event.brokercep.broker.interceptor;
+import eu.melodic.event.util.EmsConstant;
import eu.melodic.event.util.NetUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.activemq.broker.Connection;
@@ -17,12 +18,18 @@ import org.apache.commons.lang3.StringUtils;
@Slf4j
public class SourceAddressMessageUpdateInterceptor extends AbstractMessageInterceptor {
- private final String sourceAddressPropertyName = "producer-host";
+ private final String sourceAddressPropertyName = EmsConstant.EVENT_PROPERTY_SOURCE_ADDRESS;
@Override
public void intercept(Message message) {
log.trace("SourceAddressMessageUpdateInterceptor: Message: {}", message);
try {
+ Object sourceProperty = message.getProperty(sourceAddressPropertyName);
+ if (sourceProperty!=null && StringUtils.isNotBlank(sourceProperty.toString())) {
+ log.trace("SourceAddressMessageUpdateInterceptor: Message has Producer Host property set: {}", sourceProperty);
+ return;
+ }
+
// get remote address from connection
Connection conn = getProducerBrokerExchange().getConnectionContext().getConnection();
log.trace("SourceAddressMessageUpdateInterceptor: Connection: {}", conn);
diff --git a/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/event/EventMap.java b/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/event/EventMap.java
index 0394e337c3cffa2dc5945a9493a5c792a115b959..6178295f3a2e06857e6fa4a851d80cabb719dfac 100644
--- a/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/event/EventMap.java
+++ b/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/event/EventMap.java
@@ -12,6 +12,7 @@ package eu.melodic.event.brokercep.event;
import com.google.gson.Gson;
import lombok.Data;
import lombok.EqualsAndHashCode;
+import lombok.NoArgsConstructor;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
@@ -22,6 +23,7 @@ import java.util.stream.Collectors;
@Data
@Slf4j
+@NoArgsConstructor
@EqualsAndHashCode(callSuper = false)
public class EventMap extends LinkedHashMap implements Serializable {
@@ -65,16 +67,34 @@ public class EventMap extends LinkedHashMap implements Serializa
}
+ // Event properties
+ private Map eventProperties;
+
+ public Object getEventProperty(@NonNull String name) { // returns the property stored under 'name', or null if absent
+ return eventProperties!=null ? eventProperties.get(name) : null; // eventProperties stays null until a property is first set
+ }
+
+ public synchronized Object setEventProperty(@NonNull String name, Object value) {
+ if (eventProperties ==null) eventProperties = new LinkedHashMap<>();
+ return eventProperties.put(name, value);
+ }
+
// Constructors
- public EventMap() {
+ /*public EventMap() {
super();
- }
+ put(TIMESTAMP_NAME, System.currentTimeMillis());
+ }*/
public EventMap(Map map) {
map.forEach((k, v) -> {
log.trace("EventMap.: key={}, value={}", k, v);
this.put(k, v);
});
+ if (map instanceof EventMap) {
+ Map properties = ((EventMap) map).getEventProperties();
+ if (properties!=null && properties.size()>0)
+ setEventProperties(new LinkedHashMap<>(properties));
+ }
}
public EventMap(double metricValue) {
@@ -94,6 +114,13 @@ public class EventMap extends LinkedHashMap implements Serializa
}
+ // Convert Object to EventMap
+ public static EventMap toEventMap(@NonNull Object o) {
+ if (o instanceof EventMap) return (EventMap) o;
+ if (o instanceof Map) return new EventMap((Map) o);
+ return parseEventMap(o.toString());
+ }
+
// Parse from string
public static EventMap parseEventMap(@NonNull String s) {
/*if (s==null) return null;
diff --git a/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/properties/BrokerCepProperties.java b/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/properties/BrokerCepProperties.java
index d8553c774a2efc7d7d288938c1a4abe3bcb2bada..667fe90c13ce404418927da18029d5429c8e1bcd 100644
--- a/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/properties/BrokerCepProperties.java
+++ b/event-management/broker-cep/src/main/java/eu/melodic/event/brokercep/properties/BrokerCepProperties.java
@@ -61,6 +61,7 @@ public class BrokerCepProperties {
@Value("${authentication-enabled:false}")
private boolean authenticationEnabled;
+ @ToString.Exclude
@Value("${additional-broker-credentials:}")
private String additionalBrokerCredentials;
@Value("${authorization-enabled:false}")
@@ -108,6 +109,7 @@ public class BrokerCepProperties {
public static class ForwardDestinationConfig {
private String connectionString;
private String username;
+ @ToString.Exclude
private String password;
}
}
diff --git a/event-management/broker-client/pom.xml b/event-management/broker-client/pom.xml
index 02a140f8547b9a9b080fafc97b8c67098a042405..69dfb8229b470bb30a550dcc78cc256f556c404b 100644
--- a/event-management/broker-client/pom.xml
+++ b/event-management/broker-client/pom.xml
@@ -21,6 +21,13 @@
https://gitlab.ow2.org/melodic/melodic-upperware/-/tree/master/event-management/broker-client
+
+
+ eu.melodic.event
+ util
+ ${project.version}
+
+
org.springframework.boot
diff --git a/event-management/broker-client/src/main/java/eu/melodic/event/brokerclient/BrokerClient.java b/event-management/broker-client/src/main/java/eu/melodic/event/brokerclient/BrokerClient.java
index 1aa5bf5756c1cd0ccea2b08c5dcb9f4e5fe45e35..28a1f408d1778bdf4dbdee9d76a7670882c1ac10 100644
--- a/event-management/broker-client/src/main/java/eu/melodic/event/brokerclient/BrokerClient.java
+++ b/event-management/broker-client/src/main/java/eu/melodic/event/brokerclient/BrokerClient.java
@@ -13,6 +13,7 @@ import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import eu.melodic.event.brokerclient.event.EventMap;
import eu.melodic.event.brokerclient.properties.BrokerClientProperties;
+import eu.melodic.event.util.PasswordUtil;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.apache.activemq.ActiveMQConnection;
@@ -41,6 +42,8 @@ public class BrokerClient {
@Autowired
private BrokerClientProperties properties;
+ @Autowired
+ private PasswordUtil passwordUtil;
private Connection connection;
private Session session;
private HashMap listeners = new HashMap<>();
@@ -57,6 +60,20 @@ public class BrokerClient {
properties = new BrokerClientProperties(p);
}
+ public BrokerClient(PasswordUtil pu) {
+ passwordUtil = pu;
+ }
+
+ public BrokerClient(BrokerClientProperties bcp, PasswordUtil pu) {
+ properties = bcp;
+ passwordUtil = pu;
+ }
+
+ public BrokerClient(Properties p, PasswordUtil pu) {
+ properties = new BrokerClientProperties(p);
+ passwordUtil = pu;
+ }
+
// ------------------------------------------------------------------------
public static BrokerClient newClient() throws java.io.IOException, JMSException {
@@ -87,7 +104,7 @@ public class BrokerClient {
}
// initialize broker client
- BrokerClient client = new BrokerClient(p);
+ BrokerClient client = new BrokerClient(p, new PasswordUtil());
log.info("BrokerClient: Default Configuration:\n{}", client.properties);
return client;
@@ -179,13 +196,43 @@ public class BrokerClient {
_publishEvent(connectionString, destinationName, messageType, eventContents, propertiesMap);
}
- @SneakyThrows
+ public synchronized void publishEventWithCredentials(String connectionString, String username, String password, String destinationName, Map eventMap) throws JMSException {
+ _publishEvent(connectionString, username, password, destinationName, MESSAGE_TYPE.TEXT, new EventMap(eventMap), null);
+ }
+
+ public synchronized void publishEventWithCredentials(String connectionString, String username, String password, String destinationName, Map eventMap, Map propertiesMap) throws JMSException {
+ _publishEvent(connectionString, username, password, destinationName, MESSAGE_TYPE.TEXT, new EventMap(eventMap), propertiesMap);
+ }
+
+ public synchronized void publishEventWithCredentials(String connectionString, String username, String password, String destinationName, String eventContents) throws JMSException {
+ _publishEvent(connectionString, username, password, destinationName, MESSAGE_TYPE.TEXT, eventContents, null);
+ }
+
+ public synchronized void publishEventWithCredentials(String connectionString, String username, String password, String destinationName, String eventContents, Map propertiesMap) throws JMSException {
+ _publishEvent(connectionString, username, password, destinationName, MESSAGE_TYPE.TEXT, eventContents, propertiesMap);
+ }
+
+ public synchronized void publishEventWithCredentials(String connectionString, String username, String password, String destinationName, String type, Serializable eventContents, Map propertiesMap) throws JMSException {
+ MESSAGE_TYPE messageType = StringUtils.isNotBlank(type)
+ ? MESSAGE_TYPE.valueOf(type.trim().toUpperCase())
+ : MESSAGE_TYPE.TEXT;
+ _publishEvent(connectionString, username, password, destinationName, messageType, eventContents, propertiesMap);
+ }
+
protected synchronized void _publishEvent(String connectionString, String destinationName, MESSAGE_TYPE messageType, Serializable event, Map propertiesMap) throws JMSException {
+ _publishEvent(connectionString, null, null, destinationName, messageType, event, propertiesMap);
+ }
+
+ @SneakyThrows
+ protected synchronized void _publishEvent(String connectionString, String username, String password, String destinationName, MESSAGE_TYPE messageType, Serializable event, Map propertiesMap) throws JMSException {
// open or reuse connection
checkProperties();
boolean _closeConn = false;
if (session==null) {
- openConnection(connectionString);
+ if (StringUtils.isBlank(username))
+ openConnection(connectionString);
+ else
+ openConnection(connectionString, username, password);
_closeConn = ! properties.isPreserveConnection();
}
@@ -387,11 +434,11 @@ public class BrokerClient {
public synchronized void openConnection(String connectionString, String username, String password, boolean preserveConnection) throws JMSException {
checkProperties();
if (connectionString == null) connectionString = properties.getBrokerUrl();
- log.debug("BrokerClient: Credetials provided as arguments: username={}, password={}", username, password);
+ log.debug("BrokerClient: Credentials provided as arguments: username={}, password={}", username, passwordUtil.encodePassword(password));
if (StringUtils.isBlank(username)) {
username = properties.getBrokerUsername();
password = properties.getBrokerPassword();
- log.debug("BrokerClient: Credetials read from properties: username={}, password={}", username, password);
+ log.debug("BrokerClient: Credentials read from properties: username={}, password={}", username, passwordUtil.encodePassword(password));
}
// Create connection factory
@@ -401,15 +448,15 @@ public class BrokerClient {
connectionFactory.setUserName(username);
connectionFactory.setPassword(password);
}
- log.debug("BrokerClient: Connection credentials: username={}, password={}", username, password);
+ log.debug("BrokerClient: Connection credentials: username={}, password={}", username, passwordUtil.encodePassword(password));
// Create a Connection
- log.info("BrokerClient: Connecting to broker: {}...", connectionString);
+ log.debug("BrokerClient: Connecting to broker: {}...", connectionString);
Connection connection = connectionFactory.createConnection();
connection.start();
// Create a Session
- log.info("BrokerClient: Opening session...");
+ log.debug("BrokerClient: Opening session...");
Session session = connection.createSession(false, Session.AUTO_ACKNOWLEDGE);
this.connection = connection;
diff --git a/event-management/broker-client/src/main/java/eu/melodic/event/brokerclient/BrokerClientApp.java b/event-management/broker-client/src/main/java/eu/melodic/event/brokerclient/BrokerClientApp.java
index b33d1d0b14fc09db12b659e1c195aa29b7ac8dbc..6b7ff87f141b5247d00f0db6297aab7b7a5761f6 100644
--- a/event-management/broker-client/src/main/java/eu/melodic/event/brokerclient/BrokerClientApp.java
+++ b/event-management/broker-client/src/main/java/eu/melodic/event/brokerclient/BrokerClientApp.java
@@ -168,12 +168,12 @@ public class BrokerClientApp {
BrokerClient client = BrokerClient.newClient();
client.openConnection(url, username, password, true);
- EventGenerator generator = new EventGenerator();
- generator.setClient(client);
+ EventGenerator generator = new EventGenerator(client);
+ //generator.setClient(client);
generator.setBrokerUrl(url);
generator.setDestinationName(topic);
generator.setInterval(interval);
- generator.setHowmany(howmany);
+ generator.setHowMany(howmany);
generator.setLowerValue(lowerValue);
generator.setUpperValue(upperValue);
generator.setLevel(level);
diff --git a/event-management/broker-client/src/main/java/eu/melodic/event/brokerclient/event/EventGenerator.java b/event-management/broker-client/src/main/java/eu/melodic/event/brokerclient/event/EventGenerator.java
index 5dcdd0bc92c416f92f80bca4c270b73a10d65418..3151a450128a47ce75e71124d4d4363903de0935 100644
--- a/event-management/broker-client/src/main/java/eu/melodic/event/brokerclient/event/EventGenerator.java
+++ b/event-management/broker-client/src/main/java/eu/melodic/event/brokerclient/event/EventGenerator.java
@@ -12,21 +12,37 @@ package eu.melodic.event.brokerclient.event;
import eu.melodic.event.brokerclient.BrokerClient;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.config.ConfigurableBeanFactory;
+import org.springframework.context.annotation.Scope;
+import org.springframework.stereotype.Component;
+
+import javax.annotation.PostConstruct;
+import java.util.concurrent.atomic.AtomicLong;
-@Data
@Slf4j
+@Data
+@Component
+@Scope(value = ConfigurableBeanFactory.SCOPE_PROTOTYPE)
public class EventGenerator implements Runnable {
- private BrokerClient client;
+ private final static AtomicLong counter = new AtomicLong();
+ private final BrokerClient client;
private String brokerUrl;
+ private String brokerUsername;
+ private String brokerPassword;
private String destinationName;
private long interval;
- private long howmany = -1;
+ private long howMany = -1;
private double lowerValue;
private double upperValue;
private int level;
private transient boolean keepRunning;
+ @PostConstruct
+ public void printCounter() {
+ log.info("New EventGenerator with instance number: {}", counter.getAndIncrement());
+ }
+
public void start() {
if (keepRunning) return;
Thread runner = new Thread(this);
@@ -49,12 +65,12 @@ public class EventGenerator implements Runnable {
double newValue = Math.random() * valueRangeWidth + lowerValue;
EventMap event = new EventMap(newValue, level, System.currentTimeMillis());
log.info("EventGenerator.run(): Sending event #{}: {}", countSent + 1, event);
- client.publishEvent(brokerUrl, destinationName, event);
+ client.publishEventWithCredentials(brokerUrl, brokerUsername, brokerPassword, destinationName, event);
countSent++;
- if (countSent == howmany) keepRunning = false;
+ if (countSent == howMany) keepRunning = false;
log.info("EventGenerator.run(): Event sent #{}: {}", countSent, event);
} catch (Exception ex) {
- log.warn("EventGenerator.run(): WHILE-EXCEPTION: {}", ex);
+ log.warn("EventGenerator.run(): WHILE-EXCEPTION: ", ex);
}
// sleep for 'interval' ms
try {
diff --git a/event-management/common/pom.xml b/event-management/common/pom.xml
new file mode 100644
index 0000000000000000000000000000000000000000..48034aecb4ed9ebb6dff2ab9e4f1952931144555
--- /dev/null
+++ b/event-management/common/pom.xml
@@ -0,0 +1,44 @@
+
+
+
+ event-management
+ eu.melodic.event
+ 4.5.0-SNAPSHOT
+
+ 4.0.0
+
+ common
+ Upperware - EMS - Common to EMS server and clients
+
+
+
+
+ eu.melodic.event
+ broker-cep
+ ${project.version}
+
+
+
+
+ org.springframework
+ spring-web
+
+
+
+
+ org.projectlombok
+ lombok
+ provided
+
+
+
+
\ No newline at end of file
diff --git a/event-management/common/src/main/java/eu/melodic/event/common/collector/CollectorContext.java b/event-management/common/src/main/java/eu/melodic/event/common/collector/CollectorContext.java
new file mode 100644
index 0000000000000000000000000000000000000000..a2498ddc751f6bad694ab7b96a082c2dcebbdda8
--- /dev/null
+++ b/event-management/common/src/main/java/eu/melodic/event/common/collector/CollectorContext.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.common.collector;
+
+import eu.melodic.event.brokercep.event.EventMap;
+import eu.melodic.event.util.ClientConfiguration;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Set;
+
+public interface CollectorContext { // callbacks a collector (e.g. NetdataCollector) uses to query its environment
+ List getNodeConfigurations(); // NOTE(review): element type/contents not visible here - confirm with implementations
+ Set getNodesWithoutClient(); // NetdataCollector iterates these and uses each element's toString() as the node address
+ boolean isAggregator(); // NetdataCollector only collects from the nodes above when this returns true
+ boolean sendEvent(String connectionString, String destinationName, EventMap event, boolean createDestination); // presumably true on success - TODO confirm
+}
diff --git a/event-management/common/src/main/java/eu/melodic/event/common/collector/netdata/NetdataCollector.java b/event-management/common/src/main/java/eu/melodic/event/common/collector/netdata/NetdataCollector.java
new file mode 100644
index 0000000000000000000000000000000000000000..e14238c95eccae1957a91146a54664eda2326f30
--- /dev/null
+++ b/event-management/common/src/main/java/eu/melodic/event/common/collector/netdata/NetdataCollector.java
@@ -0,0 +1,302 @@
+/*
+ * Copyright (C) 2017-2022 Institute of Communication and Computer Systems (imu.iccs.gr)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public License, v2.0, unless
+ * Esper library is used, in which case it is subject to the terms of General Public License v2.0.
+ * If a copy of the MPL was not distributed with this file, you can obtain one at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ */
+
+package eu.melodic.event.common.collector.netdata;
+
+import eu.melodic.event.brokercep.event.EventMap;
+import eu.melodic.event.common.collector.CollectorContext;
+import eu.melodic.event.util.EmsConstant;
+import eu.melodic.event.util.EventBus;
+import lombok.NonNull;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.InitializingBean;
+import org.springframework.boot.web.client.RestTemplateBuilder;
+import org.springframework.http.HttpStatus;
+import org.springframework.http.ResponseEntity;
+import org.springframework.scheduling.TaskScheduler;
+import org.springframework.web.client.RestTemplate;
+
+import java.io.Serializable;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.ScheduledFuture;
+import java.util.stream.Collectors;
+
+/**
+ * Collects measurements from Netdata http server
+ */
+@Slf4j
+@RequiredArgsConstructor
+public class NetdataCollector implements InitializingBean, Runnable {
+ public final static String NETDATA_COLLECTION_START = "NETDATA_COLLECTION_START";
+ public final static String NETDATA_COLLECTION_END = "NETDATA_COLLECTION_END";
+ public final static String NETDATA_CONN_OK = "NETDATA_CONN_OK";
+ public final static String NETDATA_CONN_ERROR = "NETDATA_CONN_ERROR";
+ public final static String NETDATA_NODE_PAUSED = "NETDATA_NODE_PAUSED";
+ public final static String NETDATA_NODE_RESUMED = "NETDATA_NODE_RESUMED";
+
+ protected final NetdataCollectorProperties properties;
+ protected final CollectorContext collectorContext;
+ protected final TaskScheduler taskScheduler;
+ protected final EventBus eventBus;
+
+ protected RestTemplate restTemplate = new RestTemplate();
+ protected boolean started;
+ protected ScheduledFuture> runner;
+ protected List allowedTopics;
+ protected Map topicMap;
+
+ protected Map errorsMap = new HashMap<>();
+ protected Map> ignoredNodes = new HashMap<>();
+
+ protected enum COLLECTION_RESULT { IGNORED, OK, ERROR }
+
+ @Override
+ public void afterPropertiesSet() {
+ log.debug("Collectors::Netdata: properties: {}", properties);
+ this.allowedTopics = properties.getAllowedTopics()==null
+ ? null
+ : properties.getAllowedTopics().stream()
+ .map(s -> s.split(":")[0])
+ .collect(Collectors.toList());
+ this.topicMap = properties.getAllowedTopics()==null
+ ? null
+ : properties.getAllowedTopics().stream()
+ .map(s -> s.split(":", 2))
+ .collect(Collectors.toMap(a -> a[0], a -> a.length>1 ? a[1]: ""));
+
+ this.restTemplate = new RestTemplateBuilder()
+ .setConnectTimeout(Duration.ofSeconds(5))
+ .setReadTimeout(Duration.ofSeconds(5))
+ .build();
+ }
+
+ public synchronized void start() { // validates/defaults the configuration, then schedules run() with a fixed delay
+ // check if already running
+ if (started) {
+ log.warn("Collectors::Netdata: Already started");
+ return;
+ }
+
+ // check parameters
+ if (properties==null || !properties.isEnable()) {
+ log.warn("Collectors::Netdata: Collector not enabled");
+ return;
+ }
+ if (properties.getDelay()<0) properties.setDelay(0); // NOTE(review): a delay of 0 may be rejected by executor-backed schedulers (ScheduledExecutorService requires delay > 0) - confirm
+ if (StringUtils.isBlank(properties.getUrl())) {
+ String url = "http://127.0.0.1:19999/api/v1/allmetrics?format=json";
+ log.debug("Collectors::Netdata: URL not specified. Assuming {}", url);
+ properties.setUrl(url); // the default is written back into the shared properties object
+ }
+
+ log.info("Collectors::Netdata: configuration: {}", properties);
+
+ // Schedule collection execution
+ errorsMap.clear(); // start with fresh per-node error counters
+ ignoredNodes.clear(); // and with no paused nodes
+ runner = taskScheduler.scheduleWithFixedDelay(this, properties.getDelay()); // the handle is kept so stop() can cancel it
+ started = true;
+
+ log.info("Collectors::Netdata: Started");
+ }
+
+ public synchronized void stop() {
+ if (!started) {
+ log.warn("Collectors::Netdata: Not started");
+ return;
+ }
+
+ // Cancel collection execution
+ started = false;
+ runner.cancel(true);
+ runner = null;
+ ignoredNodes.values().forEach(task -> task.cancel(true));
+ log.info("Collectors::Netdata: Stopped");
+ }
+
+ public void run() {
+ if (!started) return;
+
+ log.trace("Collectors::Netdata: run(): BEGIN");
+ if (log.isTraceEnabled()) {
+ log.trace("Collectors::Netdata: run(): errors-map={}", errorsMap);
+ log.trace("Collectors::Netdata: run(): ignored-nodes={}", ignoredNodes.keySet());
+ }
+
+ // collect data from local node
+ if (! properties.isSkipLocal()) {
+ log.info("Collectors::Netdata: Collecting metrics from local node...");
+ collectAndPublishData("");
+ } else {
+ log.debug("Collectors::Netdata: Collection from local node is disabled");
+ }
+
+ // if Aggregator, collect data from nodes without client
+ log.trace("Collectors::Netdata: Nodes without clients in Zone: {}", collectorContext.getNodesWithoutClient());
+ log.trace("Collectors::Netdata: Is Aggregator: {}", collectorContext.isAggregator());
+ if (collectorContext.isAggregator()) {
+ if (collectorContext.getNodesWithoutClient().size()>0) {
+ log.info("Collectors::Netdata: Collecting metrics from remote nodes (without EMS client): {}",
+ collectorContext.getNodesWithoutClient());
+ for (Serializable nodeAddress : collectorContext.getNodesWithoutClient()) {
+ // collect data from remote node
+ collectAndPublishData(nodeAddress.toString());
+ }
+ } else
+ log.debug("Collectors::Netdata: No remote nodes (without EMS client)");
+ }
+
+ log.trace("Collectors::Netdata: run(): END");
+ }
+
+ private COLLECTION_RESULT collectAndPublishData(@NonNull String nodeAddress) { // collects from one node; run() passes "" for the local node
+ if (ignoredNodes.containsKey(nodeAddress)) {
+ log.info("Collectors::Netdata: Node is in ignore list: {}", nodeAddress);
+ return COLLECTION_RESULT.IGNORED;
+ }
+ try {
+ sendEvent(NETDATA_COLLECTION_START, nodeAddress);
+ _collectAndPublishData(nodeAddress);
+ sendEvent(NETDATA_COLLECTION_END, nodeAddress);
+
+ errorsMap.put(nodeAddress, 0); // success: reset the counter so only consecutive failures can trigger a pause
+ sendEvent(NETDATA_CONN_OK, nodeAddress);
+ return COLLECTION_RESULT.OK;
+ } catch (Throwable t) {
+ int errors = errorsMap.compute(nodeAddress, (k, v) -> Optional.ofNullable(v).orElse(0) + 1); // count this failure (missing entry counts as 0)
+ int errorLimit = properties.getErrorLimit();
+ int pausePeriod = properties.getPausePeriod();
+ log.warn("Collectors::Netdata: Exception while collecting metrics from node: {}, #errors={}, exception: {}",
+ nodeAddress, errors, getExceptionMessages(t));
+ log.debug("Collectors::Netdata: Exception while collecting metrics from node: {}, #errors={}\n", nodeAddress, errors, t);
+
+ sendEvent(NETDATA_CONN_ERROR, nodeAddress, "errors="+errors);
+
+ if (errorLimit>0 && pausePeriod>0) { // pausing is active only when both settings are positive
+ if (errors >= errorLimit) {
+ log.warn("Collectors::Netdata: Too many consecutive errors occurred while attempting to collect metrics from node: {}, num-of-errors={}", nodeAddress, errors);
+ log.warn("Collectors::Netdata: Will pause metrics collection from node for {} seconds: {}", pausePeriod, nodeAddress);
+ ignoredNodes.put(nodeAddress, taskScheduler.schedule(() -> { // the scheduled task lifts the pause after pausePeriod seconds
+ errorsMap.put(nodeAddress, 0);
+ ignoredNodes.remove(nodeAddress);
+ log.info("Collectors::Netdata: Resumed metrics collection from node: {}", nodeAddress);
+ sendEvent(NETDATA_NODE_RESUMED, nodeAddress);
+ }, Instant.now().plusSeconds(pausePeriod)));
+
+ sendEvent(NETDATA_NODE_PAUSED, nodeAddress);
+ }
+ } else
+ log.debug("Collectors::Netdata: Metrics collection pausing is disabled");
+ return COLLECTION_RESULT.ERROR;
+ }
+ }
+
+ /**
+  * Renders an exception and its chain of causes as a single line of the form
+  * {@code ClassName: message -> CauseClassName: message -> ...}.
+  *
+  * @param t the outermost exception; may be null
+  * @return the joined description, or an empty string when {@code t} is null
+  */
+ private String getExceptionMessages(Throwable t) {
+     StringBuilder sb = new StringBuilder();
+     for (Throwable cause = t; cause != null; cause = cause.getCause()) {
+         // Separator goes between entries only, so nothing has to be trimmed off afterwards
+         // (the previous substring(4) threw on an empty builder, i.e. when t was null)
+         if (sb.length() > 0) {
+             sb.append(" -> ");
+         }
+         sb.append(cause.getClass().getName()).append(": ").append(cause.getMessage());
+     }
+     return sb.toString();
+ }
+
+ /**
+  * Sends a notification on the event bus about the given node.
+  * The message always carries the node address under "address"; each extra argument of the
+  * form {@code key=value} or {@code key:value} (split at the first separator) adds one more entry.
+  * Extras without a separator or with a blank key are silently dropped.
+  *
+  * @param topic       event bus topic to send to
+  * @param nodeAddress address of the node the event refers to
+  * @param extra       optional additional "key=value" / "key:value" pairs
+  */
+ private void sendEvent(String topic, String nodeAddress, String...extra) {
+     Map message = new HashMap<>();
+     message.put("address", nodeAddress);
+     for (String pair : extra) {
+         String[] parts = pair.split("[:=]", 2);
+         if (parts.length != 2 || StringUtils.isBlank(parts[0])) {
+             continue;
+         }
+         message.put(parts[0].trim(), parts[1]);
+     }
+     eventBus.send(topic, message, getClass().getName());
+ }
+
+ /**
+ * Fetches all metrics of one node from its Netdata REST endpoint and publishes every
+ * dimension value as an event through the collector context.
+ * <p>
+ * A blank address selects the local URL (falling back to the remote URL pattern formatted
+ * with 127.0.0.1); otherwise the remote URL pattern is formatted with the node address.
+ * Exceptions raised here (REST call, parsing, casts) propagate to the caller; only failures
+ * while publishing a single metric are caught and counted.
+ *
+ * @param nodeAddress address of the node to query; blank means the local node
+ */
+ private void _collectAndPublishData(String nodeAddress) {
+ String url;
+ if (StringUtils.isBlank(nodeAddress)) {
+ // Local node data collection URL
+ url = properties.getUrl();
+ if (StringUtils.isBlank(url))
+ url = String.format(properties.getUrlOfNodesWithoutClient(), "127.0.0.1");
+ } else {
+ // Remote node data collection URL
+ url = String.format(properties.getUrlOfNodesWithoutClient(), nodeAddress);
+ }
+ log.info("Collectors::Netdata: Collecting data from url: {}", url);
+
+ log.debug("Collectors::Netdata: Collecting data: {}...", url);
+ // Timestamps are taken around the REST call and after publishing, for the duration log below
+ long startTm = System.currentTimeMillis();
+ ResponseEntity response = restTemplate.getForEntity(url, HashMap.class);
+ long callEndTm = System.currentTimeMillis();
+ log.trace("Collectors::Netdata: ...response: {}", response);
+ if (response.getStatusCode()==HttpStatus.OK) {
+ // NOTE(review): dataMap is used without a null check; an OK response with an empty body
+ // would raise a NullPointerException here - confirm whether that is acceptable
+ Map dataMap = response.getBody();
+ boolean createTopic = properties.isCreateTopic();
+ int countSuccess = 0;
+ int countErrors = 0;
+ log.trace("Collectors::Netdata: ...keys: {}", dataMap.keySet());
+ // Outer loop: one entry per top-level key of the response
+ for (Object key : dataMap.keySet()) {
+ log.trace("Collectors::Netdata: ...Loop-1: key={}", key);
+ if (key==null) continue;
+ Map keyData = (Map)dataMap.get(key);
+ log.trace("Collectors::Netdata: ...Loop-1: key-data={}", keyData);
+ // The entry's "last_updated" value becomes the timestamp of every event created from it
+ long timestamp = Long.parseLong( keyData.get("last_updated").toString() );
+ Map dimensionsMap = (Map)keyData.get("dimensions");
+
+ log.trace("Collectors::Netdata: ...Loop-1: ...dimensions-keys: {}", dimensionsMap.keySet());
+ // Inner loop: one metric per dimension of the current entry
+ for (Object dimKey : dimensionsMap.keySet()) {
+ log.trace("Collectors::Netdata: ...Loop-1: ...dimensions-key: {}", dimKey);
+ if (dimKey==null) continue;
+ // Metric name is "netdata.<key>.<dimension>" with every dot (also those inside key/dimension) turned into "__"
+ String metricName = ("netdata."+ key + "."+ dimKey).replace(".", "__");
+ log.trace("Collectors::Netdata: ...Loop-1: ...metric-name: {}", metricName);
+ Map dimData = (Map)dimensionsMap.get(dimKey);
+ Object valObj = dimData.get("value");
+ log.trace("Collectors::Netdata: ...Loop-1: ...metric-value: {}", valObj);
+ // Dimensions without a value are skipped and not counted
+ if (valObj!=null) {
+ double metricValue = Double.parseDouble(valObj.toString());
+ log.trace("Collectors::Netdata: {} = {}", metricName, metricValue);
+ try {
+ // Decided on the original metric name, i.e. before any topic mapping below is applied
+ boolean createDestination = (createTopic || allowedTopics!=null && allowedTopics.contains(metricName));
+ // Optionally redirect the metric to the topic configured for it in topicMap
+ if (topicMap!=null) {
+ String targetTopic = topicMap.get(metricName);
+ if (targetTopic!=null && !targetTopic.isEmpty())
+ metricName = targetTopic;
+ }
+ EventMap event = new EventMap(metricValue, 1, timestamp);
+ event.setEventProperty(EmsConstant.EVENT_PROPERTY_SOURCE_ADDRESS, nodeAddress);
+ log.debug("Collectors::Netdata: {}: {}", metricName, metricValue);
+ // Only a 'true' result counts as success; a 'false' result is counted neither as success nor as error
+ if (collectorContext.sendEvent(null, metricName, event, createDestination))
+ countSuccess++;
+ } catch (Exception e) {
+ log.warn("Collectors::Netdata: Publishing netdata metric failed: ", e);
+ countErrors++;
+ }
+ }
+ }
+
+ // Stop early (between entries) if the collecting thread has been interrupted
+ if (Thread.currentThread().isInterrupted()) break;
+ }
+ long endTm = System.currentTimeMillis();
+ log.debug("Collectors::Netdata: Collecting data...ok");
+ log.info("Collectors::Netdata: Metrics: extracted={}, published={}, failed={}",
+ countSuccess+countErrors, countSuccess, countErrors);
+ log.debug("Collectors::Netdata: Durations: rest-call={}, extract+publish={}, total={}",
+ callEndTm-startTm, endTm-callEndTm, endTm-startTm);
+ } else {
+ // NOTE(review): a non-OK status is only logged and the method returns normally,
+ // so the caller cannot tell this case from a successful collection - confirm intent
+ log.warn("Collectors::Netdata: Collecting data...failed: Http Status: {}", response.getStatusCode());
+ }
+ }
+}
diff --git a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/collector/netdata/NetdataCollectorProperties.java b/event-management/common/src/main/java/eu/melodic/event/common/collector/netdata/NetdataCollectorProperties.java
similarity index 72%
rename from event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/collector/netdata/NetdataCollectorProperties.java
rename to event-management/common/src/main/java/eu/melodic/event/common/collector/netdata/NetdataCollectorProperties.java
index 294d1e232beaa9c4ec71219caaa2433c3923ce01..b73c6cf5356bb74f9ae409da63df4aad014bce05 100644
--- a/event-management/baguette-client/src/main/java/eu/melodic/event/baguette/client/collector/netdata/NetdataCollectorProperties.java
+++ b/event-management/common/src/main/java/eu/melodic/event/common/collector/netdata/NetdataCollectorProperties.java
@@ -7,13 +7,12 @@
* https://www.mozilla.org/en-US/MPL/2.0/
*/
-package eu.melodic.event.baguette.client.collector.netdata;
+package eu.melodic.event.common.collector.netdata;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
-import org.springframework.context.annotation.PropertySource;
import java.util.List;
@@ -21,11 +20,15 @@ import java.util.List;
@Data
@Configuration
@ConfigurationProperties(prefix = "collector.netdata")
-@PropertySource("file:${MELODIC_CONFIG_DIR}/baguette-client.properties")
public class NetdataCollectorProperties {
private boolean enable;
private long delay;
private String url;
+ private String urlOfNodesWithoutClient;
+ private boolean skipLocal = false;
private boolean createTopic;
private List allowedTopics;
+
+ private int errorLimit; // num of consecutive errors. Zero or negative value disables collection pausing
+ private int pausePeriod; // in seconds. Zero or negative value disables collection pausing
}
diff --git a/event-management/config-files/baguette-client-install/linux/baguette-skip.json b/event-management/config-files/baguette-client-install/linux/baguette-skip.json
new file mode 100644
index 0000000000000000000000000000000000000000..2941719e1ed9d89cc149042e221c56187d0fbb0c
--- /dev/null
+++ b/event-management/config-files/baguette-client-install/linux/baguette-skip.json
@@ -0,0 +1,23 @@
+{
+ "os": "LINUX",
+ "description": "EMS client SKIP installation instruction set",
+ "condition": "${SKIP_BAGUETTE_INSTALLATION:-false} || '${OS_ARCHITECTURE:-x}'.startsWith('arm') || ${CPU_PROCESSORS:-0} <= ${BAGUETTE_INSTALLATION_MIN_PROCESSORS:-0} || ${RAM_AVAILABLE_KB:-0} <= ${BAGUETTE_INSTALLATION_MIN_RAM:-0} || ${DISK_FREE_KB:-0} <= ${BAGUETTE_INSTALLATION_MIN_DISK_FREE:-0}",
+ "instructions": [
+ {
+ "description": "DEBUG: Print node pre-registration VARIABLES",
+ "taskType": "PRINT_VARS"
+ },
+ {
+ "description": "Set __EMS_CLIENT_INSTALL__ variable",
+ "taskType": "SET_VARS",
+ "variables": {
+ "__EMS_CLIENT_INSTALL__": "SKIPPED"
+ }
+ },
+ {
+ "description": "Log SKIP installation",
+ "taskType": "LOG",
+ "message": "EMS client installation SKIPPED at Node"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/event-management/config-files/baguette-client-install/linux.json b/event-management/config-files/baguette-client-install/linux/baguette.json
similarity index 83%
rename from event-management/config-files/baguette-client-install/linux.json
rename to event-management/config-files/baguette-client-install/linux/baguette.json
index 9c309f36eac06b4e5b53e35cdceb28b3880ec244..32d882395c7074f4d62416dd71ec59328cdf41e2 100644
--- a/event-management/config-files/baguette-client-install/linux.json
+++ b/event-management/config-files/baguette-client-install/linux/baguette.json
@@ -1,7 +1,12 @@
{
"os": "LINUX",
"description": "EMS client installation instruction set at VM node",
+ "condition": "! ${SKIP_BAGUETTE_INSTALLATION:-false} && ! '${OS_ARCHITECTURE:-x}'.startsWith('arm') && ${CPU_PROCESSORS:-0} > ${BAGUETTE_INSTALLATION_MIN_PROCESSORS:-0} && ${RAM_AVAILABLE_KB:-0} > ${BAGUETTE_INSTALLATION_MIN_RAM:-0} && ${DISK_FREE_KB:-0} > ${BAGUETTE_INSTALLATION_MIN_DISK_FREE:-0}",
"instructions": [
+ {
+ "description": "DEBUG: Print node pre-registration VARIABLES",
+ "taskType": "PRINT_VARS"
+ },
{
"description": "Check if 'java' is installed at Node",
"taskType": "CHECK",
@@ -37,7 +42,7 @@
"description": "Upload EMS client installation package",
"taskType": "COPY",
"fileName": "/tmp/baguette-client.tgz",
- "localFileName": "${EMS_PUBLIC_DIR}/baguette-client.tgz",
+ "localFileName": "${EMS_PUBLIC_DIR}/resources/baguette-client.tgz",
"executable": false,
"exitCode": 0,
"match": false
@@ -46,7 +51,7 @@
"description": "Upload installation package MD5 checksum",
"taskType": "COPY",
"fileName": "/tmp/baguette-client.tgz.md5",
- "localFileName": "${EMS_PUBLIC_DIR}/baguette-client.tgz.md5",
+ "localFileName": "${EMS_PUBLIC_DIR}/resources/baguette-client.tgz.md5",
"executable": false,
"exitCode": 0,
"match": false
@@ -128,11 +133,18 @@
{
"description": "-- LIST baguette-client FILES --",
"taskType": "CMD",
- "command": "ls -lR /opt/baguette-client ",
+ "command": "ls -l /opt/baguette-client ",
"executable": false,
"exitCode": 0,
"match": false
},
+ {
+ "description": "Set __EMS_CLIENT_INSTALL__ variable",
+ "taskType": "SET_VARS",
+ "variables": {
+ "__EMS_CLIENT_INSTALL__": "INSTALLED"
+ }
+ },
{
"description": "Log installation end",
"taskType": "LOG",
diff --git a/event-management/config-files/baguette-client-install/linux/check-ignore.json b/event-management/config-files/baguette-client-install/linux/check-ignore.json
new file mode 100644
index 0000000000000000000000000000000000000000..419f416db3717861fb128a42b7dbc40cc4f3cec8
--- /dev/null
+++ b/event-management/config-files/baguette-client-install/linux/check-ignore.json
@@ -0,0 +1,31 @@
+{
+ "os": "LINUX",
+ "description": "Check if node must be ignored",
+ "condition": "! ${SKIP_IGNORE_CHECK:-false}",
+ "instructions": [
+ {
+ "description": "Checking for .EMS_IGNORE_NODE file...",
+ "taskType": "LOG",
+ "message": "Checking for .EMS_IGNORE_NODE file..."
+ },
+ {
+ "description": "Checking for .EMS_IGNORE_NODE file",
+ "taskType": "CHECK",
+ "command": "test -e /tmp/.EMS_IGNORE_NODE",
+ "executable": false,
+ "exitCode": 0,
+ "match": false
+ },
+ {
+ "description": "Set __EMS_IGNORE_NODE__ variable",
+ "taskType": "SET_VARS",
+ "variables": {
+ "__EMS_IGNORE_NODE__": "IGNORED"
+ }
+ },
+ {
+ "description": "Stop further processing",
+ "taskType": "EXIT"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/event-management/config-files/baguette-client-install/linux/detect.json b/event-management/config-files/baguette-client-install/linux/detect.json
new file mode 100644
index 0000000000000000000000000000000000000000..fb28239aabf6094a68e21ac840fd4a6540f9af86
--- /dev/null
+++ b/event-management/config-files/baguette-client-install/linux/detect.json
@@ -0,0 +1,69 @@
+{
+ "os": "LINUX",
+ "description": "Detect node features (OS, architecture, cores, RAM, disk etc)",
+ "condition": "! ${SKIP_DETECTION:-false}",
+ "instructions": [
+ {
+ "description": "Detecting target node type...",
+ "taskType": "LOG",
+ "message": "Detecting target node type..."
+ },
+ {
+ "description": "Copying detection script to node...",
+ "taskType": "COPY",
+ "fileName": "/tmp/detect.sh",
+ "localFileName": "bin/detect.sh",
+ "executable": false,
+ "exitCode": 0,
+ "match": false
+ },
+ {
+ "description": "Make detection script executable",
+ "taskType": "CMD",
+ "command": "chmod +x /tmp/detect.sh ",
+ "executable": false,
+ "exitCode": 0,
+ "match": false
+ },
+ {
+ "description": "Run detection script",
+ "taskType": "CMD",
+ /*"command": "if [ ! -e /tmp/detect.txt ]; then /tmp/detect.sh &> /tmp/detect.txt; fi",*/
+ "command": "/tmp/detect.sh > /tmp/detect.txt 2>&1", /* POSIX redirection: bash-only '&>' would background the script under sh/dash */
+ "executable": false,
+ "exitCode": 0,
+ "match": false
+ },
+ {
+ "description": "Copying detection results back to EMS server...",
+ "taskType": "DOWNLOAD",
+ "fileName": "/tmp/detect.txt",
+ "localFileName": "logs/detect.${NODE_ADDRESS}--${TIMESTAMP-FILE}.txt",
+ "executable": false,
+ "exitCode": 0,
+ "match": false,
+ "patterns": {
+ "CPU_SOCKETS": { "pattern": "(^\\s*CPU_SOCKETS\\s*[=:]\\s*(.*)\\s*)", "flags": 0 },
+ "CPU_CORES": { "pattern": "(^\\s*CPU_CORES\\s*[=:]\\s*(.*)\\s*)", "flags": 0 },
+ "CPU_PROCESSORS": { "pattern": "(^\\s*CPU_PROCESSORS\\s*[=:]\\s*(.*)\\s*)", "flags": 0 },
+ "RAM_TOTAL_KB": { "pattern": "(^\\s*RAM_TOTAL_KB\\s*[=:]\\s*(.*)\\s*)", "flags": 0 },
+ "RAM_AVAILABLE_KB": { "pattern": "(^\\s*RAM_AVAILABLE_KB\\s*[=:]\\s*(.*)\\s*)", "flags": 0 },
+ "RAM_FREE_KB": { "pattern": "(^\\s*RAM_FREE_KB\\s*[=:]\\s*(.*)\\s*)", "flags": 0 },
+ "RAM_USED_KB": { "pattern": "(^\\s*RAM_USED_KB\\s*[=:]\\s*(.*)\\s*)", "flags": 0 },
+ "RAM_UTILIZATION": { "pattern": "(^\\s*RAM_UTILIZATION\\s*[=:]\\s*(.*)\\s*)", "flags": 0 },
+ "DISK_TOTAL_KB": { "pattern": "(^\\s*DISK_TOTAL_KB\\s*[=:]\\s*(.*)\\s*)", "flags": 0 },
+ "DISK_FREE_KB": { "pattern": "(^\\s*DISK_FREE_KB\\s*[=:]\\s*(.*)\\s*)", "flags": 0 },
+ "DISK_USED_KB": { "pattern": "(^\\s*DISK_USED_KB\\s*[=:]\\s*(.*)\\s*)", "flags": 0 },
+ "DISK_UTILIZATION": { "pattern": "(^\\s*DISK_UTILIZATION\\s*[=:]\\s*(.*)\\s*)", "flags": 0 },
+ "OS_ARCHITECTURE": { "pattern": "(^\\s*OS_ARCHITECTURE\\s*[=:]\\s*(.*)\\s*)", "flags": 0 },
+ "OS_KERNEL": { "pattern": "(^\\s*OS_KERNEL\\s*[=:]\\s*(.*)\\s*)", "flags": 0 },
+ "OS_KERNEL_RELEASE": { "pattern": "(^\\s*OS_KERNEL_RELEASE\\s*[=:]\\s*(.*)\\s*)", "flags": 0 }
+ }
+ },
+ {
+ "description": "Detection results...",
+ "taskType": "LOG",
+ "message": "Detection results:\n CPU_SOCKETS=${CPU_SOCKETS:-na}\n CPU_CORES=${CPU_CORES:-na}\n CPU_PROCESSORS=${CPU_PROCESSORS:-na}\n RAM_TOTAL_KB=${RAM_TOTAL_KB:-na}\n RAM_AVAILABLE_KB=${RAM_AVAILABLE_KB:-na}\n RAM_FREE_KB=${RAM_FREE_KB:-na}\n RAM_USED_KB=${RAM_USED_KB:-na}\n RAM_UTILIZATION=${RAM_UTILIZATION:-na}\n DISK_TOTAL_KB=${DISK_TOTAL_KB:-na}\n DISK_FREE_KB=${DISK_FREE_KB:-na}\n DISK_USED_KB=${DISK_USED_KB:-na}\n DISK_UTILIZATION=${DISK_UTILIZATION:-na}\n OS_ARCHITECTURE=${OS_ARCHITECTURE:-na}\n OS_KERNEL=${OS_KERNEL:-na}\n OS_KERNEL_RELEASE=${OS_KERNEL_RELEASE:-na}"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/event-management/config-files/baguette-client-install/jre8.json b/event-management/config-files/baguette-client-install/linux/jre8.json
similarity index 87%
rename from event-management/config-files/baguette-client-install/jre8.json
rename to event-management/config-files/baguette-client-install/linux/jre8.json
index 4b963016cde4d8d3b741a5fae63b1c9ba3168aa3..3a909408864f57e7c5a72b0182959673637da21e 100644
--- a/event-management/config-files/baguette-client-install/jre8.json
+++ b/event-management/config-files/baguette-client-install/linux/jre8.json
@@ -1,6 +1,7 @@
{
"os": "LINUX",
"description": "JRE 8u282 installation instruction set at VM node",
+ "condition": "! ${SKIP_JRE_INSTALLATION:-false} && ! '${OS_ARCHITECTURE:-x}'.startsWith('arm') && ${CPU_PROCESSORS:-0} > ${BAGUETTE_INSTALLATION_MIN_PROCESSORS:-0} && ${RAM_AVAILABLE_KB:-0} > ${BAGUETTE_INSTALLATION_MIN_RAM:-0} && ${DISK_FREE_KB:-0} > ${BAGUETTE_INSTALLATION_MIN_DISK_FREE:-0}",
"instructions": [
{
"description": "Check if JRE 8u282 is already installed at Node",
diff --git a/event-management/config-files/baguette-client-install/linux/netdata.json b/event-management/config-files/baguette-client-install/linux/netdata.json
new file mode 100644
index 0000000000000000000000000000000000000000..715fd1419cd36cc2031bc995079cb55dfe2e3774
--- /dev/null
+++ b/event-management/config-files/baguette-client-install/linux/netdata.json
@@ -0,0 +1,34 @@
+{
+ "os": "",
+ "description": "Netdata installation instruction set at VM node",
+ "condition": "! ${SKIP_NETDATA_INSTALLATION:-false}",
+ "instructions": [
+ {
+ "description": "Log Netdata installation start",
+ "taskType": "LOG",
+ "message": "Starting Netdata installation at Node"
+ },
+ {
+ "description": "Check if Netdata is already installed at Node",
+ "taskType": "CHECK",
+ /*"command": "[[ -f /usr/sbin/netdata ]] && exit 99",*/
+ "command": "[ $(ps -e -o pid,comm,cgroup |grep netdata |grep -v docker |grep -v lxc |wc -l) -gt 0 ] && exit 99",
+ "executable": false,
+ "exitCode": 99,
+ "match": true,
+ "message": "Netdata is already installed at Node"
+ },
+ {
+ "description": "Download Netdata kickstart.sh",
+ "taskType": "CMD",
+ "command": "curl https://my-netdata.io/kickstart-static64.sh > /tmp/netdata-kickstart.sh",
+ "executionTimeout": 600000
+ },
+ {
+ "description": "Run Netdata kickstart.sh",
+ "taskType": "CMD",
+ "command": "echo ${NODE_SSH_PASSWORD} | sudo -S sh /tmp/netdata-kickstart.sh --dont-wait --no-updates --disable-telemetry ",
+ "executionTimeout": 600000
+ }
+ ]
+}
\ No newline at end of file
diff --git a/event-management/config-files/baguette-client-install/linux/recover-baguette.json b/event-management/config-files/baguette-client-install/linux/recover-baguette.json
new file mode 100644
index 0000000000000000000000000000000000000000..4d6c16df40f1800c4d12ab45ae38463e4e4ec4c7
--- /dev/null
+++ b/event-management/config-files/baguette-client-install/linux/recover-baguette.json
@@ -0,0 +1,24 @@
+{
+ "os": "LINUX",
+ "description": "Restarting Baguette agent at VM node",
+ "instructions": [
+ {
+ "description": "Killing previous EMS client process",
+ "taskType": "CMD",
+ "command": "/opt/baguette-client/bin/kill.sh",
+ "executable": false,
+ "exitCode": 0,
+ "match": false,
+ "retries": 5
+ },
+ {
+ "description": "Starting new EMS client process",
+ "taskType": "CMD",
+ "command": "/opt/baguette-client/bin/run.sh",
+ "executable": false,
+ "exitCode": 0,
+ "match": false,
+ "retries": 5
+ }
+ ]
+}
\ No newline at end of file
diff --git a/event-management/config-files/baguette-client-install/linux-start.json b/event-management/config-files/baguette-client-install/linux/start-agents.json
similarity index 72%
rename from event-management/config-files/baguette-client-install/linux-start.json
rename to event-management/config-files/baguette-client-install/linux/start-agents.json
index 472ba3beabafcdb16f18faa92931d65768790d80..7f152894575e969099177c6700615dcd3ccaa6e8 100644
--- a/event-management/config-files/baguette-client-install/linux-start.json
+++ b/event-management/config-files/baguette-client-install/linux/start-agents.json
@@ -1,6 +1,7 @@
{
"os": "LINUX",
"description": "Starting Netdata and Baguette agents at VM node",
+ "condition": "! ${SKIP_START:-false}",
"instructions": [
{
"description": "Launch EMS client",
@@ -14,7 +15,8 @@
{
"description": "Check if Netdata is already running",
"taskType": "CHECK",
- "command": "[[ $(( `ps -ef |grep /usr/sbin/netdata |grep -v grep |wc -l`+1 )) -gt 1 ]] && exit 1 || exit 0",
+ /*"command": "[[ $(( `ps -ef |grep /usr/sbin/netdata |grep -v grep |wc -l`+1 )) -gt 1 ]] && exit 1 || exit 0",*/
+ "command": "[[ $(ps -e -o pid,comm,cgroup |grep netdata |grep -v grep |grep -v docker |grep -v lxc |wc -l) -gt 0 ]] && exit 1 || exit 0",
"executable": false,
"exitCode": 1,
"match": true,
diff --git a/event-management/config-files/baguette-client-install/netdata.json b/event-management/config-files/baguette-client-install/netdata.json
deleted file mode 100644
index ef3de8269f544a40b4f5305d55a7cacf36225e57..0000000000000000000000000000000000000000
--- a/event-management/config-files/baguette-client-install/netdata.json
+++ /dev/null
@@ -1,26 +0,0 @@
-{
- "os": "",
- "description": "Netdata installation instruction set at VM node",
- "instructions": [
- {
- "description": "Log Netdata installation start",
- "taskType": "LOG",
- "message": "Starting Netdata installation at Node"
- },
- {
- "description": "Check if Netdata is already installed at Node",
- "taskType": "CHECK",
- "command": "[[ -f /usr/sbin/netdata ]] && exit 99",
- "executable": false,
- "exitCode": 99,
- "match": true,
- "message": "Netdata is already installed at Node"
- },
- {
- "description": "Download and run Netdata kickstart.sh",
- "taskType": "CMD",
- "command": "bash <(curl -Ss https://my-netdata.io/kickstart.sh) --dont-wait --no-updates --disable-telemetry",
- "executionTimeout": 600000
- }
- ]
-}
\ No newline at end of file
diff --git a/event-management/config-files/baguette-client-install/win.json b/event-management/config-files/baguette-client-install/win/win.json
similarity index 100%
rename from event-management/config-files/baguette-client-install/win.json
rename to event-management/config-files/baguette-client-install/win/win.json
diff --git a/event-management/config-files/baguette-client/conf/baguette-client.properties b/event-management/config-files/baguette-client/conf/baguette-client.properties
index 887c148e1c0583ed0b3164d573d0254dc2906a9c..dded9204152a395d3efaf9e9a1472708d0afd321 100644
--- a/event-management/config-files/baguette-client/conf/baguette-client.properties
+++ b/event-management/config-files/baguette-client/conf/baguette-client.properties
@@ -32,6 +32,17 @@ server-fingerprint = ${BAGUETTE_SERVER_PUBKEY_FINGERPRINT}
server-username = ${BAGUETTE_SERVER_USERNAME}
server-password = ${BAGUETTE_SERVER_PASSWORD}
+# -----------------------------------------------------------------------------
+# Client-side Self-healing settings
+# -----------------------------------------------------------------------------
+
+#self.healing.enabled=true
+#self.healing.recovery.file.baguette=conf/baguette.json
+#self.healing.recovery.file.netdata=conf/netdata.json
+#self.healing.recovery.delay=10000
+#self.healing.recovery.retry.wait=60000
+#self.healing.recovery.max.retries=3
+
# -----------------------------------------------------------------------------
# Collectors settings
# -----------------------------------------------------------------------------
@@ -41,8 +52,11 @@ server-password = ${BAGUETTE_SERVER_PASSWORD}
collector.netdata.enable = true
collector.netdata.delay = 10000
collector.netdata.url = http://127.0.0.1:19999/api/v1/allmetrics?format=json
+collector.netdata.urlOfNodesWithoutClient = http://%s:19999/api/v1/allmetrics?format=json
#collector.netdata.create-topic = true
#collector.netdata.allowed-topics = netdata__system__cpu__user:an_alias
+collector.netdata.error-limit = 3
+collector.netdata.pause-period = 60
# -----------------------------------------------------------------------------
# Cluster settings
diff --git a/event-management/config-files/baguette-client/conf/baguette.json b/event-management/config-files/baguette-client/conf/baguette.json
new file mode 100644
index 0000000000000000000000000000000000000000..cdd4ab4aa6c12b51c21128fba64dfb86e5db0dff
--- /dev/null
+++ b/event-management/config-files/baguette-client/conf/baguette.json
@@ -0,0 +1,16 @@
+[{
+ "name": "Initial wait...",
+ "command": "pwd",
+ "waitBefore": 0,
+ "waitAfter": 5000
+}, {
+ "name": "Sending baguette client kill command...",
+ "command": "/opt/baguette-client/bin/kill.sh",
+ "waitBefore": 0,
+ "waitAfter": 2000
+}, {
+ "name": "Sending baguette client start command...",
+ "command": "/opt/baguette-client/bin/run.sh",
+ "waitBefore": 0,
+ "waitAfter": 10000
+}]
diff --git a/event-management/config-files/baguette-client/conf/eu.melodic.event.brokercep.properties b/event-management/config-files/baguette-client/conf/eu.melodic.event.brokercep.properties
index 6297e2d2fa070c49245c342869e0811a0cb5ddd4..9480923825b872d51bc672738584b622605f0a91 100644
--- a/event-management/config-files/baguette-client/conf/eu.melodic.event.brokercep.properties
+++ b/event-management/config-files/baguette-client/conf/eu.melodic.event.brokercep.properties
@@ -7,7 +7,9 @@
# https://www.mozilla.org/en-US/MPL/2.0/
#
-password-encoder-class = eu.melodic.event.util.password.IdentityPasswordEncoder
+#password-encoder-class = eu.melodic.event.util.password.AsterisksPasswordEncoder
+#password-encoder-class = eu.melodic.event.util.password.IdentityPasswordEncoder
+#password-encoder-class = eu.melodic.event.util.password.PresentPasswordEncoder
# Broker ports and protocol
brokercep.broker-name = broker
diff --git a/event-management/config-files/baguette-client/conf/logback-spring.xml b/event-management/config-files/baguette-client/conf/logback-spring.xml
index b2dd3dcb43be7f00a6af39eac83c533e2505246e..9ee8513aefc183641ec806daa02c8124ced5a0fa 100644
--- a/event-management/config-files/baguette-client/conf/logback-spring.xml
+++ b/event-management/config-files/baguette-client/conf/logback-spring.xml
@@ -32,7 +32,7 @@
-
+