Commit 0c62933e authored by Jean-Didier Totow

performance model, persistent storage

parent 766ef22d
import service_pb2, service_pb2_grpc, os, requests, json
from initial_ML_module import Predictor, MLModelManager, Trainer
from morphemic.dataset import DatasetMaker
import subprocess
from google.protobuf.json_format import MessageToJson
url_output_api = os.environ.get("URL_OUTPUT_API","http://localhost:8767/api/v1/make")
dataset_local_repository = os.environ.get("DATASET_LOCAL_REPOSITORY","./datasets/")
influxdb_hostname = os.environ.get("INFLUXDB_HOSTNAME","localhost")
influxdb_port = int(os.environ.get("INFLUXDB_PORT","8086"))
influxdb_username = os.environ.get("INFLUXDB_USERNAME","morphemic")
influxdb_password = os.environ.get("INFLUXDB_PASSWORD","password")
influxdb_dbname = os.environ.get("INFLUXDB_DBNAME","morphemic")
influxdb_org = os.environ.get("INFLUXDB_ORG","morphemic")
train_folder = os.environ.get("TRAIN_DATA_FOLDER","./train")
class PredictRPC(service_pb2_grpc.PredictServicer):
    def PredictPerformance(self, request, context):
        _json = json.loads(MessageToJson(request.features))
        features = dict(_json["fields"])
        #e.g. features = {'memory': {'floatValue': 4500.23}, 'level': {'floatValue': 1.0}, 'cpu_usage': {'floatValue': 31.0}, 'latency': {'floatValue': 2.1}}
        final_features = {}
        for k, _value in features.items():
            final_features[k] = _value['floatValue']
        predictor = Predictor(request.application, final_features, request.target, request.variant)
        response = json.loads(predictor.predict())
        reply = service_pb2.PredictReply()
        reply.status = response['status']
        reply.results.fields['results'].string_value = json.dumps(response['results'])
        reply.ml.fields['ml'].string_value = json.dumps(response['ml'])
        reply.message = response['message']
        return reply

    def getModel(self, request, context):
        model_manager = MLModelManager()
        response = model_manager.getModelTrainingData(request.application, request.target)
        reply = service_pb2.ModelReply()
        if response is not None:
            reply.results.fields['results'].string_value = json.dumps(response)
        else:
            reply.results.fields['results'].string_value = '{"data":"No model found"}'
        return reply

    def trainModel(self, request, context):
        if request.url_file == "":
            #no dataset was supplied, so build one from InfluxDB first
            try:
                configs = {
                    'hostname': influxdb_hostname,
                    'port': influxdb_port,
                    'username': influxdb_username,
                    'password': influxdb_password,
                    'dbname': influxdb_dbname,
                    'path_dataset': dataset_local_repository
                }
                datasetmaker = DatasetMaker(request.application, None, configs)
                response = datasetmaker.make()
                #response = requests.post(url=url_output_api, data='{"application":"'+request.application+'", "start":"10m"}', headers={'Content-type':'application/json'}).json()
                request.url_file = response['url']
                #features = response['features']
            except Exception as e:
                print(e)
                reply = service_pb2.TrainReply()
                reply.status = False
                reply.message = str(e)
                reply.application = request.application
                return reply
        try:
            _json = json.loads(MessageToJson(request.features))
            features = list(_json["strings"])
            data = {"url_file": request.url_file, "target": request.target, "application": request.application, "features": features, "variant": request.variant}
            with open(train_folder + '/train.data', 'w') as _file:
                _file.write(json.dumps(data))
            #run the training in a detached process so the RPC returns immediately
            command = ['python', '-u', 'train.py']
            subprocess.Popen(command, start_new_session=True)
            reply = service_pb2.TrainReply()
            reply.status = True
            reply.message = "Training started"
            reply.application = request.application
            return reply
        except Exception as e:
            reply = service_pb2.TrainReply()
            reply.status = False
            reply.message = str(e)
            reply.application = request.application
            return reply
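For reference, a minimal client-side sketch of calling the servicer above over gRPC, matching service.proto below (illustrative only: the endpoint, application name, feature names, target and variant are placeholders, and the stubs must already have been generated from the current proto):

import grpc, json
import service_pb2, service_pb2_grpc

channel = grpc.insecure_channel('localhost:8767')    #port opened by ServerRPC in app.py
stub = service_pb2_grpc.PredictStub(channel)
req = service_pb2.PredictRequest()
req.application = 'application-1'                    #placeholder application name
req.target = 'latency'                               #placeholder target metric
req.variant = 'vm'                                   #placeholder variant
req.features.fields['cpu_usage'].float_value = 31.0  #map<string, Any> entries
req.features.fields['memory'].float_value = 4500.23
reply = stub.PredictPerformance(req)
print(reply.status, reply.message)
print(json.loads(reply.results.fields['results'].string_value))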
syntax = "proto2";
package proto;
service Predict {
    rpc PredictPerformance (PredictRequest) returns (PredictReply) {}
    rpc getModel (ModelRequest) returns (ModelReply) {}
    rpc trainModel (TrainRequest) returns (TrainReply) {}
}

message Any {
    oneof kind {
        int64 int_value = 1;
        float float_value = 2;
        string string_value = 3;
        ListOfStrings list_value = 4;
    }
}

message Pair {
    required string key = 1;
    required Any value = 2;
}

message Dictionary {
    //repeated Pair pairs = 1;
    map<string, Any> fields = 1;
}

message PredictRequest {
    required string application = 1;
    required Dictionary features = 2;
    required string target = 3;
    required string variant = 4;
}

message PredictReply {
    required bool status = 1;
    required Dictionary results = 2;
    required Dictionary ml = 3;
    required string message = 4;
}

message ModelRequest {
    required string application = 1;
    required string target = 2;
}

message ModelReply {
    required Dictionary results = 1;
}

message ListValue {
    repeated ListOfStrings listOfStrings = 1;
}

message ListOfStrings {
    repeated string strings = 1;
}

message TrainRequest {
    required string application = 1;
    required string url_file = 2;
    required ListOfStrings features = 3;
    required string target = 4;
    required string variant = 5;
}

message TrainReply {
    required bool status = 1;
    required string message = 2;
    required string application = 3;
}
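If service.proto changes (e.g. the variant fields added above), the Python stubs (service_pb2.py, service_pb2_grpc.py) need to be regenerated; the usual grpcio-tools invocation would be the following, noting that grpcio-tools is not in the requirements list below and would have to be installed separately:

python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. service.proto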
setuptools
numpy>=1.9.0
scipy>=0.14.1
joblib>=0.14.1
scikit-learn>=0.24.0,<0.25.0
dask
distributed>=2.2.0
pyyaml
pandas>=1.0
liac-arff
ConfigSpace>=0.4.14,<0.5
pynisher>=0.6.3
pyrfr>=0.7,<0.9
smac>=0.13.1,<0.14
flask
emcee
pyDOE
scikit-optimize
auto-sklearn
requests
grpcio
protobuf
stomp.py
import os, time, json, requests, stomp, train
from threading import Thread
from flask import Flask, Response, request
from initial_ML_module import Trainer, Predictor, MLModelManager
from concurrent import futures
import grpc, service_pb2_grpc, sqlite3
from predictgrpc import PredictRPC
from morphemic.dataset import DatasetMaker
import subprocess
dataset_local_repository = os.environ.get("DATASET_LOCAL_REPOSITORY","./datasets/")
train_folder = os.environ.get("TRAIN_DATA_FOLDER","./train")
local_database_path = os.environ.get("LOCAL_DATABASE_PATH","./db/")
influxdb_hostname = os.environ.get("INFLUXDB_HOSTNAME","localhost")
influxdb_port = int(os.environ.get("INFLUXDB_PORT","8086"))
influxdb_username = os.environ.get("INFLUXDB_USERNAME","morphemic")
influxdb_password = os.environ.get("INFLUXDB_PASSWORD","password")
influxdb_dbname = os.environ.get("INFLUXDB_DBNAME","morphemic")
influxdb_org = os.environ.get("INFLUXDB_ORG","morphemic")
#
subscription_topic = 'performance_model_evaluator'
activemq_username = os.getenv("ACTIVEMQ_USER","aaa")
activemq_password = os.getenv("ACTIVEMQ_PASSWORD","111")
activemq_hostname = os.getenv("ACTIVEMQ_HOST","localhost")
activemq_port = int(os.getenv("ACTIVEMQ_PORT","61613"))
db_evaluation_period = 60*2
db_last_evaluation = time.time()
class ServerRPC(Thread):
    def __init__(self):
        self.port = 8767
        self.max_workers = 10
        super(ServerRPC, self).__init__()

    def run(self):
        server = grpc.server(futures.ThreadPoolExecutor(max_workers=self.max_workers))
        service_pb2_grpc.add_PredictServicer_to_server(PredictRPC(), server)
        server.add_insecure_port('[::]:{0}'.format(self.port))
        server.start()
        print("RPC server started on {0}".format(self.port))
        #server.wait_for_termination()
        while True:
            time.sleep(5)

#the gRPC server starts at import time (e.g. when the module is loaded by a
#WSGI server); running this file directly starts only the Flask app below
if __name__ != "__main__":
    rpc_server = ServerRPC()
    rpc_server.start()

def DBEvaluationRoutine():
    print("DB Evaluator routine started")
    global db_last_evaluation
    while True:
        if time.time() - db_last_evaluation >= db_evaluation_period:
            try:
                conn = sqlite3.connect(local_database_path + "prediction.db")
                cursor = conn.cursor()
                data_to_send = []
                for row in cursor.execute("SELECT * FROM Prediction"):
                    _json = {'application': row[1], 'target': row[2], 'features': json.loads(row[4]), 'prediction': row[3], 'variant': row[4]}
                    data_to_send.append(_json)
                conn.close()
                if data_to_send != []:
                    conn = stomp.Connection(host_and_ports=[(activemq_hostname, activemq_port)])
                    conn.connect(login=activemq_username, passcode=activemq_password)
                    time.sleep(2)
                    conn.send(body=json.dumps(data_to_send), destination=subscription_topic, persistent='false')
                    conn.disconnect()
                    print("Messages pushed to activemq")
                    print("Removing messages from local DB")
                    conn = sqlite3.connect(local_database_path + "prediction.db")
                    cursor = conn.cursor()
                    cursor.execute("DELETE FROM Prediction")
                    conn.commit()
                    conn.close()
                else:
                    print("Nothing found")
            except Exception as e:
                print("An error occurred")
                print(e)
            db_last_evaluation = time.time()
        time.sleep(5)

thread = Thread(target=DBEvaluationRoutine, args=())
thread.start()
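#The routine above assumes a local SQLite table along these lines (a sketch
#inferred from the row indices used; the actual DDL lives wherever the
#Prediction table is created). Note that row[4] is currently read both as
#'features' and as 'variant':
#
#  CREATE TABLE IF NOT EXISTS Prediction (
#      id INTEGER PRIMARY KEY AUTOINCREMENT,  -- row[0]
#      application TEXT,                      -- row[1]
#      target TEXT,                           -- row[2]
#      prediction REAL,                       -- row[3]
#      features TEXT                          -- row[4], JSON-encoded
#  );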
url_output_api = os.environ.get("URL_OUTPUT_API","http://localhost:8767/api/v1/make")
app = Flask(__name__)
@app.route('/', methods=['GET'])
def home():
    response = {"Message": "Welcome to the Performance Model API", "current_version": "1.0", "service_path": "/api/v1"}
    return Response(json.dumps(response), status=200, mimetype="application/json")

@app.route('/api/v1/model/train', methods=['POST'])
def train_model():
    url_file, application, features, target, variant = None, None, None, None, None
    try:
        url_file = request.json['url_file']
        application = request.json['application']
        features = request.json['features']
        target = request.json['target']
        variant = request.json['variant']
    except:
        return Response("Bad request", status=400, mimetype="application/json")
    if application is None or application == "" or target is None or target == "" or variant == "":
        return Response("application and target parameters must be specified", status=400, mimetype="application/json")
    if url_file is None or url_file == "":
        #print(os.system("whoami"))
        #the dataset will be created first
        # curl -X POST -d '{"application":"application-1","start":"10m"}' -H 'Content-type:application/json' http://localhost:8767/api/v1/make
        try:
            configs = {
                'hostname': influxdb_hostname,
                'port': influxdb_port,
                'username': influxdb_username,
                'password': influxdb_password,
                'dbname': influxdb_dbname,
                'path_dataset': dataset_local_repository
            }
            datasetmaker = DatasetMaker(application, None, configs)
            response = datasetmaker.make()
            #response = requests.post(url=url_output_api, data='{"application":"'+application+'", "start":"10m"}', headers={'Content-type':'application/json'}).json()
            url_file = response['url']
            #features = response['features']
        except Exception as e:
            print(e)
            return Response('Could not create dataset', status=400, mimetype='application/json')
    if features is None or features == "":
        return Response("Missing feature parameters", status=400, mimetype="application/json")
    try:
        data = {"url_file": url_file, "target": target, "application": application, "features": features, "variant": variant}
        with open(train_folder + '/train.data', 'w') as _file:
            _file.write(json.dumps(data))
        #trainer = Trainer(url_file,target,application,features)
        #response = {"data": trainer.train_separated_thread()}
        #run the training in a detached process so the request returns immediately
        command = ['python', '-u', 'train.py']
        subprocess.Popen(command, start_new_session=True)
        response = {"status": True, "message": "Training started", "application": application}
        return Response(json.dumps(response), status=200, mimetype="application/json")
    except Exception as e:
        print(e)
        return Response("An error occurred while training the model", status=500, mimetype="application/json")

@app.route('/api/v1/model', methods=['POST'])
def get_model():
    application, target = None, None
    try:
        application = request.json['application']
        target = request.json['target']
    except:
        return Response("Bad request", status=400, mimetype="application/json")
    if application is not None:
        model_manager = MLModelManager()
        response = {"data": model_manager.getModelTrainingData(application, target)}
        return Response(json.dumps(response), status=200, mimetype="application/json")
    return Response("Bad request", status=400, mimetype="application/json")

@app.route('/api/v1/model/predict', methods=['POST'])
def predict_model():
    application, features, target, variant = None, None, None, None
    try:
        application = request.json['application']
        features = request.json['features']
        target = request.json['target']
        variant = request.json['variant']
    except:
        return Response("Bad request", status=400, mimetype="application/json")
    if application is not None and features is not None and isinstance(features, dict) and target is not None:
        try:
            predictor = Predictor(application, features, target, variant)
            response = json.loads(predictor.predict())
            return Response(json.dumps(response), status=200, mimetype="application/json")
        except Exception as e:
            print(e)
            return Response("An internal error occurred", status=500, mimetype="application/json")
    else:
        return Response("Data format incorrect", status=400, mimetype="application/json")

if __name__ == "__main__":
    app.run(host='0.0.0.0', port=8766, debug=False)
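For quick manual checks, the REST endpoints above can be exercised with the requests library (a minimal smoke-test sketch, assuming the API runs locally on port 8766; the application, feature, target and variant values are placeholders):

import requests

#start a training run; the empty url_file triggers dataset creation from InfluxDB
train_payload = {"url_file": "", "application": "application-1",
                 "features": ["cpu_usage", "memory"],
                 "target": "latency", "variant": "vm"}
print(requests.post("http://localhost:8766/api/v1/model/train", json=train_payload).json())

#request a prediction once a model has been trained
predict_payload = {"application": "application-1",
                   "features": {"cpu_usage": 31.0, "memory": 4500.23},
                   "target": "latency", "variant": "vm"}
print(requests.post("http://localhost:8766/api/v1/model/predict", json=predict_payload).json())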
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: service.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='service.proto',
package='proto',
syntax='proto2',
serialized_options=None,
serialized_pb=_b('\n\rservice.proto\x12\x05proto\"}\n\x03\x41ny\x12\x13\n\tint_value\x18\x01 \x01(\x03H\x00\x12\x15\n\x0b\x66loat_value\x18\x02 \x01(\x02H\x00\x12\x16\n\x0cstring_value\x18\x03 \x01(\tH\x00\x12*\n\nlist_value\x18\x04 \x01(\x0b\x32\x14.proto.ListOfStringsH\x00\x42\x06\n\x04kind\".\n\x04Pair\x12\x0b\n\x03key\x18\x01 \x02(\t\x12\x19\n\x05value\x18\x02 \x02(\x0b\x32\n.proto.Any\"v\n\nDictionary\x12-\n\x06\x66ields\x18\x01 \x03(\x0b\x32\x1d.proto.Dictionary.FieldsEntry\x1a\x39\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x19\n\x05value\x18\x02 \x01(\x0b\x32\n.proto.Any:\x02\x38\x01\"Z\n\x0ePredictRequest\x12\x13\n\x0b\x61pplication\x18\x01 \x02(\t\x12#\n\x08\x66\x65\x61tures\x18\x02 \x02(\x0b\x32\x11.proto.Dictionary\x12\x0e\n\x06target\x18\x03 \x02(\t\"r\n\x0cPredictReply\x12\x0e\n\x06status\x18\x01 \x02(\x08\x12\"\n\x07results\x18\x02 \x02(\x0b\x32\x11.proto.Dictionary\x12\x1d\n\x02ml\x18\x03 \x02(\x0b\x32\x11.proto.Dictionary\x12\x0f\n\x07message\x18\x04 \x02(\t\"3\n\x0cModelRequest\x12\x13\n\x0b\x61pplication\x18\x01 \x02(\t\x12\x0e\n\x06target\x18\x02 \x02(\t\"0\n\nModelReply\x12\"\n\x07results\x18\x01 \x02(\x0b\x32\x11.proto.Dictionary\"8\n\tListValue\x12+\n\rlistOfStrings\x18\x01 \x03(\x0b\x32\x14.proto.ListOfStrings\" \n\rListOfStrings\x12\x0f\n\x07strings\x18\x01 \x03(\t\"m\n\x0cTrainRequest\x12\x13\n\x0b\x61pplication\x18\x01 \x02(\t\x12\x10\n\x08url_file\x18\x02 \x02(\t\x12&\n\x08\x66\x65\x61tures\x18\x03 \x02(\x0b\x32\x14.proto.ListOfStrings\x12\x0e\n\x06target\x18\x04 \x02(\t\"B\n\nTrainReply\x12\x0e\n\x06status\x18\x01 \x02(\x08\x12\x0f\n\x07message\x18\x02 \x02(\t\x12\x13\n\x0b\x61pplication\x18\x03 \x02(\t2\xbb\x01\n\x07Predict\x12\x42\n\x12PredictPerformance\x12\x15.proto.PredictRequest\x1a\x13.proto.PredictReply\"\x00\x12\x34\n\x08getModel\x12\x13.proto.ModelRequest\x1a\x11.proto.ModelReply\"\x00\x12\x36\n\ntrainModel\x12\x13.proto.TrainRequest\x1a\x11.proto.TrainReply\"\x00')
)
_ANY = _descriptor.Descriptor(
name='Any',
full_name='proto.Any',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='int_value', full_name='proto.Any.int_value', index=0,
number=1, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='float_value', full_name='proto.Any.float_value', index=1,
number=2, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='string_value', full_name='proto.Any.string_value', index=2,
number=3, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='list_value', full_name='proto.Any.list_value', index=3,
number=4, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto2',
extension_ranges=[],
oneofs=[
_descriptor.OneofDescriptor(
name='kind', full_name='proto.Any.kind',
index=0, containing_type=None, fields=[]),
],
serialized_start=24,
serialized_end=149,
)
_PAIR = _descriptor.Descriptor(
name='Pair',
full_name='proto.Pair',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='key', full_name='proto.Pair.key', index=0,
number=1, type=9, cpp_type=9, label=2,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='value', full_name='proto.Pair.value', index=1,
number=2, type=11, cpp_type=10, label=2,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto2',
extension_ranges=[],
oneofs=[
],
serialized_start=151,
serialized_end=197,
)
_DICTIONARY_FIELDSENTRY = _descriptor.Descriptor(
name='FieldsEntry',
full_name='proto.Dictionary.FieldsEntry',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='key', full_name='proto.Dictionary.FieldsEntry.key', index=0,
number=1, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='value', full_name='proto.Dictionary.FieldsEntry.value', index=1,
number=2, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=_b('8\001'),
is_extendable=False,
syntax='proto2',
extension_ranges=[],
oneofs=[
],
serialized_start=260,
serialized_end=317,
)
_DICTIONARY = _descriptor.Descriptor(
name='Dictionary',
full_name='proto.Dictionary',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='fields', full_name='proto.Dictionary.fields', index=0,
number=1, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[_DICTIONARY_FIELDSENTRY, ],
enum_types=[
],