From af6c75a5ac44070f44bc447390e28c8caeee3cf9 Mon Sep 17 00:00:00 2001 From: Andrey Shabarshov Date: Fri, 28 Jul 2023 17:11:58 +0100 Subject: [PATCH] DEEP-13, DEEP-14 Training dialog implemented. Dashboard page UI and functionality added --- DeepTrace/Controls/ModelCard.razor | 169 +++++++++++++++++++++ DeepTrace/Controls/TrainingDialog.razor | 13 +- DeepTrace/Data/ModelDefinition.cs | 36 +++-- DeepTrace/Data/Prediction.cs | 10 +- DeepTrace/Data/TrainedModelDefinition.cs | 1 + DeepTrace/ML/IMLProcessor.cs | 4 +- DeepTrace/ML/MLEvaluationMetrics.cs | 16 ++ DeepTrace/ML/MLHelpers.cs | 9 +- DeepTrace/ML/MLProcessor.cs | 47 +++++- DeepTrace/Pages/Index.razor | 29 +++- DeepTrace/Pages/Training.razor | 1 + DeepTrace/Services/IModelStorageService.cs | 1 + DeepTrace/Services/ModelStorageService.cs | 9 ++ 13 files changed, 315 insertions(+), 30 deletions(-) create mode 100644 DeepTrace/Controls/ModelCard.razor create mode 100644 DeepTrace/ML/MLEvaluationMetrics.cs diff --git a/DeepTrace/Controls/ModelCard.razor b/DeepTrace/Controls/ModelCard.razor new file mode 100644 index 0000000..ec75833 --- /dev/null +++ b/DeepTrace/Controls/ModelCard.razor @@ -0,0 +1,169 @@ +@using DeepTrace.Data; +@using DeepTrace.ML; +@using DeepTrace.Services; +@using PrometheusAPI; + +@inject PrometheusClient Prometheus +@inject IDialogService DialogService +@inject IModelStorageService ModelService +@inject ITrainedModelStorageService TrainedModelService +@inject ILogger MLProcessorLogger +@inject ILogger Logger + + + + + @Model?.Name + + + @Model?.IsEnabled + + + + Current state: @_prediction.PredictedLabel + + + +@code{ + [Parameter] + public TrainedModelDefinition? Model { get; set; } + + private ModelDefinition _modelDefinition = new(); + private Prediction _prediction = new(); + + protected override async Task OnAfterRenderAsync(bool firstRender) + { + if (!firstRender || Model?.Id == null) + { + return; + } + _modelDefinition = (await ModelService.Load(Model.Id)) ?? _modelDefinition; + + #pragma warning disable CS4014 + Task.Run(PredictionLoop); + #pragma warning restore CS4014 + } + + private bool IsEnabled + { + get => Model?.IsEnabled ?? false; + set + { + if (Model==null || Model.IsEnabled == value) + { + return; + } + Model.IsEnabled = value; + InvokeAsync(SaveIsEnabled); + } + } + + private async Task SaveIsEnabled() + { + if(Model == null) + { + return; + } + + var trainedModel = new TrainedModelDefinition + { + Id = Model.Id, + IsEnabled = Model.IsEnabled, + Name = Model.Name, + Value = Model.Value + }; + await TrainedModelService.Store(trainedModel); + } + + private async Task PredictionLoop() + { + var startDate = DateTime.UtcNow; + while (true) + { + try + { + await Task.Delay(TimeSpan.FromSeconds(5)); + var endDate = DateTime.UtcNow; + await PredictAnomaly(startDate, endDate); + startDate = endDate; + } + catch(Exception) + { + //ignore + } + } + } + + private async Task PredictAnomaly(DateTime startDate, DateTime endDate) + { + + // use automatic step value to always request 500 elements + var seconds = (endDate - startDate).TotalSeconds / 500.0; + if (seconds < 1.0) + seconds = 1.0; + var step = TimeSpan.FromSeconds(seconds); + + var tasks = _modelDefinition!.DataSource.Queries + .Select(x => Prometheus.RangeQuery(x.Query, startDate, endDate, step, TimeSpan.FromSeconds(2))) + .ToArray(); + + try + { + await Task.WhenAll(tasks); + } + catch (Exception e) + { + await ShowError(e.Message); + return; + } + + var data = new List(); + + foreach (var (res, def) in tasks.Select((x, i) => (x.Result, _modelDefinition.DataSource.Queries[i]))) + { + if (res.Status != StatusType.Success) + { + Logger.LogError(res.Error ?? "Error"); + return; + } + + if (res.ResultType != ResultTypeType.Matrix) + { + Logger.LogError($"Got {res.ResultType}, but Matrix expected for {def.Query}"); + return; + } + + var m = res.AsMatrix().Result; + if (m == null || m.Length != 1) + { + Logger.LogError($"No data returned for {def.Query}"); + return; + } + + data.Add( + new() + { + Name = def.Query, + Color = def.Color, + Data = m[0].Values!.ToList() + } + ); + } + + var mlProcessor = new MLProcessor(MLProcessorLogger); + _prediction = await mlProcessor.Predict(Model, _modelDefinition, data); + } + + private async Task ShowError(string text) + { + var options = new DialogOptions + { + CloseOnEscapeKey = true + }; + var parameters = new DialogParameters(); + parameters.Add("Text", text); + + var d = DialogService.Show("Error", parameters, options); + await d.Result; + } +} \ No newline at end of file diff --git a/DeepTrace/Controls/TrainingDialog.razor b/DeepTrace/Controls/TrainingDialog.razor index e967c91..3342658 100644 --- a/DeepTrace/Controls/TrainingDialog.razor +++ b/DeepTrace/Controls/TrainingDialog.razor @@ -12,8 +12,16 @@

@Text

- + + @if (_isTraining == false) + { + MicroAccuracy: @_evaluationMetrics!.MicroAccuracy.ToString("N2") + MacroAccuracy: @_evaluationMetrics!.MacroAccuracy.ToString("N2") + LogLoss: @_evaluationMetrics!.LogLoss.ToString("N2") + LogLossReduction: @_evaluationMetrics!.LogLossReduction.ToString("N2") + } +
@@ -28,6 +36,7 @@ private string _progressText = ""; private bool _isTraining = true; + private MLEvaluationMetrics? _evaluationMetrics; void Submit() => MudDialog?.Close(DialogResult.Ok(true)); @@ -38,7 +47,7 @@ return; } - await Processor.Train(Model, UpdateProgress); + _evaluationMetrics = await Processor.Train(Model, UpdateProgress); _isTraining = false; await InvokeAsync(StateHasChanged); } diff --git a/DeepTrace/Data/ModelDefinition.cs b/DeepTrace/Data/ModelDefinition.cs index 6bc3a0f..dbab651 100644 --- a/DeepTrace/Data/ModelDefinition.cs +++ b/DeepTrace/Data/ModelDefinition.cs @@ -44,23 +44,31 @@ public class ModelDefinition foreach (var currentInterval in IntervalDefinitionList) { - var data = ""; - for (var i = 0; i < currentInterval.Data.Count; i++) - { - - var queryData = currentInterval.Data[i]; - var min = queryData.Data.Min(x => x.Value); - var max = queryData.Data.Max(x => x.Value); - var avg = queryData.Data.Average(x => x.Value); - var mean = queryData.Data.Sum(x => x.Value) / queryData.Data.Count; - - data += min + "," + max + "," + avg + "," + mean + ","; - - } - data += currentInterval.Name; + var source = currentInterval.Data; + string data = ConvertToCsv(source); + data += "," + currentInterval.Name; writer.AppendLine(data); } return writer.ToString(); } + + public static string ConvertToCsv(List source) + { + var data = ""; + for (var i = 0; i < source.Count; i++) + { + + var queryData = source[i]; + var min = queryData.Data.Min(x => x.Value); + var max = queryData.Data.Max(x => x.Value); + var avg = queryData.Data.Average(x => x.Value); + var mean = queryData.Data.Sum(x => x.Value) / queryData.Data.Count; + + data += min + "," + max + "," + avg + "," + mean + ","; + + } + + return data+"\"ignoreMe\""; + } } diff --git a/DeepTrace/Data/Prediction.cs b/DeepTrace/Data/Prediction.cs index 644d766..46857cf 100644 --- a/DeepTrace/Data/Prediction.cs +++ b/DeepTrace/Data/Prediction.cs @@ -2,9 +2,11 @@ namespace DeepTrace.Data; -public class MyPrediction +public class Prediction { - //vector to hold alert,score,p-value values - [VectorType(3)] - public double[]? Prediction { get; set; } + [ColumnName(@"PredictedLabel")] + public string PredictedLabel { get; set; } + + [ColumnName(@"Score")] + public float[] Score { get; set; } } diff --git a/DeepTrace/Data/TrainedModelDefinition.cs b/DeepTrace/Data/TrainedModelDefinition.cs index 7ca81b0..9585f16 100644 --- a/DeepTrace/Data/TrainedModelDefinition.cs +++ b/DeepTrace/Data/TrainedModelDefinition.cs @@ -7,6 +7,7 @@ namespace DeepTrace.Data { [BsonId] public ObjectId? Id { get; set; } + public bool IsEnabled { get; set; } = false; public string Name { get; set; } = string.Empty; public byte[] Value { get; set; } = Array.Empty(); //base64 } diff --git a/DeepTrace/ML/IMLProcessor.cs b/DeepTrace/ML/IMLProcessor.cs index 37e7acb..6e78127 100644 --- a/DeepTrace/ML/IMLProcessor.cs +++ b/DeepTrace/ML/IMLProcessor.cs @@ -5,8 +5,8 @@ namespace DeepTrace.ML; public interface IMLProcessor { - Task Train(ModelDefinition modelDef, Action log); + Task Train(ModelDefinition modelDef, Action log); byte[] Export(); void Import(byte[] data); - string Predict(DataSourceDefinition dataSource); + Task Predict(TrainedModelDefinition trainedModel, ModelDefinition model, List data); } diff --git a/DeepTrace/ML/MLEvaluationMetrics.cs b/DeepTrace/ML/MLEvaluationMetrics.cs new file mode 100644 index 0000000..968baf8 --- /dev/null +++ b/DeepTrace/ML/MLEvaluationMetrics.cs @@ -0,0 +1,16 @@ +namespace DeepTrace.ML +{ + public class MLEvaluationMetrics + { + public MLEvaluationMetrics() + { + + } + + public double MicroAccuracy { get; set; } + public double MacroAccuracy { get; set; } + public double LogLoss { get; set; } + public double LogLossReduction { get; set; } + + } +} diff --git a/DeepTrace/ML/MLHelpers.cs b/DeepTrace/ML/MLHelpers.cs index d9a446e..bc6a15a 100644 --- a/DeepTrace/ML/MLHelpers.cs +++ b/DeepTrace/ML/MLHelpers.cs @@ -32,9 +32,14 @@ public static class MLHelpers await File.WriteAllTextAsync(fileName, csv); + return LoadFromCsv(mlContext, model, fileName); + } + + public static (IDataView View, string FileName) LoadFromCsv(MLContext mlContext, ModelDefinition model, string fileName) + { var columnNames = model.GetColumnNames(); - var columns = columnNames - .Select((x,i) => new TextLoader.Column(x, DataKind.String, i)) + var columns = columnNames + .Select((x, i) => new TextLoader.Column(x, DataKind.String, i)) .ToArray() ; diff --git a/DeepTrace/ML/MLProcessor.cs b/DeepTrace/ML/MLProcessor.cs index e779253..b514298 100644 --- a/DeepTrace/ML/MLProcessor.cs +++ b/DeepTrace/ML/MLProcessor.cs @@ -3,6 +3,7 @@ using Microsoft.ML; using Microsoft.ML.Data; using PrometheusAPI; using System.Data; +using static DeepTrace.MLModel1; namespace DeepTrace.ML { @@ -22,15 +23,19 @@ namespace DeepTrace.ML private string Name { get; set; } = "TestModel"; - public async Task Train(ModelDefinition modelDef, Action log) + public async Task Train(ModelDefinition modelDef, Action log) { var pipeline = _estimatorBuilder.BuildPipeline(_mlContext, modelDef); var (data, filename) = await MLHelpers.Convert(_mlContext, modelDef); + + DataOperationsCatalog.TrainTestData dataSplit = _mlContext.Data.TrainTestSplit(data, testFraction: 0.2); + _mlContext.Log += (_,e) => LogEvents(log, e); try { _schema = data.Schema; - _transformer = pipeline.Fit(data); + _transformer = pipeline.Fit(dataSplit.TrainSet); + return Evaluate(dataSplit.TestSet); } finally { @@ -49,6 +54,20 @@ namespace DeepTrace.ML } + private MLEvaluationMetrics Evaluate(IDataView testData) + { + var predictions = _transformer!.Transform(testData); + var metrics = _mlContext.MulticlassClassification.Evaluate(predictions, "Name"); + var evaluationMetrics = new MLEvaluationMetrics() + { + MicroAccuracy = metrics.MicroAccuracy, + MacroAccuracy = metrics.MacroAccuracy, + LogLoss = metrics.LogLoss, + LogLossReduction = metrics.LogLossReduction, + }; + return evaluationMetrics; + } + public byte[] Export() { if(_schema == null) @@ -89,12 +108,30 @@ namespace DeepTrace.ML mem.Read(bytes, 0, bytes.Length); (_mlContext, _schema, _transformer) = MLHelpers.ImportSingleModel(bytes); - } - public string Predict(DataSourceDefinition dataSourceDefinition) + public async Task Predict(TrainedModelDefinition trainedModel, ModelDefinition model, List data) { - throw new NotImplementedException(); + Import(trainedModel.Value); + var headers = string.Join(",", model.GetColumnNames().Select(x => $"\"{x}\"")); + var row = ModelDefinition.ConvertToCsv(data); + + var csv = headers+"\n"+row; + var fileName = Path.GetTempFileName(); + try + { + await File.WriteAllTextAsync(fileName, csv); + + var (dataView, _) = MLHelpers.LoadFromCsv(_mlContext, model, fileName); + + var predictionEngine = _mlContext.Model.CreatePredictionEngine(_transformer); + var prediction = predictionEngine.Predict(dataView); + return prediction; + } + finally + { + File.Delete(fileName); + } } } } diff --git a/DeepTrace/Pages/Index.razor b/DeepTrace/Pages/Index.razor index 6085c4a..318bc1f 100644 --- a/DeepTrace/Pages/Index.razor +++ b/DeepTrace/Pages/Index.razor @@ -1,4 +1,9 @@ @page "/" +@using DeepTrace.Data; +@using DeepTrace.Services; +@using DeepTrace.Controls; + +@inject ITrainedModelStorageService TrainedModelService Index @@ -6,4 +11,26 @@ Welcome to your new app. - +@if (_trainedModels != null) +{ + @foreach(TrainedModelDefinition model in _trainedModels) + { + + } +} else +{ + Nothing to display +} + + +@code{ + private List _trainedModels = new(); + + protected override async Task OnInitializedAsync() + { + base.OnInitialized(); + _trainedModels = await TrainedModelService.Load(); + + } + +} \ No newline at end of file diff --git a/DeepTrace/Pages/Training.razor b/DeepTrace/Pages/Training.razor index fffceec..f3377a7 100644 --- a/DeepTrace/Pages/Training.razor +++ b/DeepTrace/Pages/Training.razor @@ -553,6 +553,7 @@ var trainedModel = new TrainedModelDefinition { Id = _modelForm!.CurrentModel.Id, + IsEnabled = false, Name = _modelForm!.CurrentModel.Name, Value = bytes }; diff --git a/DeepTrace/Services/IModelStorageService.cs b/DeepTrace/Services/IModelStorageService.cs index bfa5d7a..ad40edc 100644 --- a/DeepTrace/Services/IModelStorageService.cs +++ b/DeepTrace/Services/IModelStorageService.cs @@ -10,6 +10,7 @@ namespace DeepTrace.Services { Task Delete(ModelDefinition source, bool ignoreNotStored = false); Task> Load(); + Task Load(BsonObjectId id); Task Store(ModelDefinition source); } } diff --git a/DeepTrace/Services/ModelStorageService.cs b/DeepTrace/Services/ModelStorageService.cs index f63dade..6e10239 100644 --- a/DeepTrace/Services/ModelStorageService.cs +++ b/DeepTrace/Services/ModelStorageService.cs @@ -25,6 +25,15 @@ namespace DeepTrace.Services var res = await (await collection.FindAsync("{}")).ToListAsync(); return res; } + + public async Task Load(BsonObjectId id) + { + var db = _client.GetDatabase(MongoDBDatabaseName); + var collection = db.GetCollection(MongoDBCollection); + var res = (await (await collection.FindAsync($"{{_id:ObjectId(\"{id}\")}}")).ToListAsync()).FirstOrDefault(); + return res; + } + public async Task Store(ModelDefinition source) { var db = _client.GetDatabase(MongoDBDatabaseName);