From fc98c3b50a5ec9b1db35303b30fa0d9cdaec3db3 Mon Sep 17 00:00:00 2001 From: Andrey Shabarshov Date: Sat, 22 Jul 2023 12:58:03 +0100 Subject: [PATCH] DEEP-20 Model training implemented --- .gitignore | 1 + DeepTrace/Controllers/DownloadController.cs | 38 +- DeepTrace/Data/MLModel1.consumption.cs | 189 ++++++++++ DeepTrace/Data/MLModel1.mbconfig | 388 +++++++++++++++++++- DeepTrace/Data/MLModel1.training.cs | 64 ++++ DeepTrace/Data/ModelDefinition.cs | 68 +++- DeepTrace/Data/Prediction.cs | 5 - DeepTrace/DeepTrace.csproj | 12 - DeepTrace/ML/EstimatorBuilder.cs | 1 + DeepTrace/ML/IMLProcessor.cs | 4 +- DeepTrace/ML/MLHelpers.cs | 23 +- DeepTrace/ML/MLProcessor.cs | 84 +++++ DeepTrace/ML/SpikeDetector.cs | 108 ------ DeepTrace/Pages/Training.razor | 19 +- DeepTrace/Program.cs | 2 +- DeepTrace/Services/IModelStorageService.cs | 14 +- DeepTrace/Services/ModelStorageService.cs | 17 +- 17 files changed, 837 insertions(+), 200 deletions(-) create mode 100644 DeepTrace/Data/MLModel1.consumption.cs create mode 100644 DeepTrace/Data/MLModel1.training.cs create mode 100644 DeepTrace/ML/MLProcessor.cs delete mode 100644 DeepTrace/ML/SpikeDetector.cs diff --git a/.gitignore b/.gitignore index a22d3c4..107977c 100644 --- a/.gitignore +++ b/.gitignore @@ -366,3 +366,4 @@ TrainData prometheus-2.45.0-rc.0.windows-amd64 mongodb-windows-x86_64-6.0.6 *.patch +/DeepTrace/Data/MLModel1.zip diff --git a/DeepTrace/Controllers/DownloadController.cs b/DeepTrace/Controllers/DownloadController.cs index 323f406..8613d49 100644 --- a/DeepTrace/Controllers/DownloadController.cs +++ b/DeepTrace/Controllers/DownloadController.cs @@ -8,9 +8,9 @@ namespace DeepTrace.Controllers [Route("api/[controller]")] public class DownloadController : Controller { - private readonly IModelStorageService _modelService; + private readonly IModelDefinitionService _modelService; - public DownloadController(IModelStorageService modelService) + public DownloadController(IModelDefinitionService modelService) { _modelService = modelService; } @@ -18,37 +18,11 @@ namespace DeepTrace.Controllers [HttpGet("mldata/{modelName}")] public async Task GetMLDataCsv([FromRoute] string modelName) { - var modelStorage = await _modelService.Load(); - var model = modelStorage.FirstOrDefault(x=>x.Name==modelName) ?? throw new ApplicationException($"Model {modelName} not found"); - var previousIntervals = model.IntervalDefinitionList; + var ModelDefinition = await _modelService.Load(); + var model = ModelDefinition.FirstOrDefault(x=>x.Name==modelName) ?? throw new ApplicationException($"Model {modelName} not found"); - var current = previousIntervals.First(); - var headers = string.Join(",", current.Data.Select((x, i) => $"Q{i + 1}min,Q{i + 1}max,Q{i + 1}avg,Q{i + 1}mean")); - headers += string.Join(",",",Name"); - - - var writer = new StringBuilder(); - writer.AppendLine(headers); - - foreach (var currentInterval in previousIntervals) - { - var data = ""; - for (var i = 0; i < currentInterval.Data.Count; i++) - { - - var queryData = currentInterval.Data[i]; - var min = queryData.Data.Min(x => x.Value); - var max = queryData.Data.Max(x => x.Value); - var avg = queryData.Data.Average(x => x.Value); - var mean = queryData.Data.Sum(x => x.Value) / queryData.Data.Count; - - data += min + "," + max + "," + avg + "," + mean + ","; - - } - data += currentInterval.Name; - writer.AppendLine(data); - } - return new(Encoding.UTF8.GetBytes(writer.ToString()),"text/csv") + var csv = model.ToCsv(); + return new(Encoding.UTF8.GetBytes(csv),"text/csv") { FileDownloadName = modelName+".csv" }; diff --git a/DeepTrace/Data/MLModel1.consumption.cs b/DeepTrace/Data/MLModel1.consumption.cs new file mode 100644 index 0000000..0a11343 --- /dev/null +++ b/DeepTrace/Data/MLModel1.consumption.cs @@ -0,0 +1,189 @@ +// This file was auto-generated by ML.NET Model Builder. +using Microsoft.ML; +using Microsoft.ML.Data; +using System; +using System.Linq; +using System.IO; +using System.Collections.Generic; +namespace DeepTrace +{ + public partial class MLModel1 + { + /// + /// model input class for MLModel1. + /// + #region model input class + public class ModelInput + { + [ColumnName(@"Q1min")] + public string Q1min { get; set; } + + [ColumnName(@"Q1max")] + public string Q1max { get; set; } + + [ColumnName(@"Q1avg")] + public string Q1avg { get; set; } + + [ColumnName(@"Q1mean")] + public string Q1mean { get; set; } + + [ColumnName(@"Q2min")] + public string Q2min { get; set; } + + [ColumnName(@"Q2max")] + public string Q2max { get; set; } + + [ColumnName(@"Q2avg")] + public string Q2avg { get; set; } + + [ColumnName(@"Q2mean")] + public string Q2mean { get; set; } + + [ColumnName(@"Q3min")] + public string Q3min { get; set; } + + [ColumnName(@"Q3max")] + public string Q3max { get; set; } + + [ColumnName(@"Q3avg")] + public string Q3avg { get; set; } + + [ColumnName(@"Q3mean")] + public string Q3mean { get; set; } + + [ColumnName(@"Q4min")] + public string Q4min { get; set; } + + [ColumnName(@"Q4max")] + public string Q4max { get; set; } + + [ColumnName(@"Q4avg")] + public string Q4avg { get; set; } + + [ColumnName(@"Q4mean")] + public string Q4mean { get; set; } + + [ColumnName(@"Q5min")] + public string Q5min { get; set; } + + [ColumnName(@"Q5max")] + public string Q5max { get; set; } + + [ColumnName(@"Q5avg")] + public string Q5avg { get; set; } + + [ColumnName(@"Q5mean")] + public string Q5mean { get; set; } + + [ColumnName(@"Name")] + public string Name { get; set; } + + } + + #endregion + + /// + /// model output class for MLModel1. + /// + #region model output class + public class ModelOutput + { + [ColumnName(@"Q1min")] + public string Q1min { get; set; } + + [ColumnName(@"Q1max")] + public float[] Q1max { get; set; } + + [ColumnName(@"Q1avg")] + public float[] Q1avg { get; set; } + + [ColumnName(@"Q1mean")] + public float[] Q1mean { get; set; } + + [ColumnName(@"Q2min")] + public float[] Q2min { get; set; } + + [ColumnName(@"Q2max")] + public float[] Q2max { get; set; } + + [ColumnName(@"Q2avg")] + public float[] Q2avg { get; set; } + + [ColumnName(@"Q2mean")] + public float[] Q2mean { get; set; } + + [ColumnName(@"Q3min")] + public float[] Q3min { get; set; } + + [ColumnName(@"Q3max")] + public float[] Q3max { get; set; } + + [ColumnName(@"Q3avg")] + public float[] Q3avg { get; set; } + + [ColumnName(@"Q3mean")] + public float[] Q3mean { get; set; } + + [ColumnName(@"Q4min")] + public string Q4min { get; set; } + + [ColumnName(@"Q4max")] + public float[] Q4max { get; set; } + + [ColumnName(@"Q4avg")] + public float[] Q4avg { get; set; } + + [ColumnName(@"Q4mean")] + public float[] Q4mean { get; set; } + + [ColumnName(@"Q5min")] + public float[] Q5min { get; set; } + + [ColumnName(@"Q5max")] + public float[] Q5max { get; set; } + + [ColumnName(@"Q5avg")] + public float[] Q5avg { get; set; } + + [ColumnName(@"Q5mean")] + public float[] Q5mean { get; set; } + + [ColumnName(@"Name")] + public uint Name { get; set; } + + [ColumnName(@"Features")] + public float[] Features { get; set; } + + [ColumnName(@"PredictedLabel")] + public string PredictedLabel { get; set; } + + [ColumnName(@"Score")] + public float[] Score { get; set; } + + } + + #endregion + + private static string MLNetModelPath = Path.GetFullPath("MLModel1.zip"); + + public static readonly Lazy> PredictEngine = new Lazy>(() => CreatePredictEngine(), true); + + /// + /// Use this method to predict on . + /// + /// model input. + /// + public static ModelOutput Predict(ModelInput input) + { + var predEngine = PredictEngine.Value; + return predEngine.Predict(input); + } + + private static PredictionEngine CreatePredictEngine() + { + var mlContext = new MLContext(); + ITransformer mlModel = mlContext.Model.Load(MLNetModelPath, out var _); + return mlContext.Model.CreatePredictionEngine(mlModel); + } + } +} diff --git a/DeepTrace/Data/MLModel1.mbconfig b/DeepTrace/Data/MLModel1.mbconfig index 94ebac1..e226ced 100644 --- a/DeepTrace/Data/MLModel1.mbconfig +++ b/DeepTrace/Data/MLModel1.mbconfig @@ -1,6 +1,390 @@ { - "TrainingTime": 0, - "Scenario": "Default", + "TrainingTime": 30, + "Scenario": "Classification", + "DataSource": { + "Type": "TabularFile", + "Version": 1, + "FilePath": "D:\\Downloads\\Model #4(1).csv", + "Delimiter": ",", + "DecimalMarker": ".", + "HasHeader": true, + "ColumnProperties": [ + { + "ColumnName": "Q1min", + "ColumnPurpose": "Ignore", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q1max", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q1avg", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q1mean", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q2min", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q2max", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q2avg", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q2mean", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q3min", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q3max", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q3avg", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q3mean", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q4min", + "ColumnPurpose": "Ignore", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q4max", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q4avg", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q4mean", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q5min", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q5max", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q5avg", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Q5mean", + "ColumnPurpose": "Feature", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + }, + { + "ColumnName": "Name", + "ColumnPurpose": "Label", + "ColumnDataFormat": "String", + "IsCategorical": false, + "Type": "Column", + "Version": 2 + } + ] + }, + "Environment": { + "Type": "LocalCPU", + "Version": 1 + }, + "RunHistory": { + "Version": 1, + "Type": "Result", + "Trials": [ + { + "Version": 0, + "Type": "Trial", + "TrainerName": "SdcaMaximumEntropyMulti", + "Score": 0.87460317460317472, + "RuntimeInSeconds": 5.47599983215332 + }, + { + "Version": 0, + "Type": "Trial", + "TrainerName": "FastForestOva", + "Score": 0.91460317460317475, + "RuntimeInSeconds": 3.6610000133514404 + }, + { + "Version": 0, + "Type": "Trial", + "TrainerName": "FastTreeOva", + "Score": 0.91460317460317475, + "RuntimeInSeconds": 3.2239999771118164 + }, + { + "Version": 0, + "Type": "Trial", + "TrainerName": "LbfgsLogisticRegressionOva", + "Score": 0.96035353535353529, + "RuntimeInSeconds": 2.812000036239624 + }, + { + "Version": 0, + "Type": "Trial", + "TrainerName": "SdcaLogisticRegressionOva", + "Score": 0.87460317460317472, + "RuntimeInSeconds": 7.8530001640319824 + }, + { + "Version": 0, + "Type": "Trial", + "TrainerName": "LbfgsMaximumEntropyMulti", + "Score": 0.96035353535353529, + "RuntimeInSeconds": 2.3250000476837158 + }, + { + "Version": 0, + "Type": "Trial", + "TrainerName": "LightGbmMulti", + "Score": 0.91460317460317475, + "RuntimeInSeconds": 2.875 + } + ], + "Pipeline": { + "parameter": { + "0": { + "InputColumnName": "Q1max", + "OutputColumnName": "Q1max" + }, + "1": { + "InputColumnName": "Q1avg", + "OutputColumnName": "Q1avg" + }, + "10": { + "InputColumnName": "Q3mean", + "OutputColumnName": "Q3mean" + }, + "11": { + "InputColumnName": "Q4max", + "OutputColumnName": "Q4max" + }, + "12": { + "InputColumnName": "Q4avg", + "OutputColumnName": "Q4avg" + }, + "13": { + "InputColumnName": "Q4mean", + "OutputColumnName": "Q4mean" + }, + "14": { + "InputColumnName": "Q5min", + "OutputColumnName": "Q5min" + }, + "15": { + "InputColumnName": "Q5max", + "OutputColumnName": "Q5max" + }, + "16": { + "InputColumnName": "Q5avg", + "OutputColumnName": "Q5avg" + }, + "17": { + "InputColumnName": "Q5mean", + "OutputColumnName": "Q5mean" + }, + "18": { + "InputColumnNames": [ + "Q1max", + "Q1avg", + "Q1mean", + "Q2min", + "Q2max", + "Q2avg", + "Q2mean", + "Q3min", + "Q3max", + "Q3avg", + "Q3mean", + "Q4max", + "Q4avg", + "Q4mean", + "Q5min", + "Q5max", + "Q5avg", + "Q5mean" + ], + "OutputColumnName": "Features" + }, + "19": { + "OutputColumnName": "Name", + "InputColumnName": "Name" + }, + "2": { + "InputColumnName": "Q1mean", + "OutputColumnName": "Q1mean" + }, + "20": { + "OutputColumnNames": [ + "Features" + ], + "InputColumnNames": [ + "Features" + ] + }, + "21": { + "L1Regularization": 1.0, + "L2Regularization": 1.0, + "LabelColumnName": "Name", + "FeatureColumnName": "Features" + }, + "22": { + "OutputColumnName": "PredictedLabel", + "InputColumnName": "PredictedLabel" + }, + "3": { + "InputColumnName": "Q2min", + "OutputColumnName": "Q2min" + }, + "4": { + "InputColumnName": "Q2max", + "OutputColumnName": "Q2max" + }, + "5": { + "InputColumnName": "Q2avg", + "OutputColumnName": "Q2avg" + }, + "6": { + "InputColumnName": "Q2mean", + "OutputColumnName": "Q2mean" + }, + "7": { + "InputColumnName": "Q3min", + "OutputColumnName": "Q3min" + }, + "8": { + "InputColumnName": "Q3max", + "OutputColumnName": "Q3max" + }, + "9": { + "InputColumnName": "Q3avg", + "OutputColumnName": "Q3avg" + } + }, + "estimators": [ + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "FeaturizeText", + "Concatenate", + "MapValueToKey", + "NormalizeMinMax", + "LbfgsLogisticRegressionOva", + "MapKeyToValue" + ] + }, + "MetricName": "MicroAccuracy" + }, "Type": "TrainingConfig", "Version": 2 } \ No newline at end of file diff --git a/DeepTrace/Data/MLModel1.training.cs b/DeepTrace/Data/MLModel1.training.cs new file mode 100644 index 0000000..a2ddc91 --- /dev/null +++ b/DeepTrace/Data/MLModel1.training.cs @@ -0,0 +1,64 @@ +// This file was auto-generated by ML.NET Model Builder. +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.ML.Data; +using Microsoft.ML.Trainers; +using Microsoft.ML; + +namespace DeepTrace +{ + public partial class MLModel1 + { + /// + /// Retrains model using the pipeline generated as part of the training process. For more information on how to load data, see aka.ms/loaddata. + /// + /// + /// + /// + public static ITransformer RetrainPipeline(MLContext mlContext, IDataView trainData) + { + var pipeline = BuildPipeline(mlContext); + var model = pipeline.Fit(trainData); + + return model; + } + + /// + /// build the pipeline that is used from model builder. Use this function to retrain model. + /// + /// + /// + public static IEstimator BuildPipeline(MLContext mlContext) + { + // Data process configuration with pipeline data transformations + var pipeline = mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q1max",outputColumnName:@"Q1max") + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q1avg",outputColumnName:@"Q1avg")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q1mean",outputColumnName:@"Q1mean")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q2min",outputColumnName:@"Q2min")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q2max",outputColumnName:@"Q2max")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q2avg",outputColumnName:@"Q2avg")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q2mean",outputColumnName:@"Q2mean")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q3min",outputColumnName:@"Q3min")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q3max",outputColumnName:@"Q3max")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q3avg",outputColumnName:@"Q3avg")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q3mean",outputColumnName:@"Q3mean")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q4max",outputColumnName:@"Q4max")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q4avg",outputColumnName:@"Q4avg")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q4mean",outputColumnName:@"Q4mean")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q5min",outputColumnName:@"Q5min")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q5max",outputColumnName:@"Q5max")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q5avg",outputColumnName:@"Q5avg")) + .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName:@"Q5mean",outputColumnName:@"Q5mean")) + .Append(mlContext.Transforms.Concatenate(@"Features", new []{@"Q1max",@"Q1avg",@"Q1mean",@"Q2min",@"Q2max",@"Q2avg",@"Q2mean",@"Q3min",@"Q3max",@"Q3avg",@"Q3mean",@"Q4max",@"Q4avg",@"Q4mean",@"Q5min",@"Q5max",@"Q5avg",@"Q5mean"})) + .Append(mlContext.Transforms.Conversion.MapValueToKey(outputColumnName:@"Name",inputColumnName:@"Name")) + .Append(mlContext.Transforms.NormalizeMinMax(@"Features", @"Features")) + .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(binaryEstimator: mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(new LbfgsLogisticRegressionBinaryTrainer.Options(){L1Regularization=1F,L2Regularization=1F,LabelColumnName=@"Name",FeatureColumnName=@"Features"}), labelColumnName:@"Name")) + .Append(mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName:@"PredictedLabel",inputColumnName:@"PredictedLabel")); + + return pipeline; + } + } +} diff --git a/DeepTrace/Data/ModelDefinition.cs b/DeepTrace/Data/ModelDefinition.cs index 475a4f3..ac86b59 100644 --- a/DeepTrace/Data/ModelDefinition.cs +++ b/DeepTrace/Data/ModelDefinition.cs @@ -1,20 +1,66 @@ using DeepTrace.Services; +using MongoDB.Bson.Serialization.Attributes; +using MongoDB.Bson; +using System.Text; -namespace DeepTrace.Data +namespace DeepTrace.Data; + +public class ModelDefinition { - - public class ModelDefinition + private static int _instanceId; + public ModelDefinition() { - private static int _instanceId; - public ModelDefinition() + var id = Interlocked.Increment(ref _instanceId); + Name = $"Model #{id}"; + } + + [BsonId] + public ObjectId? Id { get; set; } + public string Name { get; set; } + public DataSourceStorage DataSource { get; set; } = new(); + public string AIparameters { get; set; } = string.Empty; + public List IntervalDefinitionList { get; set; } = new(); + + public List GetColumnNames() + { + var measureNames = new[] { "min", "max", "avg", "mean" }; + var columnNames = new List(); + foreach (var item in DataSource.Queries) { - var id = Interlocked.Increment(ref _instanceId); - Name = $"Model #{id}"; + columnNames.AddRange(measureNames.Select(x => $"{item.Query}_{x}")); } - public string Name { get; set; } - public DataSourceStorage DataSource { get; set; } = new(); - public string AIparameters { get; set; } = string.Empty; - public List IntervalDefinitionList { get; set; } = new(); + return columnNames; + } + + public string ToCsv() + { + var current = IntervalDefinitionList.First(); + var headers = string.Join(",", GetColumnNames().Select(x=>$"\"{x}\"")) + ",Name"; + + + var writer = new StringBuilder(); + writer.AppendLine(headers); + + foreach (var currentInterval in IntervalDefinitionList) + { + var data = ""; + for (var i = 0; i < currentInterval.Data.Count; i++) + { + + var queryData = currentInterval.Data[i]; + var min = queryData.Data.Min(x => x.Value); + var max = queryData.Data.Max(x => x.Value); + var avg = queryData.Data.Average(x => x.Value); + var mean = queryData.Data.Sum(x => x.Value) / queryData.Data.Count; + + data += min + "," + max + "," + avg + "," + mean + ","; + + } + data += currentInterval.Name; + writer.AppendLine(data); + } + + return writer.ToString(); } } diff --git a/DeepTrace/Data/Prediction.cs b/DeepTrace/Data/Prediction.cs index 97d8502..644d766 100644 --- a/DeepTrace/Data/Prediction.cs +++ b/DeepTrace/Data/Prediction.cs @@ -1,9 +1,4 @@ using Microsoft.ML.Data; -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; namespace DeepTrace.Data; diff --git a/DeepTrace/DeepTrace.csproj b/DeepTrace/DeepTrace.csproj index c313b24..ac1b184 100644 --- a/DeepTrace/DeepTrace.csproj +++ b/DeepTrace/DeepTrace.csproj @@ -15,16 +15,4 @@ - - - MLModel1.mbconfig - - - MLModel1.mbconfig - - - MLModel1.mbconfig - PreserveNewest - - \ No newline at end of file diff --git a/DeepTrace/ML/EstimatorBuilder.cs b/DeepTrace/ML/EstimatorBuilder.cs index 87c09af..0a359c4 100644 --- a/DeepTrace/ML/EstimatorBuilder.cs +++ b/DeepTrace/ML/EstimatorBuilder.cs @@ -2,6 +2,7 @@ using Microsoft.ML; using Microsoft.ML.Trainers; + namespace DeepTrace.ML { public class EstimatorBuilder : IEstimatorBuilder diff --git a/DeepTrace/ML/IMLProcessor.cs b/DeepTrace/ML/IMLProcessor.cs index 8d7ff5a..d96d1f9 100644 --- a/DeepTrace/ML/IMLProcessor.cs +++ b/DeepTrace/ML/IMLProcessor.cs @@ -5,8 +5,8 @@ namespace DeepTrace.ML; public interface IMLProcessor { - void Fit(ModelDefinition modelDef, DataSourceDefinition dataSourceDef); + Task Train(ModelDefinition modelDef); byte[] Export(); void Import(byte[] data); - string Predict(TimeSeries[] data); + string Predict(DataSourceDefinition dataSource); } diff --git a/DeepTrace/ML/MLHelpers.cs b/DeepTrace/ML/MLHelpers.cs index a22af46..c6d6639 100644 --- a/DeepTrace/ML/MLHelpers.cs +++ b/DeepTrace/ML/MLHelpers.cs @@ -1,4 +1,7 @@ -using Microsoft.ML; +using DeepTrace.Data; +using DeepTrace.Services; +using Microsoft.ML; +using Microsoft.ML.Data; namespace DeepTrace.ML; @@ -21,4 +24,22 @@ public static class MLHelpers return new (mlContext, schema, transformer); } + + public static async Task<(IDataView View, string FileName)> Convert(MLContext mlContext, ModelDefinition model) + { + var csv = model.ToCsv(); + var fileName = Path.GetTempFileName(); + + await File.WriteAllTextAsync(fileName, csv); + + var columnNames = model.GetColumnNames(); + var columns = columnNames + .Select((x,i) => new TextLoader.Column(x, DataKind.Double, i)) + .ToArray() + ; + + var view = mlContext.Data.LoadFromTextFile(fileName, columns, separatorChar: ',', hasHeader: true, allowQuoting: true, trimWhitespace: true); + + return (view, fileName); + } } diff --git a/DeepTrace/ML/MLProcessor.cs b/DeepTrace/ML/MLProcessor.cs new file mode 100644 index 0000000..807d2d5 --- /dev/null +++ b/DeepTrace/ML/MLProcessor.cs @@ -0,0 +1,84 @@ +using DeepTrace.Data; +using Microsoft.ML; +using Microsoft.ML.Data; +using PrometheusAPI; +using System.Data; + +namespace DeepTrace.ML +{ + public class MLProcessor : IMLProcessor + { + private MLContext _mlContext = new MLContext(); + private EstimatorBuilder _estimatorBuilder = new EstimatorBuilder(); + private DataViewSchema? _schema; + private ITransformer? _transformer; + + private string Name { get; set; } + + public async Task Train(ModelDefinition modelDef) + { + var pipeline = _estimatorBuilder.BuildPipeline(_mlContext, modelDef); + var (data, filename) = await MLHelpers.Convert(_mlContext,modelDef); + try + { + _schema = data.Schema; + _transformer = pipeline.Fit(data); + } + finally + { + File.Delete(filename); + } + + } + + private static string _signature = "DeepTrace-Model-v1-"+typeof(MLProcessor).Name; + + public byte[] Export() + { + if(_schema == null) + { + throw new ArgumentNullException(nameof (_schema)); + } + + if (_transformer == null) + { + throw new ArgumentNullException(nameof(_transformer)); + } + + using var mem = new MemoryStream(); + mem.WriteString(_signature); + + mem.WriteString(Name); + + var bytes = MLHelpers.ExportSingleModel(new ModelRecord(_mlContext, _schema, _transformer)); + + mem.WriteInt(bytes.Length); + mem.Write(bytes); + + + return mem.ToArray(); + } + + public void Import(byte[] data) + { + var mem = new MemoryStream(data); + var sig = mem.ReadString(); + if (sig != _signature) + throw new ApplicationException($"Wrong data for {GetType().Name}"); + + Name = mem.ReadString(); + var size = mem.ReadInt(); + var bytes = new byte[size]; + + mem.Read(bytes, 0, bytes.Length); + + (_mlContext, _schema, _transformer) = MLHelpers.ImportSingleModel(bytes); + + } + + public string Predict(DataSourceDefinition dataSourceDefinition) + { + throw new NotImplementedException(); + } + } +} diff --git a/DeepTrace/ML/SpikeDetector.cs b/DeepTrace/ML/SpikeDetector.cs deleted file mode 100644 index 743e886..0000000 --- a/DeepTrace/ML/SpikeDetector.cs +++ /dev/null @@ -1,108 +0,0 @@ -using DeepTrace.Data; -using Microsoft.ML; -using Microsoft.ML.Data; -using PrometheusAPI; -using System.Data; -using System.Linq; -using System.Xml.Linq; - -namespace DeepTrace.ML -{ - public class SpikeDetector : IMLProcessor - { - private readonly Dictionary _model = new(); - - public void Fit(ModelDefinition modelDef, DataSourceDefinition dataSourceDef) - { - var models = dataSourceDef.Queries - .Select( (x,i) => - { - // since we are just detecting spikes here we can combine all the time series into one - - List data = modelDef.IntervalDefinitionList[i].Data - .Select(y => y.Data) - .Aggregate>((acc, list) => acc.Concat(list)) - .ToList(); - - return (Name: x.Query, Data: data); - }) - .ToList(); - - foreach (var (name, data) in models) - { - _model[name] = FitOne(data); - } - } - - private static string _signature = "DeepTrace-Model-v1-"+typeof(SpikeDetector).Name; - - public byte[] Export() - { - using var mem = new MemoryStream(); - mem.WriteString(_signature); - mem.WriteInt(_model.Count); - - foreach ( var (name, model) in _model) - { - mem.WriteString(name); - - var bytes = MLHelpers.ExportSingleModel(model); - - mem.WriteInt(bytes.Length); - mem.Write(bytes); - } - - return mem.ToArray(); - } - - public void Import(byte[] data) - { - var mem = new MemoryStream(data); - var sig = mem.ReadString(); - if (sig != _signature) - throw new ApplicationException($"Wrong data for {GetType().Name}"); - - var count = mem.ReadInt(); - - for ( var i = 0; i < count; i++ ) - { - var name = mem.ReadString(); - var size = mem.ReadInt(); - var bytes = new byte[size]; - - mem.Read(bytes, 0, bytes.Length); - - var model = MLHelpers.ImportSingleModel(bytes); - - _model[name] = model; - } - } - - public string Predict(TimeSeries[] data) - { - throw new NotImplementedException(); - } - - // -------------------------- internals - - class SpikePrediction - { - [VectorType(3)] - public double[] Prediction { get; set; } = new double[3]; - } - - private static ModelRecord FitOne(List dataSet) - { - var mlContext = new MLContext(); - var dataView = mlContext.Data.LoadFromEnumerable(dataSet); - - const string outputColumnName = nameof(SpikePrediction.Prediction); - const string inputColumnName = nameof(TimeSeries.Value); - - var iidSpikeEstimator = mlContext.Transforms.DetectIidSpike(outputColumnName,inputColumnName, 95.0d, dataSet.Count); - var transformer = iidSpikeEstimator.Fit(dataView); - - return new (mlContext, dataView.Schema, transformer); - } - } -} diff --git a/DeepTrace/Pages/Training.razor b/DeepTrace/Pages/Training.razor index c6d43c6..9660beb 100644 --- a/DeepTrace/Pages/Training.razor +++ b/DeepTrace/Pages/Training.razor @@ -10,7 +10,7 @@ @inject PrometheusClient Prometheus @inject IDialogService DialogService @inject IDataSourceStorageService StorageService -@inject IModelStorageService ModelService +@inject IModelDefinitionService ModelService @inject IEstimatorBuilder EstimatorBuilder @inject NavigationManager NavManager @inject IJSRuntime Js @@ -32,7 +32,7 @@ - + @foreach (var model in _modelDefinitions) { @model.Name @@ -133,7 +133,7 @@ _self = self; } - private ModelStorage _currentModel = new(); + private ModelDefinition _currentModel = new(); private readonly Training _self; [Required] @@ -156,7 +156,7 @@ } [Required] - public ModelStorage CurrentModel + public ModelDefinition CurrentModel { get { @@ -191,7 +191,7 @@ private ModelForm? _modelForm; private TimeSeriesData? DisplayData { get; set; } private List _dataSources = new(); - private List _modelDefinitions = new() {new()}; + private List _modelDefinitions = new() {new()}; private DateTime? _minDate; private DateTime? _maxDate; @@ -423,12 +423,13 @@ DialogService.Show("Error", parameters, options); } - private void HandleTrain() + private async Task HandleTrain() { - var mlContext = new MLContext(); - var pipeline = EstimatorBuilder.BuildPipeline(mlContext, _modelForm!.CurrentModel); + var mlProcessor = new MLProcessor(); + await mlProcessor.Train(_modelForm!.CurrentModel); + var bytes = mlProcessor.Export(); - + //save to Mongo } diff --git a/DeepTrace/Program.cs b/DeepTrace/Program.cs index 74a11ae..a9db77a 100644 --- a/DeepTrace/Program.cs +++ b/DeepTrace/Program.cs @@ -16,7 +16,7 @@ builder.Services.AddHttpClient(c => c.BaseAddress = new UriBui builder.Services .AddSingleton( s => new MongoClient(builder.Configuration.GetValue("Connections:MongoDb") )) .AddSingleton() - .AddSingleton() + .AddSingleton() .AddSingleton() ; diff --git a/DeepTrace/Services/IModelStorageService.cs b/DeepTrace/Services/IModelStorageService.cs index 59947c7..a94e915 100644 --- a/DeepTrace/Services/IModelStorageService.cs +++ b/DeepTrace/Services/IModelStorageService.cs @@ -1,19 +1,15 @@ using MongoDB.Bson.Serialization.Attributes; using MongoDB.Bson; using DeepTrace.Data; +using System.Text; namespace DeepTrace.Services { - public class ModelStorage : ModelDefinition - { - [BsonId] - public ObjectId? Id { get; set; } - } - public interface IModelStorageService + public interface IModelDefinitionService { - Task Delete(ModelStorage source, bool ignoreNotStored = false); - Task> Load(); - Task Store(ModelStorage source); + Task Delete(ModelDefinition source, bool ignoreNotStored = false); + Task> Load(); + Task Store(ModelDefinition source); } } diff --git a/DeepTrace/Services/ModelStorageService.cs b/DeepTrace/Services/ModelStorageService.cs index b2e7070..eb4512e 100644 --- a/DeepTrace/Services/ModelStorageService.cs +++ b/DeepTrace/Services/ModelStorageService.cs @@ -1,9 +1,10 @@ -using MongoDB.Bson; +using DeepTrace.Data; +using MongoDB.Bson; using MongoDB.Driver; namespace DeepTrace.Services { - public class ModelStorageService : IModelStorageService + public class ModelDefinitionService : IModelDefinitionService { private const string MongoDBDatabaseName = "DeepTrace"; @@ -11,23 +12,23 @@ namespace DeepTrace.Services private readonly IMongoClient _client; - public ModelStorageService(IMongoClient client) + public ModelDefinitionService(IMongoClient client) { _client = client; } - public async Task> Load() + public async Task> Load() { var db = _client.GetDatabase(MongoDBDatabaseName); - var collection = db.GetCollection(MongoDBCollection); + var collection = db.GetCollection(MongoDBCollection); var res = await (await collection.FindAsync("{}")).ToListAsync(); return res; } - public async Task Store(ModelStorage source) + public async Task Store(ModelDefinition source) { var db = _client.GetDatabase(MongoDBDatabaseName); - var collection = db.GetCollection(MongoDBCollection); + var collection = db.GetCollection(MongoDBCollection); if (source.Id == null) source.Id = ObjectId.GenerateNewId(); @@ -40,7 +41,7 @@ namespace DeepTrace.Services ); } - public async Task Delete(ModelStorage source, bool ignoreNotStored = false) + public async Task Delete(ModelDefinition source, bool ignoreNotStored = false) { if (source.Id == null) {