DEEP-20 Model training implemented

This commit is contained in:
Andrey Shabarshov 2023-07-22 12:58:03 +01:00
parent 2c49f89f26
commit fc98c3b50a
17 changed files with 837 additions and 200 deletions

1
.gitignore vendored
View File

@ -366,3 +366,4 @@ TrainData
prometheus-2.45.0-rc.0.windows-amd64 prometheus-2.45.0-rc.0.windows-amd64
mongodb-windows-x86_64-6.0.6 mongodb-windows-x86_64-6.0.6
*.patch *.patch
/DeepTrace/Data/MLModel1.zip

View File

@ -8,9 +8,9 @@ namespace DeepTrace.Controllers
[Route("api/[controller]")] [Route("api/[controller]")]
public class DownloadController : Controller public class DownloadController : Controller
{ {
private readonly IModelStorageService _modelService; private readonly IModelDefinitionService _modelService;
public DownloadController(IModelStorageService modelService) public DownloadController(IModelDefinitionService modelService)
{ {
_modelService = modelService; _modelService = modelService;
} }
@ -18,37 +18,11 @@ namespace DeepTrace.Controllers
[HttpGet("mldata/{modelName}")] [HttpGet("mldata/{modelName}")]
public async Task<FileContentResult> GetMLDataCsv([FromRoute] string modelName) public async Task<FileContentResult> GetMLDataCsv([FromRoute] string modelName)
{ {
var modelStorage = await _modelService.Load(); var ModelDefinition = await _modelService.Load();
var model = modelStorage.FirstOrDefault(x=>x.Name==modelName) ?? throw new ApplicationException($"Model {modelName} not found"); var model = ModelDefinition.FirstOrDefault(x=>x.Name==modelName) ?? throw new ApplicationException($"Model {modelName} not found");
var previousIntervals = model.IntervalDefinitionList;
var current = previousIntervals.First(); var csv = model.ToCsv();
var headers = string.Join(",", current.Data.Select((x, i) => $"Q{i + 1}min,Q{i + 1}max,Q{i + 1}avg,Q{i + 1}mean")); return new(Encoding.UTF8.GetBytes(csv),"text/csv")
headers += string.Join(",",",Name");
var writer = new StringBuilder();
writer.AppendLine(headers);
foreach (var currentInterval in previousIntervals)
{
var data = "";
for (var i = 0; i < currentInterval.Data.Count; i++)
{
var queryData = currentInterval.Data[i];
var min = queryData.Data.Min(x => x.Value);
var max = queryData.Data.Max(x => x.Value);
var avg = queryData.Data.Average(x => x.Value);
var mean = queryData.Data.Sum(x => x.Value) / queryData.Data.Count;
data += min + "," + max + "," + avg + "," + mean + ",";
}
data += currentInterval.Name;
writer.AppendLine(data);
}
return new(Encoding.UTF8.GetBytes(writer.ToString()),"text/csv")
{ {
FileDownloadName = modelName+".csv" FileDownloadName = modelName+".csv"
}; };

View File

@ -0,0 +1,189 @@
// This file was auto-generated by ML.NET Model Builder.
using Microsoft.ML;
using Microsoft.ML.Data;
using System;
using System.Linq;
using System.IO;
using System.Collections.Generic;
namespace DeepTrace
{
public partial class MLModel1
{
#region model input class
/// <summary>
/// Input row accepted by MLModel1: four statistics (min, max, avg, mean) for each of the
/// queries Q1..Q5 plus the <c>Name</c> column. Every column, including the statistics,
/// is declared as <see cref="string"/>.
/// </summary>
public class ModelInput
{
    // Query 1 statistics.
    [ColumnName("Q1min")] public string Q1min { get; set; }
    [ColumnName("Q1max")] public string Q1max { get; set; }
    [ColumnName("Q1avg")] public string Q1avg { get; set; }
    [ColumnName("Q1mean")] public string Q1mean { get; set; }

    // Query 2 statistics.
    [ColumnName("Q2min")] public string Q2min { get; set; }
    [ColumnName("Q2max")] public string Q2max { get; set; }
    [ColumnName("Q2avg")] public string Q2avg { get; set; }
    [ColumnName("Q2mean")] public string Q2mean { get; set; }

    // Query 3 statistics.
    [ColumnName("Q3min")] public string Q3min { get; set; }
    [ColumnName("Q3max")] public string Q3max { get; set; }
    [ColumnName("Q3avg")] public string Q3avg { get; set; }
    [ColumnName("Q3mean")] public string Q3mean { get; set; }

    // Query 4 statistics.
    [ColumnName("Q4min")] public string Q4min { get; set; }
    [ColumnName("Q4max")] public string Q4max { get; set; }
    [ColumnName("Q4avg")] public string Q4avg { get; set; }
    [ColumnName("Q4mean")] public string Q4mean { get; set; }

    // Query 5 statistics.
    [ColumnName("Q5min")] public string Q5min { get; set; }
    [ColumnName("Q5max")] public string Q5max { get; set; }
    [ColumnName("Q5avg")] public string Q5avg { get; set; }
    [ColumnName("Q5mean")] public string Q5mean { get; set; }

    // Column the training pipeline maps to a key and uses as the label.
    [ColumnName("Name")] public string Name { get; set; }
}
#endregion
#region model output class
/// <summary>
/// Output row produced by MLModel1. Columns that the pipeline featurizes are exposed as
/// <c>float[]</c>; <c>Q1min</c> and <c>Q4min</c> are not part of the pipeline and stay
/// <see cref="string"/>. <c>Name</c> is exposed as <see cref="uint"/>, and the prediction
/// itself is carried by <c>PredictedLabel</c> and <c>Score</c>.
/// </summary>
public class ModelOutput
{
    // Query 1 columns (Q1min is not featurized by the pipeline).
    [ColumnName("Q1min")] public string Q1min { get; set; }
    [ColumnName("Q1max")] public float[] Q1max { get; set; }
    [ColumnName("Q1avg")] public float[] Q1avg { get; set; }
    [ColumnName("Q1mean")] public float[] Q1mean { get; set; }

    // Query 2 columns.
    [ColumnName("Q2min")] public float[] Q2min { get; set; }
    [ColumnName("Q2max")] public float[] Q2max { get; set; }
    [ColumnName("Q2avg")] public float[] Q2avg { get; set; }
    [ColumnName("Q2mean")] public float[] Q2mean { get; set; }

    // Query 3 columns.
    [ColumnName("Q3min")] public float[] Q3min { get; set; }
    [ColumnName("Q3max")] public float[] Q3max { get; set; }
    [ColumnName("Q3avg")] public float[] Q3avg { get; set; }
    [ColumnName("Q3mean")] public float[] Q3mean { get; set; }

    // Query 4 columns (Q4min is not featurized by the pipeline).
    [ColumnName("Q4min")] public string Q4min { get; set; }
    [ColumnName("Q4max")] public float[] Q4max { get; set; }
    [ColumnName("Q4avg")] public float[] Q4avg { get; set; }
    [ColumnName("Q4mean")] public float[] Q4mean { get; set; }

    // Query 5 columns.
    [ColumnName("Q5min")] public float[] Q5min { get; set; }
    [ColumnName("Q5max")] public float[] Q5max { get; set; }
    [ColumnName("Q5avg")] public float[] Q5avg { get; set; }
    [ColumnName("Q5mean")] public float[] Q5mean { get; set; }

    // Label column after the pipeline's value-to-key mapping.
    [ColumnName("Name")] public uint Name { get; set; }

    // Concatenated, normalized feature vector fed to the trainer.
    [ColumnName("Features")] public float[] Features { get; set; }

    // Predicted label, mapped back from key to its original value.
    [ColumnName("PredictedLabel")] public string PredictedLabel { get; set; }

    // Scores emitted by the trainer (presumably one per class - TODO confirm).
    [ColumnName("Score")] public float[] Score { get; set; }
}
#endregion
/// <summary>
/// Absolute path of the serialized model. The relative name "MLModel1.zip" is resolved
/// against the process's current working directory when the type is initialized.
/// Never reassigned, hence <c>readonly</c>.
/// </summary>
private static readonly string MLNetModelPath = Path.GetFullPath("MLModel1.zip");
/// <summary>
/// Shared prediction engine, created on first access. The <c>true</c> argument asks
/// <see cref="Lazy{T}"/> for thread-safe initialization of the value.
/// </summary>
public static readonly Lazy<PredictionEngine<ModelInput, ModelOutput>> PredictEngine = new Lazy<PredictionEngine<ModelInput, ModelOutput>>(() => CreatePredictEngine(), true);
/// <summary>
/// Runs a single prediction for <paramref name="input"/> on the shared, lazily created
/// <see cref="PredictEngine"/>.
/// </summary>
/// <param name="input">Row to score.</param>
/// <returns>The <see cref="ModelOutput"/> produced by the engine.</returns>
// NOTE(review): one engine instance is shared by every caller - confirm concurrent calls are safe.
public static ModelOutput Predict(ModelInput input) => PredictEngine.Value.Predict(input);
/// <summary>
/// Loads the serialized model from <see cref="MLNetModelPath"/> into a fresh
/// <see cref="MLContext"/> and wraps it in a prediction engine. The input schema
/// returned by the loader is discarded.
/// </summary>
private static PredictionEngine<ModelInput, ModelOutput> CreatePredictEngine()
{
    var context = new MLContext();
    var trainedModel = context.Model.Load(MLNetModelPath, out _);
    return context.Model.CreatePredictionEngine<ModelInput, ModelOutput>(trainedModel);
}
}
}

View File

@ -1,6 +1,390 @@
{ {
"TrainingTime": 0, "TrainingTime": 30,
"Scenario": "Default", "Scenario": "Classification",
"DataSource": {
"Type": "TabularFile",
"Version": 1,
"FilePath": "D:\\Downloads\\Model #4(1).csv",
"Delimiter": ",",
"DecimalMarker": ".",
"HasHeader": true,
"ColumnProperties": [
{
"ColumnName": "Q1min",
"ColumnPurpose": "Ignore",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q1max",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q1avg",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q1mean",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q2min",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q2max",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q2avg",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q2mean",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q3min",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q3max",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q3avg",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q3mean",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q4min",
"ColumnPurpose": "Ignore",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q4max",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q4avg",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q4mean",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q5min",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q5max",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q5avg",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Q5mean",
"ColumnPurpose": "Feature",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
},
{
"ColumnName": "Name",
"ColumnPurpose": "Label",
"ColumnDataFormat": "String",
"IsCategorical": false,
"Type": "Column",
"Version": 2
}
]
},
"Environment": {
"Type": "LocalCPU",
"Version": 1
},
"RunHistory": {
"Version": 1,
"Type": "Result",
"Trials": [
{
"Version": 0,
"Type": "Trial",
"TrainerName": "SdcaMaximumEntropyMulti",
"Score": 0.87460317460317472,
"RuntimeInSeconds": 5.47599983215332
},
{
"Version": 0,
"Type": "Trial",
"TrainerName": "FastForestOva",
"Score": 0.91460317460317475,
"RuntimeInSeconds": 3.6610000133514404
},
{
"Version": 0,
"Type": "Trial",
"TrainerName": "FastTreeOva",
"Score": 0.91460317460317475,
"RuntimeInSeconds": 3.2239999771118164
},
{
"Version": 0,
"Type": "Trial",
"TrainerName": "LbfgsLogisticRegressionOva",
"Score": 0.96035353535353529,
"RuntimeInSeconds": 2.812000036239624
},
{
"Version": 0,
"Type": "Trial",
"TrainerName": "SdcaLogisticRegressionOva",
"Score": 0.87460317460317472,
"RuntimeInSeconds": 7.8530001640319824
},
{
"Version": 0,
"Type": "Trial",
"TrainerName": "LbfgsMaximumEntropyMulti",
"Score": 0.96035353535353529,
"RuntimeInSeconds": 2.3250000476837158
},
{
"Version": 0,
"Type": "Trial",
"TrainerName": "LightGbmMulti",
"Score": 0.91460317460317475,
"RuntimeInSeconds": 2.875
}
],
"Pipeline": {
"parameter": {
"0": {
"InputColumnName": "Q1max",
"OutputColumnName": "Q1max"
},
"1": {
"InputColumnName": "Q1avg",
"OutputColumnName": "Q1avg"
},
"10": {
"InputColumnName": "Q3mean",
"OutputColumnName": "Q3mean"
},
"11": {
"InputColumnName": "Q4max",
"OutputColumnName": "Q4max"
},
"12": {
"InputColumnName": "Q4avg",
"OutputColumnName": "Q4avg"
},
"13": {
"InputColumnName": "Q4mean",
"OutputColumnName": "Q4mean"
},
"14": {
"InputColumnName": "Q5min",
"OutputColumnName": "Q5min"
},
"15": {
"InputColumnName": "Q5max",
"OutputColumnName": "Q5max"
},
"16": {
"InputColumnName": "Q5avg",
"OutputColumnName": "Q5avg"
},
"17": {
"InputColumnName": "Q5mean",
"OutputColumnName": "Q5mean"
},
"18": {
"InputColumnNames": [
"Q1max",
"Q1avg",
"Q1mean",
"Q2min",
"Q2max",
"Q2avg",
"Q2mean",
"Q3min",
"Q3max",
"Q3avg",
"Q3mean",
"Q4max",
"Q4avg",
"Q4mean",
"Q5min",
"Q5max",
"Q5avg",
"Q5mean"
],
"OutputColumnName": "Features"
},
"19": {
"OutputColumnName": "Name",
"InputColumnName": "Name"
},
"2": {
"InputColumnName": "Q1mean",
"OutputColumnName": "Q1mean"
},
"20": {
"OutputColumnNames": [
"Features"
],
"InputColumnNames": [
"Features"
]
},
"21": {
"L1Regularization": 1.0,
"L2Regularization": 1.0,
"LabelColumnName": "Name",
"FeatureColumnName": "Features"
},
"22": {
"OutputColumnName": "PredictedLabel",
"InputColumnName": "PredictedLabel"
},
"3": {
"InputColumnName": "Q2min",
"OutputColumnName": "Q2min"
},
"4": {
"InputColumnName": "Q2max",
"OutputColumnName": "Q2max"
},
"5": {
"InputColumnName": "Q2avg",
"OutputColumnName": "Q2avg"
},
"6": {
"InputColumnName": "Q2mean",
"OutputColumnName": "Q2mean"
},
"7": {
"InputColumnName": "Q3min",
"OutputColumnName": "Q3min"
},
"8": {
"InputColumnName": "Q3max",
"OutputColumnName": "Q3max"
},
"9": {
"InputColumnName": "Q3avg",
"OutputColumnName": "Q3avg"
}
},
"estimators": [
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"FeaturizeText",
"Concatenate",
"MapValueToKey",
"NormalizeMinMax",
"LbfgsLogisticRegressionOva",
"MapKeyToValue"
]
},
"MetricName": "MicroAccuracy"
},
"Type": "TrainingConfig", "Type": "TrainingConfig",
"Version": 2 "Version": 2
} }

View File

@ -0,0 +1,64 @@
// This file was auto-generated by ML.NET Model Builder.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
using Microsoft.ML;
namespace DeepTrace
{
public partial class MLModel1
{
/// <summary>
/// Builds the Model Builder pipeline via <see cref="BuildPipeline"/> and fits it to
/// <paramref name="trainData"/>. For more information on how to load data, see aka.ms/loaddata.
/// </summary>
/// <param name="mlContext">Context used to create the pipeline's estimators.</param>
/// <param name="trainData">Training rows the pipeline is fitted on.</param>
/// <returns>The fitted transformer.</returns>
public static ITransformer RetrainPipeline(MLContext mlContext, IDataView trainData)
    => BuildPipeline(mlContext).Fit(trainData);
/// <summary>
/// Assembles the untrained estimator chain generated by Model Builder: text featurization of
/// every feature column, concatenation into "Features", label key mapping, min-max
/// normalization, a one-versus-all L-BFGS logistic regression trainer, and mapping of the
/// predicted key back to its value. Use this to retrain the model.
/// </summary>
/// <param name="mlContext">Context used to create every estimator in the chain.</param>
/// <returns>The estimator chain, not yet fitted.</returns>
public static IEstimator<ITransformer> BuildPipeline(MLContext mlContext)
{
    var transforms = mlContext.Transforms;

    // Feature columns, in the order they are concatenated into "Features".
    // Q1min and Q4min are intentionally absent, as in the generated pipeline.
    var featureColumns = new[]
    {
        "Q1max", "Q1avg", "Q1mean",
        "Q2min", "Q2max", "Q2avg", "Q2mean",
        "Q3min", "Q3max", "Q3avg", "Q3mean",
        "Q4max", "Q4avg", "Q4mean",
        "Q5min", "Q5max", "Q5avg", "Q5mean",
    };

    // Featurize each column in place (input and output share the same name).
    // Estimators are created in the same order as in the generated fluent chain.
    var featurizers = new EstimatorChain<ITransformer>();
    foreach (var column in featureColumns)
    {
        featurizers = featurizers.Append<ITransformer>(
            transforms.Text.FeaturizeText(inputColumnName: column, outputColumnName: column));
    }

    var trainerOptions = new LbfgsLogisticRegressionBinaryTrainer.Options
    {
        L1Regularization = 1F,
        L2Regularization = 1F,
        LabelColumnName = "Name",
        FeatureColumnName = "Features",
    };

    var pipeline = featurizers
        .Append(transforms.Concatenate("Features", featureColumns))
        .Append(transforms.Conversion.MapValueToKey(outputColumnName: "Name", inputColumnName: "Name"))
        .Append(transforms.NormalizeMinMax("Features", "Features"))
        .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(
            binaryEstimator: mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(trainerOptions),
            labelColumnName: "Name"))
        .Append(transforms.Conversion.MapKeyToValue(outputColumnName: "PredictedLabel", inputColumnName: "PredictedLabel"));

    return pipeline;
}
}
}

View File

@ -1,10 +1,12 @@
using DeepTrace.Services; using DeepTrace.Services;
using MongoDB.Bson.Serialization.Attributes;
using MongoDB.Bson;
using System.Text;
namespace DeepTrace.Data namespace DeepTrace.Data;
public class ModelDefinition
{ {
public class ModelDefinition
{
private static int _instanceId; private static int _instanceId;
public ModelDefinition() public ModelDefinition()
{ {
@ -12,9 +14,53 @@ namespace DeepTrace.Data
Name = $"Model #{id}"; Name = $"Model #{id}";
} }
[BsonId]
public ObjectId? Id { get; set; }
public string Name { get; set; } public string Name { get; set; }
public DataSourceStorage DataSource { get; set; } = new(); public DataSourceStorage DataSource { get; set; } = new();
public string AIparameters { get; set; } = string.Empty; public string AIparameters { get; set; } = string.Empty;
public List<IntervalDefinition> IntervalDefinitionList { get; set; } = new(); public List<IntervalDefinition> IntervalDefinitionList { get; set; } = new();
/// <summary>
/// Lists the feature column names for this model: for every query in
/// <see cref="DataSource"/>, the query text suffixed with "_min", "_max", "_avg" and
/// "_mean", in that order.
/// </summary>
/// <returns>A new list holding four names per query.</returns>
public List<string> GetColumnNames()
{
    string[] suffixes = { "min", "max", "avg", "mean" };

    return DataSource.Queries
        .SelectMany(query => suffixes.Select(suffix => $"{query.Query}_{suffix}"))
        .ToList();
}
/// <summary>
/// Renders the model's intervals as CSV text: a header with the quoted names from
/// <see cref="GetColumnNames"/> followed by "Name", then one row per interval holding
/// min, max, avg and mean of every query's samples and the interval name.
/// </summary>
/// <remarks>
/// Numbers are always written with the invariant culture so that the decimal separator
/// never collides with the field separator. An empty interval list yields a header-only
/// document.
/// </remarks>
/// <returns>The CSV document, one line per interval after the header.</returns>
/// <exception cref="InvalidOperationException">
/// A query inside an interval has no samples (raised by the Min/Max/Average aggregates).
/// </exception>
public string ToCsv()
{
    // Always-quoted field; embedded quotes are doubled so the field stays one CSV cell.
    static string Quote(string field) => "\"" + field.Replace("\"", "\"\"") + "\"";

    // Quote only when the text would otherwise break the row; plain names are written unchanged.
    static string QuoteIfNeeded(string field) =>
        field.IndexOfAny(new[] { ',', '"', '\r', '\n' }) >= 0 ? Quote(field) : field;

    var writer = new StringBuilder();
    writer.Append(string.Join(",", GetColumnNames().Select(Quote))).AppendLine(",Name");

    foreach (var interval in IntervalDefinitionList)
    {
        foreach (var queryData in interval.Data)
        {
            var min = queryData.Data.Min(x => x.Value);
            var max = queryData.Data.Max(x => x.Value);
            var avg = queryData.Data.Average(x => x.Value);
            // Kept as a separate column for compatibility with the existing column layout.
            var mean = queryData.Data.Sum(x => x.Value) / queryData.Data.Count;

            // Invariant formatting: the current culture may use ',' as decimal separator.
            writer.Append(FormattableString.Invariant($"{min},{max},{avg},{mean},"));
        }

        writer.AppendLine(QuoteIfNeeded(interval.Name ?? string.Empty));
    }

    return writer.ToString();
}
} }

View File

@ -1,9 +1,4 @@
using Microsoft.ML.Data; using Microsoft.ML.Data;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace DeepTrace.Data; namespace DeepTrace.Data;

View File

@ -15,16 +15,4 @@
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\PrometheusAPI\PrometheusAPI.csproj" /> <ProjectReference Include="..\PrometheusAPI\PrometheusAPI.csproj" />
</ItemGroup> </ItemGroup>
<ItemGroup Label="MLModel1">
<None Include="MLModel1.consumption.cs">
<DependentUpon>MLModel1.mbconfig</DependentUpon>
</None>
<None Include="MLModel1.training.cs">
<DependentUpon>MLModel1.mbconfig</DependentUpon>
</None>
<None Include="MLModel1.zip">
<DependentUpon>MLModel1.mbconfig</DependentUpon>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project> </Project>

View File

@ -2,6 +2,7 @@
using Microsoft.ML; using Microsoft.ML;
using Microsoft.ML.Trainers; using Microsoft.ML.Trainers;
namespace DeepTrace.ML namespace DeepTrace.ML
{ {
public class EstimatorBuilder : IEstimatorBuilder public class EstimatorBuilder : IEstimatorBuilder

View File

@ -5,8 +5,8 @@ namespace DeepTrace.ML;
public interface IMLProcessor public interface IMLProcessor
{ {
void Fit(ModelDefinition modelDef, DataSourceDefinition dataSourceDef); Task Train(ModelDefinition modelDef);
byte[] Export(); byte[] Export();
void Import(byte[] data); void Import(byte[] data);
string Predict(TimeSeries[] data); string Predict(DataSourceDefinition dataSource);
} }

View File

@ -1,4 +1,7 @@
using Microsoft.ML; using DeepTrace.Data;
using DeepTrace.Services;
using Microsoft.ML;
using Microsoft.ML.Data;
namespace DeepTrace.ML; namespace DeepTrace.ML;
@ -21,4 +24,22 @@ public static class MLHelpers
return new (mlContext, schema, transformer); return new (mlContext, schema, transformer);
} }
/// <summary>
/// Writes <paramref name="model"/> as CSV into a temporary file and opens that file as an
/// <see cref="IDataView"/>. Feature columns are loaded as <see cref="DataKind.Double"/>;
/// the trailing "Name" column written by <c>ToCsv</c> is loaded as text.
/// </summary>
/// <param name="mlContext">Context used to create the text loader.</param>
/// <param name="model">Model whose intervals are converted.</param>
/// <returns>
/// The data view and the path of the temporary file backing it. The caller owns the file
/// and must delete it once the view is no longer needed.
/// </returns>
public static async Task<(IDataView View, string FileName)> Convert(MLContext mlContext, ModelDefinition model)
{
    var csv = model.ToCsv();
    var columnNames = model.GetColumnNames();

    var columns = columnNames
        .Select((name, index) => new TextLoader.Column(name, DataKind.Double, index))
        .ToList();

    // The CSV carries the interval name as its last field; without this column the
    // label is not part of the loaded view.
    // NOTE(review): confirm the estimator pipeline expects the label under the name "Name".
    columns.Add(new TextLoader.Column("Name", DataKind.String, columnNames.Count));

    var fileName = Path.GetTempFileName();
    try
    {
        await File.WriteAllTextAsync(fileName, csv);

        var view = mlContext.Data.LoadFromTextFile(fileName, columns.ToArray(), separatorChar: ',', hasHeader: true, allowQuoting: true, trimWhitespace: true);
        return (view, fileName);
    }
    catch
    {
        // Ownership of the file was never handed to the caller, so clean it up here.
        File.Delete(fileName);
        throw;
    }
}
} }

View File

@ -0,0 +1,84 @@
using DeepTrace.Data;
using Microsoft.ML;
using Microsoft.ML.Data;
using PrometheusAPI;
using System.Data;
namespace DeepTrace.ML
{
/// <summary>
/// Trains a single ML.NET model from a <see cref="ModelDefinition"/> and converts the
/// trained state (model name, schema, transformer) to and from a byte array.
/// </summary>
public class MLProcessor : IMLProcessor
{
    // Marker written at the start of every export; Import rejects data that does not start with it.
    private static readonly string _signature = "DeepTrace-Model-v1-" + typeof(MLProcessor).Name;

    // Not readonly: Import replaces the context together with schema and transformer.
    private MLContext _mlContext = new MLContext();
    private readonly EstimatorBuilder _estimatorBuilder = new EstimatorBuilder();
    private DataViewSchema? _schema;
    private ITransformer? _transformer;

    // Name of the model definition the current transformer was trained from.
    private string Name { get; set; } = string.Empty;

    /// <summary>
    /// Builds the pipeline for <paramref name="modelDef"/>, fits it on the model's data and
    /// keeps the resulting schema, transformer and model name.
    /// </summary>
    /// <param name="modelDef">Definition providing both the pipeline settings and the training data.</param>
    public async Task Train(ModelDefinition modelDef)
    {
        var pipeline = _estimatorBuilder.BuildPipeline(_mlContext, modelDef);
        var (data, filename) = await MLHelpers.Convert(_mlContext, modelDef);
        try
        {
            // Fit first: if it throws, the previously trained state stays intact.
            var transformer = pipeline.Fit(data);

            _schema = data.Schema;
            _transformer = transformer;
            Name = modelDef.Name;
        }
        finally
        {
            // The data view is backed by a temporary file owned by this method.
            File.Delete(filename);
        }
    }

    /// <summary>
    /// Serializes the trained model as: signature, model name, payload length, payload.
    /// </summary>
    /// <returns>The serialized bytes.</returns>
    /// <exception cref="ArgumentNullException">No model has been trained or imported yet.</exception>
    public byte[] Export()
    {
        if (_schema == null)
        {
            throw new ArgumentNullException(nameof(_schema));
        }

        if (_transformer == null)
        {
            throw new ArgumentNullException(nameof(_transformer));
        }

        using var mem = new MemoryStream();

        mem.WriteString(_signature);
        mem.WriteString(Name);

        var bytes = MLHelpers.ExportSingleModel(new ModelRecord(_mlContext, _schema, _transformer));

        mem.WriteInt(bytes.Length);
        mem.Write(bytes);

        return mem.ToArray();
    }

    /// <summary>
    /// Restores the state written by <see cref="Export"/>. Fields are read in the same
    /// order they were written.
    /// </summary>
    /// <param name="data">Bytes previously produced by <see cref="Export"/>.</param>
    /// <exception cref="ApplicationException">
    /// The signature does not match or the declared payload length exceeds the remaining data.
    /// </exception>
    public void Import(byte[] data)
    {
        using var mem = new MemoryStream(data);

        var sig = mem.ReadString();
        if (sig != _signature)
        {
            throw new ApplicationException($"Wrong data for {GetType().Name}");
        }

        var name = mem.ReadString();
        var size = mem.ReadInt();

        // Reject truncated or corrupted input instead of importing a partially filled buffer.
        if (size < 0 || size > mem.Length - mem.Position)
        {
            throw new ApplicationException($"Corrupted data for {GetType().Name}");
        }

        var bytes = new byte[size];
        // The length check above guarantees the stream holds at least 'size' more bytes.
        _ = mem.Read(bytes, 0, bytes.Length);

        (_mlContext, _schema, _transformer) = MLHelpers.ImportSingleModel(bytes);
        Name = name;
    }

    /// <summary>Not implemented yet.</summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public string Predict(DataSourceDefinition dataSourceDefinition)
    {
        throw new NotImplementedException();
    }
}
}

View File

@ -1,108 +0,0 @@
using DeepTrace.Data;
using Microsoft.ML;
using Microsoft.ML.Data;
using PrometheusAPI;
using System.Data;
using System.Linq;
using System.Xml.Linq;
namespace DeepTrace.ML
{
/// <summary>
/// IMLProcessor implementation that fits one IID spike detection model per data source
/// query and keeps the fitted models in a dictionary keyed by the query text.
/// </summary>
public class SpikeDetector : IMLProcessor
{
// Fitted model per query text.
private readonly Dictionary<string, ModelRecord> _model = new();
/// <summary>
/// Fits one model for every query of <paramref name="dataSourceDef"/> and stores it under
/// the query text, replacing any model already stored under that key.
/// </summary>
/// <param name="modelDef">Definition whose interval list supplies the time series.</param>
/// <param name="dataSourceDef">Data source whose queries name the models.</param>
public void Fit(ModelDefinition modelDef, DataSourceDefinition dataSourceDef)
{
// NOTE(review): 'i' is the position of the query, yet it indexes IntervalDefinitionList;
// confirm that intervals and queries are meant to line up one-to-one.
var models = dataSourceDef.Queries
.Select( (x,i) =>
{
// since we are just detecting spikes here we can combine all the time series into one
List<TimeSeries> data = modelDef.IntervalDefinitionList[i].Data
.Select(y => y.Data)
.Aggregate<IEnumerable<TimeSeries>>((acc, list) => acc.Concat(list))
.ToList();
return (Name: x.Query, Data: data);
})
.ToList();
foreach (var (name, data) in models)
{
_model[name] = FitOne(data);
}
}
// Marker written at the start of every export; Import rejects data that does not start with it.
private static string _signature = "DeepTrace-Model-v1-"+typeof(SpikeDetector).Name;
/// <summary>
/// Serializes all stored models as: signature, model count, then for each model its
/// name, payload length and payload.
/// </summary>
/// <returns>The serialized bytes.</returns>
public byte[] Export()
{
using var mem = new MemoryStream();
mem.WriteString(_signature);
mem.WriteInt(_model.Count);
foreach ( var (name, model) in _model)
{
mem.WriteString(name);
var bytes = MLHelpers.ExportSingleModel(model);
mem.WriteInt(bytes.Length);
mem.Write(bytes);
}
return mem.ToArray();
}
/// <summary>
/// Reads models in the order <see cref="Export"/> wrote them and stores each one under
/// its name. Models already stored under other names are kept.
/// </summary>
/// <param name="data">Bytes previously produced by <see cref="Export"/>.</param>
/// <exception cref="ApplicationException">The leading signature does not match.</exception>
public void Import(byte[] data)
{
var mem = new MemoryStream(data);
var sig = mem.ReadString();
if (sig != _signature)
throw new ApplicationException($"Wrong data for {GetType().Name}");
var count = mem.ReadInt();
for ( var i = 0; i < count; i++ )
{
var name = mem.ReadString();
var size = mem.ReadInt();
var bytes = new byte[size];
// NOTE(review): the number of bytes actually read is not checked against 'size'.
mem.Read(bytes, 0, bytes.Length);
var model = MLHelpers.ImportSingleModel(bytes);
_model[name] = model;
}
}
/// <summary>Not implemented yet; always throws <see cref="NotImplementedException"/>.</summary>
public string Predict(TimeSeries[] data)
{
throw new NotImplementedException();
}
// -------------------------- internals
// Row type whose Prediction property names the detector's output column (a 3-element vector).
class SpikePrediction
{
[VectorType(3)]
public double[] Prediction { get; set; } = new double[3];
}
/// <summary>
/// Fits an IID spike detector on <paramref name="dataSet"/> using a fresh MLContext.
/// The detector reads the TimeSeries "Value" column and writes the "Prediction" column.
/// </summary>
/// <param name="dataSet">Samples to fit on; its count is passed as the detector's fourth argument.</param>
/// <returns>A record holding the context, the data view schema and the fitted transformer.</returns>
private static ModelRecord FitOne(List<TimeSeries> dataSet)
{
var mlContext = new MLContext();
var dataView = mlContext.Data.LoadFromEnumerable(dataSet);
const string outputColumnName = nameof(SpikePrediction.Prediction);
const string inputColumnName = nameof(TimeSeries.Value);
// 95.0 and dataSet.Count are presumably the confidence and the p-value history length - confirm against the DetectIidSpike signature.
var iidSpikeEstimator = mlContext.Transforms.DetectIidSpike(outputColumnName,inputColumnName, 95.0d, dataSet.Count);
var transformer = iidSpikeEstimator.Fit(dataView);
return new (mlContext, dataView.Schema, transformer);
}
}
}

View File

@ -10,7 +10,7 @@
@inject PrometheusClient Prometheus @inject PrometheusClient Prometheus
@inject IDialogService DialogService @inject IDialogService DialogService
@inject IDataSourceStorageService StorageService @inject IDataSourceStorageService StorageService
@inject IModelStorageService ModelService @inject IModelDefinitionService ModelService
@inject IEstimatorBuilder EstimatorBuilder @inject IEstimatorBuilder EstimatorBuilder
@inject NavigationManager NavManager @inject NavigationManager NavManager
@inject IJSRuntime Js @inject IJSRuntime Js
@ -32,7 +32,7 @@
<MudCard Class="mb-3"> <MudCard Class="mb-3">
<MudCardActions> <MudCardActions>
<MudSelect T="ModelStorage" Label="Model name" AnchorOrigin="Origin.BottomCenter" @bind-Value="_modelForm!.CurrentModel"> <MudSelect T="ModelDefinition" Label="Model name" AnchorOrigin="Origin.BottomCenter" @bind-Value="_modelForm!.CurrentModel">
@foreach (var model in _modelDefinitions) @foreach (var model in _modelDefinitions)
{ {
<MudSelectItem Value="@model">@model.Name</MudSelectItem> <MudSelectItem Value="@model">@model.Name</MudSelectItem>
@ -133,7 +133,7 @@
_self = self; _self = self;
} }
private ModelStorage _currentModel = new(); private ModelDefinition _currentModel = new();
private readonly Training _self; private readonly Training _self;
[Required] [Required]
@ -156,7 +156,7 @@
} }
[Required] [Required]
public ModelStorage CurrentModel public ModelDefinition CurrentModel
{ {
get get
{ {
@ -191,7 +191,7 @@
private ModelForm? _modelForm; private ModelForm? _modelForm;
private TimeSeriesData? DisplayData { get; set; } private TimeSeriesData? DisplayData { get; set; }
private List<DataSourceStorage> _dataSources = new(); private List<DataSourceStorage> _dataSources = new();
private List<ModelStorage> _modelDefinitions = new() {new()}; private List<ModelDefinition> _modelDefinitions = new() {new()};
private DateTime? _minDate; private DateTime? _minDate;
private DateTime? _maxDate; private DateTime? _maxDate;
@ -423,12 +423,13 @@
DialogService.Show<Controls.Dialog>("Error", parameters, options); DialogService.Show<Controls.Dialog>("Error", parameters, options);
} }
private void HandleTrain() private async Task HandleTrain()
{ {
var mlContext = new MLContext(); var mlProcessor = new MLProcessor();
var pipeline = EstimatorBuilder.BuildPipeline(mlContext, _modelForm!.CurrentModel); await mlProcessor.Train(_modelForm!.CurrentModel);
var bytes = mlProcessor.Export();
//save to Mongo
} }

View File

@ -16,7 +16,7 @@ builder.Services.AddHttpClient<PrometheusClient>(c => c.BaseAddress = new UriBui
builder.Services builder.Services
.AddSingleton<IMongoClient>( s => new MongoClient(builder.Configuration.GetValue<string>("Connections:MongoDb") )) .AddSingleton<IMongoClient>( s => new MongoClient(builder.Configuration.GetValue<string>("Connections:MongoDb") ))
.AddSingleton<IDataSourceStorageService, DataSourceStorageService>() .AddSingleton<IDataSourceStorageService, DataSourceStorageService>()
.AddSingleton<IModelStorageService, ModelStorageService>() .AddSingleton<IModelDefinitionService, ModelDefinitionService>()
.AddSingleton<IEstimatorBuilder, EstimatorBuilder>() .AddSingleton<IEstimatorBuilder, EstimatorBuilder>()
; ;

View File

@ -1,19 +1,15 @@
using MongoDB.Bson.Serialization.Attributes; using MongoDB.Bson.Serialization.Attributes;
using MongoDB.Bson; using MongoDB.Bson;
using DeepTrace.Data; using DeepTrace.Data;
using System.Text;
namespace DeepTrace.Services namespace DeepTrace.Services
{ {
public class ModelStorage : ModelDefinition
{
[BsonId]
public ObjectId? Id { get; set; }
}
public interface IModelStorageService public interface IModelDefinitionService
{ {
Task Delete(ModelStorage source, bool ignoreNotStored = false); Task Delete(ModelDefinition source, bool ignoreNotStored = false);
Task<List<ModelStorage>> Load(); Task<List<ModelDefinition>> Load();
Task Store(ModelStorage source); Task Store(ModelDefinition source);
} }
} }

View File

@ -1,9 +1,10 @@
using MongoDB.Bson; using DeepTrace.Data;
using MongoDB.Bson;
using MongoDB.Driver; using MongoDB.Driver;
namespace DeepTrace.Services namespace DeepTrace.Services
{ {
public class ModelStorageService : IModelStorageService public class ModelDefinitionService : IModelDefinitionService
{ {
private const string MongoDBDatabaseName = "DeepTrace"; private const string MongoDBDatabaseName = "DeepTrace";
@ -11,23 +12,23 @@ namespace DeepTrace.Services
private readonly IMongoClient _client; private readonly IMongoClient _client;
public ModelStorageService(IMongoClient client) public ModelDefinitionService(IMongoClient client)
{ {
_client = client; _client = client;
} }
public async Task<List<ModelStorage>> Load() public async Task<List<ModelDefinition>> Load()
{ {
var db = _client.GetDatabase(MongoDBDatabaseName); var db = _client.GetDatabase(MongoDBDatabaseName);
var collection = db.GetCollection<ModelStorage>(MongoDBCollection); var collection = db.GetCollection<ModelDefinition>(MongoDBCollection);
var res = await (await collection.FindAsync("{}")).ToListAsync(); var res = await (await collection.FindAsync("{}")).ToListAsync();
return res; return res;
} }
public async Task Store(ModelStorage source) public async Task Store(ModelDefinition source)
{ {
var db = _client.GetDatabase(MongoDBDatabaseName); var db = _client.GetDatabase(MongoDBDatabaseName);
var collection = db.GetCollection<ModelStorage>(MongoDBCollection); var collection = db.GetCollection<ModelDefinition>(MongoDBCollection);
if (source.Id == null) if (source.Id == null)
source.Id = ObjectId.GenerateNewId(); source.Id = ObjectId.GenerateNewId();
@ -40,7 +41,7 @@ namespace DeepTrace.Services
); );
} }
public async Task Delete(ModelStorage source, bool ignoreNotStored = false) public async Task Delete(ModelDefinition source, bool ignoreNotStored = false)
{ {
if (source.Id == null) if (source.Id == null)
{ {