diff --git a/Splitter-UI/Program.cs b/Splitter-UI/Program.cs index 9c44a27..dd8f420 100644 --- a/Splitter-UI/Program.cs +++ b/Splitter-UI/Program.cs @@ -37,9 +37,12 @@ internal sealed class Program // splitter services services.AddSingleton(); services.AddSingleton(); - services.AddSingleton( x => new SingleThreadedDetector(x.GetRequiredService()) ); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(x => new SingleThreadedDetector(x.GetRequiredService())); services.AddSingleton(x => new SingleThreadedDetector(x.GetRequiredService())); services.AddSingleton(x => new SingleThreadedDetector(x.GetRequiredService())); + services.AddSingleton(x => new SingleThreadedEmbeddingExtractor(x.GetRequiredService())); services.AddSingleton>( x => detectorName => { return detectorName switch diff --git a/Splitter-UI/Services/SingleThreadedDetector.cs b/Splitter-UI/Services/SingleThreadedDetector.cs index 8746e63..296b7f3 100644 --- a/Splitter-UI/Services/SingleThreadedDetector.cs +++ b/Splitter-UI/Services/SingleThreadedDetector.cs @@ -5,7 +5,7 @@ public class SingleThreadedDetector(IObjectDetector _detector) : IObjectDetec { private Lock _lock = new(); - public List<(OpenCvSharp.Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont) + public List DetectAll(SingleTask job, Mat frameCont) { lock (_lock) { @@ -19,3 +19,24 @@ public class SingleThreadedDetector(IObjectDetector _detector) : IObjectDetec d.Dispose(); } } + +public class SingleThreadedEmbeddingExtractor(IEmbeddingExtractor _extractor) : IEmbeddingExtractor + where T : IEmbeddingExtractor +{ + private Lock _lock = new(); + + public float[] Extract(Mat frame, OpenCvSharp.Rect box) + { + lock (_lock) + { + return _extractor.Extract(frame, box); + } + } + + public void Dispose() + { + if (_extractor is IDisposable d) + d.Dispose(); + } + +} \ No newline at end of file diff --git a/Splitter-UI/ViewModels/JobViewModel.cs b/Splitter-UI/ViewModels/JobViewModel.cs index 84b5750..493c542 
100644 --- a/Splitter-UI/ViewModels/JobViewModel.cs +++ b/Splitter-UI/ViewModels/JobViewModel.cs @@ -307,7 +307,7 @@ public partial class JobViewModel : ObservableObject if (detections.Count > 0) { var primaryDetection = detections - .OrderByDescending(d => d.box.Height * d.box.Width) + .OrderByDescending(d => d.Box.Height * d.Box.Width) .FirstOrDefault(); var w = Probe.Width; @@ -316,15 +316,15 @@ public partial class JobViewModel : ObservableObject var cropWidth = Job.Crop?.width ?? CommandLine.DefaultW; var cropHeight = Job.Crop?.height ?? CommandLine.DefaultH; - var cx = primaryDetection.center.X - cropWidth / 2f; - var cy = primaryDetection.center.Y - cropHeight / 2f; + var cx = primaryDetection.Center.X - cropWidth / 2f; + var cy = primaryDetection.Center.Y - cropHeight / 2f; var r = new Rect(cx, cy, cropWidth, cropHeight); crop = ClampCrop(r, w, h); } - var boxes = detections.Select(x => x.box).ToList(); + var boxes = detections.Select(x => x.Box).ToList(); Preview = new PreviewData(frame, boxes, crop, Job.GravitateTo, pos, Job.Rotate); } catch (Exception ex) diff --git a/Splitter-UI/Views/MainWindow.axaml b/Splitter-UI/Views/MainWindow.axaml index 41dca1e..cd6f25d 100644 --- a/Splitter-UI/Views/MainWindow.axaml +++ b/Splitter-UI/Views/MainWindow.axaml @@ -8,7 +8,7 @@ x:DataType="vm:MainViewModel" x:Name="Root" Width="1800" - Height="790" + Height="830" Title="Splitter UI" Icon="avares://Splitter-UI/Assets/splitter.png"> diff --git a/splitter-cli/JobProcessor.cs b/splitter-cli/JobProcessor.cs index 5ed6060..c44ad1e 100644 --- a/splitter-cli/JobProcessor.cs +++ b/splitter-cli/JobProcessor.cs @@ -68,7 +68,9 @@ public class JobProcessor(ILogger logger) : LoggingBase(logger, 0), IJobProcesso "none" => new DummyDetector(), _ => throw new InvalidOperationException($"Unknown detector: {job.Detect}") }; - return new TrackingSplitter(i, detector, job, _logger); + var osnet = new OSNetEmbeddingExtractor(); + var tracker = new ObjectTracker(detector, osnet); + return 
new TrackingSplitter(i, tracker, job, _logger); }; } else diff --git a/splitter-cli/TrackingSplitter.cs b/splitter-cli/TrackingSplitter.cs index 2d74d03..ceea1a7 100644 --- a/splitter-cli/TrackingSplitter.cs +++ b/splitter-cli/TrackingSplitter.cs @@ -4,24 +4,18 @@ using System.Runtime.InteropServices; namespace splitter; -public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable +public class TrackingSplitter : LoggingBase, ISegmentProcessor { - private readonly IObjectDetector _detector; + private readonly IObjectTracker _tracker; public TrackingSplitter( int progressLine, - IObjectDetector detector, + IObjectTracker tracker, SingleJob cmd, ILogger logger) : base(logger, progressLine) { - _detector = detector; - } - - public void Dispose() - { - if (_detector is IDisposable d) - d.Dispose(); + _tracker = tracker; } public async Task ProcessSegment(SingleTask job, CancellationToken token) @@ -103,12 +97,12 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable var kalman = new KalmanTracker(); var camera = new CameraController( - videoWidth, - videoHeight, - job.Job.Crop.Value.width, - job.Job.Crop.Value.height, - kalman, - job.Job); + videoWidth, + videoHeight, + job.Job.Crop.Value.width, + job.Job.Crop.Value.height, + kalman, + job.Job); try { @@ -130,12 +124,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable Marshal.Copy(inBuffer, 0, frameMat.Data, inBytes); - var objects = _detector.DetectAll(job, frameMat); - - // Ignore detections starting in the lower 1/2 of the frame - objects = objects.Where(o => o.center.Y <= frameMat.Height * job.Job.DetectAbove).ToList(); - - var primary = SelectTrackedObject(objects, kalman.LastMeasurement); + var (objects, primary) = _tracker.SelectTrackedObject(job, frameMat, kalman.LastMeasurement); camera.Update(primary); var roi = camera.Roi; @@ -389,7 +378,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable private void DrawDebug( 
Mat frame, - System.Collections.Generic.List<(Rect box, Point2f center)> objects, + List objects, CameraController camera, KalmanTracker kalman) { @@ -418,52 +407,4 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable HersheyFonts.HersheySimplex, 0.6, color, 2); } - private (Rect box, Point2f center)? SelectTrackedObject( - List<(Rect box, Point2f center)> foundObjects, - Point2f? previousCenter) - { - if (foundObjects == null || foundObjects.Count == 0) - return null; - - if (!previousCenter.HasValue) - { - var bestIndex = 0; - var bestArea = float.MinValue; - - for (var i = 0; i < foundObjects.Count; i++) - { - var f = foundObjects[i]; - var area = f.box.Width * f.box.Height; - if (area > bestArea) - { - bestArea = area; - bestIndex = i; - } - } - - return foundObjects[bestIndex]; - } - else - { - var prev = previousCenter.Value; - var bestIndex = 0; - var bestDist2 = float.MaxValue; - - for (var i = 0; i < foundObjects.Count; i++) - { - var f = foundObjects[i]; - var dx = f.center.X - prev.X; - var dy = f.center.Y - prev.Y; - var d2 = dx * dx + dy * dy; - - if (d2 < bestDist2) - { - bestDist2 = d2; - bestIndex = i; - } - } - - return foundObjects[bestIndex]; - } - } } diff --git a/splitter-cli/algo/CameraController.cs b/splitter-cli/algo/CameraController.cs index 370d8af..4b92ae2 100644 --- a/splitter-cli/algo/CameraController.cs +++ b/splitter-cli/algo/CameraController.cs @@ -68,15 +68,15 @@ public sealed class CameraController public Point2f? ObjectCenter => _objectCenter; public Rect Roi => _roi; - public void Update((Rect box, Point2f center)? primary) + public void Update(DetectedPerson? primary) { Rect? objectBox = null; Point2f? 
objectCenter = null; if (primary.HasValue) { - objectCenter = primary.Value.center; - objectBox = primary.Value.box; + objectCenter = primary.Value.Center; + objectBox = primary.Value.Box; } // --------------------------------------------------------- diff --git a/splitter-cli/algo/DetectedPerson.cs b/splitter-cli/algo/DetectedPerson.cs new file mode 100644 index 0000000..3a4f4fe --- /dev/null +++ b/splitter-cli/algo/DetectedPerson.cs @@ -0,0 +1,8 @@ +namespace splitter.algo; + +public struct DetectedPerson +{ + public ulong Id; + public Rect Box; + public Point2f Center; +} diff --git a/splitter-cli/algo/DummyDetector.cs b/splitter-cli/algo/DummyDetector.cs index 687fbaa..3b5ece2 100644 --- a/splitter-cli/algo/DummyDetector.cs +++ b/splitter-cli/algo/DummyDetector.cs @@ -2,7 +2,7 @@ public sealed class DummyDetector : IObjectDetector { - public List<(Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont) + public List DetectAll(SingleTask job, Mat frameCont) { var h = job.Info.Height; var w = job.Info.Width; @@ -14,7 +14,7 @@ public sealed class DummyDetector : IObjectDetector var center = new Point2f(x, y); var rect = new Rect(x - 1, y - 1, 2, 2); - return [(rect, center)]; + return [new DetectedPerson { Box = rect, Center = center }]; } public void Dispose() {} diff --git a/splitter-cli/algo/IEmbeddingExtractor.cs b/splitter-cli/algo/IEmbeddingExtractor.cs new file mode 100644 index 0000000..fff9bcf --- /dev/null +++ b/splitter-cli/algo/IEmbeddingExtractor.cs @@ -0,0 +1,6 @@ +namespace splitter.algo; + +public interface IEmbeddingExtractor : IDisposable +{ + float[] Extract(Mat frame, Rect box); +} \ No newline at end of file diff --git a/splitter-cli/algo/IObjectDetector.cs b/splitter-cli/algo/IObjectDetector.cs index a82a5cd..f9d1412 100644 --- a/splitter-cli/algo/IObjectDetector.cs +++ b/splitter-cli/algo/IObjectDetector.cs @@ -2,5 +2,5 @@ public interface IObjectDetector : IDisposable { - List<(Rect box, Point2f center)> DetectAll(SingleTask 
job, Mat frameCont); + List DetectAll(SingleTask job, Mat frameCont); } \ No newline at end of file
diff --git a/splitter-cli/algo/IObjectTracker.cs b/splitter-cli/algo/IObjectTracker.cs
new file mode 100644
index 0000000..b98121b
--- /dev/null
+++ b/splitter-cli/algo/IObjectTracker.cs
@@ -0,0 +1,13 @@
+namespace splitter.algo;
+
+/// <summary>
+/// Detects the objects in a frame and chooses the one to follow.
+/// </summary>
+public interface IObjectTracker
+{
+    /// <summary>
+    /// Returns the detections found in <paramref name="frameMat"/> together with the one selected
+    /// as primary, which is <c>null</c> when nothing was selected.
+    /// </summary>
+    (List<DetectedPerson>, DetectedPerson?) SelectTrackedObject(SingleTask job, Mat frameMat, Point2f? lastMeasurement);
+}
\ No newline at end of file
diff --git a/splitter-cli/algo/OSNetEmbeddingExtractor.cs b/splitter-cli/algo/OSNetEmbeddingExtractor.cs
new file mode 100644
index 0000000..a355786
--- /dev/null
+++ b/splitter-cli/algo/OSNetEmbeddingExtractor.cs
@@ -0,0 +1,159 @@
+using System.Runtime.CompilerServices;
+using Microsoft.ML.OnnxRuntime;
+using Microsoft.ML.OnnxRuntime.Tensors;
+
+namespace splitter.algo;
+
+/// <summary>
+/// Produces an L2-normalised appearance embedding for one detection box by running the OSNet
+/// ONNX model stored as models/osnet_x0_25_msmt17.onnx under the application base directory.
+/// </summary>
+/// <remarks>
+/// The input tensor and the scratch <see cref="Mat"/> instances are reused between calls, so one
+/// instance must not be used from several threads at the same time.
+/// </remarks>
+public sealed class OSNetEmbeddingExtractor : IDisposable, IEmbeddingExtractor
+{
+    private readonly SessionOptions _sessionOptions;
+    private readonly InferenceSession _session;
+    private readonly string _inputName;
+    private readonly string _outputName;
+
+    // NOTE(review): the tensor holds 16 crops but only slot 0 is ever filled, so every call
+    // presumably pays for a 16-crop inference - confirm whether the model accepts a batch of 1.
+    private const int _batchSize = 16;
+    private const int _inputHeight = 256;
+    private const int _inputWidth = 128;
+    private const int _channels = 3;
+
+    // Scales 8-bit channel values into the [0, 1] range.
+    private const float _inv255 = 1f / 255f;
+
+    private readonly float[] _inputBuffer;
+    private readonly DenseTensor<float> _inputTensor;
+    private readonly List<NamedOnnxValue> _inputs = new(1);
+
+    // Number of floats in one embedding, read from the model's output metadata.
+    private readonly int _embeddingLength;
+
+    private readonly Mat _resizeMat = new();
+    private readonly Mat _rgbMat = new();
+
+    /// <summary>
+    /// Loads the model with the DirectML execution provider and allocates the reusable input tensor.
+    /// </summary>
+    public OSNetEmbeddingExtractor()
+    {
+        _sessionOptions = new SessionOptions();
+        _sessionOptions.AppendExecutionProvider_DML();
+
+        var modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "models", "osnet_x0_25_msmt17.onnx");
+        _session = new InferenceSession(modelPath, _sessionOptions);
+
+        _inputName = _session.InputMetadata.Keys.First();
+        _outputName = _session.OutputMetadata.Keys.First();
+
+        int inputSize = _batchSize * _channels * _inputHeight * _inputWidth;
+        _inputBuffer = new float[inputSize];
+
+        _inputTensor = new DenseTensor<float>(
+            _inputBuffer,
+            new[] { _batchSize, _channels, _inputHeight, _inputWidth }
+        );
+
+        _inputs.Add(NamedOnnxValue.CreateFromTensor(_inputName, _inputTensor));
+
+        _embeddingLength = _session.OutputMetadata[_outputName].Dimensions[1];
+    }
+
+    /// <summary>
+    /// Crops <paramref name="box"/> out of <paramref name="frame"/>, resizes it to 128x256, converts
+    /// BGR to RGB and runs the model on it.
+    /// </summary>
+    /// <param name="frame">Source frame; the BGR2RGB conversion assumes BGR channel order.</param>
+    /// <param name="box">Region to embed; it is passed to the Mat ROI constructor as is, without clamping.</param>
+    /// <returns>A newly allocated, L2-normalised embedding owned by the caller.</returns>
+    public float[] Extract(Mat frame, Rect box)
+    {
+        // No clearing of _inputBuffer is needed: FillBatch0 overwrites slot 0 completely and the
+        // remaining slots are never written, so they keep the zeros they were allocated with.
+
+        // Disposed on exit so the ROI's native Mat header is not leaked on every call.
+        using var roi = new Mat(frame, box);
+
+        Cv2.Resize(roi, _resizeMat, new Size(_inputWidth, _inputHeight));
+        Cv2.CvtColor(_resizeMat, _rgbMat, ColorConversionCodes.BGR2RGB);
+
+        FillBatch0(_rgbMat);
+
+        using var results = _session.Run(_inputs);
+
+        var output = results.First(v => v.Name == _outputName).AsTensor<float>();
+
+        // A fresh array per call: handing out a shared buffer would let the next Extract call
+        // overwrite a result the previous caller is still reading.
+        var embedding = new float[_embeddingLength];
+        for (int i = 0; i < embedding.Length; i++)
+            embedding[i] = output[0, i];
+
+        NormalizeL2(embedding);
+
+        return embedding;
+    }
+
+    // Writes the interleaved 8-bit RGB image into batch slot 0 as planar (CHW) floats in [0, 1].
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private void FillBatch0(Mat rgb)
+    {
+        int plane = _inputHeight * _inputWidth;
+
+        unsafe
+        {
+            for (int y = 0; y < _inputHeight; y++)
+            {
+                var rowPtr = (byte*)rgb.Ptr(y).ToPointer();
+                var rowSpan = new Span<byte>(rowPtr, _inputWidth * 3);
+
+                int src = 0;
+
+                for (int x = 0; x < _inputWidth; x++)
+                {
+                    int off = y * _inputWidth + x;
+
+                    _inputBuffer[off] = rowSpan[src + 0] * _inv255; // R
+                    _inputBuffer[plane + off] = rowSpan[src + 1] * _inv255; // G
+                    _inputBuffer[2 * plane + off] = rowSpan[src + 2] * _inv255; // B
+
+                    src += 3;
+                }
+            }
+        }
+    }
+
+    // Scales v in place to unit Euclidean length.
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static void NormalizeL2(float[] v)
+    {
+        float sum = 0f;
+        for (int i = 0; i < v.Length; i++)
+            sum += v[i] * v[i];
+
+        // A zero vector has no direction; dividing by its norm would fill it with NaN.
+        if (sum <= 0f)
+            return;
+
+        float inv = 1f / MathF.Sqrt(sum);
+
+        for (int i = 0; i < v.Length; i++)
+            v[i] *= inv;
+    }
+
+    /// <summary>Releases the ONNX session, its options and the scratch matrices.</summary>
+    public void Dispose()
+    {
+        _session?.Dispose();
+        _sessionOptions?.Dispose();
+        _resizeMat?.Dispose();
+        _rgbMat?.Dispose();
+    }
+}
diff --git
a/splitter-cli/algo/ObjectTracker.cs b/splitter-cli/algo/ObjectTracker.cs
new file mode 100644
index 0000000..a6317ec
--- /dev/null
+++ b/splitter-cli/algo/ObjectTracker.cs
@@ -0,0 +1,126 @@
+namespace splitter.algo;
+
+/// <summary>
+/// Runs the detector on a frame, tags every kept detection with an id derived from its
+/// appearance embedding and picks the detection to follow.
+/// </summary>
+/// <remarks>
+/// NOTE(review): this class has no Dispose, so the detector and extractor passed in are not
+/// released here - confirm that whoever creates them also disposes them.
+/// </remarks>
+public class ObjectTracker(IObjectDetector _detector, IEmbeddingExtractor _embeddingExtractor) : IObjectTracker
+{
+    /// <summary>
+    /// Detects objects in <paramref name="frameMat"/> and selects the primary one.
+    /// </summary>
+    /// <param name="job">Task whose <c>Job.DetectAbove</c> value scales the frame height into the largest accepted centre Y.</param>
+    /// <param name="frameMat">Frame to analyse.</param>
+    /// <param name="lastMeasurement">Previously followed centre, or <c>null</c> when there is none.</param>
+    /// <returns>The kept detections and the primary one, which is <c>null</c> when the list is empty.</returns>
+    public (List<DetectedPerson> /*objects*/, DetectedPerson? /*primary*/) SelectTrackedObject(SingleTask job, Mat frameMat, Point2f? lastMeasurement)
+    {
+        var objects = _detector.DetectAll(job, frameMat) ?? [];
+
+        // Keep only detections whose centre Y is at most frame height * DetectAbove
+        objects = objects.Where(o => o.Center.Y <= frameMat.Height * job.Job.DetectAbove).ToList();
+
+        // attach an embedding-derived id to every person
+        for (int i = 0; i < objects.Count; i++)
+        {
+            var p = objects[i]; // copy struct
+
+            var rect = ClampToFrame(p.Box, frameMat.Width, frameMat.Height);
+
+            var embedding = _embeddingExtractor.Extract(frameMat, rect);
+
+            // NOTE(review): the id hashes the raw float bits, so two embeddings share an id only
+            // when they are bit-identical - presumably not stable across frames; confirm intent.
+            p.Id = HashEmbedding(embedding);
+
+            objects[i] = p; // write back
+        }
+
+        var primary = SelectPrimaryObject(objects, lastMeasurement);
+        return (objects, primary);
+    }
+
+    // Intersects box with the frame while keeping at least one pixel per axis, so the result is
+    // always a usable ROI. The far edge is clamped rather than the size: clamping the size let a
+    // box overhanging the left/top border extend past its real right/bottom edge.
+    private static Rect ClampToFrame(Rect box, int frameWidth, int frameHeight)
+    {
+        int left = Math.Clamp(box.X, 0, frameWidth - 1);
+        int top = Math.Clamp(box.Y, 0, frameHeight - 1);
+        int right = Math.Clamp(box.X + box.Width, left + 1, frameWidth);
+        int bottom = Math.Clamp(box.Y + box.Height, top + 1, frameHeight);
+
+        return new Rect(left, top, right - left, bottom - top);
+    }
+
+    // Folds the bit pattern of every component into one 64-bit value (multiply, then xor).
+    private static ulong HashEmbedding(float[] emb)
+    {
+        unchecked
+        {
+            ulong hash = 146527;
+            for (int i = 0; i < emb.Length; i++)
+            {
+                // convert float to int bits
+                uint bits = (uint)BitConverter.SingleToInt32Bits(emb[i]);
+                hash = (hash * 16777619) ^ bits;
+            }
+            return hash;
+        }
+    }
+
+    // Without a previous centre the largest box wins; otherwise the box whose centre is nearest
+    // to the previous centre. On ties the earliest entry in the list is kept.
+    private DetectedPerson? SelectPrimaryObject(
+        List<DetectedPerson> foundObjects,
+        Point2f? previousCenter)
+    {
+        if (foundObjects == null || foundObjects.Count == 0)
+            return null;
+
+        if (!previousCenter.HasValue)
+        {
+            var bestIndex = 0;
+            var bestArea = float.MinValue;
+
+            for (var i = 0; i < foundObjects.Count; i++)
+            {
+                var f = foundObjects[i];
+                var area = f.Box.Width * f.Box.Height;
+                if (area > bestArea)
+                {
+                    bestArea = area;
+                    bestIndex = i;
+                }
+            }
+
+            return foundObjects[bestIndex];
+        }
+        else
+        {
+            var prev = previousCenter.Value;
+            var bestIndex = 0;
+            var bestDist2 = float.MaxValue;
+
+            for (var i = 0; i < foundObjects.Count; i++)
+            {
+                var f = foundObjects[i];
+                var dx = f.Center.X - prev.X;
+                var dy = f.Center.Y - prev.Y;
+                var d2 = dx * dx + dy * dy;
+
+                if (d2 < bestDist2)
+                {
+                    bestDist2 = d2;
+                    bestIndex = i;
+                }
+            }
+
+            return foundObjects[bestIndex];
+        }
+    }
+}
diff --git a/splitter-cli/algo/UltraFaceDetector.cs b/splitter-cli/algo/UltraFaceDetector.cs index 2ae1e3d..03c8bbc 100644 --- a/splitter-cli/algo/UltraFaceDetector.cs +++ b/splitter-cli/algo/UltraFaceDetector.cs @@ -23,14 +23,14 @@ public sealed class UltraFaceDetector: LoggingBase, IDisposable, IObjectDetector _ultraFace = UltraFace.Create(param); } - public List<(Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont) + public List DetectAll(SingleTask job, Mat frameCont) { // Convert to byte[] for UltraFace var bytesFull = frameCont.Rows * frameCont.Cols * frameCont.ElemSize(); var bgr = new byte[bytesFull]; Marshal.Copy(frameCont.Data, bgr, 0, bytesFull); - var results = new List<(Rect box, Point2f center)>(); + var results = new List(); if (bgr == null || bgr.Length == 0) return results; @@ -69,7 +69,7 @@ public sealed class UltraFaceDetector: LoggingBase, IDisposable, IObjectDetector rect.X + rect.Width / 2f, rect.Y + rect.Height / 2f); - results.Add((rect, center)); + results.Add(new DetectedPerson{ Box = rect, Center = center }); } } } diff --git a/splitter-cli/algo/YoloV10ObjectDetector.cs b/splitter-cli/algo/YoloV10ObjectDetector.cs index
e6e9dc9..621e59f 100644 --- a/splitter-cli/algo/YoloV10ObjectDetector.cs +++ b/splitter-cli/algo/YoloV10ObjectDetector.cs @@ -27,7 +27,7 @@ public sealed class YoloV10ObjectDetector : LoggingBase, IObjectDetector, IDispo private readonly List _detections = new(256); private readonly List _nmsBuffer = new(256); - private readonly List<(Rect box, Point2f center)> _results = new(64); + private readonly List _results = new(64); private readonly float _inv255 = 1f / 255f; @@ -68,7 +68,7 @@ public sealed class YoloV10ObjectDetector : LoggingBase, IObjectDetector, IDispo _inputs.Add(NamedOnnxValue.CreateFromTensor(_inputName, _inputTensor)); } - public List<(Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont) + public List DetectAll(SingleTask job, Mat frameCont) { if (frameCont.Empty()) { @@ -127,7 +127,7 @@ public sealed class YoloV10ObjectDetector : LoggingBase, IObjectDetector, IDispo var rect = new Rect(x, y, w, h); var center = new Point2f(x + w / 2f, y + h / 2f); - _results.Add((rect, center)); + _results.Add(new DetectedPerson{ Box = rect, Center = center }); } return _results; diff --git a/splitter-cli/algo/YoloV8ObjectDetector.cs b/splitter-cli/algo/YoloV8ObjectDetector.cs index e8c6e63..df57ffe 100644 --- a/splitter-cli/algo/YoloV8ObjectDetector.cs +++ b/splitter-cli/algo/YoloV8ObjectDetector.cs @@ -32,7 +32,7 @@ public sealed class YoloV8ObjectDetector : LoggingBase, IObjectDetector, IDispos private readonly List _nmsBuffer = new(256); // Reusable result list - private readonly List<(Rect box, Point2f center)> _results = new(64); + private readonly List _results = new(64); private readonly float _inv255 = 1f / 255f; @@ -78,7 +78,7 @@ public sealed class YoloV8ObjectDetector : LoggingBase, IObjectDetector, IDispos _inputs.Add(NamedOnnxValue.CreateFromTensor(_inputName, _inputTensor)); } - public List<(Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont) + public List DetectAll(SingleTask job, Mat frameCont) { if 
(frameCont.Empty()) { @@ -142,7 +142,7 @@ public sealed class YoloV8ObjectDetector : LoggingBase, IObjectDetector, IDispos var rect = new Rect(x, y, w, h); var center = new Point2f(x + w / 2f, y + h / 2f); - _results.Add((rect, center)); + _results.Add(new DetectedPerson{ Box = rect, Center = center }); } return _results; diff --git a/splitter-cli/models/osnet_x0_25_msmt17.onnx b/splitter-cli/models/osnet_x0_25_msmt17.onnx new file mode 100644 index 0000000..917d3e8 Binary files /dev/null and b/splitter-cli/models/osnet_x0_25_msmt17.onnx differ