Added embedding extractor. DeepSeek next.

This commit is contained in:
Alexander Shabarshov 2026-06-08 13:52:35 +01:00
parent 78c9713425
commit 6ebeccd761
18 changed files with 306 additions and 94 deletions

View File

@ -37,9 +37,12 @@ internal sealed class Program
// splitter services
services.AddSingleton<UltraFaceDetector>();
services.AddSingleton<YoloV10ObjectDetector>();
services.AddSingleton( x => new SingleThreadedDetector<UltraFaceDetector>(x.GetRequiredService<UltraFaceDetector>()) );
services.AddSingleton<OSNetEmbeddingExtractor>();
services.AddSingleton<IObjectTracker, ObjectTracker>();
services.AddSingleton(x => new SingleThreadedDetector<UltraFaceDetector>(x.GetRequiredService<UltraFaceDetector>()));
services.AddSingleton(x => new SingleThreadedDetector<YoloV10ObjectDetector>(x.GetRequiredService<YoloV10ObjectDetector>()));
services.AddSingleton(x => new SingleThreadedDetector<DummyDetector>(x.GetRequiredService<DummyDetector>()));
services.AddSingleton<IEmbeddingExtractor>(x => new SingleThreadedEmbeddingExtractor<OSNetEmbeddingExtractor>(x.GetRequiredService<OSNetEmbeddingExtractor>()));
services.AddSingleton<Func<string, IObjectDetector>>( x => detectorName =>
{
return detectorName switch

View File

@ -5,7 +5,7 @@ public class SingleThreadedDetector<T>(IObjectDetector _detector) : IObjectDetec
{
private Lock _lock = new();
public List<(OpenCvSharp.Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont)
public List<DetectedPerson> DetectAll(SingleTask job, Mat frameCont)
{
lock (_lock)
{
@ -19,3 +19,24 @@ public class SingleThreadedDetector<T>(IObjectDetector _detector) : IObjectDetec
d.Dispose();
}
}
/// <summary>
/// Wraps an <c>IEmbeddingExtractor</c> so that every call to <see cref="Extract"/> (and
/// <see cref="Dispose"/>) goes through one lock, i.e. at most one call into the wrapped
/// extractor runs at a time.
/// </summary>
/// <typeparam name="T">
/// Type tag for the wrapped extractor. The implementation does not use it beyond the
/// constraint; it only makes wrappers for different extractor types distinct closed types.
/// </typeparam>
/// <remarks>
/// NOTE(review): the array returned by <see cref="Extract"/> is handed to the caller after the
/// lock has been released. If the wrapped extractor reuses an internal result buffer, another
/// thread's call can overwrite it while the first caller is still reading it.
/// </remarks>
public class SingleThreadedEmbeddingExtractor<T>(IEmbeddingExtractor _extractor) : IEmbeddingExtractor
    where T : IEmbeddingExtractor
{
    // readonly: replacing the lock object would silently break mutual exclusion.
    private readonly Lock _lock = new();

    /// <summary>Forwards to the wrapped extractor while holding the lock.</summary>
    /// <param name="frame">Frame to read pixels from.</param>
    /// <param name="box">Region of <paramref name="frame"/> to embed.</param>
    /// <returns>Whatever the wrapped extractor returns for this region.</returns>
    public float[] Extract(Mat frame, OpenCvSharp.Rect box)
    {
        lock (_lock)
        {
            return _extractor.Extract(frame, box);
        }
    }

    /// <summary>
    /// Disposes the wrapped extractor. Takes the same lock as <see cref="Extract"/> so the
    /// extractor is never torn down while a call into it is still running.
    /// </summary>
    public void Dispose()
    {
        lock (_lock)
        {
            // IEmbeddingExtractor already derives from IDisposable, so no type test is needed.
            _extractor?.Dispose();
        }
    }
}

View File

@ -307,7 +307,7 @@ public partial class JobViewModel : ObservableObject
if (detections.Count > 0)
{
var primaryDetection = detections
.OrderByDescending(d => d.box.Height * d.box.Width)
.OrderByDescending(d => d.Box.Height * d.Box.Width)
.FirstOrDefault();
var w = Probe.Width;
@ -316,15 +316,15 @@ public partial class JobViewModel : ObservableObject
var cropWidth = Job.Crop?.width ?? CommandLine.DefaultW;
var cropHeight = Job.Crop?.height ?? CommandLine.DefaultH;
var cx = primaryDetection.center.X - cropWidth / 2f;
var cy = primaryDetection.center.Y - cropHeight / 2f;
var cx = primaryDetection.Center.X - cropWidth / 2f;
var cy = primaryDetection.Center.Y - cropHeight / 2f;
var r = new Rect(cx, cy, cropWidth, cropHeight);
crop = ClampCrop(r, w, h);
}
var boxes = detections.Select(x => x.box).ToList();
var boxes = detections.Select(x => x.Box).ToList();
Preview = new PreviewData(frame, boxes, crop, Job.GravitateTo, pos, Job.Rotate);
}
catch (Exception ex)

View File

@ -8,7 +8,7 @@
x:DataType="vm:MainViewModel"
x:Name="Root"
Width="1800"
Height="790"
Height="830"
Title="Splitter UI"
Icon="avares://Splitter-UI/Assets/splitter.png">

View File

@ -68,7 +68,9 @@ public class JobProcessor(ILogger logger) : LoggingBase(logger, 0), IJobProcesso
"none" => new DummyDetector(),
_ => throw new InvalidOperationException($"Unknown detector: {job.Detect}")
};
return new TrackingSplitter(i, detector, job, _logger);
var osnet = new OSNetEmbeddingExtractor();
var tracker = new ObjectTracker(detector, osnet);
return new TrackingSplitter(i, tracker, job, _logger);
};
}
else

View File

@ -4,24 +4,18 @@ using System.Runtime.InteropServices;
namespace splitter;
public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
public class TrackingSplitter : LoggingBase, ISegmentProcessor
{
private readonly IObjectDetector _detector;
private readonly IObjectTracker _tracker;
public TrackingSplitter(
int progressLine,
IObjectDetector detector,
IObjectTracker tracker,
SingleJob cmd,
ILogger logger)
: base(logger, progressLine)
{
_detector = detector;
}
public void Dispose()
{
if (_detector is IDisposable d)
d.Dispose();
_tracker = tracker;
}
public async Task ProcessSegment(SingleTask job, CancellationToken token)
@ -103,12 +97,12 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
var kalman = new KalmanTracker();
var camera = new CameraController(
videoWidth,
videoHeight,
job.Job.Crop.Value.width,
job.Job.Crop.Value.height,
kalman,
job.Job);
videoWidth,
videoHeight,
job.Job.Crop.Value.width,
job.Job.Crop.Value.height,
kalman,
job.Job);
try
{
@ -130,12 +124,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
Marshal.Copy(inBuffer, 0, frameMat.Data, inBytes);
var objects = _detector.DetectAll(job, frameMat);
// Ignore detections starting in the lower 1/2 of the frame
objects = objects.Where(o => o.center.Y <= frameMat.Height * job.Job.DetectAbove).ToList();
var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
var (objects, primary) = _tracker.SelectTrackedObject(job, frameMat, kalman.LastMeasurement);
camera.Update(primary);
var roi = camera.Roi;
@ -389,7 +378,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
private void DrawDebug(
Mat frame,
System.Collections.Generic.List<(Rect box, Point2f center)> objects,
List<DetectedPerson> objects,
CameraController camera,
KalmanTracker kalman)
{
@ -418,52 +407,4 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
HersheyFonts.HersheySimplex, 0.6, color, 2);
}
private (Rect box, Point2f center)? SelectTrackedObject(
List<(Rect box, Point2f center)> foundObjects,
Point2f? previousCenter)
{
if (foundObjects == null || foundObjects.Count == 0)
return null;
if (!previousCenter.HasValue)
{
var bestIndex = 0;
var bestArea = float.MinValue;
for (var i = 0; i < foundObjects.Count; i++)
{
var f = foundObjects[i];
var area = f.box.Width * f.box.Height;
if (area > bestArea)
{
bestArea = area;
bestIndex = i;
}
}
return foundObjects[bestIndex];
}
else
{
var prev = previousCenter.Value;
var bestIndex = 0;
var bestDist2 = float.MaxValue;
for (var i = 0; i < foundObjects.Count; i++)
{
var f = foundObjects[i];
var dx = f.center.X - prev.X;
var dy = f.center.Y - prev.Y;
var d2 = dx * dx + dy * dy;
if (d2 < bestDist2)
{
bestDist2 = d2;
bestIndex = i;
}
}
return foundObjects[bestIndex];
}
}
}

View File

@ -68,15 +68,15 @@ public sealed class CameraController
public Point2f? ObjectCenter => _objectCenter;
public Rect Roi => _roi;
public void Update((Rect box, Point2f center)? primary)
public void Update(DetectedPerson? primary)
{
Rect? objectBox = null;
Point2f? objectCenter = null;
if (primary.HasValue)
{
objectCenter = primary.Value.center;
objectBox = primary.Value.box;
objectCenter = primary.Value.Center;
objectBox = primary.Value.Box;
}
// ---------------------------------------------------------

View File

@ -0,0 +1,8 @@
namespace splitter.algo;
/// <summary>
/// One detection reported by an object detector: a bounding box, its centre point and an
/// optional identifier.
/// </summary>
/// <remarks>
/// This is a mutable struct with public fields, so assigning it or reading it out of a list
/// yields a copy; a modified copy has to be written back to take effect.
/// </remarks>
public struct DetectedPerson
{
// Identifier of the detection. The detectors in this change only set Box and Center, which
// leaves this at its default (0); ObjectTracker later fills it with a hash of the embedding.
public ulong Id;
// Bounding box of the detection, in the pixel coordinates of the frame it was detected in.
public Rect Box;
// Centre point of the detection. The detectors in this change compute it as the middle of Box.
public Point2f Center;
}

View File

@ -2,7 +2,7 @@
public sealed class DummyDetector : IObjectDetector
{
public List<(Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont)
public List<DetectedPerson> DetectAll(SingleTask job, Mat frameCont)
{
var h = job.Info.Height;
var w = job.Info.Width;
@ -14,7 +14,7 @@ public sealed class DummyDetector : IObjectDetector
var center = new Point2f(x, y);
var rect = new Rect(x - 1, y - 1, 2, 2);
return [(rect, center)];
return [new DetectedPerson { Box = rect, Center = center }];
}
public void Dispose() {}

View File

@ -0,0 +1,6 @@
namespace splitter.algo;
/// <summary>
/// Produces a numeric embedding vector for a rectangular region of a frame.
/// Implementations are disposable because they may own native resources.
/// </summary>
public interface IEmbeddingExtractor : IDisposable
{
/// <summary>Computes the embedding of the part of <paramref name="frame"/> covered by <paramref name="box"/>.</summary>
/// <param name="frame">Frame to read pixels from.</param>
/// <param name="box">Region of the frame to embed. NOTE(review): presumably must lie inside the frame; confirm per implementation.</param>
/// <returns>The embedding as a float array; its length and normalisation are implementation-defined.</returns>
float[] Extract(Mat frame, Rect box);
}

View File

@ -2,5 +2,5 @@
public interface IObjectDetector : IDisposable
{
List<(Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont);
List<DetectedPerson> DetectAll(SingleTask job, Mat frameCont);
}

View File

@ -0,0 +1,6 @@
namespace splitter.algo;
/// <summary>
/// Detects objects in a frame and chooses the one to follow.
/// </summary>
public interface IObjectTracker
{
/// <summary>Runs detection on <paramref name="frameMat"/> and picks the primary object.</summary>
/// <param name="job">Task the frame belongs to; passed on to the detector.</param>
/// <param name="frameMat">Frame to analyse.</param>
/// <param name="lastMeasurement">Centre of the previously tracked object, or null when there is none yet.</param>
/// <returns>
/// A tuple of (all detections that were kept, the detection chosen as primary). The second
/// item is null when nothing was kept.
/// </returns>
(List<DetectedPerson>, DetectedPerson?) SelectTrackedObject(SingleTask job, Mat frameMat, Point2f? lastMeasurement);
}

View File

@ -0,0 +1,127 @@
using System.Runtime.CompilerServices;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
namespace splitter.algo;
/// <summary>
/// Computes L2-normalised embeddings for image regions by running the ONNX model
/// <c>models/osnet_x0_25_msmt17.onnx</c> (resolved relative to the application base directory)
/// through ONNX Runtime with the DirectML execution provider.
/// </summary>
/// <remarks>
/// The input tensor and the intermediate <c>Mat</c> objects are allocated once and reused by
/// every call, so calls to <see cref="Extract"/> on one instance must not overlap.
/// </remarks>
public sealed class OSNetEmbeddingExtractor : IDisposable, IEmbeddingExtractor
{
    private readonly InferenceSession _session;
    private readonly string _inputName;
    private readonly string _outputName;

    // The input tensor is built with 16 batch slots; only slot 0 is ever filled.
    private const int _batchSize = 16;
    private const int _inputHeight = 256;
    private const int _inputWidth = 128;
    private const int _channels = 3;

    private readonly float[] _inputBuffer;
    private readonly DenseTensor<float> _inputTensor;
    private readonly List<NamedOnnxValue> _inputs = new(1);

    private readonly Mat _resizeMat = new();
    private readonly Mat _rgbMat = new();
    private readonly float _inv255 = 1f / 255f;

    /// <summary>
    /// Loads the model, creates the inference session and pre-allocates the input tensor.
    /// </summary>
    public OSNetEmbeddingExtractor()
    {
        // The options are only needed while the session is being created, so release them
        // (they wrap a native handle) as soon as the constructor finishes.
        using var opt = new SessionOptions();
        opt.AppendExecutionProvider_DML();

        var modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "models", "osnet_x0_25_msmt17.onnx");
        _session = new InferenceSession(modelPath, opt);
        _inputName = _session.InputMetadata.Keys.First();
        _outputName = _session.OutputMetadata.Keys.First();

        int inputSize = _batchSize * _channels * _inputHeight * _inputWidth;
        _inputBuffer = new float[inputSize];
        _inputTensor = new DenseTensor<float>(
            _inputBuffer,
            new[] { _batchSize, _channels, _inputHeight, _inputWidth }
        );
        _inputs.Add(NamedOnnxValue.CreateFromTensor(_inputName, _inputTensor));
    }

    /// <summary>
    /// Computes the embedding of the region <paramref name="box"/> of <paramref name="frame"/>.
    /// </summary>
    /// <param name="frame">Source frame. The crop is converted with BGR2RGB and read as 3 bytes per pixel.</param>
    /// <param name="box">Region to embed; passed unchanged to the <c>Mat</c> ROI constructor.</param>
    /// <returns>
    /// A newly allocated, L2-normalised vector (left as all zeros if the model output has zero
    /// norm). The caller owns the array; later calls do not modify it.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="frame"/> is null.</exception>
    public float[] Extract(Mat frame, Rect box)
    {
        ArgumentNullException.ThrowIfNull(frame);

        // No buffer clearing is needed: FillBatch0 overwrites every element of slot 0 and
        // nothing ever writes to the other slots, so they keep their initial zeros.

        // The ROI is a separate Mat header over the frame's pixels; dispose it so its native
        // handle is not leaked on every call.
        using var roi = new Mat(frame, box);
        Cv2.Resize(roi, _resizeMat, new Size(_inputWidth, _inputHeight));
        Cv2.CvtColor(_resizeMat, _rgbMat, ColorConversionCodes.BGR2RGB);
        FillBatch0(_rgbMat);

        using var results = _session.Run(_inputs);
        var output = results.First(v => v.Name == _outputName).AsTensor<float>();

        // Copy row 0 (the only populated batch slot) into a fresh array so the result stays
        // valid after `results` is disposed and is not overwritten by the next call.
        var embedding = new float[output.Dimensions[1]];
        for (int i = 0; i < embedding.Length; i++)
            embedding[i] = output[0, i];

        NormalizeL2(embedding);
        return embedding;
    }

    /// <summary>
    /// Writes <paramref name="rgb"/> into batch slot 0 of the input buffer as three planes
    /// (R, G, B), scaling each byte to the range [0, 1].
    /// </summary>
    /// <param name="rgb">Image of size _inputWidth x _inputHeight with 3 interleaved bytes per pixel.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private void FillBatch0(Mat rgb)
    {
        int plane = _inputHeight * _inputWidth;
        unsafe
        {
            for (int y = 0; y < _inputHeight; y++)
            {
                var rowPtr = (byte*)rgb.Ptr(y).ToPointer();
                var rowSpan = new Span<byte>(rowPtr, _inputWidth * 3);
                int src = 0;
                for (int x = 0; x < _inputWidth; x++)
                {
                    int off = y * _inputWidth + x;
                    _inputBuffer[off] = rowSpan[src + 0] * _inv255; // R
                    _inputBuffer[plane + off] = rowSpan[src + 1] * _inv255; // G
                    _inputBuffer[2 * plane + off] = rowSpan[src + 2] * _inv255; // B
                    src += 3;
                }
            }
        }
    }

    /// <summary>
    /// Scales <paramref name="v"/> in place to unit Euclidean length. A vector whose norm is
    /// zero or not finite is left untouched instead of being turned into NaN/Infinity.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static void NormalizeL2(float[] v)
    {
        float sum = 0f;
        for (int i = 0; i < v.Length; i++)
            sum += v[i] * v[i];

        float norm = MathF.Sqrt(sum);
        if (norm <= 0f || !float.IsFinite(norm))
            return;

        float inv = 1f / norm;
        for (int i = 0; i < v.Length; i++)
            v[i] *= inv;
    }

    /// <summary>Releases the inference session and the reusable intermediate images.</summary>
    public void Dispose()
    {
        _session?.Dispose();
        _resizeMat?.Dispose();
        _rgbMat?.Dispose();
    }
}

View File

@ -0,0 +1,98 @@
namespace splitter.algo;
/// <summary>
/// Runs the detector on a frame, tags every kept detection with an identifier derived from its
/// embedding, and picks the detection to follow.
/// </summary>
/// <remarks>
/// This type does not dispose the detector or the embedding extractor it is given; whoever
/// created them remains responsible for that.
/// NOTE(review): the identifier is a hash of the raw embedding bits, so two detections get the
/// same identifier only when their embeddings are bit-for-bit equal. Confirm that this is the
/// intended notion of identity.
/// </remarks>
public class ObjectTracker(IObjectDetector _detector, IEmbeddingExtractor _embeddingExtractor) : IObjectTracker
{
    /// <summary>Detects objects in <paramref name="frameMat"/> and selects the primary one.</summary>
    /// <param name="job">Task being processed; <c>job.Job.DetectAbove</c> sets the vertical cut-off.</param>
    /// <param name="frameMat">Frame to analyse.</param>
    /// <param name="lastMeasurement">Centre of the previously tracked object, or null if there is none.</param>
    /// <returns>
    /// The kept detections (with <c>Id</c> filled in) and the primary detection, which is null
    /// when no detection was kept.
    /// </returns>
    public (List<DetectedPerson> /*objects*/, DetectedPerson? /*primary*/) SelectTrackedObject(SingleTask job, Mat frameMat, Point2f? lastMeasurement)
    {
        var detected = _detector.DetectAll(job, frameMat) ?? [];

        // Keep only detections whose centre is at or above the DetectAbove fraction of the
        // frame height. ToList() also gives us a list of our own to modify below.
        var objects = detected.Where(o => o.Center.Y <= frameMat.Height * job.Job.DetectAbove).ToList();

        var frameWidth = frameMat.Width;
        var frameHeight = frameMat.Height;

        // A frame without pixels has no region to embed (and the clamping below would have no
        // valid range); detections then keep their default Id.
        if (frameWidth > 0 && frameHeight > 0)
        {
            for (int i = 0; i < objects.Count; i++)
            {
                var person = objects[i]; // DetectedPerson is a struct: this is a copy
                var region = ClampToFrame(person.Box, frameWidth, frameHeight);
                var embedding = _embeddingExtractor.Extract(frameMat, region);
                person.Id = HashEmbedding(embedding);
                objects[i] = person; // write the modified copy back
            }
        }

        var primary = SelectPrimaryObject(objects, lastMeasurement);
        return (objects, primary);
    }

    /// <summary>
    /// Crops <paramref name="box"/> to the part that lies inside a frame of the given size.
    /// The result always has a width and height of at least 1 and lies fully inside the frame,
    /// even if the box is degenerate or entirely outside.
    /// </summary>
    /// <remarks>
    /// A box that starts left of or above the frame is cut at the frame edge; its far edge
    /// stays where it was rather than the whole box being shifted into the frame.
    /// </remarks>
    private static Rect ClampToFrame(Rect box, int frameWidth, int frameHeight)
    {
        var left = Math.Clamp(box.X, 0, frameWidth - 1);
        var top = Math.Clamp(box.Y, 0, frameHeight - 1);
        var right = Math.Clamp(box.X + box.Width, left + 1, frameWidth);
        var bottom = Math.Clamp(box.Y + box.Height, top + 1, frameHeight);
        return new Rect(left, top, right - left, bottom - top);
    }

    /// <summary>
    /// Folds the bit patterns of all embedding components into one 64-bit value
    /// (multiply by 16777619, then XOR with the next component's bits).
    /// </summary>
    private static ulong HashEmbedding(float[] emb)
    {
        unchecked
        {
            ulong hash = 146527;
            for (int i = 0; i < emb.Length; i++)
            {
                // Hash the exact IEEE-754 bit pattern of the component, not its numeric value.
                uint bits = (uint)BitConverter.SingleToInt32Bits(emb[i]);
                hash = (hash * 16777619) ^ bits;
            }
            return hash;
        }
    }

    /// <summary>
    /// Picks the detection to follow: with no previous centre, the one with the largest box
    /// area; otherwise the one whose centre is closest to <paramref name="previousCenter"/>.
    /// On ties the earliest detection in the list wins.
    /// </summary>
    /// <returns>The chosen detection, or null when the list is null or empty.</returns>
    private static DetectedPerson? SelectPrimaryObject(
        List<DetectedPerson> foundObjects,
        Point2f? previousCenter)
    {
        if (foundObjects == null || foundObjects.Count == 0)
            return null;

        var bestIndex = 0;

        if (!previousCenter.HasValue)
        {
            var bestArea = float.MinValue;
            for (var i = 0; i < foundObjects.Count; i++)
            {
                var candidate = foundObjects[i];
                var area = candidate.Box.Width * candidate.Box.Height;
                if (area > bestArea)
                {
                    bestArea = area;
                    bestIndex = i;
                }
            }
            return foundObjects[bestIndex];
        }

        var prev = previousCenter.Value;
        var bestDist2 = float.MaxValue;
        for (var i = 0; i < foundObjects.Count; i++)
        {
            var candidate = foundObjects[i];
            var dx = candidate.Center.X - prev.X;
            var dy = candidate.Center.Y - prev.Y;
            // Squared distance is enough for comparison; no square root needed.
            var d2 = dx * dx + dy * dy;
            if (d2 < bestDist2)
            {
                bestDist2 = d2;
                bestIndex = i;
            }
        }
        return foundObjects[bestIndex];
    }
}

View File

@ -23,14 +23,14 @@ public sealed class UltraFaceDetector: LoggingBase, IDisposable, IObjectDetector
_ultraFace = UltraFace.Create(param);
}
public List<(Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont)
public List<DetectedPerson> DetectAll(SingleTask job, Mat frameCont)
{
// Convert to byte[] for UltraFace
var bytesFull = frameCont.Rows * frameCont.Cols * frameCont.ElemSize();
var bgr = new byte[bytesFull];
Marshal.Copy(frameCont.Data, bgr, 0, bytesFull);
var results = new List<(Rect box, Point2f center)>();
var results = new List<DetectedPerson>();
if (bgr == null || bgr.Length == 0)
return results;
@ -69,7 +69,7 @@ public sealed class UltraFaceDetector: LoggingBase, IDisposable, IObjectDetector
rect.X + rect.Width / 2f,
rect.Y + rect.Height / 2f);
results.Add((rect, center));
results.Add(new DetectedPerson{ Box = rect, Center = center });
}
}
}

View File

@ -27,7 +27,7 @@ public sealed class YoloV10ObjectDetector : LoggingBase, IObjectDetector, IDispo
private readonly List<Detection> _detections = new(256);
private readonly List<Detection> _nmsBuffer = new(256);
private readonly List<(Rect box, Point2f center)> _results = new(64);
private readonly List<DetectedPerson> _results = new(64);
private readonly float _inv255 = 1f / 255f;
@ -68,7 +68,7 @@ public sealed class YoloV10ObjectDetector : LoggingBase, IObjectDetector, IDispo
_inputs.Add(NamedOnnxValue.CreateFromTensor(_inputName, _inputTensor));
}
public List<(Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont)
public List<DetectedPerson> DetectAll(SingleTask job, Mat frameCont)
{
if (frameCont.Empty())
{
@ -127,7 +127,7 @@ public sealed class YoloV10ObjectDetector : LoggingBase, IObjectDetector, IDispo
var rect = new Rect(x, y, w, h);
var center = new Point2f(x + w / 2f, y + h / 2f);
_results.Add((rect, center));
_results.Add(new DetectedPerson{ Box = rect, Center = center });
}
return _results;

View File

@ -32,7 +32,7 @@ public sealed class YoloV8ObjectDetector : LoggingBase, IObjectDetector, IDispos
private readonly List<Detection> _nmsBuffer = new(256);
// Reusable result list
private readonly List<(Rect box, Point2f center)> _results = new(64);
private readonly List<DetectedPerson> _results = new(64);
private readonly float _inv255 = 1f / 255f;
@ -78,7 +78,7 @@ public sealed class YoloV8ObjectDetector : LoggingBase, IObjectDetector, IDispos
_inputs.Add(NamedOnnxValue.CreateFromTensor(_inputName, _inputTensor));
}
public List<(Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont)
public List<DetectedPerson> DetectAll(SingleTask job, Mat frameCont)
{
if (frameCont.Empty())
{
@ -142,7 +142,7 @@ public sealed class YoloV8ObjectDetector : LoggingBase, IObjectDetector, IDispos
var rect = new Rect(x, y, w, h);
var center = new Point2f(x + w / 2f, y + h / 2f);
_results.Add((rect, center));
_results.Add(new DetectedPerson{ Box = rect, Center = center });
}
return _results;

Binary file not shown.