mirror of
https://github.com/unclshura/splitter.git
synced 2026-06-22 00:22:01 +00:00
Compare commits
2 Commits
d3c82ce924
...
6ebeccd761
| Author | SHA1 | Date | |
|---|---|---|---|
| 6ebeccd761 | |||
| 78c9713425 |
@ -13,7 +13,7 @@ Splitter pipeline is:
|
|||||||
* FFMpeg used to decode video frames into OpenCVSharp.Mat
|
* FFMpeg used to decode video frames into OpenCVSharp.Mat
|
||||||
* One of detectors used:
|
* One of detectors used:
|
||||||
- For face detection: [opencv_zoo/models/face_detection_yunet at main opencv/opencv_zoo](https://github.com/opencv/opencv_zoo/tree/main/models/face_detection_yunet)
|
- For face detection: [opencv_zoo/models/face_detection_yunet at main opencv/opencv_zoo](https://github.com/opencv/opencv_zoo/tree/main/models/face_detection_yunet)
|
||||||
- For body detection: [yolov8s.pt Ultralytics/YOLOv8 at main](https://huggingface.co/Ultralytics/YOLOv8/blob/main/yolov8s.pt)
|
- For body detection: [THU-MIG/yolov10: YOLOv10: Real-Time End-to-End Object Detection [NeurIPS 2024]](https://github.com/THU-MIG/yolov10/tree/main)
|
||||||
* Camera control applied (CameraControl class)
|
* Camera control applied (CameraControl class)
|
||||||
* Final video frames are encoded back to video file using FFMpeg
|
* Final video frames are encoded back to video file using FFMpeg
|
||||||
|
|
||||||
|
|||||||
@ -6,13 +6,17 @@ public class PreviewData
|
|||||||
public IReadOnlyList<OpenCvSharp.Rect> DetectedBoxes { get; }
|
public IReadOnlyList<OpenCvSharp.Rect> DetectedBoxes { get; }
|
||||||
public Rect? CropRect { get; }
|
public Rect? CropRect { get; }
|
||||||
public Point2f GravitateTo { get; }
|
public Point2f GravitateTo { get; }
|
||||||
|
public TimeSpan Position { get; }
|
||||||
|
public int? Rotate { get; }
|
||||||
|
|
||||||
public PreviewData(Avalonia.Media.Imaging.Bitmap? frame, IReadOnlyList<OpenCvSharp.Rect> boxes, Rect? crop, Point2f gravitateTo)
|
public PreviewData(Avalonia.Media.Imaging.Bitmap? frame, IReadOnlyList<OpenCvSharp.Rect> boxes, Rect? crop, Point2f gravitateTo, TimeSpan position, int? rotate)
|
||||||
{
|
{
|
||||||
Frame = frame;
|
Frame = frame;
|
||||||
DetectedBoxes = boxes;
|
DetectedBoxes = boxes;
|
||||||
CropRect = crop;
|
CropRect = crop;
|
||||||
GravitateTo = gravitateTo;
|
GravitateTo = gravitateTo;
|
||||||
|
Position = position;
|
||||||
|
Rotate = rotate;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -36,16 +36,19 @@ internal sealed class Program
|
|||||||
|
|
||||||
// splitter services
|
// splitter services
|
||||||
services.AddSingleton<UltraFaceDetector>();
|
services.AddSingleton<UltraFaceDetector>();
|
||||||
services.AddSingleton<YoloOnnxObjectDetector>();
|
services.AddSingleton<YoloV10ObjectDetector>();
|
||||||
services.AddSingleton( x => new SingleThreadedDetector<UltraFaceDetector>(x.GetRequiredService<UltraFaceDetector>()) );
|
services.AddSingleton<OSNetEmbeddingExtractor>();
|
||||||
services.AddSingleton(x => new SingleThreadedDetector<YoloOnnxObjectDetector>(x.GetRequiredService<YoloOnnxObjectDetector>()));
|
services.AddSingleton<IObjectTracker, ObjectTracker>();
|
||||||
|
services.AddSingleton(x => new SingleThreadedDetector<UltraFaceDetector>(x.GetRequiredService<UltraFaceDetector>()));
|
||||||
|
services.AddSingleton(x => new SingleThreadedDetector<YoloV10ObjectDetector>(x.GetRequiredService<YoloV10ObjectDetector>()));
|
||||||
services.AddSingleton(x => new SingleThreadedDetector<DummyDetector>(x.GetRequiredService<DummyDetector>()));
|
services.AddSingleton(x => new SingleThreadedDetector<DummyDetector>(x.GetRequiredService<DummyDetector>()));
|
||||||
|
services.AddSingleton<IEmbeddingExtractor>(x => new SingleThreadedEmbeddingExtractor<OSNetEmbeddingExtractor>(x.GetRequiredService<OSNetEmbeddingExtractor>()));
|
||||||
services.AddSingleton<Func<string, IObjectDetector>>( x => detectorName =>
|
services.AddSingleton<Func<string, IObjectDetector>>( x => detectorName =>
|
||||||
{
|
{
|
||||||
return detectorName switch
|
return detectorName switch
|
||||||
{
|
{
|
||||||
"face" => x.GetRequiredService<SingleThreadedDetector<UltraFaceDetector>>(),
|
"face" => x.GetRequiredService<SingleThreadedDetector<UltraFaceDetector>>(),
|
||||||
"body" => x.GetRequiredService<SingleThreadedDetector<YoloOnnxObjectDetector>>(),
|
"body" => x.GetRequiredService<SingleThreadedDetector<YoloV10ObjectDetector>>(),
|
||||||
"none" => x.GetRequiredService<SingleThreadedDetector<DummyDetector>>(),
|
"none" => x.GetRequiredService<SingleThreadedDetector<DummyDetector>>(),
|
||||||
_ => new DummyDetector()
|
_ => new DummyDetector()
|
||||||
};
|
};
|
||||||
|
|||||||
@ -5,7 +5,7 @@ public class SingleThreadedDetector<T>(IObjectDetector _detector) : IObjectDetec
|
|||||||
{
|
{
|
||||||
private Lock _lock = new();
|
private Lock _lock = new();
|
||||||
|
|
||||||
public List<(OpenCvSharp.Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont)
|
public List<DetectedPerson> DetectAll(SingleTask job, Mat frameCont)
|
||||||
{
|
{
|
||||||
lock (_lock)
|
lock (_lock)
|
||||||
{
|
{
|
||||||
@ -19,3 +19,24 @@ public class SingleThreadedDetector<T>(IObjectDetector _detector) : IObjectDetec
|
|||||||
d.Dispose();
|
d.Dispose();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public class SingleThreadedEmbeddingExtractor<T>(IEmbeddingExtractor _extractor) : IEmbeddingExtractor
|
||||||
|
where T : IEmbeddingExtractor
|
||||||
|
{
|
||||||
|
private Lock _lock = new();
|
||||||
|
|
||||||
|
public float[] Extract(Mat frame, OpenCvSharp.Rect box)
|
||||||
|
{
|
||||||
|
lock (_lock)
|
||||||
|
{
|
||||||
|
return _extractor.Extract(frame, box);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void Dispose()
|
||||||
|
{
|
||||||
|
if (_extractor is IDisposable d)
|
||||||
|
d.Dispose();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -15,7 +15,7 @@ public partial class JobViewModel : ObservableObject
|
|||||||
public SingleJob GetJob() => Job;
|
public SingleJob GetJob() => Job;
|
||||||
|
|
||||||
[ObservableProperty] private VideoInfo? _probe;
|
[ObservableProperty] private VideoInfo? _probe;
|
||||||
[ObservableProperty] private PreviewData? _preview = new(null, [], null, new(0.5f, 0.5f));
|
[ObservableProperty] private PreviewData? _preview = new(null, [], null, new(0.5f, 0.5f), TimeSpan.Zero, null);
|
||||||
[ObservableProperty] private Bitmap? _thumbnail;
|
[ObservableProperty] private Bitmap? _thumbnail;
|
||||||
[ObservableProperty] private double _sliderLiveValue;
|
[ObservableProperty] private double _sliderLiveValue;
|
||||||
[ObservableProperty] private double _positionSeconds;
|
[ObservableProperty] private double _positionSeconds;
|
||||||
@ -70,7 +70,7 @@ public partial class JobViewModel : ObservableObject
|
|||||||
{
|
{
|
||||||
if (string.IsNullOrWhiteSpace(value))
|
if (string.IsNullOrWhiteSpace(value))
|
||||||
{
|
{
|
||||||
Job.GravitateTo = null;
|
Job.GravitateTo = new Point2f(0.5f, 0.5f);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -109,6 +109,19 @@ public partial class JobViewModel : ObservableObject
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public float ScoreThreshold
|
||||||
|
{
|
||||||
|
get => Job.ScoreThreshold;
|
||||||
|
set
|
||||||
|
{
|
||||||
|
if (Math.Abs(Job.ScoreThreshold - value) < 0.001)
|
||||||
|
return;
|
||||||
|
Job.ScoreThreshold = value;
|
||||||
|
OnPropertyChanged();
|
||||||
|
Task.Run(CreatePreview);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public string? Mask
|
public string? Mask
|
||||||
{
|
{
|
||||||
get => Job.Mask;
|
get => Job.Mask;
|
||||||
@ -182,10 +195,10 @@ public partial class JobViewModel : ObservableObject
|
|||||||
|
|
||||||
public Point2f GravitateTo
|
public Point2f GravitateTo
|
||||||
{
|
{
|
||||||
get => Job.GravitateTo ?? new Point2f(0.5f, 0.5f);
|
get => Job.GravitateTo;
|
||||||
set
|
set
|
||||||
{
|
{
|
||||||
if (Job.GravitateTo != null && Math.Abs(Job.GravitateTo.Value.X - value.X) < 0.001 && Math.Abs(Job.GravitateTo.Value.Y - value.Y) < 0.001)
|
if (Math.Abs(Job.GravitateTo.X - value.X) < 0.001 && Math.Abs(Job.GravitateTo.Y - value.Y) < 0.001)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Job.GravitateTo = value;
|
Job.GravitateTo = value;
|
||||||
@ -199,6 +212,8 @@ public partial class JobViewModel : ObservableObject
|
|||||||
get => Job.DetectAbove;
|
get => Job.DetectAbove;
|
||||||
set
|
set
|
||||||
{
|
{
|
||||||
|
if (Math.Abs(Job.DetectAbove - value) < 0.001 )
|
||||||
|
return;
|
||||||
Job.DetectAbove = value;
|
Job.DetectAbove = value;
|
||||||
OnPropertyChanged();
|
OnPropertyChanged();
|
||||||
Task.Run(CreatePreview);
|
Task.Run(CreatePreview);
|
||||||
@ -262,11 +277,17 @@ public partial class JobViewModel : ObservableObject
|
|||||||
return;
|
return;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
var frame = await _thumbnails.CreateThumbnailAsync(Job.InputFile, Probe, TimeSpan.FromSeconds(PositionSeconds), Probe.Width, Probe.Height, Job.Rotate);
|
var pos = TimeSpan.FromSeconds(PositionSeconds);
|
||||||
|
|
||||||
|
Bitmap? frame;
|
||||||
|
if (Preview?.Frame == null || Preview.Position != pos)
|
||||||
|
frame = await _thumbnails.CreateThumbnailAsync(Job.InputFile, Probe, pos, Probe.Width, Probe.Height, Job.Rotate);
|
||||||
|
else
|
||||||
|
frame = Preview.Frame;
|
||||||
if ( frame == null )
|
if ( frame == null )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Preview = new PreviewData(frame, [], null, Job.GravitateTo ?? new (0.5f, 0.5f));
|
Preview = new PreviewData(frame, [], null, Job.GravitateTo, pos, Job.Rotate);
|
||||||
|
|
||||||
var detector = _detectorFactory(Job.Detect ?? "");
|
var detector = _detectorFactory(Job.Detect ?? "");
|
||||||
var j = new SingleTask
|
var j = new SingleTask
|
||||||
@ -286,7 +307,7 @@ public partial class JobViewModel : ObservableObject
|
|||||||
if (detections.Count > 0)
|
if (detections.Count > 0)
|
||||||
{
|
{
|
||||||
var primaryDetection = detections
|
var primaryDetection = detections
|
||||||
.OrderByDescending(d => d.box.Height * d.box.Width)
|
.OrderByDescending(d => d.Box.Height * d.Box.Width)
|
||||||
.FirstOrDefault();
|
.FirstOrDefault();
|
||||||
|
|
||||||
var w = Probe.Width;
|
var w = Probe.Width;
|
||||||
@ -295,16 +316,16 @@ public partial class JobViewModel : ObservableObject
|
|||||||
var cropWidth = Job.Crop?.width ?? CommandLine.DefaultW;
|
var cropWidth = Job.Crop?.width ?? CommandLine.DefaultW;
|
||||||
var cropHeight = Job.Crop?.height ?? CommandLine.DefaultH;
|
var cropHeight = Job.Crop?.height ?? CommandLine.DefaultH;
|
||||||
|
|
||||||
var cx = primaryDetection.center.X - cropWidth / 2f;
|
var cx = primaryDetection.Center.X - cropWidth / 2f;
|
||||||
var cy = primaryDetection.center.Y - cropHeight / 2f;
|
var cy = primaryDetection.Center.Y - cropHeight / 2f;
|
||||||
|
|
||||||
var r = new Rect(cx, cy, cropWidth, cropHeight);
|
var r = new Rect(cx, cy, cropWidth, cropHeight);
|
||||||
|
|
||||||
crop = ClampCrop(r, w, h);
|
crop = ClampCrop(r, w, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
var boxes = detections.Select(x => x.box).ToList();
|
var boxes = detections.Select(x => x.Box).ToList();
|
||||||
Preview = new PreviewData(frame, boxes, crop, Job.GravitateTo ?? new (0.5f, 0.5f));
|
Preview = new PreviewData(frame, boxes, crop, Job.GravitateTo, pos, Job.Rotate);
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
|
|||||||
@ -86,6 +86,25 @@ x:DataType="vm:InspectorPaneViewModel">
|
|||||||
Width="160"/>
|
Width="160"/>
|
||||||
</StackPanel>
|
</StackPanel>
|
||||||
|
|
||||||
|
<!-- ScoreThreshold -->
|
||||||
|
<StackPanel Orientation="Horizontal" Spacing="8">
|
||||||
|
<TextBlock Text="Score Threshold" Width="120"/>
|
||||||
|
|
||||||
|
<StackPanel Orientation="Vertical" Spacing="4" Width="260">
|
||||||
|
<Slider Minimum="0"
|
||||||
|
Maximum="1"
|
||||||
|
SmallChange="0.01"
|
||||||
|
LargeChange="0.1"
|
||||||
|
TickFrequency="0.05"
|
||||||
|
IsSnapToTickEnabled="False"
|
||||||
|
Value="{Binding Selected.ScoreThreshold, Mode=TwoWay}"/>
|
||||||
|
|
||||||
|
<TextBlock Text="{Binding Selected.ScoreThreshold, StringFormat='0.00'}"
|
||||||
|
FontSize="10"
|
||||||
|
HorizontalAlignment="Right"/>
|
||||||
|
</StackPanel>
|
||||||
|
</StackPanel>
|
||||||
|
|
||||||
<!-- DetectAbove -->
|
<!-- DetectAbove -->
|
||||||
<StackPanel Orientation="Horizontal" Spacing="8">
|
<StackPanel Orientation="Horizontal" Spacing="8">
|
||||||
<TextBlock Text="Detect Above" Width="120"/>
|
<TextBlock Text="Detect Above" Width="120"/>
|
||||||
|
|||||||
@ -8,7 +8,7 @@
|
|||||||
x:DataType="vm:MainViewModel"
|
x:DataType="vm:MainViewModel"
|
||||||
x:Name="Root"
|
x:Name="Root"
|
||||||
Width="1800"
|
Width="1800"
|
||||||
Height="790"
|
Height="830"
|
||||||
Title="Splitter UI"
|
Title="Splitter UI"
|
||||||
Icon="avares://Splitter-UI/Assets/splitter.png">
|
Icon="avares://Splitter-UI/Assets/splitter.png">
|
||||||
|
|
||||||
|
|||||||
@ -98,6 +98,14 @@ public sealed class CommandLine
|
|||||||
else
|
else
|
||||||
Master.DetectAbove = 0.7f;
|
Master.DetectAbove = 0.7f;
|
||||||
}
|
}
|
||||||
|
else if (arg.StartsWith("--score-threshold="))
|
||||||
|
{
|
||||||
|
var val = arg.Substring("--score-threshold=".Length);
|
||||||
|
if (float.TryParse(val, NumberStyles.Float, CultureInfo.InvariantCulture, out var scoreThreshold) && scoreThreshold >= 0.0f && scoreThreshold <= 1.0f)
|
||||||
|
Master.ScoreThreshold = scoreThreshold;
|
||||||
|
else
|
||||||
|
Master.ScoreThreshold = 0.25f;
|
||||||
|
}
|
||||||
else if (arg == "--crop")
|
else if (arg == "--crop")
|
||||||
{
|
{
|
||||||
Master.Crop = ParseCrop("");
|
Master.Crop = ParseCrop("");
|
||||||
@ -224,22 +232,22 @@ public sealed class CommandLine
|
|||||||
return key.Length > 0;
|
return key.Length > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Point2f? ParseGravitate(string value)
|
private static Point2f ParseGravitate(string value)
|
||||||
{
|
{
|
||||||
// Expected format: "<x>:<y>"
|
// Expected format: "<x>:<y>"
|
||||||
var parts = value.Split(':');
|
var parts = value.Split(':');
|
||||||
if (parts.Length != 2)
|
if (parts.Length != 2)
|
||||||
return null;
|
return new Point2f(0.5f, 0.5f);
|
||||||
|
|
||||||
if (!float.TryParse(parts[0], NumberStyles.Float, CultureInfo.InvariantCulture, out var x))
|
if (!float.TryParse(parts[0], NumberStyles.Float, CultureInfo.InvariantCulture, out var x))
|
||||||
return null;
|
return new Point2f(0.5f, 0.5f);
|
||||||
|
|
||||||
if (!float.TryParse(parts[1], NumberStyles.Float, CultureInfo.InvariantCulture, out var y))
|
if (!float.TryParse(parts[1], NumberStyles.Float, CultureInfo.InvariantCulture, out var y))
|
||||||
return null;
|
return new Point2f(0.5f, 0.5f);
|
||||||
|
|
||||||
// Normalized range check (0.0–1.0)
|
// Normalized range check (0.0–1.0)
|
||||||
if (x < 0f || x > 1f || y < 0f || y > 1f)
|
if (x < 0f || x > 1f || y < 0f || y > 1f)
|
||||||
return null;
|
return new Point2f(0.5f, 0.5f);
|
||||||
|
|
||||||
return new Point2f(x, y);
|
return new Point2f(x, y);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -64,11 +64,13 @@ public class JobProcessor(ILogger logger) : LoggingBase(logger, 0), IJobProcesso
|
|||||||
IObjectDetector detector = job.Detect switch
|
IObjectDetector detector = job.Detect switch
|
||||||
{
|
{
|
||||||
"face" => new UltraFaceDetector(_logger),
|
"face" => new UltraFaceDetector(_logger),
|
||||||
"body" => new YoloOnnxObjectDetector(_logger),
|
"body" => new YoloV10ObjectDetector(_logger),
|
||||||
"none" => new DummyDetector(),
|
"none" => new DummyDetector(),
|
||||||
_ => throw new InvalidOperationException($"Unknown detector: {job.Detect}")
|
_ => throw new InvalidOperationException($"Unknown detector: {job.Detect}")
|
||||||
};
|
};
|
||||||
return new TrackingSplitter(i, detector, job, _logger);
|
var osnet = new OSNetEmbeddingExtractor();
|
||||||
|
var tracker = new ObjectTracker(detector, osnet);
|
||||||
|
return new TrackingSplitter(i, tracker, job, _logger);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|||||||
@ -29,12 +29,7 @@ public class SingleJob
|
|||||||
/// such as left-center (0.2, 0.5) or top-right (0.8, 0.2). This can be useful for
|
/// such as left-center (0.2, 0.5) or top-right (0.8, 0.2). This can be useful for
|
||||||
/// videos where the subject tends to be off-center or for creative framing choices.
|
/// videos where the subject tends to be off-center or for creative framing choices.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public Point2f? GravitateTo { get; set; }
|
public Point2f GravitateTo { get; set; } = new Point2f(0.5f, 0.5f);
|
||||||
/// <summary>
|
|
||||||
/// Face or human detectors should only report detections if their upper bound starts below this threshold.
|
|
||||||
/// This is a value between 0.0 and 1.0 mapped to 0..Height.
|
|
||||||
/// </summary>
|
|
||||||
public float DetectAbove { get; set; } = 0.3f;
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Destination file mask.
|
/// Destination file mask.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@ -50,6 +45,15 @@ public class SingleJob
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public string? Detect { get; set; }
|
public string? Detect { get; set; }
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
/// Detection confidence threshold. This is a value between 0.0 and 1.0 that sets the minimum confidence a detection must have to be reported.
|
||||||
|
/// </summary>
|
||||||
|
public float ScoreThreshold { get; set; } = 0.25f;
|
||||||
|
/// <summary>
|
||||||
|
/// Face or human detectors should only report detections if their upper bound starts below this threshold.
|
||||||
|
/// This is a value between 0.0 and 1.0 mapped to 0..Height.
|
||||||
|
/// </summary>
|
||||||
|
public float DetectAbove { get; set; } = 0.7f;
|
||||||
|
/// <summary>
|
||||||
/// Set target segments length explicitly. By default, the splitter calculates segment
|
/// Set target segments length explicitly. By default, the splitter calculates segment
|
||||||
/// lengths to be equal and not exceed 58 seconds.
|
/// lengths to be equal and not exceed 58 seconds.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|||||||
@ -4,24 +4,18 @@ using System.Runtime.InteropServices;
|
|||||||
|
|
||||||
namespace splitter;
|
namespace splitter;
|
||||||
|
|
||||||
public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
|
public class TrackingSplitter : LoggingBase, ISegmentProcessor
|
||||||
{
|
{
|
||||||
private readonly IObjectDetector _detector;
|
private readonly IObjectTracker _tracker;
|
||||||
|
|
||||||
public TrackingSplitter(
|
public TrackingSplitter(
|
||||||
int progressLine,
|
int progressLine,
|
||||||
IObjectDetector detector,
|
IObjectTracker tracker,
|
||||||
SingleJob cmd,
|
SingleJob cmd,
|
||||||
ILogger logger)
|
ILogger logger)
|
||||||
: base(logger, progressLine)
|
: base(logger, progressLine)
|
||||||
{
|
{
|
||||||
_detector = detector;
|
_tracker = tracker;
|
||||||
}
|
|
||||||
|
|
||||||
public void Dispose()
|
|
||||||
{
|
|
||||||
if (_detector is IDisposable d)
|
|
||||||
d.Dispose();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public async Task ProcessSegment(SingleTask job, CancellationToken token)
|
public async Task ProcessSegment(SingleTask job, CancellationToken token)
|
||||||
@ -130,12 +124,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
|
|||||||
|
|
||||||
Marshal.Copy(inBuffer, 0, frameMat.Data, inBytes);
|
Marshal.Copy(inBuffer, 0, frameMat.Data, inBytes);
|
||||||
|
|
||||||
var objects = _detector.DetectAll(job, frameMat);
|
var (objects, primary) = _tracker.SelectTrackedObject(job, frameMat, kalman.LastMeasurement);
|
||||||
|
|
||||||
// Ignore detections starting in the lower 1/2 of the frame
|
|
||||||
objects = objects.Where(o => o.center.Y <= frameMat.Height * job.Job.DetectAbove).ToList();
|
|
||||||
|
|
||||||
var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
|
|
||||||
|
|
||||||
camera.Update(primary);
|
camera.Update(primary);
|
||||||
var roi = camera.Roi;
|
var roi = camera.Roi;
|
||||||
@ -389,7 +378,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
|
|||||||
|
|
||||||
private void DrawDebug(
|
private void DrawDebug(
|
||||||
Mat frame,
|
Mat frame,
|
||||||
System.Collections.Generic.List<(Rect box, Point2f center)> objects,
|
List<DetectedPerson> objects,
|
||||||
CameraController camera,
|
CameraController camera,
|
||||||
KalmanTracker kalman)
|
KalmanTracker kalman)
|
||||||
{
|
{
|
||||||
@ -418,52 +407,4 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
|
|||||||
HersheyFonts.HersheySimplex, 0.6, color, 2);
|
HersheyFonts.HersheySimplex, 0.6, color, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
private (Rect box, Point2f center)? SelectTrackedObject(
|
|
||||||
List<(Rect box, Point2f center)> foundObjects,
|
|
||||||
Point2f? previousCenter)
|
|
||||||
{
|
|
||||||
if (foundObjects == null || foundObjects.Count == 0)
|
|
||||||
return null;
|
|
||||||
|
|
||||||
if (!previousCenter.HasValue)
|
|
||||||
{
|
|
||||||
var bestIndex = 0;
|
|
||||||
var bestArea = float.MinValue;
|
|
||||||
|
|
||||||
for (var i = 0; i < foundObjects.Count; i++)
|
|
||||||
{
|
|
||||||
var f = foundObjects[i];
|
|
||||||
var area = f.box.Width * f.box.Height;
|
|
||||||
if (area > bestArea)
|
|
||||||
{
|
|
||||||
bestArea = area;
|
|
||||||
bestIndex = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return foundObjects[bestIndex];
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
var prev = previousCenter.Value;
|
|
||||||
var bestIndex = 0;
|
|
||||||
var bestDist2 = float.MaxValue;
|
|
||||||
|
|
||||||
for (var i = 0; i < foundObjects.Count; i++)
|
|
||||||
{
|
|
||||||
var f = foundObjects[i];
|
|
||||||
var dx = f.center.X - prev.X;
|
|
||||||
var dy = f.center.Y - prev.Y;
|
|
||||||
var d2 = dx * dx + dy * dy;
|
|
||||||
|
|
||||||
if (d2 < bestDist2)
|
|
||||||
{
|
|
||||||
bestDist2 = d2;
|
|
||||||
bestIndex = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return foundObjects[bestIndex];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -58,7 +58,7 @@ public sealed class CameraController
|
|||||||
_kalman.Reset(_cameraCenter);
|
_kalman.Reset(_cameraCenter);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Point2f DefaultCenter => _cmd.GravitateTo ?? new Point2f(_videoWidth / 2f, _videoHeight / 2f);
|
private Point2f DefaultCenter => _cmd.GravitateTo;
|
||||||
|
|
||||||
public int LostFrames => _lostFrames;
|
public int LostFrames => _lostFrames;
|
||||||
public Point2f CameraCenter => _cameraCenter;
|
public Point2f CameraCenter => _cameraCenter;
|
||||||
@ -68,15 +68,15 @@ public sealed class CameraController
|
|||||||
public Point2f? ObjectCenter => _objectCenter;
|
public Point2f? ObjectCenter => _objectCenter;
|
||||||
public Rect Roi => _roi;
|
public Rect Roi => _roi;
|
||||||
|
|
||||||
public void Update((Rect box, Point2f center)? primary)
|
public void Update(DetectedPerson? primary)
|
||||||
{
|
{
|
||||||
Rect? objectBox = null;
|
Rect? objectBox = null;
|
||||||
Point2f? objectCenter = null;
|
Point2f? objectCenter = null;
|
||||||
|
|
||||||
if (primary.HasValue)
|
if (primary.HasValue)
|
||||||
{
|
{
|
||||||
objectCenter = primary.Value.center;
|
objectCenter = primary.Value.Center;
|
||||||
objectBox = primary.Value.box;
|
objectBox = primary.Value.Box;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------
|
// ---------------------------------------------------------
|
||||||
|
|||||||
8
splitter-cli/algo/DetectedPerson.cs
Normal file
8
splitter-cli/algo/DetectedPerson.cs
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
namespace splitter.algo;
|
||||||
|
|
||||||
|
public struct DetectedPerson
|
||||||
|
{
|
||||||
|
public ulong Id;
|
||||||
|
public Rect Box;
|
||||||
|
public Point2f Center;
|
||||||
|
}
|
||||||
@ -2,19 +2,19 @@
|
|||||||
|
|
||||||
public sealed class DummyDetector : IObjectDetector
|
public sealed class DummyDetector : IObjectDetector
|
||||||
{
|
{
|
||||||
public List<(Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont)
|
public List<DetectedPerson> DetectAll(SingleTask job, Mat frameCont)
|
||||||
{
|
{
|
||||||
var h = job.Info.Height;
|
var h = job.Info.Height;
|
||||||
var w = job.Info.Width;
|
var w = job.Info.Width;
|
||||||
|
|
||||||
var c = job.Job.GravitateTo ?? new Point2f(0.5f, 0.5f);
|
var c = job.Job.GravitateTo;
|
||||||
var x = (int)(c.X * w);
|
var x = (int)(c.X * w);
|
||||||
var y = (int)(c.Y * h);
|
var y = (int)(c.Y * h);
|
||||||
|
|
||||||
var center = new Point2f(x, y);
|
var center = new Point2f(x, y);
|
||||||
var rect = new Rect(x - 1, y - 1, 2, 2);
|
var rect = new Rect(x - 1, y - 1, 2, 2);
|
||||||
|
|
||||||
return [(rect, center)];
|
return [new DetectedPerson { Box = rect, Center = center }];
|
||||||
}
|
}
|
||||||
|
|
||||||
public void Dispose() {}
|
public void Dispose() {}
|
||||||
|
|||||||
6
splitter-cli/algo/IEmbeddingExtractor.cs
Normal file
6
splitter-cli/algo/IEmbeddingExtractor.cs
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
namespace splitter.algo;
|
||||||
|
|
||||||
|
public interface IEmbeddingExtractor : IDisposable
|
||||||
|
{
|
||||||
|
float[] Extract(Mat frame, Rect box);
|
||||||
|
}
|
||||||
@ -2,5 +2,5 @@
|
|||||||
|
|
||||||
public interface IObjectDetector : IDisposable
|
public interface IObjectDetector : IDisposable
|
||||||
{
|
{
|
||||||
List<(Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont);
|
List<DetectedPerson> DetectAll(SingleTask job, Mat frameCont);
|
||||||
}
|
}
|
||||||
6
splitter-cli/algo/IObjectTracker.cs
Normal file
6
splitter-cli/algo/IObjectTracker.cs
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
namespace splitter.algo;
|
||||||
|
|
||||||
|
public interface IObjectTracker
|
||||||
|
{
|
||||||
|
(List<DetectedPerson>, DetectedPerson?) SelectTrackedObject(SingleTask job, Mat frameMat, Point2f? lastMeasurement);
|
||||||
|
}
|
||||||
127
splitter-cli/algo/OSNetEmbeddingExtractor.cs
Normal file
127
splitter-cli/algo/OSNetEmbeddingExtractor.cs
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
using System.Runtime.CompilerServices;
|
||||||
|
using Microsoft.ML.OnnxRuntime;
|
||||||
|
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||||
|
|
||||||
|
namespace splitter.algo;
|
||||||
|
|
||||||
|
public sealed class OSNetEmbeddingExtractor : IDisposable, IEmbeddingExtractor
|
||||||
|
{
|
||||||
|
private readonly InferenceSession _session;
|
||||||
|
private readonly string _inputName;
|
||||||
|
private readonly string _outputName;
|
||||||
|
|
||||||
|
private const int _batchSize = 16;
|
||||||
|
private const int _inputHeight = 256;
|
||||||
|
private const int _inputWidth = 128;
|
||||||
|
private const int _channels = 3;
|
||||||
|
|
||||||
|
private readonly float[] _inputBuffer;
|
||||||
|
private readonly DenseTensor<float> _inputTensor;
|
||||||
|
private readonly List<NamedOnnxValue> _inputs = new(1);
|
||||||
|
|
||||||
|
private readonly float[] _embedding;
|
||||||
|
|
||||||
|
private readonly Mat _resizeMat = new();
|
||||||
|
private readonly Mat _rgbMat = new();
|
||||||
|
|
||||||
|
private readonly float _inv255 = 1f / 255f;
|
||||||
|
|
||||||
|
public OSNetEmbeddingExtractor()
|
||||||
|
{
|
||||||
|
var opt = new SessionOptions();
|
||||||
|
opt.AppendExecutionProvider_DML();
|
||||||
|
|
||||||
|
var modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "models", "osnet_x0_25_msmt17.onnx");
|
||||||
|
_session = new InferenceSession(modelPath, opt);
|
||||||
|
|
||||||
|
_inputName = _session.InputMetadata.Keys.First();
|
||||||
|
_outputName = _session.OutputMetadata.Keys.First();
|
||||||
|
|
||||||
|
int inputSize = _batchSize * _channels * _inputHeight * _inputWidth;
|
||||||
|
_inputBuffer = new float[inputSize];
|
||||||
|
|
||||||
|
_inputTensor = new DenseTensor<float>(
|
||||||
|
_inputBuffer,
|
||||||
|
new[] { _batchSize, _channels, _inputHeight, _inputWidth }
|
||||||
|
);
|
||||||
|
|
||||||
|
_inputs.Add(NamedOnnxValue.CreateFromTensor(_inputName, _inputTensor));
|
||||||
|
|
||||||
|
int outDim = _session.OutputMetadata[_outputName].Dimensions[1];
|
||||||
|
_embedding = new float[outDim];
|
||||||
|
}
|
||||||
|
|
||||||
|
public float[] Extract(Mat frame, Rect box)
|
||||||
|
{
|
||||||
|
// Clear all batches
|
||||||
|
Array.Clear(_inputBuffer, 0, _inputBuffer.Length);
|
||||||
|
|
||||||
|
// Extract ROI
|
||||||
|
var roi = new Mat(frame, box);
|
||||||
|
|
||||||
|
Cv2.Resize(roi, _resizeMat, new Size(_inputWidth, _inputHeight));
|
||||||
|
Cv2.CvtColor(_resizeMat, _rgbMat, ColorConversionCodes.BGR2RGB);
|
||||||
|
|
||||||
|
FillBatch0(_rgbMat);
|
||||||
|
|
||||||
|
using var results = _session.Run(_inputs);
|
||||||
|
|
||||||
|
var output = results.First(v => v.Name == _outputName).AsTensor<float>();
|
||||||
|
|
||||||
|
// Read embedding from batch 0
|
||||||
|
for (int i = 0; i < _embedding.Length; i++)
|
||||||
|
_embedding[i] = output[0, i];
|
||||||
|
|
||||||
|
NormalizeL2(_embedding);
|
||||||
|
|
||||||
|
return _embedding;
|
||||||
|
}
|
||||||
|
|
||||||
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
|
private void FillBatch0(Mat rgb)
|
||||||
|
{
|
||||||
|
int plane = _inputHeight * _inputWidth;
|
||||||
|
|
||||||
|
unsafe
|
||||||
|
{
|
||||||
|
for (int y = 0; y < _inputHeight; y++)
|
||||||
|
{
|
||||||
|
var rowPtr = (byte*)rgb.Ptr(y).ToPointer();
|
||||||
|
var rowSpan = new Span<byte>(rowPtr, _inputWidth * 3);
|
||||||
|
|
||||||
|
int src = 0;
|
||||||
|
|
||||||
|
for (int x = 0; x < _inputWidth; x++)
|
||||||
|
{
|
||||||
|
int off = y * _inputWidth + x;
|
||||||
|
|
||||||
|
_inputBuffer[off] = rowSpan[src + 0] * _inv255; // R
|
||||||
|
_inputBuffer[plane + off] = rowSpan[src + 1] * _inv255; // G
|
||||||
|
_inputBuffer[2 * plane + off] = rowSpan[src + 2] * _inv255; // B
|
||||||
|
|
||||||
|
src += 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
|
private static void NormalizeL2(float[] v)
|
||||||
|
{
|
||||||
|
float sum = 0f;
|
||||||
|
for (int i = 0; i < v.Length; i++)
|
||||||
|
sum += v[i] * v[i];
|
||||||
|
|
||||||
|
float inv = 1f / MathF.Sqrt(sum);
|
||||||
|
|
||||||
|
for (int i = 0; i < v.Length; i++)
|
||||||
|
v[i] *= inv;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void Dispose()
|
||||||
|
{
|
||||||
|
_session?.Dispose();
|
||||||
|
_resizeMat?.Dispose();
|
||||||
|
_rgbMat?.Dispose();
|
||||||
|
}
|
||||||
|
}
|
||||||
98
splitter-cli/algo/ObjectTracker.cs
Normal file
98
splitter-cli/algo/ObjectTracker.cs
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
namespace splitter.algo;
|
||||||
|
|
||||||
|
public class ObjectTracker(IObjectDetector _detector, IEmbeddingExtractor _embeddingExtractor) : IObjectTracker
|
||||||
|
{
|
||||||
|
public (List<DetectedPerson> /*objects*/, DetectedPerson? /*primary*/) SelectTrackedObject(SingleTask job, Mat frameMat, Point2f? lastMeasurement)
|
||||||
|
{
|
||||||
|
var objects = _detector.DetectAll(job, frameMat) ?? [];
|
||||||
|
|
||||||
|
// Ignore detections whose center lies below the DetectAbove fraction of the frame height
|
||||||
|
objects = objects.Where(o => o.Center.Y <= frameMat.Height * job.Job.DetectAbove).ToList();
|
||||||
|
|
||||||
|
// attach embeddings to all persons
|
||||||
|
for (int i = 0; i < objects.Count; i++)
|
||||||
|
{
|
||||||
|
var p = objects[i]; // copy struct
|
||||||
|
|
||||||
|
var rect = p.Box;
|
||||||
|
|
||||||
|
rect.X = Math.Clamp(rect.X, 0, frameMat.Width - 1);
|
||||||
|
rect.Y = Math.Clamp(rect.Y, 0, frameMat.Height - 1);
|
||||||
|
rect.Width = Math.Clamp(rect.Width, 1, frameMat.Width - rect.X);
|
||||||
|
rect.Height = Math.Clamp(rect.Height, 1, frameMat.Height - rect.Y);
|
||||||
|
|
||||||
|
var embedding = _embeddingExtractor.Extract(frameMat, rect);
|
||||||
|
p.Id = HashEmbedding(embedding); // assign ID based on embedding hash
|
||||||
|
|
||||||
|
objects[i] = p; // write back
|
||||||
|
}
|
||||||
|
|
||||||
|
var primary = SelectPrimaryObject(objects, lastMeasurement);
|
||||||
|
return (objects, primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static ulong HashEmbedding(float[] emb)
|
||||||
|
{
|
||||||
|
unchecked
|
||||||
|
{
|
||||||
|
ulong hash = 146527;
|
||||||
|
for (int i = 0; i < emb.Length; i++)
|
||||||
|
{
|
||||||
|
// convert float to int bits
|
||||||
|
uint bits = (uint)BitConverter.SingleToInt32Bits(emb[i]);
|
||||||
|
hash = (hash * 16777619) ^ bits;
|
||||||
|
}
|
||||||
|
return hash;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private DetectedPerson? SelectPrimaryObject(
|
||||||
|
List<DetectedPerson> foundObjects,
|
||||||
|
Point2f? previousCenter)
|
||||||
|
{
|
||||||
|
if (foundObjects == null || foundObjects.Count == 0)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
if (!previousCenter.HasValue)
|
||||||
|
{
|
||||||
|
var bestIndex = 0;
|
||||||
|
var bestArea = float.MinValue;
|
||||||
|
|
||||||
|
for (var i = 0; i < foundObjects.Count; i++)
|
||||||
|
{
|
||||||
|
var f = foundObjects[i];
|
||||||
|
var area = f.Box.Width * f.Box.Height;
|
||||||
|
if (area > bestArea)
|
||||||
|
{
|
||||||
|
bestArea = area;
|
||||||
|
bestIndex = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return foundObjects[bestIndex];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
var prev = previousCenter.Value;
|
||||||
|
var bestIndex = 0;
|
||||||
|
var bestDist2 = float.MaxValue;
|
||||||
|
|
||||||
|
for (var i = 0; i < foundObjects.Count; i++)
|
||||||
|
{
|
||||||
|
var f = foundObjects[i];
|
||||||
|
var dx = f.Center.X - prev.X;
|
||||||
|
var dy = f.Center.Y - prev.Y;
|
||||||
|
var d2 = dx * dx + dy * dy;
|
||||||
|
|
||||||
|
if (d2 < bestDist2)
|
||||||
|
{
|
||||||
|
bestDist2 = d2;
|
||||||
|
bestIndex = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return foundObjects[bestIndex];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -23,14 +23,14 @@ public sealed class UltraFaceDetector: LoggingBase, IDisposable, IObjectDetector
|
|||||||
_ultraFace = UltraFace.Create(param);
|
_ultraFace = UltraFace.Create(param);
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<(Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont)
|
public List<DetectedPerson> DetectAll(SingleTask job, Mat frameCont)
|
||||||
{
|
{
|
||||||
// Convert to byte[] for UltraFace
|
// Convert to byte[] for UltraFace
|
||||||
var bytesFull = frameCont.Rows * frameCont.Cols * frameCont.ElemSize();
|
var bytesFull = frameCont.Rows * frameCont.Cols * frameCont.ElemSize();
|
||||||
var bgr = new byte[bytesFull];
|
var bgr = new byte[bytesFull];
|
||||||
Marshal.Copy(frameCont.Data, bgr, 0, bytesFull);
|
Marshal.Copy(frameCont.Data, bgr, 0, bytesFull);
|
||||||
|
|
||||||
var results = new List<(Rect box, Point2f center)>();
|
var results = new List<DetectedPerson>();
|
||||||
|
|
||||||
if (bgr == null || bgr.Length == 0)
|
if (bgr == null || bgr.Length == 0)
|
||||||
return results;
|
return results;
|
||||||
@ -69,7 +69,7 @@ public sealed class UltraFaceDetector: LoggingBase, IDisposable, IObjectDetector
|
|||||||
rect.X + rect.Width / 2f,
|
rect.X + rect.Width / 2f,
|
||||||
rect.Y + rect.Height / 2f);
|
rect.Y + rect.Height / 2f);
|
||||||
|
|
||||||
results.Add((rect, center));
|
results.Add(new DetectedPerson{ Box = rect, Center = center });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
278
splitter-cli/algo/YoloV10ObjectDetector.cs
Normal file
278
splitter-cli/algo/YoloV10ObjectDetector.cs
Normal file
@ -0,0 +1,278 @@
|
|||||||
|
using System.Runtime.CompilerServices;
|
||||||
|
using Microsoft.ML.OnnxRuntime;
|
||||||
|
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||||
|
|
||||||
|
namespace splitter.algo;
|
||||||
|
|
||||||
|
/// <summary>
/// Object detector that runs the <c>models/yolov10m.onnx</c> model through ONNX Runtime and
/// reports detections of class index <see cref="_personClassIndex"/>.
/// </summary>
/// <remarks>
/// All working buffers (matrices, input tensor, detection lists) are allocated once and reused
/// on every call. The list returned by <see cref="DetectAll"/> is one of those buffers, so it is
/// overwritten by the next call.
/// </remarks>
public sealed class YoloV10ObjectDetector : LoggingBase, IObjectDetector, IDisposable
{
    private readonly InferenceSession _session;
    private readonly string _inputName;
    private readonly string _outputName;

    // Size the frame is resized to before inference; also used to scale boxes back to the frame.
    private const int _inputWidth = 640;
    private const int _inputHeight = 640;
    private const float _nmsThreshold = 0.45f;
    private const int _personClassIndex = 0;

    // Scratch matrices for the resized frame and its RGB conversion.
    private readonly Mat _resizeMat = new();
    private readonly Mat _rgbMat = new();

    // Planar float input (1 x 3 x H x W); _inputTensor is a view over _inputBuffer.
    private readonly float[] _inputBuffer;
    private readonly DenseTensor<float> _inputTensor;

    private readonly List<NamedOnnxValue> _inputs = new(1);

    private readonly List<Detection> _detections = new(256);
    private readonly List<Detection> _nmsBuffer = new(256);

    // Reusable result list handed back to the caller.
    private readonly List<DetectedPerson> _results = new(64);

    private readonly float _inv255 = 1f / 255f;

    /// <summary>Box in original-frame pixels (top-left corner plus size) with its score.</summary>
    private readonly struct Detection
    {
        public readonly float X;
        public readonly float Y;
        public readonly float Width;
        public readonly float Height;
        public readonly float Score;

        public Detection(float x, float y, float w, float h, float score)
        {
            X = x;
            Y = y;
            Width = w;
            Height = h;
            Score = score;
        }
    }

    /// <summary>
    /// Loads <c>models/yolov10m.onnx</c> from the application base directory with the DirectML
    /// execution provider and prepares the reusable input tensor.
    /// </summary>
    /// <param name="logger">Logger forwarded to <see cref="LoggingBase"/>.</param>
    public YoloV10ObjectDetector(ILogger logger) : base(logger, -1)
    {
        // SessionOptions wraps a native handle; it is only needed while the session is being
        // created, so release it when the constructor exits instead of leaking it.
        using var options = new SessionOptions();
        options.AppendExecutionProvider_DML();

        var basePath = AppDomain.CurrentDomain.BaseDirectory;
        var modelPath = Path.Combine(basePath, "models", "yolov10m.onnx");

        _session = new InferenceSession(modelPath, options);

        _inputName = _session.InputMetadata.Keys.First();
        _outputName = _session.OutputMetadata.Keys.First();

        _inputBuffer = new float[1 * 3 * _inputHeight * _inputWidth];
        _inputTensor = new DenseTensor<float>(_inputBuffer, new[] { 1, 3, _inputHeight, _inputWidth });

        _inputs.Add(NamedOnnxValue.CreateFromTensor(_inputName, _inputTensor));
    }

    /// <summary>
    /// Detects objects of the configured class in <paramref name="frameCont"/>.
    /// </summary>
    /// <param name="job">Task supplying <c>Job.ScoreThreshold</c>, the minimum score a detection must reach.</param>
    /// <param name="frameCont">Frame to analyse; the BGR-to-RGB conversion below implies a BGR image.</param>
    /// <returns>
    /// Detections with boxes clamped to the frame. The list is reused between calls; it is empty
    /// when the frame is empty or the expected model output is missing.
    /// </returns>
    public List<DetectedPerson> DetectAll(SingleTask job, Mat frameCont)
    {
        if (frameCont.Empty())
        {
            _results.Clear();
            return _results;
        }

        Cv2.Resize(frameCont, _resizeMat, new Size(_inputWidth, _inputHeight));
        Cv2.CvtColor(_resizeMat, _rgbMat, ColorConversionCodes.BGR2RGB);

        FillInputTensor(_rgbMat);

        using var results = _session.Run(_inputs);

        Tensor<float>? output = null;
        foreach (var r in results)
        {
            if (r.Name == _outputName)
            {
                output = r.AsTensor<float>();
                break;
            }
        }

        if (output is null)
        {
            _results.Clear();
            return _results;
        }

        ParseYoloV10(
            output,
            frameCont.Width,
            frameCont.Height,
            job.Job.ScoreThreshold,
            _personClassIndex,
            _detections);

        var final = ApplyNms(_detections, _nmsThreshold, _nmsBuffer);

        _results.Clear();
        for (var i = 0; i < final.Count; i++)
        {
            var d = final[i];

            var x = (int)d.X;
            var y = (int)d.Y;
            var w = (int)d.Width;
            var h = (int)d.Height;

            // Keep the box inside the frame and at least one pixel in each dimension.
            x = Math.Clamp(x, 0, frameCont.Width - 1);
            y = Math.Clamp(y, 0, frameCont.Height - 1);
            w = Math.Clamp(w, 1, frameCont.Width - x);
            h = Math.Clamp(h, 1, frameCont.Height - y);

            var rect = new Rect(x, y, w, h);
            var center = new Point2f(x + w / 2f, y + h / 2f);

            _results.Add(new DetectedPerson{ Box = rect, Center = center });
        }

        return _results;
    }

    /// <summary>
    /// Copies an interleaved 3-byte-per-pixel image into <see cref="_inputBuffer"/> as three
    /// consecutive planes, scaling each byte by 1/255.
    /// </summary>
    /// <param name="rgb">Image of size <see cref="_inputWidth"/> x <see cref="_inputHeight"/> with three bytes per pixel.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private void FillInputTensor(Mat rgb)
    {
        var height = _inputHeight;
        var width = _inputWidth;

        var planeSize = height * width;

        Span<float> dst = _inputBuffer.AsSpan();

        unsafe
        {
            for (var y = 0; y < height; y++)
            {
                // Address each row through Mat.Ptr rather than assuming rows are contiguous.
                var rowPtr = (byte*)rgb.Ptr(y).ToPointer();
                var rowSpan = new Span<byte>(rowPtr, width * 3);

                var srcIndex = 0;

                for (var x = 0; x < width; x++)
                {
                    var r = rowSpan[srcIndex + 0];
                    var g = rowSpan[srcIndex + 1];
                    var b = rowSpan[srcIndex + 2];

                    var offset = y * width + x;

                    dst[offset] = r * _inv255;
                    dst[planeSize + offset] = g * _inv255;
                    dst[2 * planeSize + offset] = b * _inv255;

                    srcIndex += 3;
                }
            }
        }
    }

    /// <summary>
    /// Converts the raw model output into <see cref="Detection"/> entries in original-frame pixels.
    /// </summary>
    /// <remarks>
    /// YOLOv10 parser: [1, 300, 6] => x1, y1, x2, y2, score, class_id, with coordinates expressed
    /// in the network input space.
    /// </remarks>
    /// <param name="output">Model output tensor.</param>
    /// <param name="originalWidth">Width of the frame the boxes are scaled back to.</param>
    /// <param name="originalHeight">Height of the frame the boxes are scaled back to.</param>
    /// <param name="scoreThreshold">Rows scoring below this value are dropped.</param>
    /// <param name="classIndex">Only rows with this class id are kept.</param>
    /// <param name="detections">Destination list; cleared before it is filled.</param>
    private static void ParseYoloV10(
        Tensor<float> output,
        int originalWidth,
        int originalHeight,
        float scoreThreshold,
        int classIndex,
        List<Detection> detections)
    {
        detections.Clear();

        // dims: [1, 300, 6]
        var count = output.Dimensions[1];

        // Scale from network input space back to the frame; uses the same constants as the
        // resize step so the two cannot drift apart.
        var xScale = (float)originalWidth / _inputWidth;
        var yScale = (float)originalHeight / _inputHeight;

        for (var i = 0; i < count; i++)
        {
            var x1 = output[0, i, 0];
            var y1 = output[0, i, 1];
            var x2 = output[0, i, 2];
            var y2 = output[0, i, 3];
            var score = output[0, i, 4];
            var cls = (int)output[0, i, 5];

            if (cls != classIndex)
                continue;

            if (score < scoreThreshold)
                continue;

            var left = x1 * xScale;
            var top = y1 * yScale;
            var width = (x2 - x1) * xScale;
            var height = (y2 - y1) * yScale;

            detections.Add(new Detection(left, top, width, height, score));
        }
    }

    /// <summary>
    /// Greedy non-maximum suppression: visits detections by descending score and keeps one only
    /// if its IoU with every already kept detection is below <paramref name="nmsThreshold"/>.
    /// </summary>
    /// <param name="detections">Candidates; sorted in place by descending score.</param>
    /// <param name="nmsThreshold">IoU at or above which a candidate is suppressed.</param>
    /// <param name="nmsBuffer">Reusable list that receives the kept detections; cleared first.</param>
    /// <returns><paramref name="nmsBuffer"/>, holding the kept detections.</returns>
    private static List<Detection> ApplyNms(
        List<Detection> detections,
        float nmsThreshold,
        List<Detection> nmsBuffer)
    {
        nmsBuffer.Clear();

        if (detections.Count == 0)
            return nmsBuffer;

        detections.Sort(static (a, b) => b.Score.CompareTo(a.Score));

        for (var i = 0; i < detections.Count; i++)
        {
            var candidate = detections[i];
            var keep = true;

            for (var j = 0; j < nmsBuffer.Count; j++)
            {
                if (IoU(candidate, nmsBuffer[j]) >= nmsThreshold)
                {
                    keep = false;
                    break;
                }
            }

            if (keep)
                nmsBuffer.Add(candidate);
        }

        return nmsBuffer;
    }

    /// <summary>
    /// Intersection-over-union of two boxes; 0 when they do not overlap or the union area is not positive.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static float IoU(in Detection a, in Detection b)
    {
        var x1 = MathF.Max(a.X, b.X);
        var y1 = MathF.Max(a.Y, b.Y);
        var x2 = MathF.Min(a.X + a.Width, b.X + b.Width);
        var y2 = MathF.Min(a.Y + a.Height, b.Y + b.Height);

        var interW = x2 - x1;
        if (interW <= 0f) return 0f;

        var interH = y2 - y1;
        if (interH <= 0f) return 0f;

        var interArea = interW * interH;

        var areaA = a.Width * a.Height;
        var areaB = b.Width * b.Height;

        var union = areaA + areaB - interArea;
        if (union <= 0f) return 0f;

        return interArea / union;
    }

    /// <summary>Releases the inference session and the two scratch matrices.</summary>
    public void Dispose()
    {
        _session?.Dispose();
        _resizeMat?.Dispose();
        _rgbMat?.Dispose();
    }
}
|
||||||
@ -4,7 +4,7 @@ using Microsoft.ML.OnnxRuntime.Tensors;
|
|||||||
|
|
||||||
namespace splitter.algo;
|
namespace splitter.algo;
|
||||||
|
|
||||||
public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisposable
|
public sealed class YoloV8ObjectDetector : LoggingBase, IObjectDetector, IDisposable
|
||||||
{
|
{
|
||||||
private readonly InferenceSession _session;
|
private readonly InferenceSession _session;
|
||||||
private readonly string _inputName;
|
private readonly string _inputName;
|
||||||
@ -32,7 +32,7 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
|||||||
private readonly List<Detection> _nmsBuffer = new(256);
|
private readonly List<Detection> _nmsBuffer = new(256);
|
||||||
|
|
||||||
// Reusable result list
|
// Reusable result list
|
||||||
private readonly List<(Rect box, Point2f center)> _results = new(64);
|
private readonly List<DetectedPerson> _results = new(64);
|
||||||
|
|
||||||
private readonly float _inv255 = 1f / 255f;
|
private readonly float _inv255 = 1f / 255f;
|
||||||
|
|
||||||
@ -54,7 +54,7 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public YoloOnnxObjectDetector(ILogger logger) : base(logger, -1)
|
public YoloV8ObjectDetector(ILogger logger) : base(logger, -1)
|
||||||
{
|
{
|
||||||
var options = new SessionOptions();
|
var options = new SessionOptions();
|
||||||
options.AppendExecutionProvider_DML();
|
options.AppendExecutionProvider_DML();
|
||||||
@ -78,7 +78,7 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
|||||||
_inputs.Add(NamedOnnxValue.CreateFromTensor(_inputName, _inputTensor));
|
_inputs.Add(NamedOnnxValue.CreateFromTensor(_inputName, _inputTensor));
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<(Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont)
|
public List<DetectedPerson> DetectAll(SingleTask job, Mat frameCont)
|
||||||
{
|
{
|
||||||
if (frameCont.Empty())
|
if (frameCont.Empty())
|
||||||
{
|
{
|
||||||
@ -142,7 +142,7 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
|||||||
var rect = new Rect(x, y, w, h);
|
var rect = new Rect(x, y, w, h);
|
||||||
var center = new Point2f(x + w / 2f, y + h / 2f);
|
var center = new Point2f(x + w / 2f, y + h / 2f);
|
||||||
|
|
||||||
_results.Add((rect, center));
|
_results.Add(new DetectedPerson{ Box = rect, Center = center });
|
||||||
}
|
}
|
||||||
|
|
||||||
return _results;
|
return _results;
|
||||||
BIN
splitter-cli/models/osnet_x0_25_msmt17.onnx
Normal file
BIN
splitter-cli/models/osnet_x0_25_msmt17.onnx
Normal file
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user