mirror of
https://github.com/unclshura/splitter.git
synced 2026-06-21 16:12:01 +00:00
Switched body detection model to yolov10m.
This commit is contained in:
parent
d3c82ce924
commit
78c9713425
@ -13,7 +13,7 @@ Splitter pipeline is:
|
|||||||
* FFMpeg used to decode video frames into OpenCVSharp.Mat
|
* FFMpeg used to decode video frames into OpenCVSharp.Mat
|
||||||
* One of detectors used:
|
* One of detectors used:
|
||||||
- For face detection: [opencv_zoo/models/face_detection_yunet at main opencv/opencv_zoo](https://github.com/opencv/opencv_zoo/tree/main/models/face_detection_yunet)
|
- For face detection: [opencv_zoo/models/face_detection_yunet at main opencv/opencv_zoo](https://github.com/opencv/opencv_zoo/tree/main/models/face_detection_yunet)
|
||||||
- For body detection: [yolov8s.pt Ultralytics/YOLOv8 at main](https://huggingface.co/Ultralytics/YOLOv8/blob/main/yolov8s.pt)
|
- For body detection: [THU-MIG/yolov10: YOLOv10: Real-Time End-to-End Object Detection [NeurIPS 2024]](https://github.com/THU-MIG/yolov10/tree/main)
|
||||||
* Camera control applied (CameraControl class)
|
* Camera control applied (CameraControl class)
|
||||||
* Final video frames are encoded back to video file using FFMpeg
|
* Final video frames are encoded back to video file using FFMpeg
|
||||||
|
|
||||||
|
|||||||
@ -6,13 +6,17 @@ public class PreviewData
|
|||||||
public IReadOnlyList<OpenCvSharp.Rect> DetectedBoxes { get; }
|
public IReadOnlyList<OpenCvSharp.Rect> DetectedBoxes { get; }
|
||||||
public Rect? CropRect { get; }
|
public Rect? CropRect { get; }
|
||||||
public Point2f GravitateTo { get; }
|
public Point2f GravitateTo { get; }
|
||||||
|
public TimeSpan Position { get; }
|
||||||
|
public int? Rotate { get; }
|
||||||
|
|
||||||
public PreviewData(Avalonia.Media.Imaging.Bitmap? frame, IReadOnlyList<OpenCvSharp.Rect> boxes, Rect? crop, Point2f gravitateTo)
|
public PreviewData(Avalonia.Media.Imaging.Bitmap? frame, IReadOnlyList<OpenCvSharp.Rect> boxes, Rect? crop, Point2f gravitateTo, TimeSpan position, int? rotate)
|
||||||
{
|
{
|
||||||
Frame = frame;
|
Frame = frame;
|
||||||
DetectedBoxes = boxes;
|
DetectedBoxes = boxes;
|
||||||
CropRect = crop;
|
CropRect = crop;
|
||||||
GravitateTo = gravitateTo;
|
GravitateTo = gravitateTo;
|
||||||
|
Position = position;
|
||||||
|
Rotate = rotate;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -36,16 +36,16 @@ internal sealed class Program
|
|||||||
|
|
||||||
// splitter services
|
// splitter services
|
||||||
services.AddSingleton<UltraFaceDetector>();
|
services.AddSingleton<UltraFaceDetector>();
|
||||||
services.AddSingleton<YoloOnnxObjectDetector>();
|
services.AddSingleton<YoloV10ObjectDetector>();
|
||||||
services.AddSingleton( x => new SingleThreadedDetector<UltraFaceDetector>(x.GetRequiredService<UltraFaceDetector>()) );
|
services.AddSingleton( x => new SingleThreadedDetector<UltraFaceDetector>(x.GetRequiredService<UltraFaceDetector>()) );
|
||||||
services.AddSingleton(x => new SingleThreadedDetector<YoloOnnxObjectDetector>(x.GetRequiredService<YoloOnnxObjectDetector>()));
|
services.AddSingleton(x => new SingleThreadedDetector<YoloV10ObjectDetector>(x.GetRequiredService<YoloV10ObjectDetector>()));
|
||||||
services.AddSingleton(x => new SingleThreadedDetector<DummyDetector>(x.GetRequiredService<DummyDetector>()));
|
services.AddSingleton(x => new SingleThreadedDetector<DummyDetector>(x.GetRequiredService<DummyDetector>()));
|
||||||
services.AddSingleton<Func<string, IObjectDetector>>( x => detectorName =>
|
services.AddSingleton<Func<string, IObjectDetector>>( x => detectorName =>
|
||||||
{
|
{
|
||||||
return detectorName switch
|
return detectorName switch
|
||||||
{
|
{
|
||||||
"face" => x.GetRequiredService<SingleThreadedDetector<UltraFaceDetector>>(),
|
"face" => x.GetRequiredService<SingleThreadedDetector<UltraFaceDetector>>(),
|
||||||
"body" => x.GetRequiredService<SingleThreadedDetector<YoloOnnxObjectDetector>>(),
|
"body" => x.GetRequiredService<SingleThreadedDetector<YoloV10ObjectDetector>>(),
|
||||||
"none" => x.GetRequiredService<SingleThreadedDetector<DummyDetector>>(),
|
"none" => x.GetRequiredService<SingleThreadedDetector<DummyDetector>>(),
|
||||||
_ => new DummyDetector()
|
_ => new DummyDetector()
|
||||||
};
|
};
|
||||||
|
|||||||
@ -15,7 +15,7 @@ public partial class JobViewModel : ObservableObject
|
|||||||
public SingleJob GetJob() => Job;
|
public SingleJob GetJob() => Job;
|
||||||
|
|
||||||
[ObservableProperty] private VideoInfo? _probe;
|
[ObservableProperty] private VideoInfo? _probe;
|
||||||
[ObservableProperty] private PreviewData? _preview = new(null, [], null, new(0.5f, 0.5f));
|
[ObservableProperty] private PreviewData? _preview = new(null, [], null, new(0.5f, 0.5f), TimeSpan.Zero, null);
|
||||||
[ObservableProperty] private Bitmap? _thumbnail;
|
[ObservableProperty] private Bitmap? _thumbnail;
|
||||||
[ObservableProperty] private double _sliderLiveValue;
|
[ObservableProperty] private double _sliderLiveValue;
|
||||||
[ObservableProperty] private double _positionSeconds;
|
[ObservableProperty] private double _positionSeconds;
|
||||||
@ -70,7 +70,7 @@ public partial class JobViewModel : ObservableObject
|
|||||||
{
|
{
|
||||||
if (string.IsNullOrWhiteSpace(value))
|
if (string.IsNullOrWhiteSpace(value))
|
||||||
{
|
{
|
||||||
Job.GravitateTo = null;
|
Job.GravitateTo = new Point2f(0.5f, 0.5f);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -109,6 +109,19 @@ public partial class JobViewModel : ObservableObject
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Detection score threshold of the wrapped job.
/// Assigning a value that differs from the current one by 0.001 or more stores it on the job,
/// raises the property-changed notification and queues a preview rebuild via <c>Task.Run</c>.
/// </summary>
public float ScoreThreshold
{
    get => Job.ScoreThreshold;
    set
    {
        // Treat sub-0.001 differences as "no change" so tiny slider jitter does not trigger a rebuild.
        var unchanged = Math.Abs(Job.ScoreThreshold - value) < 0.001;
        if (!unchanged)
        {
            Job.ScoreThreshold = value;
            OnPropertyChanged();
            Task.Run(CreatePreview);
        }
    }
}
|
||||||
|
|
||||||
public string? Mask
|
public string? Mask
|
||||||
{
|
{
|
||||||
get => Job.Mask;
|
get => Job.Mask;
|
||||||
@ -182,10 +195,10 @@ public partial class JobViewModel : ObservableObject
|
|||||||
|
|
||||||
public Point2f GravitateTo
|
public Point2f GravitateTo
|
||||||
{
|
{
|
||||||
get => Job.GravitateTo ?? new Point2f(0.5f, 0.5f);
|
get => Job.GravitateTo;
|
||||||
set
|
set
|
||||||
{
|
{
|
||||||
if (Job.GravitateTo != null && Math.Abs(Job.GravitateTo.Value.X - value.X) < 0.001 && Math.Abs(Job.GravitateTo.Value.Y - value.Y) < 0.001)
|
if (Math.Abs(Job.GravitateTo.X - value.X) < 0.001 && Math.Abs(Job.GravitateTo.Y - value.Y) < 0.001)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Job.GravitateTo = value;
|
Job.GravitateTo = value;
|
||||||
@ -199,6 +212,8 @@ public partial class JobViewModel : ObservableObject
|
|||||||
get => Job.DetectAbove;
|
get => Job.DetectAbove;
|
||||||
set
|
set
|
||||||
{
|
{
|
||||||
|
if (Math.Abs(Job.DetectAbove - value) < 0.001 )
|
||||||
|
return;
|
||||||
Job.DetectAbove = value;
|
Job.DetectAbove = value;
|
||||||
OnPropertyChanged();
|
OnPropertyChanged();
|
||||||
Task.Run(CreatePreview);
|
Task.Run(CreatePreview);
|
||||||
@ -262,11 +277,17 @@ public partial class JobViewModel : ObservableObject
|
|||||||
return;
|
return;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
var frame = await _thumbnails.CreateThumbnailAsync(Job.InputFile, Probe, TimeSpan.FromSeconds(PositionSeconds), Probe.Width, Probe.Height, Job.Rotate);
|
var pos = TimeSpan.FromSeconds(PositionSeconds);
|
||||||
|
|
||||||
|
Bitmap? frame;
|
||||||
|
if (Preview?.Frame == null || Preview.Position != pos)
|
||||||
|
frame = await _thumbnails.CreateThumbnailAsync(Job.InputFile, Probe, pos, Probe.Width, Probe.Height, Job.Rotate);
|
||||||
|
else
|
||||||
|
frame = Preview.Frame;
|
||||||
if ( frame == null )
|
if ( frame == null )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Preview = new PreviewData(frame, [], null, Job.GravitateTo ?? new (0.5f, 0.5f));
|
Preview = new PreviewData(frame, [], null, Job.GravitateTo, pos, Job.Rotate);
|
||||||
|
|
||||||
var detector = _detectorFactory(Job.Detect ?? "");
|
var detector = _detectorFactory(Job.Detect ?? "");
|
||||||
var j = new SingleTask
|
var j = new SingleTask
|
||||||
@ -304,7 +325,7 @@ public partial class JobViewModel : ObservableObject
|
|||||||
}
|
}
|
||||||
|
|
||||||
var boxes = detections.Select(x => x.box).ToList();
|
var boxes = detections.Select(x => x.box).ToList();
|
||||||
Preview = new PreviewData(frame, boxes, crop, Job.GravitateTo ?? new (0.5f, 0.5f));
|
Preview = new PreviewData(frame, boxes, crop, Job.GravitateTo, pos, Job.Rotate);
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
|
|||||||
@ -86,6 +86,25 @@ x:DataType="vm:InspectorPaneViewModel">
|
|||||||
Width="160"/>
|
Width="160"/>
|
||||||
</StackPanel>
|
</StackPanel>
|
||||||
|
|
||||||
|
<!-- ScoreThreshold -->
|
||||||
|
<StackPanel Orientation="Horizontal" Spacing="8">
|
||||||
|
<TextBlock Text="Score Threshold" Width="120"/>
|
||||||
|
|
||||||
|
<StackPanel Orientation="Vertical" Spacing="4" Width="260">
|
||||||
|
<Slider Minimum="0"
|
||||||
|
Maximum="1"
|
||||||
|
SmallChange="0.01"
|
||||||
|
LargeChange="0.1"
|
||||||
|
TickFrequency="0.05"
|
||||||
|
IsSnapToTickEnabled="False"
|
||||||
|
Value="{Binding Selected.ScoreThreshold, Mode=TwoWay}"/>
|
||||||
|
|
||||||
|
<TextBlock Text="{Binding Selected.ScoreThreshold, StringFormat='0.00'}"
|
||||||
|
FontSize="10"
|
||||||
|
HorizontalAlignment="Right"/>
|
||||||
|
</StackPanel>
|
||||||
|
</StackPanel>
|
||||||
|
|
||||||
<!-- DetectAbove -->
|
<!-- DetectAbove -->
|
||||||
<StackPanel Orientation="Horizontal" Spacing="8">
|
<StackPanel Orientation="Horizontal" Spacing="8">
|
||||||
<TextBlock Text="Detect Above" Width="120"/>
|
<TextBlock Text="Detect Above" Width="120"/>
|
||||||
|
|||||||
@ -98,6 +98,14 @@ public sealed class CommandLine
|
|||||||
else
|
else
|
||||||
Master.DetectAbove = 0.7f;
|
Master.DetectAbove = 0.7f;
|
||||||
}
|
}
|
||||||
|
else if (arg.StartsWith("--score-threshold="))
|
||||||
|
{
|
||||||
|
var val = arg.Substring("--score-threshold=".Length);
|
||||||
|
if (float.TryParse(val, NumberStyles.Float, CultureInfo.InvariantCulture, out var scoreThreshold) && scoreThreshold >= 0.0f && scoreThreshold <= 1.0f)
|
||||||
|
Master.ScoreThreshold = scoreThreshold;
|
||||||
|
else
|
||||||
|
Master.ScoreThreshold = 0.25f;
|
||||||
|
}
|
||||||
else if (arg == "--crop")
|
else if (arg == "--crop")
|
||||||
{
|
{
|
||||||
Master.Crop = ParseCrop("");
|
Master.Crop = ParseCrop("");
|
||||||
@ -224,22 +232,22 @@ public sealed class CommandLine
|
|||||||
return key.Length > 0;
|
return key.Length > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Point2f? ParseGravitate(string value)
|
private static Point2f ParseGravitate(string value)
|
||||||
{
|
{
|
||||||
// Expected format: "<x>:<y>"
|
// Expected format: "<x>:<y>"
|
||||||
var parts = value.Split(':');
|
var parts = value.Split(':');
|
||||||
if (parts.Length != 2)
|
if (parts.Length != 2)
|
||||||
return null;
|
return new Point2f(0.5f, 0.5f);
|
||||||
|
|
||||||
if (!float.TryParse(parts[0], NumberStyles.Float, CultureInfo.InvariantCulture, out var x))
|
if (!float.TryParse(parts[0], NumberStyles.Float, CultureInfo.InvariantCulture, out var x))
|
||||||
return null;
|
return new Point2f(0.5f, 0.5f);
|
||||||
|
|
||||||
if (!float.TryParse(parts[1], NumberStyles.Float, CultureInfo.InvariantCulture, out var y))
|
if (!float.TryParse(parts[1], NumberStyles.Float, CultureInfo.InvariantCulture, out var y))
|
||||||
return null;
|
return new Point2f(0.5f, 0.5f);
|
||||||
|
|
||||||
// Normalized range check (0.0–1.0)
|
// Normalized range check (0.0–1.0)
|
||||||
if (x < 0f || x > 1f || y < 0f || y > 1f)
|
if (x < 0f || x > 1f || y < 0f || y > 1f)
|
||||||
return null;
|
return new Point2f(0.5f, 0.5f);
|
||||||
|
|
||||||
return new Point2f(x, y);
|
return new Point2f(x, y);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -64,7 +64,7 @@ public class JobProcessor(ILogger logger) : LoggingBase(logger, 0), IJobProcesso
|
|||||||
IObjectDetector detector = job.Detect switch
|
IObjectDetector detector = job.Detect switch
|
||||||
{
|
{
|
||||||
"face" => new UltraFaceDetector(_logger),
|
"face" => new UltraFaceDetector(_logger),
|
||||||
"body" => new YoloOnnxObjectDetector(_logger),
|
"body" => new YoloV10ObjectDetector(_logger),
|
||||||
"none" => new DummyDetector(),
|
"none" => new DummyDetector(),
|
||||||
_ => throw new InvalidOperationException($"Unknown detector: {job.Detect}")
|
_ => throw new InvalidOperationException($"Unknown detector: {job.Detect}")
|
||||||
};
|
};
|
||||||
|
|||||||
@ -29,12 +29,7 @@ public class SingleJob
|
|||||||
/// such as left-center (0.2, 0.5) or top-right (0.8, 0.2). This can be useful for
|
/// such as left-center (0.2, 0.5) or top-right (0.8, 0.2). This can be useful for
|
||||||
/// videos where the subject tends to be off-center or for creative framing choices.
|
/// videos where the subject tends to be off-center or for creative framing choices.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public Point2f? GravitateTo { get; set; }
|
public Point2f GravitateTo { get; set; } = new Point2f(0.5f, 0.5f);
|
||||||
/// <summary>
|
|
||||||
/// Face or human detectors should only report detections if their upper bound starts below this threshold.
|
|
||||||
/// This is a value between 0.0 and 1.0 mapped to 0..Height.
|
|
||||||
/// </summary>
|
|
||||||
public float DetectAbove { get; set; } = 0.3f;
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Destination file mask.
|
/// Destination file mask.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@ -50,6 +45,15 @@ public class SingleJob
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public string? Detect { get; set; }
|
public string? Detect { get; set; }
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
/// Detection confidence threshold. This is a value between 0.0 and 1.0 that sets the minimum confidence score a detection must have to be reported.
|
||||||
|
/// </summary>
|
||||||
|
public float ScoreThreshold { get; set; } = 0.25f;
|
||||||
|
/// <summary>
|
||||||
|
/// Face or human detectors should only report detections if their upper bound starts below this threshold.
|
||||||
|
/// This is a value between 0.0 and 1.0 mapped to 0..Height.
|
||||||
|
/// </summary>
|
||||||
|
public float DetectAbove { get; set; } = 0.7f;
|
||||||
|
/// <summary>
|
||||||
/// Set target segment length explicitly. By default, the splitter calculates segment
|
/// Set target segment length explicitly. By default, the splitter calculates segment
|
||||||
/// lengths to be equal and not exceed 58 seconds.
|
/// lengths to be equal and not exceed 58 seconds.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|||||||
@ -58,7 +58,7 @@ public sealed class CameraController
|
|||||||
_kalman.Reset(_cameraCenter);
|
_kalman.Reset(_cameraCenter);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Point2f DefaultCenter => _cmd.GravitateTo ?? new Point2f(_videoWidth / 2f, _videoHeight / 2f);
|
private Point2f DefaultCenter => _cmd.GravitateTo;
|
||||||
|
|
||||||
public int LostFrames => _lostFrames;
|
public int LostFrames => _lostFrames;
|
||||||
public Point2f CameraCenter => _cameraCenter;
|
public Point2f CameraCenter => _cameraCenter;
|
||||||
|
|||||||
@ -7,7 +7,7 @@ public sealed class DummyDetector : IObjectDetector
|
|||||||
var h = job.Info.Height;
|
var h = job.Info.Height;
|
||||||
var w = job.Info.Width;
|
var w = job.Info.Width;
|
||||||
|
|
||||||
var c = job.Job.GravitateTo ?? new Point2f(0.5f, 0.5f);
|
var c = job.Job.GravitateTo;
|
||||||
var x = (int)(c.X * w);
|
var x = (int)(c.X * w);
|
||||||
var y = (int)(c.Y * h);
|
var y = (int)(c.Y * h);
|
||||||
|
|
||||||
|
|||||||
278
splitter-cli/algo/YoloV10ObjectDetector.cs
Normal file
278
splitter-cli/algo/YoloV10ObjectDetector.cs
Normal file
@ -0,0 +1,278 @@
|
|||||||
|
using System.Runtime.CompilerServices;
|
||||||
|
using Microsoft.ML.OnnxRuntime;
|
||||||
|
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||||
|
|
||||||
|
namespace splitter.algo;
|
||||||
|
|
||||||
|
/// <summary>
/// Person detector that runs the <c>models/yolov10m.onnx</c> model through ONNX Runtime
/// with the DirectML execution provider.
/// </summary>
/// <remarks>
/// Every scratch buffer (resize/colour-conversion mats, input tensor, detection lists and the
/// result list) is allocated once and reused by each <see cref="DetectAll"/> call. Because that
/// state is shared, calls on one instance must not overlap, and the list returned by
/// <see cref="DetectAll"/> is overwritten by the next call.
/// </remarks>
public sealed class YoloV10ObjectDetector : LoggingBase, IObjectDetector, IDisposable
{
    // Network input size; frames are stretched (not letterboxed) to this size.
    private const int _inputWidth = 640;
    private const int _inputHeight = 640;

    // IoU at or above which a lower-scored box is suppressed.
    private const float _nmsThreshold = 0.45f;

    // Only rows whose class id equals this value are kept.
    private const int _personClassIndex = 0;

    // Scales 8-bit channel values into the 0..1 range.
    private const float _inv255 = 1f / 255f;

    private readonly InferenceSession _session;
    private readonly string _inputName;
    private readonly string _outputName;

    private readonly Mat _resizeMat = new();
    private readonly Mat _rgbMat = new();

    // _inputTensor wraps _inputBuffer, so writing the array updates the tensor in place.
    private readonly float[] _inputBuffer;
    private readonly DenseTensor<float> _inputTensor;
    private readonly List<NamedOnnxValue> _inputs = new(1);

    private readonly List<Detection> _detections = new(256);
    private readonly List<Detection> _nmsBuffer = new(256);
    private readonly List<(Rect box, Point2f center)> _results = new(64);

    /// <summary>Axis-aligned box (top-left corner plus size, in frame pixels) with its score.</summary>
    private readonly struct Detection
    {
        public readonly float X;
        public readonly float Y;
        public readonly float Width;
        public readonly float Height;
        public readonly float Score;

        public Detection(float x, float y, float w, float h, float score)
        {
            X = x;
            Y = y;
            Width = w;
            Height = h;
            Score = score;
        }
    }

    /// <summary>
    /// Loads <c>models/yolov10m.onnx</c> from the application base directory and prepares the
    /// reusable input tensor.
    /// </summary>
    /// <param name="logger">Logger forwarded to <see cref="LoggingBase"/>.</param>
    public YoloV10ObjectDetector(ILogger logger) : base(logger, -1)
    {
        // SessionOptions is IDisposable and is only needed while the session is being created.
        using var options = new SessionOptions();
        options.AppendExecutionProvider_DML();

        var modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "models", "yolov10m.onnx");
        _session = new InferenceSession(modelPath, options);

        // The first declared input/output of the model is used.
        _inputName = _session.InputMetadata.Keys.First();
        _outputName = _session.OutputMetadata.Keys.First();

        _inputBuffer = new float[1 * 3 * _inputHeight * _inputWidth];
        _inputTensor = new DenseTensor<float>(_inputBuffer, new[] { 1, 3, _inputHeight, _inputWidth });
        _inputs.Add(NamedOnnxValue.CreateFromTensor(_inputName, _inputTensor));
    }

    /// <summary>
    /// Detects persons in <paramref name="frameCont"/> and returns their boxes, clipped to the
    /// frame, together with each box's centre point.
    /// </summary>
    /// <param name="job">Current task; <c>job.Job.ScoreThreshold</c> is the minimum accepted score.</param>
    /// <param name="frameCont">Frame to analyse; it is converted with BGR2RGB, so a BGR mat is expected.</param>
    /// <returns>
    /// The detector's internal result list (reused and overwritten on every call). It is empty
    /// when the frame is empty or the model output is not found.
    /// </returns>
    public List<(Rect box, Point2f center)> DetectAll(SingleTask job, Mat frameCont)
    {
        _results.Clear();

        if (frameCont.Empty())
            return _results;

        var frameWidth = frameCont.Width;
        var frameHeight = frameCont.Height;

        Cv2.Resize(frameCont, _resizeMat, new Size(_inputWidth, _inputHeight));
        Cv2.CvtColor(_resizeMat, _rgbMat, ColorConversionCodes.BGR2RGB);
        FillInputTensor(_rgbMat);

        using var results = _session.Run(_inputs);

        // Plain loop rather than LINQ: this runs once per frame and should not allocate a closure.
        Tensor<float>? output = null;
        foreach (var r in results)
        {
            if (r.Name == _outputName)
            {
                output = r.AsTensor<float>();
                break;
            }
        }

        if (output is null)
            return _results;

        ParseYoloV10(output, frameWidth, frameHeight, job.Job.ScoreThreshold, _personClassIndex, _detections);

        var final = ApplyNms(_detections, _nmsThreshold, _nmsBuffer);

        for (var i = 0; i < final.Count; i++)
        {
            var d = final[i];

            // Clip by edges rather than by origin + size: a box that starts left of / above the
            // frame must lose the part that lies outside instead of being shifted into the frame.
            var rawLeft = (int)d.X;
            var rawTop = (int)d.Y;
            var rawRight = rawLeft + (int)d.Width;
            var rawBottom = rawTop + (int)d.Height;

            var x = Math.Clamp(rawLeft, 0, frameWidth - 1);
            var y = Math.Clamp(rawTop, 0, frameHeight - 1);
            var w = Math.Clamp(rawRight, x + 1, frameWidth) - x;   // always at least 1 pixel wide
            var h = Math.Clamp(rawBottom, y + 1, frameHeight) - y; // always at least 1 pixel high

            _results.Add((new Rect(x, y, w, h), new Point2f(x + w / 2f, y + h / 2f)));
        }

        return _results;
    }

    /// <summary>
    /// Copies an interleaved 8-bit, 3-channel image of the network input size into
    /// <c>_inputBuffer</c> as three consecutive planes (channel 0, 1, 2), scaling each value by 1/255.
    /// </summary>
    /// <param name="rgb">Source mat; each row is read as <c>_inputWidth * 3</c> bytes.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private void FillInputTensor(Mat rgb)
    {
        const int planeSize = _inputHeight * _inputWidth;

        Span<float> dst = _inputBuffer.AsSpan();

        unsafe
        {
            for (var y = 0; y < _inputHeight; y++)
            {
                var row = new Span<byte>((byte*)rgb.Ptr(y).ToPointer(), _inputWidth * 3);
                var rowOffset = y * _inputWidth;

                for (int x = 0, src = 0; x < _inputWidth; x++, src += 3)
                {
                    var offset = rowOffset + x;

                    dst[offset] = row[src] * _inv255;
                    dst[planeSize + offset] = row[src + 1] * _inv255;
                    dst[2 * planeSize + offset] = row[src + 2] * _inv255;
                }
            }
        }
    }

    /// <summary>
    /// Reads the model output, laid out as <c>[1, N, 6]</c> with rows
    /// <c>x1, y1, x2, y2, score, class_id</c> in network-input pixels, and fills
    /// <paramref name="detections"/> with the rows of the requested class whose score reaches
    /// the threshold, rescaled to the original frame size.
    /// </summary>
    /// <param name="output">Raw output tensor.</param>
    /// <param name="originalWidth">Width of the source frame in pixels.</param>
    /// <param name="originalHeight">Height of the source frame in pixels.</param>
    /// <param name="scoreThreshold">Rows with a score below this value are dropped.</param>
    /// <param name="classIndex">Class id to keep.</param>
    /// <param name="detections">Destination list; it is cleared first.</param>
    private static void ParseYoloV10(
        Tensor<float> output,
        int originalWidth,
        int originalHeight,
        float scoreThreshold,
        int classIndex,
        List<Detection> detections)
    {
        detections.Clear();

        // Refuse anything that is not [batch >= 1, N, >= 6] instead of failing on the indexer.
        var dims = output.Dimensions;
        if (dims.Length != 3 || dims[0] < 1 || dims[2] < 6)
            return;

        var count = dims[1];

        // The frame was stretched to the network input size, so each axis has its own scale.
        var xScale = originalWidth / (float)_inputWidth;
        var yScale = originalHeight / (float)_inputHeight;

        for (var i = 0; i < count; i++)
        {
            if ((int)output[0, i, 5] != classIndex)
                continue;

            var score = output[0, i, 4];
            if (score < scoreThreshold)
                continue;

            var x1 = output[0, i, 0];
            var y1 = output[0, i, 1];
            var x2 = output[0, i, 2];
            var y2 = output[0, i, 3];

            detections.Add(new Detection(
                x1 * xScale,
                y1 * yScale,
                (x2 - x1) * xScale,
                (y2 - y1) * yScale,
                score));
        }
    }

    /// <summary>
    /// Greedy non-maximum suppression: walks the detections from highest to lowest score and
    /// keeps a box only if its IoU with every already kept box is below
    /// <paramref name="nmsThreshold"/>.
    /// </summary>
    /// <param name="detections">Candidates; sorted in place by descending score.</param>
    /// <param name="nmsThreshold">IoU at or above which a candidate is dropped.</param>
    /// <param name="nmsBuffer">Destination list; it is cleared first and returned.</param>
    /// <returns><paramref name="nmsBuffer"/>, holding the kept detections.</returns>
    private static List<Detection> ApplyNms(
        List<Detection> detections,
        float nmsThreshold,
        List<Detection> nmsBuffer)
    {
        nmsBuffer.Clear();

        if (detections.Count == 0)
            return nmsBuffer;

        detections.Sort(static (a, b) => b.Score.CompareTo(a.Score));

        for (var i = 0; i < detections.Count; i++)
        {
            var candidate = detections[i];
            var keep = true;

            for (var j = 0; j < nmsBuffer.Count; j++)
            {
                if (IoU(candidate, nmsBuffer[j]) >= nmsThreshold)
                {
                    keep = false;
                    break;
                }
            }

            if (keep)
                nmsBuffer.Add(candidate);
        }

        return nmsBuffer;
    }

    /// <summary>
    /// Intersection-over-union of two boxes; 0 when they do not overlap or the union area is not positive.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static float IoU(in Detection a, in Detection b)
    {
        var interW = MathF.Min(a.X + a.Width, b.X + b.Width) - MathF.Max(a.X, b.X);
        if (interW <= 0f) return 0f;

        var interH = MathF.Min(a.Y + a.Height, b.Y + b.Height) - MathF.Max(a.Y, b.Y);
        if (interH <= 0f) return 0f;

        var interArea = interW * interH;
        var union = a.Width * a.Height + b.Width * b.Height - interArea;
        if (union <= 0f) return 0f;

        return interArea / union;
    }

    /// <summary>Releases the inference session and the two scratch mats.</summary>
    public void Dispose()
    {
        _session?.Dispose();
        _resizeMat?.Dispose();
        _rgbMat?.Dispose();
    }
}
|
||||||
@ -4,7 +4,7 @@ using Microsoft.ML.OnnxRuntime.Tensors;
|
|||||||
|
|
||||||
namespace splitter.algo;
|
namespace splitter.algo;
|
||||||
|
|
||||||
public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisposable
|
public sealed class YoloV8ObjectDetector : LoggingBase, IObjectDetector, IDisposable
|
||||||
{
|
{
|
||||||
private readonly InferenceSession _session;
|
private readonly InferenceSession _session;
|
||||||
private readonly string _inputName;
|
private readonly string _inputName;
|
||||||
@ -54,7 +54,7 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public YoloOnnxObjectDetector(ILogger logger) : base(logger, -1)
|
public YoloV8ObjectDetector(ILogger logger) : base(logger, -1)
|
||||||
{
|
{
|
||||||
var options = new SessionOptions();
|
var options = new SessionOptions();
|
||||||
options.AppendExecutionProvider_DML();
|
options.AppendExecutionProvider_DML();
|
||||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user