Selected object tracking. Better tracked object Id generation. Not stable enough yet.

This commit is contained in:
Alexander Shabarshov 2026-06-09 09:57:58 +01:00
parent 6ebeccd761
commit f2493c1709
14 changed files with 341 additions and 188 deletions

View File

@ -3,13 +3,13 @@
public class PreviewData public class PreviewData
{ {
public Avalonia.Media.Imaging.Bitmap? Frame { get; } public Avalonia.Media.Imaging.Bitmap? Frame { get; }
public IReadOnlyList<OpenCvSharp.Rect> DetectedBoxes { get; } public IReadOnlyList<DetectedPerson> DetectedBoxes { get; }
public Rect? CropRect { get; } public Rect? CropRect { get; }
public Point2f GravitateTo { get; } public Point2f GravitateTo { get; }
public TimeSpan Position { get; } public TimeSpan Position { get; }
public int? Rotate { get; } public int? Rotate { get; }
public PreviewData(Avalonia.Media.Imaging.Bitmap? frame, IReadOnlyList<OpenCvSharp.Rect> boxes, Rect? crop, Point2f gravitateTo, TimeSpan position, int? rotate) public PreviewData(Avalonia.Media.Imaging.Bitmap? frame, IReadOnlyList<DetectedPerson> boxes, Rect? crop, Point2f gravitateTo, TimeSpan position, int? rotate)
{ {
Frame = frame; Frame = frame;
DetectedBoxes = boxes; DetectedBoxes = boxes;

View File

@ -39,19 +39,16 @@ internal sealed class Program
services.AddSingleton<YoloV10ObjectDetector>(); services.AddSingleton<YoloV10ObjectDetector>();
services.AddSingleton<OSNetEmbeddingExtractor>(); services.AddSingleton<OSNetEmbeddingExtractor>();
services.AddSingleton<IObjectTracker, ObjectTracker>(); services.AddSingleton<IObjectTracker, ObjectTracker>();
services.AddSingleton(x => new SingleThreadedDetector<UltraFaceDetector>(x.GetRequiredService<UltraFaceDetector>())); services.AddKeyedSingleton<IObjectDetector>("face", (x,_) => new SingleThreadedDetector<UltraFaceDetector>(x.GetRequiredService<UltraFaceDetector>()));
services.AddSingleton(x => new SingleThreadedDetector<YoloV10ObjectDetector>(x.GetRequiredService<YoloV10ObjectDetector>())); services.AddKeyedSingleton<IObjectDetector>("body", (x,_) => new SingleThreadedDetector<YoloV10ObjectDetector>(x.GetRequiredService<YoloV10ObjectDetector>()));
services.AddSingleton(x => new SingleThreadedDetector<DummyDetector>(x.GetRequiredService<DummyDetector>())); services.AddKeyedSingleton<IObjectDetector>("none", (x,_) => new SingleThreadedDetector<DummyDetector>(x.GetRequiredService<DummyDetector>()));
services.AddSingleton<IEmbeddingExtractor>(x => new SingleThreadedEmbeddingExtractor<OSNetEmbeddingExtractor>(x.GetRequiredService<OSNetEmbeddingExtractor>())); services.AddSingleton<IEmbeddingExtractor>(x => new SingleThreadedEmbeddingExtractor<OSNetEmbeddingExtractor>(x.GetRequiredService<OSNetEmbeddingExtractor>()));
services.AddSingleton<Func<string, IObjectDetector>>( x => detectorName => services.AddSingleton<Func<string, IObjectDetector>>(x => detectorName => x.GetKeyedService<IObjectDetector>(detectorName) ?? new DummyDetector());
services.AddSingleton<Func<string, IObjectTracker>>(x => detectorName =>
{ {
return detectorName switch var detectorFactory = x.GetRequiredService<Func<string, IObjectDetector>>();
{ var extractor = x.GetRequiredService<IEmbeddingExtractor>();
"face" => x.GetRequiredService<SingleThreadedDetector<UltraFaceDetector>>(), return new ObjectTracker(detectorFactory(detectorName), extractor);
"body" => x.GetRequiredService<SingleThreadedDetector<YoloV10ObjectDetector>>(),
"none" => x.GetRequiredService<SingleThreadedDetector<DummyDetector>>(),
_ => new DummyDetector()
};
}); });
services.AddSingleton<ILogger, GlobalLogger>(); services.AddSingleton<ILogger, GlobalLogger>();
services.AddSingleton<IJobProcessor, JobProcessor>(); services.AddSingleton<IJobProcessor, JobProcessor>();

View File

@ -30,22 +30,7 @@ public partial class InspectorPaneViewModel : ObservableObject
return; return;
foreach (JobViewModel job in Files.Where(x => !ReferenceEquals(x, Selected))) foreach (JobViewModel job in Files.Where(x => !ReferenceEquals(x, Selected)))
{ job.CopyFrom(Selected);
job.Detect = Selected.Detect;
job.Rotate = Selected.Rotate;
job.CropText = Selected.CropText;
job.ForceFixed = Selected.ForceFixed;
job.GravitateText = Selected.GravitateText;
job.Mask = Selected.Mask;
job.OutputFolder = Selected.OutputFolder;
job.OverrideTargetDuration = Selected.OverrideTargetDuration;
job.PassthroughText = Selected.PassthroughText;
job.Enhance = Selected.Enhance;
job.ParametersList.Clear();
foreach (var param in Selected.ParametersList)
job.ParametersList.Add(param);
}
} }
public IRelayCommand RotateLeftCommand { get; } public IRelayCommand RotateLeftCommand { get; }

View File

@ -28,7 +28,7 @@ public partial class JobViewModel : ObservableObject
private readonly IThumbnailService _thumbnails; private readonly IThumbnailService _thumbnails;
private readonly DispatcherTimer _debounceTimer; private readonly DispatcherTimer _debounceTimer;
private readonly Func<string, IObjectDetector> _detectorFactory; private readonly Func<string, IObjectTracker> _trackerFactory;
private readonly ILogger _log; private readonly ILogger _log;
public string FileName => Path.GetFileName(Job.InputFile); public string FileName => Path.GetFileName(Job.InputFile);
@ -220,6 +220,19 @@ public partial class JobViewModel : ObservableObject
} }
} }
/// <summary>
/// ID of the detected object to track, stored on the underlying job.
/// Assigning a value different from the current one raises a change
/// notification and schedules a preview rebuild via <c>Task.Run</c>.
/// </summary>
public ulong? DetectId
{
    get => Job.DetectId;
    set
    {
        // Only react to real changes; re-assigning the same value is a no-op.
        if (Job.DetectId != value)
        {
            Job.DetectId = value;
            OnPropertyChanged();

            // Fire-and-forget: the returned task is intentionally not awaited.
            Task.Run(CreatePreview);
        }
    }
}
public double? OverrideTargetDuration public double? OverrideTargetDuration
{ {
get => Job.OverrideTargetDuration; get => Job.OverrideTargetDuration;
@ -231,11 +244,12 @@ public partial class JobViewModel : ObservableObject
OnPropertyChanged(); OnPropertyChanged();
} }
} }
public JobViewModel(SingleJob job, IThumbnailService thumbnails, Func<string, IObjectDetector> detectorFactory, ILogger log)
public JobViewModel(SingleJob job, IThumbnailService thumbnails, Func<string, IObjectTracker> trackerFactory, ILogger log)
{ {
Job = job; Job = job;
_thumbnails = thumbnails; _thumbnails = thumbnails;
_detectorFactory = detectorFactory; _trackerFactory = trackerFactory;
_log = log; _log = log;
ParametersList.Add(new ParameterEntry("DropoutToleranceFrames" , "")); ParametersList.Add(new ParameterEntry("DropoutToleranceFrames" , ""));
@ -271,6 +285,12 @@ public partial class JobViewModel : ObservableObject
_debounceTimer.Tick += DebounceTimerTick; _debounceTimer.Tick += DebounceTimerTick;
} }
/// <summary>
/// Copies the job settings of another view model into this one.
/// </summary>
/// <param name="src">View model whose underlying job is the source of the settings.</param>
public void CopyFrom(JobViewModel src)
{
    // The actual field-by-field copy is delegated to the job object.
    Job.CopyFrom(src.Job);
    OnPropertyChanged(string.Empty); // Refresh all properties
}
public async Task CreatePreview() public async Task CreatePreview()
{ {
if ( Probe == null) if ( Probe == null)
@ -289,7 +309,7 @@ public partial class JobViewModel : ObservableObject
Preview = new PreviewData(frame, [], null, Job.GravitateTo, pos, Job.Rotate); Preview = new PreviewData(frame, [], null, Job.GravitateTo, pos, Job.Rotate);
var detector = _detectorFactory(Job.Detect ?? ""); var tracker = _trackerFactory(Job.Detect ?? "");
var j = new SingleTask var j = new SingleTask
( (
Job : Job, Job : Job,
@ -301,31 +321,26 @@ public partial class JobViewModel : ObservableObject
SegmentLength : 1, // 1 second segment for detection SegmentLength : 1, // 1 second segment for detection
ProcessorFactory: _ => throw new NotImplementedException() ProcessorFactory: _ => throw new NotImplementedException()
); );
var detections = detector.DetectAll(j, frame.ToMatContinuous());
var (detections, primaryDetection) = tracker.SelectTrackedObject(j, frame.ToMatContinuous(), j.Job.GravitateTo);
Rect? crop = null; Rect? crop = null;
if (detections.Count > 0) var w = Probe.Width;
{ var h = Probe.Height;
var primaryDetection = detections
.OrderByDescending(d => d.Box.Height * d.Box.Width)
.FirstOrDefault();
var w = Probe.Width; var cropWidth = Job.Crop?.width ?? CommandLine.DefaultW;
var h = Probe.Height; var cropHeight = Job.Crop?.height ?? CommandLine.DefaultH;
var cropWidth = Job.Crop?.width ?? CommandLine.DefaultW; var p = primaryDetection?.Center ?? new Point2f(w * Job.GravitateTo.X, h * Job.GravitateTo.Y);
var cropHeight = Job.Crop?.height ?? CommandLine.DefaultH;
var cx = primaryDetection.Center.X - cropWidth / 2f; var cx = p.X - cropWidth / 2f;
var cy = primaryDetection.Center.Y - cropHeight / 2f; var cy = p.Y - cropHeight / 2f;
var r = new Rect(cx, cy, cropWidth, cropHeight); var r = new Rect(cx, cy, cropWidth, cropHeight);
crop = ClampCrop(r, w, h); crop = ClampCrop(r, w, h);
}
var boxes = detections.Select(x => x.Box).ToList(); Preview = new PreviewData(frame, detections ?? [], crop, Job.GravitateTo, pos, Job.Rotate);
Preview = new PreviewData(frame, boxes, crop, Job.GravitateTo, pos, Job.Rotate);
} }
catch (Exception ex) catch (Exception ex)
{ {

View File

@ -35,6 +35,18 @@ public partial class PreviewPaneViewModel : ObservableObject
} }
} }
/// <summary>
/// ID of the tracked object of the currently selected job.
/// Reads as <c>null</c> when nothing is selected; writes are ignored in that case.
/// </summary>
public ulong? TrackedId
{
    get => Selected?.DetectId;
    set
    {
        // Without a selected job there is nowhere to store the value.
        if (Selected != null)
        {
            Selected.DetectId = value;
            OnPropertyChanged(nameof(TrackedId));
        }
    }
}
partial void OnSelectedChanged(JobViewModel? oldValue, JobViewModel? newValue) partial void OnSelectedChanged(JobViewModel? oldValue, JobViewModel? newValue)
{ {
if (oldValue != null) if (oldValue != null)
@ -46,6 +58,8 @@ public partial class PreviewPaneViewModel : ObservableObject
OnPropertyChanged(nameof(Preview)); OnPropertyChanged(nameof(Preview));
OnPropertyChanged(nameof(Sar)); OnPropertyChanged(nameof(Sar));
OnPropertyChanged(nameof(Rotate)); OnPropertyChanged(nameof(Rotate));
OnPropertyChanged(nameof(TrackedId));
OnPropertyChanged(nameof(DetectAbove));
} }
private void SelectedPropertyChanged(object? sender, PropertyChangedEventArgs e) private void SelectedPropertyChanged(object? sender, PropertyChangedEventArgs e)

View File

@ -9,6 +9,21 @@ x:DataType="vm:InspectorPaneViewModel">
<ScrollViewer> <ScrollViewer>
<StackPanel Spacing="2"> <StackPanel Spacing="2">
<StackPanel Orientation="Horizontal"
HorizontalAlignment="Right"
Spacing="8"
Margin="0,10,0,0">
<Button Content="Apply to Selected"
Command="{Binding ApplyOverridesCommand}"/>
<Button Content="Transform all"
Background="#AA0000"
Foreground="White"
Command="{Binding TransformAllCommand}"/>
</StackPanel>
<TextBlock Text="Parameters" FontSize="10" Margin="0,0,0,10" FontWeight="Bold"/> <TextBlock Text="Parameters" FontSize="10" Margin="0,0,0,10" FontWeight="Bold"/>
<!-- InputFile --> <!-- InputFile -->
@ -111,6 +126,12 @@ x:DataType="vm:InspectorPaneViewModel">
<TextBox Text="{Binding Selected.DetectAbove}" Width="160"/> <TextBox Text="{Binding Selected.DetectAbove}" Width="160"/>
</StackPanel> </StackPanel>
<!-- DetectId -->
<StackPanel Orientation="Horizontal" Spacing="8">
<TextBlock Text="Object to track" Width="120"/>
<TextBox Text="{Binding Selected.DetectId}" Width="160"/>
</StackPanel>
<!-- OverrideTargetDuration --> <!-- OverrideTargetDuration -->
<StackPanel Orientation="Horizontal" Spacing="8"> <StackPanel Orientation="Horizontal" Spacing="8">
<TextBlock Text="Target Duration" Width="120"/> <TextBlock Text="Target Duration" Width="120"/>
@ -182,22 +203,6 @@ x:DataType="vm:InspectorPaneViewModel">
<TextBox Text="{Binding Selected.PassthroughText}" Width="260"/> <TextBox Text="{Binding Selected.PassthroughText}" Width="260"/>
</StackPanel> </StackPanel>
<StackPanel Orientation="Horizontal"
HorizontalAlignment="Right"
Spacing="8"
Margin="0,10,0,0">
<Button Content="Apply to Selected"
Command="{Binding ApplyOverridesCommand}"/>
<Button Content="Transform all"
Background="#AA0000"
Foreground="White"
Command="{Binding TransformAllCommand}"/>
</StackPanel>
</StackPanel> </StackPanel>
</ScrollViewer> </ScrollViewer>
</Border> </Border>

View File

@ -8,7 +8,7 @@
x:DataType="vm:MainViewModel" x:DataType="vm:MainViewModel"
x:Name="Root" x:Name="Root"
Width="1800" Width="1800"
Height="830" Height="870"
Title="Splitter UI" Title="Splitter UI"
Icon="avares://Splitter-UI/Assets/splitter.png"> Icon="avares://Splitter-UI/Assets/splitter.png">

View File

@ -1,4 +1,5 @@
using System.ComponentModel; using System.ComponentModel;
using System.Globalization;
using Avalonia; using Avalonia;
using Avalonia.Controls; using Avalonia.Controls;
using Avalonia.Input; using Avalonia.Input;
@ -17,10 +18,10 @@ public sealed class PreviewCanvas : Control
AvaloniaProperty.Register<PreviewCanvas, int>(nameof(RotateAngle)); AvaloniaProperty.Register<PreviewCanvas, int>(nameof(RotateAngle));
public static readonly StyledProperty<Point2f> GravitateToProperty = public static readonly StyledProperty<Point2f> GravitateToProperty =
AvaloniaProperty.Register<PreviewCanvas, Point2f>(nameof(GravitateTo)); AvaloniaProperty.Register<PreviewCanvas, Point2f>(nameof(GravitateTo));
// normalized 0..1 from top of frame
public static readonly StyledProperty<float> DetectAboveProperty = public static readonly StyledProperty<float> DetectAboveProperty =
AvaloniaProperty.Register<PreviewCanvas, float>(nameof(DetectAbove), 0.2f); AvaloniaProperty.Register<PreviewCanvas, float>(nameof(DetectAbove), 0.2f);
public static readonly StyledProperty<ulong?> DetectIdProperty =
AvaloniaProperty.Register<PreviewCanvas, ulong?>(nameof(DetectId));
public PreviewData? Preview public PreviewData? Preview
{ {
@ -47,6 +48,12 @@ public sealed class PreviewCanvas : Control
set => SetValue(GravitateToProperty, value); set => SetValue(GravitateToProperty, value);
} }
/// <summary>
/// ID of the detected object currently selected for tracking.
/// Backed by <see cref="DetectIdProperty"/>.
/// </summary>
public ulong? DetectId
{
    get => GetValue(DetectIdProperty);
    set => SetValue(DetectIdProperty, value);
}
// DetectAbove is normalized (0..1) from top // DetectAbove is normalized (0..1) from top
public float DetectAbove public float DetectAbove
{ {
@ -181,30 +188,22 @@ public sealed class PreviewCanvas : Control
h * scale); h * scale);
} }
// ------------------------------------------------------------ private void GetAspects(
// Hit test for gravitate point (normalized) PreviewData preview,
// ------------------------------------------------------------ out int rawW,
out int rawH,
private bool HitGravitate(Avalonia.Point p, out Point2f value) out int rotate,
out float pixelAspect,
out double scale,
out double offsetX,
out double offsetY)
{ {
value = default; rawW = preview.Frame!.PixelSize.Width;
rawH = preview.Frame.PixelSize.Height;
rotate = RotateAngle;
var preview = Preview;
if (preview?.Frame is null)
return false;
var g = GravitateTo;
var rawW = preview.Frame.PixelSize.Width;
var rawH = preview.Frame.PixelSize.Height;
// normalized → pixel
double px = g.X * rawW;
double py = g.Y * rawH;
var rotate = RotateAngle;
var sar = Sar ?? new Point2f(1, 1); var sar = Sar ?? new Point2f(1, 1);
var pixelAspect = sar.X / sar.Y; pixelAspect = sar.X / sar.Y;
var dispW = Bounds.Width; var dispW = Bounds.Width;
var dispH = Bounds.Height; var dispH = Bounds.Height;
@ -221,9 +220,32 @@ public sealed class PreviewCanvas : Control
displayH = rawH * pixelAspect; displayH = rawH * pixelAspect;
} }
var scale = Math.Min(dispW / displayW, dispH / displayH); scale = Math.Min(dispW / displayW, dispH / displayH);
var offsetX = (dispW - displayW * scale) / 2; offsetX = (dispW - displayW * scale) / 2;
var offsetY = (dispH - displayH * scale) / 2; offsetY = (dispH - displayH * scale) / 2;
}
// ------------------------------------------------------------
// Hit test for gravitate point (normalized)
// ------------------------------------------------------------
private bool HitGravitate(Avalonia.Point p, out Point2f value)
{
value = default;
var preview = Preview;
if (preview?.Frame is null)
return false;
var g = GravitateTo;
int rawW, rawH, rotate;
float pixelAspect;
double scale, offsetX, offsetY;
GetAspects(preview, out rawW, out rawH, out rotate, out pixelAspect, out scale, out offsetX, out offsetY);
double px = g.X * rawW;
double py = g.Y * rawH;
var (cx, cy) = TransformPoint( var (cx, cy) = TransformPoint(
px, py, px, py,
@ -254,31 +276,10 @@ public sealed class PreviewCanvas : Control
if (preview?.Frame is null) if (preview?.Frame is null)
return false; return false;
var rawW = preview.Frame.PixelSize.Width; int rawW, rawH, rotate;
var rawH = preview.Frame.PixelSize.Height; float pixelAspect;
double scale, offsetX, offsetY;
var rotate = RotateAngle; GetAspects(preview, out rawW, out rawH, out rotate, out pixelAspect, out scale, out offsetX, out offsetY);
var sar = Sar ?? new Point2f(1, 1);
var pixelAspect = sar.X / sar.Y;
var dispW = Bounds.Width;
var dispH = Bounds.Height;
double displayW, displayH;
if (rotate == 0 || rotate == 180)
{
displayW = rawW * pixelAspect;
displayH = rawH;
}
else
{
displayW = rawW;
displayH = rawH * pixelAspect;
}
var scale = Math.Min(dispW / displayW, dispH / displayH);
var offsetX = (dispW - displayW * scale) / 2;
var offsetY = (dispH - displayH * scale) / 2;
var da = DetectAbove; var da = DetectAbove;
var py = da * rawH; var py = da * rawH;
@ -301,6 +302,42 @@ public sealed class PreviewCanvas : Control
return hit; return hit;
} }
// ------------------------------------------------------------
// Hit test for detected boxes
// ------------------------------------------------------------
/// <summary>
/// Tests whether a canvas point lies inside one of the detected boxes of the
/// current preview.
/// </summary>
/// <param name="p">Point in canvas coordinates.</param>
/// <param name="value">
/// ID of the first box (in list order) that contains <paramref name="p"/>;
/// <c>null</c> when no box is hit.
/// </param>
/// <returns><c>true</c> when a box contains the point, otherwise <c>false</c>.</returns>
private bool HitDetectedBox(Avalonia.Point p, out ulong? value)
{
    value = null;

    var preview = Preview;
    if (preview?.Frame is null)
        return false;

    // Reuse the shared layout computation so this hit test uses exactly the
    // same frame size, rotation, pixel aspect, scale and offsets as the other
    // hit tests instead of re-deriving them by hand.
    GetAspects(
        preview,
        out var rawW,
        out var rawH,
        out var rotate,
        out var pixelAspect,
        out var scale,
        out var offsetX,
        out var offsetY);

    foreach (var box in preview.DetectedBoxes)
    {
        // Map the box from frame pixels into canvas coordinates.
        var rect = TransformRect(
            box.Box.X, box.Box.Y, box.Box.Width, box.Box.Height,
            rawW, rawH,
            offsetX, offsetY, scale,
            rotate,
            pixelAspect);

        if (rect.Contains(p))
        {
            value = box.Id;
            return true;
        }
    }

    return false;
}
// ------------------------------------------------------------ // ------------------------------------------------------------
// Pointer events // Pointer events
// ------------------------------------------------------------ // ------------------------------------------------------------
@ -324,6 +361,12 @@ public sealed class PreviewCanvas : Control
_dragStartCanvas = p; _dragStartCanvas = p;
_dragStartDetectAbove = da; // normalized _dragStartDetectAbove = da; // normalized
e.Pointer.Capture(this); e.Pointer.Capture(this);
return;
}
if (HitDetectedBox(p, out var id))
{
DetectId = id;
} }
} }
@ -337,29 +380,10 @@ public sealed class PreviewCanvas : Control
var dxCanvas = p.X - _dragStartCanvas.X; var dxCanvas = p.X - _dragStartCanvas.X;
var dyCanvas = p.Y - _dragStartCanvas.Y; var dyCanvas = p.Y - _dragStartCanvas.Y;
var rawW = preview.Frame.PixelSize.Width; int rawW, rawH, rotate;
var rawH = preview.Frame.PixelSize.Height; float pixelAspect;
double scale, offsetX, offsetY;
var rotate = RotateAngle; GetAspects(preview, out rawW, out rawH, out rotate, out pixelAspect, out scale, out offsetX, out offsetY);
var sar = Sar ?? new Point2f(1, 1);
var pixelAspect = sar.X / sar.Y;
var dispW = Bounds.Width;
var dispH = Bounds.Height;
double displayW, displayH;
if (rotate == 0 || rotate == 180)
{
displayW = rawW * pixelAspect;
displayH = rawH;
}
else
{
displayW = rawW;
displayH = rawH * pixelAspect;
}
var scale = Math.Min(dispW / displayW, dispH / displayH);
var dx = dxCanvas / scale; var dx = dxCanvas / scale;
var dy = dyCanvas / scale; var dy = dyCanvas / scale;
@ -480,7 +504,6 @@ public sealed class PreviewCanvas : Control
{ {
var g = GravitateTo; var g = GravitateTo;
// normalized → pixel
var px = g.X * rawW; var px = g.X * rawW;
var py = g.Y * rawH; var py = g.Y * rawH;
@ -516,18 +539,24 @@ public sealed class PreviewCanvas : Control
return; return;
var pen = new Pen(Brushes.Lime, 2); var pen = new Pen(Brushes.Lime, 2);
var selectedPen = new Pen(Brushes.Magenta, 2);
foreach (var r in preview.DetectedBoxes) var detected = preview.DetectedBoxes.ToList();
foreach (var r in detected)
{ {
var rr = TransformRect( var rr = TransformRect(
r.X, r.Y, r.Width, r.Height, r.Box.X, r.Box.Y, r.Box.Width, r.Box.Height,
rawW, rawH, rawW, rawH,
offsetX, offsetY, offsetX, offsetY,
scale, scale,
rotate, rotate,
pixelAspect); pixelAspect);
context.DrawRectangle(null, pen, rr); context.DrawRectangle(null, r.Id == DetectId ? selectedPen : pen, rr);
context.DrawText(
new FormattedText($"ID: {r.Id}", CultureInfo.CurrentCulture, FlowDirection.LeftToRight, Typeface.Default, 12, r.Id == DetectId ? Brushes.Magenta : Brushes.Lime),
new Avalonia.Point(rr.X + 5, rr.Y + 5));
} }
} }

View File

@ -16,6 +16,7 @@
RotateAngle="{Binding Rotate}" RotateAngle="{Binding Rotate}"
GravitateTo="{Binding GravitateTo, Mode=TwoWay}" GravitateTo="{Binding GravitateTo, Mode=TwoWay}"
DetectAbove="{Binding DetectAbove, Mode=TwoWay}" DetectAbove="{Binding DetectAbove, Mode=TwoWay}"
DetectId="{Binding Selected.DetectId, Mode=TwoWay}"
/> />
<Grid Grid.Row="1" <Grid Grid.Row="1"

View File

@ -86,6 +86,14 @@ public sealed class CommandLine
else else
throw new FormatException($"Invalid --rotate value: {val}"); throw new FormatException($"Invalid --rotate value: {val}");
} }
else if (arg.StartsWith("--detect-id="))
{
var val = arg.Substring("--detect-id=".Length);
if (ulong.TryParse(val, out var detectId))
Master.DetectId = detectId;
else
throw new FormatException($"Invalid --detect-id value: {val}");
}
else if (arg.StartsWith("--crop=")) else if (arg.StartsWith("--crop="))
{ {
Master.Crop = ParseCrop(arg.Substring("--crop=".Length)); Master.Crop = ParseCrop(arg.Substring("--crop=".Length));
@ -164,24 +172,11 @@ public sealed class CommandLine
var files = inputFiles.SelectMany(x => FileMaskExpander.Expand(x)); var files = inputFiles.SelectMany(x => FileMaskExpander.Expand(x));
Jobs = files.Select(x => new SingleJob Jobs = files.Select(x =>
{ {
InputFile = x, var job = new SingleJob { InputFile = x };
OutputFolder = Master.OutputFolder, Master.CopyTo(job);
Crop = Master.Crop, return job;
GravitateTo = Master.GravitateTo,
Mask = Master.Mask,
Debug = Master.Debug,
Detect = Master.Detect,
OverrideTargetDuration = Master.OverrideTargetDuration,
Passthrough = Master.Passthrough,
PlainText = Master.PlainText,
EstimateOnly = Master.EstimateOnly,
ForceFixed = Master.ForceFixed,
SingleThreaded = Master.SingleThreaded,
Rotate = Master.Rotate,
RotateAuto = Master.RotateAuto,
Parameters = new Dictionary<string, string>(Master.Parameters)
}).ToArray(); }).ToArray();
if ( Jobs.Length == 0) if ( Jobs.Length == 0)
@ -370,6 +365,11 @@ Options:
--detect-above=<0-1> Face or human detectors should only report detections if their upper bound starts below this threshold. --detect-above=<0-1> Face or human detectors should only report detections if their upper bound starts below this threshold.
This is a value between 0.0 and 1.0 mapped to 0..Height. This is a value between 0.0 and 1.0 mapped to 0..Height.
--detect-id=<hex> Object ID to track. This is a hexadecimal string that identifies a specific face or
person to track across segments. This is useful when you want to consistently track the same person
across all segments of a video, even if there are multiple people present.
The ID can be obtained when running with --debug or from the debug overlay.
--gravitate=<x:y> Gravitate towards a specific point (x, y) in the video frame when tracking. --gravitate=<x:y> Gravitate towards a specific point (x, y) in the video frame when tracking.
Coordinates are normalized (0.0 to 1.0). Coordinates are normalized (0.0 to 1.0).
Example: --gravitate=0.2:0.5 (gravitate towards left-center) Example: --gravitate=0.2:0.5 (gravitate towards left-center)

View File

@ -54,6 +54,13 @@ public class SingleJob
/// </summary> /// </summary>
public float DetectAbove { get; set; } = 0.7f; public float DetectAbove { get; set; } = 0.7f;
/// <summary> /// <summary>
/// Object ID to track. This is a numeric ID that identifies a specific face or
/// person to track across segments. This is useful when you want to consistently track the same person
/// across all segments of a video, even if there are multiple people present.
/// The ID can be obtained when running with --debug or from the debug overlay.
/// </summary>
public ulong? DetectId { get; set; }
/// <summary>
/// Set starget segments length explicitly. By default, the splitter calculates segment /// Set starget segments length explicitly. By default, the splitter calculates segment
/// lengths to be equal and not exceed 58 seconds. /// lengths to be equal and not exceed 58 seconds.
/// </summary> /// </summary>
@ -128,4 +135,28 @@ public class SingleJob
} }
} }
/// <summary>
/// Copies this job's settings onto another job.
/// </summary>
/// <remarks>
/// Only the properties assigned below are copied; nothing else on
/// <paramref name="target"/> is touched (for example, no input file is assigned here).
/// </remarks>
/// <param name="target">Job that receives the settings.</param>
public void CopyTo(SingleJob target)
{
    target.OutputFolder = OutputFolder;
    target.Crop = Crop;
    target.GravitateTo = GravitateTo;
    target.Mask = Mask;
    target.Debug = Debug;
    target.Detect = Detect;
    target.ScoreThreshold = ScoreThreshold;
    target.DetectAbove = DetectAbove;
    target.DetectId = DetectId;
    target.OverrideTargetDuration = OverrideTargetDuration;
    // Copied into a new array so the two jobs do not share the same instance.
    target.Passthrough = Passthrough.ToArray();
    target.PlainText = PlainText;
    target.EstimateOnly = EstimateOnly;
    target.ForceFixed = ForceFixed;
    target.SingleThreaded = SingleThreaded;
    target.Rotate = Rotate;
    target.RotateAuto = RotateAuto;
    // Copied into a new dictionary so later edits on one job do not leak into the other.
    target.Parameters = new Dictionary<string, string>(Parameters);
    target.Enhance = Enhance;
}
/// <summary>
/// Copies the settings of <paramref name="source"/> onto this job by calling
/// <c>source.CopyTo(this)</c>.
/// </summary>
/// <param name="source">Job whose settings are copied.</param>
public void CopyFrom(SingleJob source) => source.CopyTo(this);
} }

View File

@ -2,5 +2,5 @@
public interface IObjectTracker public interface IObjectTracker
{ {
(List<DetectedPerson>, DetectedPerson?) SelectTrackedObject(SingleTask job, Mat frameMat, Point2f? lastMeasurement); (List<DetectedPerson> objects, DetectedPerson? primary) SelectTrackedObject(SingleTask job, Mat frameMat, Point2f? lastMeasurement);
} }

View File

@ -0,0 +1,78 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace splitter.algo;
public sealed class IdentityCache
{
private sealed class Identity
{
public ulong Id;
public float[] Embedding; // EMA
public int Samples;
}
private readonly List<Identity> _ids = new();
private ulong _nextId = 1;
private const float Threshold = 0.35f; // good for OSNet
private const float EmaAlpha = 0.2f;
public ulong ResolveId(float[] embedding)
{
if (_ids.Count == 0)
return CreateNew(embedding);
int bestIndex = -1;
float bestDist = float.MaxValue;
for (int i = 0; i < _ids.Count; i++)
{
float d = CosineDistance(_ids[i].Embedding, embedding);
if (d < bestDist)
{
bestDist = d;
bestIndex = i;
}
}
if (bestDist <= Threshold)
{
UpdateEma(_ids[bestIndex].Embedding, embedding);
_ids[bestIndex].Samples++;
return _ids[bestIndex].Id;
}
return CreateNew(embedding);
}
private ulong CreateNew(float[] embedding)
{
var id = _nextId++;
_ids.Add(new Identity
{
Id = id,
Embedding = embedding.ToArray(),
Samples = 1
});
return id;
}
private static float CosineDistance(float[] a, float[] b)
{
float dot = 0f;
for (int i = 0; i < a.Length; i++)
dot += a[i] * b[i];
return 1f - dot;
}
private static void UpdateEma(float[] ema, float[] v)
{
for (int i = 0; i < ema.Length; i++)
ema[i] = ema[i] * (1 - EmaAlpha) + v[i] * EmaAlpha;
}
}

View File

@ -2,17 +2,21 @@
public class ObjectTracker(IObjectDetector _detector, IEmbeddingExtractor _embeddingExtractor) : IObjectTracker public class ObjectTracker(IObjectDetector _detector, IEmbeddingExtractor _embeddingExtractor) : IObjectTracker
{ {
public (List<DetectedPerson> /*objects*/, DetectedPerson? /*primary*/) SelectTrackedObject(SingleTask job, Mat frameMat, Point2f? lastMeasurement) private readonly IdentityCache _identityCache = new();
public (List<DetectedPerson> objects, DetectedPerson? primary) SelectTrackedObject(SingleTask job, Mat frameMat, Point2f? lastMeasurement)
{ {
var objects = _detector.DetectAll(job, frameMat) ?? []; var objects = _detector.DetectAll(job, frameMat) ?? [];
// Ignore detections starting in the lower 1/2 of the frame // filter by DetectAbove
objects = objects.Where(o => o.Center.Y <= frameMat.Height * job.Job.DetectAbove).ToList(); objects = objects
.Where(o => o.Center.Y <= frameMat.Height * job.Job.DetectAbove)
.ToList();
// attach embeddings to all persons // attach embeddings
for (int i = 0; i < objects.Count; i++) for (int i = 0; i < objects.Count; i++)
{ {
var p = objects[i]; // copy struct var p = objects[i];
var rect = p.Box; var rect = p.Box;
@ -21,38 +25,32 @@ public class ObjectTracker(IObjectDetector _detector, IEmbeddingExtractor _embed
rect.Width = Math.Clamp(rect.Width, 1, frameMat.Width - rect.X); rect.Width = Math.Clamp(rect.Width, 1, frameMat.Width - rect.X);
rect.Height = Math.Clamp(rect.Height, 1, frameMat.Height - rect.Y); rect.Height = Math.Clamp(rect.Height, 1, frameMat.Height - rect.Y);
var embedding = _embeddingExtractor.Extract(frameMat, rect); var embedding = _embeddingExtractor.Extract(frameMat, rect).ToArray(); // make a copy of the embedding array
p.Id = HashEmbedding(embedding); // assign ID based on embedding hash p.Id = _identityCache.ResolveId(embedding);
objects[i] = p; // write back objects[i] = p;
} }
var primary = SelectPrimaryObject(objects, lastMeasurement); // DeepSeek tracker assigns stable IDs
var primary = SelectPrimaryObject(objects, lastMeasurement, job.Job.DetectId);
return (objects, primary); return (objects, primary);
} }
private static ulong HashEmbedding(float[] emb)
{
unchecked
{
ulong hash = 146527;
for (int i = 0; i < emb.Length; i++)
{
// convert float to int bits
uint bits = (uint)BitConverter.SingleToInt32Bits(emb[i]);
hash = (hash * 16777619) ^ bits;
}
return hash;
}
}
private DetectedPerson? SelectPrimaryObject( private DetectedPerson? SelectPrimaryObject(
List<DetectedPerson> foundObjects, List<DetectedPerson> foundObjects,
Point2f? previousCenter) Point2f? previousCenter,
ulong? detectId)
{ {
if (foundObjects == null || foundObjects.Count == 0) if (foundObjects == null || foundObjects.Count == 0)
return null; return null;
if (detectId != null)
{
var match = foundObjects.FirstOrDefault(o => o.Id == detectId.Value);
if (match.Id != 0) // default struct has Id=0, so this means we found a match
return match;
}
if (!previousCenter.HasValue) if (!previousCenter.HasValue)
{ {
var bestIndex = 0; var bestIndex = 0;