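Optimised version of TrackingSplitter: the frame Mat, output Mat and raw video buffer are now allocated once and reused for every frame, and the LINQ-based object selection is replaced with plain loops. Fixed tensor format in YOLO detector: the input tensor is now filled in planar NCHW order ([1, 3, H, W]) instead of interleaved per-pixel RGB.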

This commit is contained in:
Alexander Shabarshov 2026-05-11 12:41:08 +01:00
parent 2e8dc800a5
commit 385e1c63e0
2 changed files with 91 additions and 71 deletions

View File

@ -1,5 +1,8 @@
using System.Diagnostics; using System;
using System.Diagnostics;
using System.Globalization;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using System.Threading.Tasks;
using OpenCvSharp; using OpenCvSharp;
namespace splitter; namespace splitter;
@ -12,7 +15,7 @@ public class TrackingSplitter(
private const int LostFreezeFrames = 60; // 2 seconds at 30 FPS private const int LostFreezeFrames = 60; // 2 seconds at 30 FPS
private const float CameraEasing = 0.03f; private const float CameraEasing = 0.03f;
enum TrackState private enum TrackState
{ {
Tracking, Tracking,
LostFreeze, LostFreeze,
@ -61,12 +64,19 @@ public class TrackingSplitter(
using var stdin = ffmpeg.StandardInput.BaseStream; using var stdin = ffmpeg.StandardInput.BaseStream;
var frame = new Mat(); // Reusable frame and output mat
using var frame = new Mat();
using var outputBgr = new Mat(encHeight, encWidth, MatType.CV_8UC3);
// Reusable raw video buffer
var frameBytes = encWidth * encHeight * 3;
var videoBuffer = new byte[frameBytes];
var kalman = new KalmanTracker(); var kalman = new KalmanTracker();
kalman.Reset(new Point2f(videoWidth / 2f, videoHeight / 2f)); kalman.Reset(new Point2f(videoWidth / 2f, videoHeight / 2f));
var lostFrames = 0; var lostFrames = 0;
var reacquireCounter = 0; var reacquireCounter = 0; // kept for overlay display
var cameraCenter = new Point2f(videoWidth / 2f, videoHeight / 2f); var cameraCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
var startTime = DateTime.UtcNow; var startTime = DateTime.UtcNow;
@ -77,12 +87,10 @@ public class TrackingSplitter(
if (!capture.Read(frame) || frame.Empty()) if (!capture.Read(frame) || frame.Empty())
break; break;
Mat frameCont = frame.IsContinuous() ? frame : frame.Clone();
Rect? objectBox = null; Rect? objectBox = null;
Point2f? objectCenter = null; Point2f? objectCenter = null;
var objects = detector.DetectAll(frameCont, videoWidth, videoHeight); var objects = detector.DetectAll(frame, videoWidth, videoHeight);
var primary = SelectTrackedObject(objects, kalman.LastMeasurement); var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
if (primary.HasValue) if (primary.HasValue)
@ -93,22 +101,20 @@ public class TrackingSplitter(
bool isLost = !objectCenter.HasValue; bool isLost = !objectCenter.HasValue;
// ------------------------------
// LOST / REACQUIRE STATE MACHINE // LOST / REACQUIRE STATE MACHINE
// ------------------------------
if (isLost) if (isLost)
{ {
lostFrames++; lostFrames++;
if (lostFrames <= LostFreezeFrames) if (lostFrames <= LostFreezeFrames)
{ {
// 1) LOST_FREEZE: freeze camera // LOST_FREEZE: freeze camera
state = TrackState.LostFreeze; state = TrackState.LostFreeze;
objectCenter = null; // Kalman predicts but camera won't move objectCenter = null; // Kalman predicts but camera won't move
} }
else else
{ {
// 2) LOST_DRIFT: drift camera to center // LOST_DRIFT: drift camera to center
state = TrackState.LostDrift; state = TrackState.LostDrift;
objectCenter = new Point2f(videoWidth / 2f, videoHeight / 2f); objectCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
} }
@ -120,16 +126,13 @@ public class TrackingSplitter(
lostFrames = 0; lostFrames = 0;
} }
// ------------------------------ // KALMAN + CAMERA UPDATE
// KALMAN UPDATE
// ------------------------------
Point2f smoothedCenter; Point2f smoothedCenter;
if (state == TrackState.Tracking) if (state == TrackState.Tracking)
{ {
smoothedCenter = kalman.Update(objectCenter); smoothedCenter = kalman.Update(objectCenter);
// Normal camera easing
float easing = 0.015f; // faster tracking float easing = 0.015f; // faster tracking
cameraCenter = new Point2f( cameraCenter = new Point2f(
cameraCenter.X + (smoothedCenter.X - cameraCenter.X) * easing, cameraCenter.X + (smoothedCenter.X - cameraCenter.X) * easing,
@ -144,7 +147,6 @@ public class TrackingSplitter(
{ {
smoothedCenter = kalman.Update(objectCenter); smoothedCenter = kalman.Update(objectCenter);
// Drift camera slowly to center
float driftEasing = 0.01f; float driftEasing = 0.01f;
var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f); var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
@ -152,6 +154,7 @@ public class TrackingSplitter(
cameraCenter.X + (fallbackCenter.X - cameraCenter.X) * driftEasing, cameraCenter.X + (fallbackCenter.X - cameraCenter.X) * driftEasing,
cameraCenter.Y + (fallbackCenter.Y - cameraCenter.Y) * driftEasing); cameraCenter.Y + (fallbackCenter.Y - cameraCenter.Y) * driftEasing);
} }
var halfW = originalCropWidth / 2f; var halfW = originalCropWidth / 2f;
var halfH = originalCropHeight / 2f; var halfH = originalCropHeight / 2f;
@ -160,7 +163,6 @@ public class TrackingSplitter(
if (state == TrackState.Tracking) if (state == TrackState.Tracking)
{ {
// Normal tracking
smoothedCenter = kalman.Update(objectCenter); smoothedCenter = kalman.Update(objectCenter);
cameraCenter = new Point2f( cameraCenter = new Point2f(
@ -173,7 +175,6 @@ public class TrackingSplitter(
} }
else if (state == TrackState.LostDrift) else if (state == TrackState.LostDrift)
{ {
// Drift camera slowly to center
var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f); var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
cameraCenter = new Point2f( cameraCenter = new Point2f(
@ -194,57 +195,48 @@ public class TrackingSplitter(
if (debugOverlay) if (debugOverlay)
{ {
// overlays always drawn on frameCont // overlays always drawn on frame
if (objectBox.HasValue) if (objectBox.HasValue)
{ {
var fb = objectBox.Value; var fb = objectBox.Value;
Cv2.Rectangle(frameCont, Cv2.Rectangle(frame,
new Rect(fb.X, fb.Y, fb.Width, fb.Height), new Rect(fb.X, fb.Y, fb.Width, fb.Height),
Scalar.LimeGreen, 2); Scalar.LimeGreen, 2);
} }
Cv2.Circle(frameCont, Cv2.Circle(frame,
new Point((int)smoothedCenter.X, (int)smoothedCenter.Y), new Point((int)smoothedCenter.X, (int)smoothedCenter.Y),
6, Scalar.LimeGreen, -1); 6, Scalar.LimeGreen, -1);
Cv2.Rectangle(frameCont, roi, Cv2.Rectangle(frame, roi,
objectCenter.HasValue ? Scalar.Yellow : Scalar.Red, 3); objectCenter.HasValue ? Scalar.Yellow : Scalar.Red, 3);
DrawText(frameCont, $"Faces: {objects.Count}", 20, 40, Scalar.White); DrawText(frame, $"Faces: {objects.Count}", 20, 40, Scalar.White);
DrawText(frameCont, $"LostFrames: {lostFrames}", 20, 70, Scalar.White); DrawText(frame, $"LostFrames: {lostFrames}", 20, 70, Scalar.White);
DrawText(frameCont, $"Reacquire: {reacquireCounter}", 20, 100, Scalar.White); DrawText(frame, $"Reacquire: {reacquireCounter}", 20, 100, Scalar.White);
DrawText(frameCont, $"Noise: {kalman.CurrentNoise:F3}", 20, 130, Scalar.White); DrawText(frame, $"Noise: {kalman.CurrentNoise:F3}", 20, 130, Scalar.White);
DrawText(frameCont, $"Camera: {cameraCenter.X:F1},{cameraCenter.Y:F1}", 20, 160, Scalar.White); DrawText(frame, $"Camera: {cameraCenter.X:F1},{cameraCenter.Y:F1}", 20, 160, Scalar.White);
} }
if (debugOverlay) if (debugOverlay)
{ {
// DEBUG MODE: write FULL FRAME with overlays // DEBUG MODE: write FULL FRAME with overlays
var bgr = frameCont.IsContinuous() ? frameCont : frameCont.Clone(); // Ensure contiguous buffer by copying into preallocated outputBgr
frame.CopyTo(outputBgr);
var bytes = bgr.Rows * bgr.Cols * bgr.ElemSize(); Marshal.Copy(outputBgr.Data, videoBuffer, 0, frameBytes);
var buffer = new byte[bytes]; stdin.Write(videoBuffer, 0, frameBytes);
Marshal.Copy(bgr.Data, buffer, 0, bytes);
stdin.Write(buffer, 0, bytes);
if (!ReferenceEquals(bgr, frameCont))
bgr.Dispose();
} }
else else
{ {
// PRODUCTION MODE: actual crop // PRODUCTION MODE: actual crop
using var cropped = new Mat(frameCont, roi); using var cropped = new Mat(frame, roi);
using var bgr = cropped.Clone(); cropped.CopyTo(outputBgr);
var bytes = bgr.Rows * bgr.Cols * bgr.ElemSize(); Marshal.Copy(outputBgr.Data, videoBuffer, 0, frameBytes);
var buffer = new byte[bytes]; stdin.Write(videoBuffer, 0, frameBytes);
Marshal.Copy(bgr.Data, buffer, 0, bytes);
stdin.Write(buffer, 0, bytes);
} }
if (!ReferenceEquals(frameCont, frame))
frameCont.Dispose();
var elapsed = DateTime.UtcNow - startTime; var elapsed = DateTime.UtcNow - startTime;
var progress = (double)i / totalFrames; var progress = (double)i / totalFrames;
var speed = i > 0 ? i / elapsed.TotalSeconds : 0.0; var speed = i > 0 ? i / elapsed.TotalSeconds : 0.0;
@ -272,19 +264,46 @@ public class TrackingSplitter(
if (!previousCenter.HasValue) if (!previousCenter.HasValue)
{ {
return foundObjects // Largest area
.OrderByDescending(f => f.box.Width * f.box.Height) var bestIndex = 0;
.First(); var bestArea = float.MinValue;
for (int i = 0; i < foundObjects.Count; i++)
{
var f = foundObjects[i];
var area = f.box.Width * f.box.Height;
if (area > bestArea)
{
bestArea = area;
bestIndex = i;
}
} }
return foundObjects return foundObjects[bestIndex];
.OrderBy(f => }
else
{ {
var dx = f.center.X - previousCenter.Value.X; // Closest to previous center
var dy = f.center.Y - previousCenter.Value.Y; var prev = previousCenter.Value;
return dx * dx + dy * dy; var bestIndex = 0;
}) var bestDist2 = float.MaxValue;
.First();
for (int i = 0; i < foundObjects.Count; i++)
{
var f = foundObjects[i];
var dx = f.center.X - prev.X;
var dy = f.center.Y - prev.Y;
var d2 = dx * dx + dy * dy;
if (d2 < bestDist2)
{
bestDist2 = d2;
bestIndex = i;
}
}
return foundObjects[bestIndex];
}
} }
private Process StartFfmpegNvenc( private Process StartFfmpegNvenc(
@ -297,8 +316,8 @@ public class TrackingSplitter(
string[] passthrough) string[] passthrough)
{ {
var pass = passthrough.Length > 0 ? string.Join(" ", passthrough) : ""; var pass = passthrough.Length > 0 ? string.Join(" ", passthrough) : "";
var skipSeconds = skip.TotalSeconds.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture); var skipSeconds = skip.TotalSeconds.ToString("0.###", CultureInfo.InvariantCulture);
var fpsStr = fps.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture); var fpsStr = fps.ToString("0.###", CultureInfo.InvariantCulture);
var args = var args =
"-y " + "-y " +
@ -337,10 +356,9 @@ public class TrackingSplitter(
return process; return process;
} }
void DrawText(Mat img, string text, int x, int y, Scalar color) private static void DrawText(Mat img, string text, int x, int y, Scalar color)
{ {
Cv2.PutText(img, text, new Point(x, y), Cv2.PutText(img, text, new Point(x, y),
HersheyFonts.HersheySimplex, 0.6, color, 2); HersheyFonts.HersheySimplex, 0.6, color, 2);
} }
} }

View File

@ -163,12 +163,13 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private void FillInputTensor(Mat rgb) private void FillInputTensor(Mat rgb)
{ {
// rgb is 640x640, 3 channels, 8-bit
int height = _inputHeight; int height = _inputHeight;
int width = _inputWidth; int width = _inputWidth;
// NCHW: [1, 3, H, W]
int planeSize = height * width;
Span<float> dst = _inputBuffer.AsSpan(); Span<float> dst = _inputBuffer.AsSpan();
int dstIndex = 0;
unsafe unsafe
{ {
@ -179,21 +180,22 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
int srcIndex = 0; int srcIndex = 0;
// Layout: CHW (1,3,H,W)
// We fill in RGB order, normalized to [0,1]
// Loop structured to be SIMD-friendly; JIT can vectorize the simple arithmetic.
for (int x = 0; x < width; x++) for (int x = 0; x < width; x++)
{ {
byte r = rowSpan[srcIndex + 0]; byte r = rowSpan[srcIndex + 0];
byte g = rowSpan[srcIndex + 1]; byte g = rowSpan[srcIndex + 1];
byte b = rowSpan[srcIndex + 2]; byte b = rowSpan[srcIndex + 2];
dst[dstIndex + 0] = r * _inv255; int offset = y * width + x;
dst[dstIndex + 1] = g * _inv255;
dst[dstIndex + 2] = b * _inv255; // channel 0: R
dst[offset] = r * _inv255;
// channel 1: G
dst[planeSize + offset] = g * _inv255;
// channel 2: B
dst[2 * planeSize + offset] = b * _inv255;
srcIndex += 3; srcIndex += 3;
dstIndex += 3;
} }
} }
} }