mirror of
https://github.com/unclshura/splitter.git
synced 2026-06-22 00:22:01 +00:00
Optimised version of TrackingSplitter. Fixed tensor format in YOLO detector.
This commit is contained in:
parent
2e8dc800a5
commit
385e1c63e0
@ -1,5 +1,8 @@
|
||||
using System.Diagnostics;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Threading.Tasks;
|
||||
using OpenCvSharp;
|
||||
|
||||
namespace splitter;
|
||||
@ -10,9 +13,9 @@ public class TrackingSplitter(
|
||||
) : LoggingBase(log, drawProgress)
|
||||
{
|
||||
private const int LostFreezeFrames = 60; // 2 seconds at 30 FPS
|
||||
private const float CameraEasing = 0.03f;
|
||||
private const float CameraEasing = 0.03f;
|
||||
|
||||
enum TrackState
|
||||
private enum TrackState
|
||||
{
|
||||
Tracking,
|
||||
LostFreeze,
|
||||
@ -61,12 +64,19 @@ public class TrackingSplitter(
|
||||
|
||||
using var stdin = ffmpeg.StandardInput.BaseStream;
|
||||
|
||||
var frame = new Mat();
|
||||
// Reusable frame and output mat
|
||||
using var frame = new Mat();
|
||||
using var outputBgr = new Mat(encHeight, encWidth, MatType.CV_8UC3);
|
||||
|
||||
// Reusable raw video buffer
|
||||
var frameBytes = encWidth * encHeight * 3;
|
||||
var videoBuffer = new byte[frameBytes];
|
||||
|
||||
var kalman = new KalmanTracker();
|
||||
kalman.Reset(new Point2f(videoWidth / 2f, videoHeight / 2f));
|
||||
|
||||
var lostFrames = 0;
|
||||
var reacquireCounter = 0;
|
||||
var lostFrames = 0;
|
||||
var reacquireCounter = 0; // kept for overlay display
|
||||
|
||||
var cameraCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
||||
var startTime = DateTime.UtcNow;
|
||||
@ -77,12 +87,10 @@ public class TrackingSplitter(
|
||||
if (!capture.Read(frame) || frame.Empty())
|
||||
break;
|
||||
|
||||
Mat frameCont = frame.IsContinuous() ? frame : frame.Clone();
|
||||
|
||||
Rect? objectBox = null;
|
||||
Point2f? objectCenter = null;
|
||||
|
||||
var objects = detector.DetectAll(frameCont, videoWidth, videoHeight);
|
||||
var objects = detector.DetectAll(frame, videoWidth, videoHeight);
|
||||
var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
|
||||
|
||||
if (primary.HasValue)
|
||||
@ -93,22 +101,20 @@ public class TrackingSplitter(
|
||||
|
||||
bool isLost = !objectCenter.HasValue;
|
||||
|
||||
// ------------------------------
|
||||
// LOST / REACQUIRE STATE MACHINE
|
||||
// ------------------------------
|
||||
if (isLost)
|
||||
{
|
||||
lostFrames++;
|
||||
|
||||
if (lostFrames <= LostFreezeFrames)
|
||||
{
|
||||
// 1) LOST_FREEZE: freeze camera
|
||||
// LOST_FREEZE: freeze camera
|
||||
state = TrackState.LostFreeze;
|
||||
objectCenter = null; // Kalman predicts but camera won't move
|
||||
}
|
||||
else
|
||||
{
|
||||
// 2) LOST_DRIFT: drift camera to center
|
||||
// LOST_DRIFT: drift camera to center
|
||||
state = TrackState.LostDrift;
|
||||
objectCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
||||
}
|
||||
@ -120,16 +126,13 @@ public class TrackingSplitter(
|
||||
lostFrames = 0;
|
||||
}
|
||||
|
||||
// ------------------------------
|
||||
// KALMAN UPDATE
|
||||
// ------------------------------
|
||||
// KALMAN + CAMERA UPDATE
|
||||
Point2f smoothedCenter;
|
||||
|
||||
if (state == TrackState.Tracking)
|
||||
{
|
||||
smoothedCenter = kalman.Update(objectCenter);
|
||||
|
||||
// Normal camera easing
|
||||
float easing = 0.015f; // faster tracking
|
||||
cameraCenter = new Point2f(
|
||||
cameraCenter.X + (smoothedCenter.X - cameraCenter.X) * easing,
|
||||
@ -138,13 +141,12 @@ public class TrackingSplitter(
|
||||
else if (state == TrackState.LostFreeze)
|
||||
{
|
||||
// Freeze camera — do nothing
|
||||
smoothedCenter = kalman.LastMeasurement ?? new Point2f(0,0);
|
||||
smoothedCenter = kalman.LastMeasurement ?? new Point2f(0, 0);
|
||||
}
|
||||
else // LOST_DRIFT
|
||||
{
|
||||
smoothedCenter = kalman.Update(objectCenter);
|
||||
|
||||
// Drift camera slowly to center
|
||||
float driftEasing = 0.01f;
|
||||
var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
||||
|
||||
@ -152,6 +154,7 @@ public class TrackingSplitter(
|
||||
cameraCenter.X + (fallbackCenter.X - cameraCenter.X) * driftEasing,
|
||||
cameraCenter.Y + (fallbackCenter.Y - cameraCenter.Y) * driftEasing);
|
||||
}
|
||||
|
||||
var halfW = originalCropWidth / 2f;
|
||||
var halfH = originalCropHeight / 2f;
|
||||
|
||||
@ -160,7 +163,6 @@ public class TrackingSplitter(
|
||||
|
||||
if (state == TrackState.Tracking)
|
||||
{
|
||||
// Normal tracking
|
||||
smoothedCenter = kalman.Update(objectCenter);
|
||||
|
||||
cameraCenter = new Point2f(
|
||||
@ -173,7 +175,6 @@ public class TrackingSplitter(
|
||||
}
|
||||
else if (state == TrackState.LostDrift)
|
||||
{
|
||||
// Drift camera slowly to center
|
||||
var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
||||
|
||||
cameraCenter = new Point2f(
|
||||
@ -194,57 +195,48 @@ public class TrackingSplitter(
|
||||
|
||||
if (debugOverlay)
|
||||
{
|
||||
// overlays always drawn on frameCont
|
||||
// overlays always drawn on frame
|
||||
if (objectBox.HasValue)
|
||||
{
|
||||
var fb = objectBox.Value;
|
||||
Cv2.Rectangle(frameCont,
|
||||
Cv2.Rectangle(frame,
|
||||
new Rect(fb.X, fb.Y, fb.Width, fb.Height),
|
||||
Scalar.LimeGreen, 2);
|
||||
}
|
||||
|
||||
Cv2.Circle(frameCont,
|
||||
Cv2.Circle(frame,
|
||||
new Point((int)smoothedCenter.X, (int)smoothedCenter.Y),
|
||||
6, Scalar.LimeGreen, -1);
|
||||
|
||||
Cv2.Rectangle(frameCont, roi,
|
||||
Cv2.Rectangle(frame, roi,
|
||||
objectCenter.HasValue ? Scalar.Yellow : Scalar.Red, 3);
|
||||
|
||||
DrawText(frameCont, $"Faces: {objects.Count}", 20, 40, Scalar.White);
|
||||
DrawText(frameCont, $"LostFrames: {lostFrames}", 20, 70, Scalar.White);
|
||||
DrawText(frameCont, $"Reacquire: {reacquireCounter}", 20, 100, Scalar.White);
|
||||
DrawText(frameCont, $"Noise: {kalman.CurrentNoise:F3}", 20, 130, Scalar.White);
|
||||
DrawText(frameCont, $"Camera: {cameraCenter.X:F1},{cameraCenter.Y:F1}", 20, 160, Scalar.White);
|
||||
DrawText(frame, $"Faces: {objects.Count}", 20, 40, Scalar.White);
|
||||
DrawText(frame, $"LostFrames: {lostFrames}", 20, 70, Scalar.White);
|
||||
DrawText(frame, $"Reacquire: {reacquireCounter}", 20, 100, Scalar.White);
|
||||
DrawText(frame, $"Noise: {kalman.CurrentNoise:F3}", 20, 130, Scalar.White);
|
||||
DrawText(frame, $"Camera: {cameraCenter.X:F1},{cameraCenter.Y:F1}", 20, 160, Scalar.White);
|
||||
}
|
||||
|
||||
if (debugOverlay)
|
||||
{
|
||||
// DEBUG MODE: write FULL FRAME with overlays
|
||||
var bgr = frameCont.IsContinuous() ? frameCont : frameCont.Clone();
|
||||
// Ensure contiguous buffer by copying into preallocated outputBgr
|
||||
frame.CopyTo(outputBgr);
|
||||
|
||||
var bytes = bgr.Rows * bgr.Cols * bgr.ElemSize();
|
||||
var buffer = new byte[bytes];
|
||||
Marshal.Copy(bgr.Data, buffer, 0, bytes);
|
||||
stdin.Write(buffer, 0, bytes);
|
||||
|
||||
if (!ReferenceEquals(bgr, frameCont))
|
||||
bgr.Dispose();
|
||||
Marshal.Copy(outputBgr.Data, videoBuffer, 0, frameBytes);
|
||||
stdin.Write(videoBuffer, 0, frameBytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
// PRODUCTION MODE: actual crop
|
||||
using var cropped = new Mat(frameCont, roi);
|
||||
using var bgr = cropped.Clone();
|
||||
using var cropped = new Mat(frame, roi);
|
||||
cropped.CopyTo(outputBgr);
|
||||
|
||||
var bytes = bgr.Rows * bgr.Cols * bgr.ElemSize();
|
||||
var buffer = new byte[bytes];
|
||||
Marshal.Copy(bgr.Data, buffer, 0, bytes);
|
||||
stdin.Write(buffer, 0, bytes);
|
||||
Marshal.Copy(outputBgr.Data, videoBuffer, 0, frameBytes);
|
||||
stdin.Write(videoBuffer, 0, frameBytes);
|
||||
}
|
||||
|
||||
if (!ReferenceEquals(frameCont, frame))
|
||||
frameCont.Dispose();
|
||||
|
||||
var elapsed = DateTime.UtcNow - startTime;
|
||||
var progress = (double)i / totalFrames;
|
||||
var speed = i > 0 ? i / elapsed.TotalSeconds : 0.0;
|
||||
@ -272,19 +264,46 @@ public class TrackingSplitter(
|
||||
|
||||
if (!previousCenter.HasValue)
|
||||
{
|
||||
return foundObjects
|
||||
.OrderByDescending(f => f.box.Width * f.box.Height)
|
||||
.First();
|
||||
}
|
||||
// Largest area
|
||||
var bestIndex = 0;
|
||||
var bestArea = float.MinValue;
|
||||
|
||||
return foundObjects
|
||||
.OrderBy(f =>
|
||||
for (int i = 0; i < foundObjects.Count; i++)
|
||||
{
|
||||
var dx = f.center.X - previousCenter.Value.X;
|
||||
var dy = f.center.Y - previousCenter.Value.Y;
|
||||
return dx * dx + dy * dy;
|
||||
})
|
||||
.First();
|
||||
var f = foundObjects[i];
|
||||
var area = f.box.Width * f.box.Height;
|
||||
if (area > bestArea)
|
||||
{
|
||||
bestArea = area;
|
||||
bestIndex = i;
|
||||
}
|
||||
}
|
||||
|
||||
return foundObjects[bestIndex];
|
||||
}
|
||||
else
|
||||
{
|
||||
// Closest to previous center
|
||||
var prev = previousCenter.Value;
|
||||
var bestIndex = 0;
|
||||
var bestDist2 = float.MaxValue;
|
||||
|
||||
for (int i = 0; i < foundObjects.Count; i++)
|
||||
{
|
||||
var f = foundObjects[i];
|
||||
var dx = f.center.X - prev.X;
|
||||
var dy = f.center.Y - prev.Y;
|
||||
var d2 = dx * dx + dy * dy;
|
||||
|
||||
if (d2 < bestDist2)
|
||||
{
|
||||
bestDist2 = d2;
|
||||
bestIndex = i;
|
||||
}
|
||||
}
|
||||
|
||||
return foundObjects[bestIndex];
|
||||
}
|
||||
}
|
||||
|
||||
private Process StartFfmpegNvenc(
|
||||
@ -297,8 +316,8 @@ public class TrackingSplitter(
|
||||
string[] passthrough)
|
||||
{
|
||||
var pass = passthrough.Length > 0 ? string.Join(" ", passthrough) : "";
|
||||
var skipSeconds = skip.TotalSeconds.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture);
|
||||
var fpsStr = fps.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture);
|
||||
var skipSeconds = skip.TotalSeconds.ToString("0.###", CultureInfo.InvariantCulture);
|
||||
var fpsStr = fps.ToString("0.###", CultureInfo.InvariantCulture);
|
||||
|
||||
var args =
|
||||
"-y " +
|
||||
@ -337,10 +356,9 @@ public class TrackingSplitter(
|
||||
return process;
|
||||
}
|
||||
|
||||
void DrawText(Mat img, string text, int x, int y, Scalar color)
|
||||
private static void DrawText(Mat img, string text, int x, int y, Scalar color)
|
||||
{
|
||||
Cv2.PutText(img, text, new Point(x, y),
|
||||
HersheyFonts.HersheySimplex, 0.6, color, 2);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -163,12 +163,13 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private void FillInputTensor(Mat rgb)
|
||||
{
|
||||
// rgb is 640x640, 3 channels, 8-bit
|
||||
int height = _inputHeight;
|
||||
int width = _inputWidth;
|
||||
|
||||
// NCHW: [1, 3, H, W]
|
||||
int planeSize = height * width;
|
||||
|
||||
Span<float> dst = _inputBuffer.AsSpan();
|
||||
int dstIndex = 0;
|
||||
|
||||
unsafe
|
||||
{
|
||||
@ -179,21 +180,22 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
||||
|
||||
int srcIndex = 0;
|
||||
|
||||
// Layout: CHW (1,3,H,W)
|
||||
// We fill in RGB order, normalized to [0,1]
|
||||
// Loop structured to be SIMD-friendly; JIT can vectorize the simple arithmetic.
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
byte r = rowSpan[srcIndex + 0];
|
||||
byte g = rowSpan[srcIndex + 1];
|
||||
byte b = rowSpan[srcIndex + 2];
|
||||
|
||||
dst[dstIndex + 0] = r * _inv255;
|
||||
dst[dstIndex + 1] = g * _inv255;
|
||||
dst[dstIndex + 2] = b * _inv255;
|
||||
int offset = y * width + x;
|
||||
|
||||
// channel 0: R
|
||||
dst[offset] = r * _inv255;
|
||||
// channel 1: G
|
||||
dst[planeSize + offset] = g * _inv255;
|
||||
// channel 2: B
|
||||
dst[2 * planeSize + offset] = b * _inv255;
|
||||
|
||||
srcIndex += 3;
|
||||
dstIndex += 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user