mirror of
https://github.com/unclshura/splitter.git
synced 2026-06-22 00:22:01 +00:00
Optimised version of TrackingSplitter. Fixed tensor format in YOLO detector.
This commit is contained in:
parent
2e8dc800a5
commit
385e1c63e0
@ -1,5 +1,8 @@
|
|||||||
using System.Diagnostics;
|
using System;
|
||||||
|
using System.Diagnostics;
|
||||||
|
using System.Globalization;
|
||||||
using System.Runtime.InteropServices;
|
using System.Runtime.InteropServices;
|
||||||
|
using System.Threading.Tasks;
|
||||||
using OpenCvSharp;
|
using OpenCvSharp;
|
||||||
|
|
||||||
namespace splitter;
|
namespace splitter;
|
||||||
@ -12,7 +15,7 @@ public class TrackingSplitter(
|
|||||||
private const int LostFreezeFrames = 60; // 2 seconds at 30 FPS
|
private const int LostFreezeFrames = 60; // 2 seconds at 30 FPS
|
||||||
private const float CameraEasing = 0.03f;
|
private const float CameraEasing = 0.03f;
|
||||||
|
|
||||||
enum TrackState
|
private enum TrackState
|
||||||
{
|
{
|
||||||
Tracking,
|
Tracking,
|
||||||
LostFreeze,
|
LostFreeze,
|
||||||
@ -61,12 +64,19 @@ public class TrackingSplitter(
|
|||||||
|
|
||||||
using var stdin = ffmpeg.StandardInput.BaseStream;
|
using var stdin = ffmpeg.StandardInput.BaseStream;
|
||||||
|
|
||||||
var frame = new Mat();
|
// Reusable frame and output mat
|
||||||
|
using var frame = new Mat();
|
||||||
|
using var outputBgr = new Mat(encHeight, encWidth, MatType.CV_8UC3);
|
||||||
|
|
||||||
|
// Reusable raw video buffer
|
||||||
|
var frameBytes = encWidth * encHeight * 3;
|
||||||
|
var videoBuffer = new byte[frameBytes];
|
||||||
|
|
||||||
var kalman = new KalmanTracker();
|
var kalman = new KalmanTracker();
|
||||||
kalman.Reset(new Point2f(videoWidth / 2f, videoHeight / 2f));
|
kalman.Reset(new Point2f(videoWidth / 2f, videoHeight / 2f));
|
||||||
|
|
||||||
var lostFrames = 0;
|
var lostFrames = 0;
|
||||||
var reacquireCounter = 0;
|
var reacquireCounter = 0; // kept for overlay display
|
||||||
|
|
||||||
var cameraCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
var cameraCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
||||||
var startTime = DateTime.UtcNow;
|
var startTime = DateTime.UtcNow;
|
||||||
@ -77,12 +87,10 @@ public class TrackingSplitter(
|
|||||||
if (!capture.Read(frame) || frame.Empty())
|
if (!capture.Read(frame) || frame.Empty())
|
||||||
break;
|
break;
|
||||||
|
|
||||||
Mat frameCont = frame.IsContinuous() ? frame : frame.Clone();
|
|
||||||
|
|
||||||
Rect? objectBox = null;
|
Rect? objectBox = null;
|
||||||
Point2f? objectCenter = null;
|
Point2f? objectCenter = null;
|
||||||
|
|
||||||
var objects = detector.DetectAll(frameCont, videoWidth, videoHeight);
|
var objects = detector.DetectAll(frame, videoWidth, videoHeight);
|
||||||
var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
|
var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
|
||||||
|
|
||||||
if (primary.HasValue)
|
if (primary.HasValue)
|
||||||
@ -93,22 +101,20 @@ public class TrackingSplitter(
|
|||||||
|
|
||||||
bool isLost = !objectCenter.HasValue;
|
bool isLost = !objectCenter.HasValue;
|
||||||
|
|
||||||
// ------------------------------
|
|
||||||
// LOST / REACQUIRE STATE MACHINE
|
// LOST / REACQUIRE STATE MACHINE
|
||||||
// ------------------------------
|
|
||||||
if (isLost)
|
if (isLost)
|
||||||
{
|
{
|
||||||
lostFrames++;
|
lostFrames++;
|
||||||
|
|
||||||
if (lostFrames <= LostFreezeFrames)
|
if (lostFrames <= LostFreezeFrames)
|
||||||
{
|
{
|
||||||
// 1) LOST_FREEZE: freeze camera
|
// LOST_FREEZE: freeze camera
|
||||||
state = TrackState.LostFreeze;
|
state = TrackState.LostFreeze;
|
||||||
objectCenter = null; // Kalman predicts but camera won't move
|
objectCenter = null; // Kalman predicts but camera won't move
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// 2) LOST_DRIFT: drift camera to center
|
// LOST_DRIFT: drift camera to center
|
||||||
state = TrackState.LostDrift;
|
state = TrackState.LostDrift;
|
||||||
objectCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
objectCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
||||||
}
|
}
|
||||||
@ -120,16 +126,13 @@ public class TrackingSplitter(
|
|||||||
lostFrames = 0;
|
lostFrames = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ------------------------------
|
// KALMAN + CAMERA UPDATE
|
||||||
// KALMAN UPDATE
|
|
||||||
// ------------------------------
|
|
||||||
Point2f smoothedCenter;
|
Point2f smoothedCenter;
|
||||||
|
|
||||||
if (state == TrackState.Tracking)
|
if (state == TrackState.Tracking)
|
||||||
{
|
{
|
||||||
smoothedCenter = kalman.Update(objectCenter);
|
smoothedCenter = kalman.Update(objectCenter);
|
||||||
|
|
||||||
// Normal camera easing
|
|
||||||
float easing = 0.015f; // faster tracking
|
float easing = 0.015f; // faster tracking
|
||||||
cameraCenter = new Point2f(
|
cameraCenter = new Point2f(
|
||||||
cameraCenter.X + (smoothedCenter.X - cameraCenter.X) * easing,
|
cameraCenter.X + (smoothedCenter.X - cameraCenter.X) * easing,
|
||||||
@ -144,7 +147,6 @@ public class TrackingSplitter(
|
|||||||
{
|
{
|
||||||
smoothedCenter = kalman.Update(objectCenter);
|
smoothedCenter = kalman.Update(objectCenter);
|
||||||
|
|
||||||
// Drift camera slowly to center
|
|
||||||
float driftEasing = 0.01f;
|
float driftEasing = 0.01f;
|
||||||
var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
||||||
|
|
||||||
@ -152,6 +154,7 @@ public class TrackingSplitter(
|
|||||||
cameraCenter.X + (fallbackCenter.X - cameraCenter.X) * driftEasing,
|
cameraCenter.X + (fallbackCenter.X - cameraCenter.X) * driftEasing,
|
||||||
cameraCenter.Y + (fallbackCenter.Y - cameraCenter.Y) * driftEasing);
|
cameraCenter.Y + (fallbackCenter.Y - cameraCenter.Y) * driftEasing);
|
||||||
}
|
}
|
||||||
|
|
||||||
var halfW = originalCropWidth / 2f;
|
var halfW = originalCropWidth / 2f;
|
||||||
var halfH = originalCropHeight / 2f;
|
var halfH = originalCropHeight / 2f;
|
||||||
|
|
||||||
@ -160,7 +163,6 @@ public class TrackingSplitter(
|
|||||||
|
|
||||||
if (state == TrackState.Tracking)
|
if (state == TrackState.Tracking)
|
||||||
{
|
{
|
||||||
// Normal tracking
|
|
||||||
smoothedCenter = kalman.Update(objectCenter);
|
smoothedCenter = kalman.Update(objectCenter);
|
||||||
|
|
||||||
cameraCenter = new Point2f(
|
cameraCenter = new Point2f(
|
||||||
@ -173,7 +175,6 @@ public class TrackingSplitter(
|
|||||||
}
|
}
|
||||||
else if (state == TrackState.LostDrift)
|
else if (state == TrackState.LostDrift)
|
||||||
{
|
{
|
||||||
// Drift camera slowly to center
|
|
||||||
var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
||||||
|
|
||||||
cameraCenter = new Point2f(
|
cameraCenter = new Point2f(
|
||||||
@ -194,57 +195,48 @@ public class TrackingSplitter(
|
|||||||
|
|
||||||
if (debugOverlay)
|
if (debugOverlay)
|
||||||
{
|
{
|
||||||
// overlays always drawn on frameCont
|
// overlays always drawn on frame
|
||||||
if (objectBox.HasValue)
|
if (objectBox.HasValue)
|
||||||
{
|
{
|
||||||
var fb = objectBox.Value;
|
var fb = objectBox.Value;
|
||||||
Cv2.Rectangle(frameCont,
|
Cv2.Rectangle(frame,
|
||||||
new Rect(fb.X, fb.Y, fb.Width, fb.Height),
|
new Rect(fb.X, fb.Y, fb.Width, fb.Height),
|
||||||
Scalar.LimeGreen, 2);
|
Scalar.LimeGreen, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
Cv2.Circle(frameCont,
|
Cv2.Circle(frame,
|
||||||
new Point((int)smoothedCenter.X, (int)smoothedCenter.Y),
|
new Point((int)smoothedCenter.X, (int)smoothedCenter.Y),
|
||||||
6, Scalar.LimeGreen, -1);
|
6, Scalar.LimeGreen, -1);
|
||||||
|
|
||||||
Cv2.Rectangle(frameCont, roi,
|
Cv2.Rectangle(frame, roi,
|
||||||
objectCenter.HasValue ? Scalar.Yellow : Scalar.Red, 3);
|
objectCenter.HasValue ? Scalar.Yellow : Scalar.Red, 3);
|
||||||
|
|
||||||
DrawText(frameCont, $"Faces: {objects.Count}", 20, 40, Scalar.White);
|
DrawText(frame, $"Faces: {objects.Count}", 20, 40, Scalar.White);
|
||||||
DrawText(frameCont, $"LostFrames: {lostFrames}", 20, 70, Scalar.White);
|
DrawText(frame, $"LostFrames: {lostFrames}", 20, 70, Scalar.White);
|
||||||
DrawText(frameCont, $"Reacquire: {reacquireCounter}", 20, 100, Scalar.White);
|
DrawText(frame, $"Reacquire: {reacquireCounter}", 20, 100, Scalar.White);
|
||||||
DrawText(frameCont, $"Noise: {kalman.CurrentNoise:F3}", 20, 130, Scalar.White);
|
DrawText(frame, $"Noise: {kalman.CurrentNoise:F3}", 20, 130, Scalar.White);
|
||||||
DrawText(frameCont, $"Camera: {cameraCenter.X:F1},{cameraCenter.Y:F1}", 20, 160, Scalar.White);
|
DrawText(frame, $"Camera: {cameraCenter.X:F1},{cameraCenter.Y:F1}", 20, 160, Scalar.White);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (debugOverlay)
|
if (debugOverlay)
|
||||||
{
|
{
|
||||||
// DEBUG MODE: write FULL FRAME with overlays
|
// DEBUG MODE: write FULL FRAME with overlays
|
||||||
var bgr = frameCont.IsContinuous() ? frameCont : frameCont.Clone();
|
// Ensure contiguous buffer by copying into preallocated outputBgr
|
||||||
|
frame.CopyTo(outputBgr);
|
||||||
|
|
||||||
var bytes = bgr.Rows * bgr.Cols * bgr.ElemSize();
|
Marshal.Copy(outputBgr.Data, videoBuffer, 0, frameBytes);
|
||||||
var buffer = new byte[bytes];
|
stdin.Write(videoBuffer, 0, frameBytes);
|
||||||
Marshal.Copy(bgr.Data, buffer, 0, bytes);
|
|
||||||
stdin.Write(buffer, 0, bytes);
|
|
||||||
|
|
||||||
if (!ReferenceEquals(bgr, frameCont))
|
|
||||||
bgr.Dispose();
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// PRODUCTION MODE: actual crop
|
// PRODUCTION MODE: actual crop
|
||||||
using var cropped = new Mat(frameCont, roi);
|
using var cropped = new Mat(frame, roi);
|
||||||
using var bgr = cropped.Clone();
|
cropped.CopyTo(outputBgr);
|
||||||
|
|
||||||
var bytes = bgr.Rows * bgr.Cols * bgr.ElemSize();
|
Marshal.Copy(outputBgr.Data, videoBuffer, 0, frameBytes);
|
||||||
var buffer = new byte[bytes];
|
stdin.Write(videoBuffer, 0, frameBytes);
|
||||||
Marshal.Copy(bgr.Data, buffer, 0, bytes);
|
|
||||||
stdin.Write(buffer, 0, bytes);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ReferenceEquals(frameCont, frame))
|
|
||||||
frameCont.Dispose();
|
|
||||||
|
|
||||||
var elapsed = DateTime.UtcNow - startTime;
|
var elapsed = DateTime.UtcNow - startTime;
|
||||||
var progress = (double)i / totalFrames;
|
var progress = (double)i / totalFrames;
|
||||||
var speed = i > 0 ? i / elapsed.TotalSeconds : 0.0;
|
var speed = i > 0 ? i / elapsed.TotalSeconds : 0.0;
|
||||||
@ -272,19 +264,46 @@ public class TrackingSplitter(
|
|||||||
|
|
||||||
if (!previousCenter.HasValue)
|
if (!previousCenter.HasValue)
|
||||||
{
|
{
|
||||||
return foundObjects
|
// Largest area
|
||||||
.OrderByDescending(f => f.box.Width * f.box.Height)
|
var bestIndex = 0;
|
||||||
.First();
|
var bestArea = float.MinValue;
|
||||||
|
|
||||||
|
for (int i = 0; i < foundObjects.Count; i++)
|
||||||
|
{
|
||||||
|
var f = foundObjects[i];
|
||||||
|
var area = f.box.Width * f.box.Height;
|
||||||
|
if (area > bestArea)
|
||||||
|
{
|
||||||
|
bestArea = area;
|
||||||
|
bestIndex = i;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return foundObjects
|
return foundObjects[bestIndex];
|
||||||
.OrderBy(f =>
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
var dx = f.center.X - previousCenter.Value.X;
|
// Closest to previous center
|
||||||
var dy = f.center.Y - previousCenter.Value.Y;
|
var prev = previousCenter.Value;
|
||||||
return dx * dx + dy * dy;
|
var bestIndex = 0;
|
||||||
})
|
var bestDist2 = float.MaxValue;
|
||||||
.First();
|
|
||||||
|
for (int i = 0; i < foundObjects.Count; i++)
|
||||||
|
{
|
||||||
|
var f = foundObjects[i];
|
||||||
|
var dx = f.center.X - prev.X;
|
||||||
|
var dy = f.center.Y - prev.Y;
|
||||||
|
var d2 = dx * dx + dy * dy;
|
||||||
|
|
||||||
|
if (d2 < bestDist2)
|
||||||
|
{
|
||||||
|
bestDist2 = d2;
|
||||||
|
bestIndex = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return foundObjects[bestIndex];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Process StartFfmpegNvenc(
|
private Process StartFfmpegNvenc(
|
||||||
@ -297,8 +316,8 @@ public class TrackingSplitter(
|
|||||||
string[] passthrough)
|
string[] passthrough)
|
||||||
{
|
{
|
||||||
var pass = passthrough.Length > 0 ? string.Join(" ", passthrough) : "";
|
var pass = passthrough.Length > 0 ? string.Join(" ", passthrough) : "";
|
||||||
var skipSeconds = skip.TotalSeconds.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture);
|
var skipSeconds = skip.TotalSeconds.ToString("0.###", CultureInfo.InvariantCulture);
|
||||||
var fpsStr = fps.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture);
|
var fpsStr = fps.ToString("0.###", CultureInfo.InvariantCulture);
|
||||||
|
|
||||||
var args =
|
var args =
|
||||||
"-y " +
|
"-y " +
|
||||||
@ -337,10 +356,9 @@ public class TrackingSplitter(
|
|||||||
return process;
|
return process;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DrawText(Mat img, string text, int x, int y, Scalar color)
|
private static void DrawText(Mat img, string text, int x, int y, Scalar color)
|
||||||
{
|
{
|
||||||
Cv2.PutText(img, text, new Point(x, y),
|
Cv2.PutText(img, text, new Point(x, y),
|
||||||
HersheyFonts.HersheySimplex, 0.6, color, 2);
|
HersheyFonts.HersheySimplex, 0.6, color, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -163,12 +163,13 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
private void FillInputTensor(Mat rgb)
|
private void FillInputTensor(Mat rgb)
|
||||||
{
|
{
|
||||||
// rgb is 640x640, 3 channels, 8-bit
|
|
||||||
int height = _inputHeight;
|
int height = _inputHeight;
|
||||||
int width = _inputWidth;
|
int width = _inputWidth;
|
||||||
|
|
||||||
|
// NCHW: [1, 3, H, W]
|
||||||
|
int planeSize = height * width;
|
||||||
|
|
||||||
Span<float> dst = _inputBuffer.AsSpan();
|
Span<float> dst = _inputBuffer.AsSpan();
|
||||||
int dstIndex = 0;
|
|
||||||
|
|
||||||
unsafe
|
unsafe
|
||||||
{
|
{
|
||||||
@ -179,21 +180,22 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
|||||||
|
|
||||||
int srcIndex = 0;
|
int srcIndex = 0;
|
||||||
|
|
||||||
// Layout: CHW (1,3,H,W)
|
|
||||||
// We fill in RGB order, normalized to [0,1]
|
|
||||||
// Loop structured to be SIMD-friendly; JIT can vectorize the simple arithmetic.
|
|
||||||
for (int x = 0; x < width; x++)
|
for (int x = 0; x < width; x++)
|
||||||
{
|
{
|
||||||
byte r = rowSpan[srcIndex + 0];
|
byte r = rowSpan[srcIndex + 0];
|
||||||
byte g = rowSpan[srcIndex + 1];
|
byte g = rowSpan[srcIndex + 1];
|
||||||
byte b = rowSpan[srcIndex + 2];
|
byte b = rowSpan[srcIndex + 2];
|
||||||
|
|
||||||
dst[dstIndex + 0] = r * _inv255;
|
int offset = y * width + x;
|
||||||
dst[dstIndex + 1] = g * _inv255;
|
|
||||||
dst[dstIndex + 2] = b * _inv255;
|
// channel 0: R
|
||||||
|
dst[offset] = r * _inv255;
|
||||||
|
// channel 1: G
|
||||||
|
dst[planeSize + offset] = g * _inv255;
|
||||||
|
// channel 2: B
|
||||||
|
dst[2 * planeSize + offset] = b * _inv255;
|
||||||
|
|
||||||
srcIndex += 3;
|
srcIndex += 3;
|
||||||
dstIndex += 3;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user