Better debug overlay. Changes to camera tracking. Still not good enough.

This commit is contained in:
Alexander Shabarshov 2026-05-10 15:50:57 +01:00
parent fd1dab02a9
commit 188d7e88d9
3 changed files with 155 additions and 124 deletions

View File

@ -14,6 +14,8 @@ internal sealed class KalmanTracker
// Measurement noise (dynamic) // Measurement noise (dynamic)
private float _r = 1e-1f; private float _r = 1e-1f;
public float CurrentNoise => _r;
// Identity matrix // Identity matrix
private static readonly float[,] _i = private static readonly float[,] _i =
{ {

View File

@ -9,6 +9,16 @@ public class TrackingSplitter(
Action<double /*percent*/, TimeSpan /*duration*/, double /*fps*/> drawProgress Action<double /*percent*/, TimeSpan /*duration*/, double /*fps*/> drawProgress
) : LoggingBase(log, drawProgress) ) : LoggingBase(log, drawProgress)
{ {
// Number of consecutive frames without a detection during which the camera is held
// in place (TrackState.LostFreeze). Once lostFrames exceeds this value the main loop
// switches to TrackState.LostDrift.
// NOTE(review): the "2 seconds" figure below only holds for 30 FPS input; the actual
// fps is read from the capture at runtime, so the real duration scales with it.
private const int LostFreezeFrames = 60; // 2 seconds at 30 FPS
// Per-frame interpolation factor used while tracking: each frame cameraCenter moves
// this fraction of the remaining distance toward the smoothed object centre.
private const float CameraEasing = 0.03f;
/// <summary>
/// Phase of the camera-follow state machine, chosen once per frame from
/// whether an object was detected and how long it has been missing.
/// </summary>
enum TrackState
{
    /// <summary>An object was detected this frame; the camera eases toward it.</summary>
    Tracking = 0,

    /// <summary>
    /// No object detected and the lost-frame counter is still within
    /// <c>LostFreezeFrames</c>; the camera is held where it is.
    /// </summary>
    LostFreeze = 1,

    /// <summary>
    /// No object detected for longer than <c>LostFreezeFrames</c>; the camera
    /// drifts slowly back toward the centre of the frame.
    /// </summary>
    LostDrift = 2
}
public async Task TrackAndExtract( public async Task TrackAndExtract(
string srcFileName, string srcFileName,
string destFileName, string destFileName,
@ -20,9 +30,6 @@ public class TrackingSplitter(
string[] passthrough, string[] passthrough,
bool debugOverlay) bool debugOverlay)
{ {
// ------------------------------
// OpenCV VideoCapture (stable)
// ------------------------------
using var capture = new VideoCapture(srcFileName); using var capture = new VideoCapture(srcFileName);
if (!capture.IsOpened()) if (!capture.IsOpened())
throw new Exception("Cannot open video"); throw new Exception("Cannot open video");
@ -34,59 +41,48 @@ public class TrackingSplitter(
var fps = capture.Get(VideoCaptureProperties.Fps); var fps = capture.Get(VideoCaptureProperties.Fps);
var totalFrames = (int)(duration.TotalSeconds * fps); var totalFrames = (int)(duration.TotalSeconds * fps);
if ( debugOverlay ) var originalCropWidth = cropWidth;
{ var originalCropHeight = cropHeight;
cropHeight = videoHeight;
cropWidth = videoWidth;
}
Console.WriteLine($"[FaceTracker] skip={skip}, duration={duration}, fps={fps}, totalFrames={totalFrames}"); Console.WriteLine($"[TrackingSplitter] skip={skip}, duration={duration}, fps={fps}, totalFrames={totalFrames}");
// encoder size depends on mode
var encWidth = debugOverlay ? videoWidth : originalCropWidth;
var encHeight = debugOverlay ? videoHeight : originalCropHeight;
// ------------------------------
// FFmpeg one-pass encoder
// ------------------------------
var ffmpeg = StartFfmpegNvenc( var ffmpeg = StartFfmpegNvenc(
srcFileName, srcFileName,
destFileName, destFileName,
cropWidth, encWidth,
cropHeight, encHeight,
fps, fps,
skip, skip,
passthrough); passthrough);
using var stdin = ffmpeg.StandardInput.BaseStream; using var stdin = ffmpeg.StandardInput.BaseStream;
// ------------------------------
// Tracking state
// ------------------------------
var frame = new Mat(); var frame = new Mat();
var kalman = new KalmanTracker(); var kalman = new KalmanTracker();
kalman.Reset(new Point2f(videoWidth / 2f, videoHeight / 2f)); kalman.Reset(new Point2f(videoWidth / 2f, videoHeight / 2f));
var lostFrames = 0; var lostFrames = 0;
var wasLost = false;
var reacquireBoostFrames = 20;
var reacquireCounter = 0; var reacquireCounter = 0;
var cameraCenter = new Point2f(videoWidth / 2f, videoHeight / 2f); var cameraCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
var startTime = DateTime.UtcNow; var startTime = DateTime.UtcNow;
var state = TrackState.Tracking;
// ------------------------------
// Main loop
// ------------------------------
for (var i = 0; i < totalFrames; i++) for (var i = 0; i < totalFrames; i++)
{ {
if (!capture.Read(frame) || frame.Empty()) if (!capture.Read(frame) || frame.Empty())
break; break;
// Ensure continuous memory for detector
Mat frameCont = frame.IsContinuous() ? frame : frame.Clone(); Mat frameCont = frame.IsContinuous() ? frame : frame.Clone();
Rect? objectBox = null; Rect? objectBox = null;
Point2f? objectCenter = null; Point2f? objectCenter = null;
var objects = detector.DetectAll(frameCont, videoWidth, videoHeight); // list of (box, center) var objects = detector.DetectAll(frameCont, videoWidth, videoHeight);
var primary = SelectTrackedObject(objects, kalman.LastMeasurement); var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
if (primary.HasValue) if (primary.HasValue)
@ -95,60 +91,95 @@ public class TrackingSplitter(
objectBox = primary.Value.box; objectBox = primary.Value.box;
} }
bool isLost = !objectCenter.HasValue;
var isLost = !objectCenter.HasValue; // ------------------------------
// LOST / REACQUIRE STATE MACHINE
// LOST OBJECT → drift toward center // ------------------------------
if (isLost) if (isLost)
{ {
lostFrames++; lostFrames++;
var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f); if (lostFrames <= LostFreezeFrames)
var predicted = kalman.Update(null); {
// 1) LOST_FREEZE: freeze camera
var t = Math.Min(1f, lostFrames / 60f); state = TrackState.LostFreeze;
var ease = 0.02f * t; objectCenter = null; // Kalman predicts but camera won't move
objectCenter = new Point2f(
predicted.X * (1 - ease) + fallbackCenter.X * ease,
predicted.Y * (1 - ease) + fallbackCenter.Y * ease);
} }
else else
{ {
if (wasLost) // 2) LOST_DRIFT: drift camera to center
reacquireCounter = reacquireBoostFrames; state = TrackState.LostDrift;
objectCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
}
}
else
{
// Object reacquired
state = TrackState.Tracking;
lostFrames = 0; lostFrames = 0;
} }
// SMOOTH REACQUISITION // ------------------------------
if (reacquireCounter > 0) // KALMAN UPDATE
// ------------------------------
Point2f smoothedCenter;
if (state == TrackState.Tracking)
{ {
var alpha = reacquireCounter / (float)reacquireBoostFrames; smoothedCenter = kalman.Update(objectCenter);
var noise = 5e-2f + (1e-1f - 5e-2f) * (1 - alpha);
kalman.SetMeasurementNoise(noise); // Normal camera easing
reacquireCounter--; float easing = 0.015f; // faster tracking
cameraCenter = new Point2f(
cameraCenter.X + (smoothedCenter.X - cameraCenter.X) * easing,
cameraCenter.Y + (smoothedCenter.Y - cameraCenter.Y) * easing);
} }
else else if (state == TrackState.LostFreeze)
{ {
kalman.SetMeasurementNoise(1e-1f); // Freeze camera — do nothing
smoothedCenter = kalman.LastMeasurement ?? new Point2f(0,0);
} }
else // LOST_DRIFT
{
smoothedCenter = kalman.Update(objectCenter);
wasLost = isLost; // Drift camera slowly to center
float driftEasing = 0.01f;
var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
var smoothedCenter = kalman.Update(objectCenter); cameraCenter = new Point2f(
cameraCenter.X + (fallbackCenter.X - cameraCenter.X) * driftEasing,
var halfW = cropWidth / 2f; cameraCenter.Y + (fallbackCenter.Y - cameraCenter.Y) * driftEasing);
var halfH = cropHeight / 2f; }
var halfW = originalCropWidth / 2f;
var halfH = originalCropHeight / 2f;
smoothedCenter.X = Math.Clamp(smoothedCenter.X, halfW, videoWidth - halfW); smoothedCenter.X = Math.Clamp(smoothedCenter.X, halfW, videoWidth - halfW);
smoothedCenter.Y = Math.Clamp(smoothedCenter.Y, halfH, videoHeight - halfH); smoothedCenter.Y = Math.Clamp(smoothedCenter.Y, halfH, videoHeight - halfH);
// CAMERA EASING if (state == TrackState.Tracking)
var easing = 0.003f; {
// Normal tracking
smoothedCenter = kalman.Update(objectCenter);
cameraCenter = new Point2f( cameraCenter = new Point2f(
cameraCenter.X + (smoothedCenter.X - cameraCenter.X) * easing, cameraCenter.X + (smoothedCenter.X - cameraCenter.X) * CameraEasing,
cameraCenter.Y + (smoothedCenter.Y - cameraCenter.Y) * easing); cameraCenter.Y + (smoothedCenter.Y - cameraCenter.Y) * CameraEasing);
}
else if (state == TrackState.LostFreeze)
{
// Freeze camera — do nothing
}
else if (state == TrackState.LostDrift)
{
// Drift camera slowly to center
var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
cameraCenter = new Point2f(
cameraCenter.X + (fallbackCenter.X - cameraCenter.X) * 0.01f,
cameraCenter.Y + (fallbackCenter.Y - cameraCenter.Y) * 0.01f);
}
cameraCenter.X = Math.Clamp(cameraCenter.X, halfW, videoWidth - halfW); cameraCenter.X = Math.Clamp(cameraCenter.X, halfW, videoWidth - halfW);
cameraCenter.Y = Math.Clamp(cameraCenter.Y, halfH, videoHeight - halfH); cameraCenter.Y = Math.Clamp(cameraCenter.Y, halfH, videoHeight - halfH);
@ -156,13 +187,14 @@ public class TrackingSplitter(
var x = (int)Math.Round(cameraCenter.X - halfW); var x = (int)Math.Round(cameraCenter.X - halfW);
var y = (int)Math.Round(cameraCenter.Y - halfH); var y = (int)Math.Round(cameraCenter.Y - halfH);
x = Math.Clamp(x, 0, videoWidth - cropWidth); x = Math.Clamp(x, 0, videoWidth - originalCropWidth);
y = Math.Clamp(y, 0, videoHeight - cropHeight); y = Math.Clamp(y, 0, videoHeight - originalCropHeight);
var roi = new Rect(x, y, cropWidth, cropHeight); var roi = new Rect(x, y, originalCropWidth, originalCropHeight);
if (debugOverlay) if (debugOverlay)
{ {
// overlays always drawn on frameCont
if (objectBox.HasValue) if (objectBox.HasValue)
{ {
var fb = objectBox.Value; var fb = objectBox.Value;
@ -177,25 +209,42 @@ public class TrackingSplitter(
Cv2.Rectangle(frameCont, roi, Cv2.Rectangle(frameCont, roi,
objectCenter.HasValue ? Scalar.Yellow : Scalar.Red, 3); objectCenter.HasValue ? Scalar.Yellow : Scalar.Red, 3);
DrawText(frameCont, $"Faces: {objects.Count}", 20, 40, Scalar.White);
DrawText(frameCont, $"LostFrames: {lostFrames}", 20, 70, Scalar.White);
DrawText(frameCont, $"Reacquire: {reacquireCounter}", 20, 100, Scalar.White);
DrawText(frameCont, $"Noise: {kalman.CurrentNoise:F3}", 20, 130, Scalar.White);
DrawText(frameCont, $"Camera: {cameraCenter.X:F1},{cameraCenter.Y:F1}", 20, 160, Scalar.White);
} }
// Crop ROI if (debugOverlay)
using var cropped = new Mat(frameCont, roi); {
// DEBUG MODE: write FULL FRAME with overlays
var bgr = frameCont.IsContinuous() ? frameCont : frameCont.Clone();
// Always clone to ensure contiguous memory
using var bgr = cropped.Clone();
// Write to FFmpeg
var bytes = bgr.Rows * bgr.Cols * bgr.ElemSize(); var bytes = bgr.Rows * bgr.Cols * bgr.ElemSize();
var buffer = new byte[bytes]; var buffer = new byte[bytes];
Marshal.Copy(bgr.Data, buffer, 0, bytes); Marshal.Copy(bgr.Data, buffer, 0, bytes);
stdin.Write(buffer, 0, bytes); stdin.Write(buffer, 0, bytes);
// Dispose frameCont only if it was a clone if (!ReferenceEquals(bgr, frameCont))
bgr.Dispose();
}
else
{
// PRODUCTION MODE: actual crop
using var cropped = new Mat(frameCont, roi);
using var bgr = cropped.Clone();
var bytes = bgr.Rows * bgr.Cols * bgr.ElemSize();
var buffer = new byte[bytes];
Marshal.Copy(bgr.Data, buffer, 0, bytes);
stdin.Write(buffer, 0, bytes);
}
if (!ReferenceEquals(frameCont, frame)) if (!ReferenceEquals(frameCont, frame))
frameCont.Dispose(); frameCont.Dispose();
// Progress
var elapsed = DateTime.UtcNow - startTime; var elapsed = DateTime.UtcNow - startTime;
var progress = (double)i / totalFrames; var progress = (double)i / totalFrames;
var speed = i > 0 ? i / elapsed.TotalSeconds : 0.0; var speed = i > 0 ? i / elapsed.TotalSeconds : 0.0;
@ -223,13 +272,11 @@ public class TrackingSplitter(
if (!previousCenter.HasValue) if (!previousCenter.HasValue)
{ {
// no previous face → pick largest
return foundObjects return foundObjects
.OrderByDescending(f => f.box.Width * f.box.Height) .OrderByDescending(f => f.box.Width * f.box.Height)
.First(); .First();
} }
// pick the object closest to previous center
return foundObjects return foundObjects
.OrderBy(f => .OrderBy(f =>
{ {
@ -253,36 +300,13 @@ public class TrackingSplitter(
var skipSeconds = skip.TotalSeconds.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture); var skipSeconds = skip.TotalSeconds.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture);
var fpsStr = fps.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture); var fpsStr = fps.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture);
// One-pass pipeline:
// - rawvideo from stdin
// - audio from source MP4 (seeked)
// - NVENC video encode
// - AAC audio copy/encode
//
// This is the same structure your original OpenCV pipeline used.
//
// IMPORTANT:
// Because OpenCV reliably reads the full segment,
// FFmpeg will NOT close stdin early anymore.
//
var args = var args =
"-y " + "-y " +
// VIDEO INPUT (raw BGR24 from stdin)
$"-f rawvideo -pix_fmt bgr24 -s {width}x{height} -r {fpsStr} -i - " + $"-f rawvideo -pix_fmt bgr24 -s {width}x{height} -r {fpsStr} -i - " +
// AUDIO INPUT (seeked)
$"-ss {skipSeconds} -i \"{srcFileName}\" " + $"-ss {skipSeconds} -i \"{srcFileName}\" " +
// MAP streams
"-map 0:v:0 -map 1:a:0? -shortest " + "-map 0:v:0 -map 1:a:0? -shortest " +
// VIDEO ENCODE
"-c:v h264_nvenc -preset p4 -b:v 8M -pix_fmt yuv420p " + "-c:v h264_nvenc -preset p4 -b:v 8M -pix_fmt yuv420p " +
// AUDIO ENCODE/COPY
"-c:a aac -b:a 192k " + "-c:a aac -b:a 192k " +
// Extra passthrough flags
pass + $" \"{destFileName}\""; pass + $" \"{destFileName}\"";
var psi = new ProcessStartInfo var psi = new ProcessStartInfo
@ -299,7 +323,6 @@ public class TrackingSplitter(
var process = new Process { StartInfo = psi }; var process = new Process { StartInfo = psi };
process.Start(); process.Start();
// async stderr reader
_ = Task.Run(() => _ = Task.Run(() =>
{ {
try try
@ -314,4 +337,10 @@ public class TrackingSplitter(
return process; return process;
} }
// Renders one line of debug text onto `img`, anchored at pixel position (x, y),
// in the given colour. Font face, scale (0.6) and stroke thickness (2) are fixed
// so that every overlay label looks the same.
void DrawText(Mat img, string text, int x, int y, Scalar color)
{
    var anchor = new Point(x, y);
    Cv2.PutText(img, text, anchor, HersheyFonts.HersheySimplex, 0.6, color, 2);
}
} }

View File

@ -81,7 +81,7 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
h = Math.Clamp(h, 1, frameCont.Height - y); h = Math.Clamp(h, 1, frameCont.Height - y);
// Ignore detections starting in the lower 1/3 of the frame // Ignore detections starting in the lower 1/3 of the frame
if (y > frameCont.Height * (2f / 3f)) if (y > frameCont.Height * (0.5f))
continue; continue;
var rect = new Rect(x, y, w, h); var rect = new Rect(x, y, w, h);