diff --git a/KalmanTracker.cs b/KalmanTracker.cs index 0410858..07f5a79 100644 --- a/KalmanTracker.cs +++ b/KalmanTracker.cs @@ -14,6 +14,8 @@ internal sealed class KalmanTracker // Measurement noise (dynamic) private float _r = 1e-1f; + public float CurrentNoise => _r; + // Identity matrix private static readonly float[,] _i = { diff --git a/TrackingSplitter.cs b/TrackingSplitter.cs index b244b13..4b1069a 100644 --- a/TrackingSplitter.cs +++ b/TrackingSplitter.cs @@ -9,6 +9,16 @@ public class TrackingSplitter( Action drawProgress ) : LoggingBase(log, drawProgress) { + private const int LostFreezeFrames = 60; // 2 seconds at 30 FPS + private const float CameraEasing = 0.03f; + + enum TrackState + { + Tracking, + LostFreeze, + LostDrift + } + public async Task TrackAndExtract( string srcFileName, string destFileName, @@ -20,9 +30,6 @@ public class TrackingSplitter( string[] passthrough, bool debugOverlay) { - // ------------------------------ - // OpenCV VideoCapture (stable) - // ------------------------------ using var capture = new VideoCapture(srcFileName); if (!capture.IsOpened()) throw new Exception("Cannot open video"); @@ -34,59 +41,48 @@ public class TrackingSplitter( var fps = capture.Get(VideoCaptureProperties.Fps); var totalFrames = (int)(duration.TotalSeconds * fps); - if ( debugOverlay ) - { - cropHeight = videoHeight; - cropWidth = videoWidth; - } + var originalCropWidth = cropWidth; + var originalCropHeight = cropHeight; - Console.WriteLine($"[FaceTracker] skip={skip}, duration={duration}, fps={fps}, totalFrames={totalFrames}"); + Console.WriteLine($"[TrackingSplitter] skip={skip}, duration={duration}, fps={fps}, totalFrames={totalFrames}"); + + // encoder size depends on mode + var encWidth = debugOverlay ? videoWidth : originalCropWidth; + var encHeight = debugOverlay ? videoHeight : originalCropHeight; - // ------------------------------ - // FFmpeg one-pass encoder - // ------------------------------ var ffmpeg = StartFfmpegNvenc( srcFileName, destFileName, - cropWidth, - cropHeight, + encWidth, + encHeight, fps, skip, passthrough); using var stdin = ffmpeg.StandardInput.BaseStream; - // ------------------------------ - // Tracking state - // ------------------------------ - var frame = new Mat(); + var frame = new Mat(); var kalman = new KalmanTracker(); kalman.Reset(new Point2f(videoWidth / 2f, videoHeight / 2f)); - var lostFrames = 0; - var wasLost = false; - var reacquireBoostFrames = 20; - var reacquireCounter = 0; + var lostFrames = 0; + var reacquireCounter = 0; var cameraCenter = new Point2f(videoWidth / 2f, videoHeight / 2f); - var startTime = DateTime.UtcNow; + var startTime = DateTime.UtcNow; + var state = TrackState.Tracking; - // ------------------------------ - // Main loop - // ------------------------------ for (var i = 0; i < totalFrames; i++) { if (!capture.Read(frame) || frame.Empty()) break; - // Ensure continuous memory for detector Mat frameCont = frame.IsContinuous() ? frame : frame.Clone(); - Rect? objectBox = null; + Rect? objectBox = null; Point2f? objectCenter = null; - var objects = detector.DetectAll(frameCont, videoWidth, videoHeight); // list of (box, center) - + var objects = detector.DetectAll(frameCont, videoWidth, videoHeight); var primary = SelectTrackedObject(objects, kalman.LastMeasurement); if (primary.HasValue) @@ -95,60 +91,95 @@ public class TrackingSplitter( objectBox = primary.Value.box; } + bool isLost = !objectCenter.HasValue; - var isLost = !objectCenter.HasValue; - - // LOST OBJECT → drift toward center + // ------------------------------ + // LOST / REACQUIRE STATE MACHINE + // ------------------------------ if (isLost) { lostFrames++; - var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f); - var predicted = kalman.Update(null); - - var t = Math.Min(1f, lostFrames / 60f); - var ease = 0.02f * t; - - objectCenter = new Point2f( - predicted.X * (1 - ease) + fallbackCenter.X * ease, - predicted.Y * (1 - ease) + fallbackCenter.Y * ease); + if (lostFrames <= LostFreezeFrames) + { + // 1) LOST_FREEZE: freeze camera + state = TrackState.LostFreeze; + objectCenter = null; // Kalman predicts but camera won't move + } + else + { + // 2) LOST_DRIFT: drift camera to center + state = TrackState.LostDrift; + objectCenter = new Point2f(videoWidth / 2f, videoHeight / 2f); + } } else { - if (wasLost) - reacquireCounter = reacquireBoostFrames; - + // Object reacquired + state = TrackState.Tracking; lostFrames = 0; } - // SMOOTH REACQUISITION - if (reacquireCounter > 0) + // ------------------------------ + // KALMAN UPDATE + // ------------------------------ + Point2f smoothedCenter; + + if (state == TrackState.Tracking) { - var alpha = reacquireCounter / (float)reacquireBoostFrames; - var noise = 5e-2f + (1e-1f - 5e-2f) * (1 - alpha); - kalman.SetMeasurementNoise(noise); - reacquireCounter--; + smoothedCenter = kalman.Update(objectCenter); + + // Normal camera easing + float easing = 0.015f; // faster tracking + cameraCenter = new Point2f( + cameraCenter.X + (smoothedCenter.X - cameraCenter.X) * easing, + cameraCenter.Y + (smoothedCenter.Y - cameraCenter.Y) * easing); } - else + else if (state == TrackState.LostFreeze) { - kalman.SetMeasurementNoise(1e-1f); + // Freeze camera — do nothing + smoothedCenter = kalman.LastMeasurement ?? new Point2f(0,0); } + else // LOST_DRIFT + { + smoothedCenter = kalman.Update(objectCenter); - wasLost = isLost; + // Drift camera slowly to center + float driftEasing = 0.01f; + var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f); - var smoothedCenter = kalman.Update(objectCenter); - - var halfW = cropWidth / 2f; - var halfH = cropHeight / 2f; + cameraCenter = new Point2f( + cameraCenter.X + (fallbackCenter.X - cameraCenter.X) * driftEasing, + cameraCenter.Y + (fallbackCenter.Y - cameraCenter.Y) * driftEasing); + } + var halfW = originalCropWidth / 2f; + var halfH = originalCropHeight / 2f; smoothedCenter.X = Math.Clamp(smoothedCenter.X, halfW, videoWidth - halfW); smoothedCenter.Y = Math.Clamp(smoothedCenter.Y, halfH, videoHeight - halfH); - // CAMERA EASING - var easing = 0.003f; - cameraCenter = new Point2f( - cameraCenter.X + (smoothedCenter.X - cameraCenter.X) * easing, - cameraCenter.Y + (smoothedCenter.Y - cameraCenter.Y) * easing); + if (state == TrackState.Tracking) + { + // Normal tracking + smoothedCenter = kalman.Update(objectCenter); + + cameraCenter = new Point2f( + cameraCenter.X + (smoothedCenter.X - cameraCenter.X) * CameraEasing, + cameraCenter.Y + (smoothedCenter.Y - cameraCenter.Y) * CameraEasing); + } + else if (state == TrackState.LostFreeze) + { + // Freeze camera — do nothing + } + else if (state == TrackState.LostDrift) + { + // Drift camera slowly to center + var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f); + + cameraCenter = new Point2f( + cameraCenter.X + (fallbackCenter.X - cameraCenter.X) * 0.01f, + cameraCenter.Y + (fallbackCenter.Y - cameraCenter.Y) * 0.01f); + } cameraCenter.X = Math.Clamp(cameraCenter.X, halfW, videoWidth - halfW); cameraCenter.Y = Math.Clamp(cameraCenter.Y, halfH, videoHeight - halfH); @@ -156,13 +187,14 @@ public class TrackingSplitter( var x = (int)Math.Round(cameraCenter.X - halfW); var y = (int)Math.Round(cameraCenter.Y - halfH); - x = Math.Clamp(x, 0, videoWidth - cropWidth); - y = Math.Clamp(y, 0, videoHeight - cropHeight); + x = Math.Clamp(x, 0, videoWidth - originalCropWidth); + y = Math.Clamp(y, 0, videoHeight - originalCropHeight); - var roi = new Rect(x, y, cropWidth, cropHeight); + var roi = new Rect(x, y, originalCropWidth, originalCropHeight); if (debugOverlay) { + // overlays always drawn on frameCont if (objectBox.HasValue) { var fb = objectBox.Value; @@ -177,31 +209,48 @@ public class TrackingSplitter( Cv2.Rectangle(frameCont, roi, objectCenter.HasValue ? Scalar.Yellow : Scalar.Red, 3); + + DrawText(frameCont, $"Faces: {objects.Count}", 20, 40, Scalar.White); + DrawText(frameCont, $"LostFrames: {lostFrames}", 20, 70, Scalar.White); + DrawText(frameCont, $"Reacquire: {reacquireCounter}", 20, 100, Scalar.White); + DrawText(frameCont, $"Noise: {kalman.CurrentNoise:F3}", 20, 130, Scalar.White); + DrawText(frameCont, $"Camera: {cameraCenter.X:F1},{cameraCenter.Y:F1}", 20, 160, Scalar.White); } - // Crop ROI - using var cropped = new Mat(frameCont, roi); + if (debugOverlay) + { + // DEBUG MODE: write FULL FRAME with overlays + var bgr = frameCont.IsContinuous() ? frameCont : frameCont.Clone(); - // Always clone to ensure contiguous memory - using var bgr = cropped.Clone(); + var bytes = bgr.Rows * bgr.Cols * bgr.ElemSize(); + var buffer = new byte[bytes]; + Marshal.Copy(bgr.Data, buffer, 0, bytes); + stdin.Write(buffer, 0, bytes); - // Write to FFmpeg - var bytes = bgr.Rows * bgr.Cols * bgr.ElemSize(); - var buffer = new byte[bytes]; - Marshal.Copy(bgr.Data, buffer, 0, bytes); - stdin.Write(buffer, 0, bytes); + if (!ReferenceEquals(bgr, frameCont)) + bgr.Dispose(); + } + else + { + // PRODUCTION MODE: actual crop + using var cropped = new Mat(frameCont, roi); + using var bgr = cropped.Clone(); + + var bytes = bgr.Rows * bgr.Cols * bgr.ElemSize(); + var buffer = new byte[bytes]; + Marshal.Copy(bgr.Data, buffer, 0, bytes); + stdin.Write(buffer, 0, bytes); + } - // Dispose frameCont only if it was a clone if (!ReferenceEquals(frameCont, frame)) frameCont.Dispose(); - // Progress - var elapsed = DateTime.UtcNow - startTime; - var progress = (double)i / totalFrames; - var speed = i > 0 ? i / elapsed.TotalSeconds : 0.0; + var elapsed = DateTime.UtcNow - startTime; + var progress = (double)i / totalFrames; + var speed = i > 0 ? i / elapsed.TotalSeconds : 0.0; var remainingFrames = totalFrames - i; - var etaSeconds = speed > 0 ? remainingFrames / speed : 0; - var eta = TimeSpan.FromSeconds(etaSeconds); + var etaSeconds = speed > 0 ? remainingFrames / speed : 0; + var eta = TimeSpan.FromSeconds(etaSeconds); DrawProgress(progress, eta, speed); } @@ -223,13 +272,11 @@ public class TrackingSplitter( if (!previousCenter.HasValue) { - // no previous face → pick largest return foundObjects .OrderByDescending(f => f.box.Width * f.box.Height) .First(); } - // pick the object closest to previous center return foundObjects .OrderBy(f => { @@ -249,57 +296,33 @@ public class TrackingSplitter( TimeSpan skip, string[] passthrough) { - var pass = passthrough.Length > 0 ? string.Join(" ", passthrough) : ""; + var pass = passthrough.Length > 0 ? string.Join(" ", passthrough) : ""; var skipSeconds = skip.TotalSeconds.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture); - var fpsStr = fps.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture); + var fpsStr = fps.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture); - // One-pass pipeline: - // - rawvideo from stdin - // - audio from source MP4 (seeked) - // - NVENC video encode - // - AAC audio copy/encode - // - // This is the same structure your original OpenCV pipeline used. - // - // IMPORTANT: - // Because OpenCV reliably reads the full segment, - // FFmpeg will NOT close stdin early anymore. - // var args = - "-y " + - // VIDEO INPUT (raw BGR24 from stdin) - $"-f rawvideo -pix_fmt bgr24 -s {width}x{height} -r {fpsStr} -i - " + - - // AUDIO INPUT (seeked) - $"-ss {skipSeconds} -i \"{srcFileName}\" " + - - // MAP streams - "-map 0:v:0 -map 1:a:0? -shortest " + - - // VIDEO ENCODE - "-c:v h264_nvenc -preset p4 -b:v 8M -pix_fmt yuv420p " + - - // AUDIO ENCODE/COPY - "-c:a aac -b:a 192k " + - - // Extra passthrough flags - pass + $" \"{destFileName}\""; + "-y " + + $"-f rawvideo -pix_fmt bgr24 -s {width}x{height} -r {fpsStr} -i - " + + $"-ss {skipSeconds} -i \"{srcFileName}\" " + + "-map 0:v:0 -map 1:a:0? -shortest " + + "-c:v h264_nvenc -preset p4 -b:v 8M -pix_fmt yuv420p " + + "-c:a aac -b:a 192k " + + pass + $" \"{destFileName}\""; var psi = new ProcessStartInfo { - FileName = "ffmpeg", - Arguments = args, - RedirectStandardInput = true, - RedirectStandardError = true, + FileName = "ffmpeg", + Arguments = args, + RedirectStandardInput = true, + RedirectStandardError = true, RedirectStandardOutput = true, - UseShellExecute = false, - CreateNoWindow = true + UseShellExecute = false, + CreateNoWindow = true }; var process = new Process { StartInfo = psi }; process.Start(); - // async stderr reader _ = Task.Run(() => { try @@ -314,4 +337,10 @@ public class TrackingSplitter( return process; } + void DrawText(Mat img, string text, int x, int y, Scalar color) + { + Cv2.PutText(img, text, new Point(x, y), + HersheyFonts.HersheySimplex, 0.6, color, 2); + } + } diff --git a/YoloOnnxObjectDetector.cs b/YoloOnnxObjectDetector.cs index 7f86587..ac23ff5 100644 --- a/YoloOnnxObjectDetector.cs +++ b/YoloOnnxObjectDetector.cs @@ -81,7 +81,7 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp h = Math.Clamp(h, 1, frameCont.Height - y); // Ignore detections starting in the lower 1/3 of the frame - if (y > frameCont.Height * (2f / 3f)) + if (y > frameCont.Height * (0.5f)) continue; var rect = new Rect(x, y, w, h);