mirror of
https://github.com/unclshura/splitter.git
synced 2026-06-22 00:22:01 +00:00
Better debug overlay. Changes to camera tracking. Still not good enough.
This commit is contained in:
parent
fd1dab02a9
commit
188d7e88d9
@ -14,6 +14,8 @@ internal sealed class KalmanTracker
|
||||
// Measurement noise (dynamic)
|
||||
private float _r = 1e-1f;
|
||||
|
||||
public float CurrentNoise => _r;
|
||||
|
||||
// Identity matrix
|
||||
private static readonly float[,] _i =
|
||||
{
|
||||
|
||||
@ -9,6 +9,16 @@ public class TrackingSplitter(
|
||||
Action<double /*percent*/, TimeSpan /*duration*/, double /*fps*/> drawProgress
|
||||
) : LoggingBase(log, drawProgress)
|
||||
{
|
||||
private const int LostFreezeFrames = 60; // 2 seconds at 30 FPS
|
||||
private const float CameraEasing = 0.03f;
|
||||
|
||||
/// <summary>
/// Camera-tracking state, chosen on every frame of the main loop from whether
/// an object was detected and how many consecutive frames it has been missing.
/// </summary>
enum TrackState
|
||||
{
|
||||
// An object was detected this frame; the camera eases toward the Kalman-smoothed center.
Tracking,
|
||||
// Object missing for at most LostFreezeFrames consecutive frames; the camera is held in place.
LostFreeze,
|
||||
// Object missing for more than LostFreezeFrames frames; the camera drifts slowly toward the frame center.
LostDrift
|
||||
}
|
||||
|
||||
public async Task TrackAndExtract(
|
||||
string srcFileName,
|
||||
string destFileName,
|
||||
@ -20,9 +30,6 @@ public class TrackingSplitter(
|
||||
string[] passthrough,
|
||||
bool debugOverlay)
|
||||
{
|
||||
// ------------------------------
|
||||
// OpenCV VideoCapture (stable)
|
||||
// ------------------------------
|
||||
using var capture = new VideoCapture(srcFileName);
|
||||
if (!capture.IsOpened())
|
||||
throw new Exception("Cannot open video");
|
||||
@ -34,59 +41,48 @@ public class TrackingSplitter(
|
||||
var fps = capture.Get(VideoCaptureProperties.Fps);
|
||||
var totalFrames = (int)(duration.TotalSeconds * fps);
|
||||
|
||||
if ( debugOverlay )
|
||||
{
|
||||
cropHeight = videoHeight;
|
||||
cropWidth = videoWidth;
|
||||
}
|
||||
var originalCropWidth = cropWidth;
|
||||
var originalCropHeight = cropHeight;
|
||||
|
||||
Console.WriteLine($"[FaceTracker] skip={skip}, duration={duration}, fps={fps}, totalFrames={totalFrames}");
|
||||
Console.WriteLine($"[TrackingSplitter] skip={skip}, duration={duration}, fps={fps}, totalFrames={totalFrames}");
|
||||
|
||||
// encoder size depends on mode
|
||||
var encWidth = debugOverlay ? videoWidth : originalCropWidth;
|
||||
var encHeight = debugOverlay ? videoHeight : originalCropHeight;
|
||||
|
||||
// ------------------------------
|
||||
// FFmpeg one-pass encoder
|
||||
// ------------------------------
|
||||
var ffmpeg = StartFfmpegNvenc(
|
||||
srcFileName,
|
||||
destFileName,
|
||||
cropWidth,
|
||||
cropHeight,
|
||||
encWidth,
|
||||
encHeight,
|
||||
fps,
|
||||
skip,
|
||||
passthrough);
|
||||
|
||||
using var stdin = ffmpeg.StandardInput.BaseStream;
|
||||
|
||||
// ------------------------------
|
||||
// Tracking state
|
||||
// ------------------------------
|
||||
var frame = new Mat();
|
||||
var frame = new Mat();
|
||||
var kalman = new KalmanTracker();
|
||||
kalman.Reset(new Point2f(videoWidth / 2f, videoHeight / 2f));
|
||||
|
||||
var lostFrames = 0;
|
||||
var wasLost = false;
|
||||
var reacquireBoostFrames = 20;
|
||||
var reacquireCounter = 0;
|
||||
var lostFrames = 0;
|
||||
var reacquireCounter = 0;
|
||||
|
||||
var cameraCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
||||
var startTime = DateTime.UtcNow;
|
||||
var startTime = DateTime.UtcNow;
|
||||
var state = TrackState.Tracking;
|
||||
|
||||
// ------------------------------
|
||||
// Main loop
|
||||
// ------------------------------
|
||||
for (var i = 0; i < totalFrames; i++)
|
||||
{
|
||||
if (!capture.Read(frame) || frame.Empty())
|
||||
break;
|
||||
|
||||
// Ensure continuous memory for detector
|
||||
Mat frameCont = frame.IsContinuous() ? frame : frame.Clone();
|
||||
|
||||
Rect? objectBox = null;
|
||||
Rect? objectBox = null;
|
||||
Point2f? objectCenter = null;
|
||||
|
||||
var objects = detector.DetectAll(frameCont, videoWidth, videoHeight); // list of (box, center)
|
||||
|
||||
var objects = detector.DetectAll(frameCont, videoWidth, videoHeight);
|
||||
var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
|
||||
|
||||
if (primary.HasValue)
|
||||
@ -95,60 +91,95 @@ public class TrackingSplitter(
|
||||
objectBox = primary.Value.box;
|
||||
}
|
||||
|
||||
bool isLost = !objectCenter.HasValue;
|
||||
|
||||
var isLost = !objectCenter.HasValue;
|
||||
|
||||
// LOST OBJECT → drift toward center
|
||||
// ------------------------------
|
||||
// LOST / REACQUIRE STATE MACHINE
|
||||
// ------------------------------
|
||||
if (isLost)
|
||||
{
|
||||
lostFrames++;
|
||||
|
||||
var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
||||
var predicted = kalman.Update(null);
|
||||
|
||||
var t = Math.Min(1f, lostFrames / 60f);
|
||||
var ease = 0.02f * t;
|
||||
|
||||
objectCenter = new Point2f(
|
||||
predicted.X * (1 - ease) + fallbackCenter.X * ease,
|
||||
predicted.Y * (1 - ease) + fallbackCenter.Y * ease);
|
||||
if (lostFrames <= LostFreezeFrames)
|
||||
{
|
||||
// 1) LOST_FREEZE: freeze camera
|
||||
state = TrackState.LostFreeze;
|
||||
objectCenter = null; // Kalman predicts but camera won't move
|
||||
}
|
||||
else
|
||||
{
|
||||
// 2) LOST_DRIFT: drift camera to center
|
||||
state = TrackState.LostDrift;
|
||||
objectCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (wasLost)
|
||||
reacquireCounter = reacquireBoostFrames;
|
||||
|
||||
// Object reacquired
|
||||
state = TrackState.Tracking;
|
||||
lostFrames = 0;
|
||||
}
|
||||
|
||||
// SMOOTH REACQUISITION
|
||||
if (reacquireCounter > 0)
|
||||
// ------------------------------
|
||||
// KALMAN UPDATE
|
||||
// ------------------------------
|
||||
Point2f smoothedCenter;
|
||||
|
||||
if (state == TrackState.Tracking)
|
||||
{
|
||||
var alpha = reacquireCounter / (float)reacquireBoostFrames;
|
||||
var noise = 5e-2f + (1e-1f - 5e-2f) * (1 - alpha);
|
||||
kalman.SetMeasurementNoise(noise);
|
||||
reacquireCounter--;
|
||||
smoothedCenter = kalman.Update(objectCenter);
|
||||
|
||||
// Normal camera easing
|
||||
float easing = 0.015f; // faster tracking
|
||||
cameraCenter = new Point2f(
|
||||
cameraCenter.X + (smoothedCenter.X - cameraCenter.X) * easing,
|
||||
cameraCenter.Y + (smoothedCenter.Y - cameraCenter.Y) * easing);
|
||||
}
|
||||
else
|
||||
else if (state == TrackState.LostFreeze)
|
||||
{
|
||||
kalman.SetMeasurementNoise(1e-1f);
|
||||
// Freeze camera — do nothing
|
||||
smoothedCenter = kalman.LastMeasurement ?? new Point2f(0,0);
|
||||
}
|
||||
else // LOST_DRIFT
|
||||
{
|
||||
smoothedCenter = kalman.Update(objectCenter);
|
||||
|
||||
wasLost = isLost;
|
||||
// Drift camera slowly to center
|
||||
float driftEasing = 0.01f;
|
||||
var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
||||
|
||||
var smoothedCenter = kalman.Update(objectCenter);
|
||||
|
||||
var halfW = cropWidth / 2f;
|
||||
var halfH = cropHeight / 2f;
|
||||
cameraCenter = new Point2f(
|
||||
cameraCenter.X + (fallbackCenter.X - cameraCenter.X) * driftEasing,
|
||||
cameraCenter.Y + (fallbackCenter.Y - cameraCenter.Y) * driftEasing);
|
||||
}
|
||||
var halfW = originalCropWidth / 2f;
|
||||
var halfH = originalCropHeight / 2f;
|
||||
|
||||
smoothedCenter.X = Math.Clamp(smoothedCenter.X, halfW, videoWidth - halfW);
|
||||
smoothedCenter.Y = Math.Clamp(smoothedCenter.Y, halfH, videoHeight - halfH);
|
||||
|
||||
// CAMERA EASING
|
||||
var easing = 0.003f;
|
||||
cameraCenter = new Point2f(
|
||||
cameraCenter.X + (smoothedCenter.X - cameraCenter.X) * easing,
|
||||
cameraCenter.Y + (smoothedCenter.Y - cameraCenter.Y) * easing);
|
||||
if (state == TrackState.Tracking)
|
||||
{
|
||||
// Normal tracking
|
||||
smoothedCenter = kalman.Update(objectCenter);
|
||||
|
||||
cameraCenter = new Point2f(
|
||||
cameraCenter.X + (smoothedCenter.X - cameraCenter.X) * CameraEasing,
|
||||
cameraCenter.Y + (smoothedCenter.Y - cameraCenter.Y) * CameraEasing);
|
||||
}
|
||||
else if (state == TrackState.LostFreeze)
|
||||
{
|
||||
// Freeze camera — do nothing
|
||||
}
|
||||
else if (state == TrackState.LostDrift)
|
||||
{
|
||||
// Drift camera slowly to center
|
||||
var fallbackCenter = new Point2f(videoWidth / 2f, videoHeight / 2f);
|
||||
|
||||
cameraCenter = new Point2f(
|
||||
cameraCenter.X + (fallbackCenter.X - cameraCenter.X) * 0.01f,
|
||||
cameraCenter.Y + (fallbackCenter.Y - cameraCenter.Y) * 0.01f);
|
||||
}
|
||||
|
||||
cameraCenter.X = Math.Clamp(cameraCenter.X, halfW, videoWidth - halfW);
|
||||
cameraCenter.Y = Math.Clamp(cameraCenter.Y, halfH, videoHeight - halfH);
|
||||
@ -156,13 +187,14 @@ public class TrackingSplitter(
|
||||
var x = (int)Math.Round(cameraCenter.X - halfW);
|
||||
var y = (int)Math.Round(cameraCenter.Y - halfH);
|
||||
|
||||
x = Math.Clamp(x, 0, videoWidth - cropWidth);
|
||||
y = Math.Clamp(y, 0, videoHeight - cropHeight);
|
||||
x = Math.Clamp(x, 0, videoWidth - originalCropWidth);
|
||||
y = Math.Clamp(y, 0, videoHeight - originalCropHeight);
|
||||
|
||||
var roi = new Rect(x, y, cropWidth, cropHeight);
|
||||
var roi = new Rect(x, y, originalCropWidth, originalCropHeight);
|
||||
|
||||
if (debugOverlay)
|
||||
{
|
||||
// overlays always drawn on frameCont
|
||||
if (objectBox.HasValue)
|
||||
{
|
||||
var fb = objectBox.Value;
|
||||
@ -177,31 +209,48 @@ public class TrackingSplitter(
|
||||
|
||||
Cv2.Rectangle(frameCont, roi,
|
||||
objectCenter.HasValue ? Scalar.Yellow : Scalar.Red, 3);
|
||||
|
||||
DrawText(frameCont, $"Faces: {objects.Count}", 20, 40, Scalar.White);
|
||||
DrawText(frameCont, $"LostFrames: {lostFrames}", 20, 70, Scalar.White);
|
||||
DrawText(frameCont, $"Reacquire: {reacquireCounter}", 20, 100, Scalar.White);
|
||||
DrawText(frameCont, $"Noise: {kalman.CurrentNoise:F3}", 20, 130, Scalar.White);
|
||||
DrawText(frameCont, $"Camera: {cameraCenter.X:F1},{cameraCenter.Y:F1}", 20, 160, Scalar.White);
|
||||
}
|
||||
|
||||
// Crop ROI
|
||||
using var cropped = new Mat(frameCont, roi);
|
||||
if (debugOverlay)
|
||||
{
|
||||
// DEBUG MODE: write FULL FRAME with overlays
|
||||
var bgr = frameCont.IsContinuous() ? frameCont : frameCont.Clone();
|
||||
|
||||
// Always clone to ensure contiguous memory
|
||||
using var bgr = cropped.Clone();
|
||||
var bytes = bgr.Rows * bgr.Cols * bgr.ElemSize();
|
||||
var buffer = new byte[bytes];
|
||||
Marshal.Copy(bgr.Data, buffer, 0, bytes);
|
||||
stdin.Write(buffer, 0, bytes);
|
||||
|
||||
// Write to FFmpeg
|
||||
var bytes = bgr.Rows * bgr.Cols * bgr.ElemSize();
|
||||
var buffer = new byte[bytes];
|
||||
Marshal.Copy(bgr.Data, buffer, 0, bytes);
|
||||
stdin.Write(buffer, 0, bytes);
|
||||
if (!ReferenceEquals(bgr, frameCont))
|
||||
bgr.Dispose();
|
||||
}
|
||||
else
|
||||
{
|
||||
// PRODUCTION MODE: actual crop
|
||||
using var cropped = new Mat(frameCont, roi);
|
||||
using var bgr = cropped.Clone();
|
||||
|
||||
var bytes = bgr.Rows * bgr.Cols * bgr.ElemSize();
|
||||
var buffer = new byte[bytes];
|
||||
Marshal.Copy(bgr.Data, buffer, 0, bytes);
|
||||
stdin.Write(buffer, 0, bytes);
|
||||
}
|
||||
|
||||
// Dispose frameCont only if it was a clone
|
||||
if (!ReferenceEquals(frameCont, frame))
|
||||
frameCont.Dispose();
|
||||
|
||||
// Progress
|
||||
var elapsed = DateTime.UtcNow - startTime;
|
||||
var progress = (double)i / totalFrames;
|
||||
var speed = i > 0 ? i / elapsed.TotalSeconds : 0.0;
|
||||
var elapsed = DateTime.UtcNow - startTime;
|
||||
var progress = (double)i / totalFrames;
|
||||
var speed = i > 0 ? i / elapsed.TotalSeconds : 0.0;
|
||||
var remainingFrames = totalFrames - i;
|
||||
var etaSeconds = speed > 0 ? remainingFrames / speed : 0;
|
||||
var eta = TimeSpan.FromSeconds(etaSeconds);
|
||||
var etaSeconds = speed > 0 ? remainingFrames / speed : 0;
|
||||
var eta = TimeSpan.FromSeconds(etaSeconds);
|
||||
|
||||
DrawProgress(progress, eta, speed);
|
||||
}
|
||||
@ -223,13 +272,11 @@ public class TrackingSplitter(
|
||||
|
||||
if (!previousCenter.HasValue)
|
||||
{
|
||||
// no previous face → pick largest
|
||||
return foundObjects
|
||||
.OrderByDescending(f => f.box.Width * f.box.Height)
|
||||
.First();
|
||||
}
|
||||
|
||||
// pick the object closest to previous center
|
||||
return foundObjects
|
||||
.OrderBy(f =>
|
||||
{
|
||||
@ -249,57 +296,33 @@ public class TrackingSplitter(
|
||||
TimeSpan skip,
|
||||
string[] passthrough)
|
||||
{
|
||||
var pass = passthrough.Length > 0 ? string.Join(" ", passthrough) : "";
|
||||
var pass = passthrough.Length > 0 ? string.Join(" ", passthrough) : "";
|
||||
var skipSeconds = skip.TotalSeconds.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture);
|
||||
var fpsStr = fps.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture);
|
||||
var fpsStr = fps.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture);
|
||||
|
||||
// One-pass pipeline:
|
||||
// - rawvideo from stdin
|
||||
// - audio from source MP4 (seeked)
|
||||
// - NVENC video encode
|
||||
// - AAC audio copy/encode
|
||||
//
|
||||
// This is the same structure your original OpenCV pipeline used.
|
||||
//
|
||||
// IMPORTANT:
|
||||
// Because OpenCV reliably reads the full segment,
|
||||
// FFmpeg will NOT close stdin early anymore.
|
||||
//
|
||||
var args =
|
||||
"-y " +
|
||||
// VIDEO INPUT (raw BGR24 from stdin)
|
||||
$"-f rawvideo -pix_fmt bgr24 -s {width}x{height} -r {fpsStr} -i - " +
|
||||
|
||||
// AUDIO INPUT (seeked)
|
||||
$"-ss {skipSeconds} -i \"{srcFileName}\" " +
|
||||
|
||||
// MAP streams
|
||||
"-map 0:v:0 -map 1:a:0? -shortest " +
|
||||
|
||||
// VIDEO ENCODE
|
||||
"-c:v h264_nvenc -preset p4 -b:v 8M -pix_fmt yuv420p " +
|
||||
|
||||
// AUDIO ENCODE/COPY
|
||||
"-c:a aac -b:a 192k " +
|
||||
|
||||
// Extra passthrough flags
|
||||
pass + $" \"{destFileName}\"";
|
||||
"-y " +
|
||||
$"-f rawvideo -pix_fmt bgr24 -s {width}x{height} -r {fpsStr} -i - " +
|
||||
$"-ss {skipSeconds} -i \"{srcFileName}\" " +
|
||||
"-map 0:v:0 -map 1:a:0? -shortest " +
|
||||
"-c:v h264_nvenc -preset p4 -b:v 8M -pix_fmt yuv420p " +
|
||||
"-c:a aac -b:a 192k " +
|
||||
pass + $" \"{destFileName}\"";
|
||||
|
||||
var psi = new ProcessStartInfo
|
||||
{
|
||||
FileName = "ffmpeg",
|
||||
Arguments = args,
|
||||
RedirectStandardInput = true,
|
||||
RedirectStandardError = true,
|
||||
FileName = "ffmpeg",
|
||||
Arguments = args,
|
||||
RedirectStandardInput = true,
|
||||
RedirectStandardError = true,
|
||||
RedirectStandardOutput = true,
|
||||
UseShellExecute = false,
|
||||
CreateNoWindow = true
|
||||
UseShellExecute = false,
|
||||
CreateNoWindow = true
|
||||
};
|
||||
|
||||
var process = new Process { StartInfo = psi };
|
||||
process.Start();
|
||||
|
||||
// async stderr reader
|
||||
_ = Task.Run(() =>
|
||||
{
|
||||
try
|
||||
@ -314,4 +337,10 @@ public class TrackingSplitter(
|
||||
return process;
|
||||
}
|
||||
|
||||
/// <summary>
/// Draws a line of text onto <paramref name="img"/> with Cv2.PutText, using the
/// Hershey Simplex font at scale 0.6 and a stroke thickness of 2.
/// </summary>
/// <param name="img">Image the text is drawn onto.</param>
/// <param name="text">Text to render.</param>
/// <param name="x">X coordinate of the point passed to PutText as the text origin.</param>
/// <param name="y">Y coordinate of the point passed to PutText as the text origin.</param>
/// <param name="color">Color of the rendered text.</param>
void DrawText(Mat img, string text, int x, int y, Scalar color)
|
||||
{
|
||||
Cv2.PutText(img, text, new Point(x, y),
|
||||
HersheyFonts.HersheySimplex, 0.6, color, 2);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -81,7 +81,7 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
||||
h = Math.Clamp(h, 1, frameCont.Height - y);
|
||||
|
||||
// Ignore detections starting in the lower half of the frame
|
||||
if (y > frameCont.Height * (2f / 3f))
|
||||
if (y > frameCont.Height * (0.5f))
|
||||
continue;
|
||||
|
||||
var rect = new Rect(x, y, w, h);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user