diff --git a/.gitignore b/.gitignore index db32398..205d9ee 100644 --- a/.gitignore +++ b/.gitignore @@ -363,6 +363,4 @@ MigrationBackup/ FodyWeavers.xsd # OpenCV models -*.onnx -*.bin -*.param +models/*.* diff --git a/CommandLine.cs b/CommandLine.cs new file mode 100644 index 0000000..50a7720 --- /dev/null +++ b/CommandLine.cs @@ -0,0 +1,219 @@ +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Text; + +namespace splitter; + +public sealed class CommandLine +{ + public string InputFile { get; private init; } + public string OutputFolder { get; private init; } + public (int width, int height)? Crop { get; private init; } + public string? Mask { get; private init; } + public bool Debug { get; private init; } + public string? Detect { get; private init; } + public double? OverrideTargetDuration { get; private init; } + public string[] Passthrough { get; private init; } = Array.Empty(); + public bool PlainText { get; private init; } + public bool EstimateOnly { get; private init; } + public bool ForceFixed { get; private init; } + + public bool IsValid => !string.IsNullOrEmpty(InputFile) && !string.IsNullOrEmpty(OutputFolder); + + public CommandLine(string[] args) + { + InputFile = ""; + OutputFolder = ""; + + if (args.Length == 0 || args.Contains("--help")) + { + PrintHelp(); + return; + } + + // Extract passthrough parameters after "--" + var passthroughIndex = Array.IndexOf(args, "--"); + + if (passthroughIndex >= 0) + { + if (passthroughIndex < args.Length - 1) + Passthrough = args.Skip(passthroughIndex + 1).ToArray(); + + args = args.Take(passthroughIndex).ToArray(); + } + + if (args.Length < 2) + { + Console.WriteLine("Missing required parameters."); + PrintHelp(); + return; + } + + InputFile = args[0]; + OutputFolder = args[1]; + + foreach (var arg in args.Skip(2)) + { + if (arg.StartsWith("--mask=")) + { + Mask = arg.Substring("--mask=".Length); + } + else if (arg.StartsWith("--detect=")) + { + Detect = arg.Substring("--detect=".Length).ToLowerInvariant(); + } + else if (arg.StartsWith("--crop=")) + { + Crop = ParseCrop(arg.Substring("--crop=".Length)); + } + else if (arg == "--crop") + { + Crop = ParseCrop(""); + } + else if (arg == "--text") + { + PlainText = true; + } + else if (arg == "--debug") + { + Debug = true; + } + else if (arg.StartsWith("--duration=")) + { + var dur = arg.Substring("--duration=".Length); + OverrideTargetDuration = ParseDuration(dur); + if (OverrideTargetDuration <= 0) + { + Console.WriteLine($"Invalid --duration value: {dur}"); + return; + } + } + else if (arg == "--estimate") + { + EstimateOnly = true; + } + else if (arg == "--force") + { + ForceFixed = true; + } + } + } + + private static (int width, int height)? ParseCrop(string v) + { + // Default vertical Full HD for YouTube Shorts + const int defaultW = 607; + const int defaultH = 1080; + + // Empty or whitespace → default crop + if (string.IsNullOrWhiteSpace(v)) + return (defaultW, defaultH); + + var s = v.Trim().ToLowerInvariant(); + + // Expected format: "WWWxHHH" + var parts = s.Split('x'); + if (parts.Length != 2) + return null; + + var okW = int.TryParse(parts[0], out var w); + var okH = int.TryParse(parts[1], out var h); + + if (!okW || !okH || w <= 0 || h <= 0) + return null; + + return (w, h); + } + + static double ParseDuration(string text) + { + text = text.Trim().ToLowerInvariant(); + + // Case 1: pure number to seconds + if (double.TryParse(text, NumberStyles.Any, CultureInfo.InvariantCulture, out var sec)) + return sec; + + // Case 2: Ns (seconds) + if (text.EndsWith("s") && double.TryParse(text[..^1], out sec)) + return sec; + + // Case 3: NmMs (minutes + seconds) + // Examples: 2m30s, 1m5s, 10m0s + var mIndex = text.IndexOf('m'); + var sIndex = text.IndexOf('s'); + + if (mIndex > 0 && sIndex > mIndex) + { + var mPart = text[..mIndex]; + var sPart = text[(mIndex + 1)..sIndex]; + + if (double.TryParse(mPart, out var minutes) && + double.TryParse(sPart, out var seconds)) + { + return minutes * 60 + seconds; + } + } + + throw new FormatException($"Invalid duration format: {text}"); + } + public static void PrintHelp() + { + Console.WriteLine(@" +Usage: + splitter [options] [--] + +Options: + --mask= Output filename pattern. + Default: _Seg%03d.mp4 + Supports %03d or %d for segment index. + + --duration= Override target segment duration. + Accepted formats: + Ns - N seconds + NmMs - N minutes M seconds + N - N seconds (plain number) + + Examples: + --duration=90s + --duration=2m30s + --duration=45 + + Without --force: + Segments are equalized so all have same length. + + --force Use fixed segment duration exactly as given. + Last segment may be shorter. + Default: OFF + + --estimate Print calculated segment information and exit. + No splitting is performed. + + --crop[=] Crop video to width w and height h, with face tracking. + Useful to making YouTube Shorts or TikToks from horizontal video. + Default: 607x1080 (vertical video cropped from Full HD original) + + --detect= Object detector to use for tracking. + Values: face (UltraFace), body (YoloOnnx, default), none (no tracking, just a center) + + --text Display log in plain text. + + --debug Show debug overlay during face tracking. + +Passthrough: + Anything after -- is passed directly to ffmpeg. + +Examples: + splitter vertical-video.mp4 out/ + splitter vertical-video.mp4 out/ --duration=90s + splitter vertical-video.mp4 out/ --duration=2m30s --mask=""Part%03d.mp4"" + splitter vertical-video.mp4 out/ --estimate + splitter vertical-video.mp4 out/ --force --duration=45 -- -an -sn + splitter horizontal-video.mp4 out/ --crop + +Description: + Splits a video into equal or fixed-length segments using multi-threaded + ffmpeg execution. Supports ETA, speed, and rich progress display. +"); + } +} diff --git a/IObjectDetector.cs b/IObjectDetector.cs new file mode 100644 index 0000000..02458fd --- /dev/null +++ b/IObjectDetector.cs @@ -0,0 +1,8 @@ +using OpenCvSharp; + +namespace splitter; + +public interface IObjectDetector : IDisposable +{ + List<(Rect box, Point2f center)> DetectAll(Mat frameCont, int width, int height); +} \ No newline at end of file diff --git a/FaceKalmanTracker.cs b/KalmanTracker.cs similarity index 98% rename from FaceKalmanTracker.cs rename to KalmanTracker.cs index be428e3..0410858 100644 --- a/FaceKalmanTracker.cs +++ b/KalmanTracker.cs @@ -1,6 +1,6 @@ namespace splitter; -internal sealed class FaceKalmanTracker +internal sealed class KalmanTracker { // State vector: [x, y, vx, vy] private float[] _state = new float[4]; diff --git a/LoggingBase.cs b/LoggingBase.cs new file mode 100644 index 0000000..04641e8 --- /dev/null +++ b/LoggingBase.cs @@ -0,0 +1,20 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace splitter; + +public class LoggingBase( + Action log, + Action drawProgress + ) +{ + protected Action Log = log; + protected Action DrawProgress = drawProgress; + + protected void LogInfo(string msg) => Log("[INFO]", ConsoleColor.Cyan, msg); + protected void LogSuccess(string msg) => Log("[ OK ]", ConsoleColor.Green, msg); + protected void LogWarn(string msg) => Log("[WARN]", ConsoleColor.Yellow, msg); + protected void LogError(string msg) => Log("[ERR ]", ConsoleColor.Red, msg); + +} diff --git a/Properties/launchSettings.json b/Properties/launchSettings.json index f593426..399c31d 100644 --- a/Properties/launchSettings.json +++ b/Properties/launchSettings.json @@ -2,7 +2,7 @@ "profiles": { "splitter": { "commandName": "Project", - "commandLineArgs": "\"C:\\Users\\uncls\\Pictures\\2026\\2026 - Secret Rule\\20260426_212004.mp4\" \"C:\\Users\\uncls\\Pictures\\2026\\2026 - Secret Rule\\Shorts\" --crop --debug --text" + "commandLineArgs": "\"C:\\Users\\uncls\\Pictures\\2026\\2026 - Secret Rule\\20260426_212004.mp4\" \"C:\\Users\\uncls\\Pictures\\2026\\2026 - Secret Rule\\Shorts\" --crop --detect=body --debug --text" } } } \ No newline at end of file diff --git a/Rect.cs b/Rect.cs deleted file mode 100644 index 59af3c9..0000000 --- a/Rect.cs +++ /dev/null @@ -1,17 +0,0 @@ -namespace splitter; - -public struct Rect -{ - public int X; - public int Y; - public int Width; - public int Height; - - public Rect(int x, int y, int w, int h) - { - X = x; - Y = y; - Width = w; - Height = h; - } -} diff --git a/FaceTracker.cs b/TrackingSplitter.cs similarity index 76% rename from FaceTracker.cs rename to TrackingSplitter.cs index 6adf2d5..b244b13 100644 --- a/FaceTracker.cs +++ b/TrackingSplitter.cs @@ -1,23 +1,18 @@ using System.Diagnostics; using System.Runtime.InteropServices; using OpenCvSharp; -using Cv = OpenCvSharp.Cv2; -using Mat = OpenCvSharp.Mat; -using CvPoint = OpenCvSharp.Point; -using CvRect = OpenCvSharp.Rect; namespace splitter; -public class FaceTracker +public class TrackingSplitter( + Action log, + Action drawProgress + ) : LoggingBase(log, drawProgress) { - public Action DrawProgress { get; init; } = (_, _, _) => { }; - - private static Rect ToCvRect(splitter.Rect r) - => new Rect(r.X, r.Y, r.Width, r.Height); - - public async Task TrackFaceAndExtract( + public async Task TrackAndExtract( string srcFileName, string destFileName, + IObjectDetector detector, TimeSpan skip, TimeSpan duration, int cropWidth, @@ -26,7 +21,7 @@ public class FaceTracker bool debugOverlay) { // ------------------------------ - // 1. OpenCV VideoCapture (stable) + // OpenCV VideoCapture (stable) // ------------------------------ using var capture = new VideoCapture(srcFileName); if (!capture.IsOpened()) @@ -34,22 +29,21 @@ public class FaceTracker capture.Set(VideoCaptureProperties.PosMsec, skip.TotalMilliseconds); - var videoWidth = (int)capture.Get(VideoCaptureProperties.FrameWidth); + var videoWidth = (int)capture.Get(VideoCaptureProperties.FrameWidth); var videoHeight = (int)capture.Get(VideoCaptureProperties.FrameHeight); - var fps = capture.Get(VideoCaptureProperties.Fps); + var fps = capture.Get(VideoCaptureProperties.Fps); var totalFrames = (int)(duration.TotalSeconds * fps); + if ( debugOverlay ) + { + cropHeight = videoHeight; + cropWidth = videoWidth; + } + Console.WriteLine($"[FaceTracker] skip={skip}, duration={duration}, fps={fps}, totalFrames={totalFrames}"); // ------------------------------ - // 2. UltraFaceDetector (new model) - // ------------------------------ - using var detector = new UltraFaceDetector( - binPath: "slim_320.bin", - paramPath: "slim_320.param"); - - // ------------------------------ - // 3. FFmpeg one-pass encoder + // FFmpeg one-pass encoder // ------------------------------ var ffmpeg = StartFfmpegNvenc( srcFileName, @@ -63,10 +57,10 @@ public class FaceTracker using var stdin = ffmpeg.StandardInput.BaseStream; // ------------------------------ - // 4. Tracking state + // Tracking state // ------------------------------ var frame = new Mat(); - var kalman = new FaceKalmanTracker(); + var kalman = new KalmanTracker(); kalman.Reset(new Point2f(videoWidth / 2f, videoHeight / 2f)); var lostFrames = 0; @@ -78,7 +72,7 @@ public class FaceTracker var startTime = DateTime.UtcNow; // ------------------------------ - // 5. Main loop + // Main loop // ------------------------------ for (var i = 0; i < totalFrames; i++) { @@ -88,28 +82,23 @@ public class FaceTracker // Ensure continuous memory for detector Mat frameCont = frame.IsContinuous() ? frame : frame.Clone(); - // Convert to byte[] for UltraFace - var bytesFull = frameCont.Rows * frameCont.Cols * frameCont.ElemSize(); - var bufferFull = new byte[bytesFull]; - Marshal.Copy(frameCont.Data, bufferFull, 0, bytesFull); + Rect? objectBox = null; + Point2f? objectCenter = null; - Rect? faceBox = null; - Point2f? faceCenter = null; + var objects = detector.DetectAll(frameCont, videoWidth, videoHeight); // list of (box, center) - var faces = detector.DetectAll(bufferFull, videoWidth, videoHeight); // list of (box, center) - - var primary = SelectTrackedFace(faces, kalman.LastMeasurement); + var primary = SelectTrackedObject(objects, kalman.LastMeasurement); if (primary.HasValue) { - faceCenter = primary.Value.center; - faceBox = primary.Value.box; + objectCenter = primary.Value.center; + objectBox = primary.Value.box; } - var isLost = !faceCenter.HasValue; + var isLost = !objectCenter.HasValue; - // LOST FACE → drift toward center + // LOST OBJECT → drift toward center if (isLost) { lostFrames++; @@ -120,7 +109,7 @@ public class FaceTracker var t = Math.Min(1f, lostFrames / 60f); var ease = 0.02f * t; - faceCenter = new Point2f( + objectCenter = new Point2f( predicted.X * (1 - ease) + fallbackCenter.X * ease, predicted.Y * (1 - ease) + fallbackCenter.Y * ease); } @@ -147,7 +136,7 @@ public class FaceTracker wasLost = isLost; - var smoothedCenter = kalman.Update(faceCenter); + var smoothedCenter = kalman.Update(objectCenter); var halfW = cropWidth / 2f; var halfH = cropHeight / 2f; @@ -170,24 +159,24 @@ public class FaceTracker x = Math.Clamp(x, 0, videoWidth - cropWidth); y = Math.Clamp(y, 0, videoHeight - cropHeight); - var roi = new CvRect(x, y, cropWidth, cropHeight); - + var roi = new Rect(x, y, cropWidth, cropHeight); + if (debugOverlay) { - if (faceBox.HasValue) + if (objectBox.HasValue) { - var fb = faceBox.Value; - Cv.Rectangle(frameCont, - new OpenCvSharp.Rect(fb.X, fb.Y, fb.Width, fb.Height), + var fb = objectBox.Value; + Cv2.Rectangle(frameCont, + new Rect(fb.X, fb.Y, fb.Width, fb.Height), Scalar.LimeGreen, 2); } - Cv.Circle(frameCont, - new CvPoint((int)smoothedCenter.X, (int)smoothedCenter.Y), + Cv2.Circle(frameCont, + new Point((int)smoothedCenter.X, (int)smoothedCenter.Y), 6, Scalar.LimeGreen, -1); - Cv.Rectangle(frameCont, roi, - faceCenter.HasValue ? Scalar.Yellow : Scalar.Red, 3); + Cv2.Rectangle(frameCont, roi, + objectCenter.HasValue ? Scalar.Yellow : Scalar.Red, 3); } // Crop ROI @@ -225,23 +214,23 @@ public class FaceTracker throw new Exception("FFmpeg NVENC encoding failed"); } - private (Rect box, Point2f center)? SelectTrackedFace( - List<(Rect box, Point2f center)> faces, + private (Rect box, Point2f center)? SelectTrackedObject( + List<(Rect box, Point2f center)> foundObjects, Point2f? previousCenter) { - if (faces == null || faces.Count == 0) + if (foundObjects == null || foundObjects.Count == 0) return null; if (!previousCenter.HasValue) { // no previous face → pick largest - return faces + return foundObjects .OrderByDescending(f => f.box.Width * f.box.Height) .First(); } - // pick the face closest to previous center - return faces + // pick the object closest to previous center + return foundObjects .OrderBy(f => { var dx = f.center.X - previousCenter.Value.X; diff --git a/UltraFaceDetector.cs b/UltraFaceDetector.cs index 8766582..3f4420e 100644 --- a/UltraFaceDetector.cs +++ b/UltraFaceDetector.cs @@ -1,87 +1,40 @@ -using NcnnDotNet; +using System.Runtime.InteropServices; +using NcnnDotNet.Layers; +using OpenCvSharp; using UltraFaceDotNet; namespace splitter; -public sealed class UltraFaceDetector : IDisposable +public sealed class UltraFaceDetector: LoggingBase, IDisposable, IObjectDetector { private readonly UltraFace _ultraFace; - public UltraFaceDetector(string binPath, string paramPath) + public UltraFaceDetector( + Action log, + Action drawProgress + ) : base(log, drawProgress) { + var basePath = AppDomain.CurrentDomain.BaseDirectory; var param = new UltraFaceParameter { - BinFilePath = binPath, - ParamFilePath = paramPath, - InputWidth = 320, - InputLength = 240, - NumThread = 1, + BinFilePath = Path.Combine(basePath, "models", "slim_320.bin"), + ParamFilePath = Path.Combine(basePath, "models", "slim_320.param"), + InputWidth = 320, + InputLength = 240, + NumThread = 1, ScoreThreshold = 0.7f }; _ultraFace = UltraFace.Create(param); } - public (Rect box, Point2f center)? Detect(byte[] bgr, int width, int height) + public List<(Rect box, Point2f center)> DetectAll(Mat frameCont, int width, int height) { - if (bgr == null || bgr.Length == 0) - return null; + // Convert to byte[] for UltraFace + var bytesFull = frameCont.Rows * frameCont.Cols * frameCont.ElemSize(); + var bgr = new byte[bytesFull]; + Marshal.Copy(frameCont.Data, bgr, 0, bytesFull); - // bgr is contiguous BGR24: width * height * 3 - unsafe - { - fixed (byte* p = bgr) - { - using var mat = Mat.FromPixels( - (IntPtr)p, - PixelType.Bgr, // BGR24 input - width, - height); - - var faces = _ultraFace.Detect(mat); - if (faces == null) - return null; - - FaceInfo best = default; - bool hasBest = false; - - foreach (var f in faces) - { - if (!hasBest || f.Score > best.Score) - { - best = f; - hasBest = true; - } - } - - if (!hasBest) - return null; - - int x1 = (int)best.X1; - int y1 = (int)best.Y1; - int x2 = (int)best.X2; - int y2 = (int)best.Y2; - - var rect = new Rect( - x1, - y1, - x2 - x1, - y2 - y1); - - if (rect.Width <= 0 || rect.Height <= 0) - return null; - - var center = new Point2f( - rect.X + rect.Width / 2f, - rect.Y + rect.Height / 2f); - - return (rect, center); - } - } - } - - public List<(Rect box, Point2f center)> DetectAll(byte[] bgr, int width, int height) - { var results = new List<(Rect box, Point2f center)>(); if (bgr == null || bgr.Length == 0) @@ -91,9 +44,9 @@ public sealed class UltraFaceDetector : IDisposable { fixed (byte* p = bgr) { - using var mat = Mat.FromPixels( + using var mat = NcnnDotNet.Mat.FromPixels( (IntPtr)p, - PixelType.Bgr, // BGR24 input + NcnnDotNet.PixelType.Bgr, // BGR24 input width, height); diff --git a/YoloOnnxObjectDetector.cs b/YoloOnnxObjectDetector.cs new file mode 100644 index 0000000..7f86587 --- /dev/null +++ b/YoloOnnxObjectDetector.cs @@ -0,0 +1,228 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML.OnnxRuntime; +using Microsoft.ML.OnnxRuntime.Tensors; +using OpenCvSharp; + +namespace splitter; + +public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisposable +{ + private readonly InferenceSession _session; + private readonly string _inputName; + private readonly string _outputName; + + private const int _inputWidth = 640; + private const int _inputHeight = 640; + private const float _scoreThreshold = 0.35f; + private const float _nmsThreshold = 0.45f; + private const int _personClassIndex = 0; + + public YoloOnnxObjectDetector( + Action log, + Action drawProgress + ) : base(log, drawProgress) + { + var options = new SessionOptions(); +// options.AppendExecutionProvider_CPU(); + options.AppendExecutionProvider_DML(); + + var basePath = AppDomain.CurrentDomain.BaseDirectory; + var modelPath = System.IO.Path.Combine(basePath, "models", "yolov8n.onnx"); + + _session = new InferenceSession(modelPath, options); + + _inputName = _session.InputMetadata.Keys.First(); + _outputName = _session.OutputMetadata.Keys.First(); + + foreach (var kv in _session.OutputMetadata) + LogInfo($"[YoloOnnx] {kv.Key}: {string.Join(",", kv.Value.Dimensions)} {kv.Value.ElementType}"); + } + + public List<(Rect box, Point2f center)> DetectAll(Mat frameCont, int width, int height) + { + if (frameCont.Empty()) + return new List<(Rect, Point2f)>(); + + using var resized = frameCont.Resize(new Size(_inputWidth, _inputHeight)); + using var rgb = resized.CvtColor(ColorConversionCodes.BGR2RGB); + + var inputTensor = CreateInputTensor(rgb); + + using var results = _session.Run(new[] + { + NamedOnnxValue.CreateFromTensor(_inputName, inputTensor) + }); + + var output = results.First(r => r.Name == _outputName).AsTensor(); + + var detections = ParseYoloV8( + output, + frameCont.Width, + frameCont.Height, + _scoreThreshold, + _personClassIndex); + + var final = ApplyNms(detections, _nmsThreshold); + + var list = new List<(Rect, Point2f)>(final.Count); + + foreach (var d in final) + { + int x = (int)d.X; + int y = (int)d.Y; + int w = (int)d.Width; + int h = (int)d.Height; + + x = Math.Clamp(x, 0, frameCont.Width - 1); + y = Math.Clamp(y, 0, frameCont.Height - 1); + w = Math.Clamp(w, 1, frameCont.Width - x); + h = Math.Clamp(h, 1, frameCont.Height - y); + + // Ignore detections starting in the lower 1/3 of the frame + if (y > frameCont.Height * (2f / 3f)) + continue; + + var rect = new Rect(x, y, w, h); + var center = new Point2f(x + w / 2f, y + h / 2f); + + list.Add((rect, center)); + } + + return list; + } + + private static DenseTensor CreateInputTensor(Mat rgb) + { + int height = rgb.Rows; + int width = rgb.Cols; + + var tensor = new DenseTensor(new[] { 1, 3, height, width }); + + unsafe + { + for (int y = 0; y < height; y++) + { + byte* row = (byte*)rgb.Ptr(y).ToPointer(); + + for (int x = 0; x < width; x++) + { + int idx = x * 3; + + tensor[0, 0, y, x] = row[idx + 0] / 255f; + tensor[0, 1, y, x] = row[idx + 1] / 255f; + tensor[0, 2, y, x] = row[idx + 2] / 255f; + } + } + } + + return tensor; + } + + private sealed class Detection + { + public float X; + public float Y; + public float Width; + public float Height; + public float Score; + } + + // ----------------------------- + // CORRECT YOLOv8 PARSER + // ----------------------------- + private static List ParseYoloV8( + Tensor output, + int originalWidth, + int originalHeight, + float scoreThreshold, + int classIndex) + { + // YOLOv8 output: [1, 84, 8400] + int channels = output.Dimensions[1]; // 84 + int count = output.Dimensions[2]; // 8400 + + float xScale = (float)originalWidth / 640f; + float yScale = (float)originalHeight / 640f; + + var detections = new List(); + + for (int i = 0; i < count; i++) + { + float x = output[0, 0, i]; + float y = output[0, 1, i]; + float w = output[0, 2, i]; + float h = output[0, 3, i]; + + float classScore = output[0, 4 + classIndex, i]; + if (classScore < scoreThreshold) + continue; + + float left = (x - w / 2f) * xScale; + float top = (y - h / 2f) * yScale; + float width = w * xScale; + float height = h * yScale; + + detections.Add(new Detection + { + X = left, + Y = top, + Width = width, + Height = height, + Score = classScore + }); + } + + return detections; + } + + private static List ApplyNms(List detections, float nmsThreshold) + { + if (detections.Count == 0) + return detections; + + var ordered = detections.OrderByDescending(d => d.Score).ToList(); + var result = new List(); + + while (ordered.Count > 0) + { + var best = ordered[0]; + result.Add(best); + ordered.RemoveAt(0); + + for (int i = ordered.Count - 1; i >= 0; i--) + { + if (IoU(best, ordered[i]) >= nmsThreshold) + ordered.RemoveAt(i); + } + } + + return result; + } + + private static float IoU(Detection a, Detection b) + { + float x1 = MathF.Max(a.X, b.X); + float y1 = MathF.Max(a.Y, b.Y); + float x2 = MathF.Min(a.X + a.Width, b.X + b.Width); + float y2 = MathF.Min(a.Y + a.Height, b.Y + b.Height); + + float interW = MathF.Max(0, x2 - x1); + float interH = MathF.Max(0, y2 - y1); + float interArea = interW * interH; + + float areaA = a.Width * a.Height; + float areaB = b.Width * b.Height; + + float union = areaA + areaB - interArea; + if (union <= 0) return 0f; + + return interArea / union; + } + + public void Dispose() + { + _session?.Dispose(); + } +} diff --git a/splitter.cs b/splitter.cs index 627a736..50e21a3 100644 --- a/splitter.cs +++ b/splitter.cs @@ -3,94 +3,28 @@ using System.Globalization; using System.Text; using splitter; -class Program +static class Program { - static int logLines = 0; - static bool plainText = false; - static readonly object consoleLock = new(); - static bool progressRunning = true; + static int _logLines = 0; + static bool _plainText = false; + static readonly object _consoleLock = new(); + static bool _progressRunning = true; static void Main(string[] args) { - double? overrideTargetDuration = null; - var estimateOnly = false; - var forceFixed = false; + var cmd = new CommandLine(args); - - Console.OutputEncoding = Encoding.UTF8; - - if (args.Length == 0 || args.Contains("--help")) - { - PrintHelp(); - return; - } - - // Extract passthrough parameters after "--" - var passthrough = Array.Empty(); - var passthroughIndex = Array.IndexOf(args, "--"); - - if (passthroughIndex >= 0) - { - if (passthroughIndex < args.Length - 1) - passthrough = args.Skip(passthroughIndex + 1).ToArray(); - - args = args.Take(passthroughIndex).ToArray(); - } - - if (args.Length < 2) - { - LogError("Missing required parameters."); - PrintHelp(); - return; - } - - var inputFile = args[0]; - var outputFolder = args[1]; - (int width, int height)? crop = null; - string? mask = null; - var debug = false; - - foreach (var arg in args.Skip(2)) - { - if (arg.StartsWith("--mask=")) - { - mask = arg.Substring("--mask=".Length); - } - else if (arg.StartsWith("--crop=")) - { - crop = ParseCrop(arg.Substring("--crop=".Length)); - } - else if (arg == "--crop") - { - crop = ParseCrop(""); - } - else if (arg == "--text") - { - plainText = true; - } - else if (arg == "--debug") - { - debug = true; - } - else if (arg.StartsWith("--duration=")) - { - var dur = arg.Substring("--duration=".Length); - overrideTargetDuration = ParseDuration(dur); - if (overrideTargetDuration <= 0) - { - LogError($"Invalid --duration value: {dur}"); - return; - } - } - else if (arg == "--estimate") - { - estimateOnly = true; - } - else if (arg == "--force") - { - forceFixed = true; - } - } + var estimateOnly = cmd.EstimateOnly; + var forceFixed = cmd.ForceFixed; + var passthrough = cmd.Passthrough; + var inputFile = cmd.InputFile; + var outputFolder = cmd.OutputFolder; + (int width, int height)? crop = cmd.Crop; + string? mask = cmd.Mask; + var debug = cmd.Debug; + string? detect = cmd.Detect; + double? overrideTargetDuration = cmd.OverrideTargetDuration; + _plainText = cmd.PlainText; if (!File.Exists(inputFile)) { @@ -150,7 +84,7 @@ class Program if (crop != null) { LogInfo("Starting multi-threaded face tracking crop and splitting..."); - RunMultiThreadedCrop(inputFile, outputFolder, outputMask, duration, segments, segmentLength, passthrough, crop.Value.width, crop.Value.height, debug); + RunMultiThreadedCrop(inputFile, outputFolder, outputMask, duration, segments, segmentLength, passthrough, crop.Value.width, crop.Value.height, debug, detect); } else { @@ -159,40 +93,15 @@ class Program } LogSuccess("Done."); - progressRunning = false; + _progressRunning = false; // Move cursor below progress area - lock (consoleLock) + lock (_consoleLock) { - Console.SetCursorPosition(0, logLines + 4); + Console.SetCursorPosition(0, _logLines + 4); Console.WriteLine(); } } - private static (int width, int height)? ParseCrop(string v) - { - // Default vertical Full HD for YouTube Shorts - const int defaultW = 607; - const int defaultH = 1080; - - // Empty or whitespace → default crop - if (string.IsNullOrWhiteSpace(v)) - return (defaultW, defaultH); - - var s = v.Trim().ToLowerInvariant(); - - // Expected format: "WWWxHHH" - var parts = s.Split('x'); - if (parts.Length != 2) - return null; - - var okW = int.TryParse(parts[0], out var w); - var okH = int.TryParse(parts[1], out var h); - - if (!okW || !okH || w <= 0 || h <= 0) - return null; - - return (w, h); - } // ----------------------------- // Logging + Progress UI @@ -200,9 +109,9 @@ class Program static void Log(string prefix, ConsoleColor color, string msg) { - lock (consoleLock) + lock (_consoleLock) { - if (plainText) + if (_plainText) { Console.WriteLine($"{prefix} {msg}"); } @@ -211,7 +120,7 @@ class Program Console.ForegroundColor = color; Console.WriteLine($"{prefix} {msg}"); Console.ResetColor(); - logLines++; + _logLines++; } } } @@ -223,18 +132,18 @@ class Program static void DrawProgress(double progress, TimeSpan eta, double speed) { - if ( plainText ) + if ( _plainText ) return; - lock (consoleLock) + lock (_consoleLock) { var width = Math.Max(20, Console.WindowWidth - 20); var filled = (int)(progress * width); if (filled < 0) filled = 0; if (filled > width) filled = width; - var barLine = logLines + 1; - var infoLine = logLines + 2; + var barLine = _logLines + 1; + var infoLine = _logLines + 2; // Progress bar with 24-bit color (green) Console.SetCursorPosition(0, barLine); @@ -313,7 +222,7 @@ class Program // Progress thread var progressThread = new Thread(() => { - while (progressRunning) + while (_progressRunning) { var progress = segments == 0 ? 0 : (double)completed / segments; var processedSeconds = completed * segmentLength; @@ -347,7 +256,7 @@ class Program }); sw.Stop(); - progressRunning = false; + _progressRunning = false; progressThread.Join(); DrawProgress(1.0, TimeSpan.Zero, totalDuration / Math.Max(sw.Elapsed.TotalSeconds, 0.0001)); } @@ -378,7 +287,7 @@ class Program // Progress thread var progressThread = new Thread(() => { - while (progressRunning) + while (_progressRunning) { var progress = segments == 0 ? 0 : (double)completed / segments; var processedSeconds = completed * segmentLength; @@ -408,7 +317,7 @@ class Program } sw.Stop(); - progressRunning = false; + _progressRunning = false; progressThread.Join(); DrawProgress(1.0, TimeSpan.Zero, totalDuration / Math.Max(sw.Elapsed.TotalSeconds, 0.0001)); } @@ -426,12 +335,10 @@ class Program string[] passthrough, int width, int height, - bool showDebugOverlay) + bool showDebugOverlay, + string? detect) { - var tracker = new FaceTracker - { - DrawProgress = DrawProgress - }; + var tracker = new TrackingSplitter(Log, DrawProgress); var jobs = Enumerable.Range(0, segments) .Select(i => new @@ -446,12 +353,12 @@ class Program var completed = 0; var sw = Stopwatch.StartNew(); - progressRunning = true; + _progressRunning = true; // --- PROGRESS THREAD --- var progressThread = new Thread(() => { - while (progressRunning) + while (_progressRunning) { var progress = segments == 0 ? 0 : (double)completed / segments; var processedSeconds = completed * segmentLength; @@ -483,11 +390,18 @@ class Program async job => { var outputFile = BuildOutputFileName(outputFolder, outputMask, job.Index); + using IDisposable detector = detect switch + { + "face" => new UltraFaceDetector(Log, DrawProgress), + "body" => new YoloOnnxObjectDetector(Log, DrawProgress), + _ => throw new InvalidOperationException($"Unknown detector: {detect}") + }; // Run the face-tracking cropper - await tracker.TrackFaceAndExtract( + await tracker.TrackAndExtract( inputFile, outputFile, + (IObjectDetector)detector, TimeSpan.FromSeconds(job.Start), TimeSpan.FromSeconds(job.Length), width, @@ -500,7 +414,7 @@ class Program // --- CLEANUP --- sw.Stop(); - progressRunning = false; + _progressRunning = false; progressThread.Join(); var finalSpeed = duration / Math.Max(sw.Elapsed.TotalSeconds, 0.0001); @@ -551,97 +465,4 @@ class Program proc.StandardError.ReadToEnd(); // swallow output proc.WaitForExit(); } - - static double ParseDuration(string text) - { - text = text.Trim().ToLowerInvariant(); - - // Case 1: pure number to seconds - if (double.TryParse(text, NumberStyles.Any, CultureInfo.InvariantCulture, out var sec)) - return sec; - - // Case 2: Ns (seconds) - if (text.EndsWith("s") && double.TryParse(text[..^1], out sec)) - return sec; - - // Case 3: NmMs (minutes + seconds) - // Examples: 2m30s, 1m5s, 10m0s - var mIndex = text.IndexOf('m'); - var sIndex = text.IndexOf('s'); - - if (mIndex > 0 && sIndex > mIndex) - { - var mPart = text[..mIndex]; - var sPart = text[(mIndex + 1)..sIndex]; - - if (double.TryParse(mPart, out var minutes) && - double.TryParse(sPart, out var seconds)) - { - return minutes * 60 + seconds; - } - } - - throw new FormatException($"Invalid duration format: {text}"); - } - - // ----------------------------- - // Help - // ----------------------------- - - static void PrintHelp() - { - Console.WriteLine(@" -Usage: - splitter [options] [--] - -Options: - --mask= Output filename pattern. - Default: _Seg%03d.mp4 - Supports %03d or %d for segment index. - - --duration= Override target segment duration. - Accepted formats: - Ns - N seconds - NmMs - N minutes M seconds - N - N seconds (plain number) - - Examples: - --duration=90s - --duration=2m30s - --duration=45 - - Without --force: - Segments are equalized so all have same length. - - --force Use fixed segment duration exactly as given. - Last segment may be shorter. - Default: OFF - - --estimate Print calculated segment information and exit. - No splitting is performed. - - --crop[=] Crop video to width w and height h, with face tracking. - Useful to making YouTube Shorts or TikToks from horizontal video. - Default: 607x1080 (vertical video cropped from Full HD original) - - --text Display log in plain text. - - --debug Show debug overlay during face tracking. - -Passthrough: - Anything after -- is passed directly to ffmpeg. - -Examples: - splitter vertical-video.mp4 out/ - splitter vertical-video.mp4 out/ --duration=90s - splitter vertical-video.mp4 out/ --duration=2m30s --mask=""Part%03d.mp4"" - splitter vertical-video.mp4 out/ --estimate - splitter vertical-video.mp4 out/ --force --duration=45 -- -an -sn - splitter horizontal-video.mp4 out/ --crop - -Description: - Splits a video into equal or fixed-length segments using multi-threaded - ffmpeg execution. Supports ETA, speed, and rich progress display. -"); - } } diff --git a/splitter.csproj b/splitter.csproj index 195fc73..e33bf69 100644 --- a/splitter.csproj +++ b/splitter.csproj @@ -7,6 +7,8 @@ enable latest true + x64 + win-x64 @@ -32,16 +34,15 @@ - - PreserveNewest - - + PreserveNewest + +