Full human body detection added based on YOLOv8.

This commit is contained in:
Alexander Shabarshov 2026-05-10 12:36:37 +01:00
parent cddcd6ff6e
commit fd1dab02a9
12 changed files with 595 additions and 375 deletions

4
.gitignore vendored
View File

@ -363,6 +363,4 @@ MigrationBackup/
FodyWeavers.xsd
# OpenCV models
*.onnx
*.bin
*.param
models/*.*

219
CommandLine.cs Normal file
View File

@ -0,0 +1,219 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
namespace splitter;
/// <summary>
/// Parses the splitter command line:
/// <c>&lt;input&gt; &lt;output_folder&gt; [options] [--] &lt;ffmpeg passthrough&gt;</c>.
/// </summary>
/// <remarks>
/// The constructor never throws for malformed user input. It prints a message (or the help text)
/// and leaves the instance with empty <see cref="InputFile"/> / <see cref="OutputFolder"/>, so
/// <see cref="IsValid"/> is <c>false</c>.
/// </remarks>
public sealed class CommandLine
{
    private const string MaskOption = "--mask=";
    private const string DetectOption = "--detect=";
    private const string CropOption = "--crop=";
    private const string DurationOption = "--duration=";

    // The help text documents "body" as the default detector, so it is used when --detect is absent.
    private const string DefaultDetector = "body";

    /// <summary>First positional argument; empty when the command line is invalid.</summary>
    public string InputFile { get; private init; }

    /// <summary>Second positional argument; empty when the command line is invalid.</summary>
    public string OutputFolder { get; private init; }

    /// <summary>Crop size from <c>--crop[=...]</c>; <c>null</c> when not requested or not parseable.</summary>
    public (int width, int height)? Crop { get; private init; }

    /// <summary>Output filename pattern from <c>--mask=</c>, or <c>null</c> when not given.</summary>
    public string? Mask { get; private init; }

    /// <summary><c>true</c> when <c>--debug</c> was given.</summary>
    public bool Debug { get; private init; }

    /// <summary>Lower-cased detector name from <c>--detect=</c>; "body" when the option is absent or empty.</summary>
    public string? Detect { get; private init; } = DefaultDetector;

    /// <summary>Segment duration in seconds from <c>--duration=</c>; always finite and positive when set.</summary>
    public double? OverrideTargetDuration { get; private init; }

    /// <summary>Arguments that followed the <c>--</c> separator.</summary>
    public string[] Passthrough { get; private init; } = Array.Empty<string>();

    /// <summary><c>true</c> when <c>--text</c> was given.</summary>
    public bool PlainText { get; private init; }

    /// <summary><c>true</c> when <c>--estimate</c> was given.</summary>
    public bool EstimateOnly { get; private init; }

    /// <summary><c>true</c> when <c>--force</c> was given.</summary>
    public bool ForceFixed { get; private init; }

    /// <summary><c>true</c> when both positional arguments were supplied and every option parsed successfully.</summary>
    public bool IsValid => !string.IsNullOrEmpty(InputFile) && !string.IsNullOrEmpty(OutputFolder);

    /// <summary>Parses <paramref name="args"/>; see the class remarks for the failure behavior.</summary>
    /// <param name="args">Raw process arguments.</param>
    /// <exception cref="ArgumentNullException"><paramref name="args"/> is <c>null</c>.</exception>
    public CommandLine(string[] args)
    {
        ArgumentNullException.ThrowIfNull(args);

        InputFile = "";
        OutputFolder = "";

        if (args.Length == 0 || Array.IndexOf(args, "--help") >= 0)
        {
            PrintHelp();
            return;
        }

        // Everything after "--" is handed to ffmpeg untouched.
        var passthroughIndex = Array.IndexOf(args, "--");
        if (passthroughIndex >= 0)
        {
            Passthrough = args[(passthroughIndex + 1)..];
            args = args[..passthroughIndex];
        }

        if (args.Length < 2)
        {
            Console.WriteLine("Missing required parameters.");
            PrintHelp();
            return;
        }

        InputFile = args[0];
        OutputFolder = args[1];

        foreach (var arg in args[2..])
        {
            if (arg.StartsWith(MaskOption, StringComparison.Ordinal))
            {
                Mask = arg[MaskOption.Length..];
            }
            else if (arg.StartsWith(DetectOption, StringComparison.Ordinal))
            {
                var name = arg[DetectOption.Length..].Trim().ToLowerInvariant();
                Detect = name.Length == 0 ? DefaultDetector : name;
            }
            else if (arg.StartsWith(CropOption, StringComparison.Ordinal))
            {
                var value = arg[CropOption.Length..];
                Crop = ParseCrop(value);
                if (Crop is null)
                {
                    // Cropping stays disabled, but the user is told why.
                    Console.WriteLine($"Invalid --crop value: {value} (expected WxH, e.g. 607x1080); cropping is disabled.");
                }
            }
            else if (arg == "--crop")
            {
                Crop = ParseCrop("");
            }
            else if (arg == "--text")
            {
                PlainText = true;
            }
            else if (arg == "--debug")
            {
                Debug = true;
            }
            else if (arg.StartsWith(DurationOption, StringComparison.Ordinal))
            {
                var dur = arg[DurationOption.Length..];

                double seconds;
                try
                {
                    seconds = ParseDuration(dur);
                }
                catch (FormatException)
                {
                    seconds = double.NaN;
                }

                if (!double.IsFinite(seconds) || seconds <= 0)
                {
                    Console.WriteLine($"Invalid --duration value: {dur}");

                    // A bare return would leave a "valid" instance behind; clear the
                    // positional arguments so IsValid reports the failure.
                    InputFile = "";
                    OutputFolder = "";
                    return;
                }

                OverrideTargetDuration = seconds;
            }
            else if (arg == "--estimate")
            {
                EstimateOnly = true;
            }
            else if (arg == "--force")
            {
                ForceFixed = true;
            }
        }
    }

    /// <summary>
    /// Parses a crop size. Empty input yields the default 607x1080; otherwise the value must be
    /// two positive integers separated by <c>x</c> or <c>:</c> (the help text shows both spellings).
    /// </summary>
    /// <returns>The parsed size, or <c>null</c> when the value is malformed.</returns>
    private static (int width, int height)? ParseCrop(string v)
    {
        // Default vertical Full HD for YouTube Shorts
        const int defaultW = 607;
        const int defaultH = 1080;

        // Empty or whitespace → default crop
        if (string.IsNullOrWhiteSpace(v))
            return (defaultW, defaultH);

        var parts = v.Trim().ToLowerInvariant().Split('x', ':');
        if (parts.Length != 2)
            return null;

        var okW = int.TryParse(parts[0], NumberStyles.Integer, CultureInfo.InvariantCulture, out var w);
        var okH = int.TryParse(parts[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out var h);
        if (!okW || !okH || w <= 0 || h <= 0)
            return null;

        return (w, h);
    }

    /// <summary>
    /// Converts a duration written as <c>N</c>, <c>Ns</c> or <c>NmMs</c> into seconds.
    /// All numbers are parsed with the invariant culture so the result does not depend on the
    /// machine's regional settings.
    /// </summary>
    /// <exception cref="FormatException">The text matches none of the accepted forms.</exception>
    static double ParseDuration(string text)
    {
        const NumberStyles suffixedStyle = NumberStyles.Float | NumberStyles.AllowThousands;
        var invariant = CultureInfo.InvariantCulture;

        text = text.Trim().ToLowerInvariant();

        // Case 1: pure number to seconds
        if (double.TryParse(text, NumberStyles.Any, invariant, out var sec))
            return sec;

        // Case 2: Ns (seconds)
        if (text.EndsWith('s') && double.TryParse(text[..^1], suffixedStyle, invariant, out sec))
            return sec;

        // Case 3: NmMs (minutes + seconds)
        // Examples: 2m30s, 1m5s, 10m0s
        var mIndex = text.IndexOf('m');
        var sIndex = text.IndexOf('s');
        if (mIndex > 0 && sIndex > mIndex)
        {
            var mPart = text[..mIndex];
            var sPart = text[(mIndex + 1)..sIndex];
            if (double.TryParse(mPart, suffixedStyle, invariant, out var minutes) &&
                double.TryParse(sPart, suffixedStyle, invariant, out var seconds))
            {
                return minutes * 60 + seconds;
            }
        }

        throw new FormatException($"Invalid duration format: {text}");
    }

    /// <summary>Writes the usage text to the console.</summary>
    public static void PrintHelp()
    {
        Console.WriteLine(@"
Usage:
splitter <input.mp4> <output_folder> [options] [--] <ffmpeg passthrough>
Options:
--mask=<pattern> Output filename pattern.
Default: <OriginalName>_Seg%03d.mp4
Supports %03d or %d for segment index.
--duration=<value> Override target segment duration.
Accepted formats:
Ns - N seconds
NmMs - N minutes M seconds
N - N seconds (plain number)
Examples:
--duration=90s
--duration=2m30s
--duration=45
Without --force:
Segments are equalized so all have same length.
--force Use fixed segment duration exactly as given.
Last segment may be shorter.
Default: OFF
--estimate Print calculated segment information and exit.
No splitting is performed.
--crop[=<w:h>] Crop video to width w and height h, with face tracking.
Useful to making YouTube Shorts or TikToks from horizontal video.
Default: 607x1080 (vertical video cropped from Full HD original)
--detect=<name> Object detector to use for tracking.
Values: face (UltraFace), body (YoloOnnx, default), none (no tracking, just a center)
--text Display log in plain text.
--debug Show debug overlay during face tracking.
Passthrough:
Anything after -- is passed directly to ffmpeg.
Examples:
splitter vertical-video.mp4 out/
splitter vertical-video.mp4 out/ --duration=90s
splitter vertical-video.mp4 out/ --duration=2m30s --mask=""Part%03d.mp4""
splitter vertical-video.mp4 out/ --estimate
splitter vertical-video.mp4 out/ --force --duration=45 -- -an -sn
splitter horizontal-video.mp4 out/ --crop
Description:
Splits a video into equal or fixed-length segments using multi-threaded
ffmpeg execution. Supports ETA, speed, and rich progress display.
");
    }
}

8
IObjectDetector.cs Normal file
View File

@ -0,0 +1,8 @@
using OpenCvSharp;
namespace splitter;
/// <summary>
/// Contract for detectors that locate objects in a single video frame.
/// Implementations are disposable.
/// </summary>
public interface IObjectDetector : IDisposable
{
    /// <summary>
    /// Runs detection on one frame and returns every object found.
    /// </summary>
    /// <param name="frameCont">
    /// Frame to analyze. The tracking loop passes a Mat that is continuous in memory.
    /// </param>
    /// <param name="width">Frame width as reported by the caller (the video's frame width in the tracking loop).</param>
    /// <param name="height">Frame height as reported by the caller (the video's frame height in the tracking loop).</param>
    /// <returns>One <c>(box, center)</c> pair per detected object.</returns>
    List<(Rect box, Point2f center)> DetectAll(Mat frameCont, int width, int height);
}

View File

@ -1,6 +1,6 @@
namespace splitter;
internal sealed class FaceKalmanTracker
internal sealed class KalmanTracker
{
// State vector: [x, y, vx, vy]
private float[] _state = new float[4];

20
LoggingBase.cs Normal file
View File

@ -0,0 +1,20 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace splitter;
/// <summary>
/// Base class that hands derived types the host's logging and progress callbacks,
/// plus level-specific logging shortcuts.
/// </summary>
public class LoggingBase
{
    /// <summary>Log sink; invoked as (level, color, message).</summary>
    protected Action<string, ConsoleColor, string> Log;

    /// <summary>Progress sink; invoked as (percent, duration, fps).</summary>
    protected Action<double, TimeSpan, double> DrawProgress;

    /// <summary>Stores the two callbacks for use by derived classes.</summary>
    /// <param name="log">Callback receiving (level, color, message).</param>
    /// <param name="drawProgress">Callback receiving (percent, duration, fps).</param>
    public LoggingBase(
        Action<string, ConsoleColor, string> log,
        Action<double, TimeSpan, double> drawProgress)
    {
        Log = log;
        DrawProgress = drawProgress;
    }

    /// <summary>Logs <paramref name="msg"/> at the "[INFO]" level in cyan.</summary>
    protected void LogInfo(string msg)
    {
        Log("[INFO]", ConsoleColor.Cyan, msg);
    }

    /// <summary>Logs <paramref name="msg"/> at the "[ OK ]" level in green.</summary>
    protected void LogSuccess(string msg)
    {
        Log("[ OK ]", ConsoleColor.Green, msg);
    }

    /// <summary>Logs <paramref name="msg"/> at the "[WARN]" level in yellow.</summary>
    protected void LogWarn(string msg)
    {
        Log("[WARN]", ConsoleColor.Yellow, msg);
    }

    /// <summary>Logs <paramref name="msg"/> at the "[ERR ]" level in red.</summary>
    protected void LogError(string msg)
    {
        Log("[ERR ]", ConsoleColor.Red, msg);
    }
}

View File

@ -2,7 +2,7 @@
"profiles": {
"splitter": {
"commandName": "Project",
"commandLineArgs": "\"C:\\Users\\uncls\\Pictures\\2026\\2026 - Secret Rule\\20260426_212004.mp4\" \"C:\\Users\\uncls\\Pictures\\2026\\2026 - Secret Rule\\Shorts\" --crop --debug --text"
"commandLineArgs": "\"C:\\Users\\uncls\\Pictures\\2026\\2026 - Secret Rule\\20260426_212004.mp4\" \"C:\\Users\\uncls\\Pictures\\2026\\2026 - Secret Rule\\Shorts\" --crop --detect=body --debug --text"
}
}
}

17
Rect.cs
View File

@ -1,17 +0,0 @@
namespace splitter;
/// <summary>
/// Integer rectangle described by a position (<see cref="X"/>, <see cref="Y"/>)
/// and a size (<see cref="Width"/>, <see cref="Height"/>). All fields are mutable.
/// </summary>
public struct Rect
{
    /// <summary>Horizontal position.</summary>
    public int X;

    /// <summary>Vertical position.</summary>
    public int Y;

    /// <summary>Horizontal extent.</summary>
    public int Width;

    /// <summary>Vertical extent.</summary>
    public int Height;

    /// <summary>Creates a rectangle from its position and size.</summary>
    /// <param name="x">Value stored in <see cref="X"/>.</param>
    /// <param name="y">Value stored in <see cref="Y"/>.</param>
    /// <param name="w">Value stored in <see cref="Width"/>.</param>
    /// <param name="h">Value stored in <see cref="Height"/>.</param>
    public Rect(int x, int y, int w, int h)
        => (X, Y, Width, Height) = (x, y, w, h);
}

View File

@ -1,23 +1,18 @@
using System.Diagnostics;
using System.Runtime.InteropServices;
using OpenCvSharp;
using Cv = OpenCvSharp.Cv2;
using Mat = OpenCvSharp.Mat;
using CvPoint = OpenCvSharp.Point;
using CvRect = OpenCvSharp.Rect;
namespace splitter;
public class FaceTracker
public class TrackingSplitter(
Action<string/*level*/, ConsoleColor /*color*/, string /*message*/> log,
Action<double /*percent*/, TimeSpan /*duration*/, double /*fps*/> drawProgress
) : LoggingBase(log, drawProgress)
{
public Action<double, TimeSpan, double> DrawProgress { get; init; } = (_, _, _) => { };
private static Rect ToCvRect(splitter.Rect r)
=> new Rect(r.X, r.Y, r.Width, r.Height);
public async Task TrackFaceAndExtract(
public async Task TrackAndExtract(
string srcFileName,
string destFileName,
IObjectDetector detector,
TimeSpan skip,
TimeSpan duration,
int cropWidth,
@ -26,7 +21,7 @@ public class FaceTracker
bool debugOverlay)
{
// ------------------------------
// 1. OpenCV VideoCapture (stable)
// OpenCV VideoCapture (stable)
// ------------------------------
using var capture = new VideoCapture(srcFileName);
if (!capture.IsOpened())
@ -34,22 +29,21 @@ public class FaceTracker
capture.Set(VideoCaptureProperties.PosMsec, skip.TotalMilliseconds);
var videoWidth = (int)capture.Get(VideoCaptureProperties.FrameWidth);
var videoWidth = (int)capture.Get(VideoCaptureProperties.FrameWidth);
var videoHeight = (int)capture.Get(VideoCaptureProperties.FrameHeight);
var fps = capture.Get(VideoCaptureProperties.Fps);
var fps = capture.Get(VideoCaptureProperties.Fps);
var totalFrames = (int)(duration.TotalSeconds * fps);
if ( debugOverlay )
{
cropHeight = videoHeight;
cropWidth = videoWidth;
}
Console.WriteLine($"[FaceTracker] skip={skip}, duration={duration}, fps={fps}, totalFrames={totalFrames}");
// ------------------------------
// 2. UltraFaceDetector (new model)
// ------------------------------
using var detector = new UltraFaceDetector(
binPath: "slim_320.bin",
paramPath: "slim_320.param");
// ------------------------------
// 3. FFmpeg one-pass encoder
// FFmpeg one-pass encoder
// ------------------------------
var ffmpeg = StartFfmpegNvenc(
srcFileName,
@ -63,10 +57,10 @@ public class FaceTracker
using var stdin = ffmpeg.StandardInput.BaseStream;
// ------------------------------
// 4. Tracking state
// Tracking state
// ------------------------------
var frame = new Mat();
var kalman = new FaceKalmanTracker();
var kalman = new KalmanTracker();
kalman.Reset(new Point2f(videoWidth / 2f, videoHeight / 2f));
var lostFrames = 0;
@ -78,7 +72,7 @@ public class FaceTracker
var startTime = DateTime.UtcNow;
// ------------------------------
// 5. Main loop
// Main loop
// ------------------------------
for (var i = 0; i < totalFrames; i++)
{
@ -88,28 +82,23 @@ public class FaceTracker
// Ensure continuous memory for detector
Mat frameCont = frame.IsContinuous() ? frame : frame.Clone();
// Convert to byte[] for UltraFace
var bytesFull = frameCont.Rows * frameCont.Cols * frameCont.ElemSize();
var bufferFull = new byte[bytesFull];
Marshal.Copy(frameCont.Data, bufferFull, 0, bytesFull);
Rect? objectBox = null;
Point2f? objectCenter = null;
Rect? faceBox = null;
Point2f? faceCenter = null;
var objects = detector.DetectAll(frameCont, videoWidth, videoHeight); // list of (box, center)
var faces = detector.DetectAll(bufferFull, videoWidth, videoHeight); // list of (box, center)
var primary = SelectTrackedFace(faces, kalman.LastMeasurement);
var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
if (primary.HasValue)
{
faceCenter = primary.Value.center;
faceBox = primary.Value.box;
objectCenter = primary.Value.center;
objectBox = primary.Value.box;
}
var isLost = !faceCenter.HasValue;
var isLost = !objectCenter.HasValue;
// LOST FACE → drift toward center
// LOST OBJECT → drift toward center
if (isLost)
{
lostFrames++;
@ -120,7 +109,7 @@ public class FaceTracker
var t = Math.Min(1f, lostFrames / 60f);
var ease = 0.02f * t;
faceCenter = new Point2f(
objectCenter = new Point2f(
predicted.X * (1 - ease) + fallbackCenter.X * ease,
predicted.Y * (1 - ease) + fallbackCenter.Y * ease);
}
@ -147,7 +136,7 @@ public class FaceTracker
wasLost = isLost;
var smoothedCenter = kalman.Update(faceCenter);
var smoothedCenter = kalman.Update(objectCenter);
var halfW = cropWidth / 2f;
var halfH = cropHeight / 2f;
@ -170,24 +159,24 @@ public class FaceTracker
x = Math.Clamp(x, 0, videoWidth - cropWidth);
y = Math.Clamp(y, 0, videoHeight - cropHeight);
var roi = new CvRect(x, y, cropWidth, cropHeight);
var roi = new Rect(x, y, cropWidth, cropHeight);
if (debugOverlay)
{
if (faceBox.HasValue)
if (objectBox.HasValue)
{
var fb = faceBox.Value;
Cv.Rectangle(frameCont,
new OpenCvSharp.Rect(fb.X, fb.Y, fb.Width, fb.Height),
var fb = objectBox.Value;
Cv2.Rectangle(frameCont,
new Rect(fb.X, fb.Y, fb.Width, fb.Height),
Scalar.LimeGreen, 2);
}
Cv.Circle(frameCont,
new CvPoint((int)smoothedCenter.X, (int)smoothedCenter.Y),
Cv2.Circle(frameCont,
new Point((int)smoothedCenter.X, (int)smoothedCenter.Y),
6, Scalar.LimeGreen, -1);
Cv.Rectangle(frameCont, roi,
faceCenter.HasValue ? Scalar.Yellow : Scalar.Red, 3);
Cv2.Rectangle(frameCont, roi,
objectCenter.HasValue ? Scalar.Yellow : Scalar.Red, 3);
}
// Crop ROI
@ -225,23 +214,23 @@ public class FaceTracker
throw new Exception("FFmpeg NVENC encoding failed");
}
private (Rect box, Point2f center)? SelectTrackedFace(
List<(Rect box, Point2f center)> faces,
private (Rect box, Point2f center)? SelectTrackedObject(
List<(Rect box, Point2f center)> foundObjects,
Point2f? previousCenter)
{
if (faces == null || faces.Count == 0)
if (foundObjects == null || foundObjects.Count == 0)
return null;
if (!previousCenter.HasValue)
{
// no previous center → pick the largest object
return faces
return foundObjects
.OrderByDescending(f => f.box.Width * f.box.Height)
.First();
}
// pick the face closest to previous center
return faces
// pick the object closest to previous center
return foundObjects
.OrderBy(f =>
{
var dx = f.center.X - previousCenter.Value.X;

View File

@ -1,87 +1,40 @@
using NcnnDotNet;
using System.Runtime.InteropServices;
using NcnnDotNet.Layers;
using OpenCvSharp;
using UltraFaceDotNet;
namespace splitter;
public sealed class UltraFaceDetector : IDisposable
public sealed class UltraFaceDetector: LoggingBase, IDisposable, IObjectDetector
{
private readonly UltraFace _ultraFace;
public UltraFaceDetector(string binPath, string paramPath)
public UltraFaceDetector(
Action<string/*level*/, ConsoleColor /*color*/, string /*message*/> log,
Action<double /*percent*/, TimeSpan /*duration*/, double /*fps*/> drawProgress
) : base(log, drawProgress)
{
var basePath = AppDomain.CurrentDomain.BaseDirectory;
var param = new UltraFaceParameter
{
BinFilePath = binPath,
ParamFilePath = paramPath,
InputWidth = 320,
InputLength = 240,
NumThread = 1,
BinFilePath = Path.Combine(basePath, "models", "slim_320.bin"),
ParamFilePath = Path.Combine(basePath, "models", "slim_320.param"),
InputWidth = 320,
InputLength = 240,
NumThread = 1,
ScoreThreshold = 0.7f
};
_ultraFace = UltraFace.Create(param);
}
public (Rect box, Point2f center)? Detect(byte[] bgr, int width, int height)
public List<(Rect box, Point2f center)> DetectAll(Mat frameCont, int width, int height)
{
if (bgr == null || bgr.Length == 0)
return null;
// Convert to byte[] for UltraFace
var bytesFull = frameCont.Rows * frameCont.Cols * frameCont.ElemSize();
var bgr = new byte[bytesFull];
Marshal.Copy(frameCont.Data, bgr, 0, bytesFull);
// bgr is contiguous BGR24: width * height * 3
unsafe
{
fixed (byte* p = bgr)
{
using var mat = Mat.FromPixels(
(IntPtr)p,
PixelType.Bgr, // BGR24 input
width,
height);
var faces = _ultraFace.Detect(mat);
if (faces == null)
return null;
FaceInfo best = default;
bool hasBest = false;
foreach (var f in faces)
{
if (!hasBest || f.Score > best.Score)
{
best = f;
hasBest = true;
}
}
if (!hasBest)
return null;
int x1 = (int)best.X1;
int y1 = (int)best.Y1;
int x2 = (int)best.X2;
int y2 = (int)best.Y2;
var rect = new Rect(
x1,
y1,
x2 - x1,
y2 - y1);
if (rect.Width <= 0 || rect.Height <= 0)
return null;
var center = new Point2f(
rect.X + rect.Width / 2f,
rect.Y + rect.Height / 2f);
return (rect, center);
}
}
}
public List<(Rect box, Point2f center)> DetectAll(byte[] bgr, int width, int height)
{
var results = new List<(Rect box, Point2f center)>();
if (bgr == null || bgr.Length == 0)
@ -91,9 +44,9 @@ public sealed class UltraFaceDetector : IDisposable
{
fixed (byte* p = bgr)
{
using var mat = Mat.FromPixels(
using var mat = NcnnDotNet.Mat.FromPixels(
(IntPtr)p,
PixelType.Bgr, // BGR24 input
NcnnDotNet.PixelType.Bgr, // BGR24 input
width,
height);

228
YoloOnnxObjectDetector.cs Normal file
View File

@ -0,0 +1,228 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using OpenCvSharp;
namespace splitter;
/// <summary>
/// Object detector that runs a YOLOv8 ONNX model (<c>models/yolov8n.onnx</c> next to the
/// executable) through ONNX Runtime with the DirectML execution provider and reports
/// the boxes of a single class (<see cref="_personClassIndex"/>).
/// </summary>
public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisposable
{
    private readonly InferenceSession _session;
    private readonly string _inputName;
    private readonly string _outputName;

    // Size the frame is resized to before inference; also used to scale boxes back.
    private const int _inputWidth = 640;
    private const int _inputHeight = 640;

    // Detections scoring below this are dropped before NMS.
    private const float _scoreThreshold = 0.35f;

    // Boxes overlapping a better-scored box by at least this IoU are suppressed.
    private const float _nmsThreshold = 0.45f;

    // Index of the class whose score is read from the model output.
    private const int _personClassIndex = 0;

    /// <summary>
    /// Loads the model and creates the inference session.
    /// </summary>
    /// <param name="log">Callback receiving (level, color, message).</param>
    /// <param name="drawProgress">Callback receiving (percent, duration, fps).</param>
    public YoloOnnxObjectDetector(
        Action<string, ConsoleColor, string> log,
        Action<double, TimeSpan, double> drawProgress
    ) : base(log, drawProgress)
    {
        // The options are only needed while the session is being created, so they are
        // released as soon as the constructor finishes.
        using var options = new SessionOptions
        {
            // Settings the DirectML execution provider documentation asks for.
            EnableMemoryPattern = false,
            ExecutionMode = ExecutionMode.ORT_SEQUENTIAL,
        };

        // CPU alternative: options.AppendExecutionProvider_CPU();
        options.AppendExecutionProvider_DML();

        var basePath = AppDomain.CurrentDomain.BaseDirectory;
        var modelPath = System.IO.Path.Combine(basePath, "models", "yolov8n.onnx");

        _session = new InferenceSession(modelPath, options);
        _inputName = _session.InputMetadata.Keys.First();
        _outputName = _session.OutputMetadata.Keys.First();

        foreach (var kv in _session.OutputMetadata)
            LogInfo($"[YoloOnnx] {kv.Key}: {string.Join(",", kv.Value.Dimensions)} {kv.Value.ElementType}");
    }

    /// <summary>
    /// Detects objects of the configured class in <paramref name="frameCont"/>.
    /// </summary>
    /// <param name="frameCont">BGR frame to analyze (it is converted with BGR2RGB before inference).</param>
    /// <param name="width">Not used by this detector; the frame's own width is used.</param>
    /// <param name="height">Not used by this detector; the frame's own height is used.</param>
    /// <returns>
    /// Boxes clipped to the frame together with their centers. Boxes whose top edge lies in
    /// the lower third of the frame are skipped. Empty when the frame is empty.
    /// </returns>
    public List<(Rect box, Point2f center)> DetectAll(Mat frameCont, int width, int height)
    {
        ArgumentNullException.ThrowIfNull(frameCont);

        if (frameCont.Empty())
            return new List<(Rect, Point2f)>();

        int frameWidth = frameCont.Width;
        int frameHeight = frameCont.Height;

        using var resized = frameCont.Resize(new Size(_inputWidth, _inputHeight));
        using var rgb = resized.CvtColor(ColorConversionCodes.BGR2RGB);

        var inputTensor = CreateInputTensor(rgb);

        using var results = _session.Run(new[]
        {
            NamedOnnxValue.CreateFromTensor(_inputName, inputTensor)
        });

        var output = results.First(r => r.Name == _outputName).AsTensor<float>();

        var detections = ParseYoloV8(
            output,
            frameWidth,
            frameHeight,
            _scoreThreshold,
            _personClassIndex);

        var final = ApplyNms(detections, _nmsThreshold);
        var list = new List<(Rect, Point2f)>(final.Count);

        foreach (var d in final)
        {
            int rawLeft = (int)d.X;
            int rawTop = (int)d.Y;
            int rawRight = rawLeft + (int)d.Width;
            int rawBottom = rawTop + (int)d.Height;

            // Clip each edge independently. Clamping only the origin would keep the full
            // width/height and shift a box that overhangs the left/top edge.
            int left = Math.Clamp(rawLeft, 0, frameWidth - 1);
            int top = Math.Clamp(rawTop, 0, frameHeight - 1);
            int right = Math.Clamp(rawRight, left + 1, frameWidth);
            int bottom = Math.Clamp(rawBottom, top + 1, frameHeight);

            // Ignore detections starting in the lower 1/3 of the frame
            if (top > frameHeight * (2f / 3f))
                continue;

            int w = right - left;
            int h = bottom - top;

            var rect = new Rect(left, top, w, h);
            var center = new Point2f(left + w / 2f, top + h / 2f);
            list.Add((rect, center));
        }

        return list;
    }

    /// <summary>
    /// Copies an interleaved RGB image into a <c>[1, 3, height, width]</c> float tensor,
    /// scaling every channel value by 1/255.
    /// </summary>
    /// <param name="rgb">Image to copy; read as 3 bytes per pixel (assumes an 8-bit, 3-channel Mat).</param>
    private static DenseTensor<float> CreateInputTensor(Mat rgb)
    {
        int height = rgb.Rows;
        int width = rgb.Cols;
        int plane = height * width;

        var tensor = new DenseTensor<float>(new[] { 1, 3, height, width });

        // Write through the backing buffer: the multi-dimensional indexer takes a
        // params array, which would be allocated for every single element.
        var buffer = tensor.Buffer.Span;

        unsafe
        {
            for (int y = 0; y < height; y++)
            {
                byte* row = (byte*)rgb.Ptr(y).ToPointer();
                int rowOffset = y * width;

                for (int x = 0; x < width; x++)
                {
                    int src = x * 3;
                    int dst = rowOffset + x;

                    buffer[dst] = row[src + 0] / 255f;
                    buffer[plane + dst] = row[src + 1] / 255f;
                    buffer[2 * plane + dst] = row[src + 2] / 255f;
                }
            }
        }

        return tensor;
    }

    /// <summary>One candidate box in original-frame pixels (left/top origin plus size) with its score.</summary>
    private sealed class Detection
    {
        public float X;
        public float Y;
        public float Width;
        public float Height;
        public float Score;
    }

    /// <summary>
    /// Reads candidate boxes from a YOLOv8-style output tensor laid out as
    /// <c>[1, channels, count]</c>, where channels 0-3 are center x, center y, width, height
    /// in model-input pixels and channel <c>4 + classIndex</c> is the class score.
    /// </summary>
    /// <param name="output">Raw model output.</param>
    /// <param name="originalWidth">Width of the frame the boxes are scaled back to.</param>
    /// <param name="originalHeight">Height of the frame the boxes are scaled back to.</param>
    /// <param name="scoreThreshold">Minimum class score for a candidate to be kept.</param>
    /// <param name="classIndex">Zero-based class whose score is evaluated.</param>
    /// <exception cref="InvalidOperationException">
    /// The tensor is not three-dimensional or has no score channel for <paramref name="classIndex"/>.
    /// </exception>
    private static List<Detection> ParseYoloV8(
        Tensor<float> output,
        int originalWidth,
        int originalHeight,
        float scoreThreshold,
        int classIndex)
    {
        if (output.Dimensions.Length != 3)
            throw new InvalidOperationException(
                $"Unexpected YOLO output rank {output.Dimensions.Length}; expected [1, channels, count].");

        // YOLOv8 output: [1, 84, 8400]
        int channels = output.Dimensions[1]; // 84
        int count = output.Dimensions[2]; // 8400

        int scoreChannel = 4 + classIndex;
        if (classIndex < 0 || scoreChannel >= channels)
            throw new InvalidOperationException(
                $"YOLO output has {channels} channels; class index {classIndex} is out of range.");

        float xScale = (float)originalWidth / _inputWidth;
        float yScale = (float)originalHeight / _inputHeight;

        var detections = new List<Detection>();

        for (int i = 0; i < count; i++)
        {
            float classScore = output[0, scoreChannel, i];
            if (classScore < scoreThreshold)
                continue;

            float x = output[0, 0, i];
            float y = output[0, 1, i];
            float w = output[0, 2, i];
            float h = output[0, 3, i];

            detections.Add(new Detection
            {
                X = (x - w / 2f) * xScale,
                Y = (y - h / 2f) * yScale,
                Width = w * xScale,
                Height = h * yScale,
                Score = classScore
            });
        }

        return detections;
    }

    /// <summary>
    /// Greedy non-maximum suppression: keeps boxes in descending score order and drops every
    /// box whose IoU with an already kept box is at least <paramref name="nmsThreshold"/>.
    /// </summary>
    private static List<Detection> ApplyNms(List<Detection> detections, float nmsThreshold)
    {
        if (detections.Count == 0)
            return detections;

        var ordered = detections.OrderByDescending(d => d.Score).ToList();
        var suppressed = new bool[ordered.Count];
        var result = new List<Detection>();

        for (int i = 0; i < ordered.Count; i++)
        {
            if (suppressed[i])
                continue;

            var best = ordered[i];
            result.Add(best);

            for (int j = i + 1; j < ordered.Count; j++)
            {
                if (!suppressed[j] && IoU(best, ordered[j]) >= nmsThreshold)
                    suppressed[j] = true;
            }
        }

        return result;
    }

    /// <summary>Intersection-over-union of two boxes; 0 when the union has no area.</summary>
    private static float IoU(Detection a, Detection b)
    {
        float x1 = MathF.Max(a.X, b.X);
        float y1 = MathF.Max(a.Y, b.Y);
        float x2 = MathF.Min(a.X + a.Width, b.X + b.Width);
        float y2 = MathF.Min(a.Y + a.Height, b.Y + b.Height);

        float interW = MathF.Max(0, x2 - x1);
        float interH = MathF.Max(0, y2 - y1);
        float interArea = interW * interH;

        float areaA = a.Width * a.Height;
        float areaB = b.Width * b.Height;
        float union = areaA + areaB - interArea;

        if (union <= 0) return 0f;
        return interArea / union;
    }

    /// <summary>Releases the inference session.</summary>
    public void Dispose()
    {
        _session?.Dispose();
    }
}

View File

@ -3,94 +3,28 @@ using System.Globalization;
using System.Text;
using splitter;
class Program
static class Program
{
static int logLines = 0;
static bool plainText = false;
static readonly object consoleLock = new();
static bool progressRunning = true;
static int _logLines = 0;
static bool _plainText = false;
static readonly object _consoleLock = new();
static bool _progressRunning = true;
static void Main(string[] args)
{
double? overrideTargetDuration = null;
var estimateOnly = false;
var forceFixed = false;
var cmd = new CommandLine(args);
Console.OutputEncoding = Encoding.UTF8;
if (args.Length == 0 || args.Contains("--help"))
{
PrintHelp();
return;
}
// Extract passthrough parameters after "--"
var passthrough = Array.Empty<string>();
var passthroughIndex = Array.IndexOf(args, "--");
if (passthroughIndex >= 0)
{
if (passthroughIndex < args.Length - 1)
passthrough = args.Skip(passthroughIndex + 1).ToArray();
args = args.Take(passthroughIndex).ToArray();
}
if (args.Length < 2)
{
LogError("Missing required parameters.");
PrintHelp();
return;
}
var inputFile = args[0];
var outputFolder = args[1];
(int width, int height)? crop = null;
string? mask = null;
var debug = false;
foreach (var arg in args.Skip(2))
{
if (arg.StartsWith("--mask="))
{
mask = arg.Substring("--mask=".Length);
}
else if (arg.StartsWith("--crop="))
{
crop = ParseCrop(arg.Substring("--crop=".Length));
}
else if (arg == "--crop")
{
crop = ParseCrop("");
}
else if (arg == "--text")
{
plainText = true;
}
else if (arg == "--debug")
{
debug = true;
}
else if (arg.StartsWith("--duration="))
{
var dur = arg.Substring("--duration=".Length);
overrideTargetDuration = ParseDuration(dur);
if (overrideTargetDuration <= 0)
{
LogError($"Invalid --duration value: {dur}");
return;
}
}
else if (arg == "--estimate")
{
estimateOnly = true;
}
else if (arg == "--force")
{
forceFixed = true;
}
}
var estimateOnly = cmd.EstimateOnly;
var forceFixed = cmd.ForceFixed;
var passthrough = cmd.Passthrough;
var inputFile = cmd.InputFile;
var outputFolder = cmd.OutputFolder;
(int width, int height)? crop = cmd.Crop;
string? mask = cmd.Mask;
var debug = cmd.Debug;
string? detect = cmd.Detect;
double? overrideTargetDuration = cmd.OverrideTargetDuration;
_plainText = cmd.PlainText;
if (!File.Exists(inputFile))
{
@ -150,7 +84,7 @@ class Program
if (crop != null)
{
LogInfo("Starting multi-threaded face tracking crop and splitting...");
RunMultiThreadedCrop(inputFile, outputFolder, outputMask, duration, segments, segmentLength, passthrough, crop.Value.width, crop.Value.height, debug);
RunMultiThreadedCrop(inputFile, outputFolder, outputMask, duration, segments, segmentLength, passthrough, crop.Value.width, crop.Value.height, debug, detect);
}
else
{
@ -159,40 +93,15 @@ class Program
}
LogSuccess("Done.");
progressRunning = false;
_progressRunning = false;
// Move cursor below progress area
lock (consoleLock)
lock (_consoleLock)
{
Console.SetCursorPosition(0, logLines + 4);
Console.SetCursorPosition(0, _logLines + 4);
Console.WriteLine();
}
}
private static (int width, int height)? ParseCrop(string v)
{
// Default vertical Full HD for YouTube Shorts
const int defaultW = 607;
const int defaultH = 1080;
// Empty or whitespace → default crop
if (string.IsNullOrWhiteSpace(v))
return (defaultW, defaultH);
var s = v.Trim().ToLowerInvariant();
// Expected format: "WWWxHHH"
var parts = s.Split('x');
if (parts.Length != 2)
return null;
var okW = int.TryParse(parts[0], out var w);
var okH = int.TryParse(parts[1], out var h);
if (!okW || !okH || w <= 0 || h <= 0)
return null;
return (w, h);
}
// -----------------------------
// Logging + Progress UI
@ -200,9 +109,9 @@ class Program
static void Log(string prefix, ConsoleColor color, string msg)
{
lock (consoleLock)
lock (_consoleLock)
{
if (plainText)
if (_plainText)
{
Console.WriteLine($"{prefix} {msg}");
}
@ -211,7 +120,7 @@ class Program
Console.ForegroundColor = color;
Console.WriteLine($"{prefix} {msg}");
Console.ResetColor();
logLines++;
_logLines++;
}
}
}
@ -223,18 +132,18 @@ class Program
static void DrawProgress(double progress, TimeSpan eta, double speed)
{
if ( plainText )
if ( _plainText )
return;
lock (consoleLock)
lock (_consoleLock)
{
var width = Math.Max(20, Console.WindowWidth - 20);
var filled = (int)(progress * width);
if (filled < 0) filled = 0;
if (filled > width) filled = width;
var barLine = logLines + 1;
var infoLine = logLines + 2;
var barLine = _logLines + 1;
var infoLine = _logLines + 2;
// Progress bar with 24-bit color (green)
Console.SetCursorPosition(0, barLine);
@ -313,7 +222,7 @@ class Program
// Progress thread
var progressThread = new Thread(() =>
{
while (progressRunning)
while (_progressRunning)
{
var progress = segments == 0 ? 0 : (double)completed / segments;
var processedSeconds = completed * segmentLength;
@ -347,7 +256,7 @@ class Program
});
sw.Stop();
progressRunning = false;
_progressRunning = false;
progressThread.Join();
DrawProgress(1.0, TimeSpan.Zero, totalDuration / Math.Max(sw.Elapsed.TotalSeconds, 0.0001));
}
@ -378,7 +287,7 @@ class Program
// Progress thread
var progressThread = new Thread(() =>
{
while (progressRunning)
while (_progressRunning)
{
var progress = segments == 0 ? 0 : (double)completed / segments;
var processedSeconds = completed * segmentLength;
@ -408,7 +317,7 @@ class Program
}
sw.Stop();
progressRunning = false;
_progressRunning = false;
progressThread.Join();
DrawProgress(1.0, TimeSpan.Zero, totalDuration / Math.Max(sw.Elapsed.TotalSeconds, 0.0001));
}
@ -426,12 +335,10 @@ class Program
string[] passthrough,
int width,
int height,
bool showDebugOverlay)
bool showDebugOverlay,
string? detect)
{
var tracker = new FaceTracker
{
DrawProgress = DrawProgress
};
var tracker = new TrackingSplitter(Log, DrawProgress);
var jobs = Enumerable.Range(0, segments)
.Select(i => new
@ -446,12 +353,12 @@ class Program
var completed = 0;
var sw = Stopwatch.StartNew();
progressRunning = true;
_progressRunning = true;
// --- PROGRESS THREAD ---
var progressThread = new Thread(() =>
{
while (progressRunning)
while (_progressRunning)
{
var progress = segments == 0 ? 0 : (double)completed / segments;
var processedSeconds = completed * segmentLength;
@ -483,11 +390,18 @@ class Program
async job =>
{
var outputFile = BuildOutputFileName(outputFolder, outputMask, job.Index);
using IDisposable detector = detect switch
{
"face" => new UltraFaceDetector(Log, DrawProgress),
"body" => new YoloOnnxObjectDetector(Log, DrawProgress),
_ => throw new InvalidOperationException($"Unknown detector: {detect}")
};
// Run the object-tracking cropper
await tracker.TrackFaceAndExtract(
await tracker.TrackAndExtract(
inputFile,
outputFile,
(IObjectDetector)detector,
TimeSpan.FromSeconds(job.Start),
TimeSpan.FromSeconds(job.Length),
width,
@ -500,7 +414,7 @@ class Program
// --- CLEANUP ---
sw.Stop();
progressRunning = false;
_progressRunning = false;
progressThread.Join();
var finalSpeed = duration / Math.Max(sw.Elapsed.TotalSeconds, 0.0001);
@ -551,97 +465,4 @@ class Program
proc.StandardError.ReadToEnd(); // swallow output
proc.WaitForExit();
}
static double ParseDuration(string text)
{
text = text.Trim().ToLowerInvariant();
// Case 1: pure number to seconds
if (double.TryParse(text, NumberStyles.Any, CultureInfo.InvariantCulture, out var sec))
return sec;
// Case 2: Ns (seconds)
if (text.EndsWith("s") && double.TryParse(text[..^1], out sec))
return sec;
// Case 3: NmMs (minutes + seconds)
// Examples: 2m30s, 1m5s, 10m0s
var mIndex = text.IndexOf('m');
var sIndex = text.IndexOf('s');
if (mIndex > 0 && sIndex > mIndex)
{
var mPart = text[..mIndex];
var sPart = text[(mIndex + 1)..sIndex];
if (double.TryParse(mPart, out var minutes) &&
double.TryParse(sPart, out var seconds))
{
return minutes * 60 + seconds;
}
}
throw new FormatException($"Invalid duration format: {text}");
}
// -----------------------------
// Help
// -----------------------------
static void PrintHelp()
{
Console.WriteLine(@"
Usage:
splitter <input.mp4> <output_folder> [options] [--] <ffmpeg passthrough>
Options:
--mask=<pattern> Output filename pattern.
Default: <OriginalName>_Seg%03d.mp4
Supports %03d or %d for segment index.
--duration=<value> Override target segment duration.
Accepted formats:
Ns - N seconds
NmMs - N minutes M seconds
N - N seconds (plain number)
Examples:
--duration=90s
--duration=2m30s
--duration=45
Without --force:
Segments are equalized so all have same length.
--force Use fixed segment duration exactly as given.
Last segment may be shorter.
Default: OFF
--estimate Print calculated segment information and exit.
No splitting is performed.
--crop[=<w:h>] Crop video to width w and height h, with face tracking.
Useful to making YouTube Shorts or TikToks from horizontal video.
Default: 607x1080 (vertical video cropped from Full HD original)
--text Display log in plain text.
--debug Show debug overlay during face tracking.
Passthrough:
Anything after -- is passed directly to ffmpeg.
Examples:
splitter vertical-video.mp4 out/
splitter vertical-video.mp4 out/ --duration=90s
splitter vertical-video.mp4 out/ --duration=2m30s --mask=""Part%03d.mp4""
splitter vertical-video.mp4 out/ --estimate
splitter vertical-video.mp4 out/ --force --duration=45 -- -an -sn
splitter horizontal-video.mp4 out/ --crop
Description:
Splits a video into equal or fixed-length segments using multi-threaded
ffmpeg execution. Supports ETA, speed, and rich progress display.
");
}
}

View File

@ -7,6 +7,8 @@
<Nullable>enable</Nullable>
<LangVersion>latest</LangVersion>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<PlatformTarget>x64</PlatformTarget>
<RuntimeIdentifier>win-x64</RuntimeIdentifier>
</PropertyGroup>
<!-- DEBUG CONFIGURATION -->
@ -32,16 +34,15 @@
</PropertyGroup>
<ItemGroup>
<Content Include="slim_320.bin">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="slim_320.param">
<Content Include="models/*.*">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>
<ItemGroup>
<PackageReference Include="FFmpeg.AutoGen" Version="8.1.0" />
<PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.24.4" />
<PackageReference Include="Microsoft.ML.OnnxRuntime.DirectML" Version="1.24.4" />
<PackageReference Include="OpenCvSharp4" Version="4.13.0.20260427" />
<PackageReference Include="OpenCvSharp4.runtime.win" Version="4.13.0.20260302" />
<PackageReference Include="UltraFaceDotNet" Version="1.0.0.2" />