Upscaling x2 added using realbasicvsr_x2.onnx. It turns out to be very slow and overall not worth it.

This commit is contained in:
Alexander Shabarshov 2026-06-06 16:57:02 +01:00
parent 9496d46411
commit de0d0c77fc
35 changed files with 740 additions and 220 deletions

19
AGENTS.md Normal file
View File

@ -0,0 +1,19 @@
You are a C# programmer. I'm a senior C# programmer with 30+ years of experience.
Do not be overconfident about your answers - they are 70% incorrect.
Do not say "final solution". Do not start every reply with my name.
Do not use emoji or non-ascii symbols. Do not explain "why it works".
I have a C# .NET 10 / Avalonia 12 UI for an ffmpeg/OpenCV video app. All packages are of the very latest versions.
Use namespace splitter for splitter-cli and Splitter_UI for Splitter-UI.
Splitter pipeline is:
* FFProbe extracting all video meta to VideoInfo
* FFMpeg used to decode video frames into OpenCVSharp.Mat
* One of detectors used:
- For face detection: [opencv_zoo/models/face_detection_yunet at main opencv/opencv_zoo](https://github.com/opencv/opencv_zoo/tree/main/models/face_detection_yunet)
- For body detection: [yolov8s.pt Ultralytics/YOLOv8 at main](https://huggingface.co/Ultralytics/YOLOv8/blob/main/yolov8s.pt)
* Camera control applied (CameraController class)
* Final video frames are encoded back to video file using FFMpeg

View File

@ -3,7 +3,7 @@
Splitter is a high-performance command line tool for cutting one or more video files into equal or Splitter is a high-performance command line tool for cutting one or more video files into equal or
fixed-length segments using multithreaded FFmpeg execution. It supports batch input, flexible fixed-length segments using multithreaded FFmpeg execution. It supports batch input, flexible
duration formats, rotation, smart face/body-aware cropping, ETA and speed reporting, with nice GUI duration formats, rotation, smart face/body-aware cropping, ETA and speed reporting, with nice GUI
or both rich and plaintext terminal output. or both rich and plain-text terminal output.
The intended primary use case is for content creators who need to split large video files into smaller The intended primary use case is for content creators who need to split large video files into smaller
segments for platforms like TikTok, Instagram Reels, YouTube Shorts, or similar. The smart segments for platforms like TikTok, Instagram Reels, YouTube Shorts, or similar. The smart
@ -34,7 +34,7 @@ Splitter uses FFmpeg for the actual splitting and encoding, with multi-threading
### Command line interface ### Command line interface
![Splitter](splitter-cli/splitter.png) ![Splitter](splitter-cli/splitter.png)
### Graphical user interface ### Graphical user interface
![Splitter UI](splitter-ui/screenshot.png) ![Splitter UI](Splitter-UI/screenshot.png)
## Requirements ## Requirements

View File

@ -38,14 +38,16 @@ internal sealed class Program
services.AddSingleton<UltraFaceDetector>(); services.AddSingleton<UltraFaceDetector>();
services.AddSingleton<YoloOnnxObjectDetector>(); services.AddSingleton<YoloOnnxObjectDetector>();
services.AddSingleton( x => new SingleThreadedDetector<UltraFaceDetector>(x.GetRequiredService<UltraFaceDetector>()) ); services.AddSingleton( x => new SingleThreadedDetector<UltraFaceDetector>(x.GetRequiredService<UltraFaceDetector>()) );
services.AddSingleton( x => new SingleThreadedDetector<YoloOnnxObjectDetector>(x.GetRequiredService<YoloOnnxObjectDetector>())); services.AddSingleton(x => new SingleThreadedDetector<YoloOnnxObjectDetector>(x.GetRequiredService<YoloOnnxObjectDetector>()));
services.AddSingleton(x => new SingleThreadedDetector<DummyDetector>(x.GetRequiredService<DummyDetector>()));
services.AddSingleton<Func<string, IObjectDetector>>( x => detectorName => services.AddSingleton<Func<string, IObjectDetector>>( x => detectorName =>
{ {
return detectorName switch return detectorName switch
{ {
"face" => x.GetRequiredService<SingleThreadedDetector<UltraFaceDetector>>(), "face" => x.GetRequiredService<SingleThreadedDetector<UltraFaceDetector>>(),
"body" => x.GetRequiredService<SingleThreadedDetector<YoloOnnxObjectDetector>>(), "body" => x.GetRequiredService<SingleThreadedDetector<YoloOnnxObjectDetector>>(),
_ => new DummyDetector() "none" => x.GetRequiredService<SingleThreadedDetector<DummyDetector>>(),
_ => new DummyDetector()
}; };
}); });
services.AddSingleton<ILogger, GlobalLogger>(); services.AddSingleton<ILogger, GlobalLogger>();

View File

@ -46,12 +46,12 @@ public sealed class AutoDecisionService(IThumbnailService _thumbnails, IFileProb
var targetAR = (float)CommandLine.DefaultW / CommandLine.DefaultH; var targetAR = (float)CommandLine.DefaultW / CommandLine.DefaultH;
var pixelAspect = job.Probe!.Sar.X / job.Probe.Sar.Y; var pixelAspect = job.Probe!.Sar.X / job.Probe.Sar.Y;
float srcW = job.Probe.Width * pixelAspect; var srcW = job.Probe.Width * pixelAspect;
float srcH = job.Probe.Height; float srcH = job.Probe.Height;
var srcAR = srcW / srcH; var srcAR = srcW / srcH;
float cropH = srcH; var cropH = srcH;
float cropW = cropH * targetAR; var cropW = cropH * targetAR;
if (cropW > srcW) if (cropW > srcW)
{ {
@ -59,16 +59,16 @@ public sealed class AutoDecisionService(IThumbnailService _thumbnails, IFileProb
cropH = cropW / targetAR; cropH = cropW / targetAR;
} }
float x = (srcW - cropW) * 0.5f; var x = (srcW - cropW) * 0.5f;
float y = (srcH - cropH) * 0.5f; var y = (srcH - cropH) * 0.5f;
float invPixelAspect = 1f / pixelAspect; var invPixelAspect = 1f / pixelAspect;
float cropW_px = cropW * invPixelAspect; var cropW_px = cropW * invPixelAspect;
float cropH_px = cropH; var cropH_px = cropH;
float x_px = x * invPixelAspect; var x_px = x * invPixelAspect;
float y_px = y; var y_px = y;
job.CropText = $"{(int)MathF.Round(cropW_px)},{(int)MathF.Round(cropH_px)}"; job.CropText = $"{(int)MathF.Round(cropW_px)},{(int)MathF.Round(cropH_px)}";
} }

View File

@ -1,7 +0,0 @@
namespace Splitter_UI.Services;
/// <summary>
/// Detector that never reports any object.
/// </summary>
internal class DummyDetector : IObjectDetector
{
    /// <summary>
    /// Ignores <paramref name="frameCont"/> and returns a new, empty detection list.
    /// </summary>
    public List<(OpenCvSharp.Rect box, Point2f center)> DetectAll(Mat frameCont)
    {
        return new List<(OpenCvSharp.Rect box, Point2f center)>();
    }

    /// <summary>
    /// Does nothing.
    /// </summary>
    public void Dispose()
    {
    }
}

View File

@ -63,7 +63,7 @@ public sealed class ThumbnailService : IThumbnailService
var bgraBuffer = canUseStaticBuffers ? _bgraBuffer : new byte[width.Value * height.Value * 4]; var bgraBuffer = canUseStaticBuffers ? _bgraBuffer : new byte[width.Value * height.Value * 4];
// Decode a single frame using ffmpeg → raw BGR24 into _bgrBuffer // Decode a single frame using ffmpeg → raw BGR24 into _bgrBuffer
bool ok = await DecodeFrameAsync(bgrBuffer, file, skip.Value, width.Value, height.Value, rotateDegree); var ok = await DecodeFrameAsync(bgrBuffer, file, skip.Value, width.Value, height.Value, rotateDegree);
if (!ok) if (!ok)
return null; return null;
@ -99,14 +99,14 @@ public sealed class ThumbnailService : IThumbnailService
var p = new Process { StartInfo = psi }; var p = new Process { StartInfo = psi };
p.Start(); p.Start();
int needed = bgrBuffer.Length; var needed = bgrBuffer.Length;
int read = 0; var read = 0;
using var stdout = p.StandardOutput.BaseStream; using var stdout = p.StandardOutput.BaseStream;
while (read < needed) while (read < needed)
{ {
int r = await stdout.ReadAsync(bgrBuffer, read, needed - read); var r = await stdout.ReadAsync(bgrBuffer, read, needed - read);
if (r == 0) if (r == 0)
{ {
TryKill(p); TryKill(p);
@ -126,12 +126,12 @@ public sealed class ThumbnailService : IThumbnailService
private static void ConvertBgrToBgra(byte[] bgr, byte[] bgra, int width, int height) private static void ConvertBgrToBgra(byte[] bgr, byte[] bgra, int width, int height)
{ {
int si = 0; var si = 0;
int di = 0; var di = 0;
int totalPixels = width * height; var totalPixels = width * height;
for (int i = 0; i < totalPixels; i++) for (var i = 0; i < totalPixels; i++)
{ {
bgra[di + 0] = bgr[si + 0]; // B bgra[di + 0] = bgr[si + 0]; // B
bgra[di + 1] = bgr[si + 1]; // G bgra[di + 1] = bgr[si + 1]; // G
@ -150,7 +150,7 @@ public sealed class ThumbnailService : IThumbnailService
(height, width) = (width, height); (height, width) = (width, height);
} }
int stride = width * 4; var stride = width * 4;
fixed (byte* p = bgra) fixed (byte* p = bgra)
{ {

View File

@ -40,6 +40,7 @@ public partial class InspectorPaneViewModel : ObservableObject
job.OutputFolder = Selected.OutputFolder; job.OutputFolder = Selected.OutputFolder;
job.OverrideTargetDuration = Selected.OverrideTargetDuration; job.OverrideTargetDuration = Selected.OverrideTargetDuration;
job.PassthroughText = Selected.PassthroughText; job.PassthroughText = Selected.PassthroughText;
job.Enhance = Selected.Enhance;
job.ParametersList.Clear(); job.ParametersList.Clear();
foreach (var param in Selected.ParametersList) foreach (var param in Selected.ParametersList)

View File

@ -157,6 +157,18 @@ public partial class JobViewModel : ObservableObject
} }
} }
/// <summary>
/// Forwards to <c>Job.Enhance</c>. The setter writes the value and raises
/// the property-changed notification only when the value actually differs.
/// </summary>
public bool Enhance
{
    get { return Job.Enhance; }
    set
    {
        if (Job.Enhance != value)
        {
            Job.Enhance = value;
            OnPropertyChanged();
        }
    }
}
public int? Rotate public int? Rotate
{ {
get => Job.Rotate; get => Job.Rotate;

View File

@ -72,7 +72,7 @@ public partial class MainViewModel : ViewModelBase
jobs.AddRange(fileJobs); jobs.AddRange(fileJobs);
} }
await _processor.ProcessJobs(jobs, false, _cancellationTokenSource.Token); await _processor.ProcessJobs(jobs, jobs.First().Job.Enhance, _cancellationTokenSource.Token);
} }
catch (Exception ex) catch (Exception ex)
{ {

View File

@ -1,7 +1,6 @@
using System.Collections.ObjectModel; using System.Collections.ObjectModel;
using CommunityToolkit.Mvvm.ComponentModel; using CommunityToolkit.Mvvm.ComponentModel;
using CommunityToolkit.Mvvm.Input; using CommunityToolkit.Mvvm.Input;
using Splitter_UI.Views;
namespace Splitter_UI.ViewModels; namespace Splitter_UI.ViewModels;

View File

@ -92,6 +92,16 @@ x:DataType="vm:InspectorPaneViewModel">
<NumericUpDown Value="{Binding Selected.OverrideTargetDuration}" Width="120"/> <NumericUpDown Value="{Binding Selected.OverrideTargetDuration}" Width="120"/>
</StackPanel> </StackPanel>
<!-- Enhance -->
<StackPanel Orientation="Horizontal" Spacing="8">
<CheckBox Content="Enhance resolution x2"
IsChecked="{Binding Selected.Enhance}"/>
<TextBlock Text="(Very slow and not worth it!)"
Foreground="#FFFF80FF"
FontSize="10"
Margin="0,12,0,0"/>
</StackPanel>
<!-- ForceFixed --> <!-- ForceFixed -->
<CheckBox Content="Force Fixed Duration" <CheckBox Content="Force Fixed Duration"
IsChecked="{Binding Selected.ForceFixed}"/> IsChecked="{Binding Selected.ForceFixed}"/>

View File

@ -1,5 +1,3 @@
using Avalonia.Controls;
namespace Splitter_UI.Views; namespace Splitter_UI.Views;
public partial class MainWindow : Avalonia.Controls.Window public partial class MainWindow : Avalonia.Controls.Window

View File

@ -279,8 +279,8 @@ public sealed class PreviewCanvas : Control
var scale = Math.Min(dispW / displayW, dispH / displayH); var scale = Math.Min(dispW / displayW, dispH / displayH);
double dx = dxCanvas / scale; var dx = dxCanvas / scale;
double dy = dyCanvas / scale; var dy = dyCanvas / scale;
if (rotate == 0 || rotate == 180) if (rotate == 0 || rotate == 180)
dx /= pixelAspect; dx /= pixelAspect;
@ -288,8 +288,8 @@ public sealed class PreviewCanvas : Control
dy /= pixelAspect; dy /= pixelAspect;
// start normalized → pixel // start normalized → pixel
double gx = _dragStartValue.X * rawW + dx; var gx = _dragStartValue.X * rawW + dx;
double gy = _dragStartValue.Y * rawH + dy; var gy = _dragStartValue.Y * rawH + dy;
switch (rotate) switch (rotate)
{ {
@ -368,8 +368,8 @@ public sealed class PreviewCanvas : Control
var g = GravitateTo; var g = GravitateTo;
// normalized → pixel // normalized → pixel
double px = g.X * rawW; var px = g.X * rawW;
double py = g.Y * rawH; var py = g.Y * rawH;
var (sx, sy) = TransformPoint( var (sx, sy) = TransformPoint(
px, py, px, py,

View File

@ -74,6 +74,10 @@ public sealed class CommandLine
{ {
Master.Rotate = 90; Master.Rotate = 90;
} }
else if (arg == "--enhance")
{
Master.Enhance = true;
}
else if (arg.StartsWith("--rotate=")) else if (arg.StartsWith("--rotate="))
{ {
var val = arg.Substring("--rotate=".Length); var val = arg.Substring("--rotate=".Length);
@ -328,6 +332,9 @@ Options:
Last segment may be shorter. Last segment may be shorter.
Default: OFF Default: OFF
--enhance Enable video enhancement.
Increases output resolution x4 Using RealBasicVSR_x4 model.
--rotate=<degrees> Rotate video by specified degrees (90, 180, 270). --rotate=<degrees> Rotate video by specified degrees (90, 180, 270).
Useful for videos with incorrect orientation metadata. Useful for videos with incorrect orientation metadata.

View File

@ -65,6 +65,7 @@ public class JobProcessor(ILogger logger) : LoggingBase(logger, 0), IJobProcesso
{ {
"face" => new UltraFaceDetector(_logger), "face" => new UltraFaceDetector(_logger),
"body" => new YoloOnnxObjectDetector(_logger), "body" => new YoloOnnxObjectDetector(_logger),
"none" => new DummyDetector(),
_ => throw new InvalidOperationException($"Unknown detector: {job.Detect}") _ => throw new InvalidOperationException($"Unknown detector: {job.Detect}")
}; };
return new TrackingSplitter(i, detector, job, _logger); return new TrackingSplitter(i, detector, job, _logger);
@ -146,7 +147,7 @@ public class JobProcessor(ILogger logger) : LoggingBase(logger, 0), IJobProcesso
tasks.Add(Task.Run(async () => tasks.Add(Task.Run(async () =>
{ {
int slot = -1; var slot = -1;
try try
{ {

View File

@ -147,6 +147,7 @@ All option names are preserved exactly, and descriptions are consolidated for cl
| **--mask=<pattern>** | Custom output filename pattern. Default: `[NAME]_seg[NN].[EXT]`. Supports `[NAME]`, `[N]`, `[NN]`, `[NNN]`, `[NNNN]`, `[EXT]`. Example: `--mask="[NAME]_[NNNN].mp4"`. | | **--mask=<pattern>** | Custom output filename pattern. Default: `[NAME]_seg[NN].[EXT]`. Supports `[NAME]`, `[N]`, `[NN]`, `[NNN]`, `[NNNN]`, `[EXT]`. Example: `--mask="[NAME]_[NNNN].mp4"`. |
| **--duration=<value>** | Override target segment duration. Formats: `Ns`, `NmMs`, `N`. Examples: `--duration=90s`, `--duration=2m30s`, `--duration=45`. Without `--force`: max 58 seconds, equalized across segments. | | **--duration=<value>** | Override target segment duration. Formats: `Ns`, `NmMs`, `N`. Examples: `--duration=90s`, `--duration=2m30s`, `--duration=45`. Without `--force`: max 58 seconds, equalized across segments. |
| **--force** | Use the duration exactly as provided. Last segment may be shorter. | | **--force** | Use the duration exactly as provided. Last segment may be shorter. |
| **--enhance** | Enable video enhancement. Increases output resolution x2 using the RealBasicVSR x2 model (`realbasicvsr_x2.onnx`). |
| **--rotate=<degrees>** | Rotate video by 90, 180, or 270 degrees. Useful for correcting orientation metadata. | | **--rotate=<degrees>** | Rotate video by 90, 180, or 270 degrees. Useful for correcting orientation metadata. |
| **--rotate-auto** | Use automatic rotation detection. | | **--rotate-auto** | Use automatic rotation detection. |
| **--estimate** | Print calculated segment information and exit. No splitting is performed. | | **--estimate** | Print calculated segment information and exit. No splitting is performed. |

View File

@ -7,10 +7,10 @@ public class SimpleSplitter(int segmentNo, ILogger logger) : LoggingBase(logger,
{ {
public async Task ProcessSegment(SingleTask job, CancellationToken token) public async Task ProcessSegment(SingleTask job, CancellationToken token)
{ {
string inputFile = job.Job.InputFile; var inputFile = job.Job.InputFile;
string outputFile = job.OutputFileName; var outputFile = job.OutputFileName;
double start = job.SegmentStart; var start = job.SegmentStart;
double length = job.SegmentLength; var length = job.SegmentLength;
var rotation = GetRotationFilter(job.Job.Rotate); var rotation = GetRotationFilter(job.Job.Rotate);
@ -36,12 +36,12 @@ public class SimpleSplitter(int segmentNo, ILogger logger) : LoggingBase(logger,
{ {
// Rotation path: must re-encode and recompute DAR // Rotation path: must re-encode and recompute DAR
long sarNum = Convert.ToInt64(job.Info.Sar.X); var sarNum = Convert.ToInt64(job.Info.Sar.X);
long sarDen = Convert.ToInt64(job.Info.Sar.Y); var sarDen = Convert.ToInt64(job.Info.Sar.Y);
// After rotation, width/height swap // After rotation, width/height swap
int w = job.Info.Width; var w = job.Info.Width;
int h = job.Info.Height; var h = job.Info.Height;
if (job.Job.Rotate == 90 || job.Job.Rotate == 270) if (job.Job.Rotate == 90 || job.Job.Rotate == 270)
{ {
@ -119,7 +119,7 @@ public class SimpleSplitter(int segmentNo, ILogger logger) : LoggingBase(logger,
while (b != 0) while (b != 0)
{ {
long t = b; var t = b;
b = a % b; b = a % b;
a = t; a = t;
} }
@ -167,7 +167,7 @@ public class SimpleSplitter(int segmentNo, ILogger logger) : LoggingBase(logger,
{ {
// FFmpeg formats: HH:MM:SS.xx // FFmpeg formats: HH:MM:SS.xx
// We read until whitespace // We read until whitespace
int end = startIndex; var end = startIndex;
while (end < line.Length && !char.IsWhiteSpace(line[end])) while (end < line.Length && !char.IsWhiteSpace(line[end]))
end++; end++;

View File

@ -92,6 +92,10 @@ public class SingleJob
/// object detector or rotation detector. /// object detector or rotation detector.
/// </summary> /// </summary>
public Dictionary<string, string> Parameters { get; set; } = []; public Dictionary<string, string> Parameters { get; set; } = [];
/// <summary>
/// Requests super-resolution enhancement of the output video.
/// When set, TrackingSplitter creates a RealBasicVsr2xDmlEnhancer (model file
/// realbasicvsr_x2.onnx, ResolutionMultiplier 2), so the encoded width and
/// height are twice the processing size. Set by the "--enhance" command line switch.
/// </summary>
public bool Enhance { get; set; }
public void Override<T>(ref T member, string name) public void Override<T>(ref T member, string name)
{ {

View File

@ -26,19 +26,18 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
public async Task ProcessSegment(SingleTask job, CancellationToken token) public async Task ProcessSegment(SingleTask job, CancellationToken token)
{ {
string inputFile = job.Job.InputFile; var inputFile = job.Job.InputFile;
string outputFile = job.OutputFileName; var outputFile = job.OutputFileName;
double start = job.SegmentStart; var start = job.SegmentStart;
double length = job.SegmentLength; var length = job.SegmentLength;
int videoWidth = job.Info.Width; var videoWidth = job.Info.Width;
int videoHeight = job.Info.Height; var videoHeight = job.Info.Height;
double fps = job.Info.Fps; var fps = job.Info.Fps;
double bitrate = job.Info.Bitrate; var bitrate = job.Info.Bitrate;
string[] ffmpegPassthroughParameters = job.Job.Passthrough; var ffmpegPassthroughParameters = job.Job.Passthrough;
var name = Path.GetFileNameWithoutExtension(outputFile); var name = Path.GetFileNameWithoutExtension(outputFile);
// 1) Probe source video
if (videoWidth <= 0 || videoHeight <= 0 || fps <= 0) if (videoWidth <= 0 || videoHeight <= 0 || fps <= 0)
{ {
LogError($"{name}: ffprobe failed to get metadata"); LogError($"{name}: ffprobe failed to get metadata");
@ -51,16 +50,29 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
return; return;
} }
var encWidth = job.Job.Debug ? videoWidth : job.Job.Crop.Value.width; // Processing size (what you crop / feed into enhancer)
var encHeight = job.Job.Debug ? videoHeight : job.Job.Crop.Value.height; var procWidth = job.Job.Debug ? videoWidth : job.Job.Crop.Value.width;
var procHeight = job.Job.Debug ? videoHeight : job.Job.Crop.Value.height;
LogInfo($"{name}: src={videoWidth}x{videoHeight} @ {fps:F3}fps, seg=[{start:F3},{length:F3}] enc={encWidth}x{encHeight}"); IVideoEnhancer? enhancer = null;
const int window = 5;
if (job.Job.Enhance)
{
enhancer = new RealBasicVsr2xDmlEnhancer();
await enhancer.InitializeAsync(procWidth, procHeight, window, token);
}
// Encoding size (what FFmpeg encoder expects)
var encWidth = enhancer != null ? procWidth * enhancer.ResolutionMultiplier : procWidth;
var encHeight = enhancer != null ? procHeight * enhancer.ResolutionMultiplier : procHeight;
LogInfo($"{name}: src={videoWidth}x{videoHeight} @ {fps:F3}fps, seg=[{start:F3},{length:F3}] proc={procWidth}x{procHeight} enc={encWidth}x{encHeight}");
// 2) Start FFmpeg decode (video only → raw BGR24 to stdout)
var decode = await StartFfmpegDecode(inputFile, start, length, job.Job.Rotate, job.Job.PlainText, token); var decode = await StartFfmpegDecode(inputFile, start, length, job.Job.Rotate, job.Job.PlainText, token);
using var decodeStdout = decode.StandardOutput.BaseStream; using var decodeStdout = decode.StandardOutput.BaseStream;
// 3) Start FFmpeg encode (video from stdin + audio from original)
var encode = await StartFfmpegEncode( var encode = await StartFfmpegEncode(
inputFile, inputFile,
outputFile, outputFile,
@ -75,89 +87,118 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
using var encodeStdin = encode.StandardInput.BaseStream; using var encodeStdin = encode.StandardInput.BaseStream;
// Separate input/output sizes and buffers // Input: always full frame
var inBytes = videoWidth * videoHeight * 3; var inBytes = videoWidth * videoHeight * 3;
var outBytes = encWidth * encHeight * 3;
// Output: encoded frame size (may be 4x if enhancement enabled)
var outBytes = encWidth * encHeight * 3;
var inBuffer = new byte[inBytes]; var inBuffer = new byte[inBytes];
var outBuffer = new byte[outBytes]; var outBuffer = new byte[outBytes];
using var frameMat = new Mat(videoHeight, videoWidth, MatType.CV_8UC3); using var frameMat = new Mat(videoHeight, videoWidth, MatType.CV_8UC3);
using var outMat = new Mat(encHeight, encWidth, MatType.CV_8UC3);
// outMat is processing size (crop), not necessarily encoding size
using var outMat = new Mat(procHeight, procWidth, MatType.CV_8UC3);
var kalman = new KalmanTracker(); var kalman = new KalmanTracker();
var camera = new CameraController( var camera = new CameraController(
videoWidth, videoWidth,
videoHeight, videoHeight,
job.Job.Crop.Value.width, job.Job.Crop.Value.width,
job.Job.Crop.Value.height, job.Job.Crop.Value.height,
kalman, kalman,
job.Job); job.Job);
var startTime = DateTime.UtcNow; try
var totalFrames = (int)Math.Round(length * fps);
var frameIndex = 0;
while (frameIndex < totalFrames)
{ {
token.ThrowIfCancellationRequested(); var startTime = DateTime.UtcNow;
var totalFrames = (int)Math.Round(length * fps);
frameIndex++; var frameIndex = 0;
var read = await ReadExact(decodeStdout, inBuffer, 0, inBytes, token); var enhancedOutput = new Mat[window];
if (read != inBytes) //totalFrames = 10;
break; while (frameIndex < totalFrames)
// input frame → Mat
Marshal.Copy(inBuffer, 0, frameMat.Data, inBytes);
var objects = _detector.DetectAll(frameMat);
var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
camera.Update(primary);
var roi = camera.Roi;
if (job.Job.Debug)
{ {
DrawDebug(frameMat, objects, camera, kalman); token.ThrowIfCancellationRequested();
frameMat.CopyTo(outMat);
} frameIndex++;
else
{ var read = await ReadExact(decodeStdout, inBuffer, 0, inBytes, token);
using var cropped = new Mat(frameMat, roi); if (read != inBytes)
cropped.CopyTo(outMat); break;
Marshal.Copy(inBuffer, 0, frameMat.Data, inBytes);
var objects = _detector.DetectAll(frameMat);
var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
camera.Update(primary);
var roi = camera.Roi;
if (job.Job.Debug)
{
DrawDebug(frameMat, objects, camera, kalman);
frameMat.CopyTo(outMat); // outMat: procWidth x procHeight == full frame in debug
}
else
{
using var cropped = new Mat(frameMat, roi);
cropped.CopyTo(outMat); // outMat: procWidth x procHeight == crop
}
Mat frameToWrite = outMat;
if (enhancer != null)
{
if (enhancer.TryProcessFrame(outMat, out var enhanced, token))
frameToWrite = enhanced; // enhanced: encWidth x encHeight
else
continue;
}
Marshal.Copy(frameToWrite.Data, outBuffer, 0, outBytes);
encodeStdin.Write(outBuffer, 0, outBytes);
var elapsed = DateTime.UtcNow - startTime;
var progress = totalFrames > 0 ? (double)frameIndex / totalFrames : 0.0;
var speed = elapsed.TotalSeconds > 0 ? (frameIndex / elapsed.TotalSeconds) / fps : 0.0;
var remainingFrames = Math.Max(totalFrames - frameIndex, 0);
var etaSeconds = speed > 0 ? remainingFrames / speed : 0.0;
var eta = TimeSpan.FromSeconds(etaSeconds);
DrawProgress(name, progress, eta, speed);
} }
// output Mat → outBuffer if (enhancer != null)
Marshal.Copy(outMat.Data, outBuffer, 0, outBytes); {
encodeStdin.Write(outBuffer, 0, outBytes); int count = enhancer.Flush(enhancedOutput, token);
for (int i = 0; i < count; i++)
{
var mat = enhancedOutput[i]; // encWidth x encHeight
Marshal.Copy(mat.Data, outBuffer, 0, outBytes);
encodeStdin.Write(outBuffer, 0, outBytes);
}
}
var elapsed = DateTime.UtcNow - startTime; encodeStdin.Flush();
var progress = totalFrames > 0 ? (double)frameIndex / totalFrames : 0.0; encodeStdin.Close();
var speed = elapsed.TotalSeconds > 0 ? (frameIndex / elapsed.TotalSeconds) / fps : 0.0;
var remainingFrames = Math.Max(totalFrames - frameIndex, 0);
var etaSeconds = speed > 0 ? remainingFrames / speed : 0.0;
var eta = TimeSpan.FromSeconds(etaSeconds);
DrawProgress(name, progress, eta, speed); await encode.WaitForExitAsync();
}
finally
{
if (enhancer is IAsyncDisposable asyncDisp)
await asyncDisp.DisposeAsync();
else if (enhancer is IDisposable disp)
disp?.Dispose();
} }
encodeStdin.Flush();
// loop finished
encodeStdin.Flush();
encodeStdin.Close(); // must happen before waiting encode
await encode.WaitForExitAsync();
// belt-and-braces: if decode is still alive, kill it
try { if (!decode.HasExited) decode.Kill(entireProcessTree: true); } catch { } try { if (!decode.HasExited) decode.Kill(entireProcessTree: true); } catch { }
try { if (!decode.HasExited) await decode.WaitForExitAsync(); } catch { } try { if (!decode.HasExited) await decode.WaitForExitAsync(); } catch { }
ClearProgress(name); ClearProgress(name);
if (encode.ExitCode != 0) if (encode.ExitCode != 0)
LogError($"{name}: FFmpeg encoding failed"); LogError($"{name}: FFmpeg encoding failed");
else else
@ -245,7 +286,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
? $"-vf setsar={info.SampleAspectRatio} " ? $"-vf setsar={info.SampleAspectRatio} "
: ""; : "";
string darArg = ""; var darArg = "";
if (info.Sar is { } s) if (info.Sar is { } s)
{ {
@ -254,8 +295,8 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
var darDen = height * s.Y; var darDen = height * s.Y;
// clamp to int and reduce // clamp to int and reduce
int dn = (int)Math.Min(int.MaxValue, Math.Max(int.MinValue, darNum)); var dn = (int)Math.Min(int.MaxValue, Math.Max(int.MinValue, darNum));
int dd = (int)Math.Min(int.MaxValue, Math.Max(int.MinValue, darDen)); var dd = (int)Math.Min(int.MaxValue, Math.Max(int.MinValue, darDen));
ReduceFraction(ref dn, ref dd); ReduceFraction(ref dn, ref dd);
if (dn > 0 && dd > 0) if (dn > 0 && dd > 0)
@ -385,7 +426,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
var bestIndex = 0; var bestIndex = 0;
var bestArea = float.MinValue; var bestArea = float.MinValue;
for (int i = 0; i < foundObjects.Count; i++) for (var i = 0; i < foundObjects.Count; i++)
{ {
var f = foundObjects[i]; var f = foundObjects[i];
var area = f.box.Width * f.box.Height; var area = f.box.Width * f.box.Height;
@ -404,7 +445,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
var bestIndex = 0; var bestIndex = 0;
var bestDist2 = float.MaxValue; var bestDist2 = float.MaxValue;
for (int i = 0; i < foundObjects.Count; i++) for (var i = 0; i < foundObjects.Count; i++)
{ {
var f = foundObjects[i]; var f = foundObjects[i];
var dx = f.center.X - prev.X; var dx = f.center.X - prev.X;

View File

@ -95,7 +95,7 @@ public sealed class CameraController
_dropoutCounter = 0; _dropoutCounter = 0;
} }
bool isLost = !objectCenter.HasValue; var isLost = !objectCenter.HasValue;
// LOST / REACQUIRE STATE MACHINE // LOST / REACQUIRE STATE MACHINE
if (isLost) if (isLost)
@ -147,7 +147,7 @@ public sealed class CameraController
{ {
smoothedCenter = _kalman.Update(objectCenter); smoothedCenter = _kalman.Update(objectCenter);
float driftEasing = 0.01f; var driftEasing = 0.01f;
var fallbackCenter = new Point2f(_videoWidth / 2f, _videoHeight / 2f); var fallbackCenter = new Point2f(_videoWidth / 2f, _videoHeight / 2f);
_cameraCenter = new Point2f( _cameraCenter = new Point2f(

View File

@ -0,0 +1,7 @@
namespace splitter.algo;
/// <summary>
/// Object detector that finds nothing; every frame yields an empty result.
/// </summary>
public class DummyDetector : IObjectDetector
{
    /// <summary>
    /// Returns a freshly allocated empty list without inspecting <paramref name="frameCont"/>.
    /// </summary>
    public List<(Rect box, Point2f center)> DetectAll(Mat frameCont)
    {
        var nothing = new List<(Rect box, Point2f center)>();
        return nothing;
    }

    /// <summary>
    /// No-op.
    /// </summary>
    public void Dispose()
    {
    }
}

View File

@ -0,0 +1,14 @@
namespace splitter.algo;
/// <summary>
/// Contract for an enhancer that receives video frames one at a time and
/// produces enhanced frames for encoding.
/// </summary>
public interface IVideoEnhancer : IAsyncDisposable
{
/// <summary>
/// Scale factor of the enhanced output. TrackingSplitter multiplies the
/// processing width and height by this value to get the encoded frame size.
/// </summary>
int ResolutionMultiplier { get; }
/// <summary>
/// Prepares the enhancer before any frame is processed.
/// </summary>
/// <param name="width">Width of the frames that will be passed to <see cref="TryProcessFrame"/>.</param>
/// <param name="height">Height of the frames that will be passed to <see cref="TryProcessFrame"/>.</param>
/// <param name="window">Window size; TrackingSplitter passes 5. NOTE(review): presumably the number of frames processed together - confirm against the implementation.</param>
/// <param name="token">Cancellation token.</param>
Task InitializeAsync(int width, int height, int window, CancellationToken token);
// Returns true when an enhanced frame is ready
// When false is returned, TrackingSplitter writes nothing for this input frame and moves on.
// NOTE(review): the visible caller never disposes 'output'; ownership presumably stays with the enhancer - confirm.
bool TryProcessFrame(Mat input, out Mat output, CancellationToken token);
// Flush remaining frames after input is finished
// The return value is used by TrackingSplitter as the number of leading entries of
// outputFrames to write to the encoder; the caller passes a buffer of 'window' elements.
int Flush(Span<Mat> outputFrames, CancellationToken token);
}

View File

@ -35,8 +35,8 @@ public sealed class KalmanTracker
_state[3] = 0; _state[3] = 0;
// Large initial uncertainty // Large initial uncertainty
for (int i = 0; i < 4; i++) for (var i = 0; i < 4; i++)
for (int j = 0; j < 4; j++) for (var j = 0; j < 4; j++)
_p[i, j] = (i == j) ? 1f : 0f; _p[i, j] = (i == j) ? 1f : 0f;
} }
@ -63,16 +63,16 @@ public sealed class KalmanTracker
var z = measurement.Value; var z = measurement.Value;
// Innovation y = z - Hx // Innovation y = z - Hx
float yx = z.X - _state[0]; var yx = z.X - _state[0];
float yy = z.Y - _state[1]; var yy = z.Y - _state[1];
// Innovation covariance S = P + R // Innovation covariance S = P + R
float Sx = _p[0, 0] + _r; var Sx = _p[0, 0] + _r;
float Sy = _p[1, 1] + _r; var Sy = _p[1, 1] + _r;
// Kalman gain K = P / S // Kalman gain K = P / S
float Kx0 = _p[0, 0] / Sx; var Kx0 = _p[0, 0] / Sx;
float Kx1 = _p[1, 1] / Sy; var Kx1 = _p[1, 1] / Sy;
// Update state // Update state
_state[0] += Kx0 * yx; _state[0] += Kx0 * yx;

View File

@ -0,0 +1,85 @@
using System.Text;
using Onnxify;
/// <summary>
/// Diagnostic helper that renders the contents of an ONNX model file as plain text.
/// </summary>
public static class OnnxInspector
{
    /// <summary>
    /// Loads the model at <paramref name="modelPath"/> and lists its metadata, opset imports,
    /// graph inputs, graph outputs, initializers and nodes, one item per line.
    /// </summary>
    /// <param name="modelPath">Path of the ONNX model file to inspect.</param>
    /// <returns>
    /// The textual report, or the message "File not found: " followed by the path
    /// when the file does not exist.
    /// </returns>
    public static string GetOnnxInfo(string modelPath)
    {
        var sb = new StringBuilder(4096);

        // A missing file is reported in the returned text instead of throwing.
        if (!File.Exists(modelPath))
        {
            sb.Append("File not found: ").Append(modelPath);
            return sb.ToString();
        }

        // Load ONNX model
        var model = OnnxModel.FromFile(modelPath);

        sb.AppendLine("=== MODEL METADATA ===");
        sb.Append("IR Version: ").AppendLine(model.IrVersion.ToString());
        sb.Append("Producer Name: ").AppendLine(model.ProducerName);
        sb.Append("Producer Version: ").AppendLine(model.ProducerVersion);
        sb.Append("Domain: ").AppendLine(model.Domain);
        sb.Append("Model Version: ").AppendLine(model.ModelVersion.ToString());
        sb.Append("Doc String: ").AppendLine(model.Document);
        sb.AppendLine();

        sb.AppendLine("=== OPSET IMPORTS ===");
        foreach (var opset in model.OpsetImport)
        {
            sb.Append("Domain: ").Append(opset.Domain)
              .Append(" Version: ").AppendLine(opset.Version.ToString());
        }
        sb.AppendLine();

        var graph = model.Graph;

        sb.AppendLine("=== GRAPH INPUTS ===");
        foreach (var input in graph.Inputs)
        {
            sb.Append("Name: ").AppendLine(input.Name);
            if (input.Type?.Denotation != null)
            {
                sb.Append(" Denotation: ").AppendLine(input.Type?.Denotation);
            }
        }
        sb.AppendLine();

        sb.AppendLine("=== GRAPH OUTPUTS ===");
        foreach (var output in graph.Outputs)
        {
            sb.Append("Name: ").AppendLine(output.Name);
            if (output.Type?.Denotation != null)
            {
                sb.Append(" Denotation: ").AppendLine(output.Type?.Denotation);
            }
        }
        sb.AppendLine();

        sb.AppendLine("=== INITIALIZERS ===");
        foreach (var init in graph.Initializers)
        {
            sb.Append("Name: ").AppendLine(init.Name);
            sb.Append(" DataType: ").AppendLine(init.DataType.ToString());
            sb.Append(" Dims: ").AppendLine(string.Join("x", init.Shape));
        }
        sb.AppendLine();

        sb.AppendLine("=== NODES ===");
        foreach (var node in graph.Nodes)
        {
            sb.Append("OpType: ").AppendLine(node.OpType);
            sb.Append(" Name: ").AppendLine(node.Name);
            sb.Append(" Inputs: ").AppendLine(string.Join(", ", node.Inputs));
            sb.Append(" Outputs: ").AppendLine(string.Join(", ", node.Outputs));
            foreach (var attr in node.Attributes)
            {
                // Terminate each attribute line; without the line break all attributes
                // and the following node's "OpType:" header end up on a single line.
                sb.Append(" Attr: ").AppendLine(attr.Name);
            }
        }

        return sb.ToString();
    }
}

View File

@ -0,0 +1,324 @@
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
namespace splitter.algo;
/// <summary>
/// x2 video enhancer that runs the "realbasicvsr_x2.onnx" model through ONNX Runtime
/// with the DirectML execution provider.
/// </summary>
/// <remarks>
/// Frames are collected in a sliding window of <c>window</c> frames. No output is
/// produced until the window is full; after that every call runs the model on the
/// whole window and returns one upscaled frame.
/// The returned <see cref="Mat"/> is owned by this instance and is overwritten by the
/// next call, so callers must consume or clone it before processing another frame.
/// </remarks>
public sealed unsafe class RealBasicVsr2xDmlEnhancer : IVideoEnhancer
{
    private const string ModelFileName = "realbasicvsr_x2.onnx";
    private const string InputName = "input";
    private const float Inv255 = 1.0f / 255.0f;

    /// <summary>Output width/height divided by input width/height.</summary>
    public int ResolutionMultiplier => 2;

    private InferenceSession _session;
    private SessionOptions _options;
    private int _inW;
    private int _inH;
    private int _window;
    private readonly Queue<Mat> _frames = new Queue<Mat>(32);
    private float[] _inputBuffer;
    private DenseTensor<float> _inputTensor;
    private Mat _outputMat;
    private readonly List<NamedOnnxValue> _inputList = new List<NamedOnnxValue>(1);

    /// <summary>
    /// Creates the inference session and allocates the reusable input buffer and output Mat.
    /// Calling it again releases everything created by the previous call first.
    /// </summary>
    /// <param name="width">Input frame width in pixels; must be positive.</param>
    /// <param name="height">Input frame height in pixels; must be positive.</param>
    /// <param name="window">Number of frames fed to the model per run; must be positive.</param>
    /// <param name="token">Not observed; initialization is synchronous.</param>
    /// <returns>An already completed task.</returns>
    /// <exception cref="ArgumentOutOfRangeException">A dimension or the window is not positive.</exception>
    /// <exception cref="OverflowException">The requested buffer size does not fit into an int.</exception>
    public Task InitializeAsync(int width, int height, int window, CancellationToken token)
    {
        if (width <= 0)
            throw new ArgumentOutOfRangeException(nameof(width));
        if (height <= 0)
            throw new ArgumentOutOfRangeException(nameof(height));
        if (window <= 0)
            throw new ArgumentOutOfRangeException(nameof(window));

        // Fail on oversized requests before any native resource is created.
        int inputSize = checked(window * 3 * width * height);
        int outW = checked(width * 2);
        int outH = checked(height * 2);

        // Re-initialization must not leak the previous session, options, Mat or queued frames.
        ReleaseResources();

        _inW = width;
        _inH = height;
        _window = window;

        var basePath = AppDomain.CurrentDomain.BaseDirectory;
        var modelPath = System.IO.Path.Combine(basePath, "models", ModelFileName);

        _options = new SessionOptions();
        _options.AppendExecutionProvider_DML();
        _session = new InferenceSession(modelPath, _options);

        _inputBuffer = new float[inputSize];
        _inputTensor = new DenseTensor<float>(_inputBuffer, new[] { 1, window, 3, height, width });
        _outputMat = new Mat(outH, outW, MatType.CV_8UC3);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Queues <paramref name="input"/>, runs the model once the window is full and returns
    /// the bicubic x2 upscale of <paramref name="input"/> plus the last frame of the model
    /// output, which this path treats as a residual.
    /// </summary>
    /// <param name="input">BGR frame of type CV_8UC3 with the size given to <see cref="InitializeAsync"/>.</param>
    /// <param name="output">The shared output Mat (BGR, 8-bit) on success; null otherwise.</param>
    /// <param name="token">When cancellation is requested the frame is ignored and false is returned.</param>
    /// <returns>True when <paramref name="output"/> holds a frame; false while the window is filling or on cancellation.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="input"/> has an unexpected size or type.</exception>
    /// <exception cref="InvalidOperationException">The enhancer is not initialized or the model output has an unexpected shape.</exception>
    public bool TryProcessFrame(Mat input, out Mat output, CancellationToken token)
    {
        output = null;
        if (!TryPushFrame(input, token))
            return false;

        using var results = RunModel();
        var planes = LastFramePlanes(results[0].AsTensor<float>(), out int outH, out int outW);
        int plane = outH * outW;

        // STEP 1: bicubic upscale of the newest input frame, converted to float RGB in 0..1.
        using var upBgr = new Mat();
        Cv2.Resize(input, upBgr, new Size(outW, outH), 0, 0, InterpolationFlags.Cubic);
        using var upRgb = new Mat();
        Cv2.CvtColor(upBgr, upRgb, ColorConversionCodes.BGR2RGB);
        using var baseFloat = new Mat();
        upRgb.ConvertTo(baseFloat, MatType.CV_32FC3, 1.0 / 255.0);

        // STEP 2: add the model output plane by plane and clamp to 0..1.
        float* basePtr = (float*)baseFloat.Data;
        long baseStride = baseFloat.Step() / sizeof(float);
        for (int y = 0; y < outH; y++)
        {
            float* row = basePtr + y * baseStride;
            int rowOffset = y * outW;
            for (int x = 0; x < outW; x++)
            {
                int p = x * 3;
                int i = rowOffset + x;
                row[p + 0] = Math.Clamp(row[p + 0] + planes[i], 0f, 1f);
                row[p + 1] = Math.Clamp(row[p + 1] + planes[plane + i], 0f, 1f);
                row[p + 2] = Math.Clamp(row[p + 2] + planes[2 * plane + i], 0f, 1f);
            }
        }

        // STEP 3: back to 8-bit BGR for the encoder.
        using var outRgb8 = new Mat();
        baseFloat.ConvertTo(outRgb8, MatType.CV_8UC3, 255.0);
        Cv2.CvtColor(outRgb8, _outputMat, ColorConversionCodes.RGB2BGR);

        output = _outputMat;
        return true;
    }

    /// <summary>
    /// Variant of <see cref="TryProcessFrame"/> that writes the last frame of the model
    /// output directly to the output Mat as an absolute 0..1 image instead of adding it
    /// to a bicubic upscale.
    /// </summary>
    /// <param name="input">BGR frame of type CV_8UC3 with the size given to <see cref="InitializeAsync"/>.</param>
    /// <param name="output">The shared output Mat on success; null otherwise.</param>
    /// <param name="token">When cancellation is requested the frame is ignored and false is returned.</param>
    /// <returns>True when <paramref name="output"/> holds a frame; false while the window is filling or on cancellation.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="input"/> has an unexpected size or type.</exception>
    /// <exception cref="InvalidOperationException">
    /// The enhancer is not initialized, or the model output shape is unexpected or does not match the output Mat.
    /// </exception>
    public bool TryProcessFrame2(Mat input, out Mat output, CancellationToken token)
    {
        output = null;
        if (!TryPushFrame(input, token))
            return false;

        using var results = RunModel();
        var planes = LastFramePlanes(results[0].AsTensor<float>(), out int outH, out int outW);
        int plane = outH * outW;

        // The Mat is written through a raw pointer, so its size has to match the tensor exactly.
        if (_outputMat.Rows != outH || _outputMat.Cols != outW)
        {
            throw new InvalidOperationException(
                $"Model output is {outW}x{outH} but the output frame is {_outputMat.Cols}x{_outputMat.Rows}.");
        }

        // NOTE(review): the input is fed as R,G,B planes, yet plane 0 is written to the
        // B byte here. Kept as in the original; confirm the intended channel order.
        byte* dstBase = (byte*)_outputMat.Data;
        long dstStride = _outputMat.Step();
        for (int y = 0; y < outH; y++)
        {
            byte* row = dstBase + y * dstStride;
            int rowOffset = y * outW;
            for (int x = 0; x < outW; x++)
            {
                int p = x * 3;
                int i = rowOffset + x;
                row[p + 0] = ToByte(planes[i]);
                row[p + 1] = ToByte(planes[plane + i]);
                row[p + 2] = ToByte(planes[2 * plane + i]);
            }
        }

        output = _outputMat;
        return true;
    }

    /// <summary>
    /// Produces no frames: always returns 0 and leaves <paramref name="outputFrames"/> untouched.
    /// </summary>
    public int Flush(Span<Mat> outputFrames, CancellationToken token)
    {
        return 0;
    }

    /// <summary>Releases queued frames, the session, its options and the output Mat.</summary>
    public ValueTask DisposeAsync()
    {
        ReleaseResources();
        return ValueTask.CompletedTask;
    }

    // Validates the frame, pushes a clone into the sliding window and, once the window
    // is full, refreshes the model input buffer. Returns false when no inference should run.
    private bool TryPushFrame(Mat input, CancellationToken token)
    {
        if (token.IsCancellationRequested)
            return false;
        if (_session is null)
            throw new InvalidOperationException("InitializeAsync must be called before processing frames.");
        if (input is null)
            throw new ArgumentNullException(nameof(input));

        // The buffer fill reads raw memory with the initialized dimensions, so a frame of
        // another size or pixel format would be read out of bounds.
        if (input.Rows != _inH || input.Cols != _inW || input.Type() != MatType.CV_8UC3)
        {
            throw new ArgumentException(
                $"Expected a {_inW}x{_inH} CV_8UC3 frame but got {input.Cols}x{input.Rows} {input.Type()}.",
                nameof(input));
        }

        if (_frames.Count == _window)
            _frames.Dequeue().Dispose();
        _frames.Enqueue(input.Clone());

        if (_frames.Count < _window)
            return false;

        FillInputBuffer();
        return true;
    }

    // Converts every queued CV_8UC3 BGR frame to normalized RGB planes laid out as [1,T,3,H,W].
    private void FillInputBuffer()
    {
        int height = _inH;
        int width = _inW;
        int plane = height * width;
        int t = 0;
        foreach (var frame in _frames)
        {
            byte* src = (byte*)frame.Data;
            long stride = frame.Step();
            int rBase = (t * 3 + 0) * plane;
            int gBase = (t * 3 + 1) * plane;
            int bBase = (t * 3 + 2) * plane;
            for (int y = 0; y < height; y++)
            {
                byte* row = src + y * stride;
                int rowOffset = y * width;
                for (int x = 0; x < width; x++)
                {
                    int p = x * 3;
                    int i = rowOffset + x;
                    _inputBuffer[rBase + i] = row[p + 2] * Inv255;
                    _inputBuffer[gBase + i] = row[p + 1] * Inv255;
                    _inputBuffer[bBase + i] = row[p + 0] * Inv255;
                }
            }
            t++;
        }
    }

    // Runs the session on the current contents of the input buffer. The caller disposes the result.
    private IDisposableReadOnlyCollection<DisposableNamedOnnxValue> RunModel()
    {
        _inputList.Clear();
        _inputList.Add(NamedOnnxValue.CreateFromTensor(InputName, _inputTensor));
        return _session.Run(_inputList);
    }

    // Returns the first three channel planes of the last frame of a [1,T,C,H2,W2] tensor as
    // one contiguous span, so pixels can be read without the allocating params indexer.
    private static ReadOnlySpan<float> LastFramePlanes(Tensor<float> tensor, out int outH, out int outW)
    {
        var dims = tensor.Dimensions;
        if (dims.Length != 5 || dims[1] < 1 || dims[2] < 3)
            throw new InvalidOperationException("Model output is expected to have shape [1, T, 3, H, W].");

        int frameCount = dims[1];
        int channels = dims[2];
        outH = dims[3];
        outW = dims[4];

        var dense = tensor as DenseTensor<float> ?? tensor.ToDenseTensor();
        if (dense.IsReversedStride)
            throw new InvalidOperationException("Model output with reversed strides is not supported.");

        int plane = checked(outH * outW);
        int start = checked((frameCount - 1) * channels * plane);
        return dense.Buffer.Span.Slice(start, checked(3 * plane));
    }

    // Maps a 0..1 sample to 0..255 by truncation; NaN and negatives become 0, values >= 1 become 255.
    private static byte ToByte(float value)
    {
        if (!(value > 0f))
            return 0;
        if (value >= 1f)
            return 255;
        return (byte)(value * 255.0f);
    }

    // Disposes everything this instance owns and resets the fields so the call is repeatable.
    private void ReleaseResources()
    {
        foreach (var frame in _frames)
            frame.Dispose();
        _frames.Clear();
        _inputList.Clear();

        _session?.Dispose();
        _session = null;
        _options?.Dispose();
        _options = null;
        _outputMat?.Dispose();
        _outputMat = null;

        _inputTensor = null;
        _inputBuffer = null;
    }
}

View File

@ -51,10 +51,10 @@ public sealed class UltraFaceDetector: LoggingBase, IDisposable, IObjectDetector
foreach (var f in faces) foreach (var f in faces)
{ {
int x1 = (int)f.X1; var x1 = (int)f.X1;
int y1 = (int)f.Y1; var y1 = (int)f.Y1;
int x2 = (int)f.X2; var x2 = (int)f.X2;
int y2 = (int)f.Y2; var y2 = (int)f.Y2;
var rect = new Rect( var rect = new Rect(
x1, x1,

View File

@ -125,14 +125,14 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
// Build reusable result list // Build reusable result list
_results.Clear(); _results.Clear();
for (int i = 0; i < final.Count; i++) for (var i = 0; i < final.Count; i++)
{ {
var d = final[i]; var d = final[i];
int x = (int)d.X; var x = (int)d.X;
int y = (int)d.Y; var y = (int)d.Y;
int w = (int)d.Width; var w = (int)d.Width;
int h = (int)d.Height; var h = (int)d.Height;
x = Math.Clamp(x, 0, frameCont.Width - 1); x = Math.Clamp(x, 0, frameCont.Width - 1);
y = Math.Clamp(y, 0, frameCont.Height - 1); y = Math.Clamp(y, 0, frameCont.Height - 1);
@ -155,30 +155,30 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private void FillInputTensor(Mat rgb) private void FillInputTensor(Mat rgb)
{ {
int height = _inputHeight; var height = _inputHeight;
int width = _inputWidth; var width = _inputWidth;
// NCHW: [1, 3, H, W] // NCHW: [1, 3, H, W]
int planeSize = height * width; var planeSize = height * width;
Span<float> dst = _inputBuffer.AsSpan(); Span<float> dst = _inputBuffer.AsSpan();
unsafe unsafe
{ {
for (int y = 0; y < height; y++) for (var y = 0; y < height; y++)
{ {
byte* rowPtr = (byte*)rgb.Ptr(y).ToPointer(); var rowPtr = (byte*)rgb.Ptr(y).ToPointer();
var rowSpan = new Span<byte>(rowPtr, width * 3); var rowSpan = new Span<byte>(rowPtr, width * 3);
int srcIndex = 0; var srcIndex = 0;
for (int x = 0; x < width; x++) for (var x = 0; x < width; x++)
{ {
byte r = rowSpan[srcIndex + 0]; var r = rowSpan[srcIndex + 0];
byte g = rowSpan[srcIndex + 1]; var g = rowSpan[srcIndex + 1];
byte b = rowSpan[srcIndex + 2]; var b = rowSpan[srcIndex + 2];
int offset = y * width + x; var offset = y * width + x;
// channel 0: R // channel 0: R
dst[offset] = r * _inv255; dst[offset] = r * _inv255;
@ -205,27 +205,27 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
detections.Clear(); detections.Clear();
// YOLOv8 output: [1, 84, 8400] // YOLOv8 output: [1, 84, 8400]
int channels = output.Dimensions[1]; // 84 var channels = output.Dimensions[1]; // 84
int count = output.Dimensions[2]; // 8400 var count = output.Dimensions[2]; // 8400
float xScale = (float)originalWidth / 640f; var xScale = (float)originalWidth / 640f;
float yScale = (float)originalHeight / 640f; var yScale = (float)originalHeight / 640f;
for (int i = 0; i < count; i++) for (var i = 0; i < count; i++)
{ {
float x = output[0, 0, i]; var x = output[0, 0, i];
float y = output[0, 1, i]; var y = output[0, 1, i];
float w = output[0, 2, i]; var w = output[0, 2, i];
float h = output[0, 3, i]; var h = output[0, 3, i];
float classScore = output[0, 4 + classIndex, i]; var classScore = output[0, 4 + classIndex, i];
if (classScore < scoreThreshold) if (classScore < scoreThreshold)
continue; continue;
float left = (x - w / 2f) * xScale; var left = (x - w / 2f) * xScale;
float top = (y - h / 2f) * yScale; var top = (y - h / 2f) * yScale;
float width = w * xScale; var width = w * xScale;
float height = h * yScale; var height = h * yScale;
detections.Add(new Detection detections.Add(new Detection
( (
@ -252,12 +252,12 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
// Sort in-place by score descending // Sort in-place by score descending
detections.Sort(static (a, b) => b.Score.CompareTo(a.Score)); detections.Sort(static (a, b) => b.Score.CompareTo(a.Score));
for (int i = 0; i < detections.Count; i++) for (var i = 0; i < detections.Count; i++)
{ {
var candidate = detections[i]; var candidate = detections[i];
bool keep = true; var keep = true;
for (int j = 0; j < nmsBuffer.Count; j++) for (var j = 0; j < nmsBuffer.Count; j++)
{ {
if (IoU(candidate, nmsBuffer[j]) >= nmsThreshold) if (IoU(candidate, nmsBuffer[j]) >= nmsThreshold)
{ {
@ -276,23 +276,23 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static float IoU(in Detection a, in Detection b) private static float IoU(in Detection a, in Detection b)
{ {
float x1 = MathF.Max(a.X, b.X); var x1 = MathF.Max(a.X, b.X);
float y1 = MathF.Max(a.Y, b.Y); var y1 = MathF.Max(a.Y, b.Y);
float x2 = MathF.Min(a.X + a.Width, b.X + b.Width); var x2 = MathF.Min(a.X + a.Width, b.X + b.Width);
float y2 = MathF.Min(a.Y + a.Height, b.Y + b.Height); var y2 = MathF.Min(a.Y + a.Height, b.Y + b.Height);
float interW = x2 - x1; var interW = x2 - x1;
if (interW <= 0f) return 0f; if (interW <= 0f) return 0f;
float interH = y2 - y1; var interH = y2 - y1;
if (interH <= 0f) return 0f; if (interH <= 0f) return 0f;
float interArea = interW * interH; var interArea = interW * interH;
float areaA = a.Width * a.Height; var areaA = a.Width * a.Height;
float areaB = b.Width * b.Height; var areaB = b.Width * b.Height;
float union = areaA + areaB - interArea; var union = areaA + areaB - interArea;
if (union <= 0f) return 0f; if (union <= 0f) return 0f;
return interArea / union; return interArea / union;

Binary file not shown.

View File

@ -42,28 +42,28 @@ public sealed class FrameRotationDetector
Cv2.CartToPolar(_gx, _gy, _mag, _angle, angleInDegrees: true); Cv2.CartToPolar(_gx, _gy, _mag, _angle, angleInDegrees: true);
// 4. Clear histogram // 4. Clear histogram
for (int i = 0; i < _bins; i++) for (var i = 0; i < _bins; i++)
_hist[i] = 0; _hist[i] = 0;
float binSize = 180f / _bins; var binSize = 180f / _bins;
unsafe unsafe
{ {
float* anglePtr = (float*)_angle.Data; var anglePtr = (float*)_angle.Data;
float* magPtr = (float*)_mag.Data; var magPtr = (float*)_mag.Data;
int total = _w * _h; var total = _w * _h;
for (int i = 0; i < total; i++) for (var i = 0; i < total; i++)
{ {
float m = magPtr[i]; var m = magPtr[i];
if (m < 5f) continue; // ignore weak gradients if (m < 5f) continue; // ignore weak gradients
float a = anglePtr[i]; var a = anglePtr[i];
if (a < 0) a += 360f; if (a < 0) a += 360f;
a = a % 180f; a = a % 180f;
int bin = (int)(a / binSize); var bin = (int)(a / binSize);
if (bin < 0) bin = 0; if (bin < 0) bin = 0;
if (bin >= _bins) bin = _bins - 1; if (bin >= _bins) bin = _bins - 1;
@ -73,12 +73,12 @@ public sealed class FrameRotationDetector
// 5. Energy around 0° vs 90° // 5. Energy around 0° vs 90°
float e0 = 0, e90 = 0; float e0 = 0, e90 = 0;
int window = 3; var window = 3;
int bin0 = 0; var bin0 = 0;
int bin90 = _bins / 2; var bin90 = _bins / 2;
for (int i = -window; i <= window; i++) for (var i = -window; i <= window; i++)
{ {
e0 += _hist[Wrap(bin0 + i)]; e0 += _hist[Wrap(bin0 + i)];
e90 += _hist[Wrap(bin90 + i)]; e90 += _hist[Wrap(bin90 + i)];

View File

@ -72,7 +72,7 @@ public static class ProbeVideo
var width = stream?.Width ?? 0; var width = stream?.Width ?? 0;
var height = stream?.Height ?? 0; var height = stream?.Height ?? 0;
double fps = 0.0; var fps = 0.0;
if (!string.IsNullOrWhiteSpace(stream?.Avg_frame_rate)) if (!string.IsNullOrWhiteSpace(stream?.Avg_frame_rate))
{ {
var parts = stream.Avg_frame_rate.Split('/'); var parts = stream.Avg_frame_rate.Split('/');

View File

@ -29,8 +29,8 @@ public sealed class VideoRotationSampler
RotationDetectorFrameHeight = int.Parse(s); RotationDetectorFrameHeight = int.Parse(s);
} }
int w = RotationDetectorFrameWidth; var w = RotationDetectorFrameWidth;
int h = RotationDetectorFrameHeight; var h = RotationDetectorFrameHeight;
_buffer = new byte[w * h * 3]; // raw BGR24 buffer _buffer = new byte[w * h * 3]; // raw BGR24 buffer
_frameMat = new Mat(h, w, MatType.CV_8UC3); // wraps buffer _frameMat = new Mat(h, w, MatType.CV_8UC3); // wraps buffer
@ -46,9 +46,9 @@ public sealed class VideoRotationSampler
var rotations = new List<int>(); var rotations = new List<int>();
for (int i = 0; i < RotationDetectorSampleCount; i++) for (var i = 0; i < RotationDetectorSampleCount; i++)
{ {
double t = videoLengthSeconds * (i + 1) / (RotationDetectorSampleCount + 1); var t = videoLengthSeconds * (i + 1) / (RotationDetectorSampleCount + 1);
var frame = await DecodeSingleFrameAsync( var frame = await DecodeSingleFrameAsync(
inputFile, inputFile,
@ -60,7 +60,7 @@ public sealed class VideoRotationSampler
if (frame != null && !frame.Empty()) if (frame != null && !frame.Empty())
{ {
int rot = _detector.GetRotation(frame); var rot = _detector.GetRotation(frame);
rotations.Add(rot); rotations.Add(rot);
} }
} }
@ -80,8 +80,8 @@ public sealed class VideoRotationSampler
counts[v]++; counts[v]++;
} }
int best = 0; var best = 0;
int bestCount = 0; var bestCount = 0;
foreach (var kv in counts) foreach (var kv in counts)
{ {

View File

@ -59,9 +59,10 @@
<ItemGroup> <ItemGroup>
<PackageReference Include="FFmpeg.AutoGen" Version="8.1.0" /> <PackageReference Include="FFmpeg.AutoGen" Version="8.1.0" />
<PackageReference Include="Microsoft.ML.OnnxRuntime.DirectML" Version="1.24.4" /> <PackageReference Include="Microsoft.ML.OnnxRuntime.DirectML" Version="1.24.4" />
<PackageReference Include="OpenCvSharp4" Version="4.13.0.20260427" /> <PackageReference Include="Onnxify" Version="0.1.4" />
<PackageReference Include="OpenCvSharp4.runtime.win" Version="4.13.0.20260302" /> <PackageReference Include="OpenCvSharp4" Version="4.13.0.20260602" />
<PackageReference Include="Spectre.Console" Version="0.55.2" /> <PackageReference Include="OpenCvSharp4.runtime.win" Version="4.13.0.20260602" />
<PackageReference Include="Spectre.Console" Version="0.56.0" />
<PackageReference Include="UltraFaceDotNet" Version="1.0.0.2" /> <PackageReference Include="UltraFaceDotNet" Version="1.0.0.2" />
</ItemGroup> </ItemGroup>

View File

@ -39,7 +39,7 @@ public sealed class SpectreConsoleLogger : ILogger, IDisposable
lock (_sync) lock (_sync)
{ {
_numberOfProcesses = Math.Max(1, value); _numberOfProcesses = Math.Max(1, value);
for (int i = 0; i < _numberOfProcesses; i++) for (var i = 0; i < _numberOfProcesses; i++)
{ {
if (!_progress.ContainsKey(i)) if (!_progress.ContainsKey(i))
_progress[i] = ProgressEntry.Empty; _progress[i] = ProgressEntry.Empty;
@ -282,17 +282,17 @@ public sealed class SpectreConsoleLogger : ILogger, IDisposable
if (width <= 0) if (width <= 0)
return string.Empty; return string.Empty;
int filled = (int)Math.Round(progress * width); var filled = (int)Math.Round(progress * width);
int empty = width - filled; var empty = width - filled;
if (filled <= 0) if (filled <= 0)
return $"[grey]{new string('─', width)}[/]"; return $"[grey]{new string('─', width)}[/]";
// Split filled part into three segments: blue / yellow / green // Split filled part into three segments: blue / yellow / green
// low progress: mostly blue; mid: yellow; high: green // low progress: mostly blue; mid: yellow; high: green
int blueCount = (int)Math.Round(filled * 0.33); var blueCount = (int)Math.Round(filled * 0.33);
int yellowCount = (int)Math.Round(filled * 0.34); var yellowCount = (int)Math.Round(filled * 0.34);
int greenCount = filled - blueCount - yellowCount; var greenCount = filled - blueCount - yellowCount;
var sb = new StringBuilder(); var sb = new StringBuilder();

View File

@ -8,8 +8,8 @@ public static class FileMaskExpander
if (!HasMask(input)) if (!HasMask(input))
return [Path.GetFullPath(input)]; return [Path.GetFullPath(input)];
string directory = Path.GetDirectoryName(input) ?? Directory.GetCurrentDirectory(); var directory = Path.GetDirectoryName(input) ?? Directory.GetCurrentDirectory();
string pattern = Path.GetFileName(input); var pattern = Path.GetFileName(input);
if (string.IsNullOrEmpty(directory)) if (string.IsNullOrEmpty(directory))
directory = Directory.GetCurrentDirectory(); directory = Directory.GetCurrentDirectory();

View File

@ -2,6 +2,7 @@
<Folder Name="/Solution items/"> <Folder Name="/Solution items/">
<File Path=".github/workflows/publish.yml" /> <File Path=".github/workflows/publish.yml" />
<File Path=".gitignore" /> <File Path=".gitignore" />
<File Path="AGENTS.md" />
<File Path="LICENSE.txt" /> <File Path="LICENSE.txt" />
<File Path="README.md" /> <File Path="README.md" />
</Folder> </Folder>