Upscaling x2 added using realbasicvsr_x2.onnx. It turns out to be very slow and overall not worth it.

This commit is contained in:
Alexander Shabarshov 2026-06-06 16:57:02 +01:00
parent 9496d46411
commit de0d0c77fc
35 changed files with 740 additions and 220 deletions

19
AGENTS.md Normal file
View File

@ -0,0 +1,19 @@
You are a C# programmer. I'm a senior C# programmer with 30+ years of experience.
Do not be overconfident about your answers - they are 70% incorrect.
Do not say "final solution". Do not start every reply with my name.
Do not use emoji or non-ascii symbols. Do not explain "why it work".
I have a C# / .NET 10 Avalonia 12 UI for an ffmpeg/OpenCV video app. All packages are of the very latest versions.
Use namespace splitter for splitter-cli and Splitter_UI for Splitter-UI.
Splitter pipeline is:
* FFProbe extracting all video meta to VideoInfo
* FFMpeg used to decode video frames into OpenCVSharp.Mat
* One of detectors used:
- For face detection: [opencv_zoo/models/face_detection_yunet at main opencv/opencv_zoo](https://github.com/opencv/opencv_zoo/tree/main/models/face_detection_yunet)
- For body detection: [yolov8s.pt Ultralytics/YOLOv8 at main](https://huggingface.co/Ultralytics/YOLOv8/blob/main/yolov8s.pt)
* Camera control applied (CameraControl class)
* Final video frames are encoded back to video file using FFMpeg

View File

@ -3,7 +3,7 @@
Splitter is a high-performance command line tool for cutting one or more video files into equal or
fixed-length segments using multithreaded FFmpeg execution. It supports batch input, flexible
duration formats, rotation, smart face/body-aware cropping, ETA and speed reporting, with a nice GUI
or both rich and plaintext terminal output.
or both rich and plain-text terminal output.
The intended primary use case is for content creators who need to split large video files into smaller
segments for platforms like TikTok, Instagram Reels, YouTube Shorts, or similar. The smart
@ -34,7 +34,7 @@ Splitter uses FFmpeg for the actual splitting and encoding, with multi-threading
### Command line interface
![Splitter](splitter-cli/splitter.png)
### Graphical user interface
![Splitter UI](splitter-ui/screenshot.png)
![Splitter UI](Splitter-UI/screenshot.png)
## Requirements

View File

@ -39,12 +39,14 @@ internal sealed class Program
services.AddSingleton<YoloOnnxObjectDetector>();
services.AddSingleton( x => new SingleThreadedDetector<UltraFaceDetector>(x.GetRequiredService<UltraFaceDetector>()) );
services.AddSingleton(x => new SingleThreadedDetector<YoloOnnxObjectDetector>(x.GetRequiredService<YoloOnnxObjectDetector>()));
services.AddSingleton(x => new SingleThreadedDetector<DummyDetector>(x.GetRequiredService<DummyDetector>()));
services.AddSingleton<Func<string, IObjectDetector>>( x => detectorName =>
{
return detectorName switch
{
"face" => x.GetRequiredService<SingleThreadedDetector<UltraFaceDetector>>(),
"body" => x.GetRequiredService<SingleThreadedDetector<YoloOnnxObjectDetector>>(),
"none" => x.GetRequiredService<SingleThreadedDetector<DummyDetector>>(),
_ => new DummyDetector()
};
});

View File

@ -46,12 +46,12 @@ public sealed class AutoDecisionService(IThumbnailService _thumbnails, IFileProb
var targetAR = (float)CommandLine.DefaultW / CommandLine.DefaultH;
var pixelAspect = job.Probe!.Sar.X / job.Probe.Sar.Y;
float srcW = job.Probe.Width * pixelAspect;
var srcW = job.Probe.Width * pixelAspect;
float srcH = job.Probe.Height;
var srcAR = srcW / srcH;
float cropH = srcH;
float cropW = cropH * targetAR;
var cropH = srcH;
var cropW = cropH * targetAR;
if (cropW > srcW)
{
@ -59,16 +59,16 @@ public sealed class AutoDecisionService(IThumbnailService _thumbnails, IFileProb
cropH = cropW / targetAR;
}
float x = (srcW - cropW) * 0.5f;
float y = (srcH - cropH) * 0.5f;
var x = (srcW - cropW) * 0.5f;
var y = (srcH - cropH) * 0.5f;
float invPixelAspect = 1f / pixelAspect;
var invPixelAspect = 1f / pixelAspect;
float cropW_px = cropW * invPixelAspect;
float cropH_px = cropH;
var cropW_px = cropW * invPixelAspect;
var cropH_px = cropH;
float x_px = x * invPixelAspect;
float y_px = y;
var x_px = x * invPixelAspect;
var y_px = y;
job.CropText = $"{(int)MathF.Round(cropW_px)},{(int)MathF.Round(cropH_px)}";
}

View File

@ -1,7 +0,0 @@
namespace Splitter_UI.Services;
/// <summary>
/// Placeholder <see cref="IObjectDetector"/> that never reports any objects.
/// </summary>
internal class DummyDetector : IObjectDetector
{
    /// <summary>
    /// Ignores the frame and returns a new, empty detection list.
    /// </summary>
    /// <param name="frameCont">Frame to inspect; not read.</param>
    /// <returns>An empty list.</returns>
    public List<(OpenCvSharp.Rect box, Point2f center)> DetectAll(Mat frameCont)
    {
        return new List<(OpenCvSharp.Rect box, Point2f center)>();
    }

    /// <summary>
    /// Nothing to release; present only to satisfy the detector contract.
    /// </summary>
    public void Dispose()
    {
    }
}

View File

@ -63,7 +63,7 @@ public sealed class ThumbnailService : IThumbnailService
var bgraBuffer = canUseStaticBuffers ? _bgraBuffer : new byte[width.Value * height.Value * 4];
// Decode a single frame using ffmpeg → raw BGR24 into _bgrBuffer
bool ok = await DecodeFrameAsync(bgrBuffer, file, skip.Value, width.Value, height.Value, rotateDegree);
var ok = await DecodeFrameAsync(bgrBuffer, file, skip.Value, width.Value, height.Value, rotateDegree);
if (!ok)
return null;
@ -99,14 +99,14 @@ public sealed class ThumbnailService : IThumbnailService
var p = new Process { StartInfo = psi };
p.Start();
int needed = bgrBuffer.Length;
int read = 0;
var needed = bgrBuffer.Length;
var read = 0;
using var stdout = p.StandardOutput.BaseStream;
while (read < needed)
{
int r = await stdout.ReadAsync(bgrBuffer, read, needed - read);
var r = await stdout.ReadAsync(bgrBuffer, read, needed - read);
if (r == 0)
{
TryKill(p);
@ -126,12 +126,12 @@ public sealed class ThumbnailService : IThumbnailService
private static void ConvertBgrToBgra(byte[] bgr, byte[] bgra, int width, int height)
{
int si = 0;
int di = 0;
var si = 0;
var di = 0;
int totalPixels = width * height;
var totalPixels = width * height;
for (int i = 0; i < totalPixels; i++)
for (var i = 0; i < totalPixels; i++)
{
bgra[di + 0] = bgr[si + 0]; // B
bgra[di + 1] = bgr[si + 1]; // G
@ -150,7 +150,7 @@ public sealed class ThumbnailService : IThumbnailService
(height, width) = (width, height);
}
int stride = width * 4;
var stride = width * 4;
fixed (byte* p = bgra)
{

View File

@ -40,6 +40,7 @@ public partial class InspectorPaneViewModel : ObservableObject
job.OutputFolder = Selected.OutputFolder;
job.OverrideTargetDuration = Selected.OverrideTargetDuration;
job.PassthroughText = Selected.PassthroughText;
job.Enhance = Selected.Enhance;
job.ParametersList.Clear();
foreach (var param in Selected.ParametersList)

View File

@ -157,6 +157,18 @@ public partial class JobViewModel : ObservableObject
}
}
/// <summary>
/// Mirrors <c>Job.Enhance</c> and raises a property-changed notification
/// only when the stored value actually changes.
/// </summary>
public bool Enhance
{
    get
    {
        return Job.Enhance;
    }
    set
    {
        if (Job.Enhance != value)
        {
            Job.Enhance = value;
            OnPropertyChanged();
        }
    }
}
public int? Rotate
{
get => Job.Rotate;

View File

@ -72,7 +72,7 @@ public partial class MainViewModel : ViewModelBase
jobs.AddRange(fileJobs);
}
await _processor.ProcessJobs(jobs, false, _cancellationTokenSource.Token);
await _processor.ProcessJobs(jobs, jobs.First().Job.Enhance, _cancellationTokenSource.Token);
}
catch (Exception ex)
{

View File

@ -1,7 +1,6 @@
using System.Collections.ObjectModel;
using CommunityToolkit.Mvvm.ComponentModel;
using CommunityToolkit.Mvvm.Input;
using Splitter_UI.Views;
namespace Splitter_UI.ViewModels;

View File

@ -92,6 +92,16 @@ x:DataType="vm:InspectorPaneViewModel">
<NumericUpDown Value="{Binding Selected.OverrideTargetDuration}" Width="120"/>
</StackPanel>
<!-- Enhance -->
<StackPanel Orientation="Horizontal" Spacing="8">
<CheckBox Content="Enhance resolution x2"
IsChecked="{Binding Selected.Enhance}"/>
<TextBlock Text="(Very slow and not worth it!)"
Foreground="#FFFF80FF"
FontSize="10"
Margin="0,12,0,0"/>
</StackPanel>
<!-- ForceFixed -->
<CheckBox Content="Force Fixed Duration"
IsChecked="{Binding Selected.ForceFixed}"/>

View File

@ -1,5 +1,3 @@
using Avalonia.Controls;
namespace Splitter_UI.Views;
public partial class MainWindow : Avalonia.Controls.Window

View File

@ -279,8 +279,8 @@ public sealed class PreviewCanvas : Control
var scale = Math.Min(dispW / displayW, dispH / displayH);
double dx = dxCanvas / scale;
double dy = dyCanvas / scale;
var dx = dxCanvas / scale;
var dy = dyCanvas / scale;
if (rotate == 0 || rotate == 180)
dx /= pixelAspect;
@ -288,8 +288,8 @@ public sealed class PreviewCanvas : Control
dy /= pixelAspect;
// start normalized → pixel
double gx = _dragStartValue.X * rawW + dx;
double gy = _dragStartValue.Y * rawH + dy;
var gx = _dragStartValue.X * rawW + dx;
var gy = _dragStartValue.Y * rawH + dy;
switch (rotate)
{
@ -368,8 +368,8 @@ public sealed class PreviewCanvas : Control
var g = GravitateTo;
// normalized → pixel
double px = g.X * rawW;
double py = g.Y * rawH;
var px = g.X * rawW;
var py = g.Y * rawH;
var (sx, sy) = TransformPoint(
px, py,

View File

@ -74,6 +74,10 @@ public sealed class CommandLine
{
Master.Rotate = 90;
}
else if (arg == "--enhance")
{
Master.Enhance = true;
}
else if (arg.StartsWith("--rotate="))
{
var val = arg.Substring("--rotate=".Length);
@ -328,6 +332,9 @@ Options:
Last segment may be shorter.
Default: OFF
--enhance Enable video enhancement.
Increases output resolution x2 using RealBasicVSR_x2 model.
--rotate=<degrees> Rotate video by specified degrees (90, 180, 270).
Useful for videos with incorrect orientation metadata.

View File

@ -65,6 +65,7 @@ public class JobProcessor(ILogger logger) : LoggingBase(logger, 0), IJobProcesso
{
"face" => new UltraFaceDetector(_logger),
"body" => new YoloOnnxObjectDetector(_logger),
"none" => new DummyDetector(),
_ => throw new InvalidOperationException($"Unknown detector: {job.Detect}")
};
return new TrackingSplitter(i, detector, job, _logger);
@ -146,7 +147,7 @@ public class JobProcessor(ILogger logger) : LoggingBase(logger, 0), IJobProcesso
tasks.Add(Task.Run(async () =>
{
int slot = -1;
var slot = -1;
try
{

View File

@ -147,6 +147,7 @@ All option names are preserved exactly, and descriptions are consolidated for cl
| **--mask=<pattern>** | Custom output filename pattern. Default: `[NAME]_seg[NN].[EXT]`. Supports `[NAME]`, `[N]`, `[NN]`, `[NNN]`, `[NNNN]`, `[EXT]`. Example: `--mask="[NAME]_[NNNN].mp4"`. |
| **--duration=<value>** | Override target segment duration. Formats: `Ns`, `NmMs`, `N`. Examples: `--duration=90s`, `--duration=2m30s`, `--duration=45`. Without `--force`: max 58 seconds, equalized across segments. |
| **--force** | Use the duration exactly as provided. Last segment may be shorter. |
| **--enhance** | Enable video enhancement. Increases output resolution x2 using the RealBasicVSR_x2 model. Very slow. |
| **--rotate=<degrees>** | Rotate video by 90, 180, or 270 degrees. Useful for correcting orientation metadata. |
| **--rotate-auto** | Use automatic rotation detection. |
| **--estimate** | Print calculated segment information and exit. No splitting is performed. |

View File

@ -7,10 +7,10 @@ public class SimpleSplitter(int segmentNo, ILogger logger) : LoggingBase(logger,
{
public async Task ProcessSegment(SingleTask job, CancellationToken token)
{
string inputFile = job.Job.InputFile;
string outputFile = job.OutputFileName;
double start = job.SegmentStart;
double length = job.SegmentLength;
var inputFile = job.Job.InputFile;
var outputFile = job.OutputFileName;
var start = job.SegmentStart;
var length = job.SegmentLength;
var rotation = GetRotationFilter(job.Job.Rotate);
@ -36,12 +36,12 @@ public class SimpleSplitter(int segmentNo, ILogger logger) : LoggingBase(logger,
{
// Rotation path: must re-encode and recompute DAR
long sarNum = Convert.ToInt64(job.Info.Sar.X);
long sarDen = Convert.ToInt64(job.Info.Sar.Y);
var sarNum = Convert.ToInt64(job.Info.Sar.X);
var sarDen = Convert.ToInt64(job.Info.Sar.Y);
// After rotation, width/height swap
int w = job.Info.Width;
int h = job.Info.Height;
var w = job.Info.Width;
var h = job.Info.Height;
if (job.Job.Rotate == 90 || job.Job.Rotate == 270)
{
@ -119,7 +119,7 @@ public class SimpleSplitter(int segmentNo, ILogger logger) : LoggingBase(logger,
while (b != 0)
{
long t = b;
var t = b;
b = a % b;
a = t;
}
@ -167,7 +167,7 @@ public class SimpleSplitter(int segmentNo, ILogger logger) : LoggingBase(logger,
{
// FFmpeg formats: HH:MM:SS.xx
// We read until whitespace
int end = startIndex;
var end = startIndex;
while (end < line.Length && !char.IsWhiteSpace(line[end]))
end++;

View File

@ -92,6 +92,10 @@ public class SingleJob
/// object detector or rotation detector.
/// </summary>
public Dictionary<string, string> Parameters { get; set; } = [];
/// <summary>
/// Increase output resolution by x2 using the super-resolution RealBasicVSR_x2 model.
/// </summary>
public bool Enhance { get; set; }
public void Override<T>(ref T member, string name)
{

View File

@ -26,19 +26,18 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
public async Task ProcessSegment(SingleTask job, CancellationToken token)
{
string inputFile = job.Job.InputFile;
string outputFile = job.OutputFileName;
double start = job.SegmentStart;
double length = job.SegmentLength;
int videoWidth = job.Info.Width;
int videoHeight = job.Info.Height;
double fps = job.Info.Fps;
double bitrate = job.Info.Bitrate;
string[] ffmpegPassthroughParameters = job.Job.Passthrough;
var inputFile = job.Job.InputFile;
var outputFile = job.OutputFileName;
var start = job.SegmentStart;
var length = job.SegmentLength;
var videoWidth = job.Info.Width;
var videoHeight = job.Info.Height;
var fps = job.Info.Fps;
var bitrate = job.Info.Bitrate;
var ffmpegPassthroughParameters = job.Job.Passthrough;
var name = Path.GetFileNameWithoutExtension(outputFile);
// 1) Probe source video
if (videoWidth <= 0 || videoHeight <= 0 || fps <= 0)
{
LogError($"{name}: ffprobe failed to get metadata");
@ -51,16 +50,29 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
return;
}
var encWidth = job.Job.Debug ? videoWidth : job.Job.Crop.Value.width;
var encHeight = job.Job.Debug ? videoHeight : job.Job.Crop.Value.height;
// Processing size (what you crop / feed into enhancer)
var procWidth = job.Job.Debug ? videoWidth : job.Job.Crop.Value.width;
var procHeight = job.Job.Debug ? videoHeight : job.Job.Crop.Value.height;
LogInfo($"{name}: src={videoWidth}x{videoHeight} @ {fps:F3}fps, seg=[{start:F3},{length:F3}] enc={encWidth}x{encHeight}");
IVideoEnhancer? enhancer = null;
const int window = 5;
if (job.Job.Enhance)
{
enhancer = new RealBasicVsr2xDmlEnhancer();
await enhancer.InitializeAsync(procWidth, procHeight, window, token);
}
// Encoding size (what FFmpeg encoder expects)
var encWidth = enhancer != null ? procWidth * enhancer.ResolutionMultiplier : procWidth;
var encHeight = enhancer != null ? procHeight * enhancer.ResolutionMultiplier : procHeight;
LogInfo($"{name}: src={videoWidth}x{videoHeight} @ {fps:F3}fps, seg=[{start:F3},{length:F3}] proc={procWidth}x{procHeight} enc={encWidth}x{encHeight}");
// 2) Start FFmpeg decode (video only → raw BGR24 to stdout)
var decode = await StartFfmpegDecode(inputFile, start, length, job.Job.Rotate, job.Job.PlainText, token);
using var decodeStdout = decode.StandardOutput.BaseStream;
// 3) Start FFmpeg encode (video from stdin + audio from original)
var encode = await StartFfmpegEncode(
inputFile,
outputFile,
@ -75,15 +87,19 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
using var encodeStdin = encode.StandardInput.BaseStream;
// Separate input/output sizes and buffers
// Input: always full frame
var inBytes = videoWidth * videoHeight * 3;
// Output: encoded frame size (may be 4x if enhancement enabled)
var outBytes = encWidth * encHeight * 3;
var inBuffer = new byte[inBytes];
var outBuffer = new byte[outBytes];
using var frameMat = new Mat(videoHeight, videoWidth, MatType.CV_8UC3);
using var outMat = new Mat(encHeight, encWidth, MatType.CV_8UC3);
// outMat is processing size (crop), not necessarily encoding size
using var outMat = new Mat(procHeight, procWidth, MatType.CV_8UC3);
var kalman = new KalmanTracker();
var camera = new CameraController(
@ -94,10 +110,14 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
kalman,
job.Job);
try
{
var startTime = DateTime.UtcNow;
var totalFrames = (int)Math.Round(length * fps);
var frameIndex = 0;
var enhancedOutput = new Mat[window];
//totalFrames = 10;
while (frameIndex < totalFrames)
{
token.ThrowIfCancellationRequested();
@ -108,7 +128,6 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
if (read != inBytes)
break;
// input frame → Mat
Marshal.Copy(inBuffer, 0, frameMat.Data, inBytes);
var objects = _detector.DetectAll(frameMat);
@ -120,16 +139,25 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
if (job.Job.Debug)
{
DrawDebug(frameMat, objects, camera, kalman);
frameMat.CopyTo(outMat);
frameMat.CopyTo(outMat); // outMat: procWidth x procHeight == full frame in debug
}
else
{
using var cropped = new Mat(frameMat, roi);
cropped.CopyTo(outMat);
cropped.CopyTo(outMat); // outMat: procWidth x procHeight == crop
}
// output Mat → outBuffer
Marshal.Copy(outMat.Data, outBuffer, 0, outBytes);
Mat frameToWrite = outMat;
if (enhancer != null)
{
if (enhancer.TryProcessFrame(outMat, out var enhanced, token))
frameToWrite = enhanced; // enhanced: encWidth x encHeight
else
continue;
}
Marshal.Copy(frameToWrite.Data, outBuffer, 0, outBytes);
encodeStdin.Write(outBuffer, 0, outBytes);
var elapsed = DateTime.UtcNow - startTime;
@ -142,22 +170,35 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
DrawProgress(name, progress, eta, speed);
}
encodeStdin.Flush();
// loop finished
if (enhancer != null)
{
int count = enhancer.Flush(enhancedOutput, token);
for (int i = 0; i < count; i++)
{
var mat = enhancedOutput[i]; // encWidth x encHeight
Marshal.Copy(mat.Data, outBuffer, 0, outBytes);
encodeStdin.Write(outBuffer, 0, outBytes);
}
}
encodeStdin.Flush();
encodeStdin.Close(); // must happen before waiting encode
encodeStdin.Close();
await encode.WaitForExitAsync();
}
finally
{
if (enhancer is IAsyncDisposable asyncDisp)
await asyncDisp.DisposeAsync();
else if (enhancer is IDisposable disp)
disp?.Dispose();
}
// belt-and-braces: if decode is still alive, kill it
try { if (!decode.HasExited) decode.Kill(entireProcessTree: true); } catch { }
try { if (!decode.HasExited) await decode.WaitForExitAsync(); } catch { }
ClearProgress(name);
if (encode.ExitCode != 0)
LogError($"{name}: FFmpeg encoding failed");
else
@ -245,7 +286,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
? $"-vf setsar={info.SampleAspectRatio} "
: "";
string darArg = "";
var darArg = "";
if (info.Sar is { } s)
{
@ -254,8 +295,8 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
var darDen = height * s.Y;
// clamp to int and reduce
int dn = (int)Math.Min(int.MaxValue, Math.Max(int.MinValue, darNum));
int dd = (int)Math.Min(int.MaxValue, Math.Max(int.MinValue, darDen));
var dn = (int)Math.Min(int.MaxValue, Math.Max(int.MinValue, darNum));
var dd = (int)Math.Min(int.MaxValue, Math.Max(int.MinValue, darDen));
ReduceFraction(ref dn, ref dd);
if (dn > 0 && dd > 0)
@ -385,7 +426,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
var bestIndex = 0;
var bestArea = float.MinValue;
for (int i = 0; i < foundObjects.Count; i++)
for (var i = 0; i < foundObjects.Count; i++)
{
var f = foundObjects[i];
var area = f.box.Width * f.box.Height;
@ -404,7 +445,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
var bestIndex = 0;
var bestDist2 = float.MaxValue;
for (int i = 0; i < foundObjects.Count; i++)
for (var i = 0; i < foundObjects.Count; i++)
{
var f = foundObjects[i];
var dx = f.center.X - prev.X;

View File

@ -95,7 +95,7 @@ public sealed class CameraController
_dropoutCounter = 0;
}
bool isLost = !objectCenter.HasValue;
var isLost = !objectCenter.HasValue;
// LOST / REACQUIRE STATE MACHINE
if (isLost)
@ -147,7 +147,7 @@ public sealed class CameraController
{
smoothedCenter = _kalman.Update(objectCenter);
float driftEasing = 0.01f;
var driftEasing = 0.01f;
var fallbackCenter = new Point2f(_videoWidth / 2f, _videoHeight / 2f);
_cameraCenter = new Point2f(

View File

@ -0,0 +1,7 @@
namespace splitter.algo;
/// <summary>
/// <see cref="IObjectDetector"/> that never finds anything. It lets the pipeline run
/// with detection effectively switched off.
/// </summary>
public class DummyDetector : IObjectDetector
{
    /// <summary>
    /// Returns a fresh empty list without looking at the frame.
    /// </summary>
    /// <param name="frameCont">Frame to inspect; not read.</param>
    /// <returns>An empty list of detections.</returns>
    public List<(Rect box, Point2f center)> DetectAll(Mat frameCont)
    {
        return new List<(Rect box, Point2f center)>();
    }

    /// <summary>
    /// No resources are held, so there is nothing to dispose.
    /// </summary>
    public void Dispose()
    {
    }
}

View File

@ -0,0 +1,14 @@
namespace splitter.algo;
/// <summary>
/// Contract for a frame-by-frame video enhancer that may need several input frames
/// before it can produce its first output frame.
/// </summary>
public interface IVideoEnhancer : IAsyncDisposable
{
/// <summary>
/// Factor applied to both width and height of an input frame to obtain the
/// size of an enhanced frame.
/// </summary>
int ResolutionMultiplier { get; }
/// <summary>
/// Prepares the enhancer for frames of the given size.
/// </summary>
/// <param name="width">Width of the frames that will be submitted, in pixels.</param>
/// <param name="height">Height of the frames that will be submitted, in pixels.</param>
/// <param name="window">Number of frames in the processing window.</param>
/// <param name="token">Cancellation token.</param>
Task InitializeAsync(int width, int height, int window, CancellationToken token);
/// <summary>
/// Submits one frame. Returns true when an enhanced frame is ready in
/// <paramref name="output"/>; returns false when no frame was produced for this call.
/// </summary>
bool TryProcessFrame(Mat input, out Mat output, CancellationToken token);
/// <summary>
/// Flushes remaining frames after input is finished.
/// </summary>
/// <param name="outputFrames">Destination for the remaining enhanced frames.</param>
/// <param name="token">Cancellation token.</param>
/// <returns>Number of frames written to <paramref name="outputFrames"/>.</returns>
int Flush(Span<Mat> outputFrames, CancellationToken token);
}

View File

@ -35,8 +35,8 @@ public sealed class KalmanTracker
_state[3] = 0;
// Large initial uncertainty
for (int i = 0; i < 4; i++)
for (int j = 0; j < 4; j++)
for (var i = 0; i < 4; i++)
for (var j = 0; j < 4; j++)
_p[i, j] = (i == j) ? 1f : 0f;
}
@ -63,16 +63,16 @@ public sealed class KalmanTracker
var z = measurement.Value;
// Innovation y = z - Hx
float yx = z.X - _state[0];
float yy = z.Y - _state[1];
var yx = z.X - _state[0];
var yy = z.Y - _state[1];
// Innovation covariance S = P + R
float Sx = _p[0, 0] + _r;
float Sy = _p[1, 1] + _r;
var Sx = _p[0, 0] + _r;
var Sy = _p[1, 1] + _r;
// Kalman gain K = P / S
float Kx0 = _p[0, 0] / Sx;
float Kx1 = _p[1, 1] / Sy;
var Kx0 = _p[0, 0] / Sx;
var Kx1 = _p[1, 1] / Sy;
// Update state
_state[0] += Kx0 * yx;

View File

@ -0,0 +1,85 @@
using System.Text;
using Onnxify;
/// <summary>
/// Diagnostic helper that renders the structure of an ONNX model file as plain text.
/// </summary>
public static class OnnxInspector
{
    /// <summary>
    /// Loads the model at <paramref name="modelPath"/> and returns a multi-section text dump:
    /// model metadata, opset imports, graph inputs, graph outputs, initializers and nodes.
    /// </summary>
    /// <param name="modelPath">Path of the ONNX file to inspect.</param>
    /// <returns>
    /// The text dump, or the single line <c>File not found: &lt;path&gt;</c> when the file
    /// does not exist.
    /// </returns>
    public static string GetOnnxInfo(string modelPath)
    {
        var sb = new StringBuilder(4096);

        if (!File.Exists(modelPath))
        {
            sb.Append("File not found: ").Append(modelPath);
            return sb.ToString();
        }

        // Load ONNX model
        var model = OnnxModel.FromFile(modelPath);

        sb.AppendLine("=== MODEL METADATA ===");
        sb.Append("IR Version: ").AppendLine(model.IrVersion.ToString());
        sb.Append("Producer Name: ").AppendLine(model.ProducerName);
        sb.Append("Producer Version: ").AppendLine(model.ProducerVersion);
        sb.Append("Domain: ").AppendLine(model.Domain);
        sb.Append("Model Version: ").AppendLine(model.ModelVersion.ToString());
        sb.Append("Doc String: ").AppendLine(model.Document);
        sb.AppendLine();

        sb.AppendLine("=== OPSET IMPORTS ===");
        foreach (var opset in model.OpsetImport)
        {
            sb.Append("Domain: ").Append(opset.Domain)
              .Append(" Version: ").AppendLine(opset.Version.ToString());
        }
        sb.AppendLine();

        var graph = model.Graph;

        sb.AppendLine("=== GRAPH INPUTS ===");
        foreach (var input in graph.Inputs)
        {
            sb.Append("Name: ").AppendLine(input.Name);
            if (input.Type?.Denotation != null)
            {
                sb.Append(" Denotation: ").AppendLine(input.Type?.Denotation);
            }
        }
        sb.AppendLine();

        sb.AppendLine("=== GRAPH OUTPUTS ===");
        foreach (var output in graph.Outputs)
        {
            sb.Append("Name: ").AppendLine(output.Name);
            if (output.Type?.Denotation != null)
            {
                sb.Append(" Denotation: ").AppendLine(output.Type?.Denotation);
            }
        }
        sb.AppendLine();

        sb.AppendLine("=== INITIALIZERS ===");
        foreach (var init in graph.Initializers)
        {
            sb.Append("Name: ").AppendLine(init.Name);
            sb.Append(" DataType: ").AppendLine(init.DataType.ToString());
            sb.Append(" Dims: ").AppendLine(string.Join("x", init.Shape));
        }
        sb.AppendLine();

        sb.AppendLine("=== NODES ===");
        foreach (var node in graph.Nodes)
        {
            sb.Append("OpType: ").AppendLine(node.OpType);
            sb.Append(" Name: ").AppendLine(node.Name);
            sb.Append(" Inputs: ").AppendLine(string.Join(", ", node.Inputs));
            sb.Append(" Outputs: ").AppendLine(string.Join(", ", node.Outputs));
            foreach (var attr in node.Attributes)
            {
                // Each attribute gets its own line. Without the terminator all attributes,
                // and the following node's "OpType:" header, ran together on one line.
                sb.Append(" Attr: ").AppendLine(attr.Name);
            }
        }

        return sb.ToString();
    }
}

View File

@ -0,0 +1,324 @@
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
namespace splitter.algo;
/// <summary>
/// <see cref="IVideoEnhancer"/> that doubles frame resolution by running the
/// <c>realbasicvsr_x2.onnx</c> model through ONNX Runtime with the DirectML execution provider.
/// </summary>
/// <remarks>
/// Submitted frames are cloned into a sliding window. Until the window holds the number of
/// frames given to <see cref="InitializeAsync"/>, <see cref="TryProcessFrame"/> returns
/// <c>false</c> and no output exists for the frame it was given. The <see cref="Mat"/> handed
/// back through <c>output</c> is one instance owned and reused by this object: the next call
/// overwrites it and <see cref="DisposeAsync"/> disposes it, so callers must copy the pixels
/// out before calling again.
/// </remarks>
public sealed unsafe class RealBasicVsr2xDmlEnhancer : IVideoEnhancer
{
    // Scale from an 8-bit channel value to the 0..1 range fed to the model.
    private const float ByteToUnit = 1.0f / 255.0f;

    /// <summary>Output frames are twice as wide and twice as high as input frames.</summary>
    public int ResolutionMultiplier => 2;

    private InferenceSession _session;
    private SessionOptions _options;

    private int _inW;
    private int _inH;
    private int _window;

    // Clones of the most recent frames, oldest first.
    private readonly Queue<Mat> _frames = new Queue<Mat>(32);

    // Backing storage of _inputTensor, laid out as [1, window, 3, height, width].
    private float[] _inputBuffer;
    private DenseTensor<float> _inputTensor;

    // Reused result frame (8-bit BGR) returned from the TryProcessFrame methods.
    private Mat _outputMat;

    private readonly List<NamedOnnxValue> _inputList = new List<NamedOnnxValue>(1);

    /// <summary>
    /// Loads <c>models/realbasicvsr_x2.onnx</c> from the application base directory and
    /// allocates the buffers for frames of the given size. Completes synchronously.
    /// </summary>
    /// <param name="width">Width of the frames that will be submitted, in pixels.</param>
    /// <param name="height">Height of the frames that will be submitted, in pixels.</param>
    /// <param name="window">Number of consecutive frames passed to the model per run.</param>
    /// <param name="token">Not observed by this implementation.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="width"/>, <paramref name="height"/> or <paramref name="window"/> is not positive.
    /// </exception>
    public Task InitializeAsync(int width, int height, int window, CancellationToken token)
    {
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(width);
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(height);
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(window);

        // A second initialization must not leak the previous session or queued frames.
        ReleaseResources();

        _inW = width;
        _inH = height;
        _window = window;

        var basePath = AppDomain.CurrentDomain.BaseDirectory;
        var modelPath = System.IO.Path.Combine(basePath, "models", "realbasicvsr_x2.onnx");

        try
        {
            _options = new SessionOptions();
            _options.AppendExecutionProvider_DML();

            _session = new InferenceSession(modelPath, _options);

            // checked: a silently wrapped size would turn into a bogus allocation.
            int inputSize = checked(window * 3 * width * height);

            int outW = width * ResolutionMultiplier;
            int outH = height * ResolutionMultiplier;

            _inputBuffer = new float[inputSize];
            _inputTensor = new DenseTensor<float>(_inputBuffer, new[] { 1, window, 3, height, width });

            _outputMat = new Mat(outH, outW, MatType.CV_8UC3);
        }
        catch
        {
            // Do not leave half-created native objects behind when e.g. the model is missing.
            ReleaseResources();
            throw;
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Queues <paramref name="input"/> and, once the window is full, produces the enhanced
    /// frame: <paramref name="input"/> is upscaled bicubically, the model output at the last
    /// window index is added to it as a residual, the sum is clamped to 0..1 and converted to
    /// 8-bit BGR.
    /// </summary>
    /// <param name="input">Frame of the initialized size, type CV_8UC3 (treated as BGR).</param>
    /// <param name="output">The reused result frame on success; <c>null</c> otherwise.</param>
    /// <param name="token">When already cancelled the frame is not queued and <c>false</c> is returned.</param>
    /// <returns><c>true</c> when <paramref name="output"/> holds an enhanced frame.</returns>
    /// <exception cref="InvalidOperationException">The enhancer is not initialized, or the model output is not rank 5.</exception>
    /// <exception cref="ArgumentException"><paramref name="input"/> has the wrong size or type.</exception>
    public bool TryProcessFrame(Mat input, out Mat output, CancellationToken token)
    {
        output = null;

        if (token.IsCancellationRequested)
            return false;

        if (!EnqueueFrame(input))
            return false;

        FillInputBuffer();

        _inputList.Clear();
        _inputList.Add(NamedOnnxValue.CreateFromTensor("input", _inputTensor));

        using var results = _session.Run(_inputList);
        var outTensor = results[0].AsTensor<float>();
        ReadOutputShape(outTensor, out int last, out int outH, out int outW);

        // ------------------------------------------------------------
        // STEP 1: Bicubic upscale input to the model's output size
        // ------------------------------------------------------------
        using var upBgr = new Mat();
        Cv2.Resize(input, upBgr, new Size(outW, outH), 0, 0, InterpolationFlags.Cubic);

        using var upRgb = new Mat();
        Cv2.CvtColor(upBgr, upRgb, ColorConversionCodes.BGR2RGB);

        using var baseFloat = new Mat();
        upRgb.ConvertTo(baseFloat, MatType.CV_32FC3, 1.0 / 255.0);

        // ------------------------------------------------------------
        // STEP 2: Add residual from model output (channels 0,1,2 = R,G,B)
        // ------------------------------------------------------------
        float* basePtr = (float*)baseFloat.Data;
        int baseStride = (int)(baseFloat.Step() / sizeof(float));

        for (int y = 0; y < outH; y++)
        {
            float* row = basePtr + y * baseStride;

            for (int x = 0; x < outW; x++)
            {
                int p = x * 3;

                row[p + 0] = Math.Clamp(row[p + 0] + outTensor[0, last, 0, y, x], 0f, 1f);
                row[p + 1] = Math.Clamp(row[p + 1] + outTensor[0, last, 1, y, x], 0f, 1f);
                row[p + 2] = Math.Clamp(row[p + 2] + outTensor[0, last, 2, y, x], 0f, 1f);
            }
        }

        // ------------------------------------------------------------
        // STEP 3: Convert back to BGR 8-bit for FFmpeg
        // ------------------------------------------------------------
        using var outRgb8 = new Mat();
        baseFloat.ConvertTo(outRgb8, MatType.CV_8UC3, 255.0);

        Cv2.CvtColor(outRgb8, _outputMat, ColorConversionCodes.RGB2BGR);

        output = _outputMat;
        return true;
    }

    /// <summary>
    /// Alternative to <see cref="TryProcessFrame"/> that writes the model output at the last
    /// window index directly as the result frame, without the bicubic base image.
    /// </summary>
    /// <remarks>
    /// NOTE(review): this path reads output channel 0 as blue, whereas the input is packed
    /// R,G,B and <see cref="TryProcessFrame"/> treats output channel 0 as red. Confirm which
    /// order matches the model before using this method.
    /// </remarks>
    /// <param name="input">Frame of the initialized size, type CV_8UC3 (treated as BGR).</param>
    /// <param name="output">The reused result frame on success; <c>null</c> otherwise.</param>
    /// <param name="token">When already cancelled the frame is not queued and <c>false</c> is returned.</param>
    /// <returns><c>true</c> when <paramref name="output"/> holds an enhanced frame.</returns>
    /// <exception cref="InvalidOperationException">
    /// The enhancer is not initialized, or the model output shape does not match the result frame.
    /// </exception>
    /// <exception cref="ArgumentException"><paramref name="input"/> has the wrong size or type.</exception>
    public bool TryProcessFrame2(Mat input, out Mat output, CancellationToken token)
    {
        output = null;

        if (token.IsCancellationRequested)
            return false;

        if (!EnqueueFrame(input))
            return false;

        FillInputBuffer();

        _inputList.Clear();
        _inputList.Add(NamedOnnxValue.CreateFromTensor("input", _inputTensor));

        using var results = _session.Run(_inputList);
        var outTensor = results[0].AsTensor<float>();
        ReadOutputShape(outTensor, out int last, out int outH, out int outW);

        // The loop below writes through a raw pointer, so a size mismatch would corrupt memory.
        if (outH != _outputMat.Height || outW != _outputMat.Width)
            throw new InvalidOperationException(
                $"Model output is {outW}x{outH} but the result frame is {_outputMat.Width}x{_outputMat.Height}.");

        byte* dstBase = (byte*)_outputMat.Data;
        int dstStride = (int)_outputMat.Step();

        for (int y = 0; y < outH; y++)
        {
            byte* row = dstBase + y * dstStride;

            for (int x = 0; x < outW; x++)
            {
                // Clamp first: converting an out-of-range float to byte gives an unspecified value.
                float b = Math.Clamp(outTensor[0, last, 0, y, x], 0f, 1f);
                float g = Math.Clamp(outTensor[0, last, 1, y, x], 0f, 1f);
                float r = Math.Clamp(outTensor[0, last, 2, y, x], 0f, 1f);

                int p = x * 3;
                row[p + 0] = (byte)(b * 255.0f); // B
                row[p + 1] = (byte)(g * 255.0f); // G
                row[p + 2] = (byte)(r * 255.0f); // R
            }
        }

        output = _outputMat;
        return true;
    }

    /// <summary>
    /// This implementation never holds back finished frames, so nothing is flushed.
    /// </summary>
    /// <param name="outputFrames">Left untouched.</param>
    /// <param name="token">Not observed.</param>
    /// <returns>Always 0.</returns>
    public int Flush(Span<Mat> outputFrames, CancellationToken token)
    {
        return 0;
    }

    /// <summary>
    /// Disposes the queued frame clones, the inference session, its options and the reused
    /// result frame. Completes synchronously.
    /// </summary>
    public ValueTask DisposeAsync()
    {
        ReleaseResources();
        return ValueTask.CompletedTask;
    }

    // Validates the frame, pushes a clone into the window (evicting the oldest clone when
    // the window is already full) and reports whether the window is now full.
    private bool EnqueueFrame(Mat input)
    {
        if (_session is null)
            throw new InvalidOperationException("InitializeAsync must complete successfully before frames are processed.");

        ArgumentNullException.ThrowIfNull(input);

        // FillInputBuffer reads _inH rows of _inW 3-byte pixels through raw pointers.
        if (input.Width != _inW || input.Height != _inH || input.Type() != MatType.CV_8UC3)
            throw new ArgumentException(
                $"Expected a {_inW}x{_inH} CV_8UC3 frame but got {input.Width}x{input.Height}.", nameof(input));

        if (_frames.Count == _window)
        {
            var old = _frames.Dequeue();
            old.Dispose();
        }

        _frames.Enqueue(input.Clone());

        return _frames.Count >= _window;
    }

    // Packs the queued 8-bit BGR frames into _inputBuffer as normalized RGB planes,
    // channels-first: [1, T, 3, H, W].
    private void FillInputBuffer()
    {
        int planeSize = _inH * _inW;
        int t = 0;

        foreach (var frame in _frames)
        {
            byte* src = (byte*)frame.Data;
            int stride = (int)frame.Step();
            int frameBase = t * 3 * planeSize;

            for (int y = 0; y < _inH; y++)
            {
                byte* row = src + y * stride;
                int rowBase = frameBase + y * _inW;

                for (int x = 0; x < _inW; x++)
                {
                    int p = x * 3;
                    int idxR = rowBase + x;

                    _inputBuffer[idxR] = row[p + 2] * ByteToUnit;                 // R plane
                    _inputBuffer[idxR + planeSize] = row[p + 1] * ByteToUnit;     // G plane
                    _inputBuffer[idxR + 2 * planeSize] = row[p + 0] * ByteToUnit; // B plane
                }
            }

            t++;
        }
    }

    // Extracts the last window index and the spatial size from an output of shape
    // [1, T, 3, H2, W2]; rejects any other rank instead of failing on an index.
    private static void ReadOutputShape(Tensor<float> tensor, out int last, out int outH, out int outW)
    {
        var dims = tensor.Dimensions;
        if (dims.Length != 5)
            throw new InvalidOperationException($"Unexpected model output rank {dims.Length}; expected 5.");

        last = dims[1] - 1;
        outH = dims[3];
        outW = dims[4];
    }

    // Disposes everything this object owns and resets the fields, so the method is safe to
    // call repeatedly and before initialization.
    private void ReleaseResources()
    {
        foreach (var f in _frames)
            f.Dispose();
        _frames.Clear();

        _session?.Dispose();
        _session = null;

        _options?.Dispose();
        _options = null;

        _outputMat?.Dispose();
        _outputMat = null;

        _inputTensor = null;
        _inputBuffer = null;
    }
}

View File

@ -51,10 +51,10 @@ public sealed class UltraFaceDetector: LoggingBase, IDisposable, IObjectDetector
foreach (var f in faces)
{
int x1 = (int)f.X1;
int y1 = (int)f.Y1;
int x2 = (int)f.X2;
int y2 = (int)f.Y2;
var x1 = (int)f.X1;
var y1 = (int)f.Y1;
var x2 = (int)f.X2;
var y2 = (int)f.Y2;
var rect = new Rect(
x1,

View File

@ -125,14 +125,14 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
// Build reusable result list
_results.Clear();
for (int i = 0; i < final.Count; i++)
for (var i = 0; i < final.Count; i++)
{
var d = final[i];
int x = (int)d.X;
int y = (int)d.Y;
int w = (int)d.Width;
int h = (int)d.Height;
var x = (int)d.X;
var y = (int)d.Y;
var w = (int)d.Width;
var h = (int)d.Height;
x = Math.Clamp(x, 0, frameCont.Width - 1);
y = Math.Clamp(y, 0, frameCont.Height - 1);
@ -155,30 +155,30 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void FillInputTensor(Mat rgb)
{
int height = _inputHeight;
int width = _inputWidth;
var height = _inputHeight;
var width = _inputWidth;
// NCHW: [1, 3, H, W]
int planeSize = height * width;
var planeSize = height * width;
Span<float> dst = _inputBuffer.AsSpan();
unsafe
{
for (int y = 0; y < height; y++)
for (var y = 0; y < height; y++)
{
byte* rowPtr = (byte*)rgb.Ptr(y).ToPointer();
var rowPtr = (byte*)rgb.Ptr(y).ToPointer();
var rowSpan = new Span<byte>(rowPtr, width * 3);
int srcIndex = 0;
var srcIndex = 0;
for (int x = 0; x < width; x++)
for (var x = 0; x < width; x++)
{
byte r = rowSpan[srcIndex + 0];
byte g = rowSpan[srcIndex + 1];
byte b = rowSpan[srcIndex + 2];
var r = rowSpan[srcIndex + 0];
var g = rowSpan[srcIndex + 1];
var b = rowSpan[srcIndex + 2];
int offset = y * width + x;
var offset = y * width + x;
// channel 0: R
dst[offset] = r * _inv255;
@ -205,27 +205,27 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
detections.Clear();
// YOLOv8 output: [1, 84, 8400]
int channels = output.Dimensions[1]; // 84
int count = output.Dimensions[2]; // 8400
var channels = output.Dimensions[1]; // 84
var count = output.Dimensions[2]; // 8400
float xScale = (float)originalWidth / 640f;
float yScale = (float)originalHeight / 640f;
var xScale = (float)originalWidth / 640f;
var yScale = (float)originalHeight / 640f;
for (int i = 0; i < count; i++)
for (var i = 0; i < count; i++)
{
float x = output[0, 0, i];
float y = output[0, 1, i];
float w = output[0, 2, i];
float h = output[0, 3, i];
var x = output[0, 0, i];
var y = output[0, 1, i];
var w = output[0, 2, i];
var h = output[0, 3, i];
float classScore = output[0, 4 + classIndex, i];
var classScore = output[0, 4 + classIndex, i];
if (classScore < scoreThreshold)
continue;
float left = (x - w / 2f) * xScale;
float top = (y - h / 2f) * yScale;
float width = w * xScale;
float height = h * yScale;
var left = (x - w / 2f) * xScale;
var top = (y - h / 2f) * yScale;
var width = w * xScale;
var height = h * yScale;
detections.Add(new Detection
(
@ -252,12 +252,12 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
// Sort in-place by score descending
detections.Sort(static (a, b) => b.Score.CompareTo(a.Score));
for (int i = 0; i < detections.Count; i++)
for (var i = 0; i < detections.Count; i++)
{
var candidate = detections[i];
bool keep = true;
var keep = true;
for (int j = 0; j < nmsBuffer.Count; j++)
for (var j = 0; j < nmsBuffer.Count; j++)
{
if (IoU(candidate, nmsBuffer[j]) >= nmsThreshold)
{
@ -276,23 +276,23 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static float IoU(in Detection a, in Detection b)
{
float x1 = MathF.Max(a.X, b.X);
float y1 = MathF.Max(a.Y, b.Y);
float x2 = MathF.Min(a.X + a.Width, b.X + b.Width);
float y2 = MathF.Min(a.Y + a.Height, b.Y + b.Height);
var x1 = MathF.Max(a.X, b.X);
var y1 = MathF.Max(a.Y, b.Y);
var x2 = MathF.Min(a.X + a.Width, b.X + b.Width);
var y2 = MathF.Min(a.Y + a.Height, b.Y + b.Height);
float interW = x2 - x1;
var interW = x2 - x1;
if (interW <= 0f) return 0f;
float interH = y2 - y1;
var interH = y2 - y1;
if (interH <= 0f) return 0f;
float interArea = interW * interH;
var interArea = interW * interH;
float areaA = a.Width * a.Height;
float areaB = b.Width * b.Height;
var areaA = a.Width * a.Height;
var areaB = b.Width * b.Height;
float union = areaA + areaB - interArea;
var union = areaA + areaB - interArea;
if (union <= 0f) return 0f;
return interArea / union;

Binary file not shown.

View File

@ -42,28 +42,28 @@ public sealed class FrameRotationDetector
Cv2.CartToPolar(_gx, _gy, _mag, _angle, angleInDegrees: true);
// 4. Clear histogram
for (int i = 0; i < _bins; i++)
for (var i = 0; i < _bins; i++)
_hist[i] = 0;
float binSize = 180f / _bins;
var binSize = 180f / _bins;
unsafe
{
float* anglePtr = (float*)_angle.Data;
float* magPtr = (float*)_mag.Data;
var anglePtr = (float*)_angle.Data;
var magPtr = (float*)_mag.Data;
int total = _w * _h;
var total = _w * _h;
for (int i = 0; i < total; i++)
for (var i = 0; i < total; i++)
{
float m = magPtr[i];
var m = magPtr[i];
if (m < 5f) continue; // ignore weak gradients
float a = anglePtr[i];
var a = anglePtr[i];
if (a < 0) a += 360f;
a = a % 180f;
int bin = (int)(a / binSize);
var bin = (int)(a / binSize);
if (bin < 0) bin = 0;
if (bin >= _bins) bin = _bins - 1;
@ -73,12 +73,12 @@ public sealed class FrameRotationDetector
// 5. Energy around 0° vs 90°
float e0 = 0, e90 = 0;
int window = 3;
var window = 3;
int bin0 = 0;
int bin90 = _bins / 2;
var bin0 = 0;
var bin90 = _bins / 2;
for (int i = -window; i <= window; i++)
for (var i = -window; i <= window; i++)
{
e0 += _hist[Wrap(bin0 + i)];
e90 += _hist[Wrap(bin90 + i)];

View File

@ -72,7 +72,7 @@ public static class ProbeVideo
var width = stream?.Width ?? 0;
var height = stream?.Height ?? 0;
double fps = 0.0;
var fps = 0.0;
if (!string.IsNullOrWhiteSpace(stream?.Avg_frame_rate))
{
var parts = stream.Avg_frame_rate.Split('/');

View File

@ -29,8 +29,8 @@ public sealed class VideoRotationSampler
RotationDetectorFrameHeight = int.Parse(s);
}
int w = RotationDetectorFrameWidth;
int h = RotationDetectorFrameHeight;
var w = RotationDetectorFrameWidth;
var h = RotationDetectorFrameHeight;
_buffer = new byte[w * h * 3]; // raw BGR24 buffer
_frameMat = new Mat(h, w, MatType.CV_8UC3); // wraps buffer
@ -46,9 +46,9 @@ public sealed class VideoRotationSampler
var rotations = new List<int>();
for (int i = 0; i < RotationDetectorSampleCount; i++)
for (var i = 0; i < RotationDetectorSampleCount; i++)
{
double t = videoLengthSeconds * (i + 1) / (RotationDetectorSampleCount + 1);
var t = videoLengthSeconds * (i + 1) / (RotationDetectorSampleCount + 1);
var frame = await DecodeSingleFrameAsync(
inputFile,
@ -60,7 +60,7 @@ public sealed class VideoRotationSampler
if (frame != null && !frame.Empty())
{
int rot = _detector.GetRotation(frame);
var rot = _detector.GetRotation(frame);
rotations.Add(rot);
}
}
@ -80,8 +80,8 @@ public sealed class VideoRotationSampler
counts[v]++;
}
int best = 0;
int bestCount = 0;
var best = 0;
var bestCount = 0;
foreach (var kv in counts)
{

View File

@ -59,9 +59,10 @@
<ItemGroup>
<PackageReference Include="FFmpeg.AutoGen" Version="8.1.0" />
<PackageReference Include="Microsoft.ML.OnnxRuntime.DirectML" Version="1.24.4" />
<PackageReference Include="OpenCvSharp4" Version="4.13.0.20260427" />
<PackageReference Include="OpenCvSharp4.runtime.win" Version="4.13.0.20260302" />
<PackageReference Include="Spectre.Console" Version="0.55.2" />
<PackageReference Include="Onnxify" Version="0.1.4" />
<PackageReference Include="OpenCvSharp4" Version="4.13.0.20260602" />
<PackageReference Include="OpenCvSharp4.runtime.win" Version="4.13.0.20260602" />
<PackageReference Include="Spectre.Console" Version="0.56.0" />
<PackageReference Include="UltraFaceDotNet" Version="1.0.0.2" />
</ItemGroup>

View File

@ -39,7 +39,7 @@ public sealed class SpectreConsoleLogger : ILogger, IDisposable
lock (_sync)
{
_numberOfProcesses = Math.Max(1, value);
for (int i = 0; i < _numberOfProcesses; i++)
for (var i = 0; i < _numberOfProcesses; i++)
{
if (!_progress.ContainsKey(i))
_progress[i] = ProgressEntry.Empty;
@ -282,17 +282,17 @@ public sealed class SpectreConsoleLogger : ILogger, IDisposable
if (width <= 0)
return string.Empty;
int filled = (int)Math.Round(progress * width);
int empty = width - filled;
var filled = (int)Math.Round(progress * width);
var empty = width - filled;
if (filled <= 0)
return $"[grey]{new string('─', width)}[/]";
// Split filled part into three segments: blue / yellow / green
// low progress: mostly blue; mid: yellow; high: green
int blueCount = (int)Math.Round(filled * 0.33);
int yellowCount = (int)Math.Round(filled * 0.34);
int greenCount = filled - blueCount - yellowCount;
var blueCount = (int)Math.Round(filled * 0.33);
var yellowCount = (int)Math.Round(filled * 0.34);
var greenCount = filled - blueCount - yellowCount;
var sb = new StringBuilder();

View File

@ -8,8 +8,8 @@ public static class FileMaskExpander
if (!HasMask(input))
return [Path.GetFullPath(input)];
string directory = Path.GetDirectoryName(input) ?? Directory.GetCurrentDirectory();
string pattern = Path.GetFileName(input);
var directory = Path.GetDirectoryName(input) ?? Directory.GetCurrentDirectory();
var pattern = Path.GetFileName(input);
if (string.IsNullOrEmpty(directory))
directory = Directory.GetCurrentDirectory();

View File

@ -2,6 +2,7 @@
<Folder Name="/Solution items/">
<File Path=".github/workflows/publish.yml" />
<File Path=".gitignore" />
<File Path="AGENTS.md" />
<File Path="LICENSE.txt" />
<File Path="README.md" />
</Folder>