mirror of
https://github.com/unclshura/splitter.git
synced 2026-06-21 16:12:01 +00:00
Upscaling x2 added using realbasicvsr_x2.onnx. It turns out to be very slow and overall not worth it.
This commit is contained in:
parent
9496d46411
commit
de0d0c77fc
19
AGENTS.md
Normal file
19
AGENTS.md
Normal file
@ -0,0 +1,19 @@
|
||||
You are a C# programmer. I'm a senior C# programmer with 30+ years of experience.
|
||||
Do not be overconfident about your answers - they are 70% incorrect.
|
||||
Do not say "final solution". Do not start every reply with my name.
|
||||
Do not use emoji or non-ASCII symbols. Do not explain "why it works".
|
||||
|
||||
I have a C# .NET 10 Avalonia 12 UI for an ffmpeg/OpenCV video app. All packages are at their latest versions.
|
||||
|
||||
Use namespace splitter for splitter-cli and Splitter_UI for Splitter-UI.
|
||||
|
||||
Splitter pipeline is:
|
||||
|
||||
* FFProbe extracting all video meta to VideoInfo
|
||||
* FFMpeg used to decode video frames into OpenCVSharp.Mat
|
||||
* One of detectors used:
|
||||
- For face detection: [opencv_zoo/models/face_detection_yunet at main opencv/opencv_zoo](https://github.com/opencv/opencv_zoo/tree/main/models/face_detection_yunet)
|
||||
- For body detection: [yolov8s.pt Ultralytics/YOLOv8 at main](https://huggingface.co/Ultralytics/YOLOv8/blob/main/yolov8s.pt)
|
||||
* Camera control applied (CameraControl class)
|
||||
* Final video frames are encoded back to video file using FFMpeg
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
Splitter is a high-performance command line tool for cutting one or more video files into equal or
|
||||
fixed‑length segments using multi‑threaded FFmpeg execution. It supports batch input, flexible
|
||||
duration formats, rotation, smart face/body‑aware cropping, ETA and speed reporting, with nice GUI
|
||||
or both rich and plain‑text terminal output.
|
||||
or both rich and plain-text terminal output.
|
||||
|
||||
The intended primary use case is for content creators who need to split large video files into smaller
|
||||
segments for platforms like TikTok, Instagram Reels, YouTube Shorts, or similar. The smart
|
||||
@ -34,7 +34,7 @@ Splitter uses FFmpeg for the actual splitting and encoding, with multi-threading
|
||||
### Command line interface
|
||||

|
||||
### Graphical user interface
|
||||

|
||||

|
||||
|
||||
## Requirements
|
||||
|
||||
|
||||
@ -38,14 +38,16 @@ internal sealed class Program
|
||||
services.AddSingleton<UltraFaceDetector>();
|
||||
services.AddSingleton<YoloOnnxObjectDetector>();
|
||||
services.AddSingleton( x => new SingleThreadedDetector<UltraFaceDetector>(x.GetRequiredService<UltraFaceDetector>()) );
|
||||
services.AddSingleton( x => new SingleThreadedDetector<YoloOnnxObjectDetector>(x.GetRequiredService<YoloOnnxObjectDetector>()));
|
||||
services.AddSingleton(x => new SingleThreadedDetector<YoloOnnxObjectDetector>(x.GetRequiredService<YoloOnnxObjectDetector>()));
|
||||
services.AddSingleton(x => new SingleThreadedDetector<DummyDetector>(x.GetRequiredService<DummyDetector>()));
|
||||
services.AddSingleton<Func<string, IObjectDetector>>( x => detectorName =>
|
||||
{
|
||||
return detectorName switch
|
||||
{
|
||||
"face" => x.GetRequiredService<SingleThreadedDetector<UltraFaceDetector>>(),
|
||||
"body" => x.GetRequiredService<SingleThreadedDetector<YoloOnnxObjectDetector>>(),
|
||||
_ => new DummyDetector()
|
||||
"none" => x.GetRequiredService<SingleThreadedDetector<DummyDetector>>(),
|
||||
_ => new DummyDetector()
|
||||
};
|
||||
});
|
||||
services.AddSingleton<ILogger, GlobalLogger>();
|
||||
|
||||
@ -46,12 +46,12 @@ public sealed class AutoDecisionService(IThumbnailService _thumbnails, IFileProb
|
||||
var targetAR = (float)CommandLine.DefaultW / CommandLine.DefaultH;
|
||||
var pixelAspect = job.Probe!.Sar.X / job.Probe.Sar.Y;
|
||||
|
||||
float srcW = job.Probe.Width * pixelAspect;
|
||||
var srcW = job.Probe.Width * pixelAspect;
|
||||
float srcH = job.Probe.Height;
|
||||
var srcAR = srcW / srcH;
|
||||
|
||||
float cropH = srcH;
|
||||
float cropW = cropH * targetAR;
|
||||
var cropH = srcH;
|
||||
var cropW = cropH * targetAR;
|
||||
|
||||
if (cropW > srcW)
|
||||
{
|
||||
@ -59,16 +59,16 @@ public sealed class AutoDecisionService(IThumbnailService _thumbnails, IFileProb
|
||||
cropH = cropW / targetAR;
|
||||
}
|
||||
|
||||
float x = (srcW - cropW) * 0.5f;
|
||||
float y = (srcH - cropH) * 0.5f;
|
||||
var x = (srcW - cropW) * 0.5f;
|
||||
var y = (srcH - cropH) * 0.5f;
|
||||
|
||||
float invPixelAspect = 1f / pixelAspect;
|
||||
var invPixelAspect = 1f / pixelAspect;
|
||||
|
||||
float cropW_px = cropW * invPixelAspect;
|
||||
float cropH_px = cropH;
|
||||
var cropW_px = cropW * invPixelAspect;
|
||||
var cropH_px = cropH;
|
||||
|
||||
float x_px = x * invPixelAspect;
|
||||
float y_px = y;
|
||||
var x_px = x * invPixelAspect;
|
||||
var y_px = y;
|
||||
|
||||
job.CropText = $"{(int)MathF.Round(cropW_px)},{(int)MathF.Round(cropH_px)}";
|
||||
}
|
||||
|
||||
@ -1,7 +0,0 @@
|
||||
namespace Splitter_UI.Services;
|
||||
|
||||
internal class DummyDetector : IObjectDetector
|
||||
{
|
||||
public List<(OpenCvSharp.Rect box, Point2f center)> DetectAll(Mat frameCont) => [];
|
||||
public void Dispose() {}
|
||||
}
|
||||
@ -63,7 +63,7 @@ public sealed class ThumbnailService : IThumbnailService
|
||||
var bgraBuffer = canUseStaticBuffers ? _bgraBuffer : new byte[width.Value * height.Value * 4];
|
||||
|
||||
// Decode a single frame using ffmpeg → raw BGR24 into _bgrBuffer
|
||||
bool ok = await DecodeFrameAsync(bgrBuffer, file, skip.Value, width.Value, height.Value, rotateDegree);
|
||||
var ok = await DecodeFrameAsync(bgrBuffer, file, skip.Value, width.Value, height.Value, rotateDegree);
|
||||
if (!ok)
|
||||
return null;
|
||||
|
||||
@ -99,14 +99,14 @@ public sealed class ThumbnailService : IThumbnailService
|
||||
var p = new Process { StartInfo = psi };
|
||||
p.Start();
|
||||
|
||||
int needed = bgrBuffer.Length;
|
||||
int read = 0;
|
||||
var needed = bgrBuffer.Length;
|
||||
var read = 0;
|
||||
|
||||
using var stdout = p.StandardOutput.BaseStream;
|
||||
|
||||
while (read < needed)
|
||||
{
|
||||
int r = await stdout.ReadAsync(bgrBuffer, read, needed - read);
|
||||
var r = await stdout.ReadAsync(bgrBuffer, read, needed - read);
|
||||
if (r == 0)
|
||||
{
|
||||
TryKill(p);
|
||||
@ -126,12 +126,12 @@ public sealed class ThumbnailService : IThumbnailService
|
||||
|
||||
private static void ConvertBgrToBgra(byte[] bgr, byte[] bgra, int width, int height)
|
||||
{
|
||||
int si = 0;
|
||||
int di = 0;
|
||||
var si = 0;
|
||||
var di = 0;
|
||||
|
||||
int totalPixels = width * height;
|
||||
var totalPixels = width * height;
|
||||
|
||||
for (int i = 0; i < totalPixels; i++)
|
||||
for (var i = 0; i < totalPixels; i++)
|
||||
{
|
||||
bgra[di + 0] = bgr[si + 0]; // B
|
||||
bgra[di + 1] = bgr[si + 1]; // G
|
||||
@ -150,7 +150,7 @@ public sealed class ThumbnailService : IThumbnailService
|
||||
(height, width) = (width, height);
|
||||
}
|
||||
|
||||
int stride = width * 4;
|
||||
var stride = width * 4;
|
||||
|
||||
fixed (byte* p = bgra)
|
||||
{
|
||||
|
||||
@ -40,6 +40,7 @@ public partial class InspectorPaneViewModel : ObservableObject
|
||||
job.OutputFolder = Selected.OutputFolder;
|
||||
job.OverrideTargetDuration = Selected.OverrideTargetDuration;
|
||||
job.PassthroughText = Selected.PassthroughText;
|
||||
job.Enhance = Selected.Enhance;
|
||||
|
||||
job.ParametersList.Clear();
|
||||
foreach (var param in Selected.ParametersList)
|
||||
|
||||
@ -157,6 +157,18 @@ public partial class JobViewModel : ObservableObject
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Mirrors <c>Job.Enhance</c>. The setter writes through to the job and raises a
/// property-changed notification only when the value actually changes.
/// </summary>
public bool Enhance
{
    get
    {
        return Job.Enhance;
    }
    set
    {
        // Skip the write and the notification when nothing changes.
        if (Job.Enhance != value)
        {
            Job.Enhance = value;
            OnPropertyChanged();
        }
    }
}
|
||||
|
||||
public int? Rotate
|
||||
{
|
||||
get => Job.Rotate;
|
||||
|
||||
@ -72,7 +72,7 @@ public partial class MainViewModel : ViewModelBase
|
||||
jobs.AddRange(fileJobs);
|
||||
}
|
||||
|
||||
await _processor.ProcessJobs(jobs, false, _cancellationTokenSource.Token);
|
||||
await _processor.ProcessJobs(jobs, jobs.First().Job.Enhance, _cancellationTokenSource.Token);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
using System.Collections.ObjectModel;
|
||||
using CommunityToolkit.Mvvm.ComponentModel;
|
||||
using CommunityToolkit.Mvvm.Input;
|
||||
using Splitter_UI.Views;
|
||||
|
||||
namespace Splitter_UI.ViewModels;
|
||||
|
||||
|
||||
@ -92,6 +92,16 @@ x:DataType="vm:InspectorPaneViewModel">
|
||||
<NumericUpDown Value="{Binding Selected.OverrideTargetDuration}" Width="120"/>
|
||||
</StackPanel>
|
||||
|
||||
<!-- Enhance -->
|
||||
<StackPanel Orientation="Horizontal" Spacing="8">
|
||||
<CheckBox Content="Enhance resolution x2"
|
||||
IsChecked="{Binding Selected.Enhance}"/>
|
||||
<TextBlock Text="(Very slow and not worth it!)"
|
||||
Foreground="#FFFF80FF"
|
||||
FontSize="10"
|
||||
Margin="0,12,0,0"/>
|
||||
</StackPanel>
|
||||
|
||||
<!-- ForceFixed -->
|
||||
<CheckBox Content="Force Fixed Duration"
|
||||
IsChecked="{Binding Selected.ForceFixed}"/>
|
||||
|
||||
@ -1,5 +1,3 @@
|
||||
using Avalonia.Controls;
|
||||
|
||||
namespace Splitter_UI.Views;
|
||||
|
||||
public partial class MainWindow : Avalonia.Controls.Window
|
||||
|
||||
@ -279,8 +279,8 @@ public sealed class PreviewCanvas : Control
|
||||
|
||||
var scale = Math.Min(dispW / displayW, dispH / displayH);
|
||||
|
||||
double dx = dxCanvas / scale;
|
||||
double dy = dyCanvas / scale;
|
||||
var dx = dxCanvas / scale;
|
||||
var dy = dyCanvas / scale;
|
||||
|
||||
if (rotate == 0 || rotate == 180)
|
||||
dx /= pixelAspect;
|
||||
@ -288,8 +288,8 @@ public sealed class PreviewCanvas : Control
|
||||
dy /= pixelAspect;
|
||||
|
||||
// start normalized → pixel
|
||||
double gx = _dragStartValue.X * rawW + dx;
|
||||
double gy = _dragStartValue.Y * rawH + dy;
|
||||
var gx = _dragStartValue.X * rawW + dx;
|
||||
var gy = _dragStartValue.Y * rawH + dy;
|
||||
|
||||
switch (rotate)
|
||||
{
|
||||
@ -368,8 +368,8 @@ public sealed class PreviewCanvas : Control
|
||||
var g = GravitateTo;
|
||||
|
||||
// normalized → pixel
|
||||
double px = g.X * rawW;
|
||||
double py = g.Y * rawH;
|
||||
var px = g.X * rawW;
|
||||
var py = g.Y * rawH;
|
||||
|
||||
var (sx, sy) = TransformPoint(
|
||||
px, py,
|
||||
|
||||
@ -74,6 +74,10 @@ public sealed class CommandLine
|
||||
{
|
||||
Master.Rotate = 90;
|
||||
}
|
||||
else if (arg == "--enhance")
|
||||
{
|
||||
Master.Enhance = true;
|
||||
}
|
||||
else if (arg.StartsWith("--rotate="))
|
||||
{
|
||||
var val = arg.Substring("--rotate=".Length);
|
||||
@ -328,6 +332,9 @@ Options:
|
||||
Last segment may be shorter.
|
||||
Default: OFF
|
||||
|
||||
--enhance Enable video enhancement.
|
||||
Increases output resolution x4 Using RealBasicVSR_x4 model.
|
||||
|
||||
--rotate=<degrees> Rotate video by specified degrees (90, 180, 270).
|
||||
Useful for videos with incorrect orientation metadata.
|
||||
|
||||
|
||||
@ -65,6 +65,7 @@ public class JobProcessor(ILogger logger) : LoggingBase(logger, 0), IJobProcesso
|
||||
{
|
||||
"face" => new UltraFaceDetector(_logger),
|
||||
"body" => new YoloOnnxObjectDetector(_logger),
|
||||
"none" => new DummyDetector(),
|
||||
_ => throw new InvalidOperationException($"Unknown detector: {job.Detect}")
|
||||
};
|
||||
return new TrackingSplitter(i, detector, job, _logger);
|
||||
@ -146,7 +147,7 @@ public class JobProcessor(ILogger logger) : LoggingBase(logger, 0), IJobProcesso
|
||||
|
||||
tasks.Add(Task.Run(async () =>
|
||||
{
|
||||
int slot = -1;
|
||||
var slot = -1;
|
||||
|
||||
try
|
||||
{
|
||||
|
||||
@ -147,6 +147,7 @@ All option names are preserved exactly, and descriptions are consolidated for cl
|
||||
| **--mask=<pattern>** | Custom output filename pattern. Default: `[NAME]_seg[NN].[EXT]`. Supports `[NAME]`, `[N]`, `[NN]`, `[NNN]`, `[NNNN]`, `[EXT]`. Example: `--mask="[NAME]_[NNNN].mp4"`. |
|
||||
| **--duration=<value>** | Override target segment duration. Formats: `Ns`, `NmMs`, `N`. Examples: `--duration=90s`, `--duration=2m30s`, `--duration=45`. Without `--force`: max 58 seconds, equalized across segments. |
|
||||
| **--force** | Use the duration exactly as provided. Last segment may be shorter. |
|
||||
| **--enhance** | Enable video enhancement. Increases output resolution x2 using RealBasicVSR_x2 model. |
|
||||
| **--rotate=<degrees>** | Rotate video by 90, 180, or 270 degrees. Useful for correcting orientation metadata. |
|
||||
| **--rotate-auto** | Use automatic rotation detection. |
|
||||
| **--estimate** | Print calculated segment information and exit. No splitting is performed. |
|
||||
|
||||
@ -7,10 +7,10 @@ public class SimpleSplitter(int segmentNo, ILogger logger) : LoggingBase(logger,
|
||||
{
|
||||
public async Task ProcessSegment(SingleTask job, CancellationToken token)
|
||||
{
|
||||
string inputFile = job.Job.InputFile;
|
||||
string outputFile = job.OutputFileName;
|
||||
double start = job.SegmentStart;
|
||||
double length = job.SegmentLength;
|
||||
var inputFile = job.Job.InputFile;
|
||||
var outputFile = job.OutputFileName;
|
||||
var start = job.SegmentStart;
|
||||
var length = job.SegmentLength;
|
||||
|
||||
var rotation = GetRotationFilter(job.Job.Rotate);
|
||||
|
||||
@ -36,12 +36,12 @@ public class SimpleSplitter(int segmentNo, ILogger logger) : LoggingBase(logger,
|
||||
{
|
||||
// Rotation path: must re-encode and recompute DAR
|
||||
|
||||
long sarNum = Convert.ToInt64(job.Info.Sar.X);
|
||||
long sarDen = Convert.ToInt64(job.Info.Sar.Y);
|
||||
var sarNum = Convert.ToInt64(job.Info.Sar.X);
|
||||
var sarDen = Convert.ToInt64(job.Info.Sar.Y);
|
||||
|
||||
// After rotation, width/height swap
|
||||
int w = job.Info.Width;
|
||||
int h = job.Info.Height;
|
||||
var w = job.Info.Width;
|
||||
var h = job.Info.Height;
|
||||
|
||||
if (job.Job.Rotate == 90 || job.Job.Rotate == 270)
|
||||
{
|
||||
@ -119,7 +119,7 @@ public class SimpleSplitter(int segmentNo, ILogger logger) : LoggingBase(logger,
|
||||
|
||||
while (b != 0)
|
||||
{
|
||||
long t = b;
|
||||
var t = b;
|
||||
b = a % b;
|
||||
a = t;
|
||||
}
|
||||
@ -167,7 +167,7 @@ public class SimpleSplitter(int segmentNo, ILogger logger) : LoggingBase(logger,
|
||||
{
|
||||
// FFmpeg formats: HH:MM:SS.xx
|
||||
// We read until whitespace
|
||||
int end = startIndex;
|
||||
var end = startIndex;
|
||||
while (end < line.Length && !char.IsWhiteSpace(line[end]))
|
||||
end++;
|
||||
|
||||
|
||||
@ -92,6 +92,10 @@ public class SingleJob
|
||||
/// object detector or rotation detector.
|
||||
/// </summary>
|
||||
public Dictionary<string, string> Parameters { get; set; } = [];
|
||||
/// <summary>
|
||||
/// Increase output resolution by x2 using super-resolution RealBasicVSR_x2 model.
|
||||
/// </summary>
|
||||
public bool Enhance { get; set; }
|
||||
|
||||
public void Override<T>(ref T member, string name)
|
||||
{
|
||||
|
||||
@ -26,19 +26,18 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
|
||||
|
||||
public async Task ProcessSegment(SingleTask job, CancellationToken token)
|
||||
{
|
||||
string inputFile = job.Job.InputFile;
|
||||
string outputFile = job.OutputFileName;
|
||||
double start = job.SegmentStart;
|
||||
double length = job.SegmentLength;
|
||||
int videoWidth = job.Info.Width;
|
||||
int videoHeight = job.Info.Height;
|
||||
double fps = job.Info.Fps;
|
||||
double bitrate = job.Info.Bitrate;
|
||||
string[] ffmpegPassthroughParameters = job.Job.Passthrough;
|
||||
var inputFile = job.Job.InputFile;
|
||||
var outputFile = job.OutputFileName;
|
||||
var start = job.SegmentStart;
|
||||
var length = job.SegmentLength;
|
||||
var videoWidth = job.Info.Width;
|
||||
var videoHeight = job.Info.Height;
|
||||
var fps = job.Info.Fps;
|
||||
var bitrate = job.Info.Bitrate;
|
||||
var ffmpegPassthroughParameters = job.Job.Passthrough;
|
||||
|
||||
var name = Path.GetFileNameWithoutExtension(outputFile);
|
||||
|
||||
// 1) Probe source video
|
||||
if (videoWidth <= 0 || videoHeight <= 0 || fps <= 0)
|
||||
{
|
||||
LogError($"{name}: ffprobe failed to get metadata");
|
||||
@ -51,16 +50,29 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
|
||||
return;
|
||||
}
|
||||
|
||||
var encWidth = job.Job.Debug ? videoWidth : job.Job.Crop.Value.width;
|
||||
var encHeight = job.Job.Debug ? videoHeight : job.Job.Crop.Value.height;
|
||||
// Processing size (what you crop / feed into enhancer)
|
||||
var procWidth = job.Job.Debug ? videoWidth : job.Job.Crop.Value.width;
|
||||
var procHeight = job.Job.Debug ? videoHeight : job.Job.Crop.Value.height;
|
||||
|
||||
LogInfo($"{name}: src={videoWidth}x{videoHeight} @ {fps:F3}fps, seg=[{start:F3},{length:F3}] enc={encWidth}x{encHeight}");
|
||||
IVideoEnhancer? enhancer = null;
|
||||
|
||||
const int window = 5;
|
||||
|
||||
if (job.Job.Enhance)
|
||||
{
|
||||
enhancer = new RealBasicVsr2xDmlEnhancer();
|
||||
await enhancer.InitializeAsync(procWidth, procHeight, window, token);
|
||||
}
|
||||
|
||||
// Encoding size (what FFmpeg encoder expects)
|
||||
var encWidth = enhancer != null ? procWidth * enhancer.ResolutionMultiplier : procWidth;
|
||||
var encHeight = enhancer != null ? procHeight * enhancer.ResolutionMultiplier : procHeight;
|
||||
|
||||
LogInfo($"{name}: src={videoWidth}x{videoHeight} @ {fps:F3}fps, seg=[{start:F3},{length:F3}] proc={procWidth}x{procHeight} enc={encWidth}x{encHeight}");
|
||||
|
||||
// 2) Start FFmpeg decode (video only → raw BGR24 to stdout)
|
||||
var decode = await StartFfmpegDecode(inputFile, start, length, job.Job.Rotate, job.Job.PlainText, token);
|
||||
using var decodeStdout = decode.StandardOutput.BaseStream;
|
||||
|
||||
// 3) Start FFmpeg encode (video from stdin + audio from original)
|
||||
var encode = await StartFfmpegEncode(
|
||||
inputFile,
|
||||
outputFile,
|
||||
@ -75,89 +87,118 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
|
||||
|
||||
using var encodeStdin = encode.StandardInput.BaseStream;
|
||||
|
||||
// Separate input/output sizes and buffers
|
||||
// Input: always full frame
|
||||
var inBytes = videoWidth * videoHeight * 3;
|
||||
var outBytes = encWidth * encHeight * 3;
|
||||
|
||||
// Output: encoded frame size (may be 4x if enhancement enabled)
|
||||
var outBytes = encWidth * encHeight * 3;
|
||||
|
||||
var inBuffer = new byte[inBytes];
|
||||
var outBuffer = new byte[outBytes];
|
||||
|
||||
using var frameMat = new Mat(videoHeight, videoWidth, MatType.CV_8UC3);
|
||||
using var outMat = new Mat(encHeight, encWidth, MatType.CV_8UC3);
|
||||
|
||||
// outMat is processing size (crop), not necessarily encoding size
|
||||
using var outMat = new Mat(procHeight, procWidth, MatType.CV_8UC3);
|
||||
|
||||
var kalman = new KalmanTracker();
|
||||
var camera = new CameraController(
|
||||
videoWidth,
|
||||
videoHeight,
|
||||
job.Job.Crop.Value.width,
|
||||
job.Job.Crop.Value.height,
|
||||
kalman,
|
||||
job.Job);
|
||||
videoWidth,
|
||||
videoHeight,
|
||||
job.Job.Crop.Value.width,
|
||||
job.Job.Crop.Value.height,
|
||||
kalman,
|
||||
job.Job);
|
||||
|
||||
var startTime = DateTime.UtcNow;
|
||||
var totalFrames = (int)Math.Round(length * fps);
|
||||
var frameIndex = 0;
|
||||
|
||||
while (frameIndex < totalFrames)
|
||||
try
|
||||
{
|
||||
token.ThrowIfCancellationRequested();
|
||||
var startTime = DateTime.UtcNow;
|
||||
var totalFrames = (int)Math.Round(length * fps);
|
||||
var frameIndex = 0;
|
||||
|
||||
frameIndex++;
|
||||
|
||||
var read = await ReadExact(decodeStdout, inBuffer, 0, inBytes, token);
|
||||
if (read != inBytes)
|
||||
break;
|
||||
|
||||
// input frame → Mat
|
||||
Marshal.Copy(inBuffer, 0, frameMat.Data, inBytes);
|
||||
|
||||
var objects = _detector.DetectAll(frameMat);
|
||||
var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
|
||||
|
||||
camera.Update(primary);
|
||||
var roi = camera.Roi;
|
||||
|
||||
if (job.Job.Debug)
|
||||
var enhancedOutput = new Mat[window];
|
||||
//totalFrames = 10;
|
||||
while (frameIndex < totalFrames)
|
||||
{
|
||||
DrawDebug(frameMat, objects, camera, kalman);
|
||||
frameMat.CopyTo(outMat);
|
||||
}
|
||||
else
|
||||
{
|
||||
using var cropped = new Mat(frameMat, roi);
|
||||
cropped.CopyTo(outMat);
|
||||
token.ThrowIfCancellationRequested();
|
||||
|
||||
frameIndex++;
|
||||
|
||||
var read = await ReadExact(decodeStdout, inBuffer, 0, inBytes, token);
|
||||
if (read != inBytes)
|
||||
break;
|
||||
|
||||
Marshal.Copy(inBuffer, 0, frameMat.Data, inBytes);
|
||||
|
||||
var objects = _detector.DetectAll(frameMat);
|
||||
var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
|
||||
|
||||
camera.Update(primary);
|
||||
var roi = camera.Roi;
|
||||
|
||||
if (job.Job.Debug)
|
||||
{
|
||||
DrawDebug(frameMat, objects, camera, kalman);
|
||||
frameMat.CopyTo(outMat); // outMat: procWidth x procHeight == full frame in debug
|
||||
}
|
||||
else
|
||||
{
|
||||
using var cropped = new Mat(frameMat, roi);
|
||||
cropped.CopyTo(outMat); // outMat: procWidth x procHeight == crop
|
||||
}
|
||||
|
||||
Mat frameToWrite = outMat;
|
||||
|
||||
if (enhancer != null)
|
||||
{
|
||||
if (enhancer.TryProcessFrame(outMat, out var enhanced, token))
|
||||
frameToWrite = enhanced; // enhanced: encWidth x encHeight
|
||||
else
|
||||
continue;
|
||||
}
|
||||
|
||||
Marshal.Copy(frameToWrite.Data, outBuffer, 0, outBytes);
|
||||
encodeStdin.Write(outBuffer, 0, outBytes);
|
||||
|
||||
var elapsed = DateTime.UtcNow - startTime;
|
||||
var progress = totalFrames > 0 ? (double)frameIndex / totalFrames : 0.0;
|
||||
var speed = elapsed.TotalSeconds > 0 ? (frameIndex / elapsed.TotalSeconds) / fps : 0.0;
|
||||
var remainingFrames = Math.Max(totalFrames - frameIndex, 0);
|
||||
var etaSeconds = speed > 0 ? remainingFrames / speed : 0.0;
|
||||
var eta = TimeSpan.FromSeconds(etaSeconds);
|
||||
|
||||
DrawProgress(name, progress, eta, speed);
|
||||
}
|
||||
|
||||
// output Mat → outBuffer
|
||||
Marshal.Copy(outMat.Data, outBuffer, 0, outBytes);
|
||||
encodeStdin.Write(outBuffer, 0, outBytes);
|
||||
if (enhancer != null)
|
||||
{
|
||||
int count = enhancer.Flush(enhancedOutput, token);
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
var mat = enhancedOutput[i]; // encWidth x encHeight
|
||||
Marshal.Copy(mat.Data, outBuffer, 0, outBytes);
|
||||
encodeStdin.Write(outBuffer, 0, outBytes);
|
||||
}
|
||||
}
|
||||
|
||||
var elapsed = DateTime.UtcNow - startTime;
|
||||
var progress = totalFrames > 0 ? (double)frameIndex / totalFrames : 0.0;
|
||||
var speed = elapsed.TotalSeconds > 0 ? (frameIndex / elapsed.TotalSeconds) / fps : 0.0;
|
||||
var remainingFrames = Math.Max(totalFrames - frameIndex, 0);
|
||||
var etaSeconds = speed > 0 ? remainingFrames / speed : 0.0;
|
||||
var eta = TimeSpan.FromSeconds(etaSeconds);
|
||||
encodeStdin.Flush();
|
||||
encodeStdin.Close();
|
||||
|
||||
DrawProgress(name, progress, eta, speed);
|
||||
await encode.WaitForExitAsync();
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (enhancer is IAsyncDisposable asyncDisp)
|
||||
await asyncDisp.DisposeAsync();
|
||||
else if (enhancer is IDisposable disp)
|
||||
disp?.Dispose();
|
||||
}
|
||||
|
||||
encodeStdin.Flush();
|
||||
|
||||
// loop finished
|
||||
|
||||
encodeStdin.Flush();
|
||||
encodeStdin.Close(); // must happen before waiting encode
|
||||
|
||||
await encode.WaitForExitAsync();
|
||||
|
||||
// belt-and-braces: if decode is still alive, kill it
|
||||
try { if (!decode.HasExited) decode.Kill(entireProcessTree: true); } catch { }
|
||||
try { if (!decode.HasExited) await decode.WaitForExitAsync(); } catch { }
|
||||
|
||||
ClearProgress(name);
|
||||
|
||||
|
||||
if (encode.ExitCode != 0)
|
||||
LogError($"{name}: FFmpeg encoding failed");
|
||||
else
|
||||
@ -245,7 +286,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
|
||||
? $"-vf setsar={info.SampleAspectRatio} "
|
||||
: "";
|
||||
|
||||
string darArg = "";
|
||||
var darArg = "";
|
||||
|
||||
if (info.Sar is { } s)
|
||||
{
|
||||
@ -254,8 +295,8 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
|
||||
var darDen = height * s.Y;
|
||||
|
||||
// clamp to int and reduce
|
||||
int dn = (int)Math.Min(int.MaxValue, Math.Max(int.MinValue, darNum));
|
||||
int dd = (int)Math.Min(int.MaxValue, Math.Max(int.MinValue, darDen));
|
||||
var dn = (int)Math.Min(int.MaxValue, Math.Max(int.MinValue, darNum));
|
||||
var dd = (int)Math.Min(int.MaxValue, Math.Max(int.MinValue, darDen));
|
||||
ReduceFraction(ref dn, ref dd);
|
||||
|
||||
if (dn > 0 && dd > 0)
|
||||
@ -385,7 +426,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
|
||||
var bestIndex = 0;
|
||||
var bestArea = float.MinValue;
|
||||
|
||||
for (int i = 0; i < foundObjects.Count; i++)
|
||||
for (var i = 0; i < foundObjects.Count; i++)
|
||||
{
|
||||
var f = foundObjects[i];
|
||||
var area = f.box.Width * f.box.Height;
|
||||
@ -404,7 +445,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
|
||||
var bestIndex = 0;
|
||||
var bestDist2 = float.MaxValue;
|
||||
|
||||
for (int i = 0; i < foundObjects.Count; i++)
|
||||
for (var i = 0; i < foundObjects.Count; i++)
|
||||
{
|
||||
var f = foundObjects[i];
|
||||
var dx = f.center.X - prev.X;
|
||||
|
||||
@ -95,7 +95,7 @@ public sealed class CameraController
|
||||
_dropoutCounter = 0;
|
||||
}
|
||||
|
||||
bool isLost = !objectCenter.HasValue;
|
||||
var isLost = !objectCenter.HasValue;
|
||||
|
||||
// LOST / REACQUIRE STATE MACHINE
|
||||
if (isLost)
|
||||
@ -147,7 +147,7 @@ public sealed class CameraController
|
||||
{
|
||||
smoothedCenter = _kalman.Update(objectCenter);
|
||||
|
||||
float driftEasing = 0.01f;
|
||||
var driftEasing = 0.01f;
|
||||
var fallbackCenter = new Point2f(_videoWidth / 2f, _videoHeight / 2f);
|
||||
|
||||
_cameraCenter = new Point2f(
|
||||
|
||||
7
splitter-cli/algo/DummyDetector.cs
Normal file
7
splitter-cli/algo/DummyDetector.cs
Normal file
@ -0,0 +1,7 @@
|
||||
namespace splitter.algo;
|
||||
|
||||
/// <summary>
/// Detector that never finds anything: every frame yields an empty detection list.
/// </summary>
public class DummyDetector : IObjectDetector
{
    /// <summary>
    /// Returns a new, empty detection list. The frame is not examined.
    /// </summary>
    /// <param name="frameCont">Frame to "inspect"; ignored.</param>
    /// <returns>An empty list of (box, center) pairs.</returns>
    public List<(Rect box, Point2f center)> DetectAll(Mat frameCont)
    {
        return new List<(Rect box, Point2f center)>();
    }

    /// <summary>
    /// Nothing to release.
    /// </summary>
    public void Dispose()
    {
    }
}
|
||||
14
splitter-cli/algo/IVideoEnhancer.cs
Normal file
14
splitter-cli/algo/IVideoEnhancer.cs
Normal file
@ -0,0 +1,14 @@
|
||||
namespace splitter.algo;
|
||||
|
||||
/// <summary>
/// Contract for a frame enhancer that may buffer several input frames before
/// producing an output frame.
/// </summary>
public interface IVideoEnhancer : IAsyncDisposable
|
||||
{
|
||||
/// <summary>
/// Per-dimension scale factor: callers multiply the input width and height by
/// this value to get the size of the enhanced frames.
/// </summary>
int ResolutionMultiplier { get; }
|
||||
|
||||
/// <summary>
/// Prepares the enhancer for input frames of the given size.
/// </summary>
/// <param name="width">Width in pixels of the frames that will be submitted.</param>
/// <param name="height">Height in pixels of the frames that will be submitted.</param>
/// <param name="window">Number of frames the enhancer works on at once.</param>
/// <param name="token">Cancellation token supplied by the caller.</param>
Task InitializeAsync(int width, int height, int window, CancellationToken token);
|
||||
|
||||
// Returns true when an enhanced frame is ready
// When false is returned no output frame is available and the caller is
// expected to skip writing for this input frame.
|
||||
bool TryProcessFrame(Mat input, out Mat output, CancellationToken token);
|
||||
|
||||
// Flush remaining frames after input is finished
// The return value is the number of entries written to outputFrames.
|
||||
int Flush(Span<Mat> outputFrames, CancellationToken token);
|
||||
}
|
||||
@ -35,8 +35,8 @@ public sealed class KalmanTracker
|
||||
_state[3] = 0;
|
||||
|
||||
// Large initial uncertainty
|
||||
for (int i = 0; i < 4; i++)
|
||||
for (int j = 0; j < 4; j++)
|
||||
for (var i = 0; i < 4; i++)
|
||||
for (var j = 0; j < 4; j++)
|
||||
_p[i, j] = (i == j) ? 1f : 0f;
|
||||
}
|
||||
|
||||
@ -63,16 +63,16 @@ public sealed class KalmanTracker
|
||||
var z = measurement.Value;
|
||||
|
||||
// Innovation y = z - Hx
|
||||
float yx = z.X - _state[0];
|
||||
float yy = z.Y - _state[1];
|
||||
var yx = z.X - _state[0];
|
||||
var yy = z.Y - _state[1];
|
||||
|
||||
// Innovation covariance S = P + R
|
||||
float Sx = _p[0, 0] + _r;
|
||||
float Sy = _p[1, 1] + _r;
|
||||
var Sx = _p[0, 0] + _r;
|
||||
var Sy = _p[1, 1] + _r;
|
||||
|
||||
// Kalman gain K = P / S
|
||||
float Kx0 = _p[0, 0] / Sx;
|
||||
float Kx1 = _p[1, 1] / Sy;
|
||||
var Kx0 = _p[0, 0] / Sx;
|
||||
var Kx1 = _p[1, 1] / Sy;
|
||||
|
||||
// Update state
|
||||
_state[0] += Kx0 * yx;
|
||||
|
||||
85
splitter-cli/algo/OnnxInspector.cs
Normal file
85
splitter-cli/algo/OnnxInspector.cs
Normal file
@ -0,0 +1,85 @@
|
||||
using System.Text;
|
||||
using Onnxify;
|
||||
|
||||
/// <summary>
/// Diagnostic helper that renders the structure of an ONNX model file as plain text.
/// </summary>
public static class OnnxInspector
{
    /// <summary>
    /// Builds a text report of the model at <paramref name="modelPath"/>: metadata,
    /// opset imports, graph inputs and outputs, initializers and nodes, in that order.
    /// </summary>
    /// <param name="modelPath">Path of the ONNX file to inspect.</param>
    /// <returns>
    /// The report text, or <c>"File not found: &lt;path&gt;"</c> when the file does not
    /// exist (no exception is thrown for a missing file).
    /// </returns>
    public static string GetOnnxInfo(string modelPath)
    {
        var sb = new StringBuilder(4096);

        if (!File.Exists(modelPath))
        {
            sb.Append("File not found: ").Append(modelPath);
            return sb.ToString();
        }

        // Load ONNX model
        var model = OnnxModel.FromFile(modelPath);

        sb.AppendLine("=== MODEL METADATA ===");
        sb.Append("IR Version: ").AppendLine(model.IrVersion.ToString());
        sb.Append("Producer Name: ").AppendLine(model.ProducerName);
        sb.Append("Producer Version: ").AppendLine(model.ProducerVersion);
        sb.Append("Domain: ").AppendLine(model.Domain);
        sb.Append("Model Version: ").AppendLine(model.ModelVersion.ToString());
        sb.Append("Doc String: ").AppendLine(model.Document);
        sb.AppendLine();

        sb.AppendLine("=== OPSET IMPORTS ===");
        foreach (var opset in model.OpsetImport)
        {
            sb.Append("Domain: ").Append(opset.Domain)
              .Append(" Version: ").AppendLine(opset.Version.ToString());
        }
        sb.AppendLine();

        var graph = model.Graph;

        sb.AppendLine("=== GRAPH INPUTS ===");
        foreach (var input in graph.Inputs)
        {
            sb.Append("Name: ").AppendLine(input.Name);

            // Read the denotation once; it is printed only when present.
            var denotation = input.Type?.Denotation;
            if (denotation != null)
            {
                sb.Append(" Denotation: ").AppendLine(denotation);
            }
        }
        sb.AppendLine();

        sb.AppendLine("=== GRAPH OUTPUTS ===");
        foreach (var output in graph.Outputs)
        {
            sb.Append("Name: ").AppendLine(output.Name);

            var denotation = output.Type?.Denotation;
            if (denotation != null)
            {
                sb.Append(" Denotation: ").AppendLine(denotation);
            }
        }
        sb.AppendLine();

        sb.AppendLine("=== INITIALIZERS ===");
        foreach (var init in graph.Initializers)
        {
            sb.Append("Name: ").AppendLine(init.Name);
            sb.Append(" DataType: ").AppendLine(init.DataType.ToString());
            sb.Append(" Dims: ").AppendLine(string.Join("x", init.Shape));
        }
        sb.AppendLine();

        sb.AppendLine("=== NODES ===");
        foreach (var node in graph.Nodes)
        {
            sb.Append("OpType: ").AppendLine(node.OpType);
            sb.Append(" Name: ").AppendLine(node.Name);
            sb.Append(" Inputs: ").AppendLine(string.Join(", ", node.Inputs));
            sb.Append(" Outputs: ").AppendLine(string.Join(", ", node.Outputs));

            foreach (var attr in node.Attributes)
            {
                // One attribute per line. Without the line terminator every attribute,
                // and the next node's "OpType:" header, ended up on the same line.
                sb.Append(" Attr: ").AppendLine(attr.Name);
            }
        }

        return sb.ToString();
    }
}
|
||||
324
splitter-cli/algo/RealBasicVsr2xDmlEnhancer.cs
Normal file
324
splitter-cli/algo/RealBasicVsr2xDmlEnhancer.cs
Normal file
@ -0,0 +1,324 @@
|
||||
using Microsoft.ML.OnnxRuntime;
|
||||
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||
|
||||
namespace splitter.algo;
|
||||
|
||||
/// <summary>
/// Video enhancer that upscales frames by a factor of two using the
/// <c>realbasicvsr_x2.onnx</c> model, executed through ONNX Runtime with the
/// DirectML execution provider.
/// </summary>
/// <remarks>
/// Frames are collected in a sliding window of <c>window</c> frames. No output is
/// produced until the window is full; after that every call runs the model on the
/// whole window and emits the result for the newest frame.
/// The <see cref="Mat"/> handed out through <c>output</c> is owned by this instance
/// and is overwritten by the next call.
/// </remarks>
public sealed unsafe class RealBasicVsr2xDmlEnhancer : IVideoEnhancer
{
    private const string ModelFileName = "realbasicvsr_x2.onnx";
    private const string InputName = "input";
    private const float Inv255 = 1.0f / 255.0f;

    /// <summary>Output resolution is twice the input resolution in both dimensions.</summary>
    public int ResolutionMultiplier => 2;

    private InferenceSession _session;
    private SessionOptions _options;

    private int _inW;
    private int _inH;
    private int _window;

    // Sliding window of private copies of the most recent input frames (oldest first).
    private readonly Queue<Mat> _frames = new Queue<Mat>(32);

    // Backing storage of _inputTensor, laid out as [1, T, 3, H, W].
    private float[] _inputBuffer;
    private DenseTensor<float> _inputTensor;

    // Reused BGR 8-bit result frame returned to the caller.
    private Mat _outputMat;

    private readonly List<NamedOnnxValue> _inputList = new List<NamedOnnxValue>(1);

    /// <summary>
    /// Loads the model from <c>models/realbasicvsr_x2.onnx</c> under the application base
    /// directory and allocates the reusable input tensor and output frame.
    /// Calling it again releases the resources of the previous initialization first.
    /// </summary>
    /// <param name="width">Input frame width in pixels; must be positive.</param>
    /// <param name="height">Input frame height in pixels; must be positive.</param>
    /// <param name="window">Number of frames fed to the model per run; must be positive.</param>
    /// <param name="token">Not used; initialization is synchronous.</param>
    /// <returns>A completed task.</returns>
    /// <exception cref="ArgumentOutOfRangeException">A size argument is zero or negative.</exception>
    /// <exception cref="OverflowException">The input tensor would exceed <see cref="int.MaxValue"/> elements.</exception>
    public Task InitializeAsync(int width, int height, int window, CancellationToken token)
    {
        if (width <= 0)
            throw new ArgumentOutOfRangeException(nameof(width), width, "Width must be positive.");
        if (height <= 0)
            throw new ArgumentOutOfRangeException(nameof(height), height, "Height must be positive.");
        if (window <= 0)
            throw new ArgumentOutOfRangeException(nameof(window), window, "Window must be positive.");

        // A second initialization must not leak the previous session, options or frames.
        ReleaseResources();

        _inW = width;
        _inH = height;
        _window = window;

        var basePath = AppDomain.CurrentDomain.BaseDirectory;
        var modelPath = System.IO.Path.Combine(basePath, "models", ModelFileName);

        _options = new SessionOptions();
        _options.AppendExecutionProvider_DML();

        _session = new InferenceSession(modelPath, _options);

        // checked: a silently wrapped size would allocate a buffer smaller than the tensor shape.
        int inputSize = checked(window * 3 * width * height);
        int outW = checked(width * 2);
        int outH = checked(height * 2);

        _inputBuffer = new float[inputSize];
        _inputTensor = new DenseTensor<float>(_inputBuffer, new[] { 1, window, 3, height, width });

        _outputMat = new Mat(outH, outW, MatType.CV_8UC3);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Adds <paramref name="input"/> to the frame window and, once the window is full,
    /// produces the enhanced frame as: bicubic x2 upscale of <paramref name="input"/>
    /// plus the model output for the newest frame, clamped to [0, 1].
    /// </summary>
    /// <param name="input">BGR frame of type CV_8UC3 with the size given to <see cref="InitializeAsync"/>.</param>
    /// <param name="output">
    /// On success, the reused BGR 8-bit result frame; <c>null</c> otherwise.
    /// The frame is overwritten by the next call and must not be disposed by the caller.
    /// </param>
    /// <param name="token">When cancellation is requested the call returns <c>false</c> without touching the window.</param>
    /// <returns><c>true</c> when <paramref name="output"/> holds a frame; <c>false</c> while the window is filling or on cancellation.</returns>
    /// <exception cref="ArgumentException">The frame size or type does not match the initialized configuration.</exception>
    /// <exception cref="InvalidOperationException">The enhancer is not initialized, or the model output has an unexpected shape.</exception>
    public bool TryProcessFrame(Mat input, out Mat output, CancellationToken token)
    {
        output = null;

        if (token.IsCancellationRequested)
            return false;

        if (!PushFrame(input))
            return false;

        FillInputBuffer();

        using var results = RunModel();

        ReadOnlySpan<float> lastFrame = GetLastOutputFrame(results, out int outH, out int outW);
        int plane = outH * outW;

        // Input is fed as RGB, so the first three output planes are read as R, G, B.
        ReadOnlySpan<float> resR = lastFrame.Slice(0, plane);
        ReadOnlySpan<float> resG = lastFrame.Slice(plane, plane);
        ReadOnlySpan<float> resB = lastFrame.Slice(2 * plane, plane);

        // STEP 1: bicubic upscale of the current frame to the model output size,
        // converted to normalized RGB float.
        using var upBgr = new Mat();
        Cv2.Resize(input, upBgr, new Size(outW, outH), 0, 0, InterpolationFlags.Cubic);

        using var upRgb = new Mat();
        Cv2.CvtColor(upBgr, upRgb, ColorConversionCodes.BGR2RGB);

        using var baseFloat = new Mat();
        upRgb.ConvertTo(baseFloat, MatType.CV_32FC3, 1.0 / 255.0);

        // STEP 2: add the model output to the upscaled base.
        // NOTE(review): this treats the model output as a residual; confirm against the
        // exported model, since TryProcessFrame2 treats the same output as a full image.
        float* basePtr = (float*)baseFloat.Data;
        int baseStride = (int)(baseFloat.Step() / sizeof(float));

        for (int y = 0; y < outH; y++)
        {
            float* row = basePtr + y * baseStride;
            int rowOffset = y * outW;

            for (int x = 0; x < outW; x++)
            {
                int p = x * 3;
                int i = rowOffset + x;

                row[p + 0] = Math.Clamp(row[p + 0] + resR[i], 0f, 1f);
                row[p + 1] = Math.Clamp(row[p + 1] + resG[i], 0f, 1f);
                row[p + 2] = Math.Clamp(row[p + 2] + resB[i], 0f, 1f);
            }
        }

        // STEP 3: back to BGR 8-bit for the encoder.
        using var outRgb8 = new Mat();
        baseFloat.ConvertTo(outRgb8, MatType.CV_8UC3, 255.0);

        Cv2.CvtColor(outRgb8, _outputMat, ColorConversionCodes.RGB2BGR);

        output = _outputMat;
        return true;
    }

    /// <summary>
    /// Alternative to <see cref="TryProcessFrame"/> that writes the model output for the
    /// newest frame directly into the result frame, without the bicubic base.
    /// </summary>
    /// <param name="input">BGR frame of type CV_8UC3 with the size given to <see cref="InitializeAsync"/>.</param>
    /// <param name="output">On success, the reused BGR 8-bit result frame; <c>null</c> otherwise.</param>
    /// <param name="token">When cancellation is requested the call returns <c>false</c> without touching the window.</param>
    /// <returns><c>true</c> when <paramref name="output"/> holds a frame; <c>false</c> while the window is filling or on cancellation.</returns>
    /// <exception cref="ArgumentException">The frame size or type does not match the initialized configuration.</exception>
    /// <exception cref="InvalidOperationException">
    /// The enhancer is not initialized, or the model output has an unexpected shape or does not fit the result frame.
    /// </exception>
    public bool TryProcessFrame2(Mat input, out Mat output, CancellationToken token)
    {
        output = null;

        if (token.IsCancellationRequested)
            return false;

        if (!PushFrame(input))
            return false;

        FillInputBuffer();

        using var results = RunModel();

        ReadOnlySpan<float> lastFrame = GetLastOutputFrame(results, out int outH, out int outW);
        int plane = outH * outW;

        // The loop below writes through a raw pointer, so the model output must fit the frame.
        if (outH > _outputMat.Rows || outW > _outputMat.Cols)
        {
            throw new InvalidOperationException(
                $"Model output {outW}x{outH} does not fit the {_outputMat.Cols}x{_outputMat.Rows} output frame.");
        }

        // NOTE(review): planes 0/1/2 are written as B/G/R here although the input tensor is
        // filled as R/G/B and TryProcessFrame reads the same planes as R/G/B. Kept as-is;
        // verify the intended channel order.
        ReadOnlySpan<float> planeB = lastFrame.Slice(0, plane);
        ReadOnlySpan<float> planeG = lastFrame.Slice(plane, plane);
        ReadOnlySpan<float> planeR = lastFrame.Slice(2 * plane, plane);

        byte* dstBase = (byte*)_outputMat.Data;
        int dstStride = (int)_outputMat.Step();

        for (int y = 0; y < outH; y++)
        {
            byte* row = dstBase + y * dstStride;
            int rowOffset = y * outW;

            for (int x = 0; x < outW; x++)
            {
                int p = x * 3;
                int i = rowOffset + x;

                // Clamp first: converting an out-of-range float to byte gives an unspecified value.
                row[p + 0] = (byte)(Math.Clamp(planeB[i], 0f, 1f) * 255.0f);
                row[p + 1] = (byte)(Math.Clamp(planeG[i], 0f, 1f) * 255.0f);
                row[p + 2] = (byte)(Math.Clamp(planeR[i], 0f, 1f) * 255.0f);
            }
        }

        output = _outputMat;
        return true;
    }

    /// <summary>
    /// Nothing is buffered for delayed output, so there is never anything to flush.
    /// </summary>
    /// <param name="outputFrames">Not used.</param>
    /// <param name="token">Not used.</param>
    /// <returns>Always 0.</returns>
    public int Flush(Span<Mat> outputFrames, CancellationToken token)
    {
        return 0;
    }

    /// <summary>
    /// Disposes the buffered frames, the inference session, its options and the output frame.
    /// </summary>
    /// <returns>A completed task.</returns>
    public ValueTask DisposeAsync()
    {
        ReleaseResources();
        return ValueTask.CompletedTask;
    }

    // Validates the frame and appends a private copy to the sliding window, evicting the
    // oldest frame when the window is already full. Returns true once the window is full.
    private bool PushFrame(Mat input)
    {
        ArgumentNullException.ThrowIfNull(input);

        if (_session == null)
            throw new InvalidOperationException("InitializeAsync must be called before processing frames.");

        // FillInputBuffer reads _inW x _inH BGR pixels through a raw pointer; a smaller or
        // differently typed frame would be read out of bounds.
        if (input.Cols != _inW || input.Rows != _inH || input.Type() != MatType.CV_8UC3)
        {
            throw new ArgumentException(
                $"Expected a {_inW}x{_inH} CV_8UC3 frame but got {input.Cols}x{input.Rows} of type {input.Type()}.",
                nameof(input));
        }

        if (_frames.Count == _window)
        {
            var old = _frames.Dequeue();
            old.Dispose();
        }

        _frames.Enqueue(input.Clone());

        return _frames.Count >= _window;
    }

    // Converts every buffered CV_8UC3 BGR frame to normalized RGB and stores it
    // channels-first in _inputBuffer, laid out as [1, T, 3, H, W].
    private void FillInputBuffer()
    {
        int height = _inH;
        int width = _inW;
        int plane = height * width;

        int t = 0;

        foreach (var frame in _frames)
        {
            byte* src = (byte*)frame.Data;
            int stride = (int)frame.Step();

            int offR = (t * 3 + 0) * plane;
            int offG = (t * 3 + 1) * plane;
            int offB = (t * 3 + 2) * plane;

            for (int y = 0; y < height; y++)
            {
                byte* row = src + y * stride;
                int rowOffset = y * width;

                for (int x = 0; x < width; x++)
                {
                    int p = x * 3;
                    int i = rowOffset + x;

                    // Source pixel order is B, G, R.
                    _inputBuffer[offR + i] = row[p + 2] * Inv255;
                    _inputBuffer[offG + i] = row[p + 1] * Inv255;
                    _inputBuffer[offB + i] = row[p + 0] * Inv255;
                }
            }

            t++;
        }
    }

    // Runs the session on the current contents of the input tensor.
    // The caller owns and must dispose the returned collection.
    private IDisposableReadOnlyCollection<DisposableNamedOnnxValue> RunModel()
    {
        _inputList.Clear();
        _inputList.Add(NamedOnnxValue.CreateFromTensor(InputName, _inputTensor));

        return _session.Run(_inputList);
    }

    // Returns the flat channel planes of the last frame of the first model output, which is
    // expected to be shaped [1, T, C>=3, H2, W2]. The span is only valid while 'results' is alive.
    // Reading the flat buffer avoids the Tensor<T> 'params int[]' indexer, which allocates an
    // array on every element access.
    private static ReadOnlySpan<float> GetLastOutputFrame(
        IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results,
        out int outH,
        out int outW)
    {
        var tensor = results[0].AsTensor<float>();
        var dims = tensor.Dimensions;

        if (dims.Length != 5 || dims[1] < 1 || dims[2] < 3)
        {
            throw new InvalidOperationException(
                $"Unexpected model output shape [{string.Join(", ", dims.ToArray())}]; expected [1, T, 3, H, W].");
        }

        int outT = dims[1];
        int channels = dims[2];
        outH = dims[3];
        outW = dims[4];

        var dense = tensor as DenseTensor<float> ?? tensor.ToDenseTensor();

        int frameSize = channels * outH * outW;
        int last = outT - 1;

        return dense.Buffer.Span.Slice(last * frameSize, frameSize);
    }

    // Disposes everything this instance owns and clears the references so that a later
    // call fails with a clear exception instead of using a disposed object.
    private void ReleaseResources()
    {
        foreach (var f in _frames)
            f.Dispose();

        _frames.Clear();

        _session?.Dispose();
        _session = null;

        _options?.Dispose();
        _options = null;

        _outputMat?.Dispose();
        _outputMat = null;
    }
}
|
||||
@ -51,10 +51,10 @@ public sealed class UltraFaceDetector: LoggingBase, IDisposable, IObjectDetector
|
||||
|
||||
foreach (var f in faces)
|
||||
{
|
||||
int x1 = (int)f.X1;
|
||||
int y1 = (int)f.Y1;
|
||||
int x2 = (int)f.X2;
|
||||
int y2 = (int)f.Y2;
|
||||
var x1 = (int)f.X1;
|
||||
var y1 = (int)f.Y1;
|
||||
var x2 = (int)f.X2;
|
||||
var y2 = (int)f.Y2;
|
||||
|
||||
var rect = new Rect(
|
||||
x1,
|
||||
|
||||
@ -125,14 +125,14 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
||||
|
||||
// Build reusable result list
|
||||
_results.Clear();
|
||||
for (int i = 0; i < final.Count; i++)
|
||||
for (var i = 0; i < final.Count; i++)
|
||||
{
|
||||
var d = final[i];
|
||||
|
||||
int x = (int)d.X;
|
||||
int y = (int)d.Y;
|
||||
int w = (int)d.Width;
|
||||
int h = (int)d.Height;
|
||||
var x = (int)d.X;
|
||||
var y = (int)d.Y;
|
||||
var w = (int)d.Width;
|
||||
var h = (int)d.Height;
|
||||
|
||||
x = Math.Clamp(x, 0, frameCont.Width - 1);
|
||||
y = Math.Clamp(y, 0, frameCont.Height - 1);
|
||||
@ -155,30 +155,30 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private void FillInputTensor(Mat rgb)
|
||||
{
|
||||
int height = _inputHeight;
|
||||
int width = _inputWidth;
|
||||
var height = _inputHeight;
|
||||
var width = _inputWidth;
|
||||
|
||||
// NCHW: [1, 3, H, W]
|
||||
int planeSize = height * width;
|
||||
var planeSize = height * width;
|
||||
|
||||
Span<float> dst = _inputBuffer.AsSpan();
|
||||
|
||||
unsafe
|
||||
{
|
||||
for (int y = 0; y < height; y++)
|
||||
for (var y = 0; y < height; y++)
|
||||
{
|
||||
byte* rowPtr = (byte*)rgb.Ptr(y).ToPointer();
|
||||
var rowPtr = (byte*)rgb.Ptr(y).ToPointer();
|
||||
var rowSpan = new Span<byte>(rowPtr, width * 3);
|
||||
|
||||
int srcIndex = 0;
|
||||
var srcIndex = 0;
|
||||
|
||||
for (int x = 0; x < width; x++)
|
||||
for (var x = 0; x < width; x++)
|
||||
{
|
||||
byte r = rowSpan[srcIndex + 0];
|
||||
byte g = rowSpan[srcIndex + 1];
|
||||
byte b = rowSpan[srcIndex + 2];
|
||||
var r = rowSpan[srcIndex + 0];
|
||||
var g = rowSpan[srcIndex + 1];
|
||||
var b = rowSpan[srcIndex + 2];
|
||||
|
||||
int offset = y * width + x;
|
||||
var offset = y * width + x;
|
||||
|
||||
// channel 0: R
|
||||
dst[offset] = r * _inv255;
|
||||
@ -205,27 +205,27 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
||||
detections.Clear();
|
||||
|
||||
// YOLOv8 output: [1, 84, 8400]
|
||||
int channels = output.Dimensions[1]; // 84
|
||||
int count = output.Dimensions[2]; // 8400
|
||||
var channels = output.Dimensions[1]; // 84
|
||||
var count = output.Dimensions[2]; // 8400
|
||||
|
||||
float xScale = (float)originalWidth / 640f;
|
||||
float yScale = (float)originalHeight / 640f;
|
||||
var xScale = (float)originalWidth / 640f;
|
||||
var yScale = (float)originalHeight / 640f;
|
||||
|
||||
for (int i = 0; i < count; i++)
|
||||
for (var i = 0; i < count; i++)
|
||||
{
|
||||
float x = output[0, 0, i];
|
||||
float y = output[0, 1, i];
|
||||
float w = output[0, 2, i];
|
||||
float h = output[0, 3, i];
|
||||
var x = output[0, 0, i];
|
||||
var y = output[0, 1, i];
|
||||
var w = output[0, 2, i];
|
||||
var h = output[0, 3, i];
|
||||
|
||||
float classScore = output[0, 4 + classIndex, i];
|
||||
var classScore = output[0, 4 + classIndex, i];
|
||||
if (classScore < scoreThreshold)
|
||||
continue;
|
||||
|
||||
float left = (x - w / 2f) * xScale;
|
||||
float top = (y - h / 2f) * yScale;
|
||||
float width = w * xScale;
|
||||
float height = h * yScale;
|
||||
var left = (x - w / 2f) * xScale;
|
||||
var top = (y - h / 2f) * yScale;
|
||||
var width = w * xScale;
|
||||
var height = h * yScale;
|
||||
|
||||
detections.Add(new Detection
|
||||
(
|
||||
@ -252,12 +252,12 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
||||
// Sort in-place by score descending
|
||||
detections.Sort(static (a, b) => b.Score.CompareTo(a.Score));
|
||||
|
||||
for (int i = 0; i < detections.Count; i++)
|
||||
for (var i = 0; i < detections.Count; i++)
|
||||
{
|
||||
var candidate = detections[i];
|
||||
bool keep = true;
|
||||
var keep = true;
|
||||
|
||||
for (int j = 0; j < nmsBuffer.Count; j++)
|
||||
for (var j = 0; j < nmsBuffer.Count; j++)
|
||||
{
|
||||
if (IoU(candidate, nmsBuffer[j]) >= nmsThreshold)
|
||||
{
|
||||
@ -276,23 +276,23 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static float IoU(in Detection a, in Detection b)
|
||||
{
|
||||
float x1 = MathF.Max(a.X, b.X);
|
||||
float y1 = MathF.Max(a.Y, b.Y);
|
||||
float x2 = MathF.Min(a.X + a.Width, b.X + b.Width);
|
||||
float y2 = MathF.Min(a.Y + a.Height, b.Y + b.Height);
|
||||
var x1 = MathF.Max(a.X, b.X);
|
||||
var y1 = MathF.Max(a.Y, b.Y);
|
||||
var x2 = MathF.Min(a.X + a.Width, b.X + b.Width);
|
||||
var y2 = MathF.Min(a.Y + a.Height, b.Y + b.Height);
|
||||
|
||||
float interW = x2 - x1;
|
||||
var interW = x2 - x1;
|
||||
if (interW <= 0f) return 0f;
|
||||
|
||||
float interH = y2 - y1;
|
||||
var interH = y2 - y1;
|
||||
if (interH <= 0f) return 0f;
|
||||
|
||||
float interArea = interW * interH;
|
||||
var interArea = interW * interH;
|
||||
|
||||
float areaA = a.Width * a.Height;
|
||||
float areaB = b.Width * b.Height;
|
||||
var areaA = a.Width * a.Height;
|
||||
var areaB = b.Width * b.Height;
|
||||
|
||||
float union = areaA + areaB - interArea;
|
||||
var union = areaA + areaB - interArea;
|
||||
if (union <= 0f) return 0f;
|
||||
|
||||
return interArea / union;
|
||||
|
||||
BIN
splitter-cli/models/realbasicvsr_x2.onnx
Normal file
BIN
splitter-cli/models/realbasicvsr_x2.onnx
Normal file
Binary file not shown.
@ -42,28 +42,28 @@ public sealed class FrameRotationDetector
|
||||
Cv2.CartToPolar(_gx, _gy, _mag, _angle, angleInDegrees: true);
|
||||
|
||||
// 4. Clear histogram
|
||||
for (int i = 0; i < _bins; i++)
|
||||
for (var i = 0; i < _bins; i++)
|
||||
_hist[i] = 0;
|
||||
|
||||
float binSize = 180f / _bins;
|
||||
var binSize = 180f / _bins;
|
||||
|
||||
unsafe
|
||||
{
|
||||
float* anglePtr = (float*)_angle.Data;
|
||||
float* magPtr = (float*)_mag.Data;
|
||||
var anglePtr = (float*)_angle.Data;
|
||||
var magPtr = (float*)_mag.Data;
|
||||
|
||||
int total = _w * _h;
|
||||
var total = _w * _h;
|
||||
|
||||
for (int i = 0; i < total; i++)
|
||||
for (var i = 0; i < total; i++)
|
||||
{
|
||||
float m = magPtr[i];
|
||||
var m = magPtr[i];
|
||||
if (m < 5f) continue; // ignore weak gradients
|
||||
|
||||
float a = anglePtr[i];
|
||||
var a = anglePtr[i];
|
||||
if (a < 0) a += 360f;
|
||||
a = a % 180f;
|
||||
|
||||
int bin = (int)(a / binSize);
|
||||
var bin = (int)(a / binSize);
|
||||
if (bin < 0) bin = 0;
|
||||
if (bin >= _bins) bin = _bins - 1;
|
||||
|
||||
@ -73,12 +73,12 @@ public sealed class FrameRotationDetector
|
||||
|
||||
// 5. Energy around 0° vs 90°
|
||||
float e0 = 0, e90 = 0;
|
||||
int window = 3;
|
||||
var window = 3;
|
||||
|
||||
int bin0 = 0;
|
||||
int bin90 = _bins / 2;
|
||||
var bin0 = 0;
|
||||
var bin90 = _bins / 2;
|
||||
|
||||
for (int i = -window; i <= window; i++)
|
||||
for (var i = -window; i <= window; i++)
|
||||
{
|
||||
e0 += _hist[Wrap(bin0 + i)];
|
||||
e90 += _hist[Wrap(bin90 + i)];
|
||||
|
||||
@ -72,7 +72,7 @@ public static class ProbeVideo
|
||||
var width = stream?.Width ?? 0;
|
||||
var height = stream?.Height ?? 0;
|
||||
|
||||
double fps = 0.0;
|
||||
var fps = 0.0;
|
||||
if (!string.IsNullOrWhiteSpace(stream?.Avg_frame_rate))
|
||||
{
|
||||
var parts = stream.Avg_frame_rate.Split('/');
|
||||
|
||||
@ -29,8 +29,8 @@ public sealed class VideoRotationSampler
|
||||
RotationDetectorFrameHeight = int.Parse(s);
|
||||
}
|
||||
|
||||
int w = RotationDetectorFrameWidth;
|
||||
int h = RotationDetectorFrameHeight;
|
||||
var w = RotationDetectorFrameWidth;
|
||||
var h = RotationDetectorFrameHeight;
|
||||
|
||||
_buffer = new byte[w * h * 3]; // raw BGR24 buffer
|
||||
_frameMat = new Mat(h, w, MatType.CV_8UC3); // wraps buffer
|
||||
@ -46,9 +46,9 @@ public sealed class VideoRotationSampler
|
||||
|
||||
var rotations = new List<int>();
|
||||
|
||||
for (int i = 0; i < RotationDetectorSampleCount; i++)
|
||||
for (var i = 0; i < RotationDetectorSampleCount; i++)
|
||||
{
|
||||
double t = videoLengthSeconds * (i + 1) / (RotationDetectorSampleCount + 1);
|
||||
var t = videoLengthSeconds * (i + 1) / (RotationDetectorSampleCount + 1);
|
||||
|
||||
var frame = await DecodeSingleFrameAsync(
|
||||
inputFile,
|
||||
@ -60,7 +60,7 @@ public sealed class VideoRotationSampler
|
||||
|
||||
if (frame != null && !frame.Empty())
|
||||
{
|
||||
int rot = _detector.GetRotation(frame);
|
||||
var rot = _detector.GetRotation(frame);
|
||||
rotations.Add(rot);
|
||||
}
|
||||
}
|
||||
@ -80,8 +80,8 @@ public sealed class VideoRotationSampler
|
||||
counts[v]++;
|
||||
}
|
||||
|
||||
int best = 0;
|
||||
int bestCount = 0;
|
||||
var best = 0;
|
||||
var bestCount = 0;
|
||||
|
||||
foreach (var kv in counts)
|
||||
{
|
||||
|
||||
@ -59,9 +59,10 @@
|
||||
<ItemGroup>
|
||||
<PackageReference Include="FFmpeg.AutoGen" Version="8.1.0" />
|
||||
<PackageReference Include="Microsoft.ML.OnnxRuntime.DirectML" Version="1.24.4" />
|
||||
<PackageReference Include="OpenCvSharp4" Version="4.13.0.20260427" />
|
||||
<PackageReference Include="OpenCvSharp4.runtime.win" Version="4.13.0.20260302" />
|
||||
<PackageReference Include="Spectre.Console" Version="0.55.2" />
|
||||
<PackageReference Include="Onnxify" Version="0.1.4" />
|
||||
<PackageReference Include="OpenCvSharp4" Version="4.13.0.20260602" />
|
||||
<PackageReference Include="OpenCvSharp4.runtime.win" Version="4.13.0.20260602" />
|
||||
<PackageReference Include="Spectre.Console" Version="0.56.0" />
|
||||
<PackageReference Include="UltraFaceDotNet" Version="1.0.0.2" />
|
||||
</ItemGroup>
|
||||
|
||||
|
||||
@ -39,7 +39,7 @@ public sealed class SpectreConsoleLogger : ILogger, IDisposable
|
||||
lock (_sync)
|
||||
{
|
||||
_numberOfProcesses = Math.Max(1, value);
|
||||
for (int i = 0; i < _numberOfProcesses; i++)
|
||||
for (var i = 0; i < _numberOfProcesses; i++)
|
||||
{
|
||||
if (!_progress.ContainsKey(i))
|
||||
_progress[i] = ProgressEntry.Empty;
|
||||
@ -282,17 +282,17 @@ public sealed class SpectreConsoleLogger : ILogger, IDisposable
|
||||
if (width <= 0)
|
||||
return string.Empty;
|
||||
|
||||
int filled = (int)Math.Round(progress * width);
|
||||
int empty = width - filled;
|
||||
var filled = (int)Math.Round(progress * width);
|
||||
var empty = width - filled;
|
||||
|
||||
if (filled <= 0)
|
||||
return $"[grey]{new string('─', width)}[/]";
|
||||
|
||||
// Split filled part into three segments: blue / yellow / green
|
||||
// low progress: mostly blue; mid: yellow; high: green
|
||||
int blueCount = (int)Math.Round(filled * 0.33);
|
||||
int yellowCount = (int)Math.Round(filled * 0.34);
|
||||
int greenCount = filled - blueCount - yellowCount;
|
||||
var blueCount = (int)Math.Round(filled * 0.33);
|
||||
var yellowCount = (int)Math.Round(filled * 0.34);
|
||||
var greenCount = filled - blueCount - yellowCount;
|
||||
|
||||
var sb = new StringBuilder();
|
||||
|
||||
|
||||
@ -8,8 +8,8 @@ public static class FileMaskExpander
|
||||
if (!HasMask(input))
|
||||
return [Path.GetFullPath(input)];
|
||||
|
||||
string directory = Path.GetDirectoryName(input) ?? Directory.GetCurrentDirectory();
|
||||
string pattern = Path.GetFileName(input);
|
||||
var directory = Path.GetDirectoryName(input) ?? Directory.GetCurrentDirectory();
|
||||
var pattern = Path.GetFileName(input);
|
||||
|
||||
if (string.IsNullOrEmpty(directory))
|
||||
directory = Directory.GetCurrentDirectory();
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
<Folder Name="/Solution items/">
|
||||
<File Path=".github/workflows/publish.yml" />
|
||||
<File Path=".gitignore" />
|
||||
<File Path="AGENTS.md" />
|
||||
<File Path="LICENSE.txt" />
|
||||
<File Path="README.md" />
|
||||
</Folder>
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user