mirror of
https://github.com/unclshura/splitter.git
synced 2026-06-21 16:12:01 +00:00
Added automatic rotation detection.
This commit is contained in:
parent
1fd938416e
commit
5955fb2d29
124
README.md
124
README.md
@ -1,25 +1,25 @@
|
||||
# Splitter
|
||||
|
||||
Splitter is a high‑performance command line tool for cutting one or more video files into equal or fixed‑length segments using multi‑threaded FFmpeg execution.
|
||||
Splitter is a high‑performance command line tool for cutting one or more video files into equal or fixed‑length segments using multi‑threaded FFmpeg execution.
|
||||
It supports batch input, flexible duration formats, rotation, smart face/body‑aware cropping, ETA and speed reporting, and both rich and plain‑text terminal output.
|
||||
|
||||

|
||||
|
||||
## Features
|
||||
|
||||
- Multi‑threaded FFmpeg splitting for maximum throughput
|
||||
- Equal or fixed‑length segmentation
|
||||
- Batch input via file masks or list files
|
||||
- Smart cropping with face/body tracking
|
||||
- Rotation correction
|
||||
- ETA, speed, and progress display
|
||||
- FFmpeg passthrough for advanced control
|
||||
- Multi‑threaded FFmpeg splitting for maximum throughput
|
||||
- Equal or fixed‑length segmentation
|
||||
- Batch input via file masks or list files
|
||||
- Smart cropping with face/body tracking
|
||||
- Rotation correction
|
||||
- ETA, speed, and progress display
|
||||
- FFmpeg passthrough for advanced control
|
||||
- [Potentially] Cross‑platform (.NET 10)
|
||||
|
||||
## Requirements
|
||||
|
||||
- FFmpeg and FFprobe available in system PATH
|
||||
- .NET 10 Runtime or newer
|
||||
- FFmpeg and FFprobe available in system PATH
|
||||
- .NET 10 Runtime or newer
|
||||
|
||||
If you want to update the model:
|
||||
|
||||
@ -37,74 +37,90 @@ model.export(format="onnx", opset=12, half=False) # FP32 ONNX
|
||||
|
||||
## How It Works
|
||||
|
||||
1. Reads total duration using ffprobe
|
||||
2. Parses target duration
|
||||
3. Computes number of segments
|
||||
4. If not forced, equalizes segment lengths
|
||||
5. Runs multiple FFmpeg processes in parallel
|
||||
6. Applies rotation, crop, and tracking if enabled
|
||||
7. Displays progress, ETA, and speed
|
||||
1. Reads total duration using ffprobe
|
||||
2. Parses target duration
|
||||
3. Computes number of segments
|
||||
4. If not forced, equalizes segment lengths
|
||||
5. Runs multiple FFmpeg processes in parallel
|
||||
6. Applies rotation, crop, and tracking if enabled
|
||||
7. Displays progress, ETA, and speed
|
||||
|
||||
---
|
||||
## Face Tracking vs Body Tracking
|
||||
|
||||
Face tracking and body tracking serve different purposes, and Splitter supports both because each
|
||||
excels in different recording environments. When converting horizontal footage into vertical clips,
|
||||
Face tracking and body tracking serve different purposes, and Splitter supports both because each
|
||||
excels in different recording environments. When converting horizontal footage into vertical clips,
|
||||
the choice of detector determines how stable, reliable, and natural the automated camera motion will be.
|
||||
|
||||

|
||||
|
||||
### Face Tracking Using UltraFace 320
|
||||
|
||||
Splitter uses the UltraFace 320 ONNX model to perform lightweight, real‑time face detection on each
|
||||
frame of the input video. The detector produces bounding boxes for visible faces, and the tracking
|
||||
Splitter uses the UltraFace 320 ONNX model to perform lightweight, real‑time face detection on each
|
||||
frame of the input video. The detector produces bounding boxes for visible faces, and the tracking
|
||||
system maintains a stable, smoothed target region across time. This is achieved by combining per‑frame
|
||||
detections with temporal smoothing (EMA), dropout tolerance, and camera easing. The result is a
|
||||
detections with temporal smoothing (EMA), dropout tolerance, and camera easing. The result is a
|
||||
continuous, stable crop window that follows the performer even when the face is partially occluded,
|
||||
briefly lost, or moving rapidly.
|
||||
|
||||
During segmentation, the crop window is recalculated for every frame, ensuring that each output
|
||||
segment inherits the same smooth camera motion. This makes the vertical clips appear as if they
|
||||
were recorded with a dedicated portrait‑oriented camera operator. The UltraFace 320 model is
|
||||
fast enough to run alongside multi‑threaded FFmpeg splitting without becoming a bottleneck,
|
||||
During segmentation, the crop window is recalculated for every frame, ensuring that each output
|
||||
segment inherits the same smooth camera motion. This makes the vertical clips appear as if they
|
||||
were recorded with a dedicated portrait‑oriented camera operator. The UltraFace 320 model is
|
||||
fast enough to run alongside multi‑threaded FFmpeg splitting without becoming a bottleneck,
|
||||
making it suitable for long recordings and batch processing.
|
||||
|
||||
### Benefits of Full‑Body Detection Using YOLOv8s for Live Gig Recordings
|
||||
|
||||
When recording concerts or live gigs, performers often move unpredictably, turn away from the
|
||||
camera, or become partially obscured by lighting, instruments, or stage effects.
|
||||
Full‑body detection using a YOLOv8s ONNX model provides a more reliable tracking anchor than
|
||||
When recording concerts or live gigs, performers often move unpredictably, turn away from the
|
||||
camera, or become partially obscured by lighting, instruments, or stage effects.
|
||||
Full‑body detection using a YOLOv8s ONNX model provides a more reliable tracking anchor than
|
||||
face detection alone. Because YOLOv8s can detect the entire human silhouette, the tracker
|
||||
maintains stable framing even when the face is not visible, when the performer is far from
|
||||
the camera, or when stage lighting makes facial features hard to detect. This produces vertical
|
||||
maintains stable framing even when the face is not visible, when the performer is far from
|
||||
the camera, or when stage lighting makes facial features hard to detect. This produces vertical
|
||||
clips that feel intentional and professionally framed, with fewer sudden jumps or lost‑tracking
|
||||
moments. For creators converting horizontal gig footage into short vertical clips for YouTube
|
||||
Shorts or TikTok, body‑based tracking significantly improves consistency, reduces manual editing,
|
||||
moments. For creators converting horizontal gig footage into short vertical clips for YouTube
|
||||
Shorts or TikTok, body‑based tracking significantly improves consistency, reduces manual editing,
|
||||
and preserves the energy and motion of the performance.
|
||||
|
||||
### Automated Camera Control
|
||||
|
||||
Splitter includes an automated camera control system that simulates the behavior of a virtual
|
||||
camera operator when generating vertical crops from horizontal footage. The goal is to maintain
|
||||
Splitter includes an automated camera control system that simulates the behavior of a virtual
|
||||
camera operator when generating vertical crops from horizontal footage. The goal is to maintain
|
||||
smooth, intentional framing around the tracked subject, even when detections are noisy, intermittent,
|
||||
or temporarily lost.
|
||||
|
||||
The controller receives object detections (face or body) and converts them into a stable crop
|
||||
window using a combination of Kalman filtering, exponential smoothing, dropout tolerance,
|
||||
and a three‑state tracking model. The Kalman filter provides predictive motion smoothing,
|
||||
while the EMA factor blends the predicted position with the previous camera center to avoid jitter.
|
||||
The camera easing value controls how quickly the virtual camera follows the subject, producing
|
||||
The controller receives object detections (face or body) and converts them into a stable crop
|
||||
window using a combination of Kalman filtering, exponential smoothing, dropout tolerance,
|
||||
and a three‑state tracking model. The Kalman filter provides predictive motion smoothing,
|
||||
while the EMA factor blends the predicted position with the previous camera center to avoid jitter.
|
||||
The camera easing value controls how quickly the virtual camera follows the subject, producing
|
||||
natural‑looking motion rather than abrupt jumps.
|
||||
|
||||
When detections disappear, the controller enters one of two fallback modes. In LostFreeze mode,
|
||||
the camera holds its last known position for a configurable number of frames, preventing sudden
|
||||
jumps during brief occlusions. If the subject remains lost beyond that threshold, the controller
|
||||
transitions to LostDrift mode, slowly drifting the camera back toward a neutral center position.
|
||||
This prevents the crop from drifting off‑screen and ensures that the output remains usable even
|
||||
When detections disappear, the controller enters one of two fallback modes. In LostFreeze mode,
|
||||
the camera holds its last known position for a configurable number of frames, preventing sudden
|
||||
jumps during brief occlusions. If the subject remains lost beyond that threshold, the controller
|
||||
transitions to LostDrift mode, slowly drifting the camera back toward a neutral center position.
|
||||
This prevents the crop from drifting off‑screen and ensures that the output remains usable even
|
||||
when tracking fails. All positions are clamped to valid bounds, guaranteeing that the crop window
|
||||
never leaves the video frame.
|
||||
|
||||
---
|
||||
### Automatic rotation detection
|
||||
|
||||
The rotation‑estimation method is based on analyzing the distribution of gradient orientations within
|
||||
a video frame. After converting the frame to grayscale, the algorithm computes horizontal and vertical
|
||||
image gradients using Sobel operators and derives per‑pixel gradient magnitudes and orientations.
|
||||
These orientations are folded into the range [0, 180) and accumulated into a fixed‑size,
|
||||
magnitude‑weighted histogram. The histogram represents the structural edge distribution of the frame,
|
||||
independent of brightness fluctuations or local lighting artifacts. By comparing the total gradient
|
||||
energy concentrated near 0 degrees (vertical edges) with the energy near 90 degrees (horizontal edges),
|
||||
the method determines whether the frame is more consistent with an upright or sideways orientation.
|
||||
|
||||
This approach is designed for environments where brightness‑based cues are unreliable, such as
|
||||
live concerts with strobe lights, LED walls, haze, and crowd movement. It relies solely on geometric
|
||||
edge structure, which remains stable even under extreme lighting variation. The implementation is
|
||||
optimized for high‑throughput video processing: all intermediate Mats, buffers, and histograms are
|
||||
preallocated, and pixel data is accessed directly through pointers to avoid per‑frame memory
|
||||
allocation. The method is intentionally biased toward the upright orientation, returning a sideways
|
||||
classification only when the horizontal‑edge energy significantly exceeds the vertical‑edge energy.
|
||||
|
||||
## Usage
|
||||
|
||||
@ -114,11 +130,10 @@ splitter [<input.mp4> ...] [options] [--] <ffmpeg passthrough>
|
||||
|
||||
Inputs may be provided directly, via `--file=...`, or using file masks such as `videos/*.mp4`.
|
||||
|
||||
---
|
||||
|
||||
## Options
|
||||
|
||||
Below is a clean, ASCII‑only **options table** version of your content.
|
||||
The table below summarizes all command-line options.
|
||||
Option names are shown exactly as they must be typed; descriptions are condensed for brevity.
|
||||
|
||||
---
|
||||
@ -133,6 +148,7 @@ All option names are preserved exactly, and descriptions are consolidated for cl
|
||||
| **--duration=<value>** | Override target segment duration. Formats: `Ns`, `NmMs`, `N`. Examples: `--duration=90s`, `--duration=2m30s`, `--duration=45`. Without `--force`: max 58 seconds, equalized across segments. |
|
||||
| **--force** | Use the duration exactly as provided. Last segment may be shorter. |
|
||||
| **--rotate=<degrees>** | Rotate video by 90, 180, or 270 degrees. Useful for correcting orientation metadata. |
|
||||
| **--rotate-auto** | Use automatic rotation detection. |
|
||||
| **--estimate** | Print calculated segment information and exit. No splitting is performed. |
|
||||
| **--crop[=<w:h>]** | Crop video to a target width and height with face/body tracking. Default: 607x1080. Ideal for Shorts, TikTok, Reels. |
|
||||
| **--detect=<name>** | Object detector for tracking. Values: `face` (UltraFace), `body` (YoloOnnx, default), `none` (center crop). |
|
||||
@ -142,8 +158,6 @@ All option names are preserved exactly, and descriptions are consolidated for cl
|
||||
| **--debug** | Show debug overlay during tracking. No cropping performed, but crop region shown. |
|
||||
| **-p:<name>=<value>** | Set custom parameters for the object detector. Example: `-p:confidence=0.5`. Defaults: DropoutToleranceFrames=20, EmaFactor=0.65, CameraEasing=0.03, LostFreezeFrames=60. |
|
||||
|
||||
---
|
||||
|
||||
## FFmpeg Passthrough
|
||||
|
||||
Anything after `--` is passed directly to FFmpeg.
|
||||
@ -153,16 +167,12 @@ Example:
|
||||
splitter video.mp4 --force --duration=45 -- -an -sn
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Input and Output Behavior
|
||||
|
||||
- `input.mp4` may be a file mask (`videos/*.mp4`)
|
||||
- Output filenames follow the `--mask` pattern
|
||||
- `input.mp4` may be a file mask (`videos/*.mp4`)
|
||||
- Output filenames follow the `--mask` pattern
|
||||
- Output folder defaults to `<input folder>/Splitter` unless overridden
|
||||
|
||||
---
|
||||
|
||||
## Examples
|
||||
|
||||
Split into equal 60‑second segments:
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
using System;
|
||||
using OpenCvSharp;
|
||||
using OpenCvSharp;
|
||||
|
||||
namespace splitter;
|
||||
|
||||
|
||||
@ -1,8 +1,4 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.Text;
|
||||
using OpenCvSharp;
|
||||
using System.Globalization;
|
||||
|
||||
namespace splitter;
|
||||
|
||||
@ -22,6 +18,7 @@ public class SingleJob
|
||||
public bool ForceFixed { get; set; }
|
||||
public bool SingleThreaded { get; set; }
|
||||
public int? Rotate { get; set; }
|
||||
public bool RotateAuto { get; set; }
|
||||
public Dictionary<string, string> Parameters { get; set; } = [];
|
||||
|
||||
public void Override<T>(ref T member, string name)
|
||||
@ -143,6 +140,10 @@ public sealed class CommandLine
|
||||
{
|
||||
Master.SingleThreaded = true;
|
||||
}
|
||||
else if (arg == "--rotate-auto")
|
||||
{
|
||||
Master.RotateAuto = true;
|
||||
}
|
||||
else if (arg.StartsWith("--gravitate="))
|
||||
{
|
||||
var val = arg.Substring("--gravitate=".Length);
|
||||
@ -197,6 +198,7 @@ public sealed class CommandLine
|
||||
ForceFixed = Master.ForceFixed,
|
||||
SingleThreaded = Master.SingleThreaded,
|
||||
Rotate = Master.Rotate,
|
||||
RotateAuto = Master.RotateAuto,
|
||||
Parameters = new Dictionary<string, string>(Master.Parameters)
|
||||
}).ToArray();
|
||||
|
||||
@ -371,6 +373,9 @@ Options:
|
||||
--rotate=<degrees> Rotate video by specified degrees (90, 180, 270).
|
||||
Useful for videos with incorrect orientation metadata.
|
||||
|
||||
--rotate-auto Auto-detect rotation and rotate accordingly.
|
||||
Uses edge orientation statistics to determine if video is rotated.
|
||||
|
||||
--estimate Print calculated segment information and exit.
|
||||
No splitting is performed.
|
||||
|
||||
@ -393,7 +398,7 @@ Options:
|
||||
--debug Show debug overlay during face tracking.
|
||||
|
||||
-p:<name>=<value> Set a custom parameter for the object detector.
|
||||
Example: -p:confidence=0.5
|
||||
Example: -p:EmaFactor=0.65
|
||||
|
||||
Tracking splitter defaults:
|
||||
DropoutToleranceFrames = 20;
|
||||
@ -401,6 +406,12 @@ Options:
|
||||
CameraEasing = 0.03;
|
||||
LostFreezeFrames = 60;
|
||||
|
||||
Rotation detector defaults:
|
||||
RotationDetectorSampleCount = 5;
|
||||
RotationDetectorSampleLength = 0.15;
|
||||
RotationDetectorFrameWidth = 320;
|
||||
RotationDetectorFrameHeight = 180;
|
||||
|
||||
Passthrough:
|
||||
Anything after -- is passed directly to ffmpeg.
|
||||
|
||||
|
||||
102
splitter-cli/FrameRotationDetector.cs
Normal file
102
splitter-cli/FrameRotationDetector.cs
Normal file
@ -0,0 +1,102 @@
|
||||
using OpenCvSharp;
|
||||
|
||||
namespace splitter;
|
||||
|
||||
/// <summary>
/// Classifies a single frame as upright (0) or sideways (90) by comparing the
/// gradient energy found near 0 degrees with the energy found near 90 degrees
/// in a magnitude-weighted histogram of gradient orientations.
/// </summary>
/// <remarks>
/// Working buffers are allocated once in the constructor and reused on every call,
/// so an instance must not be shared between threads that call
/// <see cref="GetRotation"/> concurrently. Dispose the instance to release the
/// native OpenCV buffers.
/// </remarks>
public sealed class FrameRotationDetector : IDisposable
{
    // Gradients weaker than this magnitude are ignored.
    private const float MinGradientMagnitude = 5f;

    // Number of histogram bins summed on each side of the 0 and 90 degree bins.
    private const int EnergyWindowBins = 3;

    // Energy near 90 degrees must exceed energy near 0 degrees by this factor
    // before the frame is reported as sideways (bias toward "upright").
    private const float SidewaysEnergyRatio = 1.6f;

    private readonly Mat _gray;
    private readonly Mat _gx;
    private readonly Mat _gy;
    private readonly Mat _mag;
    private readonly Mat _angle;

    private readonly float[] _hist;
    private readonly int _bins;

    private bool _disposed;

    /// <summary>Creates a detector with preallocated working buffers.</summary>
    /// <param name="width">Expected frame width in pixels; must be positive.</param>
    /// <param name="height">Expected frame height in pixels; must be positive.</param>
    /// <param name="bins">Number of histogram bins covering [0, 180) degrees; must be positive.</param>
    /// <exception cref="ArgumentOutOfRangeException">Any argument is zero or negative.</exception>
    public FrameRotationDetector(int width = 320, int height = 180, int bins = 36)
    {
        if (width <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(width), width, "Width must be positive.");
        }

        if (height <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(height), height, "Height must be positive.");
        }

        if (bins <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(bins), bins, "Bin count must be positive.");
        }

        _bins = bins;

        _gray = new Mat(height, width, MatType.CV_8UC1);
        _gx = new Mat(height, width, MatType.CV_32F);
        _gy = new Mat(height, width, MatType.CV_32F);
        _mag = new Mat(height, width, MatType.CV_32F);
        _angle = new Mat(height, width, MatType.CV_32F);

        _hist = new float[bins]; // allocated once, cleared per call
    }

    /// <summary>Estimates the rotation of <paramref name="frame"/>.</summary>
    /// <param name="frame">Frame passed to a BGR-to-gray conversion.</param>
    /// <returns>90 when the frame looks sideways, otherwise 0.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="frame"/> is null.</exception>
    /// <exception cref="ObjectDisposedException">The detector has been disposed.</exception>
    public int GetRotation(Mat frame)
    {
        ArgumentNullException.ThrowIfNull(frame);
        ObjectDisposedException.ThrowIf(_disposed, this);

        // 1. Grayscale
        Cv2.CvtColor(frame, _gray, ColorConversionCodes.BGR2GRAY);

        // 2. Sobel derivatives along x and y
        Cv2.Sobel(_gray, _gx, MatType.CV_32F, 1, 0, 3);
        Cv2.Sobel(_gray, _gy, MatType.CV_32F, 0, 1, 3);

        // 3. Per-pixel magnitude and angle (degrees)
        Cv2.CartToPolar(_gx, _gy, _mag, _angle, angleInDegrees: true);

        // 4. Rebuild the orientation histogram
        Array.Clear(_hist, 0, _hist.Length);
        AccumulateHistogram();

        // 5. Energy around 0 degrees vs 90 degrees
        float energyAt0 = 0f;
        float energyAt90 = 0f;
        int bin90 = _bins / 2;

        for (int offset = -EnergyWindowBins; offset <= EnergyWindowBins; offset++)
        {
            energyAt0 += _hist[Wrap(offset)];
            energyAt90 += _hist[Wrap(bin90 + offset)];
        }

        // 6. Sideways only on a clear majority; upright is the default answer.
        return energyAt90 > energyAt0 * SidewaysEnergyRatio ? 90 : 0;
    }

    /// <summary>Releases the native OpenCV buffers owned by this detector.</summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _gray.Dispose();
        _gx.Dispose();
        _gy.Dispose();
        _mag.Dispose();
        _angle.Dispose();
    }

    /// <summary>
    /// Adds every sufficiently strong gradient to the histogram bin of its
    /// orientation folded into [0, 180).
    /// </summary>
    private void AccumulateHistogram()
    {
        float binSize = 180f / _bins;

        // Use the actual size of the computed Mats rather than the constructor
        // arguments: the OpenCV calls above size their outputs to the input
        // frame, so a frame of a different size must not drive a raw pointer
        // walk based on stale dimensions.
        int rows = _mag.Rows;
        int cols = _mag.Cols;

        unsafe
        {
            for (int y = 0; y < rows; y++)
            {
                // Row pointers stay valid even if a Mat is not stored contiguously.
                float* magRow = (float*)_mag.Ptr(y);
                float* angleRow = (float*)_angle.Ptr(y);

                for (int x = 0; x < cols; x++)
                {
                    float m = magRow[x];
                    if (m < MinGradientMagnitude)
                    {
                        continue;
                    }

                    float a = angleRow[x];
                    if (a < 0)
                    {
                        a += 360f;
                    }

                    a %= 180f;

                    int bin = (int)(a / binSize);
                    if (bin < 0)
                    {
                        bin = 0;
                    }
                    else if (bin >= _bins)
                    {
                        bin = _bins - 1;
                    }

                    _hist[bin] += m;
                }
            }
        }
    }

    /// <summary>Maps any bin index onto [0, bins), treating the histogram as circular.</summary>
    private int Wrap(int b)
    {
        // Double modulo keeps the result valid even when the window is wider than the histogram.
        return ((b % _bins) + _bins) % _bins;
    }
}
|
||||
@ -4,5 +4,5 @@ namespace splitter;
|
||||
|
||||
public interface IObjectDetector : IDisposable
|
||||
{
|
||||
List<(Rect box, Point2f center)> DetectAll(Mat frameCont, int width, int height);
|
||||
List<(Rect box, Point2f center)> DetectAll(Mat frameCont);
|
||||
}
|
||||
@ -1,8 +1,4 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
|
||||
namespace splitter;
|
||||
namespace splitter;
|
||||
|
||||
public interface ISegmentProcessor
|
||||
{
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
using System;
|
||||
namespace splitter;
|
||||
namespace splitter;
|
||||
|
||||
public abstract class LoggingBase(ILogger _logger, int _progressLine)
|
||||
{
|
||||
|
||||
@ -1,8 +1,5 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.Text;
|
||||
|
||||
namespace splitter;
|
||||
|
||||
@ -11,12 +8,31 @@ public record VideoInfo(
|
||||
int Width,
|
||||
int Height,
|
||||
double Fps,
|
||||
double Bitrate
|
||||
double Bitrate,
|
||||
int Rotation = 0
|
||||
);
|
||||
|
||||
public static class ProbeVideo
|
||||
{
|
||||
public static VideoInfo Probe(string inputFile)
|
||||
public static async Task<VideoInfo> Probe(SingleJob job)
|
||||
{
|
||||
var info = ProbeSize(job.InputFile);
|
||||
if ( job.RotateAuto)
|
||||
{
|
||||
var rotation = await ProbeRotation(job, info.Duration);
|
||||
info = info with { Rotation = rotation };
|
||||
}
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
private static async Task<int> ProbeRotation(SingleJob job, double duration)
|
||||
{
|
||||
var rotation = await new VideoRotationSampler(job).DetectRotationAsync(job.InputFile, duration);
|
||||
return rotation;
|
||||
}
|
||||
|
||||
private static VideoInfo ProbeSize(string inputFile)
|
||||
{
|
||||
var args =
|
||||
"-v error " +
|
||||
|
||||
@ -10,7 +10,7 @@
|
||||
},
|
||||
"Debug": {
|
||||
"commandName": "Project",
|
||||
"commandLineArgs": "\"C:\\Users\\uncls\\Pictures\\2026\\2026 - Secret Rule\\20260426_212004.mp4\" --crop --detect=body --debug --single-thread --text"
|
||||
"commandLineArgs": "\"C:\\Users\\uncls\\Pictures\\2026\\2026 - Laura Cox\\Video\\MAH00041.MP4\" --rotate-auto"
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1,9 +1,5 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.Text;
|
||||
using FFmpeg.AutoGen;
|
||||
|
||||
namespace splitter;
|
||||
|
||||
|
||||
@ -1,9 +1,4 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using System.Text;
|
||||
using Spectre.Console;
|
||||
using Spectre.Console.Rendering;
|
||||
|
||||
|
||||
@ -1,9 +1,6 @@
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Threading.Tasks;
|
||||
using OpenCvSharp;
|
||||
|
||||
namespace splitter;
|
||||
@ -112,7 +109,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
|
||||
// input frame → Mat
|
||||
Marshal.Copy(inBuffer, 0, frameMat.Data, inBytes);
|
||||
|
||||
var objects = _detector.DetectAll(frameMat, videoWidth, videoHeight);
|
||||
var objects = _detector.DetectAll(frameMat);
|
||||
var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
|
||||
|
||||
camera.Update(primary);
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
using System.Runtime.InteropServices;
|
||||
using NcnnDotNet.Layers;
|
||||
using OpenCvSharp;
|
||||
using UltraFaceDotNet;
|
||||
|
||||
@ -25,7 +24,7 @@ public sealed class UltraFaceDetector: LoggingBase, IDisposable, IObjectDetector
|
||||
_ultraFace = UltraFace.Create(param);
|
||||
}
|
||||
|
||||
public List<(Rect box, Point2f center)> DetectAll(Mat frameCont, int width, int height)
|
||||
public List<(Rect box, Point2f center)> DetectAll(Mat frameCont)
|
||||
{
|
||||
// Convert to byte[] for UltraFace
|
||||
var bytesFull = frameCont.Rows * frameCont.Cols * frameCont.ElemSize();
|
||||
@ -44,8 +43,8 @@ public sealed class UltraFaceDetector: LoggingBase, IDisposable, IObjectDetector
|
||||
using var mat = NcnnDotNet.Mat.FromPixels(
|
||||
(IntPtr)p,
|
||||
NcnnDotNet.PixelType.Bgr, // BGR24 input
|
||||
width,
|
||||
height);
|
||||
frameCont.Width,
|
||||
frameCont.Height);
|
||||
|
||||
var faces = _ultraFace.Detect(mat);
|
||||
if (faces == null)
|
||||
|
||||
169
splitter-cli/VideoRotationSampler.cs
Normal file
169
splitter-cli/VideoRotationSampler.cs
Normal file
@ -0,0 +1,169 @@
|
||||
using OpenCvSharp;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace splitter;
|
||||
|
||||
/// <summary>
/// Estimates the rotation of a video by decoding a handful of frames spread
/// evenly across its duration with ffmpeg, classifying each one with
/// <see cref="FrameRotationDetector"/>, and returning the most frequent answer.
/// </summary>
/// <remarks>
/// The public static fields are the defaults. Per-job overrides are read from
/// <c>SingleJob.Parameters</c> under the same names and stored on the instance,
/// so one job's overrides no longer change the defaults seen by another job.
/// NOTE(review): the CLI help text lists RotationDetectorSampleCount = 5 while
/// the default here is 20 - confirm which value is intended.
/// </remarks>
public sealed class VideoRotationSampler : IDisposable
{
    public static int RotationDetectorSampleCount = 20;
    public static double RotationDetectorSampleLength = 0.15; // seconds to decode per probe
    public static int RotationDetectorFrameWidth = 320;
    public static int RotationDetectorFrameHeight = 180;

    private readonly int _sampleCount;
    private readonly double _sampleLength;
    private readonly int _frameWidth;
    private readonly int _frameHeight;

    private readonly FrameRotationDetector _detector;

    // Reused for every sample so that probing does not allocate per frame.
    private readonly byte[] _buffer;
    private readonly Mat _frameMat;

    private bool _disposed;

    /// <summary>Creates a sampler configured from the job's custom parameters.</summary>
    /// <param name="_master">Job whose <c>Parameters</c> may override the static defaults.</param>
    /// <exception cref="ArgumentNullException"><paramref name="_master"/> is null.</exception>
    /// <exception cref="FormatException">An override is not a valid number.</exception>
    /// <exception cref="ArgumentOutOfRangeException">Frame width or height is not positive.</exception>
    public VideoRotationSampler(SingleJob _master)
    {
        ArgumentNullException.ThrowIfNull(_master);

        var parameters = _master.Parameters;

        _sampleCount = ReadInt(parameters, "RotationDetectorSampleCount", RotationDetectorSampleCount);
        _sampleLength = ReadDouble(parameters, "RotationDetectorSampleLength", RotationDetectorSampleLength);
        _frameWidth = ReadInt(parameters, "RotationDetectorFrameWidth", RotationDetectorFrameWidth);
        _frameHeight = ReadInt(parameters, "RotationDetectorFrameHeight", RotationDetectorFrameHeight);

        if (_frameWidth <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(_master), _frameWidth, "RotationDetectorFrameWidth must be positive.");
        }

        if (_frameHeight <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(_master), _frameHeight, "RotationDetectorFrameHeight must be positive.");
        }

        _buffer = new byte[checked(_frameWidth * _frameHeight * 3)];   // one raw BGR24 frame
        _frameMat = new Mat(_frameHeight, _frameWidth, MatType.CV_8UC3);

        // The detector must be sized like the frames ffmpeg is asked to produce.
        _detector = new FrameRotationDetector(_frameWidth, _frameHeight);
    }

    /// <summary>Samples the video and returns the rotation most samples agree on.</summary>
    /// <param name="inputFile">Path of the video passed to ffmpeg.</param>
    /// <param name="videoLengthSeconds">Total duration used to place the sample points.</param>
    /// <param name="cancellationToken">Cancels the probing; the running ffmpeg process is killed.</param>
    /// <returns>
    /// The majority rotation in degrees, or 0 when the duration is not positive
    /// or no frame could be decoded.
    /// </returns>
    public async Task<int> DetectRotationAsync(
        string inputFile,
        double videoLengthSeconds,
        CancellationToken cancellationToken = default)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        // Written as a negated ">" so that NaN is rejected as well.
        if (!(videoLengthSeconds > 0))
        {
            return 0;
        }

        var rotations = new List<int>(Math.Max(_sampleCount, 0));

        for (int i = 0; i < _sampleCount; i++)
        {
            cancellationToken.ThrowIfCancellationRequested();

            // Evenly spaced interior points: never exactly the start or the end.
            double timestamp = videoLengthSeconds * (i + 1) / (_sampleCount + 1);

            Mat? frame = await DecodeSingleFrameAsync(inputFile, timestamp, cancellationToken);

            if (frame is not null && !frame.Empty())
            {
                rotations.Add(_detector.GetRotation(frame));
            }
        }

        return rotations.Count == 0 ? 0 : Majority(rotations);
    }

    /// <summary>Releases the reusable frame buffer and the detector.</summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _frameMat.Dispose();

        // Checked through object so this compiles whether or not the detector is disposable.
        if ((object)_detector is IDisposable disposableDetector)
        {
            disposableDetector.Dispose();
        }
    }

    /// <summary>
    /// Returns the most frequent value; ties go to the smallest value, so an
    /// even split between 0 and 90 resolves to upright (0).
    /// </summary>
    private static int Majority(List<int> values)
    {
        return values
            .GroupBy(v => v)
            .OrderByDescending(g => g.Count())
            .ThenBy(g => g.Key)
            .First()
            .Key;
    }

    /// <summary>
    /// Decodes the first frame at <paramref name="start"/> into the shared Mat.
    /// Returns null when ffmpeg ends its output before a full frame arrived.
    /// </summary>
    private async Task<Mat?> DecodeSingleFrameAsync(
        string inputFile,
        double start,
        CancellationToken cancellationToken)
    {
        using Process ffmpeg = StartFfmpegDecode(inputFile, start, _sampleLength);

        // stderr has to be drained, otherwise ffmpeg can block on a full pipe.
        Task stderrPump = DrainStderrAsync(ffmpeg);

        try
        {
            var stdout = ffmpeg.StandardOutput.BaseStream;
            int read = 0;

            while (read < _buffer.Length)
            {
                int chunk = await stdout.ReadAsync(_buffer.AsMemory(read), cancellationToken);
                if (chunk == 0)
                {
                    return null; // stream ended before one whole frame
                }

                read += chunk;
            }
        }
        finally
        {
            // Only one frame is needed; stop ffmpeg on every exit path
            // (success, short read, cancellation, I/O failure).
            StopProcess(ffmpeg);
            await stderrPump;
        }

        // Copy buffer -> Mat (no new Mat)
        System.Runtime.InteropServices.Marshal.Copy(_buffer, 0, _frameMat.Data, _buffer.Length);

        return _frameMat;
    }

    /// <summary>Starts ffmpeg writing scaled raw BGR24 frames to stdout.</summary>
    private Process StartFfmpegDecode(string inputFile, double start, double length)
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        var psi = new ProcessStartInfo
        {
            FileName = "ffmpeg",
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false,
            CreateNoWindow = true,

            // ArgumentList performs the quoting, so any file name is passed intact.
            ArgumentList =
            {
                "-ss", start.ToString("0.###", invariant),
                "-t", length.ToString("0.###", invariant),
                "-i", inputFile,
                "-an",
                "-sn",
                // FFmpeg does the resize + format conversion
                "-vf", $"scale={_frameWidth}:{_frameHeight},format=bgr24",
                "-f", "rawvideo",
                "-"
            }
        };

        var process = new Process { StartInfo = psi };
        process.Start();
        return process;
    }

    /// <summary>Reads and discards ffmpeg's stderr until the pipe closes; never throws.</summary>
    private static async Task DrainStderrAsync(Process process)
    {
        try
        {
            while (await process.StandardError.ReadLineAsync() is not null)
            {
                // Output is intentionally discarded.
            }
        }
        catch (System.IO.IOException)
        {
            // Pipe broke because the process was killed.
        }
        catch (ObjectDisposedException)
        {
            // Reader was torn down while draining.
        }
        catch (InvalidOperationException)
        {
            // stderr is no longer available.
        }
    }

    /// <summary>Kills the process if it is still running; a process that already exited is fine.</summary>
    private static void StopProcess(Process process)
    {
        try
        {
            if (!process.HasExited)
            {
                process.Kill();
            }
        }
        catch (InvalidOperationException)
        {
            // Exited between the check and the kill.
        }
        catch (System.ComponentModel.Win32Exception)
        {
            // Could not be terminated (for example, it is already terminating).
        }
    }

    /// <summary>Reads an integer override using the invariant culture, or returns the fallback.</summary>
    private static int ReadInt(Dictionary<string, string> parameters, string key, int fallback)
    {
        return parameters.TryGetValue(key, out var text)
            ? int.Parse(text, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture)
            : fallback;
    }

    /// <summary>
    /// Reads a floating-point override using the invariant culture, so "0.15"
    /// means the same on every machine, or returns the fallback.
    /// </summary>
    private static double ReadDouble(Dictionary<string, string> parameters, string key, double fallback)
    {
        return parameters.TryGetValue(key, out var text)
            ? double.Parse(text, System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture)
            : fallback;
    }
}
|
||||
@ -1,8 +1,4 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Numerics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.CompilerServices;
|
||||
using Microsoft.ML.OnnxRuntime;
|
||||
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||
using OpenCvSharp;
|
||||
@ -83,7 +79,7 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
||||
_inputs.Add(NamedOnnxValue.CreateFromTensor(_inputName, _inputTensor));
|
||||
}
|
||||
|
||||
public List<(Rect box, Point2f center)> DetectAll(Mat frameCont, int width, int height)
|
||||
public List<(Rect box, Point2f center)> DetectAll(Mat frameCont)
|
||||
{
|
||||
if (frameCont.Empty())
|
||||
{
|
||||
|
||||
@ -1,7 +1,5 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.Text;
|
||||
using Spectre.Console;
|
||||
using splitter;
|
||||
|
||||
@ -78,7 +76,7 @@ static partial class Program
|
||||
if (!Directory.Exists(job.OutputFolder))
|
||||
Directory.CreateDirectory(job.OutputFolder);
|
||||
|
||||
var info = ProbeVideo.Probe(job.InputFile);
|
||||
var info = await ProbeVideo.Probe(job);
|
||||
if (info.Duration <= 0)
|
||||
{
|
||||
LogError($"{baseName}: Could not read duration.");
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user