mirror of
https://github.com/unclshura/splitter.git
synced 2026-06-22 00:22:01 +00:00
Added automatic rotation detection.
This commit is contained in:
parent
1fd938416e
commit
5955fb2d29
124
README.md
124
README.md
@ -1,25 +1,25 @@
|
|||||||
# Splitter
|
# Splitter
|
||||||
|
|
||||||
Splitter is a high‑performance command line tool for cutting one or more video files into equal or fixed‑length segments using multi‑threaded FFmpeg execution.
|
Splitter is a high‑performance command line tool for cutting one or more video files into equal or fixed‑length segments using multi‑threaded FFmpeg execution.
|
||||||
It supports batch input, flexible duration formats, rotation, smart face/body‑aware cropping, ETA and speed reporting, and both rich and plain‑text terminal output.
|
It supports batch input, flexible duration formats, rotation, smart face/body‑aware cropping, ETA and speed reporting, and both rich and plain‑text terminal output.
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- Multi‑threaded FFmpeg splitting for maximum throughput
|
- Multi‑threaded FFmpeg splitting for maximum throughput
|
||||||
- Equal or fixed‑length segmentation
|
- Equal or fixed‑length segmentation
|
||||||
- Batch input via file masks or list files
|
- Batch input via file masks or list files
|
||||||
- Smart cropping with face/body tracking
|
- Smart cropping with face/body tracking
|
||||||
- Rotation correction
|
- Rotation correction
|
||||||
- ETA, speed, and progress display
|
- ETA, speed, and progress display
|
||||||
- FFmpeg passthrough for advanced control
|
- FFmpeg passthrough for advanced control
|
||||||
- [Potentially] Cross‑platform (.NET 10)
|
- [Potentially] Cross‑platform (.NET 10)
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
- FFmpeg and FFprobe available in system PATH
|
- FFmpeg and FFprobe available in system PATH
|
||||||
- .NET 10 Runtime or newer
|
- .NET 10 Runtime or newer
|
||||||
|
|
||||||
If you want to update the model:
|
If you want to update the model:
|
||||||
|
|
||||||
@ -37,74 +37,90 @@ model.export(format="onnx", opset=12, half=False) # FP32 ONNX
|
|||||||
|
|
||||||
## How It Works
|
## How It Works
|
||||||
|
|
||||||
1. Reads total duration using ffprobe
|
1. Reads total duration using ffprobe
|
||||||
2. Parses target duration
|
2. Parses target duration
|
||||||
3. Computes number of segments
|
3. Computes number of segments
|
||||||
4. If not forced, equalizes segment lengths
|
4. If not forced, equalizes segment lengths
|
||||||
5. Runs multiple FFmpeg processes in parallel
|
5. Runs multiple FFmpeg processes in parallel
|
||||||
6. Applies rotation, crop, and tracking if enabled
|
6. Applies rotation, crop, and tracking if enabled
|
||||||
7. Displays progress, ETA, and speed
|
7. Displays progress, ETA, and speed
|
||||||
|
|
||||||
---
|
|
||||||
## Face Tracking vs Body Tracking
|
## Face Tracking vs Body Tracking
|
||||||
|
|
||||||
Face tracking and body tracking serve different purposes, and Splitter supports both because each
|
Face tracking and body tracking serve different purposes, and Splitter supports both because each
|
||||||
excels in different recording environments. When converting horizontal footage into vertical clips,
|
excels in different recording environments. When converting horizontal footage into vertical clips,
|
||||||
the choice of detector determines how stable, reliable, and natural the automated camera motion will be.
|
the choice of detector determines how stable, reliable, and natural the automated camera motion will be.
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
### Face Tracking Using UltraFace 320
|
### Face Tracking Using UltraFace 320
|
||||||
|
|
||||||
Splitter uses the UltraFace 320 ONNX model to perform lightweight, real‑time face detection on each
|
Splitter uses the UltraFace 320 ONNX model to perform lightweight, real‑time face detection on each
|
||||||
frame of the input video. The detector produces bounding boxes for visible faces, and the tracking
|
frame of the input video. The detector produces bounding boxes for visible faces, and the tracking
|
||||||
system maintains a stable, smoothed target region across time. This is achieved by combining per‑frame
|
system maintains a stable, smoothed target region across time. This is achieved by combining per‑frame
|
||||||
detections with temporal smoothing (EMA), dropout tolerance, and camera easing. The result is a
|
detections with temporal smoothing (EMA), dropout tolerance, and camera easing. The result is a
|
||||||
continuous, stable crop window that follows the performer even when the face is partially occluded,
|
continuous, stable crop window that follows the performer even when the face is partially occluded,
|
||||||
briefly lost, or moving rapidly.
|
briefly lost, or moving rapidly.
|
||||||
|
|
||||||
During segmentation, the crop window is recalculated for every frame, ensuring that each output
|
During segmentation, the crop window is recalculated for every frame, ensuring that each output
|
||||||
segment inherits the same smooth camera motion. This makes the vertical clips appear as if they
|
segment inherits the same smooth camera motion. This makes the vertical clips appear as if they
|
||||||
were recorded with a dedicated portrait‑oriented camera operator. The UltraFace 320 model is
|
were recorded with a dedicated portrait‑oriented camera operator. The UltraFace 320 model is
|
||||||
fast enough to run alongside multi‑threaded FFmpeg splitting without becoming a bottleneck,
|
fast enough to run alongside multi‑threaded FFmpeg splitting without becoming a bottleneck,
|
||||||
making it suitable for long recordings and batch processing.
|
making it suitable for long recordings and batch processing.
|
||||||
|
|
||||||
### Benefits of Full‑Body Detection Using YOLOv8s for Live Gig Recordings
|
### Benefits of Full‑Body Detection Using YOLOv8s for Live Gig Recordings
|
||||||
|
|
||||||
When recording concerts or live gigs, performers often move unpredictably, turn away from the
|
When recording concerts or live gigs, performers often move unpredictably, turn away from the
|
||||||
camera, or become partially obscured by lighting, instruments, or stage effects.
|
camera, or become partially obscured by lighting, instruments, or stage effects.
|
||||||
Full‑body detection using a YOLOv8s ONNX model provides a more reliable tracking anchor than
|
Full‑body detection using a YOLOv8s ONNX model provides a more reliable tracking anchor than
|
||||||
face detection alone. Because YOLOv8s can detect the entire human silhouette, the tracker
|
face detection alone. Because YOLOv8s can detect the entire human silhouette, the tracker
|
||||||
maintains stable framing even when the face is not visible, when the performer is far from
|
maintains stable framing even when the face is not visible, when the performer is far from
|
||||||
the camera, or when stage lighting makes facial features hard to detect. This produces vertical
|
the camera, or when stage lighting makes facial features hard to detect. This produces vertical
|
||||||
clips that feel intentional and professionally framed, with fewer sudden jumps or lost‑tracking
|
clips that feel intentional and professionally framed, with fewer sudden jumps or lost‑tracking
|
||||||
moments. For creators converting horizontal gig footage into short vertical clips for YouTube
|
moments. For creators converting horizontal gig footage into short vertical clips for YouTube
|
||||||
Shorts or TikTok, body‑based tracking significantly improves consistency, reduces manual editing,
|
Shorts or TikTok, body‑based tracking significantly improves consistency, reduces manual editing,
|
||||||
and preserves the energy and motion of the performance.
|
and preserves the energy and motion of the performance.
|
||||||
|
|
||||||
### Automated Camera Control
|
### Automated Camera Control
|
||||||
|
|
||||||
Splitter includes an automated camera control system that simulates the behavior of a virtual
|
Splitter includes an automated camera control system that simulates the behavior of a virtual
|
||||||
camera operator when generating vertical crops from horizontal footage. The goal is to maintain
|
camera operator when generating vertical crops from horizontal footage. The goal is to maintain
|
||||||
smooth, intentional framing around the tracked subject, even when detections are noisy, intermittent,
|
smooth, intentional framing around the tracked subject, even when detections are noisy, intermittent,
|
||||||
or temporarily lost.
|
or temporarily lost.
|
||||||
|
|
||||||
The controller receives object detections (face or body) and converts them into a stable crop
|
The controller receives object detections (face or body) and converts them into a stable crop
|
||||||
window using a combination of Kalman filtering, exponential smoothing, dropout tolerance,
|
window using a combination of Kalman filtering, exponential smoothing, dropout tolerance,
|
||||||
and a three‑state tracking model. The Kalman filter provides predictive motion smoothing,
|
and a three‑state tracking model. The Kalman filter provides predictive motion smoothing,
|
||||||
while the EMA factor blends the predicted position with the previous camera center to avoid jitter.
|
while the EMA factor blends the predicted position with the previous camera center to avoid jitter.
|
||||||
The camera easing value controls how quickly the virtual camera follows the subject, producing
|
The camera easing value controls how quickly the virtual camera follows the subject, producing
|
||||||
natural‑looking motion rather than abrupt jumps.
|
natural‑looking motion rather than abrupt jumps.
|
||||||
|
|
||||||
When detections disappear, the controller enters one of two fallback modes. In LostFreeze mode,
|
When detections disappear, the controller enters one of two fallback modes. In LostFreeze mode,
|
||||||
the camera holds its last known position for a configurable number of frames, preventing sudden
|
the camera holds its last known position for a configurable number of frames, preventing sudden
|
||||||
jumps during brief occlusions. If the subject remains lost beyond that threshold, the controller
|
jumps during brief occlusions. If the subject remains lost beyond that threshold, the controller
|
||||||
transitions to LostDrift mode, slowly drifting the camera back toward a neutral center position.
|
transitions to LostDrift mode, slowly drifting the camera back toward a neutral center position.
|
||||||
This prevents the crop from drifting off‑screen and ensures that the output remains usable even
|
This prevents the crop from drifting off‑screen and ensures that the output remains usable even
|
||||||
when tracking fails. All positions are clamped to valid bounds, guaranteeing that the crop window
|
when tracking fails. All positions are clamped to valid bounds, guaranteeing that the crop window
|
||||||
never leaves the video frame.
|
never leaves the video frame.
|
||||||
|
|
||||||
---
|
### Automatic rotation detection
|
||||||
|
|
||||||
|
The rotation‑estimation method is based on analyzing the distribution of gradient orientations within
|
||||||
|
a video frame. After converting the frame to grayscale, the algorithm computes horizontal and vertical
|
||||||
|
image gradients using Sobel operators and derives per‑pixel gradient magnitudes and orientations.
|
||||||
|
These orientations are folded into the range [0, 180) and accumulated into a fixed‑size,
|
||||||
|
magnitude‑weighted histogram. The histogram represents the structural edge distribution of the frame,
|
||||||
|
independent of brightness fluctuations or local lighting artifacts. By comparing the total gradient
|
||||||
|
energy concentrated near 0 degrees (vertical edges) with the energy near 90 degrees (horizontal edges),
|
||||||
|
the method determines whether the frame is more consistent with an upright or sideways orientation.
|
||||||
|
|
||||||
|
This approach is designed for environments where brightness‑based cues are unreliable, such as
|
||||||
|
live concerts with strobe lights, LED walls, haze, and crowd movement. It relies solely on geometric
|
||||||
|
edge structure, which remains stable even under extreme lighting variation. The implementation is
|
||||||
|
optimized for high‑throughput video processing: all intermediate Mats, buffers, and histograms are
|
||||||
|
preallocated, and pixel data is accessed directly through pointers to avoid per‑frame memory
|
||||||
|
allocation. The method is intentionally biased toward the upright orientation, returning a sideways
|
||||||
|
classification only when the horizontal‑edge energy significantly exceeds the vertical‑edge energy.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
@ -114,11 +130,10 @@ splitter [<input.mp4> ...] [options] [--] <ffmpeg passthrough>
|
|||||||
|
|
||||||
Inputs may be provided directly, via `--file=...`, or using file masks such as `videos/*.mp4`.
|
Inputs may be provided directly, via `--file=...`, or using file masks such as `videos/*.mp4`.
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Options
|
## Options
|
||||||
|
|
||||||
Below is an ASCII‑only **options table** listing every command‑line option.
|
Below is an ASCII‑only **options table** listing every command‑line option.
|
||||||
All option names are preserved exactly, and descriptions are consolidated for clarity.
|
All option names are preserved exactly, and descriptions are consolidated for clarity.
|
||||||
|
|
||||||
---
|
---
|
||||||
@ -133,6 +148,7 @@ All option names are preserved exactly, and descriptions are consolidated for cl
|
|||||||
| **--duration=<value>** | Override target segment duration. Formats: `Ns`, `NmMs`, `N`. Examples: `--duration=90s`, `--duration=2m30s`, `--duration=45`. Without `--force`: max 58 seconds, equalized across segments. |
|
| **--duration=<value>** | Override target segment duration. Formats: `Ns`, `NmMs`, `N`. Examples: `--duration=90s`, `--duration=2m30s`, `--duration=45`. Without `--force`: max 58 seconds, equalized across segments. |
|
||||||
| **--force** | Use the duration exactly as provided. Last segment may be shorter. |
|
| **--force** | Use the duration exactly as provided. Last segment may be shorter. |
|
||||||
| **--rotate=<degrees>** | Rotate video by 90, 180, or 270 degrees. Useful for correcting orientation metadata. |
|
| **--rotate=<degrees>** | Rotate video by 90, 180, or 270 degrees. Useful for correcting orientation metadata. |
|
||||||
|
| **--rotate-auto** | Automatically detect sideways footage from edge‑orientation statistics and rotate accordingly. |
|
||||||
| **--estimate** | Print calculated segment information and exit. No splitting is performed. |
|
| **--estimate** | Print calculated segment information and exit. No splitting is performed. |
|
||||||
| **--crop[=<w:h>]** | Crop video to a target width and height with face/body tracking. Default: 607x1080. Ideal for Shorts, TikTok, Reels. |
|
| **--crop[=<w:h>]** | Crop video to a target width and height with face/body tracking. Default: 607x1080. Ideal for Shorts, TikTok, Reels. |
|
||||||
| **--detect=<name>** | Object detector for tracking. Values: `face` (UltraFace), `body` (YoloOnnx, default), `none` (center crop). |
|
| **--detect=<name>** | Object detector for tracking. Values: `face` (UltraFace), `body` (YoloOnnx, default), `none` (center crop). |
|
||||||
@ -142,8 +158,6 @@ All option names are preserved exactly, and descriptions are consolidated for cl
|
|||||||
| **--debug** | Show debug overlay during tracking. No cropping performed, but crop region shown. |
|
| **--debug** | Show debug overlay during tracking. No cropping performed, but crop region shown. |
|
||||||
| **-p:<name>=<value>** | Set custom parameters for the object detector. Example: `-p:confidence=0.5`. Defaults: DropoutToleranceFrames=20, EmaFactor=0.65, CameraEasing=0.03, LostFreezeFrames=60. |
|
| **-p:<name>=<value>** | Set custom parameters for the object detector. Example: `-p:confidence=0.5`. Defaults: DropoutToleranceFrames=20, EmaFactor=0.65, CameraEasing=0.03, LostFreezeFrames=60. |
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## FFmpeg Passthrough
|
## FFmpeg Passthrough
|
||||||
|
|
||||||
Anything after `--` is passed directly to FFmpeg.
|
Anything after `--` is passed directly to FFmpeg.
|
||||||
@ -153,16 +167,12 @@ Example:
|
|||||||
splitter video.mp4 --force --duration=45 -- -an -sn
|
splitter video.mp4 --force --duration=45 -- -an -sn
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Input and Output Behavior
|
## Input and Output Behavior
|
||||||
|
|
||||||
- `input.mp4` may be a file mask (`videos/*.mp4`)
|
- `input.mp4` may be a file mask (`videos/*.mp4`)
|
||||||
- Output filenames follow the `--mask` pattern
|
- Output filenames follow the `--mask` pattern
|
||||||
- Output folder defaults to `<input folder>/Splitter` unless overridden
|
- Output folder defaults to `<input folder>/Splitter` unless overridden
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Examples
|
## Examples
|
||||||
|
|
||||||
Split into equal 60‑second segments:
|
Split into equal 60‑second segments:
|
||||||
|
|||||||
@ -1,5 +1,4 @@
|
|||||||
using System;
|
using OpenCvSharp;
|
||||||
using OpenCvSharp;
|
|
||||||
|
|
||||||
namespace splitter;
|
namespace splitter;
|
||||||
|
|
||||||
|
|||||||
@ -1,8 +1,4 @@
|
|||||||
using System;
|
using System.Globalization;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System.Globalization;
|
|
||||||
using System.Text;
|
|
||||||
using OpenCvSharp;
|
|
||||||
|
|
||||||
namespace splitter;
|
namespace splitter;
|
||||||
|
|
||||||
@ -22,6 +18,7 @@ public class SingleJob
|
|||||||
public bool ForceFixed { get; set; }
|
public bool ForceFixed { get; set; }
|
||||||
public bool SingleThreaded { get; set; }
|
public bool SingleThreaded { get; set; }
|
||||||
public int? Rotate { get; set; }
|
public int? Rotate { get; set; }
|
||||||
|
/// <summary>
/// When true, rotation is detected automatically for this job (set by the <c>--rotate-auto</c> switch).
/// </summary>
public bool RotateAuto { get; set; }
|
||||||
public Dictionary<string, string> Parameters { get; set; } = [];
|
public Dictionary<string, string> Parameters { get; set; } = [];
|
||||||
|
|
||||||
public void Override<T>(ref T member, string name)
|
public void Override<T>(ref T member, string name)
|
||||||
@ -143,6 +140,10 @@ public sealed class CommandLine
|
|||||||
{
|
{
|
||||||
Master.SingleThreaded = true;
|
Master.SingleThreaded = true;
|
||||||
}
|
}
|
||||||
|
else if (arg == "--rotate-auto")
|
||||||
|
{
|
||||||
|
Master.RotateAuto = true;
|
||||||
|
}
|
||||||
else if (arg.StartsWith("--gravitate="))
|
else if (arg.StartsWith("--gravitate="))
|
||||||
{
|
{
|
||||||
var val = arg.Substring("--gravitate=".Length);
|
var val = arg.Substring("--gravitate=".Length);
|
||||||
@ -197,6 +198,7 @@ public sealed class CommandLine
|
|||||||
ForceFixed = Master.ForceFixed,
|
ForceFixed = Master.ForceFixed,
|
||||||
SingleThreaded = Master.SingleThreaded,
|
SingleThreaded = Master.SingleThreaded,
|
||||||
Rotate = Master.Rotate,
|
Rotate = Master.Rotate,
|
||||||
|
RotateAuto = Master.RotateAuto,
|
||||||
Parameters = new Dictionary<string, string>(Master.Parameters)
|
Parameters = new Dictionary<string, string>(Master.Parameters)
|
||||||
}).ToArray();
|
}).ToArray();
|
||||||
|
|
||||||
@ -371,6 +373,9 @@ Options:
|
|||||||
--rotate=<degrees> Rotate video by specified degrees (90, 180, 270).
|
--rotate=<degrees> Rotate video by specified degrees (90, 180, 270).
|
||||||
Useful for videos with incorrect orientation metadata.
|
Useful for videos with incorrect orientation metadata.
|
||||||
|
|
||||||
|
--rotate-auto Auto-detect rotation and rotate accordingly.
|
||||||
|
Uses edge orientation statistics to determine if video is rotated.
|
||||||
|
|
||||||
--estimate Print calculated segment information and exit.
|
--estimate Print calculated segment information and exit.
|
||||||
No splitting is performed.
|
No splitting is performed.
|
||||||
|
|
||||||
@ -393,7 +398,7 @@ Options:
|
|||||||
--debug Show debug overlay during face tracking.
|
--debug Show debug overlay during face tracking.
|
||||||
|
|
||||||
-p:<name>=<value> Set a custom parameter for the object detector.
|
-p:<name>=<value> Set a custom parameter for the object detector.
|
||||||
Example: -p:confidence=0.5
|
Example: -p:EmaFactor=0.65
|
||||||
|
|
||||||
Tracking splitter defaults:
|
Tracking splitter defaults:
|
||||||
DropoutToleranceFrames = 20;
|
DropoutToleranceFrames = 20;
|
||||||
@ -401,6 +406,12 @@ Options:
|
|||||||
CameraEasing = 0.03;
|
CameraEasing = 0.03;
|
||||||
LostFreezeFrames = 60;
|
LostFreezeFrames = 60;
|
||||||
|
|
||||||
|
Rotation detector defaults:
|
||||||
|
RotationDetectorSampleCount = 5;
|
||||||
|
RotationDetectorSampleLength = 0.15;
|
||||||
|
RotationDetectorFrameWidth = 320;
|
||||||
|
RotationDetectorFrameHeight = 180;
|
||||||
|
|
||||||
Passthrough:
|
Passthrough:
|
||||||
Anything after -- is passed directly to ffmpeg.
|
Anything after -- is passed directly to ffmpeg.
|
||||||
|
|
||||||
|
|||||||
102
splitter-cli/FrameRotationDetector.cs
Normal file
102
splitter-cli/FrameRotationDetector.cs
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
using OpenCvSharp;
|
||||||
|
|
||||||
|
namespace splitter;
|
||||||
|
|
||||||
|
/// <summary>
/// Classifies a single video frame as upright (0) or sideways (90) by comparing the
/// magnitude-weighted gradient-orientation energy near 0 degrees with the energy near 90 degrees.
/// </summary>
/// <remarks>
/// The working Mats and the histogram are allocated once and reused by every call;
/// <see cref="GetRotation"/> writes to them without synchronization, so do not share one
/// instance between concurrent callers. Dispose the instance to release the native Mats.
/// </remarks>
public sealed class FrameRotationDetector : IDisposable
{
    // Gradients weaker than this magnitude are ignored.
    private const float MinGradientMagnitude = 5f;

    // Number of histogram bins summed on each side of the 0-degree and 90-degree bins.
    private const int EnergyWindow = 3;

    // "Sideways" is reported only when the 90-degree energy exceeds the 0-degree energy by this factor.
    private const float SidewaysRatio = 1.6f;

    private readonly Mat _gray;
    private readonly Mat _gx;
    private readonly Mat _gy;
    private readonly Mat _mag;
    private readonly Mat _angle;

    private readonly float[] _hist;
    private readonly int _bins;

    /// <summary>
    /// Creates a detector whose working buffers are pre-sized for frames of the given dimensions.
    /// </summary>
    /// <param name="width">Expected frame width in pixels; must be positive.</param>
    /// <param name="height">Expected frame height in pixels; must be positive.</param>
    /// <param name="bins">Number of orientation bins covering [0, 180) degrees; must be positive.</param>
    /// <exception cref="ArgumentOutOfRangeException">Any argument is zero or negative.</exception>
    public FrameRotationDetector(int width = 320, int height = 180, int bins = 36)
    {
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(width);
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(height);
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(bins);

        _bins = bins;

        _gray = new Mat(height, width, MatType.CV_8UC1);
        _gx = new Mat(height, width, MatType.CV_32F);
        _gy = new Mat(height, width, MatType.CV_32F);
        _mag = new Mat(height, width, MatType.CV_32F);
        _angle = new Mat(height, width, MatType.CV_32F);

        _hist = new float[bins]; // allocated once
    }

    /// <summary>
    /// Estimates the rotation of one BGR frame.
    /// </summary>
    /// <param name="frame">Frame to analyse; converted with <c>BGR2GRAY</c>, so it must be a BGR image.</param>
    /// <returns>90 when the frame looks sideways, otherwise 0 (also returned for an empty frame).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="frame"/> is null.</exception>
    public int GetRotation(Mat frame)
    {
        ArgumentNullException.ThrowIfNull(frame);
        if (frame.Empty())
            return 0;

        // 1. Grayscale
        Cv2.CvtColor(frame, _gray, ColorConversionCodes.BGR2GRAY);

        // 2. Sobel derivatives in x and y
        Cv2.Sobel(_gray, _gx, MatType.CV_32F, 1, 0, 3);
        Cv2.Sobel(_gray, _gy, MatType.CV_32F, 0, 1, 3);

        // 3. Per-pixel magnitude + angle (degrees)
        Cv2.CartToPolar(_gx, _gy, _mag, _angle, angleInDegrees: true);

        // 4. Rebuild the magnitude-weighted orientation histogram
        Array.Clear(_hist);

        float binSize = 180f / _bins;

        // OpenCV sizes its outputs from the input frame, so the pixel grid is taken from the
        // result Mats rather than from the constructor arguments. Iterating row by row through
        // Ptr(row) also stays correct if a Mat is not stored contiguously.
        int rows = _mag.Rows;
        int cols = _mag.Cols;

        unsafe
        {
            for (int y = 0; y < rows; y++)
            {
                float* magRow = (float*)_mag.Ptr(y);
                float* angleRow = (float*)_angle.Ptr(y);

                for (int x = 0; x < cols; x++)
                {
                    float m = magRow[x];
                    if (m < MinGradientMagnitude) continue; // ignore weak gradients

                    // Fold the direction into [0, 180): opposite gradient directions describe the same edge.
                    float a = angleRow[x];
                    if (a < 0) a += 360f;
                    a %= 180f;

                    int bin = (int)(a / binSize);
                    if (bin < 0) bin = 0;
                    if (bin >= _bins) bin = _bins - 1; // guards against rounding at the upper edge

                    _hist[bin] += m;
                }
            }
        }

        // 5. Energy around 0° vs 90°
        float e0 = 0, e90 = 0;
        int bin90 = _bins / 2;

        for (int offset = -EnergyWindow; offset <= EnergyWindow; offset++)
        {
            e0 += _hist[Wrap(offset)];
            e90 += _hist[Wrap(bin90 + offset)];
        }

        // 6. Decide upright vs sideways (biased toward upright)
        if (e90 > e0 * SidewaysRatio)
            return 90; // sideways

        return 0; // upright (concert default)
    }

    /// <summary>Releases the native working Mats.</summary>
    public void Dispose()
    {
        _gray.Dispose();
        _gx.Dispose();
        _gy.Dispose();
        _mag.Dispose();
        _angle.Dispose();
    }

    // Maps any bin index onto [0, _bins) circularly; a true modulo keeps this valid even when
    // the energy window is wider than the histogram.
    private int Wrap(int b) => ((b % _bins) + _bins) % _bins;
}
|
||||||
@ -4,5 +4,5 @@ namespace splitter;
|
|||||||
|
|
||||||
public interface IObjectDetector : IDisposable
|
public interface IObjectDetector : IDisposable
|
||||||
{
|
{
|
||||||
List<(Rect box, Point2f center)> DetectAll(Mat frameCont, int width, int height);
|
/// <summary>
/// Runs detection on one frame and returns a (bounding box, center point) pair per detected object.
/// </summary>
/// <param name="frameCont">Frame to analyse. NOTE(review): presumably a packed BGR24 Mat — confirm against the implementations.</param>
List<(Rect box, Point2f center)> DetectAll(Mat frameCont);
|
||||||
}
|
}
|
||||||
@ -1,8 +1,4 @@
|
|||||||
using System;
|
namespace splitter;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System.Text;
|
|
||||||
|
|
||||||
namespace splitter;
|
|
||||||
|
|
||||||
public interface ISegmentProcessor
|
public interface ISegmentProcessor
|
||||||
{
|
{
|
||||||
|
|||||||
@ -1,5 +1,4 @@
|
|||||||
using System;
|
namespace splitter;
|
||||||
namespace splitter;
|
|
||||||
|
|
||||||
public abstract class LoggingBase(ILogger _logger, int _progressLine)
|
public abstract class LoggingBase(ILogger _logger, int _progressLine)
|
||||||
{
|
{
|
||||||
|
|||||||
@ -1,8 +1,5 @@
|
|||||||
using System;
|
using System.Diagnostics;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System.Diagnostics;
|
|
||||||
using System.Globalization;
|
using System.Globalization;
|
||||||
using System.Text;
|
|
||||||
|
|
||||||
namespace splitter;
|
namespace splitter;
|
||||||
|
|
||||||
@ -11,12 +8,31 @@ public record VideoInfo(
|
|||||||
int Width,
|
int Width,
|
||||||
int Height,
|
int Height,
|
||||||
double Fps,
|
double Fps,
|
||||||
double Bitrate
|
double Bitrate,
|
||||||
|
int Rotation = 0
|
||||||
);
|
);
|
||||||
|
|
||||||
public static class ProbeVideo
|
public static class ProbeVideo
|
||||||
{
|
{
|
||||||
public static VideoInfo Probe(string inputFile)
|
/// <summary>
/// Probes the job's input file and, when the job requests automatic rotation,
/// adds the detected rotation to the returned info.
/// </summary>
/// <param name="job">Job supplying the input file and the <c>RotateAuto</c> flag.</param>
/// <returns>The probed video info; <c>Rotation</c> is only overwritten when auto-rotation is on.</returns>
public static async Task<VideoInfo> Probe(SingleJob job)
{
    VideoInfo probed = ProbeSize(job.InputFile);

    // Rotation sampling only runs on request.
    if (!job.RotateAuto)
    {
        return probed;
    }

    int detected = await ProbeRotation(job, probed.Duration);
    return probed with { Rotation = detected };
}
|
||||||
|
|
||||||
|
/// <summary>
/// Builds a rotation sampler for the job and returns the rotation it detects for the input file.
/// </summary>
/// <param name="job">Job whose input file is sampled; also passed to the sampler's constructor.</param>
/// <param name="duration">Video duration handed to the sampler.</param>
private static async Task<int> ProbeRotation(SingleJob job, double duration)
{
    var sampler = new VideoRotationSampler(job);
    return await sampler.DetectRotationAsync(job.InputFile, duration);
}
|
||||||
|
|
||||||
|
private static VideoInfo ProbeSize(string inputFile)
|
||||||
{
|
{
|
||||||
var args =
|
var args =
|
||||||
"-v error " +
|
"-v error " +
|
||||||
|
|||||||
@ -10,7 +10,7 @@
|
|||||||
},
|
},
|
||||||
"Debug": {
|
"Debug": {
|
||||||
"commandName": "Project",
|
"commandName": "Project",
|
||||||
"commandLineArgs": "\"C:\\Users\\uncls\\Pictures\\2026\\2026 - Secret Rule\\20260426_212004.mp4\" --crop --detect=body --debug --single-thread --text"
|
"commandLineArgs": "\"C:\\Users\\uncls\\Pictures\\2026\\2026 - Laura Cox\\Video\\MAH00041.MP4\" --rotate-auto"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1,9 +1,5 @@
|
|||||||
using System;
|
using System.Diagnostics;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System.Diagnostics;
|
|
||||||
using System.Globalization;
|
using System.Globalization;
|
||||||
using System.Text;
|
|
||||||
using FFmpeg.AutoGen;
|
|
||||||
|
|
||||||
namespace splitter;
|
namespace splitter;
|
||||||
|
|
||||||
|
|||||||
@ -1,9 +1,4 @@
|
|||||||
using System;
|
using System.Text;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System.Linq;
|
|
||||||
using System.Text;
|
|
||||||
using System.Threading;
|
|
||||||
using System.Threading.Tasks;
|
|
||||||
using Spectre.Console;
|
using Spectre.Console;
|
||||||
using Spectre.Console.Rendering;
|
using Spectre.Console.Rendering;
|
||||||
|
|
||||||
|
|||||||
@ -1,9 +1,6 @@
|
|||||||
using System;
|
using System.Diagnostics;
|
||||||
using System.Diagnostics;
|
|
||||||
using System.Globalization;
|
using System.Globalization;
|
||||||
using System.IO;
|
|
||||||
using System.Runtime.InteropServices;
|
using System.Runtime.InteropServices;
|
||||||
using System.Threading.Tasks;
|
|
||||||
using OpenCvSharp;
|
using OpenCvSharp;
|
||||||
|
|
||||||
namespace splitter;
|
namespace splitter;
|
||||||
@ -112,7 +109,7 @@ public class TrackingSplitter : LoggingBase, ISegmentProcessor, IDisposable
|
|||||||
// input frame → Mat
|
// input frame → Mat
|
||||||
Marshal.Copy(inBuffer, 0, frameMat.Data, inBytes);
|
Marshal.Copy(inBuffer, 0, frameMat.Data, inBytes);
|
||||||
|
|
||||||
var objects = _detector.DetectAll(frameMat, videoWidth, videoHeight);
|
var objects = _detector.DetectAll(frameMat);
|
||||||
var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
|
var primary = SelectTrackedObject(objects, kalman.LastMeasurement);
|
||||||
|
|
||||||
camera.Update(primary);
|
camera.Update(primary);
|
||||||
|
|||||||
@ -1,5 +1,4 @@
|
|||||||
using System.Runtime.InteropServices;
|
using System.Runtime.InteropServices;
|
||||||
using NcnnDotNet.Layers;
|
|
||||||
using OpenCvSharp;
|
using OpenCvSharp;
|
||||||
using UltraFaceDotNet;
|
using UltraFaceDotNet;
|
||||||
|
|
||||||
@ -25,7 +24,7 @@ public sealed class UltraFaceDetector: LoggingBase, IDisposable, IObjectDetector
|
|||||||
_ultraFace = UltraFace.Create(param);
|
_ultraFace = UltraFace.Create(param);
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<(Rect box, Point2f center)> DetectAll(Mat frameCont, int width, int height)
|
public List<(Rect box, Point2f center)> DetectAll(Mat frameCont)
|
||||||
{
|
{
|
||||||
// Convert to byte[] for UltraFace
|
// Convert to byte[] for UltraFace
|
||||||
var bytesFull = frameCont.Rows * frameCont.Cols * frameCont.ElemSize();
|
var bytesFull = frameCont.Rows * frameCont.Cols * frameCont.ElemSize();
|
||||||
@ -44,8 +43,8 @@ public sealed class UltraFaceDetector: LoggingBase, IDisposable, IObjectDetector
|
|||||||
using var mat = NcnnDotNet.Mat.FromPixels(
|
using var mat = NcnnDotNet.Mat.FromPixels(
|
||||||
(IntPtr)p,
|
(IntPtr)p,
|
||||||
NcnnDotNet.PixelType.Bgr, // BGR24 input
|
NcnnDotNet.PixelType.Bgr, // BGR24 input
|
||||||
width,
|
frameCont.Width,
|
||||||
height);
|
frameCont.Height);
|
||||||
|
|
||||||
var faces = _ultraFace.Detect(mat);
|
var faces = _ultraFace.Detect(mat);
|
||||||
if (faces == null)
|
if (faces == null)
|
||||||
|
|||||||
169
splitter-cli/VideoRotationSampler.cs
Normal file
169
splitter-cli/VideoRotationSampler.cs
Normal file
@ -0,0 +1,169 @@
|
|||||||
|
using OpenCvSharp;
|
||||||
|
using System.Diagnostics;
|
||||||
|
|
||||||
|
namespace splitter;
|
||||||
|
|
||||||
|
public sealed class VideoRotationSampler
|
||||||
|
{
|
||||||
|
// Per-frame orientation classifier. Created in the constructor (not by a field initializer)
// so that its working buffers match the configured sample frame size.
private readonly FrameRotationDetector _detector;

// Tunables, overridable per job through -p:<name>=<value>. They are static, so an override
// applied by one sampler instance remains in effect for instances created afterwards.
// NOTE(review): the CLI help text lists 5 as the default sample count — confirm which value is intended.
public static int RotationDetectorSampleCount = 20;
public static double RotationDetectorSampleLength = 0.15; // seconds to decode per probe
public static int RotationDetectorFrameWidth = 320;
public static int RotationDetectorFrameHeight = 180;

// --- Buffers allocated once per sampler ---
private readonly byte[] _buffer;   // sized for one raw BGR24 frame (width * height * 3 bytes)
private readonly Mat _frameMat;    // frame-sized 8UC3 Mat with its own storage; presumably filled from _buffer by the decoder — confirm

/// <summary>
/// Applies any rotation-detector overrides found in the job's parameters and allocates the
/// frame buffers and the detector for the resulting sample frame size.
/// </summary>
/// <param name="_master">Job whose <c>Parameters</c> dictionary may carry the overrides.</param>
/// <exception cref="FormatException">An override value is not a valid number.</exception>
/// <exception cref="ArgumentOutOfRangeException">The configured frame width or height is not positive.</exception>
public VideoRotationSampler(SingleJob _master)
{
    // Command-line values are machine-readable: parse them culture-independently so that
    // "0.15" means the same thing under comma-decimal locales.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    if (_master.Parameters.TryGetValue("RotationDetectorSampleCount", out var s))
        RotationDetectorSampleCount = int.Parse(s, invariant);
    if (_master.Parameters.TryGetValue("RotationDetectorSampleLength", out s))
        RotationDetectorSampleLength = double.Parse(s, invariant);
    if (_master.Parameters.TryGetValue("RotationDetectorFrameWidth", out s))
        RotationDetectorFrameWidth = int.Parse(s, invariant);
    if (_master.Parameters.TryGetValue("RotationDetectorFrameHeight", out s))
        RotationDetectorFrameHeight = int.Parse(s, invariant);

    int w = RotationDetectorFrameWidth;
    int h = RotationDetectorFrameHeight;

    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(w);
    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(h);

    _buffer = new byte[checked(w * h * 3)];      // raw BGR24 buffer
    _frameMat = new Mat(h, w, MatType.CV_8UC3);
    _detector = new FrameRotationDetector(w, h);
}
|
||||||
|
|
||||||
|
public async Task<int> DetectRotationAsync(
|
||||||
|
string inputFile,
|
||||||
|
double videoLengthSeconds)
|
||||||
|
{
|
||||||
|
if (videoLengthSeconds <= 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
var rotations = new List<int>();
|
||||||
|
|
||||||
|
for (int i = 0; i < RotationDetectorSampleCount; i++)
|
||||||
|
{
|
||||||
|
double t = videoLengthSeconds * (i + 1) / (RotationDetectorSampleCount + 1);
|
||||||
|
|
||||||
|
var frame = await DecodeSingleFrameAsync(
|
||||||
|
inputFile,
|
||||||
|
t,
|
||||||
|
RotationDetectorSampleLength,
|
||||||
|
RotationDetectorFrameWidth,
|
||||||
|
RotationDetectorFrameHeight);
|
||||||
|
|
||||||
|
if (frame != null && !frame.Empty())
|
||||||
|
{
|
||||||
|
int rot = _detector.GetRotation(frame);
|
||||||
|
rotations.Add(rot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rotations.Count == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return Majority(rotations);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int Majority(List<int> values)
|
||||||
|
{
|
||||||
|
var counts = new Dictionary<int, int>();
|
||||||
|
foreach (var v in values)
|
||||||
|
{
|
||||||
|
if (!counts.ContainsKey(v)) counts[v] = 0;
|
||||||
|
counts[v]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
int best = 0;
|
||||||
|
int bestCount = 0;
|
||||||
|
|
||||||
|
foreach (var kv in counts)
|
||||||
|
{
|
||||||
|
if (kv.Value > bestCount)
|
||||||
|
{
|
||||||
|
best = kv.Key;
|
||||||
|
bestCount = kv.Value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return best;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async Task<Mat?> DecodeSingleFrameAsync(
|
||||||
|
string inputFile,
|
||||||
|
double start,
|
||||||
|
double length,
|
||||||
|
int width,
|
||||||
|
int height)
|
||||||
|
{
|
||||||
|
var p = StartFfmpegDecode(inputFile, start, length, rotate: null, plainText: false);
|
||||||
|
|
||||||
|
int needed = _buffer.Length;
|
||||||
|
int read = 0;
|
||||||
|
|
||||||
|
using var stdout = p.StandardOutput.BaseStream;
|
||||||
|
|
||||||
|
while (read < needed)
|
||||||
|
{
|
||||||
|
int r = await stdout.ReadAsync(_buffer, read, needed - read);
|
||||||
|
if (r == 0)
|
||||||
|
return null;
|
||||||
|
read += r;
|
||||||
|
}
|
||||||
|
|
||||||
|
try { p.Kill(); } catch { }
|
||||||
|
|
||||||
|
// Copy buffer → Mat (no new Mat)
|
||||||
|
System.Runtime.InteropServices.Marshal.Copy(_buffer, 0, _frameMat.Data, _buffer.Length);
|
||||||
|
|
||||||
|
return _frameMat;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Process StartFfmpegDecode(
|
||||||
|
string inputFile,
|
||||||
|
double start,
|
||||||
|
double length,
|
||||||
|
int? rotate,
|
||||||
|
bool plainText)
|
||||||
|
{
|
||||||
|
var ss = start.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture);
|
||||||
|
var t = length.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture);
|
||||||
|
|
||||||
|
// FFmpeg does the resize + format conversion
|
||||||
|
var args =
|
||||||
|
$"-ss {ss} -t {t} -i \"{inputFile}\" " +
|
||||||
|
"-an -sn " +
|
||||||
|
$"-vf scale={RotationDetectorFrameWidth}:{RotationDetectorFrameHeight},format=bgr24 " +
|
||||||
|
"-f rawvideo -";
|
||||||
|
|
||||||
|
var psi = new ProcessStartInfo
|
||||||
|
{
|
||||||
|
FileName = "ffmpeg",
|
||||||
|
Arguments = args,
|
||||||
|
RedirectStandardOutput = true,
|
||||||
|
RedirectStandardError = true,
|
||||||
|
UseShellExecute = false,
|
||||||
|
CreateNoWindow = true
|
||||||
|
};
|
||||||
|
|
||||||
|
var p = new Process { StartInfo = psi };
|
||||||
|
p.Start();
|
||||||
|
|
||||||
|
// Optional stderr logging
|
||||||
|
_ = Task.Run(() =>
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
string? line;
|
||||||
|
while ((line = p.StandardError.ReadLine()) != null)
|
||||||
|
if (plainText)
|
||||||
|
Console.WriteLine($"[ffmpeg-decode] {line}");
|
||||||
|
}
|
||||||
|
catch { }
|
||||||
|
});
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,8 +1,4 @@
|
|||||||
using System;
|
using System.Runtime.CompilerServices;
|
||||||
using System.Collections.Generic;
|
|
||||||
using System.Linq;
|
|
||||||
using System.Numerics;
|
|
||||||
using System.Runtime.CompilerServices;
|
|
||||||
using Microsoft.ML.OnnxRuntime;
|
using Microsoft.ML.OnnxRuntime;
|
||||||
using Microsoft.ML.OnnxRuntime.Tensors;
|
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||||
using OpenCvSharp;
|
using OpenCvSharp;
|
||||||
@ -83,7 +79,7 @@ public sealed class YoloOnnxObjectDetector : LoggingBase, IObjectDetector, IDisp
|
|||||||
_inputs.Add(NamedOnnxValue.CreateFromTensor(_inputName, _inputTensor));
|
_inputs.Add(NamedOnnxValue.CreateFromTensor(_inputName, _inputTensor));
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<(Rect box, Point2f center)> DetectAll(Mat frameCont, int width, int height)
|
public List<(Rect box, Point2f center)> DetectAll(Mat frameCont)
|
||||||
{
|
{
|
||||||
if (frameCont.Empty())
|
if (frameCont.Empty())
|
||||||
{
|
{
|
||||||
|
|||||||
@ -1,7 +1,5 @@
|
|||||||
using System.Collections.Concurrent;
|
using System.Collections.Concurrent;
|
||||||
using System.Diagnostics;
|
using System.Diagnostics;
|
||||||
using System.Globalization;
|
|
||||||
using System.Text;
|
|
||||||
using Spectre.Console;
|
using Spectre.Console;
|
||||||
using splitter;
|
using splitter;
|
||||||
|
|
||||||
@ -78,7 +76,7 @@ static partial class Program
|
|||||||
if (!Directory.Exists(job.OutputFolder))
|
if (!Directory.Exists(job.OutputFolder))
|
||||||
Directory.CreateDirectory(job.OutputFolder);
|
Directory.CreateDirectory(job.OutputFolder);
|
||||||
|
|
||||||
var info = ProbeVideo.Probe(job.InputFile);
|
var info = await ProbeVideo.Probe(job);
|
||||||
if (info.Duration <= 0)
|
if (info.Duration <= 0)
|
||||||
{
|
{
|
||||||
LogError($"{baseName}: Could not read duration.");
|
LogError($"{baseName}: Could not read duration.");
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user