refactor(video2x): split the CLI into multiple files; improve CLI args validation

Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
k4yt3x 2024-12-04 00:00:00 +00:00
parent adf3baf4be
commit b05a6ec500
No known key found for this signature in database
30 changed files with 923 additions and 831 deletions

View File

@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Improve error handling and error messages.
- Improve the CLI help message structure and clarity.
- Improve CLI argument validation.
### Removed

View File

@ -332,7 +332,6 @@ if(BUILD_VIDEO2X_CLI)
${ALL_INCLUDE_DIRS}
${CMAKE_CURRENT_BINARY_DIR}
${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/include/libvideo2x
${PROJECT_SOURCE_DIR}/tools/video2x/include
)

View File

@ -1,5 +1,6 @@
<p align="center">
<img src="https://github.com/user-attachments/assets/5cd63373-e806-474f-94ec-6e04963bf90f"/>
<img src="https://github.com/user-attachments/assets/5cd63373-e806-474f-94ec-6e04963bf90f"
alt="Video2X: A machine learning-based video super resolution and frame interpolation framework."/>
</br>
<img src="https://img.shields.io/github/v/release/k4yt3x/video2x?style=flat-square"/>
<img src="https://img.shields.io/github/downloads/k4yt3x/video2x/total?style=flat-square"/>
@ -69,15 +70,7 @@ Join our Telegram discussion group to ask any questions you have about Video2X,
Comprehensive documentation for Video2X is available at [https://docs.video2x.org/](https://docs.video2x.org/). It offers detailed instructions on how to [build](https://docs.video2x.org/building/index.html), [install](https://docs.video2x.org/installing/index.html), [use](https://docs.video2x.org/running/index.html), and [develop](https://docs.video2x.org/developing/index.html) with this program.
## 🔰 Introduction
Video2X is a machine-learning-powered framework for video upscaling and frame interpolation, built around three main components:
- [libvideo2x](https://github.com/k4yt3x/video2x/blob/master/src/libvideo2x.cpp): The core C++ library providing upscaling and frame interpolation capabilities.
- [Video2X CLI](https://github.com/k4yt3x/video2x/blob/master/src/video2x.c): A command-line interface that utilizes `libvideo2x` for video processing.
- [Video2X Qt6](https://github.com/k4yt3x/video2x-qt6): A Qt6-based graphical interface that utilizes `libvideo2x` for video processing.
### Video Demos
## 📽️ Video Demos (Outdated)
![Spirited Away Demo](https://user-images.githubusercontent.com/21986859/49412428-65083280-f73a-11e8-8237-bb34158a545e.png)\
_Upscale demo: Spirited Away's movie trailer_

View File

@ -1,5 +1,4 @@
#ifndef AVUTILS_H
#define AVUTILS_H
#pragma once
extern "C" {
#include <libavformat/avformat.h>
@ -18,5 +17,3 @@ void av_bufferref_deleter(AVBufferRef *bufferref);
void av_frame_deleter(AVFrame *frame);
void av_packet_deleter(AVPacket *packet);
#endif // AVUTILS_H

View File

@ -1,5 +1,4 @@
#ifndef CONVERSIONS_H
#define CONVERSIONS_H
#pragma once
extern "C" {
#include <libavutil/frame.h>
@ -16,5 +15,3 @@ ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame);
// Convert ncnn::Mat to AVFrame
AVFrame *ncnn_mat_to_avframe(const ncnn::Mat &mat, AVPixelFormat pix_fmt);
#endif // CONVERSIONS_H

View File

@ -1,5 +1,4 @@
#ifndef DECODER_H
#define DECODER_H
#pragma once
#include <filesystem>
@ -27,5 +26,3 @@ class Decoder {
AVCodecContext *dec_ctx_;
int in_vstream_idx_;
};
#endif // DECODER_H

View File

@ -1,5 +1,4 @@
#ifndef ENCODER_H
#define ENCODER_H
#pragma once
#include <cstdint>
#include <filesystem>
@ -16,34 +15,32 @@ extern "C" {
// Encoder configurations
struct EncoderConfig {
// Non-AVCodecContext options
AVCodecID codec;
bool copy_streams;
AVCodecID codec = AV_CODEC_ID_NONE;
bool copy_streams = true;
// Basic video options
int width;
int height;
int frm_rate_mul;
AVPixelFormat pix_fmt;
int frm_rate_mul = 0;
AVPixelFormat pix_fmt = AV_PIX_FMT_NONE;
// Rate control and compression
int64_t bit_rate;
int rc_buffer_size;
int rc_min_rate;
int rc_max_rate;
int qmin;
int qmax;
int64_t bit_rate = 0;
int rc_buffer_size = 0;
int rc_min_rate = 0;
int rc_max_rate = 0;
int qmin = -1;
int qmax = -1;
// GOP and frame structure
int gop_size;
int max_b_frames;
int keyint_min;
int refs;
int gop_size = -1;
int max_b_frames = -1;
int keyint_min = -1;
int refs = -1;
// Performance and threading
int thread_count;
int thread_count = 0;
// Latency and buffering
int delay;
int delay = -1;
// Extra AVOptions
std::vector<std::pair<StringType, StringType>> extra_opts;
@ -60,6 +57,8 @@ class Encoder {
AVFormatContext *ifmt_ctx,
AVCodecContext *dec_ctx,
EncoderConfig &enc_cfg,
int width,
int height,
int in_vstream_idx
);
@ -77,5 +76,3 @@ class Encoder {
int out_vstream_idx_;
int *stream_map_;
};
#endif // ENCODER_H

View File

@ -1,5 +1,4 @@
#ifndef FILTER_LIBPLACEBO_H
#define FILTER_LIBPLACEBO_H
#pragma once
#include <filesystem>
@ -57,5 +56,3 @@ class FilterLibplacebo : public Filter {
AVRational in_time_base_;
AVRational out_time_base_;
};
#endif // FILTER_LIBPLACEBO_H

View File

@ -1,5 +1,4 @@
#ifndef FILTER_REALESRGAN_H
#define FILTER_REALESRGAN_H
#pragma once
extern "C" {
#include <libavcodec/avcodec.h>
@ -50,5 +49,3 @@ class FilterRealesrgan : public Filter {
AVRational out_time_base_;
AVPixelFormat out_pix_fmt_;
};
#endif // FILTER_REALESRGAN_H

View File

@ -1,5 +1,4 @@
#ifndef FSUTILS_H
#define FSUTILS_H
#pragma once
#include <filesystem>
#include <string>
@ -29,5 +28,3 @@ std::string wstring_to_u8string(const StringType &wstr);
StringType path_to_string_type(const std::filesystem::path &path);
StringType to_string_type(int value);
#endif // FSUTILS_H

View File

@ -1,5 +1,4 @@
#ifndef INTERPOLATOR_RIFE_H
#define INTERPOLATOR_RIFE_H
#pragma once
extern "C" {
#include <libavcodec/avcodec.h>
@ -55,5 +54,3 @@ class InterpolatorRIFE : public Interpolator {
AVRational out_time_base_;
AVPixelFormat out_pix_fmt_;
};
#endif // INTERPOLATOR_RIFE_H

View File

@ -1,5 +1,4 @@
#ifndef PLACEBO_H
#define PLACEBO_H
#pragma once
#include <filesystem>
@ -18,5 +17,3 @@ int init_libplacebo(
uint32_t vk_device_index,
const std::filesystem::path &shader_path
);
#endif // PLACEBO_H

View File

@ -1,5 +1,4 @@
#ifndef LIBVIDEO2X_H
#define LIBVIDEO2X_H
#pragma once
#include <atomic>
#include <cstdint>
@ -26,19 +25,15 @@ extern "C" {
#define LIBVIDEO2X_API
#endif
struct HardwareConfig {
uint32_t vk_device_index;
AVHWDeviceType hw_device_type;
};
class LIBVIDEO2X_API VideoProcessor {
public:
VideoProcessor(
const HardwareConfig hw_cfg,
const ProcessorConfig proc_cfg,
EncoderConfig enc_cfg,
Video2xLogLevel = Video2xLogLevel::Info,
bool benchmark = false
const EncoderConfig enc_cfg,
const uint32_t vk_device_index = 0,
const AVHWDeviceType hw_device_type = AV_HWDEVICE_TYPE_NONE,
const Video2xLogLevel = Video2xLogLevel::Info,
const bool benchmark = false
);
virtual ~VideoProcessor() = default;
@ -85,9 +80,10 @@ class LIBVIDEO2X_API VideoProcessor {
AVFrame *proc_frame
);
HardwareConfig hw_cfg_;
ProcessorConfig proc_cfg_;
EncoderConfig enc_cfg_;
uint32_t vk_device_index_ = 0;
AVHWDeviceType hw_device_type_ = AV_HWDEVICE_TYPE_NONE;
bool benchmark_ = false;
std::atomic<int64_t> frame_index_ = 0;
@ -96,5 +92,3 @@ class LIBVIDEO2X_API VideoProcessor {
std::atomic<bool> aborted_ = false;
std::atomic<bool> completed_ = false;
};
#endif // LIBVIDEO2X_H

View File

@ -1,5 +1,4 @@
#ifndef LOGGING_H
#define LOGGING_H
#pragma once
#include <optional>
@ -19,5 +18,3 @@ enum class Video2xLogLevel {
void set_log_level(Video2xLogLevel log_level);
std::optional<Video2xLogLevel> find_log_level_by_name(const StringType &log_level_name);
#endif // LOGGING_H

View File

@ -1,5 +1,4 @@
#ifndef PROCESSOR_H
#define PROCESSOR_H
#pragma once
#include <variant>
#include <vector>
@ -18,6 +17,7 @@ enum class ProcessingMode {
};
enum class ProcessorType {
None,
Libplacebo,
RealESRGAN,
RIFE,
@ -28,26 +28,26 @@ struct LibplaceboConfig {
};
struct RealESRGANConfig {
bool tta_mode;
bool tta_mode = false;
StringType model_name;
};
struct RIFEConfig {
bool tta_mode;
bool tta_temporal_mode;
bool uhd_mode;
int num_threads;
bool tta_mode = false;
bool tta_temporal_mode = false;
bool uhd_mode = false;
int num_threads = 0;
StringType model_name;
};
// Unified filter configuration
struct ProcessorConfig {
ProcessorType processor_type;
int width;
int height;
int scaling_factor;
int frm_rate_mul;
float scn_det_thresh;
ProcessorType processor_type = ProcessorType::None;
int width = 0;
int height = 0;
int scaling_factor = 0;
int frm_rate_mul = 0;
float scn_det_thresh = 0.0f;
std::variant<LibplaceboConfig, RealESRGANConfig, RIFEConfig> config;
};
@ -81,5 +81,3 @@ class Interpolator : public Processor {
virtual int
interpolate(AVFrame *prev_frame, AVFrame *in_frame, AVFrame **out_frame, float time_step) = 0;
};
#endif // PROCESSOR_H

View File

@ -1,5 +1,4 @@
#ifndef PROCESSOR_FACTORY_H
#define PROCESSOR_FACTORY_H
#pragma once
#include <functional>
#include <memory>
@ -32,5 +31,3 @@ class ProcessorFactory {
// Static initializer for default processors
static void init_default_processors(ProcessorFactory &factory);
};
#endif // PROCESSOR_FACTORY_H

View File

@ -1,6 +1,3 @@
#ifndef VERSION_H
#define VERSION_H
#pragma once
#define LIBVIDEO2X_VERSION_STRING "@PROJECT_VERSION@"
#endif // VERSION_H

View File

@ -33,6 +33,8 @@ int Encoder::init(
AVFormatContext *ifmt_ctx,
AVCodecContext *dec_ctx,
EncoderConfig &enc_cfg,
int width,
int height,
int in_vstream_idx
) {
int ret;
@ -84,8 +86,8 @@ int Encoder::init(
enc_ctx_->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
// Set basic video options
enc_ctx_->width = enc_cfg.width;
enc_ctx_->height = enc_cfg.height;
enc_ctx_->width = width;
enc_ctx_->height = height;
// Set rate control and compression options
enc_ctx_->bit_rate = enc_cfg.bit_rate;

View File

@ -1,7 +1,7 @@
#include "fsutils.h"
#if _WIN32
#include <windows.h>
#include <Windows.h>
#include <cwchar>
#else
#include <unistd.h>

View File

@ -14,13 +14,18 @@ extern "C" {
#include "processor_factory.h"
VideoProcessor::VideoProcessor(
const HardwareConfig hw_cfg,
const ProcessorConfig proc_cfg,
const EncoderConfig enc_cfg,
Video2xLogLevel log_level,
bool benchmark
const uint32_t vk_device_index,
const AVHWDeviceType hw_device_type,
const Video2xLogLevel log_level,
const bool benchmark
)
: hw_cfg_(hw_cfg), proc_cfg_(proc_cfg), enc_cfg_(enc_cfg), benchmark_(benchmark) {
: proc_cfg_(proc_cfg),
enc_cfg_(enc_cfg),
vk_device_index_(vk_device_index),
hw_device_type_(hw_device_type),
benchmark_(benchmark) {
set_log_level(log_level);
}
@ -37,9 +42,9 @@ int VideoProcessor::process(
);
// Initialize hardware device context
if (hw_cfg_.hw_device_type != AV_HWDEVICE_TYPE_NONE) {
if (hw_device_type_ != AV_HWDEVICE_TYPE_NONE) {
AVBufferRef *tmp_hw_ctx = nullptr;
ret = av_hwdevice_ctx_create(&tmp_hw_ctx, hw_cfg_.hw_device_type, NULL, NULL, 0);
ret = av_hwdevice_ctx_create(&tmp_hw_ctx, hw_device_type_, NULL, NULL, 0);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error initializing hardware device context: {}", errbuf);
@ -50,7 +55,7 @@ int VideoProcessor::process(
// Initialize input decoder
Decoder decoder;
ret = decoder.init(hw_cfg_.hw_device_type, hw_ctx.get(), in_fname);
ret = decoder.init(hw_device_type_, hw_ctx.get(), in_fname);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Failed to initialize decoder: {}", errbuf);
@ -63,7 +68,7 @@ int VideoProcessor::process(
// Create and initialize the appropriate filter
std::unique_ptr<Processor> processor(
ProcessorFactory::instance().create_processor(proc_cfg_, hw_cfg_.vk_device_index)
ProcessorFactory::instance().create_processor(proc_cfg_, vk_device_index_)
);
if (processor == nullptr) {
spdlog::critical("Failed to create filter instance");
@ -80,16 +85,21 @@ int VideoProcessor::process(
return -1;
}
// Update encoder output dimensions
enc_cfg_.width = output_width;
enc_cfg_.height = output_height;
// Update encoder frame rate multiplier
enc_cfg_.frm_rate_mul = proc_cfg_.frm_rate_mul;
// Initialize the encoder
Encoder encoder;
ret = encoder.init(hw_ctx.get(), out_fname, ifmt_ctx, dec_ctx, enc_cfg_, in_vstream_idx);
ret = encoder.init(
hw_ctx.get(),
out_fname,
ifmt_ctx,
dec_ctx,
enc_cfg_,
output_width,
output_height,
in_vstream_idx
);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Failed to initialize encoder: {}", errbuf);

View File

@ -0,0 +1,29 @@
#pragma once
#include <libvideo2x/libvideo2x.h>
#include <filesystem>
// Command-line settings that belong to the CLI itself rather than to the
// processor or encoder configuration
struct Arguments {
    // Logging and progress display
    Video2xLogLevel log_level{Video2xLogLevel::Info};
    bool no_progress{false};

    // General options
    std::filesystem::path in_fname;
    std::filesystem::path out_fname;
    uint32_t vk_device_index{0};
    AVHWDeviceType hw_device_type{AV_HWDEVICE_TYPE_NONE};
    bool benchmark{false};
};
// Parse the command line and fill `arguments`, `proc_cfg` and `enc_cfg`.
//
// `argv` is wide (wchar_t) on Windows and narrow (char) everywhere else.
//
// Returns 0 when the configuration was parsed and validated and processing
// can proceed, a negative value when parsing or validation failed, and a
// positive value when an informational action (such as printing the help page
// or the version) was handled and there is nothing left to do.
[[nodiscard]] int parse_args(
int argc,
#ifdef _WIN32
wchar_t *argv[],
#else
char *argv[],
#endif
Arguments &arguments,
ProcessorConfig &proc_cfg,
EncoderConfig &enc_cfg
);

View File

@ -0,0 +1,15 @@
#pragma once
#include <atomic>
#include <optional>
#include <libvideo2x/libvideo2x.h>
#include <spdlog/spdlog.h>
// True when a newline must be printed before the next log output; cleared by
// newline_safe_ffmpeg_log_callback once it has emitted that newline
extern std::atomic<bool> newline_required;
// Translate a Video2xLogLevel into the matching spdlog level and apply it with spdlog::set_level
void set_spdlog_level(Video2xLogLevel log_level);
// Case-insensitive lookup of a log level by name; returns std::nullopt for unknown names
std::optional<Video2xLogLevel> find_log_level_by_name(const StringType &log_level_name);
// FFmpeg log callback that prints a pending newline (see newline_required)
// before handing the message to av_log_default_callback
void newline_safe_ffmpeg_log_callback(void *ptr, int level, const char *fmt, va_list vl);

View File

@ -1,5 +1,4 @@
#ifndef TIMER_H
#define TIMER_H
#pragma once
#include <atomic>
#include <chrono>
@ -30,5 +29,3 @@ class Timer {
void update_elapsed_time();
};
#endif // TIMER_H

View File

@ -0,0 +1,68 @@
#pragma once
#include <libvideo2x/fsutils.h>
#include <boost/program_options.hpp>
namespace po = boost::program_options;
// Reject negative values for the option `option_name`.
//
// Despite the name, zero is accepted: only values below zero raise a
// po::validation_error (invalid_option_value). The error text therefore says
// "non-negative" so that it matches what is actually enforced.
template <typename T>
void validate_positive(const T &value, const std::string &option_name) {
    if (value < 0) {
        throw po::validation_error(
            po::validation_error::invalid_option_value,
            option_name,
            option_name + " must be non-negative"
        );
    }
}
// Throw po::validation_error (invalid_option_value) when `value` is below `min`
template <typename T>
void validate_min(const T &value, const std::string &option_name, const T &min) {
    const bool below_minimum = value < min;
    if (!below_minimum) {
        return;
    }

    const std::string description = option_name + " must be at least " + std::to_string(min);
    throw po::validation_error(
        po::validation_error::invalid_option_value, option_name, description
    );
}
// Throw po::validation_error (invalid_option_value) when `value` is above `max`
template <typename T>
void validate_max(const T &value, const std::string &option_name, const T &max) {
    const bool above_maximum = value > max;
    if (!above_maximum) {
        return;
    }

    const std::string description = option_name + " must be at most " + std::to_string(max);
    throw po::validation_error(
        po::validation_error::invalid_option_value, option_name, description
    );
}
// Throw po::validation_error (invalid_option_value) when `value` lies outside
// the closed interval [min, max]
template <typename T>
void validate_range(const T &value, const std::string &option_name, const T &min, const T &max) {
    const bool out_of_range = value < min || value > max;
    if (!out_of_range) {
        return;
    }

    std::string description = option_name + " must be in the range [";
    description += std::to_string(min);
    description += ", ";
    description += std::to_string(max);
    description += "]";
    throw po::validation_error(
        po::validation_error::invalid_option_value, option_name, description
    );
}
// Throw po::validation_error (invalid_option_value) when `value` is less than 1
template <typename T>
void validate_greater_equal_one(const T &value, const std::string &option_name) {
    const bool too_small = value < 1;
    if (!too_small) {
        return;
    }

    const std::string description = option_name + " must be greater than or equal to 1";
    throw po::validation_error(
        po::validation_error::invalid_option_value, option_name, description
    );
}
// Accepts a built-in Anime4K shader name or a path that exists on disk;
// throws po::validation_error otherwise
void validate_anime4k_shader_name(const StringType &shader_name);
// Accepts only the known RealESRGAN model names; throws po::validation_error otherwise
void validate_realesrgan_model_name(const StringType &model_name);
// Accepts only the known RIFE model names; throws po::validation_error otherwise
void validate_rife_model_name(const StringType &model_name);

View File

@ -0,0 +1,7 @@
#pragma once
#include <vulkan/vulkan.h>
// Backs the --list-devices CLI option ("List the available Vulkan devices (GPUs)").
// NOTE(review): the return convention is not visible from this header —
// presumably 0 on success; confirm against the definition.
int list_vulkan_devices();
// Looks up the Vulkan device at `vk_device_index` and reports its properties
// through `dev_props`. The CLI treats 0 as success and -2 as an invalid device
// index; any other value is reported as being unable to validate the device.
int get_vulkan_device_prop(uint32_t vk_device_index, VkPhysicalDeviceProperties *dev_props);

View File

@ -0,0 +1,420 @@
#include "argparse.h"
#include <iostream>
#if _WIN32
#include <Windows.h>
#include <cwchar>
#endif
#include <libvideo2x/version.h>
#include <spdlog/spdlog.h>
#include <vulkan_utils.h>
#include <boost/program_options.hpp>
#include "logging.h"
#include "validators.h"
#ifdef _WIN32
#define BOOST_PROGRAM_OPTIONS_WCHAR_T
#define PO_STR_VALUE po::wvalue
#else
#define PO_STR_VALUE po::value
#endif
namespace po = boost::program_options;
#ifdef _WIN32
// Convert a wide string to a UTF-8 encoded narrow string
std::string wstring_to_u8string(const std::wstring &wstr) {
    std::string utf8;
    if (!wstr.empty()) {
        const int wide_length = static_cast<int>(wstr.size());

        // First call only measures how many bytes the UTF-8 form needs
        const int utf8_length = WideCharToMultiByte(
            CP_UTF8, 0, wstr.data(), wide_length, nullptr, 0, nullptr, nullptr
        );

        // Second call converts into the pre-sized buffer
        utf8 = std::string(utf8_length, 0);
        WideCharToMultiByte(
            CP_UTF8, 0, wstr.data(), wide_length, &utf8[0], utf8_length, nullptr, nullptr
        );
    }
    return utf8;
}
#else
// Strings are already narrow on non-Windows platforms, so hand back a copy
std::string wstring_to_u8string(const std::string &str) {
    std::string narrow(str);
    return narrow;
}
#endif
int parse_args(
int argc,
#ifdef _WIN32
wchar_t *argv[],
#else
char *argv[],
#endif
Arguments &arguments,
ProcessorConfig &proc_cfg,
EncoderConfig &enc_cfg
) {
try {
// clang-format off
po::options_description all_opts("General options");
all_opts.add_options()
("help", "Display this help page")
("version,V", "Print program version and exit")
("log-level", PO_STR_VALUE<StringType>()->default_value(STR("info"), "info"),
"Set verbosity level (trace, debug, info, warn, error, critical, none)")
("no-progress", po::bool_switch(&arguments.no_progress),
"Do not display the progress bar")
("list-devices,l", "List the available Vulkan devices (GPUs)")
// General Processing Options
("input,i", PO_STR_VALUE<StringType>(), "Input video file path")
("output,o", PO_STR_VALUE<StringType>(), "Output video file path")
("processor,p", PO_STR_VALUE<StringType>(),
"Processor to use (libplacebo, realesrgan, rife)")
("hwaccel,a", PO_STR_VALUE<StringType>()->default_value(STR("none"), "none"),
"Hardware acceleration method (decoding)")
("device,d", po::value<uint32_t>(&arguments.vk_device_index)->default_value(0),
"Vulkan device index (GPU ID)")
("benchmark,b", po::bool_switch(&arguments.benchmark),
"Discard processed frames and calculate average FPS; "
"useful for detecting encoder bottlenecks")
;
po::options_description encoder_opts("Encoder options");
encoder_opts.add_options()
("codec,c", PO_STR_VALUE<StringType>()->default_value(STR("libx264"), "libx264"),
"Output codec")
("no-copy-streams", "Do not copy audio and subtitle streams")
("pix-fmt", PO_STR_VALUE<StringType>(), "Output pixel format")
("bit-rate", po::value<int64_t>(&enc_cfg.bit_rate)->default_value(0),
"Bitrate in bits per second")
("rc-buffer-size", po::value<int>(&enc_cfg.rc_buffer_size)->default_value(0),
"Rate control buffer size in bits")
("rc-min-rate", po::value<int>(&enc_cfg.rc_min_rate)->default_value(0),
"Minimum rate control")
("rc-max-rate", po::value<int>(&enc_cfg.rc_max_rate)->default_value(0),
"Maximum rate control")
("qmin", po::value<int>(&enc_cfg.qmin)->default_value(-1), "Minimum quantizer")
("qmax", po::value<int>(&enc_cfg.qmax)->default_value(-1), "Maximum quantizer")
("gop-size", po::value<int>(&enc_cfg.gop_size)->default_value(-1),
"Group of pictures structure size")
("max-b-frames", po::value<int>(&enc_cfg.max_b_frames)->default_value(-1),
"Maximum number of B-frames")
("keyint-min", po::value<int>(&enc_cfg.keyint_min)->default_value(-1),
"Minimum interval between keyframes")
("refs", po::value<int>(&enc_cfg.refs)->default_value(-1),
"Number of reference frames")
("thread-count", po::value<int>(&enc_cfg.thread_count)->default_value(0),
"Number of threads for encoding")
("delay", po::value<int>(&enc_cfg.delay)->default_value(0),
"Delay in milliseconds for encoder")
// Extra encoder options (key-value pairs)
("extra-encoder-option,e", PO_STR_VALUE<std::vector<StringType>>()->multitoken(),
"Additional AVOption(s) for the encoder (format: -e key=value)")
;
po::options_description upscale_opts("Upscaling options");
upscale_opts.add_options()
("width,w", po::value<int>(&proc_cfg.width)
->notifier([](int v) { validate_greater_equal_one(v, "width"); }), "Output width")
("height,h", po::value<int>(&proc_cfg.height)
->notifier([](int v) { validate_greater_equal_one(v, "height"); }), "Output height")
("scaling-factor,s", po::value<int>(&proc_cfg.scaling_factor)
->notifier([](int v) { validate_min(v, "scaling-factor", 2); }), "Scaling factor")
;
po::options_description interp_opts("Frame interpolation options");
interp_opts.add_options()
("frame-rate-mul,m", po::value<int>(&proc_cfg.frm_rate_mul)
->notifier([](int v) { validate_min(v, "frame-rate-mul", 2); }),
"Frame rate multiplier")
("scene-thresh,t", po::value<float>(&proc_cfg.scn_det_thresh)->default_value(10.0f)
->notifier([](float v) { validate_range<float>(v, "scene-thresh", 0.0, 100.0); }),
"Scene detection threshold")
;
po::options_description libplacebo_opts("libplacebo options");
libplacebo_opts.add_options()
("libplacebo-shader", PO_STR_VALUE<StringType>()
->default_value(STR("anime4k-v4-a"), "anime4k-v4-a")
->notifier(validate_anime4k_shader_name),
"Name/path of the GLSL shader file to use (built-in: anime4k-v4-a, anime4k-v4-a+a, "
"anime4k-v4-b, anime4k-v4-b+b, anime4k-v4-c, anime4k-v4-c+a, anime4k-v4.1-gan)")
;
po::options_description realesrgan_opts("RealESRGAN options");
realesrgan_opts.add_options()
("realesrgan-model", PO_STR_VALUE<StringType>()
->default_value(STR("realesr-animevideov3"), "realesr-animevideov3")
->notifier(validate_realesrgan_model_name),
"Name of the RealESRGAN model to use (realesr-animevideov3, realesrgan-plus-anime, "
"realesrgan-plus)")
;
po::options_description rife_opts("RIFE options");
rife_opts.add_options()
("rife-model", PO_STR_VALUE<StringType>()->default_value(STR("rife-v4.6"), "rife-v4.6")
->notifier(validate_rife_model_name),
"Name of the RIFE model to use (rife, rife-HD, rife-UHD, rife-anime, rife-v2, "
"rife-v2.3, rife-v2.4, rife-v3.0, rife-v3.1, rife-v4, rife-v4.6)")
("rife-uhd", "Enable Ultra HD mode")
;
// clang-format on
// Combine all options
all_opts.add(encoder_opts)
.add(upscale_opts)
.add(interp_opts)
.add(libplacebo_opts)
.add(realesrgan_opts)
.add(rife_opts);
po::variables_map vm;
#ifdef _WIN32
po::store(po::wcommand_line_parser(argc, argv).options(all_opts).run(), vm);
#else
po::store(po::command_line_parser(argc, argv).options(all_opts).run(), vm);
#endif
po::notify(vm);
if (vm.count("help") || argc == 1) {
std::cout
<< all_opts << std::endl
<< "Examples:" << std::endl
<< " Upscale an anime video to 4K using libplacebo:" << std::endl
<< " video2x -i input.mp4 -o output.mp4 -w 3840 -h 2160 \\" << std::endl
<< " -p libplacebo --libplacebo-shader anime4k-v4-a+a" << std::endl
<< std::endl
<< " Upscale a film by 4x using RealESRGAN with custom encoder options:"
<< std::endl
<< " video2x -i input.mkv -o output.mkv -s 4 \\" << std::endl
<< " -p realesrgan --realesrgan-model realesrgan-plus \\" << std::endl
<< " -c libx264rgb -e crf=17 -e preset=veryslow -e tune=film" << std::endl
<< std::endl
<< " Frame-interpolate a video using RIFE to 4x the original frame rate:"
<< std::endl
<< " video2x -i input.mp4 -o output.mp4 -m 4 -p rife --rife-model rife-v4.6"
<< std::endl;
return 1;
}
if (vm.count("version")) {
std::cout << "Video2X version " << LIBVIDEO2X_VERSION_STRING << std::endl;
return 1;
}
if (vm.count("list-devices")) {
return list_vulkan_devices();
}
if (vm.count("log-level")) {
std::optional<Video2xLogLevel> log_level =
find_log_level_by_name(vm["log-level"].as<StringType>());
if (!log_level.has_value()) {
spdlog::critical("Invalid log level specified.");
return -1;
}
arguments.log_level = log_level.value();
}
set_spdlog_level(arguments.log_level);
// Print program banner
spdlog::info("Video2X version {}", LIBVIDEO2X_VERSION_STRING);
// spdlog::info("Copyright (C) 2018-2024 K4YT3X and contributors.");
// spdlog::info("Licensed under GNU AGPL version 3.");
// Assign positional arguments
if (vm.count("input")) {
arguments.in_fname = std::filesystem::path(vm["input"].as<StringType>());
spdlog::info("Processing file: {}", arguments.in_fname.u8string());
} else {
spdlog::critical("Input file path is required.");
return -1;
}
if (vm.count("output")) {
arguments.out_fname = std::filesystem::path(vm["output"].as<StringType>());
} else if (!arguments.benchmark) {
spdlog::critical("Output file path is required.");
return -1;
}
// Parse processor type
if (vm.count("processor")) {
StringType processor_type_str = vm["processor"].as<StringType>();
if (processor_type_str == STR("libplacebo")) {
proc_cfg.processor_type = ProcessorType::Libplacebo;
} else if (processor_type_str == STR("realesrgan")) {
proc_cfg.processor_type = ProcessorType::RealESRGAN;
} else if (processor_type_str == STR("rife")) {
proc_cfg.processor_type = ProcessorType::RIFE;
} else {
spdlog::critical(
"Invalid processor specified. Must be 'libplacebo', 'realesrgan', or 'rife'."
);
return -1;
}
} else {
spdlog::critical("Processor type is required.");
return -1;
}
// Parse hardware acceleration method
arguments.hw_device_type = AV_HWDEVICE_TYPE_NONE;
if (vm.count("hwaccel")) {
StringType hwaccel_str = vm["hwaccel"].as<StringType>();
if (hwaccel_str != STR("none")) {
arguments.hw_device_type =
av_hwdevice_find_type_by_name(wstring_to_u8string(hwaccel_str).c_str());
if (arguments.hw_device_type == AV_HWDEVICE_TYPE_NONE) {
spdlog::critical(
"Invalid hardware device type '{}'.", wstring_to_u8string(hwaccel_str)
);
return -1;
}
}
}
// Parse codec to AVCodec
enc_cfg.codec = AV_CODEC_ID_H264;
if (vm.count("codec")) {
StringType codec_str = vm["codec"].as<StringType>();
const AVCodec *codec =
avcodec_find_encoder_by_name(wstring_to_u8string(codec_str).c_str());
if (codec == nullptr) {
spdlog::critical("Codec '{}' not found.", wstring_to_u8string(codec_str));
return -1;
}
enc_cfg.codec = codec->id;
}
// Parse copy streams flag
enc_cfg.copy_streams = vm.count("no-copy-streams") == 0;
// Parse pixel format to AVPixelFormat
enc_cfg.pix_fmt = AV_PIX_FMT_NONE;
if (vm.count("pix-fmt")) {
StringType pix_fmt_str = vm["pix-fmt"].as<StringType>();
if (!pix_fmt_str.empty()) {
enc_cfg.pix_fmt = av_get_pix_fmt(wstring_to_u8string(pix_fmt_str).c_str());
if (enc_cfg.pix_fmt == AV_PIX_FMT_NONE) {
spdlog::critical(
"Invalid pixel format '{}'.", wstring_to_u8string(pix_fmt_str)
);
return -1;
}
}
}
// Parse extra AVOptions
if (vm.count("extra-encoder-option")) {
for (const auto &opt : vm["extra-encoder-option"].as<std::vector<StringType>>()) {
size_t eq_pos = opt.find('=');
if (eq_pos != StringType::npos) {
StringType key = opt.substr(0, eq_pos);
StringType value = opt.substr(eq_pos + 1);
enc_cfg.extra_opts.push_back(std::make_pair(key, value));
} else {
spdlog::critical("Invalid extra AVOption format: {}", wstring_to_u8string(opt));
return -1;
}
}
}
// Parse processor-specific configurations
switch (proc_cfg.processor_type) {
case ProcessorType::Libplacebo: {
if (!vm.count("libplacebo-shader")) {
spdlog::critical("Shader name/path must be set for libplacebo.");
return -1;
}
if (proc_cfg.width <= 0 || proc_cfg.height <= 0) {
spdlog::critical("Output width and height must be set for libplacebo.");
return -1;
}
proc_cfg.processor_type = ProcessorType::Libplacebo;
LibplaceboConfig libplacebo_config;
libplacebo_config.shader_path = vm["libplacebo-shader"].as<StringType>();
proc_cfg.config = libplacebo_config;
break;
}
case ProcessorType::RealESRGAN: {
if (!vm.count("realesrgan-model")) {
spdlog::critical("RealESRGAN model name must be set for RealESRGAN.");
return -1;
}
if (proc_cfg.scaling_factor != 2 && proc_cfg.scaling_factor != 3 &&
proc_cfg.scaling_factor != 4) {
spdlog::critical("Scaling factor must be set to 2, 3, or 4 for RealESRGAN.");
return -1;
}
proc_cfg.processor_type = ProcessorType::RealESRGAN;
RealESRGANConfig realesrgan_config;
realesrgan_config.tta_mode = false;
realesrgan_config.model_name = vm["realesrgan-model"].as<StringType>();
proc_cfg.config = realesrgan_config;
break;
}
case ProcessorType::RIFE: {
if (!vm.count("rife-model")) {
spdlog::critical("RIFE model name must be set for RIFE.");
return -1;
}
if (proc_cfg.frm_rate_mul < 2) {
spdlog::critical("Frame rate multiplier must be set to at least 2 for RIFE.");
return -1;
}
proc_cfg.processor_type = ProcessorType::RIFE;
RIFEConfig rife_config;
rife_config.tta_mode = false;
rife_config.tta_temporal_mode = false;
rife_config.uhd_mode = vm.count("rife-uhd") > 0;
rife_config.num_threads = 0;
rife_config.model_name = vm["rife-model"].as<StringType>();
proc_cfg.config = rife_config;
break;
}
default:
spdlog::critical("Invalid processor type.");
return -1;
}
} catch (const po::error &e) {
spdlog::critical("Error parsing arguments: {}", e.what());
return -1;
} catch (const std::exception &e) {
spdlog::critical("Unexpected exception caught while parsing options: {}", e.what());
return -1;
}
// Validate Vulkan device ID
VkPhysicalDeviceProperties dev_props;
int get_vulkan_dev_ret = get_vulkan_device_prop(arguments.vk_device_index, &dev_props);
if (get_vulkan_dev_ret != 0) {
if (get_vulkan_dev_ret == -2) {
spdlog::critical("Invalid Vulkan device ID specified.");
return -1;
} else {
spdlog::warn("Unable to validate Vulkan device ID.");
return -1;
}
} else {
// Warn if the selected device is a CPU
spdlog::info("Using Vulkan device: {} ({:#x})", dev_props.deviceName, dev_props.deviceID);
if (dev_props.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU) {
spdlog::warn("The selected Vulkan device is a CPU device.");
}
}
return 0;
}

View File

@ -0,0 +1,77 @@
#include "logging.h"
#include <algorithm>
#include <unordered_map>
extern "C" {
#include <libavutil/log.h>
}
// True when a newline must be printed before the next log output
std::atomic<bool> newline_required = false;
// Translate a Video2xLogLevel into the matching spdlog level and apply it.
// Values without an explicit mapping fall back to the info level.
void set_spdlog_level(Video2xLogLevel log_level) {
    auto target_level = spdlog::level::info;

    switch (log_level) {
        case Video2xLogLevel::Trace:
            target_level = spdlog::level::trace;
            break;
        case Video2xLogLevel::Debug:
            target_level = spdlog::level::debug;
            break;
        case Video2xLogLevel::Warning:
            target_level = spdlog::level::warn;
            break;
        case Video2xLogLevel::Error:
            target_level = spdlog::level::err;
            break;
        case Video2xLogLevel::Critical:
            target_level = spdlog::level::critical;
            break;
        case Video2xLogLevel::Off:
            target_level = spdlog::level::off;
            break;
        case Video2xLogLevel::Info:
        default:
            target_level = spdlog::level::info;
            break;
    }

    spdlog::set_level(target_level);
}
// Look up a log level by its name, ignoring ASCII letter case.
//
// Accepts "trace", "debug", "info", "warning"/"warn", "error", "critical" and
// "off"/"none". Returns std::nullopt when the name matches none of them.
std::optional<Video2xLogLevel> find_log_level_by_name(const StringType &log_level_name) {
    // Static map to store the mapping
    static const std::unordered_map<StringType, Video2xLogLevel> log_level_map = {
        {STR("trace"), Video2xLogLevel::Trace},
        {STR("debug"), Video2xLogLevel::Debug},
        {STR("info"), Video2xLogLevel::Info},
        {STR("warning"), Video2xLogLevel::Warning},
        {STR("warn"), Video2xLogLevel::Warning},
        {STR("error"), Video2xLogLevel::Error},
        {STR("critical"), Video2xLogLevel::Critical},
        {STR("off"), Video2xLogLevel::Off},
        {STR("none"), Video2xLogLevel::Off}
    };

    // Normalize the input to lowercase. Only 'A'-'Z' are folded: every key in
    // the map is ASCII, and ::tolower must not be handed raw char/wchar_t
    // values, since anything not representable as unsigned char is undefined
    // behaviour for it.
    StringType normalized_name = log_level_name;
    std::transform(
        normalized_name.begin(),
        normalized_name.end(),
        normalized_name.begin(),
        [](auto ch) {
            if (ch >= 'A' && ch <= 'Z') {
                return static_cast<decltype(ch)>(ch - 'A' + 'a');
            }
            return ch;
        }
    );

    // Lookup the log level in the map
    auto it = log_level_map.find(normalized_name);
    if (it != log_level_map.end()) {
        return it->second;
    }
    return std::nullopt;
}
// FFmpeg log callback that first prints a pending newline (tracked by
// newline_required) and then forwards the message to FFmpeg's default callback
void newline_safe_ffmpeg_log_callback(void *ptr, int level, const char *fmt, va_list vl) {
    // The pending newline is only consumed for messages at or below the
    // current FFmpeg log level
    if (level <= av_log_get_level()) {
        if (newline_required.load()) {
            putchar('\n');
            newline_required.store(false);
        }
    }

    av_log_default_callback(ptr, level, fmt, vl);
}

View File

@ -0,0 +1,61 @@
#include "validators.h"
#include <unordered_set>
void validate_anime4k_shader_name(const StringType &shader_name) {
static const std::unordered_set<StringType> valid_anime4k_shaders = {
STR("anime4k-v4-a"),
STR("anime4k-v4-a+a"),
STR("anime4k-v4-b"),
STR("anime4k-v4-b+b"),
STR("anime4k-v4-c"),
STR("anime4k-v4-c+a"),
STR("anime4k-v4.1-gan")
};
if (valid_anime4k_shaders.count(shader_name) == 0 && !std::filesystem::exists(shader_name)) {
throw po::validation_error(
po::validation_error::invalid_option_value,
"libplacebo-shader",
"libplacebo-shader must be one of: anime4k-v4-a, anime4k-v4-a+a, anime4k-v4-b, "
"anime4k-v4-b+b, anime4k-v4-c, anime4k-v4-c+a, anime4k-v4.1-gan, or a valid file path"
);
}
}
void validate_realesrgan_model_name(const StringType &model_name) {
static const std::unordered_set<StringType> valid_realesrgan_models = {
STR("realesrgan-plus"), STR("realesrgan-plus-anime"), STR("realesr-animevideov3")
};
if (valid_realesrgan_models.count(model_name) == 0) {
throw po::validation_error(
po::validation_error::invalid_option_value,
"realesrgan-model",
"realesrgan-model must be one of: realesr-animevideov3, realesrgan-plus-anime, "
"realesrgan-plus"
);
}
}
void validate_rife_model_name(const StringType &model_name) {
static const std::unordered_set<StringType> valid_realesrgan_models = {
STR("rife"),
STR("rife-HD"),
STR("rife-UHD"),
STR("rife-anime"),
STR("rife-v2"),
STR("rife-v2.3"),
STR("rife-v2.4"),
STR("rife-v3.0"),
STR("rife-v3.1"),
STR("rife-v4"),
STR("rife-v4.6"),
};
if (valid_realesrgan_models.count(model_name) == 0) {
throw po::validation_error(
po::validation_error::invalid_option_value,
"rife-model",
"RIFE model must be one of: rife, rife-HD, rife-UHD, rife-anime, rife-v2, rife-v2.3, "
"rife-v2.4, rife-v3.0, rife-v3.1, rife-v4, rife-v4.6"
);
}
}

View File

@ -1,17 +1,4 @@
#include <algorithm>
#include <atomic>
#include <chrono>
#include <csignal>
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <iostream>
#include <string>
#include <thread>
#include <unordered_set>
#ifdef _WIN32
#include <Windows.h>
@ -22,82 +9,12 @@
#include <unistd.h>
#endif
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/hwcontext.h>
#include <libavutil/pixdesc.h>
#include <libavutil/pixfmt.h>
}
#include <libvideo2x/libvideo2x.h>
#include <libvideo2x/version.h>
#include <spdlog/spdlog.h>
#include <vulkan/vulkan.h>
#ifdef _WIN32
#define BOOST_PROGRAM_OPTIONS_WCHAR_T
#define PO_STR_VALUE po::wvalue
#else
#define PO_STR_VALUE po::value
#endif
#include <boost/program_options.hpp>
namespace po = boost::program_options;
#include "argparse.h"
#include "logging.h"
#include "timer.h"
// Indicate if a newline needs to be printed before the next output
std::atomic<bool> newline_required = false;
// Structure to hold parsed arguments
//
// Each member carries the value it has before command line parsing fills it in.
struct Arguments {
    // Logging and progress display
    Video2xLogLevel log_level{Video2xLogLevel::Info};
    bool no_progress{false};

    // General options
    std::filesystem::path in_fname;   // Input video file path
    std::filesystem::path out_fname;  // Output video file path
    StringType processor_type;        // Processor to use (libplacebo, realesrgan, rife)
    StringType hwaccel{STR("none")};  // Hardware acceleration method (decoding)
    uint32_t vk_device_index{0};      // Vulkan device index (GPU ID)
    bool no_copy_streams{false};      // Do not copy audio and subtitle streams
    bool benchmark{false};            // Discard processed frames and calculate average FPS

    // Encoder options
    StringType codec{STR("libx264")};  // Output codec
    StringType pix_fmt;                // Output pixel format
    int64_t bit_rate{0};               // Bitrate in bits per second
    int rc_buffer_size{0};             // Rate control buffer size in bits
    int rc_min_rate{0};                // Minimum rate control
    int rc_max_rate{0};                // Maximum rate control
    int qmin{-1};                      // Minimum quantizer
    int qmax{-1};                      // Maximum quantizer
    int gop_size{-1};                  // Group of pictures structure size
    int max_b_frames{-1};              // Maximum number of B-frames
    int keyint_min{-1};                // Minimum interval between keyframes
    int refs{-1};                      // Number of reference frames
    int thread_count{0};               // Number of threads for encoding
    int delay{0};                      // Delay in milliseconds for encoder
    // Additional encoder AVOptions as key/value pairs
    std::vector<std::pair<StringType, StringType>> extra_encoder_opts;

    // General processing options
    int width{0};                // Output width
    int height{0};               // Output height
    int scaling_factor{0};       // Scaling factor
    int frm_rate_mul{2};         // Frame rate multiplier
    float scn_det_thresh{0.0f};  // Scene detection threshold

    // libplacebo options
    StringType libplacebo_shader_path;  // Name/path of the GLSL shader file to use

    // RealESRGAN options
    StringType realesrgan_model_name{STR("realesr-animevideov3")};

    // RIFE options
    StringType rife_model_name{STR("rife-v4.6")};
    bool rife_uhd_mode{false};  // Enable Ultra HD mode
};
// Set UNIX terminal input to non-blocking mode
#ifndef _WIN32
void set_nonblocking_input(bool enable) {
@ -115,241 +32,11 @@ void set_nonblocking_input(bool enable) {
}
#endif
#ifdef _WIN32
// Convert a wide string to a UTF-8 encoded std::string using WideCharToMultiByte.
// An empty input yields an empty result.
std::string wstring_to_u8string(const std::wstring &wstr) {
    std::string utf8;
    if (wstr.empty()) {
        return utf8;
    }

    const wchar_t *wide_data = wstr.data();
    const int wide_len = static_cast<int>(wstr.size());

    // First call: ask how many bytes the converted string needs
    const int utf8_len =
        WideCharToMultiByte(CP_UTF8, 0, wide_data, wide_len, nullptr, 0, nullptr, nullptr);

    // Second call: convert into the pre-sized buffer
    utf8.resize(utf8_len, 0);
    WideCharToMultiByte(
        CP_UTF8, 0, wide_data, wide_len, utf8.data(), utf8_len, nullptr, nullptr
    );
    return utf8;
}
#else
// On non-Windows platforms the input is already a narrow string; return a copy of it.
std::string wstring_to_u8string(const std::string &str) {
    std::string copy(str);
    return copy;
}
#endif
// Set spdlog's level from a Video2xLogLevel.
// Info and any value without an explicit case both select spdlog's info level.
void set_spdlog_level(Video2xLogLevel log_level) {
    auto target_level = spdlog::level::info;

    switch (log_level) {
        case Video2xLogLevel::Trace:
            target_level = spdlog::level::trace;
            break;
        case Video2xLogLevel::Debug:
            target_level = spdlog::level::debug;
            break;
        case Video2xLogLevel::Warning:
            target_level = spdlog::level::warn;
            break;
        case Video2xLogLevel::Error:
            target_level = spdlog::level::err;
            break;
        case Video2xLogLevel::Critical:
            target_level = spdlog::level::critical;
            break;
        case Video2xLogLevel::Off:
            target_level = spdlog::level::off;
            break;
        case Video2xLogLevel::Info:
        default:
            // Keep the info level chosen above
            break;
    }

    spdlog::set_level(target_level);
}
std::optional<Video2xLogLevel> find_log_level_by_name(const StringType &log_level_name) {
// Static map to store the mapping
static const std::unordered_map<StringType, Video2xLogLevel> log_level_map = {
{STR("trace"), Video2xLogLevel::Trace},
{STR("debug"), Video2xLogLevel::Debug},
{STR("info"), Video2xLogLevel::Info},
{STR("warning"), Video2xLogLevel::Warning},
{STR("warn"), Video2xLogLevel::Warning},
{STR("error"), Video2xLogLevel::Error},
{STR("critical"), Video2xLogLevel::Critical},
{STR("off"), Video2xLogLevel::Off},
{STR("none"), Video2xLogLevel::Off}
};
// Normalize the input to lowercase
StringType normalized_name = log_level_name;
std::transform(
normalized_name.begin(), normalized_name.end(), normalized_name.begin(), ::tolower
);
// Lookup the log level in the map
auto it = log_level_map.find(normalized_name);
if (it != log_level_map.end()) {
return it->second;
}
return std::nullopt;
}
// Newline-safe log callback for FFmpeg
//
// If the message passes FFmpeg's current log level and the newline_required flag is set,
// a newline is written first and the flag is cleared; the message is then handed to
// FFmpeg's default callback unchanged.
void newline_safe_ffmpeg_log_callback(void *ptr, int level, const char *fmt, va_list vl) {
    const bool message_passes_level = level <= av_log_get_level();
    if (message_passes_level) {
        if (newline_required) {
            putchar('\n');
            newline_required = false;
        }
    }

    av_log_default_callback(ptr, level, fmt, vl);
}
bool is_valid_realesrgan_model(const StringType &model) {
static const std::unordered_set<StringType> valid_realesrgan_models = {
STR("realesrgan-plus"), STR("realesrgan-plus-anime"), STR("realesr-animevideov3")
};
return valid_realesrgan_models.count(model) > 0;
}
bool is_valid_rife_model(const StringType &model) {
static const std::unordered_set<StringType> valid_realesrgan_models = {
STR("rife"),
STR("rife-HD"),
STR("rife-UHD"),
STR("rife-anime"),
STR("rife-v2"),
STR("rife-v2.3"),
STR("rife-v2.4"),
STR("rife-v3.0"),
STR("rife-v3.1"),
STR("rife-v4"),
STR("rife-v4.6"),
};
return valid_realesrgan_models.count(model) > 0;
}
// Create a Vulkan instance and collect the handles of its physical devices.
//
// On success, returns 0, leaves the created instance in *instance (the caller must
// destroy it with vkDestroyInstance) and fills `devices` with exactly the handles that
// were written by Vulkan. On failure, logs an error, destroys the instance if it had been
// created, and returns -1.
int enumerate_vulkan_devices(VkInstance *instance, std::vector<VkPhysicalDevice> &devices) {
    // Create a Vulkan instance
    VkInstanceCreateInfo create_info{};
    create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;

    VkResult result = vkCreateInstance(&create_info, nullptr, instance);
    if (result != VK_SUCCESS) {
        spdlog::error("Failed to create Vulkan instance.");
        return -1;
    }

    // Query the number of physical devices
    uint32_t device_count = 0;
    result = vkEnumeratePhysicalDevices(*instance, &device_count, nullptr);
    if (result != VK_SUCCESS || device_count == 0) {
        spdlog::error("Failed to enumerate Vulkan physical devices or no devices available.");
        vkDestroyInstance(*instance, nullptr);
        return -1;
    }

    // Retrieve the device handles. VK_INCOMPLETE is a success code meaning more devices
    // exist than fit in the buffer (the set changed between the two calls); the handles
    // that were written are still valid, so it is not treated as a failure.
    devices.resize(device_count);
    result = vkEnumeratePhysicalDevices(*instance, &device_count, devices.data());
    if (result != VK_SUCCESS && result != VK_INCOMPLETE) {
        spdlog::error("Failed to retrieve Vulkan physical devices.");
        vkDestroyInstance(*instance, nullptr);
        return -1;
    }

    // device_count now holds the number of handles actually written; drop any unwritten
    // trailing entries so callers never see a null handle.
    devices.resize(device_count);
    if (devices.empty()) {
        spdlog::error("Failed to enumerate Vulkan physical devices or no devices available.");
        vkDestroyInstance(*instance, nullptr);
        return -1;
    }

    return 0;
}
int list_vulkan_devices() {
VkInstance instance;
std::vector<VkPhysicalDevice> physical_devices;
int result = enumerate_vulkan_devices(&instance, physical_devices);
if (result != 0) {
return result;
}
uint32_t device_count = static_cast<uint32_t>(physical_devices.size());
// List Vulkan device information
for (uint32_t i = 0; i < device_count; i++) {
VkPhysicalDevice device = physical_devices[i];
VkPhysicalDeviceProperties device_properties;
vkGetPhysicalDeviceProperties(device, &device_properties);
// Print Vulkan device ID and name
std::cout << i << ". " << device_properties.deviceName << std::endl;
std::cout << "\tType: ";
switch (device_properties.deviceType) {
case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU:
std::cout << "Integrated GPU";
break;
case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:
std::cout << "Discrete GPU";
break;
case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:
std::cout << "Virtual GPU";
break;
case VK_PHYSICAL_DEVICE_TYPE_CPU:
std::cout << "CPU";
break;
default:
std::cout << "Unknown";
break;
}
std::cout << std::endl;
// Print Vulkan API version
std::cout << "\tVulkan API Version: " << VK_VERSION_MAJOR(device_properties.apiVersion)
<< "." << VK_VERSION_MINOR(device_properties.apiVersion) << "."
<< VK_VERSION_PATCH(device_properties.apiVersion) << std::endl;
// Print driver version
std::cout << "\tDriver Version: " << VK_VERSION_MAJOR(device_properties.driverVersion)
<< "." << VK_VERSION_MINOR(device_properties.driverVersion) << "."
<< VK_VERSION_PATCH(device_properties.driverVersion) << std::endl;
// Print device ID
std::cout << "\tDevice ID: " << std::hex << std::showbase << device_properties.deviceID
<< std::dec << std::endl;
}
// Clean up Vulkan instance
vkDestroyInstance(instance, nullptr);
return 0;
}
int get_vulkan_device_prop(uint32_t vk_device_index, VkPhysicalDeviceProperties *dev_props) {
if (dev_props == nullptr) {
spdlog::error("Invalid device properties pointer.");
return -1;
}
VkInstance instance;
std::vector<VkPhysicalDevice> devices;
int result = enumerate_vulkan_devices(&instance, devices);
if (result != 0) {
return result;
}
uint32_t device_count = static_cast<uint32_t>(devices.size());
// Check if the Vulkan device ID is valid
if (vk_device_index >= device_count) {
vkDestroyInstance(instance, nullptr);
return -2;
}
// Get device properties for the specified Vulkan device ID
vkGetPhysicalDeviceProperties(devices[vk_device_index], dev_props);
// Clean up Vulkan instance
vkDestroyInstance(instance, nullptr);
return 0;
// Split a duration given in seconds into (hours, minutes, seconds) components.
std::tuple<int, int, int> calculate_time_components(int time_elapsed) {
    constexpr int seconds_per_minute = 60;
    constexpr int seconds_per_hour = 3600;

    const int hours = time_elapsed / seconds_per_hour;
    const int remainder_in_hour = time_elapsed % seconds_per_hour;
    const int minutes = remainder_in_hour / seconds_per_minute;
    const int seconds = time_elapsed % seconds_per_minute;

    return std::make_tuple(hours, minutes, seconds);
}
#ifdef _WIN32
@ -366,385 +53,33 @@ int wmain(int argc, wchar_t *argv[]) {
#else
int main(int argc, char **argv) {
#endif
// Initialize arguments structure
// Initialize arguments structures
Arguments arguments;
// Parse command line arguments using Boost.Program_options
try {
// clang-format off
po::options_description all_opts("General options");
all_opts.add_options()
("help", "Display this help page")
("version,V", "Print program version and exit")
("log-level", PO_STR_VALUE<StringType>()->default_value(STR("info"), "info"),
"Set verbosity level (trace, debug, info, warn, error, critical, none)")
("no-progress", po::bool_switch(&arguments.no_progress),
"Do not display the progress bar")
("list-devices,l", "List the available Vulkan devices (GPUs)")
// General Processing Options
("input,i", PO_STR_VALUE<StringType>(), "Input video file path")
("output,o", PO_STR_VALUE<StringType>(), "Output video file path")
("processor,p", PO_STR_VALUE<StringType>(&arguments.processor_type),
"Processor to use (libplacebo, realesrgan, rife)")
("hwaccel,a", PO_STR_VALUE<StringType>(&arguments.hwaccel)->default_value(STR("none"),
"none"), "Hardware acceleration method (decoding)")
("device,d", po::value<uint32_t>(&arguments.vk_device_index)->default_value(0),
"Vulkan device index (GPU ID)")
("benchmark,b", po::bool_switch(&arguments.benchmark),
"Discard processed frames and calculate average FPS; "
"useful for detecting encoder bottlenecks")
;
po::options_description encoder_opts("Encoder options");
encoder_opts.add_options()
("codec,c", PO_STR_VALUE<StringType>(&arguments.codec)->default_value(STR("libx264"),
"libx264"), "Output codec")
("no-copy-streams", po::bool_switch(&arguments.no_copy_streams),
"Do not copy audio and subtitle streams")
("pix-fmt", PO_STR_VALUE<StringType>(&arguments.pix_fmt), "Output pixel format")
("bit-rate", po::value<int64_t>(&arguments.bit_rate)->default_value(0),
"Bitrate in bits per second")
("rc-buffer-size", po::value<int>(&arguments.rc_buffer_size)->default_value(0),
"Rate control buffer size in bits")
("rc-min-rate", po::value<int>(&arguments.rc_min_rate)->default_value(0),
"Minimum rate control")
("rc-max-rate", po::value<int>(&arguments.rc_max_rate)->default_value(0),
"Maximum rate control")
("qmin", po::value<int>(&arguments.qmin)->default_value(-1), "Minimum quantizer")
("qmax", po::value<int>(&arguments.qmax)->default_value(-1), "Maximum quantizer")
("gop-size", po::value<int>(&arguments.gop_size)->default_value(-1),
"Group of pictures structure size")
("max-b-frames", po::value<int>(&arguments.max_b_frames)->default_value(-1),
"Maximum number of B-frames")
("keyint-min", po::value<int>(&arguments.keyint_min)->default_value(-1),
"Minimum interval between keyframes")
("refs", po::value<int>(&arguments.refs)->default_value(-1),
"Number of reference frames")
("thread-count", po::value<int>(&arguments.thread_count)->default_value(0),
"Number of threads for encoding")
("delay", po::value<int>(&arguments.delay)->default_value(0),
"Delay in milliseconds for encoder")
// Extra encoder options (key-value pairs)
("extra-encoder-option,e", PO_STR_VALUE<std::vector<StringType>>()->multitoken(),
"Additional AVOption(s) for the encoder (format: -e key=value)")
;
po::options_description upscale_opts("Upscaling options");
upscale_opts.add_options()
("width,w", po::value<int>(&arguments.width), "Output width")
("height,h", po::value<int>(&arguments.height), "Output height")
("scaling-factor,s", po::value<int>(&arguments.scaling_factor), "Scaling factor")
;
po::options_description interp_opts("Frame interpolation options");
interp_opts.add_options()
("frame-rate-mul,m",
po::value<int>(&arguments.frm_rate_mul)->default_value(2),
"Frame rate multiplier")
("scene-thresh,t", po::value<float>(&arguments.scn_det_thresh)->default_value(10.0f),
"Scene detection threshold")
;
po::options_description libplacebo_opts("libplacebo options");
libplacebo_opts.add_options()
("libplacebo-shader", PO_STR_VALUE<StringType>(&arguments.libplacebo_shader_path),
"Name/path of the GLSL shader file to use (built-in: anime4k-v4-a, anime4k-v4-a+a, "
"anime4k-v4-b, anime4k-v4-b+b, anime4k-v4-c, anime4k-v4-c+a, anime4k-v4.1-gan)")
;
po::options_description realesrgan_opts("RealESRGAN options");
realesrgan_opts.add_options()
("realesrgan-model", PO_STR_VALUE<StringType>(&arguments.realesrgan_model_name),
"Name of the RealESRGAN model to use (realesr-animevideov3, realesrgan-plus-anime, "
"realesrgan-plus)")
;
po::options_description rife_opts("RIFE options");
rife_opts.add_options()
("rife-model", PO_STR_VALUE<StringType>(&arguments.rife_model_name),
"Name of the RIFE model to use (rife, rife-HD, rife-UHD, rife-anime, rife-v2, "
"rife-v2.3, rife-v2.4, rife-v3.0, rife-v3.1, rife-v4, rife-v4.6)")
("rife-uhd", po::bool_switch(&arguments.rife_uhd_mode),
"Enable Ultra HD mode")
;
// clang-format on
// Combine all options
all_opts.add(encoder_opts)
.add(upscale_opts)
.add(interp_opts)
.add(libplacebo_opts)
.add(realesrgan_opts)
.add(rife_opts);
// Positional arguments
po::positional_options_description p;
p.add("input", 1).add("output", 1).add("processor", 1);
po::variables_map vm;
#ifdef _WIN32
po::store(po::wcommand_line_parser(argc, argv).options(all_opts).positional(p).run(), vm);
#else
po::store(po::command_line_parser(argc, argv).options(all_opts).positional(p).run(), vm);
#endif
po::notify(vm);
if (vm.count("help") || argc == 1) {
std::cout
<< all_opts << std::endl
<< "Examples:" << std::endl
<< " Upscale an anime video to 4K using libplacebo:" << std::endl
<< " video2x -i input.mp4 -o output.mp4 -w 3840 -h 2160 \\" << std::endl
<< " -p libplacebo --libplacebo-shader anime4k-v4-a+a" << std::endl
<< std::endl
<< " Upscale a film by 4x using RealESRGAN with custom encoder options:"
<< std::endl
<< " video2x -i input.mkv -o output.mkv -s 4 \\" << std::endl
<< " -p realesrgan --realesrgan-model realesrgan-plus \\" << std::endl
<< " -c libx264rgb -e crf=17 -e preset=veryslow -e tune=film" << std::endl
<< std::endl
<< " Frame-interpolate a video using RIFE to 4x the original frame rate:"
<< std::endl
<< " video2x -i input.mp4 -o output.mp4 -m 4 -p rife --rife-model rife-v4.6"
<< std::endl;
return 0;
}
if (vm.count("version")) {
std::cout << "Video2X version " << LIBVIDEO2X_VERSION_STRING << std::endl;
return 0;
}
if (vm.count("list-devices")) {
return list_vulkan_devices();
}
if (vm.count("log-level")) {
std::optional<Video2xLogLevel> log_level =
find_log_level_by_name(vm["log-level"].as<StringType>());
if (!log_level.has_value()) {
spdlog::critical("Invalid log level specified.");
return 1;
}
arguments.log_level = log_level.value();
}
set_spdlog_level(arguments.log_level);
// Print program banner
spdlog::info("Video2X version {}", LIBVIDEO2X_VERSION_STRING);
// spdlog::info("Copyright (C) 2018-2024 K4YT3X and contributors.");
// spdlog::info("Licensed under GNU AGPL version 3.");
// Assign positional arguments
if (vm.count("input")) {
arguments.in_fname = std::filesystem::path(vm["input"].as<StringType>());
spdlog::info("Processing file: {}", arguments.in_fname.u8string());
} else {
spdlog::critical("Input file path is required.");
return 1;
}
if (vm.count("output")) {
arguments.out_fname = std::filesystem::path(vm["output"].as<StringType>());
} else if (!arguments.benchmark) {
spdlog::critical("Output file path is required.");
return 1;
}
if (!vm.count("processor")) {
spdlog::critical("Processor type is required (libplacebo, realesrgan, or rife).");
return 1;
}
// Parse extra AVOptions
if (vm.count("extra-encoder-option")) {
for (const auto &opt : vm["extra-encoder-option"].as<std::vector<StringType>>()) {
size_t eq_pos = opt.find('=');
if (eq_pos != StringType::npos) {
StringType key = opt.substr(0, eq_pos);
StringType value = opt.substr(eq_pos + 1);
arguments.extra_encoder_opts.push_back(std::make_pair(key, value));
} else {
spdlog::critical("Invalid extra AVOption format: {}", wstring_to_u8string(opt));
return 1;
}
}
}
if (vm.count("libplacebo-model")) {
if (!is_valid_realesrgan_model(vm["realesrgan-model"].as<StringType>())) {
spdlog::critical("Invalid model specified.");
return 1;
}
}
if (vm.count("rife-model")) {
if (!is_valid_rife_model(vm["rife-model"].as<StringType>())) {
spdlog::critical("Invalid RIFE model specified.");
return 1;
}
}
} catch (const po::error &e) {
spdlog::critical("Error parsing options: {}", e.what());
return 1;
} catch (const std::exception &e) {
spdlog::critical("Unexpected exception caught while parsing options: {}", e.what());
return 1;
}
// Additional validations
if (arguments.width < 0 || arguments.height < 0) {
spdlog::critical("Invalid output resolution specified.");
return 1;
}
if (arguments.scaling_factor < 0) {
spdlog::critical("Invalid scaling factor specified.");
return 1;
}
if (arguments.frm_rate_mul <= 1) {
spdlog::critical("Invalid frame rate multiplier specified.");
return 1;
}
if (arguments.scn_det_thresh < 0.0f || arguments.scn_det_thresh > 100.0f) {
spdlog::critical("Invalid scene detection threshold specified.");
return 1;
}
if (arguments.processor_type == STR("libplacebo")) {
if (arguments.libplacebo_shader_path.empty() || arguments.width == 0 ||
arguments.height == 0) {
spdlog::critical("Shader name/path, width, and height are required for libplacebo.");
return 1;
}
} else if (arguments.processor_type == STR("realesrgan")) {
if (arguments.scaling_factor != 2 && arguments.scaling_factor != 3 &&
arguments.scaling_factor != 4) {
spdlog::critical("Scaling factor must be 2, 3, or 4 for RealESRGAN.");
return 1;
}
} else if (arguments.processor_type != STR("rife")) {
spdlog::critical(
"Invalid processor specified. Must be 'libplacebo', 'realesrgan', or 'rife'."
);
return 1;
}
// Validate GPU ID
VkPhysicalDeviceProperties dev_props;
int get_vulkan_dev_ret = get_vulkan_device_prop(arguments.vk_device_index, &dev_props);
if (get_vulkan_dev_ret != 0) {
if (get_vulkan_dev_ret == -2) {
spdlog::critical("Invalid Vulkan device ID specified.");
return 1;
} else {
spdlog::warn("Unable to validate Vulkan device ID.");
return 1;
}
} else {
// Warn if the selected device is a CPU
spdlog::info("Using Vulkan device: {} ({:#x})", dev_props.deviceName, dev_props.deviceID);
if (dev_props.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU) {
spdlog::warn("The selected Vulkan device is a CPU device.");
}
}
// Validate bitrate
if (arguments.bit_rate < 0) {
spdlog::critical("Invalid bitrate specified.");
return 1;
}
// Parse codec to AVCodec
const AVCodec *codec =
avcodec_find_encoder_by_name(wstring_to_u8string(arguments.codec).c_str());
if (!codec) {
spdlog::critical("Codec '{}' not found.", wstring_to_u8string(arguments.codec));
return 1;
}
// Parse pixel format to AVPixelFormat
AVPixelFormat pix_fmt = AV_PIX_FMT_NONE;
if (!arguments.pix_fmt.empty()) {
pix_fmt = av_get_pix_fmt(wstring_to_u8string(arguments.pix_fmt).c_str());
if (pix_fmt == AV_PIX_FMT_NONE) {
spdlog::critical("Invalid pixel format '{}'.", wstring_to_u8string(arguments.pix_fmt));
return 1;
}
}
// Setup filter configurations based on the parsed arguments
ProcessorConfig proc_cfg;
proc_cfg.width = arguments.width;
proc_cfg.height = arguments.height;
proc_cfg.scaling_factor = arguments.scaling_factor;
proc_cfg.frm_rate_mul = arguments.frm_rate_mul;
proc_cfg.scn_det_thresh = arguments.scn_det_thresh;
if (arguments.processor_type == STR("libplacebo")) {
proc_cfg.processor_type = ProcessorType::Libplacebo;
LibplaceboConfig libplacebo_config;
libplacebo_config.shader_path = arguments.libplacebo_shader_path;
proc_cfg.config = libplacebo_config;
} else if (arguments.processor_type == STR("realesrgan")) {
proc_cfg.processor_type = ProcessorType::RealESRGAN;
RealESRGANConfig realesrgan_config;
realesrgan_config.tta_mode = false;
realesrgan_config.model_name = arguments.realesrgan_model_name;
proc_cfg.config = realesrgan_config;
} else if (arguments.processor_type == STR("rife")) {
proc_cfg.processor_type = ProcessorType::RIFE;
RIFEConfig rife_config;
rife_config.tta_mode = false;
rife_config.tta_temporal_mode = false;
rife_config.uhd_mode = arguments.rife_uhd_mode;
rife_config.num_threads = 0;
rife_config.model_name = arguments.rife_model_name;
proc_cfg.config = rife_config;
}
// Setup encoder configuration
EncoderConfig enc_cfg;
enc_cfg.codec = codec->id;
enc_cfg.copy_streams = !arguments.no_copy_streams;
enc_cfg.width = 0;
enc_cfg.height = 0;
enc_cfg.pix_fmt = pix_fmt;
enc_cfg.bit_rate = arguments.bit_rate;
enc_cfg.rc_buffer_size = arguments.rc_buffer_size;
enc_cfg.rc_max_rate = arguments.rc_max_rate;
enc_cfg.rc_min_rate = arguments.rc_min_rate;
enc_cfg.qmin = arguments.qmin;
enc_cfg.qmax = arguments.qmax;
enc_cfg.gop_size = arguments.gop_size;
enc_cfg.max_b_frames = arguments.max_b_frames;
enc_cfg.keyint_min = arguments.keyint_min;
enc_cfg.refs = arguments.refs;
enc_cfg.thread_count = arguments.thread_count;
enc_cfg.delay = arguments.delay;
enc_cfg.extra_opts = arguments.extra_encoder_opts;
// Setup hardware configuration
HardwareConfig hw_cfg;
hw_cfg.hw_device_type = AV_HWDEVICE_TYPE_NONE;
hw_cfg.vk_device_index = arguments.vk_device_index;
// Parse command line arguments
int parse_ret = parse_args(argc, argv, arguments, proc_cfg, enc_cfg);
// Parse hardware acceleration method
if (arguments.hwaccel != STR("none")) {
hw_cfg.hw_device_type =
av_hwdevice_find_type_by_name(wstring_to_u8string(arguments.hwaccel).c_str());
if (hw_cfg.hw_device_type == AV_HWDEVICE_TYPE_NONE) {
spdlog::critical(
"Invalid hardware device type '{}'.", wstring_to_u8string(arguments.hwaccel)
);
return 1;
// Return if parsing failed
if (parse_ret < 0) {
return parse_ret;
}
// Return if help message or version info was displayed
if (parse_ret > 0) {
return 0;
}
// Create video processor object
VideoProcessor video_processor =
VideoProcessor(hw_cfg, proc_cfg, enc_cfg, arguments.log_level, arguments.benchmark);
VideoProcessor video_processor = VideoProcessor(
proc_cfg,
enc_cfg,
arguments.vk_device_index,
arguments.hw_device_type,
arguments.log_level,
arguments.benchmark
);
// Register a newline-safe log callback for FFmpeg
av_log_set_callback(newline_safe_ffmpeg_log_callback);
@ -803,17 +138,17 @@ int main(int argc, char **argv) {
std::cout.flush();
timer.resume();
}
newline_required = true;
newline_required.store(true);
}
} else if (ch == 'q' || ch == 'Q') {
// Abort processing
if (newline_required) {
if (newline_required.load()) {
putchar('\n');
}
spdlog::warn("Aborting gracefully; press Ctrl+C to terminate forcefully.");
{
video_processor.abort();
newline_required = false;
newline_required.store(false);
}
break;
}
@ -834,9 +169,8 @@ int main(int argc, char **argv) {
int time_elapsed = static_cast<int>(timer.get_elapsed_time() / 1000);
// Calculate hours, minutes, and seconds elapsed
int hours_elapsed = time_elapsed / 3600;
int minutes_elapsed = (time_elapsed % 3600) / 60;
int seconds_elapsed = time_elapsed % 60;
auto [hours_elapsed, minutes_elapsed, seconds_elapsed] =
calculate_time_components(time_elapsed);
// Calculate estimated time remaining
int64_t frames_remaining = total_frames - processed_frames;
@ -846,9 +180,8 @@ int main(int argc, char **argv) {
time_remaining = std::max<int>(time_remaining, 0);
// Calculate hours, minutes, and seconds remaining
int hours_remaining = time_remaining / 3600;
int minutes_remaining = (time_remaining % 3600) / 60;
int seconds_remaining = time_remaining % 60;
auto [hours_remaining, minutes_remaining, seconds_remaining] =
calculate_time_components(time_remaining);
// Print the progress bar
std::cout << "\r\033[Kframe=" << processed_frames << "/" << total_frames << " ("
@ -861,7 +194,7 @@ int main(int argc, char **argv) {
<< ":" << std::setw(2) << std::setfill('0') << minutes_remaining << ":"
<< std::setw(2) << std::setfill('0') << seconds_remaining;
std::cout.flush();
newline_required = true;
newline_required.store(true);
}
}
@ -878,7 +211,7 @@ int main(int argc, char **argv) {
processing_thread.join();
// Print a newline if progress bar was displayed
if (newline_required) {
if (newline_required.load()) {
std::cout << '\n';
}
@ -896,9 +229,8 @@ int main(int argc, char **argv) {
// Calculate statistics
int64_t processed_frames = video_processor.get_processed_frames();
int time_elapsed = static_cast<int>(timer.get_elapsed_time() / 1000);
int hours_elapsed = time_elapsed / 3600;
int minutes_elapsed = (time_elapsed % 3600) / 60;
int seconds_elapsed = time_elapsed % 60;
auto [hours_elapsed, minutes_elapsed, seconds_elapsed] =
calculate_time_components(time_elapsed);
float average_speed_fps = static_cast<float>(processed_frames) /
(time_elapsed > 0 ? static_cast<float>(time_elapsed) : 1);

View File

@ -0,0 +1,125 @@
#include "vulkan_utils.h"
#include <iostream>
#include <vector>
#include <spdlog/spdlog.h>
// Create a Vulkan instance and collect the handles of its physical devices.
//
// On success, returns 0, leaves the created instance in *instance (the caller must
// destroy it with vkDestroyInstance) and fills `devices` with exactly the handles that
// were written by Vulkan. On failure, logs an error, destroys the instance if it had been
// created, and returns -1.
static int enumerate_vulkan_devices(VkInstance *instance, std::vector<VkPhysicalDevice> &devices) {
    // Create a Vulkan instance
    VkInstanceCreateInfo create_info{};
    create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;

    VkResult result = vkCreateInstance(&create_info, nullptr, instance);
    if (result != VK_SUCCESS) {
        spdlog::error("Failed to create Vulkan instance.");
        return -1;
    }

    // Query the number of physical devices
    uint32_t device_count = 0;
    result = vkEnumeratePhysicalDevices(*instance, &device_count, nullptr);
    if (result != VK_SUCCESS || device_count == 0) {
        spdlog::error("Failed to enumerate Vulkan physical devices or no devices available.");
        vkDestroyInstance(*instance, nullptr);
        return -1;
    }

    // Retrieve the device handles. VK_INCOMPLETE is a success code meaning more devices
    // exist than fit in the buffer (the set changed between the two calls); the handles
    // that were written are still valid, so it is not treated as a failure.
    devices.resize(device_count);
    result = vkEnumeratePhysicalDevices(*instance, &device_count, devices.data());
    if (result != VK_SUCCESS && result != VK_INCOMPLETE) {
        spdlog::error("Failed to retrieve Vulkan physical devices.");
        vkDestroyInstance(*instance, nullptr);
        return -1;
    }

    // device_count now holds the number of handles actually written; drop any unwritten
    // trailing entries so callers never see a null handle.
    devices.resize(device_count);
    if (devices.empty()) {
        spdlog::error("Failed to enumerate Vulkan physical devices or no devices available.");
        vkDestroyInstance(*instance, nullptr);
        return -1;
    }

    return 0;
}
int list_vulkan_devices() {
VkInstance instance;
std::vector<VkPhysicalDevice> physical_devices;
int result = enumerate_vulkan_devices(&instance, physical_devices);
if (result != 0) {
return result;
}
uint32_t device_count = static_cast<uint32_t>(physical_devices.size());
// List Vulkan device information
for (uint32_t i = 0; i < device_count; i++) {
VkPhysicalDevice device = physical_devices[i];
VkPhysicalDeviceProperties device_properties;
vkGetPhysicalDeviceProperties(device, &device_properties);
// Print Vulkan device ID and name
std::cout << i << ". " << device_properties.deviceName << std::endl;
std::cout << "\tType: ";
switch (device_properties.deviceType) {
case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU:
std::cout << "Integrated GPU";
break;
case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:
std::cout << "Discrete GPU";
break;
case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:
std::cout << "Virtual GPU";
break;
case VK_PHYSICAL_DEVICE_TYPE_CPU:
std::cout << "CPU";
break;
default:
std::cout << "Unknown";
break;
}
std::cout << std::endl;
// Print Vulkan API version
std::cout << "\tVulkan API Version: " << VK_VERSION_MAJOR(device_properties.apiVersion)
<< "." << VK_VERSION_MINOR(device_properties.apiVersion) << "."
<< VK_VERSION_PATCH(device_properties.apiVersion) << std::endl;
// Print driver version
std::cout << "\tDriver Version: " << VK_VERSION_MAJOR(device_properties.driverVersion)
<< "." << VK_VERSION_MINOR(device_properties.driverVersion) << "."
<< VK_VERSION_PATCH(device_properties.driverVersion) << std::endl;
// Print device ID
std::cout << "\tDevice ID: " << std::hex << std::showbase << device_properties.deviceID
<< std::dec << std::endl;
}
// Clean up Vulkan instance
vkDestroyInstance(instance, nullptr);
return 0;
}
// Fetch the properties of the Vulkan physical device at the given index.
//
// Returns 0 on success and writes the properties to *dev_props. Returns -1 when
// dev_props is null, the non-zero result of enumerate_vulkan_devices when enumeration
// fails, and -2 when vk_device_index is not smaller than the number of devices.
int get_vulkan_device_prop(uint32_t vk_device_index, VkPhysicalDeviceProperties *dev_props) {
    if (dev_props == nullptr) {
        spdlog::error("Invalid device properties pointer.");
        return -1;
    }

    VkInstance instance;
    std::vector<VkPhysicalDevice> devices;
    const int enumerate_ret = enumerate_vulkan_devices(&instance, devices);
    if (enumerate_ret != 0) {
        return enumerate_ret;
    }

    // Check if the Vulkan device ID is valid
    const uint32_t device_count = static_cast<uint32_t>(devices.size());
    const bool index_in_range = vk_device_index < device_count;

    // Get device properties for the specified Vulkan device ID
    if (index_in_range) {
        vkGetPhysicalDeviceProperties(devices[vk_device_index], dev_props);
    }

    // Clean up Vulkan instance on both the success and the out-of-range path
    vkDestroyInstance(instance, nullptr);
    return index_in_range ? 0 : -2;
}