From f8dcad3aef75f1ce1892264b273c38c068b5fc67 Mon Sep 17 00:00:00 2001 From: K4YT3X Date: Mon, 2 Dec 2024 07:24:30 +0000 Subject: [PATCH] chore(libvideo2x)!: replace the C API with C++ API (#1245) * chore(libvideo2x)!: replace the C API with C++ API * fix: convert wide string to u8 for av_opt_set * style: removed unnecessary enum and struct specifiers Signed-off-by: k4yt3x --- CHANGELOG.md | 4 + CMakeLists.txt | 3 +- include/libvideo2x/avutils.h | 2 +- include/libvideo2x/char_defs.h | 22 -- include/libvideo2x/decoder.h | 5 +- include/libvideo2x/encoder.h | 4 +- include/libvideo2x/filter_libplacebo.h | 4 +- include/libvideo2x/filter_realesrgan.h | 5 +- include/libvideo2x/frames_processor.h | 4 +- include/libvideo2x/fsutils.h | 16 +- include/libvideo2x/interpolator_rife.h | 5 +- include/libvideo2x/libvideo2x.h | 110 +++----- include/libvideo2x/logging.h | 23 ++ include/libvideo2x/processor.h | 6 +- include/libvideo2x/processor_factory.h | 4 +- src/avutils.cpp | 11 +- src/decoder.cpp | 6 +- src/encoder.cpp | 60 +++-- src/filter_libplacebo.cpp | 7 +- src/filter_realesrgan.cpp | 2 +- src/frames_processor.cpp | 28 +- src/fsutils.cpp | 27 ++ src/interpolator_rife.cpp | 2 +- src/libvideo2x.cpp | 81 ++---- src/logging.cpp | 44 ++++ src/processor_factory.cpp | 51 ++-- tools/video2x/src/video2x.cpp | 341 +++++++++++-------------- 27 files changed, 420 insertions(+), 457 deletions(-) delete mode 100644 include/libvideo2x/char_defs.h create mode 100644 include/libvideo2x/logging.h create mode 100644 src/logging.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md index 398f7da..749390a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Improve error handling and error messages. - Improve the CLI help message structure and clarity. +### Removed + +- The C API for easier maintenance and development. + ### Fixed - Timestamp errors processing frames with PTS equal to 0 (#1222). diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ed758d..2de6609 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.10) -project(video2x VERSION 6.1.1 LANGUAGES CXX) +project(video2x VERSION 6.2.0 LANGUAGES CXX) if(POLICY CMP0167) cmake_policy(SET CMP0167 NEW) @@ -389,7 +389,6 @@ endif() # Install the header files install(FILES ${PROJECT_SOURCE_DIR}/include/libvideo2x/libvideo2x.h - ${PROJECT_SOURCE_DIR}/include/libvideo2x/char_defs.h ${CMAKE_CURRENT_BINARY_DIR}/libvideo2x/version.h DESTINATION ${INSTALL_INCLUDE_DESTINATION} ) diff --git a/include/libvideo2x/avutils.h b/include/libvideo2x/avutils.h index 68c04d9..598a4c7 100644 --- a/include/libvideo2x/avutils.h +++ b/include/libvideo2x/avutils.h @@ -11,7 +11,7 @@ AVRational get_video_frame_rate(AVFormatContext *ifmt_ctx, int in_vstream_idx); int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx); -enum AVPixelFormat +AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt); float get_frame_diff(AVFrame *frame1, AVFrame *frame2); diff --git a/include/libvideo2x/char_defs.h b/include/libvideo2x/char_defs.h deleted file mode 100644 index 0f4d76a..0000000 --- a/include/libvideo2x/char_defs.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef CHAR_DEFS_H -#define CHAR_DEFS_H - -#ifdef _WIN32 -typedef wchar_t CharType; -#define STR(x) L##x -#else -typedef char CharType; -#define STR(x) x -#endif - -#ifdef __cplusplus -#include - -#ifdef _WIN32 -typedef std::wstring StringType; -#else -typedef std::string StringType; -#endif - -#endif // __cplusplus -#endif // CHAR_DEFS_H diff --git a/include/libvideo2x/decoder.h b/include/libvideo2x/decoder.h index 55e6e1c..b316ba0 100644 --- a/include/libvideo2x/decoder.h +++ b/include/libvideo2x/decoder.h @@ -20,9 +20,8 @@ class Decoder { int get_video_stream_index() const; private: - static enum AVPixelFormat hw_pix_fmt_; - static enum AVPixelFormat - get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts); + static AVPixelFormat hw_pix_fmt_; + static AVPixelFormat get_hw_format(AVCodecContext *ctx, const AVPixelFormat *pix_fmts); AVFormatContext *fmt_ctx_; AVCodecContext *dec_ctx_; diff --git a/include/libvideo2x/encoder.h b/include/libvideo2x/encoder.h index 9d1de30..683136e 100644 --- a/include/libvideo2x/encoder.h +++ b/include/libvideo2x/encoder.h @@ -21,8 +21,8 @@ class Encoder { const std::filesystem::path &out_fpath, AVFormatContext *ifmt_ctx, AVCodecContext *dec_ctx, - EncoderConfig *encoder_config, - const ProcessorConfig *processor_config, + EncoderConfig &enc_cfg, + const ProcessorConfig &proc_cfg, int in_vstream_idx ); diff --git a/include/libvideo2x/filter_libplacebo.h b/include/libvideo2x/filter_libplacebo.h index 3c23222..53406e8 100644 --- a/include/libvideo2x/filter_libplacebo.h +++ b/include/libvideo2x/filter_libplacebo.h @@ -46,11 +46,11 @@ class FilterLibplacebo : public Filter { int flush(std::vector &flushed_frames) override; // Returns the filter's type - ProcessorType get_processor_type() const override { return PROCESSOR_LIBPLACEBO; } + ProcessorType get_processor_type() const override { return ProcessorType::Libplacebo; } // Returns the filter's output dimensions void get_output_dimensions( - const ProcessorConfig *processor_config, + const ProcessorConfig &proc_cfg, int in_width, int in_height, int &out_width, diff --git a/include/libvideo2x/filter_realesrgan.h b/include/libvideo2x/filter_realesrgan.h index 55b9460..ab4be0d 100644 --- a/include/libvideo2x/filter_realesrgan.h +++ b/include/libvideo2x/filter_realesrgan.h @@ -5,7 +5,6 @@ extern "C" { #include } -#include "char_defs.h" #include "processor.h" #include "realesrgan.h" @@ -40,11 +39,11 @@ class FilterRealesrgan : public Filter { int filter(AVFrame *in_frame, AVFrame **out_frame) override; // Returns the filter's type - ProcessorType get_processor_type() const override { return PROCESSOR_REALESRGAN; } + ProcessorType get_processor_type() const override { return ProcessorType::RealESRGAN; } // Returns the filter's output dimensions void get_output_dimensions( - const ProcessorConfig *processor_config, + const ProcessorConfig &proc_cfg, int in_width, int in_height, int &out_width, diff --git a/include/libvideo2x/frames_processor.h b/include/libvideo2x/frames_processor.h index e8f0e19..027e043 100644 --- a/include/libvideo2x/frames_processor.h +++ b/include/libvideo2x/frames_processor.h @@ -7,8 +7,8 @@ #include "processor.h" int process_frames( - const EncoderConfig *encoder_config, - const ProcessorConfig *processor_config, + const EncoderConfig &enc_cfg, + const ProcessorConfig &proc_cfg, VideoProcessingContext *proc_ctx, Decoder &decoder, Encoder &encoder, diff --git a/include/libvideo2x/fsutils.h b/include/libvideo2x/fsutils.h index 76e0161..57f86c3 100644 --- a/include/libvideo2x/fsutils.h +++ b/include/libvideo2x/fsutils.h @@ -4,7 +4,19 @@ #include #include -#include "char_defs.h" +#ifdef _WIN32 +typedef wchar_t CharType; +#define STR(x) L##x +#else +typedef char CharType; +#define STR(x) x +#endif + +#ifdef _WIN32 +typedef std::wstring StringType; +#else +typedef std::string StringType; +#endif bool filepath_is_readable(const std::filesystem::path &path); @@ -12,6 +24,8 @@ std::filesystem::path find_resource_file(const std::filesystem::path &path); std::string path_to_u8string(const std::filesystem::path &path); +std::string wstring_to_u8string(const StringType &wstr); + StringType path_to_string_type(const std::filesystem::path &path); StringType to_string_type(int value); diff --git a/include/libvideo2x/interpolator_rife.h b/include/libvideo2x/interpolator_rife.h index 982ba88..aff0378 100644 --- a/include/libvideo2x/interpolator_rife.h +++ b/include/libvideo2x/interpolator_rife.h @@ -5,7 +5,6 @@ extern "C" { #include } -#include "char_defs.h" #include "processor.h" #include "rife.h" @@ -45,11 +44,11 @@ class InterpolatorRIFE : public Interpolator { override; // Returns the interpolator's type - ProcessorType get_processor_type() const override { return PROCESSOR_RIFE; } + ProcessorType get_processor_type() const override { return ProcessorType::RIFE; } // Returns the interpolator's output dimensions void get_output_dimensions( - const ProcessorConfig *processor_config, + const ProcessorConfig &proc_cfg, int in_width, int in_height, int &out_width, diff --git a/include/libvideo2x/libvideo2x.h b/include/libvideo2x/libvideo2x.h index aea1922..c5a3e63 100644 --- a/include/libvideo2x/libvideo2x.h +++ b/include/libvideo2x/libvideo2x.h @@ -1,20 +1,17 @@ #ifndef LIBVIDEO2X_H #define LIBVIDEO2X_H -#include -#include -#include +#include +#include +#include -#ifdef __cplusplus extern "C" { -#endif #include #include -#ifdef __cplusplus } -#endif -#include "char_defs.h" +#include "fsutils.h" +#include "logging.h" #ifdef _WIN32 #ifdef LIBVIDEO2X_EXPORTS @@ -26,38 +23,24 @@ extern "C" { #define LIBVIDEO2X_API #endif -#ifdef __cplusplus -extern "C" { -#endif - -enum ProcessingMode { - PROCESSING_MODE_FILTER, - PROCESSING_MODE_INTERPOLATE, +enum class ProcessingMode { + Filter, + Interpolate, }; -enum ProcessorType { - PROCESSOR_LIBPLACEBO, - PROCESSOR_REALESRGAN, - PROCESSOR_RIFE, -}; - -enum Libvideo2xLogLevel { - LIBVIDEO2X_LOG_LEVEL_TRACE, - LIBVIDEO2X_LOG_LEVEL_DEBUG, - LIBVIDEO2X_LOG_LEVEL_INFO, - LIBVIDEO2X_LOG_LEVEL_WARNING, - LIBVIDEO2X_LOG_LEVEL_ERROR, - LIBVIDEO2X_LOG_LEVEL_CRITICAL, - LIBVIDEO2X_LOG_LEVEL_OFF +enum class ProcessorType { + Libplacebo, + RealESRGAN, + RIFE, }; struct LibplaceboConfig { - const CharType *shader_path; + StringType shader_path; }; struct RealESRGANConfig { bool tta_mode; - const CharType *model_name; + StringType model_name; }; struct RIFEConfig { @@ -65,34 +48,30 @@ struct RIFEConfig { bool tta_temporal_mode; bool uhd_mode; int num_threads; - const CharType *model_name; + StringType model_name; }; // Unified filter configuration struct ProcessorConfig { - enum ProcessorType processor_type; + ProcessorType processor_type; int width; int height; int scaling_factor; int frm_rate_mul; float scn_det_thresh; - union { - struct LibplaceboConfig libplacebo; - struct RealESRGANConfig realesrgan; - struct RIFEConfig rife; - } config; + std::variant config; }; // Encoder configurations struct EncoderConfig { // Non-AVCodecContext options - enum AVCodecID codec; + AVCodecID codec; bool copy_streams; // Basic video options int width; int height; - enum AVPixelFormat pix_fmt; + AVPixelFormat pix_fmt; // Rate control and compression int64_t bit_rate; @@ -115,51 +94,34 @@ struct EncoderConfig { int delay; // Extra AVOptions - struct { - const char *key; - const char *value; - } *extra_options; - size_t nb_extra_options; + std::vector> extra_opts; +}; + +struct HardwareConfig { + uint32_t vk_device_index; + AVHWDeviceType hw_device_type; }; // Video processing context struct VideoProcessingContext { int64_t processed_frames; int64_t total_frames; - time_t start_time; + std::time_t start_time; bool pause; bool abort; bool completed; }; -/** - * @brief Process a video file using the selected filter and encoder settings. - * - * @param[in] in_fname Path to the input video file - * @param[in] out_fname Path to the output video file - * @param[in] log_level Log level - * @param[in] benchmark Flag to enable benchmarking mode - * @param[in] vk_device_index Vulkan device index - * @param[in] hw_device_type Hardware device type - * @param[in] filter_config Filter configurations - * @param[in] encoder_config Encoder configurations - * @param[in,out] proc_ctx Video processing context - * @return int 0 on success, non-zero value on error - */ -LIBVIDEO2X_API int process_video( - const CharType *in_fname, - const CharType *out_fname, - enum Libvideo2xLogLevel log_level, - bool benchmark, - uint32_t vk_device_index, - enum AVHWDeviceType hw_device_type, - const struct ProcessorConfig *filter_config, - struct EncoderConfig *encoder_config, - struct VideoProcessingContext *proc_ctx +// Process a video file using the specified configurations +[[nodiscard]] LIBVIDEO2X_API int process_video( + const std::filesystem::path in_fname, + const std::filesystem::path out_fname, + const HardwareConfig hw_cfg, + const ProcessorConfig proc_cfg, + EncoderConfig enc_cfg, + VideoProcessingContext *proc_ctx, + Libvideo2xLogLevel log_level, + bool benchmark ); -#ifdef __cplusplus -} -#endif - #endif // LIBVIDEO2X_H diff --git a/include/libvideo2x/logging.h b/include/libvideo2x/logging.h new file mode 100644 index 0000000..a2c48fb --- /dev/null +++ b/include/libvideo2x/logging.h @@ -0,0 +1,23 @@ +#ifndef LOGGING_H +#define LOGGING_H + +#include + +#include "fsutils.h" + +enum class Libvideo2xLogLevel { + Unknown, + Trace, + Debug, + Info, + Warning, + Error, + Critical, + Off +}; + +void set_log_level(Libvideo2xLogLevel log_level); + +std::optional find_log_level_by_name(const StringType &log_level_name); + +#endif // LOGGING_H diff --git a/include/libvideo2x/processor.h b/include/libvideo2x/processor.h index 5fc1e8c..11a4865 100644 --- a/include/libvideo2x/processor.h +++ b/include/libvideo2x/processor.h @@ -19,7 +19,7 @@ class Processor { virtual ProcessingMode get_processing_mode() const = 0; virtual ProcessorType get_processor_type() const = 0; virtual void get_output_dimensions( - const ProcessorConfig *processor_config, + const ProcessorConfig &proc_cfg, int in_width, int in_height, int &width, @@ -30,14 +30,14 @@ class Processor { // Abstract base class for filters class Filter : public Processor { public: - ProcessingMode get_processing_mode() const override { return PROCESSING_MODE_FILTER; } + ProcessingMode get_processing_mode() const override { return ProcessingMode::Filter; } virtual int filter(AVFrame *in_frame, AVFrame **out_frame) = 0; }; // Abstract base class for interpolators class Interpolator : public Processor { public: - ProcessingMode get_processing_mode() const override { return PROCESSING_MODE_INTERPOLATE; } + ProcessingMode get_processing_mode() const override { return ProcessingMode::Interpolate; } virtual int interpolate(AVFrame *prev_frame, AVFrame *in_frame, AVFrame **out_frame, float time_step) = 0; }; diff --git a/include/libvideo2x/processor_factory.h b/include/libvideo2x/processor_factory.h index 9d48be7..de7a490 100644 --- a/include/libvideo2x/processor_factory.h +++ b/include/libvideo2x/processor_factory.h @@ -10,7 +10,7 @@ // Processor Factory Class class ProcessorFactory { public: - using Creator = std::function(const ProcessorConfig *, uint32_t)>; + using Creator = std::function(const ProcessorConfig &, uint32_t)>; // Singleton instance accessor static ProcessorFactory &instance(); @@ -20,7 +20,7 @@ class ProcessorFactory { // Create a processor instance based on configuration std::unique_ptr - create_processor(const ProcessorConfig *processor_config, uint32_t vk_device_index) const; + create_processor(const ProcessorConfig &proc_cfg, uint32_t vk_device_index) const; private: // Private constructor for Singleton diff --git a/src/avutils.cpp b/src/avutils.cpp index 7b5e7c2..9d07179 100644 --- a/src/avutils.cpp +++ b/src/avutils.cpp @@ -63,14 +63,13 @@ int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx) { return static_cast(duration_secs * fps); } -enum AVPixelFormat -get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt) { +AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt) { int ret; char errbuf[AV_ERROR_MAX_STRING_SIZE]; // Retrieve the list of supported pixel formats #if LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(61, 13, 100) - const enum AVPixelFormat *supported_pix_fmts = nullptr; + const AVPixelFormat *supported_pix_fmts = nullptr; ret = avcodec_get_supported_config( nullptr, encoder, AV_CODEC_CONFIG_PIX_FORMAT, 0, (const void **)&supported_pix_fmts, nullptr ); @@ -90,7 +89,7 @@ get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt } } #else - const enum AVPixelFormat *supported_pix_fmts = encoder->pix_fmts; + const AVPixelFormat *supported_pix_fmts = encoder->pix_fmts; #endif // Determine if the target pixel format has an alpha channel @@ -102,8 +101,8 @@ get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt } // Iterate over supported pixel formats to find the best match - enum AVPixelFormat best_pix_fmt = AV_PIX_FMT_NONE; - for (const enum AVPixelFormat *p = supported_pix_fmts; *p != AV_PIX_FMT_NONE; p++) { + AVPixelFormat best_pix_fmt = AV_PIX_FMT_NONE; + for (const AVPixelFormat *p = supported_pix_fmts; *p != AV_PIX_FMT_NONE; p++) { if (target_pix_fmt != AV_PIX_FMT_NONE) { best_pix_fmt = av_find_best_pix_fmt_of_2(best_pix_fmt, *p, target_pix_fmt, has_alpha, nullptr); diff --git a/src/decoder.cpp b/src/decoder.cpp index 1f63e30..7d9a8c1 100644 --- a/src/decoder.cpp +++ b/src/decoder.cpp @@ -2,7 +2,7 @@ #include -enum AVPixelFormat Decoder::hw_pix_fmt_ = AV_PIX_FMT_NONE; +AVPixelFormat Decoder::hw_pix_fmt_ = AV_PIX_FMT_NONE; Decoder::Decoder() : fmt_ctx_(nullptr), dec_ctx_(nullptr), in_vstream_idx_(-1) {} @@ -17,8 +17,8 @@ Decoder::~Decoder() { } } -enum AVPixelFormat Decoder::get_hw_format(AVCodecContext *_, const enum AVPixelFormat *pix_fmts) { - for (const enum AVPixelFormat *p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) { +AVPixelFormat Decoder::get_hw_format(AVCodecContext *_, const AVPixelFormat *pix_fmts) { + for (const AVPixelFormat *p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) { if (*p == hw_pix_fmt_) { return *p; } diff --git a/src/encoder.cpp b/src/encoder.cpp index 148456a..15536ef 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -32,8 +32,8 @@ int Encoder::init( const std::filesystem::path &out_fpath, AVFormatContext *ifmt_ctx, AVCodecContext *dec_ctx, - EncoderConfig *encoder_config, - const ProcessorConfig *processor_config, + EncoderConfig &enc_cfg, + const ProcessorConfig &proc_cfg, int in_vstream_idx ) { int ret; @@ -46,10 +46,10 @@ int Encoder::init( } // Find the encoder - const AVCodec *encoder = avcodec_find_encoder(encoder_config->codec); + const AVCodec *encoder = avcodec_find_encoder(enc_cfg.codec); if (!encoder) { spdlog::error( - "Required video encoder not found for codec {}", avcodec_get_name(encoder_config->codec) + "Required video encoder not found for codec {}", avcodec_get_name(enc_cfg.codec) ); return AVERROR_ENCODER_NOT_FOUND; } @@ -85,33 +85,33 @@ int Encoder::init( enc_ctx_->sample_aspect_ratio = dec_ctx->sample_aspect_ratio; // Set basic video options - enc_ctx_->width = encoder_config->width; - enc_ctx_->height = encoder_config->height; + enc_ctx_->width = enc_cfg.width; + enc_ctx_->height = enc_cfg.height; // Set rate control and compression options - enc_ctx_->bit_rate = encoder_config->bit_rate; - enc_ctx_->rc_buffer_size = encoder_config->rc_buffer_size; - enc_ctx_->rc_min_rate = encoder_config->rc_min_rate; - enc_ctx_->rc_max_rate = encoder_config->rc_max_rate; - enc_ctx_->qmin = encoder_config->qmin; - enc_ctx_->qmax = encoder_config->qmax; + enc_ctx_->bit_rate = enc_cfg.bit_rate; + enc_ctx_->rc_buffer_size = enc_cfg.rc_buffer_size; + enc_ctx_->rc_min_rate = enc_cfg.rc_min_rate; + enc_ctx_->rc_max_rate = enc_cfg.rc_max_rate; + enc_ctx_->qmin = enc_cfg.qmin; + enc_ctx_->qmax = enc_cfg.qmax; // Set GOP and frame structure options - enc_ctx_->gop_size = encoder_config->gop_size; - enc_ctx_->max_b_frames = encoder_config->max_b_frames; - enc_ctx_->keyint_min = encoder_config->keyint_min; - enc_ctx_->refs = encoder_config->refs; + enc_ctx_->gop_size = enc_cfg.gop_size; + enc_ctx_->max_b_frames = enc_cfg.max_b_frames; + enc_ctx_->keyint_min = enc_cfg.keyint_min; + enc_ctx_->refs = enc_cfg.refs; // Set performance and threading options - enc_ctx_->thread_count = encoder_config->thread_count; + enc_ctx_->thread_count = enc_cfg.thread_count; // Set latency and buffering options - enc_ctx_->delay = encoder_config->delay; + enc_ctx_->delay = enc_cfg.delay; // Set the pixel format - if (encoder_config->pix_fmt != AV_PIX_FMT_NONE) { + if (enc_cfg.pix_fmt != AV_PIX_FMT_NONE) { // Use the specified pixel format - enc_ctx_->pix_fmt = encoder_config->pix_fmt; + enc_ctx_->pix_fmt = enc_cfg.pix_fmt; } else { // Automatically select the pixel format enc_ctx_->pix_fmt = get_encoder_default_pix_fmt(encoder, dec_ctx->pix_fmt); @@ -122,11 +122,9 @@ int Encoder::init( spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(enc_ctx_->pix_fmt)); } - if (processor_config->frm_rate_mul > 0) { + if (proc_cfg.frm_rate_mul > 0) { AVRational in_frame_rate = get_video_frame_rate(ifmt_ctx, in_vstream_idx); - enc_ctx_->framerate = { - in_frame_rate.num * processor_config->frm_rate_mul, in_frame_rate.den - }; + enc_ctx_->framerate = {in_frame_rate.num * proc_cfg.frm_rate_mul, in_frame_rate.den}; enc_ctx_->time_base = av_inv_q(enc_ctx_->framerate); } else { // Set the output video's time base @@ -145,13 +143,13 @@ int Encoder::init( } // Set extra AVOptions - for (size_t i = 0; i < encoder_config->nb_extra_options; i++) { - const char *key = encoder_config->extra_options[i].key; - const char *value = encoder_config->extra_options[i].value; - spdlog::debug("Setting encoder option '{}' to '{}'", key, value); + for (const auto &[opt_name, opt_value] : enc_cfg.extra_opts) { + std::string opt_name_str = wstring_to_u8string(opt_name); + std::string opt_value_str = wstring_to_u8string(opt_value); + spdlog::debug("Setting encoder option '{}' to '{}'", opt_name_str, opt_value_str); - if (av_opt_set(enc_ctx_->priv_data, key, value, 0) < 0) { - spdlog::warn("Failed to set encoder option '{}' to '{}'", key, value); + if (av_opt_set(enc_ctx_->priv_data, opt_name_str.c_str(), opt_value_str.c_str(), 0) < 0) { + spdlog::warn("Failed to set encoder option '{}' to '{}'", opt_name_str, opt_value_str); } } @@ -178,7 +176,7 @@ int Encoder::init( out_vstream->r_frame_rate = enc_ctx_->framerate; // Copy other streams if necessary - if (encoder_config->copy_streams) { + if (enc_cfg.copy_streams) { // Allocate the stream mape frame o stream_map_ = reinterpret_cast(av_malloc_array(ifmt_ctx->nb_streams, sizeof(*stream_map_))); diff --git a/src/filter_libplacebo.cpp b/src/filter_libplacebo.cpp index aa93d2f..6f946d4 100644 --- a/src/filter_libplacebo.cpp +++ b/src/filter_libplacebo.cpp @@ -4,7 +4,6 @@ #include -#include "char_defs.h" #include "fsutils.h" #include "libplacebo.h" @@ -148,12 +147,12 @@ int FilterLibplacebo::flush(std::vector &flushed_frames) { } void FilterLibplacebo::get_output_dimensions( - const ProcessorConfig *processor_config, + const ProcessorConfig &proc_cfg, int, int, int &out_width, int &out_height ) const { - out_width = processor_config->width; - out_height = processor_config->height; + out_width = proc_cfg.width; + out_height = proc_cfg.height; } diff --git a/src/filter_realesrgan.cpp b/src/filter_realesrgan.cpp index dfb8a3b..685931c 100644 --- a/src/filter_realesrgan.cpp +++ b/src/filter_realesrgan.cpp @@ -121,7 +121,7 @@ int FilterRealesrgan::filter(AVFrame *in_frame, AVFrame **out_frame) { } void FilterRealesrgan::get_output_dimensions( - const ProcessorConfig *, + const ProcessorConfig &, int in_width, int in_height, int &out_width, diff --git a/src/frames_processor.cpp b/src/frames_processor.cpp index 81c34d4..ea9cac7 100644 --- a/src/frames_processor.cpp +++ b/src/frames_processor.cpp @@ -27,7 +27,7 @@ auto av_packet_deleter = [](AVPacket *packet) { // Sets the total number of frames to process in the VideoProcessingContext void set_total_frames( - const ProcessorConfig *processor_config, + const ProcessorConfig &proc_cfg, VideoProcessingContext *proc_ctx, AVFormatContext *ifmt_ctx, int in_vstream_idx, @@ -44,8 +44,8 @@ void set_total_frames( } // Set total frames for interpolation - if (processor->get_processing_mode() == PROCESSING_MODE_INTERPOLATE) { - proc_ctx->total_frames *= processor_config->frm_rate_mul; + if (processor->get_processing_mode() == ProcessingMode::Interpolate) { + proc_ctx->total_frames *= proc_cfg.frm_rate_mul; } } @@ -126,7 +126,7 @@ int process_filtering( int process_interpolation( Processor *processor, - const ProcessorConfig *processor_config, + const ProcessorConfig &proc_cfg, VideoProcessingContext *proc_ctx, Encoder &encoder, bool benchmark, @@ -141,14 +141,14 @@ int process_interpolation( Interpolator *interpolator = static_cast(processor); // Calculate the time step for each frame - float time_step = 1.0f / static_cast(processor_config->frm_rate_mul); + float time_step = 1.0f / static_cast(proc_cfg.frm_rate_mul); float current_time_step = time_step; // Check if a scene change is detected bool skip_frame = false; if (prev_frame != nullptr) { float frame_diff = get_frame_diff(prev_frame.get(), frame); - if (frame_diff > processor_config->scn_det_thresh) { + if (frame_diff > proc_cfg.scn_det_thresh) { spdlog::debug( "Scene change detected ({:.2f}%), skipping frame {}", frame_diff, @@ -159,7 +159,7 @@ int process_interpolation( } // Write the interpolated frames - for (int i = 0; i < processor_config->frm_rate_mul - 1; i++) { + for (int i = 0; i < proc_cfg.frm_rate_mul - 1; i++) { // Skip interpolation if this is the first frame if (prev_frame == nullptr) { break; @@ -206,8 +206,8 @@ int process_interpolation( // Process frames using the selected filter. int process_frames( - const EncoderConfig *encoder_config, - const ProcessorConfig *processor_config, + const EncoderConfig &enc_cfg, + const ProcessorConfig &proc_cfg, VideoProcessingContext *proc_ctx, Decoder &decoder, Encoder &encoder, @@ -245,7 +245,7 @@ int process_frames( } // Set the total number of frames in the VideoProcessingContext - set_total_frames(processor_config, proc_ctx, ifmt_ctx, in_vstream_idx, processor); + set_total_frames(proc_cfg, proc_ctx, ifmt_ctx, in_vstream_idx, processor); // Read frames from the input file while (!proc_ctx->abort) { @@ -292,7 +292,7 @@ int process_frames( // Process the frame based on the selected processing mode switch (processor->get_processing_mode()) { - case PROCESSING_MODE_FILTER: { + case ProcessingMode::Filter: { ret = process_filtering( processor, proc_ctx, @@ -303,10 +303,10 @@ int process_frames( ); break; } - case PROCESSING_MODE_INTERPOLATE: { + case ProcessingMode::Interpolate: { ret = process_interpolation( processor, - processor_config, + proc_cfg, proc_ctx, encoder, benchmark, @@ -329,7 +329,7 @@ int process_frames( "Processed frame {}/{}", proc_ctx->processed_frames, proc_ctx->total_frames ); } - } else if (encoder_config->copy_streams && stream_map[packet->stream_index] >= 0) { + } else if (enc_cfg.copy_streams && stream_map[packet->stream_index] >= 0) { write_raw_packet(packet.get(), ifmt_ctx, ofmt_ctx, stream_map); } av_packet_unref(packet.get()); diff --git a/src/fsutils.cpp b/src/fsutils.cpp index 963c920..b2c4c20 100644 --- a/src/fsutils.cpp +++ b/src/fsutils.cpp @@ -93,6 +93,33 @@ std::string path_to_u8string(const std::filesystem::path &path) { #endif } +#ifdef _WIN32 +std::string wstring_to_u8string(const std::wstring &wstr) { + if (wstr.empty()) { + return std::string(); + } + int size_needed = WideCharToMultiByte( + CP_UTF8, 0, wstr.data(), static_cast(wstr.size()), nullptr, 0, nullptr, nullptr + ); + std::string converted_str(size_needed, 0); + WideCharToMultiByte( + CP_UTF8, + 0, + wstr.data(), + static_cast(wstr.size()), + &converted_str[0], + size_needed, + nullptr, + nullptr + ); + return converted_str; +} +#else +std::string wstring_to_u8string(const std::string &str) { + return str; +} +#endif + StringType path_to_string_type(const std::filesystem::path &path) { #if _WIN32 return path.wstring(); diff --git a/src/interpolator_rife.cpp b/src/interpolator_rife.cpp index c412b97..d8a14eb 100644 --- a/src/interpolator_rife.cpp +++ b/src/interpolator_rife.cpp @@ -119,7 +119,7 @@ int InterpolatorRIFE::interpolate( } void InterpolatorRIFE::get_output_dimensions( - const ProcessorConfig *, + const ProcessorConfig &, int in_width, int in_height, int &out_width, diff --git a/src/libvideo2x.cpp b/src/libvideo2x.cpp index 35d918c..04067ab 100644 --- a/src/libvideo2x.cpp +++ b/src/libvideo2x.cpp @@ -16,53 +16,15 @@ extern "C" { #include "processor.h" #include "processor_factory.h" -static void set_log_level(Libvideo2xLogLevel log_level) { - switch (log_level) { - case LIBVIDEO2X_LOG_LEVEL_TRACE: - av_log_set_level(AV_LOG_TRACE); - spdlog::set_level(spdlog::level::trace); - break; - case LIBVIDEO2X_LOG_LEVEL_DEBUG: - av_log_set_level(AV_LOG_DEBUG); - spdlog::set_level(spdlog::level::debug); - break; - case LIBVIDEO2X_LOG_LEVEL_INFO: - av_log_set_level(AV_LOG_INFO); - spdlog::set_level(spdlog::level::info); - break; - case LIBVIDEO2X_LOG_LEVEL_WARNING: - av_log_set_level(AV_LOG_WARNING); - spdlog::set_level(spdlog::level::warn); - break; - case LIBVIDEO2X_LOG_LEVEL_ERROR: - av_log_set_level(AV_LOG_ERROR); - spdlog::set_level(spdlog::level::err); - break; - case LIBVIDEO2X_LOG_LEVEL_CRITICAL: - av_log_set_level(AV_LOG_FATAL); - spdlog::set_level(spdlog::level::critical); - break; - case LIBVIDEO2X_LOG_LEVEL_OFF: - av_log_set_level(AV_LOG_QUIET); - spdlog::set_level(spdlog::level::off); - break; - default: - av_log_set_level(AV_LOG_INFO); - spdlog::set_level(spdlog::level::info); - break; - } -} - -extern "C" int process_video( - const CharType *in_fname, - const CharType *out_fname, +int process_video( + const std::filesystem::path in_fname, + const std::filesystem::path out_fname, + const HardwareConfig hw_cfg, + const ProcessorConfig proc_cfg, + EncoderConfig enc_cfg, + VideoProcessingContext *proc_ctx, Libvideo2xLogLevel log_level, - bool benchmark, - uint32_t vk_device_index, - AVHWDeviceType hw_type, - const ProcessorConfig *processor_config, - EncoderConfig *encoder_config, - VideoProcessingContext *proc_ctx + bool benchmark ) { char errbuf[AV_ERROR_MAX_STRING_SIZE]; int ret = 0; @@ -70,10 +32,6 @@ extern "C" int process_video( // Set the log level for FFmpeg and spdlog set_log_level(log_level); - // Convert the file names to std::filesystem::path - std::filesystem::path in_fpath(in_fname); - std::filesystem::path out_fpath(out_fname); - // Create a smart pointer to manage the hardware device context auto hw_ctx_deleter = [](AVBufferRef *ref) { if (ref != nullptr) { @@ -83,9 +41,9 @@ extern "C" int process_video( std::unique_ptr hw_ctx(nullptr, hw_ctx_deleter); // Initialize hardware device context - if (hw_type != AV_HWDEVICE_TYPE_NONE) { + if (hw_cfg.hw_device_type != AV_HWDEVICE_TYPE_NONE) { AVBufferRef *tmp_hw_ctx = nullptr; - ret = av_hwdevice_ctx_create(&tmp_hw_ctx, hw_type, NULL, NULL, 0); + ret = av_hwdevice_ctx_create(&tmp_hw_ctx, hw_cfg.hw_device_type, NULL, NULL, 0); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error initializing hardware device context: {}", errbuf); @@ -96,7 +54,7 @@ extern "C" int process_video( // Initialize input decoder Decoder decoder; - ret = decoder.init(hw_type, hw_ctx.get(), in_fpath); + ret = decoder.init(hw_cfg.hw_device_type, hw_ctx.get(), in_fname); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Failed to initialize decoder: {}", errbuf); @@ -109,7 +67,7 @@ extern "C" int process_video( // Create and initialize the appropriate filter std::unique_ptr processor( - ProcessorFactory::instance().create_processor(processor_config, vk_device_index) + ProcessorFactory::instance().create_processor(proc_cfg, hw_cfg.vk_device_index) ); if (processor == nullptr) { spdlog::critical("Failed to create filter instance"); @@ -119,7 +77,7 @@ extern "C" int process_video( // Initialize output dimensions based on filter configuration int output_width = 0, output_height = 0; processor->get_output_dimensions( - processor_config, dec_ctx->width, dec_ctx->height, output_width, output_height + proc_cfg, dec_ctx->width, dec_ctx->height, output_width, output_height ); if (output_width <= 0 || output_height <= 0) { spdlog::critical("Failed to determine the output dimensions"); @@ -127,14 +85,13 @@ extern "C" int process_video( } // Update encoder configuration with output dimensions - encoder_config->width = output_width; - encoder_config->height = output_height; + enc_cfg.width = output_width; + enc_cfg.height = output_height; // Initialize the encoder Encoder encoder; - ret = encoder.init( - hw_ctx.get(), out_fpath, ifmt_ctx, dec_ctx, encoder_config, processor_config, in_vstream_idx - ); + ret = + encoder.init(hw_ctx.get(), out_fname, ifmt_ctx, dec_ctx, enc_cfg, proc_cfg, in_vstream_idx); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Failed to initialize encoder: {}", errbuf); @@ -149,9 +106,7 @@ extern "C" int process_video( } // Process frames using the encoder and decoder - ret = process_frames( - encoder_config, processor_config, proc_ctx, decoder, encoder, processor.get(), benchmark - ); + ret = process_frames(enc_cfg, proc_cfg, proc_ctx, decoder, encoder, processor.get(), benchmark); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error processing frames: {}", errbuf); diff --git a/src/logging.cpp b/src/logging.cpp new file mode 100644 index 0000000..434b396 --- /dev/null +++ b/src/logging.cpp @@ -0,0 +1,44 @@ +#include "logging.h" + +extern "C" { +#include +} + +#include + +void set_log_level(Libvideo2xLogLevel log_level) { + switch (log_level) { + case Libvideo2xLogLevel::Trace: + av_log_set_level(AV_LOG_TRACE); + spdlog::set_level(spdlog::level::trace); + break; + case Libvideo2xLogLevel::Debug: + av_log_set_level(AV_LOG_DEBUG); + spdlog::set_level(spdlog::level::debug); + break; + case Libvideo2xLogLevel::Info: + av_log_set_level(AV_LOG_INFO); + spdlog::set_level(spdlog::level::info); + break; + case Libvideo2xLogLevel::Warning: + av_log_set_level(AV_LOG_WARNING); + spdlog::set_level(spdlog::level::warn); + break; + case Libvideo2xLogLevel::Error: + av_log_set_level(AV_LOG_ERROR); + spdlog::set_level(spdlog::level::err); + break; + case Libvideo2xLogLevel::Critical: + av_log_set_level(AV_LOG_FATAL); + spdlog::set_level(spdlog::level::critical); + break; + case Libvideo2xLogLevel::Off: + av_log_set_level(AV_LOG_QUIET); + spdlog::set_level(spdlog::level::off); + break; + default: + av_log_set_level(AV_LOG_INFO); + spdlog::set_level(spdlog::level::info); + break; + } +} diff --git a/src/processor_factory.cpp b/src/processor_factory.cpp index f0717bb..8e53a1c 100644 --- a/src/processor_factory.cpp +++ b/src/processor_factory.cpp @@ -28,32 +28,33 @@ void ProcessorFactory::register_processor(ProcessorType type, Creator creator) { // Create a processor instance std::unique_ptr ProcessorFactory::create_processor( - const ProcessorConfig *processor_config, + const ProcessorConfig &proc_cfg, uint32_t vk_device_index ) const { - auto it = creators.find(processor_config->processor_type); + auto it = creators.find(proc_cfg.processor_type); if (it == creators.end()) { spdlog::critical( - "Processor type not registered: {}", static_cast(processor_config->processor_type) + "Processor type not registered: {}", static_cast(proc_cfg.processor_type) ); return nullptr; } // Call the corresponding creator function - return it->second(processor_config, vk_device_index); + return it->second(proc_cfg, vk_device_index); } // Initialize default processors void ProcessorFactory::init_default_processors(ProcessorFactory &factory) { factory.register_processor( - PROCESSOR_LIBPLACEBO, - [](const ProcessorConfig *config, uint32_t vk_device_index) -> std::unique_ptr { - const auto &cfg = config->config.libplacebo; - if (!cfg.shader_path) { + ProcessorType::Libplacebo, + [](const ProcessorConfig &proc_cfg, + uint32_t vk_device_index) -> std::unique_ptr { + const auto &config = std::get(proc_cfg.config); + if (config.shader_path.empty()) { spdlog::critical("Shader path must be provided for the libplacebo filter"); return nullptr; } - if (config->width <= 0 || config->height <= 0) { + if (proc_cfg.width <= 0 || proc_cfg.height <= 0) { spdlog::critical( "Output width and height must be provided for the libplacebo filter" ); @@ -61,39 +62,41 @@ void ProcessorFactory::init_default_processors(ProcessorFactory &factory) { } return std::make_unique( vk_device_index, - std::filesystem::path(cfg.shader_path), - config->width, - config->height + std::filesystem::path(config.shader_path), + proc_cfg.width, + proc_cfg.height ); } ); factory.register_processor( - PROCESSOR_REALESRGAN, - [](const ProcessorConfig *config, uint32_t vk_device_index) -> std::unique_ptr { - const auto &cfg = config->config.realesrgan; - if (config->scaling_factor <= 0) { + ProcessorType::RealESRGAN, + [](const ProcessorConfig &proc_cfg, + uint32_t vk_device_index) -> std::unique_ptr { + const auto &config = std::get(proc_cfg.config); + if (proc_cfg.scaling_factor <= 0) { spdlog::critical("Scaling factor must be provided for the RealESRGAN filter"); return nullptr; } - if (!cfg.model_name) { + if (config.model_name.empty()) { spdlog::critical("Model name must be provided for the RealESRGAN filter"); return nullptr; } return std::make_unique( static_cast(vk_device_index), - cfg.tta_mode, - config->scaling_factor, - cfg.model_name + config.tta_mode, + proc_cfg.scaling_factor, + config.model_name ); } ); factory.register_processor( - PROCESSOR_RIFE, - [](const ProcessorConfig *config, uint32_t vk_device_index) -> std::unique_ptr { - const auto &cfg = config->config.rife; - if (!cfg.model_name) { + ProcessorType::RIFE, + [](const ProcessorConfig &proc_cfg, + uint32_t vk_device_index) -> std::unique_ptr { + const auto &cfg = std::get(proc_cfg.config); + if (cfg.model_name.empty()) { spdlog::critical("Model name must be provided for the RIFE filter"); return nullptr; } diff --git a/tools/video2x/src/video2x.cpp b/tools/video2x/src/video2x.cpp index df42820..237e1df 100644 --- a/tools/video2x/src/video2x.cpp +++ b/tools/video2x/src/video2x.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -28,11 +29,10 @@ extern "C" { #include #include #include +} #include #include -} - #include #include @@ -45,7 +45,6 @@ extern "C" { #include namespace po = boost::program_options; -#include "libvideo2x/char_defs.h" #include "timer.h" // Indicate if a newline needs to be printed before the next output @@ -56,7 +55,7 @@ std::mutex proc_ctx_mutex; // Structure to hold parsed arguments struct Arguments { - StringType log_level = STR("info"); + Libvideo2xLogLevel log_level = Libvideo2xLogLevel::Info; bool no_progress = false; // General options @@ -83,7 +82,7 @@ struct Arguments { int refs = -1; int thread_count = 0; int delay = 0; - std::vector> extra_options; + std::vector> extra_encoder_opts; // General processing options int width = 0; @@ -93,7 +92,7 @@ struct Arguments { float scn_det_thresh = 0.0f; // libplacebo options - std::filesystem::path libplacebo_shader_path; + StringType libplacebo_shader_path; // RealESRGAN options StringType realesrgan_model_name = STR("realesr-animevideov3"); @@ -106,7 +105,7 @@ struct Arguments { // Set UNIX terminal input to non-blocking mode #ifndef _WIN32 void set_nonblocking_input(bool enable) { - static struct termios oldt, newt; + static termios oldt, newt; if (enable) { tcgetattr(STDIN_FILENO, &oldt); newt = oldt; @@ -120,9 +119,8 @@ void set_nonblocking_input(bool enable) { } #endif -// Convert a wide string to UTF-8 string #ifdef _WIN32 -std::string wstring_to_utf8(const std::wstring &wstr) { +std::string wstring_to_u8string(const std::wstring &wstr) { if (wstr.empty()) { return std::string(); } @@ -143,11 +141,69 @@ std::string wstring_to_utf8(const std::wstring &wstr) { return converted_str; } #else -std::string wstring_to_utf8(const std::string &str) { +std::string wstring_to_u8string(const std::string &str) { return str; } #endif +void set_spdlog_level(Libvideo2xLogLevel log_level) { + switch (log_level) { + case Libvideo2xLogLevel::Trace: + spdlog::set_level(spdlog::level::trace); + break; + case Libvideo2xLogLevel::Debug: + spdlog::set_level(spdlog::level::debug); + break; + case Libvideo2xLogLevel::Info: + spdlog::set_level(spdlog::level::info); + break; + case Libvideo2xLogLevel::Warning: + spdlog::set_level(spdlog::level::warn); + break; + case Libvideo2xLogLevel::Error: + spdlog::set_level(spdlog::level::err); + break; + case Libvideo2xLogLevel::Critical: + spdlog::set_level(spdlog::level::critical); + break; + case Libvideo2xLogLevel::Off: + spdlog::set_level(spdlog::level::off); + break; + default: + spdlog::set_level(spdlog::level::info); + break; + } +} + +std::optional find_log_level_by_name(const StringType &log_level_name) { + // Static map to store the mapping + static const std::unordered_map LogLevelMap = { + {STR("trace"), Libvideo2xLogLevel::Trace}, + {STR("debug"), Libvideo2xLogLevel::Debug}, + {STR("info"), Libvideo2xLogLevel::Info}, + {STR("warning"), Libvideo2xLogLevel::Warning}, + {STR("warn"), Libvideo2xLogLevel::Warning}, + {STR("error"), Libvideo2xLogLevel::Error}, + {STR("critical"), Libvideo2xLogLevel::Critical}, + {STR("off"), Libvideo2xLogLevel::Off}, + {STR("none"), Libvideo2xLogLevel::Off} + }; + + // Normalize the input to lowercase + StringType normalized_name = log_level_name; + std::transform( + normalized_name.begin(), normalized_name.end(), normalized_name.begin(), ::tolower + ); + + // Lookup the log level in the map + auto it = LogLevelMap.find(normalized_name); + if (it != LogLevelMap.end()) { + return it->second; + } + + return std::nullopt; +} + // Newline-safe log callback for FFmpeg void newline_safe_ffmpeg_log_callback(void *ptr, int level, const char *fmt, va_list vl) { if (level <= av_log_get_level() && newline_required) { @@ -181,27 +237,6 @@ bool is_valid_rife_model(const StringType &model) { return valid_realesrgan_models.count(model) > 0; } -enum Libvideo2xLogLevel parse_log_level(const StringType &level_name) { - if (level_name == STR("trace")) { - return LIBVIDEO2X_LOG_LEVEL_TRACE; - } else if (level_name == STR("debug")) { - return LIBVIDEO2X_LOG_LEVEL_DEBUG; - } else if (level_name == STR("info")) { - return LIBVIDEO2X_LOG_LEVEL_INFO; - } else if (level_name == STR("warning") || level_name == STR("warn")) { - return LIBVIDEO2X_LOG_LEVEL_WARNING; - } else if (level_name == STR("error")) { - return LIBVIDEO2X_LOG_LEVEL_ERROR; - } else if (level_name == STR("critical")) { - return LIBVIDEO2X_LOG_LEVEL_CRITICAL; - } else if (level_name == STR("off") || level_name == STR("none")) { - return LIBVIDEO2X_LOG_LEVEL_OFF; - } else { - spdlog::warn("Invalid log level specified. Defaulting to 'info'."); - return LIBVIDEO2X_LOG_LEVEL_INFO; - } -} - int enumerate_vulkan_devices(VkInstance *instance, std::vector &devices) { // Create a Vulkan instance VkInstanceCreateInfo create_info{}; @@ -325,37 +360,20 @@ int get_vulkan_device_prop(uint32_t vk_device_index, VkPhysicalDeviceProperties void process_video_thread( Arguments *arguments, int *proc_ret, - AVHWDeviceType hw_device_type, - ProcessorConfig *filter_config, - EncoderConfig *encoder_config, + HardwareConfig hw_cfg, + ProcessorConfig proc_cfg, + EncoderConfig enc_cfg, VideoProcessingContext *proc_ctx ) { - enum Libvideo2xLogLevel log_level = parse_log_level(arguments->log_level); - - StringType in_fname_string; - StringType out_fname_string; - -#ifdef _WIN32 - in_fname_string = StringType(arguments->in_fname.wstring()); - out_fname_string = StringType(arguments->out_fname.wstring()); -#else - in_fname_string = StringType(arguments->in_fname.string()); - out_fname_string = StringType(arguments->out_fname.string()); -#endif - - const CharType *in_fname = in_fname_string.c_str(); - const CharType *out_fname = out_fname_string.c_str(); - *proc_ret = process_video( - in_fname, - out_fname, - log_level, - arguments->benchmark, - arguments->vk_device_index, - hw_device_type, - filter_config, - encoder_config, - proc_ctx + arguments->in_fname, + arguments->out_fname, + hw_cfg, + proc_cfg, + enc_cfg, + proc_ctx, + arguments->log_level, + arguments->benchmark ); { @@ -388,8 +406,8 @@ int main(int argc, char **argv) { all_opts.add_options() ("help", "Display this help page") ("version,V", "Print program version and exit") - ("verbose,v", PO_STR_VALUE(&arguments.log_level)->default_value(STR("info"), - "info"), "Set verbosity level (trace, debug, info, warn, error, critical, none)") + ("log-level", PO_STR_VALUE()->default_value(STR("info"), "info"), + "Set verbosity level (trace, debug, info, warn, error, critical, none)") ("no-progress", po::bool_switch(&arguments.no_progress), "Do not display the progress bar") ("list-devices,l", "List the available Vulkan devices (GPUs)") @@ -461,7 +479,7 @@ int main(int argc, char **argv) { po::options_description libplacebo_opts("libplacebo options"); libplacebo_opts.add_options() - ("libplacebo-shader", PO_STR_VALUE(), + ("libplacebo-shader", PO_STR_VALUE(&arguments.libplacebo_shader_path), "Name/path of the GLSL shader file to use (built-in: anime4k-v4-a, anime4k-v4-a+a, " "anime4k-v4-b, anime4k-v4-b+b, anime4k-v4-c, anime4k-v4-c+a, anime4k-v4.1-gan)") ; @@ -495,11 +513,10 @@ int main(int argc, char **argv) { po::positional_options_description p; p.add("input", 1).add("output", 1).add("processor", 1); -#ifdef _WIN32 po::variables_map vm; +#ifdef _WIN32 po::store(po::wcommand_line_parser(argc, argv).options(all_opts).positional(p).run(), vm); #else - po::variables_map vm; po::store(po::command_line_parser(argc, argv).options(all_opts).positional(p).run(), vm); #endif po::notify(vm); @@ -534,6 +551,17 @@ int main(int argc, char **argv) { return list_vulkan_devices(); } + if (vm.count("log-level")) { + std::optional log_level = + find_log_level_by_name(vm["log-level"].as()); + if (!log_level.has_value()) { + spdlog::critical("Invalid log level specified."); + return 1; + } + arguments.log_level = log_level.value(); + } + set_spdlog_level(arguments.log_level); + // Print program banner spdlog::info("Video2X version {}", LIBVIDEO2X_VERSION_STRING); // spdlog::info("Copyright (C) 2018-2024 K4YT3X and contributors."); @@ -567,19 +595,14 @@ int main(int argc, char **argv) { if (eq_pos != StringType::npos) { StringType key = opt.substr(0, eq_pos); StringType value = opt.substr(eq_pos + 1); - arguments.extra_options.push_back(std::make_pair(key, value)); + arguments.extra_encoder_opts.push_back(std::make_pair(key, value)); } else { - spdlog::critical("Invalid extra AVOption format: {}", wstring_to_utf8(opt)); + spdlog::critical("Invalid extra AVOption format: {}", wstring_to_u8string(opt)); return 1; } } } - if (vm.count("libplacebo-shader")) { - arguments.libplacebo_shader_path = - std::filesystem::path(vm["libplacebo-shader"].as()); - } - if (vm.count("libplacebo-model")) { if (!is_valid_realesrgan_model(vm["realesrgan-model"].as())) { spdlog::critical("Invalid model specified."); @@ -664,142 +687,86 @@ int main(int argc, char **argv) { } // Parse codec to AVCodec - const AVCodec *codec = avcodec_find_encoder_by_name(wstring_to_utf8(arguments.codec).c_str()); + const AVCodec *codec = + avcodec_find_encoder_by_name(wstring_to_u8string(arguments.codec).c_str()); if (!codec) { - spdlog::critical("Codec '{}' not found.", wstring_to_utf8(arguments.codec)); + spdlog::critical("Codec '{}' not found.", wstring_to_u8string(arguments.codec)); return 1; } // Parse pixel format to AVPixelFormat - enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE; + AVPixelFormat pix_fmt = AV_PIX_FMT_NONE; if (!arguments.pix_fmt.empty()) { - pix_fmt = av_get_pix_fmt(wstring_to_utf8(arguments.pix_fmt).c_str()); + pix_fmt = av_get_pix_fmt(wstring_to_u8string(arguments.pix_fmt).c_str()); if (pix_fmt == AV_PIX_FMT_NONE) { - spdlog::critical("Invalid pixel format '{}'.", wstring_to_utf8(arguments.pix_fmt)); + spdlog::critical("Invalid pixel format '{}'.", wstring_to_u8string(arguments.pix_fmt)); return 1; } } - // Set spdlog log level - auto log_level = parse_log_level(arguments.log_level); - switch (log_level) { - case LIBVIDEO2X_LOG_LEVEL_TRACE: - spdlog::set_level(spdlog::level::trace); - break; - case LIBVIDEO2X_LOG_LEVEL_DEBUG: - spdlog::set_level(spdlog::level::debug); - break; - case LIBVIDEO2X_LOG_LEVEL_INFO: - spdlog::set_level(spdlog::level::info); - break; - case LIBVIDEO2X_LOG_LEVEL_WARNING: - spdlog::set_level(spdlog::level::warn); - break; - case LIBVIDEO2X_LOG_LEVEL_ERROR: - spdlog::set_level(spdlog::level::err); - break; - case LIBVIDEO2X_LOG_LEVEL_CRITICAL: - spdlog::set_level(spdlog::level::critical); - break; - case LIBVIDEO2X_LOG_LEVEL_OFF: - spdlog::set_level(spdlog::level::off); - break; - default: - spdlog::set_level(spdlog::level::info); - break; - } - -#ifdef _WIN32 - std::wstring shader_path_str = arguments.libplacebo_shader_path.wstring(); -#else - std::string shader_path_str = arguments.libplacebo_shader_path.string(); -#endif - // Setup filter configurations based on the parsed arguments - ProcessorConfig processor_config; - processor_config.width = arguments.width; - processor_config.height = arguments.height; - processor_config.scaling_factor = arguments.scaling_factor; - processor_config.frm_rate_mul = arguments.frm_rate_mul; - processor_config.scn_det_thresh = arguments.scn_det_thresh; + ProcessorConfig proc_cfg; + proc_cfg.width = arguments.width; + proc_cfg.height = arguments.height; + proc_cfg.scaling_factor = arguments.scaling_factor; + proc_cfg.frm_rate_mul = arguments.frm_rate_mul; + proc_cfg.scn_det_thresh = arguments.scn_det_thresh; if (arguments.processor_type == STR("libplacebo")) { - processor_config.processor_type = PROCESSOR_LIBPLACEBO; - processor_config.config.libplacebo.shader_path = shader_path_str.c_str(); + proc_cfg.processor_type = ProcessorType::Libplacebo; + LibplaceboConfig libplacebo_config; + libplacebo_config.shader_path = arguments.libplacebo_shader_path; + proc_cfg.config = libplacebo_config; } else if (arguments.processor_type == STR("realesrgan")) { - processor_config.processor_type = PROCESSOR_REALESRGAN; - processor_config.config.realesrgan.tta_mode = false; - processor_config.config.realesrgan.model_name = arguments.realesrgan_model_name.c_str(); + proc_cfg.processor_type = ProcessorType::RealESRGAN; + RealESRGANConfig realesrgan_config; + realesrgan_config.tta_mode = false; + realesrgan_config.model_name = arguments.realesrgan_model_name; + proc_cfg.config = realesrgan_config; } else if (arguments.processor_type == STR("rife")) { - processor_config.processor_type = PROCESSOR_RIFE; - processor_config.config.rife.tta_mode = false; - processor_config.config.rife.tta_temporal_mode = false; - processor_config.config.rife.uhd_mode = arguments.rife_uhd_mode; - processor_config.config.rife.num_threads = 0; - processor_config.config.rife.model_name = arguments.rife_model_name.c_str(); + proc_cfg.processor_type = ProcessorType::RIFE; + RIFEConfig rife_config; + rife_config.tta_mode = false; + rife_config.tta_temporal_mode = false; + rife_config.uhd_mode = arguments.rife_uhd_mode; + rife_config.num_threads = 0; + rife_config.model_name = arguments.rife_model_name; + proc_cfg.config = rife_config; } // Setup encoder configuration - EncoderConfig encoder_config; - encoder_config.codec = codec->id; - encoder_config.copy_streams = !arguments.no_copy_streams; - encoder_config.width = 0; - encoder_config.height = 0; - encoder_config.pix_fmt = pix_fmt; - encoder_config.bit_rate = arguments.bit_rate; - encoder_config.rc_buffer_size = arguments.rc_buffer_size; - encoder_config.rc_max_rate = arguments.rc_max_rate; - encoder_config.rc_min_rate = arguments.rc_min_rate; - encoder_config.qmin = arguments.qmin; - encoder_config.qmax = arguments.qmax; - encoder_config.gop_size = arguments.gop_size; - encoder_config.max_b_frames = arguments.max_b_frames; - encoder_config.keyint_min = arguments.keyint_min; - encoder_config.refs = arguments.refs; - encoder_config.thread_count = arguments.thread_count; - encoder_config.delay = arguments.delay; + EncoderConfig enc_cfg; + enc_cfg.codec = codec->id; + enc_cfg.copy_streams = !arguments.no_copy_streams; + enc_cfg.width = 0; + enc_cfg.height = 0; + enc_cfg.pix_fmt = pix_fmt; + enc_cfg.bit_rate = arguments.bit_rate; + enc_cfg.rc_buffer_size = arguments.rc_buffer_size; + enc_cfg.rc_max_rate = arguments.rc_max_rate; + enc_cfg.rc_min_rate = arguments.rc_min_rate; + enc_cfg.qmin = arguments.qmin; + enc_cfg.qmax = arguments.qmax; + enc_cfg.gop_size = arguments.gop_size; + enc_cfg.max_b_frames = arguments.max_b_frames; + enc_cfg.keyint_min = arguments.keyint_min; + enc_cfg.refs = arguments.refs; + enc_cfg.thread_count = arguments.thread_count; + enc_cfg.delay = arguments.delay; + enc_cfg.extra_opts = arguments.extra_encoder_opts; - // Handle extra AVOptions - encoder_config.nb_extra_options = arguments.extra_options.size(); - encoder_config.extra_options = static_cast(malloc( - static_cast(encoder_config.nb_extra_options + 1) * - sizeof(encoder_config.extra_options[0]) - )); - if (encoder_config.extra_options == nullptr) { - spdlog::critical("Failed to allocate memory for extra AVOptions."); - return 1; - } - - // Copy extra AVOptions to the encoder configuration - for (size_t i = 0; i < encoder_config.nb_extra_options; i++) { - const std::string key = wstring_to_utf8(arguments.extra_options[i].first); - const std::string value = wstring_to_utf8(arguments.extra_options[i].second); - encoder_config.extra_options[i].key = strdup(key.c_str()); - encoder_config.extra_options[i].value = strdup(value.c_str()); - } - - // Custom deleter for extra AVOptions - auto extra_options_deleter = [&](decltype(encoder_config.extra_options) *extra_options_ptr) { - auto extra_options = *extra_options_ptr; - for (size_t i = 0; i < encoder_config.nb_extra_options; i++) { - free(const_cast(extra_options[i].key)); - free(const_cast(extra_options[i].value)); - } - free(extra_options); - *extra_options_ptr = nullptr; - }; - - // Define a unique_ptr to automatically free extra_options - std::unique_ptr - extra_options_guard(&encoder_config.extra_options, extra_options_deleter); + // Setup hardware configuration + HardwareConfig hw_cfg; + hw_cfg.hw_device_type = AV_HWDEVICE_TYPE_NONE; + hw_cfg.vk_device_index = arguments.vk_device_index; // Parse hardware acceleration method - enum AVHWDeviceType hw_device_type = AV_HWDEVICE_TYPE_NONE; if (arguments.hwaccel != STR("none")) { - hw_device_type = av_hwdevice_find_type_by_name(wstring_to_utf8(arguments.hwaccel).c_str()); - if (hw_device_type == AV_HWDEVICE_TYPE_NONE) { + hw_cfg.hw_device_type = + av_hwdevice_find_type_by_name(wstring_to_u8string(arguments.hwaccel).c_str()); + if (hw_cfg.hw_device_type == AV_HWDEVICE_TYPE_NONE) { spdlog::critical( - "Invalid hardware device type '{}'.", wstring_to_utf8(arguments.hwaccel) + "Invalid hardware device type '{}'.", wstring_to_u8string(arguments.hwaccel) ); return 1; } @@ -819,13 +786,7 @@ int main(int argc, char **argv) { // Create a thread for video processing int proc_ret = 0; std::thread processing_thread( - process_video_thread, - &arguments, - &proc_ret, - hw_device_type, - &processor_config, - &encoder_config, - &proc_ctx + process_video_thread, &arguments, &proc_ret, hw_cfg, proc_cfg, enc_cfg, &proc_ctx ); spdlog::info("Press [space] to pause/resume, [q] to abort.");