feat(ns): improve optimization flags and add namespaces (#1261)
Some checks failed
Build / ubuntu (push) Has been cancelled
Build / windows (push) Has been cancelled
Build / container (push) Has been cancelled

Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
K4YT3X 2024-12-17 16:24:51 +00:00 committed by GitHub
parent 5884dd1ba4
commit ae2d5d32e4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
35 changed files with 424 additions and 243 deletions

View File

@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Changed
- Improve optimization flags and add namespaces for better code organization.
### Fixed
- Make the encoder always use the calculated PTS with corrected math.

View File

@ -1,6 +1,7 @@
# add_link_options(), used below for the Release link flags, was introduced in
# CMake 3.13; declare that as the minimum so older versions fail with a clear
# version error instead of an "unknown command" error.
cmake_minimum_required(VERSION 3.13)
project(video2x VERSION 6.2.0 LANGUAGES CXX)
# The FindBoost module is removed in CMake 3.30
if(POLICY CMP0167)
cmake_policy(SET CMP0167 NEW)
endif()
@ -15,24 +16,24 @@ if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
# Set the default optimization flags for Release builds
if(CMAKE_BUILD_TYPE STREQUAL "Release")
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox /GL /LTCG /MD /DNDEBUG")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto")
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -s")
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -s")
endif()
endif()
# Set global compile options for all targets
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
add_compile_options(/W4 /permissive-)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
add_compile_options(-Wall -Wextra -Wpedantic -Wconversion -Wshadow)
endif()
# Set the default optimization flags for Release builds.
# These are directory-scoped, so they apply to every target defined after this
# point.
if(CMAKE_BUILD_TYPE STREQUAL "Release")
    if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
        # /GL (whole-program optimization) at compile time pairs with /LTCG at
        # link time; /OPT:REF and /OPT:ICF drop unreferenced and fold identical
        # functions/data.
        add_compile_options(/Ox /Ot /GL /DNDEBUG)
        add_link_options(/LTCG /OPT:REF /OPT:ICF)
    elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
        # -flto must be given when compiling as well as when linking;
        # otherwise the object files contain no intermediate representation
        # and the link-time -flto has nothing to optimize.
        # -ffunction-sections/-fdata-sections pair with --gc-sections so the
        # linker can discard unused sections; -Wl,-s strips symbols.
        add_compile_options(-O3 -march=native -flto -ffunction-sections -fdata-sections)
        add_link_options(-Wl,-s -flto -Wl,--gc-sections)
    endif()
endif()
# Build options
option(BUILD_SHARED_LIBS "Build libvideo2x as a shared library" ON)
option(BUILD_VIDEO2X_CLI "Build the video2x executable" ON)
@ -48,8 +49,10 @@ configure_file(
)
# Find the required packages
set(ALL_INCLUDE_DIRS)
set(ALL_LIBRARIES)
set(LIBVIDEO2X_INCLUDE_DIRS)
set(LIBVIDEO2X_LIBS)
set(VIDEO2X_INCLUDE_DIRS)
set(VIDEO2X_LIBS)
# Platform-specific dependencies
if(WIN32)
@ -66,11 +69,12 @@ if(WIN32)
${FFMPEG_BASE_PATH}/lib/avutil.lib
${FFMPEG_BASE_PATH}/lib/swscale.lib
)
list(APPEND ALL_LIBRARIES ${FFMPEG_LIB})
list(APPEND ALL_INCLUDE_DIRS ${FFMPEG_BASE_PATH}/include)
list(APPEND LIBVIDEO2X_LIBS ${FFMPEG_LIB})
list(APPEND LIBVIDEO2X_INCLUDE_DIRS ${FFMPEG_BASE_PATH}/include)
list(APPEND VIDEO2X_LIBS ${FFMPEG_LIB})
list(APPEND VIDEO2X_INCLUDE_DIRS ${FFMPEG_BASE_PATH}/include)
# ncnn
# TODO: Figure out why this file is not being copied to the install directory
set(SPIRV_BUILD_PATH
${CMAKE_BINARY_DIR}/realesrgan-prefix/src/realesrgan-build/ncnn/glslang/SPIRV
)
@ -80,11 +84,8 @@ if(WIN32)
set(SPIRV_LIB ${SPIRV_BUILD_PATH}/Debug/SPIRVd.lib)
endif()
list(APPEND ALL_LIBRARIES
${NCNN_BASE_PATH}/lib/ncnn.lib
${SPIRV_LIB}
)
list(APPEND ALL_INCLUDE_DIRS ${NCNN_BASE_PATH}/include/ncnn)
list(APPEND LIBVIDEO2X_LIBS ${NCNN_BASE_PATH}/lib/ncnn.lib ${SPIRV_LIB})
list(APPEND LIBVIDEO2X_INCLUDE_DIRS ${NCNN_BASE_PATH}/include/ncnn)
else()
# FFmpeg
find_package(PkgConfig REQUIRED)
@ -98,13 +99,13 @@ else()
)
# Loop through each package to find and collect include dirs and libraries
set(FFMPEG_LIB)
foreach(PKG ${FFMPEG_REQUIRED_PKGS})
pkg_check_modules(${PKG} REQUIRED ${PKG})
list(APPEND ALL_INCLUDE_DIRS ${${PKG}_INCLUDE_DIRS})
list(APPEND FFMPEG_LIB ${${PKG}_LIBRARIES})
list(APPEND LIBVIDEO2X_INCLUDE_DIRS ${${PKG}_INCLUDE_DIRS})
list(APPEND LIBVIDEO2X_LIBS ${${PKG}_LIBRARIES})
list(APPEND VIDEO2X_INCLUDE_DIRS ${${PKG}_INCLUDE_DIRS})
list(APPEND VIDEO2X_LIBS ${${PKG}_LIBRARIES})
endforeach()
list(APPEND ALL_LIBRARIES ${FFMPEG_LIB})
endif() # WIN32
# Find ncnn package
@ -209,32 +210,38 @@ endif()
# spdlog
if(USE_SYSTEM_SPDLOG)
find_package(spdlog REQUIRED)
list(APPEND ALL_INCLUDE_DIRS ${spdlog_INCLUDE_DIRS})
list(APPEND LIBVIDEO2X_INCLUDE_DIRS ${spdlog_INCLUDE_DIRS})
list(APPEND VIDEO2X_INCLUDE_DIRS ${spdlog_INCLUDE_DIRS})
set(SPDLOG_LIB spdlog::spdlog)
else()
add_subdirectory(third_party/spdlog)
set(SPDLOG_LIB spdlog::spdlog_header_only)
endif()
list(APPEND ALL_LIBRARIES ${SPDLOG_LIB})
list(APPEND LIBVIDEO2X_LIBS ${SPDLOG_LIB})
list(APPEND VIDEO2X_LIBS ${SPDLOG_LIB})
# Boost
if(USE_SYSTEM_BOOST)
# Find dependencies required for the CLI
if(BUILD_VIDEO2X_CLI)
# Vulkan
find_package(Vulkan REQUIRED)
list(APPEND VIDEO2X_LIBS Vulkan::Vulkan)
# Boost
if(USE_SYSTEM_BOOST)
find_package(Boost REQUIRED COMPONENTS program_options)
list(APPEND ALL_INCLUDE_DIRS ${Boost_INCLUDE_DIRS})
else()
list(APPEND LIBVIDEO2X_INCLUDE_DIRS ${Boost_INCLUDE_DIRS})
else()
option(Boost_USE_STATIC_LIBS "" ON)
option(Boost_USE_STATIC_RUNTIME "" ON)
option(Boost_COMPONENTS "program_options")
add_subdirectory(third_party/boost)
include_directories(${PROJECT_SOURCE_DIR}/third_party/boost/libs/program_options/include)
set(BOOST_BASE_PATH ${CMAKE_BINARY_DIR}/third_party/boost/libs/program_options/${CMAKE_BUILD_TYPE})
endif()
set(BOOST_LIB Boost::program_options)
if(BUILD_VIDEO2X_CLI)
find_package(Vulkan REQUIRED)
set(VULKAN_LIB Vulkan::Vulkan)
set(BOOST_BASE_PATH
${CMAKE_BINARY_DIR}/third_party/boost/libs/program_options/${CMAKE_BUILD_TYPE}
)
endif()
list(APPEND VIDEO2X_LIBS Boost::program_options)
endif()
# Include ExternalProject module
@ -267,8 +274,10 @@ ExternalProject_Add(
)
# Remove duplicate entries
list(REMOVE_DUPLICATES ALL_INCLUDE_DIRS)
list(REMOVE_DUPLICATES ALL_LIBRARIES)
list(REMOVE_DUPLICATES LIBVIDEO2X_INCLUDE_DIRS)
list(REMOVE_DUPLICATES LIBVIDEO2X_LIBS)
list(REMOVE_DUPLICATES VIDEO2X_INCLUDE_DIRS)
list(REMOVE_DUPLICATES VIDEO2X_LIBS)
# Create the shared library 'libvideo2x'
file(GLOB LIBVIDEO2X_SOURCES src/*.cpp)
@ -285,7 +294,7 @@ add_dependencies(libvideo2x realesrgan rife)
# Include directories for the shared library
target_include_directories(libvideo2x PRIVATE
${ALL_INCLUDE_DIRS}
${LIBVIDEO2X_INCLUDE_DIRS}
${CMAKE_CURRENT_BINARY_DIR}
${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/include/libvideo2x
@ -294,11 +303,7 @@ target_include_directories(libvideo2x PRIVATE
)
# Compile options for the shared library
target_compile_options(libvideo2x PRIVATE
-fPIC
$<$<CONFIG:Release>:-Ofast>
$<$<CONFIG:Debug>:-g -DDEBUG>
)
# The generator expression is quoted and its flags are separated with ';' so it
# reaches CMake as one well-formed argument that expands to two options in
# Debug builds (and to nothing in other configurations).
target_compile_options(libvideo2x PRIVATE -fPIC "$<$<CONFIG:Debug>:-g;-DDEBUG>")
# Define the paths to the shared libraries
if(WIN32)
@ -308,10 +313,10 @@ else()
set(REALESRGAN_LIB ${CMAKE_BINARY_DIR}/realesrgan_install/lib/librealesrgan-ncnn-vulkan.so)
set(RIFE_LIB ${CMAKE_BINARY_DIR}/rife_install/lib/librife-ncnn-vulkan.so)
endif()
list(APPEND ALL_LIBRARIES ${REALESRGAN_LIB} ${RIFE_LIB})
list(APPEND LIBVIDEO2X_LIBS ${REALESRGAN_LIB} ${RIFE_LIB})
# Link the shared library with the dependencies
target_link_libraries(libvideo2x PRIVATE ${ALL_LIBRARIES})
target_link_libraries(libvideo2x PRIVATE ${LIBVIDEO2X_LIBS})
if(NOT WIN32)
if(USE_SYSTEM_NCNN)
@ -329,7 +334,7 @@ if(BUILD_VIDEO2X_CLI)
# Include directories for the executable
target_include_directories(video2x PRIVATE
${ALL_INCLUDE_DIRS}
${VIDEO2X_INCLUDE_DIRS}
${CMAKE_CURRENT_BINARY_DIR}
${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/tools/video2x/include
@ -339,13 +344,7 @@ if(BUILD_VIDEO2X_CLI)
# Debug-only flags; the generator expression is quoted and ';'-separated so it
# is passed as a single well-formed argument that expands to two options.
target_compile_options(video2x PRIVATE "$<$<CONFIG:Debug>:-g;-DDEBUG>")
# Link the executable with the shared library
target_link_libraries(video2x PRIVATE
libvideo2x
${FFMPEG_LIB}
${SPDLOG_LIB}
${BOOST_LIB}
${VULKAN_LIB}
)
target_link_libraries(video2x PRIVATE libvideo2x ${VIDEO2X_LIBS})
endif()
# Define the default installation directories

View File

@ -4,6 +4,9 @@ extern "C" {
#include <libavformat/avformat.h>
}
namespace video2x {
namespace avutils {
AVRational get_video_frame_rate(AVFormatContext *ifmt_ctx, int in_vstream_idx);
int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx);
@ -17,3 +20,6 @@ void av_bufferref_deleter(AVBufferRef *bufferref);
void av_frame_deleter(AVFrame *frame);
void av_packet_deleter(AVPacket *packet);
} // namespace avutils
} // namespace video2x

View File

@ -7,6 +7,9 @@ extern "C" {
#include <mat.h>
namespace video2x {
namespace conversions {
// Convert AVFrame to another pixel format
AVFrame *convert_avframe_pix_fmt(AVFrame *src_frame, AVPixelFormat pix_fmt);
@ -15,3 +18,6 @@ ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame);
// Convert ncnn::Mat to AVFrame
AVFrame *ncnn_mat_to_avframe(const ncnn::Mat &mat, AVPixelFormat pix_fmt);
} // namespace conversions
} // namespace video2x

View File

@ -7,6 +7,9 @@ extern "C" {
#include <libavformat/avformat.h>
}
namespace video2x {
namespace decoder {
class Decoder {
public:
Decoder();
@ -26,3 +29,6 @@ class Decoder {
AVCodecContext *dec_ctx_;
int in_vstream_idx_;
};
} // namespace decoder
} // namespace video2x

View File

@ -12,6 +12,9 @@ extern "C" {
#include "fsutils.h"
namespace video2x {
namespace encoder {
// Encoder configurations
struct EncoderConfig {
// Non-AVCodecContext options
@ -42,7 +45,7 @@ struct EncoderConfig {
int delay = -1;
// Extra AVOptions
std::vector<std::pair<StringType, StringType>> extra_opts;
std::vector<std::pair<fsutils::StringType, fsutils::StringType>> extra_opts;
};
class Encoder {
@ -76,3 +79,6 @@ class Encoder {
int out_vstream_idx_;
int *stream_map_;
};
} // namespace encoder
} // namespace video2x

View File

@ -10,6 +10,9 @@ extern "C" {
#include "processor.h"
namespace video2x {
namespace processors {
// FilterLibplacebo class definition
class FilterLibplacebo : public Filter {
public:
@ -56,3 +59,6 @@ class FilterLibplacebo : public Filter {
AVRational in_time_base_;
AVRational out_time_base_;
};
} // namespace processors
} // namespace video2x

View File

@ -7,6 +7,9 @@ extern "C" {
#include "processor.h"
#include "realesrgan.h"
namespace video2x {
namespace processors {
// FilterRealesrgan class definition
class FilterRealesrgan : public Filter {
public:
@ -15,7 +18,7 @@ class FilterRealesrgan : public Filter {
int gpuid = 0,
bool tta_mode = false,
int scaling_factor = 4,
const StringType model_name = STR("realesr-animevideov3")
const fsutils::StringType model_name = STR("realesr-animevideov3")
);
// Destructor
@ -44,8 +47,11 @@ class FilterRealesrgan : public Filter {
int gpuid_;
bool tta_mode_;
int scaling_factor_;
const StringType model_name_;
const fsutils::StringType model_name_;
AVRational in_time_base_;
AVRational out_time_base_;
AVPixelFormat out_pix_fmt_;
};
} // namespace processors
} // namespace video2x

View File

@ -3,6 +3,9 @@
#include <filesystem>
#include <string>
namespace video2x {
namespace fsutils {
#ifdef _WIN32
typedef wchar_t CharType;
#define STR(x) L##x
@ -23,8 +26,11 @@ std::filesystem::path find_resource_file(const std::filesystem::path &path);
std::string path_to_u8string(const std::filesystem::path &path);
std::string wstring_to_u8string(const StringType &wstr);
std::string wstring_to_u8string(const fsutils::StringType &wstr);
StringType path_to_string_type(const std::filesystem::path &path);
fsutils::StringType path_to_string_type(const std::filesystem::path &path);
StringType to_string_type(int value);
fsutils::StringType to_string_type(int value);
} // namespace fsutils
} // namespace video2x

View File

@ -7,6 +7,9 @@ extern "C" {
#include "processor.h"
#include "rife.h"
namespace video2x {
namespace processors {
// InterpolatorRIFE class definition
class InterpolatorRIFE : public Interpolator {
public:
@ -17,7 +20,7 @@ class InterpolatorRIFE : public Interpolator {
bool tta_temporal_mode = false,
bool uhd_mode = false,
int num_threads = 1,
const StringType model_name = STR("rife-v4.6")
const fsutils::StringType model_name = STR("rife-v4.6")
);
// Destructor
@ -49,8 +52,11 @@ class InterpolatorRIFE : public Interpolator {
bool tta_temporal_mode_;
bool uhd_mode_;
int num_threads_;
const StringType model_name_;
const fsutils::StringType model_name_;
AVRational in_time_base_;
AVRational out_time_base_;
AVPixelFormat out_pix_fmt_;
};
} // namespace processors
} // namespace video2x

View File

@ -7,6 +7,9 @@ extern "C" {
#include <libavfilter/avfilter.h>
}
namespace video2x {
namespace processors {
int init_libplacebo(
AVFilterGraph **filter_graph,
AVFilterContext **buffersrc_ctx,
@ -17,3 +20,6 @@ int init_libplacebo(
uint32_t vk_device_index,
const std::filesystem::path &shader_path
);
} // namespace processors
} // namespace video2x

View File

@ -12,7 +12,7 @@ extern "C" {
#include "avutils.h"
#include "decoder.h"
#include "encoder.h"
#include "logging.h"
#include "logutils.h"
#include "processor.h"
#ifdef _WIN32
@ -25,6 +25,8 @@ extern "C" {
#define LIBVIDEO2X_API
#endif
namespace video2x {
enum class VideoProcessorState {
Idle,
Running,
@ -37,11 +39,11 @@ enum class VideoProcessorState {
class LIBVIDEO2X_API VideoProcessor {
public:
VideoProcessor(
const ProcessorConfig proc_cfg,
const EncoderConfig enc_cfg,
const processors::ProcessorConfig proc_cfg,
const encoder::EncoderConfig enc_cfg,
const uint32_t vk_device_idx = 0,
const AVHWDeviceType hw_device_type = AV_HWDEVICE_TYPE_NONE,
const Video2xLogLevel = Video2xLogLevel::Info,
const logutils::Video2xLogLevel = logutils::Video2xLogLevel::Info,
const bool benchmark = false
);
@ -59,10 +61,13 @@ class LIBVIDEO2X_API VideoProcessor {
int64_t get_total_frames() const { return total_frames_.load(); }
private:
[[nodiscard]] int
process_frames(Decoder &decoder, Encoder &encoder, std::unique_ptr<Processor> &processor);
[[nodiscard]] int process_frames(
decoder::Decoder &decoder,
encoder::Encoder &encoder,
std::unique_ptr<processors::Processor> &processor
);
[[nodiscard]] int write_frame(AVFrame *frame, Encoder &encoder);
[[nodiscard]] int write_frame(AVFrame *frame, encoder::Encoder &encoder);
[[nodiscard]] inline int write_raw_packet(
AVPacket *packet,
@ -72,22 +77,22 @@ class LIBVIDEO2X_API VideoProcessor {
);
[[nodiscard]] inline int process_filtering(
std::unique_ptr<Processor> &processor,
Encoder &encoder,
std::unique_ptr<processors::Processor> &processor,
encoder::Encoder &encoder,
AVFrame *frame,
AVFrame *proc_frame
);
[[nodiscard]] inline int process_interpolation(
std::unique_ptr<Processor> &processor,
Encoder &encoder,
std::unique_ptr<AVFrame, decltype(&av_frame_deleter)> &prev_frame,
std::unique_ptr<processors::Processor> &processor,
encoder::Encoder &encoder,
std::unique_ptr<AVFrame, decltype(&avutils::av_frame_deleter)> &prev_frame,
AVFrame *frame,
AVFrame *proc_frame
);
ProcessorConfig proc_cfg_;
EncoderConfig enc_cfg_;
processors::ProcessorConfig proc_cfg_;
encoder::EncoderConfig enc_cfg_;
uint32_t vk_device_idx_ = 0;
AVHWDeviceType hw_device_type_ = AV_HWDEVICE_TYPE_NONE;
bool benchmark_ = false;
@ -96,3 +101,5 @@ class LIBVIDEO2X_API VideoProcessor {
std::atomic<int64_t> frame_idx_ = 0;
std::atomic<int64_t> total_frames_ = 0;
};
} // namespace video2x

View File

@ -4,6 +4,9 @@
#include "fsutils.h"
namespace video2x {
namespace logutils {
enum class Video2xLogLevel {
Unknown,
Trace,
@ -17,4 +20,9 @@ enum class Video2xLogLevel {
void set_log_level(Video2xLogLevel log_level);
std::optional<Video2xLogLevel> find_log_level_by_name(const StringType &log_level_name);
std::optional<Video2xLogLevel> find_log_level_by_name(
const fsutils::StringType &log_level_name
);
} // namespace logutils
} // namespace video2x

View File

@ -11,6 +11,9 @@ extern "C" {
#include "fsutils.h"
namespace video2x {
namespace processors {
enum class ProcessingMode {
Filter,
Interpolate,
@ -24,12 +27,12 @@ enum class ProcessorType {
};
struct LibplaceboConfig {
StringType shader_path;
fsutils::StringType shader_path;
};
struct RealESRGANConfig {
bool tta_mode = false;
StringType model_name;
fsutils::StringType model_name;
};
struct RIFEConfig {
@ -37,7 +40,7 @@ struct RIFEConfig {
bool tta_temporal_mode = false;
bool uhd_mode = false;
int num_threads = 0;
StringType model_name;
fsutils::StringType model_name;
};
// Unified filter configuration
@ -81,3 +84,6 @@ class Interpolator : public Processor {
virtual int
interpolate(AVFrame *prev_frame, AVFrame *in_frame, AVFrame **out_frame, float time_step) = 0;
};
} // namespace processors
} // namespace video2x

View File

@ -6,6 +6,9 @@
#include "processor.h"
namespace video2x {
namespace processors {
// Processor Factory Class
class ProcessorFactory {
public:
@ -31,3 +34,6 @@ class ProcessorFactory {
// Static initializer for default processors
static void init_default_processors(ProcessorFactory &factory);
};
} // namespace processors
} // namespace video2x

View File

@ -1,3 +1,7 @@
#pragma once
namespace video2x {
// Version string of the library. The placeholder in the definition below is
// presumably filled in with the CMake project version when this template is
// configured -- confirm against the configure_file() call in CMakeLists.txt.
// NOTE(review): preprocessor macros are not scoped by namespaces, so this
// macro is visible globally even though it is written inside video2x.
#define LIBVIDEO2X_VERSION_STRING "@PROJECT_VERSION@"
} // namespace video2x

View File

@ -11,6 +11,9 @@ extern "C" {
#include "conversions.h"
namespace video2x {
namespace avutils {
AVRational get_video_frame_rate(AVFormatContext *ifmt_ctx, int in_vstream_idx) {
AVRational frame_rate = ifmt_ctx->streams[in_vstream_idx]->avg_frame_rate;
if (frame_rate.num == 0 && frame_rate.den == 0) {
@ -147,8 +150,8 @@ float get_frame_diff(AVFrame *frame1, AVFrame *frame2) {
// Convert both frames to the target pixel format using the provided function
AVPixelFormat target_pix_fmt = AV_PIX_FMT_RGB24;
AVFrame *rgb_frame1 = convert_avframe_pix_fmt(frame1, target_pix_fmt);
AVFrame *rgb_frame2 = convert_avframe_pix_fmt(frame2, target_pix_fmt);
AVFrame *rgb_frame1 = conversions::convert_avframe_pix_fmt(frame1, target_pix_fmt);
AVFrame *rgb_frame2 = conversions::convert_avframe_pix_fmt(frame2, target_pix_fmt);
if (!rgb_frame1 || !rgb_frame2) {
spdlog::error("Failed to convert frames to target pixel format");
@ -208,3 +211,6 @@ void av_packet_deleter(AVPacket *packet) {
packet = nullptr;
}
}
} // namespace avutils
} // namespace video2x

View File

@ -5,6 +5,9 @@
#include <spdlog/spdlog.h>
namespace video2x {
namespace conversions {
// Convert AVFrame format
AVFrame *convert_avframe_pix_fmt(AVFrame *src_frame, AVPixelFormat pix_fmt) {
AVFrame *dst_frame = av_frame_alloc();
@ -195,3 +198,6 @@ AVFrame *ncnn_mat_to_avframe(const ncnn::Mat &mat, AVPixelFormat pix_fmt) {
return dst_frame;
}
} // namespace conversions
} // namespace video2x

View File

@ -2,6 +2,9 @@
#include <spdlog/spdlog.h>
namespace video2x {
namespace decoder {
AVPixelFormat Decoder::hw_pix_fmt_ = AV_PIX_FMT_NONE;
Decoder::Decoder() : fmt_ctx_(nullptr), dec_ctx_(nullptr), in_vstream_idx_(-1) {}
@ -130,3 +133,6 @@ AVCodecContext *Decoder::get_codec_context() const {
int Decoder::get_video_stream_index() const {
return in_vstream_idx_;
}
} // namespace decoder
} // namespace video2x

View File

@ -9,6 +9,9 @@ extern "C" {
#include "avutils.h"
#include "conversions.h"
namespace video2x {
namespace encoder {
Encoder::Encoder()
: ofmt_ctx_(nullptr), enc_ctx_(nullptr), out_vstream_idx_(-1), stream_map_(nullptr) {}
@ -116,7 +119,7 @@ int Encoder::init(
enc_ctx_->pix_fmt = enc_cfg.pix_fmt;
} else {
// Automatically select the pixel format
enc_ctx_->pix_fmt = get_encoder_default_pix_fmt(encoder, dec_ctx->pix_fmt);
enc_ctx_->pix_fmt = avutils::get_encoder_default_pix_fmt(encoder, dec_ctx->pix_fmt);
if (enc_ctx_->pix_fmt == AV_PIX_FMT_NONE) {
spdlog::error("Could not get the default pixel format for the encoder");
return AVERROR(EINVAL);
@ -125,7 +128,7 @@ int Encoder::init(
}
if (frm_rate_mul > 0) {
AVRational in_frame_rate = get_video_frame_rate(ifmt_ctx, in_vstream_idx);
AVRational in_frame_rate = avutils::get_video_frame_rate(ifmt_ctx, in_vstream_idx);
enc_ctx_->framerate = {in_frame_rate.num * frm_rate_mul, in_frame_rate.den};
enc_ctx_->time_base = av_inv_q(enc_ctx_->framerate);
} else {
@ -146,8 +149,8 @@ int Encoder::init(
// Set extra AVOptions
for (const auto &[opt_name, opt_value] : enc_cfg.extra_opts) {
std::string opt_name_str = wstring_to_u8string(opt_name);
std::string opt_value_str = wstring_to_u8string(opt_value);
std::string opt_name_str = fsutils::wstring_to_u8string(opt_name);
std::string opt_value_str = fsutils::wstring_to_u8string(opt_value);
spdlog::debug("Setting encoder option '{}' to '{}'", opt_name_str, opt_value_str);
if (av_opt_set(enc_ctx_->priv_data, opt_name_str.c_str(), opt_value_str.c_str(), 0) < 0) {
@ -262,7 +265,7 @@ int Encoder::write_frame(AVFrame *frame, int64_t frame_idx) {
// Convert the frame to the encoder's pixel format if needed
if (frame->format != enc_ctx_->pix_fmt) {
converted_frame = convert_avframe_pix_fmt(frame, enc_ctx_->pix_fmt);
converted_frame = conversions::convert_avframe_pix_fmt(frame, enc_ctx_->pix_fmt);
if (!converted_frame) {
spdlog::error("Error converting frame to encoder's pixel format");
return AVERROR_EXTERNAL;
@ -384,3 +387,6 @@ int Encoder::get_output_video_stream_index() const {
int *Encoder::get_stream_map() const {
return stream_map_;
}
} // namespace encoder
} // namespace video2x

View File

@ -7,6 +7,9 @@
#include "fsutils.h"
#include "libplacebo.h"
namespace video2x {
namespace processors {
FilterLibplacebo::FilterLibplacebo(
uint32_t vk_device_index,
const std::filesystem::path &shader_path,
@ -39,14 +42,14 @@ FilterLibplacebo::~FilterLibplacebo() {
int FilterLibplacebo::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *) {
// Construct the shader path
std::filesystem::path shader_full_path;
if (filepath_is_readable(shader_path_)) {
if (fsutils::filepath_is_readable(shader_path_)) {
// If the shader path is directly readable, use it
shader_full_path = shader_path_;
} else {
// Construct the fallback path using std::filesystem
shader_full_path = find_resource_file(
shader_full_path = fsutils::find_resource_file(
std::filesystem::path(STR("models")) / STR("libplacebo") /
(path_to_string_type(shader_path_) + STR(".glsl"))
(fsutils::path_to_string_type(shader_path_) + STR(".glsl"))
);
}
@ -156,3 +159,6 @@ void FilterLibplacebo::get_output_dimensions(
out_width = proc_cfg.width;
out_height = proc_cfg.height;
}
} // namespace processors
} // namespace video2x

View File

@ -9,11 +9,14 @@
#include "conversions.h"
#include "fsutils.h"
namespace video2x {
namespace processors {
FilterRealesrgan::FilterRealesrgan(
int gpuid,
bool tta_mode,
int scaling_factor,
const StringType model_name
const fsutils::StringType model_name
)
: realesrgan_(nullptr),
gpuid_(gpuid),
@ -33,18 +36,20 @@ int FilterRealesrgan::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVB
std::filesystem::path model_param_path;
std::filesystem::path model_bin_path;
StringType param_file_name =
model_name_ + STR("-x") + to_string_type(scaling_factor_) + STR(".param");
StringType bin_file_name =
model_name_ + STR("-x") + to_string_type(scaling_factor_) + STR(".bin");
fsutils::StringType param_file_name =
model_name_ + STR("-x") + fsutils::to_string_type(scaling_factor_) + STR(".param");
fsutils::StringType bin_file_name =
model_name_ + STR("-x") + fsutils::to_string_type(scaling_factor_) + STR(".bin");
// Find the model paths by model name if provided
model_param_path = std::filesystem::path(STR("models")) / STR("realesrgan") / param_file_name;
model_bin_path = std::filesystem::path(STR("models")) / STR("realesrgan") / bin_file_name;
// Get the full paths using a function that possibly modifies or validates the path
std::filesystem::path model_param_full_path = find_resource_file(model_param_path);
std::filesystem::path model_bin_full_path = find_resource_file(model_bin_path);
std::filesystem::path model_param_full_path =
fsutils::find_resource_file(model_param_path);
std::filesystem::path model_bin_full_path =
fsutils::find_resource_file(model_bin_path);
// Check if the model files exist
if (!std::filesystem::exists(model_param_full_path)) {
@ -93,7 +98,7 @@ int FilterRealesrgan::filter(AVFrame *in_frame, AVFrame **out_frame) {
int ret;
// Convert the input frame to RGB24
ncnn::Mat in_mat = avframe_to_ncnn_mat(in_frame);
ncnn::Mat in_mat = conversions::avframe_to_ncnn_mat(in_frame);
if (in_mat.empty()) {
spdlog::error("Failed to convert AVFrame to ncnn::Mat");
return -1;
@ -111,7 +116,7 @@ int FilterRealesrgan::filter(AVFrame *in_frame, AVFrame **out_frame) {
}
// Convert ncnn::Mat to AVFrame
*out_frame = ncnn_mat_to_avframe(out_mat, out_pix_fmt_);
*out_frame = conversions::ncnn_mat_to_avframe(out_mat, out_pix_fmt_);
// Rescale PTS to encoder's time base
(*out_frame)->pts = av_rescale_q(in_frame->pts, in_time_base_, out_time_base_);
@ -130,3 +135,6 @@ void FilterRealesrgan::get_output_dimensions(
out_width = in_width * scaling_factor_;
out_height = in_height * scaling_factor_;
}
} // namespace processors
} // namespace video2x

View File

@ -10,6 +10,9 @@
#include <spdlog/spdlog.h>
namespace video2x {
namespace fsutils {
#if _WIN32
static std::filesystem::path get_executable_directory() {
std::vector<wchar_t> filepath(MAX_PATH);
@ -120,7 +123,7 @@ std::string wstring_to_u8string(const std::string &str) {
}
#endif
StringType path_to_string_type(const std::filesystem::path &path) {
fsutils::StringType path_to_string_type(const std::filesystem::path &path) {
#if _WIN32
return path.wstring();
#else
@ -128,10 +131,13 @@ StringType path_to_string_type(const std::filesystem::path &path) {
#endif
}
StringType to_string_type(int value) {
fsutils::StringType to_string_type(int value) {
#if _WIN32
return std::to_wstring(value);
#else
return std::to_string(value);
#endif
}
} // namespace fsutils
} // namespace video2x

View File

@ -8,13 +8,16 @@
#include "conversions.h"
#include "fsutils.h"
namespace video2x {
namespace processors {
InterpolatorRIFE::InterpolatorRIFE(
int gpuid,
bool tta_mode,
bool tta_temporal_mode,
bool uhd_mode,
int num_threads,
const StringType model_name
const fsutils::StringType model_name
)
: rife_(nullptr),
gpuid_(gpuid),
@ -39,7 +42,8 @@ int InterpolatorRIFE::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVB
model_param_dir = std::filesystem::path(STR("models")) / STR("rife") / model_name_;
// Get the full paths using a function that possibly modifies or validates the path
std::filesystem::path model_param_full_path = find_resource_file(model_param_dir);
std::filesystem::path model_param_full_path =
fsutils::find_resource_file(model_param_dir);
// Check if the model files exist
if (!std::filesystem::exists(model_param_full_path)) {
@ -50,13 +54,13 @@ int InterpolatorRIFE::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVB
// Automatically infer the RIFE model generation based on the model name
bool rife_v2 = false;
bool rife_v4 = false;
if (model_name_.find(STR("rife-v2")) != StringType::npos) {
if (model_name_.find(STR("rife-v2")) != fsutils::StringType::npos) {
rife_v2 = true;
} else if (model_name_.find(STR("rife-v3")) != StringType::npos) {
} else if (model_name_.find(STR("rife-v3")) != fsutils::StringType::npos) {
rife_v2 = true;
} else if (model_name_.find(STR("rife-v4")) != StringType::npos) {
} else if (model_name_.find(STR("rife-v4")) != fsutils::StringType::npos) {
rife_v4 = true;
} else if (model_name_.find(STR("rife")) == StringType::npos) {
} else if (model_name_.find(STR("rife")) == fsutils::StringType::npos) {
spdlog::critical("Failed to infer RIFE model generation from model name");
return -1;
}
@ -87,13 +91,13 @@ int InterpolatorRIFE::interpolate(
) {
int ret;
ncnn::Mat in_mat1 = avframe_to_ncnn_mat(prev_frame);
ncnn::Mat in_mat1 = conversions::avframe_to_ncnn_mat(prev_frame);
if (in_mat1.empty()) {
spdlog::error("Failed to convert AVFrame to ncnn::Mat");
return -1;
}
ncnn::Mat in_mat2 = avframe_to_ncnn_mat(in_frame);
ncnn::Mat in_mat2 = conversions::avframe_to_ncnn_mat(in_frame);
if (in_mat2.empty()) {
spdlog::error("Failed to convert AVFrame to ncnn::Mat");
return -1;
@ -109,7 +113,7 @@ int InterpolatorRIFE::interpolate(
}
// Convert ncnn::Mat to AVFrame
*out_frame = ncnn_mat_to_avframe(out_mat, out_pix_fmt_);
*out_frame = conversions::ncnn_mat_to_avframe(out_mat, out_pix_fmt_);
// Rescale PTS to encoder's time base
(*out_frame)->pts = av_rescale_q(in_frame->pts, in_time_base_, out_time_base_);
@ -128,3 +132,6 @@ void InterpolatorRIFE::get_output_dimensions(
out_width = in_width;
out_height = in_height;
}
} // namespace processors
} // namespace video2x

View File

@ -11,6 +11,9 @@ extern "C" {
#include <spdlog/spdlog.h>
namespace video2x {
namespace processors {
int init_libplacebo(
AVFilterGraph **filter_graph,
AVFilterContext **buffersrc_ctx,
@ -161,3 +164,6 @@ int init_libplacebo(
*filter_graph = graph;
return 0;
}
} // namespace processors
} // namespace video2x

View File

@ -9,16 +9,18 @@ extern "C" {
#include "avutils.h"
#include "decoder.h"
#include "encoder.h"
#include "logging.h"
#include "logutils.h"
#include "processor.h"
#include "processor_factory.h"
namespace video2x {
VideoProcessor::VideoProcessor(
const ProcessorConfig proc_cfg,
const EncoderConfig enc_cfg,
const processors::ProcessorConfig proc_cfg,
const encoder::EncoderConfig enc_cfg,
const uint32_t vk_device_idx,
const AVHWDeviceType hw_device_type,
const Video2xLogLevel log_level,
const logutils::Video2xLogLevel log_level,
const bool benchmark
)
: proc_cfg_(proc_cfg),
@ -51,8 +53,8 @@ int VideoProcessor::process(
state_.store(VideoProcessorState::Running);
// Create a smart pointer to manage the hardware device context
std::unique_ptr<AVBufferRef, decltype(&av_bufferref_deleter)> hw_ctx(
nullptr, &av_bufferref_deleter
std::unique_ptr<AVBufferRef, decltype(&avutils::av_bufferref_deleter)> hw_ctx(
nullptr, &avutils::av_bufferref_deleter
);
// Initialize hardware device context
@ -66,7 +68,7 @@ int VideoProcessor::process(
}
// Initialize input decoder
Decoder decoder;
decoder::Decoder decoder;
ret = decoder.init(hw_device_type_, hw_ctx.get(), in_fname);
if (ret < 0) {
return handle_error(ret, "Failed to initialize decoder");
@ -77,8 +79,8 @@ int VideoProcessor::process(
int in_vstream_idx = decoder.get_video_stream_index();
// Create and initialize the appropriate filter
std::unique_ptr<Processor> processor(
ProcessorFactory::instance().create_processor(proc_cfg_, vk_device_idx_)
std::unique_ptr<processors::Processor> processor(
processors::ProcessorFactory::instance().create_processor(proc_cfg_, vk_device_idx_)
);
if (processor == nullptr) {
return handle_error(-1, "Failed to create filter instance");
@ -94,7 +96,7 @@ int VideoProcessor::process(
}
// Initialize the encoder
Encoder encoder;
encoder::Encoder encoder;
ret = encoder.init(
hw_ctx.get(),
out_fname,
@ -140,9 +142,9 @@ int VideoProcessor::process(
// Process frames using the selected filter.
int VideoProcessor::process_frames(
Decoder &decoder,
Encoder &encoder,
std::unique_ptr<Processor> &processor
decoder::Decoder &decoder,
encoder::Encoder &encoder,
std::unique_ptr<processors::Processor> &processor
) {
char errbuf[AV_ERROR_MAX_STRING_SIZE];
int ret = 0;
@ -156,11 +158,13 @@ int VideoProcessor::process_frames(
// Reference to the previous frame does not require allocation
// It will be cloned from the current frame
std::unique_ptr<AVFrame, decltype(&av_frame_deleter)> prev_frame(nullptr, &av_frame_deleter);
std::unique_ptr<AVFrame, decltype(&avutils::av_frame_deleter)> prev_frame(
nullptr, &avutils::av_frame_deleter
);
// Allocate space for the decoded frames
std::unique_ptr<AVFrame, decltype(&av_frame_deleter)> frame(
av_frame_alloc(), &av_frame_deleter
std::unique_ptr<AVFrame, decltype(&avutils::av_frame_deleter)> frame(
av_frame_alloc(), &avutils::av_frame_deleter
);
if (frame == nullptr) {
spdlog::critical("Error allocating frame");
@ -168,8 +172,8 @@ int VideoProcessor::process_frames(
}
// Allocate space for the decoded packets
std::unique_ptr<AVPacket, decltype(&av_packet_deleter)> packet(
av_packet_alloc(), &av_packet_deleter
std::unique_ptr<AVPacket, decltype(&avutils::av_packet_deleter)> packet(
av_packet_alloc(), &avutils::av_packet_deleter
);
if (packet == nullptr) {
spdlog::critical("Error allocating packet");
@ -178,7 +182,7 @@ int VideoProcessor::process_frames(
// Estimate the total number of frames to process and store it in total_frames_
spdlog::debug("Estimating the total number of frames to process");
total_frames_ = get_video_frame_count(ifmt_ctx, in_vstream_idx);
total_frames_ = avutils::get_video_frame_count(ifmt_ctx, in_vstream_idx);
if (total_frames_ <= 0) {
spdlog::warn("Unable to determine the total number of frames");
@ -188,7 +192,7 @@ int VideoProcessor::process_frames(
}
// Set total frames for interpolation
if (processor->get_processing_mode() == ProcessingMode::Interpolate) {
if (processor->get_processing_mode() == processors::ProcessingMode::Interpolate) {
total_frames_.store(total_frames_.load() * proc_cfg_.frm_rate_mul);
}
@ -236,11 +240,11 @@ int VideoProcessor::process_frames(
// Process the frame based on the selected processing mode
AVFrame *proc_frame = nullptr;
switch (processor->get_processing_mode()) {
case ProcessingMode::Filter: {
case processors::ProcessingMode::Filter: {
ret = process_filtering(processor, encoder, frame.get(), proc_frame);
break;
}
case ProcessingMode::Interpolate: {
case processors::ProcessingMode::Interpolate: {
ret = process_interpolation(
processor, encoder, prev_frame, frame.get(), proc_frame
);
@ -276,9 +280,9 @@ int VideoProcessor::process_frames(
}
// Wrap flushed frames in unique_ptrs
std::vector<std::unique_ptr<AVFrame, decltype(&av_frame_deleter)>> flushed_frames;
std::vector<std::unique_ptr<AVFrame, decltype(&avutils::av_frame_deleter)>> flushed_frames;
for (AVFrame *raw_frame : raw_flushed_frames) {
flushed_frames.emplace_back(raw_frame, &av_frame_deleter);
flushed_frames.emplace_back(raw_frame, &avutils::av_frame_deleter);
}
// Encode and write all flushed frames
@ -301,7 +305,7 @@ int VideoProcessor::process_frames(
return ret;
}
int VideoProcessor::write_frame(AVFrame *frame, Encoder &encoder) {
int VideoProcessor::write_frame(AVFrame *frame, encoder::Encoder &encoder) {
char errbuf[AV_ERROR_MAX_STRING_SIZE];
int ret = 0;
@ -340,8 +344,8 @@ int VideoProcessor::write_raw_packet(
}
int VideoProcessor::process_filtering(
std::unique_ptr<Processor> &processor,
Encoder &encoder,
std::unique_ptr<processors::Processor> &processor,
encoder::Encoder &encoder,
AVFrame *frame,
AVFrame *proc_frame
) {
@ -349,7 +353,7 @@ int VideoProcessor::process_filtering(
int ret = 0;
// Cast the processor to a Filter
Filter *filter = static_cast<Filter *>(processor.get());
processors::Filter *filter = static_cast<processors::Filter *>(processor.get());
// Process the frame using the filter
ret = filter->filter(frame, &proc_frame);
@ -359,17 +363,18 @@ int VideoProcessor::process_filtering(
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error filtering frame: {}", errbuf);
} else if (ret == 0 && proc_frame != nullptr) {
auto processed_frame =
std::unique_ptr<AVFrame, decltype(&av_frame_deleter)>(proc_frame, &av_frame_deleter);
auto processed_frame = std::unique_ptr<AVFrame, decltype(&avutils::av_frame_deleter)>(
proc_frame, &avutils::av_frame_deleter
);
ret = write_frame(processed_frame.get(), encoder);
}
return ret;
}
int VideoProcessor::process_interpolation(
std::unique_ptr<Processor> &processor,
Encoder &encoder,
std::unique_ptr<AVFrame, decltype(&av_frame_deleter)> &prev_frame,
std::unique_ptr<processors::Processor> &processor,
encoder::Encoder &encoder,
std::unique_ptr<AVFrame, decltype(&avutils::av_frame_deleter)> &prev_frame,
AVFrame *frame,
AVFrame *proc_frame
) {
@ -377,7 +382,8 @@ int VideoProcessor::process_interpolation(
int ret = 0;
// Cast the processor to an Interpolator
Interpolator *interpolator = static_cast<Interpolator *>(processor.get());
processors::Interpolator *interpolator =
static_cast<processors::Interpolator *>(processor.get());
// Calculate the time step for each frame
float time_step = 1.0f / static_cast<float>(proc_cfg_.frm_rate_mul);
@ -386,7 +392,7 @@ int VideoProcessor::process_interpolation(
// Check if a scene change is detected
bool skip_frame = false;
if (proc_cfg_.scn_det_thresh < 100.0 && prev_frame.get() != nullptr) {
float frame_diff = get_frame_diff(prev_frame.get(), frame);
float frame_diff = avutils::get_frame_diff(prev_frame.get(), frame);
if (frame_diff > proc_cfg_.scn_det_thresh) {
spdlog::debug(
"Scene change detected ({:.2f}%), skipping frame {}", frame_diff, frame_idx_.load()
@ -417,8 +423,8 @@ int VideoProcessor::process_interpolation(
spdlog::critical("Error interpolating frame: {}", errbuf);
return ret;
} else if (ret == 0 && proc_frame != nullptr) {
auto processed_frame = std::unique_ptr<AVFrame, decltype(&av_frame_deleter)>(
proc_frame, &av_frame_deleter
auto processed_frame = std::unique_ptr<AVFrame, decltype(&avutils::av_frame_deleter)>(
proc_frame, &avutils::av_frame_deleter
);
ret = write_frame(processed_frame.get(), encoder);
@ -438,3 +444,5 @@ int VideoProcessor::process_interpolation(
prev_frame.reset(av_frame_clone(frame));
return ret;
}
} // namespace video2x

View File

@ -1,4 +1,4 @@
#include "logging.h"
#include "logutils.h"
extern "C" {
#include <libavutil/avutil.h>
@ -6,6 +6,9 @@ extern "C" {
#include <spdlog/spdlog.h>
namespace video2x {
namespace logutils {
void set_log_level(Video2xLogLevel log_level) {
switch (log_level) {
case Video2xLogLevel::Trace:
@ -42,3 +45,6 @@ void set_log_level(Video2xLogLevel log_level) {
break;
}
}
} // namespace logutils
} // namespace video2x

View File

@ -7,6 +7,9 @@
#include "filter_realesrgan.h"
#include "interpolator_rife.h"
namespace video2x {
namespace processors {
// Access the singleton instance
ProcessorFactory &ProcessorFactory::instance() {
static ProcessorFactory factory;
@ -111,3 +114,6 @@ void ProcessorFactory::init_default_processors(ProcessorFactory &factory) {
}
);
}
} // namespace processors
} // namespace video2x

View File

@ -5,7 +5,7 @@
// Structure to hold parsed arguments
struct Arguments {
Video2xLogLevel log_level = Video2xLogLevel::Info;
video2x::logutils::Video2xLogLevel log_level = video2x::logutils::Video2xLogLevel::Info;
bool no_progress = false;
// General options
@ -24,6 +24,6 @@ struct Arguments {
char *argv[],
#endif
Arguments &arguments,
ProcessorConfig &proc_cfg,
EncoderConfig &enc_cfg
video2x::processors::ProcessorConfig &proc_cfg,
video2x::encoder::EncoderConfig &enc_cfg
);

View File

@ -8,8 +8,10 @@
extern std::atomic<bool> newline_required;
void set_spdlog_level(Video2xLogLevel log_level);
void set_spdlog_level(video2x::logutils::Video2xLogLevel log_level);
std::optional<Video2xLogLevel> find_log_level_by_name(const StringType &log_level_name);
std::optional<video2x::logutils::Video2xLogLevel> find_log_level_by_name(
const video2x::fsutils::StringType &log_level_name
);
void newline_safe_ffmpeg_log_callback(void *ptr, int level, const char *fmt, va_list vl);

View File

@ -61,8 +61,8 @@ void validate_greater_equal_one(const T &value, const std::string &option_name)
}
}
void validate_anime4k_shader_name(const StringType &shader_name);
void validate_anime4k_shader_name(const video2x::fsutils::StringType &shader_name);
void validate_realesrgan_model_name(const StringType &model_name);
void validate_realesrgan_model_name(const video2x::fsutils::StringType &model_name);
void validate_rife_model_name(const StringType &model_name);
void validate_rife_model_name(const video2x::fsutils::StringType &model_name);

View File

@ -59,8 +59,8 @@ int parse_args(
char *argv[],
#endif
Arguments &arguments,
ProcessorConfig &proc_cfg,
EncoderConfig &enc_cfg
video2x::processors::ProcessorConfig &proc_cfg,
video2x::encoder::EncoderConfig &enc_cfg
) {
try {
// clang-format off
@ -68,19 +68,20 @@ int parse_args(
all_opts.add_options()
("help", "Display this help page")
("version,V", "Print program version and exit")
("log-level", PO_STR_VALUE<StringType>()->default_value(STR("info"), "info"),
("log-level", PO_STR_VALUE<video2x::fsutils::StringType>()
->default_value(STR("info"), "info"),
"Set verbosity level (trace, debug, info, warn, error, critical, none)")
("no-progress", po::bool_switch(&arguments.no_progress),
"Do not display the progress bar")
("list-devices,l", "List the available Vulkan devices (GPUs)")
// General Processing Options
("input,i", PO_STR_VALUE<StringType>(), "Input video file path")
("output,o", PO_STR_VALUE<StringType>(), "Output video file path")
("processor,p", PO_STR_VALUE<StringType>(),
("input,i", PO_STR_VALUE<video2x::fsutils::StringType>(), "Input video file path")
("output,o", PO_STR_VALUE<video2x::fsutils::StringType>(), "Output video file path")
("processor,p", PO_STR_VALUE<video2x::fsutils::StringType>(),
"Processor to use (libplacebo, realesrgan, rife)")
("hwaccel,a", PO_STR_VALUE<StringType>()->default_value(STR("none"), "none"),
"Hardware acceleration method (decoding)")
("hwaccel,a", PO_STR_VALUE<video2x::fsutils::StringType>()
->default_value(STR("none"), "none"), "Hardware acceleration method (decoding)")
("device,d", po::value<uint32_t>(&arguments.vk_device_index)->default_value(0),
"Vulkan device index (GPU ID)")
("benchmark,b", po::bool_switch(&arguments.benchmark),
@ -90,10 +91,10 @@ int parse_args(
po::options_description encoder_opts("Encoder options");
encoder_opts.add_options()
("codec,c", PO_STR_VALUE<StringType>()->default_value(STR("libx264"), "libx264"),
"Output codec")
("codec,c", PO_STR_VALUE<video2x::fsutils::StringType>()
->default_value(STR("libx264"), "libx264"), "Output codec")
("no-copy-streams", "Do not copy audio and subtitle streams")
("pix-fmt", PO_STR_VALUE<StringType>(), "Output pixel format")
("pix-fmt", PO_STR_VALUE<video2x::fsutils::StringType>(), "Output pixel format")
("bit-rate", po::value<int64_t>(&enc_cfg.bit_rate)->default_value(0),
"Bitrate in bits per second")
("rc-buffer-size", po::value<int>(&enc_cfg.rc_buffer_size)->default_value(0),
@ -118,8 +119,8 @@ int parse_args(
"Delay in milliseconds for encoder")
// Extra encoder options (key-value pairs)
("extra-encoder-option,e", PO_STR_VALUE<std::vector<StringType>>()->multitoken(),
"Additional AVOption(s) for the encoder (format: -e key=value)")
("extra-encoder-option,e", PO_STR_VALUE<std::vector<video2x::fsutils::StringType>>()
->multitoken(), "Additional AVOption(s) for the encoder (format: -e key=value)")
;
po::options_description upscale_opts("Upscaling options");
@ -144,7 +145,7 @@ int parse_args(
po::options_description libplacebo_opts("libplacebo options");
libplacebo_opts.add_options()
("libplacebo-shader", PO_STR_VALUE<StringType>()
("libplacebo-shader", PO_STR_VALUE<video2x::fsutils::StringType>()
->default_value(STR("anime4k-v4-a"), "anime4k-v4-a")
->notifier(validate_anime4k_shader_name),
"Name/path of the GLSL shader file to use (built-in: anime4k-v4-a, anime4k-v4-a+a, "
@ -153,7 +154,7 @@ int parse_args(
po::options_description realesrgan_opts("RealESRGAN options");
realesrgan_opts.add_options()
("realesrgan-model", PO_STR_VALUE<StringType>()
("realesrgan-model", PO_STR_VALUE<video2x::fsutils::StringType>()
->default_value(STR("realesr-animevideov3"), "realesr-animevideov3")
->notifier(validate_realesrgan_model_name),
"Name of the RealESRGAN model to use (realesr-animevideov3, realesrgan-plus-anime, "
@ -162,8 +163,8 @@ int parse_args(
po::options_description rife_opts("RIFE options");
rife_opts.add_options()
("rife-model", PO_STR_VALUE<StringType>()->default_value(STR("rife-v4.6"), "rife-v4.6")
->notifier(validate_rife_model_name),
("rife-model", PO_STR_VALUE<video2x::fsutils::StringType>()
->default_value(STR("rife-v4.6"), "rife-v4.6")->notifier(validate_rife_model_name),
"Name of the RIFE model to use (rife, rife-HD, rife-UHD, rife-anime, rife-v2, "
"rife-v2.3, rife-v2.4, rife-v3.0, rife-v3.1, rife-v4, rife-v4.6)")
("rife-uhd", "Enable Ultra HD mode")
@ -220,8 +221,8 @@ int parse_args(
}
if (vm.count("log-level")) {
std::optional<Video2xLogLevel> log_level =
find_log_level_by_name(vm["log-level"].as<StringType>());
std::optional<video2x::logutils::Video2xLogLevel> log_level =
find_log_level_by_name(vm["log-level"].as<video2x::fsutils::StringType>());
if (!log_level.has_value()) {
spdlog::critical("Invalid log level specified.");
return -1;
@ -237,7 +238,8 @@ int parse_args(
// Assign positional arguments
if (vm.count("input")) {
arguments.in_fname = std::filesystem::path(vm["input"].as<StringType>());
arguments.in_fname =
std::filesystem::path(vm["input"].as<video2x::fsutils::StringType>());
spdlog::info("Processing file: {}", arguments.in_fname.u8string());
} else {
spdlog::critical("Input file path is required.");
@ -245,7 +247,8 @@ int parse_args(
}
if (vm.count("output")) {
arguments.out_fname = std::filesystem::path(vm["output"].as<StringType>());
arguments.out_fname =
std::filesystem::path(vm["output"].as<video2x::fsutils::StringType>());
} else if (!arguments.benchmark) {
spdlog::critical("Output file path is required.");
return -1;
@ -253,13 +256,14 @@ int parse_args(
// Parse processor type
if (vm.count("processor")) {
StringType processor_type_str = vm["processor"].as<StringType>();
video2x::fsutils::StringType processor_type_str =
vm["processor"].as<video2x::fsutils::StringType>();
if (processor_type_str == STR("libplacebo")) {
proc_cfg.processor_type = ProcessorType::Libplacebo;
proc_cfg.processor_type = video2x::processors::ProcessorType::Libplacebo;
} else if (processor_type_str == STR("realesrgan")) {
proc_cfg.processor_type = ProcessorType::RealESRGAN;
proc_cfg.processor_type = video2x::processors::ProcessorType::RealESRGAN;
} else if (processor_type_str == STR("rife")) {
proc_cfg.processor_type = ProcessorType::RIFE;
proc_cfg.processor_type = video2x::processors::ProcessorType::RIFE;
} else {
spdlog::critical(
"Invalid processor specified. Must be 'libplacebo', 'realesrgan', or 'rife'."
@ -274,7 +278,8 @@ int parse_args(
// Parse hardware acceleration method
arguments.hw_device_type = AV_HWDEVICE_TYPE_NONE;
if (vm.count("hwaccel")) {
StringType hwaccel_str = vm["hwaccel"].as<StringType>();
video2x::fsutils::StringType hwaccel_str =
vm["hwaccel"].as<video2x::fsutils::StringType>();
if (hwaccel_str != STR("none")) {
arguments.hw_device_type =
av_hwdevice_find_type_by_name(wstring_to_u8string(hwaccel_str).c_str());
@ -290,7 +295,7 @@ int parse_args(
// Parse codec to AVCodec
enc_cfg.codec = AV_CODEC_ID_H264;
if (vm.count("codec")) {
StringType codec_str = vm["codec"].as<StringType>();
video2x::fsutils::StringType codec_str = vm["codec"].as<video2x::fsutils::StringType>();
const AVCodec *codec =
avcodec_find_encoder_by_name(wstring_to_u8string(codec_str).c_str());
if (codec == nullptr) {
@ -306,7 +311,8 @@ int parse_args(
// Parse pixel format to AVPixelFormat
enc_cfg.pix_fmt = AV_PIX_FMT_NONE;
if (vm.count("pix-fmt")) {
StringType pix_fmt_str = vm["pix-fmt"].as<StringType>();
video2x::fsutils::StringType pix_fmt_str =
vm["pix-fmt"].as<video2x::fsutils::StringType>();
if (!pix_fmt_str.empty()) {
enc_cfg.pix_fmt = av_get_pix_fmt(wstring_to_u8string(pix_fmt_str).c_str());
if (enc_cfg.pix_fmt == AV_PIX_FMT_NONE) {
@ -320,11 +326,12 @@ int parse_args(
// Parse extra AVOptions
if (vm.count("extra-encoder-option")) {
for (const auto &opt : vm["extra-encoder-option"].as<std::vector<StringType>>()) {
for (const auto &opt :
vm["extra-encoder-option"].as<std::vector<video2x::fsutils::StringType>>()) {
size_t eq_pos = opt.find('=');
if (eq_pos != StringType::npos) {
StringType key = opt.substr(0, eq_pos);
StringType value = opt.substr(eq_pos + 1);
if (eq_pos != video2x::fsutils::StringType::npos) {
video2x::fsutils::StringType key = opt.substr(0, eq_pos);
video2x::fsutils::StringType value = opt.substr(eq_pos + 1);
enc_cfg.extra_opts.push_back(std::make_pair(key, value));
} else {
spdlog::critical("Invalid extra AVOption format: {}", wstring_to_u8string(opt));
@ -335,7 +342,7 @@ int parse_args(
// Parse processor-specific configurations
switch (proc_cfg.processor_type) {
case ProcessorType::Libplacebo: {
case video2x::processors::ProcessorType::Libplacebo: {
if (!vm.count("libplacebo-shader")) {
spdlog::critical("Shader name/path must be set for libplacebo.");
return -1;
@ -345,13 +352,14 @@ int parse_args(
return -1;
}
proc_cfg.processor_type = ProcessorType::Libplacebo;
LibplaceboConfig libplacebo_config;
libplacebo_config.shader_path = vm["libplacebo-shader"].as<StringType>();
proc_cfg.processor_type = video2x::processors::ProcessorType::Libplacebo;
video2x::processors::LibplaceboConfig libplacebo_config;
libplacebo_config.shader_path =
vm["libplacebo-shader"].as<video2x::fsutils::StringType>();
proc_cfg.config = libplacebo_config;
break;
}
case ProcessorType::RealESRGAN: {
case video2x::processors::ProcessorType::RealESRGAN: {
if (!vm.count("realesrgan-model")) {
spdlog::critical("RealESRGAN model name must be set for RealESRGAN.");
return -1;
@ -362,14 +370,15 @@ int parse_args(
return -1;
}
proc_cfg.processor_type = ProcessorType::RealESRGAN;
RealESRGANConfig realesrgan_config;
proc_cfg.processor_type = video2x::processors::ProcessorType::RealESRGAN;
video2x::processors::RealESRGANConfig realesrgan_config;
realesrgan_config.tta_mode = false;
realesrgan_config.model_name = vm["realesrgan-model"].as<StringType>();
realesrgan_config.model_name =
vm["realesrgan-model"].as<video2x::fsutils::StringType>();
proc_cfg.config = realesrgan_config;
break;
}
case ProcessorType::RIFE: {
case video2x::processors::ProcessorType::RIFE: {
if (!vm.count("rife-model")) {
spdlog::critical("RIFE model name must be set for RIFE.");
return -1;
@ -379,13 +388,13 @@ int parse_args(
return -1;
}
proc_cfg.processor_type = ProcessorType::RIFE;
RIFEConfig rife_config;
proc_cfg.processor_type = video2x::processors::ProcessorType::RIFE;
video2x::processors::RIFEConfig rife_config;
rife_config.tta_mode = false;
rife_config.tta_temporal_mode = false;
rife_config.uhd_mode = vm.count("rife-uhd") > 0;
rife_config.num_threads = 0;
rife_config.model_name = vm["rife-model"].as<StringType>();
rife_config.model_name = vm["rife-model"].as<video2x::fsutils::StringType>();
proc_cfg.config = rife_config;
break;
}

View File

@ -9,27 +9,27 @@ extern "C" {
std::atomic<bool> newline_required = false;
void set_spdlog_level(Video2xLogLevel log_level) {
void set_spdlog_level(video2x::logutils::Video2xLogLevel log_level) {
switch (log_level) {
case Video2xLogLevel::Trace:
case video2x::logutils::Video2xLogLevel::Trace:
spdlog::set_level(spdlog::level::trace);
break;
case Video2xLogLevel::Debug:
case video2x::logutils::Video2xLogLevel::Debug:
spdlog::set_level(spdlog::level::debug);
break;
case Video2xLogLevel::Info:
case video2x::logutils::Video2xLogLevel::Info:
spdlog::set_level(spdlog::level::info);
break;
case Video2xLogLevel::Warning:
case video2x::logutils::Video2xLogLevel::Warning:
spdlog::set_level(spdlog::level::warn);
break;
case Video2xLogLevel::Error:
case video2x::logutils::Video2xLogLevel::Error:
spdlog::set_level(spdlog::level::err);
break;
case Video2xLogLevel::Critical:
case video2x::logutils::Video2xLogLevel::Critical:
spdlog::set_level(spdlog::level::critical);
break;
case Video2xLogLevel::Off:
case video2x::logutils::Video2xLogLevel::Off:
spdlog::set_level(spdlog::level::off);
break;
default:
@ -38,22 +38,26 @@ void set_spdlog_level(Video2xLogLevel log_level) {
}
}
std::optional<Video2xLogLevel> find_log_level_by_name(const StringType &log_level_name) {
std::optional<video2x::logutils::Video2xLogLevel> find_log_level_by_name(
const video2x::fsutils::StringType &log_level_name
) {
// Static lookup table mapping lowercase log level names to Video2xLogLevel values
static const std::unordered_map<StringType, Video2xLogLevel> log_level_map = {
{STR("trace"), Video2xLogLevel::Trace},
{STR("debug"), Video2xLogLevel::Debug},
{STR("info"), Video2xLogLevel::Info},
{STR("warning"), Video2xLogLevel::Warning},
{STR("warn"), Video2xLogLevel::Warning},
{STR("error"), Video2xLogLevel::Error},
{STR("critical"), Video2xLogLevel::Critical},
{STR("off"), Video2xLogLevel::Off},
{STR("none"), Video2xLogLevel::Off}
static const std::
unordered_map<video2x::fsutils::StringType, video2x::logutils::Video2xLogLevel>
log_level_map = {
{STR("trace"), video2x::logutils::Video2xLogLevel::Trace},
{STR("debug"), video2x::logutils::Video2xLogLevel::Debug},
{STR("info"), video2x::logutils::Video2xLogLevel::Info},
{STR("warning"), video2x::logutils::Video2xLogLevel::Warning},
{STR("warn"), video2x::logutils::Video2xLogLevel::Warning},
{STR("error"), video2x::logutils::Video2xLogLevel::Error},
{STR("critical"), video2x::logutils::Video2xLogLevel::Critical},
{STR("off"), video2x::logutils::Video2xLogLevel::Off},
{STR("none"), video2x::logutils::Video2xLogLevel::Off}
};
// Normalize the input to lowercase
StringType normalized_name = log_level_name;
video2x::fsutils::StringType normalized_name = log_level_name;
std::transform(
normalized_name.begin(), normalized_name.end(), normalized_name.begin(), ::tolower
);

View File

@ -2,8 +2,8 @@
#include <unordered_set>
void validate_anime4k_shader_name(const StringType &shader_name) {
static const std::unordered_set<StringType> valid_anime4k_shaders = {
void validate_anime4k_shader_name(const video2x::fsutils::StringType &shader_name) {
static const std::unordered_set<video2x::fsutils::StringType> valid_anime4k_shaders = {
STR("anime4k-v4-a"),
STR("anime4k-v4-a+a"),
STR("anime4k-v4-b"),
@ -22,8 +22,8 @@ void validate_anime4k_shader_name(const StringType &shader_name) {
}
}
void validate_realesrgan_model_name(const StringType &model_name) {
static const std::unordered_set<StringType> valid_realesrgan_models = {
void validate_realesrgan_model_name(const video2x::fsutils::StringType &model_name) {
static const std::unordered_set<video2x::fsutils::StringType> valid_realesrgan_models = {
STR("realesrgan-plus"), STR("realesrgan-plus-anime"), STR("realesr-animevideov3")
};
if (valid_realesrgan_models.count(model_name) == 0) {
@ -36,8 +36,8 @@ void validate_realesrgan_model_name(const StringType &model_name) {
}
}
void validate_rife_model_name(const StringType &model_name) {
static const std::unordered_set<StringType> valid_realesrgan_models = {
void validate_rife_model_name(const video2x::fsutils::StringType &model_name) {
static const std::unordered_set<video2x::fsutils::StringType> valid_realesrgan_models = {
STR("rife"),
STR("rife-HD"),
STR("rife-UHD"),

View File

@ -55,8 +55,8 @@ int main(int argc, char **argv) {
#endif
// Initialize arguments structures
Arguments arguments;
ProcessorConfig proc_cfg;
EncoderConfig enc_cfg;
video2x::processors::ProcessorConfig proc_cfg;
video2x::encoder::EncoderConfig enc_cfg;
// Parse command line arguments
int parse_ret = parse_args(argc, argv, arguments, proc_cfg, enc_cfg);
@ -72,7 +72,7 @@ int main(int argc, char **argv) {
}
// Create video processor object
VideoProcessor video_processor = VideoProcessor(
video2x::VideoProcessor video_processor = video2x::VideoProcessor(
proc_cfg,
enc_cfg,
arguments.vk_device_index,
@ -123,14 +123,14 @@ int main(int argc, char **argv) {
if (ch == ' ' || ch == '\n') {
{
// Toggle pause state
if (video_processor.get_state() == VideoProcessorState::Paused) {
if (video_processor.get_state() == video2x::VideoProcessorState::Paused) {
video_processor.resume();
} else {
video_processor.pause();
}
// Print message based on current state and pause/resume the timer
if (video_processor.get_state() == VideoProcessorState::Paused) {
if (video_processor.get_state() == video2x::VideoProcessorState::Paused) {
std::cout
<< "\r\033[KProcessing paused; press [space] to resume, [q] to abort.";
std::cout.flush();
@ -159,7 +159,7 @@ int main(int argc, char **argv) {
int64_t total_frames = video_processor.get_total_frames();
// Print the progress bar if processing is not paused
if (video_processor.get_state() != VideoProcessorState::Paused &&
if (video_processor.get_state() != video2x::VideoProcessorState::Paused &&
(total_frames > 0 || processed_frames > 0)) {
double percentage = total_frames > 0 ? static_cast<double>(processed_frames) *
100.0 / static_cast<double>(total_frames)
@ -214,10 +214,11 @@ int main(int argc, char **argv) {
}
// Print final message based on processing result
if (video_processor.get_state() == VideoProcessorState::Aborted) {
if (video_processor.get_state() == video2x::VideoProcessorState::Aborted) {
spdlog::warn("Video processing aborted");
return 2;
} else if (proc_ret != 0 || video_processor.get_state() == VideoProcessorState::Failed) {
} else if (proc_ret != 0 ||
video_processor.get_state() == video2x::VideoProcessorState::Failed) {
spdlog::critical("Video processing failed with error code {}", proc_ret);
return 1;
} else {