From 94e69f9f62b2bd3364b9c9f52ad901697a2c5dd8 Mon Sep 17 00:00:00 2001 From: k4yt3x Date: Fri, 1 Nov 2024 22:19:01 -0400 Subject: [PATCH] fix(libvideo2x): fixed wide character string paths on Windows --- include/libvideo2x/decoder.h | 4 +- include/libvideo2x/encoder.h | 4 +- include/libvideo2x/libvideo2x.h | 15 +++++- include/libvideo2x/realesrgan_filter.h | 4 +- src/decoder.cpp | 6 +-- src/encoder.cpp | 8 +-- src/libvideo2x.cpp | 68 +++++++++++++++++++------- src/realesrgan_filter.cpp | 11 +++-- 8 files changed, 84 insertions(+), 36 deletions(-) diff --git a/include/libvideo2x/decoder.h b/include/libvideo2x/decoder.h index bd0312b..dc90c3e 100644 --- a/include/libvideo2x/decoder.h +++ b/include/libvideo2x/decoder.h @@ -1,6 +1,8 @@ #ifndef DECODER_H #define DECODER_H +#include + extern "C" { #include #include @@ -9,7 +11,7 @@ extern "C" { int init_decoder( AVHWDeviceType hw_type, AVBufferRef *hw_ctx, - const char *in_fname, + std::filesystem::path in_fpath, AVFormatContext **fmt_ctx, AVCodecContext **dec_ctx, int *vstream_idx diff --git a/include/libvideo2x/encoder.h b/include/libvideo2x/encoder.h index 090ceb8..65d39a4 100644 --- a/include/libvideo2x/encoder.h +++ b/include/libvideo2x/encoder.h @@ -1,6 +1,8 @@ #ifndef ENCODER_H #define ENCODER_H +#include + extern "C" { #include #include @@ -11,7 +13,7 @@ extern "C" { int init_encoder( AVBufferRef *hw_ctx, - const char *out_fname, + std::filesystem::path out_fpath, AVFormatContext *ifmt_ctx, AVFormatContext **ofmt_ctx, AVCodecContext **enc_ctx, diff --git a/include/libvideo2x/libvideo2x.h b/include/libvideo2x/libvideo2x.h index 38addab..5b315e6 100644 --- a/include/libvideo2x/libvideo2x.h +++ b/include/libvideo2x/libvideo2x.h @@ -43,7 +43,11 @@ enum Libvideo2xLogLevel { struct LibplaceboConfig { int out_width; int out_height; +#ifdef _WIN32 + const wchar_t *shader_path; +#else const char *shader_path; +#endif }; // Configuration for RealESRGAN filter @@ -51,7 +55,11 @@ struct RealESRGANConfig { int gpuid; bool tta_mode; int scaling_factor; - const char *model; +#ifdef _WIN32 + const wchar_t *model_path; +#else + const char *model_path; +#endif }; // Unified filter configuration @@ -87,8 +95,13 @@ struct VideoProcessingContext { // C-compatible process_video function LIBVIDEO2X_API int process_video( +#ifdef _WIN32 + const wchar_t *in_fname, + const wchar_t *out_fname, +#else const char *in_fname, const char *out_fname, +#endif enum Libvideo2xLogLevel log_level, bool benchmark, enum AVHWDeviceType hw_device_type, diff --git a/include/libvideo2x/realesrgan_filter.h b/include/libvideo2x/realesrgan_filter.h index e046c1f..88e3695 100644 --- a/include/libvideo2x/realesrgan_filter.h +++ b/include/libvideo2x/realesrgan_filter.h @@ -17,7 +17,7 @@ class RealesrganFilter : public Filter { int gpuid; bool tta_mode; int scaling_factor; - const char *model; + const std::filesystem::path model_path; const std::filesystem::path custom_model_param_path; const std::filesystem::path custom_model_bin_path; AVRational in_time_base; @@ -30,7 +30,7 @@ class RealesrganFilter : public Filter { int gpuid = 0, bool tta_mode = false, int scaling_factor = 4, - const char *model = "realesr-animevideov3", + const std::filesystem::path model = std::filesystem::path("realesr-animevideov3"), const std::filesystem::path custom_model_param_path = std::filesystem::path(), const std::filesystem::path custom_model_bin_path = std::filesystem::path() ); diff --git a/src/decoder.cpp b/src/decoder.cpp index 48ae246..d5a05ee 100644 --- a/src/decoder.cpp +++ b/src/decoder.cpp @@ -22,7 +22,7 @@ static enum AVPixelFormat get_hw_format(AVCodecContext *_, const enum AVPixelFor int init_decoder( AVHWDeviceType hw_type, AVBufferRef *hw_ctx, - const char *in_fname, + std::filesystem::path in_fpath, AVFormatContext **fmt_ctx, AVCodecContext **dec_ctx, int *vstream_idx @@ -31,8 +31,8 @@ int init_decoder( AVCodecContext *codec_ctx = NULL; int ret; - if ((ret = avformat_open_input(&ifmt_ctx, in_fname, NULL, NULL)) < 0) { - spdlog::error("Could not open input file '{}'", in_fname); + if ((ret = avformat_open_input(&ifmt_ctx, in_fpath.u8string().c_str(), NULL, NULL)) < 0) { + spdlog::error("Could not open input file '{}'", in_fpath.u8string().c_str()); return ret; } diff --git a/src/encoder.cpp b/src/encoder.cpp index 53d879c..d5fe5ce 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -19,7 +19,7 @@ static enum AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder) { int init_encoder( AVBufferRef *hw_ctx, - const char *out_fname, + std::filesystem::path out_fpath, AVFormatContext *ifmt_ctx, AVFormatContext **ofmt_ctx, AVCodecContext **enc_ctx, @@ -33,7 +33,7 @@ int init_encoder( int stream_index = 0; int ret; - avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fname); + avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fpath.u8string().c_str()); if (!fmt_ctx) { spdlog::error("Could not create output context"); return AVERROR_UNKNOWN; @@ -174,9 +174,9 @@ int init_encoder( // Open the output file if (!(fmt_ctx->oformat->flags & AVFMT_NOFILE)) { - ret = avio_open(&fmt_ctx->pb, out_fname, AVIO_FLAG_WRITE); + ret = avio_open(&fmt_ctx->pb, out_fpath.u8string().c_str(), AVIO_FLAG_WRITE); if (ret < 0) { - spdlog::error("Could not open output file '{}'", out_fname); + spdlog::error("Could not open output file '{}'", out_fpath.u8string().c_str()); return ret; } } diff --git a/src/libvideo2x.cpp b/src/libvideo2x.cpp index ae7eaf3..ee1438f 100644 --- a/src/libvideo2x.cpp +++ b/src/libvideo2x.cpp @@ -5,8 +5,11 @@ #include #include +extern "C" { +#include +} + #include -#include #include "decoder.h" #include "encoder.h" @@ -46,28 +49,46 @@ static int process_frames( std::vector flushed_frames; // Get the total number of frames in the video with OpenCV - spdlog::debug("Reading total number of frames with OpenCV"); - cv::VideoCapture cap(ifmt_ctx->url); - if (!cap.isOpened()) { - spdlog::error("Failed to open video file with OpenCV"); - return -1; + spdlog::debug("Reading total number of frames"); + proc_ctx->total_frames = ifmt_ctx->streams[vstream_idx]->nb_frames; + if (proc_ctx->total_frames > 0) { + spdlog::debug("Read total number of frames from 'nb_frames': {}", proc_ctx->total_frames); + } else { + spdlog::warn("Estimating the total number of frames from duration * fps"); + // Calculate duration in seconds + double duration_secs = static_cast(ifmt_ctx->streams[vstream_idx]->duration) * + av_q2d(ifmt_ctx->streams[vstream_idx]->time_base); + spdlog::debug("Video duration: {}s", duration_secs); + + // Calculate average FPS + double fps = av_q2d(ifmt_ctx->streams[vstream_idx]->avg_frame_rate); + if (fps <= 0) { + spdlog::debug("Unable to read the average frame rate from 'avg_frame_rate'"); + fps = av_q2d(ifmt_ctx->streams[vstream_idx]->r_frame_rate); + } + if (fps <= 0) { + spdlog::debug("Unable to read the average frame rate from 'r_frame_rate'"); + fps = av_q2d(av_guess_frame_rate(ifmt_ctx, ifmt_ctx->streams[vstream_idx], nullptr)); + } + if (fps <= 0) { + spdlog::debug("Unable to estimate the average frame rate with 'av_guess_frame_rate'"); + fps = av_q2d(ifmt_ctx->streams[vstream_idx]->time_base); + } + if (fps <= 0) { + spdlog::debug("Unable to estimate the video's average frame rate"); + } else { + // Calculate total frames + proc_ctx->total_frames = static_cast(duration_secs * fps); + } } - proc_ctx->total_frames = static_cast(cap.get(cv::CAP_PROP_FRAME_COUNT)); - cap.release(); // Check if the total number of frames is still 0 if (proc_ctx->total_frames == 0) { - spdlog::warn("Unable to determine total number of frames"); + spdlog::warn("Unable to determine the total number of frames"); } else { spdlog::debug("{} frames to process", proc_ctx->total_frames); } - // Get start time - proc_ctx->start_time = time(NULL); - if (proc_ctx->start_time == -1) { - perror("time"); - } - AVFrame *frame = av_frame_alloc(); if (frame == nullptr) { ret = AVERROR(ENOMEM); @@ -236,8 +257,13 @@ static int process_frames( * @return int 0 on success, non-zero value on error */ extern "C" int process_video( +#ifdef _WIN32 + const wchar_t *in_fname, + const wchar_t *out_fname, +#else const char *in_fname, const char *out_fname, +#endif Libvideo2xLogLevel log_level, bool benchmark, AVHWDeviceType hw_type, @@ -328,6 +354,10 @@ extern "C" int process_video( break; } + // Convert the file names to std::filesystem::path + std::filesystem::path in_fpath(in_fname); + std::filesystem::path out_fpath(out_fname); + // Initialize hardware device context if (hw_type != AV_HWDEVICE_TYPE_NONE) { ret = av_hwdevice_ctx_create(&hw_ctx, hw_type, NULL, NULL, 0); @@ -340,7 +370,7 @@ extern "C" int process_video( } // Initialize input - ret = init_decoder(hw_type, hw_ctx, in_fname, &ifmt_ctx, &dec_ctx, &vstream_idx); + ret = init_decoder(hw_type, hw_ctx, in_fpath, &ifmt_ctx, &dec_ctx, &vstream_idx); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::error("Failed to initialize decoder: {}", errbuf); @@ -371,7 +401,7 @@ extern "C" int process_video( encoder_config->out_height = output_height; ret = init_encoder( hw_ctx, - out_fname, + out_fpath, ifmt_ctx, &ofmt_ctx, &enc_ctx, @@ -409,13 +439,13 @@ extern "C" int process_video( }; } else if (filter_config->filter_type == FILTER_REALESRGAN) { const auto &config = filter_config->config.realesrgan; - if (!config.model) { + if (!config.model_path) { spdlog::error("Model name must be provided for the RealESRGAN filter"); cleanup(); return -1; } filter = new RealesrganFilter{ - config.gpuid, config.tta_mode, config.scaling_factor, config.model + config.gpuid, config.tta_mode, config.scaling_factor, config.model_path }; } else { spdlog::error("Unknown filter type"); diff --git a/src/realesrgan_filter.cpp b/src/realesrgan_filter.cpp index 419e571..2699851 100644 --- a/src/realesrgan_filter.cpp +++ b/src/realesrgan_filter.cpp @@ -13,7 +13,7 @@ RealesrganFilter::RealesrganFilter( int gpuid, bool tta_mode, int scaling_factor, - const char *model, + const std::filesystem::path model_path, const std::filesystem::path custom_model_param_path, const std::filesystem::path custom_model_bin_path ) @@ -21,7 +21,7 @@ RealesrganFilter::RealesrganFilter( gpuid(gpuid), tta_mode(tta_mode), scaling_factor(scaling_factor), - model(model), + model_path(std::move(model_path)), custom_model_param_path(std::move(custom_model_param_path)), custom_model_bin_path(std::move(custom_model_bin_path)) {} @@ -37,12 +37,13 @@ int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVB std::filesystem::path model_param_path; std::filesystem::path model_bin_path; - if (model) { + if (!model_path.empty()) { // Find the model paths by model name if provided + // TODO: ensure this works with wide strings on Windows model_param_path = std::filesystem::path("models") / "realesrgan" / - (std::string(model) + "-x" + std::to_string(scaling_factor) + ".param"); + (model_path.string() + "-x" + std::to_string(scaling_factor) + ".param"); model_bin_path = std::filesystem::path("models") / "realesrgan" / - (std::string(model) + "-x" + std::to_string(scaling_factor) + ".bin"); + (model_path.string() + "-x" + std::to_string(scaling_factor) + ".bin"); } else if (!custom_model_param_path.empty() && !custom_model_bin_path.empty()) { // Use the custom model paths if provided model_param_path = custom_model_param_path;