fix(libvideo2x): support wide-character string paths on Windows

2024-12-28 06:59:11 +00:00 · 2024-11-01 22:19:01 -04:00 · 2024-11-01 22:19:01 -04:00 · 94e69f9f62
commit 94e69f9f62
parent a8b952c3ad
8 changed files with 84 additions and 36 deletions
--- a/include/libvideo2x/decoder.h
+++ b/include/libvideo2x/decoder.h
@ -1,6 +1,8 @@
 #ifndef DECODER_H
 #define DECODER_H

+#include <filesystem>
+
 extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
@ -9,7 +11,7 @@ extern "C" {
 int init_decoder(
    AVHWDeviceType hw_type,
    AVBufferRef *hw_ctx,
-    const char *in_fname,
+    std::filesystem::path in_fpath,
    AVFormatContext **fmt_ctx,
    AVCodecContext **dec_ctx,
    int *vstream_idx
--- a/include/libvideo2x/encoder.h
+++ b/include/libvideo2x/encoder.h
@ -1,6 +1,8 @@
 #ifndef ENCODER_H
 #define ENCODER_H

+#include <filesystem>
+
 extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
@ -11,7 +13,7 @@ extern "C" {

 int init_encoder(
    AVBufferRef *hw_ctx,
-    const char *out_fname,
+    std::filesystem::path out_fpath,
    AVFormatContext *ifmt_ctx,
    AVFormatContext **ofmt_ctx,
    AVCodecContext **enc_ctx,
--- a/include/libvideo2x/libvideo2x.h
+++ b/include/libvideo2x/libvideo2x.h
@ -43,7 +43,11 @@ enum Libvideo2xLogLevel {
 struct LibplaceboConfig {
    int out_width;
    int out_height;
+#ifdef _WIN32
+    const wchar_t *shader_path;
+#else
    const char *shader_path;
+#endif
 };

 // Configuration for RealESRGAN filter
@ -51,7 +55,11 @@ struct RealESRGANConfig {
    int gpuid;
    bool tta_mode;
    int scaling_factor;
-    const char *model;
+#ifdef _WIN32
+    const wchar_t *model_path;
+#else
+    const char *model_path;
+#endif
 };

 // Unified filter configuration
@ -87,8 +95,13 @@ struct VideoProcessingContext {

 // C-compatible process_video function
 LIBVIDEO2X_API int process_video(
+#ifdef _WIN32
+    const wchar_t *in_fname,
+    const wchar_t *out_fname,
+#else
    const char *in_fname,
    const char *out_fname,
+#endif
    enum Libvideo2xLogLevel log_level,
    bool benchmark,
    enum AVHWDeviceType hw_device_type,
--- a/include/libvideo2x/realesrgan_filter.h
+++ b/include/libvideo2x/realesrgan_filter.h
@ -17,7 +17,7 @@ class RealesrganFilter : public Filter {
    int gpuid;
    bool tta_mode;
    int scaling_factor;
-    const char *model;
+    const std::filesystem::path model_path;
    const std::filesystem::path custom_model_param_path;
    const std::filesystem::path custom_model_bin_path;
    AVRational in_time_base;
@ -30,7 +30,7 @@ class RealesrganFilter : public Filter {
        int gpuid = 0,
        bool tta_mode = false,
        int scaling_factor = 4,
-        const char *model = "realesr-animevideov3",
+        const std::filesystem::path model = std::filesystem::path("realesr-animevideov3"),
        const std::filesystem::path custom_model_param_path = std::filesystem::path(),
        const std::filesystem::path custom_model_bin_path = std::filesystem::path()
    );
--- a/src/decoder.cpp
+++ b/src/decoder.cpp
@ -22,7 +22,7 @@ static enum AVPixelFormat get_hw_format(AVCodecContext *_, const enum AVPixelFor
 int init_decoder(
    AVHWDeviceType hw_type,
    AVBufferRef *hw_ctx,
-    const char *in_fname,
+    std::filesystem::path in_fpath,
    AVFormatContext **fmt_ctx,
    AVCodecContext **dec_ctx,
    int *vstream_idx
@ -31,8 +31,8 @@ int init_decoder(
    AVCodecContext *codec_ctx = NULL;
    int ret;

-    if ((ret = avformat_open_input(&ifmt_ctx, in_fname, NULL, NULL)) < 0) {
-        spdlog::error("Could not open input file '{}'", in_fname);
+    if ((ret = avformat_open_input(&ifmt_ctx, in_fpath.u8string().c_str(), NULL, NULL)) < 0) {
+        spdlog::error("Could not open input file '{}'", in_fpath.u8string().c_str());
        return ret;
    }

--- a/src/encoder.cpp
+++ b/src/encoder.cpp
@ -19,7 +19,7 @@ static enum AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder) {

 int init_encoder(
    AVBufferRef *hw_ctx,
-    const char *out_fname,
+    std::filesystem::path out_fpath,
    AVFormatContext *ifmt_ctx,
    AVFormatContext **ofmt_ctx,
    AVCodecContext **enc_ctx,
@ -33,7 +33,7 @@ int init_encoder(
    int stream_index = 0;
    int ret;

-    avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fname);
+    avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fpath.u8string().c_str());
    if (!fmt_ctx) {
        spdlog::error("Could not create output context");
        return AVERROR_UNKNOWN;
@ -174,9 +174,9 @@ int init_encoder(

    // Open the output file
    if (!(fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
-        ret = avio_open(&fmt_ctx->pb, out_fname, AVIO_FLAG_WRITE);
+        ret = avio_open(&fmt_ctx->pb, out_fpath.u8string().c_str(), AVIO_FLAG_WRITE);
        if (ret < 0) {
-            spdlog::error("Could not open output file '{}'", out_fname);
+            spdlog::error("Could not open output file '{}'", out_fpath.u8string().c_str());
            return ret;
        }
    }
--- a/src/libvideo2x.cpp
+++ b/src/libvideo2x.cpp
@ -5,8 +5,11 @@
 #include <string.h>
 #include <thread>

+extern "C" {
+#include <libavutil/avutil.h>
+}
+
 #include <spdlog/spdlog.h>
-#include <opencv2/videoio.hpp>

 #include "decoder.h"
 #include "encoder.h"
@ -46,28 +49,46 @@ static int process_frames(
    std::vector<AVFrame *> flushed_frames;

    // Get the total number of frames in the video with OpenCV
-    spdlog::debug("Reading total number of frames with OpenCV");
-    cv::VideoCapture cap(ifmt_ctx->url);
-    if (!cap.isOpened()) {
-        spdlog::error("Failed to open video file with OpenCV");
-        return -1;
+    spdlog::debug("Reading total number of frames");
+    proc_ctx->total_frames = ifmt_ctx->streams[vstream_idx]->nb_frames;
+    if (proc_ctx->total_frames > 0) {
+        spdlog::debug("Read total number of frames from 'nb_frames': {}", proc_ctx->total_frames);
+    } else {
+        spdlog::warn("Estimating the total number of frames from duration * fps");
+        // Calculate duration in seconds
+        double duration_secs = static_cast<double>(ifmt_ctx->streams[vstream_idx]->duration) *
+                               av_q2d(ifmt_ctx->streams[vstream_idx]->time_base);
+        spdlog::debug("Video duration: {}s", duration_secs);
+
+        // Calculate average FPS
+        double fps = av_q2d(ifmt_ctx->streams[vstream_idx]->avg_frame_rate);
+        if (fps <= 0) {
+            spdlog::debug("Unable to read the average frame rate from 'avg_frame_rate'");
+            fps = av_q2d(ifmt_ctx->streams[vstream_idx]->r_frame_rate);
+        }
+        if (fps <= 0) {
+            spdlog::debug("Unable to read the average frame rate from 'r_frame_rate'");
+            fps = av_q2d(av_guess_frame_rate(ifmt_ctx, ifmt_ctx->streams[vstream_idx], nullptr));
+        }
+        if (fps <= 0) {
+            spdlog::debug("Unable to estimate the average frame rate with 'av_guess_frame_rate'");
+            fps = av_q2d(ifmt_ctx->streams[vstream_idx]->time_base);
+        }
+        if (fps <= 0) {
+            spdlog::debug("Unable to estimate the video's average frame rate");
+        } else {
+            // Calculate total frames
+            proc_ctx->total_frames = static_cast<int64_t>(duration_secs * fps);
+        }
    }
-    proc_ctx->total_frames = static_cast<int64_t>(cap.get(cv::CAP_PROP_FRAME_COUNT));
-    cap.release();

    // Check if the total number of frames is still 0
    if (proc_ctx->total_frames == 0) {
-        spdlog::warn("Unable to determine total number of frames");
+        spdlog::warn("Unable to determine the total number of frames");
    } else {
        spdlog::debug("{} frames to process", proc_ctx->total_frames);
    }

-    // Get start time
-    proc_ctx->start_time = time(NULL);
-    if (proc_ctx->start_time == -1) {
-        perror("time");
-    }
-
    AVFrame *frame = av_frame_alloc();
    if (frame == nullptr) {
        ret = AVERROR(ENOMEM);
@ -236,8 +257,13 @@ static int process_frames(
 * @return int 0 on success, non-zero value on error
 */
 extern "C" int process_video(
+#ifdef _WIN32
+    const wchar_t *in_fname,
+    const wchar_t *out_fname,
+#else
    const char *in_fname,
    const char *out_fname,
+#endif
    Libvideo2xLogLevel log_level,
    bool benchmark,
    AVHWDeviceType hw_type,
@ -328,6 +354,10 @@ extern "C" int process_video(
            break;
    }

+    // Convert the file names to std::filesystem::path
+    std::filesystem::path in_fpath(in_fname);
+    std::filesystem::path out_fpath(out_fname);
+
    // Initialize hardware device context
    if (hw_type != AV_HWDEVICE_TYPE_NONE) {
        ret = av_hwdevice_ctx_create(&hw_ctx, hw_type, NULL, NULL, 0);
@ -340,7 +370,7 @@ extern "C" int process_video(
    }

    // Initialize input
-    ret = init_decoder(hw_type, hw_ctx, in_fname, &ifmt_ctx, &dec_ctx, &vstream_idx);
+    ret = init_decoder(hw_type, hw_ctx, in_fpath, &ifmt_ctx, &dec_ctx, &vstream_idx);
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::error("Failed to initialize decoder: {}", errbuf);
@ -371,7 +401,7 @@ extern "C" int process_video(
    encoder_config->out_height = output_height;
    ret = init_encoder(
        hw_ctx,
-        out_fname,
+        out_fpath,
        ifmt_ctx,
        &ofmt_ctx,
        &enc_ctx,
@ -409,13 +439,13 @@ extern "C" int process_video(
        };
    } else if (filter_config->filter_type == FILTER_REALESRGAN) {
        const auto &config = filter_config->config.realesrgan;
-        if (!config.model) {
+        if (!config.model_path) {
            spdlog::error("Model name must be provided for the RealESRGAN filter");
            cleanup();
            return -1;
        }
        filter = new RealesrganFilter{
-            config.gpuid, config.tta_mode, config.scaling_factor, config.model
+            config.gpuid, config.tta_mode, config.scaling_factor, config.model_path
        };
    } else {
        spdlog::error("Unknown filter type");
--- a/src/realesrgan_filter.cpp
+++ b/src/realesrgan_filter.cpp
@ -13,7 +13,7 @@ RealesrganFilter::RealesrganFilter(
    int gpuid,
    bool tta_mode,
    int scaling_factor,
-    const char *model,
+    const std::filesystem::path model_path,
    const std::filesystem::path custom_model_param_path,
    const std::filesystem::path custom_model_bin_path
 )
@ -21,7 +21,7 @@ RealesrganFilter::RealesrganFilter(
      gpuid(gpuid),
      tta_mode(tta_mode),
      scaling_factor(scaling_factor),
-      model(model),
+      model_path(std::move(model_path)),
      custom_model_param_path(std::move(custom_model_param_path)),
      custom_model_bin_path(std::move(custom_model_bin_path)) {}

@ -37,12 +37,13 @@ int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVB
    std::filesystem::path model_param_path;
    std::filesystem::path model_bin_path;

-    if (model) {
+    if (!model_path.empty()) {
        // Find the model paths by model name if provided
+        // TODO: model_path.string() yields a narrow string; ensure this works with wide strings on Windows
        model_param_path = std::filesystem::path("models") / "realesrgan" /
-                           (std::string(model) + "-x" + std::to_string(scaling_factor) + ".param");
+                           (model_path.string() + "-x" + std::to_string(scaling_factor) + ".param");
        model_bin_path = std::filesystem::path("models") / "realesrgan" /
-                         (std::string(model) + "-x" + std::to_string(scaling_factor) + ".bin");
+                         (model_path.string() + "-x" + std::to_string(scaling_factor) + ".bin");
    } else if (!custom_model_param_path.empty() && !custom_model_bin_path.empty()) {
        // Use the custom model paths if provided
        model_param_path = custom_model_param_path;