fix(libvideo2x): fixed wide character string paths on Windows

This commit is contained in:
k4yt3x 2024-11-01 22:19:01 -04:00
parent a8b952c3ad
commit 94e69f9f62
8 changed files with 84 additions and 36 deletions

View File

@ -1,6 +1,8 @@
#ifndef DECODER_H
#define DECODER_H
#include <filesystem>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
@ -9,7 +11,7 @@ extern "C" {
int init_decoder(
AVHWDeviceType hw_type,
AVBufferRef *hw_ctx,
const char *in_fname,
std::filesystem::path in_fpath,
AVFormatContext **fmt_ctx,
AVCodecContext **dec_ctx,
int *vstream_idx

View File

@ -1,6 +1,8 @@
#ifndef ENCODER_H
#define ENCODER_H
#include <filesystem>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
@ -11,7 +13,7 @@ extern "C" {
int init_encoder(
AVBufferRef *hw_ctx,
const char *out_fname,
std::filesystem::path out_fpath,
AVFormatContext *ifmt_ctx,
AVFormatContext **ofmt_ctx,
AVCodecContext **enc_ctx,

View File

@ -43,7 +43,11 @@ enum Libvideo2xLogLevel {
struct LibplaceboConfig {
// Output width and height, going by the field names
// (units are presumably pixels; TODO confirm against the filter code).
int out_width;
int out_height;
// Shader path. The character type is selected at compile time: wchar_t when
// _WIN32 is defined, plain char otherwise.
// NOTE(review): only a pointer is stored here, so the caller presumably has to
// keep the string alive while this config is in use; confirm against callers.
#ifdef _WIN32
const wchar_t *shader_path;
#else
const char *shader_path;
#endif
};
// Configuration for RealESRGAN filter
@ -51,7 +55,11 @@ struct RealESRGANConfig {
int gpuid;
bool tta_mode;
int scaling_factor;
const char *model;
#ifdef _WIN32
const wchar_t *model_path;
#else
const char *model_path;
#endif
};
// Unified filter configuration
@ -87,8 +95,13 @@ struct VideoProcessingContext {
// C-compatible process_video function
LIBVIDEO2X_API int process_video(
#ifdef _WIN32
const wchar_t *in_fname,
const wchar_t *out_fname,
#else
const char *in_fname,
const char *out_fname,
#endif
enum Libvideo2xLogLevel log_level,
bool benchmark,
enum AVHWDeviceType hw_device_type,

View File

@ -17,7 +17,7 @@ class RealesrganFilter : public Filter {
int gpuid;
bool tta_mode;
int scaling_factor;
const char *model;
const std::filesystem::path model_path;
const std::filesystem::path custom_model_param_path;
const std::filesystem::path custom_model_bin_path;
AVRational in_time_base;
@ -30,7 +30,7 @@ class RealesrganFilter : public Filter {
int gpuid = 0,
bool tta_mode = false,
int scaling_factor = 4,
const char *model = "realesr-animevideov3",
const std::filesystem::path model = std::filesystem::path("realesr-animevideov3"),
const std::filesystem::path custom_model_param_path = std::filesystem::path(),
const std::filesystem::path custom_model_bin_path = std::filesystem::path()
);

View File

@ -22,7 +22,7 @@ static enum AVPixelFormat get_hw_format(AVCodecContext *_, const enum AVPixelFor
int init_decoder(
AVHWDeviceType hw_type,
AVBufferRef *hw_ctx,
const char *in_fname,
std::filesystem::path in_fpath,
AVFormatContext **fmt_ctx,
AVCodecContext **dec_ctx,
int *vstream_idx
@ -31,8 +31,8 @@ int init_decoder(
AVCodecContext *codec_ctx = NULL;
int ret;
if ((ret = avformat_open_input(&ifmt_ctx, in_fname, NULL, NULL)) < 0) {
spdlog::error("Could not open input file '{}'", in_fname);
if ((ret = avformat_open_input(&ifmt_ctx, in_fpath.u8string().c_str(), NULL, NULL)) < 0) {
spdlog::error("Could not open input file '{}'", in_fpath.u8string().c_str());
return ret;
}

View File

@ -19,7 +19,7 @@ static enum AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder) {
int init_encoder(
AVBufferRef *hw_ctx,
const char *out_fname,
std::filesystem::path out_fpath,
AVFormatContext *ifmt_ctx,
AVFormatContext **ofmt_ctx,
AVCodecContext **enc_ctx,
@ -33,7 +33,7 @@ int init_encoder(
int stream_index = 0;
int ret;
avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fname);
avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fpath.u8string().c_str());
if (!fmt_ctx) {
spdlog::error("Could not create output context");
return AVERROR_UNKNOWN;
@ -174,9 +174,9 @@ int init_encoder(
// Open the output file
if (!(fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
ret = avio_open(&fmt_ctx->pb, out_fname, AVIO_FLAG_WRITE);
ret = avio_open(&fmt_ctx->pb, out_fpath.u8string().c_str(), AVIO_FLAG_WRITE);
if (ret < 0) {
spdlog::error("Could not open output file '{}'", out_fname);
spdlog::error("Could not open output file '{}'", out_fpath.u8string().c_str());
return ret;
}
}

View File

@ -5,8 +5,11 @@
#include <string.h>
#include <thread>
extern "C" {
#include <libavutil/avutil.h>
}
#include <spdlog/spdlog.h>
#include <opencv2/videoio.hpp>
#include "decoder.h"
#include "encoder.h"
@ -46,28 +49,46 @@ static int process_frames(
std::vector<AVFrame *> flushed_frames;
// Get the total number of frames in the video with OpenCV
spdlog::debug("Reading total number of frames with OpenCV");
cv::VideoCapture cap(ifmt_ctx->url);
if (!cap.isOpened()) {
spdlog::error("Failed to open video file with OpenCV");
return -1;
spdlog::debug("Reading total number of frames");
proc_ctx->total_frames = ifmt_ctx->streams[vstream_idx]->nb_frames;
if (proc_ctx->total_frames > 0) {
spdlog::debug("Read total number of frames from 'nb_frames': {}", proc_ctx->total_frames);
} else {
spdlog::warn("Estimating the total number of frames from duration * fps");
// Calculate duration in seconds
double duration_secs = static_cast<double>(ifmt_ctx->streams[vstream_idx]->duration) *
av_q2d(ifmt_ctx->streams[vstream_idx]->time_base);
spdlog::debug("Video duration: {}s", duration_secs);
// Calculate average FPS
double fps = av_q2d(ifmt_ctx->streams[vstream_idx]->avg_frame_rate);
if (fps <= 0) {
spdlog::debug("Unable to read the average frame rate from 'avg_frame_rate'");
fps = av_q2d(ifmt_ctx->streams[vstream_idx]->r_frame_rate);
}
if (fps <= 0) {
spdlog::debug("Unable to read the average frame rate from 'r_frame_rate'");
fps = av_q2d(av_guess_frame_rate(ifmt_ctx, ifmt_ctx->streams[vstream_idx], nullptr));
}
if (fps <= 0) {
spdlog::debug("Unable to estimate the average frame rate with 'av_guess_frame_rate'");
fps = av_q2d(ifmt_ctx->streams[vstream_idx]->time_base);
}
if (fps <= 0) {
spdlog::debug("Unable to estimate the video's average frame rate");
} else {
// Calculate total frames
proc_ctx->total_frames = static_cast<int64_t>(duration_secs * fps);
}
}
proc_ctx->total_frames = static_cast<int64_t>(cap.get(cv::CAP_PROP_FRAME_COUNT));
cap.release();
// Check if the total number of frames is still 0
if (proc_ctx->total_frames == 0) {
spdlog::warn("Unable to determine total number of frames");
spdlog::warn("Unable to determine the total number of frames");
} else {
spdlog::debug("{} frames to process", proc_ctx->total_frames);
}
// Get start time
proc_ctx->start_time = time(NULL);
if (proc_ctx->start_time == -1) {
perror("time");
}
AVFrame *frame = av_frame_alloc();
if (frame == nullptr) {
ret = AVERROR(ENOMEM);
@ -236,8 +257,13 @@ static int process_frames(
* @return int 0 on success, non-zero value on error
*/
extern "C" int process_video(
#ifdef _WIN32
const wchar_t *in_fname,
const wchar_t *out_fname,
#else
const char *in_fname,
const char *out_fname,
#endif
Libvideo2xLogLevel log_level,
bool benchmark,
AVHWDeviceType hw_type,
@ -328,6 +354,10 @@ extern "C" int process_video(
break;
}
// Convert the file names to std::filesystem::path
std::filesystem::path in_fpath(in_fname);
std::filesystem::path out_fpath(out_fname);
// Initialize hardware device context
if (hw_type != AV_HWDEVICE_TYPE_NONE) {
ret = av_hwdevice_ctx_create(&hw_ctx, hw_type, NULL, NULL, 0);
@ -340,7 +370,7 @@ extern "C" int process_video(
}
// Initialize input
ret = init_decoder(hw_type, hw_ctx, in_fname, &ifmt_ctx, &dec_ctx, &vstream_idx);
ret = init_decoder(hw_type, hw_ctx, in_fpath, &ifmt_ctx, &dec_ctx, &vstream_idx);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::error("Failed to initialize decoder: {}", errbuf);
@ -371,7 +401,7 @@ extern "C" int process_video(
encoder_config->out_height = output_height;
ret = init_encoder(
hw_ctx,
out_fname,
out_fpath,
ifmt_ctx,
&ofmt_ctx,
&enc_ctx,
@ -409,13 +439,13 @@ extern "C" int process_video(
};
} else if (filter_config->filter_type == FILTER_REALESRGAN) {
const auto &config = filter_config->config.realesrgan;
if (!config.model) {
if (!config.model_path) {
spdlog::error("Model name must be provided for the RealESRGAN filter");
cleanup();
return -1;
}
filter = new RealesrganFilter{
config.gpuid, config.tta_mode, config.scaling_factor, config.model
config.gpuid, config.tta_mode, config.scaling_factor, config.model_path
};
} else {
spdlog::error("Unknown filter type");

View File

@ -13,7 +13,7 @@ RealesrganFilter::RealesrganFilter(
int gpuid,
bool tta_mode,
int scaling_factor,
const char *model,
const std::filesystem::path model_path,
const std::filesystem::path custom_model_param_path,
const std::filesystem::path custom_model_bin_path
)
@ -21,7 +21,7 @@ RealesrganFilter::RealesrganFilter(
gpuid(gpuid),
tta_mode(tta_mode),
scaling_factor(scaling_factor),
model(model),
model_path(std::move(model_path)),
custom_model_param_path(std::move(custom_model_param_path)),
custom_model_bin_path(std::move(custom_model_bin_path)) {}
@ -37,12 +37,13 @@ int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVB
std::filesystem::path model_param_path;
std::filesystem::path model_bin_path;
if (model) {
if (!model_path.empty()) {
// Find the model paths by model name if provided
// TODO: ensure this works with wide strings on Windows
model_param_path = std::filesystem::path("models") / "realesrgan" /
(std::string(model) + "-x" + std::to_string(scaling_factor) + ".param");
(model_path.string() + "-x" + std::to_string(scaling_factor) + ".param");
model_bin_path = std::filesystem::path("models") / "realesrgan" /
(std::string(model) + "-x" + std::to_string(scaling_factor) + ".bin");
(model_path.string() + "-x" + std::to_string(scaling_factor) + ".bin");
} else if (!custom_model_param_path.empty() && !custom_model_bin_path.empty()) {
// Use the custom model paths if provided
model_param_path = custom_model_param_path;