diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ec9131..7ae0968 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10) project(video2x VERSION 6.0.0 LANGUAGES CXX C) # Set the C standard -set(CMAKE_C_STANDARD 99) +set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD_REQUIRED ON) # Set the C++ standard diff --git a/Dockerfile b/Dockerfile index d48dbf4..707de5c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ WORKDIR /video2x # Build the package RUN makepkg -s --noconfirm \ - && find /video2x -maxdepth 1 -name '*.pkg.tar.zst' | head -n 1 | \ + && find /video2x -maxdepth 1 -name 'video2x-*.pkg.tar.zst' ! -name '*-debug-*' | head -n 1 | \ xargs -I {} cp {} /tmp/video2x.pkg.tar.zst # stage 2: install wheels into the final image @@ -34,8 +34,6 @@ ENV VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/nvidia_icd.json\ :/usr/share/vulkan/icd.d/intel_icd.x86_64.json COPY --from=builder /tmp/video2x.pkg.tar.zst /video2x.pkg.tar.zst -COPY . /video2x -WORKDIR /video2x RUN pacman -Sy --noconfirm ffmpeg ncnn \ nvidia-utils vulkan-radeon vulkan-intel vulkan-swrast \ && pacman -U --noconfirm /video2x.pkg.tar.zst \ diff --git a/Makefile b/Makefile index 49dab3c..3ea71a3 100644 --- a/Makefile +++ b/Makefile @@ -33,6 +33,26 @@ debug: cmake --build $(BINDIR) --config Debug --parallel cp $(BINDIR)/compile_commands.json . +debian: + apt-get update + apt-get install -y --no-install-recommends \ + build-essential cmake clang pkg-config \ + libavcodec-dev \ + libavdevice-dev \ + libavfilter-dev \ + libavformat-dev \ + libavutil-dev \ + libswscale-dev \ + libvulkan-dev \ + glslang-tools \ + libomp-dev + cmake -B /tmp/build -S . -DUSE_SYSTEM_NCNN=OFF \ + -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/tmp/install \ + -DINSTALL_BIN_DESTINATION=. -DINSTALL_INCLUDE_DESTINATION=include \ + -DINSTALL_LIB_DESTINATION=. -DINSTALL_MODEL_DESTINATION=. + cmake --build /tmp/build --config Release --target install --parallel + test-realesrgan: LD_LIBRARY_PATH=$(BINDIR) $(BINDIR)/video2x -i data/standard-test.mp4 -o data/output.mp4 \ -f realesrgan -r 4 --model realesr-animevideov3 diff --git a/include/conversions.h b/include/conversions.h index 7bb55a0..74e025d 100644 --- a/include/conversions.h +++ b/include/conversions.h @@ -1,7 +1,10 @@ #ifndef CONVERSIONS_H #define CONVERSIONS_H -#include +extern "C" { +#include +} + #include // Convert AVFrame to another pixel format diff --git a/include/decoder.h b/include/decoder.h index dc55d6d..71c240a 100644 --- a/include/decoder.h +++ b/include/decoder.h @@ -1,11 +1,14 @@ #ifndef DECODER_H #define DECODER_H +extern "C" { #include -#include #include +} int init_decoder( + AVHWDeviceType hw_type, + AVBufferRef *hw_ctx, const char *input_filename, AVFormatContext **fmt_ctx, AVCodecContext **dec_ctx, diff --git a/include/encoder.h b/include/encoder.h index a828470..48dc6c3 100644 --- a/include/encoder.h +++ b/include/encoder.h @@ -1,20 +1,32 @@ #ifndef ENCODER_H #define ENCODER_H +extern "C" { #include #include +#include +} #include "libvideo2x.h" int init_encoder( + AVBufferRef *hw_ctx, const char *output_filename, + AVFormatContext *ifmt_ctx, AVFormatContext **ofmt_ctx, AVCodecContext **enc_ctx, AVCodecContext *dec_ctx, - EncoderConfig *encoder_config + EncoderConfig *encoder_config, + int video_stream_index, + int **stream_mapping ); -int encode_and_write_frame(AVFrame *frame, AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx); +int encode_and_write_frame( + AVFrame *frame, + AVCodecContext *enc_ctx, + AVFormatContext *ofmt_ctx, + int video_stream_index +); int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx); diff --git a/include/filter.h b/include/filter.h index 9179e2d..a77d373 100644 --- a/include/filter.h +++ b/include/filter.h @@ -6,14 +6,15 @@ extern "C" { #include #include +#include } // Abstract base class for filters class Filter { public: virtual ~Filter() {} - virtual int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) = 0; - virtual AVFrame *process_frame(AVFrame *input_frame) = 0; + virtual int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) = 0; + virtual int process_frame(AVFrame *input_frame, AVFrame **output_frame) = 0; virtual int flush(std::vector &processed_frames) = 0; }; diff --git a/include/getopt.h b/include/getopt.h index 9e9d68c..be8b5ac 100644 --- a/include/getopt.h +++ b/include/getopt.h @@ -22,7 +22,7 @@ struct option { #define required_argument 1 #define optional_argument 2 -int getopt(int, char **, const char *); +// int getopt(int, char **, const char *); int getopt_long(int, char **, const char *, const struct option *, int *); #ifdef __cplusplus diff --git a/include/libplacebo.h b/include/libplacebo.h index 71e3591..e32a2a4 100644 --- a/include/libplacebo.h +++ b/include/libplacebo.h @@ -3,15 +3,16 @@ #include +extern "C" { #include #include -#include +} int init_libplacebo( + AVBufferRef *hw_ctx, AVFilterGraph **filter_graph, AVFilterContext **buffersrc_ctx, AVFilterContext **buffersink_ctx, - AVBufferRef **device_ctx, AVCodecContext *dec_ctx, int output_width, int output_height, diff --git a/include/libplacebo_filter.h b/include/libplacebo_filter.h index a36dd21..c8692fb 100644 --- a/include/libplacebo_filter.h +++ b/include/libplacebo_filter.h @@ -3,7 +3,11 @@ #include -#include +extern "C" { +#include +#include +#include +} #include "filter.h" @@ -13,7 +17,6 @@ class LibplaceboFilter : public Filter { AVFilterGraph *filter_graph; AVFilterContext *buffersrc_ctx; AVFilterContext *buffersink_ctx; - AVBufferRef *device_ctx; int output_width; int output_height; const std::filesystem::path shader_path; @@ -27,10 +30,10 @@ class LibplaceboFilter : public Filter { virtual ~LibplaceboFilter(); // Initializes the filter with decoder and encoder contexts - int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) override; + int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override; // Processes an input frame and returns the processed frame - AVFrame *process_frame(AVFrame *input_frame) override; + int process_frame(AVFrame *input_frame, AVFrame **output_frame) override; // Flushes any remaining frames int flush(std::vector &processed_frames) override; diff --git a/include/libvideo2x.h b/include/libvideo2x.h index 33c1176..df0eb25 100644 --- a/include/libvideo2x.h +++ b/include/libvideo2x.h @@ -1,13 +1,10 @@ #ifndef LIBVIDEO2X_H #define LIBVIDEO2X_H -#include +#include #include #include -#include -#include - #ifdef _WIN32 #ifdef LIBVIDEO2X_EXPORTS #define LIBVIDEO2X_API __declspec(dllexport) @@ -22,6 +19,9 @@ extern "C" { #endif +#include +#include + // Enum to specify filter type enum FilterType { FILTER_LIBPLACEBO, @@ -38,7 +38,7 @@ struct LibplaceboConfig { // Configuration for RealESRGAN filter struct RealESRGANConfig { int gpuid; - int tta_mode; + bool tta_mode; int scaling_factor; const char *model; }; @@ -56,6 +56,7 @@ struct FilterConfig { struct EncoderConfig { int output_width; int output_height; + bool copy_streams; enum AVCodecID codec; enum AVPixelFormat pix_fmt; const char *preset; @@ -63,20 +64,25 @@ struct EncoderConfig { float crf; }; -// Processing status -struct ProcessingStatus { +// Video processing context +struct VideoProcessingContext { int64_t processed_frames; int64_t total_frames; time_t start_time; + bool pause; + bool abort; + bool completed; }; // C-compatible process_video function LIBVIDEO2X_API int process_video( const char *input_filename, const char *output_filename, + bool benchmark, + enum AVHWDeviceType hw_device_type, const struct FilterConfig *filter_config, struct EncoderConfig *encoder_config, - struct ProcessingStatus *status + struct VideoProcessingContext *proc_ctx ); #ifdef __cplusplus diff --git a/include/realesrgan_filter.h b/include/realesrgan_filter.h index 60b58df..b0f3103 100644 --- a/include/realesrgan_filter.h +++ b/include/realesrgan_filter.h @@ -3,6 +3,10 @@ #include +extern "C" { +#include +} + #include "filter.h" #include "realesrgan.h" @@ -35,10 +39,10 @@ class RealesrganFilter : public Filter { virtual ~RealesrganFilter(); // Initializes the filter with decoder and encoder contexts - int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) override; + int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override; // Processes an input frame and returns the processed frame - AVFrame *process_frame(AVFrame *input_frame) override; + int process_frame(AVFrame *input_frame, AVFrame **output_frame) override; // Flushes any remaining frames (if necessary) int flush(std::vector &processed_frames) override; diff --git a/src/conversions.cpp b/src/conversions.cpp index 21ff5d3..6f3e5f1 100644 --- a/src/conversions.cpp +++ b/src/conversions.cpp @@ -1,17 +1,7 @@ -#include - -// FFmpeg includes -extern "C" { -#include -#include -#include -} - -// ncnn includes -#include - #include "conversions.h" +#include + // Convert AVFrame format AVFrame *convert_avframe_pix_fmt(AVFrame *src_frame, AVPixelFormat pix_fmt) { AVFrame *dst_frame = av_frame_alloc(); @@ -77,10 +67,11 @@ ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame) { converted_frame = convert_avframe_pix_fmt(frame, AV_PIX_FMT_BGR24); if (!converted_frame) { fprintf(stderr, "Failed to convert AVFrame to BGR24.\n"); - return ncnn::Mat(); // Return an empty ncnn::Mat on failure + return ncnn::Mat(); } } else { - converted_frame = frame; // If the frame is already in BGR24, use it directly + // If the frame is already in BGR24, use it directly + converted_frame = frame; } // Allocate a new ncnn::Mat and copy the data @@ -146,10 +137,7 @@ AVFrame *ncnn_mat_to_avframe(const ncnn::Mat &mat, AVPixelFormat pix_fmt) { return nullptr; } - // Copy data from ncnn::Mat to the BGR AVFrame - // mat.to_pixels(bgr_frame->data[0], ncnn::Mat::PIXEL_BGR); - - // Manually copy the pixel data from ncnn::Mat to the BGR AVFrame + // Copy the pixel data from ncnn::Mat to the BGR AVFrame for (int y = 0; y < mat.h; y++) { uint8_t *dst_row = bgr_frame->data[0] + y * bgr_frame->linesize[0]; const uint8_t *src_row = mat.row(y); diff --git a/src/decoder.cpp b/src/decoder.cpp index 0b37794..b2be584 100644 --- a/src/decoder.cpp +++ b/src/decoder.cpp @@ -1,19 +1,25 @@ +#include "decoder.h" + #include #include #include -extern "C" { -#include -#include -#include -#include -#include -#include -#include -#include +static enum AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE; + +// Callback function to choose the hardware-accelerated pixel format +static enum AVPixelFormat get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts) { + for (const enum AVPixelFormat *p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) { + if (*p == hw_pix_fmt) { + return *p; + } + } + fprintf(stderr, "Failed to get HW surface format.\n"); + return AV_PIX_FMT_NONE; } int init_decoder( + AVHWDeviceType hw_type, + AVBufferRef *hw_ctx, const char *input_filename, AVFormatContext **fmt_ctx, AVCodecContext **dec_ctx, @@ -44,18 +50,45 @@ int init_decoder( AVStream *video_stream = ifmt_ctx->streams[stream_index]; // Set up the decoder - const AVCodec *dec = avcodec_find_decoder(video_stream->codecpar->codec_id); - if (!dec) { + const AVCodec *decoder = avcodec_find_decoder(video_stream->codecpar->codec_id); + if (!decoder) { fprintf(stderr, "Failed to find decoder for stream #%u\n", stream_index); return AVERROR_DECODER_NOT_FOUND; } - codec_ctx = avcodec_alloc_context3(dec); + codec_ctx = avcodec_alloc_context3(decoder); if (!codec_ctx) { fprintf(stderr, "Failed to allocate the decoder context\n"); return AVERROR(ENOMEM); } + // Set hardware device context + if (hw_ctx != nullptr) { + codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx); + codec_ctx->get_format = get_hw_format; + + // Automatically determine the hardware pixel format + for (int i = 0;; i++) { + const AVCodecHWConfig *config = avcodec_get_hw_config(decoder, i); + if (config == nullptr) { + fprintf( + stderr, + "Decoder %s does not support device type %s.\n", + decoder->name, + av_hwdevice_get_type_name(hw_type) + ); + avcodec_free_context(&codec_ctx); + avformat_close_input(&ifmt_ctx); + return AVERROR(ENOSYS); + } + if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX && + config->device_type == hw_type) { + hw_pix_fmt = config->pix_fmt; + break; + } + } + } + if ((ret = avcodec_parameters_to_context(codec_ctx, video_stream->codecpar)) < 0) { fprintf(stderr, "Failed to copy decoder parameters to input decoder context\n"); return ret; @@ -66,7 +99,7 @@ int init_decoder( codec_ctx->pkt_timebase = video_stream->time_base; codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, video_stream, NULL); - if ((ret = avcodec_open2(codec_ctx, dec, NULL)) < 0) { + if ((ret = avcodec_open2(codec_ctx, decoder, NULL)) < 0) { fprintf(stderr, "Failed to open decoder for stream #%u\n", stream_index); return ret; } diff --git a/src/encoder.cpp b/src/encoder.cpp index 40abc8d..f5e424a 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -1,32 +1,34 @@ +#include "encoder.h" + #include #include #include -extern "C" { -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "conversions.h" + +static enum AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder) { + const enum AVPixelFormat *p = encoder->pix_fmts; + if (!p) { + fprintf(stderr, "No pixel formats supported by encoder\n"); + return AV_PIX_FMT_NONE; + } + return *p; } -#include "conversions.h" -#include "libvideo2x.h" - int init_encoder( + AVBufferRef *hw_ctx, const char *output_filename, + AVFormatContext *ifmt_ctx, AVFormatContext **ofmt_ctx, AVCodecContext **enc_ctx, AVCodecContext *dec_ctx, - EncoderConfig *encoder_config + EncoderConfig *encoder_config, + int video_stream_index, + int **stream_mapping ) { AVFormatContext *fmt_ctx = NULL; AVCodecContext *codec_ctx = NULL; + int stream_index = 0; int ret; avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, output_filename); @@ -35,66 +37,130 @@ int init_encoder( return AVERROR_UNKNOWN; } - // Create a new video stream - const AVCodec *enc = avcodec_find_encoder(encoder_config->codec); - if (!enc) { - fprintf(stderr, "Necessary encoder not found\n"); + const AVCodec *encoder = avcodec_find_encoder(encoder_config->codec); + if (!encoder) { + fprintf( + stderr, + "Required video encoder not found for vcodec %s\n", + avcodec_get_name(encoder_config->codec) + ); return AVERROR_ENCODER_NOT_FOUND; } + // Create a new video stream in the output file AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL); if (!out_stream) { - fprintf(stderr, "Failed allocating output stream\n"); + fprintf(stderr, "Failed to allocate the output video stream\n"); return AVERROR_UNKNOWN; } - codec_ctx = avcodec_alloc_context3(enc); + codec_ctx = avcodec_alloc_context3(encoder); if (!codec_ctx) { fprintf(stderr, "Failed to allocate the encoder context\n"); return AVERROR(ENOMEM); } + // Set hardware device context + if (hw_ctx != nullptr) { + codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx); + } + // Set encoding parameters codec_ctx->height = encoder_config->output_height; codec_ctx->width = encoder_config->output_width; codec_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio; - codec_ctx->pix_fmt = encoder_config->pix_fmt; - codec_ctx->time_base = av_inv_q(dec_ctx->framerate); + codec_ctx->bit_rate = encoder_config->bit_rate; - if (codec_ctx->time_base.num == 0 || codec_ctx->time_base.den == 0) { - codec_ctx->time_base = av_inv_q(av_guess_frame_rate(fmt_ctx, out_stream, NULL)); + // Set the pixel format + if (encoder_config->pix_fmt != AV_PIX_FMT_NONE) { + // Use the specified pixel format + codec_ctx->pix_fmt = encoder_config->pix_fmt; + } else { + // Fall back to the default pixel format + codec_ctx->pix_fmt = get_encoder_default_pix_fmt(encoder); + if (codec_ctx->pix_fmt == AV_PIX_FMT_NONE) { + fprintf(stderr, "Could not get the default pixel format for the encoder\n"); + return AVERROR(EINVAL); + } } - // Set the bit rate and other encoder parameters if needed - codec_ctx->bit_rate = encoder_config->bit_rate; - codec_ctx->gop_size = 60; // Keyframe interval - codec_ctx->max_b_frames = 3; // B-frames - codec_ctx->keyint_min = 60; // Maximum GOP size + // Set the time base + codec_ctx->time_base = av_inv_q(dec_ctx->framerate); + if (codec_ctx->time_base.num == 0 || codec_ctx->time_base.den == 0) { + codec_ctx->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_stream, NULL)); + } + // Set the CRF and preset for any codecs that support it char crf_str[16]; snprintf(crf_str, sizeof(crf_str), "%.f", encoder_config->crf); - if (encoder_config->codec == AV_CODEC_ID_H264 || encoder_config->codec == AV_CODEC_ID_HEVC) { - av_opt_set(codec_ctx->priv_data, "crf", crf_str, 0); - av_opt_set(codec_ctx->priv_data, "preset", encoder_config->preset, 0); - } + av_opt_set(codec_ctx->priv_data, "crf", crf_str, 0); + av_opt_set(codec_ctx->priv_data, "preset", encoder_config->preset, 0); if (fmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) { codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; } - if ((ret = avcodec_open2(codec_ctx, enc, NULL)) < 0) { + if ((ret = avcodec_open2(codec_ctx, encoder, NULL)) < 0) { fprintf(stderr, "Cannot open video encoder\n"); return ret; } ret = avcodec_parameters_from_context(out_stream->codecpar, codec_ctx); if (ret < 0) { - fprintf(stderr, "Failed to copy encoder parameters to output stream\n"); + fprintf(stderr, "Failed to copy encoder parameters to output video stream\n"); return ret; } out_stream->time_base = codec_ctx->time_base; + if (encoder_config->copy_streams) { + // Allocate the stream map + *stream_mapping = (int *)av_malloc_array(ifmt_ctx->nb_streams, sizeof(**stream_mapping)); + if (!*stream_mapping) { + fprintf(stderr, "Could not allocate stream mapping\n"); + return AVERROR(ENOMEM); + } + + // Map the video stream + (*stream_mapping)[video_stream_index] = stream_index++; + + // Loop through each stream in the input file + for (int i = 0; i < ifmt_ctx->nb_streams; i++) { + AVStream *in_stream = ifmt_ctx->streams[i]; + AVCodecParameters *in_codecpar = in_stream->codecpar; + + if (i == video_stream_index) { + // Video stream is already handled + continue; + } + + if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO && + in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) { + (*stream_mapping)[i] = -1; + continue; + } + + // Create corresponding output stream + AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL); + if (!out_stream) { + fprintf(stderr, "Failed allocating output stream\n"); + return AVERROR_UNKNOWN; + } + + ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar); + if (ret < 0) { + fprintf(stderr, "Failed to copy codec parameters\n"); + return ret; + } + out_stream->codecpar->codec_tag = 0; + + // Copy time base + out_stream->time_base = in_stream->time_base; + + (*stream_mapping)[i] = stream_index++; + } + } + // Open the output file if (!(fmt_ctx->oformat->flags & AVFMT_NOFILE)) { ret = avio_open(&fmt_ctx->pb, output_filename, AVIO_FLAG_WRITE); @@ -110,7 +176,12 @@ int init_encoder( return 0; } -int encode_and_write_frame(AVFrame *frame, AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx) { +int encode_and_write_frame( + AVFrame *frame, + AVCodecContext *enc_ctx, + AVFormatContext *ofmt_ctx, + int video_stream_index +) { int ret; // Convert the frame to the encoder's pixel format if needed @@ -144,14 +215,16 @@ int encode_and_write_frame(AVFrame *frame, AVCodecContext *enc_ctx, AVFormatCont av_packet_unref(enc_pkt); break; } else if (ret < 0) { - fprintf(stderr, "Error during encoding\n"); + fprintf(stderr, "Error encoding frame\n"); av_packet_free(&enc_pkt); return ret; } // Rescale packet timestamps - av_packet_rescale_ts(enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[0]->time_base); - enc_pkt->stream_index = ofmt_ctx->streams[0]->index; + av_packet_rescale_ts( + enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[video_stream_index]->time_base + ); + enc_pkt->stream_index = video_stream_index; // Write the packet ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt); @@ -182,7 +255,7 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx) { av_packet_unref(enc_pkt); break; } else if (ret < 0) { - fprintf(stderr, "Error during encoding\n"); + fprintf(stderr, "Error encoding frame\n"); av_packet_free(&enc_pkt); return ret; } diff --git a/src/fsutils.cpp b/src/fsutils.cpp index 0dd7ce8..025a0c2 100644 --- a/src/fsutils.cpp +++ b/src/fsutils.cpp @@ -1,4 +1,4 @@ -#include +#include "fsutils.h" #if _WIN32 #include @@ -8,8 +8,6 @@ #include #endif -#include "fsutils.h" - #if _WIN32 std::filesystem::path get_executable_directory() { std::vector filepath(MAX_PATH); diff --git a/src/libplacebo.cpp b/src/libplacebo.cpp index 7981192..6e4e85e 100644 --- a/src/libplacebo.cpp +++ b/src/libplacebo.cpp @@ -1,28 +1,15 @@ +#include "libplacebo.h" + #include #include -#include - -extern "C" { -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -} #include "fsutils.h" int init_libplacebo( + AVBufferRef *hw_ctx, AVFilterGraph **filter_graph, AVFilterContext **buffersrc_ctx, AVFilterContext **buffersink_ctx, - AVBufferRef **device_ctx, AVCodecContext *dec_ctx, int output_width, int output_height, @@ -31,14 +18,6 @@ int init_libplacebo( char args[512]; int ret; - // Initialize the Vulkan hardware device - AVBufferRef *hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VULKAN); - ret = av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_VULKAN, NULL, NULL, 0); - if (ret < 0) { - fprintf(stderr, "Unable to initialize Vulkan device\n"); - return ret; - } - AVFilterGraph *graph = avfilter_graph_alloc(); if (!graph) { fprintf(stderr, "Unable to create filter graph.\n"); @@ -67,7 +46,6 @@ int init_libplacebo( ret = avfilter_graph_create_filter(buffersrc_ctx, buffersrc, "in", args, NULL, graph); if (ret < 0) { fprintf(stderr, "Cannot create buffer source\n"); - av_buffer_unref(&hw_device_ctx); avfilter_graph_free(&graph); return ret; } @@ -78,7 +56,6 @@ int init_libplacebo( const AVFilter *libplacebo_filter = avfilter_get_by_name("libplacebo"); if (!libplacebo_filter) { fprintf(stderr, "Filter 'libplacebo' not found\n"); - av_buffer_unref(&hw_device_ctx); avfilter_graph_free(&graph); return AVERROR_FILTER_NOT_FOUND; } @@ -108,19 +85,19 @@ int init_libplacebo( ); if (ret < 0) { fprintf(stderr, "Cannot create libplacebo filter\n"); - av_buffer_unref(&hw_device_ctx); avfilter_graph_free(&graph); return ret; } // Set the hardware device context to Vulkan - libplacebo_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx); + if (hw_ctx != nullptr) { + libplacebo_ctx->hw_device_ctx = av_buffer_ref(hw_ctx); + } // Link buffersrc to libplacebo ret = avfilter_link(last_filter, 0, libplacebo_ctx, 0); if (ret < 0) { fprintf(stderr, "Error connecting buffersrc to libplacebo filter\n"); - av_buffer_unref(&hw_device_ctx); avfilter_graph_free(&graph); return ret; } @@ -132,7 +109,6 @@ int init_libplacebo( ret = avfilter_graph_create_filter(buffersink_ctx, buffersink, "out", NULL, NULL, graph); if (ret < 0) { fprintf(stderr, "Cannot create buffer sink\n"); - av_buffer_unref(&hw_device_ctx); avfilter_graph_free(&graph); return ret; } @@ -141,7 +117,6 @@ int init_libplacebo( ret = avfilter_link(last_filter, 0, *buffersink_ctx, 0); if (ret < 0) { fprintf(stderr, "Error connecting libplacebo filter to buffersink\n"); - av_buffer_unref(&hw_device_ctx); avfilter_graph_free(&graph); return ret; } @@ -150,12 +125,10 @@ int init_libplacebo( ret = avfilter_graph_config(graph, NULL); if (ret < 0) { fprintf(stderr, "Error configuring the filter graph\n"); - av_buffer_unref(&hw_device_ctx); avfilter_graph_free(&graph); return ret; } *filter_graph = graph; - *device_ctx = hw_device_ctx; return 0; } diff --git a/src/libplacebo_filter.cpp b/src/libplacebo_filter.cpp index b1758bd..a2ce8fe 100644 --- a/src/libplacebo_filter.cpp +++ b/src/libplacebo_filter.cpp @@ -1,21 +1,14 @@ -#include +#include "libplacebo_filter.h" -extern "C" { -#include -#include -#include -#include -} +#include #include "fsutils.h" #include "libplacebo.h" -#include "libplacebo_filter.h" LibplaceboFilter::LibplaceboFilter(int width, int height, const std::filesystem::path &shader_path) : filter_graph(nullptr), buffersrc_ctx(nullptr), buffersink_ctx(nullptr), - device_ctx(nullptr), output_width(width), output_height(height), shader_path(std::move(shader_path)) {} @@ -29,17 +22,13 @@ LibplaceboFilter::~LibplaceboFilter() { avfilter_free(buffersink_ctx); buffersink_ctx = nullptr; } - if (device_ctx) { - av_buffer_unref(&device_ctx); - device_ctx = nullptr; - } if (filter_graph) { avfilter_graph_free(&filter_graph); filter_graph = nullptr; } } -int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) { +int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) { // Construct the shader path std::filesystem::path shader_full_path; if (filepath_is_readable(shader_path)) { @@ -51,14 +40,20 @@ int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) { find_resource_file(std::filesystem::path("models") / (shader_path.string() + ".glsl")); } + // Check if the shader file exists + if (!std::filesystem::exists(shader_full_path)) { + fprintf(stderr, "libplacebo shader file not found: %s\n", shader_full_path.c_str()); + return -1; + } + // Save the output time base output_time_base = enc_ctx->time_base; return init_libplacebo( + hw_ctx, &filter_graph, &buffersrc_ctx, &buffersink_ctx, - &device_ctx, dec_ctx, output_width, output_height, @@ -66,45 +61,39 @@ int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) { ); } -AVFrame *LibplaceboFilter::process_frame(AVFrame *input_frame) { +int LibplaceboFilter::process_frame(AVFrame *input_frame, AVFrame **output_frame) { int ret; // Get the filtered frame - AVFrame *output_frame = av_frame_alloc(); - if (output_frame == nullptr) { + *output_frame = av_frame_alloc(); + if (*output_frame == nullptr) { fprintf(stderr, "Failed to allocate output frame\n"); - return nullptr; + return -1; } // Feed the frame to the filter graph ret = av_buffersrc_add_frame(buffersrc_ctx, input_frame); if (ret < 0) { fprintf(stderr, "Error while feeding the filter graph\n"); - return nullptr; + return ret; } - ret = av_buffersink_get_frame(buffersink_ctx, output_frame); + ret = av_buffersink_get_frame(buffersink_ctx, *output_frame); if (ret < 0) { - av_frame_free(&output_frame); - if (ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) { - char errbuf[AV_ERROR_MAX_STRING_SIZE]; - av_strerror(ret, errbuf, sizeof(errbuf)); - fprintf(stderr, "Error getting frame from filter graph: %s\n", errbuf); - return nullptr; - } - return (AVFrame *)-1; + av_frame_free(output_frame); + return ret; } // Rescale PTS to encoder's time base - output_frame->pts = - av_rescale_q(output_frame->pts, buffersink_ctx->inputs[0]->time_base, output_time_base); + (*output_frame)->pts = + av_rescale_q((*output_frame)->pts, buffersink_ctx->inputs[0]->time_base, output_time_base); // Return the processed frame to the caller - return output_frame; + return 0; } int LibplaceboFilter::flush(std::vector &processed_frames) { - int ret = av_buffersrc_add_frame(buffersrc_ctx, nullptr); // Signal EOF to the filter graph + int ret = av_buffersrc_add_frame(buffersrc_ctx, nullptr); if (ret < 0) { fprintf(stderr, "Error while flushing filter graph\n"); return ret; diff --git a/src/libvideo2x.cpp b/src/libvideo2x.cpp index 181d9e2..aa7e5dc 100644 --- a/src/libvideo2x.cpp +++ b/src/libvideo2x.cpp @@ -1,30 +1,41 @@ +#include "libvideo2x.h" + +#include #include #include #include #include - -// FFmpeg headers -extern "C" { -#include -#include -} +#include #include "decoder.h" #include "encoder.h" #include "filter.h" #include "libplacebo_filter.h" -#include "libvideo2x.h" #include "realesrgan_filter.h" -// Function to process frames using the selected filter (same as before) +/** + * @brief Process frames using the selected filter. + * + * @param[in,out] proc_ctx Struct containing the processing context + * @param[in] fmt_ctx Input format context + * @param[in] ofmt_ctx Output format context + * @param[in] dec_ctx Decoder context + * @param[in] enc_ctx Encoder context + * @param[in] filter Filter instance + * @param[in] video_stream_index Index of the video stream in the input format context + * @return int 0 on success, negative value on error + */ int process_frames( - ProcessingStatus *status, - AVFormatContext *fmt_ctx, + EncoderConfig *encoder_config, + VideoProcessingContext *proc_ctx, + AVFormatContext *ifmt_ctx, AVFormatContext *ofmt_ctx, AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, Filter *filter, - int video_stream_index + int video_stream_index, + int *stream_mapping, + bool benchmark = false ) { int ret; AVPacket packet; @@ -32,21 +43,21 @@ int process_frames( char errbuf[AV_ERROR_MAX_STRING_SIZE]; // Get the total number of frames in the video - AVStream *video_stream = fmt_ctx->streams[video_stream_index]; - status->total_frames = video_stream->nb_frames; + AVStream *video_stream = ifmt_ctx->streams[video_stream_index]; + proc_ctx->total_frames = video_stream->nb_frames; // If nb_frames is not set, calculate total frames using duration and frame rate - if (status->total_frames == 0) { + if (proc_ctx->total_frames == 0) { int64_t duration = video_stream->duration; AVRational frame_rate = video_stream->avg_frame_rate; if (duration != AV_NOPTS_VALUE && frame_rate.num != 0 && frame_rate.den != 0) { - status->total_frames = duration * frame_rate.num / frame_rate.den; + proc_ctx->total_frames = duration * frame_rate.num / frame_rate.den; } } // Get start time - status->start_time = time(NULL); - if (status->start_time == -1) { + proc_ctx->start_time = time(NULL); + if (proc_ctx->start_time == -1) { perror("time"); } @@ -57,8 +68,8 @@ int process_frames( } // Read frames from the input file - while (1) { - ret = av_read_frame(fmt_ctx, &packet); + while (!proc_ctx->abort) { + ret = av_read_frame(ifmt_ctx, &packet); if (ret < 0) { break; // End of file or error } @@ -74,7 +85,13 @@ int process_frames( } // Receive and process frames from the decoder - while (1) { + while (!proc_ctx->abort) { + // Check if the processing is paused + if (proc_ctx->pause) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + continue; + } + ret = avcodec_receive_frame(dec_ctx, frame); if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { break; @@ -85,43 +102,52 @@ int process_frames( } // Process the frame using the selected filter - AVFrame *processed_frame = filter->process_frame(frame); - if (processed_frame != nullptr && processed_frame != (AVFrame *)-1) { + AVFrame *processed_frame = nullptr; + ret = filter->process_frame(frame, &processed_frame); + if (ret == 0 && processed_frame != nullptr) { // Encode and write the processed frame - ret = encode_and_write_frame(processed_frame, enc_ctx, ofmt_ctx); - if (ret < 0) { - av_strerror(ret, errbuf, sizeof(errbuf)); - fprintf(stderr, "Error encoding/writing frame: %s\n", errbuf); - av_frame_free(&processed_frame); - goto end; + if (!benchmark) { + ret = encode_and_write_frame( + processed_frame, enc_ctx, ofmt_ctx, video_stream_index + ); + if (ret < 0) { + av_strerror(ret, errbuf, sizeof(errbuf)); + fprintf(stderr, "Error encoding/writing frame: %s\n", errbuf); + av_frame_free(&processed_frame); + goto end; + } } av_frame_free(&processed_frame); - status->processed_frames++; - } else if (processed_frame != (AVFrame *)-1) { - fprintf(stderr, "Error processing frame\n"); + proc_ctx->processed_frames++; + } else if (ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) { + fprintf(stderr, "Filter returned an error\n"); goto end; } av_frame_unref(frame); + // TODO: Print the debug processing status + } + } else if (encoder_config->copy_streams && stream_mapping[packet.stream_index] >= 0) { + AVStream *in_stream = ifmt_ctx->streams[packet.stream_index]; + int out_stream_index = stream_mapping[packet.stream_index]; + AVStream *out_stream = ofmt_ctx->streams[out_stream_index]; - // Print the processing status - printf( - "\r[Video2X] Processing frame %ld/%ld (%.2f%%); time elapsed: %lds", - status->processed_frames, - status->total_frames, - status->processed_frames * 100.0 / status->total_frames, - time(NULL) - status->start_time - ); - fflush(stdout); + // Rescale packet timestamps + av_packet_rescale_ts(&packet, in_stream->time_base, out_stream->time_base); + packet.stream_index = out_stream_index; + + // If copy streams is enabled, copy the packet to the output + ret = av_interleaved_write_frame(ofmt_ctx, &packet); + if (ret < 0) { + fprintf(stderr, "Error muxing packet\n"); + av_packet_unref(&packet); + return ret; } } av_packet_unref(&packet); } - // Print a newline after processing all frames - printf("\n"); - // Flush the filter ret = filter->flush(flushed_frames); if (ret < 0) { @@ -132,7 +158,7 @@ int process_frames( // Encode and write all flushed frames for (AVFrame *&flushed_frame : flushed_frames) { - ret = encode_and_write_frame(flushed_frame, enc_ctx, ofmt_ctx); + ret = encode_and_write_frame(flushed_frame, enc_ctx, ofmt_ctx, video_stream_index); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); fprintf(stderr, "Error encoding/writing flushed frame: %s\n", errbuf); @@ -163,25 +189,18 @@ end: return ret; } -// Cleanup helper function +// Cleanup resources after processing the video void cleanup( - AVFormatContext *fmt_ctx, + AVFormatContext *ifmt_ctx, AVFormatContext *ofmt_ctx, AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, + AVBufferRef *hw_ctx, + int *stream_mapping, Filter *filter ) { - if (filter) { - delete filter; - } - if (dec_ctx) { - avcodec_free_context(&dec_ctx); - } - if (enc_ctx) { - avcodec_free_context(&enc_ctx); - } - if (fmt_ctx) { - avformat_close_input(&fmt_ctx); + if (ifmt_ctx) { + avformat_close_input(&ifmt_ctx); } if (ofmt_ctx && !(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) { avio_closep(&ofmt_ctx->pb); @@ -189,29 +208,68 @@ void cleanup( if (ofmt_ctx) { avformat_free_context(ofmt_ctx); } + if (dec_ctx) { + avcodec_free_context(&dec_ctx); + } + if (enc_ctx) { + avcodec_free_context(&enc_ctx); + } + if (hw_ctx) { + av_buffer_unref(&hw_ctx); + } + if (stream_mapping) { + av_free(stream_mapping); + } + if (filter) { + delete filter; + } } -// Main function to process the video +/** + * @brief Process a video file using the selected filter and encoder settings. + * + * @param[in] input_filename Path to the input video file + * @param[in] output_filename Path to the output video file + * @param[in] hw_type Hardware device type + * @param[in] filter_config Filter configurations + * @param[in] encoder_config Encoder configurations + * @param[in,out] proc_ctx Video processing context + * @return int 0 on success, non-zero value on error + */ extern "C" int process_video( const char *input_filename, const char *output_filename, + bool benchmark, + AVHWDeviceType hw_type, const FilterConfig *filter_config, EncoderConfig *encoder_config, - ProcessingStatus *status + VideoProcessingContext *proc_ctx ) { - AVFormatContext *fmt_ctx = nullptr; + AVFormatContext *ifmt_ctx = nullptr; AVFormatContext *ofmt_ctx = nullptr; AVCodecContext *dec_ctx = nullptr; AVCodecContext *enc_ctx = nullptr; + AVBufferRef *hw_ctx = nullptr; + int *stream_mapping = nullptr; Filter *filter = nullptr; int video_stream_index = -1; - int ret = 0; // Initialize ret with 0 to assume success + int ret = 0; + + // Initialize hardware device context + if (hw_type != AV_HWDEVICE_TYPE_NONE) { + ret = av_hwdevice_ctx_create(&hw_ctx, hw_type, NULL, NULL, 0); + if (ret < 0) { + fprintf(stderr, "Unable to initialize hardware device context\n"); + return ret; + } + } // Initialize input - if (init_decoder(input_filename, &fmt_ctx, &dec_ctx, &video_stream_index) < 0) { + ret = init_decoder(hw_type, hw_ctx, input_filename, &ifmt_ctx, &dec_ctx, &video_stream_index); + if (ret < 0) { fprintf(stderr, "Failed to initialize decoder\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter); + return ret; } // Initialize output based on Libplacebo or RealESRGAN configuration @@ -230,17 +288,29 @@ extern "C" int process_video( // Initialize output encoder encoder_config->output_width = output_width; encoder_config->output_height = output_height; - if (init_encoder(output_filename, &ofmt_ctx, &enc_ctx, dec_ctx, encoder_config) < 0) { + ret = init_encoder( + hw_ctx, + output_filename, + ifmt_ctx, + &ofmt_ctx, + &enc_ctx, + dec_ctx, + encoder_config, + video_stream_index, + &stream_mapping + ); + if (ret < 0) { fprintf(stderr, "Failed to initialize encoder\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter); + return ret; } // Write the output file header - if (avformat_write_header(ofmt_ctx, NULL) < 0) { + ret = avformat_write_header(ofmt_ctx, NULL); + if (ret < 0) { fprintf(stderr, "Error occurred when opening output file\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter); + return ret; } // Create and initialize the appropriate filter @@ -251,20 +321,20 @@ extern "C" int process_video( // Validate shader path if (!config.shader_path) { fprintf(stderr, "Shader path must be provided for the libplacebo filter\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter); + return -1; } // Validate output dimensions if (config.output_width <= 0 || config.output_height <= 0) { fprintf(stderr, "Output dimensions must be provided for the libplacebo filter\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter); + return -1; } - filter = new LibplaceboFilter( + filter = new LibplaceboFilter{ config.output_width, config.output_height, std::filesystem::path(config.shader_path) - ); + }; break; } case FILTER_REALESRGAN: { @@ -273,55 +343,66 @@ extern "C" int process_video( // Validate model name if (!config.model) { fprintf(stderr, "Model name must be provided for the RealESRGAN filter\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter); + return -1; } // Validate scaling factor if (config.scaling_factor <= 0) { fprintf(stderr, "Scaling factor must be provided for the RealESRGAN filter\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter); + return -1; } - filter = new RealesrganFilter( + filter = new RealesrganFilter{ config.gpuid, config.tta_mode, config.scaling_factor, config.model - ); + }; break; } default: fprintf(stderr, "Unknown filter type\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter); + return -1; } // Initialize the filter - if (filter->init(dec_ctx, enc_ctx) < 0) { + ret = filter->init(dec_ctx, enc_ctx, hw_ctx); + if (ret < 0) { fprintf(stderr, "Failed to initialize filter\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter); + return ret; } // Process frames - if ((ret = - process_frames(status, fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter, video_stream_index) - ) < 0) { + ret = process_frames( + encoder_config, + proc_ctx, + ifmt_ctx, + ofmt_ctx, + dec_ctx, + enc_ctx, + filter, + video_stream_index, + stream_mapping, + benchmark + ); + if (ret < 0) { fprintf(stderr, "Error processing frames\n"); - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); - return 1; + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter); + return ret; } // Write the output file trailer av_write_trailer(ofmt_ctx); // Cleanup before returning - cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter); + cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter); if (ret < 0 && ret != AVERROR_EOF) { char errbuf[AV_ERROR_MAX_STRING_SIZE]; av_strerror(ret, errbuf, sizeof(errbuf)); fprintf(stderr, "Error occurred: %s\n", errbuf); - return 1; + return ret; } return 0; } diff --git a/src/realesrgan_filter.cpp b/src/realesrgan_filter.cpp index 7ccc2dc..6cbe09f 100644 --- a/src/realesrgan_filter.cpp +++ b/src/realesrgan_filter.cpp @@ -1,18 +1,11 @@ +#include "realesrgan_filter.h" + #include #include -#include #include -extern "C" { -#include -#include -#include -} - #include "conversions.h" #include "fsutils.h" -#include "realesrgan.h" -#include "realesrgan_filter.h" RealesrganFilter::RealesrganFilter( int gpuid, @@ -37,7 +30,7 @@ RealesrganFilter::~RealesrganFilter() { } } -int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) { +int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) { // Construct the model paths using std::filesystem std::filesystem::path model_param_path; std::filesystem::path model_bin_path; @@ -62,6 +55,18 @@ int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) { std::filesystem::path model_param_full_path = find_resource_file(model_param_path); std::filesystem::path model_bin_full_path = find_resource_file(model_bin_path); + // Check if the model files exist + if (!std::filesystem::exists(model_param_full_path)) { + fprintf( + stderr, "RealESRGAN model param file not found: %s\n", model_param_full_path.c_str() + ); + return -1; + } + if (!std::filesystem::exists(model_bin_full_path)) { + fprintf(stderr, "RealESRGAN model bin file not found: %s\n", model_bin_full_path.c_str()); + return -1; + } + // Create a new RealESRGAN instance realesrgan = new RealESRGAN(gpuid, tta_mode); @@ -95,12 +100,14 @@ int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) { return 0; } -AVFrame *RealesrganFilter::process_frame(AVFrame *input_frame) { +int RealesrganFilter::process_frame(AVFrame *input_frame, AVFrame **output_frame) { + int ret; + // Convert the input frame to RGB24 ncnn::Mat input_mat = avframe_to_ncnn_mat(input_frame); if (input_mat.empty()) { fprintf(stderr, "Failed to convert AVFrame to ncnn::Mat\n"); - return nullptr; + return -1; } // Allocate space for ouptut ncnn::Mat @@ -108,19 +115,20 @@ AVFrame *RealesrganFilter::process_frame(AVFrame *input_frame) { int output_height = input_mat.h * realesrgan->scale; ncnn::Mat output_mat = ncnn::Mat(output_width, output_height, (size_t)3, 3); - if (realesrgan->process(input_mat, output_mat) != 0) { + ret = realesrgan->process(input_mat, output_mat); + if (ret != 0) { fprintf(stderr, "RealESRGAN processing failed\n"); - return nullptr; + return ret; } // Convert ncnn::Mat to AVFrame - AVFrame *output_frame = ncnn_mat_to_avframe(output_mat, output_pix_fmt); + *output_frame = ncnn_mat_to_avframe(output_mat, output_pix_fmt); // Rescale PTS to encoder's time base - output_frame->pts = av_rescale_q(input_frame->pts, input_time_base, output_time_base); + (*output_frame)->pts = av_rescale_q(input_frame->pts, input_time_base, output_time_base); // Return the processed frame to the caller - return output_frame; + return ret; } int RealesrganFilter::flush(std::vector &processed_frames) { diff --git a/src/video2x.c b/src/video2x.c index 8ae077c..0ae7b65 100644 --- a/src/video2x.c +++ b/src/video2x.c @@ -1,7 +1,19 @@ +#include #include #include #include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#include +#include +#endif + +#include #include #include @@ -11,14 +23,35 @@ const char *VIDEO2X_VERSION = "6.0.0"; +// Set UNIX terminal input to non-blocking mode +#ifndef _WIN32 +void set_nonblocking_input(bool enable) { + static struct termios oldt, newt; + if (enable) { + tcgetattr(STDIN_FILENO, &oldt); + newt = oldt; + newt.c_lflag &= ~(ICANON | ECHO); + tcsetattr(STDIN_FILENO, TCSANOW, &newt); + fcntl(STDIN_FILENO, F_SETFL, O_NONBLOCK); + } else { + tcsetattr(STDIN_FILENO, TCSANOW, &oldt); + fcntl(STDIN_FILENO, F_SETFL, 0); + } +} +#endif + // Define command line options static struct option long_options[] = { + {"version", no_argument, NULL, 'v'}, + {"help", no_argument, NULL, 0}, + // General options {"input", required_argument, NULL, 'i'}, {"output", required_argument, NULL, 'o'}, {"filter", required_argument, NULL, 'f'}, - {"version", no_argument, NULL, 'v'}, - {"help", no_argument, NULL, 0}, + {"hwaccel", required_argument, NULL, 'a'}, + {"nocopystreams", no_argument, NULL, 0}, + {"benchmark", no_argument, NULL, 0}, // Encoder options {"codec", required_argument, NULL, 'c'}, @@ -27,7 +60,7 @@ static struct option long_options[] = { {"bitrate", required_argument, NULL, 'b'}, {"crf", required_argument, NULL, 'q'}, - // Libplacebo options + // libplacebo options {"shader", required_argument, NULL, 's'}, {"width", required_argument, NULL, 'w'}, {"height", required_argument, NULL, 'h'}, @@ -45,6 +78,9 @@ struct arguments { const char *input_filename; const char *output_filename; const char *filter_type; + const char *hwaccel; + bool nocopystreams; + bool benchmark; // Encoder options const char *codec; @@ -64,6 +100,14 @@ struct arguments { int scaling_factor; }; +struct ProcessVideoThreadArguments { + struct arguments *arguments; + enum AVHWDeviceType hw_device_type; + struct FilterConfig *filter_config; + struct EncoderConfig *encoder_config; + struct VideoProcessingContext *proc_ctx; +}; + const char *valid_models[] = { "realesrgan-plus", "realesrgan-plus-anime", @@ -84,19 +128,23 @@ int is_valid_realesrgan_model(const char *model) { void print_help() { printf("Usage: video2x [OPTIONS]\n"); - printf("\nGeneral Options:\n"); + printf("\nOptions:\n"); + printf(" -v, --version Print program version\n"); + printf(" -?, --help Display this help page\n"); + printf("\nGeneral Processing Options:\n"); printf(" -i, --input Input video file path\n"); printf(" -o, --output Output video file path\n"); printf(" -f, --filter Filter to use: 'libplacebo' or 'realesrgan'\n"); - printf(" -v, --version Print program version\n"); - printf(" --help Display this help page\n"); + printf(" -a, --hwaccel Hardware acceleration method (default: none)\n"); + printf(" --nocopystreams Do not copy audio and subtitle streams\n"); + printf(" --benchmark Discard processed frames and calculate average FPS\n"); printf("\nEncoder Options (Optional):\n"); printf(" -c, --codec Output codec (default: libx264)\n"); - printf(" -p, --preset Encoder preset (default: veryslow)\n"); - printf(" -x, --pixfmt Output pixel format (default: yuv420p)\n"); - printf(" -b, --bitrate Bitrate in bits per second (default: 2000000)\n"); - printf(" -q, --crf Constant Rate Factor (default: 17.0)\n"); + printf(" -p, --preset Encoder preset (default: slow)\n"); + printf(" -x, --pixfmt Output pixel format (default: auto)\n"); + printf(" -b, --bitrate Bitrate in bits per second (default: 0 (VBR))\n"); + printf(" -q, --crf Constant Rate Factor (default: 20.0)\n"); printf("\nlibplacebo Options:\n"); printf(" -s, --shader Name or path to custom GLSL shader file\n"); @@ -107,6 +155,10 @@ void print_help() { printf(" -g, --gpuid Vulkan GPU ID (default: 0)\n"); printf(" -m, --model Name of the model to use\n"); printf(" -r, --scale Scaling factor (2, 3, or 4)\n"); + + printf("\nExamples Usage:\n"); + printf(" video2x -i in.mp4 -o out.mp4 -f libplacebo -s anime4k-mode-a -w 3840 -h 2160\n"); + printf(" video2x -i in.mp4 -o out.mp4 -f realesrgan -m realesr-animevideov3 -r 4\n"); } void parse_arguments(int argc, char **argv, struct arguments *arguments) { @@ -117,13 +169,16 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) { arguments->input_filename = NULL; arguments->output_filename = NULL; arguments->filter_type = NULL; + arguments->hwaccel = "none"; + arguments->nocopystreams = false; + arguments->benchmark = false; // Encoder options arguments->codec = "libx264"; - arguments->preset = "veryslow"; - arguments->pix_fmt = "yuv420p"; - arguments->bitrate = 2 * 1000 * 1000; - arguments->crf = 17.0; + arguments->preset = "slow"; + arguments->pix_fmt = NULL; + arguments->bitrate = 0; + arguments->crf = 20.0; // libplacebo options arguments->shader_path = NULL; @@ -135,8 +190,9 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) { arguments->model = NULL; arguments->scaling_factor = 0; - while ((c = getopt_long(argc, argv, "i:o:f:c:x:p:b:q:s:w:h:r:m:v", long_options, &option_index) - ) != -1) { + while ((c = getopt_long( + argc, argv, "i:o:f:a:c:x:p:b:q:s:w:h:r:m:v", long_options, &option_index + )) != -1) { switch (c) { case 'i': arguments->input_filename = optarg; @@ -147,6 +203,9 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) { case 'f': arguments->filter_type = optarg; break; + case 'a': + arguments->hwaccel = optarg; + break; case 'c': arguments->codec = optarg; break; @@ -195,7 +254,8 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) { if (!is_valid_realesrgan_model(arguments->model)) { fprintf( stderr, - "Error: Invalid model specified. Must be 'realesrgan-plus', 'realesrgan-plus-anime', or 'realesr-animevideov3'.\n" + "Error: Invalid model specified. Must be 'realesrgan-plus', " + "'realesrgan-plus-anime', or 'realesr-animevideov3'.\n" ); exit(1); } @@ -209,12 +269,16 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) { } break; case 'v': - printf("video2x %s\n", VIDEO2X_VERSION); + printf("Video2X v%s\n", VIDEO2X_VERSION); exit(0); - case 0: // Long-only options without short equivalents (e.g., help) + case 0: // Long-only options without short equivalents if (strcmp(long_options[option_index].name, "help") == 0) { print_help(); exit(0); + } else if (strcmp(long_options[option_index].name, "nocopystreams") == 0) { + arguments->nocopystreams = true; + } else if (strcmp(long_options[option_index].name, "benchmark") == 0) { + arguments->benchmark = true; } break; default: @@ -224,8 +288,13 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) { } // Check for required arguments - if (!arguments->input_filename || !arguments->output_filename) { - fprintf(stderr, "Error: Input and output files are required.\n"); + if (!arguments->input_filename) { + fprintf(stderr, "Error: Input file path is required.\n"); + exit(1); + } + + if (!arguments->output_filename && !arguments->benchmark) { + fprintf(stderr, "Error: Output file path is required.\n"); exit(1); } @@ -239,7 +308,8 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) { arguments->output_height == 0) { fprintf( stderr, - "Error: For libplacebo, shader name/path (-s), width (-w), and height (-e) are required.\n" + "Error: For libplacebo, shader name/path (-s), width (-w), " + "and height (-e) are required.\n" ); exit(1); } @@ -253,7 +323,40 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) { } } +// Wrapper function for video processing thread +int process_video_thread(void *arg) { + struct ProcessVideoThreadArguments *thread_args = (struct ProcessVideoThreadArguments *)arg; + + // Extract individual arguments + struct arguments *arguments = thread_args->arguments; + enum AVHWDeviceType hw_device_type = thread_args->hw_device_type; + struct FilterConfig *filter_config = thread_args->filter_config; + struct EncoderConfig *encoder_config = thread_args->encoder_config; + struct VideoProcessingContext *proc_ctx = thread_args->proc_ctx; + + // Call the process_video function + int result = process_video( + arguments->input_filename, + arguments->output_filename, + arguments->benchmark, + hw_device_type, + filter_config, + encoder_config, + proc_ctx + ); + + proc_ctx->completed = true; + return result; +} + int main(int argc, char **argv) { + // Print help if no arguments are provided + if (argc < 2) { + print_help(); + return 1; + } + + // Parse command line arguments struct arguments arguments; parse_arguments(argc, argv, &arguments); @@ -283,16 +386,20 @@ int main(int argc, char **argv) { } // Parse pixel format to AVPixelFormat - enum AVPixelFormat pix_fmt = av_get_pix_fmt(arguments.pix_fmt); - if (pix_fmt == AV_PIX_FMT_NONE) { - fprintf(stderr, "Error: Invalid pixel format '%s'.\n", arguments.pix_fmt); - return 1; + enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE; + if (arguments.pix_fmt) { + pix_fmt = av_get_pix_fmt(arguments.pix_fmt); + if (pix_fmt == AV_PIX_FMT_NONE) { + fprintf(stderr, "Error: Invalid pixel format '%s'.\n", arguments.pix_fmt); + return 1; + } } // Setup encoder configuration struct EncoderConfig encoder_config = { .output_width = 0, // To be filled by libvideo2x .output_height = 0, // To be filled by libvideo2x + .copy_streams = !arguments.nocopystreams, .codec = codec->id, .pix_fmt = pix_fmt, .preset = arguments.preset, @@ -300,26 +407,134 @@ int main(int argc, char **argv) { .crf = arguments.crf, }; - // Setup struct to store processing status - struct ProcessingStatus status = {0}; - - // Process the video - if (process_video( - arguments.input_filename, - arguments.output_filename, - &filter_config, - &encoder_config, - &status - )) { - fprintf(stderr, "Video processing failed.\n"); - return 1; + // Parse hardware acceleration method + enum AVHWDeviceType hw_device_type = AV_HWDEVICE_TYPE_NONE; + if (strcmp(arguments.hwaccel, "none") != 0) { + hw_device_type = av_hwdevice_find_type_by_name(arguments.hwaccel); + if (hw_device_type == AV_HWDEVICE_TYPE_NONE) { + fprintf(stderr, "Error: Invalid hardware device type '%s'.\n", arguments.hwaccel); + return 1; + } } + // Setup struct to store processing context + struct VideoProcessingContext proc_ctx = { + .processed_frames = 0, + .total_frames = 0, + .start_time = time(NULL), + .pause = false, + .abort = false, + .completed = false + }; + + // Create a ThreadArguments struct to hold all the arguments for the thread + struct ProcessVideoThreadArguments thread_args = { + .arguments = &arguments, + .hw_device_type = hw_device_type, + .filter_config = &filter_config, + .encoder_config = &encoder_config, + .proc_ctx = &proc_ctx + }; + +// Enable non-blocking input +#ifndef _WIN32 + set_nonblocking_input(true); +#endif + + // Create a thread for video processing + thrd_t processing_thread; + if (thrd_create(&processing_thread, process_video_thread, &thread_args) != thrd_success) { + fprintf(stderr, "Failed to create processing thread\n"); + return 1; + } + printf("[Video2X] Video processing started.\n"); + printf("[Video2X] Press SPACE to pause/resume, 'q' to abort.\n"); + + // Main thread loop to display progress and handle input + while (!proc_ctx.completed) { + // Check for key presses + int ch = -1; + + // Check for key press +#ifdef _WIN32 + if (_kbhit()) { + ch = _getch(); + } +#else + ch = getchar(); +#endif + + if (ch == ' ' || ch == '\n') { + // Toggle pause state + proc_ctx.pause = !proc_ctx.pause; + if (proc_ctx.pause) { + printf("\n[Video2X] Processing paused. Press SPACE to resume, 'q' to abort."); + } else { + printf("\n[Video2X] Resuming processing..."); + } + fflush(stdout); + } else if (ch == 'q' || ch == 'Q') { + // Abort processing + printf("\n[Video2X] Aborting processing..."); + fflush(stdout); + proc_ctx.abort = true; + break; + } + + // Display progress + if (!proc_ctx.pause && proc_ctx.total_frames > 0) { + printf( + "\r[Video2X] Processing frame %ld/%ld (%.2f%%); time elapsed: %lds", + proc_ctx.processed_frames, + proc_ctx.total_frames, + proc_ctx.total_frames > 0 + ? proc_ctx.processed_frames * 100.0 / proc_ctx.total_frames + : 0.0, + time(NULL) - proc_ctx.start_time + ); + fflush(stdout); + } + + // Sleep for a short duration + thrd_sleep(&(struct timespec){.tv_sec = 0, .tv_nsec = 100000000}, NULL); // Sleep for 100ms + } + puts(""); // Print newline after progress bar is complete + +// Restore terminal to blocking mode +#ifndef _WIN32 + set_nonblocking_input(false); +#endif + + // Join the processing thread to ensure it completes before exiting + int process_result; + thrd_join(processing_thread, &process_result); + + if (proc_ctx.abort) { + fprintf(stderr, "Video processing aborted\n"); + return 2; + } + + if (process_result != 0) { + fprintf(stderr, "Video processing failed\n"); + return process_result; + } + + // Calculate statistics + time_t time_elapsed = time(NULL) - proc_ctx.start_time; + float average_speed_fps = + (float)proc_ctx.processed_frames / (time_elapsed > 0 ? time_elapsed : 1); + // Print processing summary - printf("====== Video2X Processing summary ======\n"); + printf("====== Video2X %s summary ======\n", arguments.benchmark ? "Benchmark" : "Processing"); printf("Video file processed: %s\n", arguments.input_filename); - printf("Total frames processed: %ld\n", status.processed_frames); - printf("Total time taken: %lds\n", time(NULL) - status.start_time); - printf("Output written to: %s\n", arguments.output_filename); + printf("Total frames processed: %ld\n", proc_ctx.processed_frames); + printf("Total time taken: %lds\n", time_elapsed); + printf("Average processing speed: %.2f FPS\n", average_speed_fps); + + // Print additional information if not in benchmark mode + if (!arguments.benchmark) { + printf("Output written to: %s\n", arguments.output_filename); + } + return 0; }