From 5a59907493029d71fdc13bcbf05d6fd478dd1441 Mon Sep 17 00:00:00 2001 From: k4yt3x Date: Sat, 30 Nov 2024 00:00:00 +0000 Subject: [PATCH] feat: complete motion interpolation and add scene detection Signed-off-by: k4yt3x --- include/libvideo2x/avutils.h | 2 ++ include/libvideo2x/libvideo2x.h | 3 +- src/avutils.cpp | 59 +++++++++++++++++++++++++++++++++ src/encoder.cpp | 4 +-- src/interpolator_rife.cpp | 7 +--- src/libvideo2x.cpp | 37 ++++++++++++++++----- tools/video2x/src/video2x.cpp | 38 +++++++++++++-------- 7 files changed, 119 insertions(+), 31 deletions(-) diff --git a/include/libvideo2x/avutils.h b/include/libvideo2x/avutils.h index 6f256ae..68c04d9 100644 --- a/include/libvideo2x/avutils.h +++ b/include/libvideo2x/avutils.h @@ -14,4 +14,6 @@ int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx); enum AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt); +float get_frame_diff(AVFrame *frame1, AVFrame *frame2); + #endif // AVUTILS_H diff --git a/include/libvideo2x/libvideo2x.h b/include/libvideo2x/libvideo2x.h index a641724..70f5cba 100644 --- a/include/libvideo2x/libvideo2x.h +++ b/include/libvideo2x/libvideo2x.h @@ -76,7 +76,8 @@ struct ProcessorConfig { int width; int height; int scaling_factor; - int frame_rate_multiplier; + int frm_rate_mul; + float scn_det_thresh; union { struct LibplaceboConfig libplacebo; struct RealESRGANConfig realesrgan; diff --git a/src/avutils.cpp b/src/avutils.cpp index 07f0886..7b5e7c2 100644 --- a/src/avutils.cpp +++ b/src/avutils.cpp @@ -1,5 +1,7 @@ #include "avutils.h" +#include + extern "C" { #include #include @@ -7,6 +9,8 @@ extern "C" { #include +#include "conversions.h" + AVRational get_video_frame_rate(AVFormatContext *ifmt_ctx, int in_vstream_idx) { AVRational frame_rate = ifmt_ctx->streams[in_vstream_idx]->avg_frame_rate; if (frame_rate.num == 0 && frame_rate.den == 0) { @@ -127,3 +131,58 @@ get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt return best_pix_fmt; } + +float get_frame_diff(AVFrame *frame1, AVFrame *frame2) { + if (!frame1 || !frame2) { + spdlog::error("Invalid frame(s) provided for comparison"); + return -1.0f; + } + + if (frame1->width != frame2->width || frame1->height != frame2->height) { + spdlog::error("Frame dimensions do not match"); + return -1.0f; + } + + int width = frame1->width; + int height = frame1->height; + + // Convert both frames to the target pixel format using the provided function + AVPixelFormat target_pix_fmt = AV_PIX_FMT_RGB24; + AVFrame *rgb_frame1 = convert_avframe_pix_fmt(frame1, target_pix_fmt); + AVFrame *rgb_frame2 = convert_avframe_pix_fmt(frame2, target_pix_fmt); + + if (!rgb_frame1 || !rgb_frame2) { + spdlog::error("Failed to convert frames to target pixel format"); + if (rgb_frame1) { + av_frame_free(&rgb_frame1); + } + if (rgb_frame2) { + av_frame_free(&rgb_frame2); + } + return -1.0f; + } + + uint64_t sum_diff = 0; + uint64_t max_diff = 0; + + // Calculate difference pixel by pixel + for (int y = 0; y < height; y++) { + uint8_t *ptr1 = rgb_frame1->data[0] + y * rgb_frame1->linesize[0]; + uint8_t *ptr2 = rgb_frame2->data[0] + y * rgb_frame2->linesize[0]; + for (int x = 0; x < width * 3; x++) { + sum_diff += static_cast( + std::abs(static_cast(ptr1[x]) - static_cast(ptr2[x])) + ); + max_diff += 255; + } + } + + // Clean up + av_frame_free(&rgb_frame1); + av_frame_free(&rgb_frame2); + + // Calculate percentage difference + float percent_diff = (static_cast(sum_diff) / static_cast(max_diff)) * 100.0f; + + return percent_diff; +} diff --git a/src/encoder.cpp b/src/encoder.cpp index bfa5818..141f448 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -122,10 +122,10 @@ int Encoder::init( spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(enc_ctx_->pix_fmt)); } - if (processor_config->frame_rate_multiplier > 0) { + if (processor_config->frm_rate_mul > 0) { AVRational in_frame_rate = get_video_frame_rate(ifmt_ctx, in_vstream_idx); enc_ctx_->framerate = { - in_frame_rate.num * processor_config->frame_rate_multiplier, in_frame_rate.den + in_frame_rate.num * processor_config->frm_rate_mul, in_frame_rate.den }; enc_ctx_->time_base = av_inv_q(enc_ctx_->framerate); } else { diff --git a/src/interpolator_rife.cpp b/src/interpolator_rife.cpp index 21c7475..e54676d 100644 --- a/src/interpolator_rife.cpp +++ b/src/interpolator_rife.cpp @@ -78,15 +78,12 @@ int InterpolatorRIFE::interpolate( ) { int ret; - /* ncnn::Mat in_mat1 = avframe_to_ncnn_mat(prev_frame); if (in_mat1.empty()) { spdlog::error("Failed to convert AVFrame to ncnn::Mat"); return -1; } - */ - // Convert the input frame to RGB24 ncnn::Mat in_mat2 = avframe_to_ncnn_mat(in_frame); if (in_mat2.empty()) { spdlog::error("Failed to convert AVFrame to ncnn::Mat"); @@ -96,9 +93,7 @@ int InterpolatorRIFE::interpolate( // Allocate space for output ncnn::Mat ncnn::Mat out_mat = ncnn::Mat(in_mat2.w, in_mat2.h, static_cast(3), 3); - // TODO: handle frames properly - // ret = rife_->process(in_mat1, in_mat2, time_step, out_mat); - ret = rife_->process(in_mat2, in_mat2, time_step, out_mat); + ret = rife_->process(in_mat1, in_mat2, time_step, out_mat); if (ret != 0) { spdlog::error("RIFE processing failed"); return ret; diff --git a/src/libvideo2x.cpp b/src/libvideo2x.cpp index 3b6f71a..59f8fe9 100644 --- a/src/libvideo2x.cpp +++ b/src/libvideo2x.cpp @@ -150,17 +150,35 @@ static int process_frames( case PROCESSING_MODE_INTERPOLATE: { Interpolator *interpolator = dynamic_cast(processor); - float time_step = - 1.0f / static_cast(processor_config->frame_rate_multiplier); + float time_step = 1.0f / static_cast(processor_config->frm_rate_mul); float current_time_step = time_step; - while (current_time_step < 1.0f) { - ret = interpolator->interpolate( - prev_frame.get(), - frame.get(), - &raw_processed_frame, - current_time_step - ); + bool skip_frame = false; + if (prev_frame != nullptr) { + float frame_diff = get_frame_diff(prev_frame.get(), frame.get()); + if (frame_diff > processor_config->scn_det_thresh) { + spdlog::debug( + "Scene change detected ({:.2f}%), skipping frame {}", + frame_diff, + proc_ctx->processed_frames + ); + skip_frame = true; + } + } + + while (prev_frame != nullptr && current_time_step < 1.0f) { + if (!skip_frame) { + ret = interpolator->interpolate( + prev_frame.get(), + frame.get(), + &raw_processed_frame, + current_time_step + ); + } else { + ret = 0; + raw_processed_frame = av_frame_clone(prev_frame.get()); + } + if (ret < 0 && ret != AVERROR(EAGAIN)) { av_strerror(ret, errbuf, sizeof(errbuf)); return ret; @@ -197,6 +215,7 @@ static int process_frames( return ret; } } + prev_frame.reset(av_frame_clone(frame.get())); break; } default: diff --git a/tools/video2x/src/video2x.cpp b/tools/video2x/src/video2x.cpp index bdfd2a4..c00621d 100644 --- a/tools/video2x/src/video2x.cpp +++ b/tools/video2x/src/video2x.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -64,8 +63,8 @@ struct Arguments { std::filesystem::path in_fname; std::filesystem::path out_fname; StringType processor_type; - uint32_t gpu_id = 0; StringType hwaccel = STR("none"); + uint32_t gpu_id = 0; bool no_copy_streams = false; bool benchmark = false; @@ -90,7 +89,8 @@ struct Arguments { int width = 0; int height = 0; int scaling_factor = 0; - int frame_rate_multiplier = 2; + int frm_rate_mul = 2; + float scn_det_thresh = 0.0f; // libplacebo options std::filesystem::path libplacebo_shader_path; @@ -387,10 +387,10 @@ int main(int argc, char **argv) { ("output,o", PO_STR_VALUE(), "Output video file path") ("processor,p", PO_STR_VALUE(&arguments.processor_type), "Processor to use: 'libplacebo', 'realesrgan', or 'rife'") - ("gpu,g", po::value(&arguments.gpu_id)->default_value(0), - "GPU ID (Vulkan device index)") ("hwaccel,a", PO_STR_VALUE(&arguments.hwaccel)->default_value(STR("none"), "none"), "Hardware acceleration method (mostly for decoding)") + ("gpu,g", po::value(&arguments.gpu_id)->default_value(0), + "GPU ID (Vulkan device index)") ("benchmark,b", po::bool_switch(&arguments.benchmark), "Discard processed frames and calculate average FPS; " "useful for detecting encoder bottlenecks") @@ -432,14 +432,20 @@ int main(int argc, char **argv) { "Additional AVOption(s) for the encoder (format: -e key=value)") ; - po::options_description common_opts("Common aprocessing options"); - common_opts.add_options() + po::options_description upscale_opts("Upscaling options"); + upscale_opts.add_options() ("width,w", po::value(&arguments.width), "Output width") ("height,h", po::value(&arguments.height), "Output height") ("scaling-factor,s", po::value(&arguments.scaling_factor), "Scaling factor") - ("frame-rate-multiplier,r", - po::value(&arguments.frame_rate_multiplier)->default_value(0), - "Output frame rate") + ; + + po::options_description interp_opts("Frame interpolation options"); + interp_opts.add_options() + ("frame-rate-mul,f", + po::value(&arguments.frm_rate_mul)->default_value(0), + "Frame rate multiplier") + ("scene-thresh,d", po::value(&arguments.scn_det_thresh)->default_value(10.0f), + "Scene detection threshold") ; po::options_description libplacebo_opts("libplacebo options"); @@ -467,7 +473,8 @@ int main(int argc, char **argv) { // Combine all options all_opts.add(encoder_opts) - .add(common_opts) + .add(upscale_opts) + .add(interp_opts) .add(libplacebo_opts) .add(realesrgan_opts) .add(rife_opts); @@ -581,10 +588,14 @@ int main(int argc, char **argv) { spdlog::critical("Invalid scaling factor specified."); return 1; } - if (arguments.frame_rate_multiplier < 0) { + if (arguments.frm_rate_mul < 0) { spdlog::critical("Invalid target frame rate specified."); return 1; } + if (arguments.scn_det_thresh < 0.0f || arguments.scn_det_thresh > 100.0f) { + spdlog::critical("Invalid scene detection threshold specified."); + return 1; + } if (arguments.processor_type == STR("libplacebo")) { if (arguments.libplacebo_shader_path.empty() || arguments.width == 0 || @@ -681,7 +692,8 @@ int main(int argc, char **argv) { processor_config.width = arguments.width; processor_config.height = arguments.height; processor_config.scaling_factor = arguments.scaling_factor; - processor_config.frame_rate_multiplier = arguments.frame_rate_multiplier; + processor_config.frm_rate_mul = arguments.frm_rate_mul; + processor_config.scn_det_thresh = arguments.scn_det_thresh; if (arguments.processor_type == STR("libplacebo")) { processor_config.processor_type = PROCESSOR_LIBPLACEBO;