feat: complete motion interpolation and add scene detection
Some checks are pending
Build / ubuntu (push) Waiting to run
Build / windows (push) Waiting to run
Build / container (push) Waiting to run

Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
k4yt3x 2024-11-30 00:00:00 +00:00
parent f41efb3bac
commit 5a59907493
No known key found for this signature in database
7 changed files with 119 additions and 31 deletions

View File

@ -14,4 +14,6 @@ int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx);
enum AVPixelFormat
get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt);
float get_frame_diff(AVFrame *frame1, AVFrame *frame2);
#endif // AVUTILS_H

View File

@ -76,7 +76,8 @@ struct ProcessorConfig {
int width;
int height;
int scaling_factor;
int frame_rate_multiplier;
int frm_rate_mul;
float scn_det_thresh;
union {
struct LibplaceboConfig libplacebo;
struct RealESRGANConfig realesrgan;

View File

@ -1,5 +1,7 @@
#include "avutils.h"
#include <cstdint>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/pixdesc.h>
@ -7,6 +9,8 @@ extern "C" {
#include <spdlog/spdlog.h>
#include "conversions.h"
AVRational get_video_frame_rate(AVFormatContext *ifmt_ctx, int in_vstream_idx) {
AVRational frame_rate = ifmt_ctx->streams[in_vstream_idx]->avg_frame_rate;
if (frame_rate.num == 0 && frame_rate.den == 0) {
@ -127,3 +131,58 @@ get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt
return best_pix_fmt;
}
// Compute how different two video frames are, expressed as a percentage in [0, 100].
//
// Both frames are converted to packed RGB24 with convert_avframe_pix_fmt(). The absolute
// difference of every byte is summed and divided by the largest possible sum (255 per byte).
//
// Parameters:
//   frame1, frame2 - frames to compare; must be non-null and have identical, non-zero
//                    dimensions. Neither frame is freed by this function.
//
// Returns the difference percentage on success, or -1.0f if an argument is null, the
// dimensions differ or are not positive, or the pixel format conversion fails.
float get_frame_diff(AVFrame *frame1, AVFrame *frame2) {
    if (!frame1 || !frame2) {
        spdlog::error("Invalid frame(s) provided for comparison");
        return -1.0f;
    }

    if (frame1->width != frame2->width || frame1->height != frame2->height) {
        spdlog::error("Frame dimensions do not match");
        return -1.0f;
    }

    const int width = frame1->width;
    const int height = frame1->height;

    // A frame without pixels would make the denominator zero and yield NaN; report it as an
    // error instead so callers always get either a percentage or -1.0f.
    if (width <= 0 || height <= 0) {
        spdlog::error("Invalid frame dimensions for comparison");
        return -1.0f;
    }

    // Convert both frames to the target pixel format using the provided function
    const AVPixelFormat target_pix_fmt = AV_PIX_FMT_RGB24;
    AVFrame *rgb_frame1 = convert_avframe_pix_fmt(frame1, target_pix_fmt);
    AVFrame *rgb_frame2 = convert_avframe_pix_fmt(frame2, target_pix_fmt);

    if (!rgb_frame1 || !rgb_frame2) {
        spdlog::error("Failed to convert frames to target pixel format");
        // Release whichever conversion succeeded
        if (rgb_frame1) {
            av_frame_free(&rgb_frame1);
        }
        if (rgb_frame2) {
            av_frame_free(&rgb_frame2);
        }
        return -1.0f;
    }

    // RGB24 stores three bytes per pixel in a single plane
    const int row_bytes = width * 3;

    // The maximum possible difference is loop-invariant: 255 for every compared byte
    const uint64_t max_diff =
        static_cast<uint64_t>(height) * static_cast<uint64_t>(row_bytes) * 255;
    uint64_t sum_diff = 0;

    // Calculate difference byte by byte, one row at a time; rows are linesize[0] bytes apart
    for (int y = 0; y < height; y++) {
        // Widen before multiplying so the row offset cannot overflow int
        const uint8_t *ptr1 =
            rgb_frame1->data[0] + static_cast<int64_t>(y) * rgb_frame1->linesize[0];
        const uint8_t *ptr2 =
            rgb_frame2->data[0] + static_cast<int64_t>(y) * rgb_frame2->linesize[0];

        for (int x = 0; x < row_bytes; x++) {
            sum_diff += static_cast<uint64_t>(
                std::abs(static_cast<int>(ptr1[x]) - static_cast<int>(ptr2[x]))
            );
        }
    }

    // Clean up
    av_frame_free(&rgb_frame1);
    av_frame_free(&rgb_frame2);

    // Calculate percentage difference; divide in double to keep precision for large frames
    const double ratio = static_cast<double>(sum_diff) / static_cast<double>(max_diff);
    return static_cast<float>(ratio * 100.0);
}

View File

@ -122,10 +122,10 @@ int Encoder::init(
spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(enc_ctx_->pix_fmt));
}
if (processor_config->frame_rate_multiplier > 0) {
if (processor_config->frm_rate_mul > 0) {
AVRational in_frame_rate = get_video_frame_rate(ifmt_ctx, in_vstream_idx);
enc_ctx_->framerate = {
in_frame_rate.num * processor_config->frame_rate_multiplier, in_frame_rate.den
in_frame_rate.num * processor_config->frm_rate_mul, in_frame_rate.den
};
enc_ctx_->time_base = av_inv_q(enc_ctx_->framerate);
} else {

View File

@ -78,15 +78,12 @@ int InterpolatorRIFE::interpolate(
) {
int ret;
/*
ncnn::Mat in_mat1 = avframe_to_ncnn_mat(prev_frame);
if (in_mat1.empty()) {
spdlog::error("Failed to convert AVFrame to ncnn::Mat");
return -1;
}
*/
// Convert the input frame to RGB24
ncnn::Mat in_mat2 = avframe_to_ncnn_mat(in_frame);
if (in_mat2.empty()) {
spdlog::error("Failed to convert AVFrame to ncnn::Mat");
@ -96,9 +93,7 @@ int InterpolatorRIFE::interpolate(
// Allocate space for output ncnn::Mat
ncnn::Mat out_mat = ncnn::Mat(in_mat2.w, in_mat2.h, static_cast<size_t>(3), 3);
// TODO: handle frames properly
// ret = rife_->process(in_mat1, in_mat2, time_step, out_mat);
ret = rife_->process(in_mat2, in_mat2, time_step, out_mat);
ret = rife_->process(in_mat1, in_mat2, time_step, out_mat);
if (ret != 0) {
spdlog::error("RIFE processing failed");
return ret;

View File

@ -150,17 +150,35 @@ static int process_frames(
case PROCESSING_MODE_INTERPOLATE: {
Interpolator *interpolator = dynamic_cast<Interpolator *>(processor);
float time_step =
1.0f / static_cast<float>(processor_config->frame_rate_multiplier);
float time_step = 1.0f / static_cast<float>(processor_config->frm_rate_mul);
float current_time_step = time_step;
while (current_time_step < 1.0f) {
ret = interpolator->interpolate(
prev_frame.get(),
frame.get(),
&raw_processed_frame,
current_time_step
);
bool skip_frame = false;
if (prev_frame != nullptr) {
float frame_diff = get_frame_diff(prev_frame.get(), frame.get());
if (frame_diff > processor_config->scn_det_thresh) {
spdlog::debug(
"Scene change detected ({:.2f}%), skipping frame {}",
frame_diff,
proc_ctx->processed_frames
);
skip_frame = true;
}
}
while (prev_frame != nullptr && current_time_step < 1.0f) {
if (!skip_frame) {
ret = interpolator->interpolate(
prev_frame.get(),
frame.get(),
&raw_processed_frame,
current_time_step
);
} else {
ret = 0;
raw_processed_frame = av_frame_clone(prev_frame.get());
}
if (ret < 0 && ret != AVERROR(EAGAIN)) {
av_strerror(ret, errbuf, sizeof(errbuf));
return ret;
@ -197,6 +215,7 @@ static int process_frames(
return ret;
}
}
prev_frame.reset(av_frame_clone(frame.get()));
break;
}
default:

View File

@ -8,7 +8,6 @@
#include <cstring>
#include <filesystem>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
@ -64,8 +63,8 @@ struct Arguments {
std::filesystem::path in_fname;
std::filesystem::path out_fname;
StringType processor_type;
uint32_t gpu_id = 0;
StringType hwaccel = STR("none");
uint32_t gpu_id = 0;
bool no_copy_streams = false;
bool benchmark = false;
@ -90,7 +89,8 @@ struct Arguments {
int width = 0;
int height = 0;
int scaling_factor = 0;
int frame_rate_multiplier = 2;
int frm_rate_mul = 2;
float scn_det_thresh = 0.0f;
// libplacebo options
std::filesystem::path libplacebo_shader_path;
@ -387,10 +387,10 @@ int main(int argc, char **argv) {
("output,o", PO_STR_VALUE<StringType>(), "Output video file path")
("processor,p", PO_STR_VALUE<StringType>(&arguments.processor_type),
"Processor to use: 'libplacebo', 'realesrgan', or 'rife'")
("gpu,g", po::value<uint32_t>(&arguments.gpu_id)->default_value(0),
"GPU ID (Vulkan device index)")
("hwaccel,a", PO_STR_VALUE<StringType>(&arguments.hwaccel)->default_value(STR("none"),
"none"), "Hardware acceleration method (mostly for decoding)")
("gpu,g", po::value<uint32_t>(&arguments.gpu_id)->default_value(0),
"GPU ID (Vulkan device index)")
("benchmark,b", po::bool_switch(&arguments.benchmark),
"Discard processed frames and calculate average FPS; "
"useful for detecting encoder bottlenecks")
@ -432,14 +432,20 @@ int main(int argc, char **argv) {
"Additional AVOption(s) for the encoder (format: -e key=value)")
;
po::options_description common_opts("Common aprocessing options");
common_opts.add_options()
po::options_description upscale_opts("Upscaling options");
upscale_opts.add_options()
("width,w", po::value<int>(&arguments.width), "Output width")
("height,h", po::value<int>(&arguments.height), "Output height")
("scaling-factor,s", po::value<int>(&arguments.scaling_factor), "Scaling factor")
("frame-rate-multiplier,r",
po::value<int>(&arguments.frame_rate_multiplier)->default_value(0),
"Output frame rate")
;
po::options_description interp_opts("Frame interpolation options");
interp_opts.add_options()
("frame-rate-mul,f",
po::value<int>(&arguments.frm_rate_mul)->default_value(0),
"Frame rate multiplier")
("scene-thresh,d", po::value<float>(&arguments.scn_det_thresh)->default_value(10.0f),
"Scene detection threshold")
;
po::options_description libplacebo_opts("libplacebo options");
@ -467,7 +473,8 @@ int main(int argc, char **argv) {
// Combine all options
all_opts.add(encoder_opts)
.add(common_opts)
.add(upscale_opts)
.add(interp_opts)
.add(libplacebo_opts)
.add(realesrgan_opts)
.add(rife_opts);
@ -581,10 +588,14 @@ int main(int argc, char **argv) {
spdlog::critical("Invalid scaling factor specified.");
return 1;
}
if (arguments.frame_rate_multiplier < 0) {
if (arguments.frm_rate_mul < 0) {
spdlog::critical("Invalid target frame rate specified.");
return 1;
}
if (arguments.scn_det_thresh < 0.0f || arguments.scn_det_thresh > 100.0f) {
spdlog::critical("Invalid scene detection threshold specified.");
return 1;
}
if (arguments.processor_type == STR("libplacebo")) {
if (arguments.libplacebo_shader_path.empty() || arguments.width == 0 ||
@ -681,7 +692,8 @@ int main(int argc, char **argv) {
processor_config.width = arguments.width;
processor_config.height = arguments.height;
processor_config.scaling_factor = arguments.scaling_factor;
processor_config.frame_rate_multiplier = arguments.frame_rate_multiplier;
processor_config.frm_rate_mul = arguments.frm_rate_mul;
processor_config.scn_det_thresh = arguments.scn_det_thresh;
if (arguments.processor_type == STR("libplacebo")) {
processor_config.processor_type = PROCESSOR_LIBPLACEBO;