feat(encoder): add support for more encoder options

Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
k4yt3x 2024-11-22 00:00:00 +00:00
parent 169509b7d4
commit 3215c89870
No known key found for this signature in database
5 changed files with 218 additions and 65 deletions

View File

@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added ### Added
- Automatic selection of the most suitable pixel format for the output video. - Automatic selection of the most suitable pixel format for the output video.
- Support for specifying arbitrary `AVOptions` for the encoder.
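- Support for more `AVCodecContext` options (rate control, quantizer limits, GOP structure, reference frames, thread count, and delay).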
### Fixed ### Fixed

View File

@ -70,16 +70,43 @@ struct FilterConfig {
} config; } config;
}; };
// Encoder configuration // Encoder configurations
// Options read by Encoder::init when it configures the encoder context.
//   - The width, height, rate control (bit_rate, rc_*, qmin, qmax), GOP
//     (gop_size, max_b_frames, keyint_min, refs), thread_count and delay
//     fields are assigned directly to the AVCodecContext member of the same
//     name.
//   - width/height are overwritten by process_video with the computed output
//     dimensions before the encoder is initialized.
//   - pix_fmt is used as the specified pixel format unless it is
//     AV_PIX_FMT_NONE.
//   - extra_options points to nb_extra_options key/value string pairs; each
//     pair is applied to the codec's private data with av_opt_set, and a pair
//     that fails to apply is logged as a warning without aborting init.
// NOTE(review): the CLI help text describes `delay` as milliseconds; confirm
// the unit against the AVCodecContext::delay documentation.
struct EncoderConfig { struct EncoderConfig {
int out_width; // Non-AVCodecContext options
int out_height;
bool copy_streams;
enum AVCodecID codec; enum AVCodecID codec;
bool copy_streams;
// Basic video options
int width;
int height;
enum AVPixelFormat pix_fmt; enum AVPixelFormat pix_fmt;
const char *preset;
// Rate control and compression
int64_t bit_rate; int64_t bit_rate;
float crf; int rc_buffer_size;
int rc_min_rate;
int rc_max_rate;
int qmin;
int qmax;
// GOP and frame structure
int gop_size;
int max_b_frames;
int keyint_min;
int refs;
// Performance and threading
int thread_count;
// Latency and buffering
int delay;
// Extra AVOptions
struct {
const char *key;
const char *value;
} *extra_options;
size_t nb_extra_options;
}; };
// Video processing context // Video processing context

View File

@ -73,19 +73,40 @@ int Encoder::init(
enc_ctx_->hw_device_ctx = av_buffer_ref(hw_ctx); enc_ctx_->hw_device_ctx = av_buffer_ref(hw_ctx);
} }
// Set encoding parameters // Copy the color properties from the decoder context
enc_ctx_->height = encoder_config->out_height;
enc_ctx_->width = encoder_config->out_width;
enc_ctx_->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
enc_ctx_->bit_rate = encoder_config->bit_rate;
// Set the color properties
enc_ctx_->color_range = dec_ctx->color_range; enc_ctx_->color_range = dec_ctx->color_range;
enc_ctx_->color_primaries = dec_ctx->color_primaries; enc_ctx_->color_primaries = dec_ctx->color_primaries;
enc_ctx_->color_trc = dec_ctx->color_trc; enc_ctx_->color_trc = dec_ctx->color_trc;
enc_ctx_->colorspace = dec_ctx->colorspace; enc_ctx_->colorspace = dec_ctx->colorspace;
enc_ctx_->chroma_sample_location = dec_ctx->chroma_sample_location; enc_ctx_->chroma_sample_location = dec_ctx->chroma_sample_location;
// Extra options copied from the decoder context
enc_ctx_->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
// Set basic video options
enc_ctx_->width = encoder_config->width;
enc_ctx_->height = encoder_config->height;
// Set rate control and compression options
enc_ctx_->bit_rate = encoder_config->bit_rate;
enc_ctx_->rc_buffer_size = encoder_config->rc_buffer_size;
enc_ctx_->rc_min_rate = encoder_config->rc_min_rate;
enc_ctx_->rc_max_rate = encoder_config->rc_max_rate;
enc_ctx_->qmin = encoder_config->qmin;
enc_ctx_->qmax = encoder_config->qmax;
// Set GOP and frame structure options
enc_ctx_->gop_size = encoder_config->gop_size;
enc_ctx_->max_b_frames = encoder_config->max_b_frames;
enc_ctx_->keyint_min = encoder_config->keyint_min;
enc_ctx_->refs = encoder_config->refs;
// Set performance and threading options
enc_ctx_->thread_count = encoder_config->thread_count;
// Set latency and buffering options
enc_ctx_->delay = encoder_config->delay;
// Set the pixel format // Set the pixel format
if (encoder_config->pix_fmt != AV_PIX_FMT_NONE) { if (encoder_config->pix_fmt != AV_PIX_FMT_NONE) {
// Use the specified pixel format // Use the specified pixel format
@ -114,10 +135,16 @@ int Encoder::init(
enc_ctx_->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr); enc_ctx_->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr);
} }
// Set the CRF and preset for any codecs that support it // Set extra AVOptions
std::string crf_str = std::to_string(encoder_config->crf); for (size_t i = 0; i < encoder_config->nb_extra_options; i++) {
av_opt_set(enc_ctx_->priv_data, "crf", crf_str.c_str(), 0); const char *key = encoder_config->extra_options[i].key;
av_opt_set(enc_ctx_->priv_data, "preset", encoder_config->preset, 0); const char *value = encoder_config->extra_options[i].value;
spdlog::debug("Setting encoder option '{}' to '{}'", key, value);
if (av_opt_set(enc_ctx_->priv_data, key, value, 0) < 0) {
spdlog::warn("Failed to set encoder option '{}' to '{}'", key, value);
}
}
// Use global headers if necessary // Use global headers if necessary
if (ofmt_ctx_->oformat->flags & AVFMT_GLOBALHEADER) { if (ofmt_ctx_->oformat->flags & AVFMT_GLOBALHEADER) {

View File

@ -293,8 +293,8 @@ extern "C" int process_video(
spdlog::debug("Output video dimensions: {}x{}", output_width, output_height); spdlog::debug("Output video dimensions: {}x{}", output_width, output_height);
// Update encoder configuration with output dimensions // Update encoder configuration with output dimensions
encoder_config->out_width = output_width; encoder_config->width = output_width;
encoder_config->out_height = output_height; encoder_config->height = output_height;
// Initialize the encoder // Initialize the encoder
Encoder encoder; Encoder encoder;

View File

@ -7,6 +7,7 @@
#include <cstring> #include <cstring>
#include <filesystem> #include <filesystem>
#include <iostream> #include <iostream>
#include <memory>
#include <mutex> #include <mutex>
#include <string> #include <string>
#include <thread> #include <thread>
@ -64,20 +65,30 @@ struct Arguments {
StringType filter_type; StringType filter_type;
uint32_t gpuid = 0; uint32_t gpuid = 0;
StringType hwaccel = STR("none"); StringType hwaccel = STR("none");
bool nocopystreams = false; bool no_copy_streams = false;
bool benchmark = false; bool benchmark = false;
// Encoder options // Encoder options
StringType codec = STR("libx264"); StringType codec = STR("libx264");
StringType preset = STR("slow");
StringType pix_fmt; StringType pix_fmt;
int64_t bitrate = 0; int64_t bit_rate = 0;
float crf = 20.0f; int rc_buffer_size = 0;
int rc_min_rate = 0;
int rc_max_rate = 0;
int qmin = -1;
int qmax = -1;
int gop_size = -1;
int max_b_frames = -1;
int keyint_min = -1;
int refs = -1;
int thread_count = 0;
int delay = 0;
std::vector<std::pair<StringType, StringType>> extra_options;
// libplacebo options // libplacebo options
std::filesystem::path shader_path; std::filesystem::path shader_path;
int out_width = 0; int width = 0;
int out_height = 0; int height = 0;
// RealESRGAN options // RealESRGAN options
StringType model_name; StringType model_name;
@ -340,30 +351,60 @@ int main(int argc, char **argv) {
desc.add_options() desc.add_options()
("help", "Display this help page") ("help", "Display this help page")
("version,v", "Print program version") ("version,v", "Print program version")
("loglevel", PO_STR_VALUE<StringType>(&arguments.loglevel)->default_value(STR("info"), "info"), "Set log level (trace, debug, info, warn, error, critical, none)") ("loglevel", PO_STR_VALUE<StringType>(&arguments.loglevel)->default_value(STR("info"),
("noprogress", po::bool_switch(&arguments.noprogress), "Do not display the progress bar") "info"), "Set log level (trace, debug, info, warn, error, critical, none)")
("noprogress", po::bool_switch(&arguments.noprogress),
"Do not display the progress bar")
("listgpus", "List the available GPUs") ("listgpus", "List the available GPUs")
// General Processing Options // General Processing Options
("input,i", PO_STR_VALUE<StringType>(), "Input video file path") ("input,i", PO_STR_VALUE<StringType>(), "Input video file path")
("output,o", PO_STR_VALUE<StringType>(), "Output video file path") ("output,o", PO_STR_VALUE<StringType>(), "Output video file path")
("filter,f", PO_STR_VALUE<StringType>(&arguments.filter_type), "Filter to use: 'libplacebo' or 'realesrgan'") ("filter,f", PO_STR_VALUE<StringType>(&arguments.filter_type),
("gpuid,g", po::value<uint32_t>(&arguments.gpuid)->default_value(0), "Vulkan GPU ID (default: 0)") "Filter to use: 'libplacebo' or 'realesrgan'")
("hwaccel,a", PO_STR_VALUE<StringType>(&arguments.hwaccel)->default_value(STR("none"), "none"), "Hardware acceleration method (default: none)") ("gpuid,g", po::value<uint32_t>(&arguments.gpuid)->default_value(0), "Vulkan GPU ID")
("nocopystreams", po::bool_switch(&arguments.nocopystreams), "Do not copy audio and subtitle streams") ("hwaccel,a", PO_STR_VALUE<StringType>(&arguments.hwaccel)->default_value(STR("none"),
("benchmark", po::bool_switch(&arguments.benchmark), "Discard processed frames and calculate average FPS") "none"), "Hardware acceleration method")
("benchmark", po::bool_switch(&arguments.benchmark),
"Discard processed frames and calculate average FPS")
// Encoder options // Encoder options
("codec,c", PO_STR_VALUE<StringType>(&arguments.codec)->default_value(STR("libx264"), "libx264"), "Output codec (default: libx264)") ("codec,c", PO_STR_VALUE<StringType>(&arguments.codec)->default_value(STR("libx264"),
("preset,p", PO_STR_VALUE<StringType>(&arguments.preset)->default_value(STR("slow"), "slow"), "Encoder preset (default: slow)") "libx264"), "Output codec")
("pixfmt,x", PO_STR_VALUE<StringType>(&arguments.pix_fmt), "Output pixel format (default: auto)") ("no_copy_streams", po::bool_switch(&arguments.no_copy_streams),
("bitrate,b", po::value<int64_t>(&arguments.bitrate)->default_value(0), "Bitrate in bits per second (default: 0 (VBR))") "Do not copy audio and subtitle streams")
("crf,q", po::value<float>(&arguments.crf)->default_value(20.0f), "Constant Rate Factor (default: 20.0)") ("pix_fmt", PO_STR_VALUE<StringType>(&arguments.pix_fmt), "Output pixel format")
("bit_rate", po::value<int64_t>(&arguments.bit_rate)->default_value(0),
"Bitrate in bits per second")
("rc_buffer_size", po::value<int>(&arguments.rc_buffer_size)->default_value(0),
"Rate control buffer size in bits")
("rc_min_rate", po::value<int>(&arguments.rc_min_rate)->default_value(0),
"Minimum rate control")
("rc_max_rate", po::value<int>(&arguments.rc_max_rate)->default_value(0),
"Maximum rate control")
("qmin", po::value<int>(&arguments.qmin)->default_value(-1), "Minimum quantizer")
("qmax", po::value<int>(&arguments.qmax)->default_value(-1), "Maximum quantizer")
("gop_size", po::value<int>(&arguments.gop_size)->default_value(-1),
"Group of pictures structure size")
("max_b_frames", po::value<int>(&arguments.max_b_frames)->default_value(-1),
"Maximum number of B-frames")
("keyint_min", po::value<int>(&arguments.keyint_min)->default_value(-1),
"Minimum interval between keyframes")
("refs", po::value<int>(&arguments.refs)->default_value(-1),
"Number of reference frames")
("thread_count", po::value<int>(&arguments.thread_count)->default_value(0),
"Number of threads for encoding")
("delay", po::value<int>(&arguments.delay)->default_value(0),
"Delay in milliseconds for encoder")
// Extra encoder options (key-value pairs)
("extra_option,e", PO_STR_VALUE<std::vector<StringType>>()->multitoken(),
"Additional AVOption(s) for codec settings (-e key=value)")
// libplacebo options // libplacebo options
("shader,s", PO_STR_VALUE<StringType>(), "Name or path of the GLSL shader file to use") ("shader,s", PO_STR_VALUE<StringType>(), "Name or path of the GLSL shader file to use")
("width,w", po::value<int>(&arguments.out_width), "Output width") ("width,w", po::value<int>(&arguments.width), "Output width")
("height,h", po::value<int>(&arguments.out_height), "Output height") ("height,h", po::value<int>(&arguments.height), "Output height")
// RealESRGAN options // RealESRGAN options
("model,m", PO_STR_VALUE<StringType>(&arguments.model_name), "Name of the model to use") ("model,m", PO_STR_VALUE<StringType>(&arguments.model_name), "Name of the model to use")
@ -417,6 +458,21 @@ int main(int argc, char **argv) {
return 1; return 1;
} }
// Parse extra AVOptions given as key=value pairs
if (vm.count("extra_option")) {
for (const auto &opt : vm["extra_option"].as<std::vector<StringType>>()) {
size_t eq_pos = opt.find('=');
if (eq_pos != StringType::npos) {
StringType key = opt.substr(0, eq_pos);
StringType value = opt.substr(eq_pos + 1);
arguments.extra_options.push_back(std::make_pair(key, value));
} else {
spdlog::critical("Invalid extra AVOption format: {}", wstring_to_utf8(opt));
return 1;
}
}
}
if (vm.count("shader")) { if (vm.count("shader")) {
arguments.shader_path = std::filesystem::path(vm["shader"].as<StringType>()); arguments.shader_path = std::filesystem::path(vm["shader"].as<StringType>());
} }
@ -440,8 +496,7 @@ int main(int argc, char **argv) {
// Additional validations // Additional validations
if (arguments.filter_type == STR("libplacebo")) { if (arguments.filter_type == STR("libplacebo")) {
if (arguments.shader_path.empty() || arguments.out_width == 0 || if (arguments.shader_path.empty() || arguments.width == 0 || arguments.height == 0) {
arguments.out_height == 0) {
spdlog::critical( spdlog::critical(
"For libplacebo, shader name/path (-s), width (-w), " "For libplacebo, shader name/path (-s), width (-w), "
"and height (-h) are required." "and height (-h) are required."
@ -473,17 +528,11 @@ int main(int argc, char **argv) {
} }
// Validate bitrate // Validate bitrate
if (arguments.bitrate < 0) { if (arguments.bit_rate < 0) {
spdlog::critical("Invalid bitrate specified."); spdlog::critical("Invalid bitrate specified.");
return 1; return 1;
} }
// Validate CRF
if (arguments.crf < 0.0f || arguments.crf > 51.0f) {
spdlog::critical("CRF must be between 0 and 51.");
return 1;
}
// Parse codec to AVCodec // Parse codec to AVCodec
const AVCodec *codec = avcodec_find_encoder_by_name(wstring_to_utf8(arguments.codec).c_str()); const AVCodec *codec = avcodec_find_encoder_by_name(wstring_to_utf8(arguments.codec).c_str());
if (!codec) { if (!codec) {
@ -544,8 +593,8 @@ int main(int argc, char **argv) {
FilterConfig filter_config; FilterConfig filter_config;
if (arguments.filter_type == STR("libplacebo")) { if (arguments.filter_type == STR("libplacebo")) {
filter_config.filter_type = FILTER_LIBPLACEBO; filter_config.filter_type = FILTER_LIBPLACEBO;
filter_config.config.libplacebo.out_width = arguments.out_width; filter_config.config.libplacebo.out_width = arguments.width;
filter_config.config.libplacebo.out_height = arguments.out_height; filter_config.config.libplacebo.out_height = arguments.height;
filter_config.config.libplacebo.shader_path = shader_path_str.c_str(); filter_config.config.libplacebo.shader_path = shader_path_str.c_str();
} else if (arguments.filter_type == STR("realesrgan")) { } else if (arguments.filter_type == STR("realesrgan")) {
filter_config.filter_type = FILTER_REALESRGAN; filter_config.filter_type = FILTER_REALESRGAN;
@ -554,18 +603,59 @@ int main(int argc, char **argv) {
filter_config.config.realesrgan.model_name = arguments.model_name.c_str(); filter_config.config.realesrgan.model_name = arguments.model_name.c_str();
} }
std::string preset_str = wstring_to_utf8(arguments.preset);
// Setup encoder configuration // Setup encoder configuration
EncoderConfig encoder_config; EncoderConfig encoder_config;
encoder_config.out_width = 0;
encoder_config.out_height = 0;
encoder_config.copy_streams = !arguments.nocopystreams;
encoder_config.codec = codec->id; encoder_config.codec = codec->id;
encoder_config.copy_streams = !arguments.no_copy_streams;
encoder_config.width = arguments.width;
encoder_config.height = arguments.height;
encoder_config.pix_fmt = pix_fmt; encoder_config.pix_fmt = pix_fmt;
encoder_config.preset = preset_str.c_str(); encoder_config.bit_rate = arguments.bit_rate;
encoder_config.bit_rate = arguments.bitrate; encoder_config.rc_buffer_size = arguments.rc_buffer_size;
encoder_config.crf = arguments.crf; encoder_config.rc_max_rate = arguments.rc_max_rate;
encoder_config.rc_min_rate = arguments.rc_min_rate;
encoder_config.qmin = arguments.qmin;
encoder_config.qmax = arguments.qmax;
encoder_config.gop_size = arguments.gop_size;
encoder_config.max_b_frames = arguments.max_b_frames;
encoder_config.keyint_min = arguments.keyint_min;
encoder_config.refs = arguments.refs;
encoder_config.thread_count = arguments.thread_count;
encoder_config.delay = arguments.delay;
// Handle extra AVOptions
encoder_config.nb_extra_options = arguments.extra_options.size();
encoder_config.extra_options = static_cast<decltype(encoder_config.extra_options)>(malloc(
static_cast<unsigned long>(encoder_config.nb_extra_options + 1) *
sizeof(encoder_config.extra_options[0])
));
if (encoder_config.extra_options == nullptr) {
spdlog::critical("Failed to allocate memory for extra AVOptions.");
return 1;
}
// Copy extra AVOptions to the encoder configuration
for (size_t i = 0; i < encoder_config.nb_extra_options; i++) {
const std::string key = wstring_to_utf8(arguments.extra_options[i].first);
const std::string value = wstring_to_utf8(arguments.extra_options[i].second);
encoder_config.extra_options[i].key = strdup(key.c_str());
encoder_config.extra_options[i].value = strdup(value.c_str());
}
// Custom deleter for extra AVOptions
auto extra_options_deleter = [&](decltype(encoder_config.extra_options) *extra_options_ptr) {
auto extra_options = *extra_options_ptr;
for (size_t i = 0; i < encoder_config.nb_extra_options; i++) {
free(const_cast<char *>(extra_options[i].key));
free(const_cast<char *>(extra_options[i].value));
}
free(extra_options);
*extra_options_ptr = nullptr;
};
// Define a unique_ptr to automatically free extra_options
std::unique_ptr<decltype(encoder_config.extra_options), decltype(extra_options_deleter)>
extra_options_guard(&encoder_config.extra_options, extra_options_deleter);
// Parse hardware acceleration method // Parse hardware acceleration method
enum AVHWDeviceType hw_device_type = AV_HWDEVICE_TYPE_NONE; enum AVHWDeviceType hw_device_type = AV_HWDEVICE_TYPE_NONE;
@ -753,20 +843,27 @@ int main(int argc, char **argv) {
std::lock_guard<std::mutex> lock(proc_ctx_mutex); std::lock_guard<std::mutex> lock(proc_ctx_mutex);
processed_frames = proc_ctx.processed_frames; processed_frames = proc_ctx.processed_frames;
} }
int64_t time_elapsed = timer.get_elapsed_time() / 1000; int time_elapsed = static_cast<int>(timer.get_elapsed_time() / 1000);
int hours_elapsed = time_elapsed / 3600;
int minutes_elapsed = (time_elapsed % 3600) / 60;
int seconds_elapsed = time_elapsed % 60;
float average_speed_fps = static_cast<float>(processed_frames) / float average_speed_fps = static_cast<float>(processed_frames) /
(time_elapsed > 0 ? static_cast<float>(time_elapsed) : 1); (time_elapsed > 0 ? static_cast<float>(time_elapsed) : 1);
// Print processing summary // Print processing summary
printf("====== Video2X %s summary ======\n", arguments.benchmark ? "Benchmark" : "Processing"); std::cout << "====== Video2X " << (arguments.benchmark ? "Benchmark" : "Processing")
printf("Video file processed: %s\n", arguments.in_fname.u8string().c_str()); << " summary ======" << std::endl;
printf("Total frames processed: %ld\n", proc_ctx.processed_frames); std::cout << "Video file processed: " << arguments.in_fname.u8string() << std::endl;
printf("Total time taken: %ld s\n", time_elapsed); std::cout << "Total frames processed: " << processed_frames << std::endl;
printf("Average processing speed: %.2f FPS\n", average_speed_fps); std::cout << "Total time taken: " << std::setw(2) << std::setfill('0') << hours_elapsed << ":"
<< std::setw(2) << std::setfill('0') << minutes_elapsed << ":" << std::setw(2)
<< std::setfill('0') << seconds_elapsed << std::endl;
std::cout << "Average processing speed: " << std::fixed << std::setprecision(2)
<< average_speed_fps << " FPS" << std::endl;
// Print additional information if not in benchmark mode // Print additional information if not in benchmark mode
if (!arguments.benchmark) { if (!arguments.benchmark) {
printf("Output written to: %s\n", arguments.out_fname.u8string().c_str()); std::cout << "Output written to: " << arguments.out_fname.u8string() << std::endl;
} }
return 0; return 0;