diff --git a/CMakeLists.txt b/CMakeLists.txt index ef450f2..26361bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,10 @@ cmake_minimum_required(VERSION 3.10) project(video2x VERSION 6.1.1 LANGUAGES CXX) +if(POLICY CMP0167) + cmake_policy(SET CMP0167 NEW) +endif() + # Set the C++ standard set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -13,9 +17,9 @@ endif() # Set the default optimization flags for Release builds if(CMAKE_BUILD_TYPE STREQUAL "Release") - if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox /GL /LTCG /MD /DNDEBUG") - elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto") set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -s") set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -s") @@ -23,9 +27,9 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release") endif() # Set global compile options for all targets -if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") +if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") add_compile_options(/W4 /permissive-) -elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") +elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") add_compile_options(-Wall -Wextra -Wpedantic -Wconversion -Wshadow) endif() @@ -70,7 +74,7 @@ if(WIN32) set(SPIRV_BUILD_PATH ${CMAKE_BINARY_DIR}/realesrgan-prefix/src/realesrgan-build/ncnn/glslang/SPIRV ) - if (CMAKE_BUILD_TYPE STREQUAL "Release") + if(CMAKE_BUILD_TYPE STREQUAL "Release") set(SPIRV_LIB ${SPIRV_BUILD_PATH}/Release/SPIRV.lib) else() set(SPIRV_LIB ${SPIRV_BUILD_PATH}/Debug/SPIRVd.lib) @@ -203,7 +207,7 @@ else() endif() # spdlog -if (USE_SYSTEM_SPDLOG) +if(USE_SYSTEM_SPDLOG) find_package(spdlog REQUIRED) list(APPEND ALL_INCLUDE_DIRS ${spdlog_INCLUDE_DIRS}) set(SPDLOG_LIB spdlog::spdlog) @@ -214,7 +218,7 @@ endif() list(APPEND ALL_LIBRARIES ${SPDLOG_LIB}) # Boost -if (USE_SYSTEM_BOOST) +if(USE_SYSTEM_BOOST) find_package(Boost REQUIRED COMPONENTS program_options) list(APPEND ALL_INCLUDE_DIRS ${Boost_INCLUDE_DIRS}) else() @@ -228,7 +232,7 @@ else() endif() set(BOOST_LIB Boost::program_options) -if (BUILD_VIDEO2X_CLI) +if(BUILD_VIDEO2X_CLI) find_package(Vulkan REQUIRED) set(VULKAN_LIB Vulkan::Vulkan) endif() @@ -295,7 +299,7 @@ endif() target_link_libraries(libvideo2x PRIVATE ${ALL_LIBRARIES}) if(NOT WIN32) - if (USE_SYSTEM_NCNN) + if(USE_SYSTEM_NCNN) target_link_libraries(libvideo2x PUBLIC ncnn) else() target_link_libraries(libvideo2x PRIVATE ncnn) @@ -303,7 +307,7 @@ if(NOT WIN32) endif() # Create the executable 'video2x' -if (BUILD_VIDEO2X_CLI) +if(BUILD_VIDEO2X_CLI) file(GLOB VIDEO2X_SOURCES tools/video2x/src/*.cpp) add_executable(video2x ${VIDEO2X_SOURCES}) set_target_properties(video2x PROPERTIES OUTPUT_NAME video2x) diff --git a/include/libvideo2x/decoder.h b/include/libvideo2x/decoder.h index e6ed9f0..55e6e1c 100644 --- a/include/libvideo2x/decoder.h +++ b/include/libvideo2x/decoder.h @@ -8,13 +8,25 @@ extern "C" { #include } -int init_decoder( - AVHWDeviceType hw_type, - AVBufferRef *hw_ctx, - std::filesystem::path in_fpath, - AVFormatContext **fmt_ctx, - AVCodecContext **dec_ctx, - int *in_vstream_idx -); +class Decoder { + public: + Decoder(); + ~Decoder(); + + int init(AVHWDeviceType hw_type, AVBufferRef 
*hw_ctx, const std::filesystem::path &in_fpath); + + AVFormatContext *get_format_context() const; + AVCodecContext *get_codec_context() const; + int get_video_stream_index() const; + + private: + static enum AVPixelFormat hw_pix_fmt_; + static enum AVPixelFormat + get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts); + + AVFormatContext *fmt_ctx_; + AVCodecContext *dec_ctx_; + int in_vstream_idx_; +}; #endif // DECODER_H diff --git a/include/libvideo2x/encoder.h b/include/libvideo2x/encoder.h index 13d7814..586e267 100644 --- a/include/libvideo2x/encoder.h +++ b/include/libvideo2x/encoder.h @@ -1,37 +1,43 @@ #ifndef ENCODER_H #define ENCODER_H +#include #include extern "C" { -#include #include -#include +#include } -#include "libvideo2x.h" +#include "libvideo2x/libvideo2x.h" -int init_encoder( - AVBufferRef *hw_ctx, - std::filesystem::path out_fpath, - AVFormatContext *ifmt_ctx, - AVFormatContext **ofmt_ctx, - AVCodecContext **enc_ctx, - AVCodecContext *dec_ctx, - EncoderConfig *encoder_config, - int in_vstream_idx, - int *out_vstream_idx, - int **stream_map -); +class Encoder { + public: + Encoder(); + ~Encoder(); -int write_frame( - AVFrame *frame, - AVCodecContext *enc_ctx, - AVFormatContext *ofmt_ctx, - int out_vstream_idx, - int64_t frame_idx -); + int init( + AVBufferRef *hw_ctx, + const std::filesystem::path &out_fpath, + AVFormatContext *ifmt_ctx, + AVCodecContext *dec_ctx, + EncoderConfig *encoder_config, + int in_vstream_idx + ); -int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vstream_idx); + int write_frame(AVFrame *frame, int64_t frame_idx); + int flush(); + + AVCodecContext *get_encoder_context() const; + AVFormatContext *get_format_context() const; + int *get_stream_map() const; + int get_output_video_stream_index() const; + + private: + AVFormatContext *ofmt_ctx_; + AVCodecContext *enc_ctx_; + int out_vstream_idx_; + int *stream_map_; +}; #endif // ENCODER_H diff --git a/src/decoder.cpp b/src/decoder.cpp index b6dd71d..1f63e30 100644 --- a/src/decoder.cpp +++ b/src/decoder.cpp @@ -1,17 +1,25 @@ #include "decoder.h" -#include -#include -#include - #include -static enum AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE; +enum AVPixelFormat Decoder::hw_pix_fmt_ = AV_PIX_FMT_NONE; -// Callback function to choose the hardware-accelerated pixel format -static enum AVPixelFormat get_hw_format(AVCodecContext *_, const enum AVPixelFormat *pix_fmts) { +Decoder::Decoder() : fmt_ctx_(nullptr), dec_ctx_(nullptr), in_vstream_idx_(-1) {} + +Decoder::~Decoder() { + if (dec_ctx_) { + avcodec_free_context(&dec_ctx_); + dec_ctx_ = nullptr; + } + if (fmt_ctx_) { + avformat_close_input(&fmt_ctx_); + fmt_ctx_ = nullptr; + } +} + +enum AVPixelFormat Decoder::get_hw_format(AVCodecContext *_, const enum AVPixelFormat *pix_fmts) { for (const enum AVPixelFormat *p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) { - if (*p == hw_pix_fmt) { + if (*p == hw_pix_fmt_) { return *p; } } @@ -19,39 +27,36 @@ static enum AVPixelFormat get_hw_format(AVCodecContext *_, const enum AVPixelFor return AV_PIX_FMT_NONE; } -int init_decoder( +int Decoder::init( AVHWDeviceType hw_type, AVBufferRef *hw_ctx, - std::filesystem::path in_fpath, - AVFormatContext **fmt_ctx, - AVCodecContext **dec_ctx, - int *in_vstream_idx + const std::filesystem::path &in_fpath ) { - AVFormatContext *ifmt_ctx = NULL; - AVCodecContext *codec_ctx = NULL; int ret; - if ((ret = avformat_open_input(&ifmt_ctx, in_fpath.u8string().c_str(), NULL, NULL)) < 0) { - spdlog::error("Could not open input file '{}'", 
in_fpath.u8string().c_str()); + // Open the input file + if ((ret = avformat_open_input(&fmt_ctx_, in_fpath.u8string().c_str(), nullptr, nullptr)) < 0) { + spdlog::error("Could not open input file '{}'", in_fpath.u8string()); return ret; } - if ((ret = avformat_find_stream_info(ifmt_ctx, NULL)) < 0) { + // Retrieve stream information + if ((ret = avformat_find_stream_info(fmt_ctx_, nullptr)) < 0) { spdlog::error("Failed to retrieve input stream information"); return ret; } // Find the first video stream - ret = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0); + ret = av_find_best_stream(fmt_ctx_, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0); if (ret < 0) { spdlog::error("Could not find video stream in the input file"); return ret; } int stream_index = ret; - AVStream *video_stream = ifmt_ctx->streams[stream_index]; + AVStream *video_stream = fmt_ctx_->streams[stream_index]; - // Set up the decoder + // Find the decoder for the video stream const AVCodec *decoder = avcodec_find_decoder(video_stream->codecpar->codec_id); if (!decoder) { spdlog::error( @@ -61,16 +66,28 @@ int init_decoder( return AVERROR_DECODER_NOT_FOUND; } - codec_ctx = avcodec_alloc_context3(decoder); - if (!codec_ctx) { + // Allocate the decoder context + dec_ctx_ = avcodec_alloc_context3(decoder); + if (!dec_ctx_) { spdlog::error("Failed to allocate the decoder context"); return AVERROR(ENOMEM); } + // Copy codec parameters from input stream to decoder context + if ((ret = avcodec_parameters_to_context(dec_ctx_, video_stream->codecpar)) < 0) { + spdlog::error("Failed to copy decoder parameters to input decoder context"); + return ret; + } + + // Set the time base and frame rate + dec_ctx_->time_base = video_stream->time_base; + dec_ctx_->pkt_timebase = video_stream->time_base; + dec_ctx_->framerate = av_guess_frame_rate(fmt_ctx_, video_stream, nullptr); + // Set hardware device context if (hw_ctx != nullptr) { - codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx); - codec_ctx->get_format = get_hw_format; + dec_ctx_->hw_device_ctx = av_buffer_ref(hw_ctx); + dec_ctx_->get_format = get_hw_format; // Automatically determine the hardware pixel format for (int i = 0;; i++) { @@ -81,36 +98,35 @@ int init_decoder( decoder->name, av_hwdevice_get_type_name(hw_type) ); - avcodec_free_context(&codec_ctx); - avformat_close_input(&ifmt_ctx); return AVERROR(ENOSYS); } if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX && config->device_type == hw_type) { - hw_pix_fmt = config->pix_fmt; + hw_pix_fmt_ = config->pix_fmt; break; } } } - if ((ret = avcodec_parameters_to_context(codec_ctx, video_stream->codecpar)) < 0) { - spdlog::error("Failed to copy decoder parameters to input decoder context"); - return ret; - } - - // Set decoder time base and frame rate - codec_ctx->time_base = video_stream->time_base; - codec_ctx->pkt_timebase = video_stream->time_base; - codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, video_stream, NULL); - - if ((ret = avcodec_open2(codec_ctx, decoder, NULL)) < 0) { + // Open the decoder + if ((ret = avcodec_open2(dec_ctx_, decoder, nullptr)) < 0) { spdlog::error("Failed to open decoder for stream #{}", stream_index); return ret; } - *fmt_ctx = ifmt_ctx; - *dec_ctx = codec_ctx; - *in_vstream_idx = stream_index; + in_vstream_idx_ = stream_index; return 0; } + +AVFormatContext *Decoder::get_format_context() const { + return fmt_ctx_; +} + +AVCodecContext *Decoder::get_codec_context() const { + return dec_ctx_; +} + +int Decoder::get_video_stream_index() const { + return in_vstream_idx_; +} 
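For reference, a minimal caller-side sketch of the new `Decoder` API introduced above. The `open_input()` helper, its logging, and the includes are illustrative assumptions; the `init()` signature and the three getters are taken directly from this diff, and mirror how `process_video()` drives the class later in the patch.

```cpp
// Illustrative only: exercising the new Decoder class from a caller's side.
// open_input() is a hypothetical helper, not part of this patch.
#include <filesystem>

extern "C" {
#include <libavutil/hwcontext.h>
}

#include <spdlog/spdlog.h>

#include "decoder.h"

static int open_input(const std::filesystem::path &in_fpath) {
    Decoder decoder;

    // Software decoding: AV_HWDEVICE_TYPE_NONE and a null hardware context
    int ret = decoder.init(AV_HWDEVICE_TYPE_NONE, nullptr, in_fpath);
    if (ret < 0) {
        spdlog::error("Decoder initialization failed");
        return ret;
    }

    // The contexts remain owned by the Decoder; its destructor releases them
    AVFormatContext *fmt_ctx = decoder.get_format_context();
    AVCodecContext *dec_ctx = decoder.get_codec_context();
    int video_stream_idx = decoder.get_video_stream_index();

    spdlog::info(
        "Opened '{}': video stream #{}, {} streams total",
        in_fpath.u8string(),
        video_stream_idx,
        fmt_ctx->nb_streams
    );

    // fmt_ctx and dec_ctx must not be used after 'decoder' goes out of scope
    return 0;
}
```

One design consequence worth noting: because ownership moved into the class, the raw pointers returned by the getters are borrowed views, so callers such as `process_frames()` must keep the `Decoder` (and `Encoder`) objects alive for the full duration of processing.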
diff --git a/src/encoder.cpp b/src/encoder.cpp index 78d4f9d..23fc7ac 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -1,138 +1,152 @@ #include "encoder.h" -#include -#include -#include -#include +#include extern "C" { -#include +#include } -#include - #include "avutils.h" #include "conversions.h" -int init_encoder( +Encoder::Encoder() + : ofmt_ctx_(nullptr), enc_ctx_(nullptr), out_vstream_idx_(-1), stream_map_(nullptr) {} + +Encoder::~Encoder() { + if (enc_ctx_) { + avcodec_free_context(&enc_ctx_); + } + if (ofmt_ctx_) { + if (!(ofmt_ctx_->oformat->flags & AVFMT_NOFILE)) { + avio_closep(&ofmt_ctx_->pb); + } + avformat_free_context(ofmt_ctx_); + } + if (stream_map_) { + av_free(stream_map_); + } +} + +int Encoder::init( AVBufferRef *hw_ctx, - std::filesystem::path out_fpath, + const std::filesystem::path &out_fpath, AVFormatContext *ifmt_ctx, - AVFormatContext **ofmt_ctx, - AVCodecContext **enc_ctx, AVCodecContext *dec_ctx, EncoderConfig *encoder_config, - int in_vstream_idx, - int *out_vstream_idx, - int **stream_map + int in_vstream_idx ) { - AVFormatContext *fmt_ctx = NULL; - AVCodecContext *codec_ctx = NULL; int ret; - avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fpath.u8string().c_str()); - if (!fmt_ctx) { + // Allocate the output format context + avformat_alloc_output_context2(&ofmt_ctx_, nullptr, nullptr, out_fpath.u8string().c_str()); + if (!ofmt_ctx_) { spdlog::error("Could not create output context"); return AVERROR_UNKNOWN; } + // Find the encoder const AVCodec *encoder = avcodec_find_encoder(encoder_config->codec); if (!encoder) { spdlog::error( - "Required video encoder not found for vcodec {}", - avcodec_get_name(encoder_config->codec) + "Required video encoder not found for codec {}", avcodec_get_name(encoder_config->codec) ); return AVERROR_ENCODER_NOT_FOUND; } // Create a new video stream in the output file - AVStream *out_vstream = avformat_new_stream(fmt_ctx, NULL); + AVStream *out_vstream = avformat_new_stream(ofmt_ctx_, nullptr); if (!out_vstream) { spdlog::error("Failed to allocate the output video stream"); return AVERROR_UNKNOWN; } - *out_vstream_idx = out_vstream->index; + out_vstream_idx_ = out_vstream->index; - codec_ctx = avcodec_alloc_context3(encoder); - if (!codec_ctx) { + // Allocate the encoder context + enc_ctx_ = avcodec_alloc_context3(encoder); + if (!enc_ctx_) { spdlog::error("Failed to allocate the encoder context"); return AVERROR(ENOMEM); } // Set hardware device context if (hw_ctx != nullptr) { - codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx); + enc_ctx_->hw_device_ctx = av_buffer_ref(hw_ctx); } // Set encoding parameters - codec_ctx->height = encoder_config->out_height; - codec_ctx->width = encoder_config->out_width; - codec_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio; - codec_ctx->bit_rate = encoder_config->bit_rate; + enc_ctx_->height = encoder_config->out_height; + enc_ctx_->width = encoder_config->out_width; + enc_ctx_->sample_aspect_ratio = dec_ctx->sample_aspect_ratio; + enc_ctx_->bit_rate = encoder_config->bit_rate; // Set the color properties - codec_ctx->color_range = dec_ctx->color_range; - codec_ctx->color_primaries = dec_ctx->color_primaries; - codec_ctx->color_trc = dec_ctx->color_trc; - codec_ctx->colorspace = dec_ctx->colorspace; - codec_ctx->chroma_sample_location = dec_ctx->chroma_sample_location; + enc_ctx_->color_range = dec_ctx->color_range; + enc_ctx_->color_primaries = dec_ctx->color_primaries; + enc_ctx_->color_trc = dec_ctx->color_trc; + enc_ctx_->colorspace = dec_ctx->colorspace; + 
enc_ctx_->chroma_sample_location = dec_ctx->chroma_sample_location; // Set the pixel format if (encoder_config->pix_fmt != AV_PIX_FMT_NONE) { // Use the specified pixel format - codec_ctx->pix_fmt = encoder_config->pix_fmt; + enc_ctx_->pix_fmt = encoder_config->pix_fmt; } else { - codec_ctx->pix_fmt = get_encoder_default_pix_fmt(encoder, dec_ctx->pix_fmt); - if (codec_ctx->pix_fmt == AV_PIX_FMT_NONE) { + // Automatically select the pixel format + enc_ctx_->pix_fmt = get_encoder_default_pix_fmt(encoder, dec_ctx->pix_fmt); + if (enc_ctx_->pix_fmt == AV_PIX_FMT_NONE) { spdlog::error("Could not get the default pixel format for the encoder"); return AVERROR(EINVAL); } - spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(codec_ctx->pix_fmt)); + spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(enc_ctx_->pix_fmt)); } // Set the output video's time base if (dec_ctx->time_base.num > 0 && dec_ctx->time_base.den > 0) { - codec_ctx->time_base = dec_ctx->time_base; + enc_ctx_->time_base = dec_ctx->time_base; } else { - codec_ctx->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_vstream, NULL)); + enc_ctx_->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr)); } // Set the output video's frame rate if (dec_ctx->framerate.num > 0 && dec_ctx->framerate.den > 0) { - codec_ctx->framerate = dec_ctx->framerate; + enc_ctx_->framerate = dec_ctx->framerate; } else { - codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, NULL); + enc_ctx_->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr); } // Set the CRF and preset for any codecs that support it std::string crf_str = std::to_string(encoder_config->crf); - av_opt_set(codec_ctx->priv_data, "crf", crf_str.c_str(), 0); - av_opt_set(codec_ctx->priv_data, "preset", encoder_config->preset, 0); + av_opt_set(enc_ctx_->priv_data, "crf", crf_str.c_str(), 0); + av_opt_set(enc_ctx_->priv_data, "preset", encoder_config->preset, 0); - if (fmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) { - codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + // Use global headers if necessary + if (ofmt_ctx_->oformat->flags & AVFMT_GLOBALHEADER) { + enc_ctx_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; } - if ((ret = avcodec_open2(codec_ctx, encoder, NULL)) < 0) { + // Open the encoder + if ((ret = avcodec_open2(enc_ctx_, encoder, nullptr)) < 0) { spdlog::error("Cannot open video encoder"); return ret; } - ret = avcodec_parameters_from_context(out_vstream->codecpar, codec_ctx); + // Copy encoder parameters to output video stream + ret = avcodec_parameters_from_context(out_vstream->codecpar, enc_ctx_); if (ret < 0) { spdlog::error("Failed to copy encoder parameters to output video stream"); return ret; } - out_vstream->time_base = codec_ctx->time_base; - out_vstream->avg_frame_rate = codec_ctx->framerate; - out_vstream->r_frame_rate = codec_ctx->framerate; + out_vstream->time_base = enc_ctx_->time_base; + out_vstream->avg_frame_rate = enc_ctx_->framerate; + out_vstream->r_frame_rate = enc_ctx_->framerate; + // Copy other streams if necessary if (encoder_config->copy_streams) { - // Allocate the stream map - *stream_map = - reinterpret_cast(av_malloc_array(ifmt_ctx->nb_streams, sizeof(**stream_map))); - if (!*stream_map) { + // Allocate the stream mape frame o + stream_map_ = + reinterpret_cast(av_malloc_array(ifmt_ctx->nb_streams, sizeof(*stream_map_))); + if (!stream_map_) { spdlog::error("Could not allocate stream mapping"); return AVERROR(ENOMEM); } @@ -144,20 +158,20 @@ int init_encoder( // Skip the input video 
stream as it's already processed if (i == in_vstream_idx) { - (*stream_map)[i] = *out_vstream_idx; + stream_map_[i] = out_vstream_idx_; continue; } // Map only audio and subtitle streams (skip other types) if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO && in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) { - (*stream_map)[i] = -1; // Stream not mapped + stream_map_[i] = -1; spdlog::warn("Skipping unsupported stream type at index: {}", i); continue; } // Create corresponding output stream for audio and subtitle streams - AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL); + AVStream *out_stream = avformat_new_stream(ofmt_ctx_, nullptr); if (!out_stream) { spdlog::error("Failed allocating output stream"); return AVERROR_UNKNOWN; @@ -176,32 +190,23 @@ int init_encoder( // Map input stream index to output stream index spdlog::debug("Stream mapping: {} (in) -> {} (out)", i, out_stream->index); - (*stream_map)[i] = out_stream->index; + stream_map_[i] = out_stream->index; } } // Open the output file - if (!(fmt_ctx->oformat->flags & AVFMT_NOFILE)) { - ret = avio_open(&fmt_ctx->pb, out_fpath.u8string().c_str(), AVIO_FLAG_WRITE); + if (!(ofmt_ctx_->oformat->flags & AVFMT_NOFILE)) { + ret = avio_open(&ofmt_ctx_->pb, out_fpath.u8string().c_str(), AVIO_FLAG_WRITE); if (ret < 0) { - spdlog::error("Could not open output file '{}'", out_fpath.u8string().c_str()); + spdlog::error("Could not open output file '{}'", out_fpath.u8string()); return ret; } } - *ofmt_ctx = fmt_ctx; - *enc_ctx = codec_ctx; - return 0; } -int write_frame( - AVFrame *frame, - AVCodecContext *enc_ctx, - AVFormatContext *ofmt_ctx, - int out_vstream_idx, - int64_t frame_idx -) { +int Encoder::write_frame(AVFrame *frame, int64_t frame_idx) { AVFrame *converted_frame = nullptr; int ret; @@ -211,13 +216,12 @@ int write_frame( } // Convert the frame to the encoder's pixel format if needed - if (frame->format != enc_ctx->pix_fmt) { - converted_frame = convert_avframe_pix_fmt(frame, enc_ctx->pix_fmt); + if (frame->format != enc_ctx_->pix_fmt) { + converted_frame = convert_avframe_pix_fmt(frame, enc_ctx_->pix_fmt); if (!converted_frame) { spdlog::error("Error converting frame to encoder's pixel format"); return AVERROR_EXTERNAL; } - converted_frame->pts = frame->pts; } @@ -227,11 +231,12 @@ int write_frame( return AVERROR(ENOMEM); } + // Send the frame to the encoder if (converted_frame != nullptr) { - ret = avcodec_send_frame(enc_ctx, converted_frame); + ret = avcodec_send_frame(enc_ctx_, converted_frame); av_frame_free(&converted_frame); } else { - ret = avcodec_send_frame(enc_ctx, frame); + ret = avcodec_send_frame(enc_ctx_, frame); } if (ret < 0) { spdlog::error("Error sending frame to encoder"); @@ -239,8 +244,9 @@ int write_frame( return ret; } + // Receive packets from the encoder while (ret >= 0) { - ret = avcodec_receive_packet(enc_ctx, enc_pkt); + ret = avcodec_receive_packet(enc_ctx_, enc_pkt); if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { av_packet_unref(enc_pkt); break; @@ -252,12 +258,12 @@ int write_frame( // Rescale packet timestamps av_packet_rescale_ts( - enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[out_vstream_idx]->time_base + enc_pkt, enc_ctx_->time_base, ofmt_ctx_->streams[out_vstream_idx_]->time_base ); - enc_pkt->stream_index = out_vstream_idx; + enc_pkt->stream_index = out_vstream_idx_; // Write the packet - ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt); + ret = av_interleaved_write_frame(ofmt_ctx_, enc_pkt); av_packet_unref(enc_pkt); if (ret < 0) { spdlog::error("Error muxing packet"); @@ 
-270,7 +276,7 @@ int write_frame( return 0; } -int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vstream_idx) { +int Encoder::flush() { int ret; AVPacket *enc_pkt = av_packet_alloc(); if (!enc_pkt) { @@ -278,16 +284,17 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vs return AVERROR(ENOMEM); } - ret = avcodec_send_frame(enc_ctx, NULL); + // Send a NULL frame to signal the encoder to flush + ret = avcodec_send_frame(enc_ctx_, nullptr); if (ret < 0) { spdlog::error("Error sending NULL frame to encoder during flush"); av_packet_free(&enc_pkt); return ret; } - // Write the packets to the output file + // Receive and write packets until flushing is complete while (true) { - ret = avcodec_receive_packet(enc_ctx, enc_pkt); + ret = avcodec_receive_packet(enc_ctx_, enc_pkt); if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { av_packet_unref(enc_pkt); break; @@ -299,12 +306,12 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vs // Rescale packet timestamps av_packet_rescale_ts( - enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[out_vstream_idx]->time_base + enc_pkt, enc_ctx_->time_base, ofmt_ctx_->streams[out_vstream_idx_]->time_base ); - enc_pkt->stream_index = out_vstream_idx; + enc_pkt->stream_index = out_vstream_idx_; // Write the packet - ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt); + ret = av_interleaved_write_frame(ofmt_ctx_, enc_pkt); av_packet_unref(enc_pkt); if (ret < 0) { spdlog::error("Error muxing packet during flush"); @@ -316,3 +323,19 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vs av_packet_free(&enc_pkt); return 0; } + +AVCodecContext *Encoder::get_encoder_context() const { + return enc_ctx_; +} + +AVFormatContext *Encoder::get_format_context() const { + return ofmt_ctx_; +} + +int Encoder::get_output_video_stream_index() const { + return out_vstream_idx_; +} + +int *Encoder::get_stream_map() const { + return stream_map_; +} diff --git a/src/libvideo2x.cpp b/src/libvideo2x.cpp index 555159f..c06cf4c 100644 --- a/src/libvideo2x.cpp +++ b/src/libvideo2x.cpp @@ -22,65 +22,51 @@ extern "C" { static int process_frames( EncoderConfig *encoder_config, VideoProcessingContext *proc_ctx, - AVFormatContext *ifmt_ctx, - AVFormatContext *ofmt_ctx, - AVCodecContext *dec_ctx, - AVCodecContext *enc_ctx, + Decoder &decoder, + Encoder &encoder, Filter *filter, - int in_vstream_idx, - int out_vstream_idx, - int *stream_map, bool benchmark = false ) { - int ret; char errbuf[AV_ERROR_MAX_STRING_SIZE]; - std::vector flushed_frames; + int ret = 0; - // Get the total number of frames in the video with OpenCV + // Get required objects + AVFormatContext *ifmt_ctx = decoder.get_format_context(); + AVCodecContext *dec_ctx = decoder.get_codec_context(); + int in_vstream_idx = decoder.get_video_stream_index(); + AVFormatContext *ofmt_ctx = encoder.get_format_context(); + int *stream_map = encoder.get_stream_map(); + + // Get total number of frames spdlog::debug("Reading total number of frames"); proc_ctx->total_frames = get_video_frame_count(ifmt_ctx, in_vstream_idx); - // Check if the total number of frames is still 0 if (proc_ctx->total_frames <= 0) { spdlog::warn("Unable to determine the total number of frames"); } else { spdlog::debug("{} frames to process", proc_ctx->total_frames); } - AVFrame *frame = av_frame_alloc(); - if (frame == nullptr) { + // Allocate frame and packet + auto av_frame_deleter = [](AVFrame *frame) { av_frame_free(&frame); }; + std::unique_ptr 
frame(av_frame_alloc(), av_frame_deleter); + if (!frame) { ret = AVERROR(ENOMEM); return ret; } - AVPacket *packet = av_packet_alloc(); - if (packet == nullptr) { + auto av_packet_deleter = [](AVPacket *packet) { av_packet_free(&packet); }; + std::unique_ptr packet( + av_packet_alloc(), av_packet_deleter + ); + if (!packet) { spdlog::critical("Could not allocate AVPacket"); - av_frame_free(&frame); return AVERROR(ENOMEM); } - // Lambda function for cleaning up resources - auto cleanup = [&]() { - if (frame) { - av_frame_free(&frame); - frame = nullptr; - } - if (packet) { - av_packet_free(&packet); - packet = nullptr; - } - for (AVFrame *&flushed_frame : flushed_frames) { - if (flushed_frame) { - av_frame_free(&flushed_frame); - flushed_frame = nullptr; - } - } - }; - // Read frames from the input file while (!proc_ctx->abort) { - ret = av_read_frame(ifmt_ctx, packet); + ret = av_read_frame(ifmt_ctx, packet.get()); if (ret < 0) { if (ret == AVERROR_EOF) { spdlog::debug("Reached end of file"); @@ -88,17 +74,15 @@ static int process_frames( } av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error reading packet: {}", errbuf); - cleanup(); return ret; } if (packet->stream_index == in_vstream_idx) { - ret = avcodec_send_packet(dec_ctx, packet); + ret = avcodec_send_packet(dec_ctx, packet.get()); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error sending packet to decoder: {}", errbuf); - av_packet_unref(packet); - cleanup(); + av_packet_unref(packet.get()); return ret; } @@ -108,49 +92,43 @@ static int process_frames( continue; } - ret = avcodec_receive_frame(dec_ctx, frame); + ret = avcodec_receive_frame(dec_ctx, frame.get()); if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { spdlog::debug("Frame not ready"); break; } else if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error decoding video frame: {}", errbuf); - av_packet_unref(packet); - cleanup(); + av_packet_unref(packet.get()); return ret; } - AVFrame *processed_frame = nullptr; - ret = filter->process_frame(frame, &processed_frame); + AVFrame *raw_processed_frame = nullptr; + ret = filter->process_frame(frame.get(), &raw_processed_frame); + if (ret < 0 && ret != AVERROR(EAGAIN)) { av_strerror(ret, errbuf, sizeof(errbuf)); - av_frame_free(&processed_frame); - av_packet_unref(packet); - cleanup(); + av_packet_unref(packet.get()); return ret; - } else if (ret == 0 && processed_frame != nullptr) { + } else if (ret == 0 && raw_processed_frame != nullptr) { + auto processed_frame = std::unique_ptr( + raw_processed_frame, av_frame_deleter + ); + if (!benchmark) { - ret = write_frame( - processed_frame, - enc_ctx, - ofmt_ctx, - out_vstream_idx, - proc_ctx->processed_frames - ); + ret = + encoder.write_frame(processed_frame.get(), proc_ctx->processed_frames); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error encoding/writing frame: {}", errbuf); - av_frame_free(&processed_frame); - av_packet_unref(packet); - cleanup(); + av_packet_unref(packet.get()); return ret; } } - av_frame_free(&processed_frame); proc_ctx->processed_frames++; } - av_frame_unref(frame); + av_frame_unref(frame.get()); spdlog::debug( "Processed frame {}/{}", proc_ctx->processed_frames, proc_ctx->total_frames ); @@ -160,58 +138,54 @@ static int process_frames( int out_stream_index = stream_map[packet->stream_index]; AVStream *out_stream = ofmt_ctx->streams[out_stream_index]; - av_packet_rescale_ts(packet, in_stream->time_base, out_stream->time_base); + 
av_packet_rescale_ts(packet.get(), in_stream->time_base, out_stream->time_base); packet->stream_index = out_stream_index; - ret = av_interleaved_write_frame(ofmt_ctx, packet); + ret = av_interleaved_write_frame(ofmt_ctx, packet.get()); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error muxing audio/subtitle packet: {}", errbuf); - av_packet_unref(packet); - cleanup(); + av_packet_unref(packet.get()); return ret; } } - av_packet_unref(packet); + av_packet_unref(packet.get()); } // Flush the filter - ret = filter->flush(flushed_frames); + std::vector raw_flushed_frames; + ret = filter->flush(raw_flushed_frames); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error flushing filter: {}", errbuf); - cleanup(); return ret; } + // Wrap flushed frames in unique_ptrs + std::vector> flushed_frames; + for (AVFrame *raw_frame : raw_flushed_frames) { + flushed_frames.emplace_back(raw_frame, av_frame_deleter); + } + // Encode and write all flushed frames - for (AVFrame *&flushed_frame : flushed_frames) { - ret = write_frame( - flushed_frame, enc_ctx, ofmt_ctx, out_vstream_idx, proc_ctx->processed_frames - ); + for (auto &flushed_frame : flushed_frames) { + ret = encoder.write_frame(flushed_frame.get(), proc_ctx->processed_frames); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error encoding/writing flushed frame: {}", errbuf); - av_frame_free(&flushed_frame); - flushed_frame = nullptr; - cleanup(); return ret; } - av_frame_free(&flushed_frame); - flushed_frame = nullptr; proc_ctx->processed_frames++; } // Flush the encoder - ret = flush_encoder(enc_ctx, ofmt_ctx, out_vstream_idx); + ret = encoder.flush(); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error flushing encoder: {}", errbuf); - cleanup(); return ret; } - cleanup(); return ret; } @@ -226,55 +200,10 @@ extern "C" int process_video( EncoderConfig *encoder_config, VideoProcessingContext *proc_ctx ) { - AVFormatContext *ifmt_ctx = nullptr; - AVFormatContext *ofmt_ctx = nullptr; - AVCodecContext *dec_ctx = nullptr; - AVCodecContext *enc_ctx = nullptr; - AVBufferRef *hw_ctx = nullptr; - int *stream_map = nullptr; - Filter *filter = nullptr; - int in_vstream_idx = -1; - int out_vstream_idx = -1; char errbuf[AV_ERROR_MAX_STRING_SIZE]; int ret = 0; - // Lambda function for cleaning up resources - auto cleanup = [&]() { - if (ifmt_ctx) { - avformat_close_input(&ifmt_ctx); - ifmt_ctx = nullptr; - } - if (ofmt_ctx && !(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) { - avio_closep(&ofmt_ctx->pb); - ofmt_ctx->pb = nullptr; - } - if (ofmt_ctx) { - avformat_free_context(ofmt_ctx); - ofmt_ctx = nullptr; - } - if (dec_ctx) { - avcodec_free_context(&dec_ctx); - dec_ctx = nullptr; - } - if (enc_ctx) { - avcodec_free_context(&enc_ctx); - enc_ctx = nullptr; - } - if (hw_ctx) { - av_buffer_unref(&hw_ctx); - hw_ctx = nullptr; - } - if (stream_map) { - av_free(stream_map); - stream_map = nullptr; - } - if (filter) { - delete filter; - filter = nullptr; - } - }; - - // Set the log level for FFmpeg and spdlog (libvideo2x) + // Set the log level for FFmpeg and spdlog switch (log_level) { case LIBVIDEO2X_LOG_LEVEL_TRACE: av_log_set_level(AV_LOG_TRACE); @@ -314,26 +243,38 @@ extern "C" int process_video( std::filesystem::path in_fpath(in_fname); std::filesystem::path out_fpath(out_fname); + auto hw_ctx_deleter = [](AVBufferRef *ref) { + if (ref) { + av_buffer_unref(&ref); + } + }; + std::unique_ptr hw_ctx(nullptr, hw_ctx_deleter); + // Initialize hardware device 
context if (hw_type != AV_HWDEVICE_TYPE_NONE) { - ret = av_hwdevice_ctx_create(&hw_ctx, hw_type, NULL, NULL, 0); + AVBufferRef *tmp_hw_ctx = nullptr; + ret = av_hwdevice_ctx_create(&tmp_hw_ctx, hw_type, NULL, NULL, 0); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error initializing hardware device context: {}", errbuf); - cleanup(); return ret; } + hw_ctx.reset(tmp_hw_ctx); } - // Initialize input - ret = init_decoder(hw_type, hw_ctx, in_fpath, &ifmt_ctx, &dec_ctx, &in_vstream_idx); + // Initialize input decoder + Decoder decoder; + ret = decoder.init(hw_type, hw_ctx.get(), in_fpath); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Failed to initialize decoder: {}", errbuf); - cleanup(); return ret; } + AVFormatContext *ifmt_ctx = decoder.get_format_context(); + AVCodecContext *dec_ctx = decoder.get_codec_context(); + int in_vstream_idx = decoder.get_video_stream_index(); + // Initialize output dimensions based on filter configuration int output_width = 0, output_height = 0; switch (filter_config->filter_type) { @@ -347,116 +288,85 @@ extern "C" int process_video( break; default: spdlog::critical("Unknown filter type"); - cleanup(); return -1; } spdlog::debug("Output video dimensions: {}x{}", output_width, output_height); - // Initialize output encoder + // Update encoder configuration with output dimensions encoder_config->out_width = output_width; encoder_config->out_height = output_height; - ret = init_encoder( - hw_ctx, - out_fpath, - ifmt_ctx, - &ofmt_ctx, - &enc_ctx, - dec_ctx, - encoder_config, - in_vstream_idx, - &out_vstream_idx, - &stream_map - ); + + // Initialize the encoder + Encoder encoder; + ret = encoder.init(hw_ctx.get(), out_fpath, ifmt_ctx, dec_ctx, encoder_config, in_vstream_idx); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Failed to initialize encoder: {}", errbuf); - cleanup(); return ret; } // Write the output file header - ret = avformat_write_header(ofmt_ctx, NULL); + ret = avformat_write_header(encoder.get_format_context(), NULL); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error occurred when opening output file: {}", errbuf); - cleanup(); return ret; } // Create and initialize the appropriate filter + std::unique_ptr filter; if (filter_config->filter_type == FILTER_LIBPLACEBO) { const auto &config = filter_config->config.libplacebo; if (!config.shader_path) { spdlog::critical("Shader path must be provided for the libplacebo filter"); - cleanup(); return -1; } - filter = new LibplaceboFilter{ + filter = std::make_unique( vk_device_index, std::filesystem::path(config.shader_path), config.out_width, config.out_height - }; + ); } else if (filter_config->filter_type == FILTER_REALESRGAN) { const auto &config = filter_config->config.realesrgan; if (!config.model_name) { spdlog::critical("Model name must be provided for the RealESRGAN filter"); - cleanup(); return -1; } - filter = new RealesrganFilter{ + filter = std::make_unique( static_cast(vk_device_index), config.tta_mode, config.scaling_factor, config.model_name - }; + ); } else { spdlog::critical("Unknown filter type"); - cleanup(); return -1; } // Check if the filter instance was created successfully if (filter == nullptr) { spdlog::critical("Failed to create filter instance"); - cleanup(); return -1; } // Initialize the filter - ret = filter->init(dec_ctx, enc_ctx, hw_ctx); + ret = filter->init(dec_ctx, encoder.get_encoder_context(), hw_ctx.get()); if (ret < 0) { spdlog::critical("Failed to 
initialize filter"); - cleanup(); return ret; } - // Process frames - ret = process_frames( - encoder_config, - proc_ctx, - ifmt_ctx, - ofmt_ctx, - dec_ctx, - enc_ctx, - filter, - in_vstream_idx, - out_vstream_idx, - stream_map, - benchmark - ); + // Process frames using the encoder and decoder + ret = process_frames(encoder_config, proc_ctx, decoder, encoder, filter.get(), benchmark); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error processing frames: {}", errbuf); - cleanup(); return ret; } // Write the output file trailer - av_write_trailer(ofmt_ctx); - - // Cleanup before returning - cleanup(); + av_write_trailer(encoder.get_format_context()); if (ret < 0 && ret != AVERROR_EOF) { av_strerror(ret, errbuf, sizeof(errbuf));