diff --git a/CMakeLists.txt b/CMakeLists.txt index e8eadd3..d075c54 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -387,10 +387,9 @@ if(BUILD_VIDEO2X_CLI) endif() # Install the header files +file(GLOB HEADER_FILES ${PROJECT_SOURCE_DIR}/include/libvideo2x/*.h) install(FILES - ${PROJECT_SOURCE_DIR}/include/libvideo2x/libvideo2x.h - ${PROJECT_SOURCE_DIR}/include/libvideo2x/fsutils.h - ${PROJECT_SOURCE_DIR}/include/libvideo2x/logging.h + ${HEADER_FILES} ${CMAKE_CURRENT_BINARY_DIR}/libvideo2x/version.h DESTINATION ${INSTALL_INCLUDE_DESTINATION} ) @@ -403,9 +402,7 @@ if(WIN32) set(BOOST_DLL_PATH ${BOOST_BASE_PATH}/boost_program_options-vc143-mt-gd-x64-1_86.dll) endif() - # Find all FFmpeg DLLs file(GLOB FFMPEG_DLLS "${FFMPEG_BASE_PATH}/bin/*.dll") - install(FILES ${CMAKE_BINARY_DIR}/realesrgan_install/bin/librealesrgan-ncnn-vulkan.dll ${CMAKE_BINARY_DIR}/rife_install/bin/librife-ncnn-vulkan.dll diff --git a/include/libvideo2x/avutils.h b/include/libvideo2x/avutils.h index 598a4c7..c4a799c 100644 --- a/include/libvideo2x/avutils.h +++ b/include/libvideo2x/avutils.h @@ -5,15 +5,18 @@ extern "C" { #include } -#define CALC_FFMPEG_VERSION(a, b, c) (a << 16 | b << 8 | c) - AVRational get_video_frame_rate(AVFormatContext *ifmt_ctx, int in_vstream_idx); int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx); -AVPixelFormat -get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt); +AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt); float get_frame_diff(AVFrame *frame1, AVFrame *frame2); +void av_bufferref_deleter(AVBufferRef *bufferref); + +void av_frame_deleter(AVFrame *frame); + +void av_packet_deleter(AVPacket *packet); + #endif // AVUTILS_H diff --git a/include/libvideo2x/encoder.h b/include/libvideo2x/encoder.h index 683136e..7cf9105 100644 --- a/include/libvideo2x/encoder.h +++ b/include/libvideo2x/encoder.h @@ -3,13 +3,51 @@ #include #include +#include extern "C" { +#include #include #include } -#include "libvideo2x/libvideo2x.h" +#include "fsutils.h" + +// Encoder configurations +struct EncoderConfig { + // Non-AVCodecContext options + AVCodecID codec; + bool copy_streams; + + // Basic video options + int width; + int height; + int frm_rate_mul; + AVPixelFormat pix_fmt; + + // Rate control and compression + int64_t bit_rate; + int rc_buffer_size; + int rc_min_rate; + int rc_max_rate; + int qmin; + int qmax; + + // GOP and frame structure + int gop_size; + int max_b_frames; + int keyint_min; + int refs; + + // Performance and threading + int thread_count; + + // Latency and buffering + int delay; + + // Extra AVOptions + std::vector> extra_opts; +}; class Encoder { public: @@ -22,7 +60,6 @@ class Encoder { AVFormatContext *ifmt_ctx, AVCodecContext *dec_ctx, EncoderConfig &enc_cfg, - const ProcessorConfig &proc_cfg, int in_vstream_idx ); diff --git a/include/libvideo2x/filter_libplacebo.h b/include/libvideo2x/filter_libplacebo.h index 53406e8..f494952 100644 --- a/include/libvideo2x/filter_libplacebo.h +++ b/include/libvideo2x/filter_libplacebo.h @@ -13,17 +13,6 @@ extern "C" { // FilterLibplacebo class definition class FilterLibplacebo : public Filter { - private: - AVFilterGraph *filter_graph_; - AVFilterContext *buffersrc_ctx_; - AVFilterContext *buffersink_ctx_; - uint32_t vk_device_index_; - const std::filesystem::path shader_path_; - int width_; - int height_; - AVRational in_time_base_; - AVRational out_time_base_; - public: // Constructor FilterLibplacebo( @@ -56,6 +45,17 @@ class FilterLibplacebo : public Filter { int &out_width, int &out_height ) const override; + + private: + AVFilterGraph *filter_graph_; + AVFilterContext *buffersrc_ctx_; + AVFilterContext *buffersink_ctx_; + uint32_t vk_device_index_; + const std::filesystem::path shader_path_; + int width_; + int height_; + AVRational in_time_base_; + AVRational out_time_base_; }; #endif // FILTER_LIBPLACEBO_H diff --git a/include/libvideo2x/filter_realesrgan.h b/include/libvideo2x/filter_realesrgan.h index ab4be0d..15534fa 100644 --- a/include/libvideo2x/filter_realesrgan.h +++ b/include/libvideo2x/filter_realesrgan.h @@ -10,16 +10,6 @@ extern "C" { // FilterRealesrgan class definition class FilterRealesrgan : public Filter { - private: - RealESRGAN *realesrgan_; - int gpuid_; - bool tta_mode_; - int scaling_factor_; - const StringType model_name_; - AVRational in_time_base_; - AVRational out_time_base_; - AVPixelFormat out_pix_fmt_; - public: // Constructor FilterRealesrgan( @@ -49,6 +39,16 @@ class FilterRealesrgan : public Filter { int &out_width, int &out_height ) const override; + + private: + RealESRGAN *realesrgan_; + int gpuid_; + bool tta_mode_; + int scaling_factor_; + const StringType model_name_; + AVRational in_time_base_; + AVRational out_time_base_; + AVPixelFormat out_pix_fmt_; }; #endif // FILTER_REALESRGAN_H diff --git a/include/libvideo2x/frames_processor.h b/include/libvideo2x/frames_processor.h deleted file mode 100644 index 027e043..0000000 --- a/include/libvideo2x/frames_processor.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef FRAMES_PROCESSOR_H -#define FRAMES_PROCESSOR_H - -#include "decoder.h" -#include "encoder.h" -#include "libvideo2x.h" -#include "processor.h" - -int process_frames( - const EncoderConfig &enc_cfg, - const ProcessorConfig &proc_cfg, - VideoProcessingContext *proc_ctx, - Decoder &decoder, - Encoder &encoder, - Processor *processor, - bool benchmark = false -); - -#endif // FRAMES_PROCESSOR_H diff --git a/include/libvideo2x/interpolator_rife.h b/include/libvideo2x/interpolator_rife.h index aff0378..d94cdfc 100644 --- a/include/libvideo2x/interpolator_rife.h +++ b/include/libvideo2x/interpolator_rife.h @@ -10,18 +10,6 @@ extern "C" { // InterpolatorRIFE class definition class InterpolatorRIFE : public Interpolator { - private: - RIFE *rife_; - int gpuid_; - bool tta_mode_; - bool tta_temporal_mode_; - bool uhd_mode_; - int num_threads_; - const StringType model_name_; - AVRational in_time_base_; - AVRational out_time_base_; - AVPixelFormat out_pix_fmt_; - public: // Constructor InterpolatorRIFE( @@ -54,6 +42,18 @@ class InterpolatorRIFE : public Interpolator { int &out_width, int &out_height ) const override; + + private: + RIFE *rife_; + int gpuid_; + bool tta_mode_; + bool tta_temporal_mode_; + bool uhd_mode_; + int num_threads_; + const StringType model_name_; + AVRational in_time_base_; + AVRational out_time_base_; + AVPixelFormat out_pix_fmt_; }; #endif // INTERPOLATOR_RIFE_H diff --git a/include/libvideo2x/libvideo2x.h b/include/libvideo2x/libvideo2x.h index c5a3e63..f1b6e71 100644 --- a/include/libvideo2x/libvideo2x.h +++ b/include/libvideo2x/libvideo2x.h @@ -1,17 +1,20 @@ #ifndef LIBVIDEO2X_H #define LIBVIDEO2X_H -#include -#include -#include +#include +#include +#include extern "C" { #include #include } -#include "fsutils.h" +#include "avutils.h" +#include "decoder.h" +#include "encoder.h" #include "logging.h" +#include "processor.h" #ifdef _WIN32 #ifdef LIBVIDEO2X_EXPORTS @@ -23,105 +26,75 @@ extern "C" { #define LIBVIDEO2X_API #endif -enum class ProcessingMode { - Filter, - Interpolate, -}; - -enum class ProcessorType { - Libplacebo, - RealESRGAN, - RIFE, -}; - -struct LibplaceboConfig { - StringType shader_path; -}; - -struct RealESRGANConfig { - bool tta_mode; - StringType model_name; -}; - -struct RIFEConfig { - bool tta_mode; - bool tta_temporal_mode; - bool uhd_mode; - int num_threads; - StringType model_name; -}; - -// Unified filter configuration -struct ProcessorConfig { - ProcessorType processor_type; - int width; - int height; - int scaling_factor; - int frm_rate_mul; - float scn_det_thresh; - std::variant config; -}; - -// Encoder configurations -struct EncoderConfig { - // Non-AVCodecContext options - AVCodecID codec; - bool copy_streams; - - // Basic video options - int width; - int height; - AVPixelFormat pix_fmt; - - // Rate control and compression - int64_t bit_rate; - int rc_buffer_size; - int rc_min_rate; - int rc_max_rate; - int qmin; - int qmax; - - // GOP and frame structure - int gop_size; - int max_b_frames; - int keyint_min; - int refs; - - // Performance and threading - int thread_count; - - // Latency and buffering - int delay; - - // Extra AVOptions - std::vector> extra_opts; -}; - struct HardwareConfig { uint32_t vk_device_index; AVHWDeviceType hw_device_type; }; -// Video processing context -struct VideoProcessingContext { - int64_t processed_frames; - int64_t total_frames; - std::time_t start_time; - bool pause; - bool abort; - bool completed; +class LIBVIDEO2X_API VideoProcessor { + public: + VideoProcessor( + const HardwareConfig hw_cfg, + const ProcessorConfig proc_cfg, + EncoderConfig enc_cfg, + Video2xLogLevel = Video2xLogLevel::Info, + bool benchmark = false + ); + + virtual ~VideoProcessor() = default; + + [[nodiscard]] int + process(const std::filesystem::path in_fname, const std::filesystem::path out_fname); + + void pause() { paused_.store(true); } + void resume() { paused_.store(false); } + void abort() { aborted_.store(true); } + + int64_t get_processed_frames() const { return frame_index_.load(); } + int64_t get_total_frames() const { return total_frames_.load(); } + + bool is_paused() const { return paused_.load(); } + bool is_aborted() const { return aborted_.load(); } + bool is_completed() const { return completed_.load(); } + + private: + [[nodiscard]] int + process_frames(Decoder &decoder, Encoder &encoder, std::unique_ptr &processor); + + [[nodiscard]] int write_frame(AVFrame *frame, Encoder &encoder); + + [[nodiscard]] inline int write_raw_packet( + AVPacket *packet, + AVFormatContext *ifmt_ctx, + AVFormatContext *ofmt_ctx, + int *stream_map + ); + + [[nodiscard]] inline int process_filtering( + std::unique_ptr &processor, + Encoder &encoder, + AVFrame *frame, + AVFrame *proc_frame + ); + + [[nodiscard]] inline int process_interpolation( + std::unique_ptr &processor, + Encoder &encoder, + std::unique_ptr &prev_frame, + AVFrame *frame, + AVFrame *proc_frame + ); + + HardwareConfig hw_cfg_; + ProcessorConfig proc_cfg_; + EncoderConfig enc_cfg_; + bool benchmark_ = false; + + std::atomic frame_index_ = 0; + std::atomic total_frames_ = 0; + std::atomic paused_ = false; + std::atomic aborted_ = false; + std::atomic completed_ = false; }; -// Process a video file using the specified configurations -[[nodiscard]] LIBVIDEO2X_API int process_video( - const std::filesystem::path in_fname, - const std::filesystem::path out_fname, - const HardwareConfig hw_cfg, - const ProcessorConfig proc_cfg, - EncoderConfig enc_cfg, - VideoProcessingContext *proc_ctx, - Libvideo2xLogLevel log_level, - bool benchmark -); - #endif // LIBVIDEO2X_H diff --git a/include/libvideo2x/logging.h b/include/libvideo2x/logging.h index a2c48fb..fb20034 100644 --- a/include/libvideo2x/logging.h +++ b/include/libvideo2x/logging.h @@ -5,7 +5,7 @@ #include "fsutils.h" -enum class Libvideo2xLogLevel { +enum class Video2xLogLevel { Unknown, Trace, Debug, @@ -16,8 +16,8 @@ enum class Libvideo2xLogLevel { Off }; -void set_log_level(Libvideo2xLogLevel log_level); +void set_log_level(Video2xLogLevel log_level); -std::optional find_log_level_by_name(const StringType &log_level_name); +std::optional find_log_level_by_name(const StringType &log_level_name); #endif // LOGGING_H diff --git a/include/libvideo2x/processor.h b/include/libvideo2x/processor.h index 11a4865..8cf4251 100644 --- a/include/libvideo2x/processor.h +++ b/include/libvideo2x/processor.h @@ -1,6 +1,7 @@ #ifndef PROCESSOR_H #define PROCESSOR_H +#include #include extern "C" { @@ -9,7 +10,46 @@ extern "C" { #include } -#include "libvideo2x.h" +#include "fsutils.h" + +enum class ProcessingMode { + Filter, + Interpolate, +}; + +enum class ProcessorType { + Libplacebo, + RealESRGAN, + RIFE, +}; + +struct LibplaceboConfig { + StringType shader_path; +}; + +struct RealESRGANConfig { + bool tta_mode; + StringType model_name; +}; + +struct RIFEConfig { + bool tta_mode; + bool tta_temporal_mode; + bool uhd_mode; + int num_threads; + StringType model_name; +}; + +// Unified filter configuration +struct ProcessorConfig { + ProcessorType processor_type; + int width; + int height; + int scaling_factor; + int frm_rate_mul; + float scn_det_thresh; + std::variant config; +}; class Processor { public: diff --git a/src/avutils.cpp b/src/avutils.cpp index 9d07179..e621dae 100644 --- a/src/avutils.cpp +++ b/src/avutils.cpp @@ -35,7 +35,7 @@ int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx) { spdlog::debug("Read total number of frames from 'nb_frames': {}", nb_frames); return nb_frames; } - spdlog::warn("Estimating the total number of frames from duration * fps"); + spdlog::warn("Estimating the total number of frames using duration * fps"); // Get the duration of the video double duration_secs = 0.0; @@ -68,7 +68,7 @@ AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat char errbuf[AV_ERROR_MAX_STRING_SIZE]; // Retrieve the list of supported pixel formats -#if LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(61, 13, 100) +#if LIBAVCODEC_BUILD >= AV_VERSION_INT(61, 13, 100) const AVPixelFormat *supported_pix_fmts = nullptr; ret = avcodec_get_supported_config( nullptr, encoder, AV_CODEC_CONFIG_PIX_FORMAT, 0, (const void **)&supported_pix_fmts, nullptr @@ -185,3 +185,27 @@ float get_frame_diff(AVFrame *frame1, AVFrame *frame2) { return percent_diff; } + +// Deleter for AVBufferRef unique_ptr +void av_bufferref_deleter(AVBufferRef *bufferref) { + if (bufferref != nullptr) { + av_buffer_unref(&bufferref); + } +} + +// Deleter for AVFrame unique_ptr +void av_frame_deleter(AVFrame *frame) { + if (frame != nullptr) { + av_frame_free(&frame); + frame = nullptr; + } +} + +// Deleter for AVPacket unique_ptr +void av_packet_deleter(AVPacket *packet) { + if (packet != nullptr) { + av_packet_unref(packet); + av_packet_free(&packet); + packet = nullptr; + } +} diff --git a/src/encoder.cpp b/src/encoder.cpp index 15536ef..33f0c09 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -33,7 +33,6 @@ int Encoder::init( AVFormatContext *ifmt_ctx, AVCodecContext *dec_ctx, EncoderConfig &enc_cfg, - const ProcessorConfig &proc_cfg, int in_vstream_idx ) { int ret; @@ -122,9 +121,9 @@ int Encoder::init( spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(enc_ctx_->pix_fmt)); } - if (proc_cfg.frm_rate_mul > 0) { + if (enc_cfg.frm_rate_mul > 0) { AVRational in_frame_rate = get_video_frame_rate(ifmt_ctx, in_vstream_idx); - enc_ctx_->framerate = {in_frame_rate.num * proc_cfg.frm_rate_mul, in_frame_rate.den}; + enc_ctx_->framerate = {in_frame_rate.num * enc_cfg.frm_rate_mul, in_frame_rate.den}; enc_ctx_->time_base = av_inv_q(enc_ctx_->framerate); } else { // Set the output video's time base diff --git a/src/frames_processor.cpp b/src/frames_processor.cpp deleted file mode 100644 index ea9cac7..0000000 --- a/src/frames_processor.cpp +++ /dev/null @@ -1,371 +0,0 @@ -#include "frames_processor.h" - -extern "C" { -#include -} - -#include - -#include "avutils.h" - -// Deleter for AVFrame unique_ptr -auto av_frame_deleter = [](AVFrame *frame) { - if (frame != nullptr) { - av_frame_free(&frame); - frame = nullptr; - } -}; - -// Deleter for AVPacket unique_ptr -auto av_packet_deleter = [](AVPacket *packet) { - if (packet != nullptr) { - av_packet_unref(packet); - av_packet_free(&packet); - packet = nullptr; - } -}; - -// Sets the total number of frames to process in the VideoProcessingContext -void set_total_frames( - const ProcessorConfig &proc_cfg, - VideoProcessingContext *proc_ctx, - AVFormatContext *ifmt_ctx, - int in_vstream_idx, - Processor *processor -) { - spdlog::debug("Estimating the total number of frames to process"); - proc_ctx->total_frames = get_video_frame_count(ifmt_ctx, in_vstream_idx); - - if (proc_ctx->total_frames <= 0) { - spdlog::warn("Unable to determine the total number of frames"); - proc_ctx->total_frames = 0; - } else { - spdlog::debug("{} frames to process", proc_ctx->total_frames); - } - - // Set total frames for interpolation - if (processor->get_processing_mode() == ProcessingMode::Interpolate) { - proc_ctx->total_frames *= proc_cfg.frm_rate_mul; - } -} - -int write_frame( - AVFrame *frame, - VideoProcessingContext *proc_ctx, - Encoder &encoder, - bool benchmark -) { - char errbuf[AV_ERROR_MAX_STRING_SIZE]; - int ret = 0; - - if (!benchmark) { - // Set the frame type to none to let the encoder decide - frame->pict_type = AV_PICTURE_TYPE_NONE; - ret = encoder.write_frame(frame, proc_ctx->processed_frames); - if (ret < 0) { - av_strerror(ret, errbuf, sizeof(errbuf)); - spdlog::critical("Error encoding/writing frame: {}", errbuf); - } - } - return ret; -} - -int write_raw_packet( - AVPacket *packet, - AVFormatContext *ifmt_ctx, - AVFormatContext *ofmt_ctx, - int *stream_map -) { - char errbuf[AV_ERROR_MAX_STRING_SIZE]; - int ret = 0; - - AVStream *in_stream = ifmt_ctx->streams[packet->stream_index]; - int out_stream_index = stream_map[packet->stream_index]; - AVStream *out_stream = ofmt_ctx->streams[out_stream_index]; - - av_packet_rescale_ts(packet, in_stream->time_base, out_stream->time_base); - packet->stream_index = out_stream_index; - - ret = av_interleaved_write_frame(ofmt_ctx, packet); - if (ret < 0) { - av_strerror(ret, errbuf, sizeof(errbuf)); - spdlog::critical("Error muxing audio/subtitle packet: {}", errbuf); - } - return ret; -} - -int process_filtering( - Processor *processor, - VideoProcessingContext *proc_ctx, - Encoder &encoder, - bool benchmark, - AVFrame *frame, - AVFrame *raw_processed_frame -) { - char errbuf[AV_ERROR_MAX_STRING_SIZE]; - int ret = 0; - - // Cast the processor to a Filter - Filter *filter = static_cast(processor); - - // Process the frame using the filter - ret = filter->filter(frame, &raw_processed_frame); - - // Write the processed frame - if (ret < 0 && ret != AVERROR(EAGAIN)) { - av_strerror(ret, errbuf, sizeof(errbuf)); - spdlog::critical("Error filtering frame: {}", errbuf); - } else if (ret == 0 && raw_processed_frame != nullptr) { - auto processed_frame = std::unique_ptr( - raw_processed_frame, av_frame_deleter - ); - ret = write_frame(processed_frame.get(), proc_ctx, encoder, benchmark); - } - return ret; -} - -int process_interpolation( - Processor *processor, - const ProcessorConfig &proc_cfg, - VideoProcessingContext *proc_ctx, - Encoder &encoder, - bool benchmark, - std::unique_ptr &prev_frame, - AVFrame *frame, - AVFrame *raw_processed_frame -) { - char errbuf[AV_ERROR_MAX_STRING_SIZE]; - int ret = 0; - - // Cast the processor to an Interpolator - Interpolator *interpolator = static_cast(processor); - - // Calculate the time step for each frame - float time_step = 1.0f / static_cast(proc_cfg.frm_rate_mul); - float current_time_step = time_step; - - // Check if a scene change is detected - bool skip_frame = false; - if (prev_frame != nullptr) { - float frame_diff = get_frame_diff(prev_frame.get(), frame); - if (frame_diff > proc_cfg.scn_det_thresh) { - spdlog::debug( - "Scene change detected ({:.2f}%), skipping frame {}", - frame_diff, - proc_ctx->processed_frames - ); - skip_frame = true; - } - } - - // Write the interpolated frames - for (int i = 0; i < proc_cfg.frm_rate_mul - 1; i++) { - // Skip interpolation if this is the first frame - if (prev_frame == nullptr) { - break; - } - - // Get the interpolated frame from the interpolator - if (!skip_frame) { - ret = interpolator->interpolate( - prev_frame.get(), frame, &raw_processed_frame, current_time_step - ); - } else { - ret = 0; - raw_processed_frame = av_frame_clone(prev_frame.get()); - } - - // Write the interpolated frame - if (ret < 0 && ret != AVERROR(EAGAIN)) { - av_strerror(ret, errbuf, sizeof(errbuf)); - spdlog::critical("Error interpolating frame: {}", errbuf); - return ret; - } else if (ret == 0 && raw_processed_frame != nullptr) { - auto processed_frame = std::unique_ptr( - raw_processed_frame, av_frame_deleter - ); - - processed_frame->pts = proc_ctx->processed_frames; - ret = write_frame(processed_frame.get(), proc_ctx, encoder, benchmark); - if (ret < 0) { - return ret; - } - } - proc_ctx->processed_frames++; - current_time_step += time_step; - } - - // Write the original frame - frame->pts = proc_ctx->processed_frames; - ret = write_frame(frame, proc_ctx, encoder, benchmark); - - // Update the previous frame with the current frame - prev_frame.reset(av_frame_clone(frame)); - return ret; -} - -// Process frames using the selected filter. -int process_frames( - const EncoderConfig &enc_cfg, - const ProcessorConfig &proc_cfg, - VideoProcessingContext *proc_ctx, - Decoder &decoder, - Encoder &encoder, - Processor *processor, - bool benchmark -) { - char errbuf[AV_ERROR_MAX_STRING_SIZE]; - int ret = 0; - - // Get required objects - AVFormatContext *ifmt_ctx = decoder.get_format_context(); - AVCodecContext *dec_ctx = decoder.get_codec_context(); - int in_vstream_idx = decoder.get_video_stream_index(); - AVFormatContext *ofmt_ctx = encoder.get_format_context(); - int *stream_map = encoder.get_stream_map(); - - // Reference to the previous frame does not require allocation - // It will be cloned from the current frame - std::unique_ptr prev_frame(nullptr, av_frame_deleter); - - // Allocate space for the decoded frames - std::unique_ptr frame(av_frame_alloc(), av_frame_deleter); - if (frame == nullptr) { - spdlog::critical("Error allocating frame"); - return AVERROR(ENOMEM); - } - - // Allocate space for the decoded packets - std::unique_ptr packet( - av_packet_alloc(), av_packet_deleter - ); - if (packet == nullptr) { - spdlog::critical("Error allocating packet"); - return AVERROR(ENOMEM); - } - - // Set the total number of frames in the VideoProcessingContext - set_total_frames(proc_cfg, proc_ctx, ifmt_ctx, in_vstream_idx, processor); - - // Read frames from the input file - while (!proc_ctx->abort) { - ret = av_read_frame(ifmt_ctx, packet.get()); - if (ret < 0) { - if (ret == AVERROR_EOF) { - spdlog::debug("Reached end of file"); - break; - } - av_strerror(ret, errbuf, sizeof(errbuf)); - spdlog::critical("Error reading packet: {}", errbuf); - return ret; - } - - if (packet->stream_index == in_vstream_idx) { - // Send the packet to the decoder for decoding - ret = avcodec_send_packet(dec_ctx, packet.get()); - if (ret < 0) { - av_strerror(ret, errbuf, sizeof(errbuf)); - spdlog::critical("Error sending packet to decoder: {}", errbuf); - return ret; - } - - // Process frames decoded from the packet - while (!proc_ctx->abort) { - // Sleep for 100 ms if processing is paused - if (proc_ctx->pause) { - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - continue; - } - - // Receive the decoded frame from the decoder - ret = avcodec_receive_frame(dec_ctx, frame.get()); - if (ret == AVERROR(EAGAIN)) { - // No more frames from this packet - break; - } else if (ret < 0) { - av_strerror(ret, errbuf, sizeof(errbuf)); - spdlog::critical("Error decoding video frame: {}", errbuf); - return ret; - } - - AVFrame *raw_processed_frame = nullptr; - - // Process the frame based on the selected processing mode - switch (processor->get_processing_mode()) { - case ProcessingMode::Filter: { - ret = process_filtering( - processor, - proc_ctx, - encoder, - benchmark, - frame.get(), - raw_processed_frame - ); - break; - } - case ProcessingMode::Interpolate: { - ret = process_interpolation( - processor, - proc_cfg, - proc_ctx, - encoder, - benchmark, - prev_frame, - frame.get(), - raw_processed_frame - ); - break; - } - default: - spdlog::critical("Unknown processing mode"); - return -1; - } - if (ret < 0 && ret != AVERROR(EAGAIN)) { - return ret; - } - av_frame_unref(frame.get()); - proc_ctx->processed_frames++; - spdlog::debug( - "Processed frame {}/{}", proc_ctx->processed_frames, proc_ctx->total_frames - ); - } - } else if (enc_cfg.copy_streams && stream_map[packet->stream_index] >= 0) { - write_raw_packet(packet.get(), ifmt_ctx, ofmt_ctx, stream_map); - } - av_packet_unref(packet.get()); - } - - // Flush the filter - std::vector raw_flushed_frames; - ret = processor->flush(raw_flushed_frames); - if (ret < 0) { - av_strerror(ret, errbuf, sizeof(errbuf)); - spdlog::critical("Error flushing filter: {}", errbuf); - return ret; - } - - // Wrap flushed frames in unique_ptrs - std::vector> flushed_frames; - for (AVFrame *raw_frame : raw_flushed_frames) { - flushed_frames.emplace_back(raw_frame, av_frame_deleter); - } - - // Encode and write all flushed frames - for (auto &flushed_frame : flushed_frames) { - ret = write_frame(flushed_frame.get(), proc_ctx, encoder, benchmark); - if (ret < 0) { - return ret; - } - proc_ctx->processed_frames++; - } - - // Flush the encoder - ret = encoder.flush(); - if (ret < 0) { - av_strerror(ret, errbuf, sizeof(errbuf)); - spdlog::critical("Error flushing encoder: {}", errbuf); - return ret; - } - - return ret; -} diff --git a/src/libvideo2x.cpp b/src/libvideo2x.cpp index 231466d..792e106 100644 --- a/src/libvideo2x.cpp +++ b/src/libvideo2x.cpp @@ -6,40 +6,40 @@ extern "C" { #include +#include "avutils.h" #include "decoder.h" #include "encoder.h" -#include "frames_processor.h" +#include "logging.h" #include "processor.h" #include "processor_factory.h" -int process_video( - const std::filesystem::path in_fname, - const std::filesystem::path out_fname, +VideoProcessor::VideoProcessor( const HardwareConfig hw_cfg, const ProcessorConfig proc_cfg, - EncoderConfig enc_cfg, - VideoProcessingContext *proc_ctx, - Libvideo2xLogLevel log_level, + const EncoderConfig enc_cfg, + Video2xLogLevel log_level, bool benchmark +) + : hw_cfg_(hw_cfg), proc_cfg_(proc_cfg), enc_cfg_(enc_cfg), benchmark_(benchmark) { + set_log_level(log_level); +} + +int VideoProcessor::process( + const std::filesystem::path in_fname, + const std::filesystem::path out_fname ) { char errbuf[AV_ERROR_MAX_STRING_SIZE]; int ret = 0; - // Set the log level for FFmpeg and spdlog - set_log_level(log_level); - // Create a smart pointer to manage the hardware device context - auto hw_ctx_deleter = [](AVBufferRef *ref) { - if (ref != nullptr) { - av_buffer_unref(&ref); - } - }; - std::unique_ptr hw_ctx(nullptr, hw_ctx_deleter); + std::unique_ptr hw_ctx( + nullptr, &av_bufferref_deleter + ); // Initialize hardware device context - if (hw_cfg.hw_device_type != AV_HWDEVICE_TYPE_NONE) { + if (hw_cfg_.hw_device_type != AV_HWDEVICE_TYPE_NONE) { AVBufferRef *tmp_hw_ctx = nullptr; - ret = av_hwdevice_ctx_create(&tmp_hw_ctx, hw_cfg.hw_device_type, NULL, NULL, 0); + ret = av_hwdevice_ctx_create(&tmp_hw_ctx, hw_cfg_.hw_device_type, NULL, NULL, 0); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error initializing hardware device context: {}", errbuf); @@ -50,7 +50,7 @@ int process_video( // Initialize input decoder Decoder decoder; - ret = decoder.init(hw_cfg.hw_device_type, hw_ctx.get(), in_fname); + ret = decoder.init(hw_cfg_.hw_device_type, hw_ctx.get(), in_fname); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Failed to initialize decoder: {}", errbuf); @@ -63,7 +63,7 @@ int process_video( // Create and initialize the appropriate filter std::unique_ptr processor( - ProcessorFactory::instance().create_processor(proc_cfg, hw_cfg.vk_device_index) + ProcessorFactory::instance().create_processor(proc_cfg_, hw_cfg_.vk_device_index) ); if (processor == nullptr) { spdlog::critical("Failed to create filter instance"); @@ -73,21 +73,23 @@ int process_video( // Initialize output dimensions based on filter configuration int output_width = 0, output_height = 0; processor->get_output_dimensions( - proc_cfg, dec_ctx->width, dec_ctx->height, output_width, output_height + proc_cfg_, dec_ctx->width, dec_ctx->height, output_width, output_height ); if (output_width <= 0 || output_height <= 0) { spdlog::critical("Failed to determine the output dimensions"); return -1; } - // Update encoder configuration with output dimensions - enc_cfg.width = output_width; - enc_cfg.height = output_height; + // Update encoder output dimensions + enc_cfg_.width = output_width; + enc_cfg_.height = output_height; + + // Update encoder frame rate multiplier + enc_cfg_.frm_rate_mul = proc_cfg_.frm_rate_mul; // Initialize the encoder Encoder encoder; - ret = - encoder.init(hw_ctx.get(), out_fname, ifmt_ctx, dec_ctx, enc_cfg, proc_cfg, in_vstream_idx); + ret = encoder.init(hw_ctx.get(), out_fname, ifmt_ctx, dec_ctx, enc_cfg_, in_vstream_idx); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Failed to initialize encoder: {}", errbuf); @@ -102,7 +104,7 @@ int process_video( } // Process frames using the encoder and decoder - ret = process_frames(enc_cfg, proc_cfg, proc_ctx, decoder, encoder, processor.get(), benchmark); + ret = process_frames(decoder, encoder, processor); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error processing frames: {}", errbuf); @@ -119,3 +121,310 @@ int process_video( } return 0; } + +// Process frames using the selected filter. +int VideoProcessor::process_frames( + Decoder &decoder, + Encoder &encoder, + std::unique_ptr &processor +) { + char errbuf[AV_ERROR_MAX_STRING_SIZE]; + int ret = 0; + + // Get required objects + AVFormatContext *ifmt_ctx = decoder.get_format_context(); + AVCodecContext *dec_ctx = decoder.get_codec_context(); + int in_vstream_idx = decoder.get_video_stream_index(); + AVFormatContext *ofmt_ctx = encoder.get_format_context(); + int *stream_map = encoder.get_stream_map(); + + // Reference to the previous frame does not require allocation + // It will be cloned from the current frame + std::unique_ptr prev_frame(nullptr, &av_frame_deleter); + + // Allocate space for the decoded frames + std::unique_ptr frame( + av_frame_alloc(), &av_frame_deleter + ); + if (frame == nullptr) { + spdlog::critical("Error allocating frame"); + return AVERROR(ENOMEM); + } + + // Allocate space for the decoded packets + std::unique_ptr packet( + av_packet_alloc(), &av_packet_deleter + ); + if (packet == nullptr) { + spdlog::critical("Error allocating packet"); + return AVERROR(ENOMEM); + } + + // Set the total number of frames in the VideoProcessingContext + spdlog::debug("Estimating the total number of frames to process"); + total_frames_ = get_video_frame_count(ifmt_ctx, in_vstream_idx); + + if (total_frames_ <= 0) { + spdlog::warn("Unable to determine the total number of frames"); + total_frames_ = 0; + } else { + spdlog::debug("{} frames to process", total_frames_.load()); + } + + // Set total frames for interpolation + if (processor->get_processing_mode() == ProcessingMode::Interpolate) { + total_frames_.store(total_frames_.load() * proc_cfg_.frm_rate_mul); + } + + // Read frames from the input file + while (!aborted_.load()) { + ret = av_read_frame(ifmt_ctx, packet.get()); + if (ret < 0) { + if (ret == AVERROR_EOF) { + spdlog::debug("Reached end of file"); + break; + } + av_strerror(ret, errbuf, sizeof(errbuf)); + spdlog::critical("Error reading packet: {}", errbuf); + return ret; + } + + if (packet->stream_index == in_vstream_idx) { + // Send the packet to the decoder for decoding + ret = avcodec_send_packet(dec_ctx, packet.get()); + if (ret < 0) { + av_strerror(ret, errbuf, sizeof(errbuf)); + spdlog::critical("Error sending packet to decoder: {}", errbuf); + return ret; + } + + // Process frames decoded from the packet + while (!aborted_.load()) { + // Sleep for 100 ms if processing is paused + if (paused_.load()) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + continue; + } + + // Receive the decoded frame from the decoder + ret = avcodec_receive_frame(dec_ctx, frame.get()); + if (ret == AVERROR(EAGAIN)) { + // No more frames from this packet + break; + } else if (ret < 0) { + av_strerror(ret, errbuf, sizeof(errbuf)); + spdlog::critical("Error decoding video frame: {}", errbuf); + return ret; + } + + // Process the frame based on the selected processing mode + AVFrame *proc_frame = nullptr; + switch (processor->get_processing_mode()) { + case ProcessingMode::Filter: { + ret = process_filtering(processor, encoder, frame.get(), proc_frame); + break; + } + case ProcessingMode::Interpolate: { + ret = process_interpolation( + processor, encoder, prev_frame, frame.get(), proc_frame + ); + break; + } + default: + spdlog::critical("Unknown processing mode"); + return -1; + } + if (ret < 0 && ret != AVERROR(EAGAIN)) { + return ret; + } + av_frame_unref(frame.get()); + frame_index_++; + spdlog::debug("Processed frame {}/{}", frame_index_.load(), total_frames_.load()); + } + } else if (enc_cfg_.copy_streams && stream_map[packet->stream_index] >= 0) { + ret = write_raw_packet(packet.get(), ifmt_ctx, ofmt_ctx, stream_map); + if (ret < 0) { + return ret; + } + } + av_packet_unref(packet.get()); + } + + // Flush the filter + std::vector raw_flushed_frames; + ret = processor->flush(raw_flushed_frames); + if (ret < 0) { + av_strerror(ret, errbuf, sizeof(errbuf)); + spdlog::critical("Error flushing filter: {}", errbuf); + return ret; + } + + // Wrap flushed frames in unique_ptrs + std::vector> flushed_frames; + for (AVFrame *raw_frame : raw_flushed_frames) { + flushed_frames.emplace_back(raw_frame, &av_frame_deleter); + } + + // Encode and write all flushed frames + for (auto &flushed_frame : flushed_frames) { + ret = write_frame(flushed_frame.get(), encoder); + if (ret < 0) { + return ret; + } + frame_index_++; + } + + // Flush the encoder + ret = encoder.flush(); + if (ret < 0) { + av_strerror(ret, errbuf, sizeof(errbuf)); + spdlog::critical("Error flushing encoder: {}", errbuf); + return ret; + } + + return ret; +} + +int VideoProcessor::write_frame(AVFrame *frame, Encoder &encoder) { + char errbuf[AV_ERROR_MAX_STRING_SIZE]; + int ret = 0; + + if (!benchmark_) { + // Set the frame type to none to let the encoder decide + frame->pict_type = AV_PICTURE_TYPE_NONE; + ret = encoder.write_frame(frame, frame_index_); + if (ret < 0) { + av_strerror(ret, errbuf, sizeof(errbuf)); + spdlog::critical("Error encoding/writing frame: {}", errbuf); + } + } + return ret; +} + +int VideoProcessor::write_raw_packet( + AVPacket *packet, + AVFormatContext *ifmt_ctx, + AVFormatContext *ofmt_ctx, + int *stream_map +) { + char errbuf[AV_ERROR_MAX_STRING_SIZE]; + int ret = 0; + + AVStream *in_stream = ifmt_ctx->streams[packet->stream_index]; + int out_stream_index = stream_map[packet->stream_index]; + AVStream *out_stream = ofmt_ctx->streams[out_stream_index]; + + av_packet_rescale_ts(packet, in_stream->time_base, out_stream->time_base); + packet->stream_index = out_stream_index; + + ret = av_interleaved_write_frame(ofmt_ctx, packet); + if (ret < 0) { + av_strerror(ret, errbuf, sizeof(errbuf)); + spdlog::critical("Error muxing audio/subtitle packet: {}", errbuf); + } + return ret; +} + +int VideoProcessor::process_filtering( + std::unique_ptr &processor, + Encoder &encoder, + AVFrame *frame, + AVFrame *proc_frame +) { + char errbuf[AV_ERROR_MAX_STRING_SIZE]; + int ret = 0; + + // Cast the processor to a Filter + Filter *filter = static_cast(processor.get()); + + // Process the frame using the filter + ret = filter->filter(frame, &proc_frame); + + // Write the processed frame + if (ret < 0 && ret != AVERROR(EAGAIN)) { + av_strerror(ret, errbuf, sizeof(errbuf)); + spdlog::critical("Error filtering frame: {}", errbuf); + } else if (ret == 0 && proc_frame != nullptr) { + auto processed_frame = + std::unique_ptr(proc_frame, &av_frame_deleter); + ret = write_frame(processed_frame.get(), encoder); + } + return ret; +} + +int VideoProcessor::process_interpolation( + std::unique_ptr &processor, + Encoder &encoder, + std::unique_ptr &prev_frame, + AVFrame *frame, + AVFrame *proc_frame +) { + char errbuf[AV_ERROR_MAX_STRING_SIZE]; + int ret = 0; + + // Cast the processor to an Interpolator + Interpolator *interpolator = static_cast(processor.get()); + + // Calculate the time step for each frame + float time_step = 1.0f / static_cast(proc_cfg_.frm_rate_mul); + float current_time_step = time_step; + + // Check if a scene change is detected + bool skip_frame = false; + if (prev_frame.get() != nullptr) { + float frame_diff = get_frame_diff(prev_frame.get(), frame); + if (frame_diff > proc_cfg_.scn_det_thresh) { + spdlog::debug( + "Scene change detected ({:.2f}%), skipping frame {}", + frame_diff, + frame_index_.load() + ); + skip_frame = true; + } + } + + // Write the interpolated frames + for (int i = 0; i < proc_cfg_.frm_rate_mul - 1; i++) { + // Skip interpolation if this is the first frame + if (prev_frame == nullptr) { + break; + } + + // Get the interpolated frame from the interpolator + if (!skip_frame) { + ret = + interpolator->interpolate(prev_frame.get(), frame, &proc_frame, current_time_step); + } else { + ret = 0; + proc_frame = av_frame_clone(prev_frame.get()); + } + + // Write the interpolated frame + if (ret < 0 && ret != AVERROR(EAGAIN)) { + av_strerror(ret, errbuf, sizeof(errbuf)); + spdlog::critical("Error interpolating frame: {}", errbuf); + return ret; + } else if (ret == 0 && proc_frame != nullptr) { + auto processed_frame = std::unique_ptr( + proc_frame, &av_frame_deleter + ); + + processed_frame->pts = frame_index_; + ret = write_frame(processed_frame.get(), encoder); + if (ret < 0) { + return ret; + } + } + + frame_index_++; + current_time_step += time_step; + } + + // Write the original frame + frame->pts = frame_index_; + ret = write_frame(frame, encoder); + + // Update the previous frame with the current frame + prev_frame.reset(av_frame_clone(frame)); + return ret; +} diff --git a/src/logging.cpp b/src/logging.cpp index 434b396..9f105bd 100644 --- a/src/logging.cpp +++ b/src/logging.cpp @@ -6,33 +6,33 @@ extern "C" { #include -void set_log_level(Libvideo2xLogLevel log_level) { +void set_log_level(Video2xLogLevel log_level) { switch (log_level) { - case Libvideo2xLogLevel::Trace: + case Video2xLogLevel::Trace: av_log_set_level(AV_LOG_TRACE); spdlog::set_level(spdlog::level::trace); break; - case Libvideo2xLogLevel::Debug: + case Video2xLogLevel::Debug: av_log_set_level(AV_LOG_DEBUG); spdlog::set_level(spdlog::level::debug); break; - case Libvideo2xLogLevel::Info: + case Video2xLogLevel::Info: av_log_set_level(AV_LOG_INFO); spdlog::set_level(spdlog::level::info); break; - case Libvideo2xLogLevel::Warning: + case Video2xLogLevel::Warning: av_log_set_level(AV_LOG_WARNING); spdlog::set_level(spdlog::level::warn); break; - case Libvideo2xLogLevel::Error: + case Video2xLogLevel::Error: av_log_set_level(AV_LOG_ERROR); spdlog::set_level(spdlog::level::err); break; - case Libvideo2xLogLevel::Critical: + case Video2xLogLevel::Critical: av_log_set_level(AV_LOG_FATAL); spdlog::set_level(spdlog::level::critical); break; - case Libvideo2xLogLevel::Off: + case Video2xLogLevel::Off: av_log_set_level(AV_LOG_QUIET); spdlog::set_level(spdlog::level::off); break; diff --git a/tools/video2x/src/video2x.cpp b/tools/video2x/src/video2x.cpp index 237e1df..6be1bb6 100644 --- a/tools/video2x/src/video2x.cpp +++ b/tools/video2x/src/video2x.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -50,12 +49,9 @@ namespace po = boost::program_options; // Indicate if a newline needs to be printed before the next output std::atomic newline_required = false; -// Mutex for synchronizing access to VideoProcessingContext -std::mutex proc_ctx_mutex; - // Structure to hold parsed arguments struct Arguments { - Libvideo2xLogLevel log_level = Libvideo2xLogLevel::Info; + Video2xLogLevel log_level = Video2xLogLevel::Info; bool no_progress = false; // General options @@ -146,27 +142,27 @@ std::string wstring_to_u8string(const std::string &str) { } #endif -void set_spdlog_level(Libvideo2xLogLevel log_level) { +void set_spdlog_level(Video2xLogLevel log_level) { switch (log_level) { - case Libvideo2xLogLevel::Trace: + case Video2xLogLevel::Trace: spdlog::set_level(spdlog::level::trace); break; - case Libvideo2xLogLevel::Debug: + case Video2xLogLevel::Debug: spdlog::set_level(spdlog::level::debug); break; - case Libvideo2xLogLevel::Info: + case Video2xLogLevel::Info: spdlog::set_level(spdlog::level::info); break; - case Libvideo2xLogLevel::Warning: + case Video2xLogLevel::Warning: spdlog::set_level(spdlog::level::warn); break; - case Libvideo2xLogLevel::Error: + case Video2xLogLevel::Error: spdlog::set_level(spdlog::level::err); break; - case Libvideo2xLogLevel::Critical: + case Video2xLogLevel::Critical: spdlog::set_level(spdlog::level::critical); break; - case Libvideo2xLogLevel::Off: + case Video2xLogLevel::Off: spdlog::set_level(spdlog::level::off); break; default: @@ -175,18 +171,18 @@ void set_spdlog_level(Libvideo2xLogLevel log_level) { } } -std::optional find_log_level_by_name(const StringType &log_level_name) { +std::optional find_log_level_by_name(const StringType &log_level_name) { // Static map to store the mapping - static const std::unordered_map LogLevelMap = { - {STR("trace"), Libvideo2xLogLevel::Trace}, - {STR("debug"), Libvideo2xLogLevel::Debug}, - {STR("info"), Libvideo2xLogLevel::Info}, - {STR("warning"), Libvideo2xLogLevel::Warning}, - {STR("warn"), Libvideo2xLogLevel::Warning}, - {STR("error"), Libvideo2xLogLevel::Error}, - {STR("critical"), Libvideo2xLogLevel::Critical}, - {STR("off"), Libvideo2xLogLevel::Off}, - {STR("none"), Libvideo2xLogLevel::Off} + static const std::unordered_map LogLevelMap = { + {STR("trace"), Video2xLogLevel::Trace}, + {STR("debug"), Video2xLogLevel::Debug}, + {STR("info"), Video2xLogLevel::Info}, + {STR("warning"), Video2xLogLevel::Warning}, + {STR("warn"), Video2xLogLevel::Warning}, + {STR("error"), Video2xLogLevel::Error}, + {STR("critical"), Video2xLogLevel::Critical}, + {STR("off"), Video2xLogLevel::Off}, + {STR("none"), Video2xLogLevel::Off} }; // Normalize the input to lowercase @@ -356,32 +352,6 @@ int get_vulkan_device_prop(uint32_t vk_device_index, VkPhysicalDeviceProperties return 0; } -// Wrapper function for video processing thread -void process_video_thread( - Arguments *arguments, - int *proc_ret, - HardwareConfig hw_cfg, - ProcessorConfig proc_cfg, - EncoderConfig enc_cfg, - VideoProcessingContext *proc_ctx -) { - *proc_ret = process_video( - arguments->in_fname, - arguments->out_fname, - hw_cfg, - proc_cfg, - enc_cfg, - proc_ctx, - arguments->log_level, - arguments->benchmark - ); - - { - std::lock_guard lock(proc_ctx_mutex); - proc_ctx->completed = true; - } -} - #ifdef _WIN32 int wmain(int argc, wchar_t *argv[]) { // Set console output code page to UTF-8 @@ -552,7 +522,7 @@ int main(int argc, char **argv) { } if (vm.count("log-level")) { - std::optional log_level = + std::optional log_level = find_log_level_by_name(vm["log-level"].as()); if (!log_level.has_value()) { spdlog::critical("Invalid log level specified."); @@ -772,22 +742,20 @@ int main(int argc, char **argv) { } } - // Setup struct to store processing context - VideoProcessingContext proc_ctx; - proc_ctx.processed_frames = 0; - proc_ctx.total_frames = 0; - proc_ctx.pause = false; - proc_ctx.abort = false; - proc_ctx.completed = false; + // Create video processor object + VideoProcessor video_processor = + VideoProcessor(hw_cfg, proc_cfg, enc_cfg, arguments.log_level, arguments.benchmark); // Register a newline-safe log callback for FFmpeg av_log_set_callback(newline_safe_ffmpeg_log_callback); // Create a thread for video processing int proc_ret = 0; - std::thread processing_thread( - process_video_thread, &arguments, &proc_ret, hw_cfg, proc_cfg, enc_cfg, &proc_ctx - ); + std::atomic completed = false; // Use atomic for thread-safe updates + std::thread processing_thread([&]() { + proc_ret = video_processor.process(arguments.in_fname, arguments.out_fname); + completed.store(true, std::memory_order_relaxed); + }); spdlog::info("Press [space] to pause/resume, [q] to abort."); // Setup timer @@ -801,12 +769,7 @@ int main(int argc, char **argv) { // Main thread loop to display progress and handle input while (true) { - bool completed; - { - std::lock_guard lock(proc_ctx_mutex); - completed = proc_ctx.completed; - } - if (completed) { + if (completed.load()) { break; } @@ -825,9 +788,12 @@ int main(int argc, char **argv) { if (ch == ' ' || ch == '\n') { // Toggle pause state { - std::lock_guard lock(proc_ctx_mutex); - proc_ctx.pause = !proc_ctx.pause; - if (proc_ctx.pause) { + if (video_processor.is_paused()) { + video_processor.resume(); + } else { + video_processor.pause(); + } + if (video_processor.is_paused()) { std::cout << "\r\033[KProcessing paused; press [space] to resume, [q] to abort."; std::cout.flush(); @@ -846,8 +812,7 @@ int main(int argc, char **argv) { } spdlog::warn("Aborting gracefully; press Ctrl+C to terminate forcefully."); { - std::lock_guard lock(proc_ctx_mutex); - proc_ctx.abort = true; + video_processor.abort(); newline_required = false; } break; @@ -856,14 +821,13 @@ int main(int argc, char **argv) { // Display progress if (!arguments.no_progress) { int64_t processed_frames, total_frames; - bool pause; + bool paused; { - std::lock_guard lock(proc_ctx_mutex); - processed_frames = proc_ctx.processed_frames; - total_frames = proc_ctx.total_frames; - pause = proc_ctx.pause; + processed_frames = video_processor.get_processed_frames(); + total_frames = video_processor.get_total_frames(); + paused = video_processor.is_paused(); } - if (!pause && (total_frames > 0 || processed_frames > 0)) { + if (!paused && (total_frames > 0 || processed_frames > 0)) { double percentage = total_frames > 0 ? static_cast(processed_frames) * 100.0 / static_cast(total_frames) : 0.0; @@ -919,12 +883,7 @@ int main(int argc, char **argv) { } // Print final message based on processing result - bool aborted; - { - std::lock_guard lock(proc_ctx_mutex); - aborted = proc_ctx.abort; - } - if (aborted) { + if (video_processor.is_aborted()) { spdlog::warn("Video processing aborted"); return 2; } else if (proc_ret != 0) { @@ -935,11 +894,7 @@ int main(int argc, char **argv) { } // Calculate statistics - int64_t processed_frames; - { - std::lock_guard lock(proc_ctx_mutex); - processed_frames = proc_ctx.processed_frames; - } + int64_t processed_frames = video_processor.get_processed_frames(); int time_elapsed = static_cast(timer.get_elapsed_time() / 1000); int hours_elapsed = time_elapsed / 3600; int minutes_elapsed = (time_elapsed % 3600) / 60;