refactor(libvideo2x): convert the video processor into a class (#1246)

Signed-off-by: k4yt3x <i@k4yt3x.com>
2024-12-27 14:39:09 +00:00 · 2024-12-03 05:22:07 +00:00 · 2024-12-03 05:22:07 +00:00 · d4d1e58f8d
commit d4d1e58f8d
parent a379c7481e
16 changed files with 612 additions and 665 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -387,10 +387,9 @@ if(BUILD_VIDEO2X_CLI)
 endif()
 # Install the header files
 file(GLOB HEADER_FILES ${PROJECT_SOURCE_DIR}/include/libvideo2x/*.h)
 install(FILES
-    ${PROJECT_SOURCE_DIR}/include/libvideo2x/libvideo2x.h
+    ${HEADER_FILES}
    ${PROJECT_SOURCE_DIR}/include/libvideo2x/fsutils.h
    ${PROJECT_SOURCE_DIR}/include/libvideo2x/logging.h
    ${CMAKE_CURRENT_BINARY_DIR}/libvideo2x/version.h
    DESTINATION ${INSTALL_INCLUDE_DESTINATION}
 )
@ -403,9 +402,7 @@ if(WIN32)
        set(BOOST_DLL_PATH ${BOOST_BASE_PATH}/boost_program_options-vc143-mt-gd-x64-1_86.dll)
    endif()
    # Find all FFmpeg DLLs
    file(GLOB FFMPEG_DLLS "${FFMPEG_BASE_PATH}/bin/*.dll")
    install(FILES
        ${CMAKE_BINARY_DIR}/realesrgan_install/bin/librealesrgan-ncnn-vulkan.dll
        ${CMAKE_BINARY_DIR}/rife_install/bin/librife-ncnn-vulkan.dll
--- a/include/libvideo2x/avutils.h
+++ b/include/libvideo2x/avutils.h
@ -5,15 +5,18 @@ extern "C" {
 #include <libavformat/avformat.h>
 }
 // Packs a (major, minor, micro) version triple into a single integer so it can be compared
 // against build-version constants such as LIBAVCODEC_BUILD.
 // Every argument is parenthesized so that expression arguments (e.g. `x | y`) are evaluated
 // as a unit before the shifts and ORs of the macro body are applied.
 #define CALC_FFMPEG_VERSION(a, b, c) ((a) << 16 | (b) << 8 | (c))
 // Returns the frame rate of the video stream with index in_vstream_idx in ifmt_ctx as a
 // rational number (callers read its num/den fields).
 AVRational get_video_frame_rate(AVFormatContext *ifmt_ctx, int in_vstream_idx);
 // Returns the number of frames in the video stream with index in_vstream_idx. The count is
 // read from 'nb_frames' when available and otherwise estimated using duration * fps.
 // Callers treat a result <= 0 as "unknown".
 int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx);
-AVPixelFormat
+AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt);
 get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt);
 // Returns how much frame2 differs from frame1; callers compare the value against a scene
 // detection threshold and log it as a percentage.
 float get_frame_diff(AVFrame *frame1, AVFrame *frame2);
 // Deleters intended for use as the custom deleter of std::unique_ptr; each one accepts
 // nullptr and otherwise releases the object through the matching FFmpeg free function.
 void av_bufferref_deleter(AVBufferRef *bufferref);
 void av_frame_deleter(AVFrame *frame);
 void av_packet_deleter(AVPacket *packet);
 #endif  // AVUTILS_H
--- a/include/libvideo2x/encoder.h
+++ b/include/libvideo2x/encoder.h
@ -3,13 +3,51 @@
 #include <cstdint>
 #include <filesystem>
 #include <vector>
 extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
 #include <libavutil/pixdesc.h>
 }
-#include "libvideo2x/libvideo2x.h"
+#include "fsutils.h"
 // Encoder configurations
 // Plain aggregate of the settings passed to Encoder::init(). Field order is part of the
 // interface for aggregate initialization, so new fields should be appended with care.
 struct EncoderConfig {
    // Non-AVCodecContext options
    AVCodecID codec;
    bool copy_streams;  // when true, non-video packets are copied to the output as-is
    // Basic video options
    // width/height are overwritten by VideoProcessor::process() with the processor's output
    // dimensions before the encoder is initialized
    int width;
    int height;
    // When > 0, Encoder::init() multiplies the input frame rate by this value;
    // VideoProcessor::process() copies it from ProcessorConfig::frm_rate_mul
    int frm_rate_mul;
    AVPixelFormat pix_fmt;
    // Rate control and compression
    int64_t bit_rate;
    int rc_buffer_size;
    int rc_min_rate;
    int rc_max_rate;
    int qmin;
    int qmax;
    // GOP and frame structure
    int gop_size;
    int max_b_frames;
    int keyint_min;
    int refs;
    // Performance and threading
    int thread_count;
    // Latency and buffering
    int delay;
    // Extra AVOptions
    // NOTE(review): presumably (option name, option value) pairs - confirm against Encoder::init
    std::vector<std::pair<StringType, StringType>> extra_opts;
 };
 class Encoder {
   public:
@ -22,7 +60,6 @@ class Encoder {
        AVFormatContext *ifmt_ctx,
        AVCodecContext *dec_ctx,
        EncoderConfig &enc_cfg,
        const ProcessorConfig &proc_cfg,
        int in_vstream_idx
    );
--- a/include/libvideo2x/filter_libplacebo.h
+++ b/include/libvideo2x/filter_libplacebo.h
@ -13,17 +13,6 @@ extern "C" {
 // FilterLibplacebo class definition
 class FilterLibplacebo : public Filter {
   private:
    AVFilterGraph *filter_graph_;
    AVFilterContext *buffersrc_ctx_;
    AVFilterContext *buffersink_ctx_;
    uint32_t vk_device_index_;
    const std::filesystem::path shader_path_;
    int width_;
    int height_;
    AVRational in_time_base_;
    AVRational out_time_base_;
   public:
    // Constructor
    FilterLibplacebo(
@ -56,6 +45,17 @@ class FilterLibplacebo : public Filter {
        int &out_width,
        int &out_height
    ) const override;
   private:
    AVFilterGraph *filter_graph_;
    AVFilterContext *buffersrc_ctx_;
    AVFilterContext *buffersink_ctx_;
    uint32_t vk_device_index_;
    const std::filesystem::path shader_path_;
    int width_;
    int height_;
    AVRational in_time_base_;
    AVRational out_time_base_;
 };
 #endif  // FILTER_LIBPLACEBO_H
--- a/include/libvideo2x/filter_realesrgan.h
+++ b/include/libvideo2x/filter_realesrgan.h
@ -10,16 +10,6 @@ extern "C" {
 // FilterRealesrgan class definition
 class FilterRealesrgan : public Filter {
   private:
    RealESRGAN *realesrgan_;
    int gpuid_;
    bool tta_mode_;
    int scaling_factor_;
    const StringType model_name_;
    AVRational in_time_base_;
    AVRational out_time_base_;
    AVPixelFormat out_pix_fmt_;
   public:
    // Constructor
    FilterRealesrgan(
@ -49,6 +39,16 @@ class FilterRealesrgan : public Filter {
        int &out_width,
        int &out_height
    ) const override;
   private:
    RealESRGAN *realesrgan_;
    int gpuid_;
    bool tta_mode_;
    int scaling_factor_;
    const StringType model_name_;
    AVRational in_time_base_;
    AVRational out_time_base_;
    AVPixelFormat out_pix_fmt_;
 };
 #endif  // FILTER_REALESRGAN_H
--- a/include/libvideo2x/frames_processor.h
+++ b/include/libvideo2x/frames_processor.h
@ -1,19 +0,0 @@
 #ifndef FRAMES_PROCESSOR_H
 #define FRAMES_PROCESSOR_H
 #include "decoder.h"
 #include "encoder.h"
 #include "libvideo2x.h"
 #include "processor.h"
 // Reads packets from the decoder's input, runs every decoded video frame through the given
 // processor (filtering or interpolation, depending on its processing mode) and writes the
 // results through the encoder. Progress and the pause/abort flags are exchanged through
 // proc_ctx. When benchmark is true, frames are processed but not encoded or written.
 // Returns 0 on success or a negative error code on failure.
 int process_frames(
    const EncoderConfig &enc_cfg,
    const ProcessorConfig &proc_cfg,
    VideoProcessingContext *proc_ctx,
    Decoder &decoder,
    Encoder &encoder,
    Processor *processor,
    bool benchmark = false
 );
 #endif  // FRAMES_PROCESSOR_H
--- a/include/libvideo2x/interpolator_rife.h
+++ b/include/libvideo2x/interpolator_rife.h
@ -10,18 +10,6 @@ extern "C" {
 // InterpolatorRIFE class definition
 class InterpolatorRIFE : public Interpolator {
   private:
    RIFE *rife_;
    int gpuid_;
    bool tta_mode_;
    bool tta_temporal_mode_;
    bool uhd_mode_;
    int num_threads_;
    const StringType model_name_;
    AVRational in_time_base_;
    AVRational out_time_base_;
    AVPixelFormat out_pix_fmt_;
   public:
    // Constructor
    InterpolatorRIFE(
@ -54,6 +42,18 @@ class InterpolatorRIFE : public Interpolator {
        int &out_width,
        int &out_height
    ) const override;
   private:
    RIFE *rife_;
    int gpuid_;
    bool tta_mode_;
    bool tta_temporal_mode_;
    bool uhd_mode_;
    int num_threads_;
    const StringType model_name_;
    AVRational in_time_base_;
    AVRational out_time_base_;
    AVPixelFormat out_pix_fmt_;
 };
 #endif  // INTERPOLATOR_RIFE_H
--- a/include/libvideo2x/libvideo2x.h
+++ b/include/libvideo2x/libvideo2x.h
@ -1,17 +1,20 @@
 #ifndef LIBVIDEO2X_H
 #define LIBVIDEO2X_H
-#include <filesystem>
+#include <atomic>
-#include <variant>
+#include <cstdint>
-#include <vector>
+#include <memory>
 extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
 }
-#include "fsutils.h"
+#include "avutils.h"
 #include "decoder.h"
 #include "encoder.h"
 #include "logging.h"
 #include "processor.h"
 #ifdef _WIN32
 #ifdef LIBVIDEO2X_EXPORTS
@ -23,105 +26,75 @@ extern "C" {
 #define LIBVIDEO2X_API
 #endif
 enum class ProcessingMode {
    Filter,
    Interpolate,
 };
 enum class ProcessorType {
    Libplacebo,
    RealESRGAN,
    RIFE,
 };
 struct LibplaceboConfig {
    StringType shader_path;
 };
 struct RealESRGANConfig {
    bool tta_mode;
    StringType model_name;
 };
 struct RIFEConfig {
    bool tta_mode;
    bool tta_temporal_mode;
    bool uhd_mode;
    int num_threads;
    StringType model_name;
 };
 // Unified filter configuration
 struct ProcessorConfig {
    ProcessorType processor_type;
    int width;
    int height;
    int scaling_factor;
    int frm_rate_mul;
    float scn_det_thresh;
    std::variant<LibplaceboConfig, RealESRGANConfig, RIFEConfig> config;
 };
 // Encoder configurations
 struct EncoderConfig {
    // Non-AVCodecContext options
    AVCodecID codec;
    bool copy_streams;
    // Basic video options
    int width;
    int height;
    AVPixelFormat pix_fmt;
    // Rate control and compression
    int64_t bit_rate;
    int rc_buffer_size;
    int rc_min_rate;
    int rc_max_rate;
    int qmin;
    int qmax;
    // GOP and frame structure
    int gop_size;
    int max_b_frames;
    int keyint_min;
    int refs;
    // Performance and threading
    int thread_count;
    // Latency and buffering
    int delay;
    // Extra AVOptions
    std::vector<std::pair<StringType, StringType>> extra_opts;
 };
 // Hardware selection for a processing run.
 struct HardwareConfig {
    // Device index handed to the processor factory when the processor is created
    uint32_t vk_device_index;
    // FFmpeg hardware device type; AV_HWDEVICE_TYPE_NONE skips creation of a hardware
    // device context, any other value is passed to av_hwdevice_ctx_create() and the decoder
    AVHWDeviceType hw_device_type;
 };
-// Video processing context
+class LIBVIDEO2X_API VideoProcessor {
-struct VideoProcessingContext {
+   public:
-    int64_t processed_frames;
+    VideoProcessor(
-    int64_t total_frames;
+        const HardwareConfig hw_cfg,
-    std::time_t start_time;
+        const ProcessorConfig proc_cfg,
-    bool pause;
+        EncoderConfig enc_cfg,
-    bool abort;
+        Video2xLogLevel = Video2xLogLevel::Info,
-    bool completed;
+        bool benchmark = false
    );
    virtual ~VideoProcessor() = default;
    [[nodiscard]] int
    process(const std::filesystem::path in_fname, const std::filesystem::path out_fname);
    void pause() { paused_.store(true); }
    void resume() { paused_.store(false); }
    void abort() { aborted_.store(true); }
    int64_t get_processed_frames() const { return frame_index_.load(); }
    int64_t get_total_frames() const { return total_frames_.load(); }
    bool is_paused() const { return paused_.load(); }
    bool is_aborted() const { return aborted_.load(); }
    bool is_completed() const { return completed_.load(); }
   private:
    [[nodiscard]] int
    process_frames(Decoder &decoder, Encoder &encoder, std::unique_ptr<Processor> &processor);
    [[nodiscard]] int write_frame(AVFrame *frame, Encoder &encoder);
    [[nodiscard]] inline int write_raw_packet(
        AVPacket *packet,
        AVFormatContext *ifmt_ctx,
        AVFormatContext *ofmt_ctx,
        int *stream_map
    );
    [[nodiscard]] inline int process_filtering(
        std::unique_ptr<Processor> &processor,
        Encoder &encoder,
        AVFrame *frame,
        AVFrame *proc_frame
    );
    [[nodiscard]] inline int process_interpolation(
        std::unique_ptr<Processor> &processor,
        Encoder &encoder,
        std::unique_ptr<AVFrame, decltype(&av_frame_deleter)> &prev_frame,
        AVFrame *frame,
        AVFrame *proc_frame
    );
    HardwareConfig hw_cfg_;
    ProcessorConfig proc_cfg_;
    EncoderConfig enc_cfg_;
    bool benchmark_ = false;
    std::atomic<int64_t> frame_index_ = 0;
    std::atomic<int64_t> total_frames_ = 0;
    std::atomic<bool> paused_ = false;
    std::atomic<bool> aborted_ = false;
    std::atomic<bool> completed_ = false;
 };
 // Process a video file using the specified configurations
 [[nodiscard]] LIBVIDEO2X_API int process_video(
    const std::filesystem::path in_fname,
    const std::filesystem::path out_fname,
    const HardwareConfig hw_cfg,
    const ProcessorConfig proc_cfg,
    EncoderConfig enc_cfg,
    VideoProcessingContext *proc_ctx,
    Libvideo2xLogLevel log_level,
    bool benchmark
 );
 #endif  // LIBVIDEO2X_H
--- a/include/libvideo2x/logging.h
+++ b/include/libvideo2x/logging.h
@ -5,7 +5,7 @@
 #include "fsutils.h"
-enum class Libvideo2xLogLevel {
+enum class Video2xLogLevel {
    Unknown,
    Trace,
    Debug,
@ -16,8 +16,8 @@ enum class Libvideo2xLogLevel {
    Off
 };
-void set_log_level(Libvideo2xLogLevel log_level);
+void set_log_level(Video2xLogLevel log_level);
-std::optional<Libvideo2xLogLevel> find_log_level_by_name(const StringType &log_level_name);
+std::optional<Video2xLogLevel> find_log_level_by_name(const StringType &log_level_name);
 #endif  // LOGGING_H
--- a/include/libvideo2x/processor.h
+++ b/include/libvideo2x/processor.h
@ -1,6 +1,7 @@
 #ifndef PROCESSOR_H
 #define PROCESSOR_H
 #include <variant>
 #include <vector>
 extern "C" {
@ -9,7 +10,46 @@ extern "C" {
 #include <libavutil/buffer.h>
 }
-#include "libvideo2x.h"
+#include "fsutils.h"
 // Kind of work a processor performs. The frame loop dispatches on the value returned by
 // Processor::get_processing_mode() to either the filtering or the interpolation path.
 enum class ProcessingMode {
    Filter,
    Interpolate,
 };
 // Identifies the processor implementation requested through ProcessorConfig::processor_type.
 enum class ProcessorType {
    Libplacebo,
    RealESRGAN,
    RIFE,
 };
 // Options specific to the libplacebo processor; one alternative of ProcessorConfig::config.
 struct LibplaceboConfig {
    // NOTE(review): presumably the shader file to load - confirm against FilterLibplacebo
    StringType shader_path;
 };
 // Options specific to the RealESRGAN processor; one alternative of ProcessorConfig::config.
 struct RealESRGANConfig {
    bool tta_mode;
    StringType model_name;
 };
 // Options specific to the RIFE processor; one alternative of ProcessorConfig::config.
 struct RIFEConfig {
    bool tta_mode;
    bool tta_temporal_mode;
    bool uhd_mode;
    int num_threads;
    StringType model_name;
 };
 // Unified filter configuration
 // Common settings shared by all processors plus a variant holding the options of the
 // selected processor.
 struct ProcessorConfig {
    ProcessorType processor_type;
    // width, height and scaling_factor are consumed by Processor::get_output_dimensions()
    int width;
    int height;
    int scaling_factor;
    // Frame rate multiplier: interpolation produces frm_rate_mul - 1 intermediate frames
    // between each pair of input frames
    int frm_rate_mul;
    // Scene detection threshold: when the difference between consecutive frames exceeds
    // this value, interpolation is skipped for that frame pair
    float scn_det_thresh;
    std::variant<LibplaceboConfig, RealESRGANConfig, RIFEConfig> config;
 };
 class Processor {
   public:
--- a/src/avutils.cpp
+++ b/src/avutils.cpp
@ -35,7 +35,7 @@ int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx) {
        spdlog::debug("Read total number of frames from 'nb_frames': {}", nb_frames);
        return nb_frames;
    }
-    spdlog::warn("Estimating the total number of frames from duration * fps");
+    spdlog::warn("Estimating the total number of frames using duration * fps");
    // Get the duration of the video
    double duration_secs = 0.0;
@ -68,7 +68,7 @@ AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat
    char errbuf[AV_ERROR_MAX_STRING_SIZE];
    // Retrieve the list of supported pixel formats
-#if LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(61, 13, 100)
+#if LIBAVCODEC_BUILD >= AV_VERSION_INT(61, 13, 100)
    const AVPixelFormat *supported_pix_fmts = nullptr;
    ret = avcodec_get_supported_config(
        nullptr, encoder, AV_CODEC_CONFIG_PIX_FORMAT, 0, (const void **)&supported_pix_fmts, nullptr
@ -185,3 +185,27 @@ float get_frame_diff(AVFrame *frame1, AVFrame *frame2) {
    return percent_diff;
 }
 // Custom unique_ptr deleter that drops the reference held on an AVBufferRef
 void av_bufferref_deleter(AVBufferRef *bufferref) {
    // Nothing to release for an empty pointer
    if (bufferref == nullptr) {
        return;
    }
    av_buffer_unref(&bufferref);
 }
 // Deleter for AVFrame unique_ptr
 // av_frame_free() releases the frame and clears the pointer it is given. The parameter is a
 // by-value copy, so assigning nullptr to it afterwards has no effect and is omitted.
 void av_frame_deleter(AVFrame *frame) {
    if (frame != nullptr) {
        av_frame_free(&frame);
    }
 }
 // Deleter for AVPacket unique_ptr
 // av_packet_free() unreferences the packet before freeing it and clears the pointer it is
 // given, so neither a separate av_packet_unref() call nor a reset of the by-value
 // parameter is needed.
 void av_packet_deleter(AVPacket *packet) {
    if (packet != nullptr) {
        av_packet_free(&packet);
    }
 }
--- a/src/encoder.cpp
+++ b/src/encoder.cpp
@ -33,7 +33,6 @@ int Encoder::init(
    AVFormatContext *ifmt_ctx,
    AVCodecContext *dec_ctx,
    EncoderConfig &enc_cfg,
    const ProcessorConfig &proc_cfg,
    int in_vstream_idx
 ) {
    int ret;
@ -122,9 +121,9 @@ int Encoder::init(
        spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(enc_ctx_->pix_fmt));
    }
-    if (proc_cfg.frm_rate_mul > 0) {
+    if (enc_cfg.frm_rate_mul > 0) {
        AVRational in_frame_rate = get_video_frame_rate(ifmt_ctx, in_vstream_idx);
-        enc_ctx_->framerate = {in_frame_rate.num * proc_cfg.frm_rate_mul, in_frame_rate.den};
+        enc_ctx_->framerate = {in_frame_rate.num * enc_cfg.frm_rate_mul, in_frame_rate.den};
        enc_ctx_->time_base = av_inv_q(enc_ctx_->framerate);
    } else {
        // Set the output video's time base
--- a/src/frames_processor.cpp
+++ b/src/frames_processor.cpp
@ -1,371 +0,0 @@
 #include "frames_processor.h"
 extern "C" {
 #include <libavutil/avutil.h>
 }
 #include <spdlog/spdlog.h>
 #include "avutils.h"
 // Deleter for AVFrame unique_ptr
 // av_frame_free() releases the frame and clears the pointer it is given. The lambda
 // parameter is a by-value copy, so resetting it afterwards has no effect and is omitted.
 auto av_frame_deleter = [](AVFrame *frame) {
    if (frame != nullptr) {
        av_frame_free(&frame);
    }
 };
 // Deleter for AVPacket unique_ptr
 // av_packet_free() unreferences the packet before freeing it and clears the pointer it is
 // given, so neither a separate av_packet_unref() call nor a reset of the by-value
 // parameter is needed.
 auto av_packet_deleter = [](AVPacket *packet) {
    if (packet != nullptr) {
        av_packet_free(&packet);
    }
 };
 // Stores the expected number of frames to process in proc_ctx->total_frames
 void set_total_frames(
    const ProcessorConfig &proc_cfg,
    VideoProcessingContext *proc_ctx,
    AVFormatContext *ifmt_ctx,
    int in_vstream_idx,
    Processor *processor
 ) {
    spdlog::debug("Estimating the total number of frames to process");

    // Alias the context field so that every update below lands directly in proc_ctx
    auto &total_frames = proc_ctx->total_frames;
    total_frames = get_video_frame_count(ifmt_ctx, in_vstream_idx);

    if (total_frames > 0) {
        spdlog::debug("{} frames to process", total_frames);
    } else {
        // A non-positive count means the frame count could not be determined
        spdlog::warn("Unable to determine the total number of frames");
        total_frames = 0;
    }

    // Interpolation multiplies the number of frames by the frame rate multiplier
    const bool interpolating = processor->get_processing_mode() == ProcessingMode::Interpolate;
    if (interpolating) {
        total_frames *= proc_cfg.frm_rate_mul;
    }
 }
 // Encodes and writes a single frame through the encoder, unless benchmark mode is active.
 // Returns 0 in benchmark mode, otherwise the encoder's return code (negative on error).
 int write_frame(
    AVFrame *frame,
    VideoProcessingContext *proc_ctx,
    Encoder &encoder,
    bool benchmark
 ) {
    // Benchmark runs skip encoding and writing entirely
    if (benchmark) {
        return 0;
    }

    // Leave the picture type unset so the encoder chooses it
    frame->pict_type = AV_PICTURE_TYPE_NONE;

    const int status = encoder.write_frame(frame, proc_ctx->processed_frames);
    if (status < 0) {
        char errbuf[AV_ERROR_MAX_STRING_SIZE];
        av_strerror(status, errbuf, sizeof(errbuf));
        spdlog::critical("Error encoding/writing frame: {}", errbuf);
    }
    return status;
 }
 // Copies a non-video packet to the output: rescales its timestamps from the input stream's
 // time base to the mapped output stream's time base and hands it to the muxer.
 // Returns the muxer's return code (negative on error).
 int write_raw_packet(
    AVPacket *packet,
    AVFormatContext *ifmt_ctx,
    AVFormatContext *ofmt_ctx,
    int *stream_map
 ) {
    // Resolve the source stream and the output stream it maps to
    const int in_stream_index = packet->stream_index;
    AVStream *src_stream = ifmt_ctx->streams[in_stream_index];
    const int mapped_index = stream_map[in_stream_index];
    AVStream *dst_stream = ofmt_ctx->streams[mapped_index];

    // Retime and retarget the packet for the output container
    av_packet_rescale_ts(packet, src_stream->time_base, dst_stream->time_base);
    packet->stream_index = mapped_index;

    const int status = av_interleaved_write_frame(ofmt_ctx, packet);
    if (status < 0) {
        char errbuf[AV_ERROR_MAX_STRING_SIZE];
        av_strerror(status, errbuf, sizeof(errbuf));
        spdlog::critical("Error muxing audio/subtitle packet: {}", errbuf);
    }
    return status;
 }
 // Runs one decoded frame through the filter and, when the filter produced an output frame,
 // writes that frame. Returns the filter's return code when nothing is written, otherwise
 // the result of write_frame().
 int process_filtering(
    Processor *processor,
    VideoProcessingContext *proc_ctx,
    Encoder &encoder,
    bool benchmark,
    AVFrame *frame,
    AVFrame *raw_processed_frame
 ) {
    // The caller selects this path only for processors in Filter mode
    Filter *filter = static_cast<Filter *>(processor);

    const int filter_status = filter->filter(frame, &raw_processed_frame);

    // Hard failure: report it and hand the error code back
    if (filter_status < 0 && filter_status != AVERROR(EAGAIN)) {
        char errbuf[AV_ERROR_MAX_STRING_SIZE];
        av_strerror(filter_status, errbuf, sizeof(errbuf));
        spdlog::critical("Error filtering frame: {}", errbuf);
        return filter_status;
    }

    // No frame available to write for this input
    if (filter_status != 0 || raw_processed_frame == nullptr) {
        return filter_status;
    }

    // Take ownership of the produced frame so it is freed after being written
    std::unique_ptr<AVFrame, decltype(av_frame_deleter)> processed_frame(
        raw_processed_frame, av_frame_deleter
    );
    return write_frame(processed_frame.get(), proc_ctx, encoder, benchmark);
 }
 // Produces the intermediate frames between prev_frame and frame, writes them followed by
 // the original frame, and finally stores a clone of frame in prev_frame for the next call.
 //
 // - For the first frame (prev_frame is null) only the original frame is written.
 // - When the difference between prev_frame and frame exceeds proc_cfg.scn_det_thresh,
 //   interpolation is skipped and clones of prev_frame are written in place of the
 //   interpolated frames.
 // - proc_ctx->processed_frames is used as the pts of every written frame and is
 //   incremented once per intermediate frame slot.
 //
 // Returns a negative error code on failure, otherwise the result of writing the
 // original frame.
 int process_interpolation(
    Processor *processor,
    const ProcessorConfig &proc_cfg,
    VideoProcessingContext *proc_ctx,
    Encoder &encoder,
    bool benchmark,
    std::unique_ptr<AVFrame, decltype(av_frame_deleter)> &prev_frame,
    AVFrame *frame,
    AVFrame *raw_processed_frame
 ) {
    char errbuf[AV_ERROR_MAX_STRING_SIZE];
    int ret = 0;

    // Cast the processor to an Interpolator
    Interpolator *interpolator = static_cast<Interpolator *>(processor);

    // Calculate the time step for each frame
    float time_step = 1.0f / static_cast<float>(proc_cfg.frm_rate_mul);
    float current_time_step = time_step;

    // Check if a scene change is detected
    bool skip_frame = false;
    if (prev_frame != nullptr) {
        float frame_diff = get_frame_diff(prev_frame.get(), frame);
        if (frame_diff > proc_cfg.scn_det_thresh) {
            spdlog::debug(
                "Scene change detected ({:.2f}%), skipping frame {}",
                frame_diff,
                proc_ctx->processed_frames
            );
            skip_frame = true;
        }
    }

    // Write the interpolated frames
    for (int i = 0; i < proc_cfg.frm_rate_mul - 1; i++) {
        // Skip interpolation if this is the first frame
        if (prev_frame == nullptr) {
            break;
        }

        // Get the interpolated frame from the interpolator
        if (!skip_frame) {
            ret = interpolator->interpolate(
                prev_frame.get(), frame, &raw_processed_frame, current_time_step
            );
        } else {
            // Scene change: repeat the previous frame instead of interpolating
            raw_processed_frame = av_frame_clone(prev_frame.get());
            if (raw_processed_frame == nullptr) {
                // Report the allocation failure instead of silently dropping the frame
                spdlog::critical("Error cloning the previous frame");
                return AVERROR(ENOMEM);
            }
            ret = 0;
        }

        // Write the interpolated frame
        if (ret < 0 && ret != AVERROR(EAGAIN)) {
            av_strerror(ret, errbuf, sizeof(errbuf));
            spdlog::critical("Error interpolating frame: {}", errbuf);
            return ret;
        } else if (ret == 0 && raw_processed_frame != nullptr) {
            auto processed_frame = std::unique_ptr<AVFrame, decltype(av_frame_deleter)>(
                raw_processed_frame, av_frame_deleter
            );
            // Ownership has moved to processed_frame, which frees the frame at the end of
            // this scope; clear the raw pointer so a later iteration can never wrap and
            // free the same frame a second time
            raw_processed_frame = nullptr;

            processed_frame->pts = proc_ctx->processed_frames;
            ret = write_frame(processed_frame.get(), proc_ctx, encoder, benchmark);
            if (ret < 0) {
                return ret;
            }
        }
        proc_ctx->processed_frames++;
        current_time_step += time_step;
    }

    // Write the original frame
    frame->pts = proc_ctx->processed_frames;
    ret = write_frame(frame, proc_ctx, encoder, benchmark);

    // Update the previous frame with the current frame
    prev_frame.reset(av_frame_clone(frame));
    return ret;
 }
 // Process frames using the selected filter.
 //
 // Main processing loop: reads packets from the decoder's input until end of file or until
 // proc_ctx->abort is set. Video packets are decoded and each decoded frame is passed to
 // the filtering or interpolation path according to the processor's processing mode.
 // Packets of other streams are copied to the output when enc_cfg.copy_streams is set and
 // the stream is mapped. After the loop, the processor and then the encoder are flushed.
 //
 // While proc_ctx->pause is set, the inner loop sleeps in 100 ms steps.
 // proc_ctx->processed_frames is advanced as frames are handled.
 //
 // Returns 0 on success or a negative error code; every failure is logged before returning.
 int process_frames(
    const EncoderConfig &enc_cfg,
    const ProcessorConfig &proc_cfg,
    VideoProcessingContext *proc_ctx,
    Decoder &decoder,
    Encoder &encoder,
    Processor *processor,
    bool benchmark
 ) {
    char errbuf[AV_ERROR_MAX_STRING_SIZE];
    int ret = 0;

    // Get required objects
    AVFormatContext *ifmt_ctx = decoder.get_format_context();
    AVCodecContext *dec_ctx = decoder.get_codec_context();
    int in_vstream_idx = decoder.get_video_stream_index();
    AVFormatContext *ofmt_ctx = encoder.get_format_context();
    int *stream_map = encoder.get_stream_map();

    // Reference to the previous frame does not require allocation
    // It will be cloned from the current frame
    std::unique_ptr<AVFrame, decltype(av_frame_deleter)> prev_frame(nullptr, av_frame_deleter);

    // Allocate space for the decoded frames
    std::unique_ptr<AVFrame, decltype(av_frame_deleter)> frame(av_frame_alloc(), av_frame_deleter);
    if (frame == nullptr) {
        spdlog::critical("Error allocating frame");
        return AVERROR(ENOMEM);
    }

    // Allocate space for the decoded packets
    std::unique_ptr<AVPacket, decltype(av_packet_deleter)> packet(
        av_packet_alloc(), av_packet_deleter
    );
    if (packet == nullptr) {
        spdlog::critical("Error allocating packet");
        return AVERROR(ENOMEM);
    }

    // Set the total number of frames in the VideoProcessingContext
    set_total_frames(proc_cfg, proc_ctx, ifmt_ctx, in_vstream_idx, processor);

    // Read frames from the input file
    while (!proc_ctx->abort) {
        ret = av_read_frame(ifmt_ctx, packet.get());
        if (ret < 0) {
            if (ret == AVERROR_EOF) {
                spdlog::debug("Reached end of file");
                break;
            }
            av_strerror(ret, errbuf, sizeof(errbuf));
            spdlog::critical("Error reading packet: {}", errbuf);
            return ret;
        }

        if (packet->stream_index == in_vstream_idx) {
            // Send the packet to the decoder for decoding
            ret = avcodec_send_packet(dec_ctx, packet.get());
            if (ret < 0) {
                av_strerror(ret, errbuf, sizeof(errbuf));
                spdlog::critical("Error sending packet to decoder: {}", errbuf);
                return ret;
            }

            // Process frames decoded from the packet
            while (!proc_ctx->abort) {
                // Sleep for 100 ms if processing is paused
                if (proc_ctx->pause) {
                    std::this_thread::sleep_for(std::chrono::milliseconds(100));
                    continue;
                }

                // Receive the decoded frame from the decoder
                ret = avcodec_receive_frame(dec_ctx, frame.get());
                if (ret == AVERROR(EAGAIN)) {
                    // No more frames from this packet
                    break;
                } else if (ret < 0) {
                    av_strerror(ret, errbuf, sizeof(errbuf));
                    spdlog::critical("Error decoding video frame: {}", errbuf);
                    return ret;
                }

                AVFrame *raw_processed_frame = nullptr;

                // Process the frame based on the selected processing mode
                switch (processor->get_processing_mode()) {
                    case ProcessingMode::Filter: {
                        ret = process_filtering(
                            processor,
                            proc_ctx,
                            encoder,
                            benchmark,
                            frame.get(),
                            raw_processed_frame
                        );
                        break;
                    }
                    case ProcessingMode::Interpolate: {
                        ret = process_interpolation(
                            processor,
                            proc_cfg,
                            proc_ctx,
                            encoder,
                            benchmark,
                            prev_frame,
                            frame.get(),
                            raw_processed_frame
                        );
                        break;
                    }
                    default:
                        spdlog::critical("Unknown processing mode");
                        return -1;
                }

                // EAGAIN only means the processor needs more input; anything else is fatal
                if (ret < 0 && ret != AVERROR(EAGAIN)) {
                    return ret;
                }
                av_frame_unref(frame.get());
                proc_ctx->processed_frames++;
                spdlog::debug(
                    "Processed frame {}/{}", proc_ctx->processed_frames, proc_ctx->total_frames
                );
            }
        } else if (enc_cfg.copy_streams && stream_map[packet->stream_index] >= 0) {
            // write_raw_packet() logs the failure itself; propagate the error instead of
            // silently continuing with an output that is missing audio/subtitle packets
            ret = write_raw_packet(packet.get(), ifmt_ctx, ofmt_ctx, stream_map);
            if (ret < 0) {
                return ret;
            }
        }
        av_packet_unref(packet.get());
    }

    // Flush the filter
    std::vector<AVFrame *> raw_flushed_frames;
    ret = processor->flush(raw_flushed_frames);
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error flushing filter: {}", errbuf);
        return ret;
    }

    // Wrap flushed frames in unique_ptrs
    std::vector<std::unique_ptr<AVFrame, decltype(av_frame_deleter)>> flushed_frames;
    for (AVFrame *raw_frame : raw_flushed_frames) {
        flushed_frames.emplace_back(raw_frame, av_frame_deleter);
    }

    // Encode and write all flushed frames
    for (auto &flushed_frame : flushed_frames) {
        ret = write_frame(flushed_frame.get(), proc_ctx, encoder, benchmark);
        if (ret < 0) {
            return ret;
        }
        proc_ctx->processed_frames++;
    }

    // Flush the encoder
    ret = encoder.flush();
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error flushing encoder: {}", errbuf);
        return ret;
    }

    return ret;
 }
--- a/src/libvideo2x.cpp
+++ b/src/libvideo2x.cpp
@ -6,40 +6,40 @@ extern "C" {
 #include <spdlog/spdlog.h>
 #include "avutils.h"
 #include "decoder.h"
 #include "encoder.h"
-#include "frames_processor.h"
+#include "logging.h"
 #include "processor.h"
 #include "processor_factory.h"
-int process_video(
+VideoProcessor::VideoProcessor(
    const std::filesystem::path in_fname,
    const std::filesystem::path out_fname,
    const HardwareConfig hw_cfg,
    const ProcessorConfig proc_cfg,
-    EncoderConfig enc_cfg,
+    const EncoderConfig enc_cfg,
-    VideoProcessingContext *proc_ctx,
+    Video2xLogLevel log_level,
    Libvideo2xLogLevel log_level,
    bool benchmark
 )
    : hw_cfg_(hw_cfg), proc_cfg_(proc_cfg), enc_cfg_(enc_cfg), benchmark_(benchmark) {
    set_log_level(log_level);
 }
 int VideoProcessor::process(
    const std::filesystem::path in_fname,
    const std::filesystem::path out_fname
 ) {
    char errbuf[AV_ERROR_MAX_STRING_SIZE];
    int ret = 0;
    // Set the log level for FFmpeg and spdlog
    set_log_level(log_level);
    // Create a smart pointer to manage the hardware device context
-    auto hw_ctx_deleter = [](AVBufferRef *ref) {
+    std::unique_ptr<AVBufferRef, decltype(&av_bufferref_deleter)> hw_ctx(
-        if (ref != nullptr) {
+        nullptr, &av_bufferref_deleter
-            av_buffer_unref(&ref);
+    );
        }
    };
    std::unique_ptr<AVBufferRef, decltype(hw_ctx_deleter)> hw_ctx(nullptr, hw_ctx_deleter);
    // Initialize hardware device context
-    if (hw_cfg.hw_device_type != AV_HWDEVICE_TYPE_NONE) {
+    if (hw_cfg_.hw_device_type != AV_HWDEVICE_TYPE_NONE) {
        AVBufferRef *tmp_hw_ctx = nullptr;
-        ret = av_hwdevice_ctx_create(&tmp_hw_ctx, hw_cfg.hw_device_type, NULL, NULL, 0);
+        ret = av_hwdevice_ctx_create(&tmp_hw_ctx, hw_cfg_.hw_device_type, NULL, NULL, 0);
        if (ret < 0) {
            av_strerror(ret, errbuf, sizeof(errbuf));
            spdlog::critical("Error initializing hardware device context: {}", errbuf);
@ -50,7 +50,7 @@ int process_video(
    // Initialize input decoder
    Decoder decoder;
-    ret = decoder.init(hw_cfg.hw_device_type, hw_ctx.get(), in_fname);
+    ret = decoder.init(hw_cfg_.hw_device_type, hw_ctx.get(), in_fname);
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Failed to initialize decoder: {}", errbuf);
@ -63,7 +63,7 @@ int process_video(
    // Create and initialize the appropriate filter
    std::unique_ptr<Processor> processor(
-        ProcessorFactory::instance().create_processor(proc_cfg, hw_cfg.vk_device_index)
+        ProcessorFactory::instance().create_processor(proc_cfg_, hw_cfg_.vk_device_index)
    );
    if (processor == nullptr) {
        spdlog::critical("Failed to create filter instance");
@ -73,21 +73,23 @@ int process_video(
    // Initialize output dimensions based on filter configuration
    int output_width = 0, output_height = 0;
    processor->get_output_dimensions(
-        proc_cfg, dec_ctx->width, dec_ctx->height, output_width, output_height
+        proc_cfg_, dec_ctx->width, dec_ctx->height, output_width, output_height
    );
    if (output_width <= 0 || output_height <= 0) {
        spdlog::critical("Failed to determine the output dimensions");
        return -1;
    }
-    // Update encoder configuration with output dimensions
+    // Update encoder output dimensions
-    enc_cfg.width = output_width;
+    enc_cfg_.width = output_width;
-    enc_cfg.height = output_height;
+    enc_cfg_.height = output_height;
    // Update encoder frame rate multiplier
    enc_cfg_.frm_rate_mul = proc_cfg_.frm_rate_mul;
    // Initialize the encoder
    Encoder encoder;
-    ret =
+    ret = encoder.init(hw_ctx.get(), out_fname, ifmt_ctx, dec_ctx, enc_cfg_, in_vstream_idx);
        encoder.init(hw_ctx.get(), out_fname, ifmt_ctx, dec_ctx, enc_cfg, proc_cfg, in_vstream_idx);
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Failed to initialize encoder: {}", errbuf);
@ -102,7 +104,7 @@ int process_video(
    }
    // Process frames using the encoder and decoder
-    ret = process_frames(enc_cfg, proc_cfg, proc_ctx, decoder, encoder, processor.get(), benchmark);
+    ret = process_frames(decoder, encoder, processor);
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error processing frames: {}", errbuf);
@ -119,3 +121,310 @@ int process_video(
    }
    return 0;
 }
 // Process frames using the selected filter.
 //
 // Reads packets from the decoder's input context. Packets belonging to the video stream are
 // decoded and every decoded frame is handed to the processor (filter or interpolator), which
 // writes its output through the encoder. Packets of other streams are remuxed unchanged when
 // enc_cfg_.copy_streams is set and the stream has a non-negative entry in the stream map.
 // Once the input is exhausted the decoder, the processor and the encoder are flushed, in
 // that order.
 //
 // Returns the non-negative result of the encoder flush on success, -1 when the processor
 // reports an unknown processing mode, or a negative AVERROR code on failure.
 int VideoProcessor::process_frames(
    Decoder &decoder,
    Encoder &encoder,
    std::unique_ptr<Processor> &processor
 ) {
    char errbuf[AV_ERROR_MAX_STRING_SIZE];
    int ret = 0;
    // Get required objects
    AVFormatContext *ifmt_ctx = decoder.get_format_context();
    AVCodecContext *dec_ctx = decoder.get_codec_context();
    int in_vstream_idx = decoder.get_video_stream_index();
    AVFormatContext *ofmt_ctx = encoder.get_format_context();
    int *stream_map = encoder.get_stream_map();
    // Reference to the previous frame does not require allocation
    // It will be cloned from the current frame
    std::unique_ptr<AVFrame, decltype(&av_frame_deleter)> prev_frame(nullptr, &av_frame_deleter);
    // Allocate space for the decoded frames
    std::unique_ptr<AVFrame, decltype(&av_frame_deleter)> frame(
        av_frame_alloc(), &av_frame_deleter
    );
    if (frame == nullptr) {
        spdlog::critical("Error allocating frame");
        return AVERROR(ENOMEM);
    }
    // Allocate space for the decoded packets
    std::unique_ptr<AVPacket, decltype(&av_packet_deleter)> packet(
        av_packet_alloc(), &av_packet_deleter
    );
    if (packet == nullptr) {
        spdlog::critical("Error allocating packet");
        return AVERROR(ENOMEM);
    }
    // Estimate the total number of frames and publish it through total_frames_
    spdlog::debug("Estimating the total number of frames to process");
    total_frames_ = get_video_frame_count(ifmt_ctx, in_vstream_idx);
    if (total_frames_ <= 0) {
        spdlog::warn("Unable to determine the total number of frames");
        total_frames_ = 0;
    } else {
        spdlog::debug("{} frames to process", total_frames_.load());
    }
    // Set total frames for interpolation
    if (processor->get_processing_mode() == ProcessingMode::Interpolate) {
        total_frames_.store(total_frames_.load() * proc_cfg_.frm_rate_mul);
    }
    // Receives every frame the decoder currently has available and processes it.
    // Returns 0 once the decoder has nothing more to hand out (EAGAIN or EOF) or the job
    // was aborted; returns a negative error code on failure.
    auto process_decoded_frames = [&]() -> int {
        char frame_errbuf[AV_ERROR_MAX_STRING_SIZE];
        while (!aborted_.load()) {
            // Sleep for 100 ms if processing is paused
            if (paused_.load()) {
                std::this_thread::sleep_for(std::chrono::milliseconds(100));
                continue;
            }
            // Receive the decoded frame from the decoder
            int recv_ret = avcodec_receive_frame(dec_ctx, frame.get());
            if (recv_ret == AVERROR(EAGAIN) || recv_ret == AVERROR_EOF) {
                // EAGAIN: no more frames from this packet
                // EOF: the decoder has been fully drained
                return 0;
            } else if (recv_ret < 0) {
                av_strerror(recv_ret, frame_errbuf, sizeof(frame_errbuf));
                spdlog::critical("Error decoding video frame: {}", frame_errbuf);
                return recv_ret;
            }
            // Process the frame based on the selected processing mode
            AVFrame *proc_frame = nullptr;
            int proc_ret = 0;
            switch (processor->get_processing_mode()) {
                case ProcessingMode::Filter: {
                    proc_ret = process_filtering(processor, encoder, frame.get(), proc_frame);
                    break;
                }
                case ProcessingMode::Interpolate: {
                    proc_ret = process_interpolation(
                        processor, encoder, prev_frame, frame.get(), proc_frame
                    );
                    break;
                }
                default:
                    spdlog::critical("Unknown processing mode");
                    return -1;
            }
            // EAGAIN from the processor is not treated as an error
            if (proc_ret < 0 && proc_ret != AVERROR(EAGAIN)) {
                return proc_ret;
            }
            av_frame_unref(frame.get());
            frame_index_++;
            spdlog::debug("Processed frame {}/{}", frame_index_.load(), total_frames_.load());
        }
        return 0;
    };
    // Read frames from the input file
    while (!aborted_.load()) {
        ret = av_read_frame(ifmt_ctx, packet.get());
        if (ret < 0) {
            if (ret == AVERROR_EOF) {
                spdlog::debug("Reached end of file");
                break;
            }
            av_strerror(ret, errbuf, sizeof(errbuf));
            spdlog::critical("Error reading packet: {}", errbuf);
            return ret;
        }
        if (packet->stream_index == in_vstream_idx) {
            // Send the packet to the decoder for decoding
            ret = avcodec_send_packet(dec_ctx, packet.get());
            if (ret < 0) {
                av_strerror(ret, errbuf, sizeof(errbuf));
                spdlog::critical("Error sending packet to decoder: {}", errbuf);
                return ret;
            }
            // Process frames decoded from the packet
            ret = process_decoded_frames();
            if (ret < 0) {
                return ret;
            }
        } else if (enc_cfg_.copy_streams && stream_map[packet->stream_index] >= 0) {
            ret = write_raw_packet(packet.get(), ifmt_ctx, ofmt_ctx, stream_map);
            if (ret < 0) {
                return ret;
            }
        }
        av_packet_unref(packet.get());
    }
    // Drain the decoder: it may still buffer frames (e.g. due to frame reordering or
    // threading delay) that are only released after a flush packet has been sent
    if (!aborted_.load()) {
        ret = avcodec_send_packet(dec_ctx, nullptr);
        if (ret < 0 && ret != AVERROR_EOF) {
            av_strerror(ret, errbuf, sizeof(errbuf));
            spdlog::critical("Error flushing decoder: {}", errbuf);
            return ret;
        }
        ret = process_decoded_frames();
        if (ret < 0) {
            return ret;
        }
    }
    // Flush the filter
    std::vector<AVFrame *> raw_flushed_frames;
    ret = processor->flush(raw_flushed_frames);
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error flushing filter: {}", errbuf);
        return ret;
    }
    // Wrap flushed frames in unique_ptrs
    std::vector<std::unique_ptr<AVFrame, decltype(&av_frame_deleter)>> flushed_frames;
    for (AVFrame *raw_frame : raw_flushed_frames) {
        flushed_frames.emplace_back(raw_frame, &av_frame_deleter);
    }
    // Encode and write all flushed frames
    for (auto &flushed_frame : flushed_frames) {
        ret = write_frame(flushed_frame.get(), encoder);
        if (ret < 0) {
            return ret;
        }
        frame_index_++;
    }
    // Flush the encoder
    ret = encoder.flush();
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error flushing encoder: {}", errbuf);
        return ret;
    }
    return ret;
 }
 // Encode and write a single frame through the encoder.
 //
 // Nothing is written when benchmark_ is set; 0 is returned in that case.
 // Otherwise returns the result of Encoder::write_frame (negative on failure,
 // which is also logged).
 int VideoProcessor::write_frame(AVFrame *frame, Encoder &encoder) {
    if (benchmark_) {
        return 0;
    }
    // Clear the picture type so the encoder decides it on its own
    frame->pict_type = AV_PICTURE_TYPE_NONE;
    const int status = encoder.write_frame(frame, frame_index_);
    if (status < 0) {
        char errbuf[AV_ERROR_MAX_STRING_SIZE];
        av_strerror(status, errbuf, sizeof(errbuf));
        spdlog::critical("Error encoding/writing frame: {}", errbuf);
    }
    return status;
 }
 // Remux a packet into the output without re-encoding it.
 //
 // The packet's timestamps are rescaled from the input stream's time base to the
 // mapped output stream's time base, its stream index is rewritten to the mapped
 // index, and it is handed to the muxer. stream_map is indexed by the packet's
 // input stream index. Returns the muxer's result (negative on failure, which is
 // also logged).
 int VideoProcessor::write_raw_packet(
    AVPacket *packet,
    AVFormatContext *ifmt_ctx,
    AVFormatContext *ofmt_ctx,
    int *stream_map
 ) {
    const int src_index = packet->stream_index;
    const int dst_index = stream_map[src_index];
    const AVStream *src_stream = ifmt_ctx->streams[src_index];
    const AVStream *dst_stream = ofmt_ctx->streams[dst_index];

    // Convert timestamps to the output stream's time base before retargeting the packet
    av_packet_rescale_ts(packet, src_stream->time_base, dst_stream->time_base);
    packet->stream_index = dst_index;

    const int status = av_interleaved_write_frame(ofmt_ctx, packet);
    if (status < 0) {
        char errbuf[AV_ERROR_MAX_STRING_SIZE];
        av_strerror(status, errbuf, sizeof(errbuf));
        spdlog::critical("Error muxing audio/subtitle packet: {}", errbuf);
    }
    return status;
 }
 int VideoProcessor::process_filtering(
    std::unique_ptr<Processor> &processor,
    Encoder &encoder,
    AVFrame *frame,
    AVFrame *proc_frame
 ) {
    char errbuf[AV_ERROR_MAX_STRING_SIZE];
    int ret = 0;
    // Cast the processor to a Filter
    Filter *filter = static_cast<Filter *>(processor.get());
    // Process the frame using the filter
    ret = filter->filter(frame, &proc_frame);
    // Write the processed frame
    if (ret < 0 && ret != AVERROR(EAGAIN)) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error filtering frame: {}", errbuf);
    } else if (ret == 0 && proc_frame != nullptr) {
        auto processed_frame =
            std::unique_ptr<AVFrame, decltype(&av_frame_deleter)>(proc_frame, &av_frame_deleter);
        ret = write_frame(processed_frame.get(), encoder);
    }
    return ret;
 }
 // Interpolate between the previous and the current frame, then write the current frame.
 //
 // When a previous frame exists, (proc_cfg_.frm_rate_mul - 1) intermediate frames are
 // produced and written before the current frame. If the difference between the two
 // frames exceeds proc_cfg_.scn_det_thresh, copies of the previous frame are written
 // instead of interpolated frames. frame_index_ is advanced once per intermediate frame
 // slot and is used as the pts of every frame written here. Finally prev_frame is
 // replaced with a clone of the current frame.
 //
 // Returns the result of writing the current frame, or a negative AVERROR code if
 // interpolating, cloning or writing an intermediate frame failed.
 int VideoProcessor::process_interpolation(
    std::unique_ptr<Processor> &processor,
    Encoder &encoder,
    std::unique_ptr<AVFrame, decltype(&av_frame_deleter)> &prev_frame,
    AVFrame *frame,
    AVFrame *proc_frame
 ) {
    char errbuf[AV_ERROR_MAX_STRING_SIZE];
    int ret = 0;
    // Cast the processor to an Interpolator
    Interpolator *interpolator = static_cast<Interpolator *>(processor.get());
    // Interpolation needs a previous frame; the first frame is only written as-is
    if (prev_frame != nullptr) {
        // Calculate the time step for each frame
        float time_step = 1.0f / static_cast<float>(proc_cfg_.frm_rate_mul);
        float current_time_step = time_step;
        // Check if a scene change is detected
        bool skip_frame = false;
        float frame_diff = get_frame_diff(prev_frame.get(), frame);
        if (frame_diff > proc_cfg_.scn_det_thresh) {
            spdlog::debug(
                "Scene change detected ({:.2f}%), skipping frame {}",
                frame_diff,
                frame_index_.load()
            );
            skip_frame = true;
        }
        // Write the interpolated frames
        for (int i = 0; i < proc_cfg_.frm_rate_mul - 1; i++) {
            // The frame from the previous iteration has already been released; clear the
            // pointer so a stale address can never be wrapped and freed a second time
            proc_frame = nullptr;
            // Get the interpolated frame from the interpolator
            if (!skip_frame) {
                ret = interpolator->interpolate(
                    prev_frame.get(), frame, &proc_frame, current_time_step
                );
            } else {
                // Scene change: repeat the previous frame instead of interpolating
                ret = 0;
                proc_frame = av_frame_clone(prev_frame.get());
                if (proc_frame == nullptr) {
                    spdlog::critical("Error cloning the previous frame");
                    return AVERROR(ENOMEM);
                }
            }
            // Write the interpolated frame
            if (ret < 0 && ret != AVERROR(EAGAIN)) {
                av_strerror(ret, errbuf, sizeof(errbuf));
                spdlog::critical("Error interpolating frame: {}", errbuf);
                return ret;
            } else if (ret == 0 && proc_frame != nullptr) {
                // Take ownership so the frame is released once it has been written
                auto processed_frame = std::unique_ptr<AVFrame, decltype(&av_frame_deleter)>(
                    proc_frame, &av_frame_deleter
                );
                processed_frame->pts = frame_index_;
                ret = write_frame(processed_frame.get(), encoder);
                if (ret < 0) {
                    return ret;
                }
            }
            frame_index_++;
            current_time_step += time_step;
        }
    }
    // Write the original frame
    frame->pts = frame_index_;
    ret = write_frame(frame, encoder);
    // Update the previous frame with the current frame
    prev_frame.reset(av_frame_clone(frame));
    return ret;
 }
--- a/src/logging.cpp
+++ b/src/logging.cpp
@ -6,33 +6,33 @@ extern "C" {
 #include <spdlog/spdlog.h>
-void set_log_level(Libvideo2xLogLevel log_level) {
+void set_log_level(Video2xLogLevel log_level) {
    switch (log_level) {
-        case Libvideo2xLogLevel::Trace:
+        case Video2xLogLevel::Trace:
            av_log_set_level(AV_LOG_TRACE);
            spdlog::set_level(spdlog::level::trace);
            break;
-        case Libvideo2xLogLevel::Debug:
+        case Video2xLogLevel::Debug:
            av_log_set_level(AV_LOG_DEBUG);
            spdlog::set_level(spdlog::level::debug);
            break;
-        case Libvideo2xLogLevel::Info:
+        case Video2xLogLevel::Info:
            av_log_set_level(AV_LOG_INFO);
            spdlog::set_level(spdlog::level::info);
            break;
-        case Libvideo2xLogLevel::Warning:
+        case Video2xLogLevel::Warning:
            av_log_set_level(AV_LOG_WARNING);
            spdlog::set_level(spdlog::level::warn);
            break;
-        case Libvideo2xLogLevel::Error:
+        case Video2xLogLevel::Error:
            av_log_set_level(AV_LOG_ERROR);
            spdlog::set_level(spdlog::level::err);
            break;
-        case Libvideo2xLogLevel::Critical:
+        case Video2xLogLevel::Critical:
            av_log_set_level(AV_LOG_FATAL);
            spdlog::set_level(spdlog::level::critical);
            break;
-        case Libvideo2xLogLevel::Off:
+        case Video2xLogLevel::Off:
            av_log_set_level(AV_LOG_QUIET);
            spdlog::set_level(spdlog::level::off);
            break;
--- a/tools/video2x/src/video2x.cpp
+++ b/tools/video2x/src/video2x.cpp
@ -9,7 +9,6 @@
 #include <cstring>
 #include <filesystem>
 #include <iostream>
 #include <mutex>
 #include <string>
 #include <thread>
 #include <unordered_set>
@ -50,12 +49,9 @@ namespace po = boost::program_options;
 // Indicate if a newline needs to be printed before the next output
 std::atomic<bool> newline_required = false;
 // Mutex for synchronizing access to VideoProcessingContext
 std::mutex proc_ctx_mutex;
 // Structure to hold parsed arguments
 struct Arguments {
-    Libvideo2xLogLevel log_level = Libvideo2xLogLevel::Info;
+    Video2xLogLevel log_level = Video2xLogLevel::Info;
    bool no_progress = false;
    // General options
@ -146,27 +142,27 @@ std::string wstring_to_u8string(const std::string &str) {
 }
 #endif
-void set_spdlog_level(Libvideo2xLogLevel log_level) {
+void set_spdlog_level(Video2xLogLevel log_level) {
    switch (log_level) {
-        case Libvideo2xLogLevel::Trace:
+        case Video2xLogLevel::Trace:
            spdlog::set_level(spdlog::level::trace);
            break;
-        case Libvideo2xLogLevel::Debug:
+        case Video2xLogLevel::Debug:
            spdlog::set_level(spdlog::level::debug);
            break;
-        case Libvideo2xLogLevel::Info:
+        case Video2xLogLevel::Info:
            spdlog::set_level(spdlog::level::info);
            break;
-        case Libvideo2xLogLevel::Warning:
+        case Video2xLogLevel::Warning:
            spdlog::set_level(spdlog::level::warn);
            break;
-        case Libvideo2xLogLevel::Error:
+        case Video2xLogLevel::Error:
            spdlog::set_level(spdlog::level::err);
            break;
-        case Libvideo2xLogLevel::Critical:
+        case Video2xLogLevel::Critical:
            spdlog::set_level(spdlog::level::critical);
            break;
-        case Libvideo2xLogLevel::Off:
+        case Video2xLogLevel::Off:
            spdlog::set_level(spdlog::level::off);
            break;
        default:
@ -175,18 +171,18 @@ void set_spdlog_level(Libvideo2xLogLevel log_level) {
    }
 }
-std::optional<Libvideo2xLogLevel> find_log_level_by_name(const StringType &log_level_name) {
+std::optional<Video2xLogLevel> find_log_level_by_name(const StringType &log_level_name) {
    // Static map to store the mapping
-    static const std::unordered_map<StringType, Libvideo2xLogLevel> LogLevelMap = {
+    static const std::unordered_map<StringType, Video2xLogLevel> LogLevelMap = {
-        {STR("trace"), Libvideo2xLogLevel::Trace},
+        {STR("trace"), Video2xLogLevel::Trace},
-        {STR("debug"), Libvideo2xLogLevel::Debug},
+        {STR("debug"), Video2xLogLevel::Debug},
-        {STR("info"), Libvideo2xLogLevel::Info},
+        {STR("info"), Video2xLogLevel::Info},
-        {STR("warning"), Libvideo2xLogLevel::Warning},
+        {STR("warning"), Video2xLogLevel::Warning},
-        {STR("warn"), Libvideo2xLogLevel::Warning},
+        {STR("warn"), Video2xLogLevel::Warning},
-        {STR("error"), Libvideo2xLogLevel::Error},
+        {STR("error"), Video2xLogLevel::Error},
-        {STR("critical"), Libvideo2xLogLevel::Critical},
+        {STR("critical"), Video2xLogLevel::Critical},
-        {STR("off"), Libvideo2xLogLevel::Off},
+        {STR("off"), Video2xLogLevel::Off},
-        {STR("none"), Libvideo2xLogLevel::Off}
+        {STR("none"), Video2xLogLevel::Off}
    };
    // Normalize the input to lowercase
@ -356,32 +352,6 @@ int get_vulkan_device_prop(uint32_t vk_device_index, VkPhysicalDeviceProperties
    return 0;
 }
 // Thread entry point that runs process_video and records its outcome.
 //
 // The return value of process_video is stored in *proc_ret; afterwards
 // proc_ctx->completed is set to true while holding proc_ctx_mutex.
 void process_video_thread(
    Arguments *arguments,
    int *proc_ret,
    HardwareConfig hw_cfg,
    ProcessorConfig proc_cfg,
    EncoderConfig enc_cfg,
    VideoProcessingContext *proc_ctx
 ) {
    const int result = process_video(
        arguments->in_fname,
        arguments->out_fname,
        hw_cfg,
        proc_cfg,
        enc_cfg,
        proc_ctx,
        arguments->log_level,
        arguments->benchmark
    );
    *proc_ret = result;

    // Mark completion under the mutex; the lock is released when the function returns
    std::lock_guard<std::mutex> lock(proc_ctx_mutex);
    proc_ctx->completed = true;
 }
 #ifdef _WIN32
 int wmain(int argc, wchar_t *argv[]) {
    // Set console output code page to UTF-8
@ -552,7 +522,7 @@ int main(int argc, char **argv) {
        }
        if (vm.count("log-level")) {
-            std::optional<Libvideo2xLogLevel> log_level =
+            std::optional<Video2xLogLevel> log_level =
                find_log_level_by_name(vm["log-level"].as<StringType>());
            if (!log_level.has_value()) {
                spdlog::critical("Invalid log level specified.");
@ -772,22 +742,20 @@ int main(int argc, char **argv) {
        }
    }
-    // Setup struct to store processing context
+    // Create video processor object
-    VideoProcessingContext proc_ctx;
+    VideoProcessor video_processor =
-    proc_ctx.processed_frames = 0;
+        VideoProcessor(hw_cfg, proc_cfg, enc_cfg, arguments.log_level, arguments.benchmark);
    proc_ctx.total_frames = 0;
    proc_ctx.pause = false;
    proc_ctx.abort = false;
    proc_ctx.completed = false;
    // Register a newline-safe log callback for FFmpeg
    av_log_set_callback(newline_safe_ffmpeg_log_callback);
    // Create a thread for video processing
    int proc_ret = 0;
-    std::thread processing_thread(
+    std::atomic<bool> completed = false;  // Use atomic for thread-safe updates
-        process_video_thread, &arguments, &proc_ret, hw_cfg, proc_cfg, enc_cfg, &proc_ctx
+    std::thread processing_thread([&]() {
-    );
+        proc_ret = video_processor.process(arguments.in_fname, arguments.out_fname);
        completed.store(true, std::memory_order_relaxed);
    });
    spdlog::info("Press [space] to pause/resume, [q] to abort.");
    // Setup timer
@ -801,12 +769,7 @@ int main(int argc, char **argv) {
    // Main thread loop to display progress and handle input
    while (true) {
-        bool completed;
+        if (completed.load()) {
        {
            std::lock_guard<std::mutex> lock(proc_ctx_mutex);
            completed = proc_ctx.completed;
        }
        if (completed) {
            break;
        }
@ -825,9 +788,12 @@ int main(int argc, char **argv) {
        if (ch == ' ' || ch == '\n') {
            // Toggle pause state
            {
-                std::lock_guard<std::mutex> lock(proc_ctx_mutex);
+                if (video_processor.is_paused()) {
-                proc_ctx.pause = !proc_ctx.pause;
+                    video_processor.resume();
-                if (proc_ctx.pause) {
+                } else {
                    video_processor.pause();
                }
                if (video_processor.is_paused()) {
                    std::cout
                        << "\r\033[KProcessing paused; press [space] to resume, [q] to abort.";
                    std::cout.flush();
@ -846,8 +812,7 @@ int main(int argc, char **argv) {
            }
            spdlog::warn("Aborting gracefully; press Ctrl+C to terminate forcefully.");
            {
-                std::lock_guard<std::mutex> lock(proc_ctx_mutex);
+                video_processor.abort();
                proc_ctx.abort = true;
                newline_required = false;
            }
            break;
@ -856,14 +821,13 @@ int main(int argc, char **argv) {
        // Display progress
        if (!arguments.no_progress) {
            int64_t processed_frames, total_frames;
-            bool pause;
+            bool paused;
            {
-                std::lock_guard<std::mutex> lock(proc_ctx_mutex);
+                processed_frames = video_processor.get_processed_frames();
-                processed_frames = proc_ctx.processed_frames;
+                total_frames = video_processor.get_total_frames();
-                total_frames = proc_ctx.total_frames;
+                paused = video_processor.is_paused();
                pause = proc_ctx.pause;
            }
-            if (!pause && (total_frames > 0 || processed_frames > 0)) {
+            if (!paused && (total_frames > 0 || processed_frames > 0)) {
                double percentage = total_frames > 0 ? static_cast<double>(processed_frames) *
                                                           100.0 / static_cast<double>(total_frames)
                                                     : 0.0;
@ -919,12 +883,7 @@ int main(int argc, char **argv) {
    }
    // Print final message based on processing result
-    bool aborted;
+    if (video_processor.is_aborted()) {
    {
        std::lock_guard<std::mutex> lock(proc_ctx_mutex);
        aborted = proc_ctx.abort;
    }
    if (aborted) {
        spdlog::warn("Video processing aborted");
        return 2;
    } else if (proc_ret != 0) {
@ -935,11 +894,7 @@ int main(int argc, char **argv) {
    }
    // Calculate statistics
-    int64_t processed_frames;
+    int64_t processed_frames = video_processor.get_processed_frames();
    {
        std::lock_guard<std::mutex> lock(proc_ctx_mutex);
        processed_frames = proc_ctx.processed_frames;
    }
    int time_elapsed = static_cast<int>(timer.get_elapsed_time() / 1000);
    int hours_elapsed = time_elapsed / 3600;
    int minutes_elapsed = (time_elapsed % 3600) / 60;