diff --git a/CMakeLists.txt b/CMakeLists.txt index ef450f2..26361bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,10 @@ cmake_minimum_required(VERSION 3.10) project(video2x VERSION 6.1.1 LANGUAGES CXX) +if(POLICY CMP0167) + cmake_policy(SET CMP0167 NEW) +endif() + # Set the C++ standard set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -13,9 +17,9 @@ endif() # Set the default optimization flags for Release builds if(CMAKE_BUILD_TYPE STREQUAL "Release") - if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox /GL /LTCG /MD /DNDEBUG") - elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto") set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -s") set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -s") @@ -23,9 +27,9 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release") endif() # Set global compile options for all targets -if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") +if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") add_compile_options(/W4 /permissive-) -elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") +elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") add_compile_options(-Wall -Wextra -Wpedantic -Wconversion -Wshadow) endif() @@ -70,7 +74,7 @@ if(WIN32) set(SPIRV_BUILD_PATH ${CMAKE_BINARY_DIR}/realesrgan-prefix/src/realesrgan-build/ncnn/glslang/SPIRV ) - if (CMAKE_BUILD_TYPE STREQUAL "Release") + if(CMAKE_BUILD_TYPE STREQUAL "Release") set(SPIRV_LIB ${SPIRV_BUILD_PATH}/Release/SPIRV.lib) else() set(SPIRV_LIB ${SPIRV_BUILD_PATH}/Debug/SPIRVd.lib) @@ -203,7 +207,7 @@ else() endif() # spdlog -if (USE_SYSTEM_SPDLOG) +if(USE_SYSTEM_SPDLOG) find_package(spdlog REQUIRED) list(APPEND ALL_INCLUDE_DIRS ${spdlog_INCLUDE_DIRS}) set(SPDLOG_LIB spdlog::spdlog) @@ -214,7 +218,7 @@ endif() list(APPEND ALL_LIBRARIES ${SPDLOG_LIB}) # Boost -if (USE_SYSTEM_BOOST) +if(USE_SYSTEM_BOOST) find_package(Boost REQUIRED COMPONENTS program_options) list(APPEND ALL_INCLUDE_DIRS ${Boost_INCLUDE_DIRS}) else() @@ -228,7 +232,7 @@ else() endif() set(BOOST_LIB Boost::program_options) -if (BUILD_VIDEO2X_CLI) +if(BUILD_VIDEO2X_CLI) find_package(Vulkan REQUIRED) set(VULKAN_LIB Vulkan::Vulkan) endif() @@ -295,7 +299,7 @@ endif() target_link_libraries(libvideo2x PRIVATE ${ALL_LIBRARIES}) if(NOT WIN32) - if (USE_SYSTEM_NCNN) + if(USE_SYSTEM_NCNN) target_link_libraries(libvideo2x PUBLIC ncnn) else() target_link_libraries(libvideo2x PRIVATE ncnn) @@ -303,7 +307,7 @@ if(NOT WIN32) endif() # Create the executable 'video2x' -if (BUILD_VIDEO2X_CLI) +if(BUILD_VIDEO2X_CLI) file(GLOB VIDEO2X_SOURCES tools/video2x/src/*.cpp) add_executable(video2x ${VIDEO2X_SOURCES}) set_target_properties(video2x PROPERTIES OUTPUT_NAME video2x) diff --git a/include/libvideo2x/decoder.h b/include/libvideo2x/decoder.h index e6ed9f0..55e6e1c 100644 --- a/include/libvideo2x/decoder.h +++ b/include/libvideo2x/decoder.h @@ -8,13 +8,25 @@ extern "C" { #include } -int init_decoder( - AVHWDeviceType hw_type, - AVBufferRef *hw_ctx, - std::filesystem::path in_fpath, - AVFormatContext **fmt_ctx, - AVCodecContext **dec_ctx, - int *in_vstream_idx -); +class Decoder { + public: + Decoder(); + ~Decoder(); + + int init(AVHWDeviceType hw_type, AVBufferRef 
*hw_ctx, const std::filesystem::path &in_fpath); + + AVFormatContext *get_format_context() const; + AVCodecContext *get_codec_context() const; + int get_video_stream_index() const; + + private: + static enum AVPixelFormat hw_pix_fmt_; + static enum AVPixelFormat + get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts); + + AVFormatContext *fmt_ctx_; + AVCodecContext *dec_ctx_; + int in_vstream_idx_; +}; #endif // DECODER_H diff --git a/include/libvideo2x/encoder.h b/include/libvideo2x/encoder.h index 13d7814..586e267 100644 --- a/include/libvideo2x/encoder.h +++ b/include/libvideo2x/encoder.h @@ -1,37 +1,43 @@ #ifndef ENCODER_H #define ENCODER_H +#include #include extern "C" { -#include #include -#include +#include } -#include "libvideo2x.h" +#include "libvideo2x/libvideo2x.h" -int init_encoder( - AVBufferRef *hw_ctx, - std::filesystem::path out_fpath, - AVFormatContext *ifmt_ctx, - AVFormatContext **ofmt_ctx, - AVCodecContext **enc_ctx, - AVCodecContext *dec_ctx, - EncoderConfig *encoder_config, - int in_vstream_idx, - int *out_vstream_idx, - int **stream_map -); +class Encoder { + public: + Encoder(); + ~Encoder(); -int write_frame( - AVFrame *frame, - AVCodecContext *enc_ctx, - AVFormatContext *ofmt_ctx, - int out_vstream_idx, - int64_t frame_idx -); + int init( + AVBufferRef *hw_ctx, + const std::filesystem::path &out_fpath, + AVFormatContext *ifmt_ctx, + AVCodecContext *dec_ctx, + EncoderConfig *encoder_config, + int in_vstream_idx + ); -int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vstream_idx); + int write_frame(AVFrame *frame, int64_t frame_idx); + int flush(); + + AVCodecContext *get_encoder_context() const; + AVFormatContext *get_format_context() const; + int *get_stream_map() const; + int get_output_video_stream_index() const; + + private: + AVFormatContext *ofmt_ctx_; + AVCodecContext *enc_ctx_; + int out_vstream_idx_; + int *stream_map_; +}; #endif // ENCODER_H diff --git a/src/decoder.cpp b/src/decoder.cpp index b6dd71d..1f63e30 100644 --- a/src/decoder.cpp +++ b/src/decoder.cpp @@ -1,17 +1,25 @@ #include "decoder.h" -#include -#include -#include - #include -static enum AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE; +enum AVPixelFormat Decoder::hw_pix_fmt_ = AV_PIX_FMT_NONE; -// Callback function to choose the hardware-accelerated pixel format -static enum AVPixelFormat get_hw_format(AVCodecContext *_, const enum AVPixelFormat *pix_fmts) { +Decoder::Decoder() : fmt_ctx_(nullptr), dec_ctx_(nullptr), in_vstream_idx_(-1) {} + +Decoder::~Decoder() { + if (dec_ctx_) { + avcodec_free_context(&dec_ctx_); + dec_ctx_ = nullptr; + } + if (fmt_ctx_) { + avformat_close_input(&fmt_ctx_); + fmt_ctx_ = nullptr; + } +} + +enum AVPixelFormat Decoder::get_hw_format(AVCodecContext *_, const enum AVPixelFormat *pix_fmts) { for (const enum AVPixelFormat *p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) { - if (*p == hw_pix_fmt) { + if (*p == hw_pix_fmt_) { return *p; } } @@ -19,39 +27,36 @@ static enum AVPixelFormat get_hw_format(AVCodecContext *_, const enum AVPixelFor return AV_PIX_FMT_NONE; } -int init_decoder( +int Decoder::init( AVHWDeviceType hw_type, AVBufferRef *hw_ctx, - std::filesystem::path in_fpath, - AVFormatContext **fmt_ctx, - AVCodecContext **dec_ctx, - int *in_vstream_idx + const std::filesystem::path &in_fpath ) { - AVFormatContext *ifmt_ctx = NULL; - AVCodecContext *codec_ctx = NULL; int ret; - if ((ret = avformat_open_input(&ifmt_ctx, in_fpath.u8string().c_str(), NULL, NULL)) < 0) { - spdlog::error("Could not open input file '{}'", 
in_fpath.u8string().c_str()); + // Open the input file + if ((ret = avformat_open_input(&fmt_ctx_, in_fpath.u8string().c_str(), nullptr, nullptr)) < 0) { + spdlog::error("Could not open input file '{}'", in_fpath.u8string()); return ret; } - if ((ret = avformat_find_stream_info(ifmt_ctx, NULL)) < 0) { + // Retrieve stream information + if ((ret = avformat_find_stream_info(fmt_ctx_, nullptr)) < 0) { spdlog::error("Failed to retrieve input stream information"); return ret; } // Find the first video stream - ret = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0); + ret = av_find_best_stream(fmt_ctx_, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0); if (ret < 0) { spdlog::error("Could not find video stream in the input file"); return ret; } int stream_index = ret; - AVStream *video_stream = ifmt_ctx->streams[stream_index]; + AVStream *video_stream = fmt_ctx_->streams[stream_index]; - // Set up the decoder + // Find the decoder for the video stream const AVCodec *decoder = avcodec_find_decoder(video_stream->codecpar->codec_id); if (!decoder) { spdlog::error( @@ -61,16 +66,28 @@ int init_decoder( return AVERROR_DECODER_NOT_FOUND; } - codec_ctx = avcodec_alloc_context3(decoder); - if (!codec_ctx) { + // Allocate the decoder context + dec_ctx_ = avcodec_alloc_context3(decoder); + if (!dec_ctx_) { spdlog::error("Failed to allocate the decoder context"); return AVERROR(ENOMEM); } + // Copy codec parameters from input stream to decoder context + if ((ret = avcodec_parameters_to_context(dec_ctx_, video_stream->codecpar)) < 0) { + spdlog::error("Failed to copy decoder parameters to input decoder context"); + return ret; + } + + // Set the time base and frame rate + dec_ctx_->time_base = video_stream->time_base; + dec_ctx_->pkt_timebase = video_stream->time_base; + dec_ctx_->framerate = av_guess_frame_rate(fmt_ctx_, video_stream, nullptr); + // Set hardware device context if (hw_ctx != nullptr) { - codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx); - codec_ctx->get_format = get_hw_format; + dec_ctx_->hw_device_ctx = av_buffer_ref(hw_ctx); + dec_ctx_->get_format = get_hw_format; // Automatically determine the hardware pixel format for (int i = 0;; i++) { @@ -81,36 +98,35 @@ int init_decoder( decoder->name, av_hwdevice_get_type_name(hw_type) ); - avcodec_free_context(&codec_ctx); - avformat_close_input(&ifmt_ctx); return AVERROR(ENOSYS); } if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX && config->device_type == hw_type) { - hw_pix_fmt = config->pix_fmt; + hw_pix_fmt_ = config->pix_fmt; break; } } } - if ((ret = avcodec_parameters_to_context(codec_ctx, video_stream->codecpar)) < 0) { - spdlog::error("Failed to copy decoder parameters to input decoder context"); - return ret; - } - - // Set decoder time base and frame rate - codec_ctx->time_base = video_stream->time_base; - codec_ctx->pkt_timebase = video_stream->time_base; - codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, video_stream, NULL); - - if ((ret = avcodec_open2(codec_ctx, decoder, NULL)) < 0) { + // Open the decoder + if ((ret = avcodec_open2(dec_ctx_, decoder, nullptr)) < 0) { spdlog::error("Failed to open decoder for stream #{}", stream_index); return ret; } - *fmt_ctx = ifmt_ctx; - *dec_ctx = codec_ctx; - *in_vstream_idx = stream_index; + in_vstream_idx_ = stream_index; return 0; } + +AVFormatContext *Decoder::get_format_context() const { + return fmt_ctx_; +} + +AVCodecContext *Decoder::get_codec_context() const { + return dec_ctx_; +} + +int Decoder::get_video_stream_index() const { + return in_vstream_idx_; +} 
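For reference, a minimal caller-side sketch of the new `Decoder` API introduced above. The `open_input()` helper, its logging, and the includes are illustrative assumptions; the `init()` signature and the three getters are taken directly from this diff, and mirror how `process_video()` drives the class later in the patch.

```cpp
// Illustrative only: exercising the new Decoder class from a caller's side.
// open_input() is a hypothetical helper, not part of this patch.
#include <filesystem>

extern "C" {
#include <libavutil/hwcontext.h>
}

#include <spdlog/spdlog.h>

#include "decoder.h"

static int open_input(const std::filesystem::path &in_fpath) {
    Decoder decoder;

    // Software decoding: AV_HWDEVICE_TYPE_NONE and a null hardware context
    int ret = decoder.init(AV_HWDEVICE_TYPE_NONE, nullptr, in_fpath);
    if (ret < 0) {
        spdlog::error("Decoder initialization failed");
        return ret;
    }

    // The contexts remain owned by the Decoder; its destructor releases them
    AVFormatContext *fmt_ctx = decoder.get_format_context();
    AVCodecContext *dec_ctx = decoder.get_codec_context();
    int video_stream_idx = decoder.get_video_stream_index();

    spdlog::info(
        "Opened '{}': video stream #{}, {} streams total",
        in_fpath.u8string(),
        video_stream_idx,
        fmt_ctx->nb_streams
    );

    // fmt_ctx and dec_ctx must not be used after 'decoder' goes out of scope
    return 0;
}
```

One design consequence worth noting: because ownership moved into the class, the raw pointers returned by the getters are borrowed views, so callers such as `process_frames()` must keep the `Decoder` (and `Encoder`) objects alive for the full duration of processing.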
diff --git a/src/encoder.cpp b/src/encoder.cpp index 78d4f9d..23fc7ac 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -1,138 +1,152 @@ #include "encoder.h" -#include -#include -#include -#include +#include extern "C" { -#include +#include } -#include - #include "avutils.h" #include "conversions.h" -int init_encoder( +Encoder::Encoder() + : ofmt_ctx_(nullptr), enc_ctx_(nullptr), out_vstream_idx_(-1), stream_map_(nullptr) {} + +Encoder::~Encoder() { + if (enc_ctx_) { + avcodec_free_context(&enc_ctx_); + } + if (ofmt_ctx_) { + if (!(ofmt_ctx_->oformat->flags & AVFMT_NOFILE)) { + avio_closep(&ofmt_ctx_->pb); + } + avformat_free_context(ofmt_ctx_); + } + if (stream_map_) { + av_free(stream_map_); + } +} + +int Encoder::init( AVBufferRef *hw_ctx, - std::filesystem::path out_fpath, + const std::filesystem::path &out_fpath, AVFormatContext *ifmt_ctx, - AVFormatContext **ofmt_ctx, - AVCodecContext **enc_ctx, AVCodecContext *dec_ctx, EncoderConfig *encoder_config, - int in_vstream_idx, - int *out_vstream_idx, - int **stream_map + int in_vstream_idx ) { - AVFormatContext *fmt_ctx = NULL; - AVCodecContext *codec_ctx = NULL; int ret; - avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fpath.u8string().c_str()); - if (!fmt_ctx) { + // Allocate the output format context + avformat_alloc_output_context2(&ofmt_ctx_, nullptr, nullptr, out_fpath.u8string().c_str()); + if (!ofmt_ctx_) { spdlog::error("Could not create output context"); return AVERROR_UNKNOWN; } + // Find the encoder const AVCodec *encoder = avcodec_find_encoder(encoder_config->codec); if (!encoder) { spdlog::error( - "Required video encoder not found for vcodec {}", - avcodec_get_name(encoder_config->codec) + "Required video encoder not found for codec {}", avcodec_get_name(encoder_config->codec) ); return AVERROR_ENCODER_NOT_FOUND; } // Create a new video stream in the output file - AVStream *out_vstream = avformat_new_stream(fmt_ctx, NULL); + AVStream *out_vstream = avformat_new_stream(ofmt_ctx_, nullptr); if (!out_vstream) { spdlog::error("Failed to allocate the output video stream"); return AVERROR_UNKNOWN; } - *out_vstream_idx = out_vstream->index; + out_vstream_idx_ = out_vstream->index; - codec_ctx = avcodec_alloc_context3(encoder); - if (!codec_ctx) { + // Allocate the encoder context + enc_ctx_ = avcodec_alloc_context3(encoder); + if (!enc_ctx_) { spdlog::error("Failed to allocate the encoder context"); return AVERROR(ENOMEM); } // Set hardware device context if (hw_ctx != nullptr) { - codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx); + enc_ctx_->hw_device_ctx = av_buffer_ref(hw_ctx); } // Set encoding parameters - codec_ctx->height = encoder_config->out_height; - codec_ctx->width = encoder_config->out_width; - codec_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio; - codec_ctx->bit_rate = encoder_config->bit_rate; + enc_ctx_->height = encoder_config->out_height; + enc_ctx_->width = encoder_config->out_width; + enc_ctx_->sample_aspect_ratio = dec_ctx->sample_aspect_ratio; + enc_ctx_->bit_rate = encoder_config->bit_rate; // Set the color properties - codec_ctx->color_range = dec_ctx->color_range; - codec_ctx->color_primaries = dec_ctx->color_primaries; - codec_ctx->color_trc = dec_ctx->color_trc; - codec_ctx->colorspace = dec_ctx->colorspace; - codec_ctx->chroma_sample_location = dec_ctx->chroma_sample_location; + enc_ctx_->color_range = dec_ctx->color_range; + enc_ctx_->color_primaries = dec_ctx->color_primaries; + enc_ctx_->color_trc = dec_ctx->color_trc; + enc_ctx_->colorspace = dec_ctx->colorspace; + 
enc_ctx_->chroma_sample_location = dec_ctx->chroma_sample_location; // Set the pixel format if (encoder_config->pix_fmt != AV_PIX_FMT_NONE) { // Use the specified pixel format - codec_ctx->pix_fmt = encoder_config->pix_fmt; + enc_ctx_->pix_fmt = encoder_config->pix_fmt; } else { - codec_ctx->pix_fmt = get_encoder_default_pix_fmt(encoder, dec_ctx->pix_fmt); - if (codec_ctx->pix_fmt == AV_PIX_FMT_NONE) { + // Automatically select the pixel format + enc_ctx_->pix_fmt = get_encoder_default_pix_fmt(encoder, dec_ctx->pix_fmt); + if (enc_ctx_->pix_fmt == AV_PIX_FMT_NONE) { spdlog::error("Could not get the default pixel format for the encoder"); return AVERROR(EINVAL); } - spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(codec_ctx->pix_fmt)); + spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(enc_ctx_->pix_fmt)); } // Set the output video's time base if (dec_ctx->time_base.num > 0 && dec_ctx->time_base.den > 0) { - codec_ctx->time_base = dec_ctx->time_base; + enc_ctx_->time_base = dec_ctx->time_base; } else { - codec_ctx->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_vstream, NULL)); + enc_ctx_->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr)); } // Set the output video's frame rate if (dec_ctx->framerate.num > 0 && dec_ctx->framerate.den > 0) { - codec_ctx->framerate = dec_ctx->framerate; + enc_ctx_->framerate = dec_ctx->framerate; } else { - codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, NULL); + enc_ctx_->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr); } // Set the CRF and preset for any codecs that support it std::string crf_str = std::to_string(encoder_config->crf); - av_opt_set(codec_ctx->priv_data, "crf", crf_str.c_str(), 0); - av_opt_set(codec_ctx->priv_data, "preset", encoder_config->preset, 0); + av_opt_set(enc_ctx_->priv_data, "crf", crf_str.c_str(), 0); + av_opt_set(enc_ctx_->priv_data, "preset", encoder_config->preset, 0); - if (fmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) { - codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + // Use global headers if necessary + if (ofmt_ctx_->oformat->flags & AVFMT_GLOBALHEADER) { + enc_ctx_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; } - if ((ret = avcodec_open2(codec_ctx, encoder, NULL)) < 0) { + // Open the encoder + if ((ret = avcodec_open2(enc_ctx_, encoder, nullptr)) < 0) { spdlog::error("Cannot open video encoder"); return ret; } - ret = avcodec_parameters_from_context(out_vstream->codecpar, codec_ctx); + // Copy encoder parameters to output video stream + ret = avcodec_parameters_from_context(out_vstream->codecpar, enc_ctx_); if (ret < 0) { spdlog::error("Failed to copy encoder parameters to output video stream"); return ret; } - out_vstream->time_base = codec_ctx->time_base; - out_vstream->avg_frame_rate = codec_ctx->framerate; - out_vstream->r_frame_rate = codec_ctx->framerate; + out_vstream->time_base = enc_ctx_->time_base; + out_vstream->avg_frame_rate = enc_ctx_->framerate; + out_vstream->r_frame_rate = enc_ctx_->framerate; + // Copy other streams if necessary if (encoder_config->copy_streams) { - // Allocate the stream map - *stream_map = - reinterpret_cast(av_malloc_array(ifmt_ctx->nb_streams, sizeof(**stream_map))); - if (!*stream_map) { + // Allocate the stream mape frame o + stream_map_ = + reinterpret_cast(av_malloc_array(ifmt_ctx->nb_streams, sizeof(*stream_map_))); + if (!stream_map_) { spdlog::error("Could not allocate stream mapping"); return AVERROR(ENOMEM); } @@ -144,20 +158,20 @@ int init_encoder( // Skip the input video 
stream as it's already processed if (i == in_vstream_idx) { - (*stream_map)[i] = *out_vstream_idx; + stream_map_[i] = out_vstream_idx_; continue; } // Map only audio and subtitle streams (skip other types) if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO && in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) { - (*stream_map)[i] = -1; // Stream not mapped + stream_map_[i] = -1; spdlog::warn("Skipping unsupported stream type at index: {}", i); continue; } // Create corresponding output stream for audio and subtitle streams - AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL); + AVStream *out_stream = avformat_new_stream(ofmt_ctx_, nullptr); if (!out_stream) { spdlog::error("Failed allocating output stream"); return AVERROR_UNKNOWN; @@ -176,32 +190,23 @@ int init_encoder( // Map input stream index to output stream index spdlog::debug("Stream mapping: {} (in) -> {} (out)", i, out_stream->index); - (*stream_map)[i] = out_stream->index; + stream_map_[i] = out_stream->index; } } // Open the output file - if (!(fmt_ctx->oformat->flags & AVFMT_NOFILE)) { - ret = avio_open(&fmt_ctx->pb, out_fpath.u8string().c_str(), AVIO_FLAG_WRITE); + if (!(ofmt_ctx_->oformat->flags & AVFMT_NOFILE)) { + ret = avio_open(&ofmt_ctx_->pb, out_fpath.u8string().c_str(), AVIO_FLAG_WRITE); if (ret < 0) { - spdlog::error("Could not open output file '{}'", out_fpath.u8string().c_str()); + spdlog::error("Could not open output file '{}'", out_fpath.u8string()); return ret; } } - *ofmt_ctx = fmt_ctx; - *enc_ctx = codec_ctx; - return 0; } -int write_frame( - AVFrame *frame, - AVCodecContext *enc_ctx, - AVFormatContext *ofmt_ctx, - int out_vstream_idx, - int64_t frame_idx -) { +int Encoder::write_frame(AVFrame *frame, int64_t frame_idx) { AVFrame *converted_frame = nullptr; int ret; @@ -211,13 +216,12 @@ int write_frame( } // Convert the frame to the encoder's pixel format if needed - if (frame->format != enc_ctx->pix_fmt) { - converted_frame = convert_avframe_pix_fmt(frame, enc_ctx->pix_fmt); + if (frame->format != enc_ctx_->pix_fmt) { + converted_frame = convert_avframe_pix_fmt(frame, enc_ctx_->pix_fmt); if (!converted_frame) { spdlog::error("Error converting frame to encoder's pixel format"); return AVERROR_EXTERNAL; } - converted_frame->pts = frame->pts; } @@ -227,11 +231,12 @@ int write_frame( return AVERROR(ENOMEM); } + // Send the frame to the encoder if (converted_frame != nullptr) { - ret = avcodec_send_frame(enc_ctx, converted_frame); + ret = avcodec_send_frame(enc_ctx_, converted_frame); av_frame_free(&converted_frame); } else { - ret = avcodec_send_frame(enc_ctx, frame); + ret = avcodec_send_frame(enc_ctx_, frame); } if (ret < 0) { spdlog::error("Error sending frame to encoder"); @@ -239,8 +244,9 @@ int write_frame( return ret; } + // Receive packets from the encoder while (ret >= 0) { - ret = avcodec_receive_packet(enc_ctx, enc_pkt); + ret = avcodec_receive_packet(enc_ctx_, enc_pkt); if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { av_packet_unref(enc_pkt); break; @@ -252,12 +258,12 @@ int write_frame( // Rescale packet timestamps av_packet_rescale_ts( - enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[out_vstream_idx]->time_base + enc_pkt, enc_ctx_->time_base, ofmt_ctx_->streams[out_vstream_idx_]->time_base ); - enc_pkt->stream_index = out_vstream_idx; + enc_pkt->stream_index = out_vstream_idx_; // Write the packet - ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt); + ret = av_interleaved_write_frame(ofmt_ctx_, enc_pkt); av_packet_unref(enc_pkt); if (ret < 0) { spdlog::error("Error muxing packet"); @@ 
-270,7 +276,7 @@ int write_frame( return 0; } -int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vstream_idx) { +int Encoder::flush() { int ret; AVPacket *enc_pkt = av_packet_alloc(); if (!enc_pkt) { @@ -278,16 +284,17 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vs return AVERROR(ENOMEM); } - ret = avcodec_send_frame(enc_ctx, NULL); + // Send a NULL frame to signal the encoder to flush + ret = avcodec_send_frame(enc_ctx_, nullptr); if (ret < 0) { spdlog::error("Error sending NULL frame to encoder during flush"); av_packet_free(&enc_pkt); return ret; } - // Write the packets to the output file + // Receive and write packets until flushing is complete while (true) { - ret = avcodec_receive_packet(enc_ctx, enc_pkt); + ret = avcodec_receive_packet(enc_ctx_, enc_pkt); if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { av_packet_unref(enc_pkt); break; @@ -299,12 +306,12 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vs // Rescale packet timestamps av_packet_rescale_ts( - enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[out_vstream_idx]->time_base + enc_pkt, enc_ctx_->time_base, ofmt_ctx_->streams[out_vstream_idx_]->time_base ); - enc_pkt->stream_index = out_vstream_idx; + enc_pkt->stream_index = out_vstream_idx_; // Write the packet - ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt); + ret = av_interleaved_write_frame(ofmt_ctx_, enc_pkt); av_packet_unref(enc_pkt); if (ret < 0) { spdlog::error("Error muxing packet during flush"); @@ -316,3 +323,19 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vs av_packet_free(&enc_pkt); return 0; } + +AVCodecContext *Encoder::get_encoder_context() const { + return enc_ctx_; +} + +AVFormatContext *Encoder::get_format_context() const { + return ofmt_ctx_; +} + +int Encoder::get_output_video_stream_index() const { + return out_vstream_idx_; +} + +int *Encoder::get_stream_map() const { + return stream_map_; +} diff --git a/src/libvideo2x.cpp b/src/libvideo2x.cpp index 555159f..c06cf4c 100644 --- a/src/libvideo2x.cpp +++ b/src/libvideo2x.cpp @@ -22,65 +22,51 @@ extern "C" { static int process_frames( EncoderConfig *encoder_config, VideoProcessingContext *proc_ctx, - AVFormatContext *ifmt_ctx, - AVFormatContext *ofmt_ctx, - AVCodecContext *dec_ctx, - AVCodecContext *enc_ctx, + Decoder &decoder, + Encoder &encoder, Filter *filter, - int in_vstream_idx, - int out_vstream_idx, - int *stream_map, bool benchmark = false ) { - int ret; char errbuf[AV_ERROR_MAX_STRING_SIZE]; - std::vector flushed_frames; + int ret = 0; - // Get the total number of frames in the video with OpenCV + // Get required objects + AVFormatContext *ifmt_ctx = decoder.get_format_context(); + AVCodecContext *dec_ctx = decoder.get_codec_context(); + int in_vstream_idx = decoder.get_video_stream_index(); + AVFormatContext *ofmt_ctx = encoder.get_format_context(); + int *stream_map = encoder.get_stream_map(); + + // Get total number of frames spdlog::debug("Reading total number of frames"); proc_ctx->total_frames = get_video_frame_count(ifmt_ctx, in_vstream_idx); - // Check if the total number of frames is still 0 if (proc_ctx->total_frames <= 0) { spdlog::warn("Unable to determine the total number of frames"); } else { spdlog::debug("{} frames to process", proc_ctx->total_frames); } - AVFrame *frame = av_frame_alloc(); - if (frame == nullptr) { + // Allocate frame and packet + auto av_frame_deleter = [](AVFrame *frame) { av_frame_free(&frame); }; + std::unique_ptr 
frame(av_frame_alloc(), av_frame_deleter); + if (!frame) { ret = AVERROR(ENOMEM); return ret; } - AVPacket *packet = av_packet_alloc(); - if (packet == nullptr) { + auto av_packet_deleter = [](AVPacket *packet) { av_packet_free(&packet); }; + std::unique_ptr packet( + av_packet_alloc(), av_packet_deleter + ); + if (!packet) { spdlog::critical("Could not allocate AVPacket"); - av_frame_free(&frame); return AVERROR(ENOMEM); } - // Lambda function for cleaning up resources - auto cleanup = [&]() { - if (frame) { - av_frame_free(&frame); - frame = nullptr; - } - if (packet) { - av_packet_free(&packet); - packet = nullptr; - } - for (AVFrame *&flushed_frame : flushed_frames) { - if (flushed_frame) { - av_frame_free(&flushed_frame); - flushed_frame = nullptr; - } - } - }; - // Read frames from the input file while (!proc_ctx->abort) { - ret = av_read_frame(ifmt_ctx, packet); + ret = av_read_frame(ifmt_ctx, packet.get()); if (ret < 0) { if (ret == AVERROR_EOF) { spdlog::debug("Reached end of file"); @@ -88,17 +74,15 @@ static int process_frames( } av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error reading packet: {}", errbuf); - cleanup(); return ret; } if (packet->stream_index == in_vstream_idx) { - ret = avcodec_send_packet(dec_ctx, packet); + ret = avcodec_send_packet(dec_ctx, packet.get()); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error sending packet to decoder: {}", errbuf); - av_packet_unref(packet); - cleanup(); + av_packet_unref(packet.get()); return ret; } @@ -108,49 +92,43 @@ static int process_frames( continue; } - ret = avcodec_receive_frame(dec_ctx, frame); + ret = avcodec_receive_frame(dec_ctx, frame.get()); if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { spdlog::debug("Frame not ready"); break; } else if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error decoding video frame: {}", errbuf); - av_packet_unref(packet); - cleanup(); + av_packet_unref(packet.get()); return ret; } - AVFrame *processed_frame = nullptr; - ret = filter->process_frame(frame, &processed_frame); + AVFrame *raw_processed_frame = nullptr; + ret = filter->process_frame(frame.get(), &raw_processed_frame); + if (ret < 0 && ret != AVERROR(EAGAIN)) { av_strerror(ret, errbuf, sizeof(errbuf)); - av_frame_free(&processed_frame); - av_packet_unref(packet); - cleanup(); + av_packet_unref(packet.get()); return ret; - } else if (ret == 0 && processed_frame != nullptr) { + } else if (ret == 0 && raw_processed_frame != nullptr) { + auto processed_frame = std::unique_ptr( + raw_processed_frame, av_frame_deleter + ); + if (!benchmark) { - ret = write_frame( - processed_frame, - enc_ctx, - ofmt_ctx, - out_vstream_idx, - proc_ctx->processed_frames - ); + ret = + encoder.write_frame(processed_frame.get(), proc_ctx->processed_frames); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error encoding/writing frame: {}", errbuf); - av_frame_free(&processed_frame); - av_packet_unref(packet); - cleanup(); + av_packet_unref(packet.get()); return ret; } } - av_frame_free(&processed_frame); proc_ctx->processed_frames++; } - av_frame_unref(frame); + av_frame_unref(frame.get()); spdlog::debug( "Processed frame {}/{}", proc_ctx->processed_frames, proc_ctx->total_frames ); @@ -160,58 +138,54 @@ static int process_frames( int out_stream_index = stream_map[packet->stream_index]; AVStream *out_stream = ofmt_ctx->streams[out_stream_index]; - av_packet_rescale_ts(packet, in_stream->time_base, out_stream->time_base); + 
av_packet_rescale_ts(packet.get(), in_stream->time_base, out_stream->time_base); packet->stream_index = out_stream_index; - ret = av_interleaved_write_frame(ofmt_ctx, packet); + ret = av_interleaved_write_frame(ofmt_ctx, packet.get()); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error muxing audio/subtitle packet: {}", errbuf); - av_packet_unref(packet); - cleanup(); + av_packet_unref(packet.get()); return ret; } } - av_packet_unref(packet); + av_packet_unref(packet.get()); } // Flush the filter - ret = filter->flush(flushed_frames); + std::vector raw_flushed_frames; + ret = filter->flush(raw_flushed_frames); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error flushing filter: {}", errbuf); - cleanup(); return ret; } + // Wrap flushed frames in unique_ptrs + std::vector> flushed_frames; + for (AVFrame *raw_frame : raw_flushed_frames) { + flushed_frames.emplace_back(raw_frame, av_frame_deleter); + } + // Encode and write all flushed frames - for (AVFrame *&flushed_frame : flushed_frames) { - ret = write_frame( - flushed_frame, enc_ctx, ofmt_ctx, out_vstream_idx, proc_ctx->processed_frames - ); + for (auto &flushed_frame : flushed_frames) { + ret = encoder.write_frame(flushed_frame.get(), proc_ctx->processed_frames); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error encoding/writing flushed frame: {}", errbuf); - av_frame_free(&flushed_frame); - flushed_frame = nullptr; - cleanup(); return ret; } - av_frame_free(&flushed_frame); - flushed_frame = nullptr; proc_ctx->processed_frames++; } // Flush the encoder - ret = flush_encoder(enc_ctx, ofmt_ctx, out_vstream_idx); + ret = encoder.flush(); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error flushing encoder: {}", errbuf); - cleanup(); return ret; } - cleanup(); return ret; } @@ -226,55 +200,10 @@ extern "C" int process_video( EncoderConfig *encoder_config, VideoProcessingContext *proc_ctx ) { - AVFormatContext *ifmt_ctx = nullptr; - AVFormatContext *ofmt_ctx = nullptr; - AVCodecContext *dec_ctx = nullptr; - AVCodecContext *enc_ctx = nullptr; - AVBufferRef *hw_ctx = nullptr; - int *stream_map = nullptr; - Filter *filter = nullptr; - int in_vstream_idx = -1; - int out_vstream_idx = -1; char errbuf[AV_ERROR_MAX_STRING_SIZE]; int ret = 0; - // Lambda function for cleaning up resources - auto cleanup = [&]() { - if (ifmt_ctx) { - avformat_close_input(&ifmt_ctx); - ifmt_ctx = nullptr; - } - if (ofmt_ctx && !(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) { - avio_closep(&ofmt_ctx->pb); - ofmt_ctx->pb = nullptr; - } - if (ofmt_ctx) { - avformat_free_context(ofmt_ctx); - ofmt_ctx = nullptr; - } - if (dec_ctx) { - avcodec_free_context(&dec_ctx); - dec_ctx = nullptr; - } - if (enc_ctx) { - avcodec_free_context(&enc_ctx); - enc_ctx = nullptr; - } - if (hw_ctx) { - av_buffer_unref(&hw_ctx); - hw_ctx = nullptr; - } - if (stream_map) { - av_free(stream_map); - stream_map = nullptr; - } - if (filter) { - delete filter; - filter = nullptr; - } - }; - - // Set the log level for FFmpeg and spdlog (libvideo2x) + // Set the log level for FFmpeg and spdlog switch (log_level) { case LIBVIDEO2X_LOG_LEVEL_TRACE: av_log_set_level(AV_LOG_TRACE); @@ -314,26 +243,38 @@ extern "C" int process_video( std::filesystem::path in_fpath(in_fname); std::filesystem::path out_fpath(out_fname); + auto hw_ctx_deleter = [](AVBufferRef *ref) { + if (ref) { + av_buffer_unref(&ref); + } + }; + std::unique_ptr hw_ctx(nullptr, hw_ctx_deleter); + // Initialize hardware device 
context if (hw_type != AV_HWDEVICE_TYPE_NONE) { - ret = av_hwdevice_ctx_create(&hw_ctx, hw_type, NULL, NULL, 0); + AVBufferRef *tmp_hw_ctx = nullptr; + ret = av_hwdevice_ctx_create(&tmp_hw_ctx, hw_type, NULL, NULL, 0); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error initializing hardware device context: {}", errbuf); - cleanup(); return ret; } + hw_ctx.reset(tmp_hw_ctx); } - // Initialize input - ret = init_decoder(hw_type, hw_ctx, in_fpath, &ifmt_ctx, &dec_ctx, &in_vstream_idx); + // Initialize input decoder + Decoder decoder; + ret = decoder.init(hw_type, hw_ctx.get(), in_fpath); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Failed to initialize decoder: {}", errbuf); - cleanup(); return ret; } + AVFormatContext *ifmt_ctx = decoder.get_format_context(); + AVCodecContext *dec_ctx = decoder.get_codec_context(); + int in_vstream_idx = decoder.get_video_stream_index(); + // Initialize output dimensions based on filter configuration int output_width = 0, output_height = 0; switch (filter_config->filter_type) { @@ -347,116 +288,85 @@ extern "C" int process_video( break; default: spdlog::critical("Unknown filter type"); - cleanup(); return -1; } spdlog::debug("Output video dimensions: {}x{}", output_width, output_height); - // Initialize output encoder + // Update encoder configuration with output dimensions encoder_config->out_width = output_width; encoder_config->out_height = output_height; - ret = init_encoder( - hw_ctx, - out_fpath, - ifmt_ctx, - &ofmt_ctx, - &enc_ctx, - dec_ctx, - encoder_config, - in_vstream_idx, - &out_vstream_idx, - &stream_map - ); + + // Initialize the encoder + Encoder encoder; + ret = encoder.init(hw_ctx.get(), out_fpath, ifmt_ctx, dec_ctx, encoder_config, in_vstream_idx); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Failed to initialize encoder: {}", errbuf); - cleanup(); return ret; } // Write the output file header - ret = avformat_write_header(ofmt_ctx, NULL); + ret = avformat_write_header(encoder.get_format_context(), NULL); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error occurred when opening output file: {}", errbuf); - cleanup(); return ret; } // Create and initialize the appropriate filter + std::unique_ptr filter; if (filter_config->filter_type == FILTER_LIBPLACEBO) { const auto &config = filter_config->config.libplacebo; if (!config.shader_path) { spdlog::critical("Shader path must be provided for the libplacebo filter"); - cleanup(); return -1; } - filter = new LibplaceboFilter{ + filter = std::make_unique( vk_device_index, std::filesystem::path(config.shader_path), config.out_width, config.out_height - }; + ); } else if (filter_config->filter_type == FILTER_REALESRGAN) { const auto &config = filter_config->config.realesrgan; if (!config.model_name) { spdlog::critical("Model name must be provided for the RealESRGAN filter"); - cleanup(); return -1; } - filter = new RealesrganFilter{ + filter = std::make_unique( static_cast(vk_device_index), config.tta_mode, config.scaling_factor, config.model_name - }; + ); } else { spdlog::critical("Unknown filter type"); - cleanup(); return -1; } // Check if the filter instance was created successfully if (filter == nullptr) { spdlog::critical("Failed to create filter instance"); - cleanup(); return -1; } // Initialize the filter - ret = filter->init(dec_ctx, enc_ctx, hw_ctx); + ret = filter->init(dec_ctx, encoder.get_encoder_context(), hw_ctx.get()); if (ret < 0) { spdlog::critical("Failed to 
initialize filter"); - cleanup(); return ret; } - // Process frames - ret = process_frames( - encoder_config, - proc_ctx, - ifmt_ctx, - ofmt_ctx, - dec_ctx, - enc_ctx, - filter, - in_vstream_idx, - out_vstream_idx, - stream_map, - benchmark - ); + // Process frames using the encoder and decoder + ret = process_frames(encoder_config, proc_ctx, decoder, encoder, filter.get(), benchmark); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error processing frames: {}", errbuf); - cleanup(); return ret; } // Write the output file trailer - av_write_trailer(ofmt_ctx); - - // Cleanup before returning - cleanup(); + av_write_trailer(encoder.get_format_context()); if (ret < 0 && ret != AVERROR_EOF) { av_strerror(ret, errbuf, sizeof(errbuf));