feat(*): added support for copying audio/subtitle streams and pause/abort (#1179)

* feat: added Makefile target for debian
* fix: fixed Dockerfile installing the wrong package
* feat: added hwaccel for encoder and decoder
* feat: added benchmark mode
* feat: removed hard-coded keyframe info
* chore: cleaned up headers and organized code
* style: cleaned up headers and includes
* feat: added a progress bar for CLI
* feat: removed atomicity requirements on processed frames
* feat: added pause and abort for CLI
* chore: updated default preset and crf settings
* feat: added support for copying audio and subtitle streams
* fix: fixed syntax issues for MSVC
* fix: fixed audio/subtitle timestamp rescaling

Signed-off-by: k4yt3x <i@k4yt3x.com>
2024-12-28 06:59:11 +00:00 · 2024-10-10 00:23:13 -07:00 · 2024-10-10 00:23:13 -07:00 · 37c2c4c647
commit 37c2c4c647
parent c7fa9c10e6
21 changed files with 731 additions and 322 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10)
 project(video2x VERSION 6.0.0 LANGUAGES CXX C)

 # Set the C standard
-set(CMAKE_C_STANDARD 99)
+set(CMAKE_C_STANDARD 11)
 set(CMAKE_C_STANDARD_REQUIRED ON)

 # Set the C++ standard
--- a/Dockerfile
+++ b/Dockerfile
@ -20,7 +20,7 @@ WORKDIR /video2x

 # Build the package
 RUN makepkg -s --noconfirm \
-    && find /video2x -maxdepth 1 -name '*.pkg.tar.zst' | head -n 1 | \
+    && find /video2x -maxdepth 1 -name 'video2x-*.pkg.tar.zst' ! -name '*-debug-*' | head -n 1 | \
        xargs -I {} cp {} /tmp/video2x.pkg.tar.zst

 # stage 2: install wheels into the final image
@ -34,8 +34,6 @@ ENV VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/nvidia_icd.json\
 :/usr/share/vulkan/icd.d/intel_icd.x86_64.json

 COPY --from=builder /tmp/video2x.pkg.tar.zst /video2x.pkg.tar.zst
-COPY . /video2x
-WORKDIR /video2x
 RUN pacman -Sy --noconfirm ffmpeg ncnn \
        nvidia-utils vulkan-radeon vulkan-intel vulkan-swrast \
    && pacman -U --noconfirm /video2x.pkg.tar.zst \
--- a/Makefile
+++ b/Makefile
@ -33,6 +33,26 @@ debug:
 	cmake --build $(BINDIR) --config Debug --parallel
 	cp $(BINDIR)/compile_commands.json .

+debian:
+	apt-get update
+	apt-get install -y --no-install-recommends \
+		build-essential cmake clang pkg-config \
+		libavcodec-dev \
+		libavdevice-dev \
+		libavfilter-dev \
+		libavformat-dev \
+		libavutil-dev \
+		libswscale-dev \
+		libvulkan-dev \
+		glslang-tools \
+		libomp-dev
+	cmake -B /tmp/build -S . -DUSE_SYSTEM_NCNN=OFF \
+		-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
+		-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/tmp/install \
+		-DINSTALL_BIN_DESTINATION=. -DINSTALL_INCLUDE_DESTINATION=include \
+		-DINSTALL_LIB_DESTINATION=. -DINSTALL_MODEL_DESTINATION=.
+	cmake --build /tmp/build --config Release --target install --parallel
+
 test-realesrgan:
 	LD_LIBRARY_PATH=$(BINDIR) $(BINDIR)/video2x -i data/standard-test.mp4 -o data/output.mp4 \
 		-f realesrgan -r 4 --model realesr-animevideov3
--- a/include/conversions.h
+++ b/include/conversions.h
@ -1,7 +1,10 @@
 #ifndef CONVERSIONS_H
 #define CONVERSIONS_H

-#include <libavutil/frame.h>
+extern "C" {
+#include <libswscale/swscale.h>
+}
+
 #include <mat.h>

 // Convert AVFrame to another pixel format
--- a/include/decoder.h
+++ b/include/decoder.h
@ -1,11 +1,14 @@
 #ifndef DECODER_H
 #define DECODER_H

+extern "C" {
 #include <libavcodec/avcodec.h>
-#include <libavfilter/avfilter.h>
 #include <libavformat/avformat.h>
+}

 int init_decoder(
+    AVHWDeviceType hw_type,
+    AVBufferRef *hw_ctx,
    const char *input_filename,
    AVFormatContext **fmt_ctx,
    AVCodecContext **dec_ctx,
--- a/include/encoder.h
+++ b/include/encoder.h
@ -1,20 +1,32 @@
 #ifndef ENCODER_H
 #define ENCODER_H

+extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
+#include <libavutil/opt.h>
+}

 #include "libvideo2x.h"

 int init_encoder(
+    AVBufferRef *hw_ctx,
    const char *output_filename,
+    AVFormatContext *ifmt_ctx,
    AVFormatContext **ofmt_ctx,
    AVCodecContext **enc_ctx,
    AVCodecContext *dec_ctx,
-    EncoderConfig *encoder_config
+    EncoderConfig *encoder_config,
+    int video_stream_index,
+    int **stream_mapping
 );

-int encode_and_write_frame(AVFrame *frame, AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx);
+int encode_and_write_frame(
+    AVFrame *frame,
+    AVCodecContext *enc_ctx,
+    AVFormatContext *ofmt_ctx,
+    int video_stream_index
+);

 int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx);

--- a/include/filter.h
+++ b/include/filter.h
@ -6,14 +6,15 @@
 extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libavfilter/avfilter.h>
+#include <libavutil/buffer.h>
 }

 // Abstract base class for filters
 class Filter {
   public:
    virtual ~Filter() {}
-    virtual int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) = 0;
-    virtual AVFrame *process_frame(AVFrame *input_frame) = 0;
+    virtual int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) = 0;
+    virtual int process_frame(AVFrame *input_frame, AVFrame **output_frame) = 0;
    virtual int flush(std::vector<AVFrame *> &processed_frames) = 0;
 };

--- a/include/getopt.h
+++ b/include/getopt.h
@ -22,7 +22,7 @@ struct option {
 #define required_argument 1
 #define optional_argument 2

-int getopt(int, char **, const char *);
+// int getopt(int, char **, const char *);
 int getopt_long(int, char **, const char *, const struct option *, int *);

 #ifdef __cplusplus
--- a/include/libplacebo.h
+++ b/include/libplacebo.h
@ -3,15 +3,16 @@

 #include <filesystem>

+extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libavfilter/avfilter.h>
-#include <libavutil/buffer.h>
+}

 int init_libplacebo(
+    AVBufferRef *hw_ctx,
    AVFilterGraph **filter_graph,
    AVFilterContext **buffersrc_ctx,
    AVFilterContext **buffersink_ctx,
-    AVBufferRef **device_ctx,
    AVCodecContext *dec_ctx,
    int output_width,
    int output_height,
--- a/include/libplacebo_filter.h
+++ b/include/libplacebo_filter.h
@ -3,7 +3,11 @@

 #include <filesystem>

-#include <libavutil/buffer.h>
+extern "C" {
+#include <libavcodec/avcodec.h>
+#include <libavfilter/buffersink.h>
+#include <libavfilter/buffersrc.h>
+}

 #include "filter.h"

@ -13,7 +17,6 @@ class LibplaceboFilter : public Filter {
    AVFilterGraph *filter_graph;
    AVFilterContext *buffersrc_ctx;
    AVFilterContext *buffersink_ctx;
-    AVBufferRef *device_ctx;
    int output_width;
    int output_height;
    const std::filesystem::path shader_path;
@ -27,10 +30,10 @@ class LibplaceboFilter : public Filter {
    virtual ~LibplaceboFilter();

    // Initializes the filter with decoder and encoder contexts
-    int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) override;
+    int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override;

    // Processes an input frame and returns the processed frame
-    AVFrame *process_frame(AVFrame *input_frame) override;
+    int process_frame(AVFrame *input_frame, AVFrame **output_frame) override;

    // Flushes any remaining frames
    int flush(std::vector<AVFrame *> &processed_frames) override;
--- a/include/libvideo2x.h
+++ b/include/libvideo2x.h
@ -1,13 +1,10 @@
 #ifndef LIBVIDEO2X_H
 #define LIBVIDEO2X_H

-#include <libavutil/pixfmt.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <time.h>

-#include <libavcodec/avcodec.h>
-#include <libavcodec/codec_id.h>
-
 #ifdef _WIN32
 #ifdef LIBVIDEO2X_EXPORTS
 #define LIBVIDEO2X_API __declspec(dllexport)
@ -22,6 +19,9 @@
 extern "C" {
 #endif

+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+
 // Enum to specify filter type
 enum FilterType {
    FILTER_LIBPLACEBO,
@ -38,7 +38,7 @@ struct LibplaceboConfig {
 // Configuration for RealESRGAN filter
 struct RealESRGANConfig {
    int gpuid;
-    int tta_mode;
+    bool tta_mode;
    int scaling_factor;
    const char *model;
 };
@ -56,6 +56,7 @@ struct FilterConfig {
 struct EncoderConfig {
    int output_width;
    int output_height;
+    bool copy_streams;
    enum AVCodecID codec;
    enum AVPixelFormat pix_fmt;
    const char *preset;
@ -63,20 +64,25 @@ struct EncoderConfig {
    float crf;
 };

-// Processing status
-struct ProcessingStatus {
+// Video processing context
+struct VideoProcessingContext {
    int64_t processed_frames;
    int64_t total_frames;
    time_t start_time;
+    bool pause;
+    bool abort;
+    bool completed;
 };

 // C-compatible process_video function
 LIBVIDEO2X_API int process_video(
    const char *input_filename,
    const char *output_filename,
+    bool benchmark,
+    enum AVHWDeviceType hw_device_type,
    const struct FilterConfig *filter_config,
    struct EncoderConfig *encoder_config,
-    struct ProcessingStatus *status
+    struct VideoProcessingContext *proc_ctx
 );

 #ifdef __cplusplus
--- a/include/realesrgan_filter.h
+++ b/include/realesrgan_filter.h
@ -3,6 +3,10 @@

 #include <filesystem>

+extern "C" {
+#include <libavcodec/avcodec.h>
+}
+
 #include "filter.h"
 #include "realesrgan.h"

@ -35,10 +39,10 @@ class RealesrganFilter : public Filter {
    virtual ~RealesrganFilter();

    // Initializes the filter with decoder and encoder contexts
-    int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) override;
+    int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override;

    // Processes an input frame and returns the processed frame
-    AVFrame *process_frame(AVFrame *input_frame) override;
+    int process_frame(AVFrame *input_frame, AVFrame **output_frame) override;

    // Flushes any remaining frames (if necessary)
    int flush(std::vector<AVFrame *> &processed_frames) override;
--- a/src/conversions.cpp
+++ b/src/conversions.cpp
@ -1,17 +1,7 @@
-#include <cstdio>
-
-// FFmpeg includes
-extern "C" {
-#include <libavutil/frame.h>
-#include <libavutil/imgutils.h>
-#include <libswscale/swscale.h>
-}
-
-// ncnn includes
-#include <mat.h>
-
 #include "conversions.h"

+#include <cstdio>
+
 // Convert AVFrame format
 AVFrame *convert_avframe_pix_fmt(AVFrame *src_frame, AVPixelFormat pix_fmt) {
    AVFrame *dst_frame = av_frame_alloc();
@ -77,10 +67,11 @@ ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame) {
        converted_frame = convert_avframe_pix_fmt(frame, AV_PIX_FMT_BGR24);
        if (!converted_frame) {
            fprintf(stderr, "Failed to convert AVFrame to BGR24.\n");
-            return ncnn::Mat();  // Return an empty ncnn::Mat on failure
+            return ncnn::Mat();
        }
    } else {
-        converted_frame = frame;  // If the frame is already in BGR24, use it directly
+        // If the frame is already in BGR24, use it directly
+        converted_frame = frame;
    }

    // Allocate a new ncnn::Mat and copy the data
@ -146,10 +137,7 @@ AVFrame *ncnn_mat_to_avframe(const ncnn::Mat &mat, AVPixelFormat pix_fmt) {
        return nullptr;
    }

-    // Copy data from ncnn::Mat to the BGR AVFrame
-    // mat.to_pixels(bgr_frame->data[0], ncnn::Mat::PIXEL_BGR);
-
-    // Manually copy the pixel data from ncnn::Mat to the BGR AVFrame
+    // Copy the pixel data from ncnn::Mat to the BGR AVFrame
    for (int y = 0; y < mat.h; y++) {
        uint8_t *dst_row = bgr_frame->data[0] + y * bgr_frame->linesize[0];
        const uint8_t *src_row = mat.row<const uint8_t>(y);
--- a/src/decoder.cpp
+++ b/src/decoder.cpp
@ -1,19 +1,25 @@
+#include "decoder.h"
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

-extern "C" {
-#include <libavcodec/avcodec.h>
-#include <libavfilter/avfilter.h>
-#include <libavfilter/buffersink.h>
-#include <libavfilter/buffersrc.h>
-#include <libavformat/avformat.h>
-#include <libavutil/opt.h>
-#include <libavutil/pixdesc.h>
-#include <libavutil/rational.h>
+static enum AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE;
+
+// Callback function to choose the hardware-accelerated pixel format
+static enum AVPixelFormat get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts) {
+    for (const enum AVPixelFormat *p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
+        if (*p == hw_pix_fmt) {
+            return *p;
+        }
+    }
+    fprintf(stderr, "Failed to get HW surface format.\n");
+    return AV_PIX_FMT_NONE;
 }

 int init_decoder(
+    AVHWDeviceType hw_type,
+    AVBufferRef *hw_ctx,
    const char *input_filename,
    AVFormatContext **fmt_ctx,
    AVCodecContext **dec_ctx,
@ -44,18 +50,45 @@ int init_decoder(
    AVStream *video_stream = ifmt_ctx->streams[stream_index];

    // Set up the decoder
-    const AVCodec *dec = avcodec_find_decoder(video_stream->codecpar->codec_id);
-    if (!dec) {
+    const AVCodec *decoder = avcodec_find_decoder(video_stream->codecpar->codec_id);
+    if (!decoder) {
        fprintf(stderr, "Failed to find decoder for stream #%u\n", stream_index);
        return AVERROR_DECODER_NOT_FOUND;
    }

-    codec_ctx = avcodec_alloc_context3(dec);
+    codec_ctx = avcodec_alloc_context3(decoder);
    if (!codec_ctx) {
        fprintf(stderr, "Failed to allocate the decoder context\n");
        return AVERROR(ENOMEM);
    }

+    // Set hardware device context
+    if (hw_ctx != nullptr) {
+        codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx);
+        codec_ctx->get_format = get_hw_format;
+
+        // Automatically determine the hardware pixel format
+        for (int i = 0;; i++) {
+            const AVCodecHWConfig *config = avcodec_get_hw_config(decoder, i);
+            if (config == nullptr) {
+                fprintf(
+                    stderr,
+                    "Decoder %s does not support device type %s.\n",
+                    decoder->name,
+                    av_hwdevice_get_type_name(hw_type)
+                );
+                avcodec_free_context(&codec_ctx);
+                avformat_close_input(&ifmt_ctx);
+                return AVERROR(ENOSYS);
+            }
+            if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX &&
+                config->device_type == hw_type) {
+                hw_pix_fmt = config->pix_fmt;
+                break;
+            }
+        }
+    }
+
    if ((ret = avcodec_parameters_to_context(codec_ctx, video_stream->codecpar)) < 0) {
        fprintf(stderr, "Failed to copy decoder parameters to input decoder context\n");
        return ret;
@ -66,7 +99,7 @@ int init_decoder(
    codec_ctx->pkt_timebase = video_stream->time_base;
    codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, video_stream, NULL);

-    if ((ret = avcodec_open2(codec_ctx, dec, NULL)) < 0) {
+    if ((ret = avcodec_open2(codec_ctx, decoder, NULL)) < 0) {
        fprintf(stderr, "Failed to open decoder for stream #%u\n", stream_index);
        return ret;
    }
--- a/src/encoder.cpp
+++ b/src/encoder.cpp
@ -1,32 +1,34 @@
+#include "encoder.h"
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

-extern "C" {
-#include <libavcodec/avcodec.h>
-#include <libavcodec/codec.h>
-#include <libavcodec/codec_id.h>
-#include <libavfilter/avfilter.h>
-#include <libavfilter/buffersink.h>
-#include <libavfilter/buffersrc.h>
-#include <libavformat/avformat.h>
-#include <libavutil/opt.h>
-#include <libavutil/pixdesc.h>
-#include <libavutil/rational.h>
+#include "conversions.h"
+
+static enum AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder) {
+    const enum AVPixelFormat *p = encoder->pix_fmts;
+    if (!p) {
+        fprintf(stderr, "No pixel formats supported by encoder\n");
+        return AV_PIX_FMT_NONE;
+    }
+    return *p;
 }

-#include "conversions.h"
-#include "libvideo2x.h"
-
 int init_encoder(
+    AVBufferRef *hw_ctx,
    const char *output_filename,
+    AVFormatContext *ifmt_ctx,
    AVFormatContext **ofmt_ctx,
    AVCodecContext **enc_ctx,
    AVCodecContext *dec_ctx,
-    EncoderConfig *encoder_config
+    EncoderConfig *encoder_config,
+    int video_stream_index,
+    int **stream_mapping
 ) {
    AVFormatContext *fmt_ctx = NULL;
    AVCodecContext *codec_ctx = NULL;
+    int stream_index = 0;
    int ret;

    avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, output_filename);
@ -35,66 +37,130 @@ int init_encoder(
        return AVERROR_UNKNOWN;
    }

-    // Create a new video stream
-    const AVCodec *enc = avcodec_find_encoder(encoder_config->codec);
-    if (!enc) {
-        fprintf(stderr, "Necessary encoder not found\n");
+    const AVCodec *encoder = avcodec_find_encoder(encoder_config->codec);
+    if (!encoder) {
+        fprintf(
+            stderr,
+            "Required video encoder not found for vcodec %s\n",
+            avcodec_get_name(encoder_config->codec)
+        );
        return AVERROR_ENCODER_NOT_FOUND;
    }

+    // Create a new video stream in the output file
    AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL);
    if (!out_stream) {
-        fprintf(stderr, "Failed allocating output stream\n");
+        fprintf(stderr, "Failed to allocate the output video stream\n");
        return AVERROR_UNKNOWN;
    }

-    codec_ctx = avcodec_alloc_context3(enc);
+    codec_ctx = avcodec_alloc_context3(encoder);
    if (!codec_ctx) {
        fprintf(stderr, "Failed to allocate the encoder context\n");
        return AVERROR(ENOMEM);
    }

+    // Set hardware device context
+    if (hw_ctx != nullptr) {
+        codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx);
+    }
+
    // Set encoding parameters
    codec_ctx->height = encoder_config->output_height;
    codec_ctx->width = encoder_config->output_width;
    codec_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
-    codec_ctx->pix_fmt = encoder_config->pix_fmt;
-    codec_ctx->time_base = av_inv_q(dec_ctx->framerate);
+    codec_ctx->bit_rate = encoder_config->bit_rate;

-    if (codec_ctx->time_base.num == 0 || codec_ctx->time_base.den == 0) {
-        codec_ctx->time_base = av_inv_q(av_guess_frame_rate(fmt_ctx, out_stream, NULL));
+    // Set the pixel format
+    if (encoder_config->pix_fmt != AV_PIX_FMT_NONE) {
+        // Use the specified pixel format
+        codec_ctx->pix_fmt = encoder_config->pix_fmt;
+    } else {
+        // Fall back to the default pixel format
+        codec_ctx->pix_fmt = get_encoder_default_pix_fmt(encoder);
+        if (codec_ctx->pix_fmt == AV_PIX_FMT_NONE) {
+            fprintf(stderr, "Could not get the default pixel format for the encoder\n");
+            return AVERROR(EINVAL);
+        }
    }

-    // Set the bit rate and other encoder parameters if needed
-    codec_ctx->bit_rate = encoder_config->bit_rate;
-    codec_ctx->gop_size = 60;     // Keyframe interval
-    codec_ctx->max_b_frames = 3;  // B-frames
-    codec_ctx->keyint_min = 60;   // Maximum GOP size
+    // Set the time base
+    codec_ctx->time_base = av_inv_q(dec_ctx->framerate);
+    if (codec_ctx->time_base.num == 0 || codec_ctx->time_base.den == 0) {
+        codec_ctx->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_stream, NULL));
+    }

+    // Set the CRF and preset for any codecs that support it
    char crf_str[16];
    snprintf(crf_str, sizeof(crf_str), "%.f", encoder_config->crf);
-    if (encoder_config->codec == AV_CODEC_ID_H264 || encoder_config->codec == AV_CODEC_ID_HEVC) {
    av_opt_set(codec_ctx->priv_data, "crf", crf_str, 0);
    av_opt_set(codec_ctx->priv_data, "preset", encoder_config->preset, 0);
-    }

    if (fmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) {
        codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }

-    if ((ret = avcodec_open2(codec_ctx, enc, NULL)) < 0) {
+    if ((ret = avcodec_open2(codec_ctx, encoder, NULL)) < 0) {
        fprintf(stderr, "Cannot open video encoder\n");
        return ret;
    }

    ret = avcodec_parameters_from_context(out_stream->codecpar, codec_ctx);
    if (ret < 0) {
-        fprintf(stderr, "Failed to copy encoder parameters to output stream\n");
+        fprintf(stderr, "Failed to copy encoder parameters to output video stream\n");
        return ret;
    }

    out_stream->time_base = codec_ctx->time_base;

+    if (encoder_config->copy_streams) {
+        // Allocate the stream map
+        *stream_mapping = (int *)av_malloc_array(ifmt_ctx->nb_streams, sizeof(**stream_mapping));
+        if (!*stream_mapping) {
+            fprintf(stderr, "Could not allocate stream mapping\n");
+            return AVERROR(ENOMEM);
+        }
+
+        // Map the video stream
+        (*stream_mapping)[video_stream_index] = stream_index++;
+
+        // Loop through each stream in the input file
+        for (int i = 0; i < ifmt_ctx->nb_streams; i++) {
+            AVStream *in_stream = ifmt_ctx->streams[i];
+            AVCodecParameters *in_codecpar = in_stream->codecpar;
+
+            if (i == video_stream_index) {
+                // Video stream is already handled
+                continue;
+            }
+
+            if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&
+                in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
+                (*stream_mapping)[i] = -1;
+                continue;
+            }
+
+            // Create corresponding output stream
+            AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL);
+            if (!out_stream) {
+                fprintf(stderr, "Failed allocating output stream\n");
+                return AVERROR_UNKNOWN;
+            }
+
+            ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);
+            if (ret < 0) {
+                fprintf(stderr, "Failed to copy codec parameters\n");
+                return ret;
+            }
+            out_stream->codecpar->codec_tag = 0;
+
+            // Copy time base
+            out_stream->time_base = in_stream->time_base;
+
+            (*stream_mapping)[i] = stream_index++;
+        }
+    }
+
    // Open the output file
    if (!(fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
        ret = avio_open(&fmt_ctx->pb, output_filename, AVIO_FLAG_WRITE);
@ -110,7 +176,12 @@ int init_encoder(
    return 0;
 }

-int encode_and_write_frame(AVFrame *frame, AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx) {
+int encode_and_write_frame(
+    AVFrame *frame,
+    AVCodecContext *enc_ctx,
+    AVFormatContext *ofmt_ctx,
+    int video_stream_index
+) {
    int ret;

    // Convert the frame to the encoder's pixel format if needed
@ -144,14 +215,16 @@ int encode_and_write_frame(AVFrame *frame, AVCodecContext *enc_ctx, AVFormatCont
            av_packet_unref(enc_pkt);
            break;
        } else if (ret < 0) {
-            fprintf(stderr, "Error during encoding\n");
+            fprintf(stderr, "Error encoding frame\n");
            av_packet_free(&enc_pkt);
            return ret;
        }

        // Rescale packet timestamps
-        av_packet_rescale_ts(enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[0]->time_base);
-        enc_pkt->stream_index = ofmt_ctx->streams[0]->index;
+        av_packet_rescale_ts(
+            enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[video_stream_index]->time_base
+        );
+        enc_pkt->stream_index = video_stream_index;

        // Write the packet
        ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt);
@ -182,7 +255,7 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx) {
            av_packet_unref(enc_pkt);
            break;
        } else if (ret < 0) {
-            fprintf(stderr, "Error during encoding\n");
+            fprintf(stderr, "Error encoding frame\n");
            av_packet_free(&enc_pkt);
            return ret;
        }
--- a/src/fsutils.cpp
+++ b/src/fsutils.cpp
@ -1,4 +1,4 @@
-#include <filesystem>
+#include "fsutils.h"

 #if _WIN32
 #include <windows.h>
@ -8,8 +8,6 @@
 #include <cstring>
 #endif

-#include "fsutils.h"
-
 #if _WIN32
 std::filesystem::path get_executable_directory() {
    std::vector<wchar_t> filepath(MAX_PATH);
--- a/src/libplacebo.cpp
+++ b/src/libplacebo.cpp
@ -1,28 +1,15 @@
+#include "libplacebo.h"
+
 #include <stdio.h>
 #include <stdlib.h>
-#include <filesystem>
-
-extern "C" {
-#include <libavcodec/avcodec.h>
-#include <libavfilter/avfilter.h>
-#include <libavfilter/buffersink.h>
-#include <libavfilter/buffersrc.h>
-#include <libavformat/avformat.h>
-#include <libavutil/buffer.h>
-#include <libavutil/hwcontext.h>
-#include <libavutil/opt.h>
-#include <libavutil/pixdesc.h>
-#include <libavutil/rational.h>
-#include <libswscale/swscale.h>
-}

 #include "fsutils.h"

 int init_libplacebo(
+    AVBufferRef *hw_ctx,
    AVFilterGraph **filter_graph,
    AVFilterContext **buffersrc_ctx,
    AVFilterContext **buffersink_ctx,
-    AVBufferRef **device_ctx,
    AVCodecContext *dec_ctx,
    int output_width,
    int output_height,
@ -31,14 +18,6 @@ int init_libplacebo(
    char args[512];
    int ret;

-    // Initialize the Vulkan hardware device
-    AVBufferRef *hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VULKAN);
-    ret = av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_VULKAN, NULL, NULL, 0);
-    if (ret < 0) {
-        fprintf(stderr, "Unable to initialize Vulkan device\n");
-        return ret;
-    }
-
    AVFilterGraph *graph = avfilter_graph_alloc();
    if (!graph) {
        fprintf(stderr, "Unable to create filter graph.\n");
@ -67,7 +46,6 @@ int init_libplacebo(
    ret = avfilter_graph_create_filter(buffersrc_ctx, buffersrc, "in", args, NULL, graph);
    if (ret < 0) {
        fprintf(stderr, "Cannot create buffer source\n");
-        av_buffer_unref(&hw_device_ctx);
        avfilter_graph_free(&graph);
        return ret;
    }
@ -78,7 +56,6 @@ int init_libplacebo(
    const AVFilter *libplacebo_filter = avfilter_get_by_name("libplacebo");
    if (!libplacebo_filter) {
        fprintf(stderr, "Filter 'libplacebo' not found\n");
-        av_buffer_unref(&hw_device_ctx);
        avfilter_graph_free(&graph);
        return AVERROR_FILTER_NOT_FOUND;
    }
@ -108,19 +85,19 @@ int init_libplacebo(
    );
    if (ret < 0) {
        fprintf(stderr, "Cannot create libplacebo filter\n");
-        av_buffer_unref(&hw_device_ctx);
        avfilter_graph_free(&graph);
        return ret;
    }

    // Set the hardware device context to Vulkan
-    libplacebo_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
+    if (hw_ctx != nullptr) {
+        libplacebo_ctx->hw_device_ctx = av_buffer_ref(hw_ctx);
+    }

    // Link buffersrc to libplacebo
    ret = avfilter_link(last_filter, 0, libplacebo_ctx, 0);
    if (ret < 0) {
        fprintf(stderr, "Error connecting buffersrc to libplacebo filter\n");
-        av_buffer_unref(&hw_device_ctx);
        avfilter_graph_free(&graph);
        return ret;
    }
@ -132,7 +109,6 @@ int init_libplacebo(
    ret = avfilter_graph_create_filter(buffersink_ctx, buffersink, "out", NULL, NULL, graph);
    if (ret < 0) {
        fprintf(stderr, "Cannot create buffer sink\n");
-        av_buffer_unref(&hw_device_ctx);
        avfilter_graph_free(&graph);
        return ret;
    }
@ -141,7 +117,6 @@ int init_libplacebo(
    ret = avfilter_link(last_filter, 0, *buffersink_ctx, 0);
    if (ret < 0) {
        fprintf(stderr, "Error connecting libplacebo filter to buffersink\n");
-        av_buffer_unref(&hw_device_ctx);
        avfilter_graph_free(&graph);
        return ret;
    }
@ -150,12 +125,10 @@ int init_libplacebo(
    ret = avfilter_graph_config(graph, NULL);
    if (ret < 0) {
        fprintf(stderr, "Error configuring the filter graph\n");
-        av_buffer_unref(&hw_device_ctx);
        avfilter_graph_free(&graph);
        return ret;
    }

    *filter_graph = graph;
-    *device_ctx = hw_device_ctx;
    return 0;
 }
--- a/src/libplacebo_filter.cpp
+++ b/src/libplacebo_filter.cpp
@ -1,21 +1,14 @@
-#include <cstdio>
+#include "libplacebo_filter.h"

-extern "C" {
-#include <libavcodec/avcodec.h>
-#include <libavfilter/buffersink.h>
-#include <libavfilter/buffersrc.h>
-#include <libavutil/buffer.h>
-}
+#include <cstdio>

 #include "fsutils.h"
 #include "libplacebo.h"
-#include "libplacebo_filter.h"

 LibplaceboFilter::LibplaceboFilter(int width, int height, const std::filesystem::path &shader_path)
    : filter_graph(nullptr),
      buffersrc_ctx(nullptr),
      buffersink_ctx(nullptr),
-      device_ctx(nullptr),
      output_width(width),
      output_height(height),
      shader_path(std::move(shader_path)) {}
@ -29,17 +22,13 @@ LibplaceboFilter::~LibplaceboFilter() {
        avfilter_free(buffersink_ctx);
        buffersink_ctx = nullptr;
    }
-    if (device_ctx) {
-        av_buffer_unref(&device_ctx);
-        device_ctx = nullptr;
-    }
    if (filter_graph) {
        avfilter_graph_free(&filter_graph);
        filter_graph = nullptr;
    }
 }

-int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) {
+int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) {
    // Construct the shader path
    std::filesystem::path shader_full_path;
    if (filepath_is_readable(shader_path)) {
@ -51,14 +40,20 @@ int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) {
            find_resource_file(std::filesystem::path("models") / (shader_path.string() + ".glsl"));
    }

+    // Check if the shader file exists
+    if (!std::filesystem::exists(shader_full_path)) {
+        fprintf(stderr, "libplacebo shader file not found: %s\n", shader_full_path.c_str());
+        return -1;
+    }
+
    // Save the output time base
    output_time_base = enc_ctx->time_base;

    return init_libplacebo(
+        hw_ctx,
        &filter_graph,
        &buffersrc_ctx,
        &buffersink_ctx,
-        &device_ctx,
        dec_ctx,
        output_width,
        output_height,
@ -66,45 +61,39 @@ int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) {
    );
 }

-AVFrame *LibplaceboFilter::process_frame(AVFrame *input_frame) {
+int LibplaceboFilter::process_frame(AVFrame *input_frame, AVFrame **output_frame) {
    int ret;

    // Get the filtered frame
-    AVFrame *output_frame = av_frame_alloc();
-    if (output_frame == nullptr) {
+    *output_frame = av_frame_alloc();
+    if (*output_frame == nullptr) {
        fprintf(stderr, "Failed to allocate output frame\n");
-        return nullptr;
+        return -1;
    }

    // Feed the frame to the filter graph
    ret = av_buffersrc_add_frame(buffersrc_ctx, input_frame);
    if (ret < 0) {
        fprintf(stderr, "Error while feeding the filter graph\n");
-        return nullptr;
+        return ret;
    }

-    ret = av_buffersink_get_frame(buffersink_ctx, output_frame);
+    ret = av_buffersink_get_frame(buffersink_ctx, *output_frame);
    if (ret < 0) {
-        av_frame_free(&output_frame);
-        if (ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) {
-            char errbuf[AV_ERROR_MAX_STRING_SIZE];
-            av_strerror(ret, errbuf, sizeof(errbuf));
-            fprintf(stderr, "Error getting frame from filter graph: %s\n", errbuf);
-            return nullptr;
-        }
-        return (AVFrame *)-1;
+        av_frame_free(output_frame);
+        return ret;
    }

    // Rescale PTS to encoder's time base
-    output_frame->pts =
-        av_rescale_q(output_frame->pts, buffersink_ctx->inputs[0]->time_base, output_time_base);
+    (*output_frame)->pts =
+        av_rescale_q((*output_frame)->pts, buffersink_ctx->inputs[0]->time_base, output_time_base);

    // Return the processed frame to the caller
-    return output_frame;
+    return 0;
 }

 int LibplaceboFilter::flush(std::vector<AVFrame *> &processed_frames) {
-    int ret = av_buffersrc_add_frame(buffersrc_ctx, nullptr);  // Signal EOF to the filter graph
+    int ret = av_buffersrc_add_frame(buffersrc_ctx, nullptr);
    if (ret < 0) {
        fprintf(stderr, "Error while flushing filter graph\n");
        return ret;
--- a/src/libvideo2x.cpp
+++ b/src/libvideo2x.cpp
@ -1,30 +1,41 @@
+#include "libvideo2x.h"
+
+#include <libavutil/mathematics.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <cstdint>
-
-// FFmpeg headers
-extern "C" {
-#include <libavcodec/avcodec.h>
-#include <libavformat/avformat.h>
-}
+#include <thread>

 #include "decoder.h"
 #include "encoder.h"
 #include "filter.h"
 #include "libplacebo_filter.h"
-#include "libvideo2x.h"
 #include "realesrgan_filter.h"

-// Function to process frames using the selected filter (same as before)
+/**
+ * @brief Process frames using the selected filter.
+ *
+ * @param[in] encoder_config Encoder configurations (copy_streams is read here)
+ * @param[in,out] proc_ctx Struct containing the processing context
+ * @param[in] ifmt_ctx Input format context
+ * @param[in] ofmt_ctx Output format context
+ * @param[in] dec_ctx Decoder context
+ * @param[in] enc_ctx Encoder context
+ * @param[in] filter Filter instance
+ * @param[in] video_stream_index Index of the video stream in the input format context
+ * @param[in] stream_mapping Maps input stream index to output stream index;
+ *            packets whose entry is negative are not copied
+ * @param[in] benchmark If true, frames processed in the main loop are not encoded or written
+ * @return int 0 on success, negative value on error
+ */
 int process_frames(
-    ProcessingStatus *status,
-    AVFormatContext *fmt_ctx,
+    EncoderConfig *encoder_config,
+    VideoProcessingContext *proc_ctx,
+    AVFormatContext *ifmt_ctx,
    AVFormatContext *ofmt_ctx,
    AVCodecContext *dec_ctx,
    AVCodecContext *enc_ctx,
    Filter *filter,
-    int video_stream_index
+    int video_stream_index,
+    int *stream_mapping,
+    bool benchmark = false
 ) {
    int ret;
    AVPacket packet;
@ -32,21 +43,21 @@ int process_frames(
    char errbuf[AV_ERROR_MAX_STRING_SIZE];

    // Get the total number of frames in the video
-    AVStream *video_stream = fmt_ctx->streams[video_stream_index];
-    status->total_frames = video_stream->nb_frames;
+    AVStream *video_stream = ifmt_ctx->streams[video_stream_index];
+    proc_ctx->total_frames = video_stream->nb_frames;

    // If nb_frames is not set, calculate total frames using duration and frame rate
-    if (status->total_frames == 0) {
+    if (proc_ctx->total_frames == 0) {
        int64_t duration = video_stream->duration;
        AVRational frame_rate = video_stream->avg_frame_rate;
        if (duration != AV_NOPTS_VALUE && frame_rate.num != 0 && frame_rate.den != 0) {
-            status->total_frames = duration * frame_rate.num / frame_rate.den;
+            proc_ctx->total_frames = duration * frame_rate.num / frame_rate.den;
        }
    }

    // Get start time
-    status->start_time = time(NULL);
-    if (status->start_time == -1) {
+    proc_ctx->start_time = time(NULL);
+    if (proc_ctx->start_time == -1) {
        perror("time");
    }

@ -57,8 +68,8 @@ int process_frames(
    }

    // Read frames from the input file
-    while (1) {
-        ret = av_read_frame(fmt_ctx, &packet);
+    while (!proc_ctx->abort) {
+        ret = av_read_frame(ifmt_ctx, &packet);
        if (ret < 0) {
            break;  // End of file or error
        }
@ -74,7 +85,13 @@ int process_frames(
            }

            // Receive and process frames from the decoder
-            while (1) {
+            while (!proc_ctx->abort) {
+                // Check if the processing is paused
+                if (proc_ctx->pause) {
+                    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+                    continue;
+                }
+
                ret = avcodec_receive_frame(dec_ctx, frame);
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                    break;
@ -85,43 +102,52 @@ int process_frames(
                }

                // Process the frame using the selected filter
-                AVFrame *processed_frame = filter->process_frame(frame);
-                if (processed_frame != nullptr && processed_frame != (AVFrame *)-1) {
+                AVFrame *processed_frame = nullptr;
+                ret = filter->process_frame(frame, &processed_frame);
+                if (ret == 0 && processed_frame != nullptr) {
                    // Encode and write the processed frame
-                    ret = encode_and_write_frame(processed_frame, enc_ctx, ofmt_ctx);
+                    if (!benchmark) {
+                        ret = encode_and_write_frame(
+                            processed_frame, enc_ctx, ofmt_ctx, video_stream_index
+                        );
                        if (ret < 0) {
                            av_strerror(ret, errbuf, sizeof(errbuf));
                            fprintf(stderr, "Error encoding/writing frame: %s\n", errbuf);
                            av_frame_free(&processed_frame);
                            goto end;
                        }
+                    }

                    av_frame_free(&processed_frame);
-                    status->processed_frames++;
-                } else if (processed_frame != (AVFrame *)-1) {
-                    fprintf(stderr, "Error processing frame\n");
+                    proc_ctx->processed_frames++;
+                } else if (ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) {
+                    fprintf(stderr, "Filter returned an error\n");
                    goto end;
                }

                av_frame_unref(frame);
+                // TODO: Print the debug processing status
+            }
+        } else if (encoder_config->copy_streams && stream_mapping[packet.stream_index] >= 0) {
+            AVStream *in_stream = ifmt_ctx->streams[packet.stream_index];
+            int out_stream_index = stream_mapping[packet.stream_index];
+            AVStream *out_stream = ofmt_ctx->streams[out_stream_index];

-                // Print the processing status
-                printf(
-                    "\r[Video2X] Processing frame %ld/%ld (%.2f%%); time elapsed: %lds",
-                    status->processed_frames,
-                    status->total_frames,
-                    status->processed_frames * 100.0 / status->total_frames,
-                    time(NULL) - status->start_time
-                );
-                fflush(stdout);
+            // Rescale packet timestamps
+            av_packet_rescale_ts(&packet, in_stream->time_base, out_stream->time_base);
+            packet.stream_index = out_stream_index;
+
+            // If copy streams is enabled, copy the packet to the output
+            ret = av_interleaved_write_frame(ofmt_ctx, &packet);
+            if (ret < 0) {
+                fprintf(stderr, "Error muxing packet\n");
+                av_packet_unref(&packet);
+                // Exit through the shared `end:` path like the other failure
+                // branches instead of returning directly; ret is kept as-is
+                goto end;
            }
        }
        av_packet_unref(&packet);
    }

-    // Print a newline after processing all frames
-    printf("\n");
-
    // Flush the filter
    ret = filter->flush(flushed_frames);
    if (ret < 0) {
@ -132,7 +158,7 @@ int process_frames(

    // Encode and write all flushed frames
    for (AVFrame *&flushed_frame : flushed_frames) {
-        ret = encode_and_write_frame(flushed_frame, enc_ctx, ofmt_ctx);
+        ret = encode_and_write_frame(flushed_frame, enc_ctx, ofmt_ctx, video_stream_index);
        if (ret < 0) {
            av_strerror(ret, errbuf, sizeof(errbuf));
            fprintf(stderr, "Error encoding/writing flushed frame: %s\n", errbuf);
@ -163,25 +189,18 @@ end:
    return ret;
 }

-// Cleanup helper function
+// Cleanup resources after processing the video
 void cleanup(
-    AVFormatContext *fmt_ctx,
+    AVFormatContext *ifmt_ctx,
    AVFormatContext *ofmt_ctx,
    AVCodecContext *dec_ctx,
    AVCodecContext *enc_ctx,
+    AVBufferRef *hw_ctx,
+    int *stream_mapping,
    Filter *filter
 ) {
-    if (filter) {
-        delete filter;
-    }
-    if (dec_ctx) {
-        avcodec_free_context(&dec_ctx);
-    }
-    if (enc_ctx) {
-        avcodec_free_context(&enc_ctx);
-    }
-    if (fmt_ctx) {
-        avformat_close_input(&fmt_ctx);
+    if (ifmt_ctx) {
+        avformat_close_input(&ifmt_ctx);
    }
    if (ofmt_ctx && !(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) {
        avio_closep(&ofmt_ctx->pb);
@ -189,29 +208,68 @@ void cleanup(
    if (ofmt_ctx) {
        avformat_free_context(ofmt_ctx);
    }
+    if (dec_ctx) {
+        avcodec_free_context(&dec_ctx);
+    }
+    if (enc_ctx) {
+        avcodec_free_context(&enc_ctx);
+    }
+    if (hw_ctx) {
+        av_buffer_unref(&hw_ctx);
+    }
+    if (stream_mapping) {
+        av_free(stream_mapping);
+    }
+    if (filter) {
+        delete filter;
+    }
 }

-// Main function to process the video
+/**
+ * @brief Process a video file using the selected filter and encoder settings.
+ *
+ * @param[in] input_filename Path to the input video file
+ * @param[in] output_filename Path to the output video file
+ * @param[in] benchmark If true, frames processed in the main loop are not encoded or written
+ * @param[in] hw_type Hardware device type
+ * @param[in] filter_config Filter configurations
+ * @param[in] encoder_config Encoder configurations
+ * @param[in,out] proc_ctx Video processing context
+ * @return int 0 on success, non-zero value on error
+ */
 extern "C" int process_video(
    const char *input_filename,
    const char *output_filename,
+    bool benchmark,
+    AVHWDeviceType hw_type,
    const FilterConfig *filter_config,
    EncoderConfig *encoder_config,
-    ProcessingStatus *status
+    VideoProcessingContext *proc_ctx
 ) {
-    AVFormatContext *fmt_ctx = nullptr;
+    AVFormatContext *ifmt_ctx = nullptr;
    AVFormatContext *ofmt_ctx = nullptr;
    AVCodecContext *dec_ctx = nullptr;
    AVCodecContext *enc_ctx = nullptr;
+    AVBufferRef *hw_ctx = nullptr;
+    int *stream_mapping = nullptr;
    Filter *filter = nullptr;
    int video_stream_index = -1;
-    int ret = 0;  // Initialize ret with 0 to assume success
+    int ret = 0;
+
+    // Initialize hardware device context
+    if (hw_type != AV_HWDEVICE_TYPE_NONE) {
+        ret = av_hwdevice_ctx_create(&hw_ctx, hw_type, NULL, NULL, 0);
+        if (ret < 0) {
+            fprintf(stderr, "Unable to initialize hardware device context\n");
+            return ret;
+        }
+    }

    // Initialize input
-    if (init_decoder(input_filename, &fmt_ctx, &dec_ctx, &video_stream_index) < 0) {
+    ret = init_decoder(hw_type, hw_ctx, input_filename, &ifmt_ctx, &dec_ctx, &video_stream_index);
+    if (ret < 0) {
        fprintf(stderr, "Failed to initialize decoder\n");
-        cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter);
-        return 1;
+        cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
+        return ret;
    }

    // Initialize output based on Libplacebo or RealESRGAN configuration
@ -230,17 +288,29 @@ extern "C" int process_video(
    // Initialize output encoder
    encoder_config->output_width = output_width;
    encoder_config->output_height = output_height;
-    if (init_encoder(output_filename, &ofmt_ctx, &enc_ctx, dec_ctx, encoder_config) < 0) {
+    ret = init_encoder(
+        hw_ctx,
+        output_filename,
+        ifmt_ctx,
+        &ofmt_ctx,
+        &enc_ctx,
+        dec_ctx,
+        encoder_config,
+        video_stream_index,
+        &stream_mapping
+    );
+    if (ret < 0) {
        fprintf(stderr, "Failed to initialize encoder\n");
-        cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter);
-        return 1;
+        cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
+        return ret;
    }

    // Write the output file header
-    if (avformat_write_header(ofmt_ctx, NULL) < 0) {
+    ret = avformat_write_header(ofmt_ctx, NULL);
+    if (ret < 0) {
        fprintf(stderr, "Error occurred when opening output file\n");
-        cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter);
-        return 1;
+        cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
+        return ret;
    }

    // Create and initialize the appropriate filter
@ -251,20 +321,20 @@ extern "C" int process_video(
            // Validate shader path
            if (!config.shader_path) {
                fprintf(stderr, "Shader path must be provided for the libplacebo filter\n");
-                cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter);
-                return 1;
+                cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
+                return -1;
            }

            // Validate output dimensions
            if (config.output_width <= 0 || config.output_height <= 0) {
                fprintf(stderr, "Output dimensions must be provided for the libplacebo filter\n");
-                cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter);
-                return 1;
+                cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
+                return -1;
            }

-            filter = new LibplaceboFilter(
+            filter = new LibplaceboFilter{
                config.output_width, config.output_height, std::filesystem::path(config.shader_path)
-            );
+            };
            break;
        }
        case FILTER_REALESRGAN: {
@ -273,55 +343,66 @@ extern "C" int process_video(
            // Validate model name
            if (!config.model) {
                fprintf(stderr, "Model name must be provided for the RealESRGAN filter\n");
-                cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter);
-                return 1;
+                cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
+                return -1;
            }

            // Validate scaling factor
            if (config.scaling_factor <= 0) {
                fprintf(stderr, "Scaling factor must be provided for the RealESRGAN filter\n");
-                cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter);
-                return 1;
+                cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
+                return -1;
            }

-            filter = new RealesrganFilter(
+            filter = new RealesrganFilter{
                config.gpuid, config.tta_mode, config.scaling_factor, config.model
-            );
+            };
            break;
        }
        default:
            fprintf(stderr, "Unknown filter type\n");
-            cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter);
-            return 1;
+            cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
+            return -1;
    }

    // Initialize the filter
-    if (filter->init(dec_ctx, enc_ctx) < 0) {
+    ret = filter->init(dec_ctx, enc_ctx, hw_ctx);
+    if (ret < 0) {
        fprintf(stderr, "Failed to initialize filter\n");
-        cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter);
-        return 1;
+        cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
+        return ret;
    }

    // Process frames
-    if ((ret =
-             process_frames(status, fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter, video_stream_index)
-        ) < 0) {
+    ret = process_frames(
+        encoder_config,
+        proc_ctx,
+        ifmt_ctx,
+        ofmt_ctx,
+        dec_ctx,
+        enc_ctx,
+        filter,
+        video_stream_index,
+        stream_mapping,
+        benchmark
+    );
+    if (ret < 0) {
        fprintf(stderr, "Error processing frames\n");
-        cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter);
-        return 1;
+        cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
+        return ret;
    }

    // Write the output file trailer
    av_write_trailer(ofmt_ctx);

    // Cleanup before returning
-    cleanup(fmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, filter);
+    cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);

    if (ret < 0 && ret != AVERROR_EOF) {
        char errbuf[AV_ERROR_MAX_STRING_SIZE];
        av_strerror(ret, errbuf, sizeof(errbuf));
        fprintf(stderr, "Error occurred: %s\n", errbuf);
-        return 1;
+        return ret;
    }
    return 0;
 }
--- a/src/realesrgan_filter.cpp
+++ b/src/realesrgan_filter.cpp
@ -1,18 +1,11 @@
+#include "realesrgan_filter.h"
+
 #include <cstdint>
 #include <cstdio>
-#include <filesystem>
 #include <string>

-extern "C" {
-#include <libavcodec/avcodec.h>
-#include <libavutil/avutil.h>
-#include <libavutil/imgutils.h>
-}
-
 #include "conversions.h"
 #include "fsutils.h"
-#include "realesrgan.h"
-#include "realesrgan_filter.h"

 RealesrganFilter::RealesrganFilter(
    int gpuid,
@ -37,7 +30,7 @@ RealesrganFilter::~RealesrganFilter() {
    }
 }

-int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) {
+int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) {
    // Construct the model paths using std::filesystem
    std::filesystem::path model_param_path;
    std::filesystem::path model_bin_path;
@ -62,6 +55,18 @@ int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) {
    std::filesystem::path model_param_full_path = find_resource_file(model_param_path);
    std::filesystem::path model_bin_full_path = find_resource_file(model_bin_path);

+    // Check if the model files exist
+    if (!std::filesystem::exists(model_param_full_path)) {
+        fprintf(
+            stderr, "RealESRGAN model param file not found: %s\n", model_param_full_path.c_str()
+        );
+        return -1;
+    }
+    if (!std::filesystem::exists(model_bin_full_path)) {
+        fprintf(stderr, "RealESRGAN model bin file not found: %s\n", model_bin_full_path.c_str());
+        return -1;
+    }
+
    // Create a new RealESRGAN instance
    realesrgan = new RealESRGAN(gpuid, tta_mode);

@ -95,12 +100,14 @@ int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx) {
    return 0;
 }

-AVFrame *RealesrganFilter::process_frame(AVFrame *input_frame) {
+/**
+ * @brief Upscale one frame with RealESRGAN.
+ *
+ * @param[in] input_frame Frame to convert and process
+ * @param[out] output_frame Receives the frame built from the RealESRGAN output
+ * @return int 0 on success, non-zero value on error
+ */
+int RealesrganFilter::process_frame(AVFrame *input_frame, AVFrame **output_frame) {
+    int ret;
+
    // Convert the input frame to RGB24
    ncnn::Mat input_mat = avframe_to_ncnn_mat(input_frame);
    if (input_mat.empty()) {
        fprintf(stderr, "Failed to convert AVFrame to ncnn::Mat\n");
-        return nullptr;
+        return -1;
    }

    // Allocate space for ouptut ncnn::Mat
@ -108,19 +115,20 @@ AVFrame *RealesrganFilter::process_frame(AVFrame *input_frame) {
    int output_height = input_mat.h * realesrgan->scale;
    ncnn::Mat output_mat = ncnn::Mat(output_width, output_height, (size_t)3, 3);

-    if (realesrgan->process(input_mat, output_mat) != 0) {
+    ret = realesrgan->process(input_mat, output_mat);
+    if (ret != 0) {
        fprintf(stderr, "RealESRGAN processing failed\n");
-        return nullptr;
+        return ret;
    }

    // Convert ncnn::Mat to AVFrame
-    AVFrame *output_frame = ncnn_mat_to_avframe(output_mat, output_pix_fmt);
+    *output_frame = ncnn_mat_to_avframe(output_mat, output_pix_fmt);
+    // ncnn_mat_to_avframe is defined elsewhere; guard against a null result
+    // before it is dereferenced below
+    if (*output_frame == nullptr) {
+        fprintf(stderr, "Failed to convert ncnn::Mat to AVFrame\n");
+        return -1;
+    }

    // Rescale PTS to encoder's time base
-    output_frame->pts = av_rescale_q(input_frame->pts, input_time_base, output_time_base);
+    (*output_frame)->pts = av_rescale_q(input_frame->pts, input_time_base, output_time_base);

    // Return the processed frame to the caller
-    return output_frame;
+    return ret;
 }

 int RealesrganFilter::flush(std::vector<AVFrame *> &processed_frames) {
--- a/src/video2x.c
+++ b/src/video2x.c
@ -1,7 +1,19 @@
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <threads.h>
+#include <time.h>

+#ifdef _WIN32
+#include <conio.h>
+#else
+#include <fcntl.h>
+#include <termios.h>
+#include <unistd.h>
+#endif
+
+#include <libavutil/hwcontext.h>
 #include <libavutil/pixdesc.h>
 #include <libavutil/pixfmt.h>

@ -11,14 +23,35 @@

 const char *VIDEO2X_VERSION = "6.0.0";

+// Set UNIX terminal input to non-blocking mode
+#ifndef _WIN32
+/**
+ * Switch stdin between its normal mode and unbuffered, non-echoing,
+ * non-blocking mode.
+ *
+ * enable == true saves the current terminal attributes and file status flags,
+ * then clears ICANON and ECHO and adds O_NONBLOCK. enable == false restores
+ * whatever the last successful enable call saved, and does nothing otherwise.
+ */
+void set_nonblocking_input(bool enable) {
+    static struct termios saved_termios;
+    static int saved_flags = 0;
+    static bool have_termios = false;
+    static bool have_flags = false;
+
+    if (enable) {
+        // tcgetattr() fails when stdin is not a terminal; leave it untouched then
+        if (tcgetattr(STDIN_FILENO, &saved_termios) == 0) {
+            struct termios raw = saved_termios;
+            raw.c_lflag &= ~(ICANON | ECHO);
+            tcsetattr(STDIN_FILENO, TCSANOW, &raw);
+            have_termios = true;
+        }
+
+        // Add O_NONBLOCK to the existing flags instead of overwriting them
+        int flags = fcntl(STDIN_FILENO, F_GETFL);
+        if (flags != -1) {
+            saved_flags = flags;
+            fcntl(STDIN_FILENO, F_SETFL, flags | O_NONBLOCK);
+            have_flags = true;
+        }
+    } else {
+        if (have_termios) {
+            tcsetattr(STDIN_FILENO, TCSANOW, &saved_termios);
+        }
+        // Restore the saved flags rather than forcing them to 0
+        if (have_flags) {
+            fcntl(STDIN_FILENO, F_SETFL, saved_flags);
+        }
+    }
+}
+#endif
+
 // Define command line options
 static struct option long_options[] = {
+    {"version", no_argument, NULL, 'v'},
+    {"help", no_argument, NULL, 0},
+
    // General options
    {"input", required_argument, NULL, 'i'},
    {"output", required_argument, NULL, 'o'},
    {"filter", required_argument, NULL, 'f'},
-    {"version", no_argument, NULL, 'v'},
-    {"help", no_argument, NULL, 0},
+    {"hwaccel", required_argument, NULL, 'a'},
+    {"nocopystreams", no_argument, NULL, 0},
+    {"benchmark", no_argument, NULL, 0},

    // Encoder options
    {"codec", required_argument, NULL, 'c'},
@ -27,7 +60,7 @@ static struct option long_options[] = {
    {"bitrate", required_argument, NULL, 'b'},
    {"crf", required_argument, NULL, 'q'},

-    // Libplacebo options
+    // libplacebo options
    {"shader", required_argument, NULL, 's'},
    {"width", required_argument, NULL, 'w'},
    {"height", required_argument, NULL, 'h'},
@ -45,6 +78,9 @@ struct arguments {
    const char *input_filename;
    const char *output_filename;
    const char *filter_type;
+    const char *hwaccel;
+    bool nocopystreams;
+    bool benchmark;

    // Encoder options
    const char *codec;
@ -64,6 +100,14 @@ struct arguments {
    int scaling_factor;
 };

+struct ProcessVideoThreadArguments {  // Argument bundle handed to process_video_thread
+    struct arguments *arguments;  // Parsed command line arguments (file names, benchmark flag)
+    enum AVHWDeviceType hw_device_type;  // Forwarded to process_video as the hardware device type
+    struct FilterConfig *filter_config;  // Forwarded to process_video
+    struct EncoderConfig *encoder_config;  // Forwarded to process_video
+    struct VideoProcessingContext *proc_ctx;  // Shared context; the thread sets `completed` on it
+};
+
 const char *valid_models[] = {
    "realesrgan-plus",
    "realesrgan-plus-anime",
@ -84,19 +128,23 @@ int is_valid_realesrgan_model(const char *model) {

 void print_help() {
    printf("Usage: video2x [OPTIONS]\n");
-    printf("\nGeneral Options:\n");
+    printf("\nOptions:\n");
+    printf("  -v, --version		Print program version\n");
+    printf("  -?, --help		Display this help page\n");
+    printf("\nGeneral Processing Options:\n");
    printf("  -i, --input		Input video file path\n");
    printf("  -o, --output		Output video file path\n");
    printf("  -f, --filter		Filter to use: 'libplacebo' or 'realesrgan'\n");
-    printf("  -v, --version		Print program version\n");
-    printf("  --help		Display this help page\n");
+    printf("  -a, --hwaccel		Hardware acceleration method (default: none)\n");
+    printf("  --nocopystreams	Do not copy audio and subtitle streams\n");
+    printf("  --benchmark		Discard processed frames and calculate average FPS\n");

    printf("\nEncoder Options (Optional):\n");
    printf("  -c, --codec		Output codec (default: libx264)\n");
-    printf("  -p, --preset		Encoder preset (default: veryslow)\n");
-    printf("  -x, --pixfmt		Output pixel format (default: yuv420p)\n");
-    printf("  -b, --bitrate		Bitrate in bits per second (default: 2000000)\n");
-    printf("  -q, --crf		Constant Rate Factor (default: 17.0)\n");
+    printf("  -p, --preset		Encoder preset (default: slow)\n");
+    printf("  -x, --pixfmt		Output pixel format (default: auto)\n");
+    printf("  -b, --bitrate		Bitrate in bits per second (default: 0 (VBR))\n");
+    printf("  -q, --crf		Constant Rate Factor (default: 20.0)\n");

    printf("\nlibplacebo Options:\n");
    printf("  -s, --shader		Name or path to custom GLSL shader file\n");
@ -107,6 +155,10 @@ void print_help() {
    printf("  -g, --gpuid		Vulkan GPU ID (default: 0)\n");
    printf("  -m, --model		Name of the model to use\n");
    printf("  -r, --scale		Scaling factor (2, 3, or 4)\n");
+
+    printf("\nExamples Usage:\n");
+    printf("  video2x -i in.mp4 -o out.mp4 -f libplacebo -s anime4k-mode-a -w 3840 -h 2160\n");
+    printf("  video2x -i in.mp4 -o out.mp4 -f realesrgan -m realesr-animevideov3 -r 4\n");
 }

 void parse_arguments(int argc, char **argv, struct arguments *arguments) {
@ -117,13 +169,16 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) {
    arguments->input_filename = NULL;
    arguments->output_filename = NULL;
    arguments->filter_type = NULL;
+    arguments->hwaccel = "none";
+    arguments->nocopystreams = false;
+    arguments->benchmark = false;

    // Encoder options
    arguments->codec = "libx264";
-    arguments->preset = "veryslow";
-    arguments->pix_fmt = "yuv420p";
-    arguments->bitrate = 2 * 1000 * 1000;
-    arguments->crf = 17.0;
+    arguments->preset = "slow";
+    arguments->pix_fmt = NULL;
+    arguments->bitrate = 0;
+    arguments->crf = 20.0;

    // libplacebo options
    arguments->shader_path = NULL;
@ -135,8 +190,9 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) {
    arguments->model = NULL;
    arguments->scaling_factor = 0;

-    while ((c = getopt_long(argc, argv, "i:o:f:c:x:p:b:q:s:w:h:r:m:v", long_options, &option_index)
-           ) != -1) {
+    while ((c = getopt_long(
+                argc, argv, "i:o:f:a:c:x:p:b:q:s:w:h:r:m:v", long_options, &option_index
+            )) != -1) {
        switch (c) {
            case 'i':
                arguments->input_filename = optarg;
@ -147,6 +203,9 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) {
            case 'f':
                arguments->filter_type = optarg;
                break;
+            case 'a':
+                arguments->hwaccel = optarg;
+                break;
            case 'c':
                arguments->codec = optarg;
                break;
@ -195,7 +254,8 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) {
                if (!is_valid_realesrgan_model(arguments->model)) {
                    fprintf(
                        stderr,
-                        "Error: Invalid model specified. Must be 'realesrgan-plus', 'realesrgan-plus-anime', or 'realesr-animevideov3'.\n"
+                        "Error: Invalid model specified. Must be 'realesrgan-plus', "
+                        "'realesrgan-plus-anime', or 'realesr-animevideov3'.\n"
                    );
                    exit(1);
                }
@ -209,12 +269,16 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) {
                }
                break;
            case 'v':
-                printf("video2x %s\n", VIDEO2X_VERSION);
+                printf("Video2X v%s\n", VIDEO2X_VERSION);
                exit(0);
-            case 0:  // Long-only options without short equivalents (e.g., help)
+            case 0:  // Long-only options without short equivalents
                if (strcmp(long_options[option_index].name, "help") == 0) {
                    print_help();
                    exit(0);
+                } else if (strcmp(long_options[option_index].name, "nocopystreams") == 0) {
+                    arguments->nocopystreams = true;
+                } else if (strcmp(long_options[option_index].name, "benchmark") == 0) {
+                    arguments->benchmark = true;
                }
                break;
            default:
@ -224,8 +288,13 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) {
    }

    // Check for required arguments
-    if (!arguments->input_filename || !arguments->output_filename) {
-        fprintf(stderr, "Error: Input and output files are required.\n");
+    if (!arguments->input_filename) {
+        fprintf(stderr, "Error: Input file path is required.\n");
+        exit(1);
+    }
+
+    if (!arguments->output_filename && !arguments->benchmark) {
+        fprintf(stderr, "Error: Output file path is required.\n");
        exit(1);
    }

@ -239,7 +308,8 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) {
            arguments->output_height == 0) {
            fprintf(
                stderr,
-                "Error: For libplacebo, shader name/path (-s), width (-w), and height (-e) are required.\n"
+                "Error: For libplacebo, shader name/path (-s), width (-w), "
+                "and height (-e) are required.\n"
            );
            exit(1);
        }
@ -253,7 +323,40 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) {
    }
 }

+// Wrapper function for video processing thread
+int process_video_thread(void *arg) {
+    // The thread entry point receives its inputs as one opaque pointer
+    struct ProcessVideoThreadArguments *bundle = (struct ProcessVideoThreadArguments *)arg;
+    struct VideoProcessingContext *ctx = bundle->proc_ctx;
+
+    // Run the processing call with the values bundled by the creator of the thread
+    int status = process_video(
+        bundle->arguments->input_filename,
+        bundle->arguments->output_filename,
+        bundle->arguments->benchmark,
+        bundle->hw_device_type,
+        bundle->filter_config,
+        bundle->encoder_config,
+        ctx
+    );
+
+    // Flag completion on the shared context before handing the result back
+    ctx->completed = true;
+    return status;
+}
+
 int main(int argc, char **argv) {
+    // CLI entry point: parses arguments, builds the encoder/processing
+    // configuration, runs process_video() on a worker thread while this thread
+    // shows progress and handles pause/abort keys, then prints a summary.
+    // Returns 0 on success, 2 when the user aborted, and non-zero on errors.
+
+    // Print help if no arguments are provided
+    if (argc < 2) {
+        print_help();
+        return 1;
+    }
+
+    // Parse command line arguments
    struct arguments arguments;
    parse_arguments(argc, argv, &arguments);

@ -283,16 +386,20 @ int main(int argc, char **argv) {
    }

    // Parse pixel format to AVPixelFormat
-    enum AVPixelFormat pix_fmt = av_get_pix_fmt(arguments.pix_fmt);
+    // pix_fmt stays AV_PIX_FMT_NONE when no pixel format was given on the command line
+    enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE;
+    if (arguments.pix_fmt) {
+        pix_fmt = av_get_pix_fmt(arguments.pix_fmt);
        if (pix_fmt == AV_PIX_FMT_NONE) {
            fprintf(stderr, "Error: Invalid pixel format '%s'.\n", arguments.pix_fmt);
            return 1;
        }
+    }

    // Setup encoder configuration
    struct EncoderConfig encoder_config = {
        .output_width = 0,   // To be filled by libvideo2x
        .output_height = 0,  // To be filled by libvideo2x
+        .copy_streams = !arguments.nocopystreams,
        .codec = codec->id,
        .pix_fmt = pix_fmt,
        .preset = arguments.preset,
@ -300,26 +407,134 @ int main(int argc, char **argv) {
        .crf = arguments.crf,
    };

-    // Setup struct to store processing status
-    struct ProcessingStatus status = {0};
-
-    // Process the video
-    if (process_video(
-            arguments.input_filename,
-            arguments.output_filename,
-            &filter_config,
-            &encoder_config,
-            &status
-        )) {
-        fprintf(stderr, "Video processing failed.\n");
+    // Parse hardware acceleration method
+    enum AVHWDeviceType hw_device_type = AV_HWDEVICE_TYPE_NONE;
+    if (strcmp(arguments.hwaccel, "none") != 0) {
+        hw_device_type = av_hwdevice_find_type_by_name(arguments.hwaccel);
+        if (hw_device_type == AV_HWDEVICE_TYPE_NONE) {
+            fprintf(stderr, "Error: Invalid hardware device type '%s'.\n", arguments.hwaccel);
            return 1;
        }
+    }
+
+    // Setup struct to store processing context
+    struct VideoProcessingContext proc_ctx = {
+        .processed_frames = 0,
+        .total_frames = 0,
+        .start_time = time(NULL),
+        .pause = false,
+        .abort = false,
+        .completed = false
+    };
+
+    // Create a ThreadArguments struct to hold all the arguments for the thread
+    struct ProcessVideoThreadArguments thread_args = {
+        .arguments = &arguments,
+        .hw_device_type = hw_device_type,
+        .filter_config = &filter_config,
+        .encoder_config = &encoder_config,
+        .proc_ctx = &proc_ctx
+    };
+
+// Enable non-blocking input
+#ifndef _WIN32
+    set_nonblocking_input(true);
+#endif
+
+    // Create a thread for video processing
+    thrd_t processing_thread;
+    if (thrd_create(&processing_thread, process_video_thread, &thread_args) != thrd_success) {
+        fprintf(stderr, "Failed to create processing thread\n");
+#ifndef _WIN32
+        // Undo the non-blocking setup above so the terminal is not left altered
+        set_nonblocking_input(false);
+#endif
+        return 1;
+    }
+    printf("[Video2X] Video processing started.\n");
+    printf("[Video2X] Press SPACE to pause/resume, 'q' to abort.\n");
+
+    // Main thread loop to display progress and handle input
+    while (!proc_ctx.completed) {
+        // -1 means "no key pressed during this iteration"
+        int ch = -1;
+
+        // Check for key press
+#ifdef _WIN32
+        if (_kbhit()) {
+            ch = _getch();
+        }
+#else
+        ch = getchar();
+#endif
+
+        if (ch == ' ' || ch == '\n') {
+            // Toggle pause state
+            proc_ctx.pause = !proc_ctx.pause;
+            if (proc_ctx.pause) {
+                printf("\n[Video2X] Processing paused. Press SPACE to resume, 'q' to abort.");
+            } else {
+                printf("\n[Video2X] Resuming processing...");
+            }
+            fflush(stdout);
+        } else if (ch == 'q' || ch == 'Q') {
+            // Abort processing
+            printf("\n[Video2X] Aborting processing...");
+            fflush(stdout);
+            proc_ctx.abort = true;
+            break;
+        }
+
+        // Display progress
+        // NOTE(review): %ld assumes the frame counters are of type long - confirm
+        // against the VideoProcessingContext declaration.
+        if (!proc_ctx.pause && proc_ctx.total_frames > 0) {
+            printf(
+                "\r[Video2X] Processing frame %ld/%ld (%.2f%%); time elapsed: %llds",
+                proc_ctx.processed_frames,
+                proc_ctx.total_frames,
+                proc_ctx.total_frames > 0
+                    ? proc_ctx.processed_frames * 100.0 / proc_ctx.total_frames
+                    : 0.0,
+                // time_t has no portable printf specifier; widen it explicitly
+                (long long)(time(NULL) - proc_ctx.start_time)
+            );
+            fflush(stdout);
+        }
+
+        // Sleep for a short duration
+        thrd_sleep(&(struct timespec){.tv_sec = 0, .tv_nsec = 100000000}, NULL);  // Sleep for 100ms
+    }
+    puts("");  // Print newline after progress bar is complete
+
+// Restore terminal to blocking mode
+#ifndef _WIN32
+    set_nonblocking_input(false);
+#endif
+
+    // Join the processing thread to ensure it completes before exiting
+    int process_result = 0;
+    if (thrd_join(processing_thread, &process_result) != thrd_success) {
+        // Without a successful join the thread's result code is not available
+        fprintf(stderr, "Failed to join processing thread\n");
+        return 1;
+    }
+
+    if (proc_ctx.abort) {
+        fprintf(stderr, "Video processing aborted\n");
+        return 2;
+    }
+
+    if (process_result != 0) {
+        fprintf(stderr, "Video processing failed\n");
+        return process_result;
+    }
+
+    // Calculate statistics
+    time_t time_elapsed = time(NULL) - proc_ctx.start_time;
+    // Clamp the divisor to 1 second so very short runs do not divide by zero
+    float average_speed_fps =
+        (float)proc_ctx.processed_frames / (time_elapsed > 0 ? time_elapsed : 1);

    // Print processing summary
-    printf("====== Video2X Processing summary ======\n");
+    printf("====== Video2X %s summary ======\n", arguments.benchmark ? "Benchmark" : "Processing");
    printf("Video file processed: %s\n", arguments.input_filename);
-    printf("Total frames processed: %ld\n", status.processed_frames);
-    printf("Total time taken: %lds\n", time(NULL) - status.start_time);
+    printf("Total frames processed: %ld\n", proc_ctx.processed_frames);
+    printf("Total time taken: %llds\n", (long long)time_elapsed);
+    printf("Average processing speed: %.2f FPS\n", average_speed_fps);
+
+    // Print additional information if not in benchmark mode
+    if (!arguments.benchmark) {
        printf("Output written to: %s\n", arguments.output_filename);
+    }
+
    return 0;
 }