refactor(*): refactored the encoder and decoder into classes

Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
k4yt3x 2024-11-17 00:00:00 +00:00
parent b520d51c6c
commit 169509b7d4
No known key found for this signature in database
6 changed files with 318 additions and 347 deletions

View File

@ -1,6 +1,10 @@
# Minimum supported CMake release and top-level project declaration (C++ only).
cmake_minimum_required(VERSION 3.10)
project(video2x VERSION 6.1.1 LANGUAGES CXX)
# Opt in to the NEW behavior of policy CMP0167 when the running CMake knows it.
# Per the CMake documentation, CMP0167 (introduced in CMake 3.30) removes the
# bundled FindBoost module so find_package(Boost) uses Boost's own
# BoostConfig.cmake package. The if(POLICY ...) guard keeps older CMake
# releases, which do not define this policy, from failing here.
if(POLICY CMP0167)
cmake_policy(SET CMP0167 NEW)
endif()
# Set the C++ standard
# C++17 is requested for all targets; CMAKE_CXX_STANDARD_REQUIRED makes it a
# hard requirement instead of letting CMake fall back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
@ -13,9 +17,9 @@ endif()
# Set the default optimization flags for Release builds
if(CMAKE_BUILD_TYPE STREQUAL "Release")
if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox /GL /LTCG /MD /DNDEBUG")
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native -flto")
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -s")
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -s")
@ -23,9 +27,9 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release")
endif()
# Set global compile options for all targets
if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
add_compile_options(/W4 /permissive-)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(-Wall -Wextra -Wpedantic -Wconversion -Wshadow)
endif()
@ -70,7 +74,7 @@ if(WIN32)
set(SPIRV_BUILD_PATH
${CMAKE_BINARY_DIR}/realesrgan-prefix/src/realesrgan-build/ncnn/glslang/SPIRV
)
if (CMAKE_BUILD_TYPE STREQUAL "Release")
if(CMAKE_BUILD_TYPE STREQUAL "Release")
set(SPIRV_LIB ${SPIRV_BUILD_PATH}/Release/SPIRV.lib)
else()
set(SPIRV_LIB ${SPIRV_BUILD_PATH}/Debug/SPIRVd.lib)
@ -203,7 +207,7 @@ else()
endif()
# spdlog
if (USE_SYSTEM_SPDLOG)
if(USE_SYSTEM_SPDLOG)
find_package(spdlog REQUIRED)
list(APPEND ALL_INCLUDE_DIRS ${spdlog_INCLUDE_DIRS})
set(SPDLOG_LIB spdlog::spdlog)
@ -214,7 +218,7 @@ endif()
list(APPEND ALL_LIBRARIES ${SPDLOG_LIB})
# Boost
if (USE_SYSTEM_BOOST)
if(USE_SYSTEM_BOOST)
find_package(Boost REQUIRED COMPONENTS program_options)
list(APPEND ALL_INCLUDE_DIRS ${Boost_INCLUDE_DIRS})
else()
@ -228,7 +232,7 @@ else()
endif()
set(BOOST_LIB Boost::program_options)
if (BUILD_VIDEO2X_CLI)
if(BUILD_VIDEO2X_CLI)
find_package(Vulkan REQUIRED)
set(VULKAN_LIB Vulkan::Vulkan)
endif()
@ -295,7 +299,7 @@ endif()
target_link_libraries(libvideo2x PRIVATE ${ALL_LIBRARIES})
if(NOT WIN32)
if (USE_SYSTEM_NCNN)
if(USE_SYSTEM_NCNN)
target_link_libraries(libvideo2x PUBLIC ncnn)
else()
target_link_libraries(libvideo2x PRIVATE ncnn)
@ -303,7 +307,7 @@ if(NOT WIN32)
endif()
# Create the executable 'video2x'
if (BUILD_VIDEO2X_CLI)
if(BUILD_VIDEO2X_CLI)
file(GLOB VIDEO2X_SOURCES tools/video2x/src/*.cpp)
add_executable(video2x ${VIDEO2X_SOURCES})
set_target_properties(video2x PROPERTIES OUTPUT_NAME video2x)

View File

@ -8,13 +8,25 @@ extern "C" {
#include <libavformat/avformat.h>
}
int init_decoder(
AVHWDeviceType hw_type,
AVBufferRef *hw_ctx,
std::filesystem::path in_fpath,
AVFormatContext **fmt_ctx,
AVCodecContext **dec_ctx,
int *in_vstream_idx
);
// Owns the FFmpeg input (demuxer) context and the video decoder context for a
// single input file.
//
// Usage: default-construct, call init() once, then read the resulting contexts
// through the accessors. The destructor frees the decoder context and closes
// the input, so pointers returned by the accessors must not outlive the object.
class Decoder {
   public:
    // Creates an empty decoder: both contexts are null and the video stream
    // index is -1 until init() succeeds.
    Decoder();

    // Frees the decoder context and closes the input file, if they were opened.
    ~Decoder();

    // The object owns raw FFmpeg pointers that the destructor frees. A copy
    // would leave two objects freeing the same contexts (double free), so
    // copying is disabled.
    Decoder(const Decoder &) = delete;
    Decoder &operator=(const Decoder &) = delete;

    // Opens `in_fpath`, selects a video stream and opens a decoder for it.
    //
    // hw_type / hw_ctx: when `hw_ctx` is non-null, a reference to it is attached
    //   to the decoder context and the hardware pixel format matching `hw_type`
    //   is selected; pass nullptr for software decoding.
    // Returns 0 on success or a negative AVERROR code on failure.
    int init(AVHWDeviceType hw_type, AVBufferRef *hw_ctx, const std::filesystem::path &in_fpath);

    // Non-owning accessors; each returns the value stored by init()
    // (nullptr / -1 if init() has not completed successfully).
    AVFormatContext *get_format_context() const;
    AVCodecContext *get_codec_context() const;
    int get_video_stream_index() const;

   private:
    // Hardware pixel format chosen by init() and consulted by get_hw_format().
    // NOTE(review): this is static, so the value is shared by every Decoder
    // instance in the process; confirm only one hardware decoder is used at a time.
    static enum AVPixelFormat hw_pix_fmt_;

    // Callback installed as AVCodecContext::get_format; returns the entry of
    // `pix_fmts` equal to hw_pix_fmt_, or AV_PIX_FMT_NONE when none matches.
    static enum AVPixelFormat
    get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts);

    AVFormatContext *fmt_ctx_;  // input format context, owned
    AVCodecContext *dec_ctx_;   // video decoder context, owned
    int in_vstream_idx_;        // index of the selected video stream, -1 if unset
};
#endif // DECODER_H

View File

@ -1,37 +1,43 @@
#ifndef ENCODER_H
#define ENCODER_H
#include <cstdint>
#include <filesystem>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
}
#include "libvideo2x.h"
#include "libvideo2x/libvideo2x.h"
int init_encoder(
AVBufferRef *hw_ctx,
std::filesystem::path out_fpath,
AVFormatContext *ifmt_ctx,
AVFormatContext **ofmt_ctx,
AVCodecContext **enc_ctx,
AVCodecContext *dec_ctx,
EncoderConfig *encoder_config,
int in_vstream_idx,
int *out_vstream_idx,
int **stream_map
);
// Owns the FFmpeg output (muxer) context, the video encoder context and the
// optional input-to-output stream map for a single output file.
//
// NOTE(review): the destructor frees the raw pointers held below, but copy
// construction/assignment are not disabled; copying an Encoder would free the
// same contexts twice. Consider deleting the copy operations.
class Encoder {
public:
// Creates an empty encoder (null contexts, stream index -1, no stream map).
Encoder();
// Frees the encoder context, closes and frees the output context, and frees
// the stream map, for whichever of them were allocated.
~Encoder();
// NOTE(review): this five-parameter declaration matches the signature of the
// pre-refactor free function write_frame(); no Encoder:: definition with this
// signature is visible. Confirm whether it should still be declared here.
int write_frame(
AVFrame *frame,
AVCodecContext *enc_ctx,
AVFormatContext *ofmt_ctx,
int out_vstream_idx,
int64_t frame_idx
);
// Allocates the output context for `out_fpath`, creates the output video
// stream, configures and opens the encoder selected by `encoder_config`, maps
// additional input streams when encoder_config->copy_streams is set, and
// opens the output file when the format requires one.
// `hw_ctx` may be nullptr; when non-null a reference is attached to the
// encoder context. Returns 0 on success or a negative AVERROR code.
int init(
AVBufferRef *hw_ctx,
const std::filesystem::path &out_fpath,
AVFormatContext *ifmt_ctx,
AVCodecContext *dec_ctx,
EncoderConfig *encoder_config,
int in_vstream_idx
);
// NOTE(review): this matches the pre-refactor free function flush_encoder();
// the member that is actually defined is flush() below. Confirm whether this
// declaration should remain.
int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vstream_idx);
// Converts `frame` to the encoder's pixel format when it differs, sends it to
// the encoder and writes every packet the encoder produces to the output.
// Returns 0 on success or a negative AVERROR code.
int write_frame(AVFrame *frame, int64_t frame_idx);
// Signals end of stream to the encoder and writes the remaining packets to
// the output. Returns 0 on success or a negative AVERROR code.
int flush();
// Non-owning accessors for the state set up by init().
AVCodecContext *get_encoder_context() const;
AVFormatContext *get_format_context() const;
// Returns the stream map: indexed by input stream index, each entry holds the
// output stream index or -1 for an unmapped stream. It stays nullptr unless
// init() ran with encoder_config->copy_streams set.
int *get_stream_map() const;
int get_output_video_stream_index() const;
private:
AVFormatContext *ofmt_ctx_;  // output format context, owned
AVCodecContext *enc_ctx_;  // video encoder context, owned
int out_vstream_idx_;  // index of the output video stream, -1 if unset
int *stream_map_;  // input -> output stream index map, owned; may be nullptr
};
#endif // ENCODER_H

View File

@ -1,17 +1,25 @@
#include "decoder.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <spdlog/spdlog.h>
static enum AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE;
enum AVPixelFormat Decoder::hw_pix_fmt_ = AV_PIX_FMT_NONE;
// Callback function to choose the hardware-accelerated pixel format
static enum AVPixelFormat get_hw_format(AVCodecContext *_, const enum AVPixelFormat *pix_fmts) {
Decoder::Decoder() : fmt_ctx_(nullptr), dec_ctx_(nullptr), in_vstream_idx_(-1) {}
// Release the decoder context first, then close the input file.
// avcodec_free_context() and avformat_close_input() both take the address of
// the pointer and reset it to NULL themselves, so the members end up null
// without an explicit assignment.
Decoder::~Decoder() {
    if (dec_ctx_ != nullptr) {
        avcodec_free_context(&dec_ctx_);
    }

    if (fmt_ctx_ != nullptr) {
        avformat_close_input(&fmt_ctx_);
    }
}
enum AVPixelFormat Decoder::get_hw_format(AVCodecContext *_, const enum AVPixelFormat *pix_fmts) {
for (const enum AVPixelFormat *p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
if (*p == hw_pix_fmt) {
if (*p == hw_pix_fmt_) {
return *p;
}
}
@ -19,39 +27,36 @@ static enum AVPixelFormat get_hw_format(AVCodecContext *_, const enum AVPixelFor
return AV_PIX_FMT_NONE;
}
int init_decoder(
int Decoder::init(
AVHWDeviceType hw_type,
AVBufferRef *hw_ctx,
std::filesystem::path in_fpath,
AVFormatContext **fmt_ctx,
AVCodecContext **dec_ctx,
int *in_vstream_idx
const std::filesystem::path &in_fpath
) {
AVFormatContext *ifmt_ctx = NULL;
AVCodecContext *codec_ctx = NULL;
int ret;
if ((ret = avformat_open_input(&ifmt_ctx, in_fpath.u8string().c_str(), NULL, NULL)) < 0) {
spdlog::error("Could not open input file '{}'", in_fpath.u8string().c_str());
// Open the input file
if ((ret = avformat_open_input(&fmt_ctx_, in_fpath.u8string().c_str(), nullptr, nullptr)) < 0) {
spdlog::error("Could not open input file '{}'", in_fpath.u8string());
return ret;
}
if ((ret = avformat_find_stream_info(ifmt_ctx, NULL)) < 0) {
// Retrieve stream information
if ((ret = avformat_find_stream_info(fmt_ctx_, nullptr)) < 0) {
spdlog::error("Failed to retrieve input stream information");
return ret;
}
// Find the best video stream
ret = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
ret = av_find_best_stream(fmt_ctx_, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
if (ret < 0) {
spdlog::error("Could not find video stream in the input file");
return ret;
}
int stream_index = ret;
AVStream *video_stream = ifmt_ctx->streams[stream_index];
AVStream *video_stream = fmt_ctx_->streams[stream_index];
// Set up the decoder
// Find the decoder for the video stream
const AVCodec *decoder = avcodec_find_decoder(video_stream->codecpar->codec_id);
if (!decoder) {
spdlog::error(
@ -61,16 +66,28 @@ int init_decoder(
return AVERROR_DECODER_NOT_FOUND;
}
codec_ctx = avcodec_alloc_context3(decoder);
if (!codec_ctx) {
// Allocate the decoder context
dec_ctx_ = avcodec_alloc_context3(decoder);
if (!dec_ctx_) {
spdlog::error("Failed to allocate the decoder context");
return AVERROR(ENOMEM);
}
// Copy codec parameters from input stream to decoder context
if ((ret = avcodec_parameters_to_context(dec_ctx_, video_stream->codecpar)) < 0) {
spdlog::error("Failed to copy decoder parameters to input decoder context");
return ret;
}
// Set the time base and frame rate
dec_ctx_->time_base = video_stream->time_base;
dec_ctx_->pkt_timebase = video_stream->time_base;
dec_ctx_->framerate = av_guess_frame_rate(fmt_ctx_, video_stream, nullptr);
// Set hardware device context
if (hw_ctx != nullptr) {
codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx);
codec_ctx->get_format = get_hw_format;
dec_ctx_->hw_device_ctx = av_buffer_ref(hw_ctx);
dec_ctx_->get_format = get_hw_format;
// Automatically determine the hardware pixel format
for (int i = 0;; i++) {
@ -81,36 +98,35 @@ int init_decoder(
decoder->name,
av_hwdevice_get_type_name(hw_type)
);
avcodec_free_context(&codec_ctx);
avformat_close_input(&ifmt_ctx);
return AVERROR(ENOSYS);
}
if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX &&
config->device_type == hw_type) {
hw_pix_fmt = config->pix_fmt;
hw_pix_fmt_ = config->pix_fmt;
break;
}
}
}
if ((ret = avcodec_parameters_to_context(codec_ctx, video_stream->codecpar)) < 0) {
spdlog::error("Failed to copy decoder parameters to input decoder context");
return ret;
}
// Set decoder time base and frame rate
codec_ctx->time_base = video_stream->time_base;
codec_ctx->pkt_timebase = video_stream->time_base;
codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, video_stream, NULL);
if ((ret = avcodec_open2(codec_ctx, decoder, NULL)) < 0) {
// Open the decoder
if ((ret = avcodec_open2(dec_ctx_, decoder, nullptr)) < 0) {
spdlog::error("Failed to open decoder for stream #{}", stream_index);
return ret;
}
*fmt_ctx = ifmt_ctx;
*dec_ctx = codec_ctx;
*in_vstream_idx = stream_index;
in_vstream_idx_ = stream_index;
return 0;
}
// Input format context opened by init(); nullptr before a successful open.
// The Decoder keeps ownership of the returned pointer.
AVFormatContext *Decoder::get_format_context() const {
    return fmt_ctx_;
}

// Video decoder context allocated by init(); nullptr before allocation.
// The Decoder keeps ownership of the returned pointer.
AVCodecContext *Decoder::get_codec_context() const {
    return dec_ctx_;
}

// Index of the video stream selected by init(); -1 until init() succeeds.
int Decoder::get_video_stream_index() const {
    return in_vstream_idx_;
}

View File

@ -1,138 +1,152 @@
#include "encoder.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cstdint>
#include <spdlog/spdlog.h>
extern "C" {
#include <libavutil/pixdesc.h>
#include <libavutil/opt.h>
}
#include <spdlog/spdlog.h>
#include "avutils.h"
#include "conversions.h"
int init_encoder(
Encoder::Encoder()
: ofmt_ctx_(nullptr), enc_ctx_(nullptr), out_vstream_idx_(-1), stream_map_(nullptr) {}
// Tear down everything init() may have allocated: the encoder context, the
// output file and its format context, and the stream map.
Encoder::~Encoder() {
    if (enc_ctx_ != nullptr) {
        avcodec_free_context(&enc_ctx_);
    }

    if (ofmt_ctx_ != nullptr) {
        // init() only opens an AVIO handle for formats without AVFMT_NOFILE,
        // so only those formats have a handle to close here.
        const bool format_uses_file = (ofmt_ctx_->oformat->flags & AVFMT_NOFILE) == 0;
        if (format_uses_file) {
            avio_closep(&ofmt_ctx_->pb);
        }
        avformat_free_context(ofmt_ctx_);
    }

    if (stream_map_ != nullptr) {
        av_free(stream_map_);
    }
}
int Encoder::init(
AVBufferRef *hw_ctx,
std::filesystem::path out_fpath,
const std::filesystem::path &out_fpath,
AVFormatContext *ifmt_ctx,
AVFormatContext **ofmt_ctx,
AVCodecContext **enc_ctx,
AVCodecContext *dec_ctx,
EncoderConfig *encoder_config,
int in_vstream_idx,
int *out_vstream_idx,
int **stream_map
int in_vstream_idx
) {
AVFormatContext *fmt_ctx = NULL;
AVCodecContext *codec_ctx = NULL;
int ret;
avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fpath.u8string().c_str());
if (!fmt_ctx) {
// Allocate the output format context
avformat_alloc_output_context2(&ofmt_ctx_, nullptr, nullptr, out_fpath.u8string().c_str());
if (!ofmt_ctx_) {
spdlog::error("Could not create output context");
return AVERROR_UNKNOWN;
}
// Find the encoder
const AVCodec *encoder = avcodec_find_encoder(encoder_config->codec);
if (!encoder) {
spdlog::error(
"Required video encoder not found for vcodec {}",
avcodec_get_name(encoder_config->codec)
"Required video encoder not found for codec {}", avcodec_get_name(encoder_config->codec)
);
return AVERROR_ENCODER_NOT_FOUND;
}
// Create a new video stream in the output file
AVStream *out_vstream = avformat_new_stream(fmt_ctx, NULL);
AVStream *out_vstream = avformat_new_stream(ofmt_ctx_, nullptr);
if (!out_vstream) {
spdlog::error("Failed to allocate the output video stream");
return AVERROR_UNKNOWN;
}
*out_vstream_idx = out_vstream->index;
out_vstream_idx_ = out_vstream->index;
codec_ctx = avcodec_alloc_context3(encoder);
if (!codec_ctx) {
// Allocate the encoder context
enc_ctx_ = avcodec_alloc_context3(encoder);
if (!enc_ctx_) {
spdlog::error("Failed to allocate the encoder context");
return AVERROR(ENOMEM);
}
// Set hardware device context
if (hw_ctx != nullptr) {
codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx);
enc_ctx_->hw_device_ctx = av_buffer_ref(hw_ctx);
}
// Set encoding parameters
codec_ctx->height = encoder_config->out_height;
codec_ctx->width = encoder_config->out_width;
codec_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
codec_ctx->bit_rate = encoder_config->bit_rate;
enc_ctx_->height = encoder_config->out_height;
enc_ctx_->width = encoder_config->out_width;
enc_ctx_->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
enc_ctx_->bit_rate = encoder_config->bit_rate;
// Set the color properties
codec_ctx->color_range = dec_ctx->color_range;
codec_ctx->color_primaries = dec_ctx->color_primaries;
codec_ctx->color_trc = dec_ctx->color_trc;
codec_ctx->colorspace = dec_ctx->colorspace;
codec_ctx->chroma_sample_location = dec_ctx->chroma_sample_location;
enc_ctx_->color_range = dec_ctx->color_range;
enc_ctx_->color_primaries = dec_ctx->color_primaries;
enc_ctx_->color_trc = dec_ctx->color_trc;
enc_ctx_->colorspace = dec_ctx->colorspace;
enc_ctx_->chroma_sample_location = dec_ctx->chroma_sample_location;
// Set the pixel format
if (encoder_config->pix_fmt != AV_PIX_FMT_NONE) {
// Use the specified pixel format
codec_ctx->pix_fmt = encoder_config->pix_fmt;
enc_ctx_->pix_fmt = encoder_config->pix_fmt;
} else {
codec_ctx->pix_fmt = get_encoder_default_pix_fmt(encoder, dec_ctx->pix_fmt);
if (codec_ctx->pix_fmt == AV_PIX_FMT_NONE) {
// Automatically select the pixel format
enc_ctx_->pix_fmt = get_encoder_default_pix_fmt(encoder, dec_ctx->pix_fmt);
if (enc_ctx_->pix_fmt == AV_PIX_FMT_NONE) {
spdlog::error("Could not get the default pixel format for the encoder");
return AVERROR(EINVAL);
}
spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(codec_ctx->pix_fmt));
spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(enc_ctx_->pix_fmt));
}
// Set the output video's time base
if (dec_ctx->time_base.num > 0 && dec_ctx->time_base.den > 0) {
codec_ctx->time_base = dec_ctx->time_base;
enc_ctx_->time_base = dec_ctx->time_base;
} else {
codec_ctx->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_vstream, NULL));
enc_ctx_->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr));
}
// Set the output video's frame rate
if (dec_ctx->framerate.num > 0 && dec_ctx->framerate.den > 0) {
codec_ctx->framerate = dec_ctx->framerate;
enc_ctx_->framerate = dec_ctx->framerate;
} else {
codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, NULL);
enc_ctx_->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr);
}
// Set the CRF and preset for any codecs that support it
std::string crf_str = std::to_string(encoder_config->crf);
av_opt_set(codec_ctx->priv_data, "crf", crf_str.c_str(), 0);
av_opt_set(codec_ctx->priv_data, "preset", encoder_config->preset, 0);
av_opt_set(enc_ctx_->priv_data, "crf", crf_str.c_str(), 0);
av_opt_set(enc_ctx_->priv_data, "preset", encoder_config->preset, 0);
if (fmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) {
codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
// Use global headers if necessary
if (ofmt_ctx_->oformat->flags & AVFMT_GLOBALHEADER) {
enc_ctx_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
if ((ret = avcodec_open2(codec_ctx, encoder, NULL)) < 0) {
// Open the encoder
if ((ret = avcodec_open2(enc_ctx_, encoder, nullptr)) < 0) {
spdlog::error("Cannot open video encoder");
return ret;
}
ret = avcodec_parameters_from_context(out_vstream->codecpar, codec_ctx);
// Copy encoder parameters to output video stream
ret = avcodec_parameters_from_context(out_vstream->codecpar, enc_ctx_);
if (ret < 0) {
spdlog::error("Failed to copy encoder parameters to output video stream");
return ret;
}
out_vstream->time_base = codec_ctx->time_base;
out_vstream->avg_frame_rate = codec_ctx->framerate;
out_vstream->r_frame_rate = codec_ctx->framerate;
out_vstream->time_base = enc_ctx_->time_base;
out_vstream->avg_frame_rate = enc_ctx_->framerate;
out_vstream->r_frame_rate = enc_ctx_->framerate;
// Copy other streams if necessary
if (encoder_config->copy_streams) {
// Allocate the stream map
*stream_map =
reinterpret_cast<int *>(av_malloc_array(ifmt_ctx->nb_streams, sizeof(**stream_map)));
if (!*stream_map) {
// Allocate the stream map
stream_map_ =
reinterpret_cast<int *>(av_malloc_array(ifmt_ctx->nb_streams, sizeof(*stream_map_)));
if (!stream_map_) {
spdlog::error("Could not allocate stream mapping");
return AVERROR(ENOMEM);
}
@ -144,20 +158,20 @@ int init_encoder(
// Skip the input video stream as it's already processed
if (i == in_vstream_idx) {
(*stream_map)[i] = *out_vstream_idx;
stream_map_[i] = out_vstream_idx_;
continue;
}
// Map only audio and subtitle streams (skip other types)
if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&
in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
(*stream_map)[i] = -1; // Stream not mapped
stream_map_[i] = -1;
spdlog::warn("Skipping unsupported stream type at index: {}", i);
continue;
}
// Create corresponding output stream for audio and subtitle streams
AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL);
AVStream *out_stream = avformat_new_stream(ofmt_ctx_, nullptr);
if (!out_stream) {
spdlog::error("Failed allocating output stream");
return AVERROR_UNKNOWN;
@ -176,32 +190,23 @@ int init_encoder(
// Map input stream index to output stream index
spdlog::debug("Stream mapping: {} (in) -> {} (out)", i, out_stream->index);
(*stream_map)[i] = out_stream->index;
stream_map_[i] = out_stream->index;
}
}
// Open the output file
if (!(fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
ret = avio_open(&fmt_ctx->pb, out_fpath.u8string().c_str(), AVIO_FLAG_WRITE);
if (!(ofmt_ctx_->oformat->flags & AVFMT_NOFILE)) {
ret = avio_open(&ofmt_ctx_->pb, out_fpath.u8string().c_str(), AVIO_FLAG_WRITE);
if (ret < 0) {
spdlog::error("Could not open output file '{}'", out_fpath.u8string().c_str());
spdlog::error("Could not open output file '{}'", out_fpath.u8string());
return ret;
}
}
*ofmt_ctx = fmt_ctx;
*enc_ctx = codec_ctx;
return 0;
}
int write_frame(
AVFrame *frame,
AVCodecContext *enc_ctx,
AVFormatContext *ofmt_ctx,
int out_vstream_idx,
int64_t frame_idx
) {
int Encoder::write_frame(AVFrame *frame, int64_t frame_idx) {
AVFrame *converted_frame = nullptr;
int ret;
@ -211,13 +216,12 @@ int write_frame(
}
// Convert the frame to the encoder's pixel format if needed
if (frame->format != enc_ctx->pix_fmt) {
converted_frame = convert_avframe_pix_fmt(frame, enc_ctx->pix_fmt);
if (frame->format != enc_ctx_->pix_fmt) {
converted_frame = convert_avframe_pix_fmt(frame, enc_ctx_->pix_fmt);
if (!converted_frame) {
spdlog::error("Error converting frame to encoder's pixel format");
return AVERROR_EXTERNAL;
}
converted_frame->pts = frame->pts;
}
@ -227,11 +231,12 @@ int write_frame(
return AVERROR(ENOMEM);
}
// Send the frame to the encoder
if (converted_frame != nullptr) {
ret = avcodec_send_frame(enc_ctx, converted_frame);
ret = avcodec_send_frame(enc_ctx_, converted_frame);
av_frame_free(&converted_frame);
} else {
ret = avcodec_send_frame(enc_ctx, frame);
ret = avcodec_send_frame(enc_ctx_, frame);
}
if (ret < 0) {
spdlog::error("Error sending frame to encoder");
@ -239,8 +244,9 @@ int write_frame(
return ret;
}
// Receive packets from the encoder
while (ret >= 0) {
ret = avcodec_receive_packet(enc_ctx, enc_pkt);
ret = avcodec_receive_packet(enc_ctx_, enc_pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
av_packet_unref(enc_pkt);
break;
@ -252,12 +258,12 @@ int write_frame(
// Rescale packet timestamps
av_packet_rescale_ts(
enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[out_vstream_idx]->time_base
enc_pkt, enc_ctx_->time_base, ofmt_ctx_->streams[out_vstream_idx_]->time_base
);
enc_pkt->stream_index = out_vstream_idx;
enc_pkt->stream_index = out_vstream_idx_;
// Write the packet
ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt);
ret = av_interleaved_write_frame(ofmt_ctx_, enc_pkt);
av_packet_unref(enc_pkt);
if (ret < 0) {
spdlog::error("Error muxing packet");
@ -270,7 +276,7 @@ int write_frame(
return 0;
}
int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vstream_idx) {
int Encoder::flush() {
int ret;
AVPacket *enc_pkt = av_packet_alloc();
if (!enc_pkt) {
@ -278,16 +284,17 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vs
return AVERROR(ENOMEM);
}
ret = avcodec_send_frame(enc_ctx, NULL);
// Send a NULL frame to signal the encoder to flush
ret = avcodec_send_frame(enc_ctx_, nullptr);
if (ret < 0) {
spdlog::error("Error sending NULL frame to encoder during flush");
av_packet_free(&enc_pkt);
return ret;
}
// Write the packets to the output file
// Receive and write packets until flushing is complete
while (true) {
ret = avcodec_receive_packet(enc_ctx, enc_pkt);
ret = avcodec_receive_packet(enc_ctx_, enc_pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
av_packet_unref(enc_pkt);
break;
@ -299,12 +306,12 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vs
// Rescale packet timestamps
av_packet_rescale_ts(
enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[out_vstream_idx]->time_base
enc_pkt, enc_ctx_->time_base, ofmt_ctx_->streams[out_vstream_idx_]->time_base
);
enc_pkt->stream_index = out_vstream_idx;
enc_pkt->stream_index = out_vstream_idx_;
// Write the packet
ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt);
ret = av_interleaved_write_frame(ofmt_ctx_, enc_pkt);
av_packet_unref(enc_pkt);
if (ret < 0) {
spdlog::error("Error muxing packet during flush");
@ -316,3 +323,19 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vs
av_packet_free(&enc_pkt);
return 0;
}
// Video encoder context allocated by init(); nullptr before allocation.
// The Encoder keeps ownership of the returned pointer.
AVCodecContext *Encoder::get_encoder_context() const {
    return enc_ctx_;
}

// Output format context allocated by init(); nullptr before allocation.
// The Encoder keeps ownership of the returned pointer.
AVFormatContext *Encoder::get_format_context() const {
    return ofmt_ctx_;
}

// Index of the output video stream created by init(); -1 until it is created.
int Encoder::get_output_video_stream_index() const {
    return out_vstream_idx_;
}

// Input-to-output stream index map; nullptr unless init() allocated it
// (which it does only when stream copying is enabled).
int *Encoder::get_stream_map() const {
    return stream_map_;
}

View File

@ -22,65 +22,51 @@ extern "C" {
static int process_frames(
EncoderConfig *encoder_config,
VideoProcessingContext *proc_ctx,
AVFormatContext *ifmt_ctx,
AVFormatContext *ofmt_ctx,
AVCodecContext *dec_ctx,
AVCodecContext *enc_ctx,
Decoder &decoder,
Encoder &encoder,
Filter *filter,
int in_vstream_idx,
int out_vstream_idx,
int *stream_map,
bool benchmark = false
) {
int ret;
char errbuf[AV_ERROR_MAX_STRING_SIZE];
std::vector<AVFrame *> flushed_frames;
int ret = 0;
// Get the total number of frames in the video with OpenCV
// Get required objects
AVFormatContext *ifmt_ctx = decoder.get_format_context();
AVCodecContext *dec_ctx = decoder.get_codec_context();
int in_vstream_idx = decoder.get_video_stream_index();
AVFormatContext *ofmt_ctx = encoder.get_format_context();
int *stream_map = encoder.get_stream_map();
// Get total number of frames
spdlog::debug("Reading total number of frames");
proc_ctx->total_frames = get_video_frame_count(ifmt_ctx, in_vstream_idx);
// Check if the total number of frames is still 0
if (proc_ctx->total_frames <= 0) {
spdlog::warn("Unable to determine the total number of frames");
} else {
spdlog::debug("{} frames to process", proc_ctx->total_frames);
}
AVFrame *frame = av_frame_alloc();
if (frame == nullptr) {
// Allocate frame and packet
auto av_frame_deleter = [](AVFrame *frame) { av_frame_free(&frame); };
std::unique_ptr<AVFrame, decltype(av_frame_deleter)> frame(av_frame_alloc(), av_frame_deleter);
if (!frame) {
ret = AVERROR(ENOMEM);
return ret;
}
AVPacket *packet = av_packet_alloc();
if (packet == nullptr) {
auto av_packet_deleter = [](AVPacket *packet) { av_packet_free(&packet); };
std::unique_ptr<AVPacket, decltype(av_packet_deleter)> packet(
av_packet_alloc(), av_packet_deleter
);
if (!packet) {
spdlog::critical("Could not allocate AVPacket");
av_frame_free(&frame);
return AVERROR(ENOMEM);
}
// Lambda function for cleaning up resources
auto cleanup = [&]() {
if (frame) {
av_frame_free(&frame);
frame = nullptr;
}
if (packet) {
av_packet_free(&packet);
packet = nullptr;
}
for (AVFrame *&flushed_frame : flushed_frames) {
if (flushed_frame) {
av_frame_free(&flushed_frame);
flushed_frame = nullptr;
}
}
};
// Read frames from the input file
while (!proc_ctx->abort) {
ret = av_read_frame(ifmt_ctx, packet);
ret = av_read_frame(ifmt_ctx, packet.get());
if (ret < 0) {
if (ret == AVERROR_EOF) {
spdlog::debug("Reached end of file");
@ -88,17 +74,15 @@ static int process_frames(
}
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error reading packet: {}", errbuf);
cleanup();
return ret;
}
if (packet->stream_index == in_vstream_idx) {
ret = avcodec_send_packet(dec_ctx, packet);
ret = avcodec_send_packet(dec_ctx, packet.get());
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error sending packet to decoder: {}", errbuf);
av_packet_unref(packet);
cleanup();
av_packet_unref(packet.get());
return ret;
}
@ -108,49 +92,43 @@ static int process_frames(
continue;
}
ret = avcodec_receive_frame(dec_ctx, frame);
ret = avcodec_receive_frame(dec_ctx, frame.get());
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
spdlog::debug("Frame not ready");
break;
} else if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error decoding video frame: {}", errbuf);
av_packet_unref(packet);
cleanup();
av_packet_unref(packet.get());
return ret;
}
AVFrame *processed_frame = nullptr;
ret = filter->process_frame(frame, &processed_frame);
AVFrame *raw_processed_frame = nullptr;
ret = filter->process_frame(frame.get(), &raw_processed_frame);
if (ret < 0 && ret != AVERROR(EAGAIN)) {
av_strerror(ret, errbuf, sizeof(errbuf));
av_frame_free(&processed_frame);
av_packet_unref(packet);
cleanup();
av_packet_unref(packet.get());
return ret;
} else if (ret == 0 && processed_frame != nullptr) {
} else if (ret == 0 && raw_processed_frame != nullptr) {
auto processed_frame = std::unique_ptr<AVFrame, decltype(av_frame_deleter)>(
raw_processed_frame, av_frame_deleter
);
if (!benchmark) {
ret = write_frame(
processed_frame,
enc_ctx,
ofmt_ctx,
out_vstream_idx,
proc_ctx->processed_frames
);
ret =
encoder.write_frame(processed_frame.get(), proc_ctx->processed_frames);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error encoding/writing frame: {}", errbuf);
av_frame_free(&processed_frame);
av_packet_unref(packet);
cleanup();
av_packet_unref(packet.get());
return ret;
}
}
av_frame_free(&processed_frame);
proc_ctx->processed_frames++;
}
av_frame_unref(frame);
av_frame_unref(frame.get());
spdlog::debug(
"Processed frame {}/{}", proc_ctx->processed_frames, proc_ctx->total_frames
);
@ -160,58 +138,54 @@ static int process_frames(
int out_stream_index = stream_map[packet->stream_index];
AVStream *out_stream = ofmt_ctx->streams[out_stream_index];
av_packet_rescale_ts(packet, in_stream->time_base, out_stream->time_base);
av_packet_rescale_ts(packet.get(), in_stream->time_base, out_stream->time_base);
packet->stream_index = out_stream_index;
ret = av_interleaved_write_frame(ofmt_ctx, packet);
ret = av_interleaved_write_frame(ofmt_ctx, packet.get());
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error muxing audio/subtitle packet: {}", errbuf);
av_packet_unref(packet);
cleanup();
av_packet_unref(packet.get());
return ret;
}
}
av_packet_unref(packet);
av_packet_unref(packet.get());
}
// Flush the filter
ret = filter->flush(flushed_frames);
std::vector<AVFrame *> raw_flushed_frames;
ret = filter->flush(raw_flushed_frames);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error flushing filter: {}", errbuf);
cleanup();
return ret;
}
// Wrap flushed frames in unique_ptrs
std::vector<std::unique_ptr<AVFrame, decltype(av_frame_deleter)>> flushed_frames;
for (AVFrame *raw_frame : raw_flushed_frames) {
flushed_frames.emplace_back(raw_frame, av_frame_deleter);
}
// Encode and write all flushed frames
for (AVFrame *&flushed_frame : flushed_frames) {
ret = write_frame(
flushed_frame, enc_ctx, ofmt_ctx, out_vstream_idx, proc_ctx->processed_frames
);
for (auto &flushed_frame : flushed_frames) {
ret = encoder.write_frame(flushed_frame.get(), proc_ctx->processed_frames);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error encoding/writing flushed frame: {}", errbuf);
av_frame_free(&flushed_frame);
flushed_frame = nullptr;
cleanup();
return ret;
}
av_frame_free(&flushed_frame);
flushed_frame = nullptr;
proc_ctx->processed_frames++;
}
// Flush the encoder
ret = flush_encoder(enc_ctx, ofmt_ctx, out_vstream_idx);
ret = encoder.flush();
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error flushing encoder: {}", errbuf);
cleanup();
return ret;
}
cleanup();
return ret;
}
@ -226,55 +200,10 @@ extern "C" int process_video(
EncoderConfig *encoder_config,
VideoProcessingContext *proc_ctx
) {
AVFormatContext *ifmt_ctx = nullptr;
AVFormatContext *ofmt_ctx = nullptr;
AVCodecContext *dec_ctx = nullptr;
AVCodecContext *enc_ctx = nullptr;
AVBufferRef *hw_ctx = nullptr;
int *stream_map = nullptr;
Filter *filter = nullptr;
int in_vstream_idx = -1;
int out_vstream_idx = -1;
char errbuf[AV_ERROR_MAX_STRING_SIZE];
int ret = 0;
// Lambda function for cleaning up resources
auto cleanup = [&]() {
if (ifmt_ctx) {
avformat_close_input(&ifmt_ctx);
ifmt_ctx = nullptr;
}
if (ofmt_ctx && !(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) {
avio_closep(&ofmt_ctx->pb);
ofmt_ctx->pb = nullptr;
}
if (ofmt_ctx) {
avformat_free_context(ofmt_ctx);
ofmt_ctx = nullptr;
}
if (dec_ctx) {
avcodec_free_context(&dec_ctx);
dec_ctx = nullptr;
}
if (enc_ctx) {
avcodec_free_context(&enc_ctx);
enc_ctx = nullptr;
}
if (hw_ctx) {
av_buffer_unref(&hw_ctx);
hw_ctx = nullptr;
}
if (stream_map) {
av_free(stream_map);
stream_map = nullptr;
}
if (filter) {
delete filter;
filter = nullptr;
}
};
// Set the log level for FFmpeg and spdlog (libvideo2x)
// Set the log level for FFmpeg and spdlog
switch (log_level) {
case LIBVIDEO2X_LOG_LEVEL_TRACE:
av_log_set_level(AV_LOG_TRACE);
@ -314,26 +243,38 @@ extern "C" int process_video(
std::filesystem::path in_fpath(in_fname);
std::filesystem::path out_fpath(out_fname);
auto hw_ctx_deleter = [](AVBufferRef *ref) {
if (ref) {
av_buffer_unref(&ref);
}
};
std::unique_ptr<AVBufferRef, decltype(hw_ctx_deleter)> hw_ctx(nullptr, hw_ctx_deleter);
// Initialize hardware device context
if (hw_type != AV_HWDEVICE_TYPE_NONE) {
ret = av_hwdevice_ctx_create(&hw_ctx, hw_type, NULL, NULL, 0);
AVBufferRef *tmp_hw_ctx = nullptr;
ret = av_hwdevice_ctx_create(&tmp_hw_ctx, hw_type, NULL, NULL, 0);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error initializing hardware device context: {}", errbuf);
cleanup();
return ret;
}
hw_ctx.reset(tmp_hw_ctx);
}
// Initialize input
ret = init_decoder(hw_type, hw_ctx, in_fpath, &ifmt_ctx, &dec_ctx, &in_vstream_idx);
// Initialize input decoder
Decoder decoder;
ret = decoder.init(hw_type, hw_ctx.get(), in_fpath);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Failed to initialize decoder: {}", errbuf);
cleanup();
return ret;
}
AVFormatContext *ifmt_ctx = decoder.get_format_context();
AVCodecContext *dec_ctx = decoder.get_codec_context();
int in_vstream_idx = decoder.get_video_stream_index();
// Initialize output dimensions based on filter configuration
int output_width = 0, output_height = 0;
switch (filter_config->filter_type) {
@ -347,116 +288,85 @@ extern "C" int process_video(
break;
default:
spdlog::critical("Unknown filter type");
cleanup();
return -1;
}
spdlog::debug("Output video dimensions: {}x{}", output_width, output_height);
// Initialize output encoder
// Update encoder configuration with output dimensions
encoder_config->out_width = output_width;
encoder_config->out_height = output_height;
ret = init_encoder(
hw_ctx,
out_fpath,
ifmt_ctx,
&ofmt_ctx,
&enc_ctx,
dec_ctx,
encoder_config,
in_vstream_idx,
&out_vstream_idx,
&stream_map
);
// Initialize the encoder
Encoder encoder;
ret = encoder.init(hw_ctx.get(), out_fpath, ifmt_ctx, dec_ctx, encoder_config, in_vstream_idx);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Failed to initialize encoder: {}", errbuf);
cleanup();
return ret;
}
// Write the output file header
ret = avformat_write_header(ofmt_ctx, NULL);
ret = avformat_write_header(encoder.get_format_context(), NULL);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error occurred when opening output file: {}", errbuf);
cleanup();
return ret;
}
// Create and initialize the appropriate filter
std::unique_ptr<Filter> filter;
if (filter_config->filter_type == FILTER_LIBPLACEBO) {
const auto &config = filter_config->config.libplacebo;
if (!config.shader_path) {
spdlog::critical("Shader path must be provided for the libplacebo filter");
cleanup();
return -1;
}
filter = new LibplaceboFilter{
filter = std::make_unique<LibplaceboFilter>(
vk_device_index,
std::filesystem::path(config.shader_path),
config.out_width,
config.out_height
};
);
} else if (filter_config->filter_type == FILTER_REALESRGAN) {
const auto &config = filter_config->config.realesrgan;
if (!config.model_name) {
spdlog::critical("Model name must be provided for the RealESRGAN filter");
cleanup();
return -1;
}
filter = new RealesrganFilter{
filter = std::make_unique<RealesrganFilter>(
static_cast<int>(vk_device_index),
config.tta_mode,
config.scaling_factor,
config.model_name
};
);
} else {
spdlog::critical("Unknown filter type");
cleanup();
return -1;
}
// Check if the filter instance was created successfully
if (filter == nullptr) {
spdlog::critical("Failed to create filter instance");
cleanup();
return -1;
}
// Initialize the filter
ret = filter->init(dec_ctx, enc_ctx, hw_ctx);
ret = filter->init(dec_ctx, encoder.get_encoder_context(), hw_ctx.get());
if (ret < 0) {
spdlog::critical("Failed to initialize filter");
cleanup();
return ret;
}
// Process frames
ret = process_frames(
encoder_config,
proc_ctx,
ifmt_ctx,
ofmt_ctx,
dec_ctx,
enc_ctx,
filter,
in_vstream_idx,
out_vstream_idx,
stream_map,
benchmark
);
// Process frames using the encoder and decoder
ret = process_frames(encoder_config, proc_ctx, decoder, encoder, filter.get(), benchmark);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error processing frames: {}", errbuf);
cleanup();
return ret;
}
// Write the output file trailer
av_write_trailer(ofmt_ctx);
// Cleanup before returning
cleanup();
av_write_trailer(encoder.get_format_context());
if (ret < 0 && ret != AVERROR_EOF) {
av_strerror(ret, errbuf, sizeof(errbuf));