From 5884dd1ba4e0252cb804fccc2924a30fef6f6f83 Mon Sep 17 00:00:00 2001 From: k4yt3x Date: Mon, 16 Dec 2024 00:00:00 +0000 Subject: [PATCH] fix(encoder): always use the calculated PTS with corrected math Signed-off-by: k4yt3x --- CHANGELOG.md | 6 ++++++ Makefile | 20 +++++++++----------- include/libvideo2x/libvideo2x.h | 8 ++++---- include/libvideo2x/processor.h | 2 +- src/encoder.cpp | 10 ++++++---- src/filter_realesrgan.cpp | 2 +- src/libvideo2x.cpp | 30 ++++++++++++------------------ 7 files changed, 39 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3439200..faac98a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Fixed + +- Make the encoder always use the calculated PTS with corrected math. + ## [6.2.0] - 2024-12-11 ### Added diff --git a/Makefile b/Makefile index 53d347a..30f00a2 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,6 @@ heaptrack-realesrgan heaptrack-libplacebo heaptrack-rife BINDIR=build -CC=clang CXX=clang++ TEST_VIDEO=data/standard-test.mp4 @@ -13,7 +12,6 @@ TEST_OUTPUT=data/output.mp4 build: cmake -S . -B $(BINDIR) \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ - -DCMAKE_C_COMPILER=$(CC) \ -DCMAKE_CXX_COMPILER=$(CXX) \ -DCMAKE_BUILD_TYPE=Release cmake --build $(BINDIR) --config Release --parallel @@ -22,17 +20,18 @@ build: static: cmake -S . -B $(BINDIR) \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ - -DCMAKE_C_COMPILER=$(CC) \ -DCMAKE_CXX_COMPILER=$(CXX) \ -DCMAKE_BUILD_TYPE=Release \ - -DBUILD_SHARED_LIBS=OFF + -DBUILD_SHARED_LIBS=OFF \ + -DUSE_SYSTEM_NCNN=OFF \ + -DUSE_SYSTEM_SPDLOG=OFF \ + -DUSE_SYSTEM_BOOST=OFF cmake --build $(BINDIR) --config Release --parallel cp $(BINDIR)/compile_commands.json . debug: cmake -S . -B $(BINDIR) \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ - -DCMAKE_C_COMPILER=$(CC) \ -DCMAKE_CXX_COMPILER=$(CXX) \ -DCMAKE_BUILD_TYPE=Debug cmake --build $(BINDIR) --config Debug --parallel @@ -71,8 +70,7 @@ debian: libomp-dev \ libspdlog-dev \ libboost-program-options-dev - cmake -B /tmp/build -S . -DUSE_SYSTEM_NCNN=OFF \ - -DCMAKE_C_COMPILER=$(CC) -DCMAKE_CXX_COMPILER=$(CXX) \ + cmake -B /tmp/build -S . -DUSE_SYSTEM_NCNN=OFF -DCMAKE_CXX_COMPILER=$(CXX) \ -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/tmp/install \ -DINSTALL_BIN_DESTINATION=. -DINSTALL_INCLUDE_DESTINATION=include \ -DINSTALL_LIB_DESTINATION=. -DINSTALL_MODEL_DESTINATION=. @@ -93,8 +91,8 @@ ubuntu2404: libomp-dev \ libboost-program-options-dev cmake -B build -S . -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DSPDLOG_NO_EXCEPTIONS=ON \ - -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ - -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=video2x-linux-ubuntu-amd64/usr + -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=video2x-linux-ubuntu-amd64/usr cmake --build build --config Release --target install --parallel mkdir -p video2x-linux-ubuntu-amd64/DEBIAN cp packaging/debian/control.ubuntu2404 video2x-linux-ubuntu-amd64/DEBIAN/control @@ -118,8 +116,8 @@ ubuntu2204: libomp-dev \ libboost-program-options-dev cmake -B build -S . -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DSPDLOG_NO_EXCEPTIONS=ON \ - -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ - -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=video2x-linux-ubuntu-amd64/usr + -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=video2x-linux-ubuntu-amd64/usr cmake --build build --config Release --target install --parallel mkdir -p video2x-linux-ubuntu-amd64/DEBIAN cp packaging/debian/control.ubuntu2204 video2x-linux-ubuntu-amd64/DEBIAN/control diff --git a/include/libvideo2x/libvideo2x.h b/include/libvideo2x/libvideo2x.h index 03c1736..c4b8027 100644 --- a/include/libvideo2x/libvideo2x.h +++ b/include/libvideo2x/libvideo2x.h @@ -39,7 +39,7 @@ class LIBVIDEO2X_API VideoProcessor { VideoProcessor( const ProcessorConfig proc_cfg, const EncoderConfig enc_cfg, - const uint32_t vk_device_index = 0, + const uint32_t vk_device_idx = 0, const AVHWDeviceType hw_device_type = AV_HWDEVICE_TYPE_NONE, const Video2xLogLevel = Video2xLogLevel::Info, const bool benchmark = false @@ -55,7 +55,7 @@ class LIBVIDEO2X_API VideoProcessor { void abort() { state_.store(VideoProcessorState::Aborted); } VideoProcessorState get_state() const { return state_.load(); } - int64_t get_processed_frames() const { return frame_index_.load(); } + int64_t get_processed_frames() const { return frame_idx_.load(); } int64_t get_total_frames() const { return total_frames_.load(); } private: @@ -88,11 +88,11 @@ class LIBVIDEO2X_API VideoProcessor { ProcessorConfig proc_cfg_; EncoderConfig enc_cfg_; - uint32_t vk_device_index_ = 0; + uint32_t vk_device_idx_ = 0; AVHWDeviceType hw_device_type_ = AV_HWDEVICE_TYPE_NONE; bool benchmark_ = false; std::atomic state_ = VideoProcessorState::Idle; - std::atomic frame_index_ = 0; + std::atomic frame_idx_ = 0; std::atomic total_frames_ = 0; }; diff --git a/include/libvideo2x/processor.h b/include/libvideo2x/processor.h index 3edda3e..6ebd700 100644 --- a/include/libvideo2x/processor.h +++ b/include/libvideo2x/processor.h @@ -55,7 +55,7 @@ class Processor { public: virtual ~Processor() = default; virtual int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) = 0; - virtual int flush(std::vector &_) { return 0; } + virtual int flush(std::vector &) { return 0; } virtual ProcessingMode get_processing_mode() const = 0; virtual ProcessorType get_processor_type() const = 0; virtual void get_output_dimensions( diff --git a/src/encoder.cpp b/src/encoder.cpp index a83a055..cf1195f 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -253,10 +253,12 @@ int Encoder::write_frame(AVFrame *frame, int64_t frame_idx) { AVFrame *converted_frame = nullptr; int ret; - // Set the frame's presentation timestamp if not set - if (frame->pts <= 0) { - frame->pts = frame_idx; - } + // Let the encoder decide the frame type + frame->pict_type = AV_PICTURE_TYPE_NONE; + + // Calculate this frame's presentation timestamp (PTS) + frame->pts = frame_idx * (enc_ctx_->time_base.den * enc_ctx_->framerate.den / + (enc_ctx_->time_base.num * enc_ctx_->framerate.num)); // Convert the frame to the encoder's pixel format if needed if (frame->format != enc_ctx_->pix_fmt) { diff --git a/src/filter_realesrgan.cpp b/src/filter_realesrgan.cpp index 685931c..510415a 100644 --- a/src/filter_realesrgan.cpp +++ b/src/filter_realesrgan.cpp @@ -28,7 +28,7 @@ FilterRealesrgan::~FilterRealesrgan() { } } -int FilterRealesrgan::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *_) { +int FilterRealesrgan::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *) { // Construct the model paths using std::filesystem std::filesystem::path model_param_path; std::filesystem::path model_bin_path; diff --git a/src/libvideo2x.cpp b/src/libvideo2x.cpp index 07f1acf..0081b58 100644 --- a/src/libvideo2x.cpp +++ b/src/libvideo2x.cpp @@ -16,14 +16,14 @@ extern "C" { VideoProcessor::VideoProcessor( const ProcessorConfig proc_cfg, const EncoderConfig enc_cfg, - const uint32_t vk_device_index, + const uint32_t vk_device_idx, const AVHWDeviceType hw_device_type, const Video2xLogLevel log_level, const bool benchmark ) : proc_cfg_(proc_cfg), enc_cfg_(enc_cfg), - vk_device_index_(vk_device_index), + vk_device_idx_(vk_device_idx), hw_device_type_(hw_device_type), benchmark_(benchmark) { set_log_level(log_level); @@ -78,7 +78,7 @@ int VideoProcessor::process( // Create and initialize the appropriate filter std::unique_ptr processor( - ProcessorFactory::instance().create_processor(proc_cfg_, vk_device_index_) + ProcessorFactory::instance().create_processor(proc_cfg_, vk_device_idx_) ); if (processor == nullptr) { return handle_error(-1, "Failed to create filter instance"); @@ -254,8 +254,8 @@ int VideoProcessor::process_frames( return ret; } av_frame_unref(frame.get()); - frame_index_++; - spdlog::debug("Processed frame {}/{}", frame_index_.load(), total_frames_.load()); + frame_idx_++; + spdlog::debug("Processed frame {}/{}", frame_idx_.load(), total_frames_.load()); } } else if (enc_cfg_.copy_streams && stream_map[packet->stream_index] >= 0) { ret = write_raw_packet(packet.get(), ifmt_ctx, ofmt_ctx, stream_map); @@ -287,7 +287,7 @@ int VideoProcessor::process_frames( if (ret < 0) { return ret; } - frame_index_++; + frame_idx_++; } // Flush the encoder @@ -306,9 +306,7 @@ int VideoProcessor::write_frame(AVFrame *frame, Encoder &encoder) { int ret = 0; if (!benchmark_) { - // Set the frame type to none to let the encoder decide - frame->pict_type = AV_PICTURE_TYPE_NONE; - ret = encoder.write_frame(frame, frame_index_); + ret = encoder.write_frame(frame, frame_idx_); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error encoding/writing frame: {}", errbuf); @@ -327,11 +325,11 @@ int VideoProcessor::write_raw_packet( int ret = 0; AVStream *in_stream = ifmt_ctx->streams[packet->stream_index]; - int out_stream_index = stream_map[packet->stream_index]; - AVStream *out_stream = ofmt_ctx->streams[out_stream_index]; + int out_stream_idx = stream_map[packet->stream_index]; + AVStream *out_stream = ofmt_ctx->streams[out_stream_idx]; av_packet_rescale_ts(packet, in_stream->time_base, out_stream->time_base); - packet->stream_index = out_stream_index; + packet->stream_index = out_stream_idx; ret = av_interleaved_write_frame(ofmt_ctx, packet); if (ret < 0) { @@ -391,9 +389,7 @@ int VideoProcessor::process_interpolation( float frame_diff = get_frame_diff(prev_frame.get(), frame); if (frame_diff > proc_cfg_.scn_det_thresh) { spdlog::debug( - "Scene change detected ({:.2f}%), skipping frame {}", - frame_diff, - frame_index_.load() + "Scene change detected ({:.2f}%), skipping frame {}", frame_diff, frame_idx_.load() ); skip_frame = true; } @@ -425,19 +421,17 @@ int VideoProcessor::process_interpolation( proc_frame, &av_frame_deleter ); - processed_frame->pts = frame_index_; ret = write_frame(processed_frame.get(), encoder); if (ret < 0) { return ret; } } - frame_index_++; + frame_idx_++; current_time_step += time_step; } // Write the original frame - frame->pts = frame_index_; ret = write_frame(frame, encoder); // Update the previous frame with the current frame