From e1e8ed864d300498117fbe0f85212d47d96b0e7f Mon Sep 17 00:00:00 2001 From: K4YT3X Date: Sun, 22 Dec 2024 05:53:09 -0500 Subject: [PATCH] perf: improve vectorization optimizations and add function multi-versioning (#1271) Signed-off-by: k4yt3x --- .github/workflows/build.yml | 9 ++-- .github/workflows/release.yml | 9 ++-- CHANGELOG.md | 10 ++++ CMakeLists.txt | 95 +++++++++++++++++++++++++---------- Makefile | 27 +++++----- packaging/arch/PKGBUILD | 9 ++-- src/avutils.cpp | 1 + src/conversions.cpp | 3 ++ src/decoder.cpp | 2 +- src/encoder.cpp | 2 + src/libvideo2x.cpp | 1 + 11 files changed, 116 insertions(+), 52 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5b57654..8435f2a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -49,11 +49,12 @@ jobs: - name: Build Video2X run: | mkdir -p /tmp/build /tmp/install - cmake -B /tmp/build -S . -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF \ + cmake -B /tmp/build -S . -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ - -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=/tmp/install \ + -DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \ -DINSTALL_BIN_DESTINATION=. -DINSTALL_INCLUDE_DESTINATION=include \ - -DINSTALL_LIB_DESTINATION=. -DINSTALL_MODEL_DESTINATION=. + -DINSTALL_LIB_DESTINATION=. -DINSTALL_MODEL_DESTINATION=. \ + -DCMAKE_INSTALL_PREFIX=/tmp/install cmake --build /tmp/build --config Debug --target install - name: Upload artifacts @@ -97,7 +98,7 @@ jobs: shell: pwsh run: | cmake -S . -B build ` - -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DUSE_SYSTEM_BOOST=OFF ` + -DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF -DVIDEO2X_USE_EXTERNAL_BOOST=OFF ` -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=build/video2x_install cmake --build build --config Debug --parallel --target install diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 564066e..239a518 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -53,9 +53,10 @@ jobs: - name: Build Video2X run: | - cmake -B build -S . -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DSPDLOG_NO_EXCEPTIONS=ON \ - -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ - -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=build/video2x-linux-ubuntu-2404-amd64/usr + cmake -B build -S . -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ + -DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \ + -DCMAKE_INSTALL_PREFIX=build/video2x-linux-ubuntu-2404-amd64/usr cmake --build build --config Release --target install --parallel mkdir -p build/video2x-linux-ubuntu-2404-amd64/DEBIAN cp packaging/debian/control build/video2x-linux-ubuntu-2404-amd64/DEBIAN/control @@ -107,7 +108,7 @@ jobs: shell: pwsh run: | cmake -S . -B build ` - -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DUSE_SYSTEM_BOOST=OFF ` + -DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF -DVIDEO2X_USE_EXTERNAL_BOOST=OFF ` -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=build/video2x_install cmake --build build --config Release --parallel --target install diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a107a7..d8d5a47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +- Multi-versioning to critical functions to enhance performance in generic architecture builds. + +### Changed + +- Improve the CMake optimization flags and option names. + ## [6.3.1] - 2024-12-21 ### Fixed diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e89eca..c327f30 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,20 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() +# Build options +option(BUILD_SHARED_LIBS "Build libvideo2x as a shared library" ON) +option(VIDEO2X_BUILD_CLI "Build the video2x command line interface executable" ON) + +option(VIDEO2X_ENABLE_NATIVE "Enable native optimizations (-march=native)" OFF) +option(VIDEO2X_ENABLE_X86_64_V4 "Enable x86-64-v4 optimizations (-march=x86-64-v4)" OFF) +option(VIDEO2X_ENABLE_AVX512F "Enable AVX-512 foundation optimizations (-march=avx512f)" OFF) +option(VIDEO2X_ENABLE_X86_64_V3 "Enable x86-64-v3 optimizations (-march=x86-64-v3)" OFF) +option(VIDEO2X_ENABLE_AVX2 "Enable AVX2 optimizations (-march=avx2)" OFF) + +option(VIDEO2X_USE_EXTERNAL_NCNN "Use the system-provided ncnn library" ON) +option(VIDEO2X_USE_EXTERNAL_SPDLOG "Use the system-provided spdlog library" ON) +option(VIDEO2X_USE_EXTERNAL_BOOST "Use the system-provided Boost library" ON) + # Set global compile options for all targets if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") add_compile_options(/W4 /permissive-) @@ -25,30 +39,50 @@ endif() # Set the default optimization flags for Release builds if(CMAKE_BUILD_TYPE STREQUAL "Release") + # Set the optimization flags for each compiler if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") add_compile_options(/Ox /Ot /GL /DNDEBUG) add_link_options(/LTCG /OPT:REF /OPT:ICF) elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") - add_compile_options(-O3 -march=native -ffunction-sections -fdata-sections) + add_compile_options(-O3 -ffunction-sections -fdata-sections) add_link_options(-Wl,-s -flto -Wl,--gc-sections) endif() endif() -# Build options -option(BUILD_SHARED_LIBS "Build libvideo2x as a shared library" ON) -option(BUILD_VIDEO2X_CLI "Build the video2x executable" ON) -option(USE_SYSTEM_NCNN "Use system ncnn library" ON) -option(USE_SYSTEM_SPDLOG "Use system spdlog library" ON) -option(USE_SYSTEM_BOOST "Use system Boost library" ON) +# Enable the requested architecture-specific optimizations +if(VIDEO2X_ENABLE_NATIVE) + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_compile_options(/arch:NATIVE) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + add_compile_options(-march=native) + endif() +elseif(VIDEO2X_ENABLE_X86_64_V4) + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_compile_options(/arch:AVX2) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + add_compile_options(-march=x86-64-v4) + endif() +elseif(VIDEO2X_ENABLE_AVX512F) + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_compile_options(/arch:AVX512) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + add_compile_options(-mavx512f) + endif() +elseif(VIDEO2X_ENABLE_X86_64_V3) + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_compile_options(/arch:AVX2) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + add_compile_options(-march=x86-64-v3) + endif() +elseif(VIDEO2X_ENABLE_AVX2) + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_compile_options(/arch:AVX2) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + add_compile_options(-mavx2) + endif() +endif() -# Generate the version header file -configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/include/libvideo2x/version.h.in" - "${CMAKE_CURRENT_BINARY_DIR}/libvideo2x/version.h" - @ONLY -) - -# Find the required packages +# Define lists to store include directories and libraries set(LIBVIDEO2X_INCLUDE_DIRS) set(LIBVIDEO2X_LIBS) set(VIDEO2X_INCLUDE_DIRS) @@ -109,7 +143,7 @@ else() endif() # WIN32 # Find ncnn package -if(USE_SYSTEM_NCNN) +if(VIDEO2X_USE_EXTERNAL_NCNN) find_package(ncnn REQUIRED) else() option(NCNN_INSTALL_SDK "" OFF) @@ -208,12 +242,14 @@ else() endif() # spdlog -if(USE_SYSTEM_SPDLOG) +if(VIDEO2X_USE_EXTERNAL_SPDLOG) find_package(spdlog REQUIRED) list(APPEND LIBVIDEO2X_INCLUDE_DIRS ${spdlog_INCLUDE_DIRS}) list(APPEND VIDEO2X_INCLUDE_DIRS ${spdlog_INCLUDE_DIRS}) set(SPDLOG_LIB spdlog::spdlog) else() + # spdlog exceptions are incompatible with ncnn + option(SPDLOG_NO_EXCEPTIONS "" OFF) add_subdirectory(third_party/spdlog) set(SPDLOG_LIB spdlog::spdlog_header_only) endif() @@ -221,13 +257,13 @@ list(APPEND LIBVIDEO2X_LIBS ${SPDLOG_LIB}) list(APPEND VIDEO2X_LIBS ${SPDLOG_LIB}) # Find dependencies required for the CLI -if(BUILD_VIDEO2X_CLI) +if(VIDEO2X_BUILD_CLI) # Vulkan find_package(Vulkan REQUIRED) list(APPEND VIDEO2X_LIBS Vulkan::Vulkan) # Boost - if(USE_SYSTEM_BOOST) + if(VIDEO2X_USE_EXTERNAL_BOOST) find_package(Boost REQUIRED COMPONENTS program_options) list(APPEND LIBVIDEO2X_INCLUDE_DIRS ${Boost_INCLUDE_DIRS}) else() @@ -255,7 +291,7 @@ ExternalProject_Add( -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/realesrgan_install -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DUSE_SYSTEM_NCNN=${USE_SYSTEM_NCNN} + -DUSE_SYSTEM_NCNN=${VIDEO2X_USE_EXTERNAL_NCNN} BUILD_ALWAYS ON INSTALL_COMMAND ${CMAKE_COMMAND} --build . --target install --config ${CMAKE_BUILD_TYPE} ) @@ -268,7 +304,7 @@ ExternalProject_Add( -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/realcugan_install -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DUSE_SYSTEM_NCNN=${USE_SYSTEM_NCNN} + -DUSE_SYSTEM_NCNN=${VIDEO2X_USE_EXTERNAL_NCNN} BUILD_ALWAYS ON INSTALL_COMMAND ${CMAKE_COMMAND} --build . --target install --config ${CMAKE_BUILD_TYPE} ) @@ -281,7 +317,7 @@ ExternalProject_Add( -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/rife_install -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DUSE_SYSTEM_NCNN=${USE_SYSTEM_NCNN} + -DUSE_SYSTEM_NCNN=${VIDEO2X_USE_EXTERNAL_NCNN} BUILD_ALWAYS ON INSTALL_COMMAND ${CMAKE_COMMAND} --build . --target install --config ${CMAKE_BUILD_TYPE} ) @@ -305,6 +341,13 @@ endif() # Ensure that the shared library is built after the external projects add_dependencies(libvideo2x realesrgan realcugan rife) +# Generate the version header file +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/include/libvideo2x/version.h.in" + "${CMAKE_CURRENT_BINARY_DIR}/libvideo2x/version.h" + @ONLY +) + # Include directories for the shared library target_include_directories(libvideo2x PRIVATE ${LIBVIDEO2X_INCLUDE_DIRS} @@ -335,7 +378,7 @@ list(APPEND LIBVIDEO2X_LIBS ${REALESRGAN_LIB} ${REALCUGAN_LIB} ${RIFE_LIB}) target_link_libraries(libvideo2x PRIVATE ${LIBVIDEO2X_LIBS}) if(NOT WIN32) - if(USE_SYSTEM_NCNN) + if(VIDEO2X_USE_EXTERNAL_NCNN) target_link_libraries(libvideo2x PUBLIC ncnn) else() target_link_libraries(libvideo2x PRIVATE ncnn) @@ -343,7 +386,7 @@ if(NOT WIN32) endif() # Create the executable 'video2x' -if(BUILD_VIDEO2X_CLI) +if(VIDEO2X_BUILD_CLI) file(GLOB VIDEO2X_SOURCES tools/video2x/src/*.cpp) add_executable(video2x ${VIDEO2X_SOURCES}) set_target_properties(video2x PROPERTIES OUTPUT_NAME video2x) @@ -395,8 +438,8 @@ install(TARGETS libvideo2x # Install model files install(DIRECTORY ${CMAKE_SOURCE_DIR}/models DESTINATION ${INSTALL_MODEL_DESTINATION}) -# Install the executable if BUILD_VIDEO2X_CLI is enabled -if(BUILD_VIDEO2X_CLI) +# Install the executable if VIDEO2X_BUILD_CLI is enabled +if(VIDEO2X_BUILD_CLI) install(TARGETS video2x RUNTIME DESTINATION ${INSTALL_BIN_DESTINATION}) endif() diff --git a/Makefile b/Makefile index 84a22e2..8153e59 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,8 @@ build: cmake -S . -B $(BINDIR) \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ -DCMAKE_CXX_COMPILER=$(CXX) \ - -DCMAKE_BUILD_TYPE=Release + -DCMAKE_BUILD_TYPE=Release \ + -DVIDEO2X_ENABLE_NATIVE=ON cmake --build $(BINDIR) --config Release --parallel cp $(BINDIR)/compile_commands.json . @@ -23,9 +24,9 @@ static: -DCMAKE_CXX_COMPILER=$(CXX) \ -DCMAKE_BUILD_TYPE=Release \ -DBUILD_SHARED_LIBS=OFF \ - -DUSE_SYSTEM_NCNN=OFF \ - -DUSE_SYSTEM_SPDLOG=OFF \ - -DUSE_SYSTEM_BOOST=OFF + -DVIDEO2X_USE_EXTERNAL_NCNN=OFF \ + -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \ + -DVIDEO2X_USE_EXTERNAL_BOOST=OFF cmake --build $(BINDIR) --config Release --parallel cp $(BINDIR)/compile_commands.json . @@ -39,18 +40,18 @@ debug: windows: cmake -S . -B $(BINDIR) \ - -DUSE_SYSTEM_NCNN=OFF \ - -DUSE_SYSTEM_SPDLOG=OFF \ - -DUSE_SYSTEM_BOOST=OFF \ + -DVIDEO2X_USE_EXTERNAL_NCNN=OFF \ + -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \ + -DVIDEO2X_USE_EXTERNAL_BOOST=OFF \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=build/libvideo2x-shared cmake --build $(BINDIR) --config Release --parallel --target install windows-debug: cmake -S . -B $(BINDIR) \ - -DUSE_SYSTEM_NCNN=OFF \ - -DUSE_SYSTEM_SPDLOG=OFF \ - -DUSE_SYSTEM_BOOST=OFF \ + -DVIDEO2X_USE_EXTERNAL_NCNN=OFF \ + -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \ + -DVIDEO2X_USE_EXTERNAL_BOOST=OFF \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_INSTALL_PREFIX=build/libvideo2x-shared cmake --build $(BINDIR) --config Debug --parallel --target install @@ -70,7 +71,7 @@ debian: libomp-dev \ libspdlog-dev \ libboost-program-options-dev - cmake -B /tmp/build -S . -DUSE_SYSTEM_NCNN=OFF -DCMAKE_CXX_COMPILER=$(CXX) \ + cmake -B /tmp/build -S . -DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DCMAKE_CXX_COMPILER=$(CXX) \ -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/tmp/install \ -DINSTALL_BIN_DESTINATION=. -DINSTALL_INCLUDE_DESTINATION=include \ -DINSTALL_LIB_DESTINATION=. -DINSTALL_MODEL_DESTINATION=. @@ -90,7 +91,7 @@ ubuntu2404: glslang-tools \ libomp-dev \ libboost-program-options-dev - cmake -B build -S . -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DSPDLOG_NO_EXCEPTIONS=ON \ + cmake -B build -S . -DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \ -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=video2x-linux-ubuntu-amd64/usr cmake --build build --config Release --target install --parallel @@ -115,7 +116,7 @@ ubuntu2204: glslang-tools \ libomp-dev \ libboost-program-options-dev - cmake -B build -S . -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DSPDLOG_NO_EXCEPTIONS=ON \ + cmake -B build -S . -DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \ -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=video2x-linux-ubuntu-amd64/usr cmake --build build --config Release --target install --parallel diff --git a/packaging/arch/PKGBUILD b/packaging/arch/PKGBUILD index 7890388..80215b5 100644 --- a/packaging/arch/PKGBUILD +++ b/packaging/arch/PKGBUILD @@ -1,12 +1,12 @@ pkgname=video2x -pkgver=r874.66c623f +pkgver=r958.996b0bf pkgrel=1 -pkgdesc="A machine learning-based lossless video super resolution framework" +pkgdesc="A machine learning-based video super resolution and frame interpolation framework" arch=('x86_64') url="https://github.com/k4yt3x/video2x" license=('AGPL3') depends=('ffmpeg' 'ncnn' 'vulkan-driver' 'spdlog' 'boost-libs') -makedepends=('git' 'cmake' 'make' 'clang' 'pkgconf' 'vulkan-headers' 'openmp' 'boost') +makedepends=('git' 'cmake' 'clang' 'vulkan-headers' 'openmp' 'boost') pkgver() { printf "r%s.%s" "$(git rev-list --count HEAD)" "$(git rev-parse --short HEAD)" @@ -17,7 +17,8 @@ prepare() { } build() { - cmake -B build -S .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr + cmake -B build -S .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr \ + -DCMAKE_CXX_COMPILER=clang++ -DVIDEO2X_ENABLE_X86_64_V3=ON cmake --build build --config Release --parallel } diff --git a/src/avutils.cpp b/src/avutils.cpp index 7cc3183..1001f88 100644 --- a/src/avutils.cpp +++ b/src/avutils.cpp @@ -135,6 +135,7 @@ AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat return best_pix_fmt; } +[[gnu::target_clones("default", "avx2", "avx512f")]] float get_frame_diff(AVFrame *frame1, AVFrame *frame2) { if (!frame1 || !frame2) { logger()->error("Invalid frame(s) provided for comparison"); diff --git a/src/conversions.cpp b/src/conversions.cpp index 7e74119..d3d2885 100644 --- a/src/conversions.cpp +++ b/src/conversions.cpp @@ -11,6 +11,7 @@ namespace video2x { namespace conversions { // Convert AVFrame format +[[gnu::target_clones("default", "avx2", "avx512f")]] AVFrame *convert_avframe_pix_fmt(AVFrame *src_frame, AVPixelFormat pix_fmt) { AVFrame *dst_frame = av_frame_alloc(); if (dst_frame == nullptr) { @@ -67,6 +68,7 @@ AVFrame *convert_avframe_pix_fmt(AVFrame *src_frame, AVPixelFormat pix_fmt) { } // Convert AVFrame to ncnn::Mat by copying the data +[[gnu::target_clones("default", "avx2", "avx512f")]] ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame) { AVFrame *converted_frame = nullptr; @@ -106,6 +108,7 @@ ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame) { } // Convert ncnn::Mat to AVFrame with a specified pixel format (this part is unchanged) +[[gnu::target_clones("default", "avx2", "avx512f")]] AVFrame *ncnn_mat_to_avframe(const ncnn::Mat &mat, AVPixelFormat pix_fmt) { int ret; diff --git a/src/decoder.cpp b/src/decoder.cpp index c38ba19..add04fa 100644 --- a/src/decoder.cpp +++ b/src/decoder.cpp @@ -22,7 +22,7 @@ Decoder::~Decoder() { } } -AVPixelFormat Decoder::get_hw_format(AVCodecContext *_, const AVPixelFormat *pix_fmts) { +AVPixelFormat Decoder::get_hw_format(AVCodecContext *, const AVPixelFormat *pix_fmts) { for (const AVPixelFormat *p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) { if (*p == hw_pix_fmt_) { return *p; diff --git a/src/encoder.cpp b/src/encoder.cpp index 7b732f3..efa940d 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -254,6 +254,7 @@ int Encoder::init( return 0; } +[[gnu::target_clones("default", "avx2", "avx512f")]] int Encoder::write_frame(AVFrame *frame, int64_t frame_idx) { AVFrame *converted_frame = nullptr; int ret; @@ -325,6 +326,7 @@ int Encoder::write_frame(AVFrame *frame, int64_t frame_idx) { return 0; } +[[gnu::target_clones("default", "avx2", "avx512f")]] int Encoder::flush() { int ret; AVPacket *enc_pkt = av_packet_alloc(); diff --git a/src/libvideo2x.cpp b/src/libvideo2x.cpp index 72a0a57..f86f2a4 100644 --- a/src/libvideo2x.cpp +++ b/src/libvideo2x.cpp @@ -28,6 +28,7 @@ VideoProcessor::VideoProcessor( hw_device_type_(hw_device_type), benchmark_(benchmark) {} +[[gnu::target_clones("default", "avx2", "avx512f")]] int VideoProcessor::process( const std::filesystem::path in_fname, const std::filesystem::path out_fname