perf: improve vectorization optimizations and add function multi-versioning (#1271)

Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
K4YT3X 2024-12-22 05:53:09 -05:00 committed by GitHub
parent f68939c478
commit e1e8ed864d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 116 additions and 52 deletions

View File

@ -49,11 +49,12 @@ jobs:
- name: Build Video2X
run: |
mkdir -p /tmp/build /tmp/install
cmake -B /tmp/build -S . -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF \
cmake -B /tmp/build -S . -DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=/tmp/install \
-DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \
-DINSTALL_BIN_DESTINATION=. -DINSTALL_INCLUDE_DESTINATION=include \
-DINSTALL_LIB_DESTINATION=. -DINSTALL_MODEL_DESTINATION=.
-DINSTALL_LIB_DESTINATION=. -DINSTALL_MODEL_DESTINATION=. \
-DCMAKE_INSTALL_PREFIX=/tmp/install
cmake --build /tmp/build --config Debug --target install
- name: Upload artifacts
@ -97,7 +98,7 @@ jobs:
shell: pwsh
run: |
cmake -S . -B build `
-DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DUSE_SYSTEM_BOOST=OFF `
-DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF -DVIDEO2X_USE_EXTERNAL_BOOST=OFF `
-DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=build/video2x_install
cmake --build build --config Debug --parallel --target install

View File

@ -53,9 +53,10 @@ jobs:
- name: Build Video2X
run: |
cmake -B build -S . -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DSPDLOG_NO_EXCEPTIONS=ON \
-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=build/video2x-linux-ubuntu-2404-amd64/usr
cmake -B build -S . -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \
-DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \
-DCMAKE_INSTALL_PREFIX=build/video2x-linux-ubuntu-2404-amd64/usr
cmake --build build --config Release --target install --parallel
mkdir -p build/video2x-linux-ubuntu-2404-amd64/DEBIAN
cp packaging/debian/control build/video2x-linux-ubuntu-2404-amd64/DEBIAN/control
@ -107,7 +108,7 @@ jobs:
shell: pwsh
run: |
cmake -S . -B build `
-DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DUSE_SYSTEM_BOOST=OFF `
-DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF -DVIDEO2X_USE_EXTERNAL_BOOST=OFF `
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=build/video2x_install
cmake --build build --config Release --parallel --target install

View File

@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Added
- Multi-versioning to critical functions to enhance performance in generic architecture builds.
### Changed
- Improve the CMake optimization flags and option names.
## [6.3.1] - 2024-12-21
### Fixed

View File

@ -16,6 +16,20 @@ if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
# Build options
option(BUILD_SHARED_LIBS "Build libvideo2x as a shared library" ON)
option(VIDEO2X_BUILD_CLI "Build the video2x command line interface executable" ON)
option(VIDEO2X_ENABLE_NATIVE "Enable native optimizations (-march=native)" OFF)
option(VIDEO2X_ENABLE_X86_64_V4 "Enable x86-64-v4 optimizations (-march=x86-64-v4)" OFF)
option(VIDEO2X_ENABLE_AVX512F "Enable AVX-512 foundation optimizations (-march=avx512f)" OFF)
option(VIDEO2X_ENABLE_X86_64_V3 "Enable x86-64-v3 optimizations (-march=x86-64-v3)" OFF)
option(VIDEO2X_ENABLE_AVX2 "Enable AVX2 optimizations (-march=avx2)" OFF)
option(VIDEO2X_USE_EXTERNAL_NCNN "Use the system-provided ncnn library" ON)
option(VIDEO2X_USE_EXTERNAL_SPDLOG "Use the system-provided spdlog library" ON)
option(VIDEO2X_USE_EXTERNAL_BOOST "Use the system-provided Boost library" ON)
# Set global compile options for all targets
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
add_compile_options(/W4 /permissive-)
@ -25,30 +39,50 @@ endif()
# Set the default optimization flags for Release builds
if(CMAKE_BUILD_TYPE STREQUAL "Release")
# Set the optimization flags for each compiler
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
add_compile_options(/Ox /Ot /GL /DNDEBUG)
add_link_options(/LTCG /OPT:REF /OPT:ICF)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
add_compile_options(-O3 -march=native -ffunction-sections -fdata-sections)
add_compile_options(-O3 -ffunction-sections -fdata-sections)
add_link_options(-Wl,-s -flto -Wl,--gc-sections)
endif()
endif()
# Build options
option(BUILD_SHARED_LIBS "Build libvideo2x as a shared library" ON)
option(BUILD_VIDEO2X_CLI "Build the video2x executable" ON)
option(USE_SYSTEM_NCNN "Use system ncnn library" ON)
option(USE_SYSTEM_SPDLOG "Use system spdlog library" ON)
option(USE_SYSTEM_BOOST "Use system Boost library" ON)
# Enable the requested architecture-specific optimizations
if(VIDEO2X_ENABLE_NATIVE)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
add_compile_options(/arch:NATIVE)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
add_compile_options(-march=native)
endif()
elseif(VIDEO2X_ENABLE_X86_64_V4)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
add_compile_options(/arch:AVX2)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
add_compile_options(-march=x86-64-v4)
endif()
elseif(VIDEO2X_ENABLE_AVX512F)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
add_compile_options(/arch:AVX512)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
add_compile_options(-mavx512f)
endif()
elseif(VIDEO2X_ENABLE_X86_64_V3)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
add_compile_options(/arch:AVX2)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
add_compile_options(-march=x86-64-v3)
endif()
elseif(VIDEO2X_ENABLE_AVX2)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
add_compile_options(/arch:AVX2)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
add_compile_options(-mavx2)
endif()
endif()
# Generate the version header file
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/include/libvideo2x/version.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/libvideo2x/version.h"
@ONLY
)
# Find the required packages
# Define lists to store include directories and libraries
set(LIBVIDEO2X_INCLUDE_DIRS)
set(LIBVIDEO2X_LIBS)
set(VIDEO2X_INCLUDE_DIRS)
@ -109,7 +143,7 @@ else()
endif() # WIN32
# Find ncnn package
if(USE_SYSTEM_NCNN)
if(VIDEO2X_USE_EXTERNAL_NCNN)
find_package(ncnn REQUIRED)
else()
option(NCNN_INSTALL_SDK "" OFF)
@ -208,12 +242,14 @@ else()
endif()
# spdlog
if(USE_SYSTEM_SPDLOG)
if(VIDEO2X_USE_EXTERNAL_SPDLOG)
find_package(spdlog REQUIRED)
list(APPEND LIBVIDEO2X_INCLUDE_DIRS ${spdlog_INCLUDE_DIRS})
list(APPEND VIDEO2X_INCLUDE_DIRS ${spdlog_INCLUDE_DIRS})
set(SPDLOG_LIB spdlog::spdlog)
else()
# spdlog exceptions are incompatible with ncnn
option(SPDLOG_NO_EXCEPTIONS "" OFF)
add_subdirectory(third_party/spdlog)
set(SPDLOG_LIB spdlog::spdlog_header_only)
endif()
@ -221,13 +257,13 @@ list(APPEND LIBVIDEO2X_LIBS ${SPDLOG_LIB})
list(APPEND VIDEO2X_LIBS ${SPDLOG_LIB})
# Find dependencies required for the CLI
if(BUILD_VIDEO2X_CLI)
if(VIDEO2X_BUILD_CLI)
# Vulkan
find_package(Vulkan REQUIRED)
list(APPEND VIDEO2X_LIBS Vulkan::Vulkan)
# Boost
if(USE_SYSTEM_BOOST)
if(VIDEO2X_USE_EXTERNAL_BOOST)
find_package(Boost REQUIRED COMPONENTS program_options)
list(APPEND LIBVIDEO2X_INCLUDE_DIRS ${Boost_INCLUDE_DIRS})
else()
@ -255,7 +291,7 @@ ExternalProject_Add(
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/realesrgan_install
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DUSE_SYSTEM_NCNN=${USE_SYSTEM_NCNN}
-DUSE_SYSTEM_NCNN=${VIDEO2X_USE_EXTERNAL_NCNN}
BUILD_ALWAYS ON
INSTALL_COMMAND ${CMAKE_COMMAND} --build . --target install --config ${CMAKE_BUILD_TYPE}
)
@ -268,7 +304,7 @@ ExternalProject_Add(
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/realcugan_install
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DUSE_SYSTEM_NCNN=${USE_SYSTEM_NCNN}
-DUSE_SYSTEM_NCNN=${VIDEO2X_USE_EXTERNAL_NCNN}
BUILD_ALWAYS ON
INSTALL_COMMAND ${CMAKE_COMMAND} --build . --target install --config ${CMAKE_BUILD_TYPE}
)
@ -281,7 +317,7 @@ ExternalProject_Add(
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/rife_install
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DUSE_SYSTEM_NCNN=${USE_SYSTEM_NCNN}
-DUSE_SYSTEM_NCNN=${VIDEO2X_USE_EXTERNAL_NCNN}
BUILD_ALWAYS ON
INSTALL_COMMAND ${CMAKE_COMMAND} --build . --target install --config ${CMAKE_BUILD_TYPE}
)
@ -305,6 +341,13 @@ endif()
# Ensure that the shared library is built after the external projects
add_dependencies(libvideo2x realesrgan realcugan rife)
# Generate the version header file
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/include/libvideo2x/version.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/libvideo2x/version.h"
@ONLY
)
# Include directories for the shared library
target_include_directories(libvideo2x PRIVATE
${LIBVIDEO2X_INCLUDE_DIRS}
@ -335,7 +378,7 @@ list(APPEND LIBVIDEO2X_LIBS ${REALESRGAN_LIB} ${REALCUGAN_LIB} ${RIFE_LIB})
target_link_libraries(libvideo2x PRIVATE ${LIBVIDEO2X_LIBS})
if(NOT WIN32)
if(USE_SYSTEM_NCNN)
if(VIDEO2X_USE_EXTERNAL_NCNN)
target_link_libraries(libvideo2x PUBLIC ncnn)
else()
target_link_libraries(libvideo2x PRIVATE ncnn)
@ -343,7 +386,7 @@ if(NOT WIN32)
endif()
# Create the executable 'video2x'
if(BUILD_VIDEO2X_CLI)
if(VIDEO2X_BUILD_CLI)
file(GLOB VIDEO2X_SOURCES tools/video2x/src/*.cpp)
add_executable(video2x ${VIDEO2X_SOURCES})
set_target_properties(video2x PROPERTIES OUTPUT_NAME video2x)
@ -395,8 +438,8 @@ install(TARGETS libvideo2x
# Install model files
install(DIRECTORY ${CMAKE_SOURCE_DIR}/models DESTINATION ${INSTALL_MODEL_DESTINATION})
# Install the executable if BUILD_VIDEO2X_CLI is enabled
if(BUILD_VIDEO2X_CLI)
# Install the executable if VIDEO2X_BUILD_CLI is enabled
if(VIDEO2X_BUILD_CLI)
install(TARGETS video2x RUNTIME DESTINATION ${INSTALL_BIN_DESTINATION})
endif()

View File

@ -13,7 +13,8 @@ build:
cmake -S . -B $(BINDIR) \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-DCMAKE_CXX_COMPILER=$(CXX) \
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_BUILD_TYPE=Release \
-DVIDEO2X_ENABLE_NATIVE=ON
cmake --build $(BINDIR) --config Release --parallel
cp $(BINDIR)/compile_commands.json .
@ -23,9 +24,9 @@ static:
-DCMAKE_CXX_COMPILER=$(CXX) \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF \
-DUSE_SYSTEM_NCNN=OFF \
-DUSE_SYSTEM_SPDLOG=OFF \
-DUSE_SYSTEM_BOOST=OFF
-DVIDEO2X_USE_EXTERNAL_NCNN=OFF \
-DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \
-DVIDEO2X_USE_EXTERNAL_BOOST=OFF
cmake --build $(BINDIR) --config Release --parallel
cp $(BINDIR)/compile_commands.json .
@ -39,18 +40,18 @@ debug:
windows:
cmake -S . -B $(BINDIR) \
-DUSE_SYSTEM_NCNN=OFF \
-DUSE_SYSTEM_SPDLOG=OFF \
-DUSE_SYSTEM_BOOST=OFF \
-DVIDEO2X_USE_EXTERNAL_NCNN=OFF \
-DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \
-DVIDEO2X_USE_EXTERNAL_BOOST=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=build/libvideo2x-shared
cmake --build $(BINDIR) --config Release --parallel --target install
windows-debug:
cmake -S . -B $(BINDIR) \
-DUSE_SYSTEM_NCNN=OFF \
-DUSE_SYSTEM_SPDLOG=OFF \
-DUSE_SYSTEM_BOOST=OFF \
-DVIDEO2X_USE_EXTERNAL_NCNN=OFF \
-DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \
-DVIDEO2X_USE_EXTERNAL_BOOST=OFF \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_INSTALL_PREFIX=build/libvideo2x-shared
cmake --build $(BINDIR) --config Debug --parallel --target install
@ -70,7 +71,7 @@ debian:
libomp-dev \
libspdlog-dev \
libboost-program-options-dev
cmake -B /tmp/build -S . -DUSE_SYSTEM_NCNN=OFF -DCMAKE_CXX_COMPILER=$(CXX) \
cmake -B /tmp/build -S . -DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DCMAKE_CXX_COMPILER=$(CXX) \
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/tmp/install \
-DINSTALL_BIN_DESTINATION=. -DINSTALL_INCLUDE_DESTINATION=include \
-DINSTALL_LIB_DESTINATION=. -DINSTALL_MODEL_DESTINATION=.
@ -90,7 +91,7 @@ ubuntu2404:
glslang-tools \
libomp-dev \
libboost-program-options-dev
cmake -B build -S . -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DSPDLOG_NO_EXCEPTIONS=ON \
cmake -B build -S . -DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \
-DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=video2x-linux-ubuntu-amd64/usr
cmake --build build --config Release --target install --parallel
@ -115,7 +116,7 @@ ubuntu2204:
glslang-tools \
libomp-dev \
libboost-program-options-dev
cmake -B build -S . -DUSE_SYSTEM_NCNN=OFF -DUSE_SYSTEM_SPDLOG=OFF -DSPDLOG_NO_EXCEPTIONS=ON \
cmake -B build -S . -DVIDEO2X_USE_EXTERNAL_NCNN=OFF -DVIDEO2X_USE_EXTERNAL_SPDLOG=OFF \
-DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=video2x-linux-ubuntu-amd64/usr
cmake --build build --config Release --target install --parallel

View File

@ -1,12 +1,12 @@
pkgname=video2x
pkgver=r874.66c623f
pkgver=r958.996b0bf
pkgrel=1
pkgdesc="A machine learning-based lossless video super resolution framework"
pkgdesc="A machine learning-based video super resolution and frame interpolation framework"
arch=('x86_64')
url="https://github.com/k4yt3x/video2x"
license=('AGPL3')
depends=('ffmpeg' 'ncnn' 'vulkan-driver' 'spdlog' 'boost-libs')
makedepends=('git' 'cmake' 'make' 'clang' 'pkgconf' 'vulkan-headers' 'openmp' 'boost')
makedepends=('git' 'cmake' 'clang' 'vulkan-headers' 'openmp' 'boost')
pkgver() {
printf "r%s.%s" "$(git rev-list --count HEAD)" "$(git rev-parse --short HEAD)"
@ -17,7 +17,8 @@ prepare() {
}
build() {
cmake -B build -S .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr
cmake -B build -S .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr \
-DCMAKE_CXX_COMPILER=clang++ -DVIDEO2X_ENABLE_X86_64_V3=ON
cmake --build build --config Release --parallel
}

View File

@ -135,6 +135,7 @@ AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat
return best_pix_fmt;
}
[[gnu::target_clones("default", "avx2", "avx512f")]]
float get_frame_diff(AVFrame *frame1, AVFrame *frame2) {
if (!frame1 || !frame2) {
logger()->error("Invalid frame(s) provided for comparison");

View File

@ -11,6 +11,7 @@ namespace video2x {
namespace conversions {
// Convert AVFrame format
[[gnu::target_clones("default", "avx2", "avx512f")]]
AVFrame *convert_avframe_pix_fmt(AVFrame *src_frame, AVPixelFormat pix_fmt) {
AVFrame *dst_frame = av_frame_alloc();
if (dst_frame == nullptr) {
@ -67,6 +68,7 @@ AVFrame *convert_avframe_pix_fmt(AVFrame *src_frame, AVPixelFormat pix_fmt) {
}
// Convert AVFrame to ncnn::Mat by copying the data
[[gnu::target_clones("default", "avx2", "avx512f")]]
ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame) {
AVFrame *converted_frame = nullptr;
@ -106,6 +108,7 @@ ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame) {
}
// Convert ncnn::Mat to AVFrame with a specified pixel format (this part is unchanged)
[[gnu::target_clones("default", "avx2", "avx512f")]]
AVFrame *ncnn_mat_to_avframe(const ncnn::Mat &mat, AVPixelFormat pix_fmt) {
int ret;

View File

@ -22,7 +22,7 @@ Decoder::~Decoder() {
}
}
AVPixelFormat Decoder::get_hw_format(AVCodecContext *_, const AVPixelFormat *pix_fmts) {
AVPixelFormat Decoder::get_hw_format(AVCodecContext *, const AVPixelFormat *pix_fmts) {
for (const AVPixelFormat *p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
if (*p == hw_pix_fmt_) {
return *p;

View File

@ -254,6 +254,7 @@ int Encoder::init(
return 0;
}
[[gnu::target_clones("default", "avx2", "avx512f")]]
int Encoder::write_frame(AVFrame *frame, int64_t frame_idx) {
AVFrame *converted_frame = nullptr;
int ret;
@ -325,6 +326,7 @@ int Encoder::write_frame(AVFrame *frame, int64_t frame_idx) {
return 0;
}
[[gnu::target_clones("default", "avx2", "avx512f")]]
int Encoder::flush() {
int ret;
AVPacket *enc_pkt = av_packet_alloc();

View File

@ -28,6 +28,7 @@ VideoProcessor::VideoProcessor(
hw_device_type_(hw_device_type),
benchmark_(benchmark) {}
[[gnu::target_clones("default", "avx2", "avx512f")]]
int VideoProcessor::process(
const std::filesystem::path in_fname,
const std::filesystem::path out_fname