From 50ef2e3b28960163c71cf3988f100e06ca44952b Mon Sep 17 00:00:00 2001
From: Yury Gribov
Date: Sun, 7 Apr 2024 14:41:37 +0300
Subject: [PATCH] Added test of vector arguments.

---
Review notes (this region is ignored when the patch is applied):
 * The x86_64 slow path now saves/restores the widest vector argument
   registers enabled at build time (ZMM0-7, YMM0-7, XMM0-7 or MM0-7)
   around the call to the resolver; tests/vector-args exercises this.
 * ZMM handling and the 16-lane test vector are gated on __AVX512F__
   rather than __AVX2__: vmovdqu32 on %zmmN is an AVX-512F instruction
   and AVX2 alone only provides 256-bit YMM registers.
 * main.c includes <stdio.h> for printf and initializes every lane of
   the 8- and 16-lane test vectors.
 * Line breaks and leading whitespace were reconstructed; if a context
   line fails to match, try `git apply --ignore-whitespace`.

 .github/workflows/ci.yml       | 16 ++++++++
 arch/x86_64/table.S.tpl        | 73 +++++++++++++++++++++++++++++++++-
 scripts/travis.sh              |  4 ++
 tests/stack-args/run.sh        |  5 +--
 tests/vector-args/interposed.c | 34 ++++++++++++++++
 tests/vector-args/interposed.h | 33 +++++++++++++++
 tests/vector-args/main.c       | 36 +++++++++++++++++
 tests/vector-args/run.sh       | 38 ++++++++++++++++++
 tests/vector-args/test.ref     |  0
 9 files changed, 233 insertions(+), 6 deletions(-)
 create mode 100644 tests/vector-args/interposed.c
 create mode 100644 tests/vector-args/interposed.h
 create mode 100644 tests/vector-args/main.c
 create mode 100755 tests/vector-args/run.sh
 create mode 100644 tests/vector-args/test.ref

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index cd40bdd..a3c21e1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -87,6 +87,22 @@ jobs:
           fi
         done
         codecov --required
+  avx:
+    runs-on: ubuntu-latest
+    env:
+      CFLAGS: -mavx
+    steps:
+    - uses: actions/checkout@v3
+    - name: Run tests
+      run: scripts/travis.sh
+#  avx2:
+#    runs-on: ubuntu-latest
+#    env:
+#      CFLAGS: -mavx2 -mavx512f
+#    steps:
+#    - uses: actions/checkout@v3
+#    - name: Run tests
+#      run: scripts/travis.sh
   x86:
     runs-on: ubuntu-latest
     env:
diff --git a/arch/x86_64/table.S.tpl b/arch/x86_64/table.S.tpl
index 1e67964..dd24f69 100644
--- a/arch/x86_64/table.S.tpl
+++ b/arch/x86_64/table.S.tpl
@@ -34,9 +34,20 @@ _${lib_suffix}_save_regs_and_resolve:
 #define DEC_STACK(d) subq $$d, %rsp; .cfi_adjust_cfa_offset d
 #define INC_STACK(d) addq $$d, %rsp; .cfi_adjust_cfa_offset -d
 
+#define PUSH_MMX_REG(reg) DEC_STACK(8); movq %reg, (%rsp); .cfi_rel_offset reg, 0
+#define POP_MMX_REG(reg) movq (%rsp), %reg; .cfi_restore reg; INC_STACK(8)
+
 #define PUSH_XMM_REG(reg) DEC_STACK(16); movdqa %reg, (%rsp); .cfi_rel_offset reg, 0
 #define POP_XMM_REG(reg) movdqa (%rsp), %reg; .cfi_restore reg; INC_STACK(16)
 
+// TODO: cfi_offset/cfi_restore
+#define PUSH_YMM_REG(reg) DEC_STACK(32); vmovdqu %reg, (%rsp)
+#define POP_YMM_REG(reg) vmovdqu (%rsp), %reg; INC_STACK(32)
+
+// TODO: cfi_offset/cfi_restore
+#define PUSH_ZMM_REG(reg) DEC_STACK(64); vmovdqu32 %reg, (%rsp)
+#define POP_ZMM_REG(reg) vmovdqu32 (%rsp), %reg; INC_STACK(64)
+
   // Slow path which calls dlsym, taken only on first call.
   // All registers are stored to handle arbitrary calling conventions
   // (except x87 FPU registers which do not have to be preserved).
@@ -44,8 +55,6 @@ _${lib_suffix}_save_regs_and_resolve:
 
   .cfi_def_cfa_offset 8  // Return address
 
-  // FIXME: AVX (YMM, ZMM) registers are NOT saved to simplify code.
-
   PUSH_REG(rdi)  // 16
   mov 0x10(%rsp), %rdi
   PUSH_REG(rax)
@@ -62,6 +71,26 @@ _${lib_suffix}_save_regs_and_resolve:
   PUSH_REG(r13)  // 16
   PUSH_REG(r14)
   PUSH_REG(r15)  // 16
+
+#ifdef __AVX512F__
+  PUSH_ZMM_REG(zmm0)
+  PUSH_ZMM_REG(zmm1)
+  PUSH_ZMM_REG(zmm2)
+  PUSH_ZMM_REG(zmm3)
+  PUSH_ZMM_REG(zmm4)
+  PUSH_ZMM_REG(zmm5)
+  PUSH_ZMM_REG(zmm6)
+  PUSH_ZMM_REG(zmm7)
+#elif defined __AVX__
+  PUSH_YMM_REG(ymm0)
+  PUSH_YMM_REG(ymm1)
+  PUSH_YMM_REG(ymm2)
+  PUSH_YMM_REG(ymm3)
+  PUSH_YMM_REG(ymm4)
+  PUSH_YMM_REG(ymm5)
+  PUSH_YMM_REG(ymm6)
+  PUSH_YMM_REG(ymm7)
+#elif defined __SSE__
   PUSH_XMM_REG(xmm0)
   PUSH_XMM_REG(xmm1)
   PUSH_XMM_REG(xmm2)
@@ -70,10 +99,39 @@ _${lib_suffix}_save_regs_and_resolve:
   PUSH_XMM_REG(xmm5)
   PUSH_XMM_REG(xmm6)
   PUSH_XMM_REG(xmm7)
+#elif defined __MMX__
+  PUSH_MMX_REG(mm0)
+  PUSH_MMX_REG(mm1)
+  PUSH_MMX_REG(mm2)
+  PUSH_MMX_REG(mm3)
+  PUSH_MMX_REG(mm4)
+  PUSH_MMX_REG(mm5)
+  PUSH_MMX_REG(mm6)
+  PUSH_MMX_REG(mm7)
+#endif
 
   // Stack is just 8-byte aligned but callee will re-align to 16
   call _${lib_suffix}_tramp_resolve
 
+#ifdef __AVX512F__
+  POP_ZMM_REG(zmm7)
+  POP_ZMM_REG(zmm6)
+  POP_ZMM_REG(zmm5)
+  POP_ZMM_REG(zmm4)
+  POP_ZMM_REG(zmm3)
+  POP_ZMM_REG(zmm2)
+  POP_ZMM_REG(zmm1)
+  POP_ZMM_REG(zmm0)  // 16
+#elif defined __AVX__
+  POP_YMM_REG(ymm7)
+  POP_YMM_REG(ymm6)
+  POP_YMM_REG(ymm5)
+  POP_YMM_REG(ymm4)
+  POP_YMM_REG(ymm3)
+  POP_YMM_REG(ymm2)
+  POP_YMM_REG(ymm1)
+  POP_YMM_REG(ymm0)  // 16
+#elif defined __SSE__
   POP_XMM_REG(xmm7)
   POP_XMM_REG(xmm6)
   POP_XMM_REG(xmm5)
@@ -82,6 +140,17 @@ _${lib_suffix}_save_regs_and_resolve:
   POP_XMM_REG(xmm2)
   POP_XMM_REG(xmm1)
   POP_XMM_REG(xmm0)  // 16
+#elif defined __MMX__
+  POP_MMX_REG(mm7)
+  POP_MMX_REG(mm6)
+  POP_MMX_REG(mm5)
+  POP_MMX_REG(mm4)
+  POP_MMX_REG(mm3)
+  POP_MMX_REG(mm2)
+  POP_MMX_REG(mm1)
+  POP_MMX_REG(mm0)  // 16
+#endif
+
   POP_REG(r15)
   POP_REG(r14)  // 16
   POP_REG(r13)
diff --git a/scripts/travis.sh b/scripts/travis.sh
index 5ed6a0a..b53f40d 100755
--- a/scripts/travis.sh
+++ b/scripts/travis.sh
@@ -41,3 +41,7 @@ if ! echo "$ARCH" | grep -q powerpc; then
   tests/many-functions/run.sh $ARCH
 fi
 tests/stack-args/run.sh $ARCH
+if ! echo "$ARCH" | grep -q 'powerpc\|mips\|riscv'; then
+  # TODO: support vector types for remaining platforms
+  tests/vector-args/run.sh $ARCH
+fi
diff --git a/tests/stack-args/run.sh b/tests/stack-args/run.sh
index 43800a4..27f65bd 100755
--- a/tests/stack-args/run.sh
+++ b/tests/stack-args/run.sh
@@ -7,12 +7,9 @@
 # Use of this source code is governed by MIT license that can be
 # found in the LICENSE.txt file.
 
-# This is a simple test for Implib.so functionality.
+# This is a simple test that verifies that parameters are correctly passed on stack.
 # Run it like
 #   ./run.sh ARCH
-# where ARCH stands for any supported arch (arm, x86_64, etc., see `implib-gen -h' for full list).
-# Note that you may need to install qemu-user for respective platform
-# (i386 also needs gcc-multilib).
 
 set -eu
 
diff --git a/tests/vector-args/interposed.c b/tests/vector-args/interposed.c
new file mode 100644
index 0000000..b20b152
--- /dev/null
+++ b/tests/vector-args/interposed.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2024 Yury Gribov
+ *
+ * The MIT License (MIT)
+ *
+ * Use of this source code is governed by MIT license that can be
+ * found in the LICENSE.txt file.
+ */
+
+#include "interposed.h"
+
+__attribute__((visibility("default")))
+vector_type foo(vector_type x) {
+  return 3 *x;
+}
+
+static
+#ifdef __clang__
+# if __clang_major__ >= 14
+__attribute__((noipa))
+# else
+__attribute__((noinline))
+# endif
+#else
+__attribute__((noinline,noclone))
+#endif
+vector_type dummy(vector_type x0, vector_type x1, vector_type x2, vector_type x3, vector_type x4, vector_type x5, vector_type x6, vector_type x7) {
+  return x0 + x1 + x2 + x3 + x4 + x5 + x6 + x7;
+}
+
+__attribute__((constructor)) void touch_vector_regs() {
+  vector_type zero = {0};
+  dummy(zero, zero, zero, zero, zero, zero, zero, zero);
+}
diff --git a/tests/vector-args/interposed.h b/tests/vector-args/interposed.h
new file mode 100644
index 0000000..2cdb374
--- /dev/null
+++ b/tests/vector-args/interposed.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2024 Yury Gribov
+ *
+ * The MIT License (MIT)
+ *
+ * Use of this source code is governed by MIT license that can be
+ * found in the LICENSE.txt file.
+ */
+
+#ifndef INTERPOSED_H
+#define INTERPOSED_H
+
+// Determine number of 32-bit ints in native vector type
+// for each supported platform
+#if defined __AVX512F__ /* ZMM regs */
+# define VECTOR_SIZE 16
+#elif defined __AVX__ /* YMM regs */
+# define VECTOR_SIZE 8
+#elif defined __SSE__ /* XMM regs */ \
+  || defined __aarch64__ /* NEON regs */
+# define VECTOR_SIZE 4
+#elif defined __MMX__ /* MMX regs */ \
+  || defined __arm__ /* NEON regs */
+# define VECTOR_SIZE 2
+#else
+# error "Unknown platform"
+#endif
+
+typedef int vector_type __attribute__((vector_size(sizeof(int) * VECTOR_SIZE)));
+
+extern vector_type foo(vector_type x);
+
+#endif
diff --git a/tests/vector-args/main.c b/tests/vector-args/main.c
new file mode 100644
index 0000000..6d1d482
--- /dev/null
+++ b/tests/vector-args/main.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2024 Yury Gribov
+ *
+ * The MIT License (MIT)
+ *
+ * Use of this source code is governed by MIT license that can be
+ * found in the LICENSE.txt file.
+ */
+
+#include <stdio.h>
+
+#include "interposed.h"
+
+#if VECTOR_SIZE == 2
+# define VECTOR_INIT {1, 2}
+#elif VECTOR_SIZE == 4
+# define VECTOR_INIT {1, 2, 3, 4}
+#elif VECTOR_SIZE == 8
+# define VECTOR_INIT {1, 2, 3, 4, 5, 6, 7, 8}
+#elif VECTOR_SIZE == 16
+# define VECTOR_INIT {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
+#else
+# error "Unsupported vector size"
+#endif
+
+int main() {
+  vector_type x = VECTOR_INIT, res = foo(x), ref = 3 * x;
+  int i;
+  for (i = 0; i < VECTOR_SIZE; ++i) {
+    if (res[i] != ref[i]) {
+      printf("NOT OK\n");
+      return 1;
+    }
+  }
+  return 0;
+}
diff --git a/tests/vector-args/run.sh b/tests/vector-args/run.sh
new file mode 100755
index 0000000..5c3bd8e
--- /dev/null
+++ b/tests/vector-args/run.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+
+# Copyright 2024 Yury Gribov
+#
+# The MIT License (MIT)
+#
+# Use of this source code is governed by MIT license that can be
+# found in the LICENSE.txt file.
+
+# This test verifies that Implib trampolines save/restore vector regs correctly.
+# Run it like
+#   ./run.sh ARCH
+
+set -eu
+
+cd $(dirname $0)
+
+if test -n "${1:-}"; then
+  ARCH="$1"
+fi
+
+. ../common.sh
+
+CFLAGS="-g -O2 $CFLAGS"
+
+# Build shlib to test against
+$CC $CFLAGS -shared -fPIC interposed.c -o libinterposed.so
+
+# Prepare implib
+${PYTHON:-} ../../implib-gen.py -q --target $TARGET libinterposed.so
+
+# Build app
+$CC $CFLAGS main.c libinterposed.so.tramp.S libinterposed.so.init.c $LIBS
+
+LD_LIBRARY_PATH=.:${LD_LIBRARY_PATH:-} $INTERP ./a.out > a.out.log
+diff test.ref a.out.log
+
+echo SUCCESS
diff --git a/tests/vector-args/test.ref b/tests/vector-args/test.ref
new file mode 100644
index 0000000..e69de29