yyqng

  博客园  :: 首页  :: 新随笔  :: 联系  :: 订阅  :: 管理

编译选项:

CXXFLAGS += -mavx2

#include <immintrin.h>

#include <cmath>
#include <ctime>
#include <iostream>
#include <vector>

/**
 * Sets the first `vector_size` elements of both `x` and `y` to 1.
 *
 * @param x            First array; must hold at least `vector_size` ints.
 * @param y            Second array; must hold at least `vector_size` ints.
 * @param vector_size  Number of leading elements to overwrite in each array.
 *                     Zero or negative values leave both arrays untouched.
 */
void reset_xy(int *x, int *y, int vector_size)
{
    int idx = 0;
    while (idx < vector_size) {
        x[idx] = 1;
        y[idx] = 1;
        ++idx;
    }
}

/**
 * Invokes `f` `repeat_times` times and reports how long the whole run took.
 *
 * Timing is taken with std::clock(), so the result is the processor time
 * consumed by the program between the two samples, expressed in seconds.
 *
 * @tparam F            Callable type invocable as `f()` through a const reference.
 * @param repeat_times  Number of invocations. Zero or negative values run
 *                      nothing (a counting-down `while (n--)` loop would
 *                      never terminate for a negative count).
 * @param f             The callable to time.
 * @return Elapsed std::clock() time in seconds.
 */
template <typename F>
double runtime_test(int repeat_times, const F& f)
{
    const std::clock_t start = std::clock();
    for (int run = 0; run < repeat_times; ++run) {
        f();
    }
    const std::clock_t end = std::clock();
    return static_cast<double>(end - start) / CLOCKS_PER_SEC;
}

/**
 * Benchmarks the element-wise update `x[i] += y[i]` over one million ints,
 * first with a plain scalar loop and then with AVX2 intrinsics, repeating
 * each variant `repeat_times` times via runtime_test().
 *
 * Prints the problem size, the first and last element of `x` after each
 * variant (as a sanity check that both produce the same values), both
 * timings, and the ratio noavx_time / avx_time to std::cout.
 *
 * Must be compiled with AVX2 enabled (e.g. -mavx2).
 *
 * @return Always 0.
 */
int avx_test()
{
    constexpr int repeat_times = 100;
    constexpr int vector_size = 1000 * 1000;
    constexpr int lanes = 8; // 8 x 32-bit ints fill one 256-bit register

    // Heap-backed storage: two one-million-int arrays as plain locals
    // (roughly 4 MB each with a 4-byte int) can exhaust the thread stack.
    std::vector<int> x_storage(vector_size, 0);
    std::vector<int> y_storage(vector_size, 0);
    int *const x = x_storage.data();
    int *const y = y_storage.data();

    reset_xy(x, y, vector_size);
    std::cout << "repeat_times = " << repeat_times << " vector_size = " << vector_size << std::endl;

    // Scalar reference implementation.
    auto f_noavx = [x, y]() {
        for (int i = 0; i < vector_size; i++) {
            x[i] += y[i];
        }
    };
    double noavx_time = runtime_test(repeat_times, f_noavx);
    std::cout << "x[0] = " << x[0] << " x[vector_size - 1] = " << x[vector_size - 1] << std::endl;

    // AVX2 implementation: add 8 ints per iteration using unaligned loads/stores.
    auto f_avx = [x, y]() {
        // Only whole groups of `lanes` elements go through the vector path, so
        // the 256-bit loads/stores never touch memory past the end of the data.
        constexpr int vector_end = vector_size - vector_size % lanes;
        for (int i = 0; i < vector_end; i += lanes) {
            int *x0 = x + i;
            int *y0 = y + i;
            __m256i v1 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(x0));
            __m256i v2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(y0));
            v1 = _mm256_add_epi32(v1, v2);
            _mm256_storeu_si256(reinterpret_cast<__m256i*>(x0), v1);
        }
        // Scalar tail for any leftover elements when vector_size is not a
        // multiple of `lanes`.
        for (int i = vector_end; i < vector_size; i++) {
            x[i] += y[i];
        }
    };
    // Start the second measurement from the same initial data as the first.
    reset_xy(x, y, vector_size);
    double avx_time = runtime_test(repeat_times, f_avx);
    std::cout << "x[0] = " << x[0] << " x[vector_size - 1] = " << x[vector_size - 1] << std::endl;
    std::cout << "noavx_time = " << noavx_time << " avx_time = " << avx_time << std::endl;
    std::cout << "noavx_time / avx_time = "<< noavx_time / avx_time << std::endl;
    return 0;
}

 

 

 
posted on 2022-10-29 21:16  zziii  阅读(7)  评论(0)  编辑  收藏  举报