多媒体指令(AVX加速两个数组逐元素相加)

#include <stdio.h>
#include <intrin.h>
#include <immintrin.h>

#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>

using namespace std;
/// Scalar baseline: element-wise addition re[i] = a[i] + b[i], timed.
///
/// The full pass over the arrays is repeated 4 times. The elapsed processor
/// time, in clock() ticks, is then written to std::cout followed by a newline.
///
/// @param a   first input array; must hold at least n elements
/// @param b   second input array; must hold at least n elements
/// @param re  output array; must hold at least n elements
/// @param n   number of elements to add; defaults to the 100000000 elements
///            the benchmark was originally hard-coded for
void test1(double *a, double *b, double *re, std::size_t n = 100000000)
{
    // clock() returns clock_t; keep that type instead of forcing it into size_t.
    const std::clock_t start = std::clock();
    for (int k = 0; k < 4; k++)
    {
        for (std::size_t i = 0; i < n; i++)
        {
            re[i] = a[i] + b[i];
        }
    }
    const std::clock_t stop = std::clock();
    std::cout << stop - start << std::endl;
}

/// AVX version: element-wise addition re[i] = a[i] + b[i], timed.
///
/// Elements are processed four at a time in 256-bit registers using unaligned
/// loads and stores, so the arrays need no particular alignment. Any trailing
/// n % 4 elements are added with a scalar loop. The full pass is repeated
/// 4 times, and the elapsed processor time, in clock() ticks, is then written
/// to std::cout followed by a newline.
///
/// @param a   first input array; must hold at least n elements
/// @param b   second input array; must hold at least n elements
/// @param re  output array; must hold at least n elements
/// @param n   number of elements to add; defaults to the 100000000 elements
///            the benchmark was originally hard-coded for
#if defined(__GNUC__) || defined(__clang__)
// GCC/Clang only allow AVX intrinsics inside code compiled for an AVX target;
// enable it for this function alone so the file does not require -mavx.
__attribute__((target("avx")))
#endif
void test2(double *a, double *b, double *re, std::size_t n = 100000000)
{
    const std::clock_t start = std::clock();
    // Largest multiple of 4 not exceeding n: the part handled by full vectors.
    const std::size_t vec_end = n - n % 4;
    for (int k = 0; k < 4; k++)
    {
        for (std::size_t i = 0; i < vec_end; i += 4)
        {
            // Load/store whole vectors straight from memory instead of
            // assembling them from scalars and reading lanes back one by one.
            const __m256d va = _mm256_loadu_pd(a + i);
            const __m256d vb = _mm256_loadu_pd(b + i);
            _mm256_storeu_pd(re + i, _mm256_add_pd(va, vb));
        }
        // Scalar tail for lengths that are not a multiple of 4.
        for (std::size_t i = vec_end; i < n; i++)
        {
            re[i] = a[i] + b[i];
        }
    }
    const std::clock_t stop = std::clock();
    std::cout << stop - start << std::endl;
}

/// Benchmark driver: fills two input arrays with a[i] = b[i] = i, runs the
/// scalar version (test1) and then the AVX version (test2) on them, and
/// finally waits for a key press via the shell's "pause" command.
int main(int argc, char* argv[])
{
    // test1/test2, as called here with three arguments, process exactly
    // 100000000 elements, so every buffer must be at least this long.
    const std::size_t count = 100000000;

    {
        // Vectors own the buffers, so they are released automatically when
        // this scope ends (before the pause), with no manual delete[].
        std::vector<double> a(count);
        std::vector<double> b(count);
        std::vector<double> re(count);
        for (std::size_t i = 0; i < count; i++)
        {
            a[i] = static_cast<double>(i);
            b[i] = static_cast<double>(i);
        }
        test1(a.data(), b.data(), re.data());
        test2(a.data(), b.data(), re.data());
    }

    // "pause" is a Windows shell command; it keeps the console window open.
    std::system("pause");
    return 0;
}

AVX 版本(test2)比普通循环版本(test1)大概能快 100 毫秒左右。

posted @ 2018-05-17 11:48  Dsp Tian  阅读(1628)  评论(0)  编辑  收藏  举报