+ -
当前位置:首页 → 问答吧 → g++ 编译sse出错

g++ 编译sse出错

时间:2010-06-27

来源:互联网

代码如下:
#include <iostream>
#include <math.h>
#include <stdio.h>
#include <xmmintrin.h>

using namespace std;

typedef union{
__m128 m;
float f[4];
} sseTemp;

float dotWithSSE(float *a, float *b, const int len){
int step = len/4;
__m128* one = (__m128*)a;
__m128* two = (__m128*)b;
if(NULL == one || NULL == two){
printf("%d\n", __LINE__);
exit(1);
}

__m128 result = _mm_setzero_ps();
__m128 temp = _mm_set_ps(0.1f, 0.1f, 0.1f, 0.1f);

for(int i = 0; i < step -1; i++){
temp = _mm_add_ps(*one, *two);
temp = _mm_mul_ps(temp, temp);
result = _mm_add_ps(result, temp);
}

sseTemp s;
s.m = result;

float r = 0.0f;

r += s.f[0] + s.f[1] + s.f[2] + s.f[3];

if(0 != (len & 3)){
for(int i = 4*step; i < len; i++){
r += (a[i] - b[i])*(a[i] - b[i]);
}
}

return r;
}

float dotWithoutSSE(float* __restrict__ a, float* __restrict__ b, const int len){
float result = 0.0f, temp;

for(int i = 0; i < len; i++){
temp = (a[i] - b[i]);
result += temp*temp;
}

return result;
}

int runTest(const int len){
float *a, *b;
a = (float*) malloc(sizeof(float)*len);
b = (float*) malloc(sizeof(float)*len);
if(NULL == a || NULL == b){
printf("malloc fail\n");
exit(1);
}

for(int i = 0; i < len; i++){
a[i] = 1.0f*rand()/RAND_MAX;
b[i] = 1.0f*rand()/RAND_MAX;
}

clock_t start = clock();
float sse = dotWithSSE(a, b, len);
clock_t end = clock();
clock_t timeSSE = end - start;

start = clock();
float withoutSSE = dotWithoutSSE(a, b, len);
end = clock();
clock_t timeWithoutSSE = end - start;

if(fabs(sse - withoutSSE) < 0.00001){
printf("test pass\n");
}
printf("withSSE = %f, wihtoutSSE = %f", sse, withoutSSE);

free(a);
free(b);
}

int main(){

runTest(1000);
return 0;
}

使用gcc测试时,代码 temp = _mm_add_ps(*one, *two);越界,这另我百思不得其解,先谢谢了!

作者: yyfn风辰   发布时间: 2010-06-27

好像是地址没对齐,SSE 要求 16-Byte 对齐的。

作者: 变异老鼠   发布时间: 2010-06-27

回复 变异老鼠
谢谢你!

那在gcc中怎么将malloc分配的空间对齐到16字节啊?我试过__attribute__(aligned(16))不行啊!

作者: yyfn风辰   发布时间: 2010-06-27