// Four-component single-precision vector.
// Members are declared in x, y, z, w order; keep that order so the memory
// layout (four consecutive floats) stays the same.
struct Vector4
{
    float x;
    float y;
    float z;
    float w;
};
// 4x4 single-precision matrix stored as a two-dimensional float array.
// SSE_VectorMultiplyMatrix treats the first index as the row and the second
// as the column (column j is _M[0][j] .. _M[3][j]).
struct Matrix
{
    float _M[4][4];
};
void SSE_VectorMultiplyMatrix(const Vector4& v,const Matrix& m1,Vector4& ret)
{
Vector4 va,vb,vc,vd;
Vector4 *pva,*pvb,*pvc,*pvd;
const Vector4 *pv;
//取出矩阵每一列
va.x = m1._M[0][0];
va.y = m1._M[1][0];
va.z = m1._M[2][0];
va.w = m1._M[3][0];
vb.x = m1._M[0][1];
vb.y = m1._M[1][1];
vb.z = m1._M[2][1];
vb.w = m1._M[3][1];
vc.x = m1._M[0][2];
vc.y = m1._M[1][2];
vc.z = m1._M[2][2];
vc.w = m1._M[3][2];
vd.x = m1._M[0][3];
vd.y = m1._M[1][3];
vd.z = m1._M[2][3];
vd.w = m1._M[3][3];
pva = &va;
pvb = &vb;
pvc = &vc;
pvd = &vd;
pv = &v;
__asm
{
//矩阵四列放入mmx0-mmx3
MOV EAX, pva // Load pointer into CPU reg
MOVUPS XMM0, [EAX]
MOV EAX, pvb // Load pointer into CPU reg
MOVUPS XMM1, [EAX]
MOV EAX, pvc // Load pointer into CPU reg
MOVUPS XMM2, [EAX]
MOV EAX, pvd // Load pointer into CPU reg
MOVUPS XMM3, [EAX]
//向量放入 mmx4
MOV EAX, pv
MOVUPS XMM4, [EAX]
//向量点乘矩阵每列
MULPS XMM0,XMM4
MULPS XMM1,XMM4
MULPS XMM2,XMM4
MULPS XMM3,XMM4
//输出四个分量
MOVUPS [va], XMM0
MOVUPS [vb], XMM1
MOVUPS [vc], XMM2
MOVUPS [vd], XMM3
}
//四个分量求和得变换后向量
ret.x = va.w + va.x + va.y + va.z;
ret.y = vb.w + vb.x + vb.y + vb.z;
ret.z = vc.w + vc.x + vc.y + vc.z;
ret.w = vd.w + vd.x + vd.y + vd.z;
}