RISC-V RVV第13讲之RVV 浮点算术指令
RISC-V RVV第13 讲之RVV浮点算术指令
目录
这一章讲述RVV 向量浮点算术命令。
1 向量浮点异常标志
参与向量浮点运算的活跃元素如果产生异常,将会在fflag寄存器中设置相关标志位
注意:向量浮点运算会共用标量浮点运算的一些浮点状态寄存器。如浮点异常标志寄存器fflags, 浮点舍入模式寄存器frm 等
2 单宽度向量浮点加减法指令
提供向量浮点加减法指令。
# Floating-point add
vfadd.vv vd, vs2, vs1, vm # Vector-vector
vfadd.vf vd, vs2, rs1, vm # vector-scalar
# Floating-point subtract
vfsub.vv vd, vs2, vs1, vm # Vector-vector
vfsub.vf vd, vs2, rs1, vm # Vector-scalar vd[i] = vs2[i] - f[rs1]
vfrsub.vf vd, vs2, rs1, vm # Scalar-vector vd[i] = f[rs1] - vs2[i]
示例如下:
// 浮点加法
typedef float float32_t;
#define DATALEN 16
int main(void)
{
const float32_t vec1[DATALEN] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 };
const float32_t vec2[DATALEN] = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
float32_t res[DATALEN] = {0.0};
const float32_t *pSrcA = vec1;
const float32_t *pSrcB = vec2;
float32_t *pDes = res;
size_t avl = DATALEN;
size_t vl;
vfloat32m4_t op1, op2, rd;
for (; (vl = __riscv_vsetvl_e32m4(avl)) > 0; avl -= vl) {
// load数据
op1 = __riscv_vle32_v_f32m4(pSrcA, vl);
op2 = __riscv_vle32_v_f32m4(pSrcB, vl);
pSrcA += vl;
pSrcB += vl;
// 向量浮点加法
rd = __riscv_vfadd_vv_f32m4(op1, op2, vl);
// store数据
__riscv_vse32_v_f32m4 (pDes, rd, vl);
pDes += vl;
}
// 数据打印
for (int i = 0; i < DATALEN; i++) {
printf("%f, ", res[i]);
}
printf("\r\n");
return 0;
}
打印结果为:
res[16] = {2.000000, 3.000000, 4.000000, 5.000000, 6.000000, 7.000000, 8.000000, 9.000000, 10.000000, 11.000000, 12.000000, 13.000000, 14.000000, 15.000000, 16.000000, 17.000000,}
浮点数反减的示例如下:
// 浮点数反减
typedef float float32_t;
#define DATALEN 16
int main(void)
{
const float32_t vec1[DATALEN] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 };
float32_t res[DATALEN] = {0.0};
const float32_t *pSrcA = vec1;
float32_t *pDes = res;
size_t avl = DATALEN;
size_t vl;
vfloat32m4_t op1, rd;
float32_t op2 = 0.0; // 被减数
for (; (vl = __riscv_vsetvl_e32m4(avl)) > 0; avl -= vl) {
// load数据
op1 = __riscv_vle32_v_f32m4(pSrcA, vl);
pSrcA += vl;
// 向量减法
rd = __riscv_vfrsub_vf_f32m4(op1, op2, vl);
// store数据
__riscv_vse32_v_f32m4 (pDes, rd, vl);
pDes += vl;
}
// 数据打印
for (int i = 0; i < DATALEN; i++) {
printf("%f, ", res[i]);
}
printf("\r\n");
return 0;
}
打印日志如下:
res[16] = {-1.000000, -2.000000, -3.000000, -4.000000, -5.000000, -6.000000, -7.000000, -8.000000, -9.000000, -10.000000, -11.000000, -12.000000, -13.000000, -14.000000, -15.000000, -16.000000}
3 扩宽向量浮点加减指令
# Widening FP add/subtract, 2*SEW = SEW +/- SEW
vfwadd.vv vd, vs2, vs1, vm # vector-vector
vfwadd.vf vd, vs2, rs1, vm # vector-scalar
vfwsub.vv vd, vs2, vs1, vm # vector-vector
vfwsub.vf vd, vs2, rs1, vm # vector-scalar
# Widening FP add/subtract, 2*SEW = 2*SEW +/- SEW
vfwadd.wv vd, vs2, vs1, vm # vector-vector
vfwadd.wf vd, vs2, rs1, vm # vector-scalar
vfwsub.wv vd, vs2, vs1, vm # vector-vector
vfwsub.wf vd, vs2, rs1, vm # vector-scalar
示例如下:
typedef float float32_t;
typedef _Float16 float16_t;
#define DATALEN 16
int main(void)
{
float16_t vec1[DATALEN];
float16_t vec2[DATALEN];
for (int i = 0; i < DATALEN; i++) {
vec1[i] = 32767.0f;
vec2[i] = 100.0f;
}
float32_t res[DATALEN] = {0};
const float16_t *pSrcA = vec1;
const float16_t *pSrcB = vec2;
float32_t *pDes = res;
size_t avl = DATALEN;
size_t vl;
vfloat16m2_t op1, op2;
vfloat32m4_t rd;
for (; (vl = __riscv_vsetvl_e16m2(avl)) > 0; avl -= vl) {
// load数据
op1 = __riscv_vle16_v_f16m2(pSrcA, vl);
op2 = __riscv_vle16_v_f16m2(pSrcB, vl);
pSrcA += vl;
pSrcB += vl;
// 向量扩宽加法
rd = __riscv_vfwadd_vv_f32m4(op1, op2, vl);
// store数据
__riscv_vse32_v_f32m4 (pDes, rd, vl);
pDes += vl;
}
// 数据打印
printf("vfwadd:\r\n");
for (int i = 0; i < DATALEN; i++) {
printf("%f, ", res[i]);
}
printf("\r\n");
return 0;
}
打印结果为:
res[16] = {32868.000000, 32868.000000, 32868.000000, 32868.000000, 32868.000000, 32868.000000, 32868.000000, 32868.000000, 32868.000000, 32868.000000, 32868.000000, 32868.000000, 32868.000000, 32868.000000, 32868.000000, 32868.000000}
4 单宽度浮点乘除指令
# Floating-point multiply
vfmul.vv vd, vs2, vs1, vm # Vector-vector
vfmul.vf vd, vs2, rs1, vm # vector-scalar
# Floating-point divide
vfdiv.vv vd, vs2, vs1, vm # Vector-vector
vfdiv.vf vd, vs2, rs1, vm # vector-scalar
# Reverse floating-point divide vector = scalar / vector
vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i]
浮点数乘法示例如下:
// 浮点乘法
typedef float float32_t;
#define DATALEN 16
int main(void)
{
const float32_t vec1[DATALEN] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 };
const float32_t vec2[DATALEN] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 };
float32_t res[DATALEN] = {0.0};
const float32_t *pSrcA = vec1;
const float32_t *pSrcB = vec2;
float32_t *pDes = res;
size_t avl = DATALEN;
size_t vl;
vfloat32m4_t op1, op2, rd;
for (; (vl = __riscv_vsetvl_e32m4(avl)) > 0; avl -= vl) {
// load数据
op1 = __riscv_vle32_v_f32m4(pSrcA, vl);
op2 = __riscv_vle32_v_f32m4(pSrcB, vl);
pSrcA += vl;
pSrcB += vl;
// 向量乘法
rd = __riscv_vfmul_vv_f32m4(op1, op2, vl);
// store数据
__riscv_vse32_v_f32m4 (pDes, rd, vl);
pDes += vl;
}
// 数据打印
for (int i = 0; i < DATALEN; i++) {
printf("%f, ", res[i]);
}
printf("\r\n");
return 0;
}
打印结果为:
res[16] = {1.000000, 4.000000, 9.000000, 16.000000, 25.000000, 36.000000, 49.000000, 64.000000, 81.000000, 100.000000, 121.000000, 144.000000, 169.000000, 196.000000, 225.000000, 256.000000}
浮点数除法示例如下:
// 浮点数除法
typedef float float32_t;
#define DATALEN 16
int main(void)
{
const float32_t vec1[DATALEN] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 };
float32_t res[DATALEN] = {0};
const float32_t *pSrcA = vec1;
float32_t *pDes = res;
size_t avl = DATALEN;
size_t vl;
vfloat32m4_t op1, rd;
float32_t op2 = 4.0f;
for (; (vl = __riscv_vsetvl_e32m4(avl)) > 0; avl -= vl) {
// load数据
op1 = __riscv_vle32_v_f32m4(pSrcA, vl);
pSrcA += vl;
// 向量除法
rd = __riscv_vfdiv_vf_f32m4(op1, op2, vl);
// store数据
__riscv_vse32_v_f32m4 (pDes, rd, vl);
pDes += vl;
}
// 数据打印
for (int i = 0; i < DATALEN; i++) {
printf("%f, ", res[i]);
}
printf("\r\n");
return 0;
}
打印结果为:
res[16] = {0.250000, 0.500000, 0.750000, 1.000000, 1.250000, 1.500000, 1.750000, 2.000000, 2.250000, 2.500000, 2.750000, 3.000000, 3.250000, 3.500000, 3.750000, 4.000000}
5 扩展向量浮点乘法
# Widening floating-point multiply
vfwmul.vv vd, vs2, vs1, vm # vector-vector
vfwmul.vf vd, vs2, rs1, vm # vector-scalar
示例如下:
typedef float float32_t;
typedef _Float16 float16_t;
#define DATALEN 16
int main(void)
{
const float16_t vec1[DATALEN] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 };
const float16_t vec2[DATALEN] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 };
float32_t res[DATALEN] = {0.0};
const float16_t *pSrcA = vec1;
const float16_t *pSrcB = vec2;
float32_t *pDes = res;
size_t avl = DATALEN;
size_t vl;
vfloat16m2_t op1, op2;
vfloat32m4_t rd;
for (; (vl = __riscv_vsetvl_e16m2(avl)) > 0; avl -= vl) {
// load数据
op1 = __riscv_vle16_v_f16m2(pSrcA, vl);
op2 = __riscv_vle16_v_f16m2(pSrcB, vl);
pSrcA += vl;
pSrcB += vl;
// vfwmul
rd = __riscv_vfwmul_vv_f32m4(op1, op2, vl);
// store数据
__riscv_vse32_v_f32m4 (pDes, rd, vl);
pDes += vl;
}
// 数据打印
for (int i = 0; i < DATALEN; i++) {
printf("%f, ", res[i]);
}
printf("\r\n");
return 0;
}
打印结果为:
res[16] = {1.000000, 4.000000, 9.000000, 16.000000, 25.000000, 36.000000, 49.000000, 64.000000, 81.000000, 100.000000, 121.000000, 144.000000, 169.000000, 196.000000, 225.000000, 256.000000}
6 单宽度向量浮点乘加指令
# FP multiply-accumulate, overwrites addend
vfmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i]
vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i]
# FP negate-(multiply-accumulate), overwrites subtrahend
vfnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i]
vfnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i]
# FP multiply-subtract-accumulator, overwrites subtrahend
vfmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i]
vfmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i]
# FP negate-(multiply-subtract-accumulator), overwrites minuend
vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i]
vfnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i]
# FP multiply-add, overwrites multiplicand
vfmadd.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) + vs2[i]
vfmadd.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) + vs2[i]
# FP negate-(multiply-add), overwrites multiplicand
vfnmadd.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) - vs2[i]
vfnmadd.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) - vs2[i]
# FP multiply-sub, overwrites multiplicand
vfmsub.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vd[i]) - vs2[i]
vfmsub.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vd[i]) - vs2[i]
# FP negate-(multiply-sub), overwrites multiplicand
vfnmsub.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vd[i]) + vs2[i]
vfnmsub.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vd[i]) + vs2[i]
示例如下:
typedef float float32_t;
#define DATALEN 16
int main(void)
{
const float32_t vec1[DATALEN] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 };
const float32_t vec2[DATALEN] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 };
float32_t res[DATALEN] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
const float32_t *pSrcA = vec1;
const float32_t *pSrcB = vec2;
float32_t *pDes = res;
size_t avl = DATALEN;
size_t vl;
vfloat32m4_t op1, op2, rd;
for (; (vl = __riscv_vsetvl_e32m4(avl)) > 0; avl -= vl) {
// load数据
op1 = __riscv_vle32_v_f32m4(pSrcA, vl);
op2 = __riscv_vle32_v_f32m4(pSrcB, vl);
pSrcA += vl;
pSrcB += vl;
rd = __riscv_vle32_v_f32m4(pDes, vl);
// 向量乘加
rd = __riscv_vfmacc_vv_f32m4(rd, op1, op2, vl);
// store数据
__riscv_vse32_v_f32m4 (pDes, rd, vl);
pDes += vl;
}
// 数据打印
for (int i = 0; i < DATALEN; i++) {
printf("%f, ", res[i]);
}
printf("\r\n");
return 0;
}
打印结果为:
res[16] = {2.000000, 6.000000, 12.000000, 20.000000, 30.000000, 42.000000, 56.000000, 72.000000, 90.000000, 110.000000, 132.000000, 156.000000, 182.000000, 210.000000, 240.000000, 272.000000}
7 扩宽向量浮点乘加指令
提供扩宽的浮点乘加等指令。
# FP widening multiply-accumulate, overwrites addend
vfwmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i]
vfwmacc.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) + vd[i]
# FP widening negate-(multiply-accumulate), overwrites addend
vfwnmacc.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) - vd[i]
vfwnmacc.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) - vd[i]
# FP widening multiply-subtract-accumulator, overwrites addend
vfwmsac.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) - vd[i]
vfwmsac.vf vd, rs1, vs2, vm # vd[i] = +(f[rs1] * vs2[i]) - vd[i]
# FP widening negate-(multiply-subtract-accumulator), overwrites addend
vfwnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs1[i] * vs2[i]) + vd[i]
vfwnmsac.vf vd, rs1, vs2, vm # vd[i] = -(f[rs1] * vs2[i]) + vd[i]
8 向量浮点平方根指令
# Floating-point square root sqrt(x)
vfsqrt.v vd, vs2, vm # Vector-vector square root
示例见本篇10节。
9 向量浮点倒数平方根估计指令
# Floating-point reciprocal square-root estimate to 7 bits. estimate of 1/sqrt(x) accurate to 7 bits
vfrsqrt7.v vd, vs2, vm
示例见本篇10节。
10 向量浮点倒数估计指令
# Floating-point reciprocal (1/x) estimate to 7 bits.
vfrec7.v vd, vs2, vm
以上提供一个示例测试 vfsqrt,vfrsqrt7, vfrec7 指令。
typedef float float32_t;
#define DATALEN 16
int main(void)
{
const float32_t vec1[DATALEN] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 };
float32_t res[DATALEN] = {0.0};
const float32_t *pSrcA = vec1;
float32_t *pDes = res;
size_t avl = DATALEN;
size_t vl;
vfloat32m4_t op1, vfsqrt_rd, vfrsqrt7_rd, vfrec7_rd;
vl = __riscv_vsetvl_e32m4(avl);
// load数据
op1 = __riscv_vle32_v_f32m4(pSrcA, vl);
vfsqrt_rd = __riscv_vfsqrt_v_f32m4(op1, vl);
vfrsqrt7_rd = __riscv_vfrsqrt7_v_f32m4(op1, vl);
vfrec7_rd = __riscv_vfrec7_v_f32m4(op1, vl);
// store数据
__riscv_vse32_v_f32m4 (pDes, vfsqrt_rd, vl);
printf("fsqrt result:\r\n");
for (int i = 0; i < DATALEN; i++) {
printf("%f, ", res[i]);
}
printf("\r\n");
__riscv_vse32_v_f32m4 (pDes, vfrsqrt7_rd, vl);
printf("vfrsqrt7 result:\r\n");
for (int i = 0; i < DATALEN; i++) {
printf("%f, ", res[i]);
}
printf("\r\n");
__riscv_vse32_v_f32m4 (pDes, vfrec7_rd, vl);
printf("vfrec7 result:\r\n");
for (int i = 0; i < DATALEN; i++) {
printf("%f, ", res[i]);
}
printf("\r\n");
return 0;
}
打印结果为:
fsqrt result[16] = {1.000000, 1.414214, 1.732051, 2.000000, 2.236068, 2.449490, 2.645751, 2.828427, 3.000000, 3.162278, 3.316625, 3.464102, 3.605551, 3.741657, 3.872983, 4.000000}
vfrsqrt7 result[16] = {0.996094, 0.703125, 0.574219, 0.498047, 0.445312, 0.406250, 0.376953, 0.351562, 0.332031, 0.314453, 0.300781, 0.287109, 0.277344, 0.267578, 0.257812, 0.249023}
vfrec7 result[16] = {0.996094, 0.498047, 0.332031, 0.249023, 0.199219, 0.166016, 0.142578, 0.124512, 0.110840, 0.099609, 0.090820, 0.083008, 0.076660, 0.071289, 0.066406, 0.062256}
11 向量浮点MIN/MAX指令
# Floating-point minimum
vfmin.vv vd, vs2, vs1, vm # Vector-vector
vfmin.vf vd, vs2, rs1, vm # vector-scalar
# Floating-point maximum
vfmax.vv vd, vs2, vs1, vm # Vector-vector
vfmax.vf vd, vs2, rs1, vm # vector-scalar
示例如下:
typedef float float32_t;
#define DATALEN 16
int main(void)
{
float32_t vec1[DATALEN] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
float32_t vec2[DATALEN] = {5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0};
float32_t res[DATALEN] = {0.0};
const float32_t *pSrcA = vec1;
const float32_t *pSrcB = vec2;
float32_t *pDes = res;
size_t avl = DATALEN;
size_t vl;
vfloat32m4_t op1, op2, rd;
for (; (vl = __riscv_vsetvl_e32m4(avl)) > 0; avl -= vl) {
// load数据
op1 = __riscv_vle32_v_f32m4(pSrcA, vl);
op2 = __riscv_vle32_v_f32m4(pSrcB, vl);
pSrcA += vl;
pSrcB += vl;
// 求向量的最大值
rd = __riscv_vfmax_vv_f32m4(op1, op2, vl);
// store数据
__riscv_vse32_v_f32m4 (pDes, rd, vl);
pDes += vl;
}
// 数据打印
for (int i = 0; i < DATALEN; i++) {
printf("%f, ", res[i]);
}
printf("\r\n");
return 0;
}
打印结果为:
res[16] = {5.000000, 5.000000, 5.000000, 5.000000, 5.000000, 6.000000, 7.000000, 8.000000, 16.000000, 16.000000, 16.000000, 16.000000, 16.000000, 16.000000, 16.000000, 16.000000}
12 向量浮点符号注入指令
vfsgnj.vv vd, vs2, vs1, vm # Vector-vector
vfsgnj.vf vd, vs2, rs1, vm # vector-scalar
vfsgnjn.vv vd, vs2, vs1, vm # Vector-vector
vfsgnjn.vf vd, vs2, rs1, vm # vector-scalar
vfsgnjx.vv vd, vs2, vs1, vm # Vector-vector
vfsgnjx.vf vd, vs2, rs1, vm # vector-scalar
并提供两条伪指令:
vfneg.v vd,vs = vfsgnjn.vv vd,vs,vs
vfabs.v vd,vs = vfsgnjx.vv vd,vs,vs
提供一个示例如下:
typedef float float32_t;
#define DATALEN 16
void main(void)
{
float32_t vec1[DATALEN] = {-1.0, -2.0, -3.0, -4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, -13.0, -14.0, -15.0, 16.0};
float32_t res[DATALEN] = {0.0};
size_t avl = DATALEN;
size_t vl;
vfloat32m4_t op1, rd;
float32_t *pSrc = vec1;
float32_t *pDes = res;
for (; (vl = __riscv_vsetvl_e32m4(avl)) > 0; avl -= vl) {
op1 = __riscv_vle32_v_f32m4(pSrc, vl);
pSrc += vl;
rd = __riscv_vfabs_v_f32m4(op1, vl);
__riscv_vse32_v_f32m4(pDes, rd, vl);
pDes += vl;
}
// 数据打印
for (int i = 0; i < DATALEN; i++) {
printf("%f, ", res[i]);
}
printf("\r\n");
}
打印结果为:
res[16] = {1.000000, 2.000000, 3.000000, 4.000000, 5.000000, 6.000000, 7.000000, 8.000000, 9.000000, 10.000000, 11.000000, 12.000000, 13.000000, 14.000000, 15.000000, 16.000000}
13 向量浮点比较指令
# Compare equal
vmfeq.vv vd, vs2, vs1, vm # Vector-vector
vmfeq.vf vd, vs2, rs1, vm # vector-scalar
# Compare not equal
vmfne.vv vd, vs2, vs1, vm # Vector-vector
vmfne.vf vd, vs2, rs1, vm # vector-scalar
# Compare less than
vmflt.vv vd, vs2, vs1, vm # Vector-vector
vmflt.vf vd, vs2, rs1, vm # vector-scalar
# Compare less than or equal
vmfle.vv vd, vs2, vs1, vm # Vector-vector
vmfle.vf vd, vs2, rs1, vm # vector-scalar
# Compare greater than
vmfgt.vf vd, vs2, rs1, vm # vector-scalar
# Compare greater than or equal
vmfge.vf vd, vs2, rs1, vm # vector-scalar
14 向量浮点分类指令
vfclass.v vd, vs2, vm # Vector-vector
示例如下:
typedef float float32_t;
#define DATALEN 8
int main(void)
{
const float32_t vec1[DATALEN] = { 16.0, 16.50, -16.5000, -16.5001, +0.0, -0.0, -1e-38, 1e-38 };
uint32_t res[DATALEN] = {0.0};
const float32_t *pSrcA = vec1;
uint32_t *pDes = res;
size_t avl = DATALEN;
size_t vl;
vfloat32m2_t op1;
vuint32m2_t rd;
for (; (vl = __riscv_vsetvl_e32m2(avl)) > 0; avl -= vl) {
// load数据
op1 = __riscv_vle32_v_f32m2(pSrcA, vl);
pSrcA += vl;
rd = __riscv_vfclass_v_u32m2(op1, vl);
// store数据
__riscv_vse32_v_u32m2 (pDes, rd, vl);
pDes += vl;
}
// 数据打印
for (int i = 0; i < DATALEN; i++) {
printf("%u, ", res[i]);
}
printf("\r\n");
return 0;
}
打印日志为:
res[8] = {64, 64, 2, 2, 16, 8, 4, 32}
其值的含义需要查阅下表,见标量指令fclass含义表:

也即:
16.0 为规格化正数
16.50 为规格化正数
-16.5000 正0为规格化负数
-16.5001 为规格化负数
+0.0 为正0
-0.0 为负0
-1e-38 为非规格化负数
1e-38 为非规格化正数
15 向量浮点合并指令
vfmerge.vfm vd, vs2, rs1, v0 # vd[i] = v0.mask[i] ? f[rs1] : vs2[i]
示例如下:
typedef float float32_t;
#define DATALEN 16
int main(void)
{
const float32_t vec1[DATALEN] = {5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0};
const float32_t vec2[DATALEN] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
float32_t res[DATALEN] = {0};
const float32_t *pSrcA = vec1;
const float32_t *pSrcB = vec2;
float32_t *pDes = res;
size_t avl = DATALEN;
size_t vl;
vfloat32m4_t op1, op2, rd;
for (; (vl = __riscv_vsetvl_e32m4(avl)) > 0; avl -= vl) {
// load数据
op1 = __riscv_vle32_v_f32m4(pSrcA, vl);
op2 = __riscv_vle32_v_f32m4(pSrcB, vl);
pSrcA += vl;
pSrcB += vl;
vbool8_t mask = __riscv_vmflt_vv_f32m4_b8(op1, op2, vl);
// merge
rd = __riscv_vmerge_vvm_f32m4(op1, op2, mask, vl);
// store数据
__riscv_vse32_v_f32m4 (pDes, rd, vl);
pDes += vl;
}
// 数据打印
for (int i = 0; i < DATALEN; i++) {
printf("%f, ", res[i]);
}
printf("\r\n");
return 0;
}
打印结果为:
res[16] = {5.000000, 5.000000, 5.000000, 5.000000, 5.000000, 6.000000, 7.000000, 8.000000, 16.000000, 16.000000, 16.000000, 16.000000, 16.000000, 16.000000, 16.000000, 16.000000}
16 向量浮点移动指令
vfmv.v.f vd, rs1 # vd[i] = f[rs1]
17 单宽度浮点&整数类型转换指令
vfcvt.xu.f.v vd, vs2, vm # 将浮点数转换为无符号整数.
vfcvt.x.f.v vd, vs2, vm # 将浮点数转换为有符号整数.
vfcvt.rtz.xu.f.v vd, vs2, vm # 将浮点数转换为无符号整数, 归零截断.
vfcvt.rtz.x.f.v vd, vs2, vm # 将浮点数转换为有符号整数,归零截断
vfcvt.f.xu.v vd, vs2, vm # 将无符号数转换为浮点数
vfcvt.f.x.v vd, vs2, vm # 将有符号数转换为浮点数
例如,将浮点数转换为有符号整数:
typedef float float32_t;
typedef float float32_t;
#define DATALEN 8
int main(void)
{
const float32_t vec1[DATALEN] = { 16.0, 16.40, 16.5000, 16.5001, 16.9, -16.1000, -16.5000, -16.8 };
int32_t res[DATALEN] = {0.0};
const float32_t *pSrcA = vec1;
int32_t *pDes = res;
size_t avl = DATALEN;
size_t vl;
vfloat32m2_t op1;
vint32m2_t rd;
for (; (vl = __riscv_vsetvl_e32m2(avl)) > 0; avl -= vl) {
// load数据
op1 = __riscv_vle32_v_f32m2(pSrcA, vl);
pSrcA += vl;
// 向量浮点加法
rd = __riscv_vfcvt_x_f_v_i32m2(op1, vl);
// store数据
__riscv_vse32_v_i32m2 (pDes, rd, vl);
pDes += vl;
}
// 数据打印
for (int i = 0; i < DATALEN; i++) {
printf("%d, ", res[i]);
}
printf("\r\n");
return 0;
}
打印结果为:
res[4] = {16, 16, 16, 17, 17, -16, -16, -17}
归零截断:
typedef float float32_t;
#define DATALEN 8
int main(void)
{
const float32_t vec1[DATALEN] = { 16.0, 16.40, 16.5000, 16.5001, 16.9, -16.1000, -16.5000, -16.8 };
int32_t res[DATALEN] = {0.0};
const float32_t *pSrcA = vec1;
int32_t *pDes = res;
size_t avl = DATALEN;
size_t vl;
vfloat32m2_t op1;
vint32m2_t rd;
for (; (vl = __riscv_vsetvl_e32m2(avl)) > 0; avl -= vl) {
// load数据
op1 = __riscv_vle32_v_f32m2(pSrcA, vl);
pSrcA += vl;
// 向量浮点加法
rd = __riscv_vfcvt_x_f_v_i32m2(op1, vl);
// store数据
__riscv_vse32_v_i32m2 (pDes, rd, vl);
pDes += vl;
}
// 数据打印
for (int i = 0; i < DATALEN; i++) {
printf("%d, ", res[i]);
}
printf("\r\n");
return 0;
}
打印结果为:
res[4] = {16, 16, 16, 16, 16, -16, -16, -16,}
18 扩宽浮点/整数类型转换指令
vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer.
vfwcvt.rtz.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer, truncating.
vfwcvt.rtz.x.f.v vd, vs2, vm # Convert float to double-width signed integer, truncating.
vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
vfwcvt.f.x.v vd, vs2, vm # Convert signed integer to double-width float.
vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
19 缩减浮点/整数类型转换指令
vfncvt.xu.f.w vd, vs2, vm # Convert double-width float to unsigned integer.
vfncvt.x.f.w vd, vs2, vm # Convert double-width float to signed integer.
vfncvt.rtz.xu.f.w vd, vs2, vm # Convert double-width float to unsigned integer, truncating.
vfncvt.rtz.x.f.w vd, vs2, vm # Convert double-width float to signed integer, truncating.
vfncvt.f.xu.w vd, vs2, vm # Convert double-width unsigned integer to float.
vfncvt.f.x.w vd, vs2, vm # Convert double-width signed integer to float.
vfncvt.f.f.w vd, vs2, vm # Convert double-width float to single-width float.
vfncvt.rod.f.f.w vd, vs2, vm # Convert double-width float to single-width float,
# rounding towards odd.

浙公网安备 33010602011771号