RISC-V RVV第12讲之RVV 定点算术指令

RISC-V RVV第12 讲之RVV定点算术指令

RISC-V RVV第12 讲之RVV定点算术指令

V扩展提供定点的算术运算指令，Q类型数是实现定点数的一种方式，参考：Q (number format) - Wikipedia

Q数据类型完整格式为： Qm.n ，共需1（符号位）+ m （整数位） + n （小数位）bit位来表示数据。

当整数位m = 0时，除符号位以外都是小数位，表示区间为[-1, 1)，可简写为Qn。如Q15表示16bit，其中1bit为符号位，其余15bit为小数位。

RVV中提供一些Q类型(Q7, Q15, Q31, Q63)运算的指令，恰当的运用这些指令可以提高运算效率。

1 单宽度向量饱和加减

# 无符号定点数饱和加
vsaddu.vv vd, vs2, vs1, vm # Vector-vector
vsaddu.vx vd, vs2, rs1, vm # vector-scalar
vsaddu.vi vd, vs2, imm, vm # vector-immediate

# 有符号定点数饱和加
vsadd.vv vd, vs2, vs1, vm # Vector-vector
vsadd.vx vd, vs2, rs1, vm # vector-scalar
vsadd.vi vd, vs2, imm, vm # vector-immediate

# 无符号定点数饱和减
vssubu.vv vd, vs2, vs1, vm # Vector-vector
vssubu.vx vd, vs2, rs1, vm # vector-scalar

# 有符号定点数饱和减
vssub.vv vd, vs2, vs1, vm # Vector-vector
vssub.vx vd, vs2, rs1, vm # vector-scalar

以Q15 类型运算举例：

Qn的加法：

带饱和的Q15 定点数加法：

q15_t q_add_sat(q15_t a, q15_t b)
{
    q15_t result;
    int32_t tmp;

    tmp = (int32_t)a + (int32_t)b;
    if (tmp > 0x7FFF)
        tmp = 0x7FFF;
    if (tmp < -1 * 0x8000)
        tmp = -1 * 0x8000;
    result = (q15_t)tmp;

    return result;
}

若直接使用RVV指令来实现上述C代码，可以采用如下三条指令

1. vwadd      扩宽add
2. vmin/vmax  饱和
3. vnsra      截断

不过RVV提供了定点算术指令：vsadd，实现上述几条指令的效果

示例如下：

#define DATALEN 8
int main(void)
{
  const int16_t vec1[DATALEN] = { 1862, 1862, 1862, 1862, 32767, 32767, 32767, 32767, };
  const int16_t vec2[DATALEN] = { 1862, 1862, 1862, 1862, 32767, 32767, 32767, 32767, };

  int16_t res[DATALEN] = {0};

  const int16_t *pSrcA = vec1;
  const int16_t *pSrcB = vec2;
  int16_t *pDes = res;

  size_t avl = DATALEN;
  size_t vl;
  vint16m1_t op1, op2, rd;

  for (; (vl = __riscv_vsetvl_e16m1(avl)) > 0; avl -= vl) {
    // load数据
    op1 = __riscv_vle16_v_i16m1(pSrcA, vl);
    op2 = __riscv_vle16_v_i16m1(pSrcB, vl);
    pSrcA += vl;
    pSrcB += vl;

    // 定点数向量加法
    rd = __riscv_vsadd_vv_i16m1(op1, op2, vl);

    // store数据
    __riscv_vse16_v_i16m1 (pDes, rd, vl);
    pDes += vl;
  }

  // 数据打印
  for (int i = 0; i < DATALEN; i++) {
    printf("%d, ", res[i]);
  }
  printf("\r\n");

  return 0;
}

打印结果为：

res[8] = {3724, 3724, 3724, 3724, 32767, 32767, 32767, 32767}

Qn的减法：

同饱和加一样，带饱和的减法可使用vssub，一条指令实现如下C版本的效果。

q15_t q_sub_sat(q15_t a, q15_t b)
{
    q15_t result;
    int32_t tmp;

    tmp = (int32_t)a - (int32_t)b;
    if (tmp > 0x7FFF)
        tmp = 0x7FFF;
    if (tmp < -1 * 0x8000)
        tmp = -1 * 0x8000;
    result = (q15_t)tmp;

    return result;
}

示例如下：

#define DATALEN 8
int main(void)
{
  const int16_t vec1[DATALEN] = { 32767, 32767, 32767, 32767, -32767, -32767, -32767, -32767, };
  const int16_t vec2[DATALEN] = { 1862, 1862, 1862, 1862, 32767, 32767, 32767, 32767, };

  int16_t res[DATALEN] = {0};

  const int16_t *pSrcA = vec1;
  const int16_t *pSrcB = vec2;
  int16_t *pDes = res;

  size_t avl = DATALEN;
  size_t vl;
  vint16m1_t op1, op2, rd;

  for (; (vl = __riscv_vsetvl_e16m1(avl)) > 0; avl -= vl) {
    // load数据
    op1 = __riscv_vle16_v_i16m1(pSrcA, vl);
    op2 = __riscv_vle16_v_i16m1(pSrcB, vl);
    pSrcA += vl;
    pSrcB += vl;

    // 定点数向量减法
    rd = __riscv_vssub_vv_i16m1(op1, op2, vl);

    // store数据
    __riscv_vse16_v_i16m1 (pDes, rd, vl);
    pDes += vl;
  }

  // 数据打印
  for (int i = 0; i < DATALEN; i++) {
    printf("%d, ", res[i]);
  }
  printf("\r\n");

  return 0;
}

打印结果为：

res[8] = {30905, 30905, 30905, 30905, -32768, -32768, -32768, -32768}

2 单宽度向量平均加减

# Averaging add
# Averaging adds of unsigned integers.
vaaddu.vv vd, vs2, vs1, vm # roundoff_unsigned(vs2[i] + vs1[i], 1)
vaaddu.vx vd, vs2, rs1, vm # roundoff_unsigned(vs2[i] + x[rs1], 1)
# Averaging adds of signed integers.
vaadd.vv vd, vs2, vs1, vm # roundoff_signed(vs2[i] + vs1[i], 1)
vaadd.vx vd, vs2, rs1, vm # roundoff_signed(vs2[i] + x[rs1], 1)
# Averaging subtract
# Averaging subtract of unsigned integers.
vasubu.vv vd, vs2, vs1, vm # roundoff_unsigned(vs2[i] - vs1[i], 1)
vasubu.vx vd, vs2, rs1, vm # roundoff_unsigned(vs2[i] - x[rs1], 1)
# Averaging subtract of signed integers.
vasub.vv vd, vs2, vs1, vm # roundoff_signed(vs2[i] - vs1[i], 1)
vasub.vx vd, vs2, rs1, vm # roundoff_signed(vs2[i] - x[rs1], 1)

示例如下：

#define DATALEN 8
int main(void)
{
  const int16_t vec1[DATALEN] = { 1862, 1862, 1862, 1862, 32767, 32767, 32767, 32767, };
  const int16_t vec2[DATALEN] = { 1862, 1862, 1862, 1862, 32767, 32767, 32767, 32767, };

  int16_t res[DATALEN] = {0};

  const int16_t *pSrcA = vec1;
  const int16_t *pSrcB = vec2;
  int16_t *pDes = res;

  size_t avl = DATALEN;
  size_t vl;
  vint16m1_t op1, op2, rd;

  for (; (vl = __riscv_vsetvl_e16m1(avl)) > 0; avl -= vl) {
    // load数据
    op1 = __riscv_vle16_v_i16m1(pSrcA, vl);
    op2 = __riscv_vle16_v_i16m1(pSrcB, vl);
    pSrcA += vl;
    pSrcB += vl;

    // 定点数向量加法
    rd = __riscv_vaadd_vv_i16m1(op1, op2, __RISCV_VXRM_RDN, vl);

    // store数据
    __riscv_vse16_v_i16m1 (pDes, rd, vl);
    pDes += vl;
  }

  // 数据打印
  for (int i = 0; i < DATALEN; i++) {
    printf("%d, ", res[i]);
  }
  printf("\r\n");

  return 0;
}

打印结果为：

res[8] = {1862, 1862, 1862, 1862, 32767, 32767, 32767, 32767}

3 单宽度向量饱和乘

# Signed saturating and rounding fractional multiply
# See vxrm description for rounding calculation
vsmul.vv vd, vs2, vs1, vm # vd[i] = clip(roundoff_signed(vs2[i]*vs1[i], SEW-1))
vsmul.vx vd, vs2, rs1, vm # vd[i] = clip(roundoff_signed(vs2[i]*x[rs1], SEW-1))

Qn的乘法：

带饱和的乘法V扩展指令：vsmul

如：Q15向量乘法

// saturate to range of int16_t
int16_t sat16(int32_t x)
{
	if (x > 0x7FFF) return 0x7FFF;
	else if (x < -0x8000) return -0x8000;
	else return (int16_t)x;
}

int16_t q_mul(int16_t a, int16_t b)
{
    int16_t result;
    int32_t temp;

    temp = (int32_t)a * (int32_t)b; // result type is operand's type
    // Correct by dividing by base and saturate result
    result = sat16(temp >> 15);

    return result;
}

示例如下：

#define DATALEN 8
int main(void)
{
  const int16_t vec1[DATALEN] = { 32767, 32767, 32767, 32767, -32767, -32767, -32767, -32767, };
  const int16_t vec2[DATALEN] = { 1862, 1862, 1862, 1862, 32767, 32767, 32767, 32767, };

  int16_t res[DATALEN] = {0};

  const int16_t *pSrcA = vec1;
  const int16_t *pSrcB = vec2;
  int16_t *pDes = res;

  size_t avl = DATALEN;
  size_t vl;
  vint16m1_t op1, op2, rd;

  for (; (vl = __riscv_vsetvl_e16m1(avl)) > 0; avl -= vl) {
    // load数据
    op1 = __riscv_vle16_v_i16m1(pSrcA, vl);
    op2 = __riscv_vle16_v_i16m1(pSrcB, vl);
    pSrcA += vl;
    pSrcB += vl;

    rd = __riscv_vsmul_vv_i16m1(op1, op2, __RISCV_VXRM_RDN, vl);

    // store数据
    __riscv_vse16_v_i16m1 (pDes, rd, vl);
    pDes += vl;
  }

  // 数据打印
  for (int i = 0; i < DATALEN; i++) {
    printf("%d, ", res[i]);
  }
  printf("\r\n");

  return 0;
}

打印结果为：

res[4] = {1861, 1861, 1861, 1861, -32767, -32767, -32767, -32767}

4 单宽度向量缩放移位指令

 # Scaling shift right logical
 vssrl.vv vd, vs2, vs1, vm # vd[i] = roundoff_unsigned(vs2[i], vs1[i])
 vssrl.vx vd, vs2, rs1, vm # vd[i] = roundoff_unsigned(vs2[i], x[rs1])
 vssrl.vi vd, vs2, uimm, vm # vd[i] = roundoff_unsigned(vs2[i], uimm)
 # Scaling shift right arithmetic
 vssra.vv vd, vs2, vs1, vm # vd[i] = roundoff_signed(vs2[i],vs1[i])
 vssra.vx vd, vs2, rs1, vm # vd[i] = roundoff_signed(vs2[i], x[rs1])
 vssra.vi vd, vs2, uimm, vm # vd[i] = roundoff_signed(vs2[i], uimm)

注释中：

roundoff_unsigned(v, d) = (unsigned(v) >> d) + r
roundoff_signed(v, d) = (signed(v) >> d) + r

示例如下：

#define DATALEN 8
int main(void)
{
  const int16_t vec1[DATALEN] = { 32767, 32767, 32767, 32767, -32767, -32767, -32767, -32767, };

  int16_t res[DATALEN] = {0};

  const int16_t *pSrcA = vec1;
  int16_t *pDes = res;

  size_t avl = DATALEN;
  size_t vl;
  vint16m1_t op1, rd;
  size_t shift = 2;

  for (; (vl = __riscv_vsetvl_e16m1(avl)) > 0; avl -= vl) {
    // load数据
    op1 = __riscv_vle16_v_i16m1(pSrcA, vl);
    pSrcA += vl;

    rd = __riscv_vssra_vx_i16m1(op1, shift, __RISCV_VXRM_RDN, vl);

    // store数据
    __riscv_vse16_v_i16m1 (pDes, rd, vl);
    pDes += vl;
  }

  // 数据打印
  for (int i = 0; i < DATALEN; i++) {
    printf("%d, ", res[i]);
  }
  printf("\r\n");

  return 0;
}

打印结果为：

res[8] = {8191, 8191, 8191, 8191, -8192, -8192, -8192, -8192}

5 缩减向量定点数剪切指令

# Narrowing unsigned clip
# SEW 2*SEW SEW
 vnclipu.wv vd, vs2, vs1, vm # vd[i] = clip(roundoff_unsigned(vs2[i], vs1[i]))
 vnclipu.wx vd, vs2, rs1, vm # vd[i] = clip(roundoff_unsigned(vs2[i], x[rs1]))
 vnclipu.wi vd, vs2, uimm, vm # vd[i] = clip(roundoff_unsigned(vs2[i], uimm))
# Narrowing signed clip
 vnclip.wv vd, vs2, vs1, vm # vd[i] = clip(roundoff_signed(vs2[i], vs1[i]))
 vnclip.wx vd, vs2, rs1, vm # vd[i] = clip(roundoff_signed(vs2[i], x[rs1]))
 vnclip.wi vd, vs2, uimm, vm # vd[i] = clip(roundoff_signed(vs2[i], uimm))

示例如下：

#define DATALEN 8
int main(void)
{
  const int16_t vec1[DATALEN] = { 32767, 32767, 32767, 32767, -32767, -32767, -32767, -32767, };

  int8_t res[DATALEN] = {0};

  const int16_t *pSrcA = vec1;
  int8_t *pDes = res;

  size_t avl = DATALEN;
  size_t vl;
  vint16m1_t op1;
  vint8mf2_t rd;
  size_t shift = 2;

  for (; (vl = __riscv_vsetvl_e16m1(avl)) > 0; avl -= vl) {
    // load数据
    op1 = __riscv_vle16_v_i16m1(pSrcA, vl);
    pSrcA += vl;

    rd = __riscv_vnclip_wx_i8mf2(op1, shift, __RISCV_VXRM_RDN, vl);

    // store数据
    __riscv_vse8_v_i8mf2 (pDes, rd, vl);
    pDes += vl;
  }

  // 数据打印
  for (int i = 0; i < DATALEN; i++) {
    printf("%d, ", res[i]);
  }
  printf("\r\n");

  return 0;
}

打印结果为：

res[8] = {127, 127, 127, 127, -128, -128, -128, -128}

参考：

posted @ 2025-04-12 16:19 sureZ_ok 阅读(555) 评论(2) 收藏举报

刷新页面返回顶部

sureZ

RISC-V RVV第12讲之RVV 定点算术指令

RISC-V RVV第12 讲之RVV定点算术指令

1 单宽度向量饱和加减

2 单宽度向量平均加减

3 单宽度向量饱和乘

4 单宽度向量缩放移位指令

5 缩减向量定点数剪切指令

公告