From: LIU Zhiwei <zhiwei_liu@c-sky.com>
To: qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Cc: palmer@dabbelt.com, richard.henderson@linaro.org,
bin.meng@windriver.com, Alistair.Francis@wdc.com,
LIU Zhiwei <zhiwei_liu@c-sky.com>
Subject: [PATCH v2 03/37] target/riscv: 16-bit Addition & Subtraction Instructions
Date: Thu, 10 Jun 2021 15:58:34 +0800 [thread overview]
Message-ID: <20210610075908.3305506-4-zhiwei_liu@c-sky.com> (raw)
In-Reply-To: <20210610075908.3305506-1-zhiwei_liu@c-sky.com>
Include 5 groups: Wrap-around (dropping overflow), Signed Halving,
Unsigned Halving, Signed Saturation, and Unsigned Saturation.
Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
---
include/tcg/tcg-op-gvec.h | 10 +
target/riscv/helper.h | 30 ++
target/riscv/insn32.decode | 32 +++
target/riscv/insn_trans/trans_rvp.c.inc | 117 ++++++++
target/riscv/meson.build | 1 +
target/riscv/packed_helper.c | 354 ++++++++++++++++++++++++
target/riscv/translate.c | 1 +
tcg/tcg-op-gvec.c | 28 ++
8 files changed, 573 insertions(+)
create mode 100644 target/riscv/insn_trans/trans_rvp.c.inc
create mode 100644 target/riscv/packed_helper.c
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index c69a7de984..2dae9e78d0 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -386,10 +386,12 @@ void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 a);
void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
@@ -401,4 +403,12 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
+#if TARGET_LONG_BITS == 64
+#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
+#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
+#else
+#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
+#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
+#endif
+
#endif
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 415e37bc37..b6a71ade33 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1149,3 +1149,33 @@ DEF_HELPER_6(vcompress_vm_b, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vcompress_vm_h, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vcompress_vm_w, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vcompress_vm_d, void, ptr, ptr, ptr, ptr, env, i32)
+
+/* P extension function */
+DEF_HELPER_3(radd16, tl, env, tl, tl)
+DEF_HELPER_3(uradd16, tl, env, tl, tl)
+DEF_HELPER_3(kadd16, tl, env, tl, tl)
+DEF_HELPER_3(ukadd16, tl, env, tl, tl)
+DEF_HELPER_3(rsub16, tl, env, tl, tl)
+DEF_HELPER_3(ursub16, tl, env, tl, tl)
+DEF_HELPER_3(ksub16, tl, env, tl, tl)
+DEF_HELPER_3(uksub16, tl, env, tl, tl)
+DEF_HELPER_3(cras16, tl, env, tl, tl)
+DEF_HELPER_3(rcras16, tl, env, tl, tl)
+DEF_HELPER_3(urcras16, tl, env, tl, tl)
+DEF_HELPER_3(kcras16, tl, env, tl, tl)
+DEF_HELPER_3(ukcras16, tl, env, tl, tl)
+DEF_HELPER_3(crsa16, tl, env, tl, tl)
+DEF_HELPER_3(rcrsa16, tl, env, tl, tl)
+DEF_HELPER_3(urcrsa16, tl, env, tl, tl)
+DEF_HELPER_3(kcrsa16, tl, env, tl, tl)
+DEF_HELPER_3(ukcrsa16, tl, env, tl, tl)
+DEF_HELPER_3(stas16, tl, env, tl, tl)
+DEF_HELPER_3(rstas16, tl, env, tl, tl)
+DEF_HELPER_3(urstas16, tl, env, tl, tl)
+DEF_HELPER_3(kstas16, tl, env, tl, tl)
+DEF_HELPER_3(ukstas16, tl, env, tl, tl)
+DEF_HELPER_3(stsa16, tl, env, tl, tl)
+DEF_HELPER_3(rstsa16, tl, env, tl, tl)
+DEF_HELPER_3(urstsa16, tl, env, tl, tl)
+DEF_HELPER_3(kstsa16, tl, env, tl, tl)
+DEF_HELPER_3(ukstsa16, tl, env, tl, tl)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index f09f8d5faf..57f72fabf6 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -732,3 +732,35 @@ greviw 0110100 .......... 101 ..... 0011011 @sh5
gorciw 0010100 .......... 101 ..... 0011011 @sh5
slli_uw 00001. ........... 001 ..... 0011011 @sh
+
+# *** RV32P Extension ***
+add16 0100000 ..... ..... 000 ..... 1110111 @r
+radd16 0000000 ..... ..... 000 ..... 1110111 @r
+uradd16 0010000 ..... ..... 000 ..... 1110111 @r
+kadd16 0001000 ..... ..... 000 ..... 1110111 @r
+ukadd16 0011000 ..... ..... 000 ..... 1110111 @r
+sub16 0100001 ..... ..... 000 ..... 1110111 @r
+rsub16 0000001 ..... ..... 000 ..... 1110111 @r
+ursub16 0010001 ..... ..... 000 ..... 1110111 @r
+ksub16 0001001 ..... ..... 000 ..... 1110111 @r
+uksub16 0011001 ..... ..... 000 ..... 1110111 @r
+cras16 0100010 ..... ..... 000 ..... 1110111 @r
+rcras16 0000010 ..... ..... 000 ..... 1110111 @r
+urcras16 0010010 ..... ..... 000 ..... 1110111 @r
+kcras16 0001010 ..... ..... 000 ..... 1110111 @r
+ukcras16 0011010 ..... ..... 000 ..... 1110111 @r
+crsa16 0100011 ..... ..... 000 ..... 1110111 @r
+rcrsa16 0000011 ..... ..... 000 ..... 1110111 @r
+urcrsa16 0010011 ..... ..... 000 ..... 1110111 @r
+kcrsa16 0001011 ..... ..... 000 ..... 1110111 @r
+ukcrsa16 0011011 ..... ..... 000 ..... 1110111 @r
+stas16 1111010 ..... ..... 010 ..... 1110111 @r
+rstas16 1011010 ..... ..... 010 ..... 1110111 @r
+urstas16 1101010 ..... ..... 010 ..... 1110111 @r
+kstas16 1100010 ..... ..... 010 ..... 1110111 @r
+ukstas16 1110010 ..... ..... 010 ..... 1110111 @r
+stsa16 1111011 ..... ..... 010 ..... 1110111 @r
+rstsa16 1011011 ..... ..... 010 ..... 1110111 @r
+urstsa16 1101011 ..... ..... 010 ..... 1110111 @r
+kstsa16 1100011 ..... ..... 010 ..... 1110111 @r
+ukstsa16 1110011 ..... ..... 010 ..... 1110111 @r
diff --git a/target/riscv/insn_trans/trans_rvp.c.inc b/target/riscv/insn_trans/trans_rvp.c.inc
new file mode 100644
index 0000000000..43f395657a
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvp.c.inc
@@ -0,0 +1,117 @@
+/*
+ * RISC-V translation routines for the RVP Standard Extension.
+ *
+ * Copyright (c) 2021 T-Head Semiconductor Co., Ltd. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "tcg/tcg-op-gvec.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "tcg/tcg.h"
+
+/*
+ *** SIMD Data Processing Instructions
+ */
+
+/* 16-bit Addition & Subtraction Instructions */
+
+/*
+ * For some instructions, such as add16, an observation can be utilized:
+ * 1) If any reg is zero, it can be reduced to an inline op on the whole reg.
+ * 2) Otherwise, it can be accelerated by a vec op.
+ */
+/*
+ * Translate an R-format packed insn entirely inline.
+ * vecop operates on whole GPRs (valid only when no operand is x0);
+ * op is the plain scalar fallback passed to gen_arith.
+ */
+static inline bool
+r_inline(DisasContext *ctx, arg_r *a,
+ void (* vecop)(TCGv, TCGv, TCGv),
+ void (* op)(TCGv, TCGv, TCGv))
+{
+ if (!has_ext(ctx, RVP)) {
+ return false;
+ }
+ if (a->rd && a->rs1 && a->rs2) {
+ /* No operand is x0: apply the vector op directly on the GPRs. */
+ vecop(cpu_gpr[a->rd], cpu_gpr[a->rs1], cpu_gpr[a->rs2]);
+ } else {
+ /* Some operand is x0: use the generic scalar path instead. */
+ gen_arith(ctx, a, op);
+ }
+ return true;
+}
+
+/* Complete inline implementation */
+#define GEN_RVP_R_INLINE(NAME, VECOP, OP) \
+static bool trans_##NAME(DisasContext *s, arg_r *a) \
+{ \
+ return r_inline(s, a, VECOP, OP); \
+}
+
+GEN_RVP_R_INLINE(add16, tcg_gen_vec_add16_tl, tcg_gen_add_tl);
+GEN_RVP_R_INLINE(sub16, tcg_gen_vec_sub16_tl, tcg_gen_sub_tl);
+
+/* Out of line helpers for R format packed instructions */
+/*
+ * Translate an R-format packed insn via an out-of-line helper call:
+ * fetch rs1/rs2 into temporaries, call fn(dst, env, src1, src2),
+ * then write dst back to rd.
+ */
+static inline bool
+r_ool(DisasContext *ctx, arg_r *a, void (* fn)(TCGv, TCGv_ptr, TCGv, TCGv))
+{
+ TCGv src1, src2, dst;
+ if (!has_ext(ctx, RVP)) {
+ return false;
+ }
+
+ src1 = tcg_temp_new();
+ src2 = tcg_temp_new();
+ dst = tcg_temp_new();
+
+ gen_get_gpr(src1, a->rs1);
+ gen_get_gpr(src2, a->rs2);
+ fn(dst, cpu_env, src1, src2);
+ /* gen_set_gpr is expected to discard writes when rd == x0. */
+ gen_set_gpr(a->rd, dst);
+
+ tcg_temp_free(src1);
+ tcg_temp_free(src2);
+ tcg_temp_free(dst);
+ return true;
+}
+
+#define GEN_RVP_R_OOL(NAME) \
+static bool trans_##NAME(DisasContext *s, arg_r *a) \
+{ \
+ return r_ool(s, a, gen_helper_##NAME); \
+}
+
+GEN_RVP_R_OOL(radd16);
+GEN_RVP_R_OOL(uradd16);
+GEN_RVP_R_OOL(kadd16);
+GEN_RVP_R_OOL(ukadd16);
+GEN_RVP_R_OOL(rsub16);
+GEN_RVP_R_OOL(ursub16);
+GEN_RVP_R_OOL(ksub16);
+GEN_RVP_R_OOL(uksub16);
+GEN_RVP_R_OOL(cras16);
+GEN_RVP_R_OOL(rcras16);
+GEN_RVP_R_OOL(urcras16);
+GEN_RVP_R_OOL(kcras16);
+GEN_RVP_R_OOL(ukcras16);
+GEN_RVP_R_OOL(crsa16);
+GEN_RVP_R_OOL(rcrsa16);
+GEN_RVP_R_OOL(urcrsa16);
+GEN_RVP_R_OOL(kcrsa16);
+GEN_RVP_R_OOL(ukcrsa16);
+GEN_RVP_R_OOL(stas16);
+GEN_RVP_R_OOL(rstas16);
+GEN_RVP_R_OOL(urstas16);
+GEN_RVP_R_OOL(kstas16);
+GEN_RVP_R_OOL(ukstas16);
+GEN_RVP_R_OOL(stsa16);
+GEN_RVP_R_OOL(rstsa16);
+GEN_RVP_R_OOL(urstsa16);
+GEN_RVP_R_OOL(kstsa16);
+GEN_RVP_R_OOL(ukstsa16);
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index d5e0bc93ea..cc169e1b2c 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -17,6 +17,7 @@ riscv_ss.add(files(
'op_helper.c',
'vector_helper.c',
'bitmanip_helper.c',
+ 'packed_helper.c',
'translate.c',
))
diff --git a/target/riscv/packed_helper.c b/target/riscv/packed_helper.c
new file mode 100644
index 0000000000..b84abaaf25
--- /dev/null
+++ b/target/riscv/packed_helper.c
@@ -0,0 +1,354 @@
+/*
+ * RISC-V P Extension Helpers for QEMU.
+ *
+ * Copyright (c) 2021 T-Head Semiconductor Co., Ltd. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "exec/cpu_ldst.h"
+#include "fpu/softfloat.h"
+#include <math.h>
+#include "internals.h"
+
+/*
+ *** SIMD Data Processing Instructions
+ */
+
+/* 16-bit Addition & Subtraction Instructions */
+typedef void PackedFn3i(CPURISCVState *, void *, void *, void *, uint8_t);
+
+/* Define a common function to loop elements in packed register */
+/*
+ * Loop over the packed elements of a register.
+ * size is the element width in bytes; step is how many elements each
+ * callback invocation consumes (2 for the pairwise cross/straight ops,
+ * which write both halves of a 32-bit pair per call).
+ */
+static inline target_ulong
+rvpr(CPURISCVState *env, target_ulong a, target_ulong b,
+ uint8_t step, uint8_t size, PackedFn3i *fn)
+{
+ int i, passes = sizeof(target_ulong) / size;
+ target_ulong result = 0;
+
+ for (i = 0; i < passes; i += step) {
+ /* The callback indexes into a/b/result as arrays of elements. */
+ fn(env, &result, &a, &b, i);
+ }
+ return result;
+}
+
+#define RVPR(NAME, STEP, SIZE) \
+target_ulong HELPER(NAME)(CPURISCVState *env, target_ulong a, \
+ target_ulong b) \
+{ \
+ return rvpr(env, a, b, STEP, SIZE, (PackedFn3i *)do_##NAME);\
+}
+
+/* Signed halving add: (a + b) >> 1, widened to 64 bits so the sum
+ * cannot overflow before the shift. */
+static inline int32_t hadd32(int32_t a, int32_t b)
+{
+ return ((int64_t)a + b) >> 1;
+}
+
+/* radd16: element-wise signed halving add of packed 16-bit halves.
+ * Element order is irrelevant here (same index on both inputs),
+ * so no H2() endian fixup is needed. */
+static inline void do_radd16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int16_t *d = vd, *a = va, *b = vb;
+ d[i] = hadd32(a[i], b[i]);
+}
+
+RVPR(radd16, 1, 2);
+
+/* Unsigned halving add: 64-bit widening keeps the carry out of bit 31. */
+static inline uint32_t haddu32(uint32_t a, uint32_t b)
+{
+ return ((uint64_t)a + b) >> 1;
+}
+
+static inline void do_uradd16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint16_t *d = vd, *a = va, *b = vb;
+ d[i] = haddu32(a[i], b[i]);
+}
+
+RVPR(uradd16, 1, 2);
+
+static inline void do_kadd16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int16_t *d = vd, *a = va, *b = vb;
+ d[i] = sadd16(env, 0, a[i], b[i]);
+}
+
+RVPR(kadd16, 1, 2);
+
+static inline void do_ukadd16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint16_t *d = vd, *a = va, *b = vb;
+ d[i] = saddu16(env, 0, a[i], b[i]);
+}
+
+RVPR(ukadd16, 1, 2);
+
+/* Signed halving subtract: (a - b) >> 1 in 64-bit math, so the
+ * difference cannot overflow before halving. */
+static inline int32_t hsub32(int32_t a, int32_t b)
+{
+ return ((int64_t)a - b) >> 1;
+}
+
+/* Signed 64-bit halving subtract, computed without a wider type.
+ * 'over' isolates the sign bit when a - b truly overflowed. */
+static inline int64_t hsub64(int64_t a, int64_t b)
+{
+ int64_t res = a - b;
+ int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
+
+ /* With signed overflow, bit 64 is inverse of bit 63. */
+ return (res >> 1) ^ over;
+}
+
+static inline void do_rsub16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int16_t *d = vd, *a = va, *b = vb;
+ d[i] = hsub32(a[i], b[i]);
+}
+
+RVPR(rsub16, 1, 2);
+
+/* Unsigned halving subtract. Narrower operands are zero-extended to
+ * 64 bits, so when a < b the wrapped difference still halves to the
+ * correct result after truncation back to the element width. */
+static inline uint64_t hsubu64(uint64_t a, uint64_t b)
+{
+ return (a - b) >> 1;
+}
+
+static inline void do_ursub16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint16_t *d = vd, *a = va, *b = vb;
+ d[i] = hsubu64(a[i], b[i]);
+}
+
+RVPR(ursub16, 1, 2);
+
+static inline void do_ksub16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int16_t *d = vd, *a = va, *b = vb;
+ d[i] = ssub16(env, 0, a[i], b[i]);
+}
+
+RVPR(ksub16, 1, 2);
+
+static inline void do_uksub16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint16_t *d = vd, *a = va, *b = vb;
+ d[i] = ssubu16(env, 0, a[i], b[i]);
+}
+
+RVPR(uksub16, 1, 2);
+
+/* cras16 (cross add & subtract), one 32-bit pair per call:
+ *   low half  = a.low  - b.high
+ *   high half = a.high + b.low
+ * H2() corrects the 16-bit element index for host endianness. */
+static inline void do_cras16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = a[H2(i)] - b[H2(i + 1)];
+ d[H2(i + 1)] = a[H2(i + 1)] + b[H2(i)];
+}
+
+RVPR(cras16, 2, 2);
+
+static inline void do_rcras16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = hsub32(a[H2(i)], b[H2(i + 1)]);
+ d[H2(i + 1)] = hadd32(a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(rcras16, 2, 2);
+
+static inline void do_urcras16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = hsubu64(a[H2(i)], b[H2(i + 1)]);
+ d[H2(i + 1)] = haddu32(a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(urcras16, 2, 2);
+
+static inline void do_kcras16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = ssub16(env, 0, a[H2(i)], b[H2(i + 1)]);
+ d[H2(i + 1)] = sadd16(env, 0, a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(kcras16, 2, 2);
+
+static inline void do_ukcras16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = ssubu16(env, 0, a[H2(i)], b[H2(i + 1)]);
+ d[H2(i + 1)] = saddu16(env, 0, a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(ukcras16, 2, 2);
+
+static inline void do_crsa16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = a[H2(i)] + b[H2(i + 1)];
+ d[H2(i + 1)] = a[H2(i + 1)] - b[H2(i)];
+}
+
+RVPR(crsa16, 2, 2);
+
+static inline void do_rcrsa16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = hadd32(a[H2(i)], b[H2(i + 1)]);
+ d[H2(i + 1)] = hsub32(a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(rcrsa16, 2, 2);
+
+static inline void do_urcrsa16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = haddu32(a[H2(i)], b[H2(i + 1)]);
+ d[H2(i + 1)] = hsubu64(a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(urcrsa16, 2, 2);
+
+static inline void do_kcrsa16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = sadd16(env, 0, a[H2(i)], b[H2(i + 1)]);
+ d[H2(i + 1)] = ssub16(env, 0, a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(kcrsa16, 2, 2);
+
+static inline void do_ukcrsa16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = saddu16(env, 0, a[H2(i)], b[H2(i + 1)]);
+ d[H2(i + 1)] = ssubu16(env, 0, a[H2(i + 1)], b[H2(i)]);
+}
+
+RVPR(ukcrsa16, 2, 2);
+
+/* stas16 (straight add & subtract), one 32-bit pair per call:
+ *   low half  = a.low  - b.low
+ *   high half = a.high + b.high */
+static inline void do_stas16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = a[H2(i)] - b[H2(i)];
+ d[H2(i + 1)] = a[H2(i + 1)] + b[H2(i + 1)];
+}
+
+RVPR(stas16, 2, 2);
+
+static inline void do_rstas16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = hsub32(a[H2(i)], b[H2(i)]);
+ d[H2(i + 1)] = hadd32(a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(rstas16, 2, 2);
+
+static inline void do_urstas16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = hsubu64(a[H2(i)], b[H2(i)]);
+ d[H2(i + 1)] = haddu32(a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(urstas16, 2, 2);
+
+static inline void do_kstas16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = ssub16(env, 0, a[H2(i)], b[H2(i)]);
+ d[H2(i + 1)] = sadd16(env, 0, a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(kstas16, 2, 2);
+
+static inline void do_ukstas16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = ssubu16(env, 0, a[H2(i)], b[H2(i)]);
+ d[H2(i + 1)] = saddu16(env, 0, a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(ukstas16, 2, 2);
+
+static inline void do_stsa16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = a[H2(i)] + b[H2(i)];
+ d[H2(i + 1)] = a[H2(i + 1)] - b[H2(i + 1)];
+}
+
+RVPR(stsa16, 2, 2);
+
+static inline void do_rstsa16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = hadd32(a[H2(i)], b[H2(i)]);
+ d[H2(i + 1)] = hsub32(a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(rstsa16, 2, 2);
+
+static inline void do_urstsa16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = haddu32(a[H2(i)], b[H2(i)]);
+ d[H2(i + 1)] = hsubu64(a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(urstsa16, 2, 2);
+
+static inline void do_kstsa16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ int16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = sadd16(env, 0, a[H2(i)], b[H2(i)]);
+ d[H2(i + 1)] = ssub16(env, 0, a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(kstsa16, 2, 2);
+
+static inline void do_ukstsa16(CPURISCVState *env, void *vd, void *va,
+ void *vb, uint8_t i)
+{
+ uint16_t *d = vd, *a = va, *b = vb;
+ d[H2(i)] = saddu16(env, 0, a[H2(i)], b[H2(i)]);
+ d[H2(i + 1)] = ssubu16(env, 0, a[H2(i + 1)], b[H2(i + 1)]);
+}
+
+RVPR(ukstsa16, 2, 2);
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 0e6ede4d71..51b144e9be 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -908,6 +908,7 @@ static bool gen_unary(DisasContext *ctx, arg_r2 *a,
#include "insn_trans/trans_rvh.c.inc"
#include "insn_trans/trans_rvv.c.inc"
#include "insn_trans/trans_rvb.c.inc"
+#include "insn_trans/trans_rvp.c.inc"
#include "insn_trans/trans_privileged.c.inc"
/* Include the auto-generated decoder for 16 bit insn */
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 498a959839..a8898ba7bf 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -1742,6 +1742,20 @@ void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
gen_addv_mask(d, a, b, m);
}
+/* Packed add of two 16-bit lanes held in an i32. */
+void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+
+ /* t1 = a with its low lane cleared, so (t1 + b) cannot take a carry
+  * from bit 15 into bit 16: its high lane is exactly hi(a) + hi(b). */
+ tcg_gen_andi_i32(t1, a, ~0xffff);
+ /* t2 = full 32-bit sum; only its low 16 bits are used. */
+ tcg_gen_add_i32(t2, a, b);
+ tcg_gen_add_i32(t1, t1, b);
+ /* Merge the correct low lane from t2 into the correct high lane of t1. */
+ tcg_gen_deposit_i32(d, t1, t2, 0, 16);
+
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t2);
+}
+
void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
TCGv_i64 t1 = tcg_temp_new_i64();
@@ -1892,6 +1906,20 @@ void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
gen_subv_mask(d, a, b, m);
}
+/* Packed subtract of two 16-bit lanes held in an i32. */
+void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+
+ /* t1 = b with its low lane cleared, so (a - t1) cannot borrow from
+  * bit 16: its high lane is exactly hi(a) - hi(b). */
+ tcg_gen_andi_i32(t1, b, ~0xffff);
+ /* t2 = full 32-bit difference; only its low 16 bits are used. */
+ tcg_gen_sub_i32(t2, a, b);
+ tcg_gen_sub_i32(t1, a, t1);
+ /* Merge the correct low lane from t2 into the correct high lane of t1. */
+ tcg_gen_deposit_i32(d, t1, t2, 0, 16);
+
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t2);
+}
+
void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
TCGv_i64 t1 = tcg_temp_new_i64();
--
2.25.1
next prev parent reply other threads:[~2021-06-10 8:04 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-06-10 7:58 [PATCH v2 00/37] target/riscv: support packed extension v0.9.4 LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 01/37] target/riscv: implementation-defined constant parameters LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 02/37] target/riscv: Make the vector helper functions public LIU Zhiwei
2021-06-10 7:58 ` LIU Zhiwei [this message]
2021-06-10 18:00 ` [PATCH v2 03/37] target/riscv: 16-bit Addition & Subtraction Instructions Richard Henderson
2021-06-10 7:58 ` [PATCH v2 04/37] target/riscv: 8-bit Addition & Subtraction Instruction LIU Zhiwei
2021-06-10 19:39 ` Richard Henderson
2021-06-11 4:36 ` LIU Zhiwei
2021-06-24 6:05 ` LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 05/37] target/riscv: SIMD 16-bit Shift Instructions LIU Zhiwei
2021-06-10 19:44 ` Richard Henderson
2021-06-10 7:58 ` [PATCH v2 06/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 07/37] target/riscv: SIMD 16-bit Compare Instructions LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 08/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 09/37] target/riscv: SIMD 16-bit Multiply Instructions LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 10/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 11/37] target/riscv: SIMD 16-bit Miscellaneous Instructions LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 12/37] target/riscv: SIMD 8-bit " LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 13/37] target/riscv: 8-bit Unpacking Instructions LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 14/37] target/riscv: 16-bit Packing Instructions LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 15/37] target/riscv: Signed MSW 32x32 Multiply and Add Instructions LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 16/37] target/riscv: Signed MSW 32x16 " LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 17/37] target/riscv: Signed 16-bit Multiply 32-bit Add/Subtract Instructions LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 18/37] target/riscv: Signed 16-bit Multiply 64-bit " LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 19/37] target/riscv: Partial-SIMD Miscellaneous Instructions LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 20/37] target/riscv: 8-bit Multiply with 32-bit Add Instructions LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 21/37] target/riscv: 64-bit Add/Subtract Instructions LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 22/37] target/riscv: 32-bit Multiply " LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 23/37] target/riscv: Signed 16-bit Multiply with " LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 24/37] target/riscv: Non-SIMD Q15 saturation ALU Instructions LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 25/37] target/riscv: Non-SIMD Q31 " LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 26/37] target/riscv: 32-bit Computation Instructions LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 27/37] target/riscv: Non-SIMD Miscellaneous Instructions LIU Zhiwei
2021-06-10 7:58 ` [PATCH v2 28/37] target/riscv: RV64 Only SIMD 32-bit Add/Subtract Instructions LIU Zhiwei
2021-06-10 7:59 ` [PATCH v2 29/37] target/riscv: RV64 Only SIMD 32-bit Shift Instructions LIU Zhiwei
2021-06-10 7:59 ` [PATCH v2 30/37] target/riscv: RV64 Only SIMD 32-bit Miscellaneous Instructions LIU Zhiwei
2021-06-10 7:59 ` [PATCH v2 31/37] target/riscv: RV64 Only SIMD Q15 saturating Multiply Instructions LIU Zhiwei
2021-06-10 7:59 ` [PATCH v2 32/37] target/riscv: RV64 Only 32-bit " LIU Zhiwei
2021-06-10 7:59 ` [PATCH v2 33/37] target/riscv: RV64 Only 32-bit Multiply & Add Instructions LIU Zhiwei
2021-06-10 7:59 ` [PATCH v2 34/37] target/riscv: RV64 Only 32-bit Parallel " LIU Zhiwei
2021-06-10 7:59 ` [PATCH v2 35/37] target/riscv: RV64 Only Non-SIMD 32-bit Shift Instructions LIU Zhiwei
2021-06-10 7:59 ` [PATCH v2 36/37] target/riscv: RV64 Only 32-bit Packing Instructions LIU Zhiwei
2021-06-10 7:59 ` [PATCH v2 37/37] target/riscv: configure and turn on packed extension from command line LIU Zhiwei
2021-06-14 22:55 ` [PATCH v2 00/37] target/riscv: support packed extension v0.9.4 no-reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210610075908.3305506-4-zhiwei_liu@c-sky.com \
--to=zhiwei_liu@c-sky.com \
--cc=Alistair.Francis@wdc.com \
--cc=bin.meng@windriver.com \
--cc=palmer@dabbelt.com \
--cc=qemu-devel@nongnu.org \
--cc=qemu-riscv@nongnu.org \
--cc=richard.henderson@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).