All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 00/20] tcg improvements for ia64
@ 2013-10-31 20:21 Richard Henderson
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 01/20] tcg-ia64: Use TCGMemOp within qemu_ldst routines Richard Henderson
                   ` (19 more replies)
  0 siblings, 20 replies; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:21 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori, aurelien

This is an update and rebase of a patch set I sent at the
beginning of September.  If I get no comments on this round
I'll just send a pull request next week.


r~


Richard Henderson (20):
  tcg-ia64: Use TCGMemOp within qemu_ldst routines
  tcg-ia64: Use shortcuts for nop insns
  tcg-ia64: Handle constant calls
  tcg-ia64: Simplify brcond
  tcg-ia64: Move AREG0 to R32
  tcg-ia64: Avoid unnecessary stop bit in tcg_out_alu
  tcg-ia64: Use ADDS for small addition
  tcg-ia64: Use SUB_A3 and ADDS_A4 for subtraction
  tcg-ia64: Use A3 form of logical operations
  tcg-ia64: Introduce tcg_opc_mov_a
  tcg-ia64: Introduce tcg_opc_movi_a
  tcg-ia64: Introduce tcg_opc_ext_i
  tcg-ia64: Introduce tcg_opc_bswap64_i
  tcg-ia64: Re-bundle the tlb load
  tcg-ia64: Move bswap for store into tlb load
  tcg-ia64: Move tlb addend load into tlb read
  tcg-ia64: Reduce code duplication in tcg_out_qemu_ld
  tcg-ia64: Convert to new ldst helpers
  tcg-ia64: Move part of softmmu slow path out of line
  tcg-ia64: Optimize small arguments to exit_tb

 tcg/ia64/tcg-target.c | 1044 +++++++++++++++++++++++++++----------------------
 tcg/ia64/tcg-target.h |    4 +-
 2 files changed, 572 insertions(+), 476 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 01/20] tcg-ia64: Use TCGMemOp within qemu_ldst routines
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
@ 2013-10-31 20:21 ` Richard Henderson
  2013-11-03 22:55   ` Aurelien Jarno
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 02/20] tcg-ia64: Use shortcuts for nop insns Richard Henderson
                   ` (18 subsequent siblings)
  19 siblings, 1 reply; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:21 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori, aurelien

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 171 ++++++++++++++++++++++++++------------------------
 1 file changed, 90 insertions(+), 81 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 0656d39..e09ad24 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1496,7 +1496,7 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
    R2 is loaded with the address of the addend TLB entry.
    R57 is loaded with the address, zero extented on 32-bit targets. */
 static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
-                                    int s_bits, uint64_t offset_rw,
+                                    TCGMemOp s_bits, uint64_t offset_rw,
                                     uint64_t offset_addend)
 {
     tcg_out_bundle(s, mII,
@@ -1538,23 +1538,24 @@ static const void * const qemu_ld_helpers[4] = {
     helper_ldq_mmu,
 };
 
-static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
+static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
+                                   TCGMemOp opc)
 {
-    int addr_reg, data_reg, mem_index, s_bits, bswap;
-    uint64_t opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 };
-    uint64_t opc_ext_i29[8] = { OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0,
-                                OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 };
+    static const uint64_t opc_ld_m1[4] = {
+        OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
+    };
+    static const uint64_t opc_ext_i29[8] = {
+        OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0,
+        OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
+    };
+    int addr_reg, data_reg, mem_index;
+    TCGMemOp s_bits, bswap;
 
     data_reg = *args++;
     addr_reg = *args++;
     mem_index = *args;
-    s_bits = opc & 3;
-
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 1;
-#else
-    bswap = 0;
-#endif
+    s_bits = opc & MO_SIZE;
+    bswap = opc & MO_BSWAP;
 
     /* Read the TLB entry */
     tcg_out_qemu_tlb(s, addr_reg, s_bits,
@@ -1575,14 +1576,14 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                                TCG_REG_R3, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
-    if (bswap && s_bits == 1) {
+    if (bswap && s_bits == MO_16) {
         tcg_out_bundle(s, MmI,
                        tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
                                    TCG_REG_R8, TCG_REG_R3),
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
                                    TCG_REG_R8, TCG_REG_R8, 15, 15));
-    } else if (bswap && s_bits == 2) {
+    } else if (bswap && s_bits == MO_32) {
         tcg_out_bundle(s, MmI,
                        tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
                                    TCG_REG_R8, TCG_REG_R3),
@@ -1596,7 +1597,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
-    if (!bswap || s_bits == 0) {
+    if (!bswap) {
         tcg_out_bundle(s, miB,
                        tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
                                    mem_index, TCG_REG_R0),
@@ -1613,7 +1614,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                                    TCG_REG_B0, TCG_REG_B6));
     }
 
-    if (opc == 3) {
+    if (s_bits == MO_64) {
         tcg_out_bundle(s, miI,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
@@ -1623,7 +1624,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
         tcg_out_bundle(s, miI,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
-                       tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc],
+                       tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc & MO_SSIZE],
                                    data_reg, TCG_REG_R8));
     }
 }
@@ -1637,22 +1638,21 @@ static const void * const qemu_st_helpers[4] = {
     helper_stq_mmu,
 };
 
-static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
+static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
+                                   TCGMemOp opc)
 {
-    int addr_reg, data_reg, mem_index, bswap;
-    uint64_t opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 };
+    static const uint64_t opc_st_m4[4] = {
+        OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
+    };
+    int addr_reg, data_reg, mem_index;
+    TCGMemOp s_bits;
 
     data_reg = *args++;
     addr_reg = *args++;
     mem_index = *args;
+    s_bits = opc & MO_SIZE;
 
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 1;
-#else
-    bswap = 0;
-#endif
-
-    tcg_out_qemu_tlb(s, addr_reg, opc,
+    tcg_out_qemu_tlb(s, addr_reg, s_bits,
                      offsetof(CPUArchState, tlb_table[mem_index][0].addr_write),
                      offsetof(CPUArchState, tlb_table[mem_index][0].addend));
 
@@ -1660,9 +1660,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     tcg_out_bundle(s, mLX,
                    tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
                                TCG_REG_R56, 0, TCG_AREG0),
-                   tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[opc]),
+                   tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[s_bits]),
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
-                               (tcg_target_long) qemu_st_helpers[opc]));
+                               (tcg_target_long) qemu_st_helpers[s_bits]));
     tcg_out_bundle(s, MmI,
                    tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
                                TCG_REG_R2, 8),
@@ -1671,14 +1671,20 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
 
-    if (!bswap || opc == 0) {
+    switch (opc) {
+    case MO_8:
+    case MO_16:
+    case MO_32:
+    case MO_64:
         tcg_out_bundle(s, mii,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
                                    0, data_reg),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
-    } else if (opc == 1) {
+        break;
+
+    case MO_16 | MO_BSWAP:
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
@@ -1692,7 +1698,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
-    } else if (opc == 2) {
+        break;
+
+    case MO_32 | MO_BSWAP:
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
@@ -1706,7 +1714,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
-    } else if (opc == 3) {
+        break;
+
+    case MO_64 | MO_BSWAP:
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
@@ -1715,6 +1725,10 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, data_reg, 0xb));
         data_reg = TCG_REG_R2;
+        break;
+
+    default:
+        tcg_abort();
     }
 
     tcg_out_bundle(s, miB,
@@ -1728,7 +1742,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 
 #else /* !CONFIG_SOFTMMU */
 
-static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
+static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
+                                   TCGMemOp opc)
 {
     static uint64_t const opc_ld_m1[4] = {
         OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
@@ -1736,17 +1751,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     static uint64_t const opc_sxt_i29[4] = {
         OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
     };
-    int addr_reg, data_reg, s_bits, bswap;
+    int addr_reg, data_reg;
+    TCGMemOp s_bits, bswap;
 
     data_reg = *args++;
     addr_reg = *args++;
-    s_bits = opc & 3;
-
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 1;
-#else
-    bswap = 0;
-#endif
+    s_bits = opc & MO_SIZE;
+    bswap = opc & MO_BSWAP;
 
 #if TARGET_LONG_BITS == 32
     if (GUEST_BASE != 0) {
@@ -1764,8 +1775,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
 
-    if (!bswap || s_bits == 0) {
-        if (s_bits == opc) {
+    if (!bswap) {
+        if (!(opc & MO_SIGN)) {
             tcg_out_bundle(s, miI,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
@@ -1779,7 +1790,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                            tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
                                        data_reg, data_reg));
         }
-    } else if (s_bits == 3) {
+    } else if (s_bits == MO_64) {
             tcg_out_bundle(s, mII,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
@@ -1787,7 +1798,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        data_reg, data_reg, 0xb));
     } else {
-        if (s_bits == 1) {
+        if (s_bits == MO_16) {
             tcg_out_bundle(s, mII,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
@@ -1802,7 +1813,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                       data_reg, data_reg, 31, 31));
         }
-        if (opc == s_bits) {
+        if (!(opc & MO_SIGN)) {
             tcg_out_bundle(s, miI,
                            tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                            tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
@@ -1833,28 +1844,28 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
 
-    if (bswap && s_bits == 1) {
+    if (bswap && s_bits == MO_16) {
         tcg_out_bundle(s, mII,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                    data_reg, data_reg, 15, 15),
                        tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                    data_reg, data_reg, 0xb));
-    } else if (bswap && s_bits == 2) {
+    } else if (bswap && s_bits == MO_32) {
         tcg_out_bundle(s, mII,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                    data_reg, data_reg, 31, 31),
                        tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                    data_reg, data_reg, 0xb));
-    } else if (bswap && s_bits == 3) {
+    } else if (bswap && s_bits == MO_64) {
         tcg_out_bundle(s, miI,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                    data_reg, data_reg, 0xb));
     }
-    if (s_bits != opc) {
+    if (opc & MO_SIGN) {
         tcg_out_bundle(s, miI,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
@@ -1864,24 +1875,22 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 #endif
 }
 
-static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
+static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
+                                   TCGMemOp opc)
 {
     static uint64_t const opc_st_m4[4] = {
         OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
     };
-    int addr_reg, data_reg, bswap;
+    int addr_reg, data_reg;
 #if TARGET_LONG_BITS == 64
     uint64_t add_guest_base;
 #endif
+    TCGMemOp s_bits, bswap;
 
     data_reg = *args++;
     addr_reg = *args++;
-
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 1;
-#else
-    bswap = 0;
-#endif
+    s_bits = opc & MO_SIZE;
+    bswap = opc & MO_BSWAP;
 
 #if TARGET_LONG_BITS == 32
     if (GUEST_BASE != 0) {
@@ -1900,7 +1909,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     }
 
     if (bswap) {
-        if (opc == 1) {
+        if (s_bits == MO_16) {
             tcg_out_bundle(s, mII,
                            tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
@@ -1908,7 +1917,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        TCG_REG_R3, TCG_REG_R3, 0xb));
             data_reg = TCG_REG_R3;
-        } else if (opc == 2) {
+        } else if (s_bits == MO_32) {
             tcg_out_bundle(s, mII,
                            tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
@@ -1916,7 +1925,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        TCG_REG_R3, TCG_REG_R3, 0xb));
             data_reg = TCG_REG_R3;
-        } else if (opc == 3) {
+        } else if (s_bits == MO_64) {
             tcg_out_bundle(s, miI,
                            tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                            tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
@@ -1926,7 +1935,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         }
     }
     tcg_out_bundle(s, mmI,
-                   tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc],
+                   tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
                                data_reg, TCG_REG_R2),
                    tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
                    tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
@@ -1939,14 +1948,14 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         add_guest_base = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
     }
 
-    if (!bswap || opc == 0) {
+    if (!bswap) {
         tcg_out_bundle(s, (GUEST_BASE ? MmI : mmI),
                        add_guest_base,
-                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc],
+                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
                                    data_reg, addr_reg),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     } else {
-        if (opc == 1) {
+        if (s_bits == MO_16) {
             tcg_out_bundle(s, mII,
                            add_guest_base,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
@@ -1954,7 +1963,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        TCG_REG_R3, TCG_REG_R3, 0xb));
             data_reg = TCG_REG_R3;
-        } else if (opc == 2) {
+        } else if (s_bits == MO_32) {
             tcg_out_bundle(s, mII,
                            add_guest_base,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
@@ -1962,7 +1971,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        TCG_REG_R3, TCG_REG_R3, 0xb));
             data_reg = TCG_REG_R3;
-        } else if (opc == 3) {
+        } else if (s_bits == MO_64) {
             tcg_out_bundle(s, miI,
                            add_guest_base,
                            tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
@@ -1971,7 +1980,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
             data_reg = TCG_REG_R3;
         }
         tcg_out_bundle(s, miI,
-                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc],
+                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
                                    data_reg, addr_reg),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
@@ -2203,39 +2212,39 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_qemu_ld8u:
-        tcg_out_qemu_ld(s, args, 0);
+        tcg_out_qemu_ld(s, args, MO_UB);
         break;
     case INDEX_op_qemu_ld8s:
-        tcg_out_qemu_ld(s, args, 0 | 4);
+        tcg_out_qemu_ld(s, args, MO_SB);
         break;
     case INDEX_op_qemu_ld16u:
-        tcg_out_qemu_ld(s, args, 1);
+        tcg_out_qemu_ld(s, args, MO_TEUW);
         break;
     case INDEX_op_qemu_ld16s:
-        tcg_out_qemu_ld(s, args, 1 | 4);
+        tcg_out_qemu_ld(s, args, MO_TESW);
         break;
     case INDEX_op_qemu_ld32:
     case INDEX_op_qemu_ld32u:
-        tcg_out_qemu_ld(s, args, 2);
+        tcg_out_qemu_ld(s, args, MO_TEUL);
         break;
     case INDEX_op_qemu_ld32s:
-        tcg_out_qemu_ld(s, args, 2 | 4);
+        tcg_out_qemu_ld(s, args, MO_TESL);
         break;
     case INDEX_op_qemu_ld64:
-        tcg_out_qemu_ld(s, args, 3);
+        tcg_out_qemu_ld(s, args, MO_TEQ);
         break;
 
     case INDEX_op_qemu_st8:
-        tcg_out_qemu_st(s, args, 0);
+        tcg_out_qemu_st(s, args, MO_UB);
         break;
     case INDEX_op_qemu_st16:
-        tcg_out_qemu_st(s, args, 1);
+        tcg_out_qemu_st(s, args, MO_TEUW);
         break;
     case INDEX_op_qemu_st32:
-        tcg_out_qemu_st(s, args, 2);
+        tcg_out_qemu_st(s, args, MO_TEUL);
         break;
     case INDEX_op_qemu_st64:
-        tcg_out_qemu_st(s, args, 3);
+        tcg_out_qemu_st(s, args, MO_TEQ);
         break;
 
     default:
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 02/20] tcg-ia64: Use shortcuts for nop insns
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 01/20] tcg-ia64: Use TCGMemOp within qemu_ldst routines Richard Henderson
@ 2013-10-31 20:21 ` Richard Henderson
  2013-11-03 22:55   ` Aurelien Jarno
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 03/20] tcg-ia64: Handle constant calls Richard Henderson
                   ` (17 subsequent siblings)
  19 siblings, 1 reply; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:21 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori, aurelien

There's no need to go through the full opcode-to-insn function call
to generate nops.  This makes the source a bit more readable.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 251 +++++++++++++++++++++++++-------------------------
 1 file changed, 127 insertions(+), 124 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index e09ad24..e1b8b7d 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -284,6 +284,9 @@ enum {
     OPC_ZXT1_I29              = 0x00080000000ull,
     OPC_ZXT2_I29              = 0x00088000000ull,
     OPC_ZXT4_I29              = 0x00090000000ull,
+
+    INSN_NOP_M                = OPC_NOP_M48,  /* nop.m 0 */
+    INSN_NOP_I                = OPC_NOP_I18,  /* nop.i 0 */
 };
 
 static inline uint64_t tcg_opc_a1(int qp, uint64_t opc, int r1,
@@ -855,8 +858,8 @@ static inline void tcg_out_mov(TCGContext *s, TCGType type,
                                TCGReg ret, TCGReg arg)
 {
     tcg_out_bundle(s, mmI,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_M,
                    tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, 0, arg));
 }
 
@@ -864,7 +867,7 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                 TCGReg reg, tcg_target_long arg)
 {
     tcg_out_bundle(s, mLX,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_l2 (arg),
                    tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, reg, arg));
 }
@@ -877,8 +880,8 @@ static void tcg_out_br(TCGContext *s, int label_index)
        the existing value and using it again. This ensure that caches and
        memory are kept coherent during retranslation. */
     tcg_out_bundle(s, mmB,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_M,
                    tcg_opc_b1 (TCG_REG_P0, OPC_BR_SPTK_MANY_B1,
                                get_reloc_pcrel21b(s->code_ptr + 2)));
 
@@ -899,7 +902,7 @@ static inline void tcg_out_call(TCGContext *s, TCGArg addr)
                                TCG_REG_B6, TCG_REG_R2, 0));
     tcg_out_bundle(s, mmB,
                    tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R3),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_b5 (TCG_REG_P0, OPC_BR_CALL_SPTK_MANY_B5,
                                TCG_REG_B0, TCG_REG_B6));
 }
@@ -915,7 +918,7 @@ static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg)
     imm = (uint64_t)disp >> 4;
 
     tcg_out_bundle(s, mLX,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_l3 (imm),
                    tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, imm));
 }
@@ -932,12 +935,12 @@ static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg)
         tcg_out_bundle(s, MmI,
                        tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1,
                                    TCG_REG_R2, TCG_REG_R2),
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6,
                                    TCG_REG_R2, 0));
         tcg_out_bundle(s, mmB,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_M,
                        tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4,
                                    TCG_REG_B6));
     }
@@ -947,12 +950,12 @@ static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg)
 static inline void tcg_out_jmp(TCGContext *s, TCGArg addr)
 {
     tcg_out_bundle(s, mmI,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_M,
                    tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, addr, 0));
     tcg_out_bundle(s, mmB,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_M,
                    tcg_opc_b4(TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6));
 }
 
@@ -964,14 +967,14 @@ static inline void tcg_out_ld_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg,
                        tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4,
                                   TCG_REG_R2, arg2, arg1),
                        tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     } else {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2);
         tcg_out_bundle(s, MmI,
                        tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1,
                                    TCG_REG_R2, TCG_REG_R2, arg1),
                        tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     }
 }
 
@@ -983,14 +986,14 @@ static inline void tcg_out_st_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg,
                        tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4,
                                   TCG_REG_R2, arg2, arg1),
                        tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     } else {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2);
         tcg_out_bundle(s, MmI,
                        tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1,
                                    TCG_REG_R2, TCG_REG_R2, arg1),
                        tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     }
 }
 
@@ -1025,7 +1028,7 @@ static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGArg ret,
                           TCG_REG_R2, arg1, TCG_REG_R0);
         arg1 = TCG_REG_R2;
     } else {
-        opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+        opc1 = INSN_NOP_M;
     }
 
     if (const_arg2 && arg2 != 0) {
@@ -1033,7 +1036,7 @@ static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGArg ret,
                           TCG_REG_R3, arg2, TCG_REG_R0);
         arg2 = TCG_REG_R3;
     } else {
-        opc2 = tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0);
+        opc2 = INSN_NOP_I;
     }
 
     tcg_out_bundle(s, mII,
@@ -1047,7 +1050,7 @@ static inline void tcg_out_eqv(TCGContext *s, TCGArg ret,
                                TCGArg arg2, int const_arg2)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_a1 (TCG_REG_P0, OPC_XOR_A1, ret, arg1, arg2),
                    tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret));
 }
@@ -1057,7 +1060,7 @@ static inline void tcg_out_nand(TCGContext *s, TCGArg ret,
                                 TCGArg arg2, int const_arg2)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, ret, arg1, arg2),
                    tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret));
 }
@@ -1067,7 +1070,7 @@ static inline void tcg_out_nor(TCGContext *s, TCGArg ret,
                                TCGArg arg2, int const_arg2)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, arg2),
                    tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret));
 }
@@ -1077,7 +1080,7 @@ static inline void tcg_out_orc(TCGContext *s, TCGArg ret,
                                TCGArg arg2, int const_arg2)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, TCG_REG_R2, -1, arg2),
                    tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, TCG_REG_R2));
 }
@@ -1088,16 +1091,16 @@ static inline void tcg_out_mul(TCGContext *s, TCGArg ret,
     tcg_out_bundle(s, mmI,
                    tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F6, arg1),
                    tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F7, arg2),
-                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                   INSN_NOP_I);
     tcg_out_bundle(s, mmF,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_M,
                    tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F6,
                                TCG_REG_F7, TCG_REG_F0));
     tcg_out_bundle(s, miI,
                    tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, ret, TCG_REG_F6),
-                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
-                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                   INSN_NOP_I,
+                   INSN_NOP_I);
 }
 
 static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
@@ -1105,8 +1108,8 @@ static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11,
                                    ret, arg1, arg2, 31 - arg2));
     } else {
@@ -1124,14 +1127,14 @@ static inline void tcg_out_sar_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11,
                                    ret, arg1, arg2, 63 - arg2));
     } else {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, arg1, arg2));
     }
 }
@@ -1141,13 +1144,13 @@ static inline void tcg_out_shl_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret,
                                    arg1, 63 - arg2, 31 - arg2));
     } else {
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R2,
                                    0x1f, arg2),
                        tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret,
@@ -1160,14 +1163,14 @@ static inline void tcg_out_shl_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret,
                                    arg1, 63 - arg2, 63 - arg2));
     } else {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret,
                                    arg1, arg2));
     }
@@ -1178,8 +1181,8 @@ static inline void tcg_out_shr_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
                                    arg1, arg2, 31 - arg2));
     } else {
@@ -1197,14 +1200,14 @@ static inline void tcg_out_shr_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
                                    arg1, arg2, 63 - arg2));
     } else {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret,
                                    arg1, arg2));
     }
@@ -1215,20 +1218,20 @@ static inline void tcg_out_rotl_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2,
                                    TCG_REG_R2, arg1, arg1),
                        tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
                                    TCG_REG_R2, 32 - arg2, 31));
     } else {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2,
                                    TCG_REG_R2, arg1, arg1),
                        tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3,
                                    0x1f, arg2));
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R3,
                                    0x20, TCG_REG_R3),
                        tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret,
@@ -1241,8 +1244,8 @@ static inline void tcg_out_rotl_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1,
                                    arg1, 0x40 - arg2));
     } else {
@@ -1254,8 +1257,8 @@ static inline void tcg_out_rotl_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
                        tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R2,
                                    arg1, TCG_REG_R2));
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret,
                                    TCG_REG_R2, TCG_REG_R3));
     }
@@ -1266,7 +1269,7 @@ static inline void tcg_out_rotr_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2,
                                    TCG_REG_R2, arg1, arg1),
                        tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
@@ -1287,8 +1290,8 @@ static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
 {
     if (const_arg2) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1,
                                    arg1, arg2));
     } else {
@@ -1300,8 +1303,8 @@ static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
                        tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R2,
                                    arg1, TCG_REG_R2));
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret,
                                    TCG_REG_R2, TCG_REG_R3));
     }
@@ -1311,15 +1314,15 @@ static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29,
                                TCGArg ret, TCGArg arg)
 {
     tcg_out_bundle(s, miI,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_I,
                    tcg_opc_i29(TCG_REG_P0, opc_i29, ret, arg));
 }
 
 static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 15, 15),
                    tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb));
 }
@@ -1327,7 +1330,7 @@ static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg)
 static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 31, 31),
                    tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb));
 }
@@ -1335,8 +1338,8 @@ static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg)
 static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
 {
     tcg_out_bundle(s, miI,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_I,
                    tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb));
 }
 
@@ -1366,8 +1369,8 @@ static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1,
         i2 = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, ret, a2, a1, cpos, lm1);
     }
     tcg_out_bundle(s, (i1 ? mII : miI),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   i1 ? i1 : tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                   INSN_NOP_M,
+                   i1 ? i1 : INSN_NOP_I,
                    i2);
 }
 
@@ -1425,7 +1428,7 @@ static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
                           arg1, TCG_REG_R0);
         arg1 = TCG_REG_R2;
     } else {
-        opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+        opc1 = INSN_NOP_M;
     }
 
     if (const_arg2 && arg2 != 0) {
@@ -1433,7 +1436,7 @@ static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
                           arg2, TCG_REG_R0);
         arg2 = TCG_REG_R3;
     } else {
-        opc2 = tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0);
+        opc2 = INSN_NOP_I;
     }
 
     tcg_out_bundle(s, mII,
@@ -1441,8 +1444,8 @@ static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
                    opc2,
                    tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4));
     tcg_out_bundle(s, mmB,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
+                   INSN_NOP_M,
                    tcg_opc_b1 (TCG_REG_P6, OPC_BR_DPTK_FEW_B1,
                                get_reloc_pcrel21b(s->code_ptr + 2)));
 
@@ -1473,14 +1476,14 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
     if (const_v1) {
         opc1 = tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, v1, TCG_REG_R0);
     } else if (ret == v1) {
-        opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+        opc1 = INSN_NOP_M;
     } else {
         opc1 = tcg_opc_a4(TCG_REG_P6, OPC_ADDS_A4, ret, 0, v1);
     }
     if (const_v2) {
         opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0);
     } else if (ret == v2) {
-        opc2 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+        opc2 = INSN_NOP_I;
     } else {
         opc2 = tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, ret, 0, v2);
     }
@@ -1500,7 +1503,7 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                                     uint64_t offset_addend)
 {
     tcg_out_bundle(s, mII,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2,
                                addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1),
                    tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2,
@@ -1595,13 +1598,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                        tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
                                    TCG_REG_R8, TCG_REG_R3),
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     }
     if (!bswap) {
         tcg_out_bundle(s, miB,
                        tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
                                    mem_index, TCG_REG_R0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_I,
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                    TCG_REG_B0, TCG_REG_B6));
     } else {
@@ -1616,14 +1619,14 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     if (s_bits == MO_64) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
                                    data_reg, 0, TCG_REG_R8));
     } else {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc & MO_SSIZE],
                                    data_reg, TCG_REG_R8));
     }
@@ -1681,20 +1684,20 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                                    TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
                                    0, data_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
         break;
 
     case MO_16 | MO_BSWAP:
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_I,
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
                                    TCG_REG_R2, data_reg, 15, 15));
         tcg_out_bundle(s, miI,
                        tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
                                    0, data_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_I,
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
@@ -1704,13 +1707,13 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_I,
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
                                    TCG_REG_R2, data_reg, 31, 31));
         tcg_out_bundle(s, miI,
                        tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
                                    0, data_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_I,
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
         data_reg = TCG_REG_R2;
@@ -1762,17 +1765,17 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 #if TARGET_LONG_BITS == 32
     if (GUEST_BASE != 0) {
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
                                    TCG_REG_R3, addr_reg),
                        tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
                                    TCG_GUEST_BASE_REG, TCG_REG_R3));
     } else {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
                                    TCG_REG_R2, addr_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     }
 
     if (!bswap) {
@@ -1780,13 +1783,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
             tcg_out_bundle(s, miI,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                           INSN_NOP_I,
+                           INSN_NOP_I);
         } else {
             tcg_out_bundle(s, mII,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                           INSN_NOP_I,
                            tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
                                        data_reg, data_reg));
         }
@@ -1794,7 +1797,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
             tcg_out_bundle(s, mII,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                           INSN_NOP_I,
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        data_reg, data_reg, 0xb));
     } else {
@@ -1802,26 +1805,26 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
             tcg_out_bundle(s, mII,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                           INSN_NOP_I,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                       data_reg, data_reg, 15, 15));
         } else {
             tcg_out_bundle(s, mII,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                           INSN_NOP_I,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                       data_reg, data_reg, 31, 31));
         }
         if (!(opc & MO_SIGN)) {
             tcg_out_bundle(s, miI,
-                           tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                           INSN_NOP_M,
+                           INSN_NOP_I,
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        data_reg, data_reg, 0xb));
         } else {
             tcg_out_bundle(s, mII,
-                           tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                           INSN_NOP_M,
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        data_reg, data_reg, 0xb),
                            tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
@@ -1835,40 +1838,40 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                                    TCG_GUEST_BASE_REG, addr_reg),
                        tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                    data_reg, TCG_REG_R2),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     } else {
         tcg_out_bundle(s, mmI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                    data_reg, addr_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     }
 
     if (bswap && s_bits == MO_16) {
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                    data_reg, data_reg, 15, 15),
                        tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                    data_reg, data_reg, 0xb));
     } else if (bswap && s_bits == MO_32) {
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                    data_reg, data_reg, 31, 31),
                        tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                    data_reg, data_reg, 0xb));
     } else if (bswap && s_bits == MO_64) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                    data_reg, data_reg, 0xb));
     }
     if (opc & MO_SIGN) {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                       INSN_NOP_M,
+                       INSN_NOP_I,
                        tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
                                    data_reg, data_reg));
     }
@@ -1895,23 +1898,23 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 #if TARGET_LONG_BITS == 32
     if (GUEST_BASE != 0) {
         tcg_out_bundle(s, mII,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
                                    TCG_REG_R3, addr_reg),
                        tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
                                    TCG_GUEST_BASE_REG, TCG_REG_R3));
     } else {
         tcg_out_bundle(s, miI,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
                                    TCG_REG_R2, addr_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     }
 
     if (bswap) {
         if (s_bits == MO_16) {
             tcg_out_bundle(s, mII,
-                           tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                           INSN_NOP_M,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                        TCG_REG_R3, data_reg, 15, 15),
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
@@ -1919,7 +1922,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
             data_reg = TCG_REG_R3;
         } else if (s_bits == MO_32) {
             tcg_out_bundle(s, mII,
-                           tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                           INSN_NOP_M,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                        TCG_REG_R3, data_reg, 31, 31),
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
@@ -1927,8 +1930,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
             data_reg = TCG_REG_R3;
         } else if (s_bits == MO_64) {
             tcg_out_bundle(s, miI,
-                           tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                           INSN_NOP_M,
+                           INSN_NOP_I,
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        TCG_REG_R3, data_reg, 0xb));
             data_reg = TCG_REG_R3;
@@ -1937,15 +1940,15 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out_bundle(s, mmI,
                    tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
                                data_reg, TCG_REG_R2),
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
-                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                   INSN_NOP_M,
+                   INSN_NOP_I);
 #else
     if (GUEST_BASE != 0) {
         add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
                                      TCG_GUEST_BASE_REG, addr_reg);
         addr_reg = TCG_REG_R2;
     } else {
-        add_guest_base = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+        add_guest_base = INSN_NOP_M;
     }
 
     if (!bswap) {
@@ -1953,7 +1956,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                        add_guest_base,
                        tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
                                    data_reg, addr_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I);
     } else {
         if (s_bits == MO_16) {
             tcg_out_bundle(s, mII,
@@ -1974,7 +1977,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         } else if (s_bits == MO_64) {
             tcg_out_bundle(s, miI,
                            add_guest_base,
-                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+                           INSN_NOP_I,
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        TCG_REG_R3, data_reg, 0xb));
             data_reg = TCG_REG_R3;
@@ -1982,8 +1985,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, miI,
                        tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
                                    data_reg, addr_reg),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
-                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
+                       INSN_NOP_I,
+                       INSN_NOP_I);
     }
 #endif
 }
@@ -2402,7 +2405,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
        an ADDL in the M slot of the next bundle.  */
     if (GUEST_BASE != 0) {
         tcg_out_bundle(s, mlx,
-                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                       INSN_NOP_M,
                        tcg_opc_l2 (GUEST_BASE),
                        tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2,
                                    TCG_GUEST_BASE_REG, GUEST_BASE));
@@ -2419,13 +2422,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     /* epilogue */
     tb_ret_addr = s->code_ptr;
     tcg_out_bundle(s, miI,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21,
                                TCG_REG_B0, TCG_REG_R32, 0),
                    tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
                                TCG_REG_R12, frame_size, TCG_REG_R12));
     tcg_out_bundle(s, miB,
-                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+                   INSN_NOP_M,
                    tcg_opc_i26(TCG_REG_P0, OPC_MOV_I_I26,
                                TCG_REG_PFS, TCG_REG_R34),
                    tcg_opc_b4 (TCG_REG_P0, OPC_BR_RET_SPTK_MANY_B4,
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 03/20] tcg-ia64: Handle constant calls
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 01/20] tcg-ia64: Use TCGMemOp within qemu_ldst routines Richard Henderson
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 02/20] tcg-ia64: Use shortcuts for nop insns Richard Henderson
@ 2013-10-31 20:21 ` Richard Henderson
  2013-11-03 22:56   ` Aurelien Jarno
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 04/20] tcg-ia64: Simplify brcond Richard Henderson
                   ` (16 subsequent siblings)
  19 siblings, 1 reply; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:21 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

Using only indirect calls results in 3 bundles (one to load the
descriptor address), and 4 stop bits.  By looking through the
descriptor to the constants, we can perform the call with 2
bundles and only 1 stop bit.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 38 +++++++++++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index e1b8b7d..b19e298 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -226,6 +226,7 @@ enum {
     OPC_BR_CALL_SPTK_MANY_B5  = 0x02100001000ull,
     OPC_BR_RET_SPTK_MANY_B4   = 0x00108001100ull,
     OPC_BRL_SPTK_MANY_X3      = 0x18000001000ull,
+    OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull,
     OPC_CMP_LT_A6             = 0x18000000000ull,
     OPC_CMP_LTU_A6            = 0x1a000000000ull,
     OPC_CMP_EQ_A6             = 0x1c000000000ull,
@@ -584,6 +585,8 @@ static inline uint64_t tcg_opc_l3(uint64_t imm)
     return (imm & 0x07fffffffff00000ull) >> 18;
 }
 
+#define tcg_opc_l4  tcg_opc_l3
+
 static inline uint64_t tcg_opc_m1(int qp, uint64_t opc, int r1, int r3)
 {
     return opc
@@ -668,6 +671,15 @@ static inline uint64_t tcg_opc_x3(int qp, uint64_t opc, uint64_t imm)
            | (qp & 0x3f);
 }
 
+static inline uint64_t tcg_opc_x4(int qp, uint64_t opc, int b1, uint64_t imm)
+{
+    return opc
+           | ((imm & 0x0800000000000000ull) >> 23) /* i */
+           | ((imm & 0x00000000000fffffull) << 13) /* imm20b */
+           | ((b1 & 0x7) << 6)
+           | (qp & 0x3f);
+}
+
 
 /*
  * Relocations
@@ -893,7 +905,23 @@ static void tcg_out_br(TCGContext *s, int label_index)
     }
 }
 
-static inline void tcg_out_call(TCGContext *s, TCGArg addr)
+static inline void tcg_out_calli(TCGContext *s, uintptr_t addr)
+{
+    /* Look through the function descriptor.  */
+    uintptr_t disp, *desc = (uintptr_t *)addr;
+    tcg_out_bundle(s, mlx,
+                   INSN_NOP_M,
+                   tcg_opc_l2 (desc[1]),
+                   tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, desc[1]));
+    disp = (desc[0] - (uintptr_t)s->code_ptr) >> 4;
+    tcg_out_bundle(s, mLX,
+                   INSN_NOP_M,
+                   tcg_opc_l4 (disp),
+                   tcg_opc_x4 (TCG_REG_P0, OPC_BRL_CALL_SPTK_MANY_X4,
+                               TCG_REG_B0, disp));
+}
+
+static inline void tcg_out_callr(TCGContext *s, TCGReg addr)
 {
     tcg_out_bundle(s, MmI,
                    tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, addr),
@@ -2004,7 +2032,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_br(s, args[0]);
         break;
     case INDEX_op_call:
-        tcg_out_call(s, args[0]);
+        if (likely(const_args[0])) {
+            tcg_out_calli(s, args[0]);
+        } else {
+            tcg_out_callr(s, args[0]);
+        }
         break;
     case INDEX_op_goto_tb:
         tcg_out_goto_tb(s, args[0]);
@@ -2257,7 +2289,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
 static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_br, { } },
-    { INDEX_op_call, { "r" } },
+    { INDEX_op_call, { "ri" } },
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 04/20] tcg-ia64: Simplify brcond
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (2 preceding siblings ...)
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 03/20] tcg-ia64: Handle constant calls Richard Henderson
@ 2013-10-31 20:21 ` Richard Henderson
  2013-11-03 22:56   ` Aurelien Jarno
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 05/20] tcg-ia64: Move AREG0 to R32 Richard Henderson
                   ` (15 subsequent siblings)
  19 siblings, 1 reply; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:21 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

There was a misconception that a stop bit is required between a compare
and the branch that uses the predicate set by the compare.  This led to
the usage of an extra bundle in which to perform the compare.  The extra
bundle left room for constants to be loaded for use with the compare insn.

If we pack the compare and the branch together in the same bundle, then
there's no longer any room for non-zero constants.  At which point we
can eliminate half the function by not handling them.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 42 +++++++++---------------------------------
 1 file changed, 9 insertions(+), 33 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index b19e298..2fdc38a5 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1444,38 +1444,16 @@ static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1,
     }
 }
 
-static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
-                                  int const_arg1, TCGArg arg2, int const_arg2,
-                                  int label_index, int cmp4)
+static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
+                                  TCGReg arg2, int label_index, int cmp4)
 {
     TCGLabel *l = &s->labels[label_index];
-    uint64_t opc1, opc2;
 
-    if (const_arg1 && arg1 != 0) {
-        opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2,
-                          arg1, TCG_REG_R0);
-        arg1 = TCG_REG_R2;
-    } else {
-        opc1 = INSN_NOP_M;
-    }
-
-    if (const_arg2 && arg2 != 0) {
-        opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R3,
-                          arg2, TCG_REG_R0);
-        arg2 = TCG_REG_R3;
-    } else {
-        opc2 = INSN_NOP_I;
-    }
-
-    tcg_out_bundle(s, mII,
-                   opc1,
-                   opc2,
-                   tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4));
-    tcg_out_bundle(s, mmB,
-                   INSN_NOP_M,
+    tcg_out_bundle(s, miB,
                    INSN_NOP_M,
-                   tcg_opc_b1 (TCG_REG_P6, OPC_BR_DPTK_FEW_B1,
-                               get_reloc_pcrel21b(s->code_ptr + 2)));
+                   tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4),
+                   tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1,
+                              get_reloc_pcrel21b(s->code_ptr + 2)));
 
     if (l->has_value) {
         reloc_pcrel21b((s->code_ptr - 16) + 2, l->u.value);
@@ -2224,12 +2202,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_brcond_i32:
-        tcg_out_brcond(s, args[2], args[0], const_args[0],
-                       args[1], const_args[1], args[3], 1);
+        tcg_out_brcond(s, args[2], args[0], args[1], args[3], 1);
         break;
     case INDEX_op_brcond_i64:
-        tcg_out_brcond(s, args[2], args[0], const_args[0],
-                       args[1], const_args[1], args[3], 0);
+        tcg_out_brcond(s, args[2], args[0], args[1], args[3], 0);
         break;
     case INDEX_op_setcond_i32:
         tcg_out_setcond(s, args[3], args[0], args[1], args[2], 1);
@@ -2333,7 +2309,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_bswap16_i32, { "r", "rZ" } },
     { INDEX_op_bswap32_i32, { "r", "rZ" } },
 
-    { INDEX_op_brcond_i32, { "rI", "rI" } },
+    { INDEX_op_brcond_i32, { "rZ", "rZ" } },
     { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } },
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 05/20] tcg-ia64: Move AREG0 to R32
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (3 preceding siblings ...)
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 04/20] tcg-ia64: Simplify brcond Richard Henderson
@ 2013-10-31 20:21 ` Richard Henderson
  2013-11-06 22:33   ` Aurelien Jarno
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 06/20] tcg-ia64: Avoid unnecessary stop bit in tcg_out_alu Richard Henderson
                   ` (14 subsequent siblings)
  19 siblings, 1 reply; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:21 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

Since the move away from the global areg0, we're no longer globally
reserving areg0.  Which means our use of R7 clobbers a call-saved
register.  Shift areg0 into the windowed registers.  Indeed, choose
the incoming parameter register that it comes to us by.

This requires moving the register holding the return address elsewhere.
Choose R33 for tidiness.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 13 ++++++-------
 tcg/ia64/tcg-target.h |  4 ++--
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 2fdc38a5..336781f 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -109,7 +109,6 @@ enum {
 };
 
 static const int tcg_target_reg_alloc_order[] = {
-    TCG_REG_R33,
     TCG_REG_R35,
     TCG_REG_R36,
     TCG_REG_R37,
@@ -2404,8 +2403,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_bundle(s, miI,
                    tcg_opc_m34(TCG_REG_P0, OPC_ALLOC_M34,
                                TCG_REG_R34, 32, 24, 0),
-                   tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
-                               TCG_AREG0, 0, TCG_REG_R32),
+                   INSN_NOP_I,
                    tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21,
                                TCG_REG_B6, TCG_REG_R33, 0));
 
@@ -2424,7 +2422,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
                    tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
                                TCG_REG_R12, -frame_size, TCG_REG_R12),
                    tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22,
-                               TCG_REG_R32, TCG_REG_B0),
+                               TCG_REG_R33, TCG_REG_B0),
                    tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6));
 
     /* epilogue */
@@ -2432,7 +2430,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_bundle(s, miI,
                    INSN_NOP_M,
                    tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21,
-                               TCG_REG_B0, TCG_REG_R32, 0),
+                               TCG_REG_B0, TCG_REG_R33, 0),
                    tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
                                TCG_REG_R12, frame_size, TCG_REG_R12));
     tcg_out_bundle(s, miB,
@@ -2489,16 +2487,17 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R3);   /* internal use */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R12);  /* stack pointer */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13);  /* thread pointer */
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R32);  /* return address */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R33);  /* return address */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R34);  /* PFS */
 
-    /* The following 3 are not in use, are call-saved, but *not* saved
+    /* The following 4 are not in use, are call-saved, but *not* saved
        by the prologue.  Therefore we cannot use them without modifying
        the prologue.  There doesn't seem to be any good reason to use
        these as opposed to the windowed registers.  */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R4);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R5);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R7);
 
     tcg_add_target_add_op_defs(ia64_op_defs);
 }
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index c90038a..52a939c 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -92,6 +92,8 @@ typedef enum {
     TCG_REG_R61,
     TCG_REG_R62,
     TCG_REG_R63,
+
+    TCG_AREG0 = TCG_REG_R32,
 } TCGReg;
 
 #define TCG_CT_CONST_ZERO 0x100
@@ -162,8 +164,6 @@ typedef enum {
 #define TCG_TARGET_HAS_not_i32          0 /* xor r1, -1, r3 */
 #define TCG_TARGET_HAS_not_i64          0 /* xor r1, -1, r3 */
 
-#define TCG_AREG0 TCG_REG_R7
-
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
     start = start & ~(32UL - 1UL);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 06/20] tcg-ia64: Avoid unnecessary stop bit in tcg_out_alu
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (4 preceding siblings ...)
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 05/20] tcg-ia64: Move AREG0 to R32 Richard Henderson
@ 2013-10-31 20:21 ` Richard Henderson
  2013-11-06 22:33   ` Aurelien Jarno
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 07/20] tcg-ia64: Use ADDS for small addition Richard Henderson
                   ` (13 subsequent siblings)
  19 siblings, 1 reply; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:21 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

When performing an operation with two input registers, we'd leave
the stop bit (and thus an extra cycle) that's only needed when one
or the other input is a constant.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 336781f..b7f74a9 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1044,31 +1044,26 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
     }
 }
 
-static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGArg ret,
-                               TCGArg arg1, int const_arg1,
-                               TCGArg arg2, int const_arg2)
+static void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGReg ret, TCGArg arg1,
+                        int const_arg1, TCGArg arg2, int const_arg2)
 {
-    uint64_t opc1, opc2;
+    uint64_t opc1 = 0, opc2 = 0;
 
     if (const_arg1 && arg1 != 0) {
         opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
                           TCG_REG_R2, arg1, TCG_REG_R0);
         arg1 = TCG_REG_R2;
-    } else {
-        opc1 = INSN_NOP_M;
     }
 
     if (const_arg2 && arg2 != 0) {
         opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
                           TCG_REG_R3, arg2, TCG_REG_R0);
         arg2 = TCG_REG_R3;
-    } else {
-        opc2 = INSN_NOP_I;
     }
 
-    tcg_out_bundle(s, mII,
-                   opc1,
-                   opc2,
+    tcg_out_bundle(s, (opc1 || opc2 ? mII : miI),
+                   opc1 ? opc1 : INSN_NOP_M,
+                   opc2 ? opc2 : INSN_NOP_I,
                    tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2));
 }
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 07/20] tcg-ia64: Use ADDS for small addition
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (5 preceding siblings ...)
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 06/20] tcg-ia64: Avoid unnecessary stop bit in tcg_out_alu Richard Henderson
@ 2013-10-31 20:21 ` Richard Henderson
  2013-11-06 22:34   ` Aurelien Jarno
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 08/20] tcg-ia64: Use SUB_A3 and ADDS_A4 for subtraction Richard Henderson
                   ` (12 subsequent siblings)
  19 siblings, 1 reply; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:21 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

Avoids a wasted cycle loading up small constants.

Simplify the code assuming the tcg optimizer is going to work
and don't expect the first operand of the add to be constant.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index b7f74a9..be74606 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1067,6 +1067,19 @@ static void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGReg ret, TCGArg arg1,
                    tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2));
 }
 
+static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1,
+                               TCGArg arg2, int const_arg2)
+{
+    if (const_arg2 && arg2 == sextract64(arg2, 0, 14)) {
+        tcg_out_bundle(s, mmI,
+                       INSN_NOP_M,
+                       INSN_NOP_M,
+                       tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, arg2, arg1));
+    } else {
+        tcg_out_alu(s, OPC_ADD_A1, ret, arg1, 0, arg2, const_arg2);
+    }
+}
+
 static inline void tcg_out_eqv(TCGContext *s, TCGArg ret,
                                TCGArg arg1, int const_arg1,
                                TCGArg arg2, int const_arg2)
@@ -2068,8 +2081,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_add_i32:
     case INDEX_op_add_i64:
-        tcg_out_alu(s, OPC_ADD_A1, args[0], args[1], const_args[1],
-                    args[2], const_args[2]);
+        tcg_out_add(s, args[0], args[1], args[2], const_args[2]);
         break;
     case INDEX_op_sub_i32:
     case INDEX_op_sub_i64:
@@ -2275,7 +2287,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_st16_i32, { "rZ", "r" } },
     { INDEX_op_st_i32, { "rZ", "r" } },
 
-    { INDEX_op_add_i32, { "r", "rI", "rI" } },
+    { INDEX_op_add_i32, { "r", "rZ", "rI" } },
     { INDEX_op_sub_i32, { "r", "rI", "rI" } },
 
     { INDEX_op_and_i32, { "r", "rI", "rI" } },
@@ -2322,7 +2334,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_st32_i64, { "rZ", "r" } },
     { INDEX_op_st_i64, { "rZ", "r" } },
 
-    { INDEX_op_add_i64, { "r", "rI", "rI" } },
+    { INDEX_op_add_i64, { "r", "rZ", "rI" } },
     { INDEX_op_sub_i64, { "r", "rI", "rI" } },
 
     { INDEX_op_and_i64, { "r", "rI", "rI" } },
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 08/20] tcg-ia64: Use SUB_A3 and ADDS_A4 for subtraction
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (6 preceding siblings ...)
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 07/20] tcg-ia64: Use ADDS for small addition Richard Henderson
@ 2013-10-31 20:21 ` Richard Henderson
  2013-11-06 22:34   ` Aurelien Jarno
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 09/20] tcg-ia64: Use A3 form of logical operations Richard Henderson
                   ` (11 subsequent siblings)
  19 siblings, 1 reply; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:21 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

We can subtract from more small constants than just 0 with one insn,
and we can add the negative for most small constants.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index be74606..d7bccd2 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1080,6 +1080,28 @@ static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1,
     }
 }
 
+static inline void tcg_out_sub(TCGContext *s, TCGReg ret, TCGArg arg1,
+                               int const_arg1, TCGArg arg2, int const_arg2)
+{
+    if (const_arg1 && arg1 == (int8_t)arg1) {
+        if (const_arg2) {
+            tcg_out_movi(s, TCG_TYPE_I64, ret, arg1 - arg2);
+            return;
+        }
+        tcg_out_bundle(s, mmI,
+                       INSN_NOP_M,
+                       INSN_NOP_M,
+                       tcg_opc_a3(TCG_REG_P0, OPC_SUB_A3, ret, arg1, arg2));
+    } else if (const_arg2 && -arg2 == sextract64(-arg2, 0, 14)) {
+        tcg_out_bundle(s, mmI,
+                       INSN_NOP_M,
+                       INSN_NOP_M,
+                       tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, -arg2, arg1));
+    } else {
+        tcg_out_alu(s, OPC_SUB_A1, ret, arg1, const_arg1, arg2, const_arg2);
+    }
+}
+
 static inline void tcg_out_eqv(TCGContext *s, TCGArg ret,
                                TCGArg arg1, int const_arg1,
                                TCGArg arg2, int const_arg2)
@@ -2085,8 +2107,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_sub_i32:
     case INDEX_op_sub_i64:
-        tcg_out_alu(s, OPC_SUB_A1, args[0], args[1], const_args[1],
-                    args[2], const_args[2]);
+        tcg_out_sub(s, args[0], args[1], const_args[1], args[2], const_args[2]);
         break;
 
     case INDEX_op_and_i32:
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 09/20] tcg-ia64: Use A3 form of logical operations
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (7 preceding siblings ...)
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 08/20] tcg-ia64: Use SUB_A3 and ADDS_A4 for subtraction Richard Henderson
@ 2013-10-31 20:21 ` Richard Henderson
  2013-11-06 22:34   ` Aurelien Jarno
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 10/20] tcg-ia64: Introduce tcg_opc_mov_a Richard Henderson
                   ` (10 subsequent siblings)
  19 siblings, 1 reply; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:21 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

We can and/or/xor/andcm small constants, saving one cycle.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 64 +++++++++++++++++++++++++++------------------------
 1 file changed, 34 insertions(+), 30 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index d7bccd2..e45b571 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -263,6 +263,7 @@ enum {
     OPC_MOV_I_I26             = 0x00150000000ull,
     OPC_MOVL_X2               = 0x0c000000000ull,
     OPC_OR_A1                 = 0x10070000000ull,
+    OPC_OR_A3                 = 0x10170000000ull,
     OPC_SETF_EXP_M18          = 0x0c748000000ull,
     OPC_SETF_SIG_M18          = 0x0c708000000ull,
     OPC_SHL_I7                = 0x0f240000000ull,
@@ -281,6 +282,7 @@ enum {
     OPC_UNPACK4_L_I2          = 0x0f860000000ull,
     OPC_XMA_L_F2              = 0x1d000000000ull,
     OPC_XOR_A1                = 0x10078000000ull,
+    OPC_XOR_A3                = 0x10178000000ull,
     OPC_ZXT1_I29              = 0x00080000000ull,
     OPC_ZXT2_I29              = 0x00088000000ull,
     OPC_ZXT4_I29              = 0x00090000000ull,
@@ -1044,27 +1046,34 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
     }
 }
 
-static void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGReg ret, TCGArg arg1,
-                        int const_arg1, TCGArg arg2, int const_arg2)
+static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3,
+                               TCGReg ret, TCGArg arg1, int const_arg1,
+                               TCGArg arg2, int const_arg2)
 {
-    uint64_t opc1 = 0, opc2 = 0;
-
-    if (const_arg1 && arg1 != 0) {
-        opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
-                          TCG_REG_R2, arg1, TCG_REG_R0);
-        arg1 = TCG_REG_R2;
-    }
+    uint64_t opc1 = 0, opc2 = 0, opc3 = 0;
 
     if (const_arg2 && arg2 != 0) {
         opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
                           TCG_REG_R3, arg2, TCG_REG_R0);
         arg2 = TCG_REG_R3;
     }
+    if (const_arg1 && arg1 != 0) {
+        if (opc_a3 && arg1 == (int8_t)arg1) {
+            opc3 = tcg_opc_a3(TCG_REG_P0, opc_a3, ret, arg1, arg2);
+        } else {
+            opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
+                              TCG_REG_R2, arg1, TCG_REG_R0);
+            arg1 = TCG_REG_R2;
+        }
+    }
+    if (opc3 == 0) {
+        opc3 = tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2);
+    }
 
     tcg_out_bundle(s, (opc1 || opc2 ? mII : miI),
                    opc1 ? opc1 : INSN_NOP_M,
                    opc2 ? opc2 : INSN_NOP_I,
-                   tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2));
+                   opc3);
 }
 
 static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1,
@@ -1076,29 +1085,21 @@ static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1,
                        INSN_NOP_M,
                        tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, arg2, arg1));
     } else {
-        tcg_out_alu(s, OPC_ADD_A1, ret, arg1, 0, arg2, const_arg2);
+        tcg_out_alu(s, OPC_ADD_A1, 0, ret, arg1, 0, arg2, const_arg2);
     }
 }
 
 static inline void tcg_out_sub(TCGContext *s, TCGReg ret, TCGArg arg1,
                                int const_arg1, TCGArg arg2, int const_arg2)
 {
-    if (const_arg1 && arg1 == (int8_t)arg1) {
-        if (const_arg2) {
-            tcg_out_movi(s, TCG_TYPE_I64, ret, arg1 - arg2);
-            return;
-        }
-        tcg_out_bundle(s, mmI,
-                       INSN_NOP_M,
-                       INSN_NOP_M,
-                       tcg_opc_a3(TCG_REG_P0, OPC_SUB_A3, ret, arg1, arg2));
-    } else if (const_arg2 && -arg2 == sextract64(-arg2, 0, 14)) {
+    if (!const_arg1 && const_arg2 && -arg2 == sextract64(-arg2, 0, 14)) {
         tcg_out_bundle(s, mmI,
                        INSN_NOP_M,
                        INSN_NOP_M,
                        tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, -arg2, arg1));
     } else {
-        tcg_out_alu(s, OPC_SUB_A1, ret, arg1, const_arg1, arg2, const_arg2);
+        tcg_out_alu(s, OPC_SUB_A1, OPC_SUB_A3, ret,
+                    arg1, const_arg1, arg2, const_arg2);
     }
 }
 
@@ -2112,13 +2113,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_and_i32:
     case INDEX_op_and_i64:
-        tcg_out_alu(s, OPC_AND_A1, args[0], args[1], const_args[1],
-                    args[2], const_args[2]);
+        /* TCG expects arg2 constant; A3 expects arg1 constant.  Swap.  */
+        tcg_out_alu(s, OPC_AND_A1, OPC_AND_A3, args[0],
+                    args[2], const_args[2], args[1], const_args[1]);
         break;
     case INDEX_op_andc_i32:
     case INDEX_op_andc_i64:
-        tcg_out_alu(s, OPC_ANDCM_A1, args[0], args[1], const_args[1],
-                    args[2], const_args[2]);
+        tcg_out_alu(s, OPC_ANDCM_A1, OPC_ANDCM_A3, args[0],
+                    args[1], const_args[1], args[2], const_args[2]);
         break;
     case INDEX_op_eqv_i32:
     case INDEX_op_eqv_i64:
@@ -2137,8 +2139,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_or_i32:
     case INDEX_op_or_i64:
-        tcg_out_alu(s, OPC_OR_A1, args[0], args[1], const_args[1],
-                    args[2], const_args[2]);
+        /* TCG expects arg2 constant; A3 expects arg1 constant.  Swap.  */
+        tcg_out_alu(s, OPC_OR_A1, OPC_OR_A3, args[0],
+                    args[2], const_args[2], args[1], const_args[1]);
         break;
     case INDEX_op_orc_i32:
     case INDEX_op_orc_i64:
@@ -2147,8 +2150,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
     case INDEX_op_xor_i32:
     case INDEX_op_xor_i64:
-        tcg_out_alu(s, OPC_XOR_A1, args[0], args[1], const_args[1],
-                    args[2], const_args[2]);
+        /* TCG expects arg2 constant; A3 expects arg1 constant.  Swap.  */
+        tcg_out_alu(s, OPC_XOR_A1, OPC_XOR_A3, args[0],
+                    args[2], const_args[2], args[1], const_args[1]);
         break;
 
     case INDEX_op_mul_i32:
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 10/20] tcg-ia64: Introduce tcg_opc_mov_a
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (8 preceding siblings ...)
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 09/20] tcg-ia64: Use A3 form of logical operations Richard Henderson
@ 2013-10-31 20:21 ` Richard Henderson
  2013-11-06 22:34   ` Aurelien Jarno
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 11/20] tcg-ia64: Introduce tcg_opc_movi_a Richard Henderson
                   ` (9 subsequent siblings)
  19 siblings, 1 reply; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:21 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 35 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index e45b571..c40e32f 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -867,13 +867,18 @@ static inline void tcg_out_bundle(TCGContext *s, int template,
     s->code_ptr += 16;
 }
 
+static inline uint64_t tcg_opc_mov_a(int qp, TCGReg dst, TCGReg src)
+{
+    return tcg_opc_a4(qp, OPC_ADDS_A4, dst, 0, src);
+}
+
 static inline void tcg_out_mov(TCGContext *s, TCGType type,
                                TCGReg ret, TCGReg arg)
 {
     tcg_out_bundle(s, mmI,
                    INSN_NOP_M,
                    INSN_NOP_M,
-                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, 0, arg));
+                   tcg_opc_mov_a(TCG_REG_P0, ret, arg));
 }
 
 static inline void tcg_out_movi(TCGContext *s, TCGType type,
@@ -1514,14 +1519,14 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
     } else if (ret == v1) {
         opc1 = INSN_NOP_M;
     } else {
-        opc1 = tcg_opc_a4(TCG_REG_P6, OPC_ADDS_A4, ret, 0, v1);
+        opc1 = tcg_opc_mov_a(TCG_REG_P6, ret, v1);
     }
     if (const_v2) {
         opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0);
     } else if (ret == v2) {
         opc2 = INSN_NOP_I;
     } else {
-        opc2 = tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, ret, 0, v2);
+        opc2 = tcg_opc_mov_a(TCG_REG_P7, ret, v2);
     }
 
     tcg_out_bundle(s, MmI,
@@ -1551,8 +1556,7 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
 #if TARGET_LONG_BITS == 32
                    tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R57, addr_reg),
 #else
-                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R57,
-                              0, addr_reg),
+                   tcg_opc_mov_a(TCG_REG_P0, TCG_REG_R57, addr_reg),
 #endif
                    tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_AREG0));
@@ -1603,8 +1607,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R56, 0, TCG_AREG0),
+                   tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
                    tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]),
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_ld_helpers[s_bits]));
@@ -1657,8 +1660,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, miI,
                        INSN_NOP_M,
                        INSN_NOP_I,
-                       tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
-                                   data_reg, 0, TCG_REG_R8));
+                       tcg_opc_mov_a(TCG_REG_P0, data_reg, TCG_REG_R8));
     } else {
         tcg_out_bundle(s, miI,
                        INSN_NOP_M,
@@ -1697,8 +1699,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
-                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
-                               TCG_REG_R56, 0, TCG_AREG0),
+                   tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
                    tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[s_bits]),
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_st_helpers[s_bits]));
@@ -1718,8 +1719,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, mii,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
-                                   0, data_reg),
+                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
                        INSN_NOP_I);
         break;
 
@@ -1731,8 +1731,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
                                    TCG_REG_R2, data_reg, 15, 15));
         tcg_out_bundle(s, miI,
-                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
-                                   0, data_reg),
+                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
                        INSN_NOP_I,
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
@@ -1747,8 +1746,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
                                    TCG_REG_R2, data_reg, 31, 31));
         tcg_out_bundle(s, miI,
-                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
-                                   0, data_reg),
+                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
                        INSN_NOP_I,
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, TCG_REG_R2, 0xb));
@@ -1759,8 +1757,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, miI,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
-                                   0, data_reg),
+                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R2, data_reg, 0xb));
         data_reg = TCG_REG_R2;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 11/20] tcg-ia64: Introduce tcg_opc_movi_a
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (9 preceding siblings ...)
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 10/20] tcg-ia64: Introduce tcg_opc_mov_a Richard Henderson
@ 2013-10-31 20:22 ` Richard Henderson
  2013-11-06 22:34   ` Aurelien Jarno
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 12/20] tcg-ia64: Introduce tcg_opc_ext_i Richard Henderson
                   ` (8 subsequent siblings)
  19 siblings, 1 reply; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:22 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index c40e32f..c134034 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -881,6 +881,12 @@ static inline void tcg_out_mov(TCGContext *s, TCGType type,
                    tcg_opc_mov_a(TCG_REG_P0, ret, arg));
 }
 
+static inline uint64_t tcg_opc_movi_a(int qp, TCGReg dst, int64_t src)
+{
+    assert(src == sextract64(src, 0, 22));
+    return tcg_opc_a5(qp, OPC_ADDL_A5, dst, src, TCG_REG_R0);
+}
+
 static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                 TCGReg reg, tcg_target_long arg)
 {
@@ -1058,16 +1064,14 @@ static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3,
     uint64_t opc1 = 0, opc2 = 0, opc3 = 0;
 
     if (const_arg2 && arg2 != 0) {
-        opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
-                          TCG_REG_R3, arg2, TCG_REG_R0);
+        opc2 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R3, arg2);
         arg2 = TCG_REG_R3;
     }
     if (const_arg1 && arg1 != 0) {
         if (opc_a3 && arg1 == (int8_t)arg1) {
             opc3 = tcg_opc_a3(TCG_REG_P0, opc_a3, ret, arg1, arg2);
         } else {
-            opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
-                              TCG_REG_R2, arg1, TCG_REG_R0);
+            opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, arg1);
             arg1 = TCG_REG_R2;
         }
     }
@@ -1423,8 +1427,7 @@ static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1,
         } else {
             /* Otherwise, load any constant into a temporary.  Do this into
                the first I slot to help out with cross-unit delays.  */
-            i1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
-                            TCG_REG_R2, a2, TCG_REG_R0);
+            i1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, a2);
             a2 = TCG_REG_R2;
         }
     }
@@ -1503,8 +1506,8 @@ static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret,
 {
     tcg_out_bundle(s, MmI,
                    tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4),
-                   tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, 1, TCG_REG_R0),
-                   tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, 0, TCG_REG_R0));
+                   tcg_opc_movi_a(TCG_REG_P6, ret, 1),
+                   tcg_opc_movi_a(TCG_REG_P7, ret, 0));
 }
 
 static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
@@ -1515,14 +1518,14 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
     uint64_t opc1, opc2;
 
     if (const_v1) {
-        opc1 = tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, v1, TCG_REG_R0);
+        opc1 = tcg_opc_movi_a(TCG_REG_P6, ret, v1);
     } else if (ret == v1) {
         opc1 = INSN_NOP_M;
     } else {
         opc1 = tcg_opc_mov_a(TCG_REG_P6, ret, v1);
     }
     if (const_v2) {
-        opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0);
+        opc2 = tcg_opc_movi_a(TCG_REG_P7, ret, v2);
     } else if (ret == v2) {
         opc2 = INSN_NOP_I;
     } else {
@@ -1641,15 +1644,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     }
     if (!bswap) {
         tcg_out_bundle(s, miB,
-                       tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
-                                   mem_index, TCG_REG_R0),
+                       tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
                        INSN_NOP_I,
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                    TCG_REG_B0, TCG_REG_B6));
     } else {
         tcg_out_bundle(s, miB,
-                       tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
-                                   mem_index, TCG_REG_R0),
+                       tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
                        tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
                                    TCG_REG_R8, TCG_REG_R8, 0xb),
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
@@ -1770,8 +1771,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
                                data_reg, TCG_REG_R3),
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
-                               mem_index, TCG_REG_R0),
+                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index),
                    tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                TCG_REG_B0, TCG_REG_B6));
 }
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 12/20] tcg-ia64: Introduce tcg_opc_ext_i
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (10 preceding siblings ...)
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 11/20] tcg-ia64: Introduce tcg_opc_movi_a Richard Henderson
@ 2013-10-31 20:22 ` Richard Henderson
  2013-11-06 22:34   ` Aurelien Jarno
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 13/20] tcg-ia64: Introduce tcg_opc_bswap64_i Richard Henderson
                   ` (7 subsequent siblings)
  19 siblings, 1 reply; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:22 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

Being able to "extend" from 64-bits (with a mov) simplifies
a few places where the conditional breaks the train of thought.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 54 +++++++++++++++++++++++----------------------------
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index c134034..ca38608 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1377,6 +1377,20 @@ static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
     }
 }
 
+static const uint64_t opc_ext_i29[8] = {
+    OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0,
+    OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
+};
+
+static inline uint64_t tcg_opc_ext_i(int qp, TCGMemOp opc, TCGReg d, TCGReg s)
+{
+    if ((opc & MO_SIZE) == MO_64) {
+        return tcg_opc_mov_a(qp, d, s);
+    } else {
+        return tcg_opc_i29(qp, opc_ext_i29[opc & MO_SSIZE], d, s);
+    }
+}
+
 static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29,
                                TCGArg ret, TCGArg arg)
 {
@@ -1556,11 +1570,9 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
     tcg_out_bundle(s, mII,
                    tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2,
                                offset_rw, TCG_REG_R2),
-#if TARGET_LONG_BITS == 32
-                   tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R57, addr_reg),
-#else
-                   tcg_opc_mov_a(TCG_REG_P0, TCG_REG_R57, addr_reg),
-#endif
+                   tcg_opc_ext_i(TCG_REG_P0,
+                                 TARGET_LONG_BITS == 32 ? MO_UL : MO_Q,
+                                 TCG_REG_R57, addr_reg),
                    tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_AREG0));
     tcg_out_bundle(s, mII,
@@ -1590,10 +1602,6 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     static const uint64_t opc_ld_m1[4] = {
         OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
     };
-    static const uint64_t opc_ext_i29[8] = {
-        OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0,
-        OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
-    };
     int addr_reg, data_reg, mem_index;
     TCGMemOp s_bits, bswap;
 
@@ -1657,18 +1665,10 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                                    TCG_REG_B0, TCG_REG_B6));
     }
 
-    if (s_bits == MO_64) {
-        tcg_out_bundle(s, miI,
-                       INSN_NOP_M,
-                       INSN_NOP_I,
-                       tcg_opc_mov_a(TCG_REG_P0, data_reg, TCG_REG_R8));
-    } else {
-        tcg_out_bundle(s, miI,
-                       INSN_NOP_M,
-                       INSN_NOP_I,
-                       tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc & MO_SSIZE],
-                                   data_reg, TCG_REG_R8));
-    }
+    tcg_out_bundle(s, miI,
+                   INSN_NOP_M,
+                   INSN_NOP_I,
+                   tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8));
 }
 
 /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
@@ -1784,9 +1784,6 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     static uint64_t const opc_ld_m1[4] = {
         OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
     };
-    static uint64_t const opc_sxt_i29[4] = {
-        OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
-    };
     int addr_reg, data_reg;
     TCGMemOp s_bits, bswap;
 
@@ -1823,8 +1820,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
                            INSN_NOP_I,
-                           tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
-                                       data_reg, data_reg));
+                           tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg));
         }
     } else if (s_bits == MO_64) {
             tcg_out_bundle(s, mII,
@@ -1860,8 +1856,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            INSN_NOP_M,
                            tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
                                        data_reg, data_reg, 0xb),
-                           tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
-                                       data_reg, data_reg));
+                           tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg));
         }
     }
 #else
@@ -1905,8 +1900,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, miI,
                        INSN_NOP_M,
                        INSN_NOP_I,
-                       tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
-                                   data_reg, data_reg));
+                       tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg));
     }
 #endif
 }
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 13/20] tcg-ia64: Introduce tcg_opc_bswap64_i
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (11 preceding siblings ...)
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 12/20] tcg-ia64: Introduce tcg_opc_ext_i Richard Henderson
@ 2013-10-31 20:22 ` Richard Henderson
  2013-11-06 22:34   ` Aurelien Jarno
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 14/20] tcg-ia64: Re-bundle the tlb load Richard Henderson
                   ` (6 subsequent siblings)
  19 siblings, 1 reply; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:22 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 63 +++++++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 35 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index ca38608..9ed587a 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1400,12 +1400,17 @@ static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29,
                    tcg_opc_i29(TCG_REG_P0, opc_i29, ret, arg));
 }
 
+static inline uint64_t tcg_opc_bswap64_i(int qp, TCGReg d, TCGReg s)
+{
+    return tcg_opc_i3(qp, OPC_MUX1_I3, d, s, 0xb);
+}
+
 static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg)
 {
     tcg_out_bundle(s, mII,
                    INSN_NOP_M,
                    tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 15, 15),
-                   tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb));
+                   tcg_opc_bswap64_i(TCG_REG_P0, ret, ret));
 }
 
 static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg)
@@ -1413,7 +1418,7 @@ static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg)
     tcg_out_bundle(s, mII,
                    INSN_NOP_M,
                    tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 31, 31),
-                   tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb));
+                   tcg_opc_bswap64_i(TCG_REG_P0, ret, ret));
 }
 
 static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
@@ -1421,7 +1426,7 @@ static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
     tcg_out_bundle(s, miI,
                    INSN_NOP_M,
                    INSN_NOP_I,
-                   tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb));
+                   tcg_opc_bswap64_i(TCG_REG_P0, ret, arg));
 }
 
 static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1,
@@ -1659,8 +1664,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     } else {
         tcg_out_bundle(s, miB,
                        tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
-                       tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
-                                   TCG_REG_R8, TCG_REG_R8, 0xb),
+                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R8, TCG_REG_R8),
                        tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                    TCG_REG_B0, TCG_REG_B6));
     }
@@ -1734,8 +1738,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, miI,
                        tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
                        INSN_NOP_I,
-                       tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
-                                   TCG_REG_R2, TCG_REG_R2, 0xb));
+                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2));
         data_reg = TCG_REG_R2;
         break;
 
@@ -1749,8 +1752,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_bundle(s, miI,
                        tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
                        INSN_NOP_I,
-                       tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
-                                   TCG_REG_R2, TCG_REG_R2, 0xb));
+                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2));
         data_reg = TCG_REG_R2;
         break;
 
@@ -1759,8 +1761,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
                                    TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                       tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
-                                   TCG_REG_R2, data_reg, 0xb));
+                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, data_reg));
         data_reg = TCG_REG_R2;
         break;
 
@@ -1827,8 +1828,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
                                        data_reg, TCG_REG_R2),
                            INSN_NOP_I,
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       data_reg, data_reg, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
     } else {
         if (s_bits == MO_16) {
             tcg_out_bundle(s, mII,
@@ -1849,13 +1849,11 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
             tcg_out_bundle(s, miI,
                            INSN_NOP_M,
                            INSN_NOP_I,
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       data_reg, data_reg, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
         } else {
             tcg_out_bundle(s, mII,
                            INSN_NOP_M,
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       data_reg, data_reg, 0xb),
+                           tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg),
                            tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg));
         }
     }
@@ -1880,21 +1878,18 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                        INSN_NOP_M,
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                    data_reg, data_reg, 15, 15),
-                       tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                   data_reg, data_reg, 0xb));
+                       tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
     } else if (bswap && s_bits == MO_32) {
         tcg_out_bundle(s, mII,
                        INSN_NOP_M,
                        tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                    data_reg, data_reg, 31, 31),
-                       tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                   data_reg, data_reg, 0xb));
+                       tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
     } else if (bswap && s_bits == MO_64) {
         tcg_out_bundle(s, miI,
                        INSN_NOP_M,
                        INSN_NOP_I,
-                       tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                   data_reg, data_reg, 0xb));
+                       tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
     }
     if (opc & MO_SIGN) {
         tcg_out_bundle(s, miI,
@@ -1944,23 +1939,22 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            INSN_NOP_M,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                        TCG_REG_R3, data_reg, 15, 15),
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       TCG_REG_R3, TCG_REG_R3, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0,
+                                             TCG_REG_R3, TCG_REG_R3));
             data_reg = TCG_REG_R3;
         } else if (s_bits == MO_32) {
             tcg_out_bundle(s, mII,
                            INSN_NOP_M,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                        TCG_REG_R3, data_reg, 31, 31),
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       TCG_REG_R3, TCG_REG_R3, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0,
+                                             TCG_REG_R3, TCG_REG_R3));
             data_reg = TCG_REG_R3;
         } else if (s_bits == MO_64) {
             tcg_out_bundle(s, miI,
                            INSN_NOP_M,
                            INSN_NOP_I,
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       TCG_REG_R3, data_reg, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg));
             data_reg = TCG_REG_R3;
         }
     }
@@ -1990,23 +1984,22 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            add_guest_base,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                        TCG_REG_R3, data_reg, 15, 15),
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       TCG_REG_R3, TCG_REG_R3, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0,
+                                             TCG_REG_R3, TCG_REG_R3));
             data_reg = TCG_REG_R3;
         } else if (s_bits == MO_32) {
             tcg_out_bundle(s, mII,
                            add_guest_base,
                            tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
                                        TCG_REG_R3, data_reg, 31, 31),
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       TCG_REG_R3, TCG_REG_R3, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0,
+                                             TCG_REG_R3, TCG_REG_R3));
             data_reg = TCG_REG_R3;
         } else if (s_bits == MO_64) {
             tcg_out_bundle(s, miI,
                            add_guest_base,
                            INSN_NOP_I,
-                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
-                                       TCG_REG_R3, data_reg, 0xb));
+                           tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg));
             data_reg = TCG_REG_R3;
         }
         tcg_out_bundle(s, miI,
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 14/20] tcg-ia64: Re-bundle the tlb load
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (12 preceding siblings ...)
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 13/20] tcg-ia64: Introduce tcg_opc_bswap64_i Richard Henderson
@ 2013-10-31 20:22 ` Richard Henderson
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 15/20] tcg-ia64: Move bswap for store into " Richard Henderson
                   ` (5 subsequent siblings)
  19 siblings, 0 replies; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:22 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

This sequencing requires 5 stop bits instead of 6, and has room left
over to pre-load the tlb addend, and bswap data prior to being stored.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 77 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 54 insertions(+), 23 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 9ed587a..b4bb305 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1558,38 +1558,69 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
 }
 
 #if defined(CONFIG_SOFTMMU)
+/* We're expecting to use a signed 22-bit immediate add.  */
+QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
+                  > 0x1fffff)
+
 /* Load and compare a TLB entry, and return the result in (p6, p7).
    R2 is loaded with the address of the addend TLB entry.
-   R57 is loaded with the address, zero extented on 32-bit targets. */
-static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
-                                    TCGMemOp s_bits, uint64_t offset_rw,
-                                    uint64_t offset_addend)
-{
-    tcg_out_bundle(s, mII,
-                   INSN_NOP_M,
-                   tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2,
+   R57 is loaded with the address, zero extented on 32-bit targets.
+   R1, R3 are clobbered. */
+static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
+                                    TCGMemOp s_bits, int off_rw, int off_add)
+{
+     /*
+        .mii
+        mov	r2 = off_rw
+        extr.u	r3 = addr_reg, ...		# extract tlb page
+        zxt4	r57 = addr_reg                  # or mov for 64-bit guest
+        ;;
+        .mii
+        addl	r2 = r2, areg0
+        shl	r3 = r3, cteb                   # via dep.z
+        dep	r1 = 0, r57, ...                # zero page ofs, keep align
+        ;;
+        .mmi
+        add	r2 = r2, r3
+        ;;
+        ld4	r3 = [r2], off_add-off_rw	# or ld8 for 64-bit guest
+        nop
+        ;;
+        .mmi
+        nop
+        cmp.eq	p6, p7 = r1, r3
+        nop
+        ;;
+      */
+    tcg_out_bundle(s, miI,
+                   tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, off_rw),
+                   tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R3,
                                addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1),
-                   tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2,
-                               TCG_REG_R2, 63 - CPU_TLB_ENTRY_BITS,
-                               63 - CPU_TLB_ENTRY_BITS));
-    tcg_out_bundle(s, mII,
-                   tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2,
-                               offset_rw, TCG_REG_R2),
                    tcg_opc_ext_i(TCG_REG_P0,
                                  TARGET_LONG_BITS == 32 ? MO_UL : MO_Q,
-                                 TCG_REG_R57, addr_reg),
+                                 TCG_REG_R57, addr_reg));
+    tcg_out_bundle(s, miI,
                    tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
-                               TCG_REG_R2, TCG_AREG0));
-    tcg_out_bundle(s, mII,
+                               TCG_REG_R2, TCG_AREG0),
+                   tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3,
+                               TCG_REG_R3, 63 - CPU_TLB_ENTRY_BITS,
+                               63 - CPU_TLB_ENTRY_BITS),
+                   tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R1, 0,
+                               TCG_REG_R57, 63 - s_bits,
+                               TARGET_PAGE_BITS - s_bits - 1));
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1,
+                               TCG_REG_R2, TCG_REG_R2, TCG_REG_R3),
                    tcg_opc_m3 (TCG_REG_P0,
                                (TARGET_LONG_BITS == 32
-                                ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R56,
-                               TCG_REG_R2, offset_addend - offset_rw),
-                   tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R3, 0,
-                               TCG_REG_R57, 63 - s_bits,
-                               TARGET_PAGE_BITS - s_bits - 1),
+                                ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R3,
+                               TCG_REG_R2, off_add - off_rw),
+                   INSN_NOP_I);
+    tcg_out_bundle(s, mmI,
+                   INSN_NOP_M,
                    tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6,
-                               TCG_REG_P7, TCG_REG_R3, TCG_REG_R56));
+                               TCG_REG_P7, TCG_REG_R1, TCG_REG_R3),
+                   INSN_NOP_I);
 }
 
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 15/20] tcg-ia64: Move bswap for store into tlb load
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (13 preceding siblings ...)
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 14/20] tcg-ia64: Re-bundle the tlb load Richard Henderson
@ 2013-10-31 20:22 ` Richard Henderson
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 16/20] tcg-ia64: Move tlb addend load into tlb read Richard Henderson
                   ` (4 subsequent siblings)
  19 siblings, 0 replies; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:22 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

This saves at least two cycles per store and cleans up the code.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 96 +++++++++++++++++----------------------------------
 1 file changed, 32 insertions(+), 64 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index b4bb305..985e213 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1565,9 +1565,11 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
 /* Load and compare a TLB entry, and return the result in (p6, p7).
    R2 is loaded with the address of the addend TLB entry.
    R57 is loaded with the address, zero extented on 32-bit targets.
-   R1, R3 are clobbered. */
+   R1, R3 are clobbered, leaving R56 free for...
+   BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store.  */
 static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
-                                    TCGMemOp s_bits, int off_rw, int off_add)
+                                    TCGMemOp s_bits, int off_rw, int off_add,
+                                    uint64_t bswap1, uint64_t bswap2)
 {
      /*
         .mii
@@ -1615,12 +1617,12 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
                                (TARGET_LONG_BITS == 32
                                 ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R3,
                                TCG_REG_R2, off_add - off_rw),
-                   INSN_NOP_I);
+                   bswap1);
     tcg_out_bundle(s, mmI,
                    INSN_NOP_M,
                    tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6,
                                TCG_REG_P7, TCG_REG_R1, TCG_REG_R3),
-                   INSN_NOP_I);
+                   bswap2);
 }
 
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
@@ -1650,7 +1652,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     /* Read the TLB entry */
     tcg_out_qemu_tlb(s, addr_reg, s_bits,
                      offsetof(CPUArchState, tlb_table[mem_index][0].addr_read),
-                     offsetof(CPUArchState, tlb_table[mem_index][0].addend));
+                     offsetof(CPUArchState, tlb_table[mem_index][0].addend),
+                     INSN_NOP_I, INSN_NOP_I);
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
@@ -1721,17 +1724,31 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     static const uint64_t opc_st_m4[4] = {
         OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
     };
-    int addr_reg, data_reg, mem_index;
+    TCGReg addr_reg, data_reg, store_reg;
+    int mem_index;
+    uint64_t bswap1, bswap2;
     TCGMemOp s_bits;
 
-    data_reg = *args++;
+    store_reg = data_reg = *args++;
     addr_reg = *args++;
     mem_index = *args;
     s_bits = opc & MO_SIZE;
 
+    bswap1 = bswap2 = INSN_NOP_I;
+    if (opc & MO_BSWAP) {
+        store_reg = TCG_REG_R56;
+        bswap1 = tcg_opc_bswap64_i(TCG_REG_P0, store_reg, data_reg);
+        if (s_bits < MO_64) {
+            int shift = 64 - (8 << s_bits);
+            bswap2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11,
+                                 store_reg, store_reg, shift, 63 - shift);
+        }
+    }
+
     tcg_out_qemu_tlb(s, addr_reg, s_bits,
                      offsetof(CPUArchState, tlb_table[mem_index][0].addr_write),
-                     offsetof(CPUArchState, tlb_table[mem_index][0].addend));
+                     offsetof(CPUArchState, tlb_table[mem_index][0].addend),
+                     bswap1, bswap2);
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
@@ -1746,63 +1763,14 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                                TCG_REG_R3, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
-
-    switch (opc) {
-    case MO_8:
-    case MO_16:
-    case MO_32:
-    case MO_64:
-        tcg_out_bundle(s, mii,
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
-                                   TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                       INSN_NOP_I);
-        break;
-
-    case MO_16 | MO_BSWAP:
-        tcg_out_bundle(s, miI,
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
-                                   TCG_REG_R1, TCG_REG_R2),
-                       INSN_NOP_I,
-                       tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R2, data_reg, 15, 15));
-        tcg_out_bundle(s, miI,
-                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                       INSN_NOP_I,
-                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2));
-        data_reg = TCG_REG_R2;
-        break;
-
-    case MO_32 | MO_BSWAP:
-        tcg_out_bundle(s, miI,
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
-                                   TCG_REG_R1, TCG_REG_R2),
-                       INSN_NOP_I,
-                       tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R2, data_reg, 31, 31));
-        tcg_out_bundle(s, miI,
-                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                       INSN_NOP_I,
-                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2));
-        data_reg = TCG_REG_R2;
-        break;
-
-    case MO_64 | MO_BSWAP:
-        tcg_out_bundle(s, miI,
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
-                                   TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, data_reg));
-        data_reg = TCG_REG_R2;
-        break;
-
-    default:
-        tcg_abort();
-    }
-
+    tcg_out_bundle(s, mii,
+                   tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
+                               TCG_REG_R1, TCG_REG_R2),
+                   tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
+                   INSN_NOP_I);
     tcg_out_bundle(s, miB,
-                   tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
-                               data_reg, TCG_REG_R3),
+                   tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits],
+                               store_reg, TCG_REG_R3),
                    tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index),
                    tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                TCG_REG_B0, TCG_REG_B6));
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 16/20] tcg-ia64: Move tlb addend load into tlb read
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (14 preceding siblings ...)
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 15/20] tcg-ia64: Move bswap for store into " Richard Henderson
@ 2013-10-31 20:22 ` Richard Henderson
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 17/20] tcg-ia64: Reduce code duplication in tcg_out_qemu_ld Richard Henderson
                   ` (3 subsequent siblings)
  19 siblings, 0 replies; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:22 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 985e213..b62337e 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1563,7 +1563,7 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
                   > 0x1fffff)
 
 /* Load and compare a TLB entry, and return the result in (p6, p7).
-   R2 is loaded with the address of the addend TLB entry.
+   R2 is loaded with the addend TLB entry.
    R57 is loaded with the address, zero extented on 32-bit targets.
    R1, R3 are clobbered, leaving R56 free for...
    BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store.  */
@@ -1619,7 +1619,7 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
                                TCG_REG_R2, off_add - off_rw),
                    bswap1);
     tcg_out_bundle(s, mmI,
-                   INSN_NOP_M,
+                   tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, TCG_REG_R2),
                    tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6,
                                TCG_REG_P7, TCG_REG_R1, TCG_REG_R3),
                    bswap2);
@@ -1662,30 +1662,30 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_ld_helpers[s_bits]));
     tcg_out_bundle(s, MmI,
-                   tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
+                   tcg_opc_m3 (TCG_REG_P7, OPC_LD8_M3, TCG_REG_R3,
                                TCG_REG_R2, 8),
-                   tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R57),
+                   tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
+                               TCG_REG_R2, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
     if (bswap && s_bits == MO_16) {
         tcg_out_bundle(s, MmI,
                        tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
-                                   TCG_REG_R8, TCG_REG_R3),
+                                   TCG_REG_R8, TCG_REG_R2),
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
                                    TCG_REG_R8, TCG_REG_R8, 15, 15));
     } else if (bswap && s_bits == MO_32) {
         tcg_out_bundle(s, MmI,
                        tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
-                                   TCG_REG_R8, TCG_REG_R3),
+                                   TCG_REG_R8, TCG_REG_R2),
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
                                    TCG_REG_R8, TCG_REG_R8, 31, 31));
     } else {
         tcg_out_bundle(s, mmI,
                        tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
-                                   TCG_REG_R8, TCG_REG_R3),
+                                   TCG_REG_R8, TCG_REG_R2),
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        INSN_NOP_I);
     }
@@ -1757,10 +1757,10 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                    tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
                                (tcg_target_long) qemu_st_helpers[s_bits]));
     tcg_out_bundle(s, MmI,
-                   tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
+                   tcg_opc_m3 (TCG_REG_P7, OPC_LD8_M3, TCG_REG_R3,
                                TCG_REG_R2, 8),
-                   tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
-                               TCG_REG_R3, TCG_REG_R57),
+                   tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
+                               TCG_REG_R2, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
     tcg_out_bundle(s, mii,
@@ -1770,7 +1770,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                    INSN_NOP_I);
     tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits],
-                               store_reg, TCG_REG_R3),
+                               store_reg, TCG_REG_R2),
                    tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index),
                    tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                TCG_REG_B0, TCG_REG_B6));
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 17/20] tcg-ia64: Reduce code duplication in tcg_out_qemu_ld
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (15 preceding siblings ...)
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 16/20] tcg-ia64: Move tlb addend load into tlb read Richard Henderson
@ 2013-10-31 20:22 ` Richard Henderson
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 18/20] tcg-ia64: Convert to new ldst helpers Richard Henderson
                   ` (2 subsequent siblings)
  19 siblings, 0 replies; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:22 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

The only differences were in the bswap insns emitted.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 60 ++++++++++++++++++++-------------------------------
 1 file changed, 23 insertions(+), 37 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index b62337e..a7325ac 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1641,13 +1641,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
     };
     int addr_reg, data_reg, mem_index;
-    TCGMemOp s_bits, bswap;
+    TCGMemOp s_bits;
+    uint64_t bswap1, bswap2;
 
     data_reg = *args++;
     addr_reg = *args++;
     mem_index = *args;
     s_bits = opc & MO_SIZE;
-    bswap = opc & MO_BSWAP;
 
     /* Read the TLB entry */
     tcg_out_qemu_tlb(s, addr_reg, s_bits,
@@ -1656,6 +1656,17 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                      INSN_NOP_I, INSN_NOP_I);
 
     /* P6 is the fast path, and P7 the slow path */
+
+    bswap1 = bswap2 = INSN_NOP_I;
+    if (opc & MO_BSWAP) {
+        bswap1 = tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R8, TCG_REG_R8);
+        if (s_bits < MO_64) {
+            int shift = 64 - (8 << s_bits);
+            bswap2 = tcg_opc_i11(TCG_REG_P6, OPC_EXTR_U_I11,
+                                 TCG_REG_R8, TCG_REG_R8, shift, 63 - shift);
+        }
+    }
+
     tcg_out_bundle(s, mLX,
                    tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
                    tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]),
@@ -1668,41 +1679,16 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                                TCG_REG_R2, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
-    if (bswap && s_bits == MO_16) {
-        tcg_out_bundle(s, MmI,
-                       tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
-                                   TCG_REG_R8, TCG_REG_R2),
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R8, TCG_REG_R8, 15, 15));
-    } else if (bswap && s_bits == MO_32) {
-        tcg_out_bundle(s, MmI,
-                       tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
-                                   TCG_REG_R8, TCG_REG_R2),
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R8, TCG_REG_R8, 31, 31));
-    } else {
-        tcg_out_bundle(s, mmI,
-                       tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
-                                   TCG_REG_R8, TCG_REG_R2),
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
-                       INSN_NOP_I);
-    }
-    if (!bswap) {
-        tcg_out_bundle(s, miB,
-                       tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
-                       INSN_NOP_I,
-                       tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
-                                   TCG_REG_B0, TCG_REG_B6));
-    } else {
-        tcg_out_bundle(s, miB,
-                       tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
-                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R8, TCG_REG_R8),
-                       tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
-                                   TCG_REG_B0, TCG_REG_B6));
-    }
-
+    tcg_out_bundle(s, MmI,
+                   tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
+                               TCG_REG_R8, TCG_REG_R2),
+                   tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
+                   bswap1);
+    tcg_out_bundle(s, miB,
+                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
+                   bswap2,
+                   tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
+                               TCG_REG_B0, TCG_REG_B6));
     tcg_out_bundle(s, miI,
                    INSN_NOP_M,
                    INSN_NOP_I,
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 18/20] tcg-ia64: Convert to new ldst helpers
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (16 preceding siblings ...)
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 17/20] tcg-ia64: Reduce code duplication in tcg_out_qemu_ld Richard Henderson
@ 2013-10-31 20:22 ` Richard Henderson
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 19/20] tcg-ia64: Move part of softmmu slow path out of line Richard Henderson
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 20/20] tcg-ia64: Optimize small arguments to exit_tb Richard Henderson
  19 siblings, 0 replies; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:22 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

Still inline, but updated to the new routines.  Always use the LE
helpers, reusing the bswap between the fast and slow paths.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 140 ++++++++++++++++++++++++++++----------------------
 1 file changed, 79 insertions(+), 61 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index a7325ac..c881a67 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -225,6 +225,7 @@ enum {
     OPC_BR_CALL_SPTK_MANY_B5  = 0x02100001000ull,
     OPC_BR_RET_SPTK_MANY_B4   = 0x00108001100ull,
     OPC_BRL_SPTK_MANY_X3      = 0x18000001000ull,
+    OPC_BRL_CALL_SPNT_MANY_X4 = 0x1a200001000ull,
     OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull,
     OPC_CMP_LT_A6             = 0x18000000000ull,
     OPC_CMP_LTU_A6            = 0x1a000000000ull,
@@ -815,6 +816,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 #if defined(CONFIG_SOFTMMU)
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R56);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R57);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R58);
 #endif
         break;
     case 'Z':
@@ -1626,12 +1628,12 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
 }
 
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
-   int mmu_idx) */
+   int mmu_idx, uintptr_t retaddr) */
 static const void * const qemu_ld_helpers[4] = {
-    helper_ldb_mmu,
-    helper_ldw_mmu,
-    helper_ldl_mmu,
-    helper_ldq_mmu,
+    helper_ret_ldub_mmu,
+    helper_le_lduw_mmu,
+    helper_le_ldul_mmu,
+    helper_le_ldq_mmu,
 };
 
 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
@@ -1642,7 +1644,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     };
     int addr_reg, data_reg, mem_index;
     TCGMemOp s_bits;
-    uint64_t bswap1, bswap2;
+    uint64_t fin1, fin2, *desc, func, gp, here;
 
     data_reg = *args++;
     addr_reg = *args++;
@@ -1657,51 +1659,59 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     /* P6 is the fast path, and P7 the slow path */
 
-    bswap1 = bswap2 = INSN_NOP_I;
+    fin2 = 0;
     if (opc & MO_BSWAP) {
-        bswap1 = tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R8, TCG_REG_R8);
+        fin1 = tcg_opc_bswap64_i(TCG_REG_P0, data_reg, TCG_REG_R8);
         if (s_bits < MO_64) {
             int shift = 64 - (8 << s_bits);
-            bswap2 = tcg_opc_i11(TCG_REG_P6, OPC_EXTR_U_I11,
-                                 TCG_REG_R8, TCG_REG_R8, shift, 63 - shift);
+            fin2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11,
+                               data_reg, data_reg, shift, 63 - shift);
         }
+    } else {
+        fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8);
     }
 
-    tcg_out_bundle(s, mLX,
+    desc = (uintptr_t *)qemu_ld_helpers[s_bits];
+    func = desc[0];
+    gp = desc[1];
+    here = (uintptr_t)s->code_ptr;
+
+    tcg_out_bundle(s, mlx,
                    tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
-                   tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
-                               (tcg_target_long) qemu_ld_helpers[s_bits]));
-    tcg_out_bundle(s, MmI,
-                   tcg_opc_m3 (TCG_REG_P7, OPC_LD8_M3, TCG_REG_R3,
-                               TCG_REG_R2, 8),
+                   tcg_opc_l2 (here),
+                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R59, here));
+    tcg_out_bundle(s, mLX,
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_REG_R57),
-                   tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
-                               TCG_REG_R3, 0));
-    tcg_out_bundle(s, MmI,
+                   tcg_opc_l2 (gp),
+                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp));
+    tcg_out_bundle(s, mmi,
                    tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
                                TCG_REG_R8, TCG_REG_R2),
-                   tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
-                   bswap1);
-    tcg_out_bundle(s, miB,
                    tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
-                   bswap2,
-                   tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
-                               TCG_REG_B0, TCG_REG_B6));
-    tcg_out_bundle(s, miI,
+                   INSN_NOP_I);
+    func -= (uintptr_t)s->code_ptr;
+    tcg_out_bundle(s, mLX,
                    INSN_NOP_M,
-                   INSN_NOP_I,
-                   tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8));
+                   tcg_opc_l4 (func >> 4),
+                   tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4,
+                               TCG_REG_B0, func >> 4));
+
+    /* Note that we always use LE helper functions, so the bswap insns
+       here for the fast path also apply to the slow path.  */
+    tcg_out_bundle(s, (fin2 ? mII : miI),
+                   INSN_NOP_M,
+                   fin1,
+                   fin2 ? fin2 : INSN_NOP_I);
 }
 
 /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
-   uintxx_t val, int mmu_idx) */
+   uintxx_t val, int mmu_idx, uintptr_t retaddr) */
 static const void * const qemu_st_helpers[4] = {
-    helper_stb_mmu,
-    helper_stw_mmu,
-    helper_stl_mmu,
-    helper_stq_mmu,
+    helper_ret_stb_mmu,
+    helper_le_stw_mmu,
+    helper_le_stl_mmu,
+    helper_le_stq_mmu,
 };
 
 static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
@@ -1710,56 +1720,64 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     static const uint64_t opc_st_m4[4] = {
         OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
     };
-    TCGReg addr_reg, data_reg, store_reg;
+    TCGReg addr_reg, data_reg;
     int mem_index;
-    uint64_t bswap1, bswap2;
+    uint64_t pre1, pre2, *desc, func, gp, here;
     TCGMemOp s_bits;
 
-    store_reg = data_reg = *args++;
+    data_reg = *args++;
     addr_reg = *args++;
     mem_index = *args;
     s_bits = opc & MO_SIZE;
 
-    bswap1 = bswap2 = INSN_NOP_I;
+    /* Note that we always use LE helper functions, so the bswap insns
+       that are here for the fast path also apply to the slow path,
+       and move the data into the argument register.  */
+    pre2 = INSN_NOP_I;
     if (opc & MO_BSWAP) {
-        store_reg = TCG_REG_R56;
-        bswap1 = tcg_opc_bswap64_i(TCG_REG_P0, store_reg, data_reg);
+        pre1 = tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R58, data_reg);
         if (s_bits < MO_64) {
             int shift = 64 - (8 << s_bits);
-            bswap2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11,
-                                 store_reg, store_reg, shift, 63 - shift);
+            pre2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11,
+                               TCG_REG_R58, TCG_REG_R58, shift, 63 - shift);
         }
+    } else {
+        /* Just move the data into place for the slow path.  */
+        pre1 = tcg_opc_ext_i(TCG_REG_P0, opc, TCG_REG_R58, data_reg);
     }
 
     tcg_out_qemu_tlb(s, addr_reg, s_bits,
                      offsetof(CPUArchState, tlb_table[mem_index][0].addr_write),
                      offsetof(CPUArchState, tlb_table[mem_index][0].addend),
-                     bswap1, bswap2);
+                     pre1, pre2);
 
     /* P6 is the fast path, and P7 the slow path */
-    tcg_out_bundle(s, mLX,
+
+    desc = (uintptr_t *)qemu_st_helpers[s_bits];
+    func = desc[0];
+    gp = desc[1];
+    here = (uintptr_t)s->code_ptr;
+
+    tcg_out_bundle(s, mlx,
                    tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
-                   tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[s_bits]),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
-                               (tcg_target_long) qemu_st_helpers[s_bits]));
-    tcg_out_bundle(s, MmI,
-                   tcg_opc_m3 (TCG_REG_P7, OPC_LD8_M3, TCG_REG_R3,
-                               TCG_REG_R2, 8),
+                   tcg_opc_l2 (here),
+                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R60, here));
+    tcg_out_bundle(s, mLX,
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_REG_R57),
-                   tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
-                               TCG_REG_R3, 0));
-    tcg_out_bundle(s, mii,
-                   tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
-                               TCG_REG_R1, TCG_REG_R2),
-                   tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                   INSN_NOP_I);
-    tcg_out_bundle(s, miB,
+                   tcg_opc_l2 (gp),
+                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp));
+    tcg_out_bundle(s, mmi,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits],
-                               store_reg, TCG_REG_R2),
+                               TCG_REG_R58, TCG_REG_R2),
                    tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index),
-                   tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
-                               TCG_REG_B0, TCG_REG_B6));
+                   INSN_NOP_I);
+    func -= (uintptr_t)s->code_ptr;
+    tcg_out_bundle(s, mLX,
+                   INSN_NOP_M,
+                   tcg_opc_l4 (func >> 4),
+                   tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4,
+                               TCG_REG_B0, func >> 4));
 }
 
 #else /* !CONFIG_SOFTMMU */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 19/20] tcg-ia64: Move part of softmmu slow path out of line
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (17 preceding siblings ...)
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 18/20] tcg-ia64: Convert to new ldst helpers Richard Henderson
@ 2013-10-31 20:22 ` Richard Henderson
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 20/20] tcg-ia64: Optimize small arguments to exit_tb Richard Henderson
  19 siblings, 0 replies; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:22 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 176 ++++++++++++++++++++++++++++++++------------------
 1 file changed, 114 insertions(+), 62 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index c881a67..a9dd153 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -23,8 +23,6 @@
  * THE SOFTWARE.
  */
 
-#include "tcg-be-null.h"
-
 /*
  * Register definitions
  */
@@ -221,6 +219,7 @@ enum {
     OPC_ALLOC_M34             = 0x02c00000000ull,
     OPC_BR_DPTK_FEW_B1        = 0x08400000000ull,
     OPC_BR_SPTK_MANY_B1       = 0x08000001000ull,
+    OPC_BR_CALL_SPNT_FEW_B3   = 0x0a200000000ull,
     OPC_BR_SPTK_MANY_B4       = 0x00100001000ull,
     OPC_BR_CALL_SPTK_MANY_B5  = 0x02100001000ull,
     OPC_BR_RET_SPTK_MANY_B4   = 0x00108001100ull,
@@ -357,6 +356,15 @@ static inline uint64_t tcg_opc_b1(int qp, uint64_t opc, uint64_t imm)
            | (qp & 0x3f);
 }
 
+static inline uint64_t tcg_opc_b3(int qp, uint64_t opc, int b1, uint64_t imm)
+{
+    return opc
+           | ((imm & 0x100000) << 16) /* s */
+           | ((imm & 0x0fffff) << 13) /* imm20b */
+           | ((b1 & 0x7) << 6)
+           | (qp & 0x3f);
+}
+
 static inline uint64_t tcg_opc_b4(int qp, uint64_t opc, int b2)
 {
     return opc
@@ -1627,14 +1635,87 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
                    bswap2);
 }
 
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
-   int mmu_idx, uintptr_t retaddr) */
-static const void * const qemu_ld_helpers[4] = {
-    helper_ret_ldub_mmu,
-    helper_le_lduw_mmu,
-    helper_le_ldul_mmu,
-    helper_le_ldq_mmu,
-};
+#define TCG_MAX_QEMU_LDST       640
+
+typedef struct TCGLabelQemuLdst {
+    bool is_ld;
+    TCGMemOp size;
+    uint8_t *label_ptr;     /* label pointers to be updated */
+} TCGLabelQemuLdst;
+
+typedef struct TCGBackendData {
+    int nb_ldst_labels;
+    TCGLabelQemuLdst ldst_labels[TCG_MAX_QEMU_LDST];
+} TCGBackendData;
+
+static inline void tcg_out_tb_init(TCGContext *s)
+{
+    s->be->nb_ldst_labels = 0;
+}
+
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
+                                uint8_t *label_ptr)
+{
+    TCGBackendData *be = s->be;
+    TCGLabelQemuLdst *l = &be->ldst_labels[be->nb_ldst_labels++];
+
+    assert(be->nb_ldst_labels <= TCG_MAX_QEMU_LDST);
+    l->is_ld = is_ld;
+    l->size = opc & MO_SIZE;
+    l->label_ptr = label_ptr;
+}
+
+static void tcg_out_tb_finalize(TCGContext *s)
+{
+    static const void * const helpers[8] = {
+        helper_ret_stb_mmu,
+        helper_le_stw_mmu,
+        helper_le_stl_mmu,
+        helper_le_stq_mmu,
+        helper_ret_ldub_mmu,
+        helper_le_lduw_mmu,
+        helper_le_ldul_mmu,
+        helper_le_ldq_mmu,
+    };
+    uintptr_t thunks[8] = { };
+    TCGBackendData *be = s->be;
+    size_t i, n = be->nb_ldst_labels;
+
+    for (i = 0; i < n; i++) {
+        TCGLabelQemuLdst *l = &be->ldst_labels[i];
+        long x = l->is_ld * 4 + l->size;
+        uintptr_t dest = thunks[x];
+
+        /* The out-of-line thunks are all the same; load the return address
+           from B0, load the GP, and branch to the code.  Note that we are
+           always post-call, so the register window has rolled, so we're
+           using incoming parameter register numbers, not outgoing.  */
+        if (dest == 0) {
+            uintptr_t disp, *desc = (uintptr_t *)helpers[x];
+
+            thunks[x] = dest = (uintptr_t)s->code_ptr;
+
+            tcg_out_bundle(s, mlx,
+                           INSN_NOP_M,
+                           tcg_opc_l2 (desc[1]),
+                           tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2,
+                                       TCG_REG_R1, desc[1]));
+            tcg_out_bundle(s, mii,
+                           INSN_NOP_M,
+                           INSN_NOP_I,
+                           tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22,
+                                       l->is_ld ? TCG_REG_R35 : TCG_REG_R36,
+                                       TCG_REG_B0));
+            disp = (desc[0] - (uintptr_t)s->code_ptr) >> 4;
+            tcg_out_bundle(s, mLX,
+                           INSN_NOP_M,
+                           tcg_opc_l3 (disp),
+                           tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, disp));
+        }
+
+        reloc_pcrel21b(l->label_ptr, dest);
+    }
+}
 
 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                                    TCGMemOp opc)
@@ -1644,7 +1725,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     };
     int addr_reg, data_reg, mem_index;
     TCGMemOp s_bits;
-    uint64_t fin1, fin2, *desc, func, gp, here;
+    uint64_t fin1, fin2;
+    uint8_t *label_ptr;
 
     data_reg = *args++;
     addr_reg = *args++;
@@ -1671,31 +1753,20 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8);
     }
 
-    desc = (uintptr_t *)qemu_ld_helpers[s_bits];
-    func = desc[0];
-    gp = desc[1];
-    here = (uintptr_t)s->code_ptr;
-
-    tcg_out_bundle(s, mlx,
+    tcg_out_bundle(s, mmI,
                    tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
-                   tcg_opc_l2 (here),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R59, here));
-    tcg_out_bundle(s, mLX,
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_REG_R57),
-                   tcg_opc_l2 (gp),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp));
-    tcg_out_bundle(s, mmi,
+                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index));
+    label_ptr = s->code_ptr + 2;
+    tcg_out_bundle(s, miB,
                    tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
                                TCG_REG_R8, TCG_REG_R2),
-                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
-                   INSN_NOP_I);
-    func -= (uintptr_t)s->code_ptr;
-    tcg_out_bundle(s, mLX,
-                   INSN_NOP_M,
-                   tcg_opc_l4 (func >> 4),
-                   tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4,
-                               TCG_REG_B0, func >> 4));
+                   INSN_NOP_I,
+                   tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0,
+                               get_reloc_pcrel21b(label_ptr)));
+
+    add_qemu_ldst_label(s, 1, opc, label_ptr);
 
     /* Note that we always use LE helper functions, so the bswap insns
        here for the fast path also apply to the slow path.  */
@@ -1705,15 +1776,6 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                    fin2 ? fin2 : INSN_NOP_I);
 }
 
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
-   uintxx_t val, int mmu_idx, uintptr_t retaddr) */
-static const void * const qemu_st_helpers[4] = {
-    helper_ret_stb_mmu,
-    helper_le_stw_mmu,
-    helper_le_stl_mmu,
-    helper_le_stq_mmu,
-};
-
 static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                                    TCGMemOp opc)
 {
@@ -1722,8 +1784,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     };
     TCGReg addr_reg, data_reg;
     int mem_index;
-    uint64_t pre1, pre2, *desc, func, gp, here;
+    uint64_t pre1, pre2;
     TCGMemOp s_bits;
+    uint8_t *label_ptr;
 
     data_reg = *args++;
     addr_reg = *args++;
@@ -1752,35 +1815,24 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                      pre1, pre2);
 
     /* P6 is the fast path, and P7 the slow path */
-
-    desc = (uintptr_t *)qemu_st_helpers[s_bits];
-    func = desc[0];
-    gp = desc[1];
-    here = (uintptr_t)s->code_ptr;
-
-    tcg_out_bundle(s, mlx,
+    tcg_out_bundle(s, mmI,
                    tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
-                   tcg_opc_l2 (here),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R60, here));
-    tcg_out_bundle(s, mLX,
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_REG_R57),
-                   tcg_opc_l2 (gp),
-                   tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp));
-    tcg_out_bundle(s, mmi,
+                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index));
+    label_ptr = s->code_ptr + 2;
+    tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits],
                                TCG_REG_R58, TCG_REG_R2),
-                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index),
-                   INSN_NOP_I);
-    func -= (uintptr_t)s->code_ptr;
-    tcg_out_bundle(s, mLX,
-                   INSN_NOP_M,
-                   tcg_opc_l4 (func >> 4),
-                   tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4,
-                               TCG_REG_B0, func >> 4));
+                   INSN_NOP_I,
+                   tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0,
+                               get_reloc_pcrel21b(label_ptr)));
+
+    add_qemu_ldst_label(s, 0, opc, label_ptr);
 }
 
 #else /* !CONFIG_SOFTMMU */
+# include "tcg-be-null.h"
 
 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                                    TCGMemOp opc)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [Qemu-devel] [PATCH 20/20] tcg-ia64: Optimize small arguments to exit_tb
  2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
                   ` (18 preceding siblings ...)
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 19/20] tcg-ia64: Move part of softmmu slow path out of line Richard Henderson
@ 2013-10-31 20:22 ` Richard Henderson
  19 siblings, 0 replies; 39+ messages in thread
From: Richard Henderson @ 2013-10-31 20:22 UTC (permalink / raw
  To: qemu-devel; +Cc: aliguori, aurelien

Saves one bundle for the common case of exit_tb 0.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index a9dd153..6483fa0 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -960,15 +960,21 @@ static inline void tcg_out_callr(TCGContext *s, TCGReg addr)
 static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg)
 {
     int64_t disp;
-    uint64_t imm;
+    uint64_t imm, opc1;
 
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R8, arg);
+    /* At least arg == 0 is a common operation.  */
+    if (arg == sextract64(arg, 0, 22)) {
+        opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R8, arg);
+    } else {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R8, arg);
+        opc1 = INSN_NOP_M;
+    }
 
     disp = tb_ret_addr - s->code_ptr;
     imm = (uint64_t)disp >> 4;
 
     tcg_out_bundle(s, mLX,
-                   INSN_NOP_M,
+                   opc1,
                    tcg_opc_l3 (imm),
                    tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, imm));
 }
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 01/20] tcg-ia64: Use TCGMemOp within qemu_ldst routines
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 01/20] tcg-ia64: Use TCGMemOp within qemu_ldst routines Richard Henderson
@ 2013-11-03 22:55   ` Aurelien Jarno
  2013-11-06 22:05     ` Aurelien Jarno
  0 siblings, 1 reply; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-03 22:55 UTC (permalink / raw
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Thu, Oct 31, 2013 at 01:21:50PM -0700, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/ia64/tcg-target.c | 171 ++++++++++++++++++++++++++------------------------
>  1 file changed, 90 insertions(+), 81 deletions(-)
> 
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index 0656d39..e09ad24 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -1496,7 +1496,7 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
>     R2 is loaded with the address of the addend TLB entry.
>     R57 is loaded with the address, zero extended on 32-bit targets. */
>  static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
> -                                    int s_bits, uint64_t offset_rw,
> +                                    TCGMemOp s_bits, uint64_t offset_rw,
>                                      uint64_t offset_addend)
>  {
>      tcg_out_bundle(s, mII,
> @@ -1538,23 +1538,24 @@ static const void * const qemu_ld_helpers[4] = {
>      helper_ldq_mmu,
>  };
>  
> -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
> +                                   TCGMemOp opc)
>  {
> -    int addr_reg, data_reg, mem_index, s_bits, bswap;
> -    uint64_t opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 };
> -    uint64_t opc_ext_i29[8] = { OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0,
> -                                OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 };
> +    static const uint64_t opc_ld_m1[4] = {
> +        OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
> +    };
> +    static const uint64_t opc_ext_i29[8] = {
> +        OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0,
> +        OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
> +    };
> +    int addr_reg, data_reg, mem_index;
> +    TCGMemOp s_bits, bswap;
>  
>      data_reg = *args++;
>      addr_reg = *args++;
>      mem_index = *args;
> -    s_bits = opc & 3;
> -
> -#ifdef TARGET_WORDS_BIGENDIAN
> -    bswap = 1;
> -#else
> -    bswap = 0;
> -#endif
> +    s_bits = opc & MO_SIZE;
> +    bswap = opc & MO_BSWAP;
>  
>      /* Read the TLB entry */
>      tcg_out_qemu_tlb(s, addr_reg, s_bits,
> @@ -1575,14 +1576,14 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
>                                 TCG_REG_R3, TCG_REG_R57),
>                     tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
>                                 TCG_REG_R3, 0));
> -    if (bswap && s_bits == 1) {
> +    if (bswap && s_bits == MO_16) {
>          tcg_out_bundle(s, MmI,
>                         tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
>                                     TCG_REG_R8, TCG_REG_R3),
>                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
>                         tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
>                                     TCG_REG_R8, TCG_REG_R8, 15, 15));
> -    } else if (bswap && s_bits == 2) {
> +    } else if (bswap && s_bits == MO_32) {
>          tcg_out_bundle(s, MmI,
>                         tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
>                                     TCG_REG_R8, TCG_REG_R3),
> @@ -1596,7 +1597,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
>                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
>                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
>      }
> -    if (!bswap || s_bits == 0) {
> +    if (!bswap) {
>          tcg_out_bundle(s, miB,
>                         tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
>                                     mem_index, TCG_REG_R0),
> @@ -1613,7 +1614,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
>                                     TCG_REG_B0, TCG_REG_B6));
>      }
>  
> -    if (opc == 3) {
> +    if (s_bits == MO_64) {
>          tcg_out_bundle(s, miI,
>                         tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
>                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> @@ -1623,7 +1624,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
>          tcg_out_bundle(s, miI,
>                         tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
>                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> -                       tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc],
> +                       tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc & MO_SSIZE],
>                                     data_reg, TCG_REG_R8));
>      }
>  }
> @@ -1637,22 +1638,21 @@ static const void * const qemu_st_helpers[4] = {
>      helper_stq_mmu,
>  };
>  
> -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
> +                                   TCGMemOp opc)
>  {
> -    int addr_reg, data_reg, mem_index, bswap;
> -    uint64_t opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 };
> +    static const uint64_t opc_st_m4[4] = {
> +        OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
> +    };
> +    int addr_reg, data_reg, mem_index;
> +    TCGMemOp s_bits;
>  
>      data_reg = *args++;
>      addr_reg = *args++;
>      mem_index = *args;
> +    s_bits = opc & MO_SIZE;
>  
> -#ifdef TARGET_WORDS_BIGENDIAN
> -    bswap = 1;
> -#else
> -    bswap = 0;
> -#endif
> -
> -    tcg_out_qemu_tlb(s, addr_reg, opc,
> +    tcg_out_qemu_tlb(s, addr_reg, s_bits,
>                       offsetof(CPUArchState, tlb_table[mem_index][0].addr_write),
>                       offsetof(CPUArchState, tlb_table[mem_index][0].addend));
>  
> @@ -1660,9 +1660,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>      tcg_out_bundle(s, mLX,
>                     tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
>                                 TCG_REG_R56, 0, TCG_AREG0),
> -                   tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[opc]),
> +                   tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[s_bits]),
>                     tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
> -                               (tcg_target_long) qemu_st_helpers[opc]));
> +                               (tcg_target_long) qemu_st_helpers[s_bits]));
>      tcg_out_bundle(s, MmI,
>                     tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
>                                 TCG_REG_R2, 8),
> @@ -1671,14 +1671,20 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>                     tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
>                                 TCG_REG_R3, 0));
>  
> -    if (!bswap || opc == 0) {
> +    switch (opc) {
> +    case MO_8:
> +    case MO_16:
> +    case MO_32:
> +    case MO_64:
>          tcg_out_bundle(s, mii,
>                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
>                                     TCG_REG_R1, TCG_REG_R2),
>                         tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
>                                     0, data_reg),
>                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> -    } else if (opc == 1) {
> +        break;
> +
> +    case MO_16 | MO_BSWAP:
>          tcg_out_bundle(s, miI,
>                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
>                                     TCG_REG_R1, TCG_REG_R2),
> @@ -1692,7 +1698,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>                         tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
>                                     TCG_REG_R2, TCG_REG_R2, 0xb));
>          data_reg = TCG_REG_R2;
> -    } else if (opc == 2) {
> +        break;
> +
> +    case MO_32 | MO_BSWAP:
>          tcg_out_bundle(s, miI,
>                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
>                                     TCG_REG_R1, TCG_REG_R2),
> @@ -1706,7 +1714,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>                         tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
>                                     TCG_REG_R2, TCG_REG_R2, 0xb));
>          data_reg = TCG_REG_R2;
> -    } else if (opc == 3) {
> +        break;
> +
> +    case MO_64 | MO_BSWAP:
>          tcg_out_bundle(s, miI,
>                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
>                                     TCG_REG_R1, TCG_REG_R2),
> @@ -1715,6 +1725,10 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>                         tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
>                                     TCG_REG_R2, data_reg, 0xb));
>          data_reg = TCG_REG_R2;
> +        break;
> +
> +    default:
> +        tcg_abort();
>      }
>  
>      tcg_out_bundle(s, miB,
> @@ -1728,7 +1742,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>  
>  #else /* !CONFIG_SOFTMMU */
>  
> -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
> +                                   TCGMemOp opc)
>  {
>      static uint64_t const opc_ld_m1[4] = {
>          OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
> @@ -1736,17 +1751,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
>      static uint64_t const opc_sxt_i29[4] = {
>          OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
>      };
> -    int addr_reg, data_reg, s_bits, bswap;
> +    int addr_reg, data_reg;
> +    TCGMemOp s_bits, bswap;
>  
>      data_reg = *args++;
>      addr_reg = *args++;
> -    s_bits = opc & 3;
> -
> -#ifdef TARGET_WORDS_BIGENDIAN
> -    bswap = 1;
> -#else
> -    bswap = 0;
> -#endif
> +    s_bits = opc & MO_SIZE;
> +    bswap = opc & MO_BSWAP;
>  
>  #if TARGET_LONG_BITS == 32
>      if (GUEST_BASE != 0) {
> @@ -1764,8 +1775,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
>                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
>      }
>  
> -    if (!bswap || s_bits == 0) {
> -        if (s_bits == opc) {
> +    if (!bswap) {
> +        if (!(opc & MO_SIGN)) {
>              tcg_out_bundle(s, miI,
>                             tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
>                                         data_reg, TCG_REG_R2),
> @@ -1779,7 +1790,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
>                             tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
>                                         data_reg, data_reg));
>          }
> -    } else if (s_bits == 3) {
> +    } else if (s_bits == MO_64) {
>              tcg_out_bundle(s, mII,
>                             tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
>                                         data_reg, TCG_REG_R2),
> @@ -1787,7 +1798,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
>                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                         data_reg, data_reg, 0xb));
>      } else {
> -        if (s_bits == 1) {
> +        if (s_bits == MO_16) {
>              tcg_out_bundle(s, mII,
>                             tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
>                                         data_reg, TCG_REG_R2),
> @@ -1802,7 +1813,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
>                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
>                                        data_reg, data_reg, 31, 31));
>          }
> -        if (opc == s_bits) {
> +        if (!(opc & MO_SIGN)) {
>              tcg_out_bundle(s, miI,
>                             tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
>                             tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> @@ -1833,28 +1844,28 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
>                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
>      }
>  
> -    if (bswap && s_bits == 1) {
> +    if (bswap && s_bits == MO_16) {
>          tcg_out_bundle(s, mII,
>                         tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
>                         tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
>                                     data_reg, data_reg, 15, 15),
>                         tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                     data_reg, data_reg, 0xb));
> -    } else if (bswap && s_bits == 2) {
> +    } else if (bswap && s_bits == MO_32) {
>          tcg_out_bundle(s, mII,
>                         tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
>                         tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
>                                     data_reg, data_reg, 31, 31),
>                         tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                     data_reg, data_reg, 0xb));
> -    } else if (bswap && s_bits == 3) {
> +    } else if (bswap && s_bits == MO_64) {
>          tcg_out_bundle(s, miI,
>                         tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
>                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
>                         tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                     data_reg, data_reg, 0xb));
>      }
> -    if (s_bits != opc) {
> +    if (opc & MO_SIGN) {
>          tcg_out_bundle(s, miI,
>                         tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
>                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> @@ -1864,24 +1875,22 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
>  #endif
>  }
>  
> -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
> +                                   TCGMemOp opc)
>  {
>      static uint64_t const opc_st_m4[4] = {
>          OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
>      };
> -    int addr_reg, data_reg, bswap;
> +    int addr_reg, data_reg;
>  #if TARGET_LONG_BITS == 64
>      uint64_t add_guest_base;
>  #endif
> +    TCGMemOp s_bits, bswap;
>  
>      data_reg = *args++;
>      addr_reg = *args++;
> -
> -#ifdef TARGET_WORDS_BIGENDIAN
> -    bswap = 1;
> -#else
> -    bswap = 0;
> -#endif
> +    s_bits = opc & MO_SIZE;
> +    bswap = opc & MO_BSWAP;
>  
>  #if TARGET_LONG_BITS == 32
>      if (GUEST_BASE != 0) {
> @@ -1900,7 +1909,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>      }
>  
>      if (bswap) {
> -        if (opc == 1) {
> +        if (s_bits == MO_16) {
>              tcg_out_bundle(s, mII,
>                             tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
>                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
> @@ -1908,7 +1917,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                         TCG_REG_R3, TCG_REG_R3, 0xb));
>              data_reg = TCG_REG_R3;
> -        } else if (opc == 2) {
> +        } else if (s_bits == MO_32) {
>              tcg_out_bundle(s, mII,
>                             tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
>                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
> @@ -1916,7 +1925,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                         TCG_REG_R3, TCG_REG_R3, 0xb));
>              data_reg = TCG_REG_R3;
> -        } else if (opc == 3) {
> +        } else if (s_bits == MO_64) {
>              tcg_out_bundle(s, miI,
>                             tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
>                             tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> @@ -1926,7 +1935,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>          }
>      }
>      tcg_out_bundle(s, mmI,
> -                   tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc],
> +                   tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
>                                 data_reg, TCG_REG_R2),
>                     tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
>                     tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> @@ -1939,14 +1948,14 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>          add_guest_base = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
>      }
>  
> -    if (!bswap || opc == 0) {
> +    if (!bswap) {
>          tcg_out_bundle(s, (GUEST_BASE ? MmI : mmI),
>                         add_guest_base,
> -                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc],
> +                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
>                                     data_reg, addr_reg),
>                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
>      } else {
> -        if (opc == 1) {
> +        if (s_bits == MO_16) {
>              tcg_out_bundle(s, mII,
>                             add_guest_base,
>                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
> @@ -1954,7 +1963,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                         TCG_REG_R3, TCG_REG_R3, 0xb));
>              data_reg = TCG_REG_R3;
> -        } else if (opc == 2) {
> +        } else if (s_bits == MO_32) {
>              tcg_out_bundle(s, mII,
>                             add_guest_base,
>                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
> @@ -1962,7 +1971,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                         TCG_REG_R3, TCG_REG_R3, 0xb));
>              data_reg = TCG_REG_R3;
> -        } else if (opc == 3) {
> +        } else if (s_bits == MO_64) {
>              tcg_out_bundle(s, miI,
>                             add_guest_base,
>                             tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> @@ -1971,7 +1980,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>              data_reg = TCG_REG_R3;
>          }
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc],
> +                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
>                                     data_reg, addr_reg),
>                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
>                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> @@ -2203,39 +2212,39 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          break;
>  
>      case INDEX_op_qemu_ld8u:
> -        tcg_out_qemu_ld(s, args, 0);
> +        tcg_out_qemu_ld(s, args, MO_UB);
>          break;
>      case INDEX_op_qemu_ld8s:
> -        tcg_out_qemu_ld(s, args, 0 | 4);
> +        tcg_out_qemu_ld(s, args, MO_SB);
>          break;
>      case INDEX_op_qemu_ld16u:
> -        tcg_out_qemu_ld(s, args, 1);
> +        tcg_out_qemu_ld(s, args, MO_TEUW);
>          break;
>      case INDEX_op_qemu_ld16s:
> -        tcg_out_qemu_ld(s, args, 1 | 4);
> +        tcg_out_qemu_ld(s, args, MO_TESW);
>          break;
>      case INDEX_op_qemu_ld32:
>      case INDEX_op_qemu_ld32u:
> -        tcg_out_qemu_ld(s, args, 2);
> +        tcg_out_qemu_ld(s, args, MO_TEUL);
>          break;
>      case INDEX_op_qemu_ld32s:
> -        tcg_out_qemu_ld(s, args, 2 | 4);
> +        tcg_out_qemu_ld(s, args, MO_TESL);
>          break;
>      case INDEX_op_qemu_ld64:
> -        tcg_out_qemu_ld(s, args, 3);
> +        tcg_out_qemu_ld(s, args, MO_TEQ);
>          break;
>  
>      case INDEX_op_qemu_st8:
> -        tcg_out_qemu_st(s, args, 0);
> +        tcg_out_qemu_st(s, args, MO_UB);
>          break;
>      case INDEX_op_qemu_st16:
> -        tcg_out_qemu_st(s, args, 1);
> +        tcg_out_qemu_st(s, args, MO_TEUW);
>          break;
>      case INDEX_op_qemu_st32:
> -        tcg_out_qemu_st(s, args, 2);
> +        tcg_out_qemu_st(s, args, MO_TEUL);
>          break;
>      case INDEX_op_qemu_st64:
> -        tcg_out_qemu_st(s, args, 3);
> +        tcg_out_qemu_st(s, args, MO_TEQ);
>          break;
>  
>      default:

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 02/20] tcg-ia64: Use shortcuts for nop insns
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 02/20] tcg-ia64: Use shortcuts for nop insns Richard Henderson
@ 2013-11-03 22:55   ` Aurelien Jarno
  0 siblings, 0 replies; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-03 22:55 UTC (permalink / raw
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Thu, Oct 31, 2013 at 01:21:51PM -0700, Richard Henderson wrote:
> There's no need to go through the full opcode-to-insn function call
> to generate nops.  This makes the source a bit more readable.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/ia64/tcg-target.c | 251 +++++++++++++++++++++++++-------------------------
>  1 file changed, 127 insertions(+), 124 deletions(-)
> 
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index e09ad24..e1b8b7d 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -284,6 +284,9 @@ enum {
>      OPC_ZXT1_I29              = 0x00080000000ull,
>      OPC_ZXT2_I29              = 0x00088000000ull,
>      OPC_ZXT4_I29              = 0x00090000000ull,
> +
> +    INSN_NOP_M                = OPC_NOP_M48,  /* nop.m 0 */
> +    INSN_NOP_I                = OPC_NOP_I18,  /* nop.i 0 */
>  };
>  
>  static inline uint64_t tcg_opc_a1(int qp, uint64_t opc, int r1,
> @@ -855,8 +858,8 @@ static inline void tcg_out_mov(TCGContext *s, TCGType type,
>                                 TCGReg ret, TCGReg arg)
>  {
>      tcg_out_bundle(s, mmI,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
> +                   INSN_NOP_M,
>                     tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, 0, arg));
>  }
>  
> @@ -864,7 +867,7 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
>                                  TCGReg reg, tcg_target_long arg)
>  {
>      tcg_out_bundle(s, mLX,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
>                     tcg_opc_l2 (arg),
>                     tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, reg, arg));
>  }
> @@ -877,8 +880,8 @@ static void tcg_out_br(TCGContext *s, int label_index)
>         the existing value and using it again. This ensure that caches and
>         memory are kept coherent during retranslation. */
>      tcg_out_bundle(s, mmB,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
> +                   INSN_NOP_M,
>                     tcg_opc_b1 (TCG_REG_P0, OPC_BR_SPTK_MANY_B1,
>                                 get_reloc_pcrel21b(s->code_ptr + 2)));
>  
> @@ -899,7 +902,7 @@ static inline void tcg_out_call(TCGContext *s, TCGArg addr)
>                                 TCG_REG_B6, TCG_REG_R2, 0));
>      tcg_out_bundle(s, mmB,
>                     tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R3),
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
>                     tcg_opc_b5 (TCG_REG_P0, OPC_BR_CALL_SPTK_MANY_B5,
>                                 TCG_REG_B0, TCG_REG_B6));
>  }
> @@ -915,7 +918,7 @@ static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg)
>      imm = (uint64_t)disp >> 4;
>  
>      tcg_out_bundle(s, mLX,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
>                     tcg_opc_l3 (imm),
>                     tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, imm));
>  }
> @@ -932,12 +935,12 @@ static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg)
>          tcg_out_bundle(s, MmI,
>                         tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1,
>                                     TCG_REG_R2, TCG_REG_R2),
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                       INSN_NOP_M,
>                         tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6,
>                                     TCG_REG_R2, 0));
>          tcg_out_bundle(s, mmB,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_M,
>                         tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4,
>                                     TCG_REG_B6));
>      }
> @@ -947,12 +950,12 @@ static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg)
>  static inline void tcg_out_jmp(TCGContext *s, TCGArg addr)
>  {
>      tcg_out_bundle(s, mmI,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
> +                   INSN_NOP_M,
>                     tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, addr, 0));
>      tcg_out_bundle(s, mmB,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
> +                   INSN_NOP_M,
>                     tcg_opc_b4(TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6));
>  }
>  
> @@ -964,14 +967,14 @@ static inline void tcg_out_ld_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg,
>                         tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4,
>                                    TCG_REG_R2, arg2, arg1),
>                         tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                       INSN_NOP_I);
>      } else {
>          tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2);
>          tcg_out_bundle(s, MmI,
>                         tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1,
>                                     TCG_REG_R2, TCG_REG_R2, arg1),
>                         tcg_opc_m1 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                       INSN_NOP_I);
>      }
>  }
>  
> @@ -983,14 +986,14 @@ static inline void tcg_out_st_rel(TCGContext *s, uint64_t opc_m4, TCGArg arg,
>                         tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4,
>                                    TCG_REG_R2, arg2, arg1),
>                         tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                       INSN_NOP_I);
>      } else {
>          tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg2);
>          tcg_out_bundle(s, MmI,
>                         tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1,
>                                     TCG_REG_R2, TCG_REG_R2, arg1),
>                         tcg_opc_m4 (TCG_REG_P0, opc_m4, arg, TCG_REG_R2),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                       INSN_NOP_I);
>      }
>  }
>  
> @@ -1025,7 +1028,7 @@ static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGArg ret,
>                            TCG_REG_R2, arg1, TCG_REG_R0);
>          arg1 = TCG_REG_R2;
>      } else {
> -        opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
> +        opc1 = INSN_NOP_M;
>      }
>  
>      if (const_arg2 && arg2 != 0) {
> @@ -1033,7 +1036,7 @@ static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGArg ret,
>                            TCG_REG_R3, arg2, TCG_REG_R0);
>          arg2 = TCG_REG_R3;
>      } else {
> -        opc2 = tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0);
> +        opc2 = INSN_NOP_I;
>      }
>  
>      tcg_out_bundle(s, mII,
> @@ -1047,7 +1050,7 @@ static inline void tcg_out_eqv(TCGContext *s, TCGArg ret,
>                                 TCGArg arg2, int const_arg2)
>  {
>      tcg_out_bundle(s, mII,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
>                     tcg_opc_a1 (TCG_REG_P0, OPC_XOR_A1, ret, arg1, arg2),
>                     tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret));
>  }
> @@ -1057,7 +1060,7 @@ static inline void tcg_out_nand(TCGContext *s, TCGArg ret,
>                                  TCGArg arg2, int const_arg2)
>  {
>      tcg_out_bundle(s, mII,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
>                     tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, ret, arg1, arg2),
>                     tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret));
>  }
> @@ -1067,7 +1070,7 @@ static inline void tcg_out_nor(TCGContext *s, TCGArg ret,
>                                 TCGArg arg2, int const_arg2)
>  {
>      tcg_out_bundle(s, mII,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
>                     tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, arg2),
>                     tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, ret, -1, ret));
>  }
> @@ -1077,7 +1080,7 @@ static inline void tcg_out_orc(TCGContext *s, TCGArg ret,
>                                 TCGArg arg2, int const_arg2)
>  {
>      tcg_out_bundle(s, mII,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
>                     tcg_opc_a3 (TCG_REG_P0, OPC_ANDCM_A3, TCG_REG_R2, -1, arg2),
>                     tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret, arg1, TCG_REG_R2));
>  }
> @@ -1088,16 +1091,16 @@ static inline void tcg_out_mul(TCGContext *s, TCGArg ret,
>      tcg_out_bundle(s, mmI,
>                     tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F6, arg1),
>                     tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F7, arg2),
> -                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                   INSN_NOP_I);
>      tcg_out_bundle(s, mmF,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
> +                   INSN_NOP_M,
>                     tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F6,
>                                 TCG_REG_F7, TCG_REG_F0));
>      tcg_out_bundle(s, miI,
>                     tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, ret, TCG_REG_F6),
> -                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> -                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                   INSN_NOP_I,
> +                   INSN_NOP_I);
>  }
>  
>  static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
> @@ -1105,8 +1108,8 @@ static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
>  {
>      if (const_arg2) {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11,
>                                     ret, arg1, arg2, 31 - arg2));
>      } else {
> @@ -1124,14 +1127,14 @@ static inline void tcg_out_sar_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
>  {
>      if (const_arg2) {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_i11(TCG_REG_P0, OPC_EXTR_I11,
>                                     ret, arg1, arg2, 63 - arg2));
>      } else {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_i5 (TCG_REG_P0, OPC_SHR_I5, ret, arg1, arg2));
>      }
>  }
> @@ -1141,13 +1144,13 @@ static inline void tcg_out_shl_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
>  {
>      if (const_arg2) {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret,
>                                     arg1, 63 - arg2, 31 - arg2));
>      } else {
>          tcg_out_bundle(s, mII,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                       INSN_NOP_M,
>                         tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R2,
>                                     0x1f, arg2),
>                         tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret,
> @@ -1160,14 +1163,14 @@ static inline void tcg_out_shl_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
>  {
>      if (const_arg2) {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret,
>                                     arg1, 63 - arg2, 63 - arg2));
>      } else {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, ret,
>                                     arg1, arg2));
>      }
> @@ -1178,8 +1181,8 @@ static inline void tcg_out_shr_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
>  {
>      if (const_arg2) {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
>                                     arg1, arg2, 31 - arg2));
>      } else {
> @@ -1197,14 +1200,14 @@ static inline void tcg_out_shr_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
>  {
>      if (const_arg2) {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
>                                     arg1, arg2, 63 - arg2));
>      } else {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret,
>                                     arg1, arg2));
>      }
> @@ -1215,20 +1218,20 @@ static inline void tcg_out_rotl_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
>  {
>      if (const_arg2) {
>          tcg_out_bundle(s, mII,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                       INSN_NOP_M,
>                         tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2,
>                                     TCG_REG_R2, arg1, arg1),
>                         tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
>                                     TCG_REG_R2, 32 - arg2, 31));
>      } else {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                       INSN_NOP_M,
>                         tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2,
>                                     TCG_REG_R2, arg1, arg1),
>                         tcg_opc_a3 (TCG_REG_P0, OPC_AND_A3, TCG_REG_R3,
>                                     0x1f, arg2));
>          tcg_out_bundle(s, mII,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                       INSN_NOP_M,
>                         tcg_opc_a3 (TCG_REG_P0, OPC_SUB_A3, TCG_REG_R3,
>                                     0x20, TCG_REG_R3),
>                         tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, ret,
> @@ -1241,8 +1244,8 @@ static inline void tcg_out_rotl_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
>  {
>      if (const_arg2) {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1,
>                                     arg1, 0x40 - arg2));
>      } else {
> @@ -1254,8 +1257,8 @@ static inline void tcg_out_rotl_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
>                         tcg_opc_i5 (TCG_REG_P0, OPC_SHR_U_I5, TCG_REG_R2,
>                                     arg1, TCG_REG_R2));
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret,
>                                     TCG_REG_R2, TCG_REG_R3));
>      }
> @@ -1266,7 +1269,7 @@ static inline void tcg_out_rotr_i32(TCGContext *s, TCGArg ret, TCGArg arg1,
>  {
>      if (const_arg2) {
>          tcg_out_bundle(s, mII,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                       INSN_NOP_M,
>                         tcg_opc_i2 (TCG_REG_P0, OPC_UNPACK4_L_I2,
>                                     TCG_REG_R2, arg1, arg1),
>                         tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, ret,
> @@ -1287,8 +1290,8 @@ static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
>  {
>      if (const_arg2) {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_i10(TCG_REG_P0, OPC_SHRP_I10, ret, arg1,
>                                     arg1, arg2));
>      } else {
> @@ -1300,8 +1303,8 @@ static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
>                         tcg_opc_i7 (TCG_REG_P0, OPC_SHL_I7, TCG_REG_R2,
>                                     arg1, TCG_REG_R2));
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_a1 (TCG_REG_P0, OPC_OR_A1, ret,
>                                     TCG_REG_R2, TCG_REG_R3));
>      }
> @@ -1311,15 +1314,15 @@ static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29,
>                                 TCGArg ret, TCGArg arg)
>  {
>      tcg_out_bundle(s, miI,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                   INSN_NOP_M,
> +                   INSN_NOP_I,
>                     tcg_opc_i29(TCG_REG_P0, opc_i29, ret, arg));
>  }
>  
>  static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg)
>  {
>      tcg_out_bundle(s, mII,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
>                     tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 15, 15),
>                     tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb));
>  }
> @@ -1327,7 +1330,7 @@ static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg)
>  static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg)
>  {
>      tcg_out_bundle(s, mII,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
>                     tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 31, 31),
>                     tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb));
>  }
> @@ -1335,8 +1338,8 @@ static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg)
>  static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
>  {
>      tcg_out_bundle(s, miI,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                   INSN_NOP_M,
> +                   INSN_NOP_I,
>                     tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb));
>  }
>  
> @@ -1366,8 +1369,8 @@ static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1,
>          i2 = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, ret, a2, a1, cpos, lm1);
>      }
>      tcg_out_bundle(s, (i1 ? mII : miI),
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                   i1 ? i1 : tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                   INSN_NOP_M,
> +                   i1 ? i1 : INSN_NOP_I,
>                     i2);
>  }
>  
> @@ -1425,7 +1428,7 @@ static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
>                            arg1, TCG_REG_R0);
>          arg1 = TCG_REG_R2;
>      } else {
> -        opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
> +        opc1 = INSN_NOP_M;
>      }
>  
>      if (const_arg2 && arg2 != 0) {
> @@ -1433,7 +1436,7 @@ static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
>                            arg2, TCG_REG_R0);
>          arg2 = TCG_REG_R3;
>      } else {
> -        opc2 = tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0);
> +        opc2 = INSN_NOP_I;
>      }
>  
>      tcg_out_bundle(s, mII,
> @@ -1441,8 +1444,8 @@ static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
>                     opc2,
>                     tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4));
>      tcg_out_bundle(s, mmB,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
> +                   INSN_NOP_M,
>                     tcg_opc_b1 (TCG_REG_P6, OPC_BR_DPTK_FEW_B1,
>                                 get_reloc_pcrel21b(s->code_ptr + 2)));
>  
> @@ -1473,14 +1476,14 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
>      if (const_v1) {
>          opc1 = tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, v1, TCG_REG_R0);
>      } else if (ret == v1) {
> -        opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
> +        opc1 = INSN_NOP_M;
>      } else {
>          opc1 = tcg_opc_a4(TCG_REG_P6, OPC_ADDS_A4, ret, 0, v1);
>      }
>      if (const_v2) {
>          opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0);
>      } else if (ret == v2) {
> -        opc2 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
> +        opc2 = INSN_NOP_I;
>      } else {
>          opc2 = tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, ret, 0, v2);
>      }
> @@ -1500,7 +1503,7 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
>                                      uint64_t offset_addend)
>  {
>      tcg_out_bundle(s, mII,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
>                     tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2,
>                                 addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1),
>                     tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2,
> @@ -1595,13 +1598,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>                         tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
>                                     TCG_REG_R8, TCG_REG_R3),
>                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                       INSN_NOP_I);
>      }
>      if (!bswap) {
>          tcg_out_bundle(s, miB,
>                         tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
>                                     mem_index, TCG_REG_R0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_I,
>                         tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
>                                     TCG_REG_B0, TCG_REG_B6));
>      } else {
> @@ -1616,14 +1619,14 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>  
>      if (s_bits == MO_64) {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
>                                     data_reg, 0, TCG_REG_R8));
>      } else {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc & MO_SSIZE],
>                                     data_reg, TCG_REG_R8));
>      }
> @@ -1681,20 +1684,20 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>                                     TCG_REG_R1, TCG_REG_R2),
>                         tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
>                                     0, data_reg),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                       INSN_NOP_I);
>          break;
>  
>      case MO_16 | MO_BSWAP:
>          tcg_out_bundle(s, miI,
>                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
>                                     TCG_REG_R1, TCG_REG_R2),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_I,
>                         tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
>                                     TCG_REG_R2, data_reg, 15, 15));
>          tcg_out_bundle(s, miI,
>                         tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
>                                     0, data_reg),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_I,
>                         tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
>                                     TCG_REG_R2, TCG_REG_R2, 0xb));
>          data_reg = TCG_REG_R2;
> @@ -1704,13 +1707,13 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>          tcg_out_bundle(s, miI,
>                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
>                                     TCG_REG_R1, TCG_REG_R2),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_I,
>                         tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
>                                     TCG_REG_R2, data_reg, 31, 31));
>          tcg_out_bundle(s, miI,
>                         tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
>                                     0, data_reg),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_I,
>                         tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
>                                     TCG_REG_R2, TCG_REG_R2, 0xb));
>          data_reg = TCG_REG_R2;
> @@ -1762,17 +1765,17 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>  #if TARGET_LONG_BITS == 32
>      if (GUEST_BASE != 0) {
>          tcg_out_bundle(s, mII,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                       INSN_NOP_M,
>                         tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
>                                     TCG_REG_R3, addr_reg),
>                         tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
>                                     TCG_GUEST_BASE_REG, TCG_REG_R3));
>      } else {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                       INSN_NOP_M,
>                         tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
>                                     TCG_REG_R2, addr_reg),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                       INSN_NOP_I);
>      }
>  
>      if (!bswap) {
> @@ -1780,13 +1783,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>              tcg_out_bundle(s, miI,
>                             tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
>                                         data_reg, TCG_REG_R2),
> -                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> -                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                           INSN_NOP_I,
> +                           INSN_NOP_I);
>          } else {
>              tcg_out_bundle(s, mII,
>                             tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
>                                         data_reg, TCG_REG_R2),
> -                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                           INSN_NOP_I,
>                             tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
>                                         data_reg, data_reg));
>          }
> @@ -1794,7 +1797,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>              tcg_out_bundle(s, mII,
>                             tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
>                                         data_reg, TCG_REG_R2),
> -                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                           INSN_NOP_I,
>                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                         data_reg, data_reg, 0xb));
>      } else {
> @@ -1802,26 +1805,26 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>              tcg_out_bundle(s, mII,
>                             tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
>                                         data_reg, TCG_REG_R2),
> -                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                           INSN_NOP_I,
>                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
>                                        data_reg, data_reg, 15, 15));
>          } else {
>              tcg_out_bundle(s, mII,
>                             tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
>                                         data_reg, TCG_REG_R2),
> -                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                           INSN_NOP_I,
>                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
>                                        data_reg, data_reg, 31, 31));
>          }
>          if (!(opc & MO_SIGN)) {
>              tcg_out_bundle(s, miI,
> -                           tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                           INSN_NOP_M,
> +                           INSN_NOP_I,
>                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                         data_reg, data_reg, 0xb));
>          } else {
>              tcg_out_bundle(s, mII,
> -                           tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                           INSN_NOP_M,
>                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                         data_reg, data_reg, 0xb),
>                             tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
> @@ -1835,40 +1838,40 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>                                     TCG_GUEST_BASE_REG, addr_reg),
>                         tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
>                                     data_reg, TCG_REG_R2),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                       INSN_NOP_I);
>      } else {
>          tcg_out_bundle(s, mmI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                       INSN_NOP_M,
>                         tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
>                                     data_reg, addr_reg),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                       INSN_NOP_I);
>      }
>  
>      if (bswap && s_bits == MO_16) {
>          tcg_out_bundle(s, mII,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                       INSN_NOP_M,
>                         tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
>                                     data_reg, data_reg, 15, 15),
>                         tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                     data_reg, data_reg, 0xb));
>      } else if (bswap && s_bits == MO_32) {
>          tcg_out_bundle(s, mII,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                       INSN_NOP_M,
>                         tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
>                                     data_reg, data_reg, 31, 31),
>                         tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                     data_reg, data_reg, 0xb));
>      } else if (bswap && s_bits == MO_64) {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                     data_reg, data_reg, 0xb));
>      }
>      if (opc & MO_SIGN) {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                       INSN_NOP_M,
> +                       INSN_NOP_I,
>                         tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
>                                     data_reg, data_reg));
>      }
> @@ -1895,23 +1898,23 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>  #if TARGET_LONG_BITS == 32
>      if (GUEST_BASE != 0) {
>          tcg_out_bundle(s, mII,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                       INSN_NOP_M,
>                         tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
>                                     TCG_REG_R3, addr_reg),
>                         tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
>                                     TCG_GUEST_BASE_REG, TCG_REG_R3));
>      } else {
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                       INSN_NOP_M,
>                         tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
>                                     TCG_REG_R2, addr_reg),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                       INSN_NOP_I);
>      }
>  
>      if (bswap) {
>          if (s_bits == MO_16) {
>              tcg_out_bundle(s, mII,
> -                           tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                           INSN_NOP_M,
>                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
>                                         TCG_REG_R3, data_reg, 15, 15),
>                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> @@ -1919,7 +1922,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>              data_reg = TCG_REG_R3;
>          } else if (s_bits == MO_32) {
>              tcg_out_bundle(s, mII,
> -                           tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                           INSN_NOP_M,
>                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
>                                         TCG_REG_R3, data_reg, 31, 31),
>                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> @@ -1927,8 +1930,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>              data_reg = TCG_REG_R3;
>          } else if (s_bits == MO_64) {
>              tcg_out_bundle(s, miI,
> -                           tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                           INSN_NOP_M,
> +                           INSN_NOP_I,
>                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                         TCG_REG_R3, data_reg, 0xb));
>              data_reg = TCG_REG_R3;
> @@ -1937,15 +1940,15 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>      tcg_out_bundle(s, mmI,
>                     tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
>                                 data_reg, TCG_REG_R2),
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> -                   tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                   INSN_NOP_M,
> +                   INSN_NOP_I);
>  #else
>      if (GUEST_BASE != 0) {
>          add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
>                                       TCG_GUEST_BASE_REG, addr_reg);
>          addr_reg = TCG_REG_R2;
>      } else {
> -        add_guest_base = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
> +        add_guest_base = INSN_NOP_M;
>      }
>  
>      if (!bswap) {
> @@ -1953,7 +1956,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>                         add_guest_base,
>                         tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
>                                     data_reg, addr_reg),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                       INSN_NOP_I);
>      } else {
>          if (s_bits == MO_16) {
>              tcg_out_bundle(s, mII,
> @@ -1974,7 +1977,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>          } else if (s_bits == MO_64) {
>              tcg_out_bundle(s, miI,
>                             add_guest_base,
> -                           tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> +                           INSN_NOP_I,
>                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                         TCG_REG_R3, data_reg, 0xb));
>              data_reg = TCG_REG_R3;
> @@ -1982,8 +1985,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>          tcg_out_bundle(s, miI,
>                         tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
>                                     data_reg, addr_reg),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> -                       tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> +                       INSN_NOP_I,
> +                       INSN_NOP_I);
>      }
>  #endif
>  }
> @@ -2402,7 +2405,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
>         an ADDL in the M slot of the next bundle.  */
>      if (GUEST_BASE != 0) {
>          tcg_out_bundle(s, mlx,
> -                       tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                       INSN_NOP_M,
>                         tcg_opc_l2 (GUEST_BASE),
>                         tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2,
>                                     TCG_GUEST_BASE_REG, GUEST_BASE));
> @@ -2419,13 +2422,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
>      /* epilogue */
>      tb_ret_addr = s->code_ptr;
>      tcg_out_bundle(s, miI,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
>                     tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21,
>                                 TCG_REG_B0, TCG_REG_R32, 0),
>                     tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
>                                 TCG_REG_R12, frame_size, TCG_REG_R12));
>      tcg_out_bundle(s, miB,
> -                   tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> +                   INSN_NOP_M,
>                     tcg_opc_i26(TCG_REG_P0, OPC_MOV_I_I26,
>                                 TCG_REG_PFS, TCG_REG_R34),
>                     tcg_opc_b4 (TCG_REG_P0, OPC_BR_RET_SPTK_MANY_B4,

Acked-by: Aurelien Jarno <aurelien@aurel32.net>


-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 03/20] tcg-ia64: Handle constant calls
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 03/20] tcg-ia64: Handle constant calls Richard Henderson
@ 2013-11-03 22:56   ` Aurelien Jarno
  0 siblings, 0 replies; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-03 22:56 UTC (permalink / raw
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Thu, Oct 31, 2013 at 01:21:52PM -0700, Richard Henderson wrote:
> Using only indirect calls results in 3 bundles (one to load the
> descriptor address), and 4 stop bits.  By looking through the
> descriptor to the constants, we can perform the call with 2
> bundles and only 1 stop bit.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/ia64/tcg-target.c | 38 +++++++++++++++++++++++++++++++++++---
>  1 file changed, 35 insertions(+), 3 deletions(-)
> 
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index e1b8b7d..b19e298 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -226,6 +226,7 @@ enum {
>      OPC_BR_CALL_SPTK_MANY_B5  = 0x02100001000ull,
>      OPC_BR_RET_SPTK_MANY_B4   = 0x00108001100ull,
>      OPC_BRL_SPTK_MANY_X3      = 0x18000001000ull,
> +    OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull,
>      OPC_CMP_LT_A6             = 0x18000000000ull,
>      OPC_CMP_LTU_A6            = 0x1a000000000ull,
>      OPC_CMP_EQ_A6             = 0x1c000000000ull,
> @@ -584,6 +585,8 @@ static inline uint64_t tcg_opc_l3(uint64_t imm)
>      return (imm & 0x07fffffffff00000ull) >> 18;
>  }
>  
> +#define tcg_opc_l4  tcg_opc_l3
> +
>  static inline uint64_t tcg_opc_m1(int qp, uint64_t opc, int r1, int r3)
>  {
>      return opc
> @@ -668,6 +671,15 @@ static inline uint64_t tcg_opc_x3(int qp, uint64_t opc, uint64_t imm)
>             | (qp & 0x3f);
>  }
>  
> +static inline uint64_t tcg_opc_x4(int qp, uint64_t opc, int b1, uint64_t imm)
> +{
> +    return opc
> +           | ((imm & 0x0800000000000000ull) >> 23) /* i */
> +           | ((imm & 0x00000000000fffffull) << 13) /* imm20b */
> +           | ((b1 & 0x7) << 6)
> +           | (qp & 0x3f);
> +}
> +
>  
>  /*
>   * Relocations
> @@ -893,7 +905,23 @@ static void tcg_out_br(TCGContext *s, int label_index)
>      }
>  }
>  
> -static inline void tcg_out_call(TCGContext *s, TCGArg addr)
> +static inline void tcg_out_calli(TCGContext *s, uintptr_t addr)
> +{
> +    /* Look through the function descriptor.  */
> +    uintptr_t disp, *desc = (uintptr_t *)addr;
> +    tcg_out_bundle(s, mlx,
> +                   INSN_NOP_M,
> +                   tcg_opc_l2 (desc[1]),
> +                   tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, desc[1]));
> +    disp = (desc[0] - (uintptr_t)s->code_ptr) >> 4;
> +    tcg_out_bundle(s, mLX,
> +                   INSN_NOP_M,
> +                   tcg_opc_l4 (disp),
> +                   tcg_opc_x4 (TCG_REG_P0, OPC_BRL_CALL_SPTK_MANY_X4,
> +                               TCG_REG_B0, disp));
> +}
> +
> +static inline void tcg_out_callr(TCGContext *s, TCGReg addr)
>  {
>      tcg_out_bundle(s, MmI,
>                     tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, addr),
> @@ -2004,7 +2032,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          tcg_out_br(s, args[0]);
>          break;
>      case INDEX_op_call:
> -        tcg_out_call(s, args[0]);
> +        if (likely(const_args[0])) {
> +            tcg_out_calli(s, args[0]);
> +        } else {
> +            tcg_out_callr(s, args[0]);
> +        }
>          break;
>      case INDEX_op_goto_tb:
>          tcg_out_goto_tb(s, args[0]);
> @@ -2257,7 +2289,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>  
>  static const TCGTargetOpDef ia64_op_defs[] = {
>      { INDEX_op_br, { } },
> -    { INDEX_op_call, { "r" } },
> +    { INDEX_op_call, { "ri" } },
>      { INDEX_op_exit_tb, { } },
>      { INDEX_op_goto_tb, { } },
>  

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 04/20] tcg-ia64: Simplify brcond
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 04/20] tcg-ia64: Simplify brcond Richard Henderson
@ 2013-11-03 22:56   ` Aurelien Jarno
  2013-11-03 23:34     ` Richard Henderson
  0 siblings, 1 reply; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-03 22:56 UTC (permalink / raw
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Thu, Oct 31, 2013 at 01:21:53PM -0700, Richard Henderson wrote:
> There was a misconception that a stop bit is required between a compare
> and the branch that uses the predicate set by the compare.  This led to

This seems to be true.

> the usage of an extra bundle in which to perform the compare.  The extra
> bundle left room for constants to be loaded for use with the compare insn.
> 
> If we pack the compare and the branch together in the same bundle, then
> there's no longer any room for non-zero constants.  At which point we
> can eliminate half the function by not handling them.

That said the implementation is likely wrong as with this patch applied,
qemu-system-x86_64 is not even able to execute seabios to the first
printed message. Please do at least basic testing.

> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/ia64/tcg-target.c | 42 +++++++++---------------------------------
>  1 file changed, 9 insertions(+), 33 deletions(-)
> 
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index b19e298..2fdc38a5 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -1444,38 +1444,16 @@ static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1,
>      }
>  }
>  
> -static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
> -                                  int const_arg1, TCGArg arg2, int const_arg2,
> -                                  int label_index, int cmp4)
> +static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
> +                                  TCGReg arg2, int label_index, int cmp4)
>  {
>      TCGLabel *l = &s->labels[label_index];
> -    uint64_t opc1, opc2;
>  
> -    if (const_arg1 && arg1 != 0) {
> -        opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2,
> -                          arg1, TCG_REG_R0);
> -        arg1 = TCG_REG_R2;
> -    } else {
> -        opc1 = INSN_NOP_M;
> -    }
> -
> -    if (const_arg2 && arg2 != 0) {
> -        opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R3,
> -                          arg2, TCG_REG_R0);
> -        arg2 = TCG_REG_R3;
> -    } else {
> -        opc2 = INSN_NOP_I;
> -    }
> -
> -    tcg_out_bundle(s, mII,
> -                   opc1,
> -                   opc2,
> -                   tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4));
> -    tcg_out_bundle(s, mmB,
> -                   INSN_NOP_M,
> +    tcg_out_bundle(s, miB,
>                     INSN_NOP_M,
> -                   tcg_opc_b1 (TCG_REG_P6, OPC_BR_DPTK_FEW_B1,
> -                               get_reloc_pcrel21b(s->code_ptr + 2)));
> +                   tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4),
> +                   tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1,
> +                              get_reloc_pcrel21b(s->code_ptr + 2)));
>  
>      if (l->has_value) {
>          reloc_pcrel21b((s->code_ptr - 16) + 2, l->u.value);
> @@ -2224,12 +2202,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          break;
>  
>      case INDEX_op_brcond_i32:
> -        tcg_out_brcond(s, args[2], args[0], const_args[0],
> -                       args[1], const_args[1], args[3], 1);
> +        tcg_out_brcond(s, args[2], args[0], args[1], args[3], 1);
>          break;
>      case INDEX_op_brcond_i64:
> -        tcg_out_brcond(s, args[2], args[0], const_args[0],
> -                       args[1], const_args[1], args[3], 0);
> +        tcg_out_brcond(s, args[2], args[0], args[1], args[3], 0);
>          break;
>      case INDEX_op_setcond_i32:
>          tcg_out_setcond(s, args[3], args[0], args[1], args[2], 1);
> @@ -2333,7 +2309,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
>      { INDEX_op_bswap16_i32, { "r", "rZ" } },
>      { INDEX_op_bswap32_i32, { "r", "rZ" } },
>  
> -    { INDEX_op_brcond_i32, { "rI", "rI" } },
> +    { INDEX_op_brcond_i32, { "rZ", "rZ" } },
>      { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
>      { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } },
>  
> -- 
> 1.8.3.1
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 04/20] tcg-ia64: Simplify brcond
  2013-11-03 22:56   ` Aurelien Jarno
@ 2013-11-03 23:34     ` Richard Henderson
  2013-11-04  4:24       ` Richard Henderson
  2013-11-06 22:04       ` Aurelien Jarno
  0 siblings, 2 replies; 39+ messages in thread
From: Richard Henderson @ 2013-11-03 23:34 UTC (permalink / raw
  To: Aurelien Jarno; +Cc: qemu-devel, aliguori

On 11/03/2013 12:56 PM, Aurelien Jarno wrote:
> That said the implementation is likely wrong as with this patch applied,
> qemu-system-x86_64 is not even able to execute seabios to the first
> printed message. Please do at least basic testing.

Really?  I'll look into it.

I do basic testing -- arm, sparc, alpha.  Those are the ones that can
usefully boot via serial console instead of having to set up a tunnel
for a vnc viewer.


r~

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 04/20] tcg-ia64: Simplify brcond
  2013-11-03 23:34     ` Richard Henderson
@ 2013-11-04  4:24       ` Richard Henderson
  2013-11-06 22:05         ` Aurelien Jarno
  2013-11-06 22:04       ` Aurelien Jarno
  1 sibling, 1 reply; 39+ messages in thread
From: Richard Henderson @ 2013-11-04  4:24 UTC (permalink / raw
  To: Aurelien Jarno; +Cc: qemu-devel, aliguori

On 11/04/2013 09:34 AM, Richard Henderson wrote:
> On 11/03/2013 12:56 PM, Aurelien Jarno wrote:
>> That said the implementation is likely wrong as with this patch applied,
>> qemu-system-x86_64 is not even able to execute seabios to the first
>> printed message. Please do at least basic testing.
> 
> Really?  I'll look into it.
> 
> I do basic testing -- arm, sparc, alpha.  Those are the ones that can
> usefully boot via serial console instead of having to set up a tunnel
> for a vnc viewer.

Yes indeed, there's a bug.  This additional patch is required:

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 2fdc38a..e0413d9 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -2359,7 +2359,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_bswap32_i64, { "r", "rZ" } },
     { INDEX_op_bswap64_i64, { "r", "rZ" } },

-    { INDEX_op_brcond_i64, { "rI", "rI" } },
+    { INDEX_op_brcond_i64, { "rZ", "rZ" } },
     { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } },
     { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rI", "rI" } },


I managed to change the constraints for brcond_i32 but forgot its brother.
Naturally, arm and sparc don't exercise brcond_i64, and as it happens alpha
would have only generated brcond_i64 vs zero and not seen the problem.

Will be fixed in v3.  Do you see anything else in the patch set?


r~

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 04/20] tcg-ia64: Simplify brcond
  2013-11-03 23:34     ` Richard Henderson
  2013-11-04  4:24       ` Richard Henderson
@ 2013-11-06 22:04       ` Aurelien Jarno
  1 sibling, 0 replies; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-06 22:04 UTC (permalink / raw
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Sun, Nov 03, 2013 at 01:34:12PM -1000, Richard Henderson wrote:
> On 11/03/2013 12:56 PM, Aurelien Jarno wrote:
> > That said the implementation is likely wrong as with this patch applied,
> > qemu-system-x86_64 is not even able to execute seabios to the first
> > printed message. Please do at least basic testing.
> 
> Really?  I'll look into it.
> 
> I do basic testing -- arm, sparc, alpha.  Those are the ones that can
> usefully boot via serial console instead of having to set up a tunnel
> for a vnc viewer.

If you don't want to set up a VNC tunnel, you can also use -curses
(provided the curses support is compiled) for i386 and x86-64.

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 04/20] tcg-ia64: Simplify brcond
  2013-11-04  4:24       ` Richard Henderson
@ 2013-11-06 22:05         ` Aurelien Jarno
  0 siblings, 0 replies; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-06 22:05 UTC (permalink / raw
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Mon, Nov 04, 2013 at 02:24:59PM +1000, Richard Henderson wrote:
> On 11/04/2013 09:34 AM, Richard Henderson wrote:
> > On 11/03/2013 12:56 PM, Aurelien Jarno wrote:
> >> That said the implementation is likely wrong as with this patch applied,
> >> qemu-system-x86_64 is not even able to execute seabios to the first
> >> printed message. Please do at least basic testing.
> > 
> > Really?  I'll look into it.
> > 
> > I do basic testing -- arm, sparc, alpha.  Those are the ones that can
> > usefully boot via serial console instead of having to set up a tunnel
> > for a vnc viewer.
> 
> Yes indeed, there's a bug.  This additional patch is required:
> 
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index 2fdc38a..e0413d9 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -2359,7 +2359,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
>      { INDEX_op_bswap32_i64, { "r", "rZ" } },
>      { INDEX_op_bswap64_i64, { "r", "rZ" } },
> 
> -    { INDEX_op_brcond_i64, { "rI", "rI" } },
> +    { INDEX_op_brcond_i64, { "rZ", "rZ" } },
>      { INDEX_op_setcond_i64, { "r", "rZ", "rZ" } },
>      { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rI", "rI" } },
> 
> 
> I managed to change the constraints for brcond_i32 but forgot its brother.
> Naturally, arm and sparc don't exercise brcond_i64, and as it happens alpha
> would have only generated brcond_i64 vs zero and not seen the problem.

I confirm this fixes the problem, provided that you squash the patch
above in the patch:

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

> Will be fixed in v3.  Do you see anything else in the patch set?

I am still looking at it, trying to bisect regressions.

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 01/20] tcg-ia64: Use TCGMemOp within qemu_ldst routines
  2013-11-03 22:55   ` Aurelien Jarno
@ 2013-11-06 22:05     ` Aurelien Jarno
  0 siblings, 0 replies; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-06 22:05 UTC (permalink / raw
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Sun, Nov 03, 2013 at 11:55:55PM +0100, Aurelien Jarno wrote:
> On Thu, Oct 31, 2013 at 01:21:50PM -0700, Richard Henderson wrote:
> > Signed-off-by: Richard Henderson <rth@twiddle.net>
> > ---
> >  tcg/ia64/tcg-target.c | 171 ++++++++++++++++++++++++++------------------------
> >  1 file changed, 90 insertions(+), 81 deletions(-)
> > 
> > diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> > index 0656d39..e09ad24 100644
> > --- a/tcg/ia64/tcg-target.c
> > +++ b/tcg/ia64/tcg-target.c
> > @@ -1496,7 +1496,7 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
> >     R2 is loaded with the address of the addend TLB entry.
> >     R57 is loaded with the address, zero extented on 32-bit targets. */
> >  static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
> > -                                    int s_bits, uint64_t offset_rw,
> > +                                    TCGMemOp s_bits, uint64_t offset_rw,
> >                                      uint64_t offset_addend)
> >  {
> >      tcg_out_bundle(s, mII,
> > @@ -1538,23 +1538,24 @@ static const void * const qemu_ld_helpers[4] = {
> >      helper_ldq_mmu,
> >  };
> >  
> > -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> > +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
> > +                                   TCGMemOp opc)
> >  {
> > -    int addr_reg, data_reg, mem_index, s_bits, bswap;
> > -    uint64_t opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 };
> > -    uint64_t opc_ext_i29[8] = { OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0,
> > -                                OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0 };
> > +    static const uint64_t opc_ld_m1[4] = {
> > +        OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
> > +    };
> > +    static const uint64_t opc_ext_i29[8] = {
> > +        OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0,
> > +        OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
> > +    };
> > +    int addr_reg, data_reg, mem_index;
> > +    TCGMemOp s_bits, bswap;
> >  
> >      data_reg = *args++;
> >      addr_reg = *args++;
> >      mem_index = *args;
> > -    s_bits = opc & 3;
> > -
> > -#ifdef TARGET_WORDS_BIGENDIAN
> > -    bswap = 1;
> > -#else
> > -    bswap = 0;
> > -#endif
> > +    s_bits = opc & MO_SIZE;
> > +    bswap = opc & MO_BSWAP;
> >  
> >      /* Read the TLB entry */
> >      tcg_out_qemu_tlb(s, addr_reg, s_bits,
> > @@ -1575,14 +1576,14 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> >                                 TCG_REG_R3, TCG_REG_R57),
> >                     tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
> >                                 TCG_REG_R3, 0));
> > -    if (bswap && s_bits == 1) {
> > +    if (bswap && s_bits == MO_16) {
> >          tcg_out_bundle(s, MmI,
> >                         tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
> >                                     TCG_REG_R8, TCG_REG_R3),
> >                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
> >                         tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
> >                                     TCG_REG_R8, TCG_REG_R8, 15, 15));
> > -    } else if (bswap && s_bits == 2) {
> > +    } else if (bswap && s_bits == MO_32) {
> >          tcg_out_bundle(s, MmI,
> >                         tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
> >                                     TCG_REG_R8, TCG_REG_R3),
> > @@ -1596,7 +1597,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> >                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
> >                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> >      }
> > -    if (!bswap || s_bits == 0) {
> > +    if (!bswap) {
> >          tcg_out_bundle(s, miB,
> >                         tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
> >                                     mem_index, TCG_REG_R0),
> > @@ -1613,7 +1614,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> >                                     TCG_REG_B0, TCG_REG_B6));
> >      }
> >  
> > -    if (opc == 3) {
> > +    if (s_bits == MO_64) {
> >          tcg_out_bundle(s, miI,
> >                         tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> >                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> > @@ -1623,7 +1624,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> >          tcg_out_bundle(s, miI,
> >                         tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> >                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> > -                       tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc],
> > +                       tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc & MO_SSIZE],
> >                                     data_reg, TCG_REG_R8));
> >      }
> >  }
> > @@ -1637,22 +1638,21 @@ static const void * const qemu_st_helpers[4] = {
> >      helper_stq_mmu,
> >  };
> >  
> > -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> > +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
> > +                                   TCGMemOp opc)
> >  {
> > -    int addr_reg, data_reg, mem_index, bswap;
> > -    uint64_t opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 };
> > +    static const uint64_t opc_st_m4[4] = {
> > +        OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
> > +    };
> > +    int addr_reg, data_reg, mem_index;
> > +    TCGMemOp s_bits;
> >  
> >      data_reg = *args++;
> >      addr_reg = *args++;
> >      mem_index = *args;
> > +    s_bits = opc & MO_SIZE;
> >  
> > -#ifdef TARGET_WORDS_BIGENDIAN
> > -    bswap = 1;
> > -#else
> > -    bswap = 0;
> > -#endif
> > -
> > -    tcg_out_qemu_tlb(s, addr_reg, opc,
> > +    tcg_out_qemu_tlb(s, addr_reg, s_bits,
> >                       offsetof(CPUArchState, tlb_table[mem_index][0].addr_write),
> >                       offsetof(CPUArchState, tlb_table[mem_index][0].addend));
> >  
> > @@ -1660,9 +1660,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> >      tcg_out_bundle(s, mLX,
> >                     tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
> >                                 TCG_REG_R56, 0, TCG_AREG0),
> > -                   tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[opc]),
> > +                   tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[s_bits]),
> >                     tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
> > -                               (tcg_target_long) qemu_st_helpers[opc]));
> > +                               (tcg_target_long) qemu_st_helpers[s_bits]));
> >      tcg_out_bundle(s, MmI,
> >                     tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
> >                                 TCG_REG_R2, 8),
> > @@ -1671,14 +1671,20 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> >                     tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
> >                                 TCG_REG_R3, 0));
> >  
> > -    if (!bswap || opc == 0) {
> > +    switch (opc) {
> > +    case MO_8:
> > +    case MO_16:
> > +    case MO_32:
> > +    case MO_64:
> >          tcg_out_bundle(s, mii,
> >                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
> >                                     TCG_REG_R1, TCG_REG_R2),
> >                         tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
> >                                     0, data_reg),
> >                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> > -    } else if (opc == 1) {
> > +        break;
> > +
> > +    case MO_16 | MO_BSWAP:
> >          tcg_out_bundle(s, miI,
> >                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
> >                                     TCG_REG_R1, TCG_REG_R2),
> > @@ -1692,7 +1698,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> >                         tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
> >                                     TCG_REG_R2, TCG_REG_R2, 0xb));
> >          data_reg = TCG_REG_R2;
> > -    } else if (opc == 2) {
> > +        break;
> > +
> > +    case MO_32 | MO_BSWAP:
> >          tcg_out_bundle(s, miI,
> >                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
> >                                     TCG_REG_R1, TCG_REG_R2),
> > @@ -1706,7 +1714,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> >                         tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
> >                                     TCG_REG_R2, TCG_REG_R2, 0xb));
> >          data_reg = TCG_REG_R2;
> > -    } else if (opc == 3) {
> > +        break;
> > +
> > +    case MO_64 | MO_BSWAP:
> >          tcg_out_bundle(s, miI,
> >                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
> >                                     TCG_REG_R1, TCG_REG_R2),
> > @@ -1715,6 +1725,10 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> >                         tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
> >                                     TCG_REG_R2, data_reg, 0xb));
> >          data_reg = TCG_REG_R2;
> > +        break;
> > +
> > +    default:
> > +        tcg_abort();
> >      }
> >  
> >      tcg_out_bundle(s, miB,
> > @@ -1728,7 +1742,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> >  
> >  #else /* !CONFIG_SOFTMMU */
> >  
> > -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> > +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
> > +                                   TCGMemOp opc)
> >  {
> >      static uint64_t const opc_ld_m1[4] = {
> >          OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
> > @@ -1736,17 +1751,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> >      static uint64_t const opc_sxt_i29[4] = {
> >          OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
> >      };
> > -    int addr_reg, data_reg, s_bits, bswap;
> > +    int addr_reg, data_reg;
> > +    TCGMemOp s_bits, bswap;
> >  
> >      data_reg = *args++;
> >      addr_reg = *args++;
> > -    s_bits = opc & 3;
> > -
> > -#ifdef TARGET_WORDS_BIGENDIAN
> > -    bswap = 1;
> > -#else
> > -    bswap = 0;
> > -#endif
> > +    s_bits = opc & MO_SIZE;
> > +    bswap = opc & MO_BSWAP;
> >  
> >  #if TARGET_LONG_BITS == 32
> >      if (GUEST_BASE != 0) {
> > @@ -1764,8 +1775,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> >                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> >      }
> >  
> > -    if (!bswap || s_bits == 0) {
> > -        if (s_bits == opc) {
> > +    if (!bswap) {
> > +        if (!(opc & MO_SIGN)) {
> >              tcg_out_bundle(s, miI,
> >                             tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
> >                                         data_reg, TCG_REG_R2),
> > @@ -1779,7 +1790,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> >                             tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
> >                                         data_reg, data_reg));
> >          }
> > -    } else if (s_bits == 3) {
> > +    } else if (s_bits == MO_64) {
> >              tcg_out_bundle(s, mII,
> >                             tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
> >                                         data_reg, TCG_REG_R2),
> > @@ -1787,7 +1798,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> >                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> >                                         data_reg, data_reg, 0xb));
> >      } else {
> > -        if (s_bits == 1) {
> > +        if (s_bits == MO_16) {
> >              tcg_out_bundle(s, mII,
> >                             tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
> >                                         data_reg, TCG_REG_R2),
> > @@ -1802,7 +1813,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> >                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
> >                                        data_reg, data_reg, 31, 31));
> >          }
> > -        if (opc == s_bits) {
> > +        if (!(opc & MO_SIGN)) {
> >              tcg_out_bundle(s, miI,
> >                             tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> >                             tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> > @@ -1833,28 +1844,28 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> >                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> >      }
> >  
> > -    if (bswap && s_bits == 1) {
> > +    if (bswap && s_bits == MO_16) {
> >          tcg_out_bundle(s, mII,
> >                         tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> >                         tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
> >                                     data_reg, data_reg, 15, 15),
> >                         tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> >                                     data_reg, data_reg, 0xb));
> > -    } else if (bswap && s_bits == 2) {
> > +    } else if (bswap && s_bits == MO_32) {
> >          tcg_out_bundle(s, mII,
> >                         tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> >                         tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
> >                                     data_reg, data_reg, 31, 31),
> >                         tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> >                                     data_reg, data_reg, 0xb));
> > -    } else if (bswap && s_bits == 3) {
> > +    } else if (bswap && s_bits == MO_64) {
> >          tcg_out_bundle(s, miI,
> >                         tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> >                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> >                         tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> >                                     data_reg, data_reg, 0xb));
> >      }
> > -    if (s_bits != opc) {
> > +    if (opc & MO_SIGN) {
> >          tcg_out_bundle(s, miI,
> >                         tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> >                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> > @@ -1864,24 +1875,22 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> >  #endif
> >  }
> >  
> > -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> > +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
> > +                                   TCGMemOp opc)
> >  {
> >      static uint64_t const opc_st_m4[4] = {
> >          OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
> >      };
> > -    int addr_reg, data_reg, bswap;
> > +    int addr_reg, data_reg;
> >  #if TARGET_LONG_BITS == 64
> >      uint64_t add_guest_base;
> >  #endif
> > +    TCGMemOp s_bits, bswap;
> >  
> >      data_reg = *args++;
> >      addr_reg = *args++;
> > -
> > -#ifdef TARGET_WORDS_BIGENDIAN
> > -    bswap = 1;
> > -#else
> > -    bswap = 0;
> > -#endif
> > +    s_bits = opc & MO_SIZE;
> > +    bswap = opc & MO_BSWAP;
> >  
> >  #if TARGET_LONG_BITS == 32
> >      if (GUEST_BASE != 0) {
> > @@ -1900,7 +1909,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> >      }
> >  
> >      if (bswap) {
> > -        if (opc == 1) {
> > +        if (s_bits == MO_16) {
> >              tcg_out_bundle(s, mII,
> >                             tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> >                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
> > @@ -1908,7 +1917,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> >                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> >                                         TCG_REG_R3, TCG_REG_R3, 0xb));
> >              data_reg = TCG_REG_R3;
> > -        } else if (opc == 2) {
> > +        } else if (s_bits == MO_32) {
> >              tcg_out_bundle(s, mII,
> >                             tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> >                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
> > @@ -1916,7 +1925,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> >                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> >                                         TCG_REG_R3, TCG_REG_R3, 0xb));
> >              data_reg = TCG_REG_R3;
> > -        } else if (opc == 3) {
> > +        } else if (s_bits == MO_64) {
> >              tcg_out_bundle(s, miI,
> >                             tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> >                             tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> > @@ -1926,7 +1935,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> >          }
> >      }
> >      tcg_out_bundle(s, mmI,
> > -                   tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc],
> > +                   tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
> >                                 data_reg, TCG_REG_R2),
> >                     tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
> >                     tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> > @@ -1939,14 +1948,14 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> >          add_guest_base = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
> >      }
> >  
> > -    if (!bswap || opc == 0) {
> > +    if (!bswap) {
> >          tcg_out_bundle(s, (GUEST_BASE ? MmI : mmI),
> >                         add_guest_base,
> > -                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc],
> > +                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
> >                                     data_reg, addr_reg),
> >                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> >      } else {
> > -        if (opc == 1) {
> > +        if (s_bits == MO_16) {
> >              tcg_out_bundle(s, mII,
> >                             add_guest_base,
> >                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
> > @@ -1954,7 +1963,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> >                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> >                                         TCG_REG_R3, TCG_REG_R3, 0xb));
> >              data_reg = TCG_REG_R3;
> > -        } else if (opc == 2) {
> > +        } else if (s_bits == MO_32) {
> >              tcg_out_bundle(s, mII,
> >                             add_guest_base,
> >                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
> > @@ -1962,7 +1971,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> >                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> >                                         TCG_REG_R3, TCG_REG_R3, 0xb));
> >              data_reg = TCG_REG_R3;
> > -        } else if (opc == 3) {
> > +        } else if (s_bits == MO_64) {
> >              tcg_out_bundle(s, miI,
> >                             add_guest_base,
> >                             tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> > @@ -1971,7 +1980,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> >              data_reg = TCG_REG_R3;
> >          }
> >          tcg_out_bundle(s, miI,
> > -                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[opc],
> > +                       tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
> >                                     data_reg, addr_reg),
> >                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
> >                         tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
> > @@ -2203,39 +2212,39 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
> >          break;
> >  
> >      case INDEX_op_qemu_ld8u:
> > -        tcg_out_qemu_ld(s, args, 0);
> > +        tcg_out_qemu_ld(s, args, MO_UB);
> >          break;
> >      case INDEX_op_qemu_ld8s:
> > -        tcg_out_qemu_ld(s, args, 0 | 4);
> > +        tcg_out_qemu_ld(s, args, MO_SB);
> >          break;
> >      case INDEX_op_qemu_ld16u:
> > -        tcg_out_qemu_ld(s, args, 1);
> > +        tcg_out_qemu_ld(s, args, MO_TEUW);
> >          break;
> >      case INDEX_op_qemu_ld16s:
> > -        tcg_out_qemu_ld(s, args, 1 | 4);
> > +        tcg_out_qemu_ld(s, args, MO_TESW);
> >          break;
> >      case INDEX_op_qemu_ld32:
> >      case INDEX_op_qemu_ld32u:
> > -        tcg_out_qemu_ld(s, args, 2);
> > +        tcg_out_qemu_ld(s, args, MO_TEUL);
> >          break;
> >      case INDEX_op_qemu_ld32s:
> > -        tcg_out_qemu_ld(s, args, 2 | 4);
> > +        tcg_out_qemu_ld(s, args, MO_TESL);
> >          break;
> >      case INDEX_op_qemu_ld64:
> > -        tcg_out_qemu_ld(s, args, 3);
> > +        tcg_out_qemu_ld(s, args, MO_TEQ);
> >          break;
> >  
> >      case INDEX_op_qemu_st8:
> > -        tcg_out_qemu_st(s, args, 0);
> > +        tcg_out_qemu_st(s, args, MO_UB);
> >          break;
> >      case INDEX_op_qemu_st16:
> > -        tcg_out_qemu_st(s, args, 1);
> > +        tcg_out_qemu_st(s, args, MO_TEUW);
> >          break;
> >      case INDEX_op_qemu_st32:
> > -        tcg_out_qemu_st(s, args, 2);
> > +        tcg_out_qemu_st(s, args, MO_TEUL);
> >          break;
> >      case INDEX_op_qemu_st64:
> > -        tcg_out_qemu_st(s, args, 3);
> > +        tcg_out_qemu_st(s, args, MO_TEQ);
> >          break;
> >  
> >      default:
> 
> Acked-by: Aurelien Jarno <aurelien@aurel32.net>
> 

Actually I spoke too fast, you also need the following patch for big
endian targets:

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 9ed587a..2d8e00c 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1770,7 +1770,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     }
 
     tcg_out_bundle(s, miB,
-                   tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
+                   tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits],
                                data_reg, TCG_REG_R3),
                    tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index),
                    tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,

Please squash it in this patch for the next version.

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 05/20] tcg-ia64: Move AREG0 to R32
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 05/20] tcg-ia64: Move AREG0 to R32 Richard Henderson
@ 2013-11-06 22:33   ` Aurelien Jarno
  0 siblings, 0 replies; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-06 22:33 UTC (permalink / raw
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Thu, Oct 31, 2013 at 01:21:54PM -0700, Richard Henderson wrote:
> Since the move away from the global areg0, we're no longer globally
> reserving areg0.  Which means our use of R7 clobbers a call-saved
> register.  Shift areg0 into the windowed registers.  Indeed, choose
> the incoming parameter register that it comes to us by.
> 
> This requires moving the register holding the return address elsewhere.
> Choose R33 for tidiness.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/ia64/tcg-target.c | 13 ++++++-------
>  tcg/ia64/tcg-target.h |  4 ++--
>  2 files changed, 8 insertions(+), 9 deletions(-)
> 
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index 2fdc38a5..336781f 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -109,7 +109,6 @@ enum {
>  };
>  
>  static const int tcg_target_reg_alloc_order[] = {
> -    TCG_REG_R33,
>      TCG_REG_R35,
>      TCG_REG_R36,
>      TCG_REG_R37,
> @@ -2404,8 +2403,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
>      tcg_out_bundle(s, miI,
>                     tcg_opc_m34(TCG_REG_P0, OPC_ALLOC_M34,
>                                 TCG_REG_R34, 32, 24, 0),
> -                   tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
> -                               TCG_AREG0, 0, TCG_REG_R32),
> +                   INSN_NOP_I,
>                     tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21,
>                                 TCG_REG_B6, TCG_REG_R33, 0));
>  
> @@ -2424,7 +2422,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
>                     tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
>                                 TCG_REG_R12, -frame_size, TCG_REG_R12),
>                     tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22,
> -                               TCG_REG_R32, TCG_REG_B0),
> +                               TCG_REG_R33, TCG_REG_B0),
>                     tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4, TCG_REG_B6));
>  
>      /* epilogue */
> @@ -2432,7 +2430,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
>      tcg_out_bundle(s, miI,
>                     INSN_NOP_M,
>                     tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21,
> -                               TCG_REG_B0, TCG_REG_R32, 0),
> +                               TCG_REG_B0, TCG_REG_R33, 0),
>                     tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
>                                 TCG_REG_R12, frame_size, TCG_REG_R12));
>      tcg_out_bundle(s, miB,
> @@ -2489,16 +2487,17 @@ static void tcg_target_init(TCGContext *s)
>      tcg_regset_set_reg(s->reserved_regs, TCG_REG_R3);   /* internal use */
>      tcg_regset_set_reg(s->reserved_regs, TCG_REG_R12);  /* stack pointer */
>      tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13);  /* thread pointer */
> -    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R32);  /* return address */
> +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R33);  /* return address */
>      tcg_regset_set_reg(s->reserved_regs, TCG_REG_R34);  /* PFS */
>  
> -    /* The following 3 are not in use, are call-saved, but *not* saved
> +    /* The following 4 are not in use, are call-saved, but *not* saved
>         by the prologue.  Therefore we cannot use them without modifying
>         the prologue.  There doesn't seem to be any good reason to use
>         these as opposed to the windowed registers.  */
>      tcg_regset_set_reg(s->reserved_regs, TCG_REG_R4);
>      tcg_regset_set_reg(s->reserved_regs, TCG_REG_R5);
>      tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6);
> +    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R7);
>  
>      tcg_add_target_add_op_defs(ia64_op_defs);
>  }
> diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
> index c90038a..52a939c 100644
> --- a/tcg/ia64/tcg-target.h
> +++ b/tcg/ia64/tcg-target.h
> @@ -92,6 +92,8 @@ typedef enum {
>      TCG_REG_R61,
>      TCG_REG_R62,
>      TCG_REG_R63,
> +
> +    TCG_AREG0 = TCG_REG_R32,
>  } TCGReg;
>  
>  #define TCG_CT_CONST_ZERO 0x100
> @@ -162,8 +164,6 @@ typedef enum {
>  #define TCG_TARGET_HAS_not_i32          0 /* xor r1, -1, r3 */
>  #define TCG_TARGET_HAS_not_i64          0 /* xor r1, -1, r3 */
>  
> -#define TCG_AREG0 TCG_REG_R7
> -
>  static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
>  {
>      start = start & ~(32UL - 1UL);

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 06/20] tcg-ia64: Avoid unnecessary stop bit in tcg_out_alu
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 06/20] tcg-ia64: Avoid unnecessary stop bit in tcg_out_alu Richard Henderson
@ 2013-11-06 22:33   ` Aurelien Jarno
  0 siblings, 0 replies; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-06 22:33 UTC (permalink / raw
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Thu, Oct 31, 2013 at 01:21:55PM -0700, Richard Henderson wrote:
> When performing an operation with two input registers, we'd leave
> the stop bit (and thus an extra cycle) that's only needed when one
> or the other input is a constant.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/ia64/tcg-target.c | 17 ++++++-----------
>  1 file changed, 6 insertions(+), 11 deletions(-)
> 
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index 336781f..b7f74a9 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -1044,31 +1044,26 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
>      }
>  }
>  
> -static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGArg ret,
> -                               TCGArg arg1, int const_arg1,
> -                               TCGArg arg2, int const_arg2)
> +static void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGReg ret, TCGArg arg1,
> +                        int const_arg1, TCGArg arg2, int const_arg2)
>  {
> -    uint64_t opc1, opc2;
> +    uint64_t opc1 = 0, opc2 = 0;
>  
>      if (const_arg1 && arg1 != 0) {
>          opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
>                            TCG_REG_R2, arg1, TCG_REG_R0);
>          arg1 = TCG_REG_R2;
> -    } else {
> -        opc1 = INSN_NOP_M;
>      }
>  
>      if (const_arg2 && arg2 != 0) {
>          opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
>                            TCG_REG_R3, arg2, TCG_REG_R0);
>          arg2 = TCG_REG_R3;
> -    } else {
> -        opc2 = INSN_NOP_I;
>      }
>  
> -    tcg_out_bundle(s, mII,
> -                   opc1,
> -                   opc2,
> +    tcg_out_bundle(s, (opc1 || opc2 ? mII : miI),
> +                   opc1 ? opc1 : INSN_NOP_M,
> +                   opc2 ? opc2 : INSN_NOP_I,
>                     tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2));
>  }
>  

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 07/20] tcg-ia64: Use ADDS for small addition
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 07/20] tcg-ia64: Use ADDS for small addition Richard Henderson
@ 2013-11-06 22:34   ` Aurelien Jarno
  0 siblings, 0 replies; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-06 22:34 UTC (permalink / raw
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Thu, Oct 31, 2013 at 01:21:56PM -0700, Richard Henderson wrote:
> Avoids a wasted cycle loading up small constants.
> 
> Simplify the code assuming the tcg optimizer is going to work
> and don't expect the first operand of the add to be constant.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/ia64/tcg-target.c | 20 ++++++++++++++++----
>  1 file changed, 16 insertions(+), 4 deletions(-)
> 
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index b7f74a9..be74606 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -1067,6 +1067,19 @@ static void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGReg ret, TCGArg arg1,
>                     tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2));
>  }
>  
> +static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1,
> +                               TCGArg arg2, int const_arg2)
> +{
> +    if (const_arg2 && arg2 == sextract64(arg2, 0, 14)) {
> +        tcg_out_bundle(s, mmI,
> +                       INSN_NOP_M,
> +                       INSN_NOP_M,
> +                       tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, arg2, arg1));
> +    } else {
> +        tcg_out_alu(s, OPC_ADD_A1, ret, arg1, 0, arg2, const_arg2);
> +    }
> +}
> +
>  static inline void tcg_out_eqv(TCGContext *s, TCGArg ret,
>                                 TCGArg arg1, int const_arg1,
>                                 TCGArg arg2, int const_arg2)
> @@ -2068,8 +2081,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>  
>      case INDEX_op_add_i32:
>      case INDEX_op_add_i64:
> -        tcg_out_alu(s, OPC_ADD_A1, args[0], args[1], const_args[1],
> -                    args[2], const_args[2]);
> +        tcg_out_add(s, args[0], args[1], args[2], const_args[2]);
>          break;
>      case INDEX_op_sub_i32:
>      case INDEX_op_sub_i64:
> @@ -2275,7 +2287,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
>      { INDEX_op_st16_i32, { "rZ", "r" } },
>      { INDEX_op_st_i32, { "rZ", "r" } },
>  
> -    { INDEX_op_add_i32, { "r", "rI", "rI" } },
> +    { INDEX_op_add_i32, { "r", "rZ", "rI" } },
>      { INDEX_op_sub_i32, { "r", "rI", "rI" } },
>  
>      { INDEX_op_and_i32, { "r", "rI", "rI" } },
> @@ -2322,7 +2334,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
>      { INDEX_op_st32_i64, { "rZ", "r" } },
>      { INDEX_op_st_i64, { "rZ", "r" } },
>  
> -    { INDEX_op_add_i64, { "r", "rI", "rI" } },
> +    { INDEX_op_add_i64, { "r", "rZ", "rI" } },
>      { INDEX_op_sub_i64, { "r", "rI", "rI" } },
>  
>      { INDEX_op_and_i64, { "r", "rI", "rI" } },

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 08/20] tcg-ia64: Use SUB_A3 and ADDS_A4 for subtraction
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 08/20] tcg-ia64: Use SUB_A3 and ADDS_A4 for subtraction Richard Henderson
@ 2013-11-06 22:34   ` Aurelien Jarno
  0 siblings, 0 replies; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-06 22:34 UTC (permalink / raw
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Thu, Oct 31, 2013 at 01:21:57PM -0700, Richard Henderson wrote:
> We can subtract from more small constants than just 0 with one insn,
> and we can add the negative for most small constants.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/ia64/tcg-target.c | 25 +++++++++++++++++++++++--
>  1 file changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index be74606..d7bccd2 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -1080,6 +1080,28 @@ static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1,
>      }
>  }
>  
> +static inline void tcg_out_sub(TCGContext *s, TCGReg ret, TCGArg arg1,
> +                               int const_arg1, TCGArg arg2, int const_arg2)
> +{
> +    if (const_arg1 && arg1 == (int8_t)arg1) {
> +        if (const_arg2) {
> +            tcg_out_movi(s, TCG_TYPE_I64, ret, arg1 - arg2);
> +            return;
> +        }
> +        tcg_out_bundle(s, mmI,
> +                       INSN_NOP_M,
> +                       INSN_NOP_M,
> +                       tcg_opc_a3(TCG_REG_P0, OPC_SUB_A3, ret, arg1, arg2));
> +    } else if (const_arg2 && -arg2 == sextract64(-arg2, 0, 14)) {
> +        tcg_out_bundle(s, mmI,
> +                       INSN_NOP_M,
> +                       INSN_NOP_M,
> +                       tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, -arg2, arg1));
> +    } else {
> +        tcg_out_alu(s, OPC_SUB_A1, ret, arg1, const_arg1, arg2, const_arg2);
> +    }
> +}
> +
>  static inline void tcg_out_eqv(TCGContext *s, TCGArg ret,
>                                 TCGArg arg1, int const_arg1,
>                                 TCGArg arg2, int const_arg2)
> @@ -2085,8 +2107,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          break;
>      case INDEX_op_sub_i32:
>      case INDEX_op_sub_i64:
> -        tcg_out_alu(s, OPC_SUB_A1, args[0], args[1], const_args[1],
> -                    args[2], const_args[2]);
> +        tcg_out_sub(s, args[0], args[1], const_args[1], args[2], const_args[2]);
>          break;
>  
>      case INDEX_op_and_i32:

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 09/20] tcg-ia64: Use A3 form of logical operations
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 09/20] tcg-ia64: Use A3 form of logical operations Richard Henderson
@ 2013-11-06 22:34   ` Aurelien Jarno
  0 siblings, 0 replies; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-06 22:34 UTC (permalink / raw
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Thu, Oct 31, 2013 at 01:21:58PM -0700, Richard Henderson wrote:
> We can and/or/xor/andcm small constants, saving one cycle.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/ia64/tcg-target.c | 64 +++++++++++++++++++++++++++------------------------
>  1 file changed, 34 insertions(+), 30 deletions(-)
> 
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index d7bccd2..e45b571 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -263,6 +263,7 @@ enum {
>      OPC_MOV_I_I26             = 0x00150000000ull,
>      OPC_MOVL_X2               = 0x0c000000000ull,
>      OPC_OR_A1                 = 0x10070000000ull,
> +    OPC_OR_A3                 = 0x10170000000ull,
>      OPC_SETF_EXP_M18          = 0x0c748000000ull,
>      OPC_SETF_SIG_M18          = 0x0c708000000ull,
>      OPC_SHL_I7                = 0x0f240000000ull,
> @@ -281,6 +282,7 @@ enum {
>      OPC_UNPACK4_L_I2          = 0x0f860000000ull,
>      OPC_XMA_L_F2              = 0x1d000000000ull,
>      OPC_XOR_A1                = 0x10078000000ull,
> +    OPC_XOR_A3                = 0x10178000000ull,
>      OPC_ZXT1_I29              = 0x00080000000ull,
>      OPC_ZXT2_I29              = 0x00088000000ull,
>      OPC_ZXT4_I29              = 0x00090000000ull,
> @@ -1044,27 +1046,34 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
>      }
>  }
>  
> -static void tcg_out_alu(TCGContext *s, uint64_t opc_a1, TCGReg ret, TCGArg arg1,
> -                        int const_arg1, TCGArg arg2, int const_arg2)
> +static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3,
> +                               TCGReg ret, TCGArg arg1, int const_arg1,
> +                               TCGArg arg2, int const_arg2)
>  {
> -    uint64_t opc1 = 0, opc2 = 0;
> -
> -    if (const_arg1 && arg1 != 0) {
> -        opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
> -                          TCG_REG_R2, arg1, TCG_REG_R0);
> -        arg1 = TCG_REG_R2;
> -    }
> +    uint64_t opc1 = 0, opc2 = 0, opc3 = 0;
>  
>      if (const_arg2 && arg2 != 0) {
>          opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
>                            TCG_REG_R3, arg2, TCG_REG_R0);
>          arg2 = TCG_REG_R3;
>      }
> +    if (const_arg1 && arg1 != 0) {
> +        if (opc_a3 && arg1 == (int8_t)arg1) {
> +            opc3 = tcg_opc_a3(TCG_REG_P0, opc_a3, ret, arg1, arg2);
> +        } else {
> +            opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
> +                              TCG_REG_R2, arg1, TCG_REG_R0);
> +            arg1 = TCG_REG_R2;
> +        }
> +    }
> +    if (opc3 == 0) {
> +        opc3 = tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2);
> +    }
>  
>      tcg_out_bundle(s, (opc1 || opc2 ? mII : miI),
>                     opc1 ? opc1 : INSN_NOP_M,
>                     opc2 ? opc2 : INSN_NOP_I,
> -                   tcg_opc_a1(TCG_REG_P0, opc_a1, ret, arg1, arg2));
> +                   opc3);
>  }
>  
>  static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1,
> @@ -1076,29 +1085,21 @@ static inline void tcg_out_add(TCGContext *s, TCGReg ret, TCGReg arg1,
>                         INSN_NOP_M,
>                         tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, arg2, arg1));
>      } else {
> -        tcg_out_alu(s, OPC_ADD_A1, ret, arg1, 0, arg2, const_arg2);
> +        tcg_out_alu(s, OPC_ADD_A1, 0, ret, arg1, 0, arg2, const_arg2);
>      }
>  }
>  
>  static inline void tcg_out_sub(TCGContext *s, TCGReg ret, TCGArg arg1,
>                                 int const_arg1, TCGArg arg2, int const_arg2)
>  {
> -    if (const_arg1 && arg1 == (int8_t)arg1) {
> -        if (const_arg2) {
> -            tcg_out_movi(s, TCG_TYPE_I64, ret, arg1 - arg2);
> -            return;
> -        }
> -        tcg_out_bundle(s, mmI,
> -                       INSN_NOP_M,
> -                       INSN_NOP_M,
> -                       tcg_opc_a3(TCG_REG_P0, OPC_SUB_A3, ret, arg1, arg2));
> -    } else if (const_arg2 && -arg2 == sextract64(-arg2, 0, 14)) {
> +    if (!const_arg1 && const_arg2 && -arg2 == sextract64(-arg2, 0, 14)) {
>          tcg_out_bundle(s, mmI,
>                         INSN_NOP_M,
>                         INSN_NOP_M,
>                         tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, -arg2, arg1));
>      } else {
> -        tcg_out_alu(s, OPC_SUB_A1, ret, arg1, const_arg1, arg2, const_arg2);
> +        tcg_out_alu(s, OPC_SUB_A1, OPC_SUB_A3, ret,
> +                    arg1, const_arg1, arg2, const_arg2);
>      }
>  }
>  
> @@ -2112,13 +2113,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>  
>      case INDEX_op_and_i32:
>      case INDEX_op_and_i64:
> -        tcg_out_alu(s, OPC_AND_A1, args[0], args[1], const_args[1],
> -                    args[2], const_args[2]);
> +        /* TCG expects arg2 constant; A3 expects arg1 constant.  Swap.  */
> +        tcg_out_alu(s, OPC_AND_A1, OPC_AND_A3, args[0],
> +                    args[2], const_args[2], args[1], const_args[1]);
>          break;
>      case INDEX_op_andc_i32:
>      case INDEX_op_andc_i64:
> -        tcg_out_alu(s, OPC_ANDCM_A1, args[0], args[1], const_args[1],
> -                    args[2], const_args[2]);
> +        tcg_out_alu(s, OPC_ANDCM_A1, OPC_ANDCM_A3, args[0],
> +                    args[1], const_args[1], args[2], const_args[2]);
>          break;
>      case INDEX_op_eqv_i32:
>      case INDEX_op_eqv_i64:
> @@ -2137,8 +2139,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          break;
>      case INDEX_op_or_i32:
>      case INDEX_op_or_i64:
> -        tcg_out_alu(s, OPC_OR_A1, args[0], args[1], const_args[1],
> -                    args[2], const_args[2]);
> +        /* TCG expects arg2 constant; A3 expects arg1 constant.  Swap.  */
> +        tcg_out_alu(s, OPC_OR_A1, OPC_OR_A3, args[0],
> +                    args[2], const_args[2], args[1], const_args[1]);
>          break;
>      case INDEX_op_orc_i32:
>      case INDEX_op_orc_i64:
> @@ -2147,8 +2150,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          break;
>      case INDEX_op_xor_i32:
>      case INDEX_op_xor_i64:
> -        tcg_out_alu(s, OPC_XOR_A1, args[0], args[1], const_args[1],
> -                    args[2], const_args[2]);
> +        /* TCG expects arg2 constant; A3 expects arg1 constant.  Swap.  */
> +        tcg_out_alu(s, OPC_XOR_A1, OPC_XOR_A3, args[0],
> +                    args[2], const_args[2], args[1], const_args[1]);
>          break;
>  
>      case INDEX_op_mul_i32:

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 10/20] tcg-ia64: Introduce tcg_opc_mov_a
  2013-10-31 20:21 ` [Qemu-devel] [PATCH 10/20] tcg-ia64: Introduce tcg_opc_mov_a Richard Henderson
@ 2013-11-06 22:34   ` Aurelien Jarno
  0 siblings, 0 replies; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-06 22:34 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Thu, Oct 31, 2013 at 01:21:59PM -0700, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/ia64/tcg-target.c | 35 ++++++++++++++++-------------------
>  1 file changed, 16 insertions(+), 19 deletions(-)
> 
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index e45b571..c40e32f 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -867,13 +867,18 @@ static inline void tcg_out_bundle(TCGContext *s, int template,
>      s->code_ptr += 16;
>  }
>  
> +static inline uint64_t tcg_opc_mov_a(int qp, TCGReg dst, TCGReg src)
> +{
> +    return tcg_opc_a4(qp, OPC_ADDS_A4, dst, 0, src);
> +}
> +
>  static inline void tcg_out_mov(TCGContext *s, TCGType type,
>                                 TCGReg ret, TCGReg arg)
>  {
>      tcg_out_bundle(s, mmI,
>                     INSN_NOP_M,
>                     INSN_NOP_M,
> -                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, ret, 0, arg));
> +                   tcg_opc_mov_a(TCG_REG_P0, ret, arg));
>  }
>  
>  static inline void tcg_out_movi(TCGContext *s, TCGType type,
> @@ -1514,14 +1519,14 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
>      } else if (ret == v1) {
>          opc1 = INSN_NOP_M;
>      } else {
> -        opc1 = tcg_opc_a4(TCG_REG_P6, OPC_ADDS_A4, ret, 0, v1);
> +        opc1 = tcg_opc_mov_a(TCG_REG_P6, ret, v1);
>      }
>      if (const_v2) {
>          opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0);
>      } else if (ret == v2) {
>          opc2 = INSN_NOP_I;
>      } else {
> -        opc2 = tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, ret, 0, v2);
> +        opc2 = tcg_opc_mov_a(TCG_REG_P7, ret, v2);
>      }
>  
>      tcg_out_bundle(s, MmI,
> @@ -1551,8 +1556,7 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
>  #if TARGET_LONG_BITS == 32
>                     tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R57, addr_reg),
>  #else
> -                   tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R57,
> -                              0, addr_reg),
> +                   tcg_opc_mov_a(TCG_REG_P0, TCG_REG_R57, addr_reg),
>  #endif
>                     tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
>                                 TCG_REG_R2, TCG_AREG0));
> @@ -1603,8 +1607,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>  
>      /* P6 is the fast path, and P7 the slow path */
>      tcg_out_bundle(s, mLX,
> -                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
> -                               TCG_REG_R56, 0, TCG_AREG0),
> +                   tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
>                     tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]),
>                     tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
>                                 (tcg_target_long) qemu_ld_helpers[s_bits]));
> @@ -1657,8 +1660,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>          tcg_out_bundle(s, miI,
>                         INSN_NOP_M,
>                         INSN_NOP_I,
> -                       tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4,
> -                                   data_reg, 0, TCG_REG_R8));
> +                       tcg_opc_mov_a(TCG_REG_P0, data_reg, TCG_REG_R8));
>      } else {
>          tcg_out_bundle(s, miI,
>                         INSN_NOP_M,
> @@ -1697,8 +1699,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>  
>      /* P6 is the fast path, and P7 the slow path */
>      tcg_out_bundle(s, mLX,
> -                   tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
> -                               TCG_REG_R56, 0, TCG_AREG0),
> +                   tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0),
>                     tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[s_bits]),
>                     tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
>                                 (tcg_target_long) qemu_st_helpers[s_bits]));
> @@ -1718,8 +1719,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>          tcg_out_bundle(s, mii,
>                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
>                                     TCG_REG_R1, TCG_REG_R2),
> -                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
> -                                   0, data_reg),
> +                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
>                         INSN_NOP_I);
>          break;
>  
> @@ -1731,8 +1731,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>                         tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
>                                     TCG_REG_R2, data_reg, 15, 15));
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
> -                                   0, data_reg),
> +                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
>                         INSN_NOP_I,
>                         tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
>                                     TCG_REG_R2, TCG_REG_R2, 0xb));
> @@ -1747,8 +1746,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>                         tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
>                                     TCG_REG_R2, data_reg, 31, 31));
>          tcg_out_bundle(s, miI,
> -                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
> -                                   0, data_reg),
> +                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
>                         INSN_NOP_I,
>                         tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
>                                     TCG_REG_R2, TCG_REG_R2, 0xb));
> @@ -1759,8 +1757,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>          tcg_out_bundle(s, miI,
>                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
>                                     TCG_REG_R1, TCG_REG_R2),
> -                       tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
> -                                   0, data_reg),
> +                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
>                         tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
>                                     TCG_REG_R2, data_reg, 0xb));
>          data_reg = TCG_REG_R2;

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 11/20] tcg-ia64: Introduce tcg_opc_movi_a
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 11/20] tcg-ia64: Introduce tcg_opc_movi_a Richard Henderson
@ 2013-11-06 22:34   ` Aurelien Jarno
  0 siblings, 0 replies; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-06 22:34 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Thu, Oct 31, 2013 at 01:22:00PM -0700, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/ia64/tcg-target.c | 32 ++++++++++++++++----------------
>  1 file changed, 16 insertions(+), 16 deletions(-)
> 
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index c40e32f..c134034 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -881,6 +881,12 @@ static inline void tcg_out_mov(TCGContext *s, TCGType type,
>                     tcg_opc_mov_a(TCG_REG_P0, ret, arg));
>  }
>  
> +static inline uint64_t tcg_opc_movi_a(int qp, TCGReg dst, int64_t src)
> +{
> +    assert(src == sextract64(src, 0, 22));
> +    return tcg_opc_a5(qp, OPC_ADDL_A5, dst, src, TCG_REG_R0);
> +}
> +
>  static inline void tcg_out_movi(TCGContext *s, TCGType type,
>                                  TCGReg reg, tcg_target_long arg)
>  {
> @@ -1058,16 +1064,14 @@ static inline void tcg_out_alu(TCGContext *s, uint64_t opc_a1, uint64_t opc_a3,
>      uint64_t opc1 = 0, opc2 = 0, opc3 = 0;
>  
>      if (const_arg2 && arg2 != 0) {
> -        opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
> -                          TCG_REG_R3, arg2, TCG_REG_R0);
> +        opc2 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R3, arg2);
>          arg2 = TCG_REG_R3;
>      }
>      if (const_arg1 && arg1 != 0) {
>          if (opc_a3 && arg1 == (int8_t)arg1) {
>              opc3 = tcg_opc_a3(TCG_REG_P0, opc_a3, ret, arg1, arg2);
>          } else {
> -            opc1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
> -                              TCG_REG_R2, arg1, TCG_REG_R0);
> +            opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, arg1);
>              arg1 = TCG_REG_R2;
>          }
>      }
> @@ -1423,8 +1427,7 @@ static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1,
>          } else {
>              /* Otherwise, load any constant into a temporary.  Do this into
>                 the first I slot to help out with cross-unit delays.  */
> -            i1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
> -                            TCG_REG_R2, a2, TCG_REG_R0);
> +            i1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, a2);
>              a2 = TCG_REG_R2;
>          }
>      }
> @@ -1503,8 +1506,8 @@ static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret,
>  {
>      tcg_out_bundle(s, MmI,
>                     tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4),
> -                   tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, 1, TCG_REG_R0),
> -                   tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, 0, TCG_REG_R0));
> +                   tcg_opc_movi_a(TCG_REG_P6, ret, 1),
> +                   tcg_opc_movi_a(TCG_REG_P7, ret, 0));
>  }
>  
>  static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
> @@ -1515,14 +1518,14 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
>      uint64_t opc1, opc2;
>  
>      if (const_v1) {
> -        opc1 = tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, v1, TCG_REG_R0);
> +        opc1 = tcg_opc_movi_a(TCG_REG_P6, ret, v1);
>      } else if (ret == v1) {
>          opc1 = INSN_NOP_M;
>      } else {
>          opc1 = tcg_opc_mov_a(TCG_REG_P6, ret, v1);
>      }
>      if (const_v2) {
> -        opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0);
> +        opc2 = tcg_opc_movi_a(TCG_REG_P7, ret, v2);
>      } else if (ret == v2) {
>          opc2 = INSN_NOP_I;
>      } else {
> @@ -1641,15 +1644,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>      }
>      if (!bswap) {
>          tcg_out_bundle(s, miB,
> -                       tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
> -                                   mem_index, TCG_REG_R0),
> +                       tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
>                         INSN_NOP_I,
>                         tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
>                                     TCG_REG_B0, TCG_REG_B6));
>      } else {
>          tcg_out_bundle(s, miB,
> -                       tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
> -                                   mem_index, TCG_REG_R0),
> +                       tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
>                         tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
>                                     TCG_REG_R8, TCG_REG_R8, 0xb),
>                         tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
> @@ -1770,8 +1771,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>      tcg_out_bundle(s, miB,
>                     tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
>                                 data_reg, TCG_REG_R3),
> -                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
> -                               mem_index, TCG_REG_R0),
> +                   tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index),
>                     tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
>                                 TCG_REG_B0, TCG_REG_B6));
>  }

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 12/20] tcg-ia64: Introduce tcg_opc_ext_i
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 12/20] tcg-ia64: Introduce tcg_opc_ext_i Richard Henderson
@ 2013-11-06 22:34   ` Aurelien Jarno
  0 siblings, 0 replies; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-06 22:34 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Thu, Oct 31, 2013 at 01:22:01PM -0700, Richard Henderson wrote:
> Being able to "extend" from 64-bits (with a mov) simplifies
> a few places where the conditional breaks the train of thought.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/ia64/tcg-target.c | 54 +++++++++++++++++++++++----------------------------
>  1 file changed, 24 insertions(+), 30 deletions(-)
> 
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index c134034..ca38608 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -1377,6 +1377,20 @@ static inline void tcg_out_rotr_i64(TCGContext *s, TCGArg ret, TCGArg arg1,
>      }
>  }
>  
> +static const uint64_t opc_ext_i29[8] = {
> +    OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0,
> +    OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
> +};
> +
> +static inline uint64_t tcg_opc_ext_i(int qp, TCGMemOp opc, TCGReg d, TCGReg s)
> +{
> +    if ((opc & MO_SIZE) == MO_64) {
> +        return tcg_opc_mov_a(qp, d, s);
> +    } else {
> +        return tcg_opc_i29(qp, opc_ext_i29[opc & MO_SSIZE], d, s);
> +    }
> +}
> +
>  static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29,
>                                 TCGArg ret, TCGArg arg)
>  {
> @@ -1556,11 +1570,9 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
>      tcg_out_bundle(s, mII,
>                     tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2,
>                                 offset_rw, TCG_REG_R2),
> -#if TARGET_LONG_BITS == 32
> -                   tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R57, addr_reg),
> -#else
> -                   tcg_opc_mov_a(TCG_REG_P0, TCG_REG_R57, addr_reg),
> -#endif
> +                   tcg_opc_ext_i(TCG_REG_P0,
> +                                 TARGET_LONG_BITS == 32 ? MO_UL : MO_Q,
> +                                 TCG_REG_R57, addr_reg),
>                     tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
>                                 TCG_REG_R2, TCG_AREG0));
>      tcg_out_bundle(s, mII,
> @@ -1590,10 +1602,6 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>      static const uint64_t opc_ld_m1[4] = {
>          OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
>      };
> -    static const uint64_t opc_ext_i29[8] = {
> -        OPC_ZXT1_I29, OPC_ZXT2_I29, OPC_ZXT4_I29, 0,
> -        OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
> -    };
>      int addr_reg, data_reg, mem_index;
>      TCGMemOp s_bits, bswap;
>  
> @@ -1657,18 +1665,10 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>                                     TCG_REG_B0, TCG_REG_B6));
>      }
>  
> -    if (s_bits == MO_64) {
> -        tcg_out_bundle(s, miI,
> -                       INSN_NOP_M,
> -                       INSN_NOP_I,
> -                       tcg_opc_mov_a(TCG_REG_P0, data_reg, TCG_REG_R8));
> -    } else {
> -        tcg_out_bundle(s, miI,
> -                       INSN_NOP_M,
> -                       INSN_NOP_I,
> -                       tcg_opc_i29(TCG_REG_P0, opc_ext_i29[opc & MO_SSIZE],
> -                                   data_reg, TCG_REG_R8));
> -    }
> +    tcg_out_bundle(s, miI,
> +                   INSN_NOP_M,
> +                   INSN_NOP_I,
> +                   tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8));
>  }
>  
>  /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
> @@ -1784,9 +1784,6 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>      static uint64_t const opc_ld_m1[4] = {
>          OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1
>      };
> -    static uint64_t const opc_sxt_i29[4] = {
> -        OPC_SXT1_I29, OPC_SXT2_I29, OPC_SXT4_I29, 0
> -    };
>      int addr_reg, data_reg;
>      TCGMemOp s_bits, bswap;
>  
> @@ -1823,8 +1820,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>                             tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
>                                         data_reg, TCG_REG_R2),
>                             INSN_NOP_I,
> -                           tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
> -                                       data_reg, data_reg));
> +                           tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg));
>          }
>      } else if (s_bits == MO_64) {
>              tcg_out_bundle(s, mII,
> @@ -1860,8 +1856,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>                             INSN_NOP_M,
>                             tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
>                                         data_reg, data_reg, 0xb),
> -                           tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
> -                                       data_reg, data_reg));
> +                           tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg));
>          }
>      }
>  #else
> @@ -1905,8 +1900,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>          tcg_out_bundle(s, miI,
>                         INSN_NOP_M,
>                         INSN_NOP_I,
> -                       tcg_opc_i29(TCG_REG_P0, opc_sxt_i29[s_bits],
> -                                   data_reg, data_reg));
> +                       tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg));
>      }
>  #endif
>  }

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [Qemu-devel] [PATCH 13/20] tcg-ia64: Introduce tcg_opc_bswap64_i
  2013-10-31 20:22 ` [Qemu-devel] [PATCH 13/20] tcg-ia64: Introduce tcg_opc_bswap64_i Richard Henderson
@ 2013-11-06 22:34   ` Aurelien Jarno
  0 siblings, 0 replies; 39+ messages in thread
From: Aurelien Jarno @ 2013-11-06 22:34 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, aliguori

On Thu, Oct 31, 2013 at 01:22:02PM -0700, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/ia64/tcg-target.c | 63 +++++++++++++++++++++++----------------------------
>  1 file changed, 28 insertions(+), 35 deletions(-)
> 
> diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
> index ca38608..9ed587a 100644
> --- a/tcg/ia64/tcg-target.c
> +++ b/tcg/ia64/tcg-target.c
> @@ -1400,12 +1400,17 @@ static inline void tcg_out_ext(TCGContext *s, uint64_t opc_i29,
>                     tcg_opc_i29(TCG_REG_P0, opc_i29, ret, arg));
>  }
>  
> +static inline uint64_t tcg_opc_bswap64_i(int qp, TCGReg d, TCGReg s)
> +{
> +    return tcg_opc_i3(qp, OPC_MUX1_I3, d, s, 0xb);
> +}
> +
>  static inline void tcg_out_bswap16(TCGContext *s, TCGArg ret, TCGArg arg)
>  {
>      tcg_out_bundle(s, mII,
>                     INSN_NOP_M,
>                     tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 15, 15),
> -                   tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb));
> +                   tcg_opc_bswap64_i(TCG_REG_P0, ret, ret));
>  }
>  
>  static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg)
> @@ -1413,7 +1418,7 @@ static inline void tcg_out_bswap32(TCGContext *s, TCGArg ret, TCGArg arg)
>      tcg_out_bundle(s, mII,
>                     INSN_NOP_M,
>                     tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, ret, arg, 31, 31),
> -                   tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, ret, 0xb));
> +                   tcg_opc_bswap64_i(TCG_REG_P0, ret, ret));
>  }
>  
>  static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
> @@ -1421,7 +1426,7 @@ static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
>      tcg_out_bundle(s, miI,
>                     INSN_NOP_M,
>                     INSN_NOP_I,
> -                   tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb));
> +                   tcg_opc_bswap64_i(TCG_REG_P0, ret, arg));
>  }
>  
>  static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1,
> @@ -1659,8 +1664,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>      } else {
>          tcg_out_bundle(s, miB,
>                         tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index),
> -                       tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
> -                                   TCG_REG_R8, TCG_REG_R8, 0xb),
> +                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R8, TCG_REG_R8),
>                         tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
>                                     TCG_REG_B0, TCG_REG_B6));
>      }
> @@ -1734,8 +1738,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>          tcg_out_bundle(s, miI,
>                         tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
>                         INSN_NOP_I,
> -                       tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
> -                                   TCG_REG_R2, TCG_REG_R2, 0xb));
> +                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2));
>          data_reg = TCG_REG_R2;
>          break;
>  
> @@ -1749,8 +1752,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>          tcg_out_bundle(s, miI,
>                         tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
>                         INSN_NOP_I,
> -                       tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
> -                                   TCG_REG_R2, TCG_REG_R2, 0xb));
> +                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2));
>          data_reg = TCG_REG_R2;
>          break;
>  
> @@ -1759,8 +1761,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>                         tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
>                                     TCG_REG_R1, TCG_REG_R2),
>                         tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
> -                       tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
> -                                   TCG_REG_R2, data_reg, 0xb));
> +                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, data_reg));
>          data_reg = TCG_REG_R2;
>          break;
>  
> @@ -1827,8 +1828,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>                             tcg_opc_m1 (TCG_REG_P0, opc_ld_m1[s_bits],
>                                         data_reg, TCG_REG_R2),
>                             INSN_NOP_I,
> -                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> -                                       data_reg, data_reg, 0xb));
> +                           tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
>      } else {
>          if (s_bits == MO_16) {
>              tcg_out_bundle(s, mII,
> @@ -1849,13 +1849,11 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>              tcg_out_bundle(s, miI,
>                             INSN_NOP_M,
>                             INSN_NOP_I,
> -                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> -                                       data_reg, data_reg, 0xb));
> +                           tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
>          } else {
>              tcg_out_bundle(s, mII,
>                             INSN_NOP_M,
> -                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> -                                       data_reg, data_reg, 0xb),
> +                           tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg),
>                             tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, data_reg));
>          }
>      }
> @@ -1880,21 +1878,18 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
>                         INSN_NOP_M,
>                         tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
>                                     data_reg, data_reg, 15, 15),
> -                       tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> -                                   data_reg, data_reg, 0xb));
> +                       tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
>      } else if (bswap && s_bits == MO_32) {
>          tcg_out_bundle(s, mII,
>                         INSN_NOP_M,
>                         tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
>                                     data_reg, data_reg, 31, 31),
> -                       tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> -                                   data_reg, data_reg, 0xb));
> +                       tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
>      } else if (bswap && s_bits == MO_64) {
>          tcg_out_bundle(s, miI,
>                         INSN_NOP_M,
>                         INSN_NOP_I,
> -                       tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> -                                   data_reg, data_reg, 0xb));
> +                       tcg_opc_bswap64_i(TCG_REG_P0, data_reg, data_reg));
>      }
>      if (opc & MO_SIGN) {
>          tcg_out_bundle(s, miI,
> @@ -1944,23 +1939,22 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>                             INSN_NOP_M,
>                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
>                                         TCG_REG_R3, data_reg, 15, 15),
> -                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> -                                       TCG_REG_R3, TCG_REG_R3, 0xb));
> +                           tcg_opc_bswap64_i(TCG_REG_P0,
> +                                             TCG_REG_R3, TCG_REG_R3));
>              data_reg = TCG_REG_R3;
>          } else if (s_bits == MO_32) {
>              tcg_out_bundle(s, mII,
>                             INSN_NOP_M,
>                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
>                                         TCG_REG_R3, data_reg, 31, 31),
> -                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> -                                       TCG_REG_R3, TCG_REG_R3, 0xb));
> +                           tcg_opc_bswap64_i(TCG_REG_P0,
> +                                             TCG_REG_R3, TCG_REG_R3));
>              data_reg = TCG_REG_R3;
>          } else if (s_bits == MO_64) {
>              tcg_out_bundle(s, miI,
>                             INSN_NOP_M,
>                             INSN_NOP_I,
> -                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> -                                       TCG_REG_R3, data_reg, 0xb));
> +                           tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg));
>              data_reg = TCG_REG_R3;
>          }
>      }
> @@ -1990,23 +1984,22 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>                             add_guest_base,
>                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
>                                         TCG_REG_R3, data_reg, 15, 15),
> -                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> -                                       TCG_REG_R3, TCG_REG_R3, 0xb));
> +                           tcg_opc_bswap64_i(TCG_REG_P0,
> +                                             TCG_REG_R3, TCG_REG_R3));
>              data_reg = TCG_REG_R3;
>          } else if (s_bits == MO_32) {
>              tcg_out_bundle(s, mII,
>                             add_guest_base,
>                             tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12,
>                                         TCG_REG_R3, data_reg, 31, 31),
> -                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> -                                       TCG_REG_R3, TCG_REG_R3, 0xb));
> +                           tcg_opc_bswap64_i(TCG_REG_P0,
> +                                             TCG_REG_R3, TCG_REG_R3));
>              data_reg = TCG_REG_R3;
>          } else if (s_bits == MO_64) {
>              tcg_out_bundle(s, miI,
>                             add_guest_base,
>                             INSN_NOP_I,
> -                           tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3,
> -                                       TCG_REG_R3, data_reg, 0xb));
> +                           tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R3, data_reg));
>              data_reg = TCG_REG_R3;
>          }
>          tcg_out_bundle(s, miI,

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 39+ messages in thread

end of thread, other threads:[~2013-11-06 22:34 UTC | newest]

Thread overview: 39+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
2013-10-31 20:21 ` [Qemu-devel] [PATCH 01/20] tcg-ia64: Use TCGMemOp within qemu_ldst routines Richard Henderson
2013-11-03 22:55   ` Aurelien Jarno
2013-11-06 22:05     ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 02/20] tcg-ia64: Use shortcuts for nop insns Richard Henderson
2013-11-03 22:55   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 03/20] tcg-ia64: Handle constant calls Richard Henderson
2013-11-03 22:56   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 04/20] tcg-ia64: Simplify brcond Richard Henderson
2013-11-03 22:56   ` Aurelien Jarno
2013-11-03 23:34     ` Richard Henderson
2013-11-04  4:24       ` Richard Henderson
2013-11-06 22:05         ` Aurelien Jarno
2013-11-06 22:04       ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 05/20] tcg-ia64: Move AREG0 to R32 Richard Henderson
2013-11-06 22:33   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 06/20] tcg-ia64: Avoid unnecessary stop bit in tcg_out_alu Richard Henderson
2013-11-06 22:33   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 07/20] tcg-ia64: Use ADDS for small addition Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 08/20] tcg-ia64: Use SUB_A3 and ADDS_A4 for subtraction Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 09/20] tcg-ia64: Use A3 form of logical operations Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 10/20] tcg-ia64: Introduce tcg_opc_mov_a Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:22 ` [Qemu-devel] [PATCH 11/20] tcg-ia64: Introduce tcg_opc_movi_a Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:22 ` [Qemu-devel] [PATCH 12/20] tcg-ia64: Introduce tcg_opc_ext_i Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:22 ` [Qemu-devel] [PATCH 13/20] tcg-ia64: Introduce tcg_opc_bswap64_i Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:22 ` [Qemu-devel] [PATCH 14/20] tcg-ia64: Re-bundle the tlb load Richard Henderson
2013-10-31 20:22 ` [Qemu-devel] [PATCH 15/20] tcg-ia64: Move bswap for store into " Richard Henderson
2013-10-31 20:22 ` [Qemu-devel] [PATCH 16/20] tcg-ia64: Move tlb addend load into tlb read Richard Henderson
2013-10-31 20:22 ` [Qemu-devel] [PATCH 17/20] tcg-ia64: Reduce code duplication in tcg_out_qemu_ld Richard Henderson
2013-10-31 20:22 ` [Qemu-devel] [PATCH 18/20] tcg-ia64: Convert to new ldst helpers Richard Henderson
2013-10-31 20:22 ` [Qemu-devel] [PATCH 19/20] tcg-ia64: Move part of softmmu slow path out of line Richard Henderson
2013-10-31 20:22 ` [Qemu-devel] [PATCH 20/20] tcg-ia64: Optimize small arguments to exit_tb Richard Henderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.