All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax
@ 2015-02-20  1:02 Ilia Mirkin
       [not found] ` <1424394160-22417-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
  2015-02-20  1:02 ` [PATCH 07/11] nvc0/ir: no instruction can load a double immediate Ilia Mirkin
  0 siblings, 2 replies; 11+ messages in thread
From: Ilia Mirkin @ 2015-02-20  1:02 UTC (permalink / raw
  To: mesa-dev, nouveau

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp  | 66 +++++++++++++++++++++-
 1 file changed, 63 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index dfb093c..e38a3b8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -92,11 +92,14 @@ private:
 
    void emitUADD(const Instruction *);
    void emitFADD(const Instruction *);
+   void emitDADD(const Instruction *);
    void emitUMUL(const Instruction *);
    void emitFMUL(const Instruction *);
+   void emitDMUL(const Instruction *);
    void emitIMAD(const Instruction *);
    void emitISAD(const Instruction *);
    void emitFMAD(const Instruction *);
+   void emitDMAD(const Instruction *);
    void emitMADSP(const Instruction *);
 
    void emitNOT(Instruction *);
@@ -523,6 +526,25 @@ CodeEmitterNVC0::emitFMAD(const Instruction *i)
 }
 
 void
+CodeEmitterNVC0::emitDMAD(const Instruction *i)
+{
+   bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
+
+   emitForm_A(i, HEX64(20000000, 00000001));
+
+   if (i->src(2).mod.neg())
+      code[0] |= 1 << 8;
+
+   roundMode_A(i);
+
+   if (neg1)
+      code[0] |= 1 << 9;
+
+   assert(!i->saturate);
+   assert(!i->ftz);
+}
+
+void
 CodeEmitterNVC0::emitFMUL(const Instruction *i)
 {
    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
@@ -557,6 +579,23 @@ CodeEmitterNVC0::emitFMUL(const Instruction *i)
 }
 
 void
+CodeEmitterNVC0::emitDMUL(const Instruction *i)
+{
+   bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
+
+   emitForm_A(i, HEX64(50000000, 00000001));
+   roundMode_A(i);
+
+   if (neg)
+      code[0] |= 1 << 9;
+
+   assert(!i->saturate);
+   assert(!i->ftz);
+   assert(!i->dnz);
+   assert(!i->postFactor);
+}
+
+void
 CodeEmitterNVC0::emitUMUL(const Instruction *i)
 {
    if (i->encSize == 8) {
@@ -619,6 +658,19 @@ CodeEmitterNVC0::emitFADD(const Instruction *i)
 }
 
 void
+CodeEmitterNVC0::emitDADD(const Instruction *i)
+{
+   assert(i->encSize == 8);
+   emitForm_A(i, HEX64(48000000, 00000001));
+   roundMode_A(i);
+   assert(!i->saturate);
+   assert(!i->ftz);
+   emitNegAbs12(i);
+   if (i->op == OP_SUB)
+      code[0] ^= 1 << 8;
+}
+
+void
 CodeEmitterNVC0::emitUADD(const Instruction *i)
 {
    uint32_t addOp = 0;
@@ -895,6 +947,8 @@ CodeEmitterNVC0::emitMINMAX(const Instruction *i)
    else
    if (!isFloatType(i->dType))
       op |= isSignedType(i->dType) ? 0x23 : 0x03;
+   if (i->dType == TYPE_F64)
+      op |= 0x01;
 
    emitForm_A(i, op);
    emitNegAbs12(i);
@@ -2242,20 +2296,26 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
       break;
    case OP_ADD:
    case OP_SUB:
-      if (isFloatType(insn->dType))
+      if (insn->dType == TYPE_F64)
+         emitDADD(insn);
+      else if (isFloatType(insn->dType))
          emitFADD(insn);
       else
          emitUADD(insn);
       break;
    case OP_MUL:
-      if (isFloatType(insn->dType))
+      if (insn->dType == TYPE_F64)
+         emitDMUL(insn);
+      else if (isFloatType(insn->dType))
          emitFMUL(insn);
       else
          emitUMUL(insn);
       break;
    case OP_MAD:
    case OP_FMA:
-      if (isFloatType(insn->dType))
+      if (insn->dType == TYPE_F64)
+         emitDMAD(insn);
+      else if (isFloatType(insn->dType))
          emitFMAD(insn);
       else
          emitIMAD(insn);
-- 
2.0.5

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 02/11] gk110/ir: add emission of dadd/dmul/dmad opcodes
       [not found] ` <1424394160-22417-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
@ 2015-02-20  1:02   ` Ilia Mirkin
  2015-02-20  1:02   ` [PATCH 03/11] gm107/ir: fix DMUL opcode encoding Ilia Mirkin
                     ` (7 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Ilia Mirkin @ 2015-02-20  1:02 UTC (permalink / raw
  To: mesa-dev-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 80 +++++++++++++++++++++-
 1 file changed, 77 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index d8adc93..204d911 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -84,11 +84,14 @@ private:
 
    void emitUADD(const Instruction *);
    void emitFADD(const Instruction *);
+   void emitDADD(const Instruction *);
    void emitIMUL(const Instruction *);
    void emitFMUL(const Instruction *);
+   void emitDMUL(const Instruction *);
    void emitIMAD(const Instruction *);
    void emitISAD(const Instruction *);
    void emitFMAD(const Instruction *);
+   void emitDMAD(const Instruction *);
 
    void emitNOT(const Instruction *);
    void emitLogicOp(const Instruction *, uint8_t subOp);
@@ -479,6 +482,28 @@ CodeEmitterGK110::emitFMAD(const Instruction *i)
 }
 
 void
+CodeEmitterGK110::emitDMAD(const Instruction *i)
+{
+   assert(!i->saturate);
+   assert(!i->ftz);
+
+   emitForm_21(i, 0x1b8, 0xb38);
+
+   NEG_(34, 2);
+   RND_(36, F);
+
+   bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
+
+   if (code[0] & 0x1) {
+      if (neg1)
+         code[1] ^= 1 << 27;
+   } else
+   if (neg1) {
+      code[1] |= 1 << 19;
+   }
+}
+
+void
 CodeEmitterGK110::emitFMUL(const Instruction *i)
 {
    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
@@ -516,6 +541,29 @@ CodeEmitterGK110::emitFMUL(const Instruction *i)
 }
 
 void
+CodeEmitterGK110::emitDMUL(const Instruction *i)
+{
+   bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
+
+   assert(!i->postFactor);
+   assert(!i->saturate);
+   assert(!i->ftz);
+   assert(!i->dnz);
+
+   emitForm_21(i, 0x240, 0xc40);
+
+   RND_(2a, F);
+
+   if (code[0] & 0x1) {
+      if (neg)
+         code[1] ^= 1 << 27;
+   } else
+   if (neg) {
+      code[1] |= 1 << 19;
+   }
+}
+
+void
 CodeEmitterGK110::emitIMUL(const Instruction *i)
 {
    assert(!i->src(0).mod.neg() && !i->src(1).mod.neg());
@@ -574,6 +622,26 @@ CodeEmitterGK110::emitFADD(const Instruction *i)
 }
 
 void
+CodeEmitterGK110::emitDADD(const Instruction *i)
+{
+   assert(!i->saturate);
+   assert(!i->ftz);
+
+   emitForm_21(i, 0x238, 0xc38);
+   RND_(2a, F);
+   ABS_(31, 0);
+   NEG_(33, 0);
+   if (code[0] & 0x1) {
+      modNegAbsF32_3b(i, 1);
+      if (i->op == OP_SUB) code[1] ^= 1 << 27;
+   } else {
+      NEG_(30, 1);
+      ABS_(34, 1);
+      if (i->op == OP_SUB) code[1] ^= 1 << 16;
+   }
+}
+
+void
 CodeEmitterGK110::emitUADD(const Instruction *i)
 {
    uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg();
@@ -1634,20 +1702,26 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
       break;
    case OP_ADD:
    case OP_SUB:
-      if (isFloatType(insn->dType))
+      if (insn->dType == TYPE_F64)
+         emitDADD(insn);
+      else if (isFloatType(insn->dType))
          emitFADD(insn);
       else
          emitUADD(insn);
       break;
    case OP_MUL:
-      if (isFloatType(insn->dType))
+      if (insn->dType == TYPE_F64)
+         emitDMUL(insn);
+      else if (isFloatType(insn->dType))
          emitFMUL(insn);
       else
          emitIMUL(insn);
       break;
    case OP_MAD:
    case OP_FMA:
-      if (isFloatType(insn->dType))
+      if (insn->dType == TYPE_F64)
+         emitDMAD(insn);
+      else if (isFloatType(insn->dType))
          emitFMAD(insn);
       else
          emitIMAD(insn);
-- 
2.0.5

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 03/11] gm107/ir: fix DMUL opcode encoding
       [not found] ` <1424394160-22417-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
  2015-02-20  1:02   ` [PATCH 02/11] gk110/ir: add emission of dadd/dmul/dmad opcodes Ilia Mirkin
@ 2015-02-20  1:02   ` Ilia Mirkin
  2015-02-20  1:02   ` [PATCH 04/11] gm107/ir: fix DSET boolean float flag Ilia Mirkin
                     ` (6 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Ilia Mirkin @ 2015-02-20  1:02 UTC (permalink / raw
  To: mesa-dev-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 944ceb2..9f4c435 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -924,15 +924,15 @@ CodeEmitterGM107::emitDMUL()
 {
    switch (insn->src(1).getFile()) {
    case FILE_GPR:
-      emitInsn(0x5c680000);
+      emitInsn(0x5c800000);
       emitGPR (0x14, insn->src(1));
       break;
    case FILE_MEMORY_CONST:
-      emitInsn(0x4c680000);
+      emitInsn(0x4c800000);
       emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
       break;
    case FILE_IMMEDIATE:
-      emitInsn(0x38680000);
+      emitInsn(0x38800000);
       emitIMMD(0x14, 19, insn->src(1));
       break;
    default:
-- 
2.0.5

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 04/11] gm107/ir: fix DSET boolean float flag
       [not found] ` <1424394160-22417-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
  2015-02-20  1:02   ` [PATCH 02/11] gk110/ir: add emission of dadd/dmul/dmad opcodes Ilia Mirkin
  2015-02-20  1:02   ` [PATCH 03/11] gm107/ir: fix DMUL opcode encoding Ilia Mirkin
@ 2015-02-20  1:02   ` Ilia Mirkin
  2015-02-20  1:02   ` [PATCH 05/11] gm107/ir: fix F2F flipped stype/dtype flags Ilia Mirkin
                     ` (5 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Ilia Mirkin @ 2015-02-20  1:02 UTC (permalink / raw
  To: mesa-dev-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 9f4c435..73a65fa 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -1060,6 +1060,7 @@ CodeEmitterGM107::emitDSET()
 
    emitABS  (0x36, insn->src(0));
    emitNEG  (0x35, insn->src(1));
+   emitField(0x34, 1, insn->dType == TYPE_F32);
    emitCond4(0x30, insn->setCond);
    emitCC   (0x2f);
    emitABS  (0x2c, insn->src(1));
-- 
2.0.5

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 05/11] gm107/ir: fix F2F flipped stype/dtype flags
       [not found] ` <1424394160-22417-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
                     ` (2 preceding siblings ...)
  2015-02-20  1:02   ` [PATCH 04/11] gm107/ir: fix DSET boolean float flag Ilia Mirkin
@ 2015-02-20  1:02   ` Ilia Mirkin
  2015-02-20  1:02   ` [PATCH 06/11] nvc0/ir: fix lowering of RSQ/RCP/SQRT/MOD to work with F64 Ilia Mirkin
                     ` (4 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Ilia Mirkin @ 2015-02-20  1:02 UTC (permalink / raw
  To: mesa-dev-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 73a65fa..3e1da7e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -731,8 +731,8 @@ CodeEmitterGM107::emitF2F()
    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
    emitFMZ  (0x2c, 1);
    emitRND  (0x27, rnd, 0x2a);
-   emitField(0x0a, 2, util_logbase2(typeSizeof(insn->dType)));
-   emitField(0x08, 2, util_logbase2(typeSizeof(insn->sType)));
+   emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
+   emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
    emitGPR  (0x00, insn->def(0));
 }
 
-- 
2.0.5

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 06/11] nvc0/ir: fix lowering of RSQ/RCP/SQRT/MOD to work with F64
       [not found] ` <1424394160-22417-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
                     ` (3 preceding siblings ...)
  2015-02-20  1:02   ` [PATCH 05/11] gm107/ir: fix F2F flipped stype/dtype flags Ilia Mirkin
@ 2015-02-20  1:02   ` Ilia Mirkin
  2015-02-20  1:02   ` [PATCH 08/11] nvc0/ir: handle zero and negative sqrt arguments Ilia Mirkin
                     ` (3 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Ilia Mirkin @ 2015-02-20  1:02 UTC (permalink / raw
  To: mesa-dev-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.h      |  1 +
 .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp |  4 +-
 .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp |  4 +-
 .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp  |  4 +-
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 43 +++++++++++++++++-----
 5 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 0ff5e5d..529dcb9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -175,6 +175,7 @@ enum operation
 #define NV50_IR_SUBOP_MOV_FINAL    1
 #define NV50_IR_SUBOP_EXTBF_REV    1
 #define NV50_IR_SUBOP_BFIND_SAMT   1
+#define NV50_IR_SUBOP_RCPRSQ_64H   1
 #define NV50_IR_SUBOP_PERMT_F4E    1
 #define NV50_IR_SUBOP_PERMT_B4E    2
 #define NV50_IR_SUBOP_PERMT_RC8    3
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 204d911..674be69 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1771,10 +1771,10 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
       emitCVT(insn);
       break;
    case OP_RSQ:
-      emitSFnOp(insn, 5);
+      emitSFnOp(insn, 5 + 2 * insn->subOp);
       break;
    case OP_RCP:
-      emitSFnOp(insn, 4);
+      emitSFnOp(insn, 4 + 2 * insn->subOp);
       break;
    case OP_LG2:
       emitSFnOp(insn, 3);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 3e1da7e..ee0487f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -1265,8 +1265,8 @@ CodeEmitterGM107::emitMUFU()
    case OP_SIN: mufu = 1; break;
    case OP_EX2: mufu = 2; break;
    case OP_LG2: mufu = 3; break;
-   case OP_RCP: mufu = 4; break;
-   case OP_RSQ: mufu = 5; break;
+   case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
+   case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
    default:
       assert(!"invalid mufu");
       break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index e38a3b8..1a4f6e0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -2365,10 +2365,10 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
       emitCVT(insn);
       break;
    case OP_RSQ:
-      emitSFnOp(insn, 5);
+      emitSFnOp(insn, 5 + 2 * insn->subOp);
       break;
    case OP_RCP:
-      emitSFnOp(insn, 4);
+      emitSFnOp(insn, 4 + 2 * insn->subOp);
       break;
    case OP_LG2:
       emitSFnOp(insn, 3);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 5dfb777..8ac3b26 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -70,7 +70,30 @@ NVC0LegalizeSSA::handleDIV(Instruction *i)
 void
 NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
 {
-   // TODO
+   assert(i->dType == TYPE_F64);
+   // There are instructions that will compute the high 32 bits of the 64-bit
+   // float. We will just stick 0 in the bottom 32 bits.
+
+   bld.setPosition(i, false);
+
+   // 1. Take the source and split it up.
+   Value *src[2], *dst[2], *def = i->getDef(0);
+   bld.mkSplit(src, 4, i->getSrc(0));
+
+   // 2. We don't care about the low 32 bits of the destination. Stick a 0 in.
+   dst[0] = bld.loadImm(NULL, 0);
+   dst[1] = bld.getSSA();
+
+   // 3. The new version of the instruction takes the high 32 bits of the
+   // source and outputs the high 32 bits of the destination.
+   i->setSrc(0, src[1]);
+   i->setDef(0, dst[1]);
+   i->setType(TYPE_F32);
+   i->subOp = NV50_IR_SUBOP_RCPRSQ_64H;
+
+   // 4. Recombine the two dst pieces back into the original destination.
+   bld.setPosition(i, true);
+   bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
 }
 
 bool
@@ -1520,7 +1543,7 @@ NVC0LoweringPass::handleDIV(Instruction *i)
    if (!isFloatType(i->dType))
       return true;
    bld.setPosition(i, false);
-   Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1));
+   Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(typeSizeof(i->dType)), i->getSrc(1));
    i->op = OP_MUL;
    i->setSrc(1, rcp->getDef(0));
    return true;
@@ -1529,13 +1552,13 @@ NVC0LoweringPass::handleDIV(Instruction *i)
 bool
 NVC0LoweringPass::handleMOD(Instruction *i)
 {
-   if (i->dType != TYPE_F32)
+   if (!isFloatType(i->dType))
       return true;
-   LValue *value = bld.getScratch();
-   bld.mkOp1(OP_RCP, TYPE_F32, value, i->getSrc(1));
-   bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(0), value);
-   bld.mkOp1(OP_TRUNC, TYPE_F32, value, value);
-   bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(1), value);
+   LValue *value = bld.getScratch(typeSizeof(i->dType));
+   bld.mkOp1(OP_RCP, i->dType, value, i->getSrc(1));
+   bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(0), value);
+   bld.mkOp1(OP_TRUNC, i->dType, value, value);
+   bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(1), value);
    i->op = OP_SUB;
    i->setSrc(1, value);
    return true;
@@ -1544,8 +1567,8 @@ NVC0LoweringPass::handleMOD(Instruction *i)
 bool
 NVC0LoweringPass::handleSQRT(Instruction *i)
 {
-   Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32,
-                                bld.getSSA(), i->getSrc(0));
+   Instruction *rsq = bld.mkOp1(OP_RSQ, i->dType,
+                                bld.getSSA(typeSizeof(i->dType)), i->getSrc(0));
    i->op = OP_MUL;
    i->setSrc(1, rsq->getDef(0));
 
-- 
2.0.5

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 07/11] nvc0/ir: no instruction can load a double immediate
  2015-02-20  1:02 [PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax Ilia Mirkin
       [not found] ` <1424394160-22417-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
@ 2015-02-20  1:02 ` Ilia Mirkin
  1 sibling, 0 replies; 11+ messages in thread
From: Ilia Mirkin @ 2015-02-20  1:02 UTC (permalink / raw
  To: mesa-dev, nouveau

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index 817ceb8..7d4a859 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -337,6 +337,8 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s,
    if (sf == FILE_IMMEDIATE) {
       Storage &reg = ld->getSrc(0)->asImm()->reg;
 
+      if (typeSizeof(i->sType) > 4)
+         return false;
       if (opInfo[i->op].immdBits != 0xffffffff) {
          if (i->sType == TYPE_F32) {
             if (reg.data.u32 & 0xfff)
-- 
2.0.5

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 08/11] nvc0/ir: handle zero and negative sqrt arguments
       [not found] ` <1424394160-22417-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
                     ` (4 preceding siblings ...)
  2015-02-20  1:02   ` [PATCH 06/11] nvc0/ir: fix lowering of RSQ/RCP/SQRT/MOD to work with F64 Ilia Mirkin
@ 2015-02-20  1:02   ` Ilia Mirkin
  2015-02-20  1:02   ` [PATCH 09/11] nvc0/ir: add support for new TGSI double opcodes (v2) Ilia Mirkin
                     ` (2 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Ilia Mirkin @ 2015-02-20  1:02 UTC (permalink / raw
  To: mesa-dev-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp    | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 8ac3b26..18e8e67 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1567,10 +1567,22 @@ NVC0LoweringPass::handleMOD(Instruction *i)
 bool
 NVC0LoweringPass::handleSQRT(Instruction *i)
 {
-   Instruction *rsq = bld.mkOp1(OP_RSQ, i->dType,
-                                bld.getSSA(typeSizeof(i->dType)), i->getSrc(0));
+   Value *pred = bld.getSSA(1, FILE_PREDICATE);
+   Value *zero = bld.getSSA();
+   Instruction *rsq;
+
+   bld.mkOp1(OP_MOV, TYPE_U32, zero, bld.mkImm(0));
+   if (i->dType == TYPE_F64)
+      zero = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), zero, zero);
+   bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero);
+   bld.mkOp1(OP_MOV, i->dType, i->getDef(0), zero)->setPredicate(CC_P, pred);
+   rsq = bld.mkOp1(OP_RSQ, i->dType,
+                   bld.getSSA(typeSizeof(i->dType)), i->getSrc(0));
+   rsq->setPredicate(CC_NOT_P, pred);
    i->op = OP_MUL;
    i->setSrc(1, rsq->getDef(0));
+   i->setPredicate(CC_NOT_P, pred);
+
 
    return true;
 }
-- 
2.0.5

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 09/11] nvc0/ir: add support for new TGSI double opcodes (v2)
       [not found] ` <1424394160-22417-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
                     ` (5 preceding siblings ...)
  2015-02-20  1:02   ` [PATCH 08/11] nvc0/ir: handle zero and negative sqrt arguments Ilia Mirkin
@ 2015-02-20  1:02   ` Ilia Mirkin
  2015-02-20  1:02   ` [PATCH 10/11] nvc0/ir: remove merge/split pairs to allow normal propagation to occur Ilia Mirkin
  2015-02-20  1:02   ` [PATCH 11/11] nvc0: enable double support Ilia Mirkin
  8 siblings, 0 replies; 11+ messages in thread
From: Ilia Mirkin @ 2015-02-20  1:02 UTC (permalink / raw
  To: mesa-dev-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

v2: drop DDIV

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 196 +++++++++++++++++++++
 1 file changed, 196 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 9ee927f..028a17e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -441,6 +441,27 @@ nv50_ir::DataType Instruction::inferSrcType() const
    case TGSI_OPCODE_IBFE:
    case TGSI_OPCODE_IMSB:
       return nv50_ir::TYPE_S32;
+   case TGSI_OPCODE_D2F:
+   case TGSI_OPCODE_DABS:
+   case TGSI_OPCODE_DNEG:
+   case TGSI_OPCODE_DADD:
+   case TGSI_OPCODE_DMUL:
+   case TGSI_OPCODE_DMAX:
+   case TGSI_OPCODE_DMIN:
+   case TGSI_OPCODE_DSLT:
+   case TGSI_OPCODE_DSGE:
+   case TGSI_OPCODE_DSEQ:
+   case TGSI_OPCODE_DSNE:
+   case TGSI_OPCODE_DRCP:
+   case TGSI_OPCODE_DSQRT:
+   case TGSI_OPCODE_DMAD:
+   case TGSI_OPCODE_DFRAC:
+   case TGSI_OPCODE_DRSQ:
+   case TGSI_OPCODE_DTRUNC:
+   case TGSI_OPCODE_DCEIL:
+   case TGSI_OPCODE_DFLR:
+   case TGSI_OPCODE_DROUND:
+      return nv50_ir::TYPE_F64;
    default:
       return nv50_ir::TYPE_F32;
    }
@@ -455,10 +476,17 @@ nv50_ir::DataType Instruction::inferDstType() const
    case TGSI_OPCODE_FSGE:
    case TGSI_OPCODE_FSLT:
    case TGSI_OPCODE_FSNE:
+   case TGSI_OPCODE_DSEQ:
+   case TGSI_OPCODE_DSGE:
+   case TGSI_OPCODE_DSLT:
+   case TGSI_OPCODE_DSNE:
       return nv50_ir::TYPE_U32;
    case TGSI_OPCODE_I2F:
    case TGSI_OPCODE_U2F:
+   case TGSI_OPCODE_D2F:
       return nv50_ir::TYPE_F32;
+   case TGSI_OPCODE_F2D:
+      return nv50_ir::TYPE_F64;
    default:
       return inferSrcType();
    }
@@ -473,6 +501,7 @@ nv50_ir::CondCode Instruction::getSetCond() const
    case TGSI_OPCODE_ISLT:
    case TGSI_OPCODE_USLT:
    case TGSI_OPCODE_FSLT:
+   case TGSI_OPCODE_DSLT:
       return CC_LT;
    case TGSI_OPCODE_SLE:
       return CC_LE;
@@ -480,15 +509,18 @@ nv50_ir::CondCode Instruction::getSetCond() const
    case TGSI_OPCODE_ISGE:
    case TGSI_OPCODE_USGE:
    case TGSI_OPCODE_FSGE:
+   case TGSI_OPCODE_DSGE:
       return CC_GE;
    case TGSI_OPCODE_SGT:
       return CC_GT;
    case TGSI_OPCODE_SEQ:
    case TGSI_OPCODE_USEQ:
    case TGSI_OPCODE_FSEQ:
+   case TGSI_OPCODE_DSEQ:
       return CC_EQ;
    case TGSI_OPCODE_SNE:
    case TGSI_OPCODE_FSNE:
+   case TGSI_OPCODE_DSNE:
       return CC_NEU;
    case TGSI_OPCODE_USNE:
       return CC_NE;
@@ -601,6 +633,25 @@ static nv50_ir::operation translateOpcode(uint opcode)
    NV50_IR_OPCODE_CASE(USLT, SET);
    NV50_IR_OPCODE_CASE(USNE, SET);
 
+   NV50_IR_OPCODE_CASE(DABS, ABS);
+   NV50_IR_OPCODE_CASE(DNEG, NEG);
+   NV50_IR_OPCODE_CASE(DADD, ADD);
+   NV50_IR_OPCODE_CASE(DMUL, MUL);
+   NV50_IR_OPCODE_CASE(DMAX, MAX);
+   NV50_IR_OPCODE_CASE(DMIN, MIN);
+   NV50_IR_OPCODE_CASE(DSLT, SET);
+   NV50_IR_OPCODE_CASE(DSGE, SET);
+   NV50_IR_OPCODE_CASE(DSEQ, SET);
+   NV50_IR_OPCODE_CASE(DSNE, SET);
+   NV50_IR_OPCODE_CASE(DRCP, RCP);
+   NV50_IR_OPCODE_CASE(DSQRT, SQRT);
+   NV50_IR_OPCODE_CASE(DMAD, MAD);
+   NV50_IR_OPCODE_CASE(DRSQ, RSQ);
+   NV50_IR_OPCODE_CASE(DTRUNC, TRUNC);
+   NV50_IR_OPCODE_CASE(DCEIL, CEIL);
+   NV50_IR_OPCODE_CASE(DFLR, FLOOR);
+   NV50_IR_OPCODE_CASE(DROUND, CVT);
+
    NV50_IR_OPCODE_CASE(IMUL_HI, MUL);
    NV50_IR_OPCODE_CASE(UMUL_HI, MUL);
 
@@ -2880,6 +2931,151 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
    case TGSI_OPCODE_INTERP_OFFSET:
       handleINTERP(dst0);
       break;
+   case TGSI_OPCODE_D2F: {
+      int pos = 0;
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+         Value *dreg = getSSA(8);
+         src0 = fetchSrc(0, pos);
+         src1 = fetchSrc(0, pos + 1);
+         mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1);
+         mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg);
+         pos += 2;
+      }
+      break;
+   }
+   case TGSI_OPCODE_F2D:
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+         Value *dreg = getSSA(8);
+         mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
+         mkSplit(&dst0[c], 4, dreg);
+         c++;
+      }
+      break;
+   case TGSI_OPCODE_DABS:
+   case TGSI_OPCODE_DNEG:
+   case TGSI_OPCODE_DRCP:
+   case TGSI_OPCODE_DSQRT:
+   case TGSI_OPCODE_DRSQ:
+   case TGSI_OPCODE_DTRUNC:
+   case TGSI_OPCODE_DCEIL:
+   case TGSI_OPCODE_DFLR:
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+         src0 = getSSA(8);
+         Value *dst = getSSA(8), *tmp[2];
+         tmp[0] = fetchSrc(0, c);
+         tmp[1] = fetchSrc(0, c + 1);
+         mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+         mkOp1(op, dstTy, dst, src0);
+         mkSplit(&dst0[c], 4, dst);
+         c++;
+      }
+      break;
+   case TGSI_OPCODE_DFRAC:
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+         src0 = getSSA(8);
+         Value *dst = getSSA(8), *tmp[2];
+         tmp[0] = fetchSrc(0, c);
+         tmp[1] = fetchSrc(0, c + 1);
+         mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+         mkOp1(OP_FLOOR, TYPE_F64, dst, src0);
+         mkOp2(OP_SUB, TYPE_F64, dst, src0, dst);
+         mkSplit(&dst0[c], 4, dst);
+         c++;
+      }
+      break;
+   case TGSI_OPCODE_DSLT:
+   case TGSI_OPCODE_DSGE:
+   case TGSI_OPCODE_DSEQ:
+   case TGSI_OPCODE_DSNE: {
+      int pos = 0;
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+         Value *tmp[2];
+
+         src0 = getSSA(8);
+         src1 = getSSA(8);
+         tmp[0] = fetchSrc(0, pos);
+         tmp[1] = fetchSrc(0, pos + 1);
+         mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+         tmp[0] = fetchSrc(1, pos);
+         tmp[1] = fetchSrc(1, pos + 1);
+         mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
+         mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
+         pos += 2;
+      }
+      break;
+   }
+   case TGSI_OPCODE_DADD:
+   case TGSI_OPCODE_DMUL:
+   case TGSI_OPCODE_DMAX:
+   case TGSI_OPCODE_DMIN:
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+         src0 = getSSA(8);
+         src1 = getSSA(8);
+         Value *dst = getSSA(8), *tmp[2];
+         tmp[0] = fetchSrc(0, c);
+         tmp[1] = fetchSrc(0, c + 1);
+         mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+         tmp[0] = fetchSrc(1, c);
+         tmp[1] = fetchSrc(1, c + 1);
+         mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
+         mkOp2(op, dstTy, dst, src0, src1);
+         mkSplit(&dst0[c], 4, dst);
+         c++;
+      }
+      break;
+   case TGSI_OPCODE_DMAD:
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+         src0 = getSSA(8);
+         src1 = getSSA(8);
+         src2 = getSSA(8);
+         Value *dst = getSSA(8), *tmp[2];
+         tmp[0] = fetchSrc(0, c);
+         tmp[1] = fetchSrc(0, c + 1);
+         mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+         tmp[0] = fetchSrc(1, c);
+         tmp[1] = fetchSrc(1, c + 1);
+         mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
+         tmp[0] = fetchSrc(2, c);
+         tmp[1] = fetchSrc(2, c + 1);
+         mkOp2(OP_MERGE, TYPE_U64, src2, tmp[0], tmp[1]);
+         mkOp3(op, dstTy, dst, src0, src1, src2);
+         mkSplit(&dst0[c], 4, dst);
+         c++;
+      }
+      break;
+   case TGSI_OPCODE_DROUND:
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+         src0 = getSSA(8);
+         Value *dst = getSSA(8), *tmp[2];
+         tmp[0] = fetchSrc(0, c);
+         tmp[1] = fetchSrc(0, c + 1);
+         mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+         mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F64, src0)
+         ->rnd = ROUND_NI;
+         mkSplit(&dst0[c], 4, dst);
+         c++;
+      }
+      break;
+   case TGSI_OPCODE_DSSG:
+      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+         src0 = getSSA(8);
+         Value *dst = getSSA(8), *dstF32 = getSSA(), *tmp[2];
+         tmp[0] = fetchSrc(0, c);
+         tmp[1] = fetchSrc(0, c + 1);
+         mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+
+         val0 = getScratch();
+         val1 = getScratch();
+         // The zero is wrong here since it's only 32-bit, but it works out in
+         // the end since it gets replaced with $r63.
+         mkCmp(OP_SET, CC_GT, TYPE_F32, val0, TYPE_F64, src0, zero);
+         mkCmp(OP_SET, CC_LT, TYPE_F32, val1, TYPE_F64, src0, zero);
+         mkOp2(OP_SUB, TYPE_F32, dstF32, val0, val1);
+         mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F32, dstF32);
+         mkSplit(&dst0[c], 4, dst);
+         c++;
+      }
+      break;
    default:
       ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
       assert(0);
-- 
2.0.5

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 10/11] nvc0/ir: remove merge/split pairs to allow normal propagation to occur
       [not found] ` <1424394160-22417-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
                     ` (6 preceding siblings ...)
  2015-02-20  1:02   ` [PATCH 09/11] nvc0/ir: add support for new TGSI double opcodes (v2) Ilia Mirkin
@ 2015-02-20  1:02   ` Ilia Mirkin
  2015-02-20  1:02   ` [PATCH 11/11] nvc0: enable double support Ilia Mirkin
  8 siblings, 0 replies; 11+ messages in thread
From: Ilia Mirkin @ 2015-02-20  1:02 UTC (permalink / raw
  To: mesa-dev-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Because the TGSI interface creates merges for each instruction source
and then splits them back out, there are a lot of unnecessary
merge/split pairs which do essentially nothing. The various modifier/etc
propagation doesn't know how to walk through those, so just remove them
when they're unnecessary.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 30 ++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 62d2ef7..6a4ea4e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -118,6 +118,35 @@ CopyPropagation::visit(BasicBlock *bb)
 
 // =============================================================================
 
+class MergeSplits : public Pass
+{
+private:
+   virtual bool visit(BasicBlock *);
+};
+
+// For SPLIT / MERGE pairs that operate on the same registers, replace the
+// post-merge def with the SPLIT's source.
+bool
+MergeSplits::visit(BasicBlock *bb)
+{
+   Instruction *i, *next, *si;
+
+   for (i = bb->getEntry(); i; i = next) {
+      next = i->next;
+      if (i->op != OP_MERGE || typeSizeof(i->dType) != 8)
+         continue;
+      si = i->getSrc(0)->getInsn();
+      if (si->op != OP_SPLIT || si != i->getSrc(1)->getInsn())
+         continue;
+      i->def(0).replace(si->getSrc(0), false);
+      delete_Instruction(prog, i);
+   }
+
+   return true;
+}
+
+// =============================================================================
+
 class LoadPropagation : public Pass
 {
 private:
@@ -2662,6 +2691,7 @@ Program::optimizeSSA(int level)
 {
    RUN_PASS(1, DeadCodeElim, buryAll);
    RUN_PASS(1, CopyPropagation, run);
+   RUN_PASS(1, MergeSplits, run);
    RUN_PASS(2, GlobalCSE, run);
    RUN_PASS(1, LocalCSE, run);
    RUN_PASS(2, AlgebraicOpt, run);
-- 
2.0.5

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 11/11] nvc0: enable double support
       [not found] ` <1424394160-22417-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
                     ` (7 preceding siblings ...)
  2015-02-20  1:02   ` [PATCH 10/11] nvc0/ir: remove merge/split pairs to allow normal propagation to occur Ilia Mirkin
@ 2015-02-20  1:02   ` Ilia Mirkin
  8 siblings, 0 replies; 11+ messages in thread
From: Ilia Mirkin @ 2015-02-20  1:02 UTC (permalink / raw
  To: mesa-dev-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 8546ac8..686d892 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -291,9 +291,9 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    case PIPE_SHADER_CAP_INTEGERS:
       return 1;
    case PIPE_SHADER_CAP_DOUBLES:
-      return 0;
+      return 1;
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
-      return 0;
+      return 1;
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
       return 0;
    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
-- 
2.0.5

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2015-02-20  1:02 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-02-20  1:02 [PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax Ilia Mirkin
     [not found] ` <1424394160-22417-1-git-send-email-imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org>
2015-02-20  1:02   ` [PATCH 02/11] gk110/ir: add emission of dadd/dmul/dmad opcodes Ilia Mirkin
2015-02-20  1:02   ` [PATCH 03/11] gm107/ir: fix DMUL opcode encoding Ilia Mirkin
2015-02-20  1:02   ` [PATCH 04/11] gm107/ir: fix DSET boolean float flag Ilia Mirkin
2015-02-20  1:02   ` [PATCH 05/11] gm107/ir: fix F2F flipped stype/dtype flags Ilia Mirkin
2015-02-20  1:02   ` [PATCH 06/11] nvc0/ir: fix lowering of RSQ/RCP/SQRT/MOD to work with F64 Ilia Mirkin
2015-02-20  1:02   ` [PATCH 08/11] nvc0/ir: handle zero and negative sqrt arguments Ilia Mirkin
2015-02-20  1:02   ` [PATCH 09/11] nvc0/ir: add support for new TGSI double opcodes (v2) Ilia Mirkin
2015-02-20  1:02   ` [PATCH 10/11] nvc0/ir: remove merge/split pairs to allow normal propagation to occur Ilia Mirkin
2015-02-20  1:02   ` [PATCH 11/11] nvc0: enable double support Ilia Mirkin
2015-02-20  1:02 ` [PATCH 07/11] nvc0/ir: no instruction can load a double immediate Ilia Mirkin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.