From 3c08aabcd355cce36a39bad253e1f57a3bfc0464 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 11:36:21 -0700 Subject: [PATCH 1/4] LZA refactoring --- pipelined/src/fpu/fma.sv | 2 +- pipelined/src/fpu/fmalza.sv | 26 ++++++++++++++++---------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 5f595b1fc..e698cdaf2 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -85,6 +85,6 @@ module fma( fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - fmalza lza(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt); + fmalza lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .Cin(InvA & ~(ZmSticky & ~KillProd)), .SCnt); endmodule diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index a05084e2d..e69ba73f4 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -30,28 +30,34 @@ `include "wally-config.vh" module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. 
Computer Arithmetic, 2001] - input logic [3*`NF+6:0] A, // addend - input logic [2*`NF+3:0] Pm, // product - output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result + input logic [3*`NF+6:0] A, // addend + input logic [2*`NF+3:0] Pm, // product + input logic Cin, // carry in + output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result ); localparam WIDTH = 3*`NF+7; - logic [WIDTH-1:0] B, P, G, K, F; - logic [WIDTH-1:0] Pp1, Gm1, Km1; + logic [WIDTH-1:0] AA, B, P, G, K, F; + logic [WIDTH-2:0] Pp1, Gm1, Km1; assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product + assign AA = A + Cin; - assign P = A^B; - assign G = A&B; - assign K= ~A&~B; + assign P = AA^B; + assign G = AA&B; + assign K= ~AA&~B; + assign Pp1 = P[WIDTH-1:1]; + assign Gm1 = {G[WIDTH-3:0], Cin}; + assign Km1 = {K[WIDTH-3:0], ~Cin}; + // Apply function to determine Leading pattern // - note: the paper linked above uses the numbering system where 0 is the most significant bit //f[n] = ~P[n]&P[n-1] note: n is the MSB //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) assign F[WIDTH-1] = ~P[WIDTH-1]&P[WIDTH-2]; - assign F[WIDTH-2:0] = (P[3*`NF+6:1]&(G[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | K[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~P[3*`NF+6:1]&(K[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); + assign F[WIDTH-2:0] = (Pp1&(G[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | K[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~P[3*`NF+6:1]&(K[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); - lzc #(3*`NF+7) lzc (.num(F), .ZeroCnt(SCnt)); + lzc #(WIDTH) lzc (.num(F), .ZeroCnt(SCnt)); endmodule From 99462049e7998190cad96ff49234ee99c69634e8 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 12:20:23 -0700 Subject: [PATCH 2/4] LZA refactoring switched to Pp1, Gm1, Km1 --- pipelined/src/fpu/fmalza.sv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff 
--git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index e69ba73f4..afffca472 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -42,11 +42,11 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, logic [WIDTH-2:0] Pp1, Gm1, Km1; assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product - assign AA = A + Cin; +// assign AA = A + Cin; - assign P = AA^B; - assign G = AA&B; - assign K= ~AA&~B; + assign P = A^B; + assign G = A&B; + assign K= ~A&~B; assign Pp1 = P[WIDTH-1:1]; assign Gm1 = {G[WIDTH-3:0], Cin}; @@ -57,7 +57,7 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, //f[n] = ~P[n]&P[n-1] note: n is the MSB //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) assign F[WIDTH-1] = ~P[WIDTH-1]&P[WIDTH-2]; - assign F[WIDTH-2:0] = (Pp1&(G[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | K[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~P[3*`NF+6:1]&(K[3*`NF+5:0]&{~K[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); + assign F[WIDTH-2:0] = (Pp1&(G[WIDTH-2:0]&~Km1 | K[WIDTH-2:0]&~Gm1)) | (~Pp1&(K[WIDTH-2:0]&~Km1 | G[WIDTH-2:0]&~Gm1)); lzc #(WIDTH) lzc (.num(F), .ZeroCnt(SCnt)); endmodule From b34d2065c392d60cc4b80bb2d5e4adba5b582fff Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 12:30:42 -0700 Subject: [PATCH 3/4] LZA cleanup --- pipelined/src/fpu/fmalza.sv | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index afffca472..b7b40091d 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -38,7 +38,8 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, localparam WIDTH = 3*`NF+7; - logic [WIDTH-1:0] AA, B, P, G, K, F; + logic [WIDTH-1:0] B,F; + logic [WIDTH-1:0] P, G, K; logic [WIDTH-2:0] Pp1, Gm1, Km1; assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product @@ -48,7 +49,7 @@ module fmalza( // 
[Schmookler & Nowka, Leading zero anticipation and detection, assign G = A&B; assign K= ~A&~B; - assign Pp1 = P[WIDTH-1:1]; + assign Pp1 = {A[WIDTH-1], P[WIDTH-2:1]}; assign Gm1 = {G[WIDTH-3:0], Cin}; assign Km1 = {K[WIDTH-3:0], ~Cin}; From 2869d67e50c06e8746d90c10a3b65e401141467a Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 1 Aug 2022 12:34:00 -0700 Subject: [PATCH 4/4] more lza cleanup --- pipelined/src/fpu/fmalza.sv | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pipelined/src/fpu/fmalza.sv b/pipelined/src/fpu/fmalza.sv index b7b40091d..f70b1bc93 100644 --- a/pipelined/src/fpu/fmalza.sv +++ b/pipelined/src/fpu/fmalza.sv @@ -39,15 +39,17 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, localparam WIDTH = 3*`NF+7; logic [WIDTH-1:0] B,F; - logic [WIDTH-1:0] P, G, K; + logic [WIDTH-1:0] P; + logic [WIDTH-2:0] G, K; // only bits [WIDTH-2:0] of G and K feed F logic [WIDTH-2:0] Pp1, Gm1, Km1; assign B = {{(`NF+3){1'b0}}, Pm}; // Zero extend product -// assign AA = A + Cin; + // next steps***replace P[WIDTH-1] with sub, then remove one bit from A + assign P = A^B; - assign G = A&B; - assign K= ~A&~B; + assign G = A[WIDTH-2:0]&B[WIDTH-2:0]; + assign K= ~A[WIDTH-2:0]&~B[WIDTH-2:0]; assign Pp1 = {A[WIDTH-1], P[WIDTH-2:1]}; assign Gm1 = {G[WIDTH-3:0], Cin}; @@ -58,7 +60,7 @@ module fmalza( // [Schmookler & Nowka, Leading zero anticipation and detection, //f[n] = ~P[n]&P[n-1] note: n is the MSB //f[i] = (P[i+1]&(G[i]&~K[i-1] | K[i]&~G[i-1])) | (~P[i+1]&(K[i]&~K[i-1] | G[i]&~G[i-1])) assign F[WIDTH-1] = ~P[WIDTH-1]&P[WIDTH-2]; - assign F[WIDTH-2:0] = (Pp1&(G[WIDTH-2:0]&~Km1 | K[WIDTH-2:0]&~Gm1)) | (~Pp1&(K[WIDTH-2:0]&~Km1 | G[WIDTH-2:0]&~Gm1)); + assign F[WIDTH-2:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1)); lzc #(WIDTH) lzc (.num(F), .ZeroCnt(SCnt)); endmodule