From a36747fda04aa0f00998cf6164d5d14ff97b1596 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 19 Sep 2022 08:30:59 -0700 Subject: [PATCH] Finished unified divsqrt otfc and fgen name changes --- pipelined/config/shared/wally-shared.vh | 2 +- pipelined/src/fpu/fdivsqrtiter.sv | 4 +- pipelined/src/fpu/fdivsqrtpostproc.sv | 6 ++- pipelined/src/fpu/fdivsqrtstage2.sv | 8 ++-- pipelined/src/fpu/fdivsqrtstage4.sv | 10 ++--- pipelined/src/fpu/otfc.sv | 51 ++++++++++++------------- pipelined/src/fpu/qsel.sv | 16 ++++---- 7 files changed, 49 insertions(+), 48 deletions(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 1f05a4f1..214c747d 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -102,7 +102,7 @@ // division constants `define RADIX 32'h2 -`define DIVCOPIES 32'h1 +`define DIVCOPIES 32'h5 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input `define DIVN (`NF < `XLEN ? 
`XLEN : `NF+3) // length of input diff --git a/pipelined/src/fpu/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrtiter.sv index 399daeb8..ea2c99bc 100644 --- a/pipelined/src/fpu/fdivsqrtiter.sv +++ b/pipelined/src/fpu/fdivsqrtiter.sv @@ -119,13 +119,13 @@ module fdivsqrtiter( if (`RADIX == 2) begin: stage fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtM, .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), - .C(C[i]), .S(U[i]), .SM(UM[i]), .CNext(C[i+1]), .SNext(UNext[i]), .SMNext(UMNext[i]), .qn(qn[i])); + .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .qn(qn[i])); end else begin: stage logic j1; assign j1 = (i == 0 & ~C[0][`DIVb-1]); fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtM, .j1, .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), - .C(C[i]), .S(U[i]), .SM(UM[i]), .CNext(C[i+1]), .SNext(UNext[i]), .SMNext(UMNext[i]), .qn(qn[i])); + .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .qn(qn[i])); end if(i<(`DIVCOPIES-1)) begin assign WS[i+1] = WSA[i] << `LOGR; diff --git a/pipelined/src/fpu/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrtpostproc.sv index a9015ad6..c882dffa 100644 --- a/pipelined/src/fpu/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrtpostproc.sv @@ -71,11 +71,13 @@ module fdivsqrtpostproc( // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted always_comb - if(SqrtM) // sqrt ouputs in the range (1, .5] + if(NegSticky) QmM = FirstUM[`DIVb-(`RADIX/4):0] << SqrtM; + else QmM = FirstU[`DIVb-(`RADIX/4):0] << SqrtM; +/* if(SqrtM) // sqrt outputs in the range (1, .5] if(NegSticky) QmM = {FirstUM[`DIVb-1-(`RADIX/4):0], 1'b0}; else QmM = {FirstU[`DIVb-1-(`RADIX/4):0], 1'b0}; else if(NegSticky) QmM = FirstUM[`DIVb-(`RADIX/4):0]; - else QmM = FirstU[`DIVb-(`RADIX/4):0]; + else QmM = FirstU[`DIVb-(`RADIX/4):0]; */ endmodule \ No newline at end of file diff --git 
a/pipelined/src/fpu/fdivsqrtstage2.sv b/pipelined/src/fpu/fdivsqrtstage2.sv index 12f83ac0..1671ddaa 100644 --- a/pipelined/src/fpu/fdivsqrtstage2.sv +++ b/pipelined/src/fpu/fdivsqrtstage2.sv @@ -34,13 +34,13 @@ module fdivsqrtstage2 ( input logic [`DIVN-2:0] D, input logic [`DIVb+3:0] DBar, D2, DBar2, - input logic [`DIVb:0] S, SM, + input logic [`DIVb:0] U, UM, input logic [`DIVb+3:0] WS, WC, input logic [`DIVb+1:0] C, input logic SqrtM, output logic qn, output logic [`DIVb+1:0] CNext, - output logic [`DIVb:0] SNext, SMNext, + output logic [`DIVb:0] UNext, UMNext, output logic [`DIVb+3:0] WSA, WCA ); /* verilator lint_on UNOPTFLAT */ @@ -61,7 +61,7 @@ module fdivsqrtstage2 ( // 0010 = -1 // 0001 = -2 qsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], qp, qz, qn); - fgen2 fgen2(.sp(qp), .sz(qz), .C(CNext), .S, .SM, .F); + fgen2 fgen2(.sp(qp), .sz(qz), .C(CNext), .U, .UM, .F); assign Dsel = {`DIVb+4{~qz}}&(qp ? DBar : {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}); // Partial Product Generation @@ -69,7 +69,7 @@ module fdivsqrtstage2 ( assign AddIn = SqrtM ? 
F : Dsel; csa #(`DIVb+4) csa(WS, WC, AddIn, qp&~SqrtM, WSA, WCA); - sotfc2 sotfc2(.sp(qp), .sz(qz), .C(CNext), .S, .SM, .SNext, .SMNext); + uotfc2 uotfc2(.sp(qp), .sz(qz), .C(CNext), .U, .UM, .UNext, .UMNext); endmodule diff --git a/pipelined/src/fpu/fdivsqrtstage4.sv b/pipelined/src/fpu/fdivsqrtstage4.sv index 643c914a..9f70b9c2 100644 --- a/pipelined/src/fpu/fdivsqrtstage4.sv +++ b/pipelined/src/fpu/fdivsqrtstage4.sv @@ -34,13 +34,13 @@ module fdivsqrtstage4 ( input logic [`DIVN-2:0] D, input logic [`DIVb+3:0] DBar, D2, DBar2, - input logic [`DIVb:0] S, SM, + input logic [`DIVb:0] U, UM, input logic [`DIVb+3:0] WS, WC, input logic [`DIVb+1:0] C, output logic [`DIVb+1:0] CNext, input logic SqrtM, j1, output logic qn, - output logic [`DIVb:0] SNext, SMNext, + output logic [`DIVb:0] UNext, UMNext, output logic [`DIVb+3:0] WSA, WCA ); /* verilator lint_on UNOPTFLAT */ @@ -61,9 +61,9 @@ module fdivsqrtstage4 ( // 0000 = 0 // 0010 = -1 // 0001 = -2 - assign Smsbs = S[`DIVb:`DIVb-4]; + assign Smsbs = U[`DIVb:`DIVb-4]; qsel4 qsel4(.D, .Smsbs, .WS, .WC, .Sqrt(SqrtM), .j1, .q); - fgen4 fgen4(.s(q), .C({2'b11, CNext}), .S({3'b000, S}), .SM({3'b000, SM}), .F); + fgen4 fgen4(.s(q), .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F); always_comb case (q) @@ -81,7 +81,7 @@ module fdivsqrtstage4 ( assign CarryIn = ~SqrtM & (q[3] | q[2]); // +1 for 2's complement of -D and -2D csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA); - sotfc4 sotfc4(.s(q), .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .S, .SM, .SNext, .SMNext); + uotfc4 uotfc4(.s(q), .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext); assign qn = 0; // unused for radix 4 endmodule diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv index 8f12a6d6..cc4ab534 100644 --- a/pipelined/src/fpu/otfc.sv +++ b/pipelined/src/fpu/otfc.sv @@ -31,45 +31,44 @@ `include "wally-config.vh" /////////////////////////////// -// Square Root OTFC, Radix 2 // +// Unified OTFC, Radix 2 // /////////////////////////////// 
-module sotfc2( +module uotfc2( input logic sp, sz, input logic [`DIVb+1:0] C, - input logic [`DIVb:0] S, SM, - output logic [`DIVb:0] SNext, SMNext + input logic [`DIVb:0] U, UM, + output logic [`DIVb:0] UNext, UMNext ); - // The on-the-fly converter transfers the square root + // The on-the-fly converter transfers the divsqrt // bits to the quotient as they come. - // Use this otfc for division and square root. logic [`DIVb:0] K; assign K = (C[`DIVb:0] & ~(C[`DIVb:0] << 1)); always_comb begin if (sp) begin - SNext = S | K; - SMNext = S; + UNext = U | K; + UMNext = U; end else if (sz) begin - SNext = S; - SMNext = SM | K; + UNext = U; + UMNext = UM | K; end else begin // If sp and sz are not true, then sn is - SNext = SM | K; - SMNext = SM; + UNext = UM | K; + UMNext = UM; end end endmodule /////////////////////////////// -// Square Root OTFC, Radix 4 // +// Unified OTFC, Radix 4 // /////////////////////////////// -module sotfc4( +module uotfc4( input logic [3:0] s, input logic Sqrt, - input logic [`DIVb:0] S, SM, + input logic [`DIVb:0] U, UM, input logic [`DIVb:0] C, - output logic [`DIVb:0] SNext, SMNext + output logic [`DIVb:0] UNext, UMNext ); // The on-the-fly converter transfers the square root // bits to the quotient as they come. 
@@ -82,20 +81,20 @@ module sotfc4( always_comb begin if (s[3]) begin - SNext = S | K2; - SMNext = S | K1; + UNext = U | K2; + UMNext = U | K1; end else if (s[2]) begin - SNext = S | K1; - SMNext = S; + UNext = U | K1; + UMNext = U; end else if (s[1]) begin - SNext = SM | K3; - SMNext = SM | K2; + UNext = UM | K3; + UMNext = UM | K2; end else if (s[0]) begin - SNext = SM | K2; - SMNext = SM | K1; + UNext = UM | K2; + UMNext = UM | K1; end else begin // If sp and sn are not true, then sz is - SNext = S; - SMNext = SM | K3; + UNext = U; + UMNext = UM | K3; end end diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv index cb1c72ef..84614197 100644 --- a/pipelined/src/fpu/qsel.sv +++ b/pipelined/src/fpu/qsel.sv @@ -68,14 +68,14 @@ endmodule module fgen2 ( input logic sp, sz, input logic [`DIVb+1:0] C, - input logic [`DIVb:0] S, SM, + input logic [`DIVb:0] U, UM, output logic [`DIVb+3:0] F ); logic [`DIVb+3:0] FP, FN, FZ; logic [`DIVb+3:0] SExt, SMExt, CExt; - assign SExt = {3'b0, S}; - assign SMExt = {3'b0, SM}; + assign SExt = {3'b0, U}; + assign SMExt = {3'b0, UM}; assign CExt = {2'b11, C}; // extend C from Q2.k to Q4.k // Generate for both positive and negative bits @@ -254,17 +254,17 @@ endmodule //////////////////////////////////// module fgen4 ( input logic [3:0] s, - input logic [`DIVb+3:0] C, S, SM, + input logic [`DIVb+3:0] C, U, UM, output logic [`DIVb+3:0] F ); logic [`DIVb+3:0] F2, F1, F0, FN1, FN2; // Generate for both positive and negative bits - assign F2 = (~S << 2) & (C << 2); - assign F1 = ~(S << 1) & C; + assign F2 = (~U << 2) & (C << 2); + assign F1 = ~(U << 1) & C; assign F0 = '0; - assign FN1 = (SM << 1) | (C & ~(C << 3)); - assign FN2 = (SM << 2) | ((C << 2)&~(C << 4)); + assign FN1 = (UM << 1) | (C & ~(C << 3)); + assign FN2 = (UM << 2) | ((C << 2)&~(C << 4)); // Choose which adder input will be used