From dceb6f9034854fc3ee26a48bf367f7ed111573e2 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 9 Oct 2022 04:45:45 -0700 Subject: [PATCH] Moved shift into divsqrt stage and cleaned up comments --- pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv | 20 ++++------ pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv | 14 +++++-- pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv | 40 +++++++++++--------- 3 files changed, 42 insertions(+), 32 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv index 5e22be3e..5c067796 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -55,8 +55,8 @@ module fdivsqrtiter( // U/UM should be 1.b so b+1 bits or b:0 // C needs to be the lenght of the final fraction 0.b so b or b-1:0 /* verilator lint_off UNOPTFLAT */ - logic [`DIVb+3:0] WSA[`DIVCOPIES-1:0]; // Q4.b - logic [`DIVb+3:0] WCA[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b @@ -78,12 +78,8 @@ module fdivsqrtiter( // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. - // Otherwise, the divisor is retained and the partial remainder - // is fed back for the next iteration. - // - when the start signal is asserted X and 0 are loaded into WS and WC - // - otherwise load WSA into the flipflop - // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) - // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized + // Otherwise, the divisor is retained and the residual and result + // are fed back for the next iteration. 
// Residual WS/SC registers/initializaiton mux mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, DivStartE, WSN); @@ -126,17 +122,17 @@ module fdivsqrtiter( for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations if (`RADIX == 2) begin: stage fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtM, - .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), + .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); end else begin: stage logic j1; assign j1 = (i == 0 & ~C[0][`DIVb-1]); fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtM, .j1, - .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), + .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); end - assign WS[i+1] = WSA[i] << `LOGR; - assign WC[i+1] = WCA[i] << `LOGR; + assign WS[i+1] = WSNext[i]; + assign WC[i+1] = WCNext[i]; assign U[i+1] = UNext[i]; assign UM[i+1] = UMNext[i]; end diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv index 987f2357..8ed1664a 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -41,7 +41,7 @@ module fdivsqrtstage2 ( output logic un, output logic [`DIVb+1:0] CNext, output logic [`DIVb:0] UNext, UMNext, - output logic [`DIVb+3:0] WSA, WCA + output logic [`DIVb+3:0] WSNext, WCNext ); /* verilator lint_on UNOPTFLAT */ @@ -49,8 +49,7 @@ module fdivsqrtstage2 ( logic up, uz; logic [`DIVb+3:0] F; logic [`DIVb+3:0] AddIn; - - assign CNext = {1'b1, C[`DIVb+1:1]}; + logic [`DIVb+3:0] WSA, WCA; // Qmient Selection logic // Given partial remainder, select digit of +1, 0, or -1 (up, uz, un) @@ -61,8 +60,11 @@ module fdivsqrtstage2 ( // 0010 = -1 // 0001 = -2 fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un); + + // Sqrt F generation fdivsqrtfgen2 fgen2(.up, .uz, 
.C(CNext), .U, .UM, .F); + // Divisor multiple always_comb if (up) Dsel = DBar; else if (uz) Dsel = '0; // qz @@ -72,7 +74,13 @@ module fdivsqrtstage2 ( // WSA, WCA = WS + WC - qD assign AddIn = SqrtM ? F : Dsel; csa #(`DIVb+4) csa(WS, WC, AddIn, up&~SqrtM, WSA, WCA); + assign WSNext = WSA << 1; + assign WCNext = WCA << 1; + // Shift thermometer code C + assign CNext = {1'b1, C[`DIVb+1:1]}; + + // Unified On-The-Fly Converter to accumulate result fdivsqrtuotfc2 uotfc2(.up, .uz, .C(CNext), .U, .UM, .UNext, .UMNext); endmodule diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv index e463762a..e4931d4d 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -30,7 +30,6 @@ `include "wally-config.vh" -/* verilator lint_off UNOPTFLAT */ module fdivsqrtstage4 ( input logic [`DIVN-2:0] D, input logic [`DIVb+3:0] DBar, D2, DBar2, @@ -41,9 +40,8 @@ module fdivsqrtstage4 ( input logic SqrtM, j1, output logic un, output logic [`DIVb:0] UNext, UMNext, - output logic [`DIVb+3:0] WSA, WCA + output logic [`DIVb+3:0] WSNext, WCNext ); - /* verilator lint_on UNOPTFLAT */ logic [`DIVb+3:0] Dsel; logic [3:0] udigit; @@ -51,7 +49,7 @@ module fdivsqrtstage4 ( logic [`DIVb+3:0] AddIn; logic [4:0] Smsbs; logic CarryIn; - assign CNext = {2'b11, C[`DIVb+1:2]}; + logic [`DIVb+3:0] WSA, WCA; // Digit Selection logic // u encoding: @@ -62,27 +60,35 @@ module fdivsqrtstage4 ( // 0001 = -2 assign Smsbs = U[`DIVb:`DIVb-4]; fdivsqrtqsel4 qsel4(.D, .Smsbs, .WS, .WC, .Sqrt(SqrtM), .j1, .udigit); + assign un = 0; // unused for radix 4 + + // F generation logic fdivsqrtfgen4 fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F); + // Divisor multiple logic always_comb - case (udigit) - 4'b1000: Dsel = DBar2; - 4'b0100: Dsel = DBar; - 4'b0000: Dsel = '0; - 4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; - 4'b0001: Dsel = D2; - default: Dsel = 'x; - endcase + case 
(udigit) + 4'b1000: Dsel = DBar2; + 4'b0100: Dsel = DBar; + 4'b0000: Dsel = '0; + 4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; + 4'b0001: Dsel = D2; + default: Dsel = 'x; + endcase - // Partial Product Generation - // WSA, WCA = WS + WC - qD + // Residual Update + // {WS, WC}Next = (WS + WC - qD or F) << 2 assign AddIn = SqrtM ? F : Dsel; assign CarryIn = ~SqrtM & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA); - - fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext); + assign WSNext = WSA << 2; + assign WCNext = WCA << 2; - assign un = 0; // unused for radix 4 + // Shift thermometer code C + assign CNext = {2'b11, C[`DIVb+1:2]}; + + // On-the-fly converter to accumulate result + fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext); endmodule