diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index e5adea1f9..1f05a4f13 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -101,17 +101,18 @@ `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6)) // division constants -`define RADIX 32'h4 -`define DIVCOPIES 32'h4 +`define RADIX 32'h2 +`define DIVCOPIES 32'h1 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) -`define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input +// `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input +`define DIVN (`NF < `XLEN ? `XLEN : `NF+3) // length of input `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3) `define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3)) `define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) // one interation is required for the integer bit for minimally redundent radix-4 -`define FPDUR ((`DIVLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4)) +`define FPDUR ((`DIVN+2+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4)) `define DURLEN ($clog2(`FPDUR+1)) `define QLEN (`FPDUR*`LOGR*`DIVCOPIES) `define DIVb (`FPDUR*`LOGR*`DIVCOPIES)-1 diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index e16d7b0b5..f06fb6d63 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -30,9 +30,9 @@ add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QNext add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QMNext add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/* add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/* -# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/* -# add wave -group {Divide} -group inter0 -noupdate 
/testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/* -add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/genblk1/qsel4/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/* +# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/genblk1/qsel4/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/* diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index 70610bcd3..a1b19394a 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -65,6 +65,6 @@ module divsqrt( srtfsm srtfsm(.reset, .XsE, .SqrtE, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE, .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); - srt srt(.clk, .Sqrt(SqrtM), .X,.Dpreproc, .NegSticky, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, + srt srt(.clk, .SqrtE, .SqrtM, .X,.Dpreproc, .NegSticky, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .StickyWSA, .DivBusy, .Qm(QmM)); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index 403b65fe4..67fdb4935 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -157,7 +157,7 @@ module flags( // or when the positive res rounds up out of range assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp); assign FmaInvalid = ((XInf | 
YInf) & ZInf & (FmaPs ^ FmaAs) & ~NaNIn) | (XZero & YInf) | (YZero & XInf); - assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt); + assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt&~NaNIn&~XZero); assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp); diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv index 71320fedf..b2d1310ff 100644 --- a/pipelined/src/fpu/otfc.sv +++ b/pipelined/src/fpu/otfc.sv @@ -147,9 +147,9 @@ endmodule module sotfc4( input logic [3:0] s, input logic Sqrt, - input logic [`DIVLEN+3:0] S, SM, - input logic [`DIVLEN+3:0] C, - output logic [`DIVLEN+3:0] SNext, SMNext + input logic [`DIVb+3:0] S, SM, + input logic [`DIVb+3:0] C, + output logic [`DIVb+3:0] SNext, SMNext ); // The on-the-fly converter transfers the square root // bits to the quotient as they come. diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv index cb8d3202b..e9350da26 100644 --- a/pipelined/src/fpu/qsel.sv +++ b/pipelined/src/fpu/qsel.sv @@ -31,11 +31,11 @@ `include "wally-config.vh" module qsel2 ( // *** eventually just change to 4 bits - input logic [`DIVLEN+3:`DIVLEN] ps, pc, + input logic [3:0] ps, pc, output logic qp, qz//, qn ); - logic [`DIVLEN+3:`DIVLEN] p, g; + logic [3:0] p, g; logic magnitude, sign, cout; // The quotient selection logic is presented for simplicity, not @@ -46,9 +46,9 @@ module qsel2 ( // *** eventually just change to 4 bits assign p = ps ^ pc; assign g = ps & pc; - assign magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); - assign cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN])); - assign sign = p[`DIVLEN+3] ^ cout; + assign magnitude = ~(&p[2:0]); + assign cout = g[2] | (p[2] & (g[1] | p[1] & g[0])); + assign sign = p[3] ^ cout; /* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & (ps[52]^pc[52])); assign #1 sign = (ps[55]^pc[55])^ @@ -80,7 +80,7 @@ module fgen2 ( // Generate for both positive and negative bits assign FP = ~(SExt << 
1) & CExt; - assign FN = (SMExt << 1) | (CExt & (~CExt << 2)); + assign FN = (SMExt << 1) | (CExt & ~(CExt << 2)); assign FZ = '0; // Choose which adder input will be used @@ -172,10 +172,10 @@ endmodule //////////////////////////////////// module fgen4 ( input logic [3:0] s, - input logic [`DIVLEN+3:0] C, S, SM, - output logic [`DIVLEN+3:0] F + input logic [`DIVb+3:0] C, S, SM, + output logic [`DIVb+3:0] F ); - logic [`DIVLEN+3:0] F2, F1, F0, FN1, FN2; + logic [`DIVb+3:0] F2, F1, F0, FN1, FN2; // Generate for both positive and negative bits assign F2 = (~S << 2) & (C << 2); diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index db2abf25a..0e244a229 100644 --- a/pipelined/src/fpu/srt.sv +++ b/pipelined/src/fpu/srt.sv @@ -36,7 +36,8 @@ module srt( input logic DivBusy, input logic [`NE-1:0] Xe, Ye, input logic XZeroE, YZeroE, - input logic Sqrt, + input logic SqrtE, + input logic SqrtM, input logic [`DIVb:0] X, input logic [`DIVN-2:0] Dpreproc, input logic NegSticky, @@ -95,21 +96,14 @@ module srt( end -// mux2 #(`DIVb+4) wsmux(NextWSN, {{3{Sqrt}}, X}, DivStart, WSN); //*** modified for sqrt which doesnt work -// flopen #(`DIVb+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]); -// mux2 #(`DIVb+4) wcmux(NextWCN, '0, DivStart, WCN); -// flopen #(`DIVb+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); -// flopen #(`DIVN-1) dflop(clk, DivStart, Dpreproc, D); -// mux2 #(`DIVb) Cmux(NextC, {Sqrt, {(`DIVb-1){1'b0}}}, DivStart, CMux); -// flop #(`DIVb) cflop(clk, CMux, C[0]); - - mux2 #(`DIVb+4) wsmux(NextWSN, {3'b0, X}, DivStart, WSN); + // mux2 #(`DIVb+4) wsmux(NextWSN, {3'b0, X}, DivStart, WSN); + mux2 #(`DIVb+4) wsmux(NextWSN, {{3{SqrtE&~XZeroE}}, X}, DivStart, WSN); flopen #(`DIVb+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]); mux2 #(`DIVb+4) wcmux(NextWCN, '0, DivStart, WCN); flopen #(`DIVb+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); flopen #(`DIVN-1) dflop(clk, DivStart, Dpreproc, D); - mux2 #(`DIVb) Cmux({2'b11, C[`DIVCOPIES-1][`DIVb-1:2]}, {Sqrt, 
{(`DIVb-1){1'b0}}}, DivStart, CMux); - flop #(`DIVb) cflop(clk, CMux, C[0]); + mux2 #(`DIVb) Cmux(NextC, {1'b1, {(`DIVb-1){1'b0}}}, DivStart, CMux); + flopen #(`DIVb) cflop(clk, DivStart|DivBusy, CMux, C[0]); // Divisor Selections // - choose the negitive version of what's being selected @@ -123,7 +117,7 @@ module srt( genvar i; generate for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations - divinteration divinteration(.D, .DBar, .D2, .DBar2, .Sqrt, + divinteration divinteration(.D, .DBar, .D2, .DBar2, .SqrtM, .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]), .C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i])); if(i<(`DIVCOPIES-1)) begin @@ -151,11 +145,11 @@ module srt( flopen #(`DIVb+1) QMreg(clk, DivBusy, QMMux, QM[0]); flopr #(`DIVb+1) SMreg(clk, DivStart, SMNext[`DIVCOPIES-1], SM[0]); - mux2 #(`DIVb+1) Smux(SNext[`DIVCOPIES-1], {Sqrt, {(`DIVb){1'b0}}}, DivStart, SMux); + mux2 #(`DIVb+1) Smux(SNext[`DIVCOPIES-1], {SqrtM, {(`DIVb){1'b0}}}, DivStart, SMux); flop #(`DIVb+1) Sreg(clk, SMux, S[0]); // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted always_comb - if(Sqrt) // sqrt ouputs in the range (1, .5] + if(SqrtM) // sqrt ouputs in the range (1, .5] if(NegSticky) Qm = {SM[0][`DIVb-1-(`RADIX/4):0], 1'b0}; else Qm = {S[0][`DIVb-1-(`RADIX/4):0], 1'b0}; else @@ -186,7 +180,7 @@ module divinteration ( input logic [`DIVb:0] S, SM, input logic [`DIVb+3:0] WS, WC, input logic [`DIVb-1:0] C, - input logic Sqrt, + input logic SqrtM, output logic [`DIVb:0] QNext, QMNext, output logic [`DIVb:0] SNext, SMNext, output logic [`DIVb+3:0] WSA, WCA @@ -211,7 +205,7 @@ module divinteration ( qsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], qp, qz); fgen2 fgen2(.sp(qp), .sz(qz), .C, .S, .SM, .F); end else begin - qsel4 qsel4(.D, .WS, .WC, .Sqrt, .q); + qsel4 qsel4(.D, .WS, .WC, .Sqrt(SqrtM), .q); // fgen4 
fgen4(.s(q), .C, .S, .SM, .F); end @@ -230,11 +224,11 @@ module divinteration ( end // Partial Product Generation // WSA, WCA = WS + WC - qD - assign AddIn = Sqrt ? F : Dsel; + assign AddIn = SqrtM ? F : Dsel; if (`RADIX == 2) begin : csa - csa #(`DIVb+4) csa(WS, WC, AddIn, qp&~Sqrt, WSA, WCA); + csa #(`DIVb+4) csa(WS, WC, AddIn, qp&~SqrtM, WSA, WCA); end else begin - csa #(`DIVb+4) csa(WS, WC, AddIn, |q[3:2]&~Sqrt, WSA, WCA); + csa #(`DIVb+4) csa(WS, WC, AddIn, |q[3:2]&~SqrtM, WSA, WCA); end if (`RADIX == 2) begin : otfc @@ -242,7 +236,7 @@ module divinteration ( sotfc2 sotfc2(.sp(qp), .sz(qz), .C, .S, .SM, .SNext, .SMNext); end else begin otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext); - // sotfc4 sotfc4(.s(q), .Sqrt, .C, .S, .SM, .SNext, .SMNext); + // sotfc4 sotfc4(.s(q), .Sqrt(SqrtM), .C, .S, .SM, .SNext, .SMNext); end endmodule diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 17383d1f7..88190aad2 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -85,10 +85,6 @@ module testbenchfp; logic [`DURLEN-1:0] EarlyTermShift; logic DivStart, DivBusy; logic reset = 1'b0; - logic [`DIVLEN-1:0] DivX; - logic [`DIVLEN-1:0] Dpreproc; - logic [`DIVLEN+3:0] NextWSN, WS; - logic [`DIVLEN+3:0] NextWCN, WC; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`DURLEN-1:0] Dur;