From 86ebdd05f0753693a1d86e27ea48f7e897e4fe90 Mon Sep 17 00:00:00 2001 From: cturek Date: Thu, 21 Jul 2022 17:59:10 +0000 Subject: [PATCH 1/2] Division working too --- pipelined/srt/srt.sv | 2 +- pipelined/srt/testbench.sv | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 157be2e7f..a7216b9ff 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -2,7 +2,7 @@ // srt.sv // // Written: David_Harris@hmc.edu 13 January 2022 -// Modified: cturek@hmc.edu June 2022 +// Modified: cturek@hmc.edu July 2022 // // Purpose: Combined Divide and Square Root Floating Point and Integer Unit // diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 39696af44..7a4e1897b 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -72,7 +72,7 @@ module testbench; // Equip Int test or Sqrt test assign Int = 1'b0; - assign Sqrt = 1'b1; + assign Sqrt = 1'b0; // Divider srt srt(.clk, .Start(req), @@ -101,7 +101,7 @@ module testbench; begin testnum = 0; errors = 0; - $readmemh ("sqrttestvectors", Tests); + $readmemh ("testvectors", Tests); Vec = Tests[testnum]; a = Vec[`mema]; {asign, aExp, afrac} = a; From fbe8bb2298413d731f70306e100e8cc3881222ec Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 21 Jul 2022 19:38:06 +0000 Subject: [PATCH 2/2] radix-4 division integrated into srt - not tested --- addins/embench-iot | 2 +- pipelined/config/shared/wally-shared.vh | 2 +- pipelined/regression/wave-fpu.do | 2 +- pipelined/src/fpu/divsqrt.sv | 13 +-- pipelined/src/fpu/fctrl.sv | 4 +- pipelined/src/fpu/fpu.sv | 2 +- pipelined/src/fpu/otfc.sv | 70 ++++++++++++++++ pipelined/src/fpu/qsel.sv | 89 +++++++++++++++++---- pipelined/src/fpu/srt.sv | 102 +++++++++++------------- pipelined/src/fpu/srtpreproc.sv | 78 +++++++++++++++--- pipelined/testbench/testbench-fp.sv | 2 +- 11 files changed, 271 insertions(+), 95 deletions(-) diff --git a/addins/embench-iot b/addins/embench-iot index 58ffa0c68..261a65e0a 160000 --- a/addins/embench-iot +++ b/addins/embench-iot @@ -1 +1 @@ -Subproject commit 58ffa0c68c52f291d12c5902fc787d2bca94ddf9 +Subproject commit 261a65e0a2d3e8d62d81b1d8fe7e309a096bc6a9 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 015ef2611..b2abdff7b 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -101,7 +101,7 @@ `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6)) // division constants -`define RADIX 32'h2 +`define RADIX 32'h4 `define DIVCOPIES 32'h1 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3) diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 98c72f170..b71207e09 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -33,7 +33,7 @@ add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/intera # add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/* # add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/* -add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/* +# add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/* add wave -group {Testbench} -noupdate /testbenchfp/* add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index a2f0ba8e3..7ba44a953 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -41,7 +41,8 @@ module divsqrt( input logic XNaNE, YNaNE, input logic DivStartE, input logic StallM, - input logic StallE, + input logic StallE, + input logic SqrtE, SqrtM, output logic DivSM, output logic DivBusy, output logic DivDone, @@ -55,15 +56,15 @@ module divsqrt( logic [`DIVLEN+3:0] WS, WC; logic [`DIVLEN+3:0] StickyWSA; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; - logic [`DIVLEN-1:0] X; - logic [`DIVLEN-1:0] Dpreproc; + logic [`DIVLEN+3:0] X; + logic [`DIVLEN+3:0] Dpreproc; logic [`DURLEN-1:0] Dur; logic NegSticky; - srtpreproc srtpreproc(.Xm(XmE), .Dur, .Ym(YmE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt); + srtpreproc srtpreproc(.clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Dur, .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE, .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); - srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, - .StickyWSA, .DivBusy, .Qm(QmM), .Rem(), .QeM); + srt srt(.clk, .Sqrt(SqrtM), .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, + .StickyWSA, .DivBusy, .Qm(QmM), .Rem()); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 5b6b22ef0..20e4a0099 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -219,8 +219,8 @@ module fctrl ( // 110 - add // 111 - sub // Div: -// 0 - ??? -// 1 - ??? +// 0 - div +// 1 - sqrt // Cvt Int: {Int to Fp?, 64 bit int?, signed int?} // Cvt Fp: output format // 10 - to half diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index cfa46b657..3e214b0f1 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -259,7 +259,7 @@ module fpu ( // - fdiv // - fsqrt // *** add other opperations - divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, + divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), .StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal .EarlyTermShiftM, .QmM, .DivDone(DivDoneM)); diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv index 66af5b3c5..7ecb823e6 100644 --- a/pipelined/src/fpu/otfc.sv +++ b/pipelined/src/fpu/otfc.sv @@ -58,6 +58,41 @@ module otfc2 ( endmodule +/////////////////////////////// +// Square Root OTFC, Radix 2 // +/////////////////////////////// +module sotfc2( + input logic clk, + input logic Start, + input logic sp, sn, + input logic Sqrt, + input logic [`DIVLEN+3:0] C, + output logic [`DIVLEN-2:0] Sq, + output logic [`DIVLEN+3:0] S, SM +); + // The on-the-fly converter transfers the square root + // bits to the quotient as they come. + // Use this otfc for division and square root. + logic [`DIVLEN+3:0] SNext, SMNext, SMux; + + flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM); + mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux); + flop #(`DIVLEN+4) Sreg(clk, SMux, S); + + always_comb begin + if (sp) begin + SNext = S | (C & ~(C << 1)); + SMNext = S; + end else if (sn) begin + SNext = SM | (C & ~(C << 1)); + SMNext = SM; + end else begin // If sp and sn are not true, then sz is + SNext = S; + SMNext = SM | (C & ~(C << 1)); + end + end + assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0]; +endmodule module otfc4 ( input logic [3:0] q, @@ -110,3 +145,38 @@ module otfc4 ( // Final Qmeint is in the range [.5, 2) endmodule + +/////////////////////////////// +// Square Root OTFC, Radix 4 // +/////////////////////////////// +module sotfc4( + input logic [3:0] s, + input logic Sqrt, + input logic [`DIVLEN+3:0] S, SM, + input logic [`DIVLEN+3:0] C, + output logic [`DIVLEN+3:0] SNext, SMNext +); + // The on-the-fly converter transfers the square root + // bits to the quotient as they come. + // Use this otfc for division and square root. + + always_comb begin + if (s[3]) begin + SNext = S | ((C << 1)&~(C << 2)); + SMNext = S | (C&~(C << 1)); + end else if (s[2]) begin + SNext = S | (C&~(C << 1)); + SMNext = S; + end else if (s[1]) begin + SNext = SM | (C&~(C << 2)); + SMNext = SM | ((C << 1)&~(C << 2)); + end else if (s[0]) begin + SNext = SM | ((C << 1)&~(C << 2)); + SMNext = SM | (C&~(C << 1)); + end else begin // If sp and sn are not true, then sz is + SNext = S; + SMNext = SM | (C & ~(C << 2)); + end + end + +endmodule diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv index 202b3ee81..87c6a4b25 100644 --- a/pipelined/src/fpu/qsel.sv +++ b/pipelined/src/fpu/qsel.sv @@ -62,9 +62,36 @@ module qsel2 ( // *** eventually just change to 4 bits // assign #1 qn = magnitude & sign; endmodule +//////////////////////////////////// +// Adder Input Generation, Radix 2 // +//////////////////////////////////// +module fgen2 ( + input logic sp, sn, + input logic [`DIVLEN+3:0] C, S, SM, + output logic [`DIVLEN+3:0] F +); + logic [`DIVLEN+3:0] FP, FN, FZ; + + // Generate for both positive and negative bits + assign FP = ~(S << 1) & C; + assign FN = (SM << 1) | (C & (~C << 2)); + assign FZ = '0; + + // Choose which adder input will be used + + always_comb + if (sp) F = FP; + else if (sn) F = FN; + else F = FZ; + + // assign F = sp ? FP : (sn ? FN : FZ); + +endmodule + module qsel4 ( input logic [`DIVLEN+3:0] D, input logic [`DIVLEN+3:0] WS, WC, + input logic Sqrt, output logic [3:0] q ); logic [6:0] Wmsbs; @@ -91,45 +118,77 @@ module qsel4 ( else if(w2>=4) QSel4[i] = 4'b0100; else if(w2>=-4) QSel4[i] = 4'b0000; else if(w2>=-13) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else QSel4[i] = 4'b0001; 1: if(w2>=14) QSel4[i] = 4'b1000; else if(w2>=4) QSel4[i] = 4'b0100; - else if(w2>=-6) QSel4[i] = 4'b0000; - else if(w2>=-15) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else if(w2>=-5) QSel4[i] = 4'b0000; // was -6 + else if(~Sqrt&(w2>=-15)) QSel4[i] = 4'b0010; // divide case + else if( Sqrt&(w2>=-14)) QSel4[i] = 4'b0010; // sqrt case + else QSel4[i] = 4'b0001; 2: if(w2>=15) QSel4[i] = 4'b1000; else if(w2>=4) QSel4[i] = 4'b0100; else if(w2>=-6) QSel4[i] = 4'b0000; else if(w2>=-16) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else QSel4[i] = 4'b0001; 3: if(w2>=16) QSel4[i] = 4'b1000; else if(w2>=4) QSel4[i] = 4'b0100; else if(w2>=-6) QSel4[i] = 4'b0000; - else if(w2>=-18) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else if(w2>=-17) QSel4[i] = 4'b0010; // was -18 + else QSel4[i] = 4'b0001; 4: if(w2>=18) QSel4[i] = 4'b1000; else if(w2>=6) QSel4[i] = 4'b0100; - else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-20) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else if(w2>=-6) QSel4[i] = 4'b0000; // was -8 + else if(~Sqrt&(w2>=-20)) QSel4[i] = 4'b0010; // divide case + else if( Sqrt&(w2>=-18)) QSel4[i] = 4'b0010; // sqrt case + else QSel4[i] = 4'b0001; 5: if(w2>=20) QSel4[i] = 4'b1000; else if(w2>=6) QSel4[i] = 4'b0100; else if(w2>=-8) QSel4[i] = 4'b0000; else if(w2>=-20) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else QSel4[i] = 4'b0001; 6: if(w2>=20) QSel4[i] = 4'b1000; else if(w2>=8) QSel4[i] = 4'b0100; else if(w2>=-8) QSel4[i] = 4'b0000; else if(w2>=-22) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 7: if(w2>=24) QSel4[i] = 4'b1000; + else QSel4[i] = 4'b0001; + 7: if(w2>=22) QSel4[i] = 4'b1000; // was 24 else if(w2>=8) QSel4[i] = 4'b0100; else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-24) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else if(w2>=-23) QSel4[i] = 4'b0010; // was -24 + else QSel4[i] = 4'b0001; endcase end end assign q = QSel4[{Dmsbs,Wmsbs}]; endmodule + +//////////////////////////////////// +// Adder Input Generation, Radix 4 // +//////////////////////////////////// +module fgen4 ( + input logic [3:0] s, + input logic [`DIVLEN+3:0] C, S, SM, + output logic [`DIVLEN+3:0] F +); + logic [`DIVLEN+3:0] F2, F1, F0, FN1, FN2; + + // Generate for both positive and negative bits + assign F2 = (~S << 2) & (C << 2); + assign F1 = ~(S << 1) & C; + assign F0 = '0; + assign FN1 = (SM << 1) | (C & ~(C << 2)); + assign FN2 = (SM << 2) | ((C << 2)&~(C <<4)); + + // Choose which adder input will be used + + always_comb + if (s[3]) F = F2; + else if (s[2]) F = F1; + else if (s[1]) F = FN1; + else if (s[0]) F = FN2; + else F = F0; + + // assign F = sp ? FP : (sn ? FN : FZ); + +endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index 7e9f9922a..633ac1787 100644 --- a/pipelined/src/fpu/srt.sv +++ b/pipelined/src/fpu/srt.sv @@ -34,18 +34,17 @@ module srt( input logic clk, input logic DivStart, input logic DivBusy, - input logic [`FMTBITS-1:0] FmtE, input logic [`NE-1:0] Xe, Ye, input logic XZeroE, YZeroE, - input logic [`DIVLEN-1:0] X, - input logic [`DIVLEN-1:0] Dpreproc, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - input logic NegSticky, + input logic Sqrt, + input logic [`DIVLEN+3:0] X, + input logic [`DIVLEN+3:0] Dpreproc, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic NegSticky, output logic [`QLEN-1-(`RADIX/4):0] Qm, output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] StickyWSA, output logic [`DIVLEN+3:0] FirstWS, FirstWC, - output logic [`NE+1:0] QeM, output logic [`XLEN-1:0] Rem ); @@ -59,13 +58,19 @@ module srt( logic [`QLEN-1:0] QM[`DIVCOPIES-1:0]; logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] S[`DIVCOPIES-1:0]; //***change to QLEN??? + logic [`DIVLEN+3:0] SM[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] SNext[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] SMNext[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] C[`DIVCOPIES-1:0]; /* verilator lint_on UNOPTFLAT */ logic [`DIVLEN+3:0] WSN, WCN; logic [`DIVLEN+3:0] D, DBar, D2, DBar2; - logic [`NE+1:0] Qe; logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; logic [`QLEN-1:0] QMMux; + logic [`DIVLEN+3:0] CMux; + logic [`DIVLEN+3:0] SMux; // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -83,13 +88,13 @@ module srt( assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; end - mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN); + mux2 #(`DIVLEN+4) wsmux(NextWSN, X, DivStart, WSN); flopen #(`DIVLEN+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]); mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); - flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); - flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM); - + flopen #(`DIVLEN+4) dflop(clk, DivStart, Dpreproc, D); + mux2 #(`DIVLEN+4) Cmux({2'b11, C[`DIVCOPIES-1][`DIVLEN+3:2]}, {5'b11111, Sqrt, {(`DIVLEN-2){1'b0}}}, DivStart, CMux); + flop #(`DIVLEN+4) cflop(clk, CMux, C[0]); // Divisor Selections // - choose the negitive version of what's being selected @@ -102,8 +107,9 @@ module srt( genvar i; generate for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations - divinteration divinteration(.D, .DBar, .D2, .DBar2, - .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i])); + divinteration divinteration(.D, .DBar, .D2, .DBar2, .Sqrt, + .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]), + .C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i])); if(i<(`DIVCOPIES-1)) begin if (`RADIX==2)begin assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 1'b0}; @@ -111,9 +117,12 @@ module srt( end else begin assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; + assign C[i+1] = {2'b11, C[i][`DIVLEN+3:2]}; end assign Q[i+1] = QNext[i]; assign QM[i+1] = QMNext[i]; + assign S[i+1] = SNext[i]; + assign SM[i+1] = SMNext[i]; end end endgenerate @@ -123,16 +132,27 @@ module srt( flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); - assign Qm = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0]; + flopr #(`DIVLEN+4) SMreg(clk, DivStart, SMNext[`DIVCOPIES-1], SM[0]); + mux2 #(`DIVLEN+4) Smux(SNext[`DIVCOPIES-1], {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, DivStart, SMux); + flop #(`DIVLEN+4) Sreg(clk, SMux, S[0]); + + always_comb + if(Sqrt) + if(NegSticky) Qm = SM[0][`QLEN-1-(`RADIX/4):0]; + else Qm = S[0][`QLEN-1-(`RADIX/4):0]; + else + if(NegSticky) Qm = QM[0][`QLEN-1-(`RADIX/4):0]; + else Qm = Q[0][`QLEN-1-(`RADIX/4):0]; + assign FirstWS = WS[0]; assign FirstWC = WC[0]; + if(`RADIX==2) if (`DIVCOPIES == 1) assign StickyWSA = {WSA[0][`DIVLEN+2:0], 1'b0}; else assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0}; - expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .Qe); endmodule @@ -145,8 +165,12 @@ module divinteration ( input logic [`DIVLEN+3:0] D, input logic [`DIVLEN+3:0] DBar, D2, DBar2, input logic [`QLEN-1:0] Q, QM, + input logic [`DIVLEN+3:0] S, SM, input logic [`DIVLEN+3:0] WS, WC, + input logic [`DIVLEN+3:0] C, + input logic Sqrt, output logic [`QLEN-1:0] QNext, QMNext, + output logic [`DIVLEN+3:0] SNext, SMNext, output logic [`DIVLEN+3:0] WSA, WCA ); /* verilator lint_on UNOPTFLAT */ @@ -154,6 +178,8 @@ module divinteration ( logic [`DIVLEN+3:0] Dsel; logic [3:0] q; logic qp, qz;//, qn; + logic [`DIVLEN+3:0] F; + logic [`DIVLEN+3:0] AddIn; // Qmient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) @@ -166,7 +192,8 @@ module divinteration ( if(`RADIX == 2) begin : qsel qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz);//, qn); end else begin - qsel4 qsel4(.D, .WS, .WC, .q); + qsel4 qsel4(.D, .WS, .WC, .Sqrt, .q); + fgen4 fgen4(.s(q), .C, .S, .SM, .F); end if(`RADIX == 2) begin : dsel @@ -184,16 +211,18 @@ module divinteration ( end // Partial Product Generation // WSA, WCA = WS + WC - qD + assign AddIn = Sqrt ? F : Dsel; if (`RADIX == 2) begin : csa - csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); + csa #(`DIVLEN+4) csa(WS, WC, AddIn, qp, WSA, WCA); end else begin - csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); + csa #(`DIVLEN+4) csa(WS, WC, AddIn, |q[3:2], WSA, WCA); end if (`RADIX == 2) begin : otfc otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext); end else begin otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext); + sotfc4 sotfc4(.s(q), .Sqrt, .C, .S, .SM, .SNext, .SMNext); end endmodule @@ -220,40 +249,3 @@ module csa #(parameter N=69) ( assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | (in2[N-2:0] & in3[N-2:0]), cin}; endmodule - -module expcalc( - input logic [`FMTBITS-1:0] FmtE, - input logic [`NE-1:0] Xe, Ye, - input logic XZeroE, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [`NE+1:0] Qe - ); - logic [`NE-2:0] Bias; - - if (`FPSIZES == 1) begin - assign Bias = (`NE-1)'(`BIAS); - - end else if (`FPSIZES == 2) begin - assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); - - end else if (`FPSIZES == 3) begin - always_comb - case (FmtE) - `FMT: Bias = (`NE-1)'(`BIAS); - `FMT1: Bias = (`NE-1)'(`BIAS1); - `FMT2: Bias = (`NE-1)'(`BIAS2); - default: Bias = 'x; - endcase - - end else if (`FPSIZES == 4) begin - always_comb - case (FmtE) - 2'h3: Bias = (`NE-1)'(`Q_BIAS); - 2'h1: Bias = (`NE-1)'(`D_BIAS); - 2'h0: Bias = (`NE-1)'(`S_BIAS); - 2'h2: Bias = (`NE-1)'(`H_BIAS); - endcase - end - // correct exponent for denormalized input's normalization shifts - assign Qe = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; - endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv index b9fb8bb82..4d2609179 100644 --- a/pipelined/src/fpu/srtpreproc.sv +++ b/pipelined/src/fpu/srtpreproc.sv @@ -31,16 +31,25 @@ `include "wally-config.vh" module srtpreproc ( + input logic clk, + input logic DivStart, input logic [`NF:0] Xm, Ym, - output logic [`DIVLEN-1:0] X, - output logic [`DIVLEN-1:0] Dpreproc, + input logic [`NE-1:0] Xe, Ye, + input logic [`FMTBITS-1:0] Fmt, + input logic Sqrt, + input logic XZero, + output logic [`NE+1:0] QeM, + output logic [`DIVLEN+3:0] X, + output logic [`DIVLEN+3:0] Dpreproc, output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, output logic [`DURLEN-1:0] Dur ); // logic [`XLEN-1:0] PosA, PosB; // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; - logic [`DIVLEN-1:0] PreprocA, PreprocX; - logic [`DIVLEN-1:0] PreprocB, PreprocY; + logic [`NF-1:0] PreprocA, PreprocX; + logic [`NF-1:0] PreprocB, PreprocY; + logic [`NF+3:0] SqrtX; + logic [`NE+1:0] Qe; // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; // assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB; @@ -49,23 +58,22 @@ module srtpreproc ( // ***can probably merge X LZC with conversion // cout the number of leading zeros - lzc #(`NF+1) lzcA (Xm, XZeroCnt); - lzc #(`NF+1) lzcB (Ym, YZeroCnt); + lzc #(`NF+1) lzcX (Xm, XZeroCnt); + lzc #(`NF+1) lzcY (Ym, YZeroCnt); // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; // assign PreprocA = ExtraA << zeroCntA; // assign PreprocB = ExtraB << (zeroCntB + 1); - assign PreprocX = {Xm[`NF-1:0]<