diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 1f05a4f1..e40506c7 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -102,7 +102,7 @@ // division constants `define RADIX 32'h2 -`define DIVCOPIES 32'h1 +`define DIVCOPIES 32'h2 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input `define DIVN (`NF < `XLEN ? `XLEN : `NF+3) // length of input diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index 7a9a2ae8..e76d8645 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -59,13 +59,18 @@ module divsqrt( logic [`DIVb:0] X; logic [`DIVN-2:0] D; // U0.N-1 logic [`DIVN-2:0] Dpreproc; + logic [`DIVb:0] LastSM; + logic [`DIVb-1:0] LastC; + logic [`DIVb:0] FirstSM; + logic [`DIVb-1:0] FirstC; logic [`DURLEN-1:0] Dur; logic NegSticky; + logic [`DIVCOPIES-1:0] qn; srtpreproc srtpreproc(.clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Dur, .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc); - srtfsm srtfsm(.reset, .D, .XsE, .SqrtE, .SqrtM, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE, + srtfsm srtfsm(.reset, .qn, .LastSM, .LastC, .FirstSM, .FirstC, .D, .XsE, .SqrtE, .SqrtM, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE, .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); - srt srt(.clk, .D, .SqrtE, .SqrtM, .X,.Dpreproc, .NegSticky, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, + srt srt(.clk, .qn, .D, .LastSM, .LastC, .FirstSM, .FirstC, .SqrtE, .SqrtM, .X,.Dpreproc, .NegSticky, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, 
.DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .StickyWSA, .DivBusy, .Qm(QmM)); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv index 897d8169..68478e89 100644 --- a/pipelined/src/fpu/qsel.sv +++ b/pipelined/src/fpu/qsel.sv @@ -32,7 +32,7 @@ module qsel2 ( // *** eventually just change to 4 bits input logic [3:0] ps, pc, - output logic qp, qz//, qn + output logic qp, qz, qn ); logic [3:0] p, g; @@ -46,20 +46,20 @@ module qsel2 ( // *** eventually just change to 4 bits assign p = ps ^ pc; assign g = ps & pc; - assign magnitude = ~(&p[2:0]); + //assign magnitude = ~(&p[2:0]); assign cout = g[2] | (p[2] & (g[1] | p[1] & g[0])); - assign sign = p[3] ^ cout; -/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & - (ps[52]^pc[52])); - assign #1 sign = (ps[55]^pc[55])^ - (ps[54] & pc[54] | ((ps[54]^pc[54]) & - (ps[53]&pc[53] | ((ps[53]^pc[53]) & - (ps[52]&pc[52]))))); */ + //assign sign = p[3] ^ cout; + assign magnitude = ~((ps[2]^pc[2]) & (ps[1]^pc[1]) & + (ps[0]^pc[0])); + assign sign = (ps[3]^pc[3])^ + (ps[2] & pc[2] | ((ps[2]^pc[2]) & + (ps[1]&pc[1] | ((ps[1]^pc[1]) & + (ps[0]&pc[0]))))); // Produce quotient = +1, 0, or -1 assign qp = magnitude & ~sign; assign qz = ~magnitude; -// assign #1 qn = magnitude & sign; + assign qn = magnitude & sign; endmodule //////////////////////////////////// diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index 72f84b2d..a9b58413 100644 --- a/pipelined/src/fpu/srt.sv +++ b/pipelined/src/fpu/srt.sv @@ -45,6 +45,11 @@ module srt( output logic [`DIVN-2:0] D, // U0.N-1 output logic [`DIVb+3:0] NextWSN, NextWCN, output logic [`DIVb+3:0] StickyWSA, + output logic [`DIVb:0] LastSM, + output logic [`DIVb-1:0] LastC, + output logic [`DIVb:0] FirstSM, + output logic [`DIVb-1:0] FirstC, + output logic [`DIVCOPIES-1:0] qn, output logic [`DIVb+3:0] FirstWS, FirstWC ); @@ -119,7 +124,7 @@ module srt( for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : 
interations divinteration divinteration(.D, .DBar, .D2, .DBar2, .SqrtM, .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]), - .C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i])); + .C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i]), .qn(qn[i])); if(i<(`DIVCOPIES-1)) begin if (`RADIX==2)begin assign WS[i+1] = {WSA[i][`DIVb+2:0], 1'b0}; @@ -159,6 +164,11 @@ module srt( assign FirstWS = WS[0]; assign FirstWC = WC[0]; + assign LastSM = SM[`DIVCOPIES-1]; + assign LastC = C[`DIVCOPIES-1]; + assign FirstSM = SM[0]; + assign FirstC = C[0]; + if(`RADIX==2) if (`DIVCOPIES == 1) assign StickyWSA = {WSA[0][`DIVb+2:0], 1'b0}; @@ -182,6 +192,7 @@ module divinteration ( input logic [`DIVb-1:0] C, input logic SqrtM, output logic [`DIVb:0] QNext, QMNext, + output logic qn, output logic [`DIVb:0] SNext, SMNext, output logic [`DIVb+3:0] WSA, WCA ); @@ -202,7 +213,7 @@ module divinteration ( // 0010 = -1 // 0001 = -2 if(`RADIX == 2) begin : qsel - qsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], qp, qz); + qsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], qp, qz, qn); fgen2 fgen2(.sp(qp), .sz(qz), .C, .S, .SM, .F); end else begin qsel4 qsel4(.D, .WS, .WC, .Sqrt(SqrtM), .q); diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index ebfc653d..304a219c 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -46,6 +46,11 @@ module srtfsm( input logic [`DIVN-2:0] D, // U0.N-1 input logic [`DIVb+3:0] StickyWSA, input logic [`DURLEN-1:0] Dur, + input logic [`DIVb:0] LastSM, + input logic [`DIVb:0] FirstSM, + input logic [`DIVb-1:0] LastC, + input logic [`DIVb-1:0] FirstC, + input logic [`DIVCOPIES-1:0] qn, output logic [`DURLEN-1:0] EarlyTermShiftE, output logic DivSE, output logic DivDone, @@ -67,11 +72,15 @@ module srtfsm( // this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant // radix-4 division can't create a 
QM that continually adds 0's if (`RADIX == 2) begin - logic [`DIVb+3:0] FNext; - assign FNext = SqrtM ? 0 : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}}; + logic [`DIVb+3:0] FZero, FSticky; + logic [`DIVb+3:0] LastK, FirstK; + assign LastK = ({4'b1111, LastC} & ~({4'b1111, LastC} << 1)); + assign FirstK = ({4'b1111, FirstC<<1} & ~({4'b1111, FirstC<<1} << 1)); + assign FZero = SqrtM ? {{2{LastSM[`DIVb]}}, LastSM, 2'b0} | {LastK,1'b0} : {4'b1,D,{`DIVb-`DIVN+2{1'b0}}}; + assign FSticky = SqrtM ? {FirstSM, 2'b0} | {FirstK,1'b0} : {4'b1,D,{`DIVb-`DIVN+2{1'b0}}}; // *** |... for continual -1 is not efficent fix - also only needed for radix-2 - assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0})|((NextWSN+NextWCN+FNext)==0); - assign DivSE = |W&~((W+FNext)==0); //***not efficent fix == + assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0})|(((NextWSN+NextWCN+FZero)==0)&qn[`DIVCOPIES-1]); + assign DivSE = |W&~((W+FSticky)==0); //***not efficient fix == and need the & qn end else begin assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0}); assign DivSE = |W;