diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 27df384fe..4ff665782 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -102,7 +102,7 @@ // division constants `define RADIX 32'h4 -`define DIVCOPIES 32'h2 +`define DIVCOPIES 32'h1 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input `define DIVN (`NF < `XLEN ? `XLEN : `NF+3) // length of input diff --git a/pipelined/src/fpu/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrtfsm.sv index 6ab1fca59..27d46cffd 100644 --- a/pipelined/src/fpu/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrtfsm.sv @@ -90,20 +90,20 @@ module fdivsqrtfsm( end if (`RADIX == 2) begin - logic [`DIVb+3:0] FZero, FSticky; + logic [`DIVb+3:0] FZeroD, FSticky; logic [`DIVb+2:0] LastK, FirstK; assign LastK = ({3'b111, LastC} & ~({3'b111, LastC} << 1)); assign FirstK = ({3'b111, FirstC<<1} & ~({3'b111, FirstC<<1} << 1)); - assign FZero = SqrtM ? {LastSM[`DIVb], LastSM, 2'b0} | {LastK,1'b0} : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}}; + assign FZeroD = SqrtM ? {FirstSM[`DIVb], FirstSM, 2'b0} | {FirstK,1'b0} : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}}; assign FSticky = SqrtM ? {FirstSM[`DIVb], FirstSM, 2'b0} | {FirstK,1'b0} : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}}; // *** |... for continual -1 is not efficent fix - also only needed for radix-2 - assign WZeroD = ((WS^WC)=={WS[`DIVb+2:0]|WC[`DIVb+2:0], 1'b0})|(((WS+WC+FZero)==0)&qn[`DIVCOPIES-1]); + assign WZeroD = ((WS^WC)=={WS[`DIVb+2:0]|WC[`DIVb+2:0], 1'b0})|(((WS+WC+FZeroD)==0)&qn[`DIVCOPIES-1]); end else begin assign WZeroD = ((WS^WC)=={WS[`DIVb+2:0]|WC[`DIVb+2:0], 1'b0}); end flopr #(1) WZeroReg(clk, reset | DivStart, WZero, WZeroDelayed); -// assign DivDone = (state == DONE); +// assign DivDone = (state == DONE) | (WZeroD & (state == BUSY)); assign DivDone = (state == DONE) | (WZeroDelayed & (state == BUSY)); assign W = WC+WS; assign NegSticky = W[`DIVb+3]; @@ -123,8 +123,7 @@ module fdivsqrtfsm( if (StallM) state <= #1 DONE; else state <= #1 IDLE; end else if (state == BUSY) begin -// if (step == 1 | WZero ) begin - if (step == 1 /* | WZero */) begin + if (step == 1) begin state <= #1 DONE; end step <= step - 1; diff --git a/pipelined/src/fpu/fdivsqrtstage4.sv b/pipelined/src/fpu/fdivsqrtstage4.sv index 1da37b0cf..1865fe4ac 100644 --- a/pipelined/src/fpu/fdivsqrtstage4.sv +++ b/pipelined/src/fpu/fdivsqrtstage4.sv @@ -60,7 +60,7 @@ module fdivsqrtstage4 ( // 0010 = -1 // 0001 = -2 qsel4 qsel4(.D, .WS, .WC, .Sqrt(SqrtM), .q); - // fgen4 fgen4(.s(q), .C, .S, .SM, .F); + fgen4 fgen4(.s(q), .C, .S, .SM, .F); always_comb case (q) @@ -78,7 +78,7 @@ module fdivsqrtstage4 ( csa #(`DIVb+4) csa(WS, WC, AddIn, |q[3:2]&~SqrtM, WSA, WCA); otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext); - // sotfc4 sotfc4(.s(q), .SqrtM, .C, .S, .SM, .SNext, .SMNext); + sotfc4 sotfc4(.s(q), .SqrtM, .C, .S, .SM, .SNext, .SMNext); endmodule diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv index 68478e89c..27efe4d7f 100644 --- a/pipelined/src/fpu/qsel.sv +++ b/pipelined/src/fpu/qsel.sv @@ -111,6 +111,80 @@ module qsel4 ( logic [3:0] QSel4[1023:0]; + always_comb begin + integer d, w, i, w2; + for(d=0; d<8; d++) + for(w=0; w<128; w++)begin + i = d*128+w; + w2 = w-128*(w>=64); // convert to two's complement + case(d) + 0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-4) QSel4[i] = 4'b0000; + else if(w2>=-13) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 1: if(w2>=14) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-4) QSel4[i] = 4'b0000; + else if(w2>=-14) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 2: if(w2>=16) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-16) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 3: if(w2>=16) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-17) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 4: if(w2>=18) QSel4[i] = 4'b1000; + else if(w2>=6) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-18) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 5: if(w2>=20) QSel4[i] = 4'b1000; + else if(w2>=6) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-20) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 6: if(w2>=20) QSel4[i] = 4'b1000; + else if(w2>=8) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-22) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 7: if(w2>=24) QSel4[i] = 4'b1000; + else if(w2>=8) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-22) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + endcase + end + end + assign q = QSel4[{Dmsbs,Wmsbs}]; + +endmodule + +// qsel4old was working for divide +module qsel4old ( + input logic [`DIVN-2:0] D, + input logic [`DIVb+3:0] WS, WC, + input logic Sqrt, + output logic [3:0] q +); + logic [6:0] Wmsbs; + logic [7:0] PreWmsbs; + logic [2:0] Dmsbs; + assign PreWmsbs = WC[`DIVb+3:`DIVb-4] + WS[`DIVb+3:`DIVb-4]; + assign Wmsbs = PreWmsbs[7:1]; + assign Dmsbs = D[`DIVN-2:`DIVN-4];//|{3{D[`DIVN-2]&Sqrt}}; + // D = 0001.xxx... + // Dmsbs = | | + // W = xxxx.xxx... + // Wmsbs = | | + + logic [3:0] QSel4[1023:0]; + always_comb begin integer d, w, i, w2; for(d=0; d<8; d++) @@ -192,7 +266,4 @@ module fgen4 ( else if (s[1]) F = FN1; else if (s[0]) F = FN2; else F = F0; - - // assign F = sp ? FP : (sn ? FN : FZ); - endmodule \ No newline at end of file