radix-2 1 copy passes testfloat

2025-02-11 06:05:49 +00:00 · 2022-08-06 22:54:05 +00:00 · 2022-08-06 22:54:05 +00:00 · 8eeca3319c
commit 8eeca3319c
parent 8f1d8669b0
8 changed files with 37 additions and 46 deletions
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@ -101,17 +101,18 @@
 `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6))

 // division constants
-`define RADIX 32'h4
-`define DIVCOPIES 32'h4
+`define RADIX 32'h2
+`define DIVCOPIES 32'h1
 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3))
-`define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input
+// `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input
+`define DIVN (`NF < `XLEN ? `XLEN : `NF+3) // length of input
 `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3)
 `define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3))
 `define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN)
 `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
 // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
 // one iteration is required for the integer bit for minimally redundant radix-4
-`define FPDUR ((`DIVLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4))
+`define FPDUR ((`DIVN+2+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4))
 `define DURLEN ($clog2(`FPDUR+1))
 `define QLEN (`FPDUR*`LOGR*`DIVCOPIES)
 `define DIVb (`FPDUR*`LOGR*`DIVCOPIES)-1
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@ -30,9 +30,9 @@ add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QNext
 add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QMNext
 add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/*
 add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/*
-# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/*
-# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/*
-add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/genblk1/qsel4/*
+add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/*
+add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/*
+# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/genblk1/qsel4/*
 add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/*
 add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/expcalc/*
 add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/*
--- a/pipelined/src/fpu/divsqrt.sv
+++ b/pipelined/src/fpu/divsqrt.sv
@ -65,6 +65,6 @@ module divsqrt(

  srtfsm srtfsm(.reset, .XsE, .SqrtE, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE,
               .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
-  srt srt(.clk, .Sqrt(SqrtM), .X,.Dpreproc, .NegSticky, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
+  srt srt(.clk, .SqrtE, .SqrtM, .X,.Dpreproc, .NegSticky, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
                .StickyWSA, .DivBusy, .Qm(QmM));
 endmodule
--- a/pipelined/src/fpu/flags.sv
+++ b/pipelined/src/fpu/flags.sv
@ -157,7 +157,7 @@ module flags(
    //                                                                                                     or when the positive res rounds up out of range
    assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp);
    assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~NaNIn) | (XZero & YInf) | (YZero & XInf);
-    assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt);
+    assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt&~NaNIn&~XZero);

    assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp);

--- a/pipelined/src/fpu/otfc.sv
+++ b/pipelined/src/fpu/otfc.sv
@ -147,9 +147,9 @@ endmodule
 module sotfc4(
  input  logic [3:0]   s,
  input  logic         Sqrt,
-  input  logic [`DIVLEN+3:0] S, SM,
-  input  logic [`DIVLEN+3:0] C,
-  output logic [`DIVLEN+3:0] SNext, SMNext
+  input  logic [`DIVb+3:0] S, SM,
+  input  logic [`DIVb+3:0] C,
+  output logic [`DIVb+3:0] SNext, SMNext
 );
  //  The on-the-fly converter transfers the square root 
  //  bits to the quotient as they come.
--- a/pipelined/src/fpu/qsel.sv
+++ b/pipelined/src/fpu/qsel.sv
@ -31,11 +31,11 @@
 `include "wally-config.vh"

 module qsel2 ( // *** eventually just change to 4 bits
-  input  logic [`DIVLEN+3:`DIVLEN] ps, pc, 
+  input  logic [3:0] ps, pc, 
  output logic         qp, qz//, qn
 );
 
-  logic [`DIVLEN+3:`DIVLEN]  p, g;
+  logic [3:0]  p, g;
  logic          magnitude, sign, cout;

  // The quotient selection logic is presented for simplicity, not
@ -46,9 +46,9 @@ module qsel2 ( // *** eventually just change to 4 bits
  assign p = ps ^ pc;
  assign g = ps & pc;

-  assign magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
-  assign cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
-  assign sign = p[`DIVLEN+3] ^ cout;
+  assign magnitude = ~(&p[2:0]);
+  assign cout = g[2] | (p[2] & (g[1] | p[1] & g[0]));
+  assign sign = p[3] ^ cout;
 /*  assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & 
 			  (ps[52]^pc[52]));
  assign #1 sign = (ps[55]^pc[55])^
@ -80,7 +80,7 @@ module fgen2 (

  // Generate for both positive and negative bits
  assign FP = ~(SExt << 1) & CExt;
-  assign FN = (SMExt << 1) | (CExt & (~CExt << 2));
+  assign FN = (SMExt << 1) | (CExt & ~(CExt << 2));
  assign FZ = '0;

  // Choose which adder input will be used
@ -172,10 +172,10 @@ endmodule
 ////////////////////////////////////
 module fgen4 (
  input  logic [3:0] s,
-  input  logic [`DIVLEN+3:0] C, S, SM,
-  output logic [`DIVLEN+3:0] F
+  input  logic [`DIVb+3:0] C, S, SM,
+  output logic [`DIVb+3:0] F
 );
-  logic [`DIVLEN+3:0] F2, F1, F0, FN1, FN2;
+  logic [`DIVb+3:0] F2, F1, F0, FN1, FN2;
  
  // Generate for both positive and negative bits
  assign F2  = (~S << 2) & (C << 2);
--- a/pipelined/src/fpu/srt.sv
+++ b/pipelined/src/fpu/srt.sv
@ -36,7 +36,8 @@ module srt(
  input  logic DivBusy, 
  input  logic [`NE-1:0] Xe, Ye,
  input  logic XZeroE, YZeroE, 
-  input  logic Sqrt,
+  input  logic SqrtE,
+  input  logic SqrtM,
  input  logic [`DIVb:0] X,
  input  logic [`DIVN-2:0] Dpreproc,
  input  logic NegSticky,
@ -95,21 +96,14 @@ module srt(
  end


-//   mux2   #(`DIVb+4) wsmux(NextWSN, {{3{Sqrt}}, X}, DivStart, WSN); //*** modified for sqrt which doesnt work
-//   flopen   #(`DIVb+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]);
-//   mux2   #(`DIVb+4) wcmux(NextWCN, '0, DivStart, WCN);
-//   flopen   #(`DIVb+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
-//   flopen #(`DIVN-1) dflop(clk, DivStart, Dpreproc, D);
-//   mux2 #(`DIVb) Cmux(NextC, {Sqrt, {(`DIVb-1){1'b0}}}, DivStart, CMux);
-//   flop #(`DIVb) cflop(clk, CMux, C[0]);
-
-  mux2   #(`DIVb+4) wsmux(NextWSN, {3'b0, X}, DivStart, WSN);
+  // mux2   #(`DIVb+4) wsmux(NextWSN, {3'b0, X}, DivStart, WSN);
+  mux2   #(`DIVb+4) wsmux(NextWSN, {{3{SqrtE&~XZeroE}}, X}, DivStart, WSN);
  flopen   #(`DIVb+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]);
  mux2   #(`DIVb+4) wcmux(NextWCN, '0, DivStart, WCN);
  flopen   #(`DIVb+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
  flopen #(`DIVN-1) dflop(clk, DivStart, Dpreproc, D);
-  mux2 #(`DIVb) Cmux({2'b11, C[`DIVCOPIES-1][`DIVb-1:2]}, {Sqrt, {(`DIVb-1){1'b0}}}, DivStart, CMux);
-  flop #(`DIVb) cflop(clk, CMux, C[0]);
+  mux2 #(`DIVb) Cmux(NextC, {1'b1, {(`DIVb-1){1'b0}}}, DivStart, CMux);
+  flopen #(`DIVb) cflop(clk, DivStart|DivBusy, CMux, C[0]);

  // Divisor Selections
  //  - choose the negative version of what's being selected
@ -123,7 +117,7 @@ module srt(
  genvar i;
  generate
    for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations
-      divinteration divinteration(.D, .DBar, .D2, .DBar2, .Sqrt,
+      divinteration divinteration(.D, .DBar, .D2, .DBar2, .SqrtM,
      .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]),
      .C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i]));
      if(i<(`DIVCOPIES-1)) begin 
@ -151,11 +145,11 @@ module srt(
  flopen #(`DIVb+1) QMreg(clk, DivBusy, QMMux, QM[0]);

  flopr #(`DIVb+1) SMreg(clk, DivStart, SMNext[`DIVCOPIES-1], SM[0]);
-  mux2 #(`DIVb+1) Smux(SNext[`DIVCOPIES-1], {Sqrt, {(`DIVb){1'b0}}}, DivStart, SMux);
+  mux2 #(`DIVb+1) Smux(SNext[`DIVCOPIES-1], {SqrtM, {(`DIVb){1'b0}}}, DivStart, SMux);
  flop #(`DIVb+1) Sreg(clk, SMux, S[0]);
 // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted
  always_comb
-    if(Sqrt) // sqrt ouputs in the range (1, .5]
+    if(SqrtM) // sqrt ouputs in the range (1, .5]
      if(NegSticky) Qm = {SM[0][`DIVb-1-(`RADIX/4):0], 1'b0};
      else          Qm = {S[0][`DIVb-1-(`RADIX/4):0], 1'b0};
    else  
@ -186,7 +180,7 @@ module divinteration (
  input logic [`DIVb:0] S, SM,
  input logic [`DIVb+3:0]  WS, WC,
  input logic [`DIVb-1:0] C,
-  input logic Sqrt,
+  input logic SqrtM,
  output logic [`DIVb:0] QNext, QMNext, 
  output logic [`DIVb:0] SNext, SMNext, 
  output logic [`DIVb+3:0]  WSA, WCA
@ -211,7 +205,7 @@ module divinteration (
    qsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], qp, qz);
    fgen2 fgen2(.sp(qp), .sz(qz), .C, .S, .SM, .F);
  end else begin
-    qsel4 qsel4(.D, .WS, .WC, .Sqrt, .q);
+    qsel4 qsel4(.D, .WS, .WC, .Sqrt(SqrtM), .q);
    // fgen4 fgen4(.s(q), .C, .S, .SM, .F);
  end

@ -230,11 +224,11 @@ module divinteration (
  end
  // Partial Product Generation
  //  WSA, WCA = WS + WC - qD
-  assign AddIn = Sqrt ? F : Dsel;
+  assign AddIn = SqrtM ? F : Dsel;
  if (`RADIX == 2) begin : csa
-    csa #(`DIVb+4) csa(WS, WC, AddIn, qp&~Sqrt, WSA, WCA);
+    csa #(`DIVb+4) csa(WS, WC, AddIn, qp&~SqrtM, WSA, WCA);
  end else begin
-    csa #(`DIVb+4) csa(WS, WC, AddIn, |q[3:2]&~Sqrt, WSA, WCA);
+    csa #(`DIVb+4) csa(WS, WC, AddIn, |q[3:2]&~SqrtM, WSA, WCA);
  end

  if (`RADIX == 2) begin : otfc
@ -242,7 +236,7 @@ module divinteration (
    sotfc2 sotfc2(.sp(qp), .sz(qz), .C, .S, .SM, .SNext, .SMNext);
  end else begin
    otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext);
-    // sotfc4 sotfc4(.s(q), .Sqrt, .C, .S, .SM, .SNext, .SMNext);
+    // sotfc4 sotfc4(.s(q), .SqrtM, .C, .S, .SM, .SNext, .SMNext);
  end

 endmodule
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@ -85,10 +85,6 @@ module testbenchfp;
  logic [`DURLEN-1:0] EarlyTermShift;
  logic DivStart, DivBusy;
  logic reset = 1'b0;
-  logic [`DIVLEN-1:0]    DivX;
-  logic [`DIVLEN-1:0]  Dpreproc;
-  logic [`DIVLEN+3:0]  NextWSN, WS;
-  logic [`DIVLEN+3:0]  NextWCN, WC;
  logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
  logic [`DURLEN-1:0] Dur;