radix-4 divider passing tests

2025-02-11 06:05:49 +00:00 · 2022-06-20 22:56:08 +00:00 · 2022-06-20 22:56:08 +00:00 · 5d5f79eb8f
commit 5d5f79eb8f
parent 254ebf478e
6 changed files with 1553 additions and 0 deletions
--- a/pipelined/srt/qsel4.sv
+++ b/pipelined/srt/qsel4.sv
--- a/pipelined/srt/sim-srt4
+++ b/pipelined/srt/sim-srt4
@ -0,0 +1,2 @@
+vsim -do "do srt-radix4.do"  # interactive run: no -c flag, so the simulator GUI is shown while srt-radix4.do executes
+
--- a/pipelined/srt/sim-srt4-batch
+++ b/pipelined/srt/sim-srt4-batch
@ -0,0 +1 @@
+vsim -c -do "do srt-radix4.do"  # batch run: -c keeps the simulator on the command line (no GUI) while srt-radix4.do executes
--- a/pipelined/srt/srt-radix4.do
+++ b/pipelined/srt/srt-radix4.do
@ -0,0 +1,31 @@
+# srt-radix4.do
+#
+# David_Harris@hmc.edu 19 October 2021
+
+# Use this srt-radix4.do file to compile and simulate the radix-4 SRT divider testbench.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do srt-radix4.do
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do srt-radix4.do -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+# keep running the script if the simulation hits a break
+onbreak {resume}
+
+# create library (delete any stale one first so every run starts clean)
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile the divider, its testbench, and the generic cells they instantiate
+vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv qsel4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv
+vopt +acc work.testbenchradix4 -o workopt 
+vsim workopt
+
+# add the testbench, divider, quotient-selection and on-the-fly-converter signals to the wave window
+add wave /testbenchradix4/*
+add wave /testbenchradix4/srtradix4/*
+add wave /testbenchradix4/srtradix4/qsel4/*
+add wave /testbenchradix4/srtradix4/otfc4/*
+
+# run the simulation until the testbench stops it
+run -all
--- a/pipelined/srt/srt-radix4.sv
+++ b/pipelined/srt/srt-radix4.sv
@ -0,0 +1,323 @@
+///////////////////////////////////////////
+// srt-radix4.sv
+//
+// Written: David_Harris@hmc.edu 13 January 2022
+// Modified: 
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+`define DIVLEN ((`NF<(`XLEN)) ? (`XLEN) : `NF)
+
+module srtradix4 ( // radix-4 SRT divider datapath: carry-save residual recurrence plus on-the-fly quotient conversion
+  input  logic clk,
+  input  logic Start, // loads new operands into the residual/divisor registers and restarts the quotient converter
+  input  logic Stall, // *** multiple pipe stages (not referenced in this module yet)
+  input  logic Flush, // *** multiple pipe stages (not referenced in this module yet)
+  // Floating Point Inputs
+  // exponents and signs go through expcalc/signcalc below; special cases are still to be added
+  input  logic       XSign, YSign,
+  input  logic [`NE-1:0] XExp, YExp,
+  input  logic [`NF-1:0] XFrac, YFrac,
+  input  logic [`XLEN-1:0] SrcA, SrcB,
+  input  logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
+  input  logic       W64, // 32-bit ints on XLEN=64
+  input  logic       Signed, // Interpret integers as signed 2's complement
+  input  logic       Int, // Choose integer inputs
+  input  logic       Sqrt, // perform square root, not divide
+  output logic       rsign,
+  output logic [`DIVLEN-1:0] Quot, Rem, // *** later handle integers (Rem is not driven in this module yet)
+  output logic [`NE-1:0] rExp,
+  output logic [3:0] Flags // not driven in this module yet
+);
+
+  // q is the quotient digit chosen by qsel4 each cycle (encoding listed at the qsel4 instance)
+  logic [3:0]     q;
+  logic [`NE-1:0] calcExp;
+  logic           calcSign;
+  logic [`DIVLEN-1:0]  X, Dpreproc; // preprocessed dividend and divisor from srtpreproc
+  logic [`DIVLEN+3:0]  WS, WSA, WSN; // residual sum word: registered, csa output, next-state
+  logic [`DIVLEN+3:0]  WC, WCA, WCN; // residual carry word: registered, csa output, next-state
+  logic [`DIVLEN+3:0]  D, DBar, D2, DBar2, Dsel; // registered divisor, ~D, 2D, ~(2D), and the selected multiple
+  logic [$clog2(`XLEN+1)-1:0] intExp; // produced by srtpreproc, not consumed in this module yet
+  logic           intSign; // produced by srtpreproc, not consumed in this module yet
+ 
+  srtpreproc preproc(SrcA, SrcB, XFrac, YFrac, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
+
+  // Top Muxes and Registers
+  // When Start is asserted, the inputs are loaded into the divider.
+  // Otherwise, the divisor is retained and the partial remainder
+  // is fed back for the next iteration.
+  //  - the leading one is prepended here ({4'b0001, ...}) since all numbers are assumed normalized
+  //    *** wait what about zero? is that a special case? can the divider handle it?
+  //  - when Start is asserted, X and 0 are loaded into WS and WC
+  //  - otherwise WSA and WCA, shifted left by two bits, are loaded into the flip-flops
+  //  *** what do N and A stand for?
+  //  - the two-bit left shift below is the radix-4 step (one quotient digit = two bits per cycle)
+  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {4'b0001, X}, Start, WSN);
+  flop   #(`DIVLEN+4) wsflop(clk, WSN, WS);
+  mux2   #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, Start, WCN);
+  flop   #(`DIVLEN+4) wcflop(clk, WCN, WC);
+  flopen #(`DIVLEN+4) dflop(clk, Start, {4'b0001, Dpreproc}, D);
+
+  // Quotient Selection logic
+  // Given the divisor D and the carry-save partial remainder (WS, WC),
+  // qsel4 selects a quotient digit in {-2, -1, 0, +1, +2}
+  // q encoding:
+  // 1000 = +2
+  // 0100 = +1
+  // 0000 =  0
+  // 0010 = -1
+  // 0001 = -2
+  qsel4 qsel4(.D, .WS, .WC, .q);
+
+  // Store the exponent and sign until division is done
+  flopen #(`NE) expflop(clk, Start, calcExp, rExp);
+  flopen #(1) signflop(clk, Start, calcSign, rsign);
+
+  // Divisor Selection logic
+  // Dsel is the divisor multiple that the csa adds to the residual for the selected digit
+  // - choose the negative version of what's being selected: +2 -> ~(2D), +1 -> ~D (the +1 completing the negation is the csa cin), -1 -> D, -2 -> 2D
+  assign DBar = ~D;
+  assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
+  assign D2 = {D[`DIVLEN+2:0], 1'b0};
+
+  always_comb
+    case (q)
+      4'b1000: Dsel = DBar2;
+      4'b0100: Dsel = DBar;
+      4'b0000: Dsel = {(`DIVLEN+4){1'b0}};
+      4'b0010: Dsel = D;
+      4'b0001: Dsel = D2;
+      default: Dsel = {`DIVLEN+4{1'bx}};
+    endcase
+
+  // Partial Remainder Update (carry-save); cin is 1 for the positive digits (q[3] or q[2])
+  //  WSA, WCA = WS + WC - qD
+  csa    #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
+  
+  // On-the-fly conversion of the digit stream q into the binary quotient Quot
+  otfc4  #(`DIVLEN) otfc4(clk, Start, q, Quot);
+
+  expcalc expcalc(.XExp, .YExp, .calcExp);
+
+  signcalc signcalc(.XSign, .YSign, .calcSign);
+
+endmodule
+
+////////////////
+// Submodules //
+////////////////
+
+///////////////////
+// Preprocessing //
+///////////////////
+module srtpreproc ( // selects and aligns the integer or floating-point operands into DIVLEN-bit X (dividend) and D (divisor)
+  input  logic [`XLEN-1:0] SrcA, SrcB,
+  input  logic [`NF-1:0] XFrac, YFrac,
+  input  logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit (not referenced in this module yet)
+  input  logic       W64, // 32-bit ints on XLEN=64 (not referenced in this module yet)
+  input  logic       Signed, // Interpret integers as signed 2's complement
+  input  logic       Int, // Choose integer inputs
+  input  logic       Sqrt, // perform square root, not divide (not referenced in this module yet)
+  output logic [`DIVLEN-1:0] X, D,
+  output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
+  output logic       intSign // Quotient integer sign
+);
+
+  logic  [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB; // lzc outputs for PosA / PosB
+  logic  [`XLEN-1:0] PosA, PosB; // integer operands after optional negation
+  logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
+
+  assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; // negate when Signed and the MSB is set
+  assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB; // negate when Signed and the MSB is set
+
+  lzc #(`XLEN) lzcA (PosA, zeroCntA); // lzc is defined elsewhere; presumably a leading-zero counter - confirm
+  lzc #(`XLEN) lzcB (PosB, zeroCntB);
+
+  assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; // left-justify in DIVLEN bits, zero-padding on the right
+  assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; // left-justify in DIVLEN bits, zero-padding on the right
+
+  assign PreprocA = ExtraA << zeroCntA; // shift out the counted zeros
+  assign PreprocB = ExtraB << (zeroCntB + 1); // NOTE(review): B is shifted one bit further than A - presumably to drop its leading one, which the top level re-inserts; confirm
+  assign PreprocX = {XFrac, {`DIVLEN-`NF{1'b0}}}; // left-justify the fraction in DIVLEN bits
+  assign PreprocY = {YFrac, {`DIVLEN-`NF{1'b0}}}; // left-justify the fraction in DIVLEN bits
+
+  
+  assign X = Int ? PreprocA : PreprocX; // integer path when Int, otherwise floating-point fraction
+  assign D = Int ? PreprocB : PreprocY; // integer path when Int, otherwise floating-point fraction
+  assign intExp = zeroCntB - zeroCntA + 1; // difference of the two shift counts, plus one
+  assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); // set only for signed operands whose MSBs differ
+endmodule
+
+/////////////////////////////////
+// Quotient Selection, Radix 2 //
+/////////////////////////////////
+module qsel2 ( // *** eventually just change to 4 bits
+  input  logic [`DIVLEN+3:`DIVLEN] ps, pc, 
+  output logic         qp, qz, qm
+);
+
+  // Radix-2 quotient digit selection from the top four bits of the
+  // carry-save partial remainder (ps = sum word, pc = carry word).
+  // Exactly one of qp / qz / qm is asserted: quotient digit +1, 0, or -1.
+  //
+  // The logic is written for clarity rather than speed; the equations
+  // come from the EE371 lecture notes 13-20.
+
+  localparam LSB = `DIVLEN;  // index of the lowest bit examined
+
+  logic [LSB+3:LSB] prop, gen;           // per-bit propagate and generate
+  logic             pick_nonzero;        // 0 selects the zero digit
+  logic             negative, carry;     // estimated sign and the carry into the sign bit
+
+  assign prop = ps ^ pc;
+  assign gen  = ps & pc;
+
+  // The digit is zero only when the three bits below the sign position all propagate.
+  assign #1 pick_nonzero = ~&prop[LSB+2:LSB];
+
+  // Carry out of the three low bits (lookahead form), then the sign of the sum.
+  assign #1 carry    = gen[LSB+2] | (prop[LSB+2] & (gen[LSB+1] | (prop[LSB+1] & gen[LSB])));
+  assign #1 negative = prop[LSB+3] ^ carry;
+
+  // Decode into the three digit-select outputs.
+  assign #1 qm = negative & pick_nonzero;
+  assign #1 qz = ~pick_nonzero;
+  assign #1 qp = ~negative & pick_nonzero;
+endmodule
+
+
+///////////////////////////////////
+// On-The-Fly Converter, Radix 4 //
+///////////////////////////////////
+module otfc4 #(parameter N=65) ( // radix-4 on-the-fly converter: accumulates signed digits q into the binary result r
+  input  logic         clk,
+  input  logic         Start,
+  input  logic [3:0]   q,
+  output logic [N-1:0] r
+);
+
+  //  The on-the-fly converter transfers the quotient 
+  //  digits to the quotient as they come. 
+  //
+  //  This code follows the pseudocode presented in the 
+  //  floating point chapter of the book, extended to radix 4: 
+  //  every cycle appends two bits for one digit in {-2, -1, 0, +1, +2}.
+  //
+  //  QM is Q-1. It allows us to write negative digits 
+  //  without using a costly CPA. 
+  logic [N+2:0] Q, QM, QNext, QMNext, QMux, QMMux;
+  //  QR and QMR are the shifted versions of Q and QM.
+  //  They keep the low N+1 bits, i.e. they discard the 2 most 
+  //  significant bits of Q and QM to make room for the new digit. 
+  logic [N:0] QR, QMR;
+  // if starting a new division set Q to 0 and QM to -1 (all ones)
+  mux2 #(N+3) Qmux(QNext, {N+3{1'b0}}, Start, QMux);
+  mux2 #(N+3) QMmux(QMNext, {N+3{1'b1}}, Start, QMMux);
+  flop #(N+3) Qreg(clk, QMux, Q);
+  flop #(N+3) QMreg(clk, QMMux, QM);
+
+  // shift Q (quotient) and QM (quotient-1)
+  // if      q =  2   Q = {Q, 10}    QM = {Q, 01}
+  // else if q =  1   Q = {Q, 01}    QM = {Q, 00}
+  // else if q =  0   Q = {Q, 00}    QM = {QM, 11}
+  // else if q = -1   Q = {QM, 11}   QM = {QM, 10}
+  // else if q = -2   Q = {QM, 10}   QM = {QM, 01}
+  // a non-negative value (q for Q, q-1 for QM) is appended to Q; a negative one borrows, so value+4 is appended to QM instead
+
+
+
+  always_comb begin
+    QR  = Q[N:0];
+    QMR = QM[N:0];     // Shift Q and QM
+    if (q[3]) begin // +2
+      QNext  = {QR,  2'b10};
+      QMNext = {QR,  2'b01};
+    end else if (q[2]) begin // +1
+      QNext  = {QR,  2'b01};
+      QMNext = {QR,  2'b00};
+    end else if (q[1]) begin // -1
+      QNext  = {QMR,  2'b11};
+      QMNext = {QMR,  2'b10};
+    end else if (q[0]) begin // -2
+      QNext  = {QMR,  2'b10};
+      QMNext = {QMR,  2'b01};
+    end else begin           // 0
+      QNext  = {QR,  2'b00};
+      QMNext = {QMR, 2'b11};
+    end 
+  end
+  assign r = Q[N+2] ? Q[N+1:2] : Q[N:1]; // take the N-bit window one position higher when the top bit Q[N+2] is set
+
+endmodule
+
+
+
+/////////
+// csa //
+/////////
+module csa #(parameter N=69) (
+  input  logic [N-1:0] in1, in2, in3, 
+  input  logic         cin, 
+  output logic [N-1:0] out1, out2
+);
+
+  // 3:2 carry-save adder.
+  //   out1 is the bitwise sum of the three operands.
+  //   out2 is the carry word: the per-bit majority of the low N-1
+  //   bits, moved up one position.  The vacated least significant
+  //   bit takes cin, which is how the +1 of a two's complement
+  //   (negated divisor) is injected without a carry-propagate add.
+
+  logic [N-2:0] lo1, lo2, lo3;   // operands without their top bit
+  logic [N-2:0] majority;        // carry generated at each low position
+
+  assign lo1 = in1[N-2:0];
+  assign lo2 = in2[N-2:0];
+  assign lo3 = in3[N-2:0];
+
+  assign majority = (lo1 & lo2) | (lo1 & lo3) | (lo2 & lo3);
+
+  assign #1 out1 = in1 ^ in2 ^ in3;
+  assign #1 out2 = {majority, cin};
+endmodule
+
+
+//////////////
+// expcalc  //
+//////////////
+module expcalc(
+  input logic  [`NE-1:0] XExp, YExp,
+  output logic [`NE-1:0] calcExp
+);
+
+  // Result exponent: difference of the two biased exponents with the
+  // bias added back, all computed modulo 2^NE.
+  logic [`NE-1:0] bias;
+
+  assign bias = (`NE)'(`BIAS);
+
+  always_comb
+    calcExp = (XExp - YExp) + bias;
+
+endmodule
+
+//////////////
+// signcalc //
+//////////////
+module signcalc(
+  input logic  XSign, YSign,
+  output logic calcSign
+);
+
+  // The result sign is set exactly when the operand signs differ.
+  always_comb
+    calcSign = (XSign != YSign);
+
+endmodule
--- a/pipelined/srt/testbench-radix4.sv
+++ b/pipelined/srt/testbench-radix4.sv
@ -0,0 +1,148 @@
+
+`include "wally-config.vh"
+`define DIVLEN ((`NF<`XLEN) ? `XLEN : `NF)
+
+/////////////
+// counter //
+/////////////
+module counter(input  logic clk, 
+               input  logic req, 
+               output logic done);
+
+  // Sequencer for the divider: req restarts the cycle count, and done
+  // is raised on the clock edge at which the count has reached
+  // DIVLEN/2+1.  done is dropped again on the next edge at which done
+  // or req is high (unless the count matches again).
+
+  localparam LAST_CYCLE = `DIVLEN/2+1;
+
+  logic [5:0] cycles;
+
+  // Cycle counter: cleared by req, otherwise free-running.
+  always @(posedge clk)
+    if (req) cycles <= #1 0;
+    else     cycles <= #1 cycles+1;
+
+  // Completion flag, computed from the pre-edge count.
+  always @(posedge clk)
+    if (cycles == LAST_CYCLE) done <= #1 1;
+    else if (req | done)      done <= #1 0;
+endmodule
+
+///////////
+// clock //
+///////////
+module clock(output logic clk);
+
+  // Placeholder module: it only declares the clk output and contains
+  // no logic that drives it.
+
+endmodule
+
+///////////////
+// testbench //
+///////////////
+module testbenchradix4; // drives srtradix4 with floating-point vectors from "testvectors" and checks sign, exponent and fraction
+  logic              clk;
+  logic              req; // Start pulse for the divider and restart for the counter
+  logic              done; // from the counter: the current division has had its cycles
+  logic [63:0]       a, b; // current operands, split into sign/exponent/fraction below
+  logic [51:0]       afrac, bfrac;
+  logic [10:0]       aExp, bExp;
+  logic              asign, bsign;
+  logic [51:0]       r, rOTFC; // r = top 52 bits of Quot; rOTFC is not referenced elsewhere in this testbench
+  logic [`DIVLEN-1:0]  Quot, QuotOTFC; // QuotOTFC is not referenced elsewhere in this testbench
+  logic [54:0]       rp, rm;   // not referenced elsewhere in this testbench
+ 
+  // Test parameters
+  parameter MEM_SIZE = 40000;
+  parameter MEM_WIDTH = 64+64+64;
+ 
+  `define memr  63:0
+  `define memb  127:64
+  `define mema  191:128
+
+  // Test registers
+  logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE];  // Space for input file
+  logic [MEM_WIDTH-1:0] Vec;  // Verilog doesn't allow direct access to a
+                            // bit field of an array 
+  logic [63:0] correctr, nextr, diffn, diffp; // expected result (current / next vector) and the two signed differences
+  logic [10:0] rExp;
+  logic        rsign;
+  integer testnum, errors;
+
+  // Divider (floating-point path only: Int = 0, integer sources tied to 0, Rem and Flags left unconnected)
+  srtradix4 srtradix4(.clk, .Start(req), 
+                .Stall(1'b0), .Flush(1'b0), 
+                .XExp(aExp), .YExp(bExp), .rExp,
+                .XSign(asign), .YSign(bsign), .rsign,
+                .XFrac(afrac), .YFrac(bfrac), 
+                .SrcA('0), .SrcB('0), .Fmt(2'b00), 
+                .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), 
+                .Quot, .Rem(), .Flags());
+
+  // Counter
+  counter counter(clk, req, done);
+
+
+    initial // free-running clock: 17 time units high, 17 low
+    forever
+      begin
+        clk = 1; #17;
+        clk = 0; #17;
+      end
+
+
+  // Read test vectors from disk and stage vector 0
+  initial
+    begin
+      testnum = 0; 
+      errors = 0;
+      $readmemh ("testvectors", Tests);
+      Vec = Tests[testnum];
+      a = Vec[`mema];
+      {asign, aExp, afrac} = a;
+      b = Vec[`memb];
+      {bsign, bExp, bfrac} = b;
+      nextr = Vec[`memr];
+      r = Quot[`DIVLEN-1:`DIVLEN - 52];
+      req <= 1;
+    end
+  
+  // Apply directed test vectors read from file.
+  // On done: check the finished result and request the next test. On req: latch the expected result and stage the following vector.
+  always @(posedge clk)
+    begin
+      r = Quot[`DIVLEN-1:`DIVLEN - 52];
+      if (done) begin
+        req <= 1;
+        diffp = correctr[51:0] - r;
+        diffn = r - correctr[51:0];
+        if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
+          begin
+            errors = errors+1;
+            $display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp);
+            $display("failed\n");
+            $stop;
+          end
+        if (afrac === 52'hxxxxxxxxxxxxx) // an all-x fraction means the staged vector was never loaded from the file: end of the test set
+          begin
+            $display("%d Tests completed successfully", testnum);
+            $stop;
+          end
+	end
+      if (req) 
+	begin
+	  req <= 0;
+	  correctr = nextr; // expected result belonging to the operands currently in a and b
+	  testnum = testnum+1;
+	  Vec = Tests[testnum];
+	  $display("a = %h  b = %h",a,b); // printed before a and b are overwritten with the next vector
+    a = Vec[`mema]; // NOTE(review): blocking update on the same edge at which the divider sees Start high - ordering against the flops (defined elsewhere) should be confirmed
+    {asign, aExp, afrac} = a;
+    b = Vec[`memb];
+    {bsign, bExp, bfrac} = b;
+    nextr = Vec[`memr];
+	end
+    end
+ 
+endmodule
+