From 30ec68d5678d5fc505b2078bb9ccdacf466431d6 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Sun, 3 Oct 2021 01:10:15 -0400
Subject: [PATCH] Parameterized number of bits per cycle for integer division

---
 .../config/buildroot/wally-config.vh          |  4 +++
 .../config/busybear/wally-config.vh           |  4 +++
 .../config/coremark/wally-config.vh           |  7 +++++
 .../config/coremark_bare/wally-config.vh      |  4 +++
 wally-pipelined/config/rv32ic/wally-config.vh |  4 +++
 .../config/rv32icfd/wally-config.vh           |  4 +++
 wally-pipelined/config/rv64BP/wally-config.vh |  7 +++++
 wally-pipelined/config/rv64ic/wally-config.vh |  4 +++
 .../config/rv64icfd/wally-config.vh           |  4 +++
 .../config/rv64imc/wally-config.vh            |  7 +++++
 wally-pipelined/src/muldiv/intdivrestoring.sv | 27 ++++++++++++-------
 .../src/muldiv/intdivrestoringstep.sv         |  3 +++
 .../testbench/testbench-imperas.sv            |  1 +
 13 files changed, 70 insertions(+), 10 deletions(-)

diff --git a/wally-pipelined/config/buildroot/wally-config.vh b/wally-pipelined/config/buildroot/wally-config.vh
index 1b6e030ff..0a59bc3ad 100644
--- a/wally-pipelined/config/buildroot/wally-config.vh
+++ b/wally-pipelined/config/buildroot/wally-config.vh
@@ -66,6 +66,10 @@
 `define ICACHE_WAYSIZEINBYTES 4096
 `define ICACHE_BLOCKLENINBITS 256
 
+// Integer Divider Configuration
+// DIV_BITSPERCYCLE must be 1, 2, or 4
+`define DIV_BITSPERCYCLE 4
+
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 16
 
diff --git a/wally-pipelined/config/busybear/wally-config.vh b/wally-pipelined/config/busybear/wally-config.vh
index 86385bd88..614cfb2e5 100644
--- a/wally-pipelined/config/busybear/wally-config.vh
+++ b/wally-pipelined/config/busybear/wally-config.vh
@@ -66,6 +66,10 @@
 `define ICACHE_WAYSIZEINBYTES 4096
 `define ICACHE_BLOCKLENINBITS 256
 
+// Integer Divider Configuration
+// DIV_BITSPERCYCLE must be 1, 2, or 4
+`define DIV_BITSPERCYCLE 4
+
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 16
 
diff --git a/wally-pipelined/config/coremark/wally-config.vh b/wally-pipelined/config/coremark/wally-config.vh
index 32006c690..8b1ae7dc7 100644
--- a/wally-pipelined/config/coremark/wally-config.vh
+++ b/wally-pipelined/config/coremark/wally-config.vh
@@ -65,6 +65,13 @@
 `define ICACHE_WAYSIZEINBYTES 4096
 `define ICACHE_BLOCKLENINBITS 256
 
+// Integer Divider Configuration
+// DIV_BITSPERCYCLE must be 1, 2, or 4
+`define DIV_BITSPERCYCLE 4
+
+// Legal number of PMP entries are 0, 16, or 64
+`define PMP_ENTRIES 16
+
 // Address space
 `define RESET_VECTOR 64'h00000000000100b0
 
diff --git a/wally-pipelined/config/coremark_bare/wally-config.vh b/wally-pipelined/config/coremark_bare/wally-config.vh
index 8f79212b5..be4a83205 100644
--- a/wally-pipelined/config/coremark_bare/wally-config.vh
+++ b/wally-pipelined/config/coremark_bare/wally-config.vh
@@ -66,6 +66,10 @@
 `define ICACHE_WAYSIZEINBYTES 4096
 `define ICACHE_BLOCKLENINBITS 256
 
+// Integer Divider Configuration
+// DIV_BITSPERCYCLE must be 1, 2, or 4
+`define DIV_BITSPERCYCLE 4
+
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 64
 
diff --git a/wally-pipelined/config/rv32ic/wally-config.vh b/wally-pipelined/config/rv32ic/wally-config.vh
index dfe1c61e0..3280c3759 100644
--- a/wally-pipelined/config/rv32ic/wally-config.vh
+++ b/wally-pipelined/config/rv32ic/wally-config.vh
@@ -64,6 +64,10 @@
 `define ICACHE_WAYSIZEINBYTES 4096
 `define ICACHE_BLOCKLENINBITS 256
 
+// Integer Divider Configuration
+// DIV_BITSPERCYCLE must be 1, 2, or 4
+`define DIV_BITSPERCYCLE 4
+
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 16
 
diff --git a/wally-pipelined/config/rv32icfd/wally-config.vh b/wally-pipelined/config/rv32icfd/wally-config.vh
index 2f0bc378e..432906c85 100644
--- a/wally-pipelined/config/rv32icfd/wally-config.vh
+++ b/wally-pipelined/config/rv32icfd/wally-config.vh
@@ -64,6 +64,10 @@
 `define ICACHE_WAYSIZEINBYTES 4096
 `define ICACHE_BLOCKLENINBITS 256
 
+// Integer Divider Configuration
+// DIV_BITSPERCYCLE must be 1, 2, or 4
+`define DIV_BITSPERCYCLE 4
+
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 16
 
diff --git a/wally-pipelined/config/rv64BP/wally-config.vh b/wally-pipelined/config/rv64BP/wally-config.vh
index c189cb0fe..162192495 100644
--- a/wally-pipelined/config/rv64BP/wally-config.vh
+++ b/wally-pipelined/config/rv64BP/wally-config.vh
@@ -66,6 +66,13 @@
 `define ICACHE_WAYSIZEINBYTES 4096
 `define ICACHE_BLOCKLENINBITS 256
 
+// Integer Divider Configuration
+// DIV_BITSPERCYCLE must be 1, 2, or 4
+`define DIV_BITSPERCYCLE 4
+
+// Legal number of PMP entries are 0, 16, or 64
+`define PMP_ENTRIES 16
+
 // Address space
 `define RESET_VECTOR 64'h0000000000000000
 
diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh
index ef935ae2c..518da71c7 100644
--- a/wally-pipelined/config/rv64ic/wally-config.vh
+++ b/wally-pipelined/config/rv64ic/wally-config.vh
@@ -65,6 +65,10 @@
 `define ICACHE_WAYSIZEINBYTES 4096
 `define ICACHE_BLOCKLENINBITS 256
 
+// Integer Divider Configuration
+// DIV_BITSPERCYCLE must be 1, 2, or 4
+`define DIV_BITSPERCYCLE 4
+
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 64
 
diff --git a/wally-pipelined/config/rv64icfd/wally-config.vh b/wally-pipelined/config/rv64icfd/wally-config.vh
index a91531dd0..d3587ff4d 100644
--- a/wally-pipelined/config/rv64icfd/wally-config.vh
+++ b/wally-pipelined/config/rv64icfd/wally-config.vh
@@ -66,6 +66,10 @@
 `define ICACHE_WAYSIZEINBYTES 4096
 `define ICACHE_BLOCKLENINBITS 256
 
+// Integer Divider Configuration
+// DIV_BITSPERCYCLE must be 1, 2, or 4
+`define DIV_BITSPERCYCLE 4
+
 // Legal number of PMP entries are 0, 16, or 64
 `define PMP_ENTRIES 64
 
diff --git a/wally-pipelined/config/rv64imc/wally-config.vh b/wally-pipelined/config/rv64imc/wally-config.vh
index 0a874a72d..437a0040f 100644
--- a/wally-pipelined/config/rv64imc/wally-config.vh
+++ b/wally-pipelined/config/rv64imc/wally-config.vh
@@ -64,6 +64,13 @@
 `define ICACHE_WAYSIZEINBYTES 4096
 `define ICACHE_BLOCKLENINBITS 256
 
+// Integer Divider Configuration
+// DIV_BITSPERCYCLE must be 1, 2, or 4
+`define DIV_BITSPERCYCLE 4
+
+// Legal number of PMP entries are 0, 16, or 64
+`define PMP_ENTRIES 64
+
 // Address space
 `define RESET_VECTOR 64'h0000000080000000
 
diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv
index 15ec9e005..c579f274e 100644
--- a/wally-pipelined/src/muldiv/intdivrestoring.sv
+++ b/wally-pipelined/src/muldiv/intdivrestoring.sv
@@ -25,6 +25,8 @@
 
 `include "wally-config.vh"
 
+/* verilator lint_off UNOPTFLAT */
+
 module intdivrestoring (
   input  logic clk,
   input  logic reset,
@@ -36,8 +38,10 @@ module intdivrestoring (
   output logic [`XLEN-1:0] QuotM, RemM
  );
 
-  logic [`XLEN-1:0] DSavedE, XSavedE, XSavedM, DnE, DAbsBE, XnE, XInitE, WE, XQE, W1E, XQ1E, WNextE, XQNextE, WM, XQM, WnM, XQnM;
-  localparam STEPBITS = $clog2(`XLEN)-1;
+  logic [`XLEN-1:0] WE[`DIV_BITSPERCYCLE:0];
+  logic [`XLEN-1:0] XQE[`DIV_BITSPERCYCLE:0];
+  logic [`XLEN-1:0] DSavedE, XSavedE, XSavedM, DnE, DAbsBE, XnE, XInitE, WM, XQM, WnM, XQnM;
+  localparam STEPBITS = $clog2(`XLEN/`DIV_BITSPERCYCLE);
   logic [STEPBITS:0] step;
   logic Div0E, Div0M;
   logic DivInitE, SignXE, SignXM, SignDE, SignDM, NegWM, NegQM;
@@ -66,16 +70,19 @@ module intdivrestoring (
   mux2 #(`XLEN) xabsmux(XSavedE, XnE, SignedDivideE & SignXE, XInitE);  // need original X as remainder if doing divide by 0
 
   // initialization multiplexers on first cycle of operation (one cycle after start is asserted)
-  mux2 #(`XLEN) wmux(WM, {`XLEN{1'b0}}, DivInitE, WE);
-  mux2 #(`XLEN) xmux(XQM, XInitE, DivInitE, XQE);
+  mux2 #(`XLEN) wmux(WM, {`XLEN{1'b0}}, DivInitE, WE[0]);
+  mux2 #(`XLEN) xmux(XQM, XInitE, DivInitE, XQE[0]);
 
-  // *** parameterize steps per cycle
-  intdivrestoringstep step1(WE, XQE, DAbsBE, W1E, XQ1E);
-  intdivrestoringstep step2(W1E, XQ1E, DAbsBE, WNextE, XQNextE);
+  // one copy of divstep per bit produced each cycle; step i consumes WE[i], XQE[i] and produces WE[i+1], XQE[i+1]
+  generate
+      genvar i;
+      for (i=0; i<`DIV_BITSPERCYCLE; i = i+1)
+        intdivrestoringstep divstep(WE[i], XQE[i], DAbsBE, WE[i+1], XQE[i+1]);
+  endgenerate
 
   // registers after division steps
-  flopen #(`XLEN) wreg(clk, BusyE, WNextE, WM); 
-  flopen #(`XLEN) xreg(clk, BusyE, XQNextE, XQM);
+  flopen #(`XLEN) wreg(clk, BusyE, WE[`DIV_BITSPERCYCLE], WM); 
+  flopen #(`XLEN) xreg(clk, BusyE, XQE[`DIV_BITSPERCYCLE], XQM);
 
   // Output selection logic in Memory Stage
   // On final setp of signed operations, negate outputs as needed
@@ -112,4 +119,4 @@ module intdivrestoring (
 
 endmodule 
 
-// *** clean up internal signals
\ No newline at end of file
+/* verilator lint_on UNOPTFLAT */
diff --git a/wally-pipelined/src/muldiv/intdivrestoringstep.sv b/wally-pipelined/src/muldiv/intdivrestoringstep.sv
index fe32da554..339695fcf 100644
--- a/wally-pipelined/src/muldiv/intdivrestoringstep.sv
+++ b/wally-pipelined/src/muldiv/intdivrestoringstep.sv
@@ -25,6 +25,8 @@
 
 `include "wally-config.vh"
 
+/* verilator lint_off UNOPTFLAT */
+
 module intdivrestoringstep(
   input  logic [`XLEN-1:0] W, XQ, DAbsB,
   output logic [`XLEN-1:0] WOut, XQOut);
@@ -39,3 +41,4 @@ module intdivrestoringstep(
   mux2 #(`XLEN) wrestoremux(WShift, WPrime, qi, WOut);
 endmodule
 
+/* verilator lint_on UNOPTFLAT */
diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv
index 50b447039..65fc56810 100644
--- a/wally-pipelined/testbench/testbench-imperas.sv
+++ b/wally-pipelined/testbench/testbench-imperas.sv
@@ -743,6 +743,7 @@ module riscvassertions();
   // Legal number of PMP entries are 0, 16, or 64
   initial begin
     assert (`PMP_ENTRIES == 0 || `PMP_ENTRIES==16 || `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64");
+    assert (`DIV_BITSPERCYCLE == 1 || `DIV_BITSPERCYCLE==2 || `DIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: DIV_BITSPERCYCLE must be 1, 2, or 4");
     assert (`F_SUPPORTED || ~`D_SUPPORTED) else $error("Can't support double without supporting float");
     assert (`XLEN == 64 || ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
     assert (`DCACHE_WAYSIZEINBYTES <= 4096 || `MEM_DCACHE == 0 || `MEM_VIRTMEM == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");