The MDU and hazard unit are now also parameterized. Based on Lim's work. Again, I want to clarify that this is their work, not mine. I'm only committing it because the merge had an issue.

2025-02-11 06:05:49 +00:00 · 2023-05-24 15:01:35 -05:00 · 2023-05-24 15:01:35 -05:00 · 7fc53226ac
commit 7fc53226ac
parent 8f9151b125
5 changed files with 72 additions and 80 deletions
--- a/src/hazard/hazard.sv
+++ b/src/hazard/hazard.sv
@ -26,8 +26,6 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
 module hazard (
  // Detect hazards
  input  logic  BPWrongE, CSRWriteFenceM, RetM, TrapM,   
--- a/src/mdu/div.sv
+++ b/src/mdu/div.sv
@ -26,9 +26,7 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module div(
+module div import cvw::*;  #(parameter cvw_t P) (
  input  logic             clk,
  input  logic             reset,
  input  logic             StallM,
@ -36,26 +34,26 @@ module div(
  input  logic             IntDivE,                       // integer division/remainder instruction of any type
  input  logic             DivSignedE,                    // signed division 
  input  logic             W64E,                          // W-type instructions (divw, divuw, remw, remuw)
-  input  logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE,// Forwarding mux outputs for Source A and B
+  input  logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE,// Forwarding mux outputs for Source A and B
  output logic             DivBusyE,                      // Divide is busy - stall pipeline
-  output logic [`XLEN-1:0] QuotM, RemM                    // Quotient and remainder outputs
+  output logic [P.XLEN-1:0] QuotM, RemM                    // Quotient and remainder outputs
 );

-  localparam STEPBITS = $clog2(`XLEN/`IDIV_BITSPERCYCLE); // Number of steps
+  localparam STEPBITS = $clog2(P.XLEN/P.IDIV_BITSPERCYCLE); // Number of steps

  typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype;  // division FSM state
  statetype state;

-  logic [`XLEN-1:0]   W[`IDIV_BITSPERCYCLE:0];            // Residual for each of k steps
-  logic [`XLEN-1:0]   XQ[`IDIV_BITSPERCYCLE:0];           // dividend/quotient for each of k steps
-  logic [`XLEN-1:0]   WNext, XQNext;                      // initialized W and XQ going into registers
-  logic [`XLEN-1:0]   DinE, XinE;                         // divisor & dividend, possibly truncated to 32 bits
-  logic [`XLEN-1:0]   DnE;                                // DnE = ~DinE
-  logic [`XLEN-1:0]   DAbsBE;                             // absolute value of D
-  logic [`XLEN-1:0]   DAbsB;                              // registered absolute value of D, constant during division
-  logic [`XLEN-1:0]   XnE;                                // DXnE = ~XinE
-  logic [`XLEN-1:0]   XInitE;                             // |X|, or original X for divide by 0
-  logic [`XLEN-1:0]   WnM, XQnM;                          // negated residual W and quotient XQ for postprocessing sign correction
+  logic [P.XLEN-1:0]   W[P.IDIV_BITSPERCYCLE:0];            // Residual for each of k steps
+  logic [P.XLEN-1:0]   XQ[P.IDIV_BITSPERCYCLE:0];           // dividend/quotient for each of k steps
+  logic [P.XLEN-1:0]   WNext, XQNext;                      // initialized W and XQ going into registers
+  logic [P.XLEN-1:0]   DinE, XinE;                         // divisor & dividend, possibly truncated to 32 bits
+  logic [P.XLEN-1:0]   DnE;                                // DnE = ~DinE
+  logic [P.XLEN-1:0]   DAbsBE;                             // absolute value of D
+  logic [P.XLEN-1:0]   DAbsB;                              // registered absolute value of D, constant during division
+  logic [P.XLEN-1:0]   XnE;                                // DXnE = ~XinE
+  logic [P.XLEN-1:0]   XInitE;                             // |X|, or original X for divide by 0
+  logic [P.XLEN-1:0]   WnM, XQnM;                          // negated residual W and quotient XQ for postprocessing sign correction
  logic [STEPBITS:0]  step;                               // division step
  logic               Div0E, Div0M;                       // divide by 0
  logic               DivStartE;                          // start integer division
@ -71,42 +69,42 @@ module div(
  assign DivBusyE = (state == BUSY) | DivStartE;

  // Handle sign extension for W-type instructions
-  if (`XLEN == 64) begin:rv64 // RV64 has W-type instructions
-    mux2 #(`XLEN) xinmux(ForwardedSrcAE, {ForwardedSrcAE[31:0], 32'b0}, W64E, XinE);
-    mux2 #(`XLEN) dinmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31]&DivSignedE}}, ForwardedSrcBE[31:0]}, W64E, DinE);
+  if (P.XLEN == 64) begin:rv64 // RV64 has W-type instructions
+    mux2 #(P.XLEN) xinmux(ForwardedSrcAE, {ForwardedSrcAE[31:0], 32'b0}, W64E, XinE);
+    mux2 #(P.XLEN) dinmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31]&DivSignedE}}, ForwardedSrcBE[31:0]}, W64E, DinE);
  end else begin // RV32 has no W-type instructions
    assign XinE = ForwardedSrcAE;
    assign DinE = ForwardedSrcBE;      
    end   

  // Extract sign bits and check fo division by zero
-  assign SignDE = DivSignedE & DinE[`XLEN-1]; 
-  assign SignXE = DivSignedE & XinE[`XLEN-1];
+  assign SignDE = DivSignedE & DinE[P.XLEN-1]; 
+  assign SignXE = DivSignedE & XinE[P.XLEN-1];
  assign NegQE = SignDE ^ SignXE;
  assign Div0E = (DinE == 0);

  // Take absolute value for signed operations, and negate D to handle subtraction in divider stages
-  neg #(`XLEN) negd(DinE, DnE);
-  mux2 #(`XLEN) dabsmux(DnE, DinE, SignDE, DAbsBE);  // take absolute value for signed operations, and negate for subtraction setp
-  neg #(`XLEN) negx(XinE, XnE);
-  mux3 #(`XLEN) xabsmux(XinE, XnE, ForwardedSrcAE, {Div0E, SignXE}, XInitE);  // take absolute value for signed operations, or keep original value for divide by 0
+  neg #(P.XLEN) negd(DinE, DnE);
+  mux2 #(P.XLEN) dabsmux(DnE, DinE, SignDE, DAbsBE);  // take absolute value for signed operations, and negate for subtraction setp
+  neg #(P.XLEN) negx(XinE, XnE);
+  mux3 #(P.XLEN) xabsmux(XinE, XnE, ForwardedSrcAE, {Div0E, SignXE}, XInitE);  // take absolute value for signed operations, or keep original value for divide by 0

  //////////////////////////////
  // Division Iterations (effectively stalled execute stage, no suffix)
  //////////////////////////////

  // initialization multiplexers on first cycle of operation
-  mux2 #(`XLEN) wmux(W[`IDIV_BITSPERCYCLE], {`XLEN{1'b0}}, DivStartE, WNext);
-  mux2 #(`XLEN) xmux(XQ[`IDIV_BITSPERCYCLE], XInitE, DivStartE, XQNext);
+  mux2 #(P.XLEN) wmux(W[P.IDIV_BITSPERCYCLE], {P.XLEN{1'b0}}, DivStartE, WNext);
+  mux2 #(P.XLEN) xmux(XQ[P.IDIV_BITSPERCYCLE], XInitE, DivStartE, XQNext);

  // registers before division steps
-  flopen #(`XLEN) wreg(clk, DivBusyE, WNext, W[0]); 
-  flopen #(`XLEN) xreg(clk, DivBusyE, XQNext, XQ[0]);
-  flopen #(`XLEN) dabsreg(clk, DivStartE, DAbsBE, DAbsB);
+  flopen #(P.XLEN) wreg(clk, DivBusyE, WNext, W[0]); 
+  flopen #(P.XLEN) xreg(clk, DivBusyE, XQNext, XQ[0]);
+  flopen #(P.XLEN) dabsreg(clk, DivStartE, DAbsBE, DAbsB);

  // one copy of divstep for each bit produced per cycle
  genvar i;
-  for (i=0; i<`IDIV_BITSPERCYCLE; i = i+1)
+  for (i=0; i<P.IDIV_BITSPERCYCLE; i = i+1)
    divstep divstep(W[i], XQ[i], DAbsB, W[i+1], XQ[i+1]);

  //////////////////////////////
@ -116,11 +114,11 @@ module div(
  flopen #(3) Div0eMReg(clk, DivStartE, {Div0E, NegQE, SignXE}, {Div0M, NegQM, NegWM});
  
  // On final setp of signed operations, negate outputs as needed to get correct sign
-  neg #(`XLEN) qneg(XQ[0], XQnM);
-  neg #(`XLEN) wneg(W[0], WnM);
+  neg #(P.XLEN) qneg(XQ[0], XQnM);
+  neg #(P.XLEN) wneg(W[0], WnM);
  // Select appropriate output: normal, negated, or for divide by zero
-  mux3 #(`XLEN) qmux(XQ[0], XQnM, {`XLEN{1'b1}}, {Div0M, NegQM}, QuotM); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero
-  mux3 #(`XLEN) remmux(W[0], WnM, XQ[0], {Div0M, NegWM}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero
+  mux3 #(P.XLEN) qmux(XQ[0], XQnM, {P.XLEN{1'b1}}, {Div0M, NegQM}, QuotM); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero
+  mux3 #(P.XLEN) remmux(W[0], WnM, XQ[0], {Div0M, NegWM}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero

  //////////////////////////////
  // Divider FSM to sequence Busy and Done
@ -134,7 +132,7 @@ module div(
        if (Div0E) state <= DONE;
        else       state <= BUSY;
     end else if (state == BUSY) begin // pause one cycle at beginning of signed operations for absolute value
-        if (step[STEPBITS] | (`XLEN==64) & W64E & step[STEPBITS-1]) begin // complete in half the time for W-type instructions
+        if (step[STEPBITS] | (P.XLEN==64) & W64E & step[STEPBITS-1]) begin // complete in half the time for W-type instructions
            state <= DONE;
        end
        step <= step + 1;
--- a/src/mdu/mdu.sv
+++ b/src/mdu/mdu.sv
@ -26,38 +26,36 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module mdu(
+module mdu import cvw::*;  #(parameter cvw_t P) (
  input  logic             clk, reset,
  input  logic             StallM, StallW, 
  input  logic             FlushE, FlushM, FlushW,
-  input  logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // inputs A and B from IEU forwarding mux output
+  input  logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // inputs A and B from IEU forwarding mux output
  input  logic [2:0]       Funct3E, Funct3M,               // type of MDU operation
  input  logic             IntDivE, W64E,                  // Integer division/remainder, and W-type instrutions
-  output logic [`XLEN-1:0] MDUResultW,                     // multiply/divide result
+  output logic [P.XLEN-1:0] MDUResultW,                     // multiply/divide result
  output logic             DivBusyE                        // busy signal to stall pipeline in Execute stage
 );

-  logic [`XLEN*2-1:0]      ProdM;                          // double-width product from mul
-  logic [`XLEN-1:0]        QuotM, RemM;                    // quotient and remainder from intdivrestoring
-  logic [`XLEN-1:0]        PrelimResultM;                  // selected result before W truncation
-  logic [`XLEN-1:0]        MDUResultM;                     // result after W truncation
+  logic [P.XLEN*2-1:0]      ProdM;                          // double-width product from mul
+  logic [P.XLEN-1:0]        QuotM, RemM;                    // quotient and remainder from intdivrestoring
+  logic [P.XLEN-1:0]        PrelimResultM;                  // selected result before W truncation
+  logic [P.XLEN-1:0]        MDUResultM;                     // result after W truncation
  logic                    W64M;                           // W-type instruction

  // Multiplier
-  mul mul(.clk, .reset, .StallM, .FlushM, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .ProdM);
+  mul #(P.XLEN) mul(.clk, .reset, .StallM, .FlushM, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .ProdM);

  // Divider
  // Start a divide when a new division instruction is received and the divider isn't already busy or finishing
  // When IDIV_ON_FPU is set, use the FPU divider instead
  // In ZMMUL, with M_SUPPORTED = 0, omit the divider
-  if ((`IDIV_ON_FPU) || (!`M_SUPPORTED)) begin:nodiv  
+  if ((P.IDIV_ON_FPU) || (!P.M_SUPPORTED)) begin:nodiv  
    assign QuotM = 0;
    assign RemM = 0;
    assign DivBusyE = 0;
  end else begin:div
-    div div(.clk, .reset, .StallM, .FlushE, .DivSignedE(~Funct3E[0]), .W64E, .IntDivE, 
+    div #(P) div(.clk, .reset, .StallM, .FlushE, .DivSignedE(~Funct3E[0]), .W64E, .IntDivE, 
        .ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM);
  end
    
@ -65,10 +63,10 @@ module mdu(
  // For ZMMUL, QuotM and RemM are tied to 0, so the mux automatically simplifies
  always_comb
    case (Funct3M)     
-      3'b000: PrelimResultM = ProdM[`XLEN-1:0];          // mul
-      3'b001: PrelimResultM = ProdM[`XLEN*2-1:`XLEN];    // mulh
-      3'b010: PrelimResultM = ProdM[`XLEN*2-1:`XLEN];    // mulhsu
-      3'b011: PrelimResultM = ProdM[`XLEN*2-1:`XLEN];    // mulhu
+      3'b000: PrelimResultM = ProdM[P.XLEN-1:0];          // mul
+      3'b001: PrelimResultM = ProdM[P.XLEN*2-1:P.XLEN];    // mulh
+      3'b010: PrelimResultM = ProdM[P.XLEN*2-1:P.XLEN];    // mulhsu
+      3'b011: PrelimResultM = ProdM[P.XLEN*2-1:P.XLEN];    // mulhu
      3'b100: PrelimResultM = QuotM;                     // div
      3'b101: PrelimResultM = QuotM;                     // divu
      3'b110: PrelimResultM = RemM;                      // rem
@ -77,14 +75,14 @@ module mdu(

  // Handle sign extension for W-type instructions
  flopenrc #(1) W64MReg(clk, reset, FlushM, ~StallM, W64E, W64M);
-  if (`XLEN == 64) begin:resmux // RV64 has W-type instructions
+  if (P.XLEN == 64) begin:resmux // RV64 has W-type instructions
    assign MDUResultM = W64M ? {{32{PrelimResultM[31]}}, PrelimResultM[31:0]} : PrelimResultM;
  end else begin:resmux // RV32 has no W-type instructions
    assign MDUResultM = PrelimResultM;
  end

  // Writeback stage pipeline register
-  flopenrc #(`XLEN) MDUResultWReg(clk, reset, FlushW, ~StallW, MDUResultM, MDUResultW);   
+  flopenrc #(P.XLEN) MDUResultWReg(clk, reset, FlushW, ~StallW, MDUResultM, MDUResultW);   
 endmodule // mdu


--- a/src/mdu/mul.sv
+++ b/src/mdu/mul.sv
@ -26,14 +26,12 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////

-`include "wally-config.vh"
-
-module mul(
+module mul #(parameter XLEN) (
  input  logic                clk, reset,
  input  logic                StallM, FlushM,
-  input  logic [`XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // source A and B from after Forwarding mux
+  input  logic [XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // source A and B from after Forwarding mux
  input  logic [2:0]          Funct3E,                        // type of multiply
-  output logic [`XLEN*2-1:0]  ProdM                           // double-widthproduct
+  output logic [XLEN*2-1:0]  ProdM                           // double-widthproduct
 );

  // Number systems
@ -50,44 +48,44 @@ module mul(
  // Signed * Unsigned   = P' + ( PA - PB)*2^(XLEN-1) - PP*2^(2XLEN-2)
  // Unsigned * Unsigned = P' + ( PA + PB)*2^(XLEN-1) + PP*2^(2XLEN-2)

-  logic [`XLEN-1:0]   Aprime, Bprime;                       // lower bits of source A and B
+  logic [XLEN-1:0]   Aprime, Bprime;                       // lower bits of source A and B
  logic               MULH, MULHSU;                         // type of multiply
-  logic [`XLEN-2:0]   PA, PB;                               // product of msb and lsbs
+  logic [XLEN-2:0]   PA, PB;                               // product of msb and lsbs
  logic               PP;                                   // product of msbs
-  logic [`XLEN*2-1:0] PP1E, PP2E, PP3E, PP4E;               // partial products
-  logic [`XLEN*2-1:0] PP1M, PP2M, PP3M, PP4M;               // registered partial proudcts
+  logic [XLEN*2-1:0] PP1E, PP2E, PP3E, PP4E;               // partial products
+  logic [XLEN*2-1:0] PP1M, PP2M, PP3M, PP4M;               // registered partial proudcts
 
  //////////////////////////////
  // Execute Stage: Compute partial products
  //////////////////////////////

-  assign Aprime = {1'b0, ForwardedSrcAE[`XLEN-2:0]};
-  assign Bprime = {1'b0, ForwardedSrcBE[`XLEN-2:0]};
+  assign Aprime = {1'b0, ForwardedSrcAE[XLEN-2:0]};
+  assign Bprime = {1'b0, ForwardedSrcBE[XLEN-2:0]};
  assign PP1E = Aprime * Bprime;
-  assign PA = {(`XLEN-1){ForwardedSrcAE[`XLEN-1]}} & ForwardedSrcBE[`XLEN-2:0];  
-  assign PB = {(`XLEN-1){ForwardedSrcBE[`XLEN-1]}} & ForwardedSrcAE[`XLEN-2:0];
-  assign PP = ForwardedSrcAE[`XLEN-1] & ForwardedSrcBE[`XLEN-1];
+  assign PA = {(XLEN-1){ForwardedSrcAE[XLEN-1]}} & ForwardedSrcBE[XLEN-2:0];  
+  assign PB = {(XLEN-1){ForwardedSrcBE[XLEN-1]}} & ForwardedSrcAE[XLEN-2:0];
+  assign PP = ForwardedSrcAE[XLEN-1] & ForwardedSrcBE[XLEN-1];

  // flavor of multiplication
  assign MULH   = (Funct3E == 3'b001);
  assign MULHSU = (Funct3E == 3'b010);

  // Select partial products, handling signed multiplication
-  assign PP2E = {2'b00, (MULH | MULHSU) ? ~PA : PA, {(`XLEN-1){1'b0}}};
-  assign PP3E = {2'b00, (MULH) ? ~PB : PB, {(`XLEN-1){1'b0}}};
+  assign PP2E = {2'b00, (MULH | MULHSU) ? ~PA : PA, {(XLEN-1){1'b0}}};
+  assign PP3E = {2'b00, (MULH) ? ~PB : PB, {(XLEN-1){1'b0}}};
  always_comb 
-  if (MULH)        PP4E = {1'b1, PP, {(`XLEN-3){1'b0}}, 1'b1, {(`XLEN){1'b0}}}; 
-  else if (MULHSU) PP4E = {1'b1, ~PP, {(`XLEN-2){1'b0}}, 1'b1, {(`XLEN-1){1'b0}}};
-  else             PP4E = {1'b0, PP, {(`XLEN*2-2){1'b0}}};
+  if (MULH)        PP4E = {1'b1, PP, {(XLEN-3){1'b0}}, 1'b1, {(XLEN){1'b0}}}; 
+  else if (MULHSU) PP4E = {1'b1, ~PP, {(XLEN-2){1'b0}}, 1'b1, {(XLEN-1){1'b0}}};
+  else             PP4E = {1'b0, PP, {(XLEN*2-2){1'b0}}};

  //////////////////////////////
  // Memory Stage: Sum partial proudcts
  //////////////////////////////

-  flopenrc #(`XLEN*2) PP1Reg(clk, reset, FlushM, ~StallM, PP1E, PP1M); 
-  flopenrc #(`XLEN*2) PP2Reg(clk, reset, FlushM, ~StallM, PP2E, PP2M); 
-  flopenrc #(`XLEN*2) PP3Reg(clk, reset, FlushM, ~StallM, PP3E, PP3M); 
-  flopenrc #(`XLEN*2) PP4Reg(clk, reset, FlushM, ~StallM, PP4E, PP4M); 
+  flopenrc #(XLEN*2) PP1Reg(clk, reset, FlushM, ~StallM, PP1E, PP1M); 
+  flopenrc #(XLEN*2) PP2Reg(clk, reset, FlushM, ~StallM, PP2E, PP2M); 
+  flopenrc #(XLEN*2) PP3Reg(clk, reset, FlushM, ~StallM, PP3E, PP3M); 
+  flopenrc #(XLEN*2) PP4Reg(clk, reset, FlushM, ~StallM, PP4E, PP4M); 

  // add up partial products; this multi-input add implies CSAs and a final CPA
  assign ProdM = PP1M + PP2M + PP3M + PP4M; //ForwardedSrcAE * ForwardedSrcBE;
--- a/src/wally/wallypipelinedcore.sv
+++ b/src/wally/wallypipelinedcore.sv
@ -304,7 +304,7 @@ module wallypipelinedcore import cvw::*;  #(parameter cvw_t P) (

  // multiply/divide unit
  if (P.M_SUPPORTED | P.ZMMUL_SUPPORTED) begin:mdu
-    mdu mdu(.clk, .reset, .StallM, .StallW, .FlushE, .FlushM, .FlushW,
+    mdu #(P) mdu(.clk, .reset, .StallM, .StallW, .FlushE, .FlushM, .FlushW,
      .ForwardedSrcAE, .ForwardedSrcBE, 
      .Funct3E, .Funct3M, .IntDivE, .W64E,
      .MDUResultW, .DivBusyE);