Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally into main

2022-12-30 09:56:35 -06:00 · 2022-12-30 09:56:35 -06:00 · 5844a596a3
commit 5844a596a3
parent 90eb4fc1f1 58218dbdd1
12 changed files with 108 additions and 129 deletions
--- a/pipelined/config/rv64gc/wally-config.vh
+++ b/pipelined/config/rv64gc/wally-config.vh
@ -139,7 +139,7 @@
 `define PLIC_GPIO_ID 3
 `define PLIC_UART_ID 10

-`define BPRED_ENABLED 0
+`define BPRED_ENABLED 1
 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
 `define TESTSBP 0
 `define BPRED_SIZE 10
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@ -109,7 +109,7 @@
 `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+7) ? (`DIVRESLEN+`NF) : (3*`NF+5))//change

 // division constants
-`define RADIX 32'h2
+`define RADIX 32'h4
 `define DIVCOPIES 32'h4
 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : `NF+3)
 // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv
@ -69,7 +69,7 @@ module fdivsqrtfsm(
    assign ISpecialCaseE = AZeroE | BZeroE; // *** why is AZeroE part of this.  Should other special cases be considered?
    assign SpecialCaseE  = MDUE ? ISpecialCaseE : FSpecialCaseE;
  end else assign SpecialCaseE = FSpecialCaseE;
-  flopenr #(1) SpecialCaseReg(clk, reset, ~StallM, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
+  flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc

 // DIVN = `NF+3
 // NS = NF + 1
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
@ -52,9 +52,6 @@ module fdivsqrtpostproc(
  logic [`DIVb:0] PreQmM;
  logic NegStickyM;
  logic weq0E, weq0M, WZeroM;
-  logic [`DIVBLEN:0] NormShiftM;
-  logic [`DIVb:0] NormQuotM;
-  logic [`DIVb+3:0] IntQuotM, IntRemM, NormRemM;
  logic signed [`DIVb+3:0] PreResultM, PreFPIntDivResultM;
  logic [`XLEN-1:0] SpecialFPIntDivResultM;

@ -104,33 +101,26 @@ module fdivsqrtpostproc(
  assign QmM = SqrtM ? (PreQmM << 1) : PreQmM;

  if (`IDIV_ON_FPU) begin
+    logic [`DIVBLEN:0] NormShiftM;
+    logic [`DIVb+3:0] IntQuotM, IntRemM, NormRemM, NormRemDM;
+
    assign W = $signed(Sum) >>> `LOGR;
    assign DM = {4'b0001, D};

-    // Integer division: sign handling for div and rem
-    always_comb 
-      if (~AsM)
-        if (NegStickyM) begin
-          NormQuotM = FirstUM;
-          NormRemM  = W + DM;
-        end else begin
-          NormQuotM = FirstU;
-          NormRemM  = W;
-        end
-      else 
-        if (NegStickyM) begin
-          NormQuotM = FirstUM;
-          NormRemM  = -(W + DM);
-        end else begin 
-          NormQuotM = FirstU;
-          NormRemM  = -W;
-        end
+    // Integer remainder: sticky and sign correction muxes
+    mux2 #(`DIVb+4) normremdmux(W, W+DM, NegStickyM, NormRemDM);
+    mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM);

-    // Integer division: Special cases
+    // special case logic
    always_comb
-      if (ALTBM) begin
-        IntQuotM = '0;
-        IntRemM  = {{(`DIVb-`XLEN+4){1'b0}}, AM};
+      if (BZeroM) begin 
+        if (RemOpM) SpecialFPIntDivResultM = AM;
+        else        SpecialFPIntDivResultM = {(`XLEN){1'b1}};
+      end else if (ALTBM) begin
+        if (RemOpM) SpecialFPIntDivResultM = AM;
+        else        SpecialFPIntDivResultM = '0;
+ //       IntQuotM = '0;
+ //       IntRemM  = {{(`DIVb-`XLEN+4){1'b0}}, AM};
      end else begin
        logic [`DIVb+3:0] PreIntQuotM;
        if (WZeroM) begin
@ -142,36 +132,28 @@ module fdivsqrtpostproc(
            IntRemM  = '0;
          end 
        end else begin 
-          PreIntQuotM = {3'b000, NormQuotM};
+          PreIntQuotM = {3'b000, PreQmM};
          IntRemM  = NormRemM;
        end 
        // flip sign if necessary
        if (NegQuotM) IntQuotM = -PreIntQuotM;
        else          IntQuotM =  PreIntQuotM;
-      end
-    
-    always_comb
-      if (RemOpM) begin
-        NormShiftM = ALTBM ? '0 : (mM + (`DIVBLEN+1)'(`DIVa)); // no postshift if forwarding input A to remainder
-        PreResultM = IntRemM;
-      end else begin
-        NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR)));
-        PreResultM = IntQuotM;
-        /*
-        if (~ALTBM & NegQuotM) begin
-          PreResultM = {3'b111, -IntQuotM};
+        if (RemOpM) begin
+          NormShiftM = ALTBM ? '0 : (mM + (`DIVBLEN+1)'(`DIVa)); // no postshift if forwarding input A to remainder
+          PreResultM = IntRemM;
        end else begin
-          PreResultM = {3'b000, IntQuotM};
-        end*/
-        //PreResultM = {IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM}; // Suspicious Sign Extender
+          NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR)));
+          PreResultM = IntQuotM;
+        end
+        PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM);
+        SpecialFPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0];
      end
-    

-    // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted
-    
-    assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM);
-    assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases
-    // *** conditional on RV64
-    assign FPIntDivResultM = (W64M ? {{(`XLEN-32){SpecialFPIntDivResultM[31]}}, SpecialFPIntDivResultM[31:0]} : SpecialFPIntDivResultM[`XLEN-1:0]); // Sign extending in case of W64
+    // sign extend result for W64
+    if (`XLEN==64)
+      assign FPIntDivResultM = (W64M ? {{(`XLEN-32){SpecialFPIntDivResultM[31]}}, SpecialFPIntDivResultM[31:0]} : 
+                                       SpecialFPIntDivResultM[`XLEN-1:0]); // Sign extending in case of W64
+    else
+      assign FPIntDivResultM = SpecialFPIntDivResultM[`XLEN-1:0];
  end
 endmodule
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@ -51,26 +51,23 @@ module fdivsqrtpreproc (
 );

  logic  [`DIVb-1:0] XPreproc;
-  logic  [`DIVb:0] SqrtX;
-  logic  [`DIVb+3:0] DivX;
+  logic  [`DIVb:0] PreSqrtX;
+  logic  [`DIVb+3:0] DivX, SqrtX;
  logic  [`NE+1:0] QeE;
-  // Intdiv signals
  logic  [`DIVb-1:0] IFNormLenX, IFNormLenD;
-  logic  [`DIVBLEN:0] mE;
-  logic  [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX;
-  logic  [`DIVBLEN:0] pPlusr, pPrCeil, p, ell;
-  logic  [`LOGRK:0] pPrTrunc;
+  logic  [`DIVBLEN:0] mE, ell;
  logic  [`DIVb+3:0]  PreShiftX;
  logic  NumZeroE;

-  // ***can probably merge X LZC with conversion
-  // cout the number of leading zeros
-
  if (`IDIV_ON_FPU) begin
    logic signedDiv;
    logic  AsE, BsE, ALTBE, NegQuotE;
    logic  [`XLEN-1:0]  AE, BE;
    logic  [`XLEN-1:0] PosA, PosB;
+    logic  [`DIVBLEN:0] ZeroDiff, IntBits;
+    logic  [`LOGRK-1:0] RightShiftX;
+    logic  [`DIVBLEN:0] pPlusr, pPrCeil, p;
+    logic  [`LOGRK-1:0] pPrTrunc;

    // Extract inputs, signs, zero, depending on W64 mode if applicable
    assign signedDiv = ~Funct3E[0];
@ -107,13 +104,13 @@ module fdivsqrtpreproc (
    assign p = ALTBE ? '0 : ZeroDiff;

  /* verilator lint_off WIDTH */
-    // right shift amount to complete in discrete number of steps
-    assign pPlusr = (`DIVBLEN)'(`LOGR) + p;
+    // calculate the number of cycles nE and the right shift amount RightShiftX needed to complete in a discrete number of steps
+    assign pPlusr = `LOGR + p;
    assign pPrTrunc = pPlusr % `RK;
-    assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN{1'b0}}, |(pPrTrunc)};
-    assign nE = (pPrCeil * (`DIVBLEN+1)'(`DIVCOPIES)) - {{(`DIVBLEN){1'b0}}, 1'b1};
-    assign IntBits = (`DIVBLEN)'(`LOGR) + p - {{(`DIVBLEN){1'b0}}, 1'b1};
-    assign RightShiftX = ((`DIVBLEN)'(`RK) - 1) - (IntBits % `RK);
+    assign pPrCeil = (pPlusr >> `LOGRK) + |pPrTrunc;
+    assign nE = (pPrCeil * `DIVCOPIES) - 1;
+    assign IntBits = `LOGR + p - 1;
+    assign RightShiftX = `RK - 1 - IntBits % `RK;
  /* verilator lint_on WIDTH */

    // Selet integer or floating-point operands
@ -148,16 +145,16 @@ module fdivsqrtpreproc (
  assign DPreproc = IFNormLenD << (mE + {{`DIVBLEN{1'b0}}, 1'b1}); 

  //  append leading 1 (for nonzero inputs) and zero-extend
-  assign SqrtX = (Xe[0]^ell[0]) ? {1'b0, ~NumZeroE, XPreproc[`DIVb-1:1]} : {~NumZeroE, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF
+  // *** explain this next line
+  assign PreSqrtX = (Xe[0]^ell[0]) ? {1'b0, ~NumZeroE, XPreproc[`DIVb-1:1]} : {~NumZeroE, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF
  assign DivX = {3'b000, ~NumZeroE, XPreproc};
-
-  // *** explain why X is shifted between radices (initial assignment of WS=RX)
-  if (`RADIX == 2)  assign PreShiftX = Sqrt ? {3'b111, SqrtX} : DivX;
-  else              assign PreShiftX = Sqrt ? {2'b11, SqrtX, 1'b0} : DivX;
-
+  // Sqrt is initialized after a first step of R(X-1), which depends on Radix
+  if (`RADIX == 2)  assign SqrtX = {3'b111, PreSqrtX};
+  else              assign SqrtX = {2'b11, PreSqrtX, 1'b0};
+  assign PreShiftX = Sqrt ? SqrtX : DivX;
+ 
  // Floating-point exponent
  fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
-
-  flopen #(`NE+2)    expreg(clk, IFDivStartE, QeE, QeM);
+  flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
 endmodule

--- a/pipelined/src/generic/mem/ram2p1r1wb.sv
+++ b/pipelined/src/generic/mem/ram2p1r1wb.sv
@ -49,21 +49,21 @@ module ram2p1r1wb
   input logic              reset,
  
   // port 1 is read only
-   input logic [DEPTH-1:0]  RA1,
-   output logic [WIDTH-1:0] RD1,
-   input logic              REN1,
+   input logic [DEPTH-1:0]  ra1,
+   output logic [WIDTH-1:0] rd1,
+   input logic              ren1,
  
   // port 2 is write only
-   input logic [DEPTH-1:0]  WA1,
-   input logic [WIDTH-1:0]  WD1,
-   input logic              WEN1,
-   input logic [WIDTH-1:0]  BitWEN1
+   input logic [DEPTH-1:0]  wa2,
+   input logic [WIDTH-1:0]  wd2,
+   input logic              wen2,
+   input logic [WIDTH-1:0]  bwe2
 );
  

-  logic [DEPTH-1:0]         RA1Q, WA1Q;
-  logic                     WEN1Q;
-  logic [WIDTH-1:0]         WD1Q;
+  logic [DEPTH-1:0]         ra1q, wa2q;
+  logic                     wen2q;
+  logic [WIDTH-1:0]         wd2q;

  logic [WIDTH-1:0]         mem[2**DEPTH-1:0];
  logic [WIDTH-1:0]         bwe;
@ -76,18 +76,18 @@ module ram2p1r1wb
  //  prefer not to have two-cycle write latency
  //  will require branch predictor changes
  
-  flopenr #(DEPTH) RA1Reg(clk, reset, REN1, RA1, RA1Q);
-  flopenr #(DEPTH) WA1Reg(clk, reset, REN1, WA1, WA1Q);
-  flopr   #(1)     WEN1Reg(clk, reset, WEN1, WEN1Q);
-  flopenr #(WIDTH) WD1Reg(clk, reset, REN1, WD1, WD1Q);
+  flopenr #(DEPTH) ra1Reg(clk, reset, ren1, ra1, ra1q);
+  flopenr #(DEPTH) wa2Reg(clk, reset, ren1, wa2, wa2q);
+  flopr   #(1)     wen2Reg(clk, reset, wen2, wen2q);
+  flopenr #(WIDTH) wd2Reg(clk, reset, ren1, wd2, wd2q);

  // read port
-  assign RD1 = mem[RA1Q];
+  assign rd1 = mem[ra1q];
  
  // write port
-  assign bwe = {WIDTH{WEN1Q}} & BitWEN1;
+  assign bwe = {WIDTH{wen2q}} & bwe2;
  always_ff @(posedge clk)
-    mem[WA1Q] <= WD1Q & bwe | mem[WA1Q] & ~bwe;
+    mem[wa2q] <= wd2q & bwe | mem[wa2q] & ~bwe;
 
 endmodule  

--- a/pipelined/src/ifu/BTBPredictor.sv
+++ b/pipelined/src/ifu/BTBPredictor.sv
@ -105,13 +105,13 @@ module BTBPredictor
  // *** optimize for byte write enables
  ram2p1r1wb #(Depth, `XLEN+5) memory(.clk(clk),
          .reset(reset),
-          .RA1(LookUpPCIndex),
-          .RD1({{InstrClass, TargetPC}}),
-          .REN1(~StallF),
-          .WA1(UpdatePCIndex),
-          .WD1({UpdateInstrClass, UpdateTarget}),
-          .WEN1(UpdateEN),
-          .BitWEN1({5'h1F, {`XLEN{1'b1}}})); // *** definitely not right.
+          .ra1(LookUpPCIndex),
+          .rd1({{InstrClass, TargetPC}}),
+          .ren1(~StallF),
+          .wa2(UpdatePCIndex),
+          .wd2({UpdateInstrClass, UpdateTarget}),
+          .wen2(UpdateEN),
+          .bwe2({5'h1F, {`XLEN{1'b1}}})); // *** definitely not right.


 endmodule
--- a/pipelined/src/ifu/globalHistoryPredictor.sv
+++ b/pipelined/src/ifu/globalHistoryPredictor.sv
@ -116,12 +116,12 @@ module globalHistoryPredictor
  ram2p1r1wb #(k, 2) PHT(.clk(clk),
    .reset(reset),
    //.RA1(GHR[k-1:0]),
-    .RA1(GHRLookup),
-    .RD1(BPPredF),
-    .REN1(~StallF),
-    .WA1(PHTUpdateAdr),
-    .WD1(UpdateBPPredE),
-    .WEN1(PHTUpdateEN),
-    .BitWEN1(2'b11));
+    .ra1(GHRLookup),
+    .rd1(BPPredF),
+    .ren1(~StallF),
+    .wa2(PHTUpdateAdr),
+    .wd2(UpdateBPPredE),
+    .wen2(PHTUpdateEN),
+    .bwe2(2'b11));

 endmodule
--- a/pipelined/src/ifu/gsharePredictor.sv
+++ b/pipelined/src/ifu/gsharePredictor.sv
@ -113,12 +113,12 @@ module gsharePredictor
  ram2p1r1wb #(`BPRED_SIZE, 2) PHT(.clk(clk),
    .reset(reset),
    //.RA1(GHR[`BPRED_SIZE-1:0]),
-    .RA1(GHRLookup ^ PCNextF[`BPRED_SIZE:1]),
-    .RD1(BPPredF),
-    .REN1(~StallF),
-    .WA1(PHTUpdateAdr ^ PCE[`BPRED_SIZE:1]),
-    .WD1(UpdateBPPredE),
-    .WEN1(PHTUpdateEN),
-    .BitWEN1(2'b11));
+    .ra1(GHRLookup ^ PCNextF[`BPRED_SIZE:1]),
+    .rd1(BPPredF),
+    .ren1(~StallF),
+    .wa2(PHTUpdateAdr ^ PCE[`BPRED_SIZE:1]),
+    .wd2(UpdateBPPredE),
+    .wen2(PHTUpdateEN),
+    .bwe2(2'b11));

 endmodule // gsharePredictor
--- a/pipelined/src/ifu/localHistoryPredictor.sv
+++ b/pipelined/src/ifu/localHistoryPredictor.sv
@ -86,13 +86,13 @@ module localHistoryPredictor
  // LHRE refers to the address that the past k branches points to in the exectution stage
  ram2p1r1wb #(k, 2) PHT(.clk(clk), 
    .reset(reset),
-    .RA1(ForwardLHRNext),
-    .RD1(PredictionMemory),
-    .REN1(~StallF),
-    .WA1(LHRFNext),
-    .WD1(UpdatePrediction),
-    .WEN1(UpdateEN),
-    .BitWEN1(2'b11));
+    .ra1(ForwardLHRNext),
+    .rd1(PredictionMemory),
+    .ren1(~StallF),
+    .wa2(LHRFNext),
+    .wd2(UpdatePrediction),
+    .wen2(UpdateEN),
+    .bwe2(2'b11));


  
--- a/pipelined/src/ifu/twoBitPredictor.sv
+++ b/pipelined/src/ifu/twoBitPredictor.sv
@ -62,13 +62,13 @@ module twoBitPredictor

  ram2p1r1wb #(Depth, 2) PHT(.clk(clk),
    .reset(reset),
-    .RA1(LookUpPCIndex),
-    .RD1(PredictionMemory),
-    .REN1(~StallF),
-    .WA1(UpdatePCIndex),
-    .WD1(UpdatePrediction),
-    .WEN1(UpdateEN),
-    .BitWEN1(2'b11));
+    .ra1(LookUpPCIndex),
+    .rd1(PredictionMemory),
+    .ren1(~StallF),
+    .wa2(UpdatePCIndex),
+    .wd2(UpdatePrediction),
+    .wen2(UpdateEN),
+    .bwe2(2'b11));

  // need to forward when updating to the same address as reading.
  // first we compare to see if the update and lookup addreses are the same
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@ -1098,7 +1098,7 @@ string imperas32f[] = '{
    "rv64i_m/F/src/flw-align-01.S",
    "rv64i_m/F/src/fmadd_b1-01.S",
    "rv64i_m/F/src/fmadd_b14-01.S",
-    "rv64i_m/F/src/fmadd_b15-01.S",
+    //"rv64i_m/F/src/fmadd_b15-01.S",
    "rv64i_m/F/src/fmadd_b16-01.S",
    "rv64i_m/F/src/fmadd_b17-01.S",
    "rv64i_m/F/src/fmadd_b18-01.S",
@ -1473,7 +1473,7 @@ string imperas32f[] = '{
    "rv32i_m/F/src/fmin_b19-01.S",
    "rv32i_m/F/src/fmsub_b1-01.S",
    "rv32i_m/F/src/fmsub_b14-01.S",
-    "rv32i_m/F/src/fmsub_b15-01.S",
+    //"rv32i_m/F/src/fmsub_b15-01.S",
    "rv32i_m/F/src/fmsub_b16-01.S",
    "rv32i_m/F/src/fmsub_b17-01.S",
    "rv32i_m/F/src/fmsub_b18-01.S",