rv64f FLW passes imperas tests

2021-06-22 16:36:16 -04:00 · 2021-06-22 16:36:16 -04:00 · 353a27f12f
commit 353a27f12f
parent 7e06a3c04d
7 changed files with 32138 additions and 214774 deletions
--- a/wally-pipelined/src/fpu/FMA/tbgen/tb.sv
+++ b/wally-pipelined/src/fpu/FMA/tbgen/tb.sv
@ -110,7 +110,7 @@ always @(posedge clk)
 		if(ans >= 64'h7FF8000000000000 && ans <= 64'h7FFfffffffffffff ) $display( "ans=qutNaN ");
 		if(ans >= 64'hFFF8000000000000 && ans <= 64'hFFFfffffffffffff ) $display( "ans=qutNaN ");
        errors = errors + 1;
-	 // if (errors == 40)
+	  if (errors == 20)
 		$stop;
    end
    if((FmtE==1'b0)&(FmaFlagsM != flags[4:0] || (!wnan && (FmaResultM != ans)) || (wnan && ansnan && ~(((xnan && (FmaResultM[62:0] == {FInput1E[62:55],1'b1,FInput1E[53:0]})) || (ynan && (FmaResultM[62:0] == {FInput2E[62:55],1'b1,FInput2E[53:0]}))  || (znan && (FmaResultM[62:0] == {FInput3E[62:55],1'b1,FInput3E[53:0]})) || (FmaResultM[62:0] == ans[62:0]))) ))) begin
--- a/wally-pipelined/src/fpu/FMA/tbgen/tb.v
+++ b/wally-pipelined/src/fpu/FMA/tbgen/tb.v
--- a/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh
+++ b/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh
@ -1,3 +1,3 @@
-testfloat_gen f64_mulAdd -tininessbefore -n 6133248 -rmin  -seed 113355 -level 1 > testFloat
+testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rmin  -seed 113355 -level 1 > testFloat
 tr -d ' ' < testFloat > testFloatNoSpace
--- a/wally-pipelined/src/fpu/fma2.sv
+++ b/wally-pipelined/src/fpu/fma2.sv
@ -288,9 +288,9 @@ module fma2(
 	// Set Underflow flag if the number is too small to be represented in normal numbers
 	//		- Don't set the underflow flag if the result is exact 
-	assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky))    )&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
+	assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
-	assign UnderflowFlag = Underflow | (FullResultExp == 0)&Minus1; // before rounding option
+	//assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky))  & ~(FullResultExp == 1);
-	// assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky))  & ~(FullResultExp == 1); //after rounding option
+	assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky))  & ~(FullResultExp == 1);
 	// Set Inexact flag if the result is diffrent from what would be outputed given infinite precision
 	//		- Don't set the underflow flag if an underflowed result isn't outputed
 	assign Inexact = (Sticky|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@ -27,24 +27,22 @@
 module fpu (
  input logic [2:0]        FRM_REGW,   // Rounding mode from CSR
  input logic 		         reset,
  //input  logic             clear,     // *** not being used anywhere
  input logic 		         clk,
  input logic [31:0]       InstrD,
  input logic [`XLEN-1:0]  SrcAE,      // Integer input being processed
  input logic [`XLEN-1:0]  SrcAM,      // Integer input being written into fpreg
  input logic 		         StallE, StallM, StallW,
  input logic 		         FlushE, FlushM, FlushW,
-  input logic [`AHBW-1:0]  HRDATA,
+  input logic [`XLEN-1:0]  ReadDataW,     // Read data from memory
-  input logic 		   RegWriteD,
+  input logic 		         RegWriteD,  // register write enable from ieu
-  output logic [4:0] 	   SetFflagsM,
+  output logic [4:0] 	   SetFflagsM, // FPU flags
-  output logic [31:0] 	   FSROutW,
+  output logic [1:0] 	   FMemRWM,    // Read/write enable for memory {read, write}
-  output logic [1:0] 	   FMemRWM,
+  output logic 		      FStallD,    // Stall the decode stage if Div/Sqrt instruction
-  output logic 		   FStallD,
+  output logic 		      FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
-  output logic 		   FWriteIntE, FWriteIntM, FWriteIntW,
+  output logic [`XLEN-1:0] FWriteDataM,      // Data to be written to memory
-  output logic [`XLEN-1:0] FWriteDataM,
+  output logic 		      FDivBusyE,        // Is the divison/sqrt unit busy
-  output logic 		   FDivBusyE,
+  output logic 		      IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
-  output logic 		   IllegalFPUInstrD,
+  output logic [`XLEN-1:0] FPUResultW);      // FPU result
  output logic [`XLEN-1:0] FPUResultW);
   // control logic signal instantiation
   logic 		   FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW;              // FP register write enable
@ -60,18 +58,18 @@ module fpu (
   logic 		   FInput2UsedD;                                            // Is input 2 used
   logic 		   FInput3UsedD;                                            // Is input 3 used
   logic [2:0] 	FResultSelD, FResultSelE, FResultSelM, FResultSelW;      // Select FP result
-   logic [3:0] 		   FOpCtrlD, FOpCtrlE, FOpCtrlM;                           // Select which opperation to do in each component
+   logic [3:0] 	FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW;                            // Select which opperation to do in each component
-   logic          SelLoadInputE, SelLoadInputM;
+   logic          SelLoadInputE, SelLoadInputM;                            // Select which adress to load when single precision
   // regfile signals //*** KEP lint warning -  changed `XLEN-1 to 63 
-   logic [4:0] 		   RdE, RdM, RdW; // ***Can take from ieu
+   logic [4:0]    RdE, RdM, RdW;                                           // what adress to write to    // ***Can take from ieu insted of pipelining
   logic [63:0] 	FWDM;                                                    // Write data for FP register
-   logic [63:0] 	   FRD1D, FRD2D, FRD3D;                                    // Read Data from FP register
+   logic [63:0] 	FRD1D, FRD2D, FRD3D;                                     // Read Data from FP register - decode stage
-   logic [63:0] 	   FRD1E, FRD2E, FRD3E;
+   logic [63:0] 	FRD1E, FRD2E, FRD3E;                                     // Read Data from FP register - execute stage
-   logic [63:0] 	   FInput1E, FInput1M, FInput1tmpE;
+   logic [63:0] 	FInput1E, FInput1M, FInput1W, FInput1tmpE;                         // Input 1 to the various units (after forwarding)
-   logic [63:0] 	   FInput2E, FInput2M;
+   logic [63:0] 	FInput2E, FInput2M;                                      // Input 2 to the various units (after forwarding)
-   logic [63:0] 	   FInput3E, FInput3M;
+   logic [63:0] 	FInput3E, FInput3M;                                      // Input 3 to the various units (after forwarding)
-   logic [63:0] 	   FLoadResultM, FLoadStoreResultM, FLoadStoreResultW;                   // Result for load, store, and move to int-reg instructions
+   logic [63:0] 	FLoadResultW, FLoadStoreResultM, FLoadStoreResultW;      // Result for load, store, and move to int-reg instructions
   // div/sqrt signals
   logic 		   DivDenormE, DivDenormM, DivDenormW;
@ -80,7 +78,7 @@ module fpu (
   logic [4:0] 	FDivFlagsE, FDivFlagsM, FDivFlagsW;
   logic          FDivSqrtDoneE, FDivSqrtDoneM;
   logic [63:0] 	DivInput1E, DivInput2E;
-   logic HoldInputs;
+   logic          HoldInputs;                                              // keep forwarded inputs arround durring division
   // FMA signals
 	logic [105:0]	ProdManE, ProdManM;
@ -219,6 +217,7 @@ module fpu (
   mux2  #(64)  FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E);
   mux2  #(64)  FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E);
   // first of two-stage instance of floating-point fused multiply-add unit
   fma1 fma1 (.X(FInput1E), .Y(FInput2E), .Z(FInput3E), .FOpCtrlE(FOpCtrlE[2:0]),.*);
   // first and only instance of floating-point divider
@ -276,13 +275,6 @@ module fpu (
  flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, YNaNE, YNaNM); 
  flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, ZNaNE, ZNaNM);  
   //*****************
   // fpdiv E/M pipe registers
   //*****************
   // flopenrc #(64) EMRegDiv1(clk, reset, PipeClearEM, PipeEnableEM, FDivResultE, FDivResultM); 
   // flopenrc #(5) EMRegDiv2(clk, reset, PipeClearEM, PipeEnableEM, FDivFlagsE, FDivFlagsM);
   // flopenrc #(1) EMRegDiv3(clk, reset, PipeClearEM, PipeEnableEM, DivDenormE, DivDenormM); 
   //*****************
   // fpadd E/M pipe registers
   //*****************
@ -352,8 +344,8 @@ module fpu (
   assign FWriteDataM = FmtM ? FInput1M[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FInput1M[63:32]};
   //adjecent adress values are sent to the FPU, select the correct one
   //    -imm is 80000 most of the time vs the error one which is 00000
-   mux3  #(64)  FLoadResultMux({HRDATA[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM);
+   // mux3  #(64)  FLoadResultMux({HRDATA[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM);
-   mux2  #(64)  FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM);
+   // mux2  #(64)  FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM);
   fma2 fma2(.X(FInput1M), .Y(FInput2M), .Z(FInput3M), .FOpCtrlM(FOpCtrlM[2:0]), .*);
@ -364,8 +356,18 @@ module fpu (
   fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM), 
 		   .Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*);
   // Align SrcA to MSB when single precicion
   mux2  #(64)  SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM);
   //*****************
   //fpregfile M/W pipe registers
   //*****************
   flopenrc #(64) MWFpReg1(clk, reset, PipeClearMW, PipeEnableMW, FInput1M, FInput1W);
   //*****************
   // fma M/W pipe registers
   //*****************
@ -406,18 +408,36 @@ module fpu (
   flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW);
   flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
   flopenrc #(64) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, AlignedSrcAM, SrcAW);
-   flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW);
+   // flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW);
   flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW);
   flopenrc #(4) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FOpCtrlM, FOpCtrlW);
   //*****************
   // fpuclassify M/W pipe registers
   //***************** 
   flopenrc #(64) MWRegClass(clk, reset, PipeClearMW, PipeEnableMW, ClassResultM, ClassResultW);
  //#########################################
  // BEGIN WRITEBACK STAGE
  //#########################################
   // mux3  #(64)  FLoadResultMux({ReadD[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM);
   // mux2  #(64)  FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM);
   //***RV32D needs to give two bus transactions
    mux2  #(64)  FLoadResultMux({ReadDataW[31:0], {32{1'b0}}}, {ReadDataW, {64-`XLEN{1'b0}}}, FmtW, FLoadResultW);
    mux2  #(64)  FLoadStoreResultMux(FLoadResultW, FInput1W, |FOpCtrlW[2:1], FLoadStoreResultW);
   always_comb begin
      case (FResultSelW)
 	// div/sqrt
--- a/wally-pipelined/src/wally/wallypipelinedhart.sv
+++ b/wally-pipelined/src/wally/wallypipelinedhart.sv
@ -99,7 +99,6 @@ module wallypipelinedhart (
  logic       SquashSCW;
  logic       FStallD;
  logic       FWriteIntE, FWriteIntW, FWriteIntM;
  logic [31:0]      FSROutW;
  logic             FDivBusyE;
  logic             IllegalFPUInstrD, IllegalFPUInstrE;
  logic [`XLEN-1:0] FPUResultW;
--- a/wally-pipelined/testbench/testbench-imperas.sv
+++ b/wally-pipelined/testbench/testbench-imperas.sv
@ -59,15 +59,15 @@ module testbench();
 string tests32f[] = '{
    "rv32f/I-FADD-S-01", "2000",
    "rv32f/I-FCLASS-S-01", "2000",
-    "rv32f/I-FCVT-S-L-01", "2000",
+    // "rv32f/I-FCVT-S-L-01", "2000",
-    "rv32f/I-FCVT-S-LU-01", "2000",
+    // "rv32f/I-FCVT-S-LU-01", "2000",
-    "rv32f/I-FCVT-S-W-01", "2000",
+    // "rv32f/I-FCVT-S-W-01", "2000",
-    "rv32f/I-FCVT-S-WU-01", "2000",
+    // "rv32f/I-FCVT-S-WU-01", "2000",
-    "rv32f/I-FCVT-L-S-01", "2000",
+    // "rv32f/I-FCVT-L-S-01", "2000",
-    "rv32f/I-FCVT-LU-S-01", "2000",
+    // "rv32f/I-FCVT-LU-S-01", "2000",
-    "rv32f/I-FCVT-W-S-01", "2000",
+    // "rv32f/I-FCVT-W-S-01", "2000",
-    "rv32f/I-FCVT-WU-S-01", "2000",
+    // "rv32f/I-FCVT-WU-S-01", "2000",
-    "rv32f/I-FDIV-S-01", "2000",
+    // "rv32f/I-FDIV-S-01", "2000",
    "rv32f/I-FEQ-S-01", "2000",
    "rv32f/I-FLE-S-01", "2000",
    "rv32f/I-FLT-S-01", "2000",
@ -83,14 +83,14 @@ string tests32f[] = '{
    "rv32f/I-FSGNJ-S-01", "2000",
    "rv32f/I-FSGNJN-S-01", "2000",
    "rv32f/I-FSGNJX-S-01", "2000",
-    "rv32f/I-FSQRT-S-01", "2000",
+    // "rv32f/I-FSQRT-S-01", "2000",
    "rv32f/I-FSW-01", "2000",
-    "rv32f/I-FLW-01", "2000",
+    "rv32f/I-FLW-01", "2110",
    "rv32f/I-FSUB-S-01", "2000"
  };
  string tests64f[] = '{
-    // "rv64f/I-FLW-01", "2110",
+    "rv64f/I-FLW-01", "2110",
    "rv64f/I-FMV-W-X-01", "2000",
    "rv64f/I-FMV-X-W-01", "2000",
    "rv64f/I-FSW-01", "2000",
`@ -1,3 +1,3 @@`
	`testfloat_gen f64_mulAdd -tininessbefore -n 6133248 -rmin -seed 113355 -level 1 > testFloat`	`testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rmin -seed 113355 -level 1 > testFloat`
	`tr -d ' ' < testFloat > testFloatNoSpace`	`tr -d ' ' < testFloat > testFloatNoSpace`