Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

2021-10-13 15:38:32 -07:00 · 2021-10-13 15:38:32 -07:00 · 71397d5db9
commit 71397d5db9
parent 4ca4e13ba2 1dba57dce7
7 changed files with 158 additions and 36 deletions
--- a/wally-pipelined/regression/sim-fp64
+++ b/wally-pipelined/regression/sim-fp64
@ -0,0 +1 @@
 # Launch vsim and execute the wally-fp64.do script (no -c flag, so the GUI is shown).
 vsim -do wally-fp64.do
--- a/wally-pipelined/regression/sim-fp64-batch
+++ b/wally-pipelined/regression/sim-fp64-batch
@ -0,0 +1,3 @@
 # Run vsim in command-line mode (-c) and feed it the batch script invocation
 # through a here-document delimited by "!".
 # Arguments passed to wally-fp64-batch.do: <config> = rv64g, <testcases> = imperas64d.
 vsim -c <<!
 do wally-fp64-batch.do rv64g imperas64d
 !
--- a/wally-pipelined/regression/wally-fp64-batch.do
+++ b/wally-pipelined/regression/wally-fp64-batch.do
@ -0,0 +1,50 @@
 # wally-fp64-batch.do
 #
 # Modification by Oklahoma State University & Harvey Mudd College
 # Use with Testbench 
 # James Stine, 2008; David Harris 2021
 # Go Cowboys!!!!!!
 #
 # Batch script: compiles ../testbench/testbench-f64.sv together with the common
 # testbench files and the src tree into a per-run library, optimizes the
 # testbench, runs it to completion, and quits.
 #
 # Takes 1:10 to run RV64IC tests using gui
 # Usage: do wally-fp64-batch.do <config> <testcases>
 #   $1 = <config>    : directory under ../config that is added to the include path
 #   $2 = <testcases> : value passed to vopt as -G TEST=<testcases>
 # Example: do wally-fp64-batch.do rv64g imperas64d
 # Both arguments are required; the library name work_<config>_<testcases> is built from them.
 # Either bring up ModelSim and type the following at the "ModelSim>" prompt:
 #     do wally-fp64-batch.do <config> <testcases>
 # or, to run from a shell, use the sim-fp64-batch wrapper, which feeds the same
 # "do" command to "vsim -c"
 # (omit the "-c" to see the GUI while running from the shell)
 onbreak {resume}
 # create library (delete any previous library of the same name first)
 if [file exists work_${1}_${2}] {
    vdel -lib work_${1}_${2} -all
 }
 vlib work_${1}_${2}
 # compile source files
 # suppress spurious warnings about 
 # "Extra checking for conflicts with always_comb done at vopt time"
 # because vsim will run vopt
 # the config include directory is ../config/<config> ($1); there is no default
 # in this script, so it must be supplied on the command line
 vlog -work work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-f64.sv ../testbench/common/*.sv   ../src/*/*.sv -suppress 2583
 # start and run simulation
 # +acc is not passed to vopt here; it is only needed when internal signals must be accessed and slows the simulation
 vopt work_${1}_${2}.testbench -work work_${1}_${2} -G TEST=$2 -o testbenchopt
 vsim -lib work_${1}_${2} testbenchopt 
 # Adding coverage increases runtime from 2:00 to 4:29.  Can't run it all the time
 #vopt work_$2.testbench -work work_$2 -o workopt_$2 +cover=sbectf
 #vsim -coverage -lib work_$2 workopt_$2
 run -all
 #coverage report -file wally-pipelined-coverage.txt
 # These aren't doing anything helpful
 #coverage report -memory 
 #profile report -calltree -file wally-pipelined-calltree.rpt -cutoff 2
 quit
--- a/wally-pipelined/regression/wally-fp64.do
+++ b/wally-pipelined/regression/wally-fp64.do
@ -0,0 +1,54 @@
 # wally-fp64.do
 #
 # Modification by Oklahoma State University & Harvey Mudd College
 # Use with Testbench
 # James Stine, 2008; David Harris 2021
 # Go Cowboys!!!!!!
 #
 # GUI script: compiles ../testbench/testbench-f64.sv together with the common
 # testbench files and the src tree, then simulates it with the wave window open.
 # The configuration (rv64g) and the test (imperas64d) are hard-coded below;
 # this script does not read any arguments.
 #
 # Takes 1:10 to run RV64IC tests using gui
 # Use this wally-fp64.do file to run this example.
 # Either bring up ModelSim and type the following at the "ModelSim>" prompt:
 #     do wally-fp64.do
 # or, to run from a shell, type the following at the shell prompt:
 #     vsim -do wally-fp64.do -c
 # (omit the "-c" to see the GUI while running from the shell)
 onbreak {resume}
 # create library (delete any previous "work" library first)
 if [file exists work] {
    vdel -all
 }
 vlib work
 # compile source files
 # suppress spurious warnings about
 # "Extra checking for conflicts with always_comb done at vopt time"
 # because vsim will run vopt
 # The argument-driven variant inherited from wally-pipelined.do is kept for reference:
 #switch $argc {
 #    0 {vlog +incdir+../config/rv64ic +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv -suppress 2583}
 #    1 {vlog +incdir+$1  +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv -suppress 2583}
 #}
 vlog +incdir+../config/rv64g +incdir+../config/shared ../testbench/testbench-f64.sv ../testbench/common/*.sv   ../src/*/*.sv -suppress 2583
 # start and run simulation
 # remove +acc flag for faster sim during regressions if there is no need to access internal signals
 vopt +acc work.testbench -G TEST=imperas64d -o workopt 
 vsim workopt
 view wave
 # display input and output signals as hexadecimal values
 # (was a VHDL-style "--" line, which Tcl would treat as a command rather than a comment)
 do ./wave-dos/generic.do
 # Run the Simulation
 #run 3600 
 run -all
 #quit
 #noview ../testbench/testbench-imperas.sv
 # close the source view of the testbench that was actually compiled above
 noview ../testbench/testbench-f64.sv
 view wave
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@ -191,33 +191,20 @@ module fpu (
 	      .FmtE, .FmtM, .FrmM, 
 	      .FMAFlgM, .FMAResM);
     // clock gater
     //    - creates a clock that only runs during divide/sqrt instructions
     //    - using the separate clock gives the divide/sqrt unit some time to get set up
     // *** the module says not to use in synthesis
     clockgater fpdivclkg(.E(FDivStartE),
 			  .SE(1'b0),
 			  .CLK(clk),
 			  .ECLK(FDivClk));
     // capture the inputs for divide/sqrt
     //    - if not captured any forwarded inputs will change during computation
     //        - this problem is caused by stalling the execute stage
     //    - the other units don't have this problem, only div/sqrt stalls the execute stage
     floprc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E),
-				.clear(FDivSqrtDoneE),
+			      .clear(FDivSqrtDoneE),
-				.reset(reset),  .clk(FDivBusyE));
+			      .reset(reset),  .clk(FDivBusyE));
     floprc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E),
-				.clear(FDivSqrtDoneE),
+			      .clear(FDivSqrtDoneE),
-				.reset(reset),  .clk(FDivBusyE));
+			      .reset(reset),  .clk(FDivBusyE));
     floprc #(6) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE}), 
-				.q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ}),
+			     .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ}),
-			   .clear(FDivSqrtDoneE),
+			     .clear(FDivSqrtDoneE),
-				.reset(reset),  .clk(FDivBusyE));
+			     .reset(reset),  .clk(FDivBusyE));            
-            
+     // fpdivsqrt using Goldschmidt's iteration
-      // fpdivsqrt using Goldschmidt's iteration
+     fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), 
-      fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), 
+		      .reset, .clk(clk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1),
 		      .reset, .clk(FDivClk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1),
 		      .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ,
 		      .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
--- a/wally-pipelined/src/fpu/fsm.sv
+++ b/wally-pipelined/src/fpu/fsm.sv
@ -47,7 +47,7 @@ module fsm (
   statetype current_state, next_state;
-   always @(negedge clk)
+   always @(posedge clk)
     begin
 	if (reset == 1'b1)
 	  current_state = S0;
@ -269,8 +269,23 @@ module fsm (
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b0;
 	       next_state = S11;
 	    end // case: S10
 	  S11:  // done
 	    begin
 	       done = 1'b0;
 	       divBusy = 1'b0;
 	       load_rega = 1'b0;
 	       load_regb = 1'b0;
 	       load_regc = 1'b0;
 	       load_regd = 1'b0;	       
 	       load_regr = 1'b0;
 	       load_regs = 1'b0;		    	       
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b0;
 	       next_state = S0;
-	    end 
+	    end 	  
 	  S13:  // start of sqrt path
 	    begin
 	       done = 1'b0;
@ -479,8 +494,23 @@ module fsm (
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b0;
 	       next_state = S27;
 	    end // case: S26
 	  S27:  // done
 	    begin
 	       done = 1'b0;
 	       divBusy = 1'b0;
 	       load_rega = 1'b0;
 	       load_regb = 1'b0;
 	       load_regc = 1'b0;
 	       load_regd = 1'b0;	       
 	       load_regr = 1'b0;
 	       load_regs = 1'b0;		    	       
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b0;
 	       next_state = S0;
-	    end 
+	    end 	  
 	  default: 
 	    begin
 	       done = 1'b0;
--- a/wally-pipelined/testbench/testbench-f64.sv
+++ b/wally-pipelined/testbench/testbench-f64.sv
@ -30,7 +30,7 @@ module testbench ();
   logic 	XExpMaxE;  
   logic 	XNormE;
   logic 	FDivBusyE;   
-   
+    
   logic 	start;
   logic 	reset;
@ -57,16 +57,13 @@ module testbench ();
 		       .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, 
 		       .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
   fpdiv fdivsqrt (.op1, .op2, .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
-		   .reset, .clk, .start, .P(FmtE), .OvEn(1'b1), .UnEn(1'b1),
+		   .reset, .clk, .start, .P(~FmtE), .OvEn(1'b0), .UnEn(1'b0),
 		   .XNaNQ(XNaNE), .YNaNQ(YNaNE), .XInfQ(XInfE), .YInfQ(YInfE), .XZeroQ(XZeroE), .YZeroQ(YZeroE),
 		   .FDivBusyE, .done(done), .AS_Result(AS_Result), .Flags(Flags));
   // current fpdivsqrt does not operate on denorms yet
-   assign XZeroM = (op1[51:0] == 52'h0);
+   assign Denorm = XDenormE | YDenormE | Flags[3];   
   assign YZeroM = (op2[51:0] == 52'h0);   
   assign XDenorm = XZeroE & ~XZeroM;
   assign YDenorm = YZeroE & ~YZeroM;
   assign Denorm = XDenorm | YDenorm;   
  // generate clock to sequence tests
  always
@ -77,7 +74,7 @@ module testbench ();
   initial
     begin
 	handle3 = $fopen("f64_div_rne.out");
-	$readmemh("../testbench/fp/f64_div_rne.tv", testvectors);
+	$readmemh("../testbench/fp/vectors/f64_div_rne.tv", testvectors);
 	vectornum = 0; errors = 0;
 	start = 1'b0;
 	// reset
@ -90,7 +87,7 @@ module testbench ();
 	// Operation (if applicable)
 	#0  op_type = 1'b0;
 	// Precision (32-bit or 64-bit)
-	#0  FmtE = 1'b0;
+	#0  FmtE = 1'b1;
 	// From fctrl logic to dictate operation
 	#0  FOpCtrlE = 3'b000;
 	// Rounding Mode
@ -114,7 +111,7 @@ module testbench ();
 	       @(posedge clk);
 	     $fdisplay(desc3, "%h_%h_%h_%b_%b | %h_%b", op1, op2, AS_Result, Flags, Denorm, yexpected, (AS_Result==yexpected));
 	     vectornum = vectornum + 1;
-	     if (vectornum == 1)
+	     if (vectornum == 40)
 	       $finish;	     
 	     if (testvectors[vectornum] === 200'bx) begin
 		$display("%d tests completed", vectornum);