diff --git a/wally-pipelined/regression/sim-fp64 b/wally-pipelined/regression/sim-fp64 new file mode 100755 index 00000000..b6b8ba5c --- /dev/null +++ b/wally-pipelined/regression/sim-fp64 @@ -0,0 +1 @@ +vsim -do wally-fp64.do diff --git a/wally-pipelined/regression/sim-fp64-batch b/wally-pipelined/regression/sim-fp64-batch new file mode 100755 index 00000000..693bfeb2 --- /dev/null +++ b/wally-pipelined/regression/sim-fp64-batch @@ -0,0 +1,3 @@ +vsim -c < +# Example: do wally-pipelined-batch.do rv32ic imperas-32i + +# Use this wally-pipelined-batch.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do wally-pipelined-batch.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do wally-pipelined-batch.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work_${1}_${2}] { + vdel -lib work_${1}_${2} -all +} +vlib work_${1}_${2} + +# compile source files +# suppress spurious warnings about +# "Extra checking for conflicts with always_comb done at vopt time" +# because vsim will run vopt + +# default to config/rv64ic, but allow this to be overridden at the command line. For example: +# do wally-pipelined-batch.do ../config/rv32ic rv32ic +vlog -work work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-f64.sv ../testbench/common/*.sv ../src/*/*.sv -suppress 2583 + +# start and run simulation +# remove +acc flag for faster sim during regressions if there is no need to access internal signals +vopt work_${1}_${2}.testbench -work work_${1}_${2} -G TEST=$2 -o testbenchopt +vsim -lib work_${1}_${2} testbenchopt +# Adding coverage increases runtime from 2:00 to 4:29. 
Can't run it all the time +#vopt work_$2.testbench -work work_$2 -o workopt_$2 +cover=sbectf +#vsim -coverage -lib work_$2 workopt_$2 + +run -all +#coverage report -file wally-pipelined-coverage.txt +# These aren't doing anything helpful +#coverage report -memory +#profile report -calltree -file wally-pipelined-calltree.rpt -cutoff 2 +quit diff --git a/wally-pipelined/regression/wally-fp64.do b/wally-pipelined/regression/wally-fp64.do new file mode 100644 index 00000000..c131ff16 --- /dev/null +++ b/wally-pipelined/regression/wally-fp64.do @@ -0,0 +1,54 @@ +# wally-fp64.do +# +# Modification by Oklahoma State University & Harvey Mudd College +# Use with Testbench +# James Stine, 2008; David Harris 2021 +# Go Cowboys!!!!!! +# +# Takes 1:10 to run RV64IC tests using gui + +# run with vsim -do wally-fp64.do (config rv64g and TEST=imperas64d are hard-coded below) + +# Use this wally-fp64.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do wally-fp64.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do wally-fp64.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +# suppress spurious warnings about +# "Extra checking for conflicts with always_comb done at vopt time" +# because vsim will run vopt + +# the commented-out switch below defaulted to config/rv64ic and allowed an override at the command line. 
For example: +# do wally-pipelined.do ../config/rv32ic +#switch $argc { +# 0 {vlog +incdir+../config/rv64ic +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv -suppress 2583} +# 1 {vlog +incdir+$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv -suppress 2583} +#} +# start and run simulation +# remove +acc flag for faster sim during regressions if there is no need to access internal signals +vlog +incdir+../config/rv64g +incdir+../config/shared ../testbench/testbench-f64.sv ../testbench/common/*.sv ../src/*/*.sv -suppress 2583 +vopt +acc work.testbench -G TEST=imperas64d -o workopt +vsim workopt + +view wave +-- display input and output signals as hexidecimal values +do ./wave-dos/generic.do + +-- Run the Simulation +#run 3600 +run -all +#quit +#noview ../testbench/testbench-imperas.sv +noview ../testbench/testbench.sv +view wave diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 7ca34f50..8258b9c6 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -191,33 +191,20 @@ module fpu ( .FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM); - // clock gater - // - creates a clock that only runs durring divide/sqrt instructions - // - using the seperate clock gives the divide/sqrt unit some to get set up - // *** the module says not to use in synthisis - clockgater fpdivclkg(.E(FDivStartE), - .SE(1'b0), - .CLK(clk), - .ECLK(FDivClk)); - // capture the inputs for divide/sqrt - // - if not captured any forwarded inputs will change durring computation - // - this problem is caused by stalling the execute stage - // - the other units don't have this problem, only div/sqrt stalls the execute stage floprc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E), - .clear(FDivSqrtDoneE), - .reset(reset), .clk(FDivBusyE)); + .clear(FDivSqrtDoneE), + .reset(reset), .clk(FDivBusyE)); floprc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), 
.q(DivInput2E), - .clear(FDivSqrtDoneE), - .reset(reset), .clk(FDivBusyE)); + .clear(FDivSqrtDoneE), + .reset(reset), .clk(FDivBusyE)); floprc #(6) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE}), - .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ}), - .clear(FDivSqrtDoneE), - .reset(reset), .clk(FDivBusyE)); - - // fpdivsqrt using Goldschmidt's iteration - fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), - .reset, .clk(FDivClk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1), + .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ}), + .clear(FDivSqrtDoneE), + .reset(reset), .clk(FDivBusyE)); + // fpdivsqrt using Goldschmidt's iteration + fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), + .reset, .clk(clk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1), .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); diff --git a/wally-pipelined/src/fpu/fsm.sv b/wally-pipelined/src/fpu/fsm.sv index a0e874bc..9b0e18a7 100755 --- a/wally-pipelined/src/fpu/fsm.sv +++ b/wally-pipelined/src/fpu/fsm.sv @@ -47,7 +47,7 @@ module fsm ( statetype current_state, next_state; - always @(negedge clk) + always @(posedge clk) begin if (reset == 1'b1) current_state = S0; @@ -269,8 +269,23 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; + next_state = S11; + end // case: S10 + S11: // extra state after S10: done and divBusy held low, then back to S0 + begin + done = 1'b0; + divBusy = 1'b0; + load_rega = 1'b0; + load_regb = 1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; next_state = S0; - end + end S13: // start of sqrt path begin done = 1'b0; @@ -479,8 +494,23 @@ module fsm ( sel_muxa = 3'b000; sel_muxb = 3'b000; sel_muxr = 1'b0; + next_state = S27; + end // case: S26 + S27: // extra state after S26: done and divBusy held low, then back to S0 + begin + done = 1'b0; + divBusy = 1'b0; + load_rega = 1'b0; + load_regb = 
1'b0; + load_regc = 1'b0; + load_regd = 1'b0; + load_regr = 1'b0; + load_regs = 1'b0; + sel_muxa = 3'b000; + sel_muxb = 3'b000; + sel_muxr = 1'b0; next_state = S0; - end + end default: begin done = 1'b0; diff --git a/wally-pipelined/testbench/testbench-f64.sv b/wally-pipelined/testbench/testbench-f64.sv index a9dd9ad2..5ae96f83 100755 --- a/wally-pipelined/testbench/testbench-f64.sv +++ b/wally-pipelined/testbench/testbench-f64.sv @@ -30,7 +30,7 @@ module testbench (); logic XExpMaxE; logic XNormE; logic FDivBusyE; - + logic start; logic reset; @@ -57,16 +57,13 @@ module testbench (); .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); fpdiv fdivsqrt (.op1, .op2, .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), - .reset, .clk, .start, .P(FmtE), .OvEn(1'b1), .UnEn(1'b1), + .reset, .clk, .start, .P(~FmtE), .OvEn(1'b0), .UnEn(1'b0), .XNaNQ(XNaNE), .YNaNQ(YNaNE), .XInfQ(XInfE), .YInfQ(YInfE), .XZeroQ(XZeroE), .YZeroQ(YZeroE), .FDivBusyE, .done(done), .AS_Result(AS_Result), .Flags(Flags)); + // current fpdivsqrt does not operation on denorms yet - assign XZeroM = (op1[51:0] == 52'h0); - assign YZeroM = (op2[51:0] == 52'h0); - assign XDenorm = XZeroE & ~XZeroM; - assign YDenorm = YZeroE & ~YZeroM; - assign Denorm = XDenorm | YDenorm; + assign Denorm = XDenormE | YDenormE | Flags[3]; // generate clock to sequence tests always @@ -77,7 +74,7 @@ module testbench (); initial begin handle3 = $fopen("f64_div_rne.out"); - $readmemh("../testbench/fp/f64_div_rne.tv", testvectors); + $readmemh("../testbench/fp/vectors/f64_div_rne.tv", testvectors); vectornum = 0; errors = 0; start = 1'b0; // reset @@ -90,7 +87,7 @@ module testbench (); // Operation (if applicable) #0 op_type = 1'b0; // Precision (32-bit or 64-bit) - #0 FmtE = 1'b0; + #0 FmtE = 1'b1; // From fctrl logic to dictate operation #0 FOpCtrlE = 3'b000; // Rounding Mode @@ -114,7 +111,7 @@ module testbench (); @(posedge 
clk); $fdisplay(desc3, "%h_%h_%h_%b_%b | %h_%b", op1, op2, AS_Result, Flags, Denorm, yexpected, (AS_Result==yexpected)); vectornum = vectornum + 1; - if (vectornum == 1) + if (vectornum == 40) $finish; if (testvectors[vectornum] === 200'bx) begin $display("%d tests completed", vectornum);