diff --git a/.gitignore b/.gitignore index 8e9c77346..d1dde2a66 100644 --- a/.gitignore +++ b/.gitignore @@ -188,6 +188,7 @@ sim/cfi/* sim/branch/* sim/obj_dir examples/verilog/fulladder/obj_dir +examples/verilog/fulladder/fulladder.vcd config/deriv docs/docker/buildroot-config-src docs/docker/testvector-generation diff --git a/bin/regression-wally b/bin/regression-wally index 44a65e820..fe2da740a 100755 --- a/bin/regression-wally +++ b/bin/regression-wally @@ -274,6 +274,7 @@ os.chdir(regressionDir) coveragesim = "questa" # Questa is required for code/functional coverage defaultsim = "questa" # Default simulator for all other tests; change to Verilator when flow is ready +#defaultsim = "verilator" # Default simulator for all other tests coverage = '--coverage' in sys.argv fp = '--fp' in sys.argv @@ -299,9 +300,9 @@ configs = [ TestCase( name="lints", variant="all", - cmd="lint-wally " + nightMode + " | tee " + WALLY + "/sim/questa/logs/all_lints.log", + cmd="lint-wally " + nightMode + " | tee " + WALLY + "/sim/verilator/logs/all_lints.log", grepstr="lints run with no errors or warnings", - grepfile = WALLY + "/sim/questa/logs/all_lints.log") + grepfile = WALLY + "/sim/verilator/logs/all_lints.log") ] if (coverage): # only run RV64GC tests on Questa in coverage mode diff --git a/examples/verilog/fulladder/fulladder.sv b/examples/verilog/fulladder/fulladder.sv index 478c3db82..156bec3f9 100644 --- a/examples/verilog/fulladder/fulladder.sv +++ b/examples/verilog/fulladder/fulladder.sv @@ -19,6 +19,8 @@ module testbench(); // at start of test, load vectors and pulse reset initial begin + $dumpfile("fulladder.vcd"); + $dumpvars; $readmemb("fulladder.tv", testvectors); cycle = 0; vectornum = 0; errors = 0; @@ -47,6 +49,7 @@ module testbench(); $finish; end end + endmodule module fulladder(input logic a, b, c, diff --git a/examples/verilog/fulladder/verilate b/examples/verilog/fulladder/verilate index 2b6d7908d..f1efcc74c 100755 --- a/examples/verilog/fulladder/verilate +++ b/examples/verilog/fulladder/verilate @@ -1,5 +1,3 @@ -#verilator --timescale "1ns/1ns" --timing -cc --exe --build --top-module testbench fulladder.sv -#verilator --timescale "1ns/1ns" --timing -cc --exe --top-module testbench fulladder.sv -#verilator --binary --top-module testbench fulladder.sv -verilator --timescale "1ns/1ns" --timing --binary --top-module testbench fulladder.sv +verilator --binary --top-module testbench --trace fulladder.sv +obj_dir/Vtestbench diff --git a/sim/verilator/Makefile b/sim/verilator/Makefile index 7acce82b8..5235456db 100644 --- a/sim/verilator/Makefile +++ b/sim/verilator/Makefile @@ -30,6 +30,10 @@ DEPENDENCIES=${WALLY}/config/shared/*.vh $(SOURCES) default: run +run: wkdir/$(WALLYCONF)_$(TEST)/Vtestbench + mkdir -p $(VERILATOR_DIR)/logs + wkdir/$(WALLYCONF)_$(TEST)/Vtestbench +TEST=$(TEST) + profile: obj_dir_profiling/Vtestbench_$(WALLYCONF) $(VERILATOR_DIR)/obj_dir_profiling/Vtestbench_$(WALLYCONF) +TEST=$(TEST) mv gmon.out gmon_$(WALLYCONF).out @@ -39,17 +43,13 @@ profile: obj_dir_profiling/Vtestbench_$(WALLYCONF) mv gmon_$(WALLYCONF)* $(VERILATOR_DIR)/logs_profiling echo "Please check $(VERILATOR_DIR)/logs_profiling/gmon_$(WALLYCONF)* for logs and output files." -run: wkdir/$(WALLYCONF)_$(TEST)/Vtestbench - mkdir -p $(VERILATOR_DIR)/logs - wkdir/$(WALLYCONF)_$(TEST)/Vtestbench +TEST=$(TEST) - wkdir/$(WALLYCONF)_$(TEST)/Vtestbench: $(DEPENDENCIES) verilator \ --Mdir wkdir/$(WALLYCONF)_$(TEST) -o Vtestbench \ --binary --trace \ $(OPT) $(PARAMS) $(NONPROF) \ $(EXTRA_ARGS) \ - --timescale "1ns/1ns" --timing --top-module testbench --relative-includes \ + --top-module testbench --relative-includes \ $(INCLUDE_PATH) \ ${WALLY}/sim/verilator/wrapper.c \ $(SOURCES) @@ -61,13 +61,10 @@ obj_dir_profiling/Vtestbench_$(WALLYCONF): $(DEPENDENCIES) --binary \ --prof-cfuncs $(OPT) $(PARAMS) \ $(EXTRA_ARGS) \ - --timescale "1ns/1ns" --timing --top-module testbench --relative-includes \ + --top-module testbench --relative-includes \ $(INCLUDE_PATH) \ ${WALLY}/sim/verilator/wrapper.c \ $(SOURCES) -questa: - vsim -c -do "do ${WALLY}/sim/wally-batch.do $(WALLYCONF) $(TEST)" - clean: rm -rf $(VERILATOR_DIR)/wkdir $(VERILATOR_DIR)/obj_dir_profiling $(VERILATOR_DIR)/logs $(VERILATOR_DIR)/logs_profiling \ No newline at end of file diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 36cc0be3b..22c650ed8 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -162,7 +162,9 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt logic [P.FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer logic mvsgn; // sign bit for extending move - logic [P.FLEN-1:0] FliResE; // Floating-point load immediate value + logic [P.FLEN-1:0] FliResE; // Zfa Floating-point load immediate value + logic [P.FLEN-1:0] FRoundE; // Zfa fround output + logic [4:0] FRoundFlagsE; // Zfa fround flags ////////////////////////////////////////////////////////////////////////////////////////// // Decode Stage: fctrl decoder, read register file @@ -267,15 +269,25 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE), .ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE)); - // floating-point load immediate: fli + // ZFA: fround and floating-point load immediate fli if (P.ZFA_SUPPORTED) begin logic [4:0] Rs1E; logic [1:0] Fmt2E; // Two-bit format field from instruction - + + // fround + fround #(P) fround(.Xs(XsE), .Xe(XeE), .Xm(XmE), + .XNaN(XNaNE), .XSNaN(XSNaNE), .XZero(XZeroE), .Fmt(FmtE), + .FRound(FRoundE), .FRoundFlags(FRoundFlagsE)); + + // fli flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, InstrD[19:15], Rs1E); flopenrc #(2) Fmt2EReg(clk, reset, FlushE, ~StallE, InstrD[26:25], Fmt2E); fli #(P) fli(.Rs1(Rs1E), .Fmt(Fmt2E), .Imm(FliResE)); - end else assign FliResE = '0; + end else begin + assign FRoundE = '0; + assign FRoundFlagsE = '0; + assign FliResE = '0; + end // fmv.*.x: NaN Box SrcA to extend integer to requested FP size if(P.FPSIZES == 1) diff --git a/src/fpu/fround.sv b/src/fpu/fround.sv index 195f44684..180f99605 100644 --- a/src/fpu/fround.sv +++ b/src/fpu/fround.sv @@ -34,10 +34,11 @@ module fround import cvw::*; #(parameter cvw_t P) ( input logic XNaN, // X is NaN input logic XSNaN, // X is Signalling NaN input logic XZero, // X is Zero - input logic [P.FMTBITS-1:0] Fmt // the input's precision (11=quad 01=double 00=single 10=half) + input logic [P.FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half) + output logic [P.FLEN-1:0] FRound, // Rounded result + output logic [4:0] FRoundFlags // Rounder flags ); - logic [P.NE-2:0] Bias; logic [P.NE-1:0] E; logic [P.NF:0] Imask, Tmasknonneg, Tmaskneg, Tmask, HotE, HotEP1, Trunc, Rnd; @@ -171,4 +172,7 @@ module fround import cvw::*; #(parameter cvw_t P) ( assign Inexact = FRoundNX & ~(XNaN | Exact) & (Rp | T'); */ + assign FRound = '0; + assign FRoundFlags = '0; + endmodule diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 811d53e21..78a5a0370 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -286,16 +286,18 @@ module ifu import cvw::*; #(parameter cvw_t P) ( assign IFUHBURST = 3'b0; assign {ICacheMiss, ICacheAccess, ICacheStallF} = '0; end + + // mux between the alignments of uncached reads. + if(P.XLEN == 64) mux4 #(32) UncachedShiftInstrMux(FetchBuffer[32-1:0], FetchBuffer[48-1:16], + FetchBuffer[64-1:32], {16'b0, FetchBuffer[64-1:48]}, + PCSpillF[2:1], ShiftUncachedInstr); + else mux2 #(32) UncachedShiftInstrMux(FetchBuffer[32-1:0], {16'b0, FetchBuffer[32-1:16]}, PCSpillF[1], ShiftUncachedInstr); end else begin : nobus // block: bus - assign {BusStall, CacheCommittedF} = '0; + assign {IFUHADDR, IFUHWRITE, IFUHSIZE, IFUHBURST, IFUHTRANS, + BusStall, CacheCommittedF, BusCommittedF, FetchBuffer} = '0; assign {ICacheStallF, ICacheMiss, ICacheAccess} = '0; assign InstrRawF = IROMInstrF; end - - // mux between the alignments of uncached reads. - if(P.XLEN == 64) mux4 #(32) UncachedShiftInstrMux(FetchBuffer[32-1:0], FetchBuffer[48-1:16], FetchBuffer[64-1:32], {16'b0, FetchBuffer[64-1:48]}, - PCSpillF[2:1], ShiftUncachedInstr); - else mux2 #(32) UncachedShiftInstrMux(FetchBuffer[32-1:0], {16'b0, FetchBuffer[32-1:16]}, PCSpillF[1], ShiftUncachedInstr); assign IFUCacheBusStallF = ICacheStallF | BusStall; assign IFUStallF = IFUCacheBusStallF | SelSpillNextF; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index c4e0e009e..cb9cd0722 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -383,7 +383,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign {DCacheStallM, DCacheCommittedM, DCacheMiss, DCacheAccess} = '0; end end else begin: nobus // block: bus, only DTIM - assign LSUHWDATA = '0; + assign {LSUHWDATA, LSUHADDR, LSUHWRITE, LSUHSIZE, LSUHBURST, LSUHTRANS, LSUHWSTRB} = '0; + assign DCacheReadDataWordM = '0; assign ReadDataWordMuxM = DTIMReadDataWordM; assign {BusStall, BusCommittedM} = '0; assign {DCacheMiss, DCacheAccess} = '0; diff --git a/src/lsu/subwordread.sv b/src/lsu/subwordread.sv index a5ccd12bf..a0e1bfc2f 100644 --- a/src/lsu/subwordread.sv +++ b/src/lsu/subwordread.sv @@ -52,6 +52,7 @@ module subwordread import cvw::*; #(parameter cvw_t P) ( // Use indexed part select to imply muxes to select each size of subword if (P.LLEN == 128) mux2 #(64) dblmux(ReadDataWordMuxM[63:0], ReadDataWordMuxM[127:64], PAdrSwapM[3], DblWordM); else if (P.LLEN == 64) assign DblWordM = ReadDataWordMuxM; + else assign DblWordM = '0; // unused for RV32F if (P.LLEN >= 64) mux2 #(32) wordmux(DblWordM[31:0], DblWordM[63:32], PAdrSwapM[2], WordM); else assign WordM = ReadDataWordMuxM; mux2 #(16) halfwordmux(WordM[15:0], WordM[31:16], PAdrSwapM[1], HalfwordM); diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 860f98559..348ed6872 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -264,6 +264,9 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .HREADY, .HRESP, .HCLK, .HRESETn, .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, .HPROT, .HTRANS, .HMASTLOCK); + end else begin + assign {IFUHREADY, LSUHREADY, HCLK, HRESETn, HADDR, HWDATA, + HWSTRB, HWRITE, HSIZE, HBURST, HPROT, HTRANS, HMASTLOCK} = '0; end // global stall and flush control @@ -302,15 +305,12 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, .FRM_REGW, .ENVCFG_CBE, .ENVCFG_PBMTE, .ENVCFG_ADUE, .wfiM, .IntPendingM, .BigEndianM); end else begin - assign CSRReadValW = '0; - assign EPCM = '0; - assign TrapVectorM = '0; - assign RetM = 1'b0; - assign TrapM = 1'b0; - assign wfiM = 1'b0; - assign IntPendingM = 1'b0; - assign sfencevmaM = 1'b0; - assign BigEndianM = 1'b0; + assign {CSRReadValW, PrivilegeModeW, + SATP_REGW, STATUS_MXR, STATUS_SUM, STATUS_MPRV, STATUS_MPP, STATUS_FS, FRM_REGW, + // PMPCFG_ARRAY_REGW, PMPADDR_ARRAY_REGW, + ENVCFG_CBE, ENVCFG_PBMTE, ENVCFG_ADUE, + EPCM, TrapVectorM, RetM, TrapM, + sfencevmaM, BigEndianM, wfiM, IntPendingM} = '0; end // multiply/divide unit @@ -351,15 +351,9 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( .SetFflagsM, // FPU flags (to privileged unit) .FIntDivResultW); end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low - assign FPUStallD = 1'b0; - assign FWriteIntE = 1'b0; - assign FCvtIntE = 1'b0; - assign FIntResM = '0; - assign FCvtIntW = 1'b0; - assign FDivBusyE = 1'b0; - assign IllegalFPUInstrD = 1'b1; - assign SetFflagsM = '0; - assign FpLoadStoreM = 1'b0; + assign {FPUStallD, FWriteIntE, FCvtIntE, FIntResM, FCvtIntW, + IllegalFPUInstrD, SetFflagsM, FpLoadStoreM, + FWriteDataM, FCvtIntResW, FIntDivResultW, FDivBusyE} = '0; end endmodule diff --git a/src/wally/wallypipelinedsoc.sv b/src/wally/wallypipelinedsoc.sv index 8e223f858..4489646fe 100644 --- a/src/wally/wallypipelinedsoc.sv +++ b/src/wally/wallypipelinedsoc.sv @@ -85,6 +85,9 @@ module wallypipelinedsoc import cvw::*; #(parameter cvw_t P) ( .HREADYEXT, .HRESPEXT, .HRDATA, .HREADY, .HRESP, .HSELEXT, .HSELEXTSDC, .MTimerInt, .MSwInt, .MExtInt, .SExtInt, .GPIOIN, .GPIOOUT, .GPIOEN, .UARTSin, .UARTSout, .MTIME_CLINT, .SDCIntr, .SPIIn, .SPIOut, .SPICS); + end else begin + assign {HRDATA, HREADY, HRESP, HSELEXT, HSELEXTSDC, MTimerInt, MSwInt, MExtInt, SExtInt, + MTIME_CLINT, GPIOOUT, GPIOEN, UARTSout, SPIOut, SPICS} = '0; end endmodule diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py index 150f20efb..354c6f70f 100755 --- a/synthDC/extractSummary.py +++ b/synthDC/extractSummary.py @@ -33,9 +33,9 @@ def synthsintocsv(): for oneSynth in allSynths: descrip = specReg.findall(oneSynth) - print("From " + oneSynth + " Find ") - for d in descrip: - print(d) +# print("From " + oneSynth + " Find ") +# for d in descrip: +# print(d) if (descrip[3] == "sram"): base = 4 else: @@ -54,7 +54,7 @@ def synthsintocsv(): for phrase in ['Path Slack', 'Design Area']: bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*qor*' bashCommand = bashCommand.format(phrase) - print(bashCommand) +# print(bashCommand) try: output = subprocess.check_output(['bash','-c', bashCommand]) nums = metricReg.findall(str(output)) diff --git a/synthDC/wallySynthAll.sh b/synthDC/wallySynthAll.sh index 9af40a379..f235c73e3 100755 --- a/synthDC/wallySynthAll.sh +++ b/synthDC/wallySynthAll.sh @@ -1,14 +1,21 @@ # Run all Wally synthesis experiments from chapter 8 # However, trying to run the freqsweeps at the same time maxes out licenses and some runs fail -#./wallySynth.py --freqsweep 330 --tech sky130 -#./wallySynth.py --freqsweep 870 --tech sky90 -#./wallySynth.py --freqsweep 2800 --tech tsmc28psyn --usesram +# Adding the sleep gives them time to finish. +./wallySynth.py --freqsweep 330 --tech sky130 +sleep 300 +./wallySynth.py --freqsweep 870 --tech sky90 +sleep 300 +./wallySynth.py --freqsweep 2800 --tech tsmc28psyn --usesram +sleep 300 + +# These jobs can run in parallel and take longer ./wallySynth.py --configsweep --tech sky130 --targetfreq 330 ./wallySynth.py --configsweep --tech sky90 --targetfreq 870 ./wallySynth.py --configsweep --tech tsmc28psyn --targetfreq 2800 --usesram ./wallySynth.py --featuresweep --tech sky130 --targetfreq 330 ./wallySynth.py --featuresweep --tech sky90 --targetfreq 870 ./wallySynth.py --featuresweep --tech tsmc28psyn --targetfreq 2800 --usesram -# Extract summary data (run this by hand after all experiments finish) -#./extractSummary.py --sky130freq 330 --sky90freq 870 --tsmcfreq 2800 + +# Extract summary data (run this by hand after all experiments finish) +./extractSummary.py --sky130freq 330 --sky90freq 870 --tsmcfreq 2800