Merge pull request #760 from davidharrishmc/dev

Synthesis and VCS fixes
This commit is contained in:
Rose Thompson 2024-04-29 13:16:38 -05:00 committed by GitHub
commit d196f8f8af
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 79 additions and 55 deletions

1
.gitignore vendored
View File

@ -188,6 +188,7 @@ sim/cfi/*
sim/branch/*
sim/obj_dir
examples/verilog/fulladder/obj_dir
examples/verilog/fulladder/fulladder.vcd
config/deriv
docs/docker/buildroot-config-src
docs/docker/testvector-generation

View File

@ -274,6 +274,7 @@ os.chdir(regressionDir)
coveragesim = "questa" # Questa is required for code/functional coverage
defaultsim = "questa" # Default simulator for all other tests; change to Verilator when flow is ready
#defaultsim = "verilator" # Default simulator for all other tests
coverage = '--coverage' in sys.argv
fp = '--fp' in sys.argv
@ -299,9 +300,9 @@ configs = [
TestCase(
    name="lints",
    variant="all",
    # Pipe the lint output through tee into the sim/verilator log directory and grep
    # that same file for the success string.  cmd= and grepfile= are each passed
    # exactly once: repeating a keyword argument in a call is a SyntaxError.
    cmd="lint-wally " + nightMode + " | tee " + WALLY + "/sim/verilator/logs/all_lints.log",
    grepstr="lints run with no errors or warnings",
    grepfile = WALLY + "/sim/verilator/logs/all_lints.log")
]
if (coverage): # only run RV64GC tests on Questa in coverage mode

View File

@ -19,6 +19,8 @@ module testbench();
// at start of test, load vectors and pulse reset
initial
begin
$dumpfile("fulladder.vcd");
$dumpvars;
$readmemb("fulladder.tv", testvectors);
cycle = 0;
vectornum = 0; errors = 0;
@ -47,6 +49,7 @@ module testbench();
$finish;
end
end
endmodule
module fulladder(input logic a, b, c,

View File

@ -1,5 +1,3 @@
# Build the fulladder testbench with Verilator, then run the generated simulator.
# Only one build is performed: each invocation regenerates obj_dir/Vtestbench, so an
# earlier build would simply be overwritten by the --trace build that is actually run.
# Superseded command lines are kept below, commented out, for reference.
#verilator --timescale "1ns/1ns" --timing -cc --exe --build --top-module testbench fulladder.sv
#verilator --timescale "1ns/1ns" --timing -cc --exe --top-module testbench fulladder.sv
#verilator --binary --top-module testbench fulladder.sv
#verilator --timescale "1ns/1ns" --timing --binary --top-module testbench fulladder.sv
verilator --binary --top-module testbench --trace fulladder.sv
obj_dir/Vtestbench

View File

@ -30,6 +30,10 @@ DEPENDENCIES=${WALLY}/config/shared/*.vh $(SOURCES)
# default: alias for the run target.
default: run
# run: requires the Verilated testbench binary for the selected $(WALLYCONF)/$(TEST)
# pair, makes sure the log directory exists, then launches the binary with the test
# name passed as the +TEST plusarg.
run: wkdir/$(WALLYCONF)_$(TEST)/Vtestbench
mkdir -p $(VERILATOR_DIR)/logs
wkdir/$(WALLYCONF)_$(TEST)/Vtestbench +TEST=$(TEST)
profile: obj_dir_profiling/Vtestbench_$(WALLYCONF)
$(VERILATOR_DIR)/obj_dir_profiling/Vtestbench_$(WALLYCONF) +TEST=$(TEST)
mv gmon.out gmon_$(WALLYCONF).out
@ -39,17 +43,13 @@ profile: obj_dir_profiling/Vtestbench_$(WALLYCONF)
mv gmon_$(WALLYCONF)* $(VERILATOR_DIR)/logs_profiling
echo "Please check $(VERILATOR_DIR)/logs_profiling/gmon_$(WALLYCONF)* for logs and output files."
# Verilate and compile the testbench binary for one $(WALLYCONF)/$(TEST) pair into
# its own working directory.  Rebuilt whenever anything in $(DEPENDENCIES) changes.
#
# The run target is already defined earlier in this file with the same prerequisite
# and recipe, so it is not repeated here (a second recipe for the same target makes
# GNU Make warn and discard the first one).  Each option is passed exactly once.
wkdir/$(WALLYCONF)_$(TEST)/Vtestbench: $(DEPENDENCIES)
	verilator \
	--Mdir wkdir/$(WALLYCONF)_$(TEST) -o Vtestbench \
	--binary --trace \
	$(OPT) $(PARAMS) $(NONPROF) \
	$(EXTRA_ARGS) \
	--timescale "1ns/1ns" --timing --top-module testbench --relative-includes \
	$(INCLUDE_PATH) \
	${WALLY}/sim/verilator/wrapper.c \
	$(SOURCES)
@ -61,13 +61,10 @@ obj_dir_profiling/Vtestbench_$(WALLYCONF): $(DEPENDENCIES)
--binary \
--prof-cfuncs $(OPT) $(PARAMS) \
$(EXTRA_ARGS) \
--timescale "1ns/1ns" --timing --top-module testbench --relative-includes \
--top-module testbench --relative-includes \
$(INCLUDE_PATH) \
${WALLY}/sim/verilator/wrapper.c \
$(SOURCES)
# questa and clean are commands, not files: declare them phony so a stray file with
# either name can never make the target look up to date.
.PHONY: questa clean

# questa: run the selected configuration/test through vsim in command-line (-c)
# mode using the wally-batch.do script.
questa:
	vsim -c -do "do ${WALLY}/sim/wally-batch.do $(WALLYCONF) $(TEST)"

# clean: delete the build and log directories under $(VERILATOR_DIR).
clean:
	rm -rf $(VERILATOR_DIR)/wkdir $(VERILATOR_DIR)/obj_dir_profiling $(VERILATOR_DIR)/logs $(VERILATOR_DIR)/logs_profiling

View File

@ -162,7 +162,9 @@ module fpu import cvw::*; #(parameter cvw_t P) (
logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt
logic [P.FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer
logic mvsgn; // sign bit for extending move
// Zfa results; each signal is declared exactly once
logic [P.FLEN-1:0]           FliResE;                            // Zfa Floating-point load immediate value
logic [P.FLEN-1:0]           FRoundE;                            // Zfa fround output
logic [4:0]                  FRoundFlagsE;                       // Zfa fround flags
//////////////////////////////////////////////////////////////////////////////////////////
// Decode Stage: fctrl decoder, read register file
@ -267,15 +269,25 @@ module fpu import cvw::*; #(parameter cvw_t P) (
.ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE),
.ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE));
// ZFA: fround and floating-point load immediate fli
// When Zfa is configured, instantiate both units; otherwise tie their outputs to zero.
// There is exactly one else-branch so FliResE has a single driver.
if (P.ZFA_SUPPORTED) begin
  logic [4:0] Rs1E;  // InstrD[19:15] as captured by Rs1EReg
  logic [1:0] Fmt2E; // Two-bit format field from instruction

  // fround
  fround #(P) fround(.Xs(XsE), .Xe(XeE), .Xm(XmE),
                     .XNaN(XNaNE), .XSNaN(XSNaNE), .XZero(XZeroE), .Fmt(FmtE),
                     .FRound(FRoundE), .FRoundFlags(FRoundFlagsE));

  // fli: register the instruction fields it needs (cleared by FlushE, held while StallE)
  flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, InstrD[19:15], Rs1E);
  flopenrc #(2) Fmt2EReg(clk, reset, FlushE, ~StallE, InstrD[26:25], Fmt2E);
  fli #(P) fli(.Rs1(Rs1E), .Fmt(Fmt2E), .Imm(FliResE));
end else begin
  assign FRoundE      = '0;
  assign FRoundFlagsE = '0;
  assign FliResE      = '0;
end
// fmv.*.x: NaN Box SrcA to extend integer to requested FP size
if(P.FPSIZES == 1)

View File

@ -34,10 +34,11 @@ module fround import cvw::*; #(parameter cvw_t P) (
input logic XNaN, // X is NaN
input logic XSNaN, // X is Signalling NaN
input logic XZero, // X is Zero
input  logic [P.FMTBITS-1:0] Fmt,          // the input's precision (11=quad 01=double 00=single 10=half)
output logic [P.FLEN-1:0]    FRound,       // Rounded result
output logic [4:0]           FRoundFlags   // Rounder flags
);
logic [P.NE-2:0] Bias;
logic [P.NE-1:0] E;
logic [P.NF:0] Imask, Tmasknonneg, Tmaskneg, Tmask, HotE, HotEP1, Trunc, Rnd;
@ -171,4 +172,7 @@ module fround import cvw::*; #(parameter cvw_t P) (
assign Inexact = FRoundNX & ~(XNaN | Exact) & (Rp | T');
*/
assign FRound = '0;
assign FRoundFlags = '0;
endmodule

View File

@ -286,16 +286,18 @@ module ifu import cvw::*; #(parameter cvw_t P) (
assign IFUHBURST = 3'b0;
assign {ICacheMiss, ICacheAccess, ICacheStallF} = '0;
end
// mux between the alignments of uncached reads.
if(P.XLEN == 64) mux4 #(32) UncachedShiftInstrMux(FetchBuffer[32-1:0], FetchBuffer[48-1:16],
FetchBuffer[64-1:32], {16'b0, FetchBuffer[64-1:48]},
PCSpillF[2:1], ShiftUncachedInstr);
else mux2 #(32) UncachedShiftInstrMux(FetchBuffer[32-1:0], {16'b0, FetchBuffer[32-1:16]}, PCSpillF[1], ShiftUncachedInstr);
end else begin : nobus // block: bus
// No bus: drive the IFUH* bus outputs, the bus status flags and the fetch buffer to
// zero with a single continuous assignment (one driver per signal).
assign {IFUHADDR, IFUHWRITE, IFUHSIZE, IFUHBURST, IFUHTRANS,
        BusStall, CacheCommittedF, BusCommittedF, FetchBuffer} = '0;
assign {ICacheStallF, ICacheMiss, ICacheAccess} = '0;
assign InstrRawF = IROMInstrF;
end
// mux between the alignments of uncached reads.
if(P.XLEN == 64) mux4 #(32) UncachedShiftInstrMux(FetchBuffer[32-1:0], FetchBuffer[48-1:16], FetchBuffer[64-1:32], {16'b0, FetchBuffer[64-1:48]},
PCSpillF[2:1], ShiftUncachedInstr);
else mux2 #(32) UncachedShiftInstrMux(FetchBuffer[32-1:0], {16'b0, FetchBuffer[32-1:16]}, PCSpillF[1], ShiftUncachedInstr);
assign IFUCacheBusStallF = ICacheStallF | BusStall;
assign IFUStallF = IFUCacheBusStallF | SelSpillNextF;

View File

@ -383,7 +383,8 @@ module lsu import cvw::*; #(parameter cvw_t P) (
assign {DCacheStallM, DCacheCommittedM, DCacheMiss, DCacheAccess} = '0;
end
end else begin: nobus // block: bus, only DTIM
// No bus: drive every LSUH* bus output to zero with a single continuous assignment
// (LSUHWDATA is included here, so it must not also be assigned on its own).
assign {LSUHWDATA, LSUHADDR, LSUHWRITE, LSUHSIZE, LSUHBURST, LSUHTRANS, LSUHWSTRB} = '0;
assign DCacheReadDataWordM = '0;
assign ReadDataWordMuxM = DTIMReadDataWordM;
assign {BusStall, BusCommittedM} = '0;
assign {DCacheMiss, DCacheAccess} = '0;

View File

@ -52,6 +52,7 @@ module subwordread import cvw::*; #(parameter cvw_t P) (
// Use indexed part select to imply muxes to select each size of subword
if (P.LLEN == 128) mux2 #(64) dblmux(ReadDataWordMuxM[63:0], ReadDataWordMuxM[127:64], PAdrSwapM[3], DblWordM);
else if (P.LLEN == 64) assign DblWordM = ReadDataWordMuxM;
else assign DblWordM = '0; // unused for RV32F
if (P.LLEN >= 64) mux2 #(32) wordmux(DblWordM[31:0], DblWordM[63:32], PAdrSwapM[2], WordM);
else assign WordM = ReadDataWordMuxM;
mux2 #(16) halfwordmux(WordM[15:0], WordM[31:16], PAdrSwapM[1], HalfwordM);

View File

@ -264,6 +264,9 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
.HREADY, .HRESP, .HCLK, .HRESETn,
.HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST,
.HPROT, .HTRANS, .HMASTLOCK);
end else begin
assign {IFUHREADY, LSUHREADY, HCLK, HRESETn, HADDR, HWDATA,
HWSTRB, HWRITE, HSIZE, HBURST, HPROT, HTRANS, HMASTLOCK} = '0;
end
// global stall and flush control
@ -302,15 +305,12 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW,
.FRM_REGW, .ENVCFG_CBE, .ENVCFG_PBMTE, .ENVCFG_ADUE, .wfiM, .IntPendingM, .BigEndianM);
end else begin
  // Privileged unit not instantiated: drive its outputs to zero with one continuous
  // assignment so that every signal has exactly one driver.
  // NOTE(review): the two PMP arrays are commented out of the list and are therefore
  // not driven by this branch - presumably because unpacked arrays cannot appear in
  // a concatenation; confirm they are tied off or unused elsewhere.
  assign {CSRReadValW, PrivilegeModeW,
          SATP_REGW, STATUS_MXR, STATUS_SUM, STATUS_MPRV, STATUS_MPP, STATUS_FS, FRM_REGW,
          // PMPCFG_ARRAY_REGW, PMPADDR_ARRAY_REGW,
          ENVCFG_CBE, ENVCFG_PBMTE, ENVCFG_ADUE,
          EPCM, TrapVectorM, RetM, TrapM,
          sfencevmaM, BigEndianM, wfiM, IntPendingM} = '0;
end
// multiply/divide unit
@ -351,15 +351,9 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
.SetFflagsM, // FPU flags (to privileged unit)
.FIntDivResultW);
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs off
  // All FPU outputs except IllegalFPUInstrD are driven low by one continuous
  // assignment, so each signal has exactly one driver.
  assign {FPUStallD, FWriteIntE, FCvtIntE, FIntResM, FCvtIntW,
          SetFflagsM, FpLoadStoreM,
          FWriteDataM, FCvtIntResW, FIntDivResultW, FDivBusyE} = '0;
  // IllegalFPUInstrD is the one output that must be tied HIGH: with no FPU present
  // no instruction is a legal FPU instruction.  Folding it into the all-zeros
  // concatenation above would invert its polarity.
  // NOTE(review): confirm against the decoder that consumes IllegalFPUInstrD.
  assign IllegalFPUInstrD = 1'b1;
end
endmodule
endmodule

View File

@ -85,6 +85,9 @@ module wallypipelinedsoc import cvw::*; #(parameter cvw_t P) (
.HREADYEXT, .HRESPEXT, .HRDATA, .HREADY, .HRESP, .HSELEXT, .HSELEXTSDC,
.MTimerInt, .MSwInt, .MExtInt, .SExtInt, .GPIOIN, .GPIOOUT, .GPIOEN, .UARTSin,
.UARTSout, .MTIME_CLINT, .SDCIntr, .SPIIn, .SPIOut, .SPICS);
end else begin
assign {HRDATA, HREADY, HRESP, HSELEXT, HSELEXTSDC, MTimerInt, MSwInt, MExtInt, SExtInt,
MTIME_CLINT, GPIOOUT, GPIOEN, UARTSout, SPIOut, SPICS} = '0;
end
endmodule

View File

@ -33,9 +33,9 @@ def synthsintocsv():
for oneSynth in allSynths:
descrip = specReg.findall(oneSynth)
print("From " + oneSynth + " Find ")
for d in descrip:
print(d)
# print("From " + oneSynth + " Find ")
# for d in descrip:
# print(d)
if (descrip[3] == "sram"):
base = 4
else:
@ -54,7 +54,7 @@ def synthsintocsv():
for phrase in ['Path Slack', 'Design Area']:
bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*qor*'
bashCommand = bashCommand.format(phrase)
print(bashCommand)
# print(bashCommand)
try:
output = subprocess.check_output(['bash','-c', bashCommand])
nums = metricReg.findall(str(output))

View File

@ -1,14 +1,21 @@
# Run all Wally synthesis experiments from chapter 8
# However, trying to run the freqsweeps at the same time maxes out licenses and some runs fail
#./wallySynth.py --freqsweep 330 --tech sky130
#./wallySynth.py --freqsweep 870 --tech sky90
#./wallySynth.py --freqsweep 2800 --tech tsmc28psyn --usesram
# Adding the sleep gives them time to finish.
./wallySynth.py --freqsweep 330 --tech sky130
sleep 300
./wallySynth.py --freqsweep 870 --tech sky90
sleep 300
./wallySynth.py --freqsweep 2800 --tech tsmc28psyn --usesram
sleep 300
# These jobs can run in parallel and take longer
./wallySynth.py --configsweep --tech sky130 --targetfreq 330
./wallySynth.py --configsweep --tech sky90 --targetfreq 870
./wallySynth.py --configsweep --tech tsmc28psyn --targetfreq 2800 --usesram
./wallySynth.py --featuresweep --tech sky130 --targetfreq 330
./wallySynth.py --featuresweep --tech sky90 --targetfreq 870
./wallySynth.py --featuresweep --tech tsmc28psyn --targetfreq 2800 --usesram
# Extract summary data (run this by hand after all experiments finish)
# Deliberately left commented out: running it from this script would start it
# immediately after the sweeps above are launched.
# NOTE(review): presumably wallySynth.py returns before its jobs complete - confirm.
#./extractSummary.py --sky130freq 330 --sky90freq 870 --tsmcfreq 2800