diff --git a/.gitignore b/.gitignore index 41ccba5f9..b4223b50e 100644 --- a/.gitignore +++ b/.gitignore @@ -62,6 +62,7 @@ examples/fp/fpcalc/fpcalc examples/C/inline/inline examples/C/sum_mixed/sum_mixed examples/asm/trap/trap +examples/asm/etc/pause src/fma/fma16_testgen linux/devicetree/debug/* !linux/devicetree/debug/dump-dts.sh @@ -82,6 +83,7 @@ synthDC/ppa/plots synthDC/wallyplots/ synthDC/runArchive synthDC/hdl +synthDC/wrappers sim/power.saif tests/fp/vectors/*.tv synthDC/Summary.csv diff --git a/bin/CModelBTBAccuracy.sh b/bin/CModelBTBAccuracy.sh new file mode 100755 index 000000000..5cde4238c --- /dev/null +++ b/bin/CModelBTBAccuracy.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +########################################### +## Written: ross1728@gmail.com +## Created: 23 October 2023 +## Modified: +## +## Purpose: Takes a directory of branch outcomes organized as 1 files per benchmark. +## Computes the geometric mean for btb accuracy +## +## A component of the CORE-V-WALLY configurable RISC-V project. +## +## Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +## +## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +## +## Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +## except in compliance with the License, or, at your option, the Apache License version 2.0. You +## may obtain a copy of the License at +## +## https:##solderpad.org/licenses/SHL-2.1/ +## +## Unless required by applicable law or agreed to in writing, any work distributed under the +## License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +## either express or implied. See the License for the specific language governing permissions +## and limitations under the License. +################################################################################################ + + +Directory="$1" +Files="$1/*.log" + +for Size in $(seq 6 2 16) +do + Product=1.0 + Count=0 + BMDRArray=() + for File in $Files + do + lines=`sim_bp gshare 16 16 $Size 1 $File | tail -5` + Total=`echo "$lines" | head -1 | awk '{print $5}'` + Miss=`echo "$lines" | tail -2 | head -1 | awk '{print $8}'` + BMDR=`echo "$Miss / $Total" | bc -l` + BMDRArray+=("$BMDR") + if [ $Miss -eq 0 ]; then + Product=`echo "scale=200; $Product / $Total" | bc -l` + else + Product=`echo "scale=200; $Product * $Miss / $Total" | bc -l` + fi + Count=$((Count+1)) + done + # with such long precision bc outputs onto multiple lines + # must remove \n and \ from string + Product=`echo "$Product" | tr -d '\n' | tr -d '\\\'` + GeoMean=`perl -E "say $Product**(1/$Count)"` + echo "$Pred$Size $GeoMean" +done diff --git a/bin/libppa.pl b/bin/libppa.pl index 56b3702dd..ccf4f1548 100755 --- a/bin/libppa.pl +++ b/bin/libppa.pl @@ -41,6 +41,16 @@ my @cr; my @cf; my @rt; my @ft; # cell and corners to analyze my $libpath; my $libbase; my $cellname; my @corners; +# Sky130 +$libpath ="/opt/riscv/cad/lib/sky130_osu_sc_t12/12T_ms/lib"; +$libbase = "sky130_osu_sc_12T_ms_"; +$cellname = "sky130_osu_sc_12T_ms__inv_1"; +@corners = ("TT_1P8_25C.ccs", "tt_1P80_25C.ccs", "tt_1P62_25C.ccs", "tt_1P89_25C.ccs", "ss_1P60_-40C.ccs", "ss_1P60_100C.ccs", "ss_1P60_150C.ccs", "ff_1P95_-40C.ccs", "ff_1P95_100C.ccs", "ff_1P95_150C.ccs"); +printf("Library $libbase Cell $cellname\n"); +foreach my $corner (@corners) { + &analyzeCell($corner); +} + # Sky90 $libpath ="/opt/riscv/cad/lib/sky90/sky90_sc/V1.7.4/lib"; $libbase = "scc9gena_"; @@ -54,7 +64,7 @@ foreach my $corner (@corners) { # TSMC $libpath = "/proj/models/tsmc28/libraries/28nmtsmc/tcbn28hpcplusbwp30p140_190a/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tcbn28hpcplusbwp30p140_180a"; $libbase = "tcbn28hpcplusbwp30p140"; -$cellname = "INVD1..."; // replace this with the full name of the library cell +$cellname = "INVD1..."; # replace this with the full name of the library cell @corners = ("tt0p9v25c", "tt0p8v25c", "tt1v25c", "tt0p9v85c", "ssg0p9vm40c", "ssg0p9v125c", "ssg0p81vm40c", "ssg0p81v125c", "ffg0p88vm40c", "ffg0p88v125c", "ffg0p99vm40c", "ffg0p99v125c"); printf("\nLibrary $libbase Cell $cellname\n"); foreach my $corner (@corners) { @@ -129,7 +139,7 @@ sub analyzeCell { my $delay = &computeDelay($cap); my $cornerr = sprintf("%20s", $corner); my $delayr = sprintf("%2.1f", $delay*1000); - my $leakager = sprintf("%3.1f", $leakage); + my $leakager = sprintf("%3.3f", $leakage); print("$cornerr: Delay $delayr Leakage: $leakager capacitance: $cap\n"); #print("$cellname $corner: Area $area Leakage: $leakage capacitance: $cap delay $delay\n"); diff --git a/bin/wally-tool-chain-install.sh b/bin/wally-tool-chain-install.sh index 7ff470f17..26dd54c8d 100755 --- a/bin/wally-tool-chain-install.sh +++ b/bin/wally-tool-chain-install.sh @@ -167,3 +167,7 @@ sudo ln -sf $RISCV/sail-riscv/c_emulator/riscv_sim_RV32 /usr/bin/riscv_sim_RV32 sudo pip3 install testresources pip3 install git+https://github.com/riscv/riscof.git +# Download OSU Skywater 130 cell library +sudo mkdir -p $RISCV/cad/lib +cd $RISCV/cad/lib +sudo git clone https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12 diff --git a/config/rv32i/config.vh b/config/rv32i/config.vh index fa34d978f..aa7186761 100644 --- a/config/rv32i/config.vh +++ b/config/rv32i/config.vh @@ -36,7 +36,7 @@ localparam XLEN = 32'd32; localparam IEEE754 = 0; // I -localparam MISA = (32'h00000104); +localparam MISA = (32'h00000100); localparam ZICSR_SUPPORTED = 0; localparam ZIFENCEI_SUPPORTED = 0; localparam COUNTERS = 0; diff --git a/config/rv64i/config.vh b/config/rv64i/config.vh index 8fccc50f1..e547dca6f 100644 --- a/config/rv64i/config.vh +++ b/config/rv64i/config.vh @@ -36,7 +36,7 @@ localparam XLEN = 32'd64; localparam IEEE754 = 0; // MISA RISC-V configuration per specification -localparam MISA = (32'h00000104); +localparam MISA = (32'h00000100); localparam ZICSR_SUPPORTED = 0; localparam ZIFENCEI_SUPPORTED = 0; localparam COUNTERS = 0; diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 54a6675ee..48f02b848 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -24,6 +24,7 @@ localparam SV48 = 4'd9; localparam A_SUPPORTED = ((MISA >> 0) % 2 == 1); localparam B_SUPPORTED = ((ZBA_SUPPORTED | ZBB_SUPPORTED | ZBC_SUPPORTED | ZBS_SUPPORTED));// not based on MISA localparam C_SUPPORTED = ((MISA >> 2) % 2 == 1); +localparam COMPRESSED_SUPPORTED = C_SUPPORTED | ZCA_SUPPORTED; localparam D_SUPPORTED = ((MISA >> 3) % 2 == 1); localparam E_SUPPORTED = ((MISA >> 4) % 2 == 1); localparam F_SUPPORTED = ((MISA >> 5) % 2 == 1); diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh index b24be045e..d04b35e56 100644 --- a/config/shared/parameter-defs.vh +++ b/config/shared/parameter-defs.vh @@ -123,6 +123,7 @@ localparam cvw_t P = '{ A_SUPPORTED : A_SUPPORTED, B_SUPPORTED : B_SUPPORTED, C_SUPPORTED : C_SUPPORTED, + COMPRESSED_SUPPORTED : COMPRESSED_SUPPORTED, D_SUPPORTED : D_SUPPORTED, E_SUPPORTED : E_SUPPORTED, F_SUPPORTED : F_SUPPORTED, diff --git a/examples/asm/etc/Makefile b/examples/asm/etc/Makefile new file mode 100644 index 000000000..72f99e975 --- /dev/null +++ b/examples/asm/etc/Makefile @@ -0,0 +1,11 @@ +TARGET = pause + +$(TARGET).objdump: $(TARGET) + riscv64-unknown-elf-objdump -D $(TARGET) > $(TARGET).objdump + +pause: pause.S Makefile + riscv64-unknown-elf-gcc -o pause -march=rv32ia_zihintpause -mabi=ilp32 -mcmodel=medany \ + -nostartfiles -T../../link/link.ld pause.S + +clean: + rm -f $(TARGET) $(TARGET).objdump diff --git a/examples/asm/etc/pause.S b/examples/asm/etc/pause.S new file mode 100644 index 000000000..4e0aacfb4 --- /dev/null +++ b/examples/asm/etc/pause.S @@ -0,0 +1,25 @@ +.section .text.init +.globl rvtest_entry_point +rvtest_entry_point: + + +la a0, lock + +spinlock: # address of lock is in a0 + lr.w t0, (a0) # read the lock + bnez t0, retry # spin until free + li t1, 1 + sc.w t0, t1, (a0) # try to write a 1 to take lock + bnez t0, retry # spin until successful + ret # got the lock! +retry: # no lock yet + pause # pause hint to reduce spin power + j spinlock # try again + + +self_loop: + j self_loop + +.data +lock: + .word 1 \ No newline at end of file diff --git a/sim/bpred-sim.py b/sim/bpred-sim.py index 9a59e8866..209e21fc4 100755 --- a/sim/bpred-sim.py +++ b/sim/bpred-sim.py @@ -11,6 +11,7 @@ # ################################## import sys,os,shutil +import argparse class bcolors: HEADER = '\033[95m' @@ -46,55 +47,6 @@ configs = [ ) ] -# bpdSize = [6, 8, 10, 12, 14, 16] -# bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic', 'local_basic'] -# for CurrBPType in bpdType: -# for CurrBPSize in bpdSize: -# name = CurrBPType+str(CurrBPSize) -# configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_OVERRIDE +define+BPRED_TYPE=" + str(bpdType.index(CurrBPType)) + "+define+BPRED_SIZE=" + str(CurrBPSize) -# tc = TestCase( -# name=name, -# variant="rv32gc", -# cmd="vsim > {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < {} -c <0; log2=log2+1) - value = value>>1; + int val; + val = value; + for (log2 = 0; val > 0; log2 = log2+1) + val = val >> 1; return log2; endfunction // log2 // coverage on diff --git a/src/cvw.sv b/src/cvw.sv index 4283172b3..4cbf67b28 100644 --- a/src/cvw.sv +++ b/src/cvw.sv @@ -201,6 +201,7 @@ typedef struct packed { logic A_SUPPORTED; logic B_SUPPORTED; logic C_SUPPORTED; + logic COMPRESSED_SUPPORTED; // C or ZCA logic D_SUPPORTED; logic E_SUPPORTED; logic F_SUPPORTED; diff --git a/src/ebu/ebufsmarb.sv b/src/ebu/ebufsmarb.sv index 91fa9e491..302c4752f 100644 --- a/src/ebu/ebufsmarb.sv +++ b/src/ebu/ebufsmarb.sv @@ -116,5 +116,5 @@ module ebufsmarb ( // 11 16 15 always_comb if (HBURST[2:1] == 2'b00) Threshold = 4'b0000; - else Threshold = (2 << HBURST[2:1]) - 1; + else Threshold = ('d2 << HBURST[2:1]) - 'd1; endmodule diff --git a/src/hazard/hazard.sv b/src/hazard/hazard.sv index 8efa454d9..cb70605c0 100644 --- a/src/hazard/hazard.sv +++ b/src/hazard/hazard.sv @@ -86,7 +86,7 @@ module hazard ( assign StallFCause = '0; assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FCvtIntStallD | FPUStallD) & ~FlushDCause; assign StallECause = (DivBusyE | FDivBusyE) & ~FlushECause; - assign StallMCause = WFIStallM & ~FlushMCause; + assign StallMCause = WFIStallM & ~FlushMCause; // Need to gate IFUStallF when the equivalent FlushFCause = FlushDCause = 1. // assign StallWCause = ((IFUStallF & ~FlushDCause) | LSUStallM) & ~FlushWCause; // Because FlushWCause is a strict subset of FlushDCause, FlushWCause is factored out. diff --git a/src/ifu/bpred/icpred.sv b/src/ifu/bpred/icpred.sv index 65e60c59c..e4895d4b7 100644 --- a/src/ifu/bpred/icpred.sv +++ b/src/ifu/bpred/icpred.sv @@ -51,32 +51,32 @@ module icpred import cvw::*; #(parameter cvw_t P, // An alternative to using the BTB to store the instruction class is to partially decode // the instructions in the Fetch stage into, Call, Return, Jump, and Branch instructions. // This logic is not described in the text book as of 23 February 2023. - logic ccall, cj, cjr, ccallr, CJumpF, CBranchF; + logic cjal, cj, cjr, cjalr, CJumpF, CBranchF; logic NCJumpF, NCBranchF; - if(P.C_SUPPORTED) begin + if(P.COMPRESSED_SUPPORTED) begin logic [4:0] CompressedOpcF; assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]}; - assign ccall = CompressedOpcF == 5'h09 & P.XLEN == 32; + assign cjal = CompressedOpcF == 5'h09 & P.XLEN == 32; assign cj = CompressedOpcF == 5'h0d; assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; - assign ccallr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; - assign CJumpF = ccall | cj | cjr | ccallr; + assign cjalr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0; + assign CJumpF = cjal | cj | cjr | cjalr; assign CBranchF = CompressedOpcF[4:1] == 4'h7; end else begin - assign {ccall, cj, cjr, ccallr, CJumpF, CBranchF} = '0; + assign {cjal, cj, cjr, cjalr, CJumpF, CBranchF} = '0; end assign NCJumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F; assign NCBranchF = PostSpillInstrRawF[6:0] == 7'h63; - assign BPBranchF = NCBranchF | (P.C_SUPPORTED & CBranchF); - assign BPJumpF = NCJumpF | (P.C_SUPPORTED & (CJumpF)); - assign BPReturnF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // returnurn must returnurn to ra or r5 - (P.C_SUPPORTED & (ccallr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); + assign BPBranchF = NCBranchF | (P.COMPRESSED_SUPPORTED & CBranchF); + assign BPJumpF = NCJumpF | (P.COMPRESSED_SUPPORTED & (CJumpF)); + assign BPReturnF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01 & PostSpillInstrRawF[11:7] == 5'b0) | // return must return to ra or r5 + (P.COMPRESSED_SUPPORTED & cjr & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01)); assign BPCallF = (NCJumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // call(r) must link to ra or x5 - (P.C_SUPPORTED & (ccall | (ccallr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); + (P.COMPRESSED_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); end else begin // This section connects the BTB's instruction class prediction. diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index af6f70898..a93b24f9d 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -144,7 +144,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( // Spill Support ///////////////////////////////////////////////////////////////////////////////////////////// - if(P.C_SUPPORTED) begin : Spill + if(P.COMPRESSED_SUPPORTED) begin : Spill spill #(P) spill(.clk, .reset, .StallD, .FlushD, .PCF, .PCPlus4F, .PCNextF, .InstrRawF, .InstrUpdateDAF, .CacheableF, .IFUCacheBusStallF, .ITLBMissF, .PCSpillNextF, .PCSpillF, .SelSpillNextF, .PostSpillInstrRawF, .CompressedF); end else begin : NoSpill @@ -340,8 +340,21 @@ module ifu import cvw::*; #(parameter cvw_t P) ( end else begin : bpred mux2 #(P.XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PC1NextF)); + logic BranchM, JumpM, BranchW, JumpW; + logic CallD, CallE, CallM, CallW; + logic ReturnD, ReturnE, ReturnM, ReturnW; assign BPWrongE = PCSrcE; - assign {InstrClassM, BPDirPredWrongM, BTAWrongM, RASPredPCWrongM, IClassWrongM} = '0; + icpred #(P, 0) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, + .PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW, + .CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, + .BTBCallF(1'b0), .BTBReturnF(1'b0), .BTBJumpF(1'b0), + .BTBBranchF(1'b0), .BPCallF(), .BPReturnF(), .BPJumpF(), .BPBranchF(), .IClassWrongM, + .IClassWrongE(), .BPReturnWrongD()); + flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, BPWrongM); + assign RASPredPCWrongM = '0; + assign BPDirPredWrongM = BPWrongM; + assign BTAWrongM = BPWrongM; + assign InstrClassM = {CallM, ReturnM, JumpM, BranchM}; assign NextValidPCE = PCE; end @@ -353,7 +366,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( flopenrc #(P.XLEN) PCDReg(clk, reset, FlushD, ~StallD, PCF, PCD); // expand 16-bit compressed instructions to 32 bits - if (P.C_SUPPORTED | P.ZCA_SUPPORTED) begin + if (P.COMPRESSED_SUPPORTED) begin logic IllegalCompInstrD; decompress #(P) decomp(.InstrRawD, .InstrD, .IllegalCompInstrD); assign IllegalIEUInstrD = IllegalBaseInstrD | IllegalCompInstrD; // illegal if bad 32 or 16-bit instr @@ -373,7 +386,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( // only IALIGN=32, the two low bits (mepc[1:0]) are always zero. // Spec 3.1.14 // Traps: Can’t happen. The bottom two bits of MTVEC are ignored so the trap always is to a multiple of 4. See 3.1.7 of the privileged spec. - assign BranchMisalignedFaultE = (IEUAdrE[1] & ~P.C_SUPPORTED) & PCSrcE; + assign BranchMisalignedFaultE = (IEUAdrE[1] & ~P.COMPRESSED_SUPPORTED) & PCSrcE; flopenr #(1) InstrMisalignedReg(clk, reset, ~StallM, BranchMisalignedFaultE, InstrMisalignedFaultM); // Instruction and PC/PCLink pipeline registers @@ -389,7 +402,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( flopenrc #(1) CompressedDReg(clk, reset, FlushD, ~StallD, CompressedF, CompressedD); flopenrc #(1) CompressedEReg(clk, reset, FlushE, ~StallE, CompressedD, CompressedE); - assign PCLinkE = PCE + (CompressedE ? 2 : 4); + assign PCLinkE = PCE + (CompressedE ? 'd2 : 'd4); // 'd4 means 4 but stops Design Compiler complaining about signed to unsigned conversion // pipeline original compressed instruction in case it is needed for MTVAL on an illegal instruction exception flopenrc #(16) InstrRawEReg(clk, reset, FlushE, ~StallE, InstrRawD[15:0], InstrRawE); diff --git a/src/lsu/swbytemask.sv b/src/lsu/swbytemask.sv index ad20a4414..d8c4ed167 100644 --- a/src/lsu/swbytemask.sv +++ b/src/lsu/swbytemask.sv @@ -33,7 +33,7 @@ module swbytemask #(parameter WORDLEN)( output logic [WORDLEN/8-1:0] ByteMask ); - assign ByteMask = ((2**(2**Size))-1) << Adr; + assign ByteMask =(('d2**('d2**Size))-'d1) << Adr; // 'd2 means 2, but stops Design Compiler from complaining about signed to unsigned conversion /* Equivalent to the following diff --git a/src/privileged/csr.sv b/src/privileged/csr.sv index 4cdce4989..7b590c077 100644 --- a/src/privileged/csr.sv +++ b/src/privileged/csr.sv @@ -202,8 +202,8 @@ module csr import cvw::*; #(parameter cvw_t P) ( /////////////////////////////////////////// assign CSRAdrM = InstrM[31:20]; - assign UnalignedNextEPCM = TrapM ? ((wfiM & IntPendingM) ? PCM+4 : PCM) : CSRWriteValM; - assign NextEPCM = P.C_SUPPORTED ? {UnalignedNextEPCM[P.XLEN-1:1], 1'b0} : {UnalignedNextEPCM[P.XLEN-1:2], 2'b00}; // 3.1.15 alignment + assign UnalignedNextEPCM = TrapM ? PCM : CSRWriteValM; + assign NextEPCM = P.COMPRESSED_SUPPORTED ? {UnalignedNextEPCM[P.XLEN-1:1], 1'b0} : {UnalignedNextEPCM[P.XLEN-1:2], 2'b00}; // 3.1.15 alignment assign NextCauseM = TrapM ? {InterruptM, CauseM}: {CSRWriteValM[P.XLEN-1], CSRWriteValM[3:0]}; assign NextMtvalM = TrapM ? NextFaultMtvalM : CSRWriteValM; assign UngatedCSRMWriteM = CSRWriteM & (PrivilegeModeW == P.M_MODE); diff --git a/src/privileged/csrm.sv b/src/privileged/csrm.sv index 6e5a49c80..2d714bf6a 100644 --- a/src/privileged/csrm.sv +++ b/src/privileged/csrm.sv @@ -94,7 +94,8 @@ module csrm import cvw::*; #(parameter cvw_t P) ( localparam DSCRATCH1 = 12'h7B3; // Constants localparam ZERO = {(P.XLEN){1'b0}}; - localparam MEDELEG_MASK = 16'hB3FF; + // when compressed instructions are supported, there can't be misaligned instructions + localparam MEDELEG_MASK = P.COMPRESSED_SUPPORTED ? 16'hB3FE : 16'hB3FF; localparam MIDELEG_MASK = 12'h222; // we choose to not make machine interrupts delegable // There are PMP_ENTRIES = 0, 16, or 64 PMPADDR registers, each of which has its own flop diff --git a/src/privileged/privdec.sv b/src/privileged/privdec.sv index a0195f366..eb17b8d04 100644 --- a/src/privileged/privdec.sv +++ b/src/privileged/privdec.sv @@ -29,7 +29,7 @@ module privdec import cvw::*; #(parameter cvw_t P) ( input logic clk, reset, - input logic StallM, + input logic StallM, StallW, FlushW, input logic [31:15] InstrM, // privileged instruction function field input logic PrivilegedM, // is this a privileged instruction (from IEU controller) input logic IllegalIEUFPUInstrM, // Not a legal IEU instruction @@ -39,7 +39,7 @@ module privdec import cvw::*; #(parameter cvw_t P) ( output logic IllegalInstrFaultM, // Illegal instruction output logic EcallFaultM, BreakpointFaultM, // Ecall or breakpoint; must retire, so don't flush it when the trap occurs output logic sretM, mretM, // return instructions - output logic wfiM, sfencevmaM // wfi / sfence.vma / sinval.vma instructions + output logic wfiM, wfiW, sfencevmaM // wfi / sfence.vma / sinval.vma instructions ); logic rs1zeroM; // rs1 field = 0 @@ -86,6 +86,8 @@ module privdec import cvw::*; #(parameter cvw_t P) ( // coverage on end else assign WFITimeoutM = 0; + flopenrc #(1) wfiWReg(clk, reset, FlushW, ~StallW, wfiM, wfiW); + /////////////////////////////////////////// // Extract exceptions by name and handle them /////////////////////////////////////////// diff --git a/src/privileged/privileged.sv b/src/privileged/privileged.sv index fff4af8b8..d777e0bf9 100644 --- a/src/privileged/privileged.sv +++ b/src/privileged/privileged.sv @@ -115,15 +115,17 @@ module privileged import cvw::*; #(parameter cvw_t P) ( logic ExceptionM; // Memory stage instruction caused a fault logic HPTWInstrAccessFaultM; // Hardware page table access fault while fetching instruction PTE + logic wfiW; + // track the current privilege level privmode #(P) privmode(.clk, .reset, .StallW, .TrapM, .mretM, .sretM, .DelegateM, .STATUS_MPP, .STATUS_SPP, .NextPrivilegeModeM, .PrivilegeModeW); // decode privileged instructions - privdec #(P) pmd(.clk, .reset, .StallM, .InstrM(InstrM[31:15]), + privdec #(P) pmd(.clk, .reset, .StallM, .StallW, .FlushW, .InstrM(InstrM[31:15]), .PrivilegedM, .IllegalIEUFPUInstrM, .IllegalCSRAccessM, .PrivilegeModeW, .STATUS_TSR, .STATUS_TVM, .STATUS_TW, .IllegalInstrFaultM, - .EcallFaultM, .BreakpointFaultM, .sretM, .mretM, .wfiM, .sfencevmaM); + .EcallFaultM, .BreakpointFaultM, .sretM, .mretM, .wfiM, .wfiW, .sfencevmaM); // Control and Status Registers csr #(P) csr(.clk, .reset, .FlushM, .FlushW, .StallE, .StallM, .StallW, @@ -156,5 +158,5 @@ module privileged import cvw::*; #(parameter cvw_t P) ( .mretM, .sretM, .PrivilegeModeW, .MIP_REGW, .MIE_REGW, .MIDELEG_REGW, .MEDELEG_REGW, .STATUS_MIE, .STATUS_SIE, .InstrValidM, .CommittedM, .CommittedF, - .TrapM, .RetM, .wfiM, .InterruptM, .ExceptionM, .IntPendingM, .DelegateM, .CauseM); + .TrapM, .RetM, .wfiM, .wfiW, .InterruptM, .ExceptionM, .IntPendingM, .DelegateM, .CauseM); endmodule diff --git a/src/privileged/trap.sv b/src/privileged/trap.sv index bcde634de..bfbbeb65f 100644 --- a/src/privileged/trap.sv +++ b/src/privileged/trap.sv @@ -33,7 +33,7 @@ module trap import cvw::*; #(parameter cvw_t P) ( input logic LoadAccessFaultM, StoreAmoAccessFaultM, EcallFaultM, InstrPageFaultM, input logic LoadPageFaultM, StoreAmoPageFaultM, // various trap sources input logic mretM, sretM, // return instructions - input logic wfiM, // wait for interrupt instruction + input logic wfiM, wfiW, // wait for interrupt instruction input logic [1:0] PrivilegeModeW, // current privilege mode input logic [11:0] MIP_REGW, MIE_REGW, MIDELEG_REGW, // interrupt pending, enabled, and delegate CSRs input logic [15:0] MEDELEG_REGW, // exception delegation SR @@ -68,7 +68,8 @@ module trap import cvw::*; #(parameter cvw_t P) ( assign Committed = CommittedM | CommittedF; assign EnabledIntsM = ({12{MIntGlobalEnM}} & PendingIntsM & ~MIDELEG_REGW | {12{SIntGlobalEnM}} & PendingIntsM & MIDELEG_REGW); assign ValidIntsM = {12{~Committed}} & EnabledIntsM; - assign InterruptM = (|ValidIntsM) & InstrValidM; // suppress interrupt if the memory system has partially processed a request. + assign InterruptM = (|ValidIntsM) & InstrValidM & (~wfiM | wfiW); // suppress interrupt if the memory system has partially processed a request. Delay interrupt until wfi is in the W stage. + // wfiW is to support possible but unlikely back to back wfi instructions. wfiM would be high in the M stage, while also in the W stage. assign DelegateM = P.S_SUPPORTED & (InterruptM ? MIDELEG_REGW[CauseM] : MEDELEG_REGW[CauseM]) & (PrivilegeModeW == P.U_MODE | PrivilegeModeW == P.S_MODE); diff --git a/src/uncore/ahbapbbridge.sv b/src/uncore/ahbapbbridge.sv index 381297f00..454f4d5df 100644 --- a/src/uncore/ahbapbbridge.sv +++ b/src/uncore/ahbapbbridge.sv @@ -88,7 +88,6 @@ module ahbapbbridge import cvw::*; #(parameter cvw_t P, int i; always_comb begin // default: no peripheral selected: read 0, indicate ready during access phase so bus doesn't hang - // *** also could assert ready right away HRDATA = 0; PREADYOUT = 1'b1; for (i=0; i