From fd088f8ecd19fd7957ab94b490bf1db41665e377 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sat, 3 Jul 2021 15:51:25 -0500 Subject: [PATCH 01/30] Added explicit names to lsu, lsuarb and pagetable walker to make the code refactoring process easier. --- wally-pipelined/src/lsu/lsu.sv | 16 +-- .../src/wally/wallypipelinedhart.sv | 125 ++++++++++++++---- 2 files changed, 109 insertions(+), 32 deletions(-) diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 36d4715fe..36d859071 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -66,7 +66,7 @@ module lsu ( input logic MemAckW, // from ahb input logic [`XLEN-1:0] HRDATAW, // from ahb output logic [2:0] Funct3MfromLSU, - output logic StallWfromLSU, + output logic StallWfromLSU, // mmu management @@ -85,14 +85,14 @@ module lsu ( output logic DTLBHitM, // not connected // PMA/PMP (inside mmu) signals - input logic [31:0] HADDR, // *** replace all of these H inputs with physical adress once pma checkers have been edited to use paddr as well. - input logic [2:0] HSIZE, HBURST, - input logic HWRITE, - input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker. - output logic PMALoadAccessFaultM, PMAStoreAccessFaultM, - output logic PMPLoadAccessFaultM, PMPStoreAccessFaultM, // *** can these be parameterized? we dont need the m stage ones for the immu and vice versa. 
+ output logic PMALoadAccessFaultM, PMAStoreAccessFaultM, + output logic PMPLoadAccessFaultM, PMPStoreAccessFaultM, // *** can these be parameterized? we dont need the m stage ones for the immu and vice versa. output logic DSquashBusAccessM // output logic [5:0] DHSELRegionsM diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 978f747fa..47035ec6b 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -122,8 +122,8 @@ module wallypipelinedhart logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM; logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM; logic DSquashBusAccessM, ISquashBusAccessF; - var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0]; - var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0]; + var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0]; + var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0]; // IMem stalls logic ICacheStallF; @@ -187,10 +187,38 @@ module wallypipelinedhart // mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM); - pagetablewalker pagetablewalker(.HPTWRead(HPTWRead), - .*); // can send addresses to ahblite, send out pagetablestall + pagetablewalker pagetablewalker( + .clk(clk), + .reset(reset), + .SATP_REGW(SATP_REGW), // already on lsu port + .PCF(PCF), // add to lsu port + .MemAdrM(MemAdrM), // alreayd on lsu port + .ITLBMissF(ITLBMissF), // add to lsu port + .DTLBMissM(DTLBMissM), // already on lsu port convert to internal + .MemRWM(MemRWM), // already on lsu port + .PageTableEntryF(PageTableEntryF), // add to lsu port + .PageTableEntryM(PageTableEntryM), // already on lsu port convert to internal + .PageTypeF(PageTypeF), // add to lsu port connects to ifu + .PageTypeM(PageTypeM), + .ITLBWriteF(ITLBWriteF), + .DTLBWriteM(DTLBWriteM), + .MMUReadPTE(MMUReadPTE), + .MMUReady(MMUReady), + .HPTWStall(HPTWStall), + 
.MMUPAdr(MMUPAdr), + .MMUTranslate(MMUTranslate), + .HPTWRead(HPTWRead), + .MMUStall(MMUStall), + .WalkerInstrPageFaultF(WalkerInstrPageFaultF), + .WalkerLoadPageFaultM(WalkerLoadPageFaultM), + .WalkerStorePageFaultM(WalkerStorePageFaultM)); + + + // arbiter between IEU and pagetablewalker - lsuArb arbiter(// HPTW connection + lsuArb arbiter(.clk(clk), + .reset(reset), + // HPTW connection .HPTWTranslate(MMUTranslate), .HPTWRead(HPTWRead), .HPTWPAdr(MMUPAdr), @@ -202,8 +230,8 @@ module wallypipelinedhart .Funct3M(Funct3M), .AtomicM(AtomicM), .MemAdrM(MemAdrM), - .StallW(StallW), .WriteDataM(WriteDataM), + .StallW(StallW), .ReadDataW(ReadDataW), .CommittedM(CommittedM), .SquashSCW(SquashSCW), @@ -222,29 +250,78 @@ module wallypipelinedhart .DataMisalignedMfromLSU(DataMisalignedMfromLSU), .ReadDataWFromLSU(ReadDataWFromLSU), .HPTWReadyfromLSU(HPTWReadyfromLSU), - .DataStall(DataStall), - .*); + .DataStall(DataStall)); - lsu lsu(.MemRWM(MemRWMtoLSU), - .Funct3M(Funct3MtoLSU), - .AtomicM(AtomicMtoLSU), - .MemAdrM(MemAdrMtoLSU), - .WriteDataM(WriteDataMtoLSU), - .ReadDataW(ReadDataWFromLSU), + lsu lsu(.clk(clk), + .reset(reset), + .StallM(StallM), + .FlushM(FlushM), .StallW(StallWtoLSU), + .FlushW(FlushW), + // connected to arbiter (reconnect to CPU) + .MemRWM(MemRWMtoLSU), // change to MemRWM + .Funct3M(Funct3MtoLSU), // change to Funct3M + .AtomicM(AtomicMtoLSU), // change to AtomicMtoLSU + .CommittedM(CommittedMfromLSU), // change to CommitttedM + .SquashSCW(SquashSCWfromLSU), // change to SquashSCW + .DataMisalignedM(DataMisalignedMfromLSU), // change to DataMisalignedM + .MemAdrM(MemAdrMtoLSU), // change to MemAdrM + .WriteDataM(WriteDataMtoLSU), // change to WriteDataM + .ReadDataW(ReadDataWFromLSU), // change to ReadDataW - .CommittedM(CommittedMfromLSU), - .SquashSCW(SquashSCWfromLSU), - .DataMisalignedM(DataMisalignedMfromLSU), - .DisableTranslation(DisableTranslation), + // connected to ahb (all stay the same) + .CommitM(CommitM), + .MemPAdrM(MemPAdrM), + 
.MemReadM(MemReadM), + .MemWriteM(MemWriteM), + .AtomicMaskedM(AtomicMaskedM), + .MemAckW(MemAckW), + .HRDATAW(HRDATAW), + .Funct3MfromLSU(Funct3MfromLSU), // stays the same + .StallWfromLSU(StallWfromLSU), // stays the same + .DSquashBusAccessM(DSquashBusAccessM), // probalby removed after dcache implemenation? + // currently not connected (but will need to be used for lsu talking to ahb. + .HADDR(HADDR), + .HSIZE(HSIZE), + .HBURST(HBURST), + .HWRITE(HWRITE), + + // connect to csr or privilege and stay the same. + .PrivilegeModeW(PrivilegeModeW), // connects to csr + .PMPCFG_ARRAY_REGW(PMPCFG_ARRAY_REGW), // connects to csr + .PMPADDR_ARRAY_REGW(PMPADDR_ARRAY_REGW), // connects to csr + // hptw keep i/o + .SATP_REGW(SATP_REGW), // from csr + .STATUS_MXR(STATUS_MXR), // from csr + .STATUS_SUM(STATUS_SUM), // from csr + + .DTLBFlushM(DTLBFlushM), // connects to privilege + .NonBusTrapM(NonBusTrapM), // connects to privilege + .DTLBLoadPageFaultM(DTLBLoadPageFaultM), // connects to privilege + .DTLBStorePageFaultM(DTLBStorePageFaultM), // connects to privilege + .LoadMisalignedFaultM(LoadMisalignedFaultM), // connects to privilege + .LoadAccessFaultM(LoadAccessFaultM), // connects to privilege + .StoreMisalignedFaultM(StoreMisalignedFaultM), // connects to privilege + .StoreAccessFaultM(StoreAccessFaultM), // connects to privilege + .PMALoadAccessFaultM(PMALoadAccessFaultM), + .PMAStoreAccessFaultM(PMAStoreAccessFaultM), + .PMPLoadAccessFaultM(PMPLoadAccessFaultM), + .PMPStoreAccessFaultM(PMPStoreAccessFaultM), + + // connected to hptw. Move to internal. + .PageTableEntryM(PageTableEntryM), + .PageTypeM(PageTypeM), + .DTLBWriteM(DTLBWriteM), // from hptw. 
+ .DTLBMissM(DTLBMissM), // to hptw from dmmu + .DisableTranslation(DisableTranslation), // from hptw to dmmu + .HPTWReady(HPTWReadyfromLSU), // from hptw, remove + + .DTLBHitM(DTLBHitM), // not connected remove + + .DataStall(DataStall)) // change to DCacheStall + ; - .DataStall(DataStall), - .HPTWReady(HPTWReadyfromLSU), - .Funct3MfromLSU(Funct3MfromLSU), - .StallWfromLSU(StallWfromLSU), -// .DataStall(LSUStall), - .* ); // data cache unit ahblite ebu( //.InstrReadF(1'b0), From 9b959715a0278723166e55e6a83bfe15a1f4f38d Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sat, 3 Jul 2021 16:06:09 -0500 Subject: [PATCH 02/30] removed mmustall and finished port annotations on ptw and lsuArb. --- wally-pipelined/src/ebu/ahblite.sv | 9 ------- wally-pipelined/src/mmu/pagetablewalker.sv | 15 ----------- .../src/wally/wallypipelinedhart.sv | 26 +++++++++---------- 3 files changed, 12 insertions(+), 38 deletions(-) diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 70f32bf70..09e5799c5 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -53,13 +53,6 @@ module ahblite ( input logic [1:0] MemSizeM, //output logic DataStall, // Signals from MMU -/* -----\/----- EXCLUDED -----\/----- - input logic MMUStall, - input logic [`XLEN-1:0] MMUPAdr, - input logic MMUTranslate, - output logic [`XLEN-1:0] MMUReadPTE, - output logic MMUReady, - -----/\----- EXCLUDED -----/\----- */ // Signals from PMA checker input logic DSquashBusAccessM, ISquashBusAccessF, // Signals to PMA checker (metadata of proposed access) @@ -158,8 +151,6 @@ module ahblite ( -----/\----- EXCLUDED -----/\----- */ - //assign #1 InstrStall = ((NextBusState == INSTRREAD) || (NextBusState == INSTRREADC) || - // MMUStall); // bus outputs assign #1 GrantData = (ProposedNextBusState == MEMREAD) || (ProposedNextBusState == MEMWRITE) || diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index 
d0d2152f6..45479d4ab 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -64,11 +64,6 @@ module pagetablewalker output logic HPTWRead, - - - // Stall signal - output logic MMUStall, - // Faults output logic WalkerInstrPageFaultF, output logic WalkerLoadPageFaultM, @@ -190,7 +185,6 @@ module pagetablewalker PRegEn = 1'b0; TranslationPAdr = '0; HPTWRead = 1'b0; - MMUStall = 1'b1; PageTableEntry = '0; PageType = '0; DTLBWriteM = '0; @@ -209,7 +203,6 @@ module pagetablewalker end else begin NextWalkerState = IDLE; TranslationPAdr = '0; - MMUStall = 1'b0; end end @@ -271,14 +264,12 @@ module pagetablewalker LEAF: begin NextWalkerState = IDLE; - MMUStall = 1'b0; end FAULT: begin NextWalkerState = IDLE; WalkerInstrPageFaultF = ~DTLBMissMQ; WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; WalkerStorePageFaultM = DTLBMissMQ && MemStore; - MMUStall = 1'b0; end // Default case should never happen, but is included for linter. @@ -293,8 +284,6 @@ module pagetablewalker assign VPN1 = TranslationVAdrQ[31:22]; assign VPN0 = TranslationVAdrQ[21:12]; - //assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || - // WalkerState == LEVEL2 || WalkerState == LEVEL1; // Capture page table entry from data cache @@ -335,7 +324,6 @@ module pagetablewalker PRegEn = 1'b0; TranslationPAdr = '0; HPTWRead = 1'b0; - MMUStall = 1'b1; PageTableEntry = '0; PageType = '0; DTLBWriteM = '0; @@ -358,7 +346,6 @@ module pagetablewalker end else begin NextWalkerState = IDLE; TranslationPAdr = '0; - MMUStall = 1'b0; end end @@ -499,7 +486,6 @@ module pagetablewalker LEAF: begin NextWalkerState = IDLE; - MMUStall = 1'b0; end FAULT: begin @@ -507,7 +493,6 @@ module pagetablewalker WalkerInstrPageFaultF = ~DTLBMissMQ; WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; WalkerStorePageFaultM = DTLBMissMQ && MemStore; - MMUStall = 1'b0; end // Default case should never happen diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv 
b/wally-pipelined/src/wally/wallypipelinedhart.sv index 47035ec6b..9a678189b 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -129,7 +129,6 @@ module wallypipelinedhart logic ICacheStallF; logic DCacheStall; logic [`XLEN-1:0] MMUPAdr, MMUReadPTE; - logic MMUStall; logic MMUTranslate, MMUReady; logic HPTWRead; logic HPTWReadyfromLSU; @@ -199,19 +198,18 @@ module wallypipelinedhart .PageTableEntryF(PageTableEntryF), // add to lsu port .PageTableEntryM(PageTableEntryM), // already on lsu port convert to internal .PageTypeF(PageTypeF), // add to lsu port connects to ifu - .PageTypeM(PageTypeM), - .ITLBWriteF(ITLBWriteF), - .DTLBWriteM(DTLBWriteM), - .MMUReadPTE(MMUReadPTE), - .MMUReady(MMUReady), - .HPTWStall(HPTWStall), - .MMUPAdr(MMUPAdr), - .MMUTranslate(MMUTranslate), - .HPTWRead(HPTWRead), - .MMUStall(MMUStall), - .WalkerInstrPageFaultF(WalkerInstrPageFaultF), - .WalkerLoadPageFaultM(WalkerLoadPageFaultM), - .WalkerStorePageFaultM(WalkerStorePageFaultM)); + .PageTypeM(PageTypeM), // already on lsu port convert to internal + .ITLBWriteF(ITLBWriteF), // add to lsu port connects to ifu + .DTLBWriteM(DTLBWriteM), // already on lsu port convert to internal + .MMUReadPTE(MMUReadPTE), // from lsu arb convert to internal + .MMUReady(MMUReady), // to lsu arb, convert to internal + .HPTWStall(HPTWStall), // from lsu arb convert to internal + .MMUPAdr(MMUPAdr), // to lsu arb, convert to internal + .MMUTranslate(MMUTranslate), // to lsu arb, convert to internal + .HPTWRead(HPTWRead), // to lsu arb, convert to internal + .WalkerInstrPageFaultF(WalkerInstrPageFaultF), // add to lsu port + .WalkerLoadPageFaultM(WalkerLoadPageFaultM), // add to lsu port (to privilege) + .WalkerStorePageFaultM(WalkerStorePageFaultM)); // add to lsu port (to privilege) From 17ef10568f7b5afa1b31cd05106763dd7efd941d Mon Sep 17 00:00:00 2001 From: bbracker Date: Sat, 3 Jul 2021 17:25:19 -0400 Subject: [PATCH 03/30] optionally output 
GDB-formatted instruction list to main buildroot folder --- wally-pipelined/linux-testgen/logAllBuildroot.sh | 7 ++++--- wally-pipelined/linux-testgen/parse_qemu.py | 6 +++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/linux-testgen/logAllBuildroot.sh b/wally-pipelined/linux-testgen/logAllBuildroot.sh index 740fa8c4b..86bba6af4 100755 --- a/wally-pipelined/linux-testgen/logAllBuildroot.sh +++ b/wally-pipelined/linux-testgen/logAllBuildroot.sh @@ -21,11 +21,12 @@ # - Logs parse_qemu.py's simulated gdb output to qemu_in_gdb_format.txt #cat qemu_output.txt | ./parse_qemu.py >qemu_in_gdb_format.txt #cat qemu_output.txt | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/" + # Uncomment this version in case you just want to have qemu_in_gdb_format.txt around # It is often helpful for general debugging -#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py >qemu_in_gdb_format.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog - +(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py >/courses/e190ax/buildroot_boot/qemu_in_gdb_format.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog # Split qemu_in_gdb_format.txt into chunks of 100,000 instructions for easier inspection 
+#cd /courses/e190ax/buildroot_boot #split -d -l 5600000 qemu_in_gdb_format.txt --verbose # Uncomment this version for parse_gdb_output.py debugging @@ -36,4 +37,4 @@ # =========== Just Do the Thing ========== # Uncomment this version for the whole thing # - Logs info needed by buildroot testbench -(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot_new/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog +#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot_new/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog diff --git a/wally-pipelined/linux-testgen/parse_qemu.py b/wally-pipelined/linux-testgen/parse_qemu.py index ac5d95f0b..4da168aba 100755 --- a/wally-pipelined/linux-testgen/parse_qemu.py +++ b/wally-pipelined/linux-testgen/parse_qemu.py @@ -9,9 +9,10 @@ pageFaultCSRs = {} regs = {} pageFaultRegs = {} instrs = {} +instrCount = 0 def printPC(l): - global parseState, inPageFault, CSRs, pageFaultCSRs, regs, pageFaultCSRs, instrs + global parseState, inPageFault, CSRs, pageFaultCSRs, regs, pageFaultCSRs, instrs, instrCount if not inPageFault: inst = l.split() if len(inst) > 3: @@ -19,6 +20,9 @@ def printPC(l): 
else: print(f'=> {inst[1]}:\t{inst[2]}') print(f'{inst[0]} 0x{inst[1]}') + instrCount += 1 + if ((instrCount % 100000) == 0): + sys.stderr.write("QEMU parser reached "+str(instrCount)+" instrs\n") def printCSRs(): global parseState, inPageFault, CSRs, pageFaultCSRs, regs, pageFaultCSRs, instrs From c897bef8cd21cb6a0a09fcf3e80c3b61f7324c09 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 01:19:38 -0400 Subject: [PATCH 04/30] Moved BOOTTIM to 0x1000-0x1FFF. Added logic to detect an access to undefined memory and assert HREADY so bus doesn't hang. --- .../config/buildroot/wally-config.vh | 8 +++--- .../config/busybear/wally-config.vh | 8 +++--- .../config/coremark-64i/wally-config.vh | 27 +++++++++---------- .../config/coremark/wally-config.vh | 26 +++++++++--------- .../config/coremark_bare/wally-config.vh | 26 +++++++++--------- wally-pipelined/config/rv32ic/wally-config.vh | 6 ++--- wally-pipelined/config/rv64BP/wally-config.vh | 26 +++++++++--------- wally-pipelined/config/rv64ic/wally-config.vh | 8 +++--- .../config/rv64icfd/wally-config.vh | 26 +++++++++--------- .../config/rv64imc/wally-config.vh | 26 +++++++++--------- .../regression/wave-dos/linux-waves.do | 3 +-- wally-pipelined/src/generic/flop.sv | 2 +- wally-pipelined/src/mmu/adrdecs.sv | 6 ++++- wally-pipelined/src/mmu/pmachecker.sv | 2 +- wally-pipelined/src/mmu/pmpchecker.sv | 2 +- wally-pipelined/src/uncore/uncore.sv | 20 +++++--------- .../testbench/testbench-imperas.sv | 12 +++++++++ wally-pipelined/testbench/testbench-linux.sv | 12 +++++++++ 18 files changed, 125 insertions(+), 121 deletions(-) diff --git a/wally-pipelined/config/buildroot/wally-config.vh b/wally-pipelined/config/buildroot/wally-config.vh index e1e4f300d..ace7366dc 100644 --- a/wally-pipelined/config/buildroot/wally-config.vh +++ b/wally-pipelined/config/buildroot/wally-config.vh @@ -62,10 +62,10 @@ // Peripheral memory space extends from BASE to BASE+RANGE // Range should be a thermometer code with 0's in the 
upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 56'h00003FFF -//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 56'h00000FFF +//`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +//`define BOOTTIM_RANGE 56'h00003FFF +`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 `define TIM_BASE 56'h80000000 `define TIM_RANGE 56'h07FFFFFF diff --git a/wally-pipelined/config/busybear/wally-config.vh b/wally-pipelined/config/busybear/wally-config.vh index 58c1c8a0a..447294496 100644 --- a/wally-pipelined/config/busybear/wally-config.vh +++ b/wally-pipelined/config/busybear/wally-config.vh @@ -64,10 +64,10 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 56'h00003FFF -//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 56'h00000FFF +//`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all 
zeros and this is easier for decoder +//`define BOOTTIM_RANGE 56'h00003FFF +`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 `define TIM_BASE 56'h80000000 `define TIM_RANGE 56'h07FFFFFF diff --git a/wally-pipelined/config/coremark-64i/wally-config.vh b/wally-pipelined/config/coremark-64i/wally-config.vh index f72b4f616..17d483441 100644 --- a/wally-pipelined/config/coremark-64i/wally-config.vh +++ b/wally-pipelined/config/coremark-64i/wally-config.vh @@ -55,26 +55,23 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 56'h00001000 +`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 56'h80000000 +`define TIM_RANGE 56'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 56'h02000000 +`define CLINT_RANGE 56'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 56'h10012000 +`define GPIO_RANGE 56'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 56'h10000000 +`define UART_RANGE 56'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define 
PLIC_RANGE 32'h03FFFFFF - +`define PLIC_BASE 56'h0C000000 +`define PLIC_RANGE 56'h03FFFFFF // Test modes // Tie GPIO outputs back to inputs diff --git a/wally-pipelined/config/coremark/wally-config.vh b/wally-pipelined/config/coremark/wally-config.vh index ccf0a64b8..7bfdc8821 100644 --- a/wally-pipelined/config/coremark/wally-config.vh +++ b/wally-pipelined/config/coremark/wally-config.vh @@ -63,25 +63,23 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 34'h00001000 +`define BOOTTIM_RANGE 34'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 34'h80000000 +`define TIM_RANGE 34'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 34'h02000000 +`define CLINT_RANGE 34'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 34'h10012000 +`define GPIO_RANGE 34'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 34'h10000000 +`define UART_RANGE 34'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 34'h0C000000 +`define PLIC_RANGE 34'h03FFFFFF // Test modes diff --git a/wally-pipelined/config/coremark_bare/wally-config.vh b/wally-pipelined/config/coremark_bare/wally-config.vh index d55200b48..fb4bee347 100644 
--- a/wally-pipelined/config/coremark_bare/wally-config.vh +++ b/wally-pipelined/config/coremark_bare/wally-config.vh @@ -66,25 +66,23 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 34'h00001000 +`define BOOTTIM_RANGE 34'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 34'h80000000 +`define TIM_RANGE 34'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 34'h02000000 +`define CLINT_RANGE 34'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 34'h10012000 +`define GPIO_RANGE 34'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 34'h10000000 +`define UART_RANGE 34'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 34'h0C000000 +`define PLIC_RANGE 34'h03FFFFFF // Test modes diff --git a/wally-pipelined/config/rv32ic/wally-config.vh b/wally-pipelined/config/rv32ic/wally-config.vh index 29cd973ae..1933a2e7a 100644 --- a/wally-pipelined/config/rv32ic/wally-config.vh +++ b/wally-pipelined/config/rv32ic/wally-config.vh @@ -63,10 +63,8 @@ // *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? 
`define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 34'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 34'h00003FFF -//`define BOOTTIM_BASE 34'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 34'h00000FFF +`define BOOTTIM_BASE 34'h00001000 +`define BOOTTIM_RANGE 34'h00000FFF `define TIM_SUPPORTED 1'b1 `define TIM_BASE 34'h80000000 `define TIM_RANGE 34'h07FFFFFF diff --git a/wally-pipelined/config/rv64BP/wally-config.vh b/wally-pipelined/config/rv64BP/wally-config.vh index 2e5eaf378..335f2d87a 100644 --- a/wally-pipelined/config/rv64BP/wally-config.vh +++ b/wally-pipelined/config/rv64BP/wally-config.vh @@ -64,25 +64,23 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 56'h00001000 +`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 56'h80000000 +`define TIM_RANGE 56'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 56'h02000000 +`define CLINT_RANGE 56'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 56'h10012000 +`define GPIO_RANGE 56'h000000FF `define 
UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 56'h10000000 +`define UART_RANGE 56'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 56'h0C000000 +`define PLIC_RANGE 56'h03FFFFFF // Test modes diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index a15ef18b7..ad97d446e 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -67,10 +67,10 @@ // *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_RANGE 56'h00003FFF -`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 56'h00000FFF +//`define BOOTTIM_RANGE 56'h00003FFF +//`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 `define TIM_BASE 56'h80000000 `define TIM_RANGE 56'h07FFFFFF diff --git a/wally-pipelined/config/rv64icfd/wally-config.vh b/wally-pipelined/config/rv64icfd/wally-config.vh index 293222104..e645f014b 100644 --- a/wally-pipelined/config/rv64icfd/wally-config.vh +++ b/wally-pipelined/config/rv64icfd/wally-config.vh @@ -66,25 +66,23 @@ // Range should be a thermometer code with 0's in the upper bits 
and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 56'h00001000 +`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 56'h80000000 +`define TIM_RANGE 56'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 56'h02000000 +`define CLINT_RANGE 56'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 56'h10012000 +`define GPIO_RANGE 56'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 56'h10000000 +`define UART_RANGE 56'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 56'h0C000000 +`define PLIC_RANGE 56'h03FFFFFF // Test modes diff --git a/wally-pipelined/config/rv64imc/wally-config.vh b/wally-pipelined/config/rv64imc/wally-config.vh index 5ecb9bef5..885a519d7 100644 --- a/wally-pipelined/config/rv64imc/wally-config.vh +++ b/wally-pipelined/config/rv64imc/wally-config.vh @@ -62,25 +62,23 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 
32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 56'h00001000 +`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 56'h80000000 +`define TIM_RANGE 56'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 56'h02000000 +`define CLINT_RANGE 56'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 56'h10012000 +`define GPIO_RANGE 56'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 56'h10000000 +`define UART_RANGE 56'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 56'h0C000000 +`define PLIC_RANGE 56'h03FFFFFF // Test modes diff --git a/wally-pipelined/regression/wave-dos/linux-waves.do b/wally-pipelined/regression/wave-dos/linux-waves.do index 63623891c..843ed2745 100644 --- a/wally-pipelined/regression/wave-dos/linux-waves.do +++ b/wally-pipelined/regression/wave-dos/linux-waves.do @@ -122,8 +122,7 @@ add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UEPC_REGW add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UTVEC_REGW add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UIP_REGW add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UIE_REGW -add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG01_REGW -add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG23_REGW +add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG_ARRAY_REGW add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPADDR_ARRAY_REGW add wave -hex 
sim:/testbench/dut/hart/priv/csr/genblk1/csrm/MISA_REGW add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csru/FRM_REGW diff --git a/wally-pipelined/src/generic/flop.sv b/wally-pipelined/src/generic/flop.sv index a5636c6f4..cb583de2e 100644 --- a/wally-pipelined/src/generic/flop.sv +++ b/wally-pipelined/src/generic/flop.sv @@ -77,7 +77,7 @@ module flopenr #(parameter WIDTH = 8) ( output logic [WIDTH-1:0] q); always_ff @(posedge clk, posedge reset) - if (reset) q <= #1 0; + if (reset) q <= #1 0; else if (en) q <= #1 d; endmodule diff --git a/wally-pipelined/src/mmu/adrdecs.sv b/wally-pipelined/src/mmu/adrdecs.sv index 8585a4ee7..94951aad7 100644 --- a/wally-pipelined/src/mmu/adrdecs.sv +++ b/wally-pipelined/src/mmu/adrdecs.sv @@ -24,12 +24,13 @@ /////////////////////////////////////////// `include "wally-config.vh" + // verilator lint_off UNOPTFLAT module adrdecs ( input logic [`PA_BITS-1:0] PhysicalAddress, input logic AccessRW, AccessRX, AccessRWX, input logic [1:0] Size, - output logic [5:0] SelRegions + output logic [6:0] SelRegions ); // Determine which region of physical memory (if any) is being accessed @@ -41,5 +42,8 @@ module adrdecs ( adrdec uartdec(PhysicalAddress, `UART_BASE, `UART_RANGE, `UART_SUPPORTED, AccessRW, Size, 4'b0001, SelRegions[1]); adrdec plicdec(PhysicalAddress, `PLIC_BASE, `PLIC_RANGE, `PLIC_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[0]); + assign SelRegions[6] = ~|(SelRegions[5:0]); + endmodule + // verilator lint_on UNOPTFLAT diff --git a/wally-pipelined/src/mmu/pmachecker.sv b/wally-pipelined/src/mmu/pmachecker.sv index 26d8ac875..86abcb3f6 100644 --- a/wally-pipelined/src/mmu/pmachecker.sv +++ b/wally-pipelined/src/mmu/pmachecker.sv @@ -45,7 +45,7 @@ module pmachecker ( logic PMAAccessFault; logic AccessRW, AccessRWX, AccessRX; - logic [5:0] SelRegions; + logic [6:0] SelRegions; // Determine what type of access is being made assign AccessRW = ReadAccessM | WriteAccessM; diff --git a/wally-pipelined/src/mmu/pmpchecker.sv 
b/wally-pipelined/src/mmu/pmpchecker.sv index 27c7e508c..960182317 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -72,7 +72,7 @@ module pmpchecker ( assign {PMPCFG[j+7], PMPCFG[j+6], PMPCFG[j+5], PMPCFG[j+4], PMPCFG[j+3], PMPCFG[j+2], PMPCFG[j+1], PMPCFG[j]} = PMPCFG_ARRAY_REGW[j/8]; // verilator lint_on WIDTH - for (i=0; i<`PMP_ENTRIES; i++) + for (i=0; i<`PMP_ENTRIES; i++) // *** can this be done with array notation? pmpadrdec pmpadrdec(.PhysicalAddress, .PMPCfg(PMPCFG[i]), .PMPAdr(PMPADDR_ARRAY_REGW[i]), diff --git a/wally-pipelined/src/uncore/uncore.sv b/wally-pipelined/src/uncore/uncore.sv index 79f7a0e86..e5f5fdd7d 100644 --- a/wally-pipelined/src/uncore/uncore.sv +++ b/wally-pipelined/src/uncore/uncore.sv @@ -62,13 +62,14 @@ module uncore ( logic [`XLEN-1:0] HWDATA; logic [`XLEN-1:0] HREADTim, HREADCLINT, HREADPLIC, HREADGPIO, HREADUART; - logic [5:0] HSELRegions; + logic [6:0] HSELRegions; logic HSELTim, HSELCLINT, HSELPLIC, HSELGPIO, PreHSELUART, HSELUART; logic HSELTimD, HSELCLINTD, HSELPLICD, HSELGPIOD, HSELUARTD; logic HRESPTim, HRESPCLINT, HRESPPLIC, HRESPGPIO, HRESPUART; logic HREADYTim, HREADYCLINT, HREADYPLIC, HREADYGPIO, HREADYUART; logic [`XLEN-1:0] HREADBootTim; logic HSELBootTim, HSELBootTimD, HRESPBootTim, HREADYBootTim; + logic HSELNoneD; logic [1:0] MemRWboottim; logic UARTIntr,GPIOIntr; @@ -78,7 +79,7 @@ module uncore ( adrdecs adrdecs({{(`PA_BITS-32){1'b0}}, HADDR}, 1'b1, 1'b1, 1'b1, HSIZE[1:0], HSELRegions); // unswizzle HSEL signals - assign {HSELBootTim, HSELTim, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC} = HSELRegions; + assign {HSELBootTim, HSELTim, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC} = HSELRegions[5:0]; // subword accesses: converts HWDATAIN to HWDATA subwordwrite sww(.*); @@ -134,19 +135,10 @@ module uncore ( HSELPLICD & HREADYPLIC | HSELGPIOD & HREADYGPIO | HSELBootTimD & HREADYBootTim | - HSELUARTD & HREADYUART; - - /* PMA checker now handles access faults. 
*** This can be deleted - // Faults - assign DataAccessFaultM = ~(HSELTimD | HSELCLINTD | HSELPLICD | HSELGPIOD | HSELBootTimD | HSELUARTD); - */ + HSELUARTD & HREADYUART | + HSELNoneD; // don't lock up the bus if no region is being accessed // Address Decoder Delay (figure 4-2 in spec) - flopr #(1) hseltimreg(HCLK, ~HRESETn, HSELTim, HSELTimD); - flopr #(1) hselclintreg(HCLK, ~HRESETn, HSELCLINT, HSELCLINTD); - flopr #(1) hselplicreg(HCLK, ~HRESETn, HSELPLIC, HSELPLICD); - flopr #(1) hselgpioreg(HCLK, ~HRESETn, HSELGPIO, HSELGPIOD); - flopr #(1) hseluartreg(HCLK, ~HRESETn, HSELUART, HSELUARTD); - flopr #(1) hselboottimreg(HCLK, ~HRESETn, HSELBootTim, HSELBootTimD); + flopr #(7) hseldelayreg(HCLK, ~HRESETn, HSELRegions, {HSELNoneD, HSELBootTimD, HSELTimD, HSELCLINTD, HSELGPIOD, HSELUARTD, HSELPLICD}); endmodule diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 11b8e5620..8c3e28c3a 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -520,6 +520,7 @@ string tests32f[] = '{ // check assertions for a legal configuration riscvassertions riscvassertions(); + logging logging(clk, reset, dut.uncore.HADDR, dut.uncore.HTRANS); // pick tests based on modes supported initial begin @@ -722,6 +723,7 @@ module riscvassertions(); // Legal number of PMP entries are 0, 16, or 64 initial begin assert (`PMP_ENTRIES == 0 || `PMP_ENTRIES==16 || `PMP_ENTRIES==64) else $error("Illegal number of PMP entries"); + assert (`F_SUPPORTED || ~`D_SUPPORTED) else $error("Can't support double without supporting float"); end endmodule @@ -949,3 +951,13 @@ module instrNameDecTB( default: name = "ILLEGAL"; endcase endmodule + +module logging( + input logic clk, reset, + input logic [31:0] HADDR, + input logic [1:0] HTRANS); + + always @(posedge clk) + if (HTRANS != 2'b00 && HADDR == 0) + $display("Warning: access to memory address 0\n"); +endmodule diff --git 
a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index 8f8a5d442..18ef74ffd 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -334,6 +334,8 @@ module testbench(); `SCAN_PC(data_file_PCM, scan_file_PCM, trashString, trashString, InstrMExpected, PCMexpected); end + logging logging(clk, reset, dut.uncore.HADDR, dut.uncore.HTRANS); + // ------------------- // Additional Hardware // ------------------- @@ -718,6 +720,16 @@ module testbench(); endfunction endmodule +module logging( + input logic clk, reset, + input logic [31:0] HADDR, + input logic [1:0] HTRANS); + + always @(posedge clk) + if (HTRANS != 2'b00 && HADDR == 0) + $display("Warning: access to memory address 0\n"); +endmodule + module instrTrackerTB( input logic clk, reset, From c016ab8e582387f5f7b24a3e543731489f83f911 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 01:40:27 -0400 Subject: [PATCH 05/30] Commented out some unused modules --- wally-pipelined/src/fpu/mult_R4_64_64_cs.sv | 20 ++++-- wally-pipelined/src/fpu/shifter_denorm.sv | 2 + wally-pipelined/src/muldiv/div.sv | 3 +- wally-pipelined/src/uncore/imem.sv | 71 --------------------- 4 files changed, 17 insertions(+), 79 deletions(-) delete mode 100644 wally-pipelined/src/uncore/imem.sv diff --git a/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv b/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv index 7b4d3f64e..eca5fadf6 100644 --- a/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv +++ b/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv @@ -2,6 +2,7 @@ // It is unsigned and uses Radix-4 Booth encoding. // This file was automatically generated by tdm.pl. 
+/* module mult64 (x, y, P); input [63:0] x; @@ -18,7 +19,8 @@ module mult64 (x, y, P); //assign P = Pt[127:0]; ldf128 cpa (cout, P, Sum, Carry, 1'b0); -endmodule // mult64 +endmodule // mult64 +*/ module multiplier( y, x, Sum, Carry ); @@ -11612,7 +11614,7 @@ module r4be(x0,x1,x2,sing,doub,neg); endmodule // r4be - +/* // Use maj and two xor2's, with cin being late module fullAdd_xc(cout, s, a, b, cin); @@ -11629,7 +11631,7 @@ module fullAdd_xc(cout, s, a, b, cin); maj MAJ_0_112(cout,a,b,cin); endmodule // fullAdd_xc - +*/ module maj(y, a, b, c); @@ -11645,6 +11647,7 @@ module maj(y, a, b, c); endmodule // maj +/* // 4:2 Weinberger compressor module fourtwo_x(t, S, C, X, Y, Z, W, t_1); @@ -11664,6 +11667,7 @@ module fourtwo_x(t, S, C, X, Y, Z, W, t_1); fullAdd_xc secondCSA_0_160(C,S,W,t_1,intermediate); endmodule // fourtwo_x +*/ module inverter(egress, in); @@ -11767,6 +11771,7 @@ module fullAdd_x(cout,sum,a,b,c); endmodule // fullAdd_x +/* module nand2(egress,in1,in2); output egress; @@ -11800,7 +11805,7 @@ module and3(y,a,b,c); assign y = a&b&c; endmodule // and3 - +*/ module and2(y,a,b); output y; @@ -11810,7 +11815,7 @@ module and2(y,a,b); assign y = a&b; endmodule // and2 - +/* module nor2(egress,in1,in2); output egress; @@ -11902,6 +11907,7 @@ module oai(egress,in1,in2,in3); assign egress = ~(in3 & (in1|in2)); endmodule // oai +*/ module aoi(egress,in1,in2,in3); @@ -11949,7 +11955,7 @@ module fullAdd_i(cout_b,sum_b,a,b,c); sum_b sum_0_32(sum_b,a,b,c,cout_b); endmodule // fullAdd_i - +/* module fullAdd(cout,s,a,b,c); output cout; @@ -11979,7 +11985,7 @@ module blackCell(g_i_j, p_i_j, g_i_k, p_i_k, g_kneg1_j, p_kneg1_j); and2 and_0_48(p_i_j, p_i_k, p_kneg1_j); endmodule // blackCell - +*/ module grayCell(g_i_j, g_i_k, p_i_k, g_kneg1_j); output g_i_j; diff --git a/wally-pipelined/src/fpu/shifter_denorm.sv b/wally-pipelined/src/fpu/shifter_denorm.sv index ed2083816..e56b00729 100755 --- a/wally-pipelined/src/fpu/shifter_denorm.sv +++ 
b/wally-pipelined/src/fpu/shifter_denorm.sv @@ -118,6 +118,7 @@ module barrel_shifter_r57 (Z, Sticky, A, Shift); endmodule // barrel_shifter_r57 +/* module barrel_shifter_r64 (Z, Sticky, A, Shift); input [63:0] A; @@ -160,3 +161,4 @@ module barrel_shifter_r64 (Z, Sticky, A, Shift); assign Sticky = (S != sixtythreezeros); endmodule // barrel_shifter_r64 +*/ \ No newline at end of file diff --git a/wally-pipelined/src/muldiv/div.sv b/wally-pipelined/src/muldiv/div.sv index 3bea0e47d..70767dcc4 100755 --- a/wally-pipelined/src/muldiv/div.sv +++ b/wally-pipelined/src/muldiv/div.sv @@ -307,7 +307,7 @@ module csa #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, c, assign carry = {carry_temp[WIDTH-1:1], 1'b0}; endmodule // csa - +/* module eqcmp #(parameter WIDTH = 8) (input logic [WIDTH-1:0] a, b, output logic y); @@ -315,6 +315,7 @@ module eqcmp #(parameter WIDTH = 8) assign y = (a == b); endmodule // eqcmp +*/ // QST for r=4 module qst4 (input logic [6:0] s, input logic [2:0] d, diff --git a/wally-pipelined/src/uncore/imem.sv b/wally-pipelined/src/uncore/imem.sv deleted file mode 100644 index 85362edf7..000000000 --- a/wally-pipelined/src/uncore/imem.sv +++ /dev/null @@ -1,71 +0,0 @@ -/////////////////////////////////////////// -// imem.sv -// -// Written: David_Harris@hmc.edu 9 January 2021 -// Modified: -// -// Purpose: -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -`include "wally-config.vh" - -module imem ( - input logic [`XLEN-1:1] AdrF, - output logic [31:0] InstrF, - output logic [15:0] rd2, // bogus, delete when real multicycle fetch works - output logic InstrAccessFaultF); - - /* verilator lint_off UNDRIVEN */ - logic [`XLEN-1:0] RAM[`TIM_BASE>>(1+`XLEN/32):(`TIM_RANGE+`TIM_BASE)>>(1+`XLEN/32)]; - logic [`XLEN-1:0] bootram[`BOOTTIM_BASE>>(1+`XLEN/32):(`BOOTTIM_RANGE+`BOOTTIM_BASE)>>(1+`XLEN/32)]; - /* verilator lint_on UNDRIVEN */ - logic [31:0] adrbits; // needs to be 32 bits to index RAM - logic [`XLEN-1:0] rd; -// logic [15:0] rd2; - - generate - if (`XLEN==32) assign adrbits = AdrF[31:2]; - else assign adrbits = AdrF[31:3]; - endgenerate - - assign #2 rd = (AdrF < (`TIM_BASE >> 1)) ? 
bootram[adrbits] : RAM[adrbits]; // busybear: 2 memory options - - // hack right now for unaligned 32-bit instructions - // eventually this will need to cause a stall like a cache miss - // when the instruction wraps around a cache line - // could be optimized to only stall when the instruction wrapping is 32 bits - assign #2 rd2 = (AdrF < (`TIM_BASE >> 1)) ? bootram[adrbits+1][15:0] : RAM[adrbits+1][15:0]; //busybear: 2 memory options - generate - if (`XLEN==32) begin - assign InstrF = AdrF[1] ? {rd2[15:0], rd[31:16]} : rd; - // First, AdrF needs to get its last bit appended back onto it - // Then not-XORing it with TIM_BASE checks if it matches TIM_BASE exactly - // Then ORing it with TIM_RANGE introduces some leeway into the previous check, by allowing the lower bits to be either high or low - - assign InstrAccessFaultF = (~&(({AdrF,1'b0} ~^ `TIM_BASE) | `TIM_RANGE)) & (~&(({AdrF,1'b0} ~^ `BOOTTIM_BASE) | `BOOTTIM_RANGE)); - - end else begin - assign InstrF = AdrF[2] ? (AdrF[1] ? {rd2[15:0], rd[63:48]} : rd[63:32]) - : (AdrF[1] ? 
rd[47:16] : rd[31:0]); - // - assign InstrAccessFaultF = (|AdrF[`XLEN-1:32] | ~&({AdrF[31:1],1'b0} ~^ `TIM_BASE | `TIM_RANGE)) & (|AdrF[`XLEN-1:32] | ~&({AdrF[31:1],1'b0} ~^ `BOOTTIM_BASE | `BOOTTIM_RANGE)); - end - endgenerate -endmodule - From 927644679762742a1c519b05c622f276206751f1 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 10:51:56 -0400 Subject: [PATCH 06/30] Switched to array notation for pmpchecker --- .../config/buildroot/wally-config.vh | 4 +- .../config/coremark-64i/wally-config.vh | 1 + wally-pipelined/src/mmu/pmpchecker.sv | 49 ++++++++----------- 3 files changed, 23 insertions(+), 31 deletions(-) diff --git a/wally-pipelined/config/buildroot/wally-config.vh b/wally-pipelined/config/buildroot/wally-config.vh index ace7366dc..a535abfff 100644 --- a/wally-pipelined/config/buildroot/wally-config.vh +++ b/wally-pipelined/config/buildroot/wally-config.vh @@ -62,9 +62,7 @@ // Peripheral memory space extends from BASE to BASE+RANGE // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -//`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 56'h00003FFF -`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +`define BOOTTIM_BASE 56'h00001000 `define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 `define TIM_BASE 56'h80000000 diff --git a/wally-pipelined/config/coremark-64i/wally-config.vh b/wally-pipelined/config/coremark-64i/wally-config.vh index 17d483441..ae3100c6f 100644 --- a/wally-pipelined/config/coremark-64i/wally-config.vh +++ b/wally-pipelined/config/coremark-64i/wally-config.vh @@ -72,6 +72,7 @@ `define PLIC_SUPPORTED 1'b1 `define PLIC_BASE 56'h0C000000 `define PLIC_RANGE 56'h03FFFFFF 
+ // Test modes // Tie GPIO outputs back to inputs diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv index 960182317..e073a2f18 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -51,43 +51,36 @@ module pmpchecker ( output logic PMPStoreAccessFaultM ); - // verilator lint_off UNOPTFLAT // Bit i is high when the address falls in PMP region i logic EnforcePMP; - logic [7:0] PMPCFG [`PMP_ENTRIES-1:0]; + logic [7:0] PMPCfg[`PMP_ENTRIES-1:0]; logic [`PMP_ENTRIES-1:0] Match; // PMP Entry matches logic [`PMP_ENTRIES-1:0] Active; // PMP register i is non-null logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set - logic [`PMP_ENTRIES:0] NoLowerMatch; // None of the lower PMP entries match - logic [`PMP_ENTRIES:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] - genvar i,j; - - assign PAgePMPAdr[0] = 1'b1; - assign NoLowerMatch[0] = 1'b1; - - generate - // verilator lint_off WIDTH - for (j=0; j<`PMP_ENTRIES; j = j+8) - assign {PMPCFG[j+7], PMPCFG[j+6], PMPCFG[j+5], PMPCFG[j+4], - PMPCFG[j+3], PMPCFG[j+2], PMPCFG[j+1], PMPCFG[j]} = PMPCFG_ARRAY_REGW[j/8]; - // verilator lint_on WIDTH - for (i=0; i<`PMP_ENTRIES; i++) // *** can this be done with array notation? 
- pmpadrdec pmpadrdec(.PhysicalAddress, - .PMPCfg(PMPCFG[i]), - .PMPAdr(PMPADDR_ARRAY_REGW[i]), - .PAgePMPAdrIn(PAgePMPAdr[i]), - .PAgePMPAdrOut(PAgePMPAdr[i+1]), - .NoLowerMatchIn(NoLowerMatch[i]), - .NoLowerMatchOut(NoLowerMatch[i+1]), - .Match(Match[i]), - .Active(Active[i]), - .L(L[i]), .X(X[i]), .W(W[i]), .R(R[i]) - ); - + // verilator lint_off UNOPTFLAT + logic [`PMP_ENTRIES-1:0] NoLowerMatch; // None of the lower PMP entries match // verilator lint_on UNOPTFLAT + logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] + genvar i,j; + + generate // extract 8-bit chunks from PMPCFG array + for (j=0; j<`PMP_ENTRIES; j = j+8) + assign {PMPCfg[j+7], PMPCfg[j+6], PMPCfg[j+5], PMPCfg[j+4], + PMPCfg[j+3], PMPCfg[j+2], PMPCfg[j+1], PMPCfg[j]} = PMPCFG_ARRAY_REGW[j/8]; endgenerate + pmpadrdec pmpadrdec[`PMP_ENTRIES-1:0]( + .PhysicalAddress, + .PMPCfg, + .PMPAdr(PMPADDR_ARRAY_REGW), + .PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}), + .PAgePMPAdrOut(PAgePMPAdr), + .NoLowerMatchIn({NoLowerMatch[`PMP_ENTRIES-2:0], 1'b1}), + .NoLowerMatchOut(NoLowerMatch), + .Match, .Active, .L, .X, .W, .R); + + // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? 
|L : |Active; From b5df9b282ddf4211ad5b77e4933a7d8ae62c7eaa Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 11:39:59 -0400 Subject: [PATCH 07/30] Added support for PMP lock bits in csrm and repartitioned design to pass around 8-bit PMPCFG entries --- wally-pipelined/src/ifu/ifu.sv | 4 +- wally-pipelined/src/lsu/lsu.sv | 4 +- wally-pipelined/src/mmu/mmu.sv | 2 +- wally-pipelined/src/mmu/pmpchecker.sv | 10 +-- wally-pipelined/src/privileged/csr.sv | 2 +- wally-pipelined/src/privileged/csrm.sv | 89 +++++++------------ wally-pipelined/src/privileged/privdec.sv | 4 +- wally-pipelined/src/privileged/privileged.sv | 4 +- wally-pipelined/src/uncore/clint.sv | 6 +- wally-pipelined/src/uncore/dtim.sv | 4 +- wally-pipelined/src/uncore/gpio.sv | 9 +- wally-pipelined/src/uncore/uartPC16550D.sv | 18 ++-- .../src/wally/wallypipelinedhart.sv | 2 +- 13 files changed, 68 insertions(+), 90 deletions(-) diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index ddfd88cc5..714119e6a 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -77,8 +77,8 @@ module ifu ( output logic ITLBMissF, ITLBHitF, // pmp/pma (inside mmu) signals. *** temporarily from AHB bus but eventually replace with internal versions pre H - input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], + input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], + input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], output logic PMPInstrAccessFaultF, PMAInstrAccessFaultF, output logic ISquashBusAccessF diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 36d4715fe..2a88b25fd 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -88,8 +88,8 @@ module lsu ( input logic [31:0] HADDR, // *** replace all of these H inputs with physical adress once pma checkers have been edited to use paddr as well. 
input logic [2:0] HSIZE, HBURST, input logic HWRITE, - input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker. + input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], + input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker. output logic PMALoadAccessFaultM, PMAStoreAccessFaultM, output logic PMPLoadAccessFaultM, PMPStoreAccessFaultM, // *** can these be parameterized? we dont need the m stage ones for the immu and vice versa. diff --git a/wally-pipelined/src/mmu/mmu.sv b/wally-pipelined/src/mmu/mmu.sv index 8082d01ad..303076b08 100644 --- a/wally-pipelined/src/mmu/mmu.sv +++ b/wally-pipelined/src/mmu/mmu.sv @@ -68,7 +68,7 @@ module mmu #(parameter ENTRY_BITS = 3, // PMA checker signals input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, - input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], + input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], output logic SquashBusAccess, // *** send to privileged unit diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv index e073a2f18..5208032a7 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -39,7 +39,7 @@ module pmpchecker ( // this will be understood as a var. However, if we don't supply the `var` // keyword, the compiler warns us that it's interpreting the signal as a var, // which we might not intend. 
- input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], + input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], input logic ExecuteAccessF, WriteAccessM, ReadAccessM, @@ -60,19 +60,19 @@ module pmpchecker ( logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set // verilator lint_off UNOPTFLAT logic [`PMP_ENTRIES-1:0] NoLowerMatch; // None of the lower PMP entries match - // verilator lint_on UNOPTFLAT + // verilator lint_on UNOPTFLAT logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] genvar i,j; - + /* generate // extract 8-bit chunks from PMPCFG array for (j=0; j<`PMP_ENTRIES; j = j+8) assign {PMPCfg[j+7], PMPCfg[j+6], PMPCfg[j+5], PMPCfg[j+4], PMPCfg[j+3], PMPCfg[j+2], PMPCfg[j+1], PMPCfg[j]} = PMPCFG_ARRAY_REGW[j/8]; - endgenerate + endgenerate */ pmpadrdec pmpadrdec[`PMP_ENTRIES-1:0]( .PhysicalAddress, - .PMPCfg, + .PMPCfg(PMPCFG_ARRAY_REGW), .PMPAdr(PMPADDR_ARRAY_REGW), .PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}), .PAgePMPAdrOut(PAgePMPAdr), diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv index dfac55711..ba2362c3e 100644 --- a/wally-pipelined/src/privileged/csr.sv +++ b/wally-pipelined/src/privileged/csr.sv @@ -60,7 +60,7 @@ module csr #(parameter output logic STATUS_MIE, STATUS_SIE, output logic STATUS_MXR, STATUS_SUM, output logic STATUS_MPRV, - output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], + output var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], input logic [4:0] SetFflagsM, output logic [2:0] FRM_REGW, diff --git a/wally-pipelined/src/privileged/csrm.sv b/wally-pipelined/src/privileged/csrm.sv index 8c74b9517..29cc482f3 100644 --- a/wally-pipelined/src/privileged/csrm.sv +++ b/wally-pipelined/src/privileged/csrm.sv @@ -74,7 +74,8 @@ module csrm #(parameter output logic [31:0] 
MCOUNTEREN_REGW, MCOUNTINHIBIT_REGW, output logic [`XLEN-1:0] MEDELEG_REGW, MIDELEG_REGW, // 64-bit registers in RV64, or two 32-bit registers in RV32 - output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], + //output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], + output var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], input logic [11:0] MIP_REGW, MIE_REGW, output logic WriteMSTATUSM, @@ -87,8 +88,9 @@ module csrm #(parameter logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM; logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM; logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM; - logic [`PMP_ENTRIES/8-1:0] WritePMPCFGM, WritePMPCFGHM ; - logic [`PMP_ENTRIES-1:0] WritePMPADDRM ; + logic [`PMP_ENTRIES-1:0] WritePMPCFGM; + logic [`PMP_ENTRIES-1:0] WritePMPADDRM ; + logic [`PMP_ENTRIES-1:0] ADDRLocked, CFGLocked; localparam MISA_26 = (`MISA) & 32'h03ffffff; @@ -104,30 +106,9 @@ module csrm #(parameter assign WriteMEPCM = MTrapM | (CSRMWriteM && (CSRAdrM == MEPC)) && ~StallW; assign WriteMCAUSEM = MTrapM | (CSRMWriteM && (CSRAdrM == MCAUSE)) && ~StallW; assign WriteMTVALM = MTrapM | (CSRMWriteM && (CSRAdrM == MTVAL)) && ~StallW; -/* assign WritePMPCFG0M = (CSRMWriteM && (CSRAdrM == PMPCFG0)) && ~StallW; - assign WritePMPCFG2M = (CSRMWriteM && (CSRAdrM == PMPCFG2)) && ~StallW; - assign WritePMPADDRM[0] = (CSRMWriteM && (CSRAdrM == PMPADDR0)) && ~StallW; - assign WritePMPADDRM[1] = (CSRMWriteM && (CSRAdrM == PMPADDR1)) && ~StallW; - assign WritePMPADDRM[2] = (CSRMWriteM && (CSRAdrM == PMPADDR2)) && ~StallW; - assign WritePMPADDRM[3] = (CSRMWriteM && (CSRAdrM == PMPADDR3)) && ~StallW; - assign WritePMPADDRM[4] = (CSRMWriteM && (CSRAdrM == PMPADDR4)) && ~StallW; - assign WritePMPADDRM[5] = (CSRMWriteM && (CSRAdrM == PMPADDR5)) && ~StallW; - assign WritePMPADDRM[6] = (CSRMWriteM && (CSRAdrM == PMPADDR6)) && ~StallW; - assign WritePMPADDRM[7] = (CSRMWriteM && (CSRAdrM == PMPADDR7)) && 
~StallW; - assign WritePMPADDRM[8] = (CSRMWriteM && (CSRAdrM == PMPADDR8)) && ~StallW; - assign WritePMPADDRM[9] = (CSRMWriteM && (CSRAdrM == PMPADDR9)) && ~StallW; - assign WritePMPADDRM[10] = (CSRMWriteM && (CSRAdrM == PMPADDR10)) && ~StallW; - assign WritePMPADDRM[11] = (CSRMWriteM && (CSRAdrM == PMPADDR11)) && ~StallW; - assign WritePMPADDRM[12] = (CSRMWriteM && (CSRAdrM == PMPADDR12)) && ~StallW; - assign WritePMPADDRM[13] = (CSRMWriteM && (CSRAdrM == PMPADDR13)) && ~StallW; - assign WritePMPADDRM[14] = (CSRMWriteM && (CSRAdrM == PMPADDR14)) && ~StallW; - assign WritePMPADDRM[15] = (CSRMWriteM && (CSRAdrM == PMPADDR15)) && ~StallW; */ assign WriteMCOUNTERENM = CSRMWriteM && (CSRAdrM == MCOUNTEREN) && ~StallW; assign WriteMCOUNTINHIBITM = CSRMWriteM && (CSRAdrM == MCOUNTINHIBIT) && ~StallW; - - - assign IllegalCSRMWriteReadonlyM = CSRMWriteM && (CSRAdrM == MVENDORID || CSRAdrM == MARCHID || CSRAdrM == MIMPID || CSRAdrM == MHARTID); // CSRs @@ -164,32 +145,49 @@ module csrm #(parameter genvar i; generate for(i=0; i<`PMP_ENTRIES; i++) begin - assign WritePMPADDRM[i] = (CSRMWriteM && (CSRAdrM == PMPADDR0+i)) && ~StallW; + // when the lock bit is set, don't allow writes to the PMPCFG or PMPADDR + // also, when the lock bit of the next entry is set and the next entry is TOR, don't allow writes to this entry PMPADDR + assign CFGLocked[i] = PMPCFG_ARRAY_REGW[i][7]; + if (i == `PMP_ENTRIES-1) + assign ADDRLocked[i] = PMPCFG_ARRAY_REGW[i][7]; + else + assign ADDRLocked[i] = PMPCFG_ARRAY_REGW[i][7] | (PMPCFG_ARRAY_REGW[i+1][7] & PMPCFG_ARRAY_REGW[i+1][4:3] == 2'b01); + + assign WritePMPADDRM[i] = (CSRMWriteM & (CSRAdrM == (PMPADDR0+i))) & ~StallW & ~ADDRLocked[i]; flopenr #(`XLEN) PMPADDRreg(clk, reset, WritePMPADDRM[i], CSRWriteValM, PMPADDR_ARRAY_REGW[i]); - end - for (i=0; i<`PMP_ENTRIES/8; i++) begin if (`XLEN==64) begin - assign WritePMPCFGM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i)) && ~StallW; - flopenr #(`XLEN) PMPCFGreg(clk, reset, WritePMPCFGM[i], 
CSRWriteValM, PMPCFG_ARRAY_REGW[i]); + assign WritePMPCFGM[i] = (CSRMWriteM & (CSRAdrM == (PMPCFG0+2*(i/8)))) & ~StallW & ~CFGLocked[i]; + flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%8)*8+7:(i%8)*8], PMPCFG_ARRAY_REGW[i]); end else begin - assign WritePMPCFGM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i)) && ~StallW; - assign WritePMPCFGHM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i+1)) && ~StallW; - flopenr #(`XLEN) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][31:0]); - flopenr #(`XLEN) PMPCFGHreg(clk, reset, WritePMPCFGHM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][63:32]); + assign WritePMPCFGM[i] = (CSRMWriteM & (CSRAdrM == (PMPCFG0+i/4))) & ~StallW & ~CFGLocked[i]; +// assign WritePMPCFGHM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i+1)) && ~StallW; + flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%4)*8+7:(i%4)*8], PMPCFG_ARRAY_REGW[i]); +// flopenr #(`XLEN) PMPCFGHreg(clk, reset, WritePMPCFGHM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][63:32]); end end endgenerate // Read machine mode CSRs // verilator lint_off WIDTH + logic [5:0] entry; always_comb begin IllegalCSRMAccessM = !(`S_SUPPORTED | `U_SUPPORTED & `N_SUPPORTED) && (CSRAdrM == MEDELEG || CSRAdrM == MIDELEG); // trap on DELEG register access when no S or N-mode if (CSRAdrM >= PMPADDR0 && CSRAdrM < PMPADDR0 + `PMP_ENTRIES) // reading a PMP entry CSRMReadValM = PMPADDR_ARRAY_REGW[CSRAdrM - PMPADDR0]; else if (CSRAdrM >= PMPCFG0 && CSRAdrM < PMPCFG0 + `PMP_ENTRIES/4) begin - if (~CSRAdrM[0]) CSRMReadValM = PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0)/2][`XLEN-1:0]; - else CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0-1)/2][63:32]}; + if (`XLEN==64) begin + entry = ({CSRAdrM[11:1], 1'b0} - PMPCFG0)*4; // disregard odd entries in RV64 + CSRMReadValM = {PMPCFG_ARRAY_REGW[entry+7],PMPCFG_ARRAY_REGW[entry+6],PMPCFG_ARRAY_REGW[entry+5],PMPCFG_ARRAY_REGW[entry+4], + 
PMPCFG_ARRAY_REGW[entry+3],PMPCFG_ARRAY_REGW[entry+2],PMPCFG_ARRAY_REGW[entry+1],PMPCFG_ARRAY_REGW[entry]}; + end else begin + entry = (CSRAdrM - PMPCFG0)*4; + CSRMReadValM = {PMPCFG_ARRAY_REGW[entry+3],PMPCFG_ARRAY_REGW[entry+2],PMPCFG_ARRAY_REGW[entry+1],PMPCFG_ARRAY_REGW[entry]}; + end + + /* + if (~CSRAdrM[0]) CSRMReadValM = {PMPCFG_ARRAY_REGW[]}; + else CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0-1)/2][63:32]};*/ end else case (CSRAdrM) MISA_ADR: CSRMReadValM = MISA_REGW; @@ -212,26 +210,7 @@ module csrm #(parameter MTVAL: CSRMReadValM = MTVAL_REGW; MCOUNTEREN:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTEREN_REGW}; MCOUNTINHIBIT:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTINHIBIT_REGW}; -/* PMPCFG0: CSRMReadValM = PMPCFG01_REGW[`XLEN-1:0]; - PMPCFG1: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG01_REGW[63:32]}; - PMPCFG2: CSRMReadValM = PMPCFG23_REGW[`XLEN-1:0]; - PMPCFG3: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG23_REGW[63:32]}; - PMPADDR0: CSRMReadValM = PMPADDR_ARRAY_REGW[0]; // *** make configurable - PMPADDR1: CSRMReadValM = PMPADDR_ARRAY_REGW[1]; - PMPADDR2: CSRMReadValM = PMPADDR_ARRAY_REGW[2]; - PMPADDR3: CSRMReadValM = PMPADDR_ARRAY_REGW[3]; - PMPADDR4: CSRMReadValM = PMPADDR_ARRAY_REGW[4]; - PMPADDR5: CSRMReadValM = PMPADDR_ARRAY_REGW[5]; - PMPADDR6: CSRMReadValM = PMPADDR_ARRAY_REGW[6]; - PMPADDR7: CSRMReadValM = PMPADDR_ARRAY_REGW[7]; - PMPADDR8: CSRMReadValM = PMPADDR_ARRAY_REGW[8]; - PMPADDR9: CSRMReadValM = PMPADDR_ARRAY_REGW[9]; - PMPADDR10: CSRMReadValM = PMPADDR_ARRAY_REGW[10]; - PMPADDR11: CSRMReadValM = PMPADDR_ARRAY_REGW[11]; - PMPADDR12: CSRMReadValM = PMPADDR_ARRAY_REGW[12]; - PMPADDR13: CSRMReadValM = PMPADDR_ARRAY_REGW[13]; - PMPADDR14: CSRMReadValM = PMPADDR_ARRAY_REGW[14]; - PMPADDR15: CSRMReadValM = PMPADDR_ARRAY_REGW[15]; */ + default: begin CSRMReadValM = 0; IllegalCSRMAccessM = 1; diff --git a/wally-pipelined/src/privileged/privdec.sv b/wally-pipelined/src/privileged/privdec.sv index 1330a62bc..621ef9a2c 
100644 --- a/wally-pipelined/src/privileged/privdec.sv +++ b/wally-pipelined/src/privileged/privdec.sv @@ -38,9 +38,9 @@ module privdec ( // xRET defined in Privileged Spect 3.2.2 assign uretM = PrivilegedM & (InstrM[31:20] == 12'b000000000010) & `N_SUPPORTED; - assign sretM = PrivilegedM & (InstrM[31:20] == 12'b000100000010) & `S_SUPPORTED && + assign sretM = PrivilegedM & (InstrM[31:20] == 12'b000100000010) & `S_SUPPORTED & PrivilegeModeW[0] & ~STATUS_TSR; - assign mretM = PrivilegedM & (InstrM[31:20] == 12'b001100000010) && (PrivilegeModeW == `M_MODE); + assign mretM = PrivilegedM & (InstrM[31:20] == 12'b001100000010) & (PrivilegeModeW == `M_MODE); assign ecallM = PrivilegedM & (InstrM[31:20] == 12'b000000000000); assign ebreakM = PrivilegedM & (InstrM[31:20] == 12'b000000000001); diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index 5ed8c8807..618b32145 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -64,11 +64,11 @@ module privileged ( input logic PMALoadAccessFaultM, PMPLoadAccessFaultM, input logic PMAStoreAccessFaultM, PMPStoreAccessFaultM, - output logic IllegalFPUInstrE, + output logic IllegalFPUInstrE, output logic [1:0] PrivilegeModeW, output logic [`XLEN-1:0] SATP_REGW, output logic STATUS_MXR, STATUS_SUM, - output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], + output var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], output logic [2:0] FRM_REGW ); diff --git a/wally-pipelined/src/uncore/clint.sv b/wally-pipelined/src/uncore/clint.sv index d2014468a..80cb28a1d 100644 --- a/wally-pipelined/src/uncore/clint.sv +++ b/wally-pipelined/src/uncore/clint.sv @@ -94,7 +94,7 @@ module clint ( if (~HRESETn) begin MTIME <= 0; // MTIMECMP is not reset - end else if (memwrite && entryd == 16'hBFF8) begin + end else if (memwrite & entryd == 16'hBFF8) begin // MTIME 
Counter. Eventually change this to run off separate clock. Synchronization then needed MTIME <= HWDATA; end else MTIME <= MTIME + 1; @@ -125,9 +125,9 @@ module clint ( if (~HRESETn) begin MTIME <= 0; // MTIMECMP is not reset - end else if (memwrite && (entryd == 16'hBFF8)) begin + end else if (memwrite & (entryd == 16'hBFF8)) begin MTIME[31:0] <= HWDATA; - end else if (memwrite && (entryd == 16'hBFFC)) begin + end else if (memwrite & (entryd == 16'hBFFC)) begin // MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed MTIME[63:32]<= HWDATA; end else MTIME <= MTIME + 1; diff --git a/wally-pipelined/src/uncore/dtim.sv b/wally-pipelined/src/uncore/dtim.sv index 408645675..c195c5516 100644 --- a/wally-pipelined/src/uncore/dtim.sv +++ b/wally-pipelined/src/uncore/dtim.sv @@ -102,13 +102,13 @@ module dtim #(parameter BASE=0, RANGE = 65535) ( always_ff @(posedge HCLK) begin HWADDR <= #1 A; HREADTim0 <= #1 RAM[A[31:3]]; - if (memwrite && risingHREADYTim) RAM[HWADDR[31:3]] <= #1 HWDATA; + if (memwrite & risingHREADYTim) RAM[HWADDR[31:3]] <= #1 HWDATA; end end else begin always_ff @(posedge HCLK) begin HWADDR <= #1 A; HREADTim0 <= #1 RAM[A[31:2]]; - if (memwrite && risingHREADYTim) RAM[HWADDR[31:2]] <= #1 HWDATA; + if (memwrite & risingHREADYTim) RAM[HWADDR[31:2]] <= #1 HWDATA; end end endgenerate diff --git a/wally-pipelined/src/uncore/gpio.sv b/wally-pipelined/src/uncore/gpio.sv index bddec8e63..cde55b7a7 100644 --- a/wally-pipelined/src/uncore/gpio.sv +++ b/wally-pipelined/src/uncore/gpio.sv @@ -131,19 +131,19 @@ module gpio ( default: Dout <= #1 0; endcase // interrupts - if (memwrite && (entryd == 8'h1C)) + if (memwrite & (entryd == 8'h1C)) rise_ip <= rise_ip & ~Din | (input2d & ~input3d); else rise_ip <= rise_ip | (input2d & ~input3d); - if (memwrite && (entryd == 8'h24)) + if (memwrite & (entryd == 8'h24)) fall_ip <= fall_ip & ~Din | (~input2d & input3d); else fall_ip <= fall_ip | (~input2d & input3d); - if (memwrite && (entryd == 
8'h2C)) + if (memwrite & (entryd == 8'h2C)) high_ip <= high_ip & ~Din | input3d; else high_ip <= high_ip | input3d; - if (memwrite && (entryd == 8'h34)) + if (memwrite & (entryd == 8'h34)) low_ip <= low_ip & ~Din | ~input3d; else low_ip <= low_ip | ~input3d; @@ -157,7 +157,6 @@ module gpio ( else assign input0d = GPIOPinsIn & input_en; endgenerate - // *** this costs lots of flops; I suspect they don't need to be resettable, do they? flop #(32) sync1(HCLK,input0d,input1d); flop #(32) sync2(HCLK,input1d,input2d); flop #(32) sync3(HCLK,input2d,input3d); diff --git a/wally-pipelined/src/uncore/uartPC16550D.sv b/wally-pipelined/src/uncore/uartPC16550D.sv index 4ead3f679..62c8ea004 100644 --- a/wally-pipelined/src/uncore/uartPC16550D.sv +++ b/wally-pipelined/src/uncore/uartPC16550D.sv @@ -224,11 +224,11 @@ module uartPC16550D( else rxstate <= #1 UART_IDLE; end // timeout counting - if (~MEMRb && A == 3'b000 && ~DLAB) rxtimeoutcnt <= #1 0; // reset timeout on read + if (~MEMRb & A == 3'b000 & ~DLAB) rxtimeoutcnt <= #1 0; // reset timeout on read else if (fifoenabled & ~rxfifoempty & rxbaudpulse & ~rxfifotimeout) rxtimeoutcnt <= #1 rxtimeoutcnt+1; // *** not right end - assign rxcentered = rxbaudpulse && (rxoversampledcnt == 4'b1000); // implies rxstate = UART_ACTIVE + assign rxcentered = rxbaudpulse & (rxoversampledcnt == 4'b1000); // implies rxstate = UART_ACTIVE assign rxbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1; // start bit + data bits + (parity bit) + stop bit /////////////////////////////////////////// @@ -267,12 +267,12 @@ module uartPC16550D( rxfifohead <= #1 rxfifohead + 1; end rxdataready <= #1 1; - end else if (~MEMRb && A == 3'b000 && ~DLAB) begin // reading RBR updates ready / pops fifo + end else if (~MEMRb & A == 3'b000 & ~DLAB) begin // reading RBR updates ready / pops fifo if (fifoenabled) begin rxfifotail <= #1 rxfifotail + 1; if (rxfifohead == rxfifotail +1) rxdataready <= #1 0; end else rxdataready <= #1 0; - end else 
if (~MEMWb && A == 3'b010) // writes to FIFO Control Register + end else if (~MEMWb & A == 3'b010) // writes to FIFO Control Register if (Din[1] | ~Din[0]) begin // rx FIFO reset or FIFO disable clears FIFO contents rxfifohead <= #1 0; rxfifotail <= #1 0; end @@ -326,7 +326,7 @@ module uartPC16550D( txoversampledcnt <= #1 0; txstate <= #1 UART_IDLE; txbitssent <= #1 0; - end else if ((txstate == UART_IDLE) && txsrfull) begin // start transmitting + end else if ((txstate == UART_IDLE) & txsrfull) begin // start transmitting txstate <= #1 UART_ACTIVE; txoversampledcnt <= #1 1; txbitssent <= #1 0; @@ -341,7 +341,7 @@ module uartPC16550D( end assign txbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1 + {3'b000, LCR[2]} - 4'd1; // start bit + data bits + (parity bit) + stop bit(s) - assign txnextbit = txbaudpulse && (txoversampledcnt == 4'b0000); // implies txstate = UART_ACTIVE + assign txnextbit = txbaudpulse & (txoversampledcnt == 4'b0000); // implies txstate = UART_ACTIVE /////////////////////////////////////////// // transmit holding register, shift register, FIFO @@ -372,7 +372,7 @@ module uartPC16550D( if (~HRESETn) begin txfifohead <= #1 0; txfifotail <= #1 0; txhrfull <= #1 0; txsrfull <= #1 0; TXHR <= #1 0; txsr <= #1 12'hfff; end else begin - if (~MEMWb && A == 3'b000 && ~DLAB) begin // writing transmit holding register or fifo + if (~MEMWb & A == 3'b000 & ~DLAB) begin // writing transmit holding register or fifo if (fifoenabled) begin txfifo[txfifohead] <= #1 Din; txfifohead <= #1 txfifohead + 1; @@ -395,8 +395,8 @@ module uartPC16550D( txsrfull <= #1 1; end end else if (txstate == UART_DONE) txsrfull <= #1 0; // done transmitting shift register - else if (txstate == UART_ACTIVE && txnextbit) txsr <= #1 {txsr[10:0], 1'b1}; // shift txhr - if (!MEMWb && A == 3'b010) // writes to FIFO control register + else if (txstate == UART_ACTIVE & txnextbit) txsr <= #1 {txsr[10:0], 1'b1}; // shift txhr + if (!MEMWb & A == 3'b010) // writes to 
FIFO control register if (Din[2] | ~Din[0]) begin // tx FIFO reste or FIFO disable clears FIFO contents txfifohead <= #1 0; txfifotail <= #1 0; end diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 978f747fa..be59c18b6 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -123,7 +123,7 @@ module wallypipelinedhart logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM; logic DSquashBusAccessM, ISquashBusAccessF; var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0]; - var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0]; + var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0]; // IMem stalls logic ICacheStallF; From 622060b99f791b6a3399a2e9054ac8185147f9a2 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 12:32:27 -0400 Subject: [PATCH 08/30] Replaced generates with arrays in TLB --- wally-pipelined/src/mmu/pmpchecker.sv | 2 +- wally-pipelined/src/mmu/tlb.sv | 3 ++- wally-pipelined/src/mmu/tlbcam.sv | 14 +++++++++++--- wally-pipelined/src/mmu/tlbram.sv | 9 ++++++--- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv index 5208032a7..ee4b261df 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -70,7 +70,7 @@ module pmpchecker ( PMPCfg[j+3], PMPCfg[j+2], PMPCfg[j+1], PMPCfg[j]} = PMPCFG_ARRAY_REGW[j/8]; endgenerate */ - pmpadrdec pmpadrdec[`PMP_ENTRIES-1:0]( + pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0]( .PhysicalAddress, .PMPCfg(PMPCFG_ARRAY_REGW), .PMPAdr(PMPADDR_ARRAY_REGW), diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index 1cf639064..bd3652804 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -95,7 +95,7 @@ module tlb #(parameter ENTRY_BITS = 3, // Index (currently random) to write the next TLB entry 
logic [ENTRY_BITS-1:0] WriteIndex; - logic [(2**ENTRY_BITS)-1:0] WriteLines; // used as the one-hot encoding of WriteIndex + logic [(2**ENTRY_BITS)-1:0] WriteLines, WriteEnables; // used as the one-hot encoding of WriteIndex // Sections of the virtual and physical addresses logic [`VPN_BITS-1:0] VirtualPageNumber; @@ -121,6 +121,7 @@ module tlb #(parameter ENTRY_BITS = 3, // Decode the integer encoded WriteIndex into the one-hot encoded WriteLines decoder #(ENTRY_BITS) writedecoder(WriteIndex, WriteLines); + assign WriteEnables = WriteLines & {(2**ENTRY_BITS){TLBWrite}}; // The bus width is always the largest it could be for that XLEN. For example, vpn will be 36 bits wide in rv64 // this, even though it could be 27 bits (SV39) or 36 bits (SV48) wide. When the value of VPN is narrower, diff --git a/wally-pipelined/src/mmu/tlbcam.sv b/wally-pipelined/src/mmu/tlbcam.sv index bd64afea9..e45b124a1 100644 --- a/wally-pipelined/src/mmu/tlbcam.sv +++ b/wally-pipelined/src/mmu/tlbcam.sv @@ -35,9 +35,9 @@ module tlbcam #(parameter ENTRY_BITS = 3, input logic [KEY_BITS-1:0] VirtualPageNumber, input logic [1:0] PageTypeWriteVal, // input logic [`SVMODE_BITS-1:0] SvMode, // *** may not need to be used. - input logic TLBWrite, +// input logic TLBWrite, input logic TLBFlush, - input logic [2**ENTRY_BITS-1:0] WriteLines, + input logic [2**ENTRY_BITS-1:0] WriteEnables, output logic [ENTRY_BITS-1:0] VPNIndex, output logic [1:0] HitPageType, @@ -55,16 +55,24 @@ module tlbcam #(parameter ENTRY_BITS = 3, // original virtual page number from when the address was written, regardless // of page type. However, matches are determined based on a subset of the // page number segments. 
+ + camline #(KEY_BITS, SEGMENT_BITS) camlines[NENTRIES-1:0]( + .CAMLineWrite(WriteEnables), + .PageType(PageTypeList), + .Match(Matches), + .*); +/* generate genvar i; for (i = 0; i < NENTRIES; i++) begin camline #(KEY_BITS, SEGMENT_BITS) camline( - .CAMLineWrite(WriteLines[i] && TLBWrite), + .CAMLineWrite(WriteEnables[i]), .PageType(PageTypeList[i]), .Match(Matches[i]), .*); end endgenerate + */ // In case there are multiple matches in the CAM, select only one // *** it might be guaranteed that the CAM will never have multiple matches. diff --git a/wally-pipelined/src/mmu/tlbram.sv b/wally-pipelined/src/mmu/tlbram.sv index 2012ed829..f13666bcd 100644 --- a/wally-pipelined/src/mmu/tlbram.sv +++ b/wally-pipelined/src/mmu/tlbram.sv @@ -32,8 +32,8 @@ module tlbram #(parameter ENTRY_BITS = 3) ( input logic [ENTRY_BITS-1:0] VPNIndex, // Index to read from // input logic [ENTRY_BITS-1:0] WriteIndex, // *** unused? input logic [`XLEN-1:0] PTEWriteVal, - input logic TLBWrite, - input logic [2**ENTRY_BITS-1:0] WriteLines, +// input logic TLBWrite, + input logic [2**ENTRY_BITS-1:0] WriteEnables, output logic [`PPN_BITS-1:0] PhysicalPageNumber, output logic [7:0] PTEAccessBits @@ -45,13 +45,16 @@ module tlbram #(parameter ENTRY_BITS = 3) ( logic [`XLEN-1:0] PageTableEntry; // Generate a flop for every entry in the RAM + flopenr #(`XLEN) pteflops[NENTRIES-1:0](clk, reset, WriteEnables, PTEWriteVal, ram); +/* generate genvar i; for (i = 0; i < NENTRIES; i++) begin: tlb_ram_flops - flopenr #(`XLEN) pteflop(clk, reset, WriteLines[i] & TLBWrite, + flopenr #(`XLEN) pteflop(clk, reset, WriteEnables[i], PTEWriteVal, ram[i]); end endgenerate +*/ assign PageTableEntry = ram[VPNIndex]; assign PTEAccessBits = PageTableEntry[7:0]; From 7a810357d70d1bacb0e6d8e4ec1af93049f10bcf Mon Sep 17 00:00:00 2001 From: bbracker Date: Sun, 4 Jul 2021 12:48:13 -0400 Subject: [PATCH 09/30] Make Wally take InstrPageFaultF traps --- wally-pipelined/src/ifu/ifu.sv | 4 ++-- 
wally-pipelined/src/privileged/trap.sv | 8 ++++---- wally-pipelined/src/wally/wallypipelinedhart.sv | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index ddfd88cc5..d3f864e02 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -151,7 +151,7 @@ module ifu ( icache icache(.*, .PCNextF(PCNextFPhys), .PCPF(PCPFmmu), - .WalkerInstrPageFaultF(WalkerInstrPageFaultF)); + .WalkerInstrPageFaultF); flopenl #(32) AlignedInstrRawDFlop(clk, reset | reset_q, ~StallD, FlushD ? nop : FinalInstrRawF, nop, InstrRawD); @@ -184,7 +184,7 @@ module ifu ( assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment - flopenl #(`XLEN) pcreg(clk, reset, ~StallF & ~ICacheStallF, PCNextF, `RESET_VECTOR, PCF); + flopenl #(`XLEN) pcreg(clk, reset, (~StallF & ~ICacheStallF) | WalkerInstrPageFaultF, PCNextF, `RESET_VECTOR, PCF); // branch and jump predictor generate diff --git a/wally-pipelined/src/privileged/trap.sv b/wally-pipelined/src/privileged/trap.sv index af4f17305..b3b657fc7 100644 --- a/wally-pipelined/src/privileged/trap.sv +++ b/wally-pipelined/src/privileged/trap.sv @@ -30,7 +30,7 @@ module trap ( input logic clk, reset, input logic InstrMisalignedFaultM, InstrAccessFaultM, IllegalInstrFaultM, input logic BreakpointFaultM, LoadMisalignedFaultM, StoreMisalignedFaultM, - input logic LoadAccessFaultM, StoreAccessFaultM, EcallFaultM, InstrPageFaultM, + input logic LoadAccessFaultM, StoreAccessFaultM, EcallFaultM, InstrPageFaultF, input logic LoadPageFaultM, StorePageFaultM, input logic mretM, sretM, uretM, input logic [1:0] PrivilegeModeW, NextPrivilegeModeM, @@ -69,7 +69,7 @@ module trap ( assign BusTrapM = LoadAccessFaultM | StoreAccessFaultM; assign NonBusTrapM = InstrMisalignedFaultM | InstrAccessFaultM | IllegalInstrFaultM | LoadMisalignedFaultM | StoreMisalignedFaultM | - InstrPageFaultM | LoadPageFaultM | StorePageFaultM | + 
InstrPageFaultF | LoadPageFaultM | StorePageFaultM | BreakpointFaultM | EcallFaultM | InterruptM; assign TrapM = BusTrapM | NonBusTrapM; @@ -121,7 +121,7 @@ module trap ( else if (PendingIntsM[9]) CauseM = (1 << (`XLEN-1)) + 9; // Supervisor External Int else if (PendingIntsM[1]) CauseM = (1 << (`XLEN-1)) + 1; // Supervisor Sw Int else if (PendingIntsM[5]) CauseM = (1 << (`XLEN-1)) + 5; // Supervisor Timer Int - else if (InstrPageFaultM) CauseM = 12; + else if (InstrPageFaultF) CauseM = 12; else if (InstrAccessFaultM) CauseM = 1; else if (InstrMisalignedFaultM) CauseM = 0; else if (IllegalInstrFaultM) CauseM = 2; @@ -148,7 +148,7 @@ module trap ( if (InstrMisalignedFaultM) NextFaultMtvalM = InstrMisalignedAdrM; else if (LoadMisalignedFaultM) NextFaultMtvalM = MemAdrM; else if (StoreMisalignedFaultM) NextFaultMtvalM = MemAdrM; - else if (InstrPageFaultM) NextFaultMtvalM = PCM; + else if (InstrPageFaultF) NextFaultMtvalM = PCM; else if (LoadPageFaultM) NextFaultMtvalM = MemAdrM; else if (StorePageFaultM) NextFaultMtvalM = MemAdrM; else if (IllegalInstrFaultM) NextFaultMtvalM = {{(`XLEN-32){1'b0}}, InstrM}; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 978f747fa..2d442929d 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -179,7 +179,7 @@ module wallypipelinedhart ifu ifu(.InstrInF(InstrRData), - .WalkerInstrPageFaultF(WalkerInstrPageFaultF), + .WalkerInstrPageFaultF, .*); // instruction fetch unit: PC, branch prediction, instruction cache ieu ieu(.*); // integer execution unit: integer register file, datapath and controller From 5b891e05ac6f78ad05bb81637ab4d17f5d7e3ff3 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 12:53:52 -0400 Subject: [PATCH 10/30] TLB mux and swizzling cleanup --- wally-pipelined/src/mmu/tlb.sv | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git 
a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index bd3652804..6d8ed46a7 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -101,6 +101,7 @@ module tlb #(parameter ENTRY_BITS = 3, logic [`VPN_BITS-1:0] VirtualPageNumber; logic [`PPN_BITS-1:0] PhysicalPageNumber, PhysicalPageNumberMixed; logic [`PA_BITS-1:0] PhysicalAddressFull; + logic [`XLEN+1:0] VAExt; // Sections of the page table entry logic [7:0] PTEAccessBits; @@ -137,7 +138,7 @@ module tlb #(parameter ENTRY_BITS = 3, endgenerate // Whether translation should occur - assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE) & ~ DisableTranslation; + assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE) & ~ DisableTranslation; // *** needs to account for mprv // Determine how the TLB is currently being used // Note that we use ReadAccess for both loads and instruction fetches @@ -155,11 +156,8 @@ module tlb #(parameter ENTRY_BITS = 3, tlbcam #(ENTRY_BITS, `VPN_BITS, `VPN_SEGMENT_BITS) tlbcam(.*); // unswizzle useful PTE bits - assign PTE_U = PTEAccessBits[4]; - assign PTE_X = PTEAccessBits[3]; - assign PTE_W = PTEAccessBits[2]; - assign PTE_R = PTEAccessBits[1]; - + assign {PTE_U, PTE_X, PTE_W, PTE_R} = PTEAccessBits[4:1]; + // Check whether the access is allowed, page faulting if not. // *** We might not have S mode. generate @@ -198,17 +196,22 @@ module tlb #(parameter ENTRY_BITS = 3, // Provide physical address only on TLBHits to cause catastrophic errors if // garbage address is used. - assign PhysicalAddressFull = (TLBHit) ? - {PhysicalPageNumberMixed, PageOffset} : '0; + assign PhysicalAddressFull = (TLBHit) ? {PhysicalPageNumberMixed, PageOffset} : '0; // Output the hit physical address if translation is currently on. 
- generate +/* generate if (`XLEN == 32) begin + VirtualAddressPALen = {2'b0, VirtualAddress}; + mux2 #(`PA_BITS) addressmux({2'b0, VirtualAddress}, PhysicalAddressFull, Translate, PhysicalAddress); end else begin + VirtualAddressPALen = VirtualAddress[`PA_BITS-1:0]; mux2 #(`PA_BITS) addressmux(VirtualAddress[`PA_BITS-1:0], PhysicalAddressFull, Translate, PhysicalAddress); end - endgenerate + endgenerate*/ + + assign VAExt = {2'b00, VirtualAddress}; // extend length of virtual address if necessary for RV32 + mux2 #(`PA_BITS) addressmux(VAExt[`PA_BITS-1:0], PhysicalAddressFull, Translate, PhysicalAddress); assign TLBHit = CAMHit & TLBAccess; assign TLBMiss = ~TLBHit & ~TLBFlush & Translate & TLBAccess; From a5c0dc8c81bc22014b1e1e3ca601c5b3d6679f9d Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 13:20:29 -0400 Subject: [PATCH 11/30] Fixed MPRV and MXR checks in TLB --- wally-pipelined/src/ifu/ifu.sv | 3 +- wally-pipelined/src/lsu/lsu.sv | 3 +- wally-pipelined/src/mmu/mmu.sv | 3 +- wally-pipelined/src/mmu/tlb.sv | 29 ++++++++++--------- wally-pipelined/src/privileged/csr.sv | 3 +- wally-pipelined/src/privileged/privileged.sv | 6 ++-- .../src/wally/wallypipelinedhart.sv | 3 +- 7 files changed, 28 insertions(+), 22 deletions(-) diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 714119e6a..65f8a9b84 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -70,7 +70,8 @@ module ifu ( input logic [`XLEN-1:0] PageTableEntryF, input logic [1:0] PageTypeF, input logic [`XLEN-1:0] SATP_REGW, - input logic STATUS_MXR, STATUS_SUM, + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + input logic [1:0] STATUS_MPP, input logic ITLBWriteF, ITLBFlushF, input logic WalkerInstrPageFaultF, diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 2a88b25fd..d7ff78d78 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -75,7 +75,8 @@ module lsu ( input 
logic [`XLEN-1:0] PageTableEntryM, input logic [1:0] PageTypeM, input logic [`XLEN-1:0] SATP_REGW, // from csr - input logic STATUS_MXR, STATUS_SUM, // from csr + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + input logic [1:0] STATUS_MPP, input logic DTLBWriteM, output logic DTLBMissM, input logic DisableTranslation, // used to stop intermediate PTE physical addresses being saved to TLB. diff --git a/wally-pipelined/src/mmu/mmu.sv b/wally-pipelined/src/mmu/mmu.sv index 303076b08..e28db2e77 100644 --- a/wally-pipelined/src/mmu/mmu.sv +++ b/wally-pipelined/src/mmu/mmu.sv @@ -34,7 +34,8 @@ module mmu #(parameter ENTRY_BITS = 3, input logic clk, reset, // Current value of satp CSR (from privileged unit) input logic [`XLEN-1:0] SATP_REGW, - input logic STATUS_MXR, STATUS_SUM, + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + input logic [1:0] STATUS_MPP, // Current privilege level of the processeor input logic [1:0] PrivilegeModeW, diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index 6d8ed46a7..9f6a4d25d 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -55,7 +55,8 @@ module tlb #(parameter ENTRY_BITS = 3, // Current value of satp CSR (from privileged unit) input logic [`XLEN-1:0] SATP_REGW, - input logic STATUS_MXR, STATUS_SUM, + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + input logic [1:0] STATUS_MPP, // Current privilege level of the processeor input logic [1:0] PrivilegeModeW, @@ -92,6 +93,7 @@ module tlb #(parameter ENTRY_BITS = 3, // Store current virtual memory mode (SV32, SV39, SV48, ect...) 
logic [`SVMODE_BITS-1:0] SvMode; + logic [1:0] EffectivePrivilegeMode; // privilege mode, possibly modified by MPRV // Index (currently random) to write the next TLB entry logic [ENTRY_BITS-1:0] WriteIndex; @@ -137,16 +139,15 @@ module tlb #(parameter ENTRY_BITS = 3, end endgenerate - // Whether translation should occur - assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE) & ~ DisableTranslation; // *** needs to account for mprv - + // Whether translation should occur; ITLB ignores MPRVW + assign Translate = (SvMode != `NO_TRANSLATE) & (EffectivePrivilegeMode != `M_MODE) & ~ DisableTranslation; + // Determine how the TLB is currently being used // Note that we use ReadAccess for both loads and instruction fetches assign ReadAccess = TLBAccessType[1]; assign WriteAccess = TLBAccessType[0]; assign TLBAccess = ReadAccess || WriteAccess; - assign PageOffset = VirtualAddress[11:0]; // TLB entries are evicted according to the LRU algorithm @@ -164,28 +165,30 @@ module tlb #(parameter ENTRY_BITS = 3, if (ITLB == 1) begin logic ImproperPrivilege; + assign EffectivePrivilegeMode = PrivilegeModeW; // ITLB ignores MPRV + // User mode may only execute user mode pages, and supervisor mode may // only execute non-user mode pages. - assign ImproperPrivilege = ((PrivilegeModeW == `U_MODE) && ~PTE_U) || - ((PrivilegeModeW == `S_MODE) && PTE_U); + assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) && ~PTE_U) || + ((EffectivePrivilegeMode == `S_MODE) && PTE_U); assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || ~PTE_X); end else begin logic ImproperPrivilege, InvalidRead, InvalidWrite; + assign EffectivePrivilegeMode = STATUS_MPRV ? STATUS_MPP : PrivilegeModeW; // DTLB uses MPP mode when MPRV is 1 + // User mode may only load/store from user mode pages, and supervisor mode // may only access user mode pages when STATUS_SUM is low. 
- assign ImproperPrivilege = ((PrivilegeModeW == `U_MODE) && ~PTE_U) || - ((PrivilegeModeW == `S_MODE) && PTE_U && ~STATUS_SUM); + assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) && ~PTE_U) || + ((EffectivePrivilegeMode == `S_MODE) && PTE_U && ~STATUS_SUM); // Check for read error. Reads are invalid when the page is not readable // (and executable pages are not readable) or when the page is neither // readable nor executable (and executable pages are readable). - assign InvalidRead = ReadAccess && - ((~STATUS_MXR && ~PTE_R) || (STATUS_MXR && ~PTE_R && PTE_X)); + assign InvalidRead = ReadAccess && ~PTE_R && (~STATUS_MXR | ~PTE_X); // Check for write error. Writes are invalid when the page's write bit is // low. assign InvalidWrite = WriteAccess && ~PTE_W; - assign TLBPageFault = Translate && TLBHit && - (ImproperPrivilege || InvalidRead || InvalidWrite); + assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || InvalidRead || InvalidWrite); end endgenerate diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv index ba2362c3e..7162263c2 100644 --- a/wally-pipelined/src/privileged/csr.sv +++ b/wally-pipelined/src/privileged/csr.sv @@ -58,8 +58,7 @@ module csr #(parameter output logic [`XLEN-1:0] SATP_REGW, output logic [11:0] MIP_REGW, MIE_REGW, SIP_REGW, SIE_REGW, output logic STATUS_MIE, STATUS_SIE, - output logic STATUS_MXR, STATUS_SUM, - output logic STATUS_MPRV, + output logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, output var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], input logic [4:0] SetFflagsM, diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index 618b32145..90830137b 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -67,7 +67,8 @@ module privileged ( output logic IllegalFPUInstrE, output logic [1:0] 
PrivilegeModeW, output logic [`XLEN-1:0] SATP_REGW, - output logic STATUS_MXR, STATUS_SUM, + output logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + output logic [1:0] STATUS_MPP, output var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], output logic [2:0] FRM_REGW @@ -94,8 +95,7 @@ module privileged ( logic MTrapM, STrapM, UTrapM; logic InterruptM; - logic [1:0] STATUS_MPP; - logic STATUS_SPP, STATUS_TSR, STATUS_MPRV; // **** status mprv is unused outside of the csr module as of 4 June 2021. should it be deleted alltogether from the module, or should I leav the pin here in case someone needs it? + logic STATUS_SPP, STATUS_TSR; logic STATUS_MIE, STATUS_SIE; logic [11:0] MIP_REGW, MIE_REGW, SIP_REGW, SIE_REGW; logic md, sd; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index be59c18b6..33d2e13fb 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -112,7 +112,8 @@ module wallypipelinedhart logic ITLBMissF, ITLBHitF; logic DTLBMissM, DTLBHitM; logic [`XLEN-1:0] SATP_REGW; - logic STATUS_MXR, STATUS_SUM; + logic STATUS_MXR, STATUS_SUM, STATUS_MPRV; + logic [1:0] STATUS_MPP; logic [1:0] PrivilegeModeW; logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM; logic [1:0] PageTypeF, PageTypeM; From 834c10c58c4ab98bc90098f533841ef01b2a4e87 Mon Sep 17 00:00:00 2001 From: bbracker Date: Sun, 4 Jul 2021 13:31:30 -0400 Subject: [PATCH 12/30] Revert "Make Wally take InstrPageFaultF traps" This reverts commit 7a810357d70d1bacb0e6d8e4ec1af93049f10bcf. 
--- wally-pipelined/src/ifu/ifu.sv | 4 ++-- wally-pipelined/src/privileged/trap.sv | 8 ++++---- wally-pipelined/src/wally/wallypipelinedhart.sv | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index e0a6a3546..714119e6a 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -151,7 +151,7 @@ module ifu ( icache icache(.*, .PCNextF(PCNextFPhys), .PCPF(PCPFmmu), - .WalkerInstrPageFaultF); + .WalkerInstrPageFaultF(WalkerInstrPageFaultF)); flopenl #(32) AlignedInstrRawDFlop(clk, reset | reset_q, ~StallD, FlushD ? nop : FinalInstrRawF, nop, InstrRawD); @@ -184,7 +184,7 @@ module ifu ( assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment - flopenl #(`XLEN) pcreg(clk, reset, (~StallF & ~ICacheStallF) | WalkerInstrPageFaultF, PCNextF, `RESET_VECTOR, PCF); + flopenl #(`XLEN) pcreg(clk, reset, ~StallF & ~ICacheStallF, PCNextF, `RESET_VECTOR, PCF); // branch and jump predictor generate diff --git a/wally-pipelined/src/privileged/trap.sv b/wally-pipelined/src/privileged/trap.sv index b3b657fc7..af4f17305 100644 --- a/wally-pipelined/src/privileged/trap.sv +++ b/wally-pipelined/src/privileged/trap.sv @@ -30,7 +30,7 @@ module trap ( input logic clk, reset, input logic InstrMisalignedFaultM, InstrAccessFaultM, IllegalInstrFaultM, input logic BreakpointFaultM, LoadMisalignedFaultM, StoreMisalignedFaultM, - input logic LoadAccessFaultM, StoreAccessFaultM, EcallFaultM, InstrPageFaultF, + input logic LoadAccessFaultM, StoreAccessFaultM, EcallFaultM, InstrPageFaultM, input logic LoadPageFaultM, StorePageFaultM, input logic mretM, sretM, uretM, input logic [1:0] PrivilegeModeW, NextPrivilegeModeM, @@ -69,7 +69,7 @@ module trap ( assign BusTrapM = LoadAccessFaultM | StoreAccessFaultM; assign NonBusTrapM = InstrMisalignedFaultM | InstrAccessFaultM | IllegalInstrFaultM | LoadMisalignedFaultM | StoreMisalignedFaultM | - InstrPageFaultF | 
LoadPageFaultM | StorePageFaultM | + InstrPageFaultM | LoadPageFaultM | StorePageFaultM | BreakpointFaultM | EcallFaultM | InterruptM; assign TrapM = BusTrapM | NonBusTrapM; @@ -121,7 +121,7 @@ module trap ( else if (PendingIntsM[9]) CauseM = (1 << (`XLEN-1)) + 9; // Supervisor External Int else if (PendingIntsM[1]) CauseM = (1 << (`XLEN-1)) + 1; // Supervisor Sw Int else if (PendingIntsM[5]) CauseM = (1 << (`XLEN-1)) + 5; // Supervisor Timer Int - else if (InstrPageFaultF) CauseM = 12; + else if (InstrPageFaultM) CauseM = 12; else if (InstrAccessFaultM) CauseM = 1; else if (InstrMisalignedFaultM) CauseM = 0; else if (IllegalInstrFaultM) CauseM = 2; @@ -148,7 +148,7 @@ module trap ( if (InstrMisalignedFaultM) NextFaultMtvalM = InstrMisalignedAdrM; else if (LoadMisalignedFaultM) NextFaultMtvalM = MemAdrM; else if (StoreMisalignedFaultM) NextFaultMtvalM = MemAdrM; - else if (InstrPageFaultF) NextFaultMtvalM = PCM; + else if (InstrPageFaultM) NextFaultMtvalM = PCM; else if (LoadPageFaultM) NextFaultMtvalM = MemAdrM; else if (StorePageFaultM) NextFaultMtvalM = MemAdrM; else if (IllegalInstrFaultM) NextFaultMtvalM = {{(`XLEN-32){1'b0}}, InstrM}; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index ffc67d89d..be59c18b6 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -179,7 +179,7 @@ module wallypipelinedhart ifu ifu(.InstrInF(InstrRData), - .WalkerInstrPageFaultF, + .WalkerInstrPageFaultF(WalkerInstrPageFaultF), .*); // instruction fetch unit: PC, branch prediction, instruction cache ieu ieu(.*); // integer execution unit: integer register file, datapath and controller From fed096407bbff83be30992c664907fa3a7538d4f Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 14:30:56 -0400 Subject: [PATCH 13/30] TLB minor organization --- wally-pipelined/src/mmu/tlb.sv | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff 
--git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index 9f6a4d25d..1c28aa9c3 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -119,8 +119,9 @@ module tlb #(parameter ENTRY_BITS = 3, // Whether the virtual address has a match in the CAM logic CAMHit; - // Grab the sv mode from SATP + // Grab the sv mode from SATP and determine whether translation should occur assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; + assign Translate = (SvMode != `NO_TRANSLATE) & (EffectivePrivilegeMode != `M_MODE) & ~ DisableTranslation; // Decode the integer encoded WriteIndex into the one-hot encoded WriteLines decoder #(ENTRY_BITS) writedecoder(WriteIndex, WriteLines); @@ -139,8 +140,6 @@ module tlb #(parameter ENTRY_BITS = 3, end endgenerate - // Whether translation should occur; ITLB ignores MPRVW - assign Translate = (SvMode != `NO_TRANSLATE) & (EffectivePrivilegeMode != `M_MODE) & ~ DisableTranslation; // Determine how the TLB is currently being used // Note that we use ReadAccess for both loads and instruction fetches From 243c03f870eb89a1c59466adb178812152ff7a2e Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 14:37:53 -0400 Subject: [PATCH 14/30] TLB cleanup --- wally-pipelined/src/mmu/tlb.sv | 10 ++++------ .../{physicalpagemask.sv => tlbphysicalpagemask.sv} | 9 ++++----- 2 files changed, 8 insertions(+), 11 deletions(-) rename wally-pipelined/src/mmu/{physicalpagemask.sv => tlbphysicalpagemask.sv} (93%) diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index 1c28aa9c3..a4326ce8a 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -121,6 +121,7 @@ module tlb #(parameter ENTRY_BITS = 3, // Grab the sv mode from SATP and determine whether translation should occur assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; + assign EffectivePrivilegeMode = (ITLB == 1) ? PrivilegeModeW : (STATUS_MPRV ? 
STATUS_MPP : PrivilegeModeW); // DTLB uses MPP mode when MPRV is 1 assign Translate = (SvMode != `NO_TRANSLATE) & (EffectivePrivilegeMode != `M_MODE) & ~ DisableTranslation; // Decode the integer encoded WriteIndex into the one-hot encoded WriteLines @@ -152,6 +153,7 @@ module tlb #(parameter ENTRY_BITS = 3, // TLB entries are evicted according to the LRU algorithm tlblru #(ENTRY_BITS) lru(.*); + // TLB memory tlbram #(ENTRY_BITS) tlbram(.*); tlbcam #(ENTRY_BITS, `VPN_BITS, `VPN_SEGMENT_BITS) tlbcam(.*); @@ -164,8 +166,6 @@ module tlb #(parameter ENTRY_BITS = 3, if (ITLB == 1) begin logic ImproperPrivilege; - assign EffectivePrivilegeMode = PrivilegeModeW; // ITLB ignores MPRV - // User mode may only execute user mode pages, and supervisor mode may // only execute non-user mode pages. assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) && ~PTE_U) || @@ -174,8 +174,6 @@ module tlb #(parameter ENTRY_BITS = 3, end else begin logic ImproperPrivilege, InvalidRead, InvalidWrite; - assign EffectivePrivilegeMode = STATUS_MPRV ? STATUS_MPP : PrivilegeModeW; // DTLB uses MPP mode when MPRV is 1 - // User mode may only load/store from user mode pages, and supervisor mode // may only access user mode pages when STATUS_SUM is low. assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) && ~PTE_U) || @@ -194,11 +192,11 @@ module tlb #(parameter ENTRY_BITS = 3, // Replace segments of the virtual page number with segments of the physical // page number. For 4 KB pages, the entire virtual page number is replaced. // For superpages, some segments are considered offsets into a larger page. - physicalpagemask PageNumberMixer(VirtualPageNumber, PhysicalPageNumber, HitPageType, PhysicalPageNumberMixed); + tlbphysicalpagemask PageMask(VirtualPageNumber, PhysicalPageNumber, HitPageType, PhysicalPageNumberMixed); // Provide physical address only on TLBHits to cause catastrophic errors if // garbage address is used. - assign PhysicalAddressFull = (TLBHit) ? 
{PhysicalPageNumberMixed, PageOffset} : '0; + assign PhysicalAddressFull = TLBHit ? {PhysicalPageNumberMixed, PageOffset} : '0; // Output the hit physical address if translation is currently on. /* generate diff --git a/wally-pipelined/src/mmu/physicalpagemask.sv b/wally-pipelined/src/mmu/tlbphysicalpagemask.sv similarity index 93% rename from wally-pipelined/src/mmu/physicalpagemask.sv rename to wally-pipelined/src/mmu/tlbphysicalpagemask.sv index b1f77e2d6..858c39390 100644 --- a/wally-pipelined/src/mmu/physicalpagemask.sv +++ b/wally-pipelined/src/mmu/tlbphysicalpagemask.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// physicalpagemask.sv +// tlbphysicalpagemask.sv // // Written: David Harris and kmacsaigoren@hmc.edu 7 June 2021 // Modified: @@ -40,13 +40,11 @@ module physicalpagemask ( logic [`PPN_BITS-1:0] ZeroExtendedVPN; logic [`PPN_BITS-1:0] PageNumberMask; - assign ZeroExtendedVPN = {{EXTRA_BITS{1'b0}}, VPN}; // forces the VPN to be the same width as PPN. - generate if (`XLEN == 32) begin always_comb case (PageType[0]) - // *** the widths of these constansts are hardocded here to match `PPN_BITS in the wally-constants file. + // the widths of these constansts are hardocded here to match `PPN_BITS in the wally-constants file. 0: PageNumberMask = 22'h3FFFFF; // kilopage: 22 bits of PPN, 0 bits of VPN 1: PageNumberMask = 22'h3FFC00; // megapage: 12 bits of PPN, 10 bits of VPN endcase @@ -57,7 +55,7 @@ module physicalpagemask ( 1: PageNumberMask = 44'hFFFFFFFFE00; // megapage: 35 bits of PPN, 9 bits of VPN 2: PageNumberMask = 44'hFFFFFFC0000; // gigapage: 26 bits of PPN, 18 bits of VPN 3: PageNumberMask = 44'hFFFF8000000; // terapage: 17 bits of PPN, 27 bits of VPN - // *** make sure that this doesnt break when using sv39. In that case, all of these + // Bus widths accomodate SV48. 
In SV39, all of these // busses are the widths for sv48, but extra bits should be zeroed out by the mux // in the tlb when it generates VPN from the full virtualadress. endcase @@ -65,6 +63,7 @@ module physicalpagemask ( endgenerate // merge low segments of VPN with high segments of PPN decided by the pagetype. + assign ZeroExtendedVPN = {{EXTRA_BITS{1'b0}}, VPN}; // forces the VPN to be the same width as PPN. assign MixedPageNumber = (ZeroExtendedVPN & ~PageNumberMask) | (PPN & PageNumberMask); endmodule From 8ae0a5bd7d81dd45a5c6a769b8a798f6b7f7ca48 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 4 Jul 2021 13:49:38 -0500 Subject: [PATCH 15/30] relocated lsuarb and pagetable walker inside the lsu. Does not pass busybear or buildroot, but passes rv32ic and rv64ic. --- wally-pipelined/regression/wave.do | 93 +++----- wally-pipelined/src/lsu/lsu.sv | 212 +++++++++++++----- wally-pipelined/src/lsu/lsuArb.sv | 3 - .../src/wally/wallypipelinedhart.sv | 108 ++------- 4 files changed, 201 insertions(+), 215 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 213b5ceea..42da60938 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -7,19 +7,19 @@ add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/Func add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM -add wave -noupdate -expand -group HDU -expand -group traps 
/testbench/dut/hart/priv/trap/BreakpointFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/EcallFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StorePageFaultM -add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InterruptM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM +add wave -noupdate -expand -group HDU -group traps 
/testbench/dut/hart/priv/trap/InstrPageFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM +add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM @@ -118,18 +118,18 @@ add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/a -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/b -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/result -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/flags -add wave -noupdate -expand -group alu -divider internals -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/overflow -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/carry -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/zero -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/neg -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/lt -add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/ltu +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/a +add wave -noupdate 
-group alu /testbench/dut/hart/ieu/dp/alu/b +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/result +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/flags +add wave -noupdate -group alu -divider internals +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/overflow +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/carry +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/zero +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/neg +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/lt +add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ltu add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E @@ -243,7 +243,6 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/StallW add wave -noupdate -expand -group lsu -color Gold /testbench/dut/hart/lsu/CurrState add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DisableTranslation add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemRWM -add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DataStall add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAdrM add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemPAdrM add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/ReadDataW @@ -294,42 +293,7 @@ add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIME add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIMECMP add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/TimerIntM add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/SwIntM -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUTranslate -add wave -noupdate -expand -group ptwalker -color Gold 
/testbench/dut/hart/pagetablewalker/WalkerState -add wave -noupdate -expand -group ptwalker -color Salmon /testbench/dut/hart/pagetablewalker/HPTWStall -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/HPTWRead -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUStall -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/EndWalk -add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/MMUReadPTE -add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/PRegEn -add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/CurrentPTE add wave -noupdate -expand -group ptwalker -divider data -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/TranslationPAdr -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/ValidPTE -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/LeafPTE -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUStall -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/TranslationPAdr -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/PageTableEntry -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/PageType -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/ITLBWriteF -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/DTLBWriteM -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerInstrPageFaultF -add wave -noupdate -expand -group ptwalker -group {fsm 
outputs} /testbench/dut/hart/pagetablewalker/WalkerLoadPageFaultM -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerStorePageFaultM -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/MMUStall -add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/EndWalk -add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr -add wave -noupdate -expand -group {LSU ARB} -color Gold /testbench/dut/hart/arbiter/CurrState -add wave -noupdate -expand -group {LSU ARB} -color {Medium Orchid} /testbench/dut/hart/arbiter/SelPTW -add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/pagetablewalker/MMUStall -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWTranslate -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWRead -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWPAdr -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReadPTE -add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReady -add wave -noupdate -expand -group {LSU ARB} -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU -add wave -noupdate /testbench/dut/hart/lsu/DataStall add wave -noupdate -group csr /testbench/dut/hart/priv/csr/MIP_REGW add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HCLK add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HRESETn @@ -356,7 +320,6 @@ add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genb add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/TLBMiss add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/tlb/TLBWrite add wave -noupdate -group itlb /testbench/dut/hart/ifu/ITLBMissF -add wave -noupdate 
/testbench/dut/hart/pagetablewalker/StartWalk add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/DisableTranslation add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/VirtualAddress add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/CAMHit @@ -367,8 +330,8 @@ add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/TLBWr add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/PTEWriteVal add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/WriteLines TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 8} {4545 ns} 0} {{Cursor 3} {3377 ns} 0} {{Cursor 4} {3215 ns} 0} -quietly wave cursor active 1 +WaveRestoreCursors {{Cursor 8} {4545 ns} 0} {{Cursor 3} {2540 ns} 0} {{Cursor 4} {681 ns} 0} +quietly wave cursor active 2 configure wave -namecolwidth 250 configure wave -valuecolwidth 189 configure wave -justifyvalue left @@ -383,4 +346,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {4209 ns} {4657 ns} +WaveRestoreZoom {2313 ns} {2789 ns} diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 36d859071..45fdf9e0f 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -31,8 +31,7 @@ module lsu ( input logic clk, reset, input logic StallM, FlushM, StallW, FlushW, - output logic DataStall, - output logic HPTWReady, + output logic DCacheStall, // Memory Stage // connected to cpu (controls) @@ -72,15 +71,17 @@ module lsu ( // mmu management // page table walker - input logic [`XLEN-1:0] PageTableEntryM, - input logic [1:0] PageTypeM, input logic [`XLEN-1:0] SATP_REGW, // from csr input logic STATUS_MXR, STATUS_SUM, // from csr - input logic DTLBWriteM, - output logic DTLBMissM, - input logic DisableTranslation, // used to stop intermediate PTE physical addresses being saved to TLB. 
- + input logic [`XLEN-1:0] PCF, + input logic ITLBMissF, + output logic [`XLEN-1:0] PageTableEntryF, + output logic [1:0] PageTypeF, + output logic ITLBWriteF, + output logic WalkerInstrPageFaultF, + output logic WalkerLoadPageFaultM, + output logic WalkerStorePageFaultM, output logic DTLBHitM, // not connected @@ -119,14 +120,106 @@ module lsu ( logic PMPInstrAccessFaultF, PMAInstrAccessFaultF; // *** these are just so that the mmu has somewhere to put these outputs since they aren't used in dmem // *** if you're allowed to parameterize outputs/ inputs existence, these are an easy delete. + logic DTLBMissM; + logic [`XLEN-1:0] PageTableEntryM; + logic [1:0] PageTypeM; + logic DTLBWriteM; + logic [`XLEN-1:0] MMUReadPTE; + logic MMUReady; + logic HPTWStall; + logic [`XLEN-1:0] MMUPAdr; + logic MMUTranslate; + logic HPTWRead; + logic [1:0] MemRWMtoLSU; + logic [2:0] Funct3MtoLSU; + logic [1:0] AtomicMtoLSU; + logic [`XLEN-1:0] MemAdrMtoLSU; + logic [`XLEN-1:0] WriteDataMtoLSU; + logic [`XLEN-1:0] ReadDataWFromLSU; + logic StallWtoLSU; + logic CommittedMfromLSU; + logic SquashSCWfromLSU; + logic DataMisalignedMfromLSU; + logic HPTWReady; + logic LSUStall; + logic DisableTranslation; // used to stop intermediate PTE physical addresses being saved to TLB. + + + + // for time being until we have a dcache the AHB Lite read bus HRDATAW will be connected to the // CPU's read data input ReadDataW. 
- assign ReadDataW = HRDATAW; + assign ReadDataWFromLSU = HRDATAW; + + + pagetablewalker pagetablewalker( + .clk(clk), + .reset(reset), + .SATP_REGW(SATP_REGW), + .PCF(PCF), + .MemAdrM(MemAdrM), + .ITLBMissF(ITLBMissF), + .DTLBMissM(DTLBMissM), + .MemRWM(MemRWM), + .PageTableEntryF(PageTableEntryF), + .PageTableEntryM(PageTableEntryM), + .PageTypeF(PageTypeF), + .PageTypeM(PageTypeM), + .ITLBWriteF(ITLBWriteF), + .DTLBWriteM(DTLBWriteM), + .MMUReadPTE(MMUReadPTE), + .MMUReady(HPTWReady), + .HPTWStall(HPTWStall), + .MMUPAdr(MMUPAdr), + .MMUTranslate(MMUTranslate), + .HPTWRead(HPTWRead), + .WalkerInstrPageFaultF(WalkerInstrPageFaultF), + .WalkerLoadPageFaultM(WalkerLoadPageFaultM), + .WalkerStorePageFaultM(WalkerStorePageFaultM)); + + + + // arbiter between IEU and pagetablewalker + lsuArb arbiter(.clk(clk), + .reset(reset), + // HPTW connection + .HPTWTranslate(MMUTranslate), + .HPTWRead(HPTWRead), + .HPTWPAdr(MMUPAdr), + .HPTWReadPTE(MMUReadPTE), + .HPTWStall(HPTWStall), + // CPU connection + .MemRWM(MemRWM), + .Funct3M(Funct3M), + .AtomicM(AtomicM), + .MemAdrM(MemAdrM), + .WriteDataM(WriteDataM), // *** Need to remove this. + .StallW(StallW), + .ReadDataW(ReadDataW), + .CommittedM(CommittedM), + .SquashSCW(SquashSCW), + .DataMisalignedM(DataMisalignedM), + .DCacheStall(DCacheStall), + // LSU + .DisableTranslation(DisableTranslation), + .MemRWMtoLSU(MemRWMtoLSU), + .Funct3MtoLSU(Funct3MtoLSU), + .AtomicMtoLSU(AtomicMtoLSU), + .MemAdrMtoLSU(MemAdrMtoLSU), + .WriteDataMtoLSU(WriteDataMtoLSU), // *** ?????????????? 
+ .StallWtoLSU(StallWtoLSU), + .CommittedMfromLSU(CommittedMfromLSU), + .SquashSCWfromLSU(SquashSCWfromLSU), + .DataMisalignedMfromLSU(DataMisalignedMfromLSU), + .ReadDataWFromLSU(ReadDataWFromLSU), + .DataStall(LSUStall)); + + mmu #(.ENTRY_BITS(`DTLB_ENTRY_BITS), .IMMU(0)) - dmmu(.TLBAccessType(MemRWM), - .VirtualAddress(MemAdrM), - .Size(Funct3M[1:0]), + dmmu(.TLBAccessType(MemRWMtoLSU), + .VirtualAddress(MemAdrMtoLSU), + .Size(Funct3MtoLSU[1:0]), .PTEWriteVal(PageTableEntryM), .PageTypeWriteVal(PageTypeM), .TLBWrite(DTLBWriteM), @@ -137,45 +230,46 @@ module lsu ( .TLBPageFault(DTLBPageFaultM), .ExecuteAccessF(1'b0), .AtomicAccessM(AtomicMaskedM[1]), - .WriteAccessM(MemRWM[0]), - .ReadAccessM(MemRWM[1]), + .WriteAccessM(MemRWMtoLSU[0]), + .ReadAccessM(MemRWMtoLSU[1]), .SquashBusAccess(DSquashBusAccessM), + .DisableTranslation(DisableTranslation), // .SelRegions(DHSELRegionsM), .*); // *** the pma/pmp instruction acess faults don't really matter here. is it possible to parameterize which outputs exist? 
// Specify which type of page fault is occurring - assign DTLBLoadPageFaultM = DTLBPageFaultM & MemRWM[1]; - assign DTLBStorePageFaultM = DTLBPageFaultM & MemRWM[0]; + assign DTLBLoadPageFaultM = DTLBPageFaultM & MemRWMtoLSU[1]; + assign DTLBStorePageFaultM = DTLBPageFaultM & MemRWMtoLSU[0]; // Determine if an Unaligned access is taking place always_comb - case(Funct3M[1:0]) - 2'b00: DataMisalignedM = 0; // lb, sb, lbu - 2'b01: DataMisalignedM = MemAdrM[0]; // lh, sh, lhu - 2'b10: DataMisalignedM = MemAdrM[1] | MemAdrM[0]; // lw, sw, flw, fsw, lwu - 2'b11: DataMisalignedM = |MemAdrM[2:0]; // ld, sd, fld, fsd + case(Funct3MtoLSU[1:0]) + 2'b00: DataMisalignedMfromLSU = 0; // lb, sb, lbu + 2'b01: DataMisalignedMfromLSU = MemAdrMtoLSU[0]; // lh, sh, lhu + 2'b10: DataMisalignedMfromLSU = MemAdrMtoLSU[1] | MemAdrMtoLSU[0]; // lw, sw, flw, fsw, lwu + 2'b11: DataMisalignedMfromLSU = |MemAdrMtoLSU[2:0]; // ld, sd, fld, fsd endcase // Squash unaligned data accesses and failed store conditionals // *** this is also the place to squash if the cache is hit - // Changed DataMisalignedM to a larger combination of trap sources + // Changed DataMisalignedMfromLSU to a larger combination of trap sources // NonBusTrapM is anything that the bus doesn't contribute to producing // By contrast, using TrapM results in circular logic errors - assign MemReadM = MemRWM[1] & ~NonBusTrapM & ~DTLBMissM & CurrState != STATE_STALLED; - assign MemWriteM = MemRWM[0] & ~NonBusTrapM & ~DTLBMissM & ~SquashSCM & CurrState != STATE_STALLED; - assign AtomicMaskedM = CurrState != STATE_STALLED ? AtomicM : 2'b00 ; + assign MemReadM = MemRWMtoLSU[1] & ~NonBusTrapM & ~DTLBMissM & CurrState != STATE_STALLED; + assign MemWriteM = MemRWMtoLSU[0] & ~NonBusTrapM & ~DTLBMissM & ~SquashSCM & CurrState != STATE_STALLED; + assign AtomicMaskedM = CurrState != STATE_STALLED ? AtomicMtoLSU : 2'b00 ; assign MemAccessM = MemReadM | MemWriteM; // Determine if M stage committed // Reset whenever unstalled. 
Set when access successfully occurs - flopr #(1) committedMreg(clk,reset,(CommittedM | CommitM) & StallM,preCommittedM); - assign CommittedM = preCommittedM | CommitM; + flopr #(1) committedMreg(clk,reset,(CommittedMfromLSU | CommitM) & StallM,preCommittedM); + assign CommittedMfromLSU = preCommittedM | CommitM; // Determine if address is valid - assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1]; - assign LoadAccessFaultM = MemRWM[1]; - assign StoreMisalignedFaultM = DataMisalignedM & MemRWM[0]; - assign StoreAccessFaultM = MemRWM[0]; + assign LoadMisalignedFaultM = DataMisalignedMfromLSU & MemRWMtoLSU[1]; + assign LoadAccessFaultM = MemRWMtoLSU[1]; + assign StoreMisalignedFaultM = DataMisalignedMfromLSU & MemRWMtoLSU[0]; + assign StoreAccessFaultM = MemRWMtoLSU[0]; // Handle atomic load reserved / store conditional generate @@ -184,9 +278,9 @@ module lsu ( logic ReservationValidM, ReservationValidW; logic lrM, scM, WriteAdrMatchM; - assign lrM = MemReadM && AtomicM[0]; - assign scM = MemRWM[0] && AtomicM[0]; - assign WriteAdrMatchM = MemRWM[0] && (MemPAdrM[`PA_BITS-1:2] == ReservationPAdrW) && ReservationValidW; + assign lrM = MemReadM && AtomicMtoLSU[0]; + assign scM = MemRWMtoLSU[0] && AtomicMtoLSU[0]; + assign WriteAdrMatchM = MemRWMtoLSU[0] && (MemPAdrM[`PA_BITS-1:2] == ReservationPAdrW) && ReservationValidW; assign SquashSCM = scM && ~WriteAdrMatchM; always_comb begin // ReservationValidM (next value of valid reservation) if (lrM) ReservationValidM = 1; // set valid on load reserve @@ -195,15 +289,15 @@ module lsu ( end flopenrc #(`PA_BITS-2) resadrreg(clk, reset, FlushW, lrM, MemPAdrM[`PA_BITS-1:2], ReservationPAdrW); // could drop clear on this one but not valid flopenrc #(1) resvldreg(clk, reset, FlushW, lrM, ReservationValidM, ReservationValidW); - flopenrc #(1) squashreg(clk, reset, FlushW, ~StallW, SquashSCM, SquashSCW); + flopenrc #(1) squashreg(clk, reset, FlushW, ~StallWtoLSU, SquashSCM, SquashSCWfromLSU); end else begin // Atomic operations 
not supported assign SquashSCM = 0; - assign SquashSCW = 0; + assign SquashSCWfromLSU = 0; end endgenerate // Data stall - //assign DataStall = (NextState == STATE_FETCH) || (NextState == STATE_FETCH_AMO_1) || (NextState == STATE_FETCH_AMO_2); + //assign LSUStall = (NextState == STATE_FETCH) || (NextState == STATE_FETCH_AMO_1) || (NextState == STATE_FETCH_AMO_2); assign HPTWReady = (CurrState == STATE_READY); @@ -224,22 +318,22 @@ module lsu ( STATE_READY: if (DTLBMissM) begin NextState = STATE_PTW_READY; - DataStall = 1'b1; + LSUStall = 1'b1; end else if (AtomicMaskedM[1]) begin NextState = STATE_FETCH_AMO_1; // *** should be some misalign check - DataStall = 1'b1; - end else if((MemReadM & AtomicM[0]) | (MemWriteM & AtomicM[0])) begin + LSUStall = 1'b1; + end else if((MemReadM & AtomicMtoLSU[0]) | (MemWriteM & AtomicMtoLSU[0])) begin NextState = STATE_FETCH_AMO_2; - DataStall = 1'b1; - end else if (MemAccessM & ~DataMisalignedM) begin + LSUStall = 1'b1; + end else if (MemAccessM & ~DataMisalignedMfromLSU) begin NextState = STATE_FETCH; - DataStall = 1'b1; + LSUStall = 1'b1; end else begin NextState = STATE_READY; - DataStall = 1'b0; + LSUStall = 1'b0; end STATE_FETCH_AMO_1: begin - DataStall = 1'b1; + LSUStall = 1'b1; if (MemAckW) begin NextState = STATE_FETCH_AMO_2; end else begin @@ -247,45 +341,45 @@ module lsu ( end end STATE_FETCH_AMO_2: begin - DataStall = 1'b1; - if (MemAckW & ~StallW) begin + LSUStall = 1'b1; + if (MemAckW & ~StallWtoLSU) begin NextState = STATE_FETCH_AMO_2; - end else if (MemAckW & StallW) begin + end else if (MemAckW & StallWtoLSU) begin NextState = STATE_STALLED; end else begin NextState = STATE_FETCH_AMO_2; end end STATE_FETCH: begin - DataStall = 1'b1; - if (MemAckW & ~StallW) begin + LSUStall = 1'b1; + if (MemAckW & ~StallWtoLSU) begin NextState = STATE_READY; - end else if (MemAckW & StallW) begin + end else if (MemAckW & StallWtoLSU) begin NextState = STATE_STALLED; end else begin NextState = STATE_FETCH; end end STATE_STALLED: 
begin - DataStall = 1'b0; - if (~StallW) begin + LSUStall = 1'b0; + if (~StallWtoLSU) begin NextState = STATE_READY; end else begin NextState = STATE_STALLED; end end STATE_PTW_READY: begin - DataStall = 1'b0; + LSUStall = 1'b0; if (DTLBWriteM) begin NextState = STATE_READY; - end else if (MemReadM & ~DataMisalignedM) begin + end else if (MemReadM & ~DataMisalignedMfromLSU) begin NextState = STATE_PTW_FETCH; end else begin NextState = STATE_PTW_READY; end end STATE_PTW_FETCH : begin - DataStall = 1'b1; + LSUStall = 1'b1; if (MemAckW & ~DTLBWriteM) begin NextState = STATE_PTW_READY; end else if (MemAckW & DTLBWriteM) begin @@ -298,15 +392,15 @@ module lsu ( NextState = STATE_READY; end default: begin - DataStall = 1'b0; + LSUStall = 1'b0; NextState = STATE_READY; end endcase end // always_comb // *** for now just pass through size - assign Funct3MfromLSU = Funct3M; - assign StallWfromLSU = StallW; + assign Funct3MfromLSU = Funct3MtoLSU; + assign StallWfromLSU = StallWtoLSU; endmodule diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index 76d89798a..3f57cabb4 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -35,7 +35,6 @@ module lsuArb input logic [`XLEN-1:0] HPTWPAdr, // to page table walker. output logic [`XLEN-1:0] HPTWReadPTE, - output logic HPTWReady, output logic HPTWStall, // from CPU @@ -65,7 +64,6 @@ module lsuArb input logic SquashSCWfromLSU, input logic DataMisalignedMfromLSU, input logic [`XLEN-1:0] ReadDataWFromLSU, - input logic HPTWReadyfromLSU, input logic DataStall ); @@ -159,7 +157,6 @@ module lsuArb assign CommittedM = SelPTW ? 1'b0 : CommittedMfromLSU; assign SquashSCW = SelPTW ? 1'b0 : SquashSCWfromLSU; assign DataMisalignedM = SelPTW ? 1'b0 : DataMisalignedMfromLSU; - assign HPTWReady = HPTWReadyfromLSU; // *** need to rename DcacheStall and Datastall. // not clear at all. I think it should be LSUStall from the LSU, // which is demuxed to HPTWStall and CPUDataStall? 
(not sure on this last one). diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 9a678189b..89486d8b8 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -128,11 +128,7 @@ module wallypipelinedhart // IMem stalls logic ICacheStallF; logic DCacheStall; - logic [`XLEN-1:0] MMUPAdr, MMUReadPTE; - logic MMUTranslate, MMUReady; - logic HPTWRead; - logic HPTWReadyfromLSU; - logic HPTWStall; + // bus interface to dmem @@ -145,7 +141,6 @@ module wallypipelinedhart logic [`PA_BITS-1:0] InstrPAdrF; logic [`XLEN-1:0] InstrRData; logic InstrReadF; - logic DataStall; logic InstrAckF, MemAckW; logic CommitM, CommittedM; @@ -162,7 +157,6 @@ module wallypipelinedhart logic [`XLEN-1:0] HRDATAW; // IEU vs HPTW arbitration signals to send to LSU - logic DisableTranslation; logic [1:0] MemRWMtoLSU; logic [2:0] Funct3MtoLSU; logic [1:0] AtomicMtoLSU; @@ -186,87 +180,23 @@ module wallypipelinedhart // mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM); - pagetablewalker pagetablewalker( - .clk(clk), - .reset(reset), - .SATP_REGW(SATP_REGW), // already on lsu port - .PCF(PCF), // add to lsu port - .MemAdrM(MemAdrM), // alreayd on lsu port - .ITLBMissF(ITLBMissF), // add to lsu port - .DTLBMissM(DTLBMissM), // already on lsu port convert to internal - .MemRWM(MemRWM), // already on lsu port - .PageTableEntryF(PageTableEntryF), // add to lsu port - .PageTableEntryM(PageTableEntryM), // already on lsu port convert to internal - .PageTypeF(PageTypeF), // add to lsu port connects to ifu - .PageTypeM(PageTypeM), // already on lsu port convert to internal - .ITLBWriteF(ITLBWriteF), // add to lsu port connects to ifu - .DTLBWriteM(DTLBWriteM), // already on lsu port convert to internal - .MMUReadPTE(MMUReadPTE), // from lsu arb convert to internal - .MMUReady(MMUReady), // to lsu arb, convert to internal - .HPTWStall(HPTWStall), // from 
lsu arb convert to internal - .MMUPAdr(MMUPAdr), // to lsu arb, convert to internal - .MMUTranslate(MMUTranslate), // to lsu arb, convert to internal - .HPTWRead(HPTWRead), // to lsu arb, convert to internal - .WalkerInstrPageFaultF(WalkerInstrPageFaultF), // add to lsu port - .WalkerLoadPageFaultM(WalkerLoadPageFaultM), // add to lsu port (to privilege) - .WalkerStorePageFaultM(WalkerStorePageFaultM)); // add to lsu port (to privilege) - - - - // arbiter between IEU and pagetablewalker - lsuArb arbiter(.clk(clk), - .reset(reset), - // HPTW connection - .HPTWTranslate(MMUTranslate), - .HPTWRead(HPTWRead), - .HPTWPAdr(MMUPAdr), - .HPTWReadPTE(MMUReadPTE), - .HPTWReady(MMUReady), - .HPTWStall(HPTWStall), - // CPU connection - .MemRWM(MemRWM), - .Funct3M(Funct3M), - .AtomicM(AtomicM), - .MemAdrM(MemAdrM), - .WriteDataM(WriteDataM), - .StallW(StallW), - .ReadDataW(ReadDataW), - .CommittedM(CommittedM), - .SquashSCW(SquashSCW), - .DataMisalignedM(DataMisalignedM), - .DCacheStall(DCacheStall), - // LSU - .DisableTranslation(DisableTranslation), - .MemRWMtoLSU(MemRWMtoLSU), - .Funct3MtoLSU(Funct3MtoLSU), - .AtomicMtoLSU(AtomicMtoLSU), - .MemAdrMtoLSU(MemAdrMtoLSU), - .WriteDataMtoLSU(WriteDataMtoLSU), - .StallWtoLSU(StallWtoLSU), - .CommittedMfromLSU(CommittedMfromLSU), - .SquashSCWfromLSU(SquashSCWfromLSU), - .DataMisalignedMfromLSU(DataMisalignedMfromLSU), - .ReadDataWFromLSU(ReadDataWFromLSU), - .HPTWReadyfromLSU(HPTWReadyfromLSU), - .DataStall(DataStall)); - lsu lsu(.clk(clk), .reset(reset), .StallM(StallM), .FlushM(FlushM), - .StallW(StallWtoLSU), + .StallW(StallW), .FlushW(FlushW), // connected to arbiter (reconnect to CPU) - .MemRWM(MemRWMtoLSU), // change to MemRWM - .Funct3M(Funct3MtoLSU), // change to Funct3M - .AtomicM(AtomicMtoLSU), // change to AtomicMtoLSU - .CommittedM(CommittedMfromLSU), // change to CommitttedM - .SquashSCW(SquashSCWfromLSU), // change to SquashSCW - .DataMisalignedM(DataMisalignedMfromLSU), // change to DataMisalignedM - 
.MemAdrM(MemAdrMtoLSU), // change to MemAdrM - .WriteDataM(WriteDataMtoLSU), // change to WriteDataM - .ReadDataW(ReadDataWFromLSU), // change to ReadDataW + .MemRWM(MemRWM), + .Funct3M(Funct3M), + .AtomicM(AtomicM), + .CommittedM(CommittedM), + .SquashSCW(SquashSCW), + .DataMisalignedM(DataMisalignedM), + .MemAdrM(MemAdrM), + .WriteDataM(WriteDataM), + .ReadDataW(ReadDataW), // connected to ahb (all stay the same) .CommitM(CommitM), @@ -308,16 +238,18 @@ module wallypipelinedhart .PMPStoreAccessFaultM(PMPStoreAccessFaultM), // connected to hptw. Move to internal. - .PageTableEntryM(PageTableEntryM), - .PageTypeM(PageTypeM), - .DTLBWriteM(DTLBWriteM), // from hptw. - .DTLBMissM(DTLBMissM), // to hptw from dmmu - .DisableTranslation(DisableTranslation), // from hptw to dmmu - .HPTWReady(HPTWReadyfromLSU), // from hptw, remove + .PCF(PCF), + .ITLBMissF(ITLBMissF), + .PageTableEntryF(PageTableEntryF), + .PageTypeF(PageTypeF), + .ITLBWriteF(ITLBWriteF), + .WalkerInstrPageFaultF(WalkerInstrPageFaultF), + .WalkerLoadPageFaultM(WalkerLoadPageFaultM), + .WalkerStorePageFaultM(WalkerStorePageFaultM), .DTLBHitM(DTLBHitM), // not connected remove - .DataStall(DataStall)) // change to DCacheStall + .DCacheStall(DCacheStall)) // change to DCacheStall ; From deae60eb1dfb18672cdb51a6c70a7a3637a26b19 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 14:59:04 -0400 Subject: [PATCH 16/30] TLB cleanup --- wally-pipelined/src/mmu/priorityencoder.sv | 2 +- wally-pipelined/src/mmu/tlb.sv | 29 +++++-------------- wally-pipelined/src/mmu/tlbcam.sv | 22 +++----------- .../src/mmu/{camline.sv => tlbcamline.sv} | 8 ++--- .../src/mmu/tlbphysicalpagemask.sv | 2 +- wally-pipelined/src/mmu/tlbram.sv | 11 +------ 6 files changed, 19 insertions(+), 55 deletions(-) rename wally-pipelined/src/mmu/{camline.sv => tlbcamline.sv} (93%) diff --git a/wally-pipelined/src/mmu/priorityencoder.sv b/wally-pipelined/src/mmu/priorityencoder.sv index d56da3d65..44ce88da2 100644 --- 
a/wally-pipelined/src/mmu/priorityencoder.sv +++ b/wally-pipelined/src/mmu/priorityencoder.sv @@ -31,7 +31,7 @@ `include "wally-config.vh" -module priorityencoder #(parameter BINARY_BITS = 3) ( +module tlbpriority #(parameter BINARY_BITS = 3) ( input logic [2**BINARY_BITS - 1:0] onehot, output logic [BINARY_BITS - 1:0] binary ); diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index a4326ce8a..8aaf65707 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -148,7 +148,6 @@ module tlb #(parameter ENTRY_BITS = 3, assign WriteAccess = TLBAccessType[0]; assign TLBAccess = ReadAccess || WriteAccess; - assign PageOffset = VirtualAddress[11:0]; // TLB entries are evicted according to the LRU algorithm tlblru #(ENTRY_BITS) lru(.*); @@ -157,11 +156,15 @@ module tlb #(parameter ENTRY_BITS = 3, tlbram #(ENTRY_BITS) tlbram(.*); tlbcam #(ENTRY_BITS, `VPN_BITS, `VPN_SEGMENT_BITS) tlbcam(.*); + // Replace segments of the virtual page number with segments of the physical + // page number. For 4 KB pages, the entire virtual page number is replaced. + // For superpages, some segments are considered offsets into a larger page. + tlbphysicalpagemask PageMask(VirtualPageNumber, PhysicalPageNumber, HitPageType, PhysicalPageNumberMixed); + // unswizzle useful PTE bits assign {PTE_U, PTE_X, PTE_W, PTE_R} = PTEAccessBits[4:1]; // Check whether the access is allowed, page faulting if not. - // *** We might not have S mode. generate if (ITLB == 1) begin logic ImproperPrivilege; @@ -189,28 +192,12 @@ module tlb #(parameter ENTRY_BITS = 3, end endgenerate - // Replace segments of the virtual page number with segments of the physical - // page number. For 4 KB pages, the entire virtual page number is replaced. - // For superpages, some segments are considered offsets into a larger page. 
- tlbphysicalpagemask PageMask(VirtualPageNumber, PhysicalPageNumber, HitPageType, PhysicalPageNumberMixed); - - // Provide physical address only on TLBHits to cause catastrophic errors if - // garbage address is used. - assign PhysicalAddressFull = TLBHit ? {PhysicalPageNumberMixed, PageOffset} : '0; // Output the hit physical address if translation is currently on. -/* generate - if (`XLEN == 32) begin - VirtualAddressPALen = {2'b0, VirtualAddress}; - - mux2 #(`PA_BITS) addressmux({2'b0, VirtualAddress}, PhysicalAddressFull, Translate, PhysicalAddress); - end else begin - VirtualAddressPALen = VirtualAddress[`PA_BITS-1:0]; - mux2 #(`PA_BITS) addressmux(VirtualAddress[`PA_BITS-1:0], PhysicalAddressFull, Translate, PhysicalAddress); - end - endgenerate*/ - + // Provide physical address of zero if not TLBHits, to cause segmentation error if miss somehow percolated through signal assign VAExt = {2'b00, VirtualAddress}; // extend length of virtual address if necessary for RV32 + assign PageOffset = VirtualAddress[11:0]; + assign PhysicalAddressFull = TLBHit ? {PhysicalPageNumberMixed, PageOffset} : '0; mux2 #(`PA_BITS) addressmux(VAExt[`PA_BITS-1:0], PhysicalAddressFull, Translate, PhysicalAddress); assign TLBHit = CAMHit & TLBAccess; diff --git a/wally-pipelined/src/mmu/tlbcam.sv b/wally-pipelined/src/mmu/tlbcam.sv index e45b124a1..07ec38ffb 100644 --- a/wally-pipelined/src/mmu/tlbcam.sv +++ b/wally-pipelined/src/mmu/tlbcam.sv @@ -34,8 +34,6 @@ module tlbcam #(parameter ENTRY_BITS = 3, input logic clk, reset, input logic [KEY_BITS-1:0] VirtualPageNumber, input logic [1:0] PageTypeWriteVal, -// input logic [`SVMODE_BITS-1:0] SvMode, // *** may not need to be used. -// input logic TLBWrite, input logic TLBFlush, input logic [2**ENTRY_BITS-1:0] WriteEnables, @@ -56,23 +54,11 @@ module tlbcam #(parameter ENTRY_BITS = 3, // of page type. However, matches are determined based on a subset of the // page number segments. 
- camline #(KEY_BITS, SEGMENT_BITS) camlines[NENTRIES-1:0]( + tlbcamline #(KEY_BITS, SEGMENT_BITS) camlines[NENTRIES-1:0]( .CAMLineWrite(WriteEnables), - .PageType(PageTypeList), - .Match(Matches), - .*); -/* - generate - genvar i; - for (i = 0; i < NENTRIES; i++) begin - camline #(KEY_BITS, SEGMENT_BITS) camline( - .CAMLineWrite(WriteEnables[i]), - .PageType(PageTypeList[i]), - .Match(Matches[i]), - .*); - end - endgenerate - */ + .PageType(PageTypeList), + .Match(Matches), + .*); // In case there are multiple matches in the CAM, select only one // *** it might be guaranteed that the CAM will never have multiple matches. diff --git a/wally-pipelined/src/mmu/camline.sv b/wally-pipelined/src/mmu/tlbcamline.sv similarity index 93% rename from wally-pipelined/src/mmu/camline.sv rename to wally-pipelined/src/mmu/tlbcamline.sv index 6e3f705c3..3e882cf1e 100644 --- a/wally-pipelined/src/mmu/camline.sv +++ b/wally-pipelined/src/mmu/tlbcamline.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// camline.sv +// tlbcamline.sv // // Written: tfleming@hmc.edu & jtorrey@hmc.edu 6 April 2021 // Modified: kmacsaigoren@hmc.edu 1 June 2021 @@ -28,7 +28,7 @@ `include "wally-config.vh" -module camline #(parameter KEY_BITS = 20, +module tlbcamline #(parameter KEY_BITS = 20, parameter SEGMENT_BITS = 10) ( input logic clk, reset, @@ -85,12 +85,12 @@ module camline #(parameter KEY_BITS = 20, assign {Key3, Key2, Key1, Key0} = Key; // Calculate the actual match value based on the input vpn and the page type. - // For example, a gigapage in SV only cares about VPN[2], so VPN[0] and VPN[1] + // For example, a gigapage in SV39 only cares about VPN[2], so VPN[0] and VPN[1] // should automatically match. 
assign Match0 = (Query0 == Key0) || (PageType > 2'd0); // least signifcant section assign Match1 = (Query1 == Key1) || (PageType > 2'd1); assign Match2 = (Query2 == Key2) || (PageType > 2'd2); - assign Match3 = (Query3 == Key3); // *** this should always match in sv39 since both vPN3 and key3 are zeroed by the pagetable walker before getting to the cam + assign Match3 = (Query3 == Key3); // this should always match in sv39 since both vPN3 and key3 are zeroed by the pagetable walker before getting to the cam assign Match = Match0 & Match1 & Match2 & Match3 & Valid; end diff --git a/wally-pipelined/src/mmu/tlbphysicalpagemask.sv b/wally-pipelined/src/mmu/tlbphysicalpagemask.sv index 858c39390..dd791e48a 100644 --- a/wally-pipelined/src/mmu/tlbphysicalpagemask.sv +++ b/wally-pipelined/src/mmu/tlbphysicalpagemask.sv @@ -28,7 +28,7 @@ `include "wally-config.vh" -module physicalpagemask ( +module tlbphysicalpagemask ( input logic [`VPN_BITS-1:0] VPN, input logic [`PPN_BITS-1:0] PPN, input logic [1:0] PageType, diff --git a/wally-pipelined/src/mmu/tlbram.sv b/wally-pipelined/src/mmu/tlbram.sv index f13666bcd..9482cd6f0 100644 --- a/wally-pipelined/src/mmu/tlbram.sv +++ b/wally-pipelined/src/mmu/tlbram.sv @@ -46,16 +46,7 @@ module tlbram #(parameter ENTRY_BITS = 3) ( // Generate a flop for every entry in the RAM flopenr #(`XLEN) pteflops[NENTRIES-1:0](clk, reset, WriteEnables, PTEWriteVal, ram); -/* - generate - genvar i; - for (i = 0; i < NENTRIES; i++) begin: tlb_ram_flops - flopenr #(`XLEN) pteflop(clk, reset, WriteEnables[i], - PTEWriteVal, ram[i]); - end - endgenerate -*/ - + assign PageTableEntry = ram[VPNIndex]; assign PTEAccessBits = PageTableEntry[7:0]; assign PhysicalPageNumber = PageTableEntry[`PPN_BITS+9:10]; From b59213c83f834f2253304ee01e287ec6a55b3ca5 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 16:33:13 -0400 Subject: [PATCH 18/30] Reorganized TLB to use one-hot read/write select signals to eliminate decoders and encoders --- 
wally-pipelined/src/mmu/tlb.sv | 11 ++-- wally-pipelined/src/mmu/tlbcam.sv | 13 ++--- wally-pipelined/src/mmu/tlbcamline.sv | 4 +- wally-pipelined/src/mmu/tlblru.sv | 21 ++++---- .../{priorityencoder.sv => tlbpriority.sv} | 50 +++++++------------ wally-pipelined/src/mmu/tlbram.sv | 40 ++++++++++++--- 6 files changed, 77 insertions(+), 62 deletions(-) rename wally-pipelined/src/mmu/{priorityencoder.sv => tlbpriority.sv} (62%) diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index 8aaf65707..2741f958b 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -88,6 +88,8 @@ module tlb #(parameter ENTRY_BITS = 3, output logic TLBPageFault ); + localparam NENTRIES = 2**ENTRY_BITS; + logic Translate; logic TLBAccess, ReadAccess, WriteAccess; @@ -95,9 +97,8 @@ module tlb #(parameter ENTRY_BITS = 3, logic [`SVMODE_BITS-1:0] SvMode; logic [1:0] EffectivePrivilegeMode; // privilege mode, possibly modified by MPRV - // Index (currently random) to write the next TLB entry - logic [ENTRY_BITS-1:0] WriteIndex; - logic [(2**ENTRY_BITS)-1:0] WriteLines, WriteEnables; // used as the one-hot encoding of WriteIndex + //logic [ENTRY_BITS-1:0] WriteIndex; + logic [NENTRIES-1:0] ReadLines, WriteLines, WriteEnables; // used as the one-hot encoding of WriteIndex // Sections of the virtual and physical addresses logic [`VPN_BITS-1:0] VirtualPageNumber; @@ -113,7 +114,7 @@ module tlb #(parameter ENTRY_BITS = 3, logic PTE_U, PTE_X, PTE_W, PTE_R; // Pattern location in the CAM and type of page hit - logic [ENTRY_BITS-1:0] VPNIndex; + //ogic [ENTRY_BITS-1:0] VPNIndex; logic [1:0] HitPageType; // Whether the virtual address has a match in the CAM @@ -125,7 +126,7 @@ module tlb #(parameter ENTRY_BITS = 3, assign Translate = (SvMode != `NO_TRANSLATE) & (EffectivePrivilegeMode != `M_MODE) & ~ DisableTranslation; // Decode the integer encoded WriteIndex into the one-hot encoded WriteLines - decoder #(ENTRY_BITS) writedecoder(WriteIndex, 
WriteLines); + //decoder #(ENTRY_BITS) writedecoder(WriteIndex, WriteLines); assign WriteEnables = WriteLines & {(2**ENTRY_BITS){TLBWrite}}; // The bus width is always the largest it could be for that XLEN. For example, vpn will be 36 bits wide in rv64 diff --git a/wally-pipelined/src/mmu/tlbcam.sv b/wally-pipelined/src/mmu/tlbcam.sv index 07ec38ffb..f10442cd7 100644 --- a/wally-pipelined/src/mmu/tlbcam.sv +++ b/wally-pipelined/src/mmu/tlbcam.sv @@ -37,7 +37,8 @@ module tlbcam #(parameter ENTRY_BITS = 3, input logic TLBFlush, input logic [2**ENTRY_BITS-1:0] WriteEnables, - output logic [ENTRY_BITS-1:0] VPNIndex, + //output logic [ENTRY_BITS-1:0] VPNIndex, + output logic [2**ENTRY_BITS-1:0] ReadLines, output logic [1:0] HitPageType, output logic CAMHit ); @@ -56,16 +57,16 @@ module tlbcam #(parameter ENTRY_BITS = 3, tlbcamline #(KEY_BITS, SEGMENT_BITS) camlines[NENTRIES-1:0]( .CAMLineWrite(WriteEnables), - .PageType(PageTypeList), - .Match(Matches), + .MatchedPageType(PageTypeList), // *** change name to agree + .Match(ReadLines), // *** change name to agree .*); // In case there are multiple matches in the CAM, select only one // *** it might be guaranteed that the CAM will never have multiple matches. 
// If so, this is just an encoder - priorityencoder #(ENTRY_BITS) matchencoder(Matches, VPNIndex); + //priorityencoder #(ENTRY_BITS) matchencoder(Matches, VPNIndex); - assign CAMHit = |Matches & ~TLBFlush; - assign HitPageType = PageTypeList[VPNIndex]; + assign CAMHit = |ReadLines & ~TLBFlush; + assign HitPageType = PageTypeList.or; // applies OR to elements of the (NENTRIES x 2) array to get 2-bit result endmodule diff --git a/wally-pipelined/src/mmu/tlbcamline.sv b/wally-pipelined/src/mmu/tlbcamline.sv index 3e882cf1e..2eaa3a011 100644 --- a/wally-pipelined/src/mmu/tlbcamline.sv +++ b/wally-pipelined/src/mmu/tlbcamline.sv @@ -50,13 +50,14 @@ module tlbcamline #(parameter KEY_BITS = 20, // PageType == 2'b01 --> megapage // PageType == 2'b10 --> gigapage // PageType == 2'b11 --> terapage - output logic [1:0] PageType, // *** should this be the stored version or the always updated one? + output logic [1:0] MatchedPageType, // *** should this be the stored version or the always updated one? output logic Match ); // This entry has KEY_BITS for the key plus one valid bit. logic Valid; logic [KEY_BITS-1:0] Key; + logic [1:0] PageType; // Split up key and query into sections for each page table level. @@ -98,6 +99,7 @@ module tlbcamline #(parameter KEY_BITS = 20, // On a write, update the type of the page referred to by this line. flopenr #(2) pagetypeflop(clk, reset, CAMLineWrite, PageTypeWriteVal, PageType); + assign MatchedPageType = PageType & {2{Match}}; //mux2 #(2) pagetypemux(StoredPageType, PageTypeWrite, CAMLineWrite, PageType); // On a write, set the valid bit high and update the stored key. 
diff --git a/wally-pipelined/src/mmu/tlblru.sv b/wally-pipelined/src/mmu/tlblru.sv index e86598f71..f1ac4db21 100644 --- a/wally-pipelined/src/mmu/tlblru.sv +++ b/wally-pipelined/src/mmu/tlblru.sv @@ -28,11 +28,9 @@ module tlblru #(parameter ENTRY_BITS = 3) ( input logic clk, reset, input logic TLBWrite, input logic TLBFlush, - input logic [ENTRY_BITS-1:0] VPNIndex, + input logic [2**ENTRY_BITS-1:0] ReadLines, input logic CAMHit, - input logic [2**ENTRY_BITS-1:0] WriteLines, - - output logic [ENTRY_BITS-1:0] WriteIndex + output logic [2**ENTRY_BITS-1:0] WriteLines ); localparam NENTRIES = 2**ENTRY_BITS; @@ -41,26 +39,27 @@ module tlblru #(parameter ENTRY_BITS = 3) ( logic [NENTRIES-1:0] RUBits, RUBitsNext, RUBitsAccessed; // One-hot encodings of which line is being accessed - logic [NENTRIES-1:0] ReadLineOneHot, AccessLineOneHot; + logic [NENTRIES-1:0] AccessLines; // High if the next access causes all RU bits to be 1 logic AllUsed; // Convert indices to one-hot encodings - decoder #(ENTRY_BITS) readdecoder(VPNIndex, ReadLineOneHot); + //decoder #(ENTRY_BITS) readdecoder(VPNIndex, ReadLineOneHot); // Find the first line not recently used - priorityencoder #(ENTRY_BITS) firstnru(~RUBits, WriteIndex); + tlbpriority #(NENTRIES) nru(~RUBits, WriteLines); + //priorityencoder #(ENTRY_BITS) firstnru(~RUBits, WriteIndex); // Access either the hit line or written line - assign AccessLineOneHot = (TLBWrite) ? WriteLines : ReadLineOneHot; + assign AccessLines = TLBWrite ? WriteLines : ReadLines; // Raise the bit of the recently accessed line - assign RUBitsAccessed = AccessLineOneHot | RUBits; + assign RUBitsAccessed = AccessLines | RUBits; // Determine whether we need to reset the RU bits to all zeroes - assign AllUsed = &(RUBitsAccessed); - assign RUBitsNext = (AllUsed) ? AccessLineOneHot : RUBitsAccessed; + assign AllUsed = &RUBitsAccessed; + assign RUBitsNext = AllUsed ? 
AccessLines : RUBitsAccessed; // *** seems it should set to 0, not to AccessLines // Update LRU state on any TLB hit or write flopenrc #(NENTRIES) lrustate(clk, reset, TLBFlush, (CAMHit || TLBWrite), diff --git a/wally-pipelined/src/mmu/priorityencoder.sv b/wally-pipelined/src/mmu/tlbpriority.sv similarity index 62% rename from wally-pipelined/src/mmu/priorityencoder.sv rename to wally-pipelined/src/mmu/tlbpriority.sv index 44ce88da2..a061f622b 100644 --- a/wally-pipelined/src/mmu/priorityencoder.sv +++ b/wally-pipelined/src/mmu/tlbpriority.sv @@ -1,16 +1,15 @@ /////////////////////////////////////////// -// priorityencoder.sv +// tlbpriority.sv // // Written: tfleming@hmc.edu & jtorrey@hmc.edu 7 April 2021 -// Based on implementation from https://www.allaboutcircuits.com/ip-cores/communication-controller/priority-encoder/ -// *** Give proper LGPL attribution for above source // Modified: Teo Ene 15 Apr 2021: // Temporarily removed paramterized priority encoder for non-parameterized one // To get synthesis working quickly // Kmacsaigoren@hmc.edu 28 May 2021: // Added working version of parameterized priority encoder. +// David_Harris@Hmc.edu switched to one-hot output // -// Purpose: One-hot encoding to binary encoder +// Purpose: Priority circuit to choose most significant one-hot output // // A component of the Wally configurable RISC-V project. 
// @@ -31,35 +30,20 @@ `include "wally-config.vh" -module tlbpriority #(parameter BINARY_BITS = 3) ( - input logic [2**BINARY_BITS - 1:0] onehot, - output logic [BINARY_BITS - 1:0] binary +module tlbpriority #(parameter ENTRIES = 8) ( + input logic [ENTRIES-1:0] a, + output logic [ENTRIES-1:0] y ); + // verilator lint_off UNOPTFLAT + logic [ENTRIES-1:0] nolower; - integer i; - always_comb begin - binary = 0; - for (i = 0; i < 2**BINARY_BITS; i++) begin - // verilator lint_off WIDTH - if (onehot[i]) binary = i; // prioritizes the most significant bit - // verilator lint_on WIDTH - end - end - // *** triple check synthesizability here - - // Ideally this mimics the following: - /* - always_comb begin - casex (one_hot) - 1xx ... x: binary = BINARY_BITS - 1; - 01x ... x: binary = BINARY_BITS - 2; - 001 ... x: binary = BINARY_BITS - 3; - - {...} - - 00 ... 1xx: binary = 2; - 00 ... 01x: binary = 1; - 00 ... 001: binary = 0; - end - */ + // generate thermometer code mask + genvar i; + generate + assign nolower[0] = 1; + for (i=1; i Date: Sun, 4 Jul 2021 17:01:22 -0400 Subject: [PATCH 19/30] Restructured TLB Read as AND-OR operation with one-hot match/read line --- wally-pipelined/src/mmu/tlbcam.sv | 9 +++---- wally-pipelined/src/mmu/tlbcamline.sv | 16 +++++------ wally-pipelined/src/mmu/tlblru.sv | 19 +++----------- wally-pipelined/src/mmu/tlbram.sv | 31 +--------------------- wally-pipelined/src/mmu/tlbramline.sv | 38 +++++++++++++++++++++++++++ 5 files changed, 55 insertions(+), 58 deletions(-) create mode 100644 wally-pipelined/src/mmu/tlbramline.sv diff --git a/wally-pipelined/src/mmu/tlbcam.sv b/wally-pipelined/src/mmu/tlbcam.sv index f10442cd7..ef64e0d6e 100644 --- a/wally-pipelined/src/mmu/tlbcam.sv +++ b/wally-pipelined/src/mmu/tlbcam.sv @@ -45,8 +45,7 @@ module tlbcam #(parameter ENTRY_BITS = 3, localparam NENTRIES = 2**ENTRY_BITS; - - logic [1:0] PageTypeList [NENTRIES-1:0]; + logic [1:0] PageTypeRead [NENTRIES-1:0]; logic [NENTRIES-1:0] Matches; // Create 
NENTRIES CAM lines, each of which will independently consider @@ -56,8 +55,8 @@ module tlbcam #(parameter ENTRY_BITS = 3, // page number segments. tlbcamline #(KEY_BITS, SEGMENT_BITS) camlines[NENTRIES-1:0]( - .CAMLineWrite(WriteEnables), - .MatchedPageType(PageTypeList), // *** change name to agree + .WriteEnable(WriteEnables), + .PageTypeRead, // *** change name to agree .Match(ReadLines), // *** change name to agree .*); @@ -67,6 +66,6 @@ module tlbcam #(parameter ENTRY_BITS = 3, //priorityencoder #(ENTRY_BITS) matchencoder(Matches, VPNIndex); assign CAMHit = |ReadLines & ~TLBFlush; - assign HitPageType = PageTypeList.or; // applies OR to elements of the (NENTRIES x 2) array to get 2-bit result + assign HitPageType = PageTypeRead.or; // applies OR to elements of the (NENTRIES x 2) array to get 2-bit result endmodule diff --git a/wally-pipelined/src/mmu/tlbcamline.sv b/wally-pipelined/src/mmu/tlbcamline.sv index 2eaa3a011..605d8f304 100644 --- a/wally-pipelined/src/mmu/tlbcamline.sv +++ b/wally-pipelined/src/mmu/tlbcamline.sv @@ -39,7 +39,7 @@ module tlbcamline #(parameter KEY_BITS = 20, input logic [KEY_BITS-1:0] VirtualPageNumber, // Signals to write a new entry to this line - input logic CAMLineWrite, + input logic WriteEnable, input logic [1:0] PageTypeWriteVal, // Flush this line (set valid to 0) @@ -50,7 +50,7 @@ module tlbcamline #(parameter KEY_BITS = 20, // PageType == 2'b01 --> megapage // PageType == 2'b10 --> gigapage // PageType == 2'b11 --> terapage - output logic [1:0] MatchedPageType, // *** should this be the stored version or the always updated one? + output logic [1:0] PageTypeRead, // *** should this be the stored version or the always updated one? output logic Match ); @@ -59,11 +59,12 @@ module tlbcamline #(parameter KEY_BITS = 20, logic [KEY_BITS-1:0] Key; logic [1:0] PageType; - // Split up key and query into sections for each page table level. 
logic [SEGMENT_BITS-1:0] Key0, Key1, Query0, Query1; logic Match0, Match1; + // *** need to add ASID and G bit support + generate if (`XLEN == 32) begin @@ -98,15 +99,14 @@ module tlbcamline #(parameter KEY_BITS = 20, endgenerate // On a write, update the type of the page referred to by this line. - flopenr #(2) pagetypeflop(clk, reset, CAMLineWrite, PageTypeWriteVal, PageType); - assign MatchedPageType = PageType & {2{Match}}; - //mux2 #(2) pagetypemux(StoredPageType, PageTypeWrite, CAMLineWrite, PageType); + flopenr #(2) pagetypeflop(clk, reset, WriteEnable, PageTypeWriteVal, PageType); + assign PageTypeRead = PageType & {2{Match}}; // On a write, set the valid bit high and update the stored key. // On a flush, zero the valid bit and leave the key unchanged. // *** Might we want to update stored key right away to output match on the // write cycle? (using a mux) - flopenrc #(1) validbitflop(clk, reset, TLBFlush, CAMLineWrite, 1'b1, Valid); - flopenr #(KEY_BITS) keyflop(clk, reset, CAMLineWrite, VirtualPageNumber, Key); + flopenrc #(1) validbitflop(clk, reset, TLBFlush, WriteEnable, 1'b1, Valid); + flopenr #(KEY_BITS) keyflop(clk, reset, WriteEnable, VirtualPageNumber, Key); endmodule diff --git a/wally-pipelined/src/mmu/tlblru.sv b/wally-pipelined/src/mmu/tlblru.sv index f1ac4db21..5dc1f846c 100644 --- a/wally-pipelined/src/mmu/tlblru.sv +++ b/wally-pipelined/src/mmu/tlblru.sv @@ -44,25 +44,14 @@ module tlblru #(parameter ENTRY_BITS = 3) ( // High if the next access causes all RU bits to be 1 logic AllUsed; - // Convert indices to one-hot encodings - //decoder #(ENTRY_BITS) readdecoder(VPNIndex, ReadLineOneHot); - // Find the first line not recently used tlbpriority #(NENTRIES) nru(~RUBits, WriteLines); - //priorityencoder #(ENTRY_BITS) firstnru(~RUBits, WriteIndex); - // Access either the hit line or written line + // Track recently used lines, updating on a CAM Hit or TLB write assign AccessLines = TLBWrite ? 
WriteLines : ReadLines; - - // Raise the bit of the recently accessed line assign RUBitsAccessed = AccessLines | RUBits; - - // Determine whether we need to reset the RU bits to all zeroes - assign AllUsed = &RUBitsAccessed; - assign RUBitsNext = AllUsed ? AccessLines : RUBitsAccessed; // *** seems it should set to 0, not to AccessLines - - // Update LRU state on any TLB hit or write - flopenrc #(NENTRIES) lrustate(clk, reset, TLBFlush, (CAMHit || TLBWrite), - RUBitsNext, RUBits); + assign AllUsed = &RUBitsAccessed; // if all recently used, then clear to none + assign RUBitsNext = AllUsed ? 0 : RUBitsAccessed; + flopenrc #(NENTRIES) lrustate(clk, reset, TLBFlush, (CAMHit || TLBWrite), RUBitsNext, RUBits); endmodule diff --git a/wally-pipelined/src/mmu/tlbram.sv b/wally-pipelined/src/mmu/tlbram.sv index 70e7bb6cb..f70cb44c6 100644 --- a/wally-pipelined/src/mmu/tlbram.sv +++ b/wally-pipelined/src/mmu/tlbram.sv @@ -41,42 +41,13 @@ module tlbram #(parameter ENTRY_BITS = 3) ( localparam NENTRIES = 2**ENTRY_BITS; - //logic [`XLEN-1:0] ram[NENTRIES-1:0]; logic [`XLEN-1:0] RamRead[NENTRIES-1:0]; logic [`XLEN-1:0] PageTableEntry; -// logic [ENTRY_BITS-1:0] VPNIndex; - // Generate a flop for every entry in the RAM - //flopenr #(`XLEN) pteflops[NENTRIES-1:0](clk, reset, WriteEnables, PTEWriteVal, ram); tlbramline #(`XLEN) tlblineram[NENTRIES-1:0](clk, reset, ReadLines, WriteEnables, PTEWriteVal, RamRead); -/* - // temporary code for read - // verilator lint_off WIDTH - integer i; - generate - always_comb begin - VPNIndex = 0; - for (i=0; i Date: Sun, 4 Jul 2021 17:20:16 -0400 Subject: [PATCH 20/30] for GPIO give priority to clearing interrupts --- wally-pipelined/src/uncore/gpio.sv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/src/uncore/gpio.sv b/wally-pipelined/src/uncore/gpio.sv index cde55b7a7..49b96e1e9 100644 --- a/wally-pipelined/src/uncore/gpio.sv +++ b/wally-pipelined/src/uncore/gpio.sv @@ -132,19 +132,19 @@ module gpio ( 
endcase // interrupts if (memwrite & (entryd == 8'h1C)) - rise_ip <= rise_ip & ~Din | (input2d & ~input3d); + rise_ip <= rise_ip & ~Din; else rise_ip <= rise_ip | (input2d & ~input3d); if (memwrite & (entryd == 8'h24)) - fall_ip <= fall_ip & ~Din | (~input2d & input3d); + fall_ip <= fall_ip & ~Din; else fall_ip <= fall_ip | (~input2d & input3d); if (memwrite & (entryd == 8'h2C)) - high_ip <= high_ip & ~Din | input3d; + high_ip <= high_ip & ~Din; else high_ip <= high_ip | input3d; if (memwrite & (entryd == 8'h34)) - low_ip <= low_ip & ~Din | ~input3d; + low_ip <= low_ip & ~Din; else low_ip <= low_ip | ~input3d; end From 6b9cfe90d85946eecac482e959645acc8387eed3 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 17:52:00 -0400 Subject: [PATCH 21/30] Added ASID & Global PTE handling to TLB CAM --- .../config/buildroot/wally-config.vh | 4 +- .../config/busybear/wally-config.vh | 4 +- .../config/coremark/wally-config.vh | 4 +- .../config/coremark_bare/wally-config.vh | 4 +- wally-pipelined/config/rv32ic/wally-config.vh | 4 +- wally-pipelined/config/rv64BP/wally-config.vh | 4 +- wally-pipelined/config/rv64ic/wally-config.vh | 4 +- .../config/rv64icfd/wally-config.vh | 4 +- .../config/rv64imc/wally-config.vh | 4 +- .../config/shared/wally-constants.vh | 4 +- wally-pipelined/src/ifu/ifu.sv | 2 +- wally-pipelined/src/lsu/lsu.sv | 2 +- wally-pipelined/src/mmu/mmu.sv | 6 +-- wally-pipelined/src/mmu/tlb.sv | 30 +++++-------- wally-pipelined/src/mmu/tlbcam.sv | 34 ++++++--------- wally-pipelined/src/mmu/tlbcamline.sv | 43 ++++++++----------- wally-pipelined/src/mmu/tlblru.sv | 31 ++++++------- .../src/mmu/tlbphysicalpagemask.sv | 7 ++- wally-pipelined/src/mmu/tlbram.sv | 17 +++----- 19 files changed, 87 insertions(+), 125 deletions(-) diff --git a/wally-pipelined/config/buildroot/wally-config.vh b/wally-pipelined/config/buildroot/wally-config.vh index a535abfff..cb59bb69e 100644 --- a/wally-pipelined/config/buildroot/wally-config.vh +++ 
b/wally-pipelined/config/buildroot/wally-config.vh @@ -49,8 +49,8 @@ `define MEM_VIRTMEM 0 `define VECTORED_INTERRUPTS_SUPPORTED 1 // Domenico Ottolia 4/15: Support for vectored interrupts in _tvec csrs. Just implemented in src/privileged/trap.sv around line 75. Pretty sure this should be 1. -`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 diff --git a/wally-pipelined/config/busybear/wally-config.vh b/wally-pipelined/config/busybear/wally-config.vh index 447294496..26e37fa6c 100644 --- a/wally-pipelined/config/busybear/wally-config.vh +++ b/wally-pipelined/config/busybear/wally-config.vh @@ -50,8 +50,8 @@ `define MEM_VIRTMEM 0 `define VECTORED_INTERRUPTS_SUPPORTED 1 // Domenico Ottolia 4/15: Support for vectored interrupts in _tvec csrs. Just implemented in src/privileged/trap.sv around line 75. Pretty sure this should be 1. -`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 diff --git a/wally-pipelined/config/coremark/wally-config.vh b/wally-pipelined/config/coremark/wally-config.vh index 7bfdc8821..e4e3376db 100644 --- a/wally-pipelined/config/coremark/wally-config.vh +++ b/wally-pipelined/config/coremark/wally-config.vh @@ -49,8 +49,8 @@ `define MEM_VIRTMEM 0 `define VECTORED_INTERRUPTS_SUPPORTED 1 -`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Address space `define RESET_VECTOR 64'h00000000000100b0 diff --git a/wally-pipelined/config/coremark_bare/wally-config.vh b/wally-pipelined/config/coremark_bare/wally-config.vh index fb4bee347..95441f8f0 100644 --- a/wally-pipelined/config/coremark_bare/wally-config.vh +++ b/wally-pipelined/config/coremark_bare/wally-config.vh @@ -49,8 +49,8 @@ `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 -`define 
ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 diff --git a/wally-pipelined/config/rv32ic/wally-config.vh b/wally-pipelined/config/rv32ic/wally-config.vh index 1933a2e7a..090da8d62 100644 --- a/wally-pipelined/config/rv32ic/wally-config.vh +++ b/wally-pipelined/config/rv32ic/wally-config.vh @@ -48,8 +48,8 @@ `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 -`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 diff --git a/wally-pipelined/config/rv64BP/wally-config.vh b/wally-pipelined/config/rv64BP/wally-config.vh index 335f2d87a..01680b9d8 100644 --- a/wally-pipelined/config/rv64BP/wally-config.vh +++ b/wally-pipelined/config/rv64BP/wally-config.vh @@ -50,8 +50,8 @@ `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 -`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Address space `define RESET_VECTOR 64'h0000000000000000 diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index ad97d446e..44a90e1c2 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -49,8 +49,8 @@ `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 -`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 64 diff --git a/wally-pipelined/config/rv64icfd/wally-config.vh b/wally-pipelined/config/rv64icfd/wally-config.vh index e645f014b..25b8cbca9 100644 --- a/wally-pipelined/config/rv64icfd/wally-config.vh +++ b/wally-pipelined/config/rv64icfd/wally-config.vh @@ -49,8 +49,8 @@ `define MEM_VIRTMEM 1 `define VECTORED_INTERRUPTS_SUPPORTED 1 -`define 
ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 diff --git a/wally-pipelined/config/rv64imc/wally-config.vh b/wally-pipelined/config/rv64imc/wally-config.vh index 885a519d7..a554a612b 100644 --- a/wally-pipelined/config/rv64imc/wally-config.vh +++ b/wally-pipelined/config/rv64imc/wally-config.vh @@ -48,8 +48,8 @@ `define MEM_VIRTMEM 0 `define VECTORED_INTERRUPTS_SUPPORTED 1 -`define ITLB_ENTRY_BITS 5 -`define DTLB_ENTRY_BITS 5 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 // Address space `define RESET_VECTOR 64'h0000000080000000 diff --git a/wally-pipelined/config/shared/wally-constants.vh b/wally-pipelined/config/shared/wally-constants.vh index 706997b93..99269ae5b 100644 --- a/wally-pipelined/config/shared/wally-constants.vh +++ b/wally-pipelined/config/shared/wally-constants.vh @@ -39,7 +39,9 @@ `define VPN_BITS (`XLEN==32 ? (2*`VPN_SEGMENT_BITS) : (4*`VPN_SEGMENT_BITS)) `define PPN_BITS (`XLEN==32 ? 22 : 44) `define PA_BITS (`XLEN==32 ? 34 : 56) -`define SVMODE_BITS (`XLEN == 32 ? 1 : 4) +`define SVMODE_BITS (`XLEN==32 ? 1 : 4) +`define ASID_BASE (`XLEN==32 ? 22 : 44) +`define ASID_BITS (`XLEN==32 ? 9 : 16) // constants to check SATP_MODE against // defined in Table 4.3 of the privileged spec diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 65f8a9b84..4fcefe857 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -116,7 +116,7 @@ module ifu ( end endgenerate - mmu #(.ENTRY_BITS(`ITLB_ENTRY_BITS), .IMMU(1)) + mmu #(.TLB_ENTRIES(`ITLB_ENTRIES), .IMMU(1)) itlb(.TLBAccessType(2'b10), .VirtualAddress(PCF), .Size(2'b10), diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index d7ff78d78..bbb56ad06 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -124,7 +124,7 @@ module lsu ( // CPU's read data input ReadDataW. 
assign ReadDataW = HRDATAW; - mmu #(.ENTRY_BITS(`DTLB_ENTRY_BITS), .IMMU(0)) + mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) dmmu(.TLBAccessType(MemRWM), .VirtualAddress(MemAdrM), .Size(Funct3M[1:0]), diff --git a/wally-pipelined/src/mmu/mmu.sv b/wally-pipelined/src/mmu/mmu.sv index e28db2e77..4faac7bc1 100644 --- a/wally-pipelined/src/mmu/mmu.sv +++ b/wally-pipelined/src/mmu/mmu.sv @@ -26,9 +26,7 @@ `include "wally-config.vh" -// The TLB will have 2**ENTRY_BITS total entries - -module mmu #(parameter ENTRY_BITS = 3, +module mmu #(parameter TLB_ENTRIES = 8, // nuber of TLB Entries parameter IMMU = 0) ( input logic clk, reset, @@ -83,7 +81,7 @@ module mmu #(parameter ENTRY_BITS = 3, logic Cacheable, Idempotent, AtomicAllowed; // *** here so that the pmachecker has somewhere to put these outputs. *** I'm leaving them as outputs to pma checker, but I'm stopping them here. // Translation lookaside buffer - tlb #(.ENTRY_BITS(ENTRY_BITS), .ITLB(IMMU)) tlb(.*); + tlb #(.TLB_ENTRIES(TLB_ENTRIES), .ITLB(IMMU)) tlb(.*); /////////////////////////////////////////// // Check physical memory accesses diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index 2741f958b..644e56a56 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -49,7 +49,7 @@ `include "wally-config.vh" // The TLB will have 2**ENTRY_BITS total entries -module tlb #(parameter ENTRY_BITS = 3, +module tlb #(parameter TLB_ENTRIES = 8, parameter ITLB = 0) ( input logic clk, reset, @@ -88,8 +88,6 @@ module tlb #(parameter ENTRY_BITS = 3, output logic TLBPageFault ); - localparam NENTRIES = 2**ENTRY_BITS; - logic Translate; logic TLBAccess, ReadAccess, WriteAccess; @@ -97,8 +95,7 @@ module tlb #(parameter ENTRY_BITS = 3, logic [`SVMODE_BITS-1:0] SvMode; logic [1:0] EffectivePrivilegeMode; // privilege mode, possibly modified by MPRV - //logic [ENTRY_BITS-1:0] WriteIndex; - logic [NENTRIES-1:0] ReadLines, WriteLines, WriteEnables; // used as the one-hot encoding of 
WriteIndex + logic [TLB_ENTRIES-1:0] ReadLines, WriteLines, WriteEnables, Global; // used as the one-hot encoding of WriteIndex // Sections of the virtual and physical addresses logic [`VPN_BITS-1:0] VirtualPageNumber; @@ -110,24 +107,19 @@ module tlb #(parameter ENTRY_BITS = 3, logic [7:0] PTEAccessBits; logic [11:0] PageOffset; - // Useful PTE Control Bits - logic PTE_U, PTE_X, PTE_W, PTE_R; - - // Pattern location in the CAM and type of page hit - //ogic [ENTRY_BITS-1:0] VPNIndex; + logic PTE_U, PTE_X, PTE_W, PTE_R; // Useful PTE Control Bits logic [1:0] HitPageType; - - // Whether the virtual address has a match in the CAM logic CAMHit; + logic [`ASID_BITS-1:0] ASID; // Grab the sv mode from SATP and determine whether translation should occur assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; + assign ASID = SATP_REGW[`ASID_BASE+`ASID_BITS-1:`ASID_BASE]; assign EffectivePrivilegeMode = (ITLB == 1) ? PrivilegeModeW : (STATUS_MPRV ? STATUS_MPP : PrivilegeModeW); // DTLB uses MPP mode when MPRV is 1 assign Translate = (SvMode != `NO_TRANSLATE) & (EffectivePrivilegeMode != `M_MODE) & ~ DisableTranslation; - // Decode the integer encoded WriteIndex into the one-hot encoded WriteLines - //decoder #(ENTRY_BITS) writedecoder(WriteIndex, WriteLines); - assign WriteEnables = WriteLines & {(2**ENTRY_BITS){TLBWrite}}; + // Determine whether to write TLB + assign WriteEnables = WriteLines & {(TLB_ENTRIES){TLBWrite}}; // The bus width is always the largest it could be for that XLEN. For example, vpn will be 36 bits wide in rv64 // this, even though it could be 27 bits (SV39) or 36 bits (SV48) wide. 
When the value of VPN is narrower, @@ -142,20 +134,18 @@ module tlb #(parameter ENTRY_BITS = 3, end endgenerate - // Determine how the TLB is currently being used // Note that we use ReadAccess for both loads and instruction fetches assign ReadAccess = TLBAccessType[1]; assign WriteAccess = TLBAccessType[0]; assign TLBAccess = ReadAccess || WriteAccess; - // TLB entries are evicted according to the LRU algorithm - tlblru #(ENTRY_BITS) lru(.*); + tlblru #(TLB_ENTRIES) lru(.*); // TLB memory - tlbram #(ENTRY_BITS) tlbram(.*); - tlbcam #(ENTRY_BITS, `VPN_BITS, `VPN_SEGMENT_BITS) tlbcam(.*); + tlbram #(TLB_ENTRIES) tlbram(.*); + tlbcam #(TLB_ENTRIES, `VPN_BITS + `ASID_BITS, `VPN_SEGMENT_BITS) tlbcam(.*); // Replace segments of the virtual page number with segments of the physical // page number. For 4 KB pages, the entire virtual page number is replaced. diff --git a/wally-pipelined/src/mmu/tlbcam.sv b/wally-pipelined/src/mmu/tlbcam.sv index ef64e0d6e..996620cf3 100644 --- a/wally-pipelined/src/mmu/tlbcam.sv +++ b/wally-pipelined/src/mmu/tlbcam.sv @@ -28,44 +28,36 @@ `include "wally-config.vh" -module tlbcam #(parameter ENTRY_BITS = 3, - parameter KEY_BITS = 20, - parameter SEGMENT_BITS = 10) ( +module tlbcam #(parameter TLB_ENTRIES = 8, + parameter KEY_BITS = 20, + parameter SEGMENT_BITS = 10) ( input logic clk, reset, - input logic [KEY_BITS-1:0] VirtualPageNumber, + input logic [`VPN_BITS-1:0] VirtualPageNumber, input logic [1:0] PageTypeWriteVal, input logic TLBFlush, - input logic [2**ENTRY_BITS-1:0] WriteEnables, - - //output logic [ENTRY_BITS-1:0] VPNIndex, - output logic [2**ENTRY_BITS-1:0] ReadLines, + input logic [TLB_ENTRIES-1:0] WriteEnables, + input logic [TLB_ENTRIES-1:0] Global + input logic [`ASID_BITS-1:0] ASID, + output logic [TLB_ENTRIES-1:0] ReadLines, output logic [1:0] HitPageType, output logic CAMHit ); - localparam NENTRIES = 2**ENTRY_BITS; + logic [1:0] PageTypeRead [TLB_ENTRIES-1:0]; + logic [TLB_ENTRIES-1:0] Matches; - logic [1:0] 
PageTypeRead [NENTRIES-1:0]; - logic [NENTRIES-1:0] Matches; - - // Create NENTRIES CAM lines, each of which will independently consider + // Create TLB_ENTRIES CAM lines, each of which will independently consider // whether the requested virtual address is a match. Each line stores the // original virtual page number from when the address was written, regardless // of page type. However, matches are determined based on a subset of the // page number segments. - tlbcamline #(KEY_BITS, SEGMENT_BITS) camlines[NENTRIES-1:0]( + tlbcamline #(KEY_BITS, SEGMENT_BITS) camlines[TLB_ENTRIES-1:0]( .WriteEnable(WriteEnables), .PageTypeRead, // *** change name to agree .Match(ReadLines), // *** change name to agree .*); - - // In case there are multiple matches in the CAM, select only one - // *** it might be guaranteed that the CAM will never have multiple matches. - // If so, this is just an encoder - //priorityencoder #(ENTRY_BITS) matchencoder(Matches, VPNIndex); - assign CAMHit = |ReadLines & ~TLBFlush; - assign HitPageType = PageTypeRead.or; // applies OR to elements of the (NENTRIES x 2) array to get 2-bit result + assign HitPageType = PageTypeRead.or; // applies OR to elements of the (TLB_ENTRIES x 2) array to get 2-bit result endmodule diff --git a/wally-pipelined/src/mmu/tlbcamline.sv b/wally-pipelined/src/mmu/tlbcamline.sv index 605d8f304..e0381ed60 100644 --- a/wally-pipelined/src/mmu/tlbcamline.sv +++ b/wally-pipelined/src/mmu/tlbcamline.sv @@ -29,30 +29,23 @@ `include "wally-config.vh" module tlbcamline #(parameter KEY_BITS = 20, - parameter SEGMENT_BITS = 10) ( - input logic clk, reset, + parameter SEGMENT_BITS = 10) ( + input logic clk, reset, + input logic [`VPN_BITS-1:0] VirtualPageNumber, // The requested page number to compare against the key + input logic [`ASID_BITS-1:0] ASID, + input logic WriteEnable, // Write a new entry to this line + input logic Global, + input logic [1:0] PageTypeWriteVal, + input logic TLBFlush, // Flush this line (set valid to 0) + 
output logic [1:0] PageTypeRead, // *** should this be the stored version or the always updated one? + output logic Match +); - // input to check which SvMode is running -// input logic [`SVMODE_BITS-1:0] SvMode, // *** may no longer be needed. - - // The requested page number to compare against the key - input logic [KEY_BITS-1:0] VirtualPageNumber, - - // Signals to write a new entry to this line - input logic WriteEnable, - input logic [1:0] PageTypeWriteVal, - - // Flush this line (set valid to 0) - input logic TLBFlush, - - // This entry is a key for a tera, giga, mega, or kilopage. + // PageTypeRead is a key for a tera, giga, mega, or kilopage. // PageType == 2'b00 --> kilopage // PageType == 2'b01 --> megapage // PageType == 2'b10 --> gigapage // PageType == 2'b11 --> terapage - output logic [1:0] PageTypeRead, // *** should this be the stored version or the always updated one? - output logic Match -); // This entry has KEY_BITS for the key plus one valid bit. logic Valid; @@ -60,15 +53,16 @@ module tlbcamline #(parameter KEY_BITS = 20, logic [1:0] PageType; // Split up key and query into sections for each page table level. + logic [`ASID_BITS-1:0] Key_ASID; logic [SEGMENT_BITS-1:0] Key0, Key1, Query0, Query1; - logic Match0, Match1; + logic MatchASID, Match0, Match1; - // *** need to add ASID and G bit support + assign MatchASID = (ASID == Key_ASID) | Global; generate if (`XLEN == 32) begin - assign {Key1, Key0} = Key; + assign {Key_ASID, Key1, Key0} = Key; assign {Query1, Query0} = VirtualPageNumber; // Calculate the actual match value based on the input vpn and the page type. @@ -84,7 +78,7 @@ module tlbcamline #(parameter KEY_BITS = 20, logic Match2, Match3; assign {Query3, Query2, Query1, Query0} = VirtualPageNumber; - assign {Key3, Key2, Key1, Key0} = Key; + assign {Key_ASID, Key3, Key2, Key1, Key0} = Key; // Calculate the actual match value based on the input vpn and the page type. 
// For example, a gigapage in SV39 only cares about VPN[2], so VPN[0] and VPN[1] @@ -107,6 +101,5 @@ module tlbcamline #(parameter KEY_BITS = 20, // *** Might we want to update stored key right away to output match on the // write cycle? (using a mux) flopenrc #(1) validbitflop(clk, reset, TLBFlush, WriteEnable, 1'b1, Valid); - flopenr #(KEY_BITS) keyflop(clk, reset, WriteEnable, VirtualPageNumber, Key); - + flopenr #(KEY_BITS) keyflop(clk, reset, WriteEnable, {ASID, VirtualPageNumber}, Key); endmodule diff --git a/wally-pipelined/src/mmu/tlblru.sv b/wally-pipelined/src/mmu/tlblru.sv index 5dc1f846c..ae933f805 100644 --- a/wally-pipelined/src/mmu/tlblru.sv +++ b/wally-pipelined/src/mmu/tlblru.sv @@ -24,34 +24,27 @@ // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /////////////////////////////////////////// -module tlblru #(parameter ENTRY_BITS = 3) ( - input logic clk, reset, - input logic TLBWrite, - input logic TLBFlush, - input logic [2**ENTRY_BITS-1:0] ReadLines, - input logic CAMHit, - output logic [2**ENTRY_BITS-1:0] WriteLines +module tlblru #(parameter TLB_ENTRIES = 8) ( + input logic clk, reset, + input logic TLBWrite, + input logic TLBFlush, + input logic [TLB_ENTRIES-1:0] ReadLines, + input logic CAMHit, + output logic [TLB_ENTRIES-1:0] WriteLines ); - localparam NENTRIES = 2**ENTRY_BITS; - - // Keep a "recently-used" record for each TLB entry. 
On access, set to 1 - logic [NENTRIES-1:0] RUBits, RUBitsNext, RUBitsAccessed; - - // One-hot encodings of which line is being accessed - logic [NENTRIES-1:0] AccessLines; - - // High if the next access causes all RU bits to be 1 - logic AllUsed; + logic [TLB_ENTRIES-1:0] RUBits, RUBitsNext, RUBitsAccessed; + logic [TLB_ENTRIES-1:0] AccessLines; // One-hot encodings of which line is being accessed + logic AllUsed; // High if the next access causes all RU bits to be 1 // Find the first line not recently used - tlbpriority #(NENTRIES) nru(~RUBits, WriteLines); + tlbpriority #(TLB_ENTRIES) nru(~RUBits, WriteLines); // Track recently used lines, updating on a CAM Hit or TLB write assign AccessLines = TLBWrite ? WriteLines : ReadLines; assign RUBitsAccessed = AccessLines | RUBits; assign AllUsed = &RUBitsAccessed; // if all recently used, then clear to none assign RUBitsNext = AllUsed ? 0 : RUBitsAccessed; - flopenrc #(NENTRIES) lrustate(clk, reset, TLBFlush, (CAMHit || TLBWrite), RUBitsNext, RUBits); + flopenrc #(TLB_ENTRIES) lrustate(clk, reset, TLBFlush, (CAMHit || TLBWrite), RUBitsNext, RUBits); endmodule diff --git a/wally-pipelined/src/mmu/tlbphysicalpagemask.sv b/wally-pipelined/src/mmu/tlbphysicalpagemask.sv index dd791e48a..b45237c51 100644 --- a/wally-pipelined/src/mmu/tlbphysicalpagemask.sv +++ b/wally-pipelined/src/mmu/tlbphysicalpagemask.sv @@ -29,10 +29,9 @@ `include "wally-config.vh" module tlbphysicalpagemask ( - input logic [`VPN_BITS-1:0] VPN, - input logic [`PPN_BITS-1:0] PPN, - input logic [1:0] PageType, - + input logic [`VPN_BITS-1:0] VPN, + input logic [`PPN_BITS-1:0] PPN, + input logic [1:0] PageType, output logic [`PPN_BITS-1:0] MixedPageNumber ); diff --git a/wally-pipelined/src/mmu/tlbram.sv b/wally-pipelined/src/mmu/tlbram.sv index f70cb44c6..0fc50854a 100644 --- a/wally-pipelined/src/mmu/tlbram.sv +++ b/wally-pipelined/src/mmu/tlbram.sv @@ -27,25 +27,20 @@ `include "wally-config.vh" -module tlbram #(parameter ENTRY_BITS = 3) ( +module tlbram 
#(parameter TLB_ENTRIES = 8) ( input logic clk, reset, - //input logic [ENTRY_BITS-1:0] VPNIndex, // Index to read from -// input logic [ENTRY_BITS-1:0] WriteIndex, // *** unused? input logic [`XLEN-1:0] PTEWriteVal, -// input logic TLBWrite, - input logic [2**ENTRY_BITS-1:0] ReadLines, WriteEnables, - + input logic [TLB_ENTRIES-1:0] ReadLines, WriteEnables, output logic [`PPN_BITS-1:0] PhysicalPageNumber, - output logic [7:0] PTEAccessBits + output logic [7:0] PTEAccessBits, + output logic [TLB_ENTRIES-1:0] Global ); - localparam NENTRIES = 2**ENTRY_BITS; - - logic [`XLEN-1:0] RamRead[NENTRIES-1:0]; + logic [`XLEN-1:0] RamRead[TLB_ENTRIES-1:0]; logic [`XLEN-1:0] PageTableEntry; // Generate a flop for every entry in the RAM - tlbramline #(`XLEN) tlblineram[NENTRIES-1:0](clk, reset, ReadLines, WriteEnables, PTEWriteVal, RamRead); + tlbramline #(`XLEN) tlblineram[TLB_ENTRIES-1:0](clk, reset, ReadLines, WriteEnables, PTEWriteVal, RamRead); assign PageTableEntry = RamRead.or; // OR each column of RAM read to read PTE assign PTEAccessBits = PageTableEntry[7:0]; From 595df47a3e1e2e035b9617e763a41985f4f6c697 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 18:05:22 -0400 Subject: [PATCH 22/30] Fixed TLB_ENTRIES merge conflict and handling of global PTEs --- wally-pipelined/src/lsu/lsu.sv | 2 +- wally-pipelined/src/mmu/tlb.sv | 5 +++-- wally-pipelined/src/mmu/tlbcam.sv | 2 +- wally-pipelined/src/mmu/tlbcamline.sv | 4 ++-- wally-pipelined/src/mmu/tlbram.sv | 4 ++-- wally-pipelined/src/mmu/tlbramline.sv | 6 ++++-- 6 files changed, 13 insertions(+), 10 deletions(-) diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index ee2bf31ec..8d4df6ec0 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -217,7 +217,7 @@ module lsu ( - mmu #(.TLB_ENTRIES(`DTLB_ENTRY_BITS), .IMMU(0)) + mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) dmmu(.TLBAccessType(MemRWMtoLSU), .VirtualAddress(MemAdrMtoLSU), 
.Size(Funct3MtoLSU[1:0]), diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index 644e56a56..750212659 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -95,7 +95,7 @@ module tlb #(parameter TLB_ENTRIES = 8, logic [`SVMODE_BITS-1:0] SvMode; logic [1:0] EffectivePrivilegeMode; // privilege mode, possibly modified by MPRV - logic [TLB_ENTRIES-1:0] ReadLines, WriteLines, WriteEnables, Global; // used as the one-hot encoding of WriteIndex + logic [TLB_ENTRIES-1:0] ReadLines, WriteLines, WriteEnables, PTE_G; // used as the one-hot encoding of WriteIndex // Sections of the virtual and physical addresses logic [`VPN_BITS-1:0] VirtualPageNumber; @@ -107,7 +107,7 @@ module tlb #(parameter TLB_ENTRIES = 8, logic [7:0] PTEAccessBits; logic [11:0] PageOffset; - logic PTE_U, PTE_X, PTE_W, PTE_R; // Useful PTE Control Bits + logic PTE_D, PTE_A, PTE_U, PTE_X, PTE_W, PTE_R; // Useful PTE Control Bits logic [1:0] HitPageType; logic CAMHit; logic [`ASID_BITS-1:0] ASID; @@ -153,6 +153,7 @@ module tlb #(parameter TLB_ENTRIES = 8, tlbphysicalpagemask PageMask(VirtualPageNumber, PhysicalPageNumber, HitPageType, PhysicalPageNumberMixed); // unswizzle useful PTE bits + assign {PTE_D, PTE_A} = PTEAccessBits[7:6]; assign {PTE_U, PTE_X, PTE_W, PTE_R} = PTEAccessBits[4:1]; // Check whether the access is allowed, page faulting if not. 
diff --git a/wally-pipelined/src/mmu/tlbcam.sv b/wally-pipelined/src/mmu/tlbcam.sv index 996620cf3..0ad81605d 100644 --- a/wally-pipelined/src/mmu/tlbcam.sv +++ b/wally-pipelined/src/mmu/tlbcam.sv @@ -36,7 +36,7 @@ module tlbcam #(parameter TLB_ENTRIES = 8, input logic [1:0] PageTypeWriteVal, input logic TLBFlush, input logic [TLB_ENTRIES-1:0] WriteEnables, - input logic [TLB_ENTRIES-1:0] Global + input logic [TLB_ENTRIES-1:0] PTE_G, input logic [`ASID_BITS-1:0] ASID, output logic [TLB_ENTRIES-1:0] ReadLines, output logic [1:0] HitPageType, diff --git a/wally-pipelined/src/mmu/tlbcamline.sv b/wally-pipelined/src/mmu/tlbcamline.sv index e0381ed60..ebb9ce3f5 100644 --- a/wally-pipelined/src/mmu/tlbcamline.sv +++ b/wally-pipelined/src/mmu/tlbcamline.sv @@ -34,7 +34,7 @@ module tlbcamline #(parameter KEY_BITS = 20, input logic [`VPN_BITS-1:0] VirtualPageNumber, // The requested page number to compare against the key input logic [`ASID_BITS-1:0] ASID, input logic WriteEnable, // Write a new entry to this line - input logic Global, + input logic PTE_G, input logic [1:0] PageTypeWriteVal, input logic TLBFlush, // Flush this line (set valid to 0) output logic [1:0] PageTypeRead, // *** should this be the stored version or the always updated one? 
@@ -57,7 +57,7 @@ module tlbcamline #(parameter KEY_BITS = 20, logic [SEGMENT_BITS-1:0] Key0, Key1, Query0, Query1; logic MatchASID, Match0, Match1; - assign MatchASID = (ASID == Key_ASID) | Global; + assign MatchASID = (ASID == Key_ASID) | PTE_G; generate if (`XLEN == 32) begin diff --git a/wally-pipelined/src/mmu/tlbram.sv b/wally-pipelined/src/mmu/tlbram.sv index 0fc50854a..98650d0b9 100644 --- a/wally-pipelined/src/mmu/tlbram.sv +++ b/wally-pipelined/src/mmu/tlbram.sv @@ -33,14 +33,14 @@ module tlbram #(parameter TLB_ENTRIES = 8) ( input logic [TLB_ENTRIES-1:0] ReadLines, WriteEnables, output logic [`PPN_BITS-1:0] PhysicalPageNumber, output logic [7:0] PTEAccessBits, - output logic [TLB_ENTRIES-1:0] Global + output logic [TLB_ENTRIES-1:0] PTE_G ); logic [`XLEN-1:0] RamRead[TLB_ENTRIES-1:0]; logic [`XLEN-1:0] PageTableEntry; // Generate a flop for every entry in the RAM - tlbramline #(`XLEN) tlblineram[TLB_ENTRIES-1:0](clk, reset, ReadLines, WriteEnables, PTEWriteVal, RamRead); + tlbramline #(`XLEN) tlblineram[TLB_ENTRIES-1:0](clk, reset, ReadLines, WriteEnables, PTEWriteVal, RamRead, PTE_G); assign PageTableEntry = RamRead.or; // OR each column of RAM read to read PTE assign PTEAccessBits = PageTableEntry[7:0]; diff --git a/wally-pipelined/src/mmu/tlbramline.sv b/wally-pipelined/src/mmu/tlbramline.sv index 77a0e89b1..089f93661 100644 --- a/wally-pipelined/src/mmu/tlbramline.sv +++ b/wally-pipelined/src/mmu/tlbramline.sv @@ -29,10 +29,12 @@ module tlbramline #(parameter WIDTH) (input logic clk, reset, input logic re, we, input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q); + output logic [WIDTH-1:0] q, + output logic PTE_G); logic [WIDTH-1:0] line; - + flopenr #(`XLEN) pteflop(clk, reset, we, d, line); assign q = re ? 
line : 0; + assign PTE_G = line[5]; // send global bit to CAM as part of ASID matching endmodule \ No newline at end of file From 11606e96f1e3fea2b8b02ab744aaf1b7cacd590f Mon Sep 17 00:00:00 2001 From: bbracker Date: Sun, 4 Jul 2021 18:17:06 -0400 Subject: [PATCH 23/30] ICacheCntrl now reacts differently to InstrPageFaultF vs ITLBWriteF --- wally-pipelined/src/cache/ICacheCntrl.sv | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index 748b3f5e5..ee58ed6f4 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -115,8 +115,8 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) localparam STATE_INVALIDATE = 'h12; // *** not sure if invalidate or evict? invalidate by cache block or address? localparam STATE_TLB_MISS = 'h13; localparam STATE_TLB_MISS_DONE = 'h14; - - + localparam STATE_INSTR_PAGE_FAULT = 'h15; + localparam AHBByteLength = `XLEN / 8; localparam AHBOFFETWIDTH = $clog2(AHBByteLength); @@ -370,13 +370,20 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) NextState = STATE_READY; end STATE_TLB_MISS: begin - if (ITLBWriteF | WalkerInstrPageFaultF) begin + if (WalkerInstrPageFaultF) begin + NextState = STATE_INSTR_PAGE_FAULT; + ICacheStallF = 1'b0; + end else if (ITLBWriteF) begin NextState = STATE_TLB_MISS_DONE; end else begin NextState = STATE_TLB_MISS; end end - STATE_TLB_MISS_DONE : begin + STATE_TLB_MISS_DONE: begin + NextState = STATE_READY; + end + STATE_INSTR_PAGE_FAULT: begin + ICacheStallF = 1'b0; NextState = STATE_READY; end default: begin From cc04009f82075c27e98f207619ede196a871a273 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 18:17:09 -0400 Subject: [PATCH 24/30] Touched up TLB D and A bit checks --- wally-pipelined/src/ebu/ahblite.sv | 2 -- wally-pipelined/src/mmu/tlb.sv | 9 +++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git 
a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index b0c6f0336..edbaad68f 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -219,8 +219,6 @@ module ahblite ( generate if (`A_SUPPORTED) begin logic [`XLEN-1:0] AMOResult; -// amoalu amoalu(.a(HRDATA), .b(WriteDataM), .funct(Funct7M), .width(MemSizeM), -// .result(AMOResult)); amoalu amoalu(.srca(HRDATAW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM), .result(AMOResult)); mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, AtomicMaskedM[1], WriteData); diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index 750212659..34400647d 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -111,6 +111,7 @@ module tlb #(parameter TLB_ENTRIES = 8, logic [1:0] HitPageType; logic CAMHit; logic [`ASID_BITS-1:0] ASID; + logic DAFault; // Grab the sv mode from SATP and determine whether translation should occur assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; @@ -165,7 +166,9 @@ module tlb #(parameter TLB_ENTRIES = 8, // only execute non-user mode pages. assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) && ~PTE_U) || ((EffectivePrivilegeMode == `S_MODE) && PTE_U); - assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || ~PTE_X); + // fault for software handling if access bit is off + assign DAFault = ~PTE_A; + assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || ~PTE_X || DAFault); end else begin logic ImproperPrivilege, InvalidRead, InvalidWrite; @@ -180,7 +183,9 @@ module tlb #(parameter TLB_ENTRIES = 8, // Check for write error. Writes are invalid when the page's write bit is // low. 
assign InvalidWrite = WriteAccess && ~PTE_W; - assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || InvalidRead || InvalidWrite); + // Fault for software handling if access bit is off or writing a page with dirty bit off + assign DAFault = ~PTE_A | WriteAccess & ~PTE_D; + assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || InvalidRead || InvalidWrite || DAFault); end endgenerate From 57e1111df307bd5aec977d8253186030f861fc38 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 18:52:16 -0400 Subject: [PATCH 25/30] Gave names to for loops in generate blocks for ease of reference --- wally-pipelined/src/cache/ICacheCntrl.sv | 4 ++-- wally-pipelined/src/cache/dmapped.sv | 4 ++-- wally-pipelined/src/ebu/ahblite.sv | 2 +- wally-pipelined/src/generic/shift.sv | 14 ++++++-------- wally-pipelined/src/ieu/alu.sv | 2 +- wally-pipelined/src/ieu/datapath.sv | 2 +- wally-pipelined/src/ifu/SRAM2P1R1W.sv | 8 ++++---- wally-pipelined/src/ifu/ifu.sv | 2 +- wally-pipelined/src/ifu/localHistoryPredictor.sv | 2 +- wally-pipelined/src/lsu/dcache.sv | 2 +- wally-pipelined/src/lsu/lsuArb.sv | 6 ++++-- wally-pipelined/src/mmu/pmpadrdec.sv | 3 ++- wally-pipelined/src/mmu/pmpchecker.sv | 7 ------- wally-pipelined/src/mmu/tlbpriority.sv | 3 ++- wally-pipelined/src/muldiv/div.sv | 7 +++---- wally-pipelined/src/privileged/csrc.sv | 2 +- wally-pipelined/src/privileged/csri.sv | 2 +- wally-pipelined/src/privileged/csrn.sv | 2 +- wally-pipelined/src/privileged/csrs.sv | 2 +- wally-pipelined/src/privileged/csru.sv | 2 +- wally-pipelined/src/uncore/gpio.sv | 2 +- wally-pipelined/src/uncore/uartPC16550D.sv | 2 +- 22 files changed, 38 insertions(+), 44 deletions(-) diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index 748b3f5e5..6c1981eea 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -425,8 +425,8 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) // store 
read data from memory interface before writing into SRAM. genvar i; generate - for (i = 0; i < WORDSPERLINE; i++) begin - flopenr #(`XLEN) flop(.clk(clk), + for (i = 0; i < WORDSPERLINE; i++) begin:storebuffer + flopenr #(`XLEN) sb(.clk(clk), .reset(reset), .en(InstrAckF & (i == FetchCount)), .d(InstrInF), diff --git a/wally-pipelined/src/cache/dmapped.sv b/wally-pipelined/src/cache/dmapped.sv index f40da412a..426697529 100644 --- a/wally-pipelined/src/cache/dmapped.sv +++ b/wally-pipelined/src/cache/dmapped.sv @@ -106,7 +106,7 @@ module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par assign DataWord = ReadLineTransformed[ReadOffset]; genvar i; generate - for (i=0; i < LINESIZE/WORDSIZE; i++) begin + for (i=0; i < LINESIZE/WORDSIZE; i++) begin:readline assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE]; end endgenerate @@ -214,7 +214,7 @@ module wtdirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par assign DataWord = ReadLineTransformed[ReadOffset]; genvar i; generate - for (i=0; i < LINESIZE/WORDSIZE; i++) begin + for (i=0; i < LINESIZE/WORDSIZE; i++) begin:readline assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE]; end endgenerate diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index edbaad68f..4bd079e96 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -216,7 +216,7 @@ module ahblite ( subwordread swr(.*); // Handle AMO instructions if applicable - generate + generate if (`A_SUPPORTED) begin logic [`XLEN-1:0] AMOResult; amoalu amoalu(.srca(HRDATAW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM), diff --git a/wally-pipelined/src/generic/shift.sv b/wally-pipelined/src/generic/shift.sv index 881525882..70e1076d6 100755 --- a/wally-pipelined/src/generic/shift.sv +++ b/wally-pipelined/src/generic/shift.sv @@ -38,13 +38,12 @@ module shift_right #(parameter WIDTH=8) assign stage[0] = A; generate - for 
(i=0;i<$clog2(WIDTH);i=i+1) - begin : genbit - mux2 #(WIDTH) mux_inst (stage[i], + for (i=0;i<$clog2(WIDTH);i=i+1) begin : genbit + mux2 #(WIDTH) mux_inst (stage[i], {{(WIDTH/(2**(i+1))){1'b0}}, stage[i][WIDTH-1:WIDTH/(2**(i+1))]}, Shift[$clog2(WIDTH)-i-1], stage[i+1]); - end + end endgenerate assign Z = stage[$clog2(WIDTH)]; @@ -60,13 +59,12 @@ module shift_left #(parameter WIDTH=8) assign stage[0] = A; generate - for (i=0;i<$clog2(WIDTH);i=i+1) - begin : genbit - mux2 #(WIDTH) mux_inst (stage[i], + for (i=0;i<$clog2(WIDTH);i=i+1) begin : genbit + mux2 #(WIDTH) mux_inst (stage[i], {stage[i][WIDTH-1-WIDTH/(2**(i+1)):0], {(WIDTH/(2**(i+1))){1'b0}}}, Shift[$clog2(WIDTH)-i-1], stage[i+1]); - end + end endgenerate assign Z = stage[$clog2(WIDTH)]; diff --git a/wally-pipelined/src/ieu/alu.sv b/wally-pipelined/src/ieu/alu.sv index 102fbbedf..ac2c06dd5 100644 --- a/wally-pipelined/src/ieu/alu.sv +++ b/wally-pipelined/src/ieu/alu.sv @@ -42,7 +42,7 @@ module alu #(parameter WIDTH=32) ( assign {carry, presum} = a + condinvb + {{(WIDTH-1){1'b0}},alucontrol[3]}; // support W-type RV64I ADDW/SUBW/ADDIW that sign-extend 32-bit result to 64 bits - generate + generate if (WIDTH==64) assign sum = w64 ? {{32{presum[31]}}, presum[31:0]} : presum; else diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index 44a40045a..f041fce63 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -129,7 +129,7 @@ module datapath ( flopenrc #(5) RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW); // handle Store Conditional result if atomic extension supported - generate + generate if (`A_SUPPORTED) assign SCResultW = SquashSCW ? 
{{(`XLEN-1){1'b0}}, 1'b1} : {{(`XLEN-1){1'b0}}, 1'b0}; else diff --git a/wally-pipelined/src/ifu/SRAM2P1R1W.sv b/wally-pipelined/src/ifu/SRAM2P1R1W.sv index d71f8bc4c..046aacc63 100644 --- a/wally-pipelined/src/ifu/SRAM2P1R1W.sv +++ b/wally-pipelined/src/ifu/SRAM2P1R1W.sv @@ -97,11 +97,11 @@ module SRAM2P1R1W // write port generate - for (index = 0; index < Width; index = index + 1) begin + for (index = 0; index < Width; index = index + 1) begin:mem always_ff @ (posedge clk) begin - if (WEN1Q & BitWEN1[index]) begin - memory[WA1Q][index] <= WD1Q[index]; - end + if (WEN1Q & BitWEN1[index]) begin + memory[WA1Q][index] <= WD1Q[index]; + end end end endgenerate diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 4fcefe857..24952edfa 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -188,7 +188,7 @@ module ifu ( flopenl #(`XLEN) pcreg(clk, reset, ~StallF & ~ICacheStallF, PCNextF, `RESET_VECTOR, PCF); // branch and jump predictor - generate + generate if (`BPRED_ENABLED == 1) begin : bpred // I am making the port connection explicit for now as I want to see them and they will be changing. 
bpred bpred(.*, diff --git a/wally-pipelined/src/ifu/localHistoryPredictor.sv b/wally-pipelined/src/ifu/localHistoryPredictor.sv index 8aaa85c0d..6c5c94783 100644 --- a/wally-pipelined/src/ifu/localHistoryPredictor.sv +++ b/wally-pipelined/src/ifu/localHistoryPredictor.sv @@ -67,7 +67,7 @@ module localHistoryPredictor genvar index; generate - for (index = 0; index < 2**m; index = index +1) begin + for (index = 0; index < 2**m; index = index +1) begin:localhist flopenr #(k) LocalHistoryRegister(.clk(clk), .reset(reset), diff --git a/wally-pipelined/src/lsu/dcache.sv b/wally-pipelined/src/lsu/dcache.sv index fec70ef4b..e8dfeb5cd 100644 --- a/wally-pipelined/src/lsu/dcache.sv +++ b/wally-pipelined/src/lsu/dcache.sv @@ -151,7 +151,7 @@ module dcachecontroller #(parameter LINESIZE = 256) ( genvar i; generate - for (i=0; i < WORDSPERLINE; i++) begin + for (i=0; i < WORDSPERLINE; i++) begin:sb flopenr #(`XLEN) flop(clk, reset, FetchState & (i == FetchWordNum), ReadDataW, DCacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]); end endgenerate diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index 3f57cabb4..dc77ec9df 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -138,12 +138,14 @@ module lsuArb assign MemRWMtoLSU = SelPTW ? {HPTWRead, 1'b0} : MemRWM; generate - if (`XLEN == 32) begin + assign PTWSize = (`XLEN==32 ? 3'b010 : 3'b011); // 32 or 64-bit access from htpw + /* if (`XLEN == 32) begin assign Funct3MtoLSU = SelPTW ? 3'b010 : Funct3M; end else begin assign Funct3MtoLSU = SelPTW ? 3'b011 : Funct3M; - end + end*/ endgenerate + mux2 sizemux(Funct3M, PTWSize, SelPTW, Funct3MtoLSU); assign AtomicMtoLSU = SelPTW ? 2'b00 : AtomicM; assign MemAdrMtoLSU = SelPTW ? 
HPTWPAdr : MemAdrM; diff --git a/wally-pipelined/src/mmu/pmpadrdec.sv b/wally-pipelined/src/mmu/pmpadrdec.sv index 50d399aed..0a14d8320 100644 --- a/wally-pipelined/src/mmu/pmpadrdec.sv +++ b/wally-pipelined/src/mmu/pmpadrdec.sv @@ -76,8 +76,9 @@ module pmpadrdec ( generate assign Mask[1:0] = 2'b11; assign Mask[2] = (AdrMode == NAPOT); // mask has 0s in upper bis for NA4 region - for (i=3; i < `PA_BITS; i=i+1) + for (i=3; i < `PA_BITS; i=i+1) begin:mask assign Mask[i] = Mask[i-1] & PMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore + end endgenerate // verilator lint_on UNOPTFLAT diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv index ee4b261df..9c7f11da4 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -63,12 +63,6 @@ module pmpchecker ( // verilator lint_on UNOPTFLAT logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] genvar i,j; - /* - generate // extract 8-bit chunks from PMPCFG array - for (j=0; j<`PMP_ENTRIES; j = j+8) - assign {PMPCfg[j+7], PMPCfg[j+6], PMPCfg[j+5], PMPCfg[j+4], - PMPCfg[j+3], PMPCfg[j+2], PMPCfg[j+1], PMPCfg[j]} = PMPCFG_ARRAY_REGW[j/8]; - endgenerate */ pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0]( .PhysicalAddress, @@ -80,7 +74,6 @@ module pmpchecker ( .NoLowerMatchOut(NoLowerMatch), .Match, .Active, .L, .X, .W, .R); - // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? 
|L : |Active; diff --git a/wally-pipelined/src/mmu/tlbpriority.sv b/wally-pipelined/src/mmu/tlbpriority.sv index a061f622b..5096cae60 100644 --- a/wally-pipelined/src/mmu/tlbpriority.sv +++ b/wally-pipelined/src/mmu/tlbpriority.sv @@ -41,8 +41,9 @@ module tlbpriority #(parameter ENTRIES = 8) ( genvar i; generate assign nolower[0] = 1; - for (i=1; i 0) assign rxfullbit[i] = ((rxfifohead==i) | rxfullbit[i-1]) & (rxfifotail != i); From 39fa84efdd3c2e0ad22502f6b56575fd5fe016a8 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 18:56:30 -0400 Subject: [PATCH 26/30] Renamed Funct3ToLSU/fromLSU -> SizeToLSU/FromLSU and simplified size muxing in lsuArb --- wally-pipelined/src/lsu/lsu.sv | 12 ++++++------ wally-pipelined/src/lsu/lsuArb.sv | 10 +++------- wally-pipelined/src/wally/wallypipelinedhart.sv | 8 ++++---- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 8d4df6ec0..a2bcf52bb 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -64,7 +64,7 @@ module lsu ( output logic [1:0] AtomicMaskedM, input logic MemAckW, // from ahb input logic [`XLEN-1:0] HRDATAW, // from ahb - output logic [2:0] Funct3MfromLSU, + output logic [2:0] SizeFromLSU, output logic StallWfromLSU, @@ -132,7 +132,7 @@ module lsu ( logic MMUTranslate; logic HPTWRead; logic [1:0] MemRWMtoLSU; - logic [2:0] Funct3MtoLSU; + logic [2:0] SizeToLSU; logic [1:0] AtomicMtoLSU; logic [`XLEN-1:0] MemAdrMtoLSU; logic [`XLEN-1:0] WriteDataMtoLSU; @@ -204,7 +204,7 @@ module lsu ( // LSU .DisableTranslation(DisableTranslation), .MemRWMtoLSU(MemRWMtoLSU), - .Funct3MtoLSU(Funct3MtoLSU), + .SizeToLSU(SizeToLSU), .AtomicMtoLSU(AtomicMtoLSU), .MemAdrMtoLSU(MemAdrMtoLSU), .WriteDataMtoLSU(WriteDataMtoLSU), // *** ?????????????? 
@@ -220,7 +220,7 @@ module lsu ( mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) dmmu(.TLBAccessType(MemRWMtoLSU), .VirtualAddress(MemAdrMtoLSU), - .Size(Funct3MtoLSU[1:0]), + .Size(SizeToLSU[1:0]), .PTEWriteVal(PageTableEntryM), .PageTypeWriteVal(PageTypeM), .TLBWrite(DTLBWriteM), @@ -244,7 +244,7 @@ module lsu ( // Determine if an Unaligned access is taking place always_comb - case(Funct3MtoLSU[1:0]) + case(SizeToLSU[1:0]) 2'b00: DataMisalignedMfromLSU = 0; // lb, sb, lbu 2'b01: DataMisalignedMfromLSU = MemAdrMtoLSU[0]; // lh, sh, lhu 2'b10: DataMisalignedMfromLSU = MemAdrMtoLSU[1] | MemAdrMtoLSU[0]; // lw, sw, flw, fsw, lwu @@ -400,7 +400,7 @@ module lsu ( end // always_comb // *** for now just pass through size - assign Funct3MfromLSU = Funct3MtoLSU; + assign SizeFromLSU = SizeToLSU; assign StallWfromLSU = StallWtoLSU; diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index dc77ec9df..23e88970f 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -54,7 +54,7 @@ module lsuArb // to LSU output logic DisableTranslation, output logic [1:0] MemRWMtoLSU, - output logic [2:0] Funct3MtoLSU, + output logic [2:0] SizeToLSU, output logic [1:0] AtomicMtoLSU, output logic [`XLEN-1:0] MemAdrMtoLSU, output logic [`XLEN-1:0] WriteDataMtoLSU, @@ -87,6 +87,7 @@ module lsuArb statetype CurrState, NextState; logic SelPTW; logic HPTWStallD; + logic [2:0] PTWSize; flopenl #(.TYPE(statetype)) StateReg(.clk(clk), @@ -139,13 +140,8 @@ module lsuArb generate assign PTWSize = (`XLEN==32 ? 3'b010 : 3'b011); // 32 or 64-bit access from htpw - /* if (`XLEN == 32) begin - assign Funct3MtoLSU = SelPTW ? 3'b010 : Funct3M; - end else begin - assign Funct3MtoLSU = SelPTW ? 3'b011 : Funct3M; - end*/ endgenerate - mux2 sizemux(Funct3M, PTWSize, SelPTW, Funct3MtoLSU); + mux2 #(3) sizemux(Funct3M, PTWSize, SelPTW, SizeToLSU); assign AtomicMtoLSU = SelPTW ? 2'b00 : AtomicM; assign MemAdrMtoLSU = SelPTW ? 
HPTWPAdr : MemAdrM; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 1c44565f9..f18d5af43 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -159,7 +159,7 @@ module wallypipelinedhart // IEU vs HPTW arbitration signals to send to LSU logic [1:0] MemRWMtoLSU; - logic [2:0] Funct3MtoLSU; + logic [2:0] SizeToLSU; logic [1:0] AtomicMtoLSU; logic [`XLEN-1:0] MemAdrMtoLSU; logic [`XLEN-1:0] WriteDataMtoLSU; @@ -169,7 +169,7 @@ module wallypipelinedhart logic DataMisalignedMfromLSU; logic StallWtoLSU; logic StallWfromLSU; - logic [2:0] Funct3MfromLSU; + logic [2:0] SizeFromLSU; ifu ifu(.InstrInF(InstrRData), @@ -207,7 +207,7 @@ module wallypipelinedhart .AtomicMaskedM(AtomicMaskedM), .MemAckW(MemAckW), .HRDATAW(HRDATAW), - .Funct3MfromLSU(Funct3MfromLSU), // stays the same + .SizeFromLSU(SizeFromLSU), // stays the same .StallWfromLSU(StallWfromLSU), // stays the same .DSquashBusAccessM(DSquashBusAccessM), // probalby removed after dcache implemenation? // currently not connected (but will need to be used for lsu talking to ahb. 
@@ -261,7 +261,7 @@ module wallypipelinedhart //.InstrRData(InstrF), // hook up InstrF later .ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking .WriteDataM(WriteDataM), - .MemSizeM(Funct3MfromLSU[1:0]), .UnsignedLoadM(Funct3MfromLSU[2]), + .MemSizeM(SizeFromLSU[1:0]), .UnsignedLoadM(SizeFromLSU[2]), .Funct7M(InstrM[31:25]), .HRDATAW(HRDATAW), .StallW(StallWfromLSU), From ce3edd028895f0a1f2ef016c4e02fb0f5d7adeea Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 19:02:56 -0400 Subject: [PATCH 27/30] Renamed Funct3ToLSU/fromLSU -> SizeToLSU/FromLSU and simplified size muxing in lsuArb --- wally-pipelined/src/uncore/plic.sv | 42 +++++++++++++++++------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/wally-pipelined/src/uncore/plic.sv b/wally-pipelined/src/uncore/plic.sv index dc50eb4f1..70c72189f 100644 --- a/wally-pipelined/src/uncore/plic.sv +++ b/wally-pipelined/src/uncore/plic.sv @@ -164,27 +164,31 @@ module plic ( flopr #(N) intPendingFlop(HCLK,~HRESETn,nextIntPending,intPending); // pending array - indexed by priority_lvl x source_ID - genvar i; + genvar i, j; generate - for (i=1; i<=N; i=i+1) begin - // *** make sure that this synthesizes into N decoders, not 7*N 3-bit equality comparators (right?) 
- assign pendingArray[7][i] = (intPriority[i]==7) & intEn[i] & intPending[i]; - assign pendingArray[6][i] = (intPriority[i]==6) & intEn[i] & intPending[i]; - assign pendingArray[5][i] = (intPriority[i]==5) & intEn[i] & intPending[i]; - assign pendingArray[4][i] = (intPriority[i]==4) & intEn[i] & intPending[i]; - assign pendingArray[3][i] = (intPriority[i]==3) & intEn[i] & intPending[i]; - assign pendingArray[2][i] = (intPriority[i]==2) & intEn[i] & intPending[i]; - assign pendingArray[1][i] = (intPriority[i]==1) & intEn[i] & intPending[i]; + for (j=1; j<=7; j++) begin: pending + for (i=1; i<=N; i=i+1) begin: pendingbit + // *** make sure that this synthesizes into N decoders, not 7*N 3-bit equality comparators (right?) + assign pendingArray[j][i] = (intPriority[i]==j) & intEn[i] & intPending[i]; +/* assign pendingArray[6][i] = (intPriority[i]==6) & intEn[i] & intPending[i]; + assign pendingArray[5][i] = (intPriority[i]==5) & intEn[i] & intPending[i]; + assign pendingArray[4][i] = (intPriority[i]==4) & intEn[i] & intPending[i]; + assign pendingArray[3][i] = (intPriority[i]==3) & intEn[i] & intPending[i]; + assign pendingArray[2][i] = (intPriority[i]==2) & intEn[i] & intPending[i]; + assign pendingArray[1][i] = (intPriority[i]==1) & intEn[i] & intPending[i]; */ + end end endgenerate // pending array, except grouped by priority - assign pendingPGrouped[7:1] = {|pendingArray[7], +/* assign pendingPGrouped[7:1] = {|pendingArray[7], |pendingArray[6], |pendingArray[5], |pendingArray[4], |pendingArray[3], |pendingArray[2], - |pendingArray[1]}; + |pendingArray[1]}; */ + assign pendingPGrouped = pendingArray.or; + // pendingPGrouped, except only topmost priority is active assign pendingMaxP[7:1] = {pendingPGrouped[7], pendingPGrouped[6] & ~|pendingPGrouped[7], @@ -202,14 +206,16 @@ module plic ( | ({N{pendingMaxP[2]}} & pendingArray[2]) | ({N{pendingMaxP[1]}} & pendingArray[1]); // find the lowest ID amongst active interrupts at the highest priority - integer j; + genvar k; 
// *** verify that this synthesizes to a reasonable priority encoder and that j doesn't actually exist in hardware - always_comb begin - intClaim = 6'b0; - for(j=N; j>0; j=j-1) begin - if(pendingRequestsAtMaxP[j]) intClaim = j[5:0]; + generate + always_comb begin + intClaim = 6'b0; + for(k=N; k>0; k=k-1) begin:priorityenc + if(pendingRequestsAtMaxP[k]) intClaim = k; + end end - end + endgenerate // create threshold mask always_comb begin From bbbc1d2f89df8719eaa227e757325a576b5dcda4 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 19:17:15 -0400 Subject: [PATCH 28/30] Simplified PLIC with generate --- wally-pipelined/src/uncore/plic.sv | 44 ++++++++++++------------------ 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/wally-pipelined/src/uncore/plic.sv b/wally-pipelined/src/uncore/plic.sv index 70c72189f..ef7ecdd5b 100644 --- a/wally-pipelined/src/uncore/plic.sv +++ b/wally-pipelined/src/uncore/plic.sv @@ -170,24 +170,18 @@ module plic ( for (i=1; i<=N; i=i+1) begin: pendingbit // *** make sure that this synthesizes into N decoders, not 7*N 3-bit equality comparators (right?) 
assign pendingArray[j][i] = (intPriority[i]==j) & intEn[i] & intPending[i]; -/* assign pendingArray[6][i] = (intPriority[i]==6) & intEn[i] & intPending[i]; - assign pendingArray[5][i] = (intPriority[i]==5) & intEn[i] & intPending[i]; - assign pendingArray[4][i] = (intPriority[i]==4) & intEn[i] & intPending[i]; - assign pendingArray[3][i] = (intPriority[i]==3) & intEn[i] & intPending[i]; - assign pendingArray[2][i] = (intPriority[i]==2) & intEn[i] & intPending[i]; - assign pendingArray[1][i] = (intPriority[i]==1) & intEn[i] & intPending[i]; */ end end endgenerate // pending array, except grouped by priority -/* assign pendingPGrouped[7:1] = {|pendingArray[7], + assign pendingPGrouped[7:1] = {|pendingArray[7], |pendingArray[6], |pendingArray[5], |pendingArray[4], |pendingArray[3], |pendingArray[2], - |pendingArray[1]}; */ - assign pendingPGrouped = pendingArray.or; + |pendingArray[1]}; + //assign pendingPGrouped = pendingArray.or; // pendingPGrouped, except only topmost priority is active assign pendingMaxP[7:1] = {pendingPGrouped[7], @@ -206,26 +200,24 @@ module plic ( | ({N{pendingMaxP[2]}} & pendingArray[2]) | ({N{pendingMaxP[1]}} & pendingArray[1]); // find the lowest ID amongst active interrupts at the highest priority - genvar k; - // *** verify that this synthesizes to a reasonable priority encoder and that j doesn't actually exist in hardware - generate - always_comb begin - intClaim = 6'b0; - for(k=N; k>0; k=k-1) begin:priorityenc - if(pendingRequestsAtMaxP[k]) intClaim = k; - end + int k; + // *** verify that this synthesizes to a reasonable priority encoder and that k doesn't actually exist in hardware + always_comb begin + intClaim = 6'b0; + for(k=N; k>0; k=k-1) begin + if(pendingRequestsAtMaxP[k]) intClaim = k[5:0]; end - endgenerate + end // create threshold mask - always_comb begin - threshMask[7] = ~(7==intThreshold); - threshMask[6] = ~(6==intThreshold) & threshMask[7]; - threshMask[5] = ~(5==intThreshold) & threshMask[6]; - threshMask[4] = 
~(4==intThreshold) & threshMask[5]; - threshMask[3] = ~(3==intThreshold) & threshMask[4]; - threshMask[2] = ~(2==intThreshold) & threshMask[3]; - threshMask[1] = ~(1==intThreshold) & threshMask[2]; + always_comb begin + threshMask[7] = (intThreshold != 7); + threshMask[6] = (intThreshold != 6) & threshMask[7]; + threshMask[5] = (intThreshold != 5) & threshMask[6]; + threshMask[4] = (intThreshold != 4) & threshMask[5]; + threshMask[3] = (intThreshold != 3) & threshMask[4]; + threshMask[2] = (intThreshold != 2) & threshMask[3]; + threshMask[1] = (intThreshold != 1) & threshMask[2]; end // is the max priority > threshold? // *** would it be any better to first priority encode maxPriority into binary and then ">" with threshold? From b8b7fab02bdf0043983486fa6175f8966cad701f Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 19:33:46 -0400 Subject: [PATCH 29/30] Fixed disabling MulDiv when not supported. Started adding generate for FPU unsupported --- wally-pipelined/src/fpu/fpu.sv | 678 ++++++++++++++------------- wally-pipelined/src/muldiv/muldiv.sv | 4 +- 2 files changed, 351 insertions(+), 331 deletions(-) diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index ff29dfd70..3b1a4ed18 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -43,90 +43,94 @@ module fpu ( output logic [4:0] SetFflagsM, // FPU flags output logic [`XLEN-1:0] FPUResultW); // FPU result // *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS - // control logic signal instantiation - logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable - logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode - logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double - logic FDivStartD, FDivStartE; // Start division - logic FWriteIntD; // Write to integer register - logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal - logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // 
Select FP result - logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component - logic [1:0] FResSelD, FResSelE, FResSelM; - logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; - logic [4:0] Adr1E, Adr2E, Adr3E; - - // regfile signals - logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining - logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [`XLEN-1:0] SrcXMAligned; - logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding) - logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding) - logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding) - - // div/sqrt signals - logic [63:0] FDivResultM, FDivResultW; - logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; - logic FDivSqrtDoneE; - logic [63:0] DivInput1E, DivInput2E; - logic HoldInputs; // keep forwarded inputs arround durring division - - // FMA signals - logic [105:0] ProdManE, ProdManM; ///*** put pipline stages in units - logic [161:0] AlignedAddendE, AlignedAddendM; - logic [12:0] ProdExpE, ProdExpM; - logic AddendStickyE, AddendStickyM; - logic KillProdE, KillProdM; - logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM; - logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM; - logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM; - logic [63:0] FMAResM, FMAResW; - logic [4:0] FMAFlgM, FMAFlgW; - // add/cvt signals - logic [63:0] AddSumE, AddSumM; - logic [63:0] AddSumTcE, AddSumTcM; - logic [3:0] AddSelInvE, AddSelInvM; - logic [10:0] AddExpPostSumE,AddExpPostSumM; - logic AddCorrSignE, AddCorrSignM; - logic AddOp1NormE, AddOp1NormM; - logic AddOp2NormE, AddOp2NormM; - logic AddOpANormE, AddOpANormM; - logic AddOpBNormE, AddOpBNormM; - logic AddInvalidE, AddInvalidM; - logic AddDenormInE, AddDenormInM; - logic AddSwapE, AddSwapM; - logic AddNormOvflowE, AddNormOvflowM; 
//***this isn't used in addcvt2 - logic AddSignAE, AddSignAM; - logic AddConvertE, AddConvertM; - logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M; - logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM; - logic [10:0] AddExponentE, AddExponentM; - logic [63:0] FAddResM, FAddResW; - logic [4:0] FAddFlgM, FAddFlgW; - - // cmp signals - logic CmpNVE, CmpNVM, CmpNVW; - logic [63:0] CmpResE, CmpResM, CmpResW; - - // fsgn signals - logic [63:0] SgnResE, SgnResM; - logic SgnNVE, SgnNVM, SgnNVW; - logic [63:0] FResM, FResW; - logic FFlgM, FFlgW; - - // instantiation of W stage regfile signals - logic [63:0] AlignedSrcAM; - - // classify signals - logic [63:0] ClassResE, ClassResM; - - // 64-bit FPU result - logic [63:0] FPUResult64W; - logic [4:0] FPUFlagsW; - - + /*generate + if (`F_SUPPORTED) begin */ + + // control logic signal instantiation + logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable + logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode + logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double + logic FDivStartD, FDivStartE; // Start division + logic FWriteIntD; // Write to integer register + logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal + logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result + logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component + logic [1:0] FResSelD, FResSelE, FResSelM; + logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; + logic [4:0] Adr1E, Adr2E, Adr3E; + + // regfile signals + logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining + logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [`XLEN-1:0] SrcXMAligned; + logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding) + logic [63:0] 
SrcYE, SrcYM; // Input 2 to the various units (after forwarding) + logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding) + + // div/sqrt signals + logic [63:0] FDivResultM, FDivResultW; + logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; + logic FDivSqrtDoneE; + logic [63:0] DivInput1E, DivInput2E; + logic HoldInputs; // keep forwarded inputs arround durring division + + // FMA signals + logic [105:0] ProdManE, ProdManM; ///*** put pipline stages in units + logic [161:0] AlignedAddendE, AlignedAddendM; + logic [12:0] ProdExpE, ProdExpM; + logic AddendStickyE, AddendStickyM; + logic KillProdE, KillProdM; + logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM; + logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM; + logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM; + logic [63:0] FMAResM, FMAResW; + logic [4:0] FMAFlgM, FMAFlgW; + + // add/cvt signals + logic [63:0] AddSumE, AddSumM; + logic [63:0] AddSumTcE, AddSumTcM; + logic [3:0] AddSelInvE, AddSelInvM; + logic [10:0] AddExpPostSumE,AddExpPostSumM; + logic AddCorrSignE, AddCorrSignM; + logic AddOp1NormE, AddOp1NormM; + logic AddOp2NormE, AddOp2NormM; + logic AddOpANormE, AddOpANormM; + logic AddOpBNormE, AddOpBNormM; + logic AddInvalidE, AddInvalidM; + logic AddDenormInE, AddDenormInM; + logic AddSwapE, AddSwapM; + logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2 + logic AddSignAE, AddSignAM; + logic AddConvertE, AddConvertM; + logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M; + logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM; + logic [10:0] AddExponentE, AddExponentM; + logic [63:0] FAddResM, FAddResW; + logic [4:0] FAddFlgM, FAddFlgW; + + // cmp signals + logic CmpNVE, CmpNVM, CmpNVW; + logic [63:0] CmpResE, CmpResM, CmpResW; + + // fsgn signals + logic [63:0] SgnResE, SgnResM; + logic SgnNVE, SgnNVM, SgnNVW; + logic [63:0] FResM, FResW; + logic FFlgM, FFlgW; + + // instantiation of W stage regfile signals + logic [63:0] AlignedSrcAM; + + // 
classify signals + logic [63:0] ClassResE, ClassResM; + + // 64-bit FPU result + logic [63:0] FPUResult64W; + logic [4:0] FPUFlagsW; + + @@ -134,189 +138,19 @@ module fpu ( - //DECODE STAGE - - - // top-level controller for FPU - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), - .FRM_REGW, .IllegalFPUInstrD, .FWriteEnD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, - .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); - - // regfile instantiation - fregfile fregfile (clk, reset, FWriteEnW, - InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, - FPUResult64W, - FRD1D, FRD2D, FRD3D); - - - - - - - - - - //***************** - // D/E pipe registers - //***************** - flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); - flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); - flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); - flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, - {Adr1E, Adr2E, Adr3E}); - flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD}, - {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); - - - - - - - - - - - - - - - //EXECUTION STAGE - - // Hazard unit for FPU - fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FWriteEnM, .FWriteEnW, .RdM, .RdW, .FResultSelM, .FStallD, - .ForwardXE, .ForwardYE, .ForwardZE); - - // forwarding muxs - mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); - mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE); - mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE); - - - // first of two-stage instance of floating-point fused multiply-add unit - fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, 
.AlignedAddendE, - .ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, - .XNaNE, .YNaNE, .ZNaNE ); - - // first and only instance of floating-point divider - logic fpdivClk; - - clockgater fpdivclkg(.E(FDivStartE), - .SE(1'b0), - .CLK(clk), - .ECLK(fpdivClk)); - - // capture the inputs for div/sqrt - flopenrc #(64) reg_input1 (.d(SrcXE), .q(DivInput1E), - .en(~HoldInputs), .clear(FDivSqrtDoneE), - .reset(reset), .clk(clk)); - flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E), - .en(~HoldInputs), .clear(FDivSqrtDoneE), - .reset(reset), .clk(clk)); - - fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, - .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, - .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); - - - - // first of two-stage instance of floating-point add/cvt unit - fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE, - .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE, - .AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, - .AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE); - - // first and only instance of floating-point comparator - fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE); - - // first and only instance of floating-point sign converter - fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE); - - // first and only instance of floating-point classify unit - fclassify fclassify (.SrcXE, .FmtE, .ClassResE); - - // output for store instructions - assign FWriteDataE = FmtE ? 
SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; - //***swap to mux - - - - - - - - - - - //***************** - // E/M pipe registers - //***************** - flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM); - flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM); - flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM); - - flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); - flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); - flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); - flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE}, - {AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM}); - - flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); - flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); - flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); - flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); - flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); - flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); - flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); - flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); - flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM, - {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE}, - {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM}); - - flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); - flopenrc 
#(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); - - flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); - flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); - - flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM, - {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}, - {FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); - - flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); - - - - - - - - - //BEGIN MEMORY STAGE - - mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM); - mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM); - - //***change to mux - assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; - mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM); - - // second instance of two-stage FMA unit - fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM, - .ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM, - .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, - .FMAResM, .FMAFlgM); - - // second instance of two-stage floating-point add/cvt unit - fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M, - .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, - .AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, - .AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM); - - // Align SrcA to MSB when single precicion - mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); + //DECODE STAGE + + + // top-level controller for FPU + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), + .FRM_REGW, .IllegalFPUInstrD, .FWriteEnD, .FDivStartD, .FResultSelD, 
.FOpCtrlD, .FResSelD, + .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); + + // regfile instantiation + fregfile fregfile (clk, reset, FWriteEnW, + InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, + FPUResult64W, + FRD1D, FRD2D, FRD3D); @@ -326,77 +160,261 @@ module fpu ( + //***************** + // D/E pipe registers + //***************** + flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); + flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); + flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); + flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); + flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD}, + {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); + + + + + + + + + + + + + + + //EXECUTION STAGE + + // Hazard unit for FPU + fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FWriteEnM, .FWriteEnW, .RdM, .RdW, .FResultSelM, .FStallD, + .ForwardXE, .ForwardYE, .ForwardZE); + + // forwarding muxs + mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); + mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE); + mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE); + + + // first of two-stage instance of floating-point fused multiply-add unit + fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, .AlignedAddendE, + .ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, + .XNaNE, .YNaNE, .ZNaNE ); + + // first and only instance of floating-point divider + logic fpdivClk; + + clockgater fpdivclkg(.E(FDivStartE), + .SE(1'b0), + .CLK(clk), + .ECLK(fpdivClk)); + + // capture the inputs for div/sqrt + flopenrc #(64) reg_input1 (.d(SrcXE), 
.q(DivInput1E), + .en(~HoldInputs), .clear(FDivSqrtDoneE), + .reset(reset), .clk(clk)); + flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E), + .en(~HoldInputs), .clear(FDivSqrtDoneE), + .reset(reset), .clk(clk)); + + fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, + .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, + .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); + + + + // first of two-stage instance of floating-point add/cvt unit + fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE, + .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE, + .AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, + .AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE); + + // first and only instance of floating-point comparator + fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE); + + // first and only instance of floating-point sign converter + fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE); + + // first and only instance of floating-point classify unit + fclassify fclassify (.SrcXE, .FmtE, .ClassResE); + + // output for store instructions + assign FWriteDataE = FmtE ? 
SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; + //***swap to mux + + + + + + + + + + + //***************** + // E/M pipe registers + //***************** + flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM); + flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM); + flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM); + + flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); + flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); + flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); + flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM, + {AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE}, + {AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM}); + + flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); + flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); + flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); + flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); + flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); + flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); + flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); + flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); + flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM, + {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE}, + {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM}); + + flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); + flopenrc 
#(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); + + flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); + flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); + + flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM, + {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}, + {FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); + + flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); + + + + + + + + + //BEGIN MEMORY STAGE + + mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM); + mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM); + + //***change to mux + assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; + mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM); + + // second instance of two-stage FMA unit + fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM, + .ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM, + .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, + .FMAResM, .FMAFlgM); + + // second instance of two-stage floating-point add/cvt unit + fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M, + .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, + .AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, + .AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM); + + // Align SrcA to MSB when single precicion + mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); - //***************** - // M/W pipe registers - //***************** - flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); - flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW); - - flopenrc #(64) 
MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); - flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW); - - flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); - flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW); - - flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW); - flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); - - flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); - flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); - - flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW, - {FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM}, - {FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW}); - - - - - - - //######################################### - // BEGIN WRITEBACK STAGE - //######################################### -//***turn into muxs - always_comb begin - case (FResultSelW) - 3'b000 : FPUFlagsW = 5'b0; - 3'b001 : FPUFlagsW = FMAFlgW; - 3'b010 : FPUFlagsW = FAddFlgW; - 3'b011 : FPUFlagsW = FDivSqrtFlgW; - 3'b100 : FPUFlagsW = {4'b0,FFlgW}; - default : FPUFlagsW = 5'bxxxxx; - endcase - end - - always_comb begin - case (FResultSelW) - 3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0}; - 3'b001 : FPUResult64W = FMAResW; - 3'b010 : FPUResult64W = FAddResW; - 3'b011 : FPUResult64W = FDivResultW; - 3'b100 : FPUResult64W = FResW; - default : FPUResult64W = 64'bxxxxx; - endcase - end - - - // interface between XLEN size datapath and double-precision sized - // floating-point results - // - // define offsets for LSB zero extension or truncation - always_comb begin - // zero extension -//***turn into mux - FPUResultW = FmtW ? 
FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]}; - //*** put into mem stage - SetFflagsM = FPUFlagsW; + + + + + //***************** + // M/W pipe registers + //***************** + flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); + flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW); + + flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); + flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW); + + flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); + flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW); + + flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW); + flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); + + flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); + + flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM}, + {FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW}); + + + + + + + //######################################### + // BEGIN WRITEBACK STAGE + //######################################### + + + + + + //***turn into muxs + always_comb begin + case (FResultSelW) + 3'b000 : FPUFlagsW = 5'b0; + 3'b001 : FPUFlagsW = FMAFlgW; + 3'b010 : FPUFlagsW = FAddFlgW; + 3'b011 : FPUFlagsW = FDivSqrtFlgW; + 3'b100 : FPUFlagsW = {4'b0,FFlgW}; + default : FPUFlagsW = 5'bxxxxx; + endcase + end + + always_comb begin + case (FResultSelW) + 3'b000 : FPUResult64W = FmtW ? 
{ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0}; + 3'b001 : FPUResult64W = FMAResW; + 3'b010 : FPUResult64W = FAddResW; + 3'b011 : FPUResult64W = FDivResultW; + 3'b100 : FPUResult64W = FResW; + default : FPUResult64W = 64'bxxxxx; + endcase + end + + + // interface between XLEN size datapath and double-precision sized + // floating-point results + // + // define offsets for LSB zero extension or truncation + always_comb begin + // zero extension + //***turn into mux + FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]}; + //*** put into mem stage + SetFflagsM = FPUFlagsW; + end + + /* end else begin + assign FStallD = 0; + assign FWriteIntE = 0; + assign FWriteIntM = 0; + assign FWriteIntW = 0; + assign FWriteDataE = 0; + assign FIntResM = 0; + assign FDivBusyE = 0; + assign IllegalFPUInstrD = 0; + assign SetFflagsM = 0; + assign FPUResultW = 0; end + endgenerate*/ endmodule // fpu diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index e10b0c55d..7288229c5 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -138,7 +138,9 @@ module muldiv ( flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW); end else begin // no M instructions supported - assign MulDivResultW = 0; + assign MulDivResultW = 0; + assign DivBusyE = 0; + assign DivDoneE = 0; end endgenerate From e65fb5bb3578b7c71c21037b9d11002973af5973 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 5 Jul 2021 10:30:46 -0400 Subject: [PATCH 30/30] Added F_SUPPORTED flag to disable floating point unit when not in MISA --- .../regression/wave-dos/peripheral-waves.do | 3 ++- wally-pipelined/src/fpu/fpu.sv | 12 +++++------- wally-pipelined/testbench/testbench-imperas.sv | 14 ++++++++------ 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do 
b/wally-pipelined/regression/wave-dos/peripheral-waves.do index 1304b40c6..a42bfbd43 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -9,7 +9,8 @@ add wave /testbench/clk add wave /testbench/reset add wave -divider -add wave /testbench/dut/hart/DataStall +#add wave /testbench/dut/hart/DataStall +add wave /testbench/debug add wave /testbench/dut/hart/StallF add wave /testbench/dut/hart/StallD add wave /testbench/dut/hart/StallE diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 3b1a4ed18..59f5e4392 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -44,9 +44,8 @@ module fpu ( output logic [`XLEN-1:0] FPUResultW); // FPU result // *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS - /*generate - if (`F_SUPPORTED) begin */ - + generate + if (`F_SUPPORTED) begin // control logic signal instantiation logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode @@ -401,8 +400,7 @@ module fpu ( //*** put into mem stage SetFflagsM = FPUFlagsW; end - - /* end else begin + end else begin // no F_SUPPORTED; tie outputs low assign FStallD = 0; assign FWriteIntE = 0; assign FWriteIntM = 0; @@ -410,11 +408,11 @@ module fpu ( assign FWriteDataE = 0; assign FIntResM = 0; assign FDivBusyE = 0; - assign IllegalFPUInstrD = 0; + assign IllegalFPUInstrD = 1; assign SetFflagsM = 0; assign FPUResultW = 0; end - endgenerate*/ + endgenerate endmodule // fpu diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 8c3e28c3a..2cf37c17f 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -514,6 +514,9 @@ string tests32f[] = '{ logic HMASTLOCK; logic HCLK, HRESETn; logic [`XLEN-1:0] PCW; + + logic [`XLEN-1:0] debug; + assign debug = 
dut.uncore.dtim.RAM[536872960]; flopenr #(`XLEN) PCWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.PCM, PCW); flopenr #(32) InstrWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.InstrM, InstrW); @@ -656,10 +659,7 @@ string tests32f[] = '{ // Check errors errors = (i == SIGNATURESIZE+1); // error if file is empty i = 0; - if (`XLEN == 32) - testadr = (`TIM_BASE+tests[test+1].atohex())/4; - else - testadr = (`TIM_BASE+tests[test+1].atohex())/8; + testadr = (`TIM_BASE+tests[test+1].atohex())/(`XLEN/8); /* verilator lint_off INFINITELOOP */ while (signature[i] !== 'bx) begin //$display("signature[%h] = %h", i, signature[i]); @@ -669,14 +669,16 @@ string tests32f[] = '{ // kind of hacky test for garbage right now errors = errors+1; $display(" Error on test %s result %d: adr = %h sim = %h, signature = %h", - tests[test], i, (testadr+i)*`XLEN/8, dut.uncore.dtim.RAM[testadr+i], signature[i]); + tests[test], i, (testadr+i)*(`XLEN/8), dut.uncore.dtim.RAM[testadr+i], signature[i]); $stop;//***debug end end i = i + 1; end /* verilator lint_on INFINITELOOP */ - if (errors == 0) $display("%s succeeded. Brilliant!!!", tests[test]); + if (errors == 0) begin + $display("%s succeeded. Brilliant!!!", tests[test]); + end else begin $display("%s failed with %d errors. :(", tests[test], errors); totalerrors = totalerrors+1;