diff --git a/fpga/generator/Makefile b/fpga/generator/Makefile index 551e4aca..8dff84a4 100644 --- a/fpga/generator/Makefile +++ b/fpga/generator/Makefile @@ -2,7 +2,7 @@ dst := IP # vcu118 #export XILINX_PART := xcvu9p-flga2104-2L-e #export XILINX_BOARD := xilinx.com:vcu118:part0:2.4 -#export FREQ := 30 +#export board := vcu118 # vcu108 export XILINX_PART := xcvu095-ffva2104-2-e @@ -13,7 +13,7 @@ export board := vcu108 all: FPGA FPGA: IP - vivado -mode batch -source wally.tcl 2>&1 | tee wally.log + vivado -mode tcl -source wally.tcl 2>&1 | tee wally.log IP: $(dst)/xlnx_proc_sys_reset.log \ $(dst)/xlnx_ddr4-$(board).log \ diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 756c5cc9..b3d81705 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -67,8 +67,8 @@ module fdivsqrtpreproc ( // ***can probably merge X LZC with conversion // cout the number of leading zeros - assign As = ForwardedSrcAE[`XLEN-1] & Funct3E[0]; - assign Bs = ForwardedSrcBE[`XLEN-1] & Funct3E[0]; + assign As = ForwardedSrcAE[`XLEN-1] & ~Funct3E[0]; + assign Bs = ForwardedSrcBE[`XLEN-1] & ~Funct3E[0]; assign A64 = W64E ? {{(`XLEN-32){As}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE; assign B64 = W64E ? 
{{(`XLEN-32){Bs}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE; diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index b7eb3cd0..e66fb35a 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -123,12 +123,12 @@ module lsu ( assign LSUStallM = DCacheStallM | HPTWStall | BusStall; ///////////////////////////////////////////////////////////////////////////////////////////// - // HPTW and Interlock FSM (only needed if VM supported) + // HPTW(only needed if VM supported) // MMU include PMP and is needed if any privileged supported ///////////////////////////////////////////////////////////////////////////////////////////// if(`VIRTMEM_SUPPORTED) begin : VIRTMEM_SUPPORTED - lsuvirtmem lsuvirtmem(.clk, .reset, .StallW, .MemRWM, .AtomicM, .ITLBMissF, .ITLBWriteF, + hptw hptw(.clk, .reset, .StallW, .MemRWM, .AtomicM, .ITLBMissF, .ITLBWriteF, .DTLBMissM, .DTLBWriteM, .InstrDAPageFaultF, .DataDAPageFaultM, .FlushW, .DCacheStallM, .SATP_REGW, .PCF, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, diff --git a/pipelined/src/lsu/lsuvirtmen.sv b/pipelined/src/lsu/lsuvirtmen.sv deleted file mode 100644 index d1476db8..00000000 --- a/pipelined/src/lsu/lsuvirtmen.sv +++ /dev/null @@ -1,117 +0,0 @@ -/////////////////////////////////////////// -// lsuvirtmem.sv -// -// Written: Ross Thompson ross1728@gmail.com January 30, 2022 -// Modified: -// -// Purpose: Encapsulates the hptw and muxes required to support virtual memory. -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module lsuvirtmem( - input logic clk, reset, StallW, - input logic [1:0] MemRWM, - input logic [1:0] AtomicM, - input logic ITLBMissF, - output logic ITLBWriteF, - input logic DTLBMissM, - output logic DTLBWriteM, - input logic InstrDAPageFaultF, - input logic DataDAPageFaultM, - input logic FlushW, - input logic DCacheStallM, - input logic [`XLEN-1:0] SATP_REGW, // from csr - input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, - input logic [1:0] STATUS_MPP, - input logic [1:0] PrivilegeModeW, - input logic [`XLEN-1:0] PCF, - input logic [`XLEN-1:0] ReadDataM, - input logic [`XLEN-1:0] WriteDataM, - input logic [2:0] Funct3M, - output logic [2:0] LSUFunct3M, - input logic [6:0] Funct7M, - output logic [6:0] LSUFunct7M, - output logic [`XLEN-1:0] PTE, - output logic [`XLEN-1:0] IMWriteDataM, - output logic [1:0] PageType, - output logic [1:0] PreLSURWM, - output logic [1:0] LSUAtomicM, - output logic [`XLEN+1:0] IHAdrM, - input logic [`XLEN+1:0] IEUAdrExtM, // *** can move internally. - - output logic HPTWStall, - output logic CPUBusy, - output logic SelHPTW, - output logic IgnoreRequestTLB); - - - logic AnyCPUReqM; - logic [`PA_BITS-1:0] HPTWAdr; - logic [`XLEN+1:0] HPTWAdrExt; - logic [1:0] HPTWRW; - logic [2:0] HPTWSize; - logic ITLBMissOrDAFaultF, ITLBMissOrDAFaultNoTrapF; - logic DTLBMissOrDAFaultM, DTLBMissOrDAFaultNoTrapM; - logic SelHPTWAdr; - - /// **** move to HPTW - // **** rename to walker mux? - // move all the muxes to walkermux and instantiate these in lsu under virtmem_supported. 
- assign ITLBMissOrDAFaultF = ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF); - assign DTLBMissOrDAFaultM = DTLBMissM | (`HPTW_WRITES_SUPPORTED & DataDAPageFaultM); - //assign ITLBMissOrDAFaultNoTrapF = ITLBMissOrDAFaultF & ~TrapM; - assign ITLBMissOrDAFaultNoTrapF = ITLBMissOrDAFaultF; - //assign DTLBMissOrDAFaultNoTrapM = DTLBMissOrDAFaultM & ~TrapM; - assign DTLBMissOrDAFaultNoTrapM = DTLBMissOrDAFaultM; - - hptw hptw( - .clk, .reset, .SATP_REGW, .PCF, .IEUAdrExtM, .MemRWM, .AtomicM, .FlushW, - .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, - .ITLBMissOrDAFaultNoTrapF, .DTLBMissOrDAFaultNoTrapM, - .PTE, .PageType, .ITLBWriteF, .DTLBWriteM, .HPTWReadPTE(ReadDataM), // *** should it be HPTWReadDataM - .DCacheStallM, .HPTWAdr, .HPTWRW, .HPTWSize, .IgnoreRequestTLB, .SelHPTW, .HPTWStall); - // *** possible future optimization of simplifying page table entry with precomputed misalignment (Ross) low priority - - // Once the walk is done and it is time to update the TLB we need to switch back - // to the orignal data virtual address. - assign SelHPTWAdr = SelHPTW & ~(DTLBWriteM | ITLBWriteF); - - // multiplex the outputs to LSU - if(`XLEN+2-`PA_BITS > 0) begin - logic [(`XLEN+2-`PA_BITS)-1:0] zeros; - assign zeros = '0; - assign HPTWAdrExt = {zeros, HPTWAdr}; - end else assign HPTWAdrExt = HPTWAdr; - mux2 #(2) rwmux(MemRWM, HPTWRW, SelHPTW, PreLSURWM); - mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, LSUFunct3M); - mux2 #(7) funct7mux(Funct7M, 7'b0, SelHPTW, LSUFunct7M); - mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LSUAtomicM); - mux2 #(`XLEN+2) lsupadrmux(IEUAdrExtM, HPTWAdrExt, SelHPTWAdr, IHAdrM); - if(`HPTW_WRITES_SUPPORTED) - mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, IMWriteDataM); - else assign IMWriteDataM = WriteDataM; - - // always block interrupts when using the hardware page table walker. 
- assign CPUBusy = StallW & ~SelHPTW; -endmodule diff --git a/pipelined/src/mmu/hptw.sv b/pipelined/src/mmu/hptw.sv index 52f7e868..b06814c8 100644 --- a/pipelined/src/mmu/hptw.sv +++ b/pipelined/src/mmu/hptw.sv @@ -30,30 +30,39 @@ `include "wally-config.vh" -module hptw - ( - input logic clk, reset, - input logic [`XLEN-1:0] SATP_REGW, // includes SATP.MODE to determine number of levels in page table - input logic [`XLEN-1:0] PCF, // addresses to translate - input logic [`XLEN+1:0] IEUAdrExtM, // addresses to translate - input logic [1:0] MemRWM, AtomicM, - input logic FlushW, - // system status - input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, - input logic [1:0] STATUS_MPP, - input logic [1:0] PrivilegeModeW, - (* mark_debug = "true" *) input logic ITLBMissOrDAFaultNoTrapF, DTLBMissOrDAFaultNoTrapM, // TLB Miss - input logic [`XLEN-1:0] HPTWReadPTE, // page table entry from LSU *** change to ReadDataM - input logic DCacheStallM, // stall from LSU - output logic [`XLEN-1:0] PTE, // page table entry to TLBs - output logic [1:0] PageType, // page type to TLBs - (* mark_debug = "true" *) output logic ITLBWriteF, DTLBWriteM, // write TLB with new entry - output logic [`PA_BITS-1:0] HPTWAdr, - output logic [1:0] HPTWRW, // HPTW requesting to write or read memory - output logic [2:0] HPTWSize, // 32 or 64 bit access. 
- output logic IgnoreRequestTLB, - output logic SelHPTW, - output logic HPTWStall +module hptw ( + input logic clk, reset, StallW, + input logic [`XLEN-1:0] SATP_REGW, // includes SATP.MODE to determine number of levels in page table + input logic [`XLEN-1:0] PCF, // addresses to translate + input logic [`XLEN+1:0] IEUAdrExtM, // addresses to translate + input logic [1:0] MemRWM, AtomicM, + // system status + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + input logic [1:0] STATUS_MPP, + input logic [1:0] PrivilegeModeW, + input logic [`XLEN-1:0] ReadDataM, // page table entry from LSU + input logic [`XLEN-1:0] WriteDataM, + input logic DCacheStallM, // stall from LSU + input logic [2:0] Funct3M, + input logic [6:0] Funct7M, + input logic ITLBMissF, + input logic DTLBMissM, + input logic FlushW, + input logic InstrDAPageFaultF, + input logic DataDAPageFaultM, + output logic [`XLEN-1:0] PTE, // page table entry to TLBs + output logic [1:0] PageType, // page type to TLBs + (* mark_debug = "true" *) output logic ITLBWriteF, DTLBWriteM, // write TLB with new entry + output logic [1:0] PreLSURWM, + output logic [`XLEN+1:0] IHAdrM, + output logic [`XLEN-1:0] IMWriteDataM, + output logic [1:0] LSUAtomicM, + output logic [2:0] LSUFunct3M, + output logic [6:0] LSUFunct7M, + output logic IgnoreRequestTLB, + output logic SelHPTW, + output logic CPUBusy, + output logic HPTWStall ); typedef enum logic [3:0] {L0_ADR, L0_RD, @@ -78,20 +87,28 @@ module hptw logic UpdatePTE; logic DAPageFault; logic [`PA_BITS-1:0] HPTWReadAdr; + logic SelHPTWAdr; + logic [`XLEN+1:0] HPTWAdrExt; + logic ITLBMissOrDAFaultF; + logic DTLBMissOrDAFaultM; + logic [`PA_BITS-1:0] HPTWAdr; + logic [1:0] HPTWRW; + logic [2:0] HPTWSize; // 32 or 64 bit access. 
+ (* mark_debug = "true" *) statetype WalkerState, NextWalkerState, InitialWalkerState; // Extract bits from CSRs and inputs assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0]; - assign TLBMiss = (DTLBMissOrDAFaultNoTrapM | ITLBMissOrDAFaultNoTrapF); + assign TLBMiss = (DTLBMissOrDAFaultM | ITLBMissOrDAFaultF); // Determine which address to translate assign TranslationVAdr = DTLBWalk ? IEUAdrExtM[`XLEN-1:0] : PCF; assign CurrentPPN = PTE[`PPN_BITS+9:10]; // State flops - flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissOrDAFaultNoTrapM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB) + flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissOrDAFaultM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB) assign PRegEn = HPTWRW[1] & ~DCacheStallM; flopenr #(`XLEN) PTEReg(clk, reset, PRegEn | UpdatePTE, NextPTE, PTE); // Capture page table entry from data cache @@ -120,7 +137,7 @@ module hptw logic [`XLEN-1:0] AccessedPTE; assign AccessedPTE = {PTE[`XLEN-1:8], (SetDirty | PTE[7]), 1'b1, PTE[5:0]}; // set accessed bit, conditionally set dirty bit - mux2 #(`XLEN) NextPTEMux(HPTWReadPTE, AccessedPTE, UpdatePTE, NextPTE); + mux2 #(`XLEN) NextPTEMux(ReadDataM, AccessedPTE, UpdatePTE, NextPTE); flopenr #(`PA_BITS) HPTWAdrWriteReg(clk, reset, SaveHPTWAdr, HPTWReadAdr, HPTWWriteAdr); assign SaveHPTWAdr = WalkerState == L0_ADR; @@ -151,9 +168,9 @@ module hptw assign DAPageFault = ValidLeafPTE & (~Accessed | SetDirty) & ~OtherPageFault; assign HPTWRW[0] = (WalkerState == UPDATE_PTE); - assign UpdatePTE = WalkerState == LEAF & DAPageFault; + assign UpdatePTE = (WalkerState == LEAF) & DAPageFault; end else begin // block: hptwwrites - assign NextPTE = HPTWReadPTE; + assign NextPTE = ReadDataM; assign HPTWAdr = HPTWReadAdr; assign DAPageFault = '0; assign UpdatePTE = '0; @@ -256,9 +273,36 @@ module hptw assign IgnoreRequestTLB = WalkerState 
== IDLE & TLBMiss; assign SelHPTW = WalkerState != IDLE; assign HPTWStall = (WalkerState != IDLE) | (WalkerState == IDLE & TLBMiss); - + + + assign ITLBMissOrDAFaultF = ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF); + assign DTLBMissOrDAFaultM = DTLBMissM | (`HPTW_WRITES_SUPPORTED & DataDAPageFaultM); + + // HPTW address/data/control muxing + + // Once the walk is done and it is time to update the TLB we need to switch back + // to the original data virtual address. + assign SelHPTWAdr = SelHPTW & ~(DTLBWriteM | ITLBWriteF); + // always block interrupts when using the hardware page table walker. + assign CPUBusy = StallW & ~SelHPTW; + + // multiplex the outputs to LSU + if(`XLEN+2-`PA_BITS > 0) begin // *** replace with XLEN=32 + logic [(`XLEN+2-`PA_BITS)-1:0] zeros; + assign zeros = '0; + assign HPTWAdrExt = {zeros, HPTWAdr}; + end else assign HPTWAdrExt = HPTWAdr; + mux2 #(2) rwmux(MemRWM, HPTWRW, SelHPTW, PreLSURWM); + mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, LSUFunct3M); + mux2 #(7) funct7mux(Funct7M, 7'b0, SelHPTW, LSUFunct7M); + mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LSUAtomicM); + mux2 #(`XLEN+2) lsupadrmux(IEUAdrExtM, HPTWAdrExt, SelHPTWAdr, IHAdrM); + if(`HPTW_WRITES_SUPPORTED) + mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, IMWriteDataM); + else assign IMWriteDataM = WriteDataM; + endmodule -// another idea. We keep gating the control by ~TrapM, but this adds considerable length to the critical path. -// should we do this differently? For example TLBMiss is gated by ~TrapM and then drives HPTWStall, which drives LSUStallM, which drives -// the hazard unit to issue stall and flush controlls. ~TrapM already suppresses these in the hazard unit. +// another idea. We keep gating the control by ~FlushW, but this adds considerable length to the critical path. +// should we do this differently? 
For example TLBMiss is gated by ~FlushW and then drives HPTWStall, which drives LSUStallM, which drives +// the hazard unit to issue stall and flush controls. ~FlushW already suppresses these in the hazard unit. diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 633ecb81..8ba3ed2f 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1403,12 +1403,12 @@ string imperas32f[] = '{ `RISCVARCHTEST, "rv32i_m/M/src/div-01.S", "rv32i_m/M/src/divu-01.S", + "rv32i_m/M/src/rem-01.S", + "rv32i_m/M/src/remu-01.S", "rv32i_m/M/src/mul-01.S", "rv32i_m/M/src/mulh-01.S", "rv32i_m/M/src/mulhsu-01.S", - "rv32i_m/M/src/mulhu-01.S", - "rv32i_m/M/src/rem-01.S", - "rv32i_m/M/src/remu-01.S" + "rv32i_m/M/src/mulhu-01.S" }; string arch32f[] = '{