From 7847ff33fc39631eb82c05d781dee79e55d56c7e Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 30 Dec 2021 21:14:57 +0000 Subject: [PATCH 1/2] Removed carry-save multiplier option from muldiv --- wally-pipelined/src/muldiv/mult_cs.sv | 101 --------------------- wally-pipelined/src/muldiv/redundantmul.sv | 2 - 2 files changed, 103 deletions(-) delete mode 100644 wally-pipelined/src/muldiv/mult_cs.sv diff --git a/wally-pipelined/src/muldiv/mult_cs.sv b/wally-pipelined/src/muldiv/mult_cs.sv deleted file mode 100644 index f297401c..00000000 --- a/wally-pipelined/src/muldiv/mult_cs.sv +++ /dev/null @@ -1,101 +0,0 @@ -/////////////////////////////////////////// -// mul_cs.sv -// -// Written: james.stine@okstate.edu 17 October 2021 -// Modified: -// -// Purpose: Carry/Save Multiplier output with Wallace Reduction -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -module mult_cs #(parameter WIDTH = 8) - (a, b, tc, sum, carry); - - input logic [WIDTH-1:0] a; - input logic [WIDTH-1:0] b; - input logic tc; - - output logic [2*WIDTH-1:0] sum; - output logic [2*WIDTH-1:0] carry; - - // PP array - logic [2*WIDTH-1:0] pp_array [0:WIDTH-1]; - logic [2*WIDTH-1:0] next_pp_array [0:WIDTH-1]; - logic [2*WIDTH-1:0] tmp_sum, tmp_carry; - logic [2*WIDTH-1:0] temp_pp; - logic [2*WIDTH-1:0] tmp_pp_carry; - logic [WIDTH-1:0] temp_b; - logic temp_bitgroup; - integer bit_pair, height, i; - - always_comb - begin - // For each multiplicand PP generation - for (bit_pair=0; bit_pair < WIDTH; bit_pair=bit_pair+1) - begin - // Shift to the right via P&H - temp_b = (b >> (bit_pair)); - temp_bitgroup = temp_b[0]; - // PP generation - case (temp_bitgroup) - 1'b0 : temp_pp = {2*WIDTH-1{1'b0}}; - 1'b1 : temp_pp = a; - default : temp_pp = {2*WIDTH-1{1'b0}}; - endcase - // Shift to the left via P&H - temp_pp = temp_pp << (bit_pair); - pp_array[bit_pair] = temp_pp; - end - - // Height is multiplier - height = WIDTH; - - // Wallace Tree PP reduction - while (height > 2) - begin - for (i=0; i < (height/3); i=i+1) - begin - next_pp_array[i*2] = pp_array[i*3]^pp_array[i*3+1]^pp_array[i*3+2]; - tmp_pp_carry = (pp_array[i*3] & pp_array[i*3+1]) | - (pp_array[i*3+1] & pp_array[i*3+2]) | - (pp_array[i*3] & pp_array[i*3+2]); - next_pp_array[i*2+1] = tmp_pp_carry << 1; - end - // Reasssign not divisible by 3 rows to next_pp_array - if ((height % 3) > 0) - begin - for (i=0; i < (height % 3); i=i+1) - next_pp_array[2 * (height/3) + i] = pp_array[3 * (height/3) + i]; - end - // Put back values in pp_array to start again - for (i=0; i < WIDTH; i=i+1) - pp_array[i] 
= next_pp_array[i]; - // Reduce height - height = height - (height/3); - end - // Sum is first row in reduced array - tmp_sum = pp_array[0]; - // Carry is second row in reduced array - tmp_carry = pp_array[1]; - end - - assign sum = tmp_sum; - assign carry = tmp_carry; - -endmodule // mult_cs - diff --git a/wally-pipelined/src/muldiv/redundantmul.sv b/wally-pipelined/src/muldiv/redundantmul.sv index 8eaf0e46..23fc2416 100644 --- a/wally-pipelined/src/muldiv/redundantmul.sv +++ b/wally-pipelined/src/muldiv/redundantmul.sv @@ -47,8 +47,6 @@ module redundantmul #(parameter WIDTH =8)( DW02_multp #(WIDTH, WIDTH, 2*WIDTH+2) mul(.a, .b, .tc(1'b0), .out0(tmp_out0), .out1(tmp_out1)); assign out0 = tmp_out0[2*WIDTH-1:0]; assign out1 = tmp_out1[2*WIDTH-1:0]; - end else if (`DESIGN_COMPILER == 2) begin:mul // *** need to remove this - mult_cs #(WIDTH) mul(.a, .b, .tc(1'b0), .sum(out0), .carry(out1)); end else begin:mul // force a nonredunant multipler. This will simulate properly and also is appropriate for FPGAs. 
assign out0 = a * b; assign out1 = 0; From 347896064dd65cd128562d994799e5dd1e984ddc Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 30 Dec 2021 21:21:00 +0000 Subject: [PATCH 2/2] Removed unnecessary generate inside hptw --- wally-pipelined/src/lsu/lsu.sv | 263 +++++++++++++++--------------- wally-pipelined/src/mmu/hptw.sv | 280 ++++++++++++++++---------------- 2 files changed, 267 insertions(+), 276 deletions(-) diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 5472f977..dfd21063 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -121,73 +121,73 @@ module lsu assign IEUAdrExtM = {2'b00, IEUAdrM}; generate - if(`MEM_VIRTMEM) begin : MEM_VIRTMEM - logic AnyCPUReqM; - logic [`PA_BITS-1:0] HPTWAdr; - logic HPTWRead; - logic [2:0] HPTWSize; - logic SelReplayCPURequest; + if(`MEM_VIRTMEM) begin : MEM_VIRTMEM + logic AnyCPUReqM; + logic [`PA_BITS-1:0] HPTWAdr; + logic HPTWRead; + logic [2:0] HPTWSize; + logic SelReplayCPURequest; - assign AnyCPUReqM = (|MemRWM) | (|AtomicM); + assign AnyCPUReqM = (|MemRWM) | (|AtomicM); - interlockfsm interlockfsm (.clk, .reset, .AnyCPUReqM, .ITLBMissF, .ITLBWriteF, - .DTLBMissM, .DTLBWriteM, .ExceptionM, .PendingInterruptM, .DCacheStall, - .InterlockStall, .SelReplayCPURequest, .SelHPTW, - .IgnoreRequest); - - hptw hptw(.clk, .reset, .SATP_REGW, .PCF, .IEUAdrM, - .ITLBMissF(ITLBMissF & ~PendingInterruptM), - .DTLBMissM(DTLBMissM & ~PendingInterruptM), - .MemRWM, .PTE, .PageType, .ITLBWriteF, .DTLBWriteM, - .HPTWReadPTE(ReadDataM), - .DCacheStall, .HPTWAdr, .HPTWRead, .HPTWSize, .AnyCPUReqM); + interlockfsm interlockfsm (.clk, .reset, .AnyCPUReqM, .ITLBMissF, .ITLBWriteF, + .DTLBMissM, .DTLBWriteM, .ExceptionM, .PendingInterruptM, .DCacheStall, + .InterlockStall, .SelReplayCPURequest, .SelHPTW, + .IgnoreRequest); + + hptw hptw(.clk, .reset, .SATP_REGW, .PCF, .IEUAdrM, + .ITLBMissF(ITLBMissF & ~PendingInterruptM), + .DTLBMissM(DTLBMissM & ~PendingInterruptM), + 
.MemRWM, .PTE, .PageType, .ITLBWriteF, .DTLBWriteM, + .HPTWReadPTE(ReadDataM), + .DCacheStall, .HPTWAdr, .HPTWRead, .HPTWSize, .AnyCPUReqM); - // arbiter between IEU and hptw - - // multiplex the outputs to LSU - mux2 #(2) rwmux(MemRWM, {HPTWRead, 1'b0}, SelHPTW, PreLsuRWM); - mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, LsuFunct3M); - mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LsuAtomicM); - mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLsuAdrE); - mux2 #(`PA_BITS) lsupadrmux(IEUAdrExtM[`PA_BITS-1:0], HPTWAdr, SelHPTW, PreLsuPAdrM); + // arbiter between IEU and hptw + + // multiplex the outputs to LSU + mux2 #(2) rwmux(MemRWM, {HPTWRead, 1'b0}, SelHPTW, PreLsuRWM); + mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, LsuFunct3M); + mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LsuAtomicM); + mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLsuAdrE); + mux2 #(`PA_BITS) lsupadrmux(IEUAdrExtM[`PA_BITS-1:0], HPTWAdr, SelHPTW, PreLsuPAdrM); - // always block interrupts when using the hardware page table walker. - assign CPUBusy = StallW & ~SelHPTW; - - // It is not possible to pipeline hptw as the following load will depend on the previous load's - // data. Therefore we don't need a pipeline register - //flop #(`PA_BITS) HPTWAdrMReg(clk, HPTWAdr, HPTWAdrM); // delay HPTWAdrM by a cycle + // always block interrupts when using the hardware page table walker. + assign CPUBusy = StallW & ~SelHPTW; + + // It is not possible to pipeline hptw as the following load will depend on the previous load's + // data. 
Therefore we don't need a pipeline register + //flop #(`PA_BITS) HPTWAdrMReg(clk, HPTWAdr, HPTWAdrM); // delay HPTWAdrM by a cycle - // Specify which type of page fault is occurring - assign DTLBLoadPageFaultM = DTLBPageFaultM & PreLsuRWM[1]; - assign DTLBStorePageFaultM = DTLBPageFaultM & PreLsuRWM[0]; + // Specify which type of page fault is occurring + assign DTLBLoadPageFaultM = DTLBPageFaultM & PreLsuRWM[1]; + assign DTLBStorePageFaultM = DTLBPageFaultM & PreLsuRWM[0]; - // When replaying CPU memory request after PTW select the IEUAdrM for correct address. - assign LsuAdrE = SelReplayCPURequest ? IEUAdrM[11:0] : PreLsuAdrE; + // When replaying CPU memory request after PTW select the IEUAdrM for correct address. + assign LsuAdrE = SelReplayCPURequest ? IEUAdrM[11:0] : PreLsuAdrE; - end // if (`MEM_VIRTMEM) - else begin - assign InterlockStall = 1'b0; - - assign LsuAdrE = PreLsuAdrE; - assign SelHPTW = 1'b0; - assign IgnoreRequest = 1'b0; + end // if (`MEM_VIRTMEM) + else begin + assign InterlockStall = 1'b0; + + assign LsuAdrE = PreLsuAdrE; + assign SelHPTW = 1'b0; + assign IgnoreRequest = 1'b0; - assign PTE = '0; - assign PageType = '0; - assign DTLBWriteM = 1'b0; - assign ITLBWriteF = 1'b0; - - assign PreLsuRWM = MemRWM; - assign LsuFunct3M = Funct3M; - assign LsuAtomicM = AtomicM; - assign PreLsuAdrE = IEUAdrE[11:0]; - assign PreLsuPAdrM = IEUAdrExtM; - assign CPUBusy = StallW; - - assign DTLBLoadPageFaultM = 1'b0; - assign DTLBStorePageFaultM = 1'b0; - end + assign PTE = '0; + assign PageType = '0; + assign DTLBWriteM = 1'b0; + assign ITLBWriteF = 1'b0; + + assign PreLsuRWM = MemRWM; + assign LsuFunct3M = Funct3M; + assign LsuAtomicM = AtomicM; + assign PreLsuAdrE = IEUAdrE[11:0]; + assign PreLsuPAdrM = IEUAdrExtM; + assign CPUBusy = StallW; + + assign DTLBLoadPageFaultM = 1'b0; + assign DTLBStorePageFaultM = 1'b0; + end endgenerate // **** look into this confusing signal. 
@@ -201,54 +201,54 @@ module lsu assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; generate - if(`ZICSR_SUPPORTED == 1) begin : dmmu - logic DataMisalignedM; + if(`ZICSR_SUPPORTED == 1) begin : dmmu + logic DataMisalignedM; - mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) - dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, - .PrivilegeModeW, .DisableTranslation(SelHPTW), - .PAdr(PreLsuPAdrM), - .VAdr(IEUAdrM), - .Size(LsuFunct3M[1:0]), - .PTE, - .PageTypeWriteVal(PageType), - .TLBWrite(DTLBWriteM), - .TLBFlush(DTLBFlushM), - .PhysicalAddress(LsuPAdrM), - .TLBMiss(DTLBMissM), - .Cacheable(CacheableM), - .Idempotent(), .AtomicAllowed(), - .TLBPageFault(DTLBPageFaultM), - .InstrAccessFaultF(), .LoadAccessFaultM, .StoreAccessFaultM, - .AtomicAccessM(1'b0), .ExecuteAccessF(1'b0), /// atomicaccessm is probably a bug - .WriteAccessM(PreLsuRWM[0]), .ReadAccessM(PreLsuRWM[1]), - .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW - ); // *** the pma/pmp instruction access faults don't really matter here. is it possible to parameterize which outputs exist? + mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) + dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, + .PrivilegeModeW, .DisableTranslation(SelHPTW), + .PAdr(PreLsuPAdrM), + .VAdr(IEUAdrM), + .Size(LsuFunct3M[1:0]), + .PTE, + .PageTypeWriteVal(PageType), + .TLBWrite(DTLBWriteM), + .TLBFlush(DTLBFlushM), + .PhysicalAddress(LsuPAdrM), + .TLBMiss(DTLBMissM), + .Cacheable(CacheableM), + .Idempotent(), .AtomicAllowed(), + .TLBPageFault(DTLBPageFaultM), + .InstrAccessFaultF(), .LoadAccessFaultM, .StoreAccessFaultM, + .AtomicAccessM(1'b0), .ExecuteAccessF(1'b0), /// atomicaccessm is probably a bug + .WriteAccessM(PreLsuRWM[0]), .ReadAccessM(PreLsuRWM[1]), + .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW + ); // *** the pma/pmp instruction access faults don't really matter here. is it possible to parameterize which outputs exist? 
- // Determine if an Unaligned access is taking place - // hptw guarantees alignment, only check inputs from IEU. - always_comb - case(Funct3M[1:0]) - 2'b00: DataMisalignedM = 0; // lb, sb, lbu - 2'b01: DataMisalignedM = IEUAdrM[0]; // lh, sh, lhu - 2'b10: DataMisalignedM = IEUAdrM[1] | IEUAdrM[0]; // lw, sw, flw, fsw, lwu - 2'b11: DataMisalignedM = |IEUAdrM[2:0]; // ld, sd, fld, fsd - endcase + // Determine if an Unaligned access is taking place + // hptw guarantees alignment, only check inputs from IEU. + always_comb + case(Funct3M[1:0]) + 2'b00: DataMisalignedM = 0; // lb, sb, lbu + 2'b01: DataMisalignedM = IEUAdrM[0]; // lh, sh, lhu + 2'b10: DataMisalignedM = IEUAdrM[1] | IEUAdrM[0]; // lw, sw, flw, fsw, lwu + 2'b11: DataMisalignedM = |IEUAdrM[2:0]; // ld, sd, fld, fsd + endcase - // If the CPU's (not HPTW's) request is a page fault. - assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1]; - assign StoreMisalignedFaultM = DataMisalignedM & MemRWM[0]; - - end else begin - assign LsuPAdrM = PreLsuPAdrM; - assign DTLBMissM = 0; - assign CacheableM = 1; - assign DTLBPageFaultM = 0; - assign LoadAccessFaultM = 0; - assign StoreAccessFaultM = 0; - assign LoadMisalignedFaultM = 0; - assign StoreMisalignedFaultM = 0; - end + // Raise a misaligned fault only when the CPU's (not HPTW's) request is misaligned. + assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1]; + assign StoreMisalignedFaultM = DataMisalignedM & MemRWM[0]; + + end else begin + assign LsuPAdrM = PreLsuPAdrM; + assign DTLBMissM = 0; + assign CacheableM = 1; + assign DTLBPageFaultM = 0; + assign LoadAccessFaultM = 0; + assign StoreAccessFaultM = 0; + assign LoadMisalignedFaultM = 0; + assign StoreMisalignedFaultM = 0; + end endgenerate assign LSUStall = DCacheStall | InterlockStall | BusStall; @@ -257,18 +257,17 @@ module lsu // Move generate from lrsc to outside this module. 
// use PreLsu as prefix for lrsc generate - if (`A_SUPPORTED) begin:lrsc - assign MemReadM = PreLsuRWM[1] & ~(IgnoreRequest) & ~DTLBMissM; - lrsc lrsc(.clk, .reset, .FlushW, .CPUBusy, .MemReadM, .PreLsuRWM, .LsuAtomicM, .LsuPAdrM, - .SquashSCW, .LsuRWM); - end else begin:lrsc - assign SquashSCW = 0; - assign LsuRWM = PreLsuRWM; - end + if (`A_SUPPORTED) begin:lrsc + assign MemReadM = PreLsuRWM[1] & ~(IgnoreRequest) & ~DTLBMissM; + lrsc lrsc(.clk, .reset, .FlushW, .CPUBusy, .MemReadM, .PreLsuRWM, .LsuAtomicM, .LsuPAdrM, + .SquashSCW, .LsuRWM); + end else begin:lrsc + assign SquashSCW = 0; + assign LsuRWM = PreLsuRWM; + end endgenerate - // conditional // 1. ram // controlled by `MEM_DTIM // 2. cache `MEM_DCACHE @@ -306,25 +305,25 @@ module lsu logic SelUncachedAdr; generate - if(`MEM_DCACHE) begin : dcache - dcache dcache(.clk, .reset, .CPUBusy, - .LsuRWM, .FlushDCacheM, .LsuAtomicM, .LsuAdrE, .LsuPAdrM, - .FinalWriteDataM, .ReadDataWordM, .DCacheStall, - .DCacheMiss, .DCacheAccess, - .IgnoreRequest, .CacheableM, .DCacheCommittedM, - .DCacheBusAdr, .ReadDataBlockSetsM, .DCacheMemWriteData, - .DCacheFetchLine, .DCacheWriteLine,.DCacheBusAck); - end else begin : passthrough - assign ReadDataWordM = 0; - assign DCacheStall = 0; - assign DCacheMiss = 1; - assign DCacheAccess = CacheableM; - assign DCacheCommittedM = 0; - assign DCacheWriteLine = 0; - assign DCacheFetchLine = 0; - assign DCacheBusAdr = 0; - assign ReadDataBlockSetsM[0] = 0; - end + if(`MEM_DCACHE) begin : dcache + dcache dcache(.clk, .reset, .CPUBusy, + .LsuRWM, .FlushDCacheM, .LsuAtomicM, .LsuAdrE, .LsuPAdrM, + .FinalWriteDataM, .ReadDataWordM, .DCacheStall, + .DCacheMiss, .DCacheAccess, + .IgnoreRequest, .CacheableM, .DCacheCommittedM, + .DCacheBusAdr, .ReadDataBlockSetsM, .DCacheMemWriteData, + .DCacheFetchLine, .DCacheWriteLine,.DCacheBusAck); + end else begin : passthrough + assign ReadDataWordM = 0; + assign DCacheStall = 0; + assign DCacheMiss = 1; + assign DCacheAccess = CacheableM; + assign 
DCacheCommittedM = 0; + assign DCacheWriteLine = 0; + assign DCacheFetchLine = 0; + assign DCacheBusAdr = 0; + assign ReadDataBlockSetsM[0] = 0; + end endgenerate diff --git a/wally-pipelined/src/mmu/hptw.sv b/wally-pipelined/src/mmu/hptw.sv index 53083953..81255308 100644 --- a/wally-pipelined/src/mmu/hptw.sv +++ b/wally-pipelined/src/mmu/hptw.sv @@ -48,162 +48,154 @@ module hptw output logic [2:0] HPTWSize // 32 or 64 bit access. ); - typedef enum {L0_ADR, L0_RD, - L1_ADR, L1_RD, - L2_ADR, L2_RD, - L3_ADR, L3_RD, - LEAF, IDLE} statetype; // *** placed outside generate statement to remove synthesis errors + typedef enum {L0_ADR, L0_RD, + L1_ADR, L1_RD, + L2_ADR, L2_RD, + L3_ADR, L3_RD, + LEAF, IDLE} statetype; // *** placed outside generate statement to remove synthesis errors - generate - if (`MEM_VIRTMEM) begin:virtmem - logic DTLBWalk; // register TLBs translation miss requests - logic [`PPN_BITS-1:0] BasePageTablePPN; - logic [`PPN_BITS-1:0] CurrentPPN; - logic MemWrite; - logic Executable, Writable, Readable, Valid; - logic Misaligned, MegapageMisaligned; - logic ValidPTE, LeafPTE, ValidLeafPTE, ValidNonLeafPTE; - logic StartWalk; - logic TLBMiss; - logic PRegEn; - logic [1:0] NextPageType; - logic [`SVMODE_BITS-1:0] SvMode; - logic [`XLEN-1:0] TranslationVAdr; - - (* mark_debug = "true" *) statetype WalkerState, NextWalkerState, InitialWalkerState; + logic DTLBWalk; // register TLBs translation miss requests + logic [`PPN_BITS-1:0] BasePageTablePPN; + logic [`PPN_BITS-1:0] CurrentPPN; + logic MemWrite; + logic Executable, Writable, Readable, Valid; + logic Misaligned, MegapageMisaligned; + logic ValidPTE, LeafPTE, ValidLeafPTE, ValidNonLeafPTE; + logic StartWalk; + logic TLBMiss; + logic PRegEn; + logic [1:0] NextPageType; + logic [`SVMODE_BITS-1:0] SvMode; + logic [`XLEN-1:0] TranslationVAdr; - // Extract bits from CSRs and inputs - assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; - assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0]; - assign MemWrite 
= MemRWM[0]; - assign TLBMiss = (DTLBMissM | ITLBMissF); + (* mark_debug = "true" *) statetype WalkerState, NextWalkerState, InitialWalkerState; - // Determine which address to translate - assign TranslationVAdr = DTLBWalk ? IEUAdrM : PCF; - assign CurrentPPN = PTE[`PPN_BITS+9:10]; + // Extract bits from CSRs and inputs + assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; + assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0]; + assign MemWrite = MemRWM[0]; + assign TLBMiss = (DTLBMissM | ITLBMissF); - // State flops - flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB) - assign PRegEn = HPTWRead & ~DCacheStall; - flopenr #(`XLEN) PTEReg(clk, reset, PRegEn, HPTWReadPTE, PTE); // Capture page table entry from data cache - - // Assign PTE descriptors common across all XLEN values - // For non-leaf PTEs, D, A, U bits are reserved and ignored. They do not cause faults while walking the page table - assign {Executable, Writable, Readable, Valid} = PTE[3:0]; - assign LeafPTE = Executable | Writable | Readable; - assign ValidPTE = Valid && ~(Writable && ~Readable); - assign ValidLeafPTE = ValidPTE & LeafPTE; - assign ValidNonLeafPTE = ValidPTE & ~LeafPTE; - - // Enable and select signals based on states - assign StartWalk = (WalkerState == IDLE) & TLBMiss; - assign HPTWRead = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD); - assign DTLBWriteM = (WalkerState == LEAF) & DTLBWalk; - assign ITLBWriteF = (WalkerState == LEAF) & ~DTLBWalk; + // Determine which address to translate + assign TranslationVAdr = DTLBWalk ? 
IEUAdrM : PCF; + assign CurrentPPN = PTE[`PPN_BITS+9:10]; - // FSM to track PageType based on the levels of the page table traversed - flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType); - always_comb - case (WalkerState) - L3_RD: NextPageType = 2'b11; // terapage - L2_RD: NextPageType = 2'b10; // gigapage - L1_RD: NextPageType = 2'b01; // megapage - L0_RD: NextPageType = 2'b00; // kilopage - default: NextPageType = PageType; + // State flops + flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB) + assign PRegEn = HPTWRead & ~DCacheStall; + flopenr #(`XLEN) PTEReg(clk, reset, PRegEn, HPTWReadPTE, PTE); // Capture page table entry from data cache + + // Assign PTE descriptors common across all XLEN values + // For non-leaf PTEs, D, A, U bits are reserved and ignored. They do not cause faults while walking the page table + assign {Executable, Writable, Readable, Valid} = PTE[3:0]; + assign LeafPTE = Executable | Writable | Readable; + assign ValidPTE = Valid && ~(Writable && ~Readable); + assign ValidLeafPTE = ValidPTE & LeafPTE; + assign ValidNonLeafPTE = ValidPTE & ~LeafPTE; + + // Enable and select signals based on states + assign StartWalk = (WalkerState == IDLE) & TLBMiss; + assign HPTWRead = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD); + assign DTLBWriteM = (WalkerState == LEAF) & DTLBWalk; + assign ITLBWriteF = (WalkerState == LEAF) & ~DTLBWalk; + + // FSM to track PageType based on the levels of the page table traversed + flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType); + always_comb + case (WalkerState) + L3_RD: NextPageType = 2'b11; // terapage + L2_RD: NextPageType = 2'b10; // gigapage + L1_RD: NextPageType = 2'b01; // megapage + L0_RD: NextPageType = 2'b00; // kilopage + default: NextPageType = PageType; + endcase + + // HPTWAdr muxing + if (`XLEN==32) begin // RV32 + logic [9:0] VPN; + logic 
[`PPN_BITS-1:0] PPN; + assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state + assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN; + assign HPTWAdr = {PPN, VPN, 2'b00}; + assign HPTWSize = 3'b010; + end else begin // RV64 + logic [8:0] VPN; + logic [`PPN_BITS-1:0] PPN; + always_comb + case (WalkerState) // select VPN field based on HPTW state + L3_ADR, L3_RD: VPN = TranslationVAdr[47:39]; + L2_ADR, L2_RD: VPN = TranslationVAdr[38:30]; + L1_ADR, L1_RD: VPN = TranslationVAdr[29:21]; + default: VPN = TranslationVAdr[20:12]; endcase + assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) | + (SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN; + assign HPTWAdr = {PPN, VPN, 3'b000}; + assign HPTWSize = 3'b011; + end - // HPTWAdr muxing - if (`XLEN==32) begin // RV32 - logic [9:0] VPN; - logic [`PPN_BITS-1:0] PPN; - assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state - assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN; - assign HPTWAdr = {PPN, VPN, 2'b00}; - assign HPTWSize = 3'b010; - end else begin // RV64 - logic [8:0] VPN; - logic [`PPN_BITS-1:0] PPN; - always_comb - case (WalkerState) // select VPN field based on HPTW state - L3_ADR, L3_RD: VPN = TranslationVAdr[47:39]; - L2_ADR, L2_RD: VPN = TranslationVAdr[38:30]; - L1_ADR, L1_RD: VPN = TranslationVAdr[29:21]; - default: VPN = TranslationVAdr[20:12]; - endcase - assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) | - (SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? 
BasePageTablePPN : CurrentPPN; - assign HPTWAdr = {PPN, VPN, 3'b000}; - assign HPTWSize = 3'b011; - end + // Initial state and misalignment for RV32/64 + if (`XLEN == 32) begin + assign InitialWalkerState = L1_ADR; + assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0 + // *** Possible bug - should be L1_ADR? + assign Misaligned = ((WalkerState == L0_ADR) & MegapageMisaligned); + end else begin + logic GigapageMisaligned, TerapageMisaligned; + assign InitialWalkerState = (SvMode == `SV48) ? L3_ADR : L2_ADR; + assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0 + assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0 + assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0 + assign Misaligned = ((WalkerState == L2_ADR) & TerapageMisaligned) | ((WalkerState == L1_ADR) & GigapageMisaligned) | ((WalkerState == L0_ADR) & MegapageMisaligned); + end - // Initial state and misalignment for RV32/64 - if (`XLEN == 32) begin - assign InitialWalkerState = L1_ADR; - assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0 - // *** Possible bug - should be L1_ADR? - assign Misaligned = ((WalkerState == L0_ADR) & MegapageMisaligned); - end else begin - logic GigapageMisaligned, TerapageMisaligned; - assign InitialWalkerState = (SvMode == `SV48) ? L3_ADR : L2_ADR; - assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0 - assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0 - assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0 - assign Misaligned = ((WalkerState == L2_ADR) & TerapageMisaligned) | ((WalkerState == L1_ADR) & GigapageMisaligned) | ((WalkerState == L0_ADR) & MegapageMisaligned); - end - - // Page Table Walker FSM + // Page Table Walker FSM // If the setup time on the D$ RAM is short, it should be possible to merge the LEVELx_READ and LEVELx states // to decrease the latency of the HPTW. 
However, if the D$ is a cycle limiter, it's better to leave the // HPTW as shown below to keep the D$ setup time out of the critical path. // *** Is this really true. Talk with Ross. Seems like it's the next state logic on critical path instead. flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); always_comb - case (WalkerState) - IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState; - else NextWalkerState = IDLE; - L3_ADR: NextWalkerState = L3_RD; // first access in SV48 - L3_RD: if (DCacheStall) NextWalkerState = L3_RD; - else NextWalkerState = L2_ADR; -// LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; -// else if (ValidNonLeafPTE) NextWalkerState = L2_ADR; -// else NextWalkerState = FAULT; - L2_ADR: if (InitialWalkerState == L2_ADR) NextWalkerState = L2_RD; // first access in SV39 - else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages - else if (ValidNonLeafPTE) NextWalkerState = L2_RD; - else NextWalkerState = LEAF; - L2_RD: if (DCacheStall) NextWalkerState = L2_RD; - else NextWalkerState = L1_ADR; -// LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; -// else if (ValidNonLeafPTE) NextWalkerState = L1_ADR; -// else NextWalkerState = FAULT; - L1_ADR: if (InitialWalkerState == L1_ADR) NextWalkerState = L1_RD; // first access in SV32 - else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages - else if (ValidNonLeafPTE) NextWalkerState = L1_RD; - else NextWalkerState = LEAF; - L1_RD: if (DCacheStall) NextWalkerState = L1_RD; - else NextWalkerState = L0_ADR; -// LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; -// else if (ValidNonLeafPTE) NextWalkerState = L0_ADR; -// else NextWalkerState = FAULT; - L0_ADR: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages - else if 
(ValidNonLeafPTE) NextWalkerState = L0_RD; - else NextWalkerState = LEAF; - L0_RD: if (DCacheStall) NextWalkerState = L0_RD; - else NextWalkerState = LEAF; -// LEVEL0: if (ValidLeafPTE) NextWalkerState = LEAF; -// else NextWalkerState = FAULT; - LEAF: NextWalkerState = IDLE; // updates TLB - default: begin - // synthesis translate_off - $error("Default state in HPTW should be unreachable"); - // synthesis translate_on - NextWalkerState = IDLE; // should never be reached - end - endcase - end else begin // No Virtual memory supported; tie HPTW outputs to 0 - assign HPTWRead = 0; - assign HPTWAdr = 0; - assign HPTWSize = 3'b000; - end - endgenerate + case (WalkerState) + IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState; + else NextWalkerState = IDLE; + L3_ADR: NextWalkerState = L3_RD; // first access in SV48 + L3_RD: if (DCacheStall) NextWalkerState = L3_RD; + else NextWalkerState = L2_ADR; + // LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; + // else if (ValidNonLeafPTE) NextWalkerState = L2_ADR; + // else NextWalkerState = FAULT; + L2_ADR: if (InitialWalkerState == L2_ADR) NextWalkerState = L2_RD; // first access in SV39 + else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cycle for all Lx_ADR superpages + else if (ValidNonLeafPTE) NextWalkerState = L2_RD; + else NextWalkerState = LEAF; + L2_RD: if (DCacheStall) NextWalkerState = L2_RD; + else NextWalkerState = L1_ADR; + // LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; + // else if (ValidNonLeafPTE) NextWalkerState = L1_ADR; + // else NextWalkerState = FAULT; + L1_ADR: if (InitialWalkerState == L1_ADR) NextWalkerState = L1_RD; // first access in SV32 + else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cycle for all Lx_ADR superpages + else if (ValidNonLeafPTE) NextWalkerState = L1_RD; + else NextWalkerState = LEAF; + L1_RD: if (DCacheStall) NextWalkerState = L1_RD; + else NextWalkerState = 
L0_ADR; + // LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; + // else if (ValidNonLeafPTE) NextWalkerState = L0_ADR; + // else NextWalkerState = FAULT; + L0_ADR: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cycle for all Lx_ADR superpages + else if (ValidNonLeafPTE) NextWalkerState = L0_RD; + else NextWalkerState = LEAF; + L0_RD: if (DCacheStall) NextWalkerState = L0_RD; + else NextWalkerState = LEAF; + // LEVEL0: if (ValidLeafPTE) NextWalkerState = LEAF; + // else NextWalkerState = FAULT; + LEAF: NextWalkerState = IDLE; // updates TLB + default: begin + // synthesis translate_off + $error("Default state in HPTW should be unreachable"); + // synthesis translate_on + NextWalkerState = IDLE; // should never be reached + end + endcase endmodule