Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

This commit is contained in:
Ross Thompson 2021-12-30 15:26:41 -06:00
commit 54d71006b1
4 changed files with 267 additions and 379 deletions

View File

@ -121,73 +121,73 @@ module lsu
assign IEUAdrExtM = {2'b00, IEUAdrM}; assign IEUAdrExtM = {2'b00, IEUAdrM};
generate generate
if(`MEM_VIRTMEM) begin : MEM_VIRTMEM if(`MEM_VIRTMEM) begin : MEM_VIRTMEM
logic AnyCPUReqM; logic AnyCPUReqM;
logic [`PA_BITS-1:0] HPTWAdr; logic [`PA_BITS-1:0] HPTWAdr;
logic HPTWRead; logic HPTWRead;
logic [2:0] HPTWSize; logic [2:0] HPTWSize;
logic SelReplayCPURequest; logic SelReplayCPURequest;
assign AnyCPUReqM = (|MemRWM) | (|AtomicM); assign AnyCPUReqM = (|MemRWM) | (|AtomicM);
interlockfsm interlockfsm (.clk, .reset, .AnyCPUReqM, .ITLBMissF, .ITLBWriteF, interlockfsm interlockfsm (.clk, .reset, .AnyCPUReqM, .ITLBMissF, .ITLBWriteF,
.DTLBMissM, .DTLBWriteM, .ExceptionM, .PendingInterruptM, .DCacheStall, .DTLBMissM, .DTLBWriteM, .ExceptionM, .PendingInterruptM, .DCacheStall,
.InterlockStall, .SelReplayCPURequest, .SelHPTW, .InterlockStall, .SelReplayCPURequest, .SelHPTW,
.IgnoreRequest); .IgnoreRequest);
hptw hptw(.clk, .reset, .SATP_REGW, .PCF, .IEUAdrM, hptw hptw(.clk, .reset, .SATP_REGW, .PCF, .IEUAdrM,
.ITLBMissF(ITLBMissF & ~PendingInterruptM), .ITLBMissF(ITLBMissF & ~PendingInterruptM),
.DTLBMissM(DTLBMissM & ~PendingInterruptM), .DTLBMissM(DTLBMissM & ~PendingInterruptM),
.MemRWM, .PTE, .PageType, .ITLBWriteF, .DTLBWriteM, .MemRWM, .PTE, .PageType, .ITLBWriteF, .DTLBWriteM,
.HPTWReadPTE(ReadDataM), .HPTWReadPTE(ReadDataM),
.DCacheStall, .HPTWAdr, .HPTWRead, .HPTWSize, .AnyCPUReqM); .DCacheStall, .HPTWAdr, .HPTWRead, .HPTWSize, .AnyCPUReqM);
// arbiter between IEU and hptw // arbiter between IEU and hptw
// multiplex the outputs to LSU // multiplex the outputs to LSU
mux2 #(2) rwmux(MemRWM, {HPTWRead, 1'b0}, SelHPTW, PreLsuRWM); mux2 #(2) rwmux(MemRWM, {HPTWRead, 1'b0}, SelHPTW, PreLsuRWM);
mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, LsuFunct3M); mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, LsuFunct3M);
mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LsuAtomicM); mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LsuAtomicM);
mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLsuAdrE); mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLsuAdrE);
mux2 #(`PA_BITS) lsupadrmux(IEUAdrExtM[`PA_BITS-1:0], HPTWAdr, SelHPTW, PreLsuPAdrM); mux2 #(`PA_BITS) lsupadrmux(IEUAdrExtM[`PA_BITS-1:0], HPTWAdr, SelHPTW, PreLsuPAdrM);
// always block interrupts when using the hardware page table walker. // always block interrupts when using the hardware page table walker.
assign CPUBusy = StallW & ~SelHPTW; assign CPUBusy = StallW & ~SelHPTW;
// It is not possible to pipeline hptw as the following load will depend on the previous load's // It is not possible to pipeline hptw as the following load will depend on the previous load's
// data. Therefore we don't need a pipeline register // data. Therefore we don't need a pipeline register
//flop #(`PA_BITS) HPTWAdrMReg(clk, HPTWAdr, HPTWAdrM); // delay HPTWAdrM by a cycle //flop #(`PA_BITS) HPTWAdrMReg(clk, HPTWAdr, HPTWAdrM); // delay HPTWAdrM by a cycle
// Specify which type of page fault is occurring // Specify which type of page fault is occurring
assign DTLBLoadPageFaultM = DTLBPageFaultM & PreLsuRWM[1]; assign DTLBLoadPageFaultM = DTLBPageFaultM & PreLsuRWM[1];
assign DTLBStorePageFaultM = DTLBPageFaultM & PreLsuRWM[0]; assign DTLBStorePageFaultM = DTLBPageFaultM & PreLsuRWM[0];
// When replaying CPU memory request after PTW select the IEUAdrM for correct address. // When replaying CPU memory request after PTW select the IEUAdrM for correct address.
assign LsuAdrE = SelReplayCPURequest ? IEUAdrM[11:0] : PreLsuAdrE; assign LsuAdrE = SelReplayCPURequest ? IEUAdrM[11:0] : PreLsuAdrE;
end // if (`MEM_VIRTMEM) end // if (`MEM_VIRTMEM)
else begin else begin
assign InterlockStall = 1'b0; assign InterlockStall = 1'b0;
assign LsuAdrE = PreLsuAdrE; assign LsuAdrE = PreLsuAdrE;
assign SelHPTW = 1'b0; assign SelHPTW = 1'b0;
assign IgnoreRequest = 1'b0; assign IgnoreRequest = 1'b0;
assign PTE = '0; assign PTE = '0;
assign PageType = '0; assign PageType = '0;
assign DTLBWriteM = 1'b0; assign DTLBWriteM = 1'b0;
assign ITLBWriteF = 1'b0; assign ITLBWriteF = 1'b0;
assign PreLsuRWM = MemRWM; assign PreLsuRWM = MemRWM;
assign LsuFunct3M = Funct3M; assign LsuFunct3M = Funct3M;
assign LsuAtomicM = AtomicM; assign LsuAtomicM = AtomicM;
assign PreLsuAdrE = IEUAdrE[11:0]; assign PreLsuAdrE = IEUAdrE[11:0];
assign PreLsuPAdrM = IEUAdrExtM; assign PreLsuPAdrM = IEUAdrExtM;
assign CPUBusy = StallW; assign CPUBusy = StallW;
assign DTLBLoadPageFaultM = 1'b0; assign DTLBLoadPageFaultM = 1'b0;
assign DTLBStorePageFaultM = 1'b0; assign DTLBStorePageFaultM = 1'b0;
end end
endgenerate endgenerate
// **** look into this confusing signal. // **** look into this confusing signal.
@ -201,54 +201,54 @@ module lsu
assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM;
generate generate
if(`ZICSR_SUPPORTED == 1) begin : dmmu if(`ZICSR_SUPPORTED == 1) begin : dmmu
logic DataMisalignedM; logic DataMisalignedM;
mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0))
dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP,
.PrivilegeModeW, .DisableTranslation(SelHPTW), .PrivilegeModeW, .DisableTranslation(SelHPTW),
.PAdr(PreLsuPAdrM), .PAdr(PreLsuPAdrM),
.VAdr(IEUAdrM), .VAdr(IEUAdrM),
.Size(LsuFunct3M[1:0]), .Size(LsuFunct3M[1:0]),
.PTE, .PTE,
.PageTypeWriteVal(PageType), .PageTypeWriteVal(PageType),
.TLBWrite(DTLBWriteM), .TLBWrite(DTLBWriteM),
.TLBFlush(DTLBFlushM), .TLBFlush(DTLBFlushM),
.PhysicalAddress(LsuPAdrM), .PhysicalAddress(LsuPAdrM),
.TLBMiss(DTLBMissM), .TLBMiss(DTLBMissM),
.Cacheable(CacheableM), .Cacheable(CacheableM),
.Idempotent(), .AtomicAllowed(), .Idempotent(), .AtomicAllowed(),
.TLBPageFault(DTLBPageFaultM), .TLBPageFault(DTLBPageFaultM),
.InstrAccessFaultF(), .LoadAccessFaultM, .StoreAccessFaultM, .InstrAccessFaultF(), .LoadAccessFaultM, .StoreAccessFaultM,
.AtomicAccessM(1'b0), .ExecuteAccessF(1'b0), /// atomicaccessm is probably a bug .AtomicAccessM(1'b0), .ExecuteAccessF(1'b0), /// atomicaccessm is probably a bug
.WriteAccessM(PreLsuRWM[0]), .ReadAccessM(PreLsuRWM[1]), .WriteAccessM(PreLsuRWM[0]), .ReadAccessM(PreLsuRWM[1]),
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW
); // *** the pma/pmp instruction access faults don't really matter here. is it possible to parameterize which outputs exist? ); // *** the pma/pmp instruction access faults don't really matter here. is it possible to parameterize which outputs exist?
// Determine if an Unaligned access is taking place // Determine if an Unaligned access is taking place
// hptw guarantees alignment, only check inputs from IEU. // hptw guarantees alignment, only check inputs from IEU.
always_comb always_comb
case(Funct3M[1:0]) case(Funct3M[1:0])
2'b00: DataMisalignedM = 0; // lb, sb, lbu 2'b00: DataMisalignedM = 0; // lb, sb, lbu
2'b01: DataMisalignedM = IEUAdrM[0]; // lh, sh, lhu 2'b01: DataMisalignedM = IEUAdrM[0]; // lh, sh, lhu
2'b10: DataMisalignedM = IEUAdrM[1] | IEUAdrM[0]; // lw, sw, flw, fsw, lwu 2'b10: DataMisalignedM = IEUAdrM[1] | IEUAdrM[0]; // lw, sw, flw, fsw, lwu
2'b11: DataMisalignedM = |IEUAdrM[2:0]; // ld, sd, fld, fsd 2'b11: DataMisalignedM = |IEUAdrM[2:0]; // ld, sd, fld, fsd
endcase endcase
// If the CPU's (not HPTW's) request is a page fault. // If the CPU's (not HPTW's) request is a page fault.
assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1]; assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1];
assign StoreMisalignedFaultM = DataMisalignedM & MemRWM[0]; assign StoreMisalignedFaultM = DataMisalignedM & MemRWM[0];
end else begin end else begin
assign LsuPAdrM = PreLsuPAdrM; assign LsuPAdrM = PreLsuPAdrM;
assign DTLBMissM = 0; assign DTLBMissM = 0;
assign CacheableM = 1; assign CacheableM = 1;
assign DTLBPageFaultM = 0; assign DTLBPageFaultM = 0;
assign LoadAccessFaultM = 0; assign LoadAccessFaultM = 0;
assign StoreAccessFaultM = 0; assign StoreAccessFaultM = 0;
assign LoadMisalignedFaultM = 0; assign LoadMisalignedFaultM = 0;
assign StoreMisalignedFaultM = 0; assign StoreMisalignedFaultM = 0;
end end
endgenerate endgenerate
assign LSUStall = DCacheStall | InterlockStall | BusStall; assign LSUStall = DCacheStall | InterlockStall | BusStall;
@ -257,18 +257,17 @@ module lsu
// Move generate from lrsc to outside this module. // Move generate from lrsc to outside this module.
// use PreLsu as prefix for lrsc // use PreLsu as prefix for lrsc
generate generate
if (`A_SUPPORTED) begin:lrsc if (`A_SUPPORTED) begin:lrsc
assign MemReadM = PreLsuRWM[1] & ~(IgnoreRequest) & ~DTLBMissM; assign MemReadM = PreLsuRWM[1] & ~(IgnoreRequest) & ~DTLBMissM;
lrsc lrsc(.clk, .reset, .FlushW, .CPUBusy, .MemReadM, .PreLsuRWM, .LsuAtomicM, .LsuPAdrM, lrsc lrsc(.clk, .reset, .FlushW, .CPUBusy, .MemReadM, .PreLsuRWM, .LsuAtomicM, .LsuPAdrM,
.SquashSCW, .LsuRWM); .SquashSCW, .LsuRWM);
end else begin:lrsc end else begin:lrsc
assign SquashSCW = 0; assign SquashSCW = 0;
assign LsuRWM = PreLsuRWM; assign LsuRWM = PreLsuRWM;
end end
endgenerate endgenerate
// conditional // conditional
// 1. ram // controlled by `MEM_DTIM // 1. ram // controlled by `MEM_DTIM
// 2. cache `MEM_DCACHE // 2. cache `MEM_DCACHE
@ -306,25 +305,25 @@ module lsu
logic SelUncachedAdr; logic SelUncachedAdr;
generate generate
if(`MEM_DCACHE) begin : dcache if(`MEM_DCACHE) begin : dcache
dcache dcache(.clk, .reset, .CPUBusy, dcache dcache(.clk, .reset, .CPUBusy,
.LsuRWM, .FlushDCacheM, .LsuAtomicM, .LsuAdrE, .LsuPAdrM, .LsuRWM, .FlushDCacheM, .LsuAtomicM, .LsuAdrE, .LsuPAdrM,
.FinalWriteDataM, .ReadDataWordM, .DCacheStall, .FinalWriteDataM, .ReadDataWordM, .DCacheStall,
.DCacheMiss, .DCacheAccess, .DCacheMiss, .DCacheAccess,
.IgnoreRequest, .CacheableM, .DCacheCommittedM, .IgnoreRequest, .CacheableM, .DCacheCommittedM,
.DCacheBusAdr, .ReadDataBlockSetsM, .DCacheMemWriteData, .DCacheBusAdr, .ReadDataBlockSetsM, .DCacheMemWriteData,
.DCacheFetchLine, .DCacheWriteLine,.DCacheBusAck); .DCacheFetchLine, .DCacheWriteLine,.DCacheBusAck);
end else begin : passthrough end else begin : passthrough
assign ReadDataWordM = 0; assign ReadDataWordM = 0;
assign DCacheStall = 0; assign DCacheStall = 0;
assign DCacheMiss = 1; assign DCacheMiss = 1;
assign DCacheAccess = CacheableM; assign DCacheAccess = CacheableM;
assign DCacheCommittedM = 0; assign DCacheCommittedM = 0;
assign DCacheWriteLine = 0; assign DCacheWriteLine = 0;
assign DCacheFetchLine = 0; assign DCacheFetchLine = 0;
assign DCacheBusAdr = 0; assign DCacheBusAdr = 0;
assign ReadDataBlockSetsM[0] = 0; assign ReadDataBlockSetsM[0] = 0;
end end
endgenerate endgenerate

View File

@ -48,162 +48,154 @@ module hptw
output logic [2:0] HPTWSize // 32 or 64 bit access. output logic [2:0] HPTWSize // 32 or 64 bit access.
); );
typedef enum {L0_ADR, L0_RD, typedef enum {L0_ADR, L0_RD,
L1_ADR, L1_RD, L1_ADR, L1_RD,
L2_ADR, L2_RD, L2_ADR, L2_RD,
L3_ADR, L3_RD, L3_ADR, L3_RD,
LEAF, IDLE} statetype; // *** placed outside generate statement to remove synthesis errors LEAF, IDLE} statetype; // *** placed outside generate statement to remove synthesis errors
generate logic DTLBWalk; // register TLBs translation miss requests
if (`MEM_VIRTMEM) begin:virtmem logic [`PPN_BITS-1:0] BasePageTablePPN;
logic DTLBWalk; // register TLBs translation miss requests logic [`PPN_BITS-1:0] CurrentPPN;
logic [`PPN_BITS-1:0] BasePageTablePPN; logic MemWrite;
logic [`PPN_BITS-1:0] CurrentPPN; logic Executable, Writable, Readable, Valid;
logic MemWrite; logic Misaligned, MegapageMisaligned;
logic Executable, Writable, Readable, Valid; logic ValidPTE, LeafPTE, ValidLeafPTE, ValidNonLeafPTE;
logic Misaligned, MegapageMisaligned; logic StartWalk;
logic ValidPTE, LeafPTE, ValidLeafPTE, ValidNonLeafPTE; logic TLBMiss;
logic StartWalk; logic PRegEn;
logic TLBMiss; logic [1:0] NextPageType;
logic PRegEn; logic [`SVMODE_BITS-1:0] SvMode;
logic [1:0] NextPageType; logic [`XLEN-1:0] TranslationVAdr;
logic [`SVMODE_BITS-1:0] SvMode;
logic [`XLEN-1:0] TranslationVAdr;
(* mark_debug = "true" *) statetype WalkerState, NextWalkerState, InitialWalkerState; (* mark_debug = "true" *) statetype WalkerState, NextWalkerState, InitialWalkerState;
// Extract bits from CSRs and inputs // Extract bits from CSRs and inputs
assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS];
assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0]; assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0];
assign MemWrite = MemRWM[0]; assign MemWrite = MemRWM[0];
assign TLBMiss = (DTLBMissM | ITLBMissF); assign TLBMiss = (DTLBMissM | ITLBMissF);
// Determine which address to translate // Determine which address to translate
assign TranslationVAdr = DTLBWalk ? IEUAdrM : PCF; assign TranslationVAdr = DTLBWalk ? IEUAdrM : PCF;
assign CurrentPPN = PTE[`PPN_BITS+9:10]; assign CurrentPPN = PTE[`PPN_BITS+9:10];
// State flops // State flops
flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB) flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB)
assign PRegEn = HPTWRead & ~DCacheStall; assign PRegEn = HPTWRead & ~DCacheStall;
flopenr #(`XLEN) PTEReg(clk, reset, PRegEn, HPTWReadPTE, PTE); // Capture page table entry from data cache flopenr #(`XLEN) PTEReg(clk, reset, PRegEn, HPTWReadPTE, PTE); // Capture page table entry from data cache
// Assign PTE descriptors common across all XLEN values // Assign PTE descriptors common across all XLEN values
// For non-leaf PTEs, D, A, U bits are reserved and ignored. They do not cause faults while walking the page table // For non-leaf PTEs, D, A, U bits are reserved and ignored. They do not cause faults while walking the page table
assign {Executable, Writable, Readable, Valid} = PTE[3:0]; assign {Executable, Writable, Readable, Valid} = PTE[3:0];
assign LeafPTE = Executable | Writable | Readable; assign LeafPTE = Executable | Writable | Readable;
assign ValidPTE = Valid && ~(Writable && ~Readable); assign ValidPTE = Valid && ~(Writable && ~Readable);
assign ValidLeafPTE = ValidPTE & LeafPTE; assign ValidLeafPTE = ValidPTE & LeafPTE;
assign ValidNonLeafPTE = ValidPTE & ~LeafPTE; assign ValidNonLeafPTE = ValidPTE & ~LeafPTE;
// Enable and select signals based on states // Enable and select signals based on states
assign StartWalk = (WalkerState == IDLE) & TLBMiss; assign StartWalk = (WalkerState == IDLE) & TLBMiss;
assign HPTWRead = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD); assign HPTWRead = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD);
assign DTLBWriteM = (WalkerState == LEAF) & DTLBWalk; assign DTLBWriteM = (WalkerState == LEAF) & DTLBWalk;
assign ITLBWriteF = (WalkerState == LEAF) & ~DTLBWalk; assign ITLBWriteF = (WalkerState == LEAF) & ~DTLBWalk;
// FSM to track PageType based on the levels of the page table traversed // FSM to track PageType based on the levels of the page table traversed
flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType); flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType);
always_comb always_comb
case (WalkerState) case (WalkerState)
L3_RD: NextPageType = 2'b11; // terapage L3_RD: NextPageType = 2'b11; // terapage
L2_RD: NextPageType = 2'b10; // gigapage L2_RD: NextPageType = 2'b10; // gigapage
L1_RD: NextPageType = 2'b01; // megapage L1_RD: NextPageType = 2'b01; // megapage
L0_RD: NextPageType = 2'b00; // kilopage L0_RD: NextPageType = 2'b00; // kilopage
default: NextPageType = PageType; default: NextPageType = PageType;
endcase
// HPTWAdr muxing
if (`XLEN==32) begin // RV32
logic [9:0] VPN;
logic [`PPN_BITS-1:0] PPN;
assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state
assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN;
assign HPTWAdr = {PPN, VPN, 2'b00};
assign HPTWSize = 3'b010;
end else begin // RV64
logic [8:0] VPN;
logic [`PPN_BITS-1:0] PPN;
always_comb
case (WalkerState) // select VPN field based on HPTW state
L3_ADR, L3_RD: VPN = TranslationVAdr[47:39];
L2_ADR, L2_RD: VPN = TranslationVAdr[38:30];
L1_ADR, L1_RD: VPN = TranslationVAdr[29:21];
default: VPN = TranslationVAdr[20:12];
endcase endcase
assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) |
(SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN;
assign HPTWAdr = {PPN, VPN, 3'b000};
assign HPTWSize = 3'b011;
end
// HPTWAdr muxing // Initial state and misalignment for RV32/64
if (`XLEN==32) begin // RV32 if (`XLEN == 32) begin
logic [9:0] VPN; assign InitialWalkerState = L1_ADR;
logic [`PPN_BITS-1:0] PPN; assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0
assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state // *** Possible bug - should be L1_ADR?
assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN; assign Misaligned = ((WalkerState == L0_ADR) & MegapageMisaligned);
assign HPTWAdr = {PPN, VPN, 2'b00}; end else begin
assign HPTWSize = 3'b010; logic GigapageMisaligned, TerapageMisaligned;
end else begin // RV64 assign InitialWalkerState = (SvMode == `SV48) ? L3_ADR : L2_ADR;
logic [8:0] VPN; assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0
logic [`PPN_BITS-1:0] PPN; assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0
always_comb assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0
case (WalkerState) // select VPN field based on HPTW state assign Misaligned = ((WalkerState == L2_ADR) & TerapageMisaligned) | ((WalkerState == L1_ADR) & GigapageMisaligned) | ((WalkerState == L0_ADR) & MegapageMisaligned);
L3_ADR, L3_RD: VPN = TranslationVAdr[47:39]; end
L2_ADR, L2_RD: VPN = TranslationVAdr[38:30];
L1_ADR, L1_RD: VPN = TranslationVAdr[29:21];
default: VPN = TranslationVAdr[20:12];
endcase
assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) |
(SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN;
assign HPTWAdr = {PPN, VPN, 3'b000};
assign HPTWSize = 3'b011;
end
// Initial state and misalignment for RV32/64 // Page Table Walker FSM
if (`XLEN == 32) begin
assign InitialWalkerState = L1_ADR;
assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0
// *** Possible bug - should be L1_ADR?
assign Misaligned = ((WalkerState == L0_ADR) & MegapageMisaligned);
end else begin
logic GigapageMisaligned, TerapageMisaligned;
assign InitialWalkerState = (SvMode == `SV48) ? L3_ADR : L2_ADR;
assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0
assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0
assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0
assign Misaligned = ((WalkerState == L2_ADR) & TerapageMisaligned) | ((WalkerState == L1_ADR) & GigapageMisaligned) | ((WalkerState == L0_ADR) & MegapageMisaligned);
end
// Page Table Walker FSM
// If the setup time on the D$ RAM is short, it should be possible to merge the LEVELx_READ and LEVELx states // If the setup time on the D$ RAM is short, it should be possible to merge the LEVELx_READ and LEVELx states
// to decrease the latency of the HPTW. However, if the D$ is a cycle limiter, it's better to leave the // to decrease the latency of the HPTW. However, if the D$ is a cycle limiter, it's better to leave the
// HPTW as shown below to keep the D$ setup time out of the critical path. // HPTW as shown below to keep the D$ setup time out of the critical path.
// *** Is this really true. Talk with Ross. Seems like it's the next state logic on critical path instead. // *** Is this really true. Talk with Ross. Seems like it's the next state logic on critical path instead.
flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState);
always_comb always_comb
case (WalkerState) case (WalkerState)
IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState; IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState;
else NextWalkerState = IDLE; else NextWalkerState = IDLE;
L3_ADR: NextWalkerState = L3_RD; // first access in SV48 L3_ADR: NextWalkerState = L3_RD; // first access in SV48
L3_RD: if (DCacheStall) NextWalkerState = L3_RD; L3_RD: if (DCacheStall) NextWalkerState = L3_RD;
else NextWalkerState = L2_ADR; else NextWalkerState = L2_ADR;
// LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
// else if (ValidNonLeafPTE) NextWalkerState = L2_ADR; // else if (ValidNonLeafPTE) NextWalkerState = L2_ADR;
// else NextWalkerState = FAULT; // else NextWalkerState = FAULT;
L2_ADR: if (InitialWalkerState == L2_ADR) NextWalkerState = L2_RD; // first access in SV39 L2_ADR: if (InitialWalkerState == L2_ADR) NextWalkerState = L2_RD; // first access in SV39
else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages
else if (ValidNonLeafPTE) NextWalkerState = L2_RD; else if (ValidNonLeafPTE) NextWalkerState = L2_RD;
else NextWalkerState = LEAF; else NextWalkerState = LEAF;
L2_RD: if (DCacheStall) NextWalkerState = L2_RD; L2_RD: if (DCacheStall) NextWalkerState = L2_RD;
else NextWalkerState = L1_ADR; else NextWalkerState = L1_ADR;
// LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
// else if (ValidNonLeafPTE) NextWalkerState = L1_ADR; // else if (ValidNonLeafPTE) NextWalkerState = L1_ADR;
// else NextWalkerState = FAULT; // else NextWalkerState = FAULT;
L1_ADR: if (InitialWalkerState == L1_ADR) NextWalkerState = L1_RD; // first access in SV32 L1_ADR: if (InitialWalkerState == L1_ADR) NextWalkerState = L1_RD; // first access in SV32
else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages
else if (ValidNonLeafPTE) NextWalkerState = L1_RD; else if (ValidNonLeafPTE) NextWalkerState = L1_RD;
else NextWalkerState = LEAF; else NextWalkerState = LEAF;
L1_RD: if (DCacheStall) NextWalkerState = L1_RD; L1_RD: if (DCacheStall) NextWalkerState = L1_RD;
else NextWalkerState = L0_ADR; else NextWalkerState = L0_ADR;
// LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
// else if (ValidNonLeafPTE) NextWalkerState = L0_ADR; // else if (ValidNonLeafPTE) NextWalkerState = L0_ADR;
// else NextWalkerState = FAULT; // else NextWalkerState = FAULT;
L0_ADR: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages L0_ADR: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cyle for all Lx_ADR superpages
else if (ValidNonLeafPTE) NextWalkerState = L0_RD; else if (ValidNonLeafPTE) NextWalkerState = L0_RD;
else NextWalkerState = LEAF; else NextWalkerState = LEAF;
L0_RD: if (DCacheStall) NextWalkerState = L0_RD; L0_RD: if (DCacheStall) NextWalkerState = L0_RD;
else NextWalkerState = LEAF; else NextWalkerState = LEAF;
// LEVEL0: if (ValidLeafPTE) NextWalkerState = LEAF; // LEVEL0: if (ValidLeafPTE) NextWalkerState = LEAF;
// else NextWalkerState = FAULT; // else NextWalkerState = FAULT;
LEAF: NextWalkerState = IDLE; // updates TLB LEAF: NextWalkerState = IDLE; // updates TLB
default: begin default: begin
// synthesis translate_off // synthesis translate_off
$error("Default state in HPTW should be unreachable"); $error("Default state in HPTW should be unreachable");
// synthesis translate_on // synthesis translate_on
NextWalkerState = IDLE; // should never be reached NextWalkerState = IDLE; // should never be reached
end end
endcase endcase
end else begin // No Virtual memory supported; tie HPTW outputs to 0
assign HPTWRead = 0;
assign HPTWAdr = 0;
assign HPTWSize = 3'b000;
end
endgenerate
endmodule endmodule

View File

@ -1,101 +0,0 @@
///////////////////////////////////////////
// mul_cs.sv
//
// Written: james.stine@okstate.edu 17 October 2021
// Modified:
//
// Purpose: Carry/Save Multiplier output with Wallace Reduction
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module mult_cs #(parameter WIDTH = 8)
(a, b, tc, sum, carry);
input logic [WIDTH-1:0] a;
input logic [WIDTH-1:0] b;
input logic tc;
output logic [2*WIDTH-1:0] sum;
output logic [2*WIDTH-1:0] carry;
// PP array
logic [2*WIDTH-1:0] pp_array [0:WIDTH-1];
logic [2*WIDTH-1:0] next_pp_array [0:WIDTH-1];
logic [2*WIDTH-1:0] tmp_sum, tmp_carry;
logic [2*WIDTH-1:0] temp_pp;
logic [2*WIDTH-1:0] tmp_pp_carry;
logic [WIDTH-1:0] temp_b;
logic temp_bitgroup;
integer bit_pair, height, i;
always_comb
begin
// For each multiplicand PP generation
for (bit_pair=0; bit_pair < WIDTH; bit_pair=bit_pair+1)
begin
// Shift to the right via P&H
temp_b = (b >> (bit_pair));
temp_bitgroup = temp_b[0];
// PP generation
case (temp_bitgroup)
1'b0 : temp_pp = {2*WIDTH-1{1'b0}};
1'b1 : temp_pp = a;
default : temp_pp = {2*WIDTH-1{1'b0}};
endcase
// Shift to the left via P&H
temp_pp = temp_pp << (bit_pair);
pp_array[bit_pair] = temp_pp;
end
// Height is multiplier
height = WIDTH;
// Wallace Tree PP reduction
while (height > 2)
begin
for (i=0; i < (height/3); i=i+1)
begin
next_pp_array[i*2] = pp_array[i*3]^pp_array[i*3+1]^pp_array[i*3+2];
tmp_pp_carry = (pp_array[i*3] & pp_array[i*3+1]) |
(pp_array[i*3+1] & pp_array[i*3+2]) |
(pp_array[i*3] & pp_array[i*3+2]);
next_pp_array[i*2+1] = tmp_pp_carry << 1;
end
// Reasssign not divisible by 3 rows to next_pp_array
if ((height % 3) > 0)
begin
for (i=0; i < (height % 3); i=i+1)
next_pp_array[2 * (height/3) + i] = pp_array[3 * (height/3) + i];
end
// Put back values in pp_array to start again
for (i=0; i < WIDTH; i=i+1)
pp_array[i] = next_pp_array[i];
// Reduce height
height = height - (height/3);
end
// Sum is first row in reduced array
tmp_sum = pp_array[0];
// Carry is second row in reduced array
tmp_carry = pp_array[1];
end
assign sum = tmp_sum;
assign carry = tmp_carry;
endmodule // mult_cs

View File

@ -47,8 +47,6 @@ module redundantmul #(parameter WIDTH =8)(
DW02_multp #(WIDTH, WIDTH, 2*WIDTH+2) mul(.a, .b, .tc(1'b0), .out0(tmp_out0), .out1(tmp_out1)); DW02_multp #(WIDTH, WIDTH, 2*WIDTH+2) mul(.a, .b, .tc(1'b0), .out0(tmp_out0), .out1(tmp_out1));
assign out0 = tmp_out0[2*WIDTH-1:0]; assign out0 = tmp_out0[2*WIDTH-1:0];
assign out1 = tmp_out1[2*WIDTH-1:0]; assign out1 = tmp_out1[2*WIDTH-1:0];
end else if (`DESIGN_COMPILER == 2) begin:mul // *** need to remove this
mult_cs #(WIDTH) mul(.a, .b, .tc(1'b0), .sum(out0), .carry(out1));
end else begin:mul // force a nonredunant multipler. This will simulate properly and also is appropriate for FPGAs. end else begin:mul // force a nonredunant multipler. This will simulate properly and also is appropriate for FPGAs.
assign out0 = a * b; assign out0 = a * b;
assign out1 = 0; assign out1 = 0;