mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main
This commit is contained in:
commit
54d71006b1
@ -121,73 +121,73 @@ module lsu
|
|||||||
assign IEUAdrExtM = {2'b00, IEUAdrM};
|
assign IEUAdrExtM = {2'b00, IEUAdrM};
|
||||||
|
|
||||||
generate
|
generate
|
||||||
if(`MEM_VIRTMEM) begin : MEM_VIRTMEM
|
if(`MEM_VIRTMEM) begin : MEM_VIRTMEM
|
||||||
logic AnyCPUReqM;
|
logic AnyCPUReqM;
|
||||||
logic [`PA_BITS-1:0] HPTWAdr;
|
logic [`PA_BITS-1:0] HPTWAdr;
|
||||||
logic HPTWRead;
|
logic HPTWRead;
|
||||||
logic [2:0] HPTWSize;
|
logic [2:0] HPTWSize;
|
||||||
logic SelReplayCPURequest;
|
logic SelReplayCPURequest;
|
||||||
|
|
||||||
assign AnyCPUReqM = (|MemRWM) | (|AtomicM);
|
assign AnyCPUReqM = (|MemRWM) | (|AtomicM);
|
||||||
|
|
||||||
interlockfsm interlockfsm (.clk, .reset, .AnyCPUReqM, .ITLBMissF, .ITLBWriteF,
|
interlockfsm interlockfsm (.clk, .reset, .AnyCPUReqM, .ITLBMissF, .ITLBWriteF,
|
||||||
.DTLBMissM, .DTLBWriteM, .ExceptionM, .PendingInterruptM, .DCacheStall,
|
.DTLBMissM, .DTLBWriteM, .ExceptionM, .PendingInterruptM, .DCacheStall,
|
||||||
.InterlockStall, .SelReplayCPURequest, .SelHPTW,
|
.InterlockStall, .SelReplayCPURequest, .SelHPTW,
|
||||||
.IgnoreRequest);
|
.IgnoreRequest);
|
||||||
|
|
||||||
hptw hptw(.clk, .reset, .SATP_REGW, .PCF, .IEUAdrM,
|
hptw hptw(.clk, .reset, .SATP_REGW, .PCF, .IEUAdrM,
|
||||||
.ITLBMissF(ITLBMissF & ~PendingInterruptM),
|
.ITLBMissF(ITLBMissF & ~PendingInterruptM),
|
||||||
.DTLBMissM(DTLBMissM & ~PendingInterruptM),
|
.DTLBMissM(DTLBMissM & ~PendingInterruptM),
|
||||||
.MemRWM, .PTE, .PageType, .ITLBWriteF, .DTLBWriteM,
|
.MemRWM, .PTE, .PageType, .ITLBWriteF, .DTLBWriteM,
|
||||||
.HPTWReadPTE(ReadDataM),
|
.HPTWReadPTE(ReadDataM),
|
||||||
.DCacheStall, .HPTWAdr, .HPTWRead, .HPTWSize, .AnyCPUReqM);
|
.DCacheStall, .HPTWAdr, .HPTWRead, .HPTWSize, .AnyCPUReqM);
|
||||||
|
|
||||||
// arbiter between IEU and hptw
|
// arbiter between IEU and hptw
|
||||||
|
|
||||||
// multiplex the outputs to LSU
|
// multiplex the outputs to LSU
|
||||||
mux2 #(2) rwmux(MemRWM, {HPTWRead, 1'b0}, SelHPTW, PreLsuRWM);
|
mux2 #(2) rwmux(MemRWM, {HPTWRead, 1'b0}, SelHPTW, PreLsuRWM);
|
||||||
mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, LsuFunct3M);
|
mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, LsuFunct3M);
|
||||||
mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LsuAtomicM);
|
mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LsuAtomicM);
|
||||||
mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLsuAdrE);
|
mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLsuAdrE);
|
||||||
mux2 #(`PA_BITS) lsupadrmux(IEUAdrExtM[`PA_BITS-1:0], HPTWAdr, SelHPTW, PreLsuPAdrM);
|
mux2 #(`PA_BITS) lsupadrmux(IEUAdrExtM[`PA_BITS-1:0], HPTWAdr, SelHPTW, PreLsuPAdrM);
|
||||||
|
|
||||||
// always block interrupts when using the hardware page table walker.
|
// always block interrupts when using the hardware page table walker.
|
||||||
assign CPUBusy = StallW & ~SelHPTW;
|
assign CPUBusy = StallW & ~SelHPTW;
|
||||||
|
|
||||||
// It is not possible to pipeline hptw as the following load will depend on the previous load's
|
// It is not possible to pipeline hptw as the following load will depend on the previous load's
|
||||||
// data. Therefore we don't need a pipeline register
|
// data. Therefore we don't need a pipeline register
|
||||||
//flop #(`PA_BITS) HPTWAdrMReg(clk, HPTWAdr, HPTWAdrM); // delay HPTWAdrM by a cycle
|
//flop #(`PA_BITS) HPTWAdrMReg(clk, HPTWAdr, HPTWAdrM); // delay HPTWAdrM by a cycle
|
||||||
|
|
||||||
// Specify which type of page fault is occurring
|
// Specify which type of page fault is occurring
|
||||||
assign DTLBLoadPageFaultM = DTLBPageFaultM & PreLsuRWM[1];
|
assign DTLBLoadPageFaultM = DTLBPageFaultM & PreLsuRWM[1];
|
||||||
assign DTLBStorePageFaultM = DTLBPageFaultM & PreLsuRWM[0];
|
assign DTLBStorePageFaultM = DTLBPageFaultM & PreLsuRWM[0];
|
||||||
|
|
||||||
// When replaying CPU memory request after PTW select the IEUAdrM for correct address.
|
// When replaying CPU memory request after PTW select the IEUAdrM for correct address.
|
||||||
assign LsuAdrE = SelReplayCPURequest ? IEUAdrM[11:0] : PreLsuAdrE;
|
assign LsuAdrE = SelReplayCPURequest ? IEUAdrM[11:0] : PreLsuAdrE;
|
||||||
|
|
||||||
end // if (`MEM_VIRTMEM)
|
end // if (`MEM_VIRTMEM)
|
||||||
else begin
|
else begin
|
||||||
assign InterlockStall = 1'b0;
|
assign InterlockStall = 1'b0;
|
||||||
|
|
||||||
assign LsuAdrE = PreLsuAdrE;
|
assign LsuAdrE = PreLsuAdrE;
|
||||||
assign SelHPTW = 1'b0;
|
assign SelHPTW = 1'b0;
|
||||||
assign IgnoreRequest = 1'b0;
|
assign IgnoreRequest = 1'b0;
|
||||||
|
|
||||||
assign PTE = '0;
|
assign PTE = '0;
|
||||||
assign PageType = '0;
|
assign PageType = '0;
|
||||||
assign DTLBWriteM = 1'b0;
|
assign DTLBWriteM = 1'b0;
|
||||||
assign ITLBWriteF = 1'b0;
|
assign ITLBWriteF = 1'b0;
|
||||||
|
|
||||||
assign PreLsuRWM = MemRWM;
|
assign PreLsuRWM = MemRWM;
|
||||||
assign LsuFunct3M = Funct3M;
|
assign LsuFunct3M = Funct3M;
|
||||||
assign LsuAtomicM = AtomicM;
|
assign LsuAtomicM = AtomicM;
|
||||||
assign PreLsuAdrE = IEUAdrE[11:0];
|
assign PreLsuAdrE = IEUAdrE[11:0];
|
||||||
assign PreLsuPAdrM = IEUAdrExtM;
|
assign PreLsuPAdrM = IEUAdrExtM;
|
||||||
assign CPUBusy = StallW;
|
assign CPUBusy = StallW;
|
||||||
|
|
||||||
assign DTLBLoadPageFaultM = 1'b0;
|
assign DTLBLoadPageFaultM = 1'b0;
|
||||||
assign DTLBStorePageFaultM = 1'b0;
|
assign DTLBStorePageFaultM = 1'b0;
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
// **** look into this confusing signal.
|
// **** look into this confusing signal.
|
||||||
@ -201,54 +201,54 @@ module lsu
|
|||||||
assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM;
|
assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM;
|
||||||
|
|
||||||
generate
|
generate
|
||||||
if(`ZICSR_SUPPORTED == 1) begin : dmmu
|
if(`ZICSR_SUPPORTED == 1) begin : dmmu
|
||||||
logic DataMisalignedM;
|
logic DataMisalignedM;
|
||||||
|
|
||||||
mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0))
|
mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0))
|
||||||
dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP,
|
dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP,
|
||||||
.PrivilegeModeW, .DisableTranslation(SelHPTW),
|
.PrivilegeModeW, .DisableTranslation(SelHPTW),
|
||||||
.PAdr(PreLsuPAdrM),
|
.PAdr(PreLsuPAdrM),
|
||||||
.VAdr(IEUAdrM),
|
.VAdr(IEUAdrM),
|
||||||
.Size(LsuFunct3M[1:0]),
|
.Size(LsuFunct3M[1:0]),
|
||||||
.PTE,
|
.PTE,
|
||||||
.PageTypeWriteVal(PageType),
|
.PageTypeWriteVal(PageType),
|
||||||
.TLBWrite(DTLBWriteM),
|
.TLBWrite(DTLBWriteM),
|
||||||
.TLBFlush(DTLBFlushM),
|
.TLBFlush(DTLBFlushM),
|
||||||
.PhysicalAddress(LsuPAdrM),
|
.PhysicalAddress(LsuPAdrM),
|
||||||
.TLBMiss(DTLBMissM),
|
.TLBMiss(DTLBMissM),
|
||||||
.Cacheable(CacheableM),
|
.Cacheable(CacheableM),
|
||||||
.Idempotent(), .AtomicAllowed(),
|
.Idempotent(), .AtomicAllowed(),
|
||||||
.TLBPageFault(DTLBPageFaultM),
|
.TLBPageFault(DTLBPageFaultM),
|
||||||
.InstrAccessFaultF(), .LoadAccessFaultM, .StoreAccessFaultM,
|
.InstrAccessFaultF(), .LoadAccessFaultM, .StoreAccessFaultM,
|
||||||
.AtomicAccessM(1'b0), .ExecuteAccessF(1'b0), /// atomicaccessm is probably a bug
|
.AtomicAccessM(1'b0), .ExecuteAccessF(1'b0), /// atomicaccessm is probably a bug
|
||||||
.WriteAccessM(PreLsuRWM[0]), .ReadAccessM(PreLsuRWM[1]),
|
.WriteAccessM(PreLsuRWM[0]), .ReadAccessM(PreLsuRWM[1]),
|
||||||
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW
|
.PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW
|
||||||
); // *** the pma/pmp instruction access faults don't really matter here. is it possible to parameterize which outputs exist?
|
); // *** the pma/pmp instruction access faults don't really matter here. is it possible to parameterize which outputs exist?
|
||||||
|
|
||||||
// Determine if an Unaligned access is taking place
|
// Determine if an Unaligned access is taking place
|
||||||
// hptw guarantees alignment, only check inputs from IEU.
|
// hptw guarantees alignment, only check inputs from IEU.
|
||||||
always_comb
|
always_comb
|
||||||
case(Funct3M[1:0])
|
case(Funct3M[1:0])
|
||||||
2'b00: DataMisalignedM = 0; // lb, sb, lbu
|
2'b00: DataMisalignedM = 0; // lb, sb, lbu
|
||||||
2'b01: DataMisalignedM = IEUAdrM[0]; // lh, sh, lhu
|
2'b01: DataMisalignedM = IEUAdrM[0]; // lh, sh, lhu
|
||||||
2'b10: DataMisalignedM = IEUAdrM[1] | IEUAdrM[0]; // lw, sw, flw, fsw, lwu
|
2'b10: DataMisalignedM = IEUAdrM[1] | IEUAdrM[0]; // lw, sw, flw, fsw, lwu
|
||||||
2'b11: DataMisalignedM = |IEUAdrM[2:0]; // ld, sd, fld, fsd
|
2'b11: DataMisalignedM = |IEUAdrM[2:0]; // ld, sd, fld, fsd
|
||||||
endcase
|
endcase
|
||||||
|
|
||||||
// If the CPU's (not HPTW's) request is a page fault.
|
// If the CPU's (not HPTW's) request is a page fault.
|
||||||
assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1];
|
assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1];
|
||||||
assign StoreMisalignedFaultM = DataMisalignedM & MemRWM[0];
|
assign StoreMisalignedFaultM = DataMisalignedM & MemRWM[0];
|
||||||
|
|
||||||
end else begin
|
end else begin
|
||||||
assign LsuPAdrM = PreLsuPAdrM;
|
assign LsuPAdrM = PreLsuPAdrM;
|
||||||
assign DTLBMissM = 0;
|
assign DTLBMissM = 0;
|
||||||
assign CacheableM = 1;
|
assign CacheableM = 1;
|
||||||
assign DTLBPageFaultM = 0;
|
assign DTLBPageFaultM = 0;
|
||||||
assign LoadAccessFaultM = 0;
|
assign LoadAccessFaultM = 0;
|
||||||
assign StoreAccessFaultM = 0;
|
assign StoreAccessFaultM = 0;
|
||||||
assign LoadMisalignedFaultM = 0;
|
assign LoadMisalignedFaultM = 0;
|
||||||
assign StoreMisalignedFaultM = 0;
|
assign StoreMisalignedFaultM = 0;
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
assign LSUStall = DCacheStall | InterlockStall | BusStall;
|
assign LSUStall = DCacheStall | InterlockStall | BusStall;
|
||||||
|
|
||||||
@ -257,18 +257,17 @@ module lsu
|
|||||||
// Move generate from lrsc to outside this module.
|
// Move generate from lrsc to outside this module.
|
||||||
// use PreLsu as prefix for lrsc
|
// use PreLsu as prefix for lrsc
|
||||||
generate
|
generate
|
||||||
if (`A_SUPPORTED) begin:lrsc
|
if (`A_SUPPORTED) begin:lrsc
|
||||||
assign MemReadM = PreLsuRWM[1] & ~(IgnoreRequest) & ~DTLBMissM;
|
assign MemReadM = PreLsuRWM[1] & ~(IgnoreRequest) & ~DTLBMissM;
|
||||||
lrsc lrsc(.clk, .reset, .FlushW, .CPUBusy, .MemReadM, .PreLsuRWM, .LsuAtomicM, .LsuPAdrM,
|
lrsc lrsc(.clk, .reset, .FlushW, .CPUBusy, .MemReadM, .PreLsuRWM, .LsuAtomicM, .LsuPAdrM,
|
||||||
.SquashSCW, .LsuRWM);
|
.SquashSCW, .LsuRWM);
|
||||||
end else begin:lrsc
|
end else begin:lrsc
|
||||||
assign SquashSCW = 0;
|
assign SquashSCW = 0;
|
||||||
assign LsuRWM = PreLsuRWM;
|
assign LsuRWM = PreLsuRWM;
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// conditional
|
// conditional
|
||||||
// 1. ram // controlled by `MEM_DTIM
|
// 1. ram // controlled by `MEM_DTIM
|
||||||
// 2. cache `MEM_DCACHE
|
// 2. cache `MEM_DCACHE
|
||||||
@ -306,25 +305,25 @@ module lsu
|
|||||||
logic SelUncachedAdr;
|
logic SelUncachedAdr;
|
||||||
|
|
||||||
generate
|
generate
|
||||||
if(`MEM_DCACHE) begin : dcache
|
if(`MEM_DCACHE) begin : dcache
|
||||||
dcache dcache(.clk, .reset, .CPUBusy,
|
dcache dcache(.clk, .reset, .CPUBusy,
|
||||||
.LsuRWM, .FlushDCacheM, .LsuAtomicM, .LsuAdrE, .LsuPAdrM,
|
.LsuRWM, .FlushDCacheM, .LsuAtomicM, .LsuAdrE, .LsuPAdrM,
|
||||||
.FinalWriteDataM, .ReadDataWordM, .DCacheStall,
|
.FinalWriteDataM, .ReadDataWordM, .DCacheStall,
|
||||||
.DCacheMiss, .DCacheAccess,
|
.DCacheMiss, .DCacheAccess,
|
||||||
.IgnoreRequest, .CacheableM, .DCacheCommittedM,
|
.IgnoreRequest, .CacheableM, .DCacheCommittedM,
|
||||||
.DCacheBusAdr, .ReadDataBlockSetsM, .DCacheMemWriteData,
|
.DCacheBusAdr, .ReadDataBlockSetsM, .DCacheMemWriteData,
|
||||||
.DCacheFetchLine, .DCacheWriteLine,.DCacheBusAck);
|
.DCacheFetchLine, .DCacheWriteLine,.DCacheBusAck);
|
||||||
end else begin : passthrough
|
end else begin : passthrough
|
||||||
assign ReadDataWordM = 0;
|
assign ReadDataWordM = 0;
|
||||||
assign DCacheStall = 0;
|
assign DCacheStall = 0;
|
||||||
assign DCacheMiss = 1;
|
assign DCacheMiss = 1;
|
||||||
assign DCacheAccess = CacheableM;
|
assign DCacheAccess = CacheableM;
|
||||||
assign DCacheCommittedM = 0;
|
assign DCacheCommittedM = 0;
|
||||||
assign DCacheWriteLine = 0;
|
assign DCacheWriteLine = 0;
|
||||||
assign DCacheFetchLine = 0;
|
assign DCacheFetchLine = 0;
|
||||||
assign DCacheBusAdr = 0;
|
assign DCacheBusAdr = 0;
|
||||||
assign ReadDataBlockSetsM[0] = 0;
|
assign ReadDataBlockSetsM[0] = 0;
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
|
|
||||||
|
@ -48,162 +48,154 @@ module hptw
|
|||||||
output logic [2:0] HPTWSize // 32 or 64 bit access.
|
output logic [2:0] HPTWSize // 32 or 64 bit access.
|
||||||
);
|
);
|
||||||
|
|
||||||
typedef enum {L0_ADR, L0_RD,
|
typedef enum {L0_ADR, L0_RD,
|
||||||
L1_ADR, L1_RD,
|
L1_ADR, L1_RD,
|
||||||
L2_ADR, L2_RD,
|
L2_ADR, L2_RD,
|
||||||
L3_ADR, L3_RD,
|
L3_ADR, L3_RD,
|
||||||
LEAF, IDLE} statetype; // *** placed outside generate statement to remove synthesis errors
|
LEAF, IDLE} statetype; // *** placed outside generate statement to remove synthesis errors
|
||||||
|
|
||||||
generate
|
logic DTLBWalk; // register TLBs translation miss requests
|
||||||
if (`MEM_VIRTMEM) begin:virtmem
|
logic [`PPN_BITS-1:0] BasePageTablePPN;
|
||||||
logic DTLBWalk; // register TLBs translation miss requests
|
logic [`PPN_BITS-1:0] CurrentPPN;
|
||||||
logic [`PPN_BITS-1:0] BasePageTablePPN;
|
logic MemWrite;
|
||||||
logic [`PPN_BITS-1:0] CurrentPPN;
|
logic Executable, Writable, Readable, Valid;
|
||||||
logic MemWrite;
|
logic Misaligned, MegapageMisaligned;
|
||||||
logic Executable, Writable, Readable, Valid;
|
logic ValidPTE, LeafPTE, ValidLeafPTE, ValidNonLeafPTE;
|
||||||
logic Misaligned, MegapageMisaligned;
|
logic StartWalk;
|
||||||
logic ValidPTE, LeafPTE, ValidLeafPTE, ValidNonLeafPTE;
|
logic TLBMiss;
|
||||||
logic StartWalk;
|
logic PRegEn;
|
||||||
logic TLBMiss;
|
logic [1:0] NextPageType;
|
||||||
logic PRegEn;
|
logic [`SVMODE_BITS-1:0] SvMode;
|
||||||
logic [1:0] NextPageType;
|
logic [`XLEN-1:0] TranslationVAdr;
|
||||||
logic [`SVMODE_BITS-1:0] SvMode;
|
|
||||||
logic [`XLEN-1:0] TranslationVAdr;
|
|
||||||
|
|
||||||
(* mark_debug = "true" *) statetype WalkerState, NextWalkerState, InitialWalkerState;
|
(* mark_debug = "true" *) statetype WalkerState, NextWalkerState, InitialWalkerState;
|
||||||
|
|
||||||
// Extract bits from CSRs and inputs
|
// Extract bits from CSRs and inputs
|
||||||
assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS];
|
assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS];
|
||||||
assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0];
|
assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0];
|
||||||
assign MemWrite = MemRWM[0];
|
assign MemWrite = MemRWM[0];
|
||||||
assign TLBMiss = (DTLBMissM | ITLBMissF);
|
assign TLBMiss = (DTLBMissM | ITLBMissF);
|
||||||
|
|
||||||
// Determine which address to translate
|
// Determine which address to translate
|
||||||
assign TranslationVAdr = DTLBWalk ? IEUAdrM : PCF;
|
assign TranslationVAdr = DTLBWalk ? IEUAdrM : PCF;
|
||||||
assign CurrentPPN = PTE[`PPN_BITS+9:10];
|
assign CurrentPPN = PTE[`PPN_BITS+9:10];
|
||||||
|
|
||||||
// State flops
|
// State flops
|
||||||
flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB)
|
flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB)
|
||||||
assign PRegEn = HPTWRead & ~DCacheStall;
|
assign PRegEn = HPTWRead & ~DCacheStall;
|
||||||
flopenr #(`XLEN) PTEReg(clk, reset, PRegEn, HPTWReadPTE, PTE); // Capture page table entry from data cache
|
flopenr #(`XLEN) PTEReg(clk, reset, PRegEn, HPTWReadPTE, PTE); // Capture page table entry from data cache
|
||||||
|
|
||||||
// Assign PTE descriptors common across all XLEN values
|
// Assign PTE descriptors common across all XLEN values
|
||||||
// For non-leaf PTEs, D, A, U bits are reserved and ignored. They do not cause faults while walking the page table
|
// For non-leaf PTEs, D, A, U bits are reserved and ignored. They do not cause faults while walking the page table
|
||||||
assign {Executable, Writable, Readable, Valid} = PTE[3:0];
|
assign {Executable, Writable, Readable, Valid} = PTE[3:0];
|
||||||
assign LeafPTE = Executable | Writable | Readable;
|
assign LeafPTE = Executable | Writable | Readable;
|
||||||
assign ValidPTE = Valid && ~(Writable && ~Readable);
|
assign ValidPTE = Valid && ~(Writable && ~Readable);
|
||||||
assign ValidLeafPTE = ValidPTE & LeafPTE;
|
assign ValidLeafPTE = ValidPTE & LeafPTE;
|
||||||
assign ValidNonLeafPTE = ValidPTE & ~LeafPTE;
|
assign ValidNonLeafPTE = ValidPTE & ~LeafPTE;
|
||||||
|
|
||||||
// Enable and select signals based on states
|
// Enable and select signals based on states
|
||||||
assign StartWalk = (WalkerState == IDLE) & TLBMiss;
|
assign StartWalk = (WalkerState == IDLE) & TLBMiss;
|
||||||
assign HPTWRead = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD);
|
assign HPTWRead = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD);
|
||||||
assign DTLBWriteM = (WalkerState == LEAF) & DTLBWalk;
|
assign DTLBWriteM = (WalkerState == LEAF) & DTLBWalk;
|
||||||
assign ITLBWriteF = (WalkerState == LEAF) & ~DTLBWalk;
|
assign ITLBWriteF = (WalkerState == LEAF) & ~DTLBWalk;
|
||||||
|
|
||||||
// FSM to track PageType based on the levels of the page table traversed
|
// FSM to track PageType based on the levels of the page table traversed
|
||||||
flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType);
|
flopr #(2) PageTypeReg(clk, reset, NextPageType, PageType);
|
||||||
always_comb
|
always_comb
|
||||||
case (WalkerState)
|
case (WalkerState)
|
||||||
L3_RD: NextPageType = 2'b11; // terapage
|
L3_RD: NextPageType = 2'b11; // terapage
|
||||||
L2_RD: NextPageType = 2'b10; // gigapage
|
L2_RD: NextPageType = 2'b10; // gigapage
|
||||||
L1_RD: NextPageType = 2'b01; // megapage
|
L1_RD: NextPageType = 2'b01; // megapage
|
||||||
L0_RD: NextPageType = 2'b00; // kilopage
|
L0_RD: NextPageType = 2'b00; // kilopage
|
||||||
default: NextPageType = PageType;
|
default: NextPageType = PageType;
|
||||||
|
endcase
|
||||||
|
|
||||||
|
// HPTWAdr muxing
|
||||||
|
if (`XLEN==32) begin // RV32
|
||||||
|
logic [9:0] VPN;
|
||||||
|
logic [`PPN_BITS-1:0] PPN;
|
||||||
|
assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state
|
||||||
|
assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN;
|
||||||
|
assign HPTWAdr = {PPN, VPN, 2'b00};
|
||||||
|
assign HPTWSize = 3'b010;
|
||||||
|
end else begin // RV64
|
||||||
|
logic [8:0] VPN;
|
||||||
|
logic [`PPN_BITS-1:0] PPN;
|
||||||
|
always_comb
|
||||||
|
case (WalkerState) // select VPN field based on HPTW state
|
||||||
|
L3_ADR, L3_RD: VPN = TranslationVAdr[47:39];
|
||||||
|
L2_ADR, L2_RD: VPN = TranslationVAdr[38:30];
|
||||||
|
L1_ADR, L1_RD: VPN = TranslationVAdr[29:21];
|
||||||
|
default: VPN = TranslationVAdr[20:12];
|
||||||
endcase
|
endcase
|
||||||
|
assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) |
|
||||||
|
(SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN;
|
||||||
|
assign HPTWAdr = {PPN, VPN, 3'b000};
|
||||||
|
assign HPTWSize = 3'b011;
|
||||||
|
end
|
||||||
|
|
||||||
// HPTWAdr muxing
|
// Initial state and misalignment for RV32/64
|
||||||
if (`XLEN==32) begin // RV32
|
if (`XLEN == 32) begin
|
||||||
logic [9:0] VPN;
|
assign InitialWalkerState = L1_ADR;
|
||||||
logic [`PPN_BITS-1:0] PPN;
|
assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0
|
||||||
assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state
|
// *** Possible bug - should be L1_ADR?
|
||||||
assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN;
|
assign Misaligned = ((WalkerState == L0_ADR) & MegapageMisaligned);
|
||||||
assign HPTWAdr = {PPN, VPN, 2'b00};
|
end else begin
|
||||||
assign HPTWSize = 3'b010;
|
logic GigapageMisaligned, TerapageMisaligned;
|
||||||
end else begin // RV64
|
assign InitialWalkerState = (SvMode == `SV48) ? L3_ADR : L2_ADR;
|
||||||
logic [8:0] VPN;
|
assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0
|
||||||
logic [`PPN_BITS-1:0] PPN;
|
assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0
|
||||||
always_comb
|
assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0
|
||||||
case (WalkerState) // select VPN field based on HPTW state
|
assign Misaligned = ((WalkerState == L2_ADR) & TerapageMisaligned) | ((WalkerState == L1_ADR) & GigapageMisaligned) | ((WalkerState == L0_ADR) & MegapageMisaligned);
|
||||||
L3_ADR, L3_RD: VPN = TranslationVAdr[47:39];
|
end
|
||||||
L2_ADR, L2_RD: VPN = TranslationVAdr[38:30];
|
|
||||||
L1_ADR, L1_RD: VPN = TranslationVAdr[29:21];
|
|
||||||
default: VPN = TranslationVAdr[20:12];
|
|
||||||
endcase
|
|
||||||
assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) |
|
|
||||||
(SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN;
|
|
||||||
assign HPTWAdr = {PPN, VPN, 3'b000};
|
|
||||||
assign HPTWSize = 3'b011;
|
|
||||||
end
|
|
||||||
|
|
||||||
// Initial state and misalignment for RV32/64
|
// Page Table Walker FSM
|
||||||
if (`XLEN == 32) begin
|
|
||||||
assign InitialWalkerState = L1_ADR;
|
|
||||||
assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0
|
|
||||||
// *** Possible bug - should be L1_ADR?
|
|
||||||
assign Misaligned = ((WalkerState == L0_ADR) & MegapageMisaligned);
|
|
||||||
end else begin
|
|
||||||
logic GigapageMisaligned, TerapageMisaligned;
|
|
||||||
assign InitialWalkerState = (SvMode == `SV48) ? L3_ADR : L2_ADR;
|
|
||||||
assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0
|
|
||||||
assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0
|
|
||||||
assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0
|
|
||||||
assign Misaligned = ((WalkerState == L2_ADR) & TerapageMisaligned) | ((WalkerState == L1_ADR) & GigapageMisaligned) | ((WalkerState == L0_ADR) & MegapageMisaligned);
|
|
||||||
end
|
|
||||||
|
|
||||||
// Page Table Walker FSM
|
|
||||||
// If the setup time on the D$ RAM is short, it should be possible to merge the LEVELx_READ and LEVELx states
|
// If the setup time on the D$ RAM is short, it should be possible to merge the LEVELx_READ and LEVELx states
|
||||||
// to decrease the latency of the HPTW. However, if the D$ is a cycle limiter, it's better to leave the
|
// to decrease the latency of the HPTW. However, if the D$ is a cycle limiter, it's better to leave the
|
||||||
// HPTW as shown below to keep the D$ setup time out of the critical path.
|
// HPTW as shown below to keep the D$ setup time out of the critical path.
|
||||||
// *** Is this really true? Talk with Ross. It seems like the next-state logic is on the critical path instead.
|
// *** Is this really true? Talk with Ross. It seems like the next-state logic is on the critical path instead.
|
||||||
flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState);
|
flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState);
|
||||||
always_comb
|
always_comb
|
||||||
case (WalkerState)
|
case (WalkerState)
|
||||||
IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState;
|
IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState;
|
||||||
else NextWalkerState = IDLE;
|
else NextWalkerState = IDLE;
|
||||||
L3_ADR: NextWalkerState = L3_RD; // first access in SV48
|
L3_ADR: NextWalkerState = L3_RD; // first access in SV48
|
||||||
L3_RD: if (DCacheStall) NextWalkerState = L3_RD;
|
L3_RD: if (DCacheStall) NextWalkerState = L3_RD;
|
||||||
else NextWalkerState = L2_ADR;
|
else NextWalkerState = L2_ADR;
|
||||||
// LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
|
// LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
|
||||||
// else if (ValidNonLeafPTE) NextWalkerState = L2_ADR;
|
// else if (ValidNonLeafPTE) NextWalkerState = L2_ADR;
|
||||||
// else NextWalkerState = FAULT;
|
// else NextWalkerState = FAULT;
|
||||||
L2_ADR: if (InitialWalkerState == L2_ADR) NextWalkerState = L2_RD; // first access in SV39
|
L2_ADR: if (InitialWalkerState == L2_ADR) NextWalkerState = L2_RD; // first access in SV39
|
||||||
else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cycle for all Lx_ADR superpages
|
else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cycle for all Lx_ADR superpages
|
||||||
else if (ValidNonLeafPTE) NextWalkerState = L2_RD;
|
else if (ValidNonLeafPTE) NextWalkerState = L2_RD;
|
||||||
else NextWalkerState = LEAF;
|
else NextWalkerState = LEAF;
|
||||||
L2_RD: if (DCacheStall) NextWalkerState = L2_RD;
|
L2_RD: if (DCacheStall) NextWalkerState = L2_RD;
|
||||||
else NextWalkerState = L1_ADR;
|
else NextWalkerState = L1_ADR;
|
||||||
// LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
|
// LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
|
||||||
// else if (ValidNonLeafPTE) NextWalkerState = L1_ADR;
|
// else if (ValidNonLeafPTE) NextWalkerState = L1_ADR;
|
||||||
// else NextWalkerState = FAULT;
|
// else NextWalkerState = FAULT;
|
||||||
L1_ADR: if (InitialWalkerState == L1_ADR) NextWalkerState = L1_RD; // first access in SV32
|
L1_ADR: if (InitialWalkerState == L1_ADR) NextWalkerState = L1_RD; // first access in SV32
|
||||||
else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cycle for all Lx_ADR superpages
|
else if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cycle for all Lx_ADR superpages
|
||||||
else if (ValidNonLeafPTE) NextWalkerState = L1_RD;
|
else if (ValidNonLeafPTE) NextWalkerState = L1_RD;
|
||||||
else NextWalkerState = LEAF;
|
else NextWalkerState = LEAF;
|
||||||
L1_RD: if (DCacheStall) NextWalkerState = L1_RD;
|
L1_RD: if (DCacheStall) NextWalkerState = L1_RD;
|
||||||
else NextWalkerState = L0_ADR;
|
else NextWalkerState = L0_ADR;
|
||||||
// LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
|
// LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
|
||||||
// else if (ValidNonLeafPTE) NextWalkerState = L0_ADR;
|
// else if (ValidNonLeafPTE) NextWalkerState = L0_ADR;
|
||||||
// else NextWalkerState = FAULT;
|
// else NextWalkerState = FAULT;
|
||||||
L0_ADR: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cycle for all Lx_ADR superpages
|
L0_ADR: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; // could shortcut this by a cycle for all Lx_ADR superpages
|
||||||
else if (ValidNonLeafPTE) NextWalkerState = L0_RD;
|
else if (ValidNonLeafPTE) NextWalkerState = L0_RD;
|
||||||
else NextWalkerState = LEAF;
|
else NextWalkerState = LEAF;
|
||||||
L0_RD: if (DCacheStall) NextWalkerState = L0_RD;
|
L0_RD: if (DCacheStall) NextWalkerState = L0_RD;
|
||||||
else NextWalkerState = LEAF;
|
else NextWalkerState = LEAF;
|
||||||
// LEVEL0: if (ValidLeafPTE) NextWalkerState = LEAF;
|
// LEVEL0: if (ValidLeafPTE) NextWalkerState = LEAF;
|
||||||
// else NextWalkerState = FAULT;
|
// else NextWalkerState = FAULT;
|
||||||
LEAF: NextWalkerState = IDLE; // updates TLB
|
LEAF: NextWalkerState = IDLE; // updates TLB
|
||||||
default: begin
|
default: begin
|
||||||
// synthesis translate_off
|
// synthesis translate_off
|
||||||
$error("Default state in HPTW should be unreachable");
|
$error("Default state in HPTW should be unreachable");
|
||||||
// synthesis translate_on
|
// synthesis translate_on
|
||||||
NextWalkerState = IDLE; // should never be reached
|
NextWalkerState = IDLE; // should never be reached
|
||||||
end
|
end
|
||||||
endcase
|
endcase
|
||||||
end else begin // No Virtual memory supported; tie HPTW outputs to 0
|
|
||||||
assign HPTWRead = 0;
|
|
||||||
assign HPTWAdr = 0;
|
|
||||||
assign HPTWSize = 3'b000;
|
|
||||||
end
|
|
||||||
endgenerate
|
|
||||||
endmodule
|
endmodule
|
||||||
|
@ -1,101 +0,0 @@
|
|||||||
///////////////////////////////////////////
|
|
||||||
// mul_cs.sv
|
|
||||||
//
|
|
||||||
// Written: james.stine@okstate.edu 17 October 2021
|
|
||||||
// Modified:
|
|
||||||
//
|
|
||||||
// Purpose: Carry/Save Multiplier output with Wallace Reduction
|
|
||||||
//
|
|
||||||
// A component of the Wally configurable RISC-V project.
|
|
||||||
//
|
|
||||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
|
||||||
//
|
|
||||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
|
||||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
|
||||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
|
||||||
// is furnished to do so, subject to the following conditions:
|
|
||||||
//
|
|
||||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
||||||
//
|
|
||||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
||||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
|
||||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
///////////////////////////////////////////
|
|
||||||
|
|
||||||
module mult_cs #(parameter WIDTH = 8)
|
|
||||||
(a, b, tc, sum, carry);
|
|
||||||
|
|
||||||
input logic [WIDTH-1:0] a;
|
|
||||||
input logic [WIDTH-1:0] b;
|
|
||||||
input logic tc;
|
|
||||||
|
|
||||||
output logic [2*WIDTH-1:0] sum;
|
|
||||||
output logic [2*WIDTH-1:0] carry;
|
|
||||||
|
|
||||||
// PP array
|
|
||||||
logic [2*WIDTH-1:0] pp_array [0:WIDTH-1];
|
|
||||||
logic [2*WIDTH-1:0] next_pp_array [0:WIDTH-1];
|
|
||||||
logic [2*WIDTH-1:0] tmp_sum, tmp_carry;
|
|
||||||
logic [2*WIDTH-1:0] temp_pp;
|
|
||||||
logic [2*WIDTH-1:0] tmp_pp_carry;
|
|
||||||
logic [WIDTH-1:0] temp_b;
|
|
||||||
logic temp_bitgroup;
|
|
||||||
integer bit_pair, height, i;
|
|
||||||
|
|
||||||
always_comb
|
|
||||||
begin
|
|
||||||
// For each multiplicand PP generation
|
|
||||||
for (bit_pair=0; bit_pair < WIDTH; bit_pair=bit_pair+1)
|
|
||||||
begin
|
|
||||||
// Shift to the right via P&H
|
|
||||||
temp_b = (b >> (bit_pair));
|
|
||||||
temp_bitgroup = temp_b[0];
|
|
||||||
// PP generation
|
|
||||||
case (temp_bitgroup)
|
|
||||||
1'b0 : temp_pp = {2*WIDTH-1{1'b0}};
|
|
||||||
1'b1 : temp_pp = a;
|
|
||||||
default : temp_pp = {2*WIDTH-1{1'b0}};
|
|
||||||
endcase
|
|
||||||
// Shift to the left via P&H
|
|
||||||
temp_pp = temp_pp << (bit_pair);
|
|
||||||
pp_array[bit_pair] = temp_pp;
|
|
||||||
end
|
|
||||||
|
|
||||||
// Height is multiplier
|
|
||||||
height = WIDTH;
|
|
||||||
|
|
||||||
// Wallace Tree PP reduction
|
|
||||||
while (height > 2)
|
|
||||||
begin
|
|
||||||
for (i=0; i < (height/3); i=i+1)
|
|
||||||
begin
|
|
||||||
next_pp_array[i*2] = pp_array[i*3]^pp_array[i*3+1]^pp_array[i*3+2];
|
|
||||||
tmp_pp_carry = (pp_array[i*3] & pp_array[i*3+1]) |
|
|
||||||
(pp_array[i*3+1] & pp_array[i*3+2]) |
|
|
||||||
(pp_array[i*3] & pp_array[i*3+2]);
|
|
||||||
next_pp_array[i*2+1] = tmp_pp_carry << 1;
|
|
||||||
end
|
|
||||||
// Reassign the leftover rows (when height is not divisible by 3) to next_pp_array
|
|
||||||
if ((height % 3) > 0)
|
|
||||||
begin
|
|
||||||
for (i=0; i < (height % 3); i=i+1)
|
|
||||||
next_pp_array[2 * (height/3) + i] = pp_array[3 * (height/3) + i];
|
|
||||||
end
|
|
||||||
// Put back values in pp_array to start again
|
|
||||||
for (i=0; i < WIDTH; i=i+1)
|
|
||||||
pp_array[i] = next_pp_array[i];
|
|
||||||
// Reduce height
|
|
||||||
height = height - (height/3);
|
|
||||||
end
|
|
||||||
// Sum is first row in reduced array
|
|
||||||
tmp_sum = pp_array[0];
|
|
||||||
// Carry is second row in reduced array
|
|
||||||
tmp_carry = pp_array[1];
|
|
||||||
end
|
|
||||||
|
|
||||||
assign sum = tmp_sum;
|
|
||||||
assign carry = tmp_carry;
|
|
||||||
|
|
||||||
endmodule // mult_cs
|
|
||||||
|
|
@ -47,8 +47,6 @@ module redundantmul #(parameter WIDTH =8)(
|
|||||||
DW02_multp #(WIDTH, WIDTH, 2*WIDTH+2) mul(.a, .b, .tc(1'b0), .out0(tmp_out0), .out1(tmp_out1));
|
DW02_multp #(WIDTH, WIDTH, 2*WIDTH+2) mul(.a, .b, .tc(1'b0), .out0(tmp_out0), .out1(tmp_out1));
|
||||||
assign out0 = tmp_out0[2*WIDTH-1:0];
|
assign out0 = tmp_out0[2*WIDTH-1:0];
|
||||||
assign out1 = tmp_out1[2*WIDTH-1:0];
|
assign out1 = tmp_out1[2*WIDTH-1:0];
|
||||||
end else if (`DESIGN_COMPILER == 2) begin:mul // *** need to remove this
|
|
||||||
mult_cs #(WIDTH) mul(.a, .b, .tc(1'b0), .sum(out0), .carry(out1));
|
|
||||||
end else begin:mul // force a nonredundant multiplier. This will simulate properly and also is appropriate for FPGAs.
|
end else begin:mul // force a nonredundant multiplier. This will simulate properly and also is appropriate for FPGAs.
|
||||||
assign out0 = a * b;
|
assign out0 = a * b;
|
||||||
assign out1 = 0;
|
assign out1 = 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user