From cd0da2e3b335d4c7b28aff83e62b39874989aba2 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 23 Aug 2022 10:34:39 -0500 Subject: [PATCH] Updated the names of the *WriteDataM inside the LSU to more meaningful names. Moved the FWriteDataMux so that the bus and dtim both get fpu stores. Modified the PMA to disallow double sized reads when XLEN=32. --- pipelined/src/generic/flop/simpleram.sv | 10 +++--- pipelined/src/ifu/ifu.sv | 2 +- pipelined/src/lsu/atomic.sv | 8 ++--- pipelined/src/lsu/dtim.sv | 8 ++--- pipelined/src/lsu/lsu.sv | 45 +++++++++++-------------- pipelined/src/lsu/lsuvirtmen.sv | 6 ++-- pipelined/src/lsu/subwordwrite.sv | 31 +++++++++++------ pipelined/src/mmu/adrdecs.sv | 12 +++---- 8 files changed, 63 insertions(+), 59 deletions(-) diff --git a/pipelined/src/generic/flop/simpleram.sv b/pipelined/src/generic/flop/simpleram.sv index b08021614..08c7d113b 100644 --- a/pipelined/src/generic/flop/simpleram.sv +++ b/pipelined/src/generic/flop/simpleram.sv @@ -34,15 +34,15 @@ module simpleram #(parameter BASE=0, RANGE = 65535) ( input logic clk, input logic [31:0] a, input logic we, - input logic [`XLEN/8-1:0] ByteMask, - input logic [`XLEN-1:0] wd, - output logic [`XLEN-1:0] rd + input logic [`LLEN/8-1:0] ByteMask, + input logic [`LLEN-1:0] wd, + output logic [`LLEN-1:0] rd ); localparam ADDR_WDITH = $clog2(RANGE/8); - localparam OFFSET = $clog2(`XLEN/8); + localparam OFFSET = $clog2(`LLEN/8); - bram1p1rw #(`XLEN/8, 8, ADDR_WDITH) + bram1p1rw #(`LLEN/8, 8, ADDR_WDITH) memory(.clk, .we, .bwe(ByteMask), .addr(a[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(rd), .din(wd)); endmodule diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index b8e636c5f..e699bc576 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -187,7 +187,7 @@ module ifu ( if (`IMEM == `MEM_TIM) begin : irom // *** fix up dtim taking PA_BITS rather than XLEN, *** IEUAdr is a bad name. Probably use a ROM rather than DTIM dtim irom(.clk, .reset, .CPUBusy, .LSURWM(2'b10), .IEUAdrM({{(`XLEN-32){1'b0}}, PCPF[31:0]}), .IEUAdrE(PCNextFSpill), - .TrapM(1'b0), .FinalWriteDataM(), .ByteMaskM('0), + .TrapM(1'b0), .WriteDataM(), .ByteMaskM('0), .ReadDataWordM({{(`XLEN-32){1'b0}}, FinalInstrRawF}), .BusStall, .LSUBusWrite(), .LSUBusRead(IFUBusRead), .BusCommittedM(), .DCacheStallM(ICacheStallF), .Cacheable(CacheableF), .DCacheCommittedM(), .DCacheMiss(ICacheMiss), .DCacheAccess(ICacheAccess)); diff --git a/pipelined/src/lsu/atomic.sv b/pipelined/src/lsu/atomic.sv index 5a0753974..2c7259a19 100644 --- a/pipelined/src/lsu/atomic.sv +++ b/pipelined/src/lsu/atomic.sv @@ -34,23 +34,23 @@ module atomic ( input logic clk, input logic reset, StallW, input logic [`XLEN-1:0] ReadDataM, - input logic [`XLEN-1:0] LSUWriteDataM, + input logic [`XLEN-1:0] IMWriteDataM, input logic [`PA_BITS-1:0] LSUPAdrM, input logic [6:0] LSUFunct7M, input logic [2:0] LSUFunct3M, input logic [1:0] LSUAtomicM, input logic [1:0] PreLSURWM, input logic IgnoreRequest, - output logic [`XLEN-1:0] AMOWriteDataM, + output logic [`XLEN-1:0] IMAWriteDataM, output logic SquashSCW, output logic [1:0] LSURWM); logic [`XLEN-1:0] AMOResult; logic MemReadM; - amoalu amoalu(.srca(ReadDataM), .srcb(LSUWriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]), + amoalu amoalu(.srca(ReadDataM), .srcb(IMWriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]), .result(AMOResult)); - mux2 #(`XLEN) wdmux(LSUWriteDataM, AMOResult, LSUAtomicM[1], AMOWriteDataM); + mux2 #(`XLEN) wdmux(IMWriteDataM, AMOResult, LSUAtomicM[1], IMAWriteDataM); assign MemReadM = PreLSURWM[1] & ~IgnoreRequest; lrsc lrsc(.clk, .reset, .StallW, .MemReadM, .PreLSURWM, .LSUAtomicM, .LSUPAdrM, .SquashSCW, .LSURWM); diff --git a/pipelined/src/lsu/dtim.sv b/pipelined/src/lsu/dtim.sv index 5b4969ab8..4dcbda665 100644 --- a/pipelined/src/lsu/dtim.sv +++ b/pipelined/src/lsu/dtim.sv @@ -36,10 +36,10 @@ module dtim( input logic [`XLEN-1:0] IEUAdrM, input logic [`XLEN-1:0] IEUAdrE, input logic TrapM, - input logic [`XLEN-1:0] FinalWriteDataM, - input logic [`XLEN/8-1:0] ByteMaskM, + input logic [`LLEN-1:0] WriteDataM, + input logic [`LLEN/8-1:0] ByteMaskM, input logic Cacheable, - output logic [`XLEN-1:0] ReadDataWordM, + output logic [`LLEN-1:0] ReadDataWordM, output logic BusStall, output logic LSUBusWrite, output logic LSUBusRead, @@ -53,7 +53,7 @@ module dtim( .clk, .ByteMask(ByteMaskM), .a(CPUBusy | LSURWM[0] | reset ? IEUAdrM[31:0] : IEUAdrE[31:0]), // move mux out; this shouldn't be needed when stails are handled differently *** .we(LSURWM[0] & Cacheable & ~TrapM), // have to ignore write if Trap. - .wd(FinalWriteDataM), .rd(ReadDataWordM)); + .wd(WriteDataM), .rd(ReadDataWordM)); // since we have a local memory the bus connections are all disabled. // There are no peripherals supported. diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index cf44fa297..cb37e1ef3 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -110,9 +110,10 @@ module lsu ( logic BusCommittedM, DCacheCommittedM; logic SelLSUBusWord; logic DataDAPageFaultM; - logic [`XLEN-1:0] LSUWriteDataM; + logic [`XLEN-1:0] IMWriteDataM, IMAWriteDataM; + logic [`LLEN-1:0] IMAFWriteDataM; logic [`LLEN-1:0] ReadDataM; - logic [(`LLEN-1)/8:0] ByteMaskM, FinalByteMaskM; + logic [(`LLEN-1)/8:0] ByteMaskM; // *** TO DO: Burst mode @@ -131,7 +132,7 @@ module lsu ( .TrapM, .DCacheStallM, .SATP_REGW, .PCF, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadDataM(ReadDataM[`XLEN-1:0]), .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, - .IEUAdrExtM, .PTE, .LSUWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IEUAdrE, + .IEUAdrExtM, .PTE, .IMWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IEUAdrE, .LSUAdrE, .PreLSUPAdrM, .CPUBusy, .InterlockStall, .SelHPTW, .IgnoreRequestTLB); end else begin @@ -140,7 +141,7 @@ module lsu ( assign LSUAdrE = IEUAdrE[11:0]; assign PreLSUPAdrM = IEUAdrExtM; assign LSUFunct3M = Funct3M; assign LSUFunct7M = Funct7M; assign LSUAtomicM = AtomicM; - assign LSUWriteDataM = WriteDataM; + assign IMWriteDataM = WriteDataM; end // CommittedM tells the CPU's privilege unit the current instruction @@ -188,8 +189,7 @@ module lsu ( // Memory System // Either Data Cache or Data Tightly Integrated Memory or just bus interface ///////////////////////////////////////////////////////////////////////////////////////////// - logic [`XLEN-1:0] AMOWriteDataM, IEUWriteDataM, LittleEndianWriteDataM; - logic [`LLEN-1:0] FinalWriteDataM; + logic [`LLEN-1:0] LSUWriteDataM, LittleEndianWriteDataM; logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM; logic [`LLEN-1:0] ReadDataWordMuxM; logic IgnoreRequest; @@ -202,7 +202,7 @@ module lsu ( if (`DMEM == `MEM_TIM) begin : dtim // *** directly instantiate RAM or ROM here. Instantiate SRAM1P1RW. // Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops - dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM(IEUWriteDataM), //*** fix the dtim FinalWriteData + dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .WriteDataM(LSUWriteDataM), //*** fix the dtim FinalWriteData .ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM, .DCacheStallM, .DCacheCommittedM, .ByteMaskM(ByteMaskM[`XLEN/8-1:0]), .Cacheable(CacheableM), .DCacheMiss, .DCacheAccess); @@ -230,23 +230,15 @@ module lsu ( mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DLSUBusBuffer[`XLEN-1:0]}), .s(SelUncachedAdr), .y(ReadDataWordMuxM)); - mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(IEUWriteDataM), + mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(LSUWriteDataM), .s(SelUncachedAdr), .y(LSUBusHWDATA)); - - // *** Ross fix up location of mux to be here; remove from IEU datapath - // *** look over entire FPU write and read paths - // *** Why is if(CACHE_ENABLED) begin : dcache - if (`F_SUPPORTED) - mux2 #(`LLEN) datamux({{`LLEN/`XLEN}{IEUWriteDataM}}, FWriteDataM, FpLoadStoreM, FinalWriteDataM); - else - assign FinalWriteDataM = IEUWriteDataM; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( .clk, .reset, .CPUBusy, .SelLSUBusWord, .RW(LSURWM), .Atomic(LSUAtomicM), .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), - .ByteMask(FinalByteMaskM), .WordCount, - .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM), + .ByteMask(ByteMaskM), .WordCount, + .FinalWriteData(LSUWriteDataM), .Cacheable(CacheableM), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .IgnoreRequestTLB, .TrapM, .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(ReadDataWordM), @@ -266,24 +258,27 @@ module lsu ( // Atomic operations ///////////////////////////////////////////////////////////////////////////////////////////// if (`A_SUPPORTED) begin:atomic - atomic atomic(.clk, .reset, .StallW, .ReadDataM(ReadDataM[`XLEN-1:0]), .LSUWriteDataM, .LSUPAdrM, + atomic atomic(.clk, .reset, .StallW, .ReadDataM(ReadDataM[`XLEN-1:0]), .IMWriteDataM, .LSUPAdrM, .LSUFunct7M, .LSUFunct3M, .LSUAtomicM, .PreLSURWM, .IgnoreRequest, - .AMOWriteDataM, .SquashSCW, .LSURWM); + .IMAWriteDataM, .SquashSCW, .LSURWM); end else begin:lrsc - assign SquashSCW = 0; assign LSURWM = PreLSURWM; assign AMOWriteDataM = LSUWriteDataM; + assign SquashSCW = 0; assign LSURWM = PreLSURWM; assign IMAWriteDataM = IMWriteDataM; end + if (`F_SUPPORTED) + mux2 #(`LLEN) datamux({{`LLEN/`XLEN}{IMAWriteDataM}}, FWriteDataM, FpLoadStoreM, IMAFWriteDataM); + else assign IMAFWriteDataM = IMAWriteDataM; + ///////////////////////////////////////////////////////////////////////////////////////////// // Subword Accesses ///////////////////////////////////////////////////////////////////////////////////////////// subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]), .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]), - .LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM); + .LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); // Compute byte masks swbytemaskword #(`LLEN) swbytemask(.Size(LSUFunct3M), .Adr(LSUPAdrM[$clog2(`LLEN/8)-1:0]), .ByteMask(ByteMaskM)); - assign FinalByteMaskM = ByteMaskM; ///////////////////////////////////////////////////////////////////////////////////////////// // MW Pipeline Register @@ -297,10 +292,10 @@ module lsu ( // swap the bytes when read from big-endian memory ///////////////////////////////////////////////////////////////////////////////////////////// if (`BIGENDIAN_SUPPORTED) begin:endian - bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(IEUWriteDataM)); + bigendianswap #(`LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM)); end else begin - assign IEUWriteDataM = LittleEndianWriteDataM; + assign LSUWriteDataM = LittleEndianWriteDataM; assign LittleEndianReadDataWordM = ReadDataWordM; end diff --git a/pipelined/src/lsu/lsuvirtmen.sv b/pipelined/src/lsu/lsuvirtmen.sv index 748aa3df0..a2d7c6285 100644 --- a/pipelined/src/lsu/lsuvirtmen.sv +++ b/pipelined/src/lsu/lsuvirtmen.sv @@ -54,7 +54,7 @@ module lsuvirtmem( output logic [6:0] LSUFunct7M, input logic [`XLEN-1:0] IEUAdrE, output logic [`XLEN-1:0] PTE, - output logic [`XLEN-1:0] LSUWriteDataM, + output logic [`XLEN-1:0] IMWriteDataM, output logic [1:0] PageType, output logic [1:0] PreLSURWM, output logic [1:0] LSUAtomicM, @@ -112,8 +112,8 @@ module lsuvirtmem( mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLSUAdrE); mux2 #(`XLEN+2) lsupadrmux(IEUAdrExtM, HPTWAdrExt, SelHPTWAdr, PreLSUPAdrM); if(`HPTW_WRITES_SUPPORTED) - mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, LSUWriteDataM); - else assign LSUWriteDataM = WriteDataM; + mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, IMWriteDataM); + else assign IMWriteDataM = WriteDataM; mux2 #(12) replaymux(PreLSUAdrE, IEUAdrExtM[11:0], SelReplayMemE, LSUAdrE); // replay cpu request after hptw. *** redudant with mux in cache. // always block interrupts when using the hardware page table walker. diff --git a/pipelined/src/lsu/subwordwrite.sv b/pipelined/src/lsu/subwordwrite.sv index d42033ef7..59546ec74 100644 --- a/pipelined/src/lsu/subwordwrite.sv +++ b/pipelined/src/lsu/subwordwrite.sv @@ -33,25 +33,34 @@ module subwordwrite ( input logic [2:0] LSUPAdrM, input logic [2:0] LSUFunct3M, - input logic [`XLEN-1:0] AMOWriteDataM, - output logic [`XLEN-1:0] LittleEndianWriteDataM); + input logic [`LLEN-1:0] IMAFWriteDataM, + output logic [`LLEN-1:0] LittleEndianWriteDataM); // Replicate data for subword writes - if (`XLEN == 64) begin:sww + if (`LLEN == 128) begin:sww + always_comb + case(LSUFunct3M[2:0]) + 2'b000: LittleEndianWriteDataM = {16{IMAFWriteDataM[7:0]}}; // sb + 2'b001: LittleEndianWriteDataM = {8{IMAFWriteDataM[15:0]}}; // sh + 2'b010: LittleEndianWriteDataM = {4{IMAFWriteDataM[31:0]}}; // sw + 2'b011: LittleEndianWriteDataM = {2{IMAFWriteDataM[63:0]}}; // sd + default: LittleEndianWriteDataM = IMAFWriteDataM; // sq + endcase + end else if (`LLEN == 64) begin:sww always_comb case(LSUFunct3M[1:0]) - 2'b00: LittleEndianWriteDataM = {8{AMOWriteDataM[7:0]}}; // sb - 2'b01: LittleEndianWriteDataM = {4{AMOWriteDataM[15:0]}}; // sh - 2'b10: LittleEndianWriteDataM = {2{AMOWriteDataM[31:0]}}; // sw - 2'b11: LittleEndianWriteDataM = AMOWriteDataM; // sw + 2'b00: LittleEndianWriteDataM = {8{IMAFWriteDataM[7:0]}}; // sb + 2'b01: LittleEndianWriteDataM = {4{IMAFWriteDataM[15:0]}}; // sh + 2'b10: LittleEndianWriteDataM = {2{IMAFWriteDataM[31:0]}}; // sw + 2'b11: LittleEndianWriteDataM = IMAFWriteDataM; // sd endcase end else begin:sww // 32-bit always_comb case(LSUFunct3M[1:0]) - 2'b00: LittleEndianWriteDataM = {4{AMOWriteDataM[7:0]}}; // sb - 2'b01: LittleEndianWriteDataM = {2{AMOWriteDataM[15:0]}}; // sh - 2'b10: LittleEndianWriteDataM = AMOWriteDataM; // sw - default: LittleEndianWriteDataM = AMOWriteDataM; // shouldn't happen + 2'b00: LittleEndianWriteDataM = {4{IMAFWriteDataM[7:0]}}; // sb + 2'b01: LittleEndianWriteDataM = {2{IMAFWriteDataM[15:0]}}; // sh + 2'b10: LittleEndianWriteDataM = IMAFWriteDataM; // sw + default: LittleEndianWriteDataM = IMAFWriteDataM; // shouldn't happen endcase end endmodule diff --git a/pipelined/src/mmu/adrdecs.sv b/pipelined/src/mmu/adrdecs.sv index 0104ca578..3923c2a67 100644 --- a/pipelined/src/mmu/adrdecs.sv +++ b/pipelined/src/mmu/adrdecs.sv @@ -38,17 +38,17 @@ module adrdecs ( output logic [8:0] SelRegions ); + localparam logic [3:0] SUPPORTED_SIZE = (`XLEN == 64 ? 4'b1111 : 4'b0111); // Determine which region of physical memory (if any) is being accessed - // *** eventually uncomment Access signals - adrdec ddr4dec(PhysicalAddress, `EXT_MEM_BASE, `EXT_MEM_RANGE, `EXT_MEM_SUPPORTED, AccessRWX, Size, 4'b1111, SelRegions[7]); - adrdec boottimdec(PhysicalAddress, `BOOTROM_BASE, `BOOTROM_RANGE, `BOOTROM_SUPPORTED, /*1'b1*/AccessRX, Size, 4'b1111, SelRegions[6]); - adrdec timdec(PhysicalAddress, `RAM_BASE, `RAM_RANGE, `RAM_SUPPORTED, /*1'b1*/AccessRWX, Size, 4'b1111, SelRegions[5]); + adrdec ddr4dec(PhysicalAddress, `EXT_MEM_BASE, `EXT_MEM_RANGE, `EXT_MEM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[7]); + adrdec boottimdec(PhysicalAddress, `BOOTROM_BASE, `BOOTROM_RANGE, `BOOTROM_SUPPORTED, AccessRX, Size, SUPPORTED_SIZE, SelRegions[6]); + adrdec timdec(PhysicalAddress, `RAM_BASE, `RAM_RANGE, `RAM_SUPPORTED, AccessRWX, Size, SUPPORTED_SIZE, SelRegions[5]); - adrdec clintdec(PhysicalAddress, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, Size, 4'b1111, SelRegions[4]); + adrdec clintdec(PhysicalAddress, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE, SelRegions[4]); adrdec gpiodec(PhysicalAddress, `GPIO_BASE, `GPIO_RANGE, `GPIO_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[3]); adrdec uartdec(PhysicalAddress, `UART_BASE, `UART_RANGE, `UART_SUPPORTED, AccessRW, Size, 4'b0001, SelRegions[2]); adrdec plicdec(PhysicalAddress, `PLIC_BASE, `PLIC_RANGE, `PLIC_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[1]); - adrdec sdcdec(PhysicalAddress, `SDC_BASE, `SDC_RANGE, `SDC_SUPPORTED, AccessRW, Size, 4'b1100, SelRegions[0]); // *** PMA chapter says xlen only like CLINT + adrdec sdcdec(PhysicalAddress, `SDC_BASE, `SDC_RANGE, `SDC_SUPPORTED, AccessRW, Size, SUPPORTED_SIZE & 4'b1100, SelRegions[0]); assign SelRegions[8] = ~|(SelRegions[7:0]);