diff --git a/pipelined/src/generic/flop/bram1p1rw.sv b/pipelined/src/generic/flop/bram1p1rw.sv new file mode 100644 index 00000000..2b17fb44 --- /dev/null +++ b/pipelined/src/generic/flop/bram1p1rw.sv @@ -0,0 +1,71 @@ +/////////////////////////////////////////// +// block ram model should be equivalent to sram. +// +// Written: Ross Thompson +// March 29, 2022 +// Modified: Based on UG901 vivado documentation. +// +// Purpose: On-chip SIMPLERAM, external to core +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +// This model actually works correctly with vivado. 
+ +`include "wally-config.vh" + +module bram1p1rw + #( + //-------------------------------------------------------------------------- + parameter NUM_COL = 8, + parameter COL_WIDTH = 8, + parameter ADDR_WIDTH = 10, + // Addr Width in bits : 2**ADDR_WIDTH = RAM Depth + parameter DATA_WIDTH = NUM_COL*COL_WIDTH // Data Width in bits + //---------------------------------------------------------------------- + ) ( + input logic clk, + input logic ena, + input logic [NUM_COL-1:0] we, + input logic [ADDR_WIDTH-1:0] addr, + output logic [DATA_WIDTH-1:0] dout, + input logic [DATA_WIDTH-1:0] din + ); + // Core Memory + logic [DATA_WIDTH-1:0] RAM [(2**ADDR_WIDTH)-1:0]; + integer i; + + initial begin + $readmemh("big64.txt", RAM); + end + + always @ (posedge clk) begin + dout <= RAM[addr]; + if(ena) begin + for(i=0;i>(1+`XLEN/32):(RANGE+BASE)>>1+(`XLEN/32)]; // discard bottom 2 or 3 bits of address offset within word or doubleword @@ -55,5 +63,6 @@ module simpleram #(parameter BASE=0, RANGE = 65535) ( if (we & ByteMask[index]) RAM[adrmsbs][8*(index+1)-1:8*index] <= #1 wd[8*(index+1)-1:8*index]; end end + -----/\----- EXCLUDED -----/\----- */ endmodule diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 6adc5ad2..480556aa 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -142,8 +142,7 @@ module ifu ( mmu #(.TLB_ENTRIES(`ITLB_ENTRIES), .IMMU(1)) immu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .DisableTranslation(1'b0), - .PAdr(PCFExt[`PA_BITS-1:0]), - .VAdr(PCFSpill), + .VAdr(PCFExt), .Size(2'b10), .PTE(PTE), .PageTypeWriteVal(PageType), diff --git a/pipelined/src/lsu/dtim.sv b/pipelined/src/lsu/dtim.sv index 6ca5be4a..5b4969ab 100644 --- a/pipelined/src/lsu/dtim.sv +++ b/pipelined/src/lsu/dtim.sv @@ -48,7 +48,7 @@ module dtim( output logic DCacheCommittedM, output logic DCacheMiss, output logic DCacheAccess); - + simpleram #(.BASE(`RAM_BASE), .RANGE(`RAM_RANGE)) ram ( .clk, 
.ByteMask(ByteMaskM), .a(CPUBusy | LSURWM[0] | reset ? IEUAdrM[31:0] : IEUAdrE[31:0]), // move mux out; this shouldn't be needed when stails are handled differently *** diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 7442aea7..d5e5b2b3 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -91,7 +91,7 @@ module lsu ( logic [2:0] LSUFunct3M; logic [6:0] LSUFunct7M; logic [1:0] LSUAtomicM; - (* mark_debug = "true" *) logic [`PA_BITS-1:0] PreLSUPAdrM; + (* mark_debug = "true" *) logic [`XLEN+1:0] PreLSUPAdrM; logic [11:0] PreLSUAdrE, LSUAdrE; logic CPUBusy; logic DCacheStallM; @@ -132,7 +132,7 @@ module lsu ( assign {InterlockStall, SelHPTW, PTE, PageType, DTLBWriteM, ITLBWriteF, IgnoreRequestTLB} = '0; assign IgnoreRequestTrapM = TrapM; assign CPUBusy = StallW; assign PreLSURWM = MemRWM; assign LSUAdrE = PreLSUAdrE; assign PreLSUAdrE = IEUAdrE[11:0]; - assign PreLSUPAdrM = IEUAdrExtM[`PA_BITS-1:0]; + assign PreLSUPAdrM = IEUAdrExtM; assign LSUFunct3M = Funct3M; assign LSUFunct7M = Funct7M; assign LSUAtomicM = AtomicM; assign LSUWriteDataM = WriteDataM; end @@ -151,8 +151,7 @@ module lsu ( mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .DisableTranslation, - .PAdr(PreLSUPAdrM), - .VAdr(IEUAdrM), + .VAdr(PreLSUPAdrM), .Size(LSUFunct3M[1:0]), .PTE, .PageTypeWriteVal(PageType), diff --git a/pipelined/src/lsu/lsuvirtmen.sv b/pipelined/src/lsu/lsuvirtmen.sv index 83176470..751c2c92 100644 --- a/pipelined/src/lsu/lsuvirtmen.sv +++ b/pipelined/src/lsu/lsuvirtmen.sv @@ -59,7 +59,7 @@ module lsuvirtmem( output logic [1:0] PreLSURWM, output logic [1:0] LSUAtomicM, output logic [11:0] LSUAdrE, - output logic [`PA_BITS-1:0] PreLSUPAdrM, + output logic [`XLEN+1:0] PreLSUPAdrM, input logic [`XLEN+1:0] IEUAdrExtM, // *** can move internally. 
+ output logic InterlockStall, @@ -71,13 +71,15 @@ module lsuvirtmem( logic AnyCPUReqM; logic [`PA_BITS-1:0] HPTWAdr; + logic [`XLEN+1:0] HPTWAdrExt; logic [1:0] HPTWRW; logic [2:0] HPTWSize; logic SelReplayMemE; logic [11:0] PreLSUAdrE; logic ITLBMissOrDAFaultF, ITLBMissOrDAFaultNoTrapF; - logic DTLBMissOrDAFaultM, DTLBMissOrDAFaultNoTrapM; - + logic DTLBMissOrDAFaultM, DTLBMissOrDAFaultNoTrapM; + logic SelHPTWAdr; + assign ITLBMissOrDAFaultF = ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF); assign DTLBMissOrDAFaultM = DTLBMissM | (`HPTW_WRITES_SUPPORTED & DataDAPageFaultM); assign ITLBMissOrDAFaultNoTrapF = ITLBMissOrDAFaultF & ~TrapM; @@ -94,13 +96,22 @@ module lsuvirtmem( .DCacheStallM, .HPTWAdr, .HPTWRW, .HPTWSize); // *** possible future optimization of simplifying page table entry with precomputed misalignment (Ross) low priority + // Once the walk is done and it is time to update the DTLB we need to switch back + // to the original data virtual address. + assign SelHPTWAdr = SelHPTW & ~DTLBWriteM; + // multiplex the outputs to LSU + if(`XLEN+2-`PA_BITS > 0) begin + logic [(`XLEN+2-`PA_BITS)-1:0] zeros; + assign zeros = '0; + assign HPTWAdrExt = {zeros, HPTWAdr}; + end else assign HPTWAdrExt = HPTWAdr; mux2 #(2) rwmux(MemRWM, HPTWRW, SelHPTW, PreLSURWM); mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, LSUFunct3M); mux2 #(7) funct7mux(Funct7M, 7'b0, SelHPTW, LSUFunct7M); mux2 #(2) atomicmux(AtomicM, 2'b00, SelHPTW, LSUAtomicM); mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, PreLSUAdrE); - mux2 #(`PA_BITS) lsupadrmux(IEUAdrExtM[`PA_BITS-1:0], HPTWAdr, SelHPTW, PreLSUPAdrM); + mux2 #(`XLEN+2) lsupadrmux(IEUAdrExtM, HPTWAdrExt, SelHPTWAdr, PreLSUPAdrM); if(`HPTW_WRITES_SUPPORTED) mux2 #(`XLEN) lsuwritedatamux(WriteDataM, PTE, SelHPTW, LSUWriteDataM); else assign LSUWriteDataM = WriteDataM; diff --git a/pipelined/src/mmu/mmu.sv b/pipelined/src/mmu/mmu.sv index 0d53aacc..ccf49ce4 100644 --- a/pipelined/src/mmu/mmu.sv +++ b/pipelined/src/mmu/mmu.sv 
@@ -49,16 +49,10 @@ module mmu #(parameter TLB_ENTRIES = 8, // number of TLB Entries // 11 - TLB is accessed for both read and write input logic DisableTranslation, - // VAdr goes to the TLB only. Virtual if the TLB is active. - // PAdr goes to address mux bypassing the TLB. PAdr used when there is no translation. - // Comes from either the program address (instruction address or load/store address) - // or from the hardware pagetable walker. - // PAdr is intended to used as a phsycial address. Discarded by the address mux when translation is - // performed. + // VAdr is the virtual/physical address from IEU or physical address from HPTW. // PhysicalAddress is selected to be PAdr when no translation or the translated VAdr (TLBPAdr) // when there is translation. - input logic [`PA_BITS-1:0] PAdr, // *** consider renaming this. - input logic [`XLEN-1:0] VAdr, + input logic [`XLEN+1:0] VAdr, input logic [1:0] Size, // 00 = 8 bits, 01 = 16 bits, 10 = 32 bits , 11 = 64 bits // Controls for writing a new entry to the TLB @@ -106,7 +100,7 @@ module mmu #(parameter TLB_ENTRIES = 8, // number of TLB Entries tlb(.clk, .reset, .SATP_MODE(SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]), .SATP_ASID(SATP_REGW[`ASID_BASE+`ASID_BITS-1:`ASID_BASE]), - .VAdr, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, + .VAdr(VAdr[`XLEN-1:0]), .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadAccess, .WriteAccess, .DisableTranslation, .PTE, .PageTypeWriteVal, .TLBWrite, .TLBFlush, .TLBPAdr, .TLBMiss, .TLBHit, @@ -122,8 +116,8 @@ module mmu #(parameter TLB_ENTRIES = 8, // number of TLB Entries // the lower 12 bits are the page offset. These are never changed from the orginal // non translated address. 
//mux2 #(`PA_BITS) addressmux(PAdr, TLBPAdr, Translate, PhysicalAddress); - mux2 #(`PA_BITS-12) addressmux(PAdr[`PA_BITS-1:12], TLBPAdr[`PA_BITS-1:12], Translate, PhysicalAddress[`PA_BITS-1:12]); - assign PhysicalAddress[11:0] = PAdr[11:0]; + mux2 #(`PA_BITS-12) addressmux(VAdr[`PA_BITS-1:12], TLBPAdr[`PA_BITS-1:12], Translate, PhysicalAddress[`PA_BITS-1:12]); + assign PhysicalAddress[11:0] = VAdr[11:0]; /////////////////////////////////////////// diff --git a/pipelined/src/uncore/ram.sv b/pipelined/src/uncore/ram.sv index e9b0af32..120f3247 100644 --- a/pipelined/src/uncore/ram.sv +++ b/pipelined/src/uncore/ram.sv @@ -43,78 +43,22 @@ module ram #(parameter BASE=0, RANGE = 65535) ( output logic HRESPRam, HREADYRam ); - localparam MemStartAddr = BASE>>(1+`XLEN/32); - localparam MemEndAddr = (RANGE+BASE)>>1+(`XLEN/32); + logic [`XLEN/8-1:0] ByteMaskM; + logic [31:0] HWADDR, A; + logic prevHREADYRam, risingHREADYRam; + logic initTrans; + logic memwrite; + logic [3:0] busycount; - logic [`XLEN-1:0] RAM[BASE>>(1+`XLEN/32):(RANGE+BASE)>>1+(`XLEN/32)]; - logic [31:0] HWADDR, A; + swbytemask swbytemask(.HSIZED, .HADDRD(HWADDR[2:0]), .ByteMask(ByteMaskM)); - logic prevHREADYRam, risingHREADYRam; - logic initTrans; - logic memwrite; - logic [3:0] busycount; - logic [`XLEN/8-1:0] ByteMaskM; - - if(`FPGA) begin:ram - initial begin - // *** need to address this preload for fpga. It should work as a preload file - // but for some reason vivado is not synthesizing the preload. 
- //$readmemh(PRELOAD, RAM); - RAM[0] = 64'h94e1819300002197; - RAM[1] = 64'h4281420141014081; - RAM[2] = 64'h4481440143814301; - RAM[3] = 64'h4681460145814501; - RAM[4] = 64'h4881480147814701; - RAM[5] = 64'h4a814a0149814901; - RAM[6] = 64'h4c814c014b814b01; - RAM[7] = 64'h4e814e014d814d01; - RAM[8] = 64'h0110011b4f814f01; - RAM[9] = 64'h059b45011161016e; - RAM[10] = 64'h0004063705fe0010; - RAM[11] = 64'h05a000ef8006061b; - RAM[12] = 64'h0ff003930000100f; - RAM[13] = 64'h4e952e3110060e37; - RAM[14] = 64'hc602829b0053f2b7; - RAM[15] = 64'h2023fe02dfe312fd; - RAM[16] = 64'h829b0053f2b7007e; - RAM[17] = 64'hfe02dfe312fdc602; - RAM[18] = 64'h4de31efd000e2023; - RAM[19] = 64'h059bf1402573fdd0; - RAM[20] = 64'h0000061705e20870; - RAM[21] = 64'h0010029b01260613; - RAM[22] = 64'h11010002806702fe; - RAM[23] = 64'h84b2842ae426e822; - RAM[24] = 64'h892ee04aec064505; - RAM[25] = 64'h06e000ef07e000ef; - RAM[26] = 64'h979334fd02905563; - RAM[27] = 64'h07930177d4930204; - RAM[28] = 64'h4089093394be2004; - RAM[29] = 64'h04138522008905b3; - RAM[30] = 64'h19e3014000ef2004; - RAM[31] = 64'h64a2644260e2fe94; - RAM[32] = 64'h6749808261056902; - RAM[33] = 64'hdfed8b8510472783; - RAM[34] = 64'h2423479110a73823; - RAM[35] = 64'h10472783674910f7; - RAM[36] = 64'h20058693ffed8b89; - RAM[37] = 64'h05a1118737836749; - RAM[38] = 64'hfed59be3fef5bc23; - RAM[39] = 64'h1047278367498082; - RAM[40] = 64'h67c98082dfed8b85; - RAM[41] = 64'h0000808210a7a023; - end // initial begin - end // if (FPGA) - - swbytemask swbytemask(.HSIZED, .HADDRD(A[2:0]), .ByteMask(ByteMaskM)); - assign initTrans = HREADY & HSELRam & (HTRANS != 2'b00); // *** this seems like a weird way to use reset flopenr #(1) memwritereg(HCLK, 1'b0, initTrans | ~HRESETn, HSELRam & HWRITE, memwrite); flopenr #(32) haddrreg(HCLK, 1'b0, initTrans | ~HRESETn, HADDR, A); - // busy FSM to extend READY signal - always_ff @(posedge HCLK, negedge HRESETn) + always @(posedge HCLK, negedge HRESETn) if (~HRESETn) begin busycount <= 0; HREADYRam <= 
#1 0; @@ -131,47 +75,26 @@ module ram #(parameter BASE=0, RANGE = 65535) ( end end assign HRESPRam = 0; // OK + + localparam ADDR_WDITH = $clog2(RANGE/8); + localparam OFFSET = $clog2(`XLEN/8); // Rising HREADY edge detector // Indicates when ram is finishing up // Needed because HREADY may go high for other reasons, // and we only want to write data when finishing up. - flopr #(1) prevhreadyRamreg(HCLK,~HRESETn,HREADYRam,prevHREADYRam); + flopenr #(1) prevhreadyRamreg(HCLK,~HRESETn, 1'b1, HREADYRam,prevHREADYRam); assign risingHREADYRam = HREADYRam & ~prevHREADYRam; - // Model memory read and write -/* -----\/----- EXCLUDED -----\/----- - integer index; - - initial begin - for(index = MemStartAddr; index < MemEndAddr; index = index + 1) begin - RAM[index] <= {`XLEN{1'b0}}; - end - end - -----/\----- EXCLUDED -----/\----- */ - - /* verilator lint_off WIDTH */ - genvar index; - always_ff @(posedge HCLK) + always @(posedge HCLK) HWADDR <= #1 A; - if (`XLEN == 64) begin:ramrw - always_ff @(posedge HCLK) - HREADRam <= #1 RAM[A[31:3]]; - for(index = 0; index < `XLEN/8; index++) begin - always_ff @(posedge HCLK) begin - if (memwrite & risingHREADYRam & ByteMaskM[index]) RAM[HWADDR[31:3]][8*(index+1)-1:8*index] <= #1 HWDATA[8*(index+1)-1:8*index]; - end - end - end else begin - always_ff @(posedge HCLK) - HREADRam <= #1 RAM[A[31:2]]; - for(index = 0; index < `XLEN/8; index++) begin - always_ff @(posedge HCLK) begin:ramrw - if (memwrite & risingHREADYRam & ByteMaskM[index]) RAM[HWADDR[31:2]][8*(index+1)-1:8*index] <= #1 HWDATA[8*(index+1)-1:8*index]; - end - end - end - /* verilator lint_on WIDTH */ + bram2p1r1w #(`XLEN/8, 8, ADDR_WDITH, `FPGA) + memory(.clk(HCLK), .enaA(1'b1), + .addrA(A[ADDR_WDITH+OFFSET-1:OFFSET]), .doutA(HREADRam), + .enaB(memwrite & risingHREADYRam), .weB(ByteMaskM), + .addrB(HWADDR[ADDR_WDITH+OFFSET-1:OFFSET]), .dinB(HWDATA)); + + endmodule - + diff --git a/pipelined/testbench/testbench-linux.sv b/pipelined/testbench/testbench-linux.sv index 
da623315..50893cf6 100644 --- a/pipelined/testbench/testbench-linux.sv +++ b/pipelined/testbench/testbench-linux.sv @@ -368,14 +368,14 @@ module testbench; ProgramLabelMapFile = {linuxImageDir,"disassembly/vmlinux.objdump.lab"}; // initialize bootrom memFile = $fopen({testvectorDir,"bootmem.bin"}, "rb"); - readResult = $fread(dut.uncore.bootrom.bootrom.RAM,memFile); + readResult = $fread(dut.uncore.bootrom.bootrom.memory.RAM,memFile); $fclose(memFile); // initialize RAM if (CHECKPOINT==0) memFile = $fopen({testvectorDir,"ram.bin"}, "rb"); else memFile = $fopen({checkpointDir,"ram.bin"}, "rb"); - readResult = $fread(dut.uncore.ram.ram.RAM,memFile); + readResult = $fread(dut.uncore.ram.ram.memory.RAM,memFile); $fclose(memFile); if (CHECKPOINT==0) begin // normal traceFileM = $fopen({testvectorDir,"all.txt"}, "r"); @@ -383,7 +383,7 @@ module testbench; InstrCountW = '0; AttemptedInstructionCount = '0; end else begin // checkpoint - //$readmemh({checkpointDir,"ram.txt"}, dut.uncore.ram.ram.RAM); + //$readmemh({checkpointDir,"ram.txt"}, dut.uncore.ram.ram.memory.RAM); traceFileE = $fopen({checkpointDir,"all.txt"}, "r"); traceFileM = $fopen({checkpointDir,"all.txt"}, "r"); InstrCountW = CHECKPOINT; @@ -791,9 +791,9 @@ module testbench; BaseAdr = SATP[43:0] << 12; for (i = 2; i >= 0; i--) begin PAdr = BaseAdr + (VPN[i] << 3); - // ram.RAM is 64-bit addressed. PAdr specifies a byte. We right shift + // ram.memory.RAM is 64-bit addressed. PAdr specifies a byte. We right shift // by 3 (the PTE size) to get the requested 64-bit PTE. 
- PTE = dut.uncore.ram.ram.RAM[PAdr >> 3]; + PTE = dut.uncore.ram.ram.memory.RAM[PAdr >> 3]; PTE_R = PTE[1]; PTE_X = PTE[3]; if (PTE_R | PTE_X) begin diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 775a21a7..2b8e6df8 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -44,7 +44,7 @@ module testbench; int test, i, errors, totalerrors; logic [31:0] sig32[0:SIGNATURESIZE]; logic [`XLEN-1:0] signature[0:SIGNATURESIZE]; - logic [`XLEN-1:0] testadr; + logic [`XLEN-1:0] testadr, testadrNoBase; string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName; logic [31:0] InstrW; @@ -170,6 +170,7 @@ logic [3:0] dummy; test = 1; totalerrors = 0; testadr = 0; + testadrNoBase = 0; // fill memory with defined values to reduce Xs in simulation // Quick note the memory will need to be initialized. The C library does not // guarantee the initialized reads. For example a strcmp can read 6 byte @@ -178,7 +179,7 @@ logic [3:0] dummy; // the design. if (TEST == "coremark") for (i=MemStartAddr; i