From e324db71b4bce8eebd267a454c16c367a61333dd Mon Sep 17 00:00:00 2001 From: DTowersM Date: Tue, 7 Jun 2022 23:27:54 +0000 Subject: [PATCH 1/7] cleaned up the code, now works for code bases larger than 0x10000000 --- pipelined/testbench/testbench.sv | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 0e1ec7d7..a997a248 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -220,15 +220,13 @@ logic [3:0] dummy; ProgramAddrMap = $fopen(ProgramAddrMapFile, "r"); if (ProgramLabelMap & ProgramAddrMap) begin // check we found both files while (!$feof(ProgramLabelMap)) begin - string addr, label; + string label; integer returncode; returncode = $fgets(label, ProgramLabelMap); - returncode = $fgets(addr, ProgramAddrMap); + returncode = $fgets(adrstr, ProgramAddrMap); if (label == "begin_signature\n") begin - adrstr = addr[1:7]; - if (adrstr=="0000000") // if running on rv64 we get the address at a later - adrstr = addr[9:15]; if (DEBUG) $display("%s begin_signature adrstr: %s", TEST, adrstr); + break; end end end @@ -238,8 +236,8 @@ logic [3:0] dummy; $fclose(ProgramLabelMap); $fclose(ProgramAddrMap); - testadr = (`RAM_BASE+adrstr.atohex())/(`XLEN/8); - testadrNoBase = (adrstr.atohex())/(`XLEN/8); + testadr = ($unsigned(adrstr.atohex()))/(`XLEN/8); + testadrNoBase = (adrstr.atohex() - `RAM_BASE)/(`XLEN/8); #600; // give time for instructions in pipeline to finish if (TEST == "embench") begin // Writes contents of begin_signature to .sim.output file From 02a424d65b2e973824dd97a09a2d0ae42170f969 Mon Sep 17 00:00:00 2001 From: DTowersM Date: Tue, 7 Jun 2022 23:58:50 +0000 Subject: [PATCH 2/7] modified testbench.sv- now works with coremark --- benchmarks/riscv-coremark/Makefile | 3 --- pipelined/testbench/testbench.sv | 7 +++++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/benchmarks/riscv-coremark/Makefile 
b/benchmarks/riscv-coremark/Makefile index 2100b784..ce8dea81 100644 --- a/benchmarks/riscv-coremark/Makefile +++ b/benchmarks/riscv-coremark/Makefile @@ -11,9 +11,6 @@ work/coremark.bare.riscv.elf.memfile: work/coremark.bare.riscv riscv64-unknown-elf-elf2hex --bit-width 64 --input $< --output $@ extractFunctionRadix.sh $<.elf.objdump -work/coremark.bare.riscv.objdump: work/coremark.bare.riscv - riscv64-unknown-elf-objdump -D work/coremark.bare.riscv > work/coremark.bare.riscv.objdump - work/coremark.bare.riscv: $(sources) Makefile # make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=/opt/riscv/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fno-toplevel-reorder --param=max-inline-insns-size=128 -fipa-pta" # These flags were used by WD on CoreMark diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 0e1ec7d7..03910d72 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -162,8 +162,8 @@ logic [3:0] dummy; InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); // initialize tests - localparam integer MemStartAddr = `RAM_BASE>>(1+`XLEN/32); - localparam integer MemEndAddr = (`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32); + localparam integer MemStartAddr = 0; + localparam integer MemEndAddr = `RAM_RANGE>>1+(`XLEN/32); initial begin @@ -204,6 +204,9 @@ logic [3:0] dummy; // if ($time % 100000 == 0) $display("Time is %0t", $time); end + logic [`XLEN-1:0] debugmemoryadr; + assign debugmemoryadr = dut.uncore.ram.ram.memory.RAM[5140]; + // check results always @(negedge clk) begin From 9e5ab4d378cd31d4022545e3257b12ef51f43880 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 8 Jun 2022 01:31:34 +0000 Subject: [PATCH 3/7] Added ahbapbbridge 
and cleaning RAM --- pipelined/src/generic/flop/bram1p1rw.sv | 4 +- pipelined/src/generic/flop/simpleram.sv | 22 +--- pipelined/src/uncore/ahbapbbridge.sv | 98 ++++++++++++++++ pipelined/src/uncore/gpio_apb.sv | 146 ++++++++++++++++++++++++ pipelined/src/uncore/ram.sv | 1 + pipelined/src/uncore/ram_orig.sv | 107 +++++++++++++++++ pipelined/src/uncore/uncore.sv | 4 +- 7 files changed, 357 insertions(+), 25 deletions(-) create mode 100644 pipelined/src/uncore/ahbapbbridge.sv create mode 100644 pipelined/src/uncore/gpio_apb.sv create mode 100644 pipelined/src/uncore/ram_orig.sv diff --git a/pipelined/src/generic/flop/bram1p1rw.sv b/pipelined/src/generic/flop/bram1p1rw.sv index 2b17fb44..cccf1f1f 100644 --- a/pipelined/src/generic/flop/bram1p1rw.sv +++ b/pipelined/src/generic/flop/bram1p1rw.sv @@ -44,7 +44,7 @@ module bram1p1rw //---------------------------------------------------------------------- ) ( input logic clk, - input logic ena, + input logic en, input logic [NUM_COL-1:0] we, input logic [ADDR_WIDTH-1:0] addr, output logic [DATA_WIDTH-1:0] dout, @@ -60,7 +60,7 @@ module bram1p1rw always @ (posedge clk) begin dout <= RAM[addr]; - if(ena) begin + if(en) begin for(i=0;i>(1+`XLEN/32):(RANGE+BASE)>>1+(`XLEN/32)]; - - // discard bottom 2 or 3 bits of address offset within word or doubleword - localparam adrlsb = (`XLEN==64) ? 
3 : 2; - logic [31:adrlsb] adrmsbs; - assign adrmsbs = a[31:adrlsb]; - - always_ff @(posedge clk) - rd <= RAM[adrmsbs]; - - genvar index; - for(index = 0; index < `XLEN/8; index++) begin - always_ff @(posedge clk) begin - if (we & ByteMask[index]) RAM[adrmsbs][8*(index+1)-1:8*index] <= #1 wd[8*(index+1)-1:8*index]; - end - end - -----/\----- EXCLUDED -----/\----- */ + memory(.clk, .en(we), .we(ByteMask), .addr(a[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(rd), .din(wd)); endmodule diff --git a/pipelined/src/uncore/ahbapbbridge.sv b/pipelined/src/uncore/ahbapbbridge.sv new file mode 100644 index 00000000..e05ee3d8 --- /dev/null +++ b/pipelined/src/uncore/ahbapbbridge.sv @@ -0,0 +1,98 @@ +/////////////////////////////////////////// +// ahbapbbridge.sv +// +// Written: David_Harris@hmc.edu & Nic Lucio 7 June 2022 +// +// Purpose: AHB to APB bridge +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module ahbapbbridge #(PERIPHS = 2) ( + input logic HCLK, HRESETn, + input logic [PERIPHS-1:0] HSEL, + input logic [31:0] HADDR, + input logic [`XLEN-1:0] HWDATA, + input logic HWRITE, + input logic [1:0] HTRANS, + input logic HREADY, + output logic [`XLEN-1:0] HRDATA, + output logic HRESP, HREADYOUT, + output logic PCLK, PRESETn, + output logic [PERIPHS-1:0] PSEL, + output logic PWRITE, + output logic PENABLE, + output logic [31:0] PADDR, + output logic [`XLEN-1:0] PWDATA, + input logic [PERIPHS-1:0] PREADY, + input var [`XLEN-1:0][PERIPHS-1:0] PRDATA +); + + logic activeTrans; + logic initTrans, initTransSel, initTransSelD; + logic nextPENABLE; + + // convert AHB to APB signals + assign PCLK = HCLK; + assign PRESETn = HRESETn; + + // identify start of a transaction + assign activeTrans = (HTRANS == 2'b10); // only accept nonsequential transactions + assign initTrans = activeTrans & HREADY; // start a transaction when the bus is ready and an active transaction is requested + assign initTransSel = initTrans & |HSEL; // capture data and address if any of the peripherals are selected + + // delay AHB Address phase signals to align with AHB Data phase because APB expects them at the same time + flopenr #(32) addrreg(HCLK, ~HRESETn, initTransSel, HADDR, PADDR); + flopenr #(1) writereg(HCLK, ~HRESETn, initTransSel, HWRITE, PWRITE); + // enable selreg with initTrans rather than initTransSel so PSEL can turn off + flopenr #(PERIPHS) selreg(HCLK, ~HRESETn, initTrans, HSEL & {PERIPHS{activeTrans}}, PSEL); + // AHB Data phase signal doesn't need delay. 
Note that HWDATA is guaranteed to remain stable until READY is asserted + assign PWDATA = HWDATA; + + // enable logic: goes high a cycle after initTrans, then back low on cycle after desired PREADY is asserted + // cycle1: AHB puts HADDR, HWRITE, HSEL on bus. initTrans is 1, and these are captured + // cycle2: AHB puts HWDATA on the bus. This effectively extends the setup phase + // cycle3: bridge raises PENABLE. Peripheral typically responds with PREADY. + // Read occurs by end of cycle. Write occurs at end of cycle. + flopr #(1) inittransreg(HCLK, ~HRESETn, initTransSel, initTransSelD); + assign nextPENABLE = PENABLE ? ~HREADY : initTransSelD; + flopr #(1) enablereg(HCLK, ~HRESETn, nextPENABLE, PENABLE); + + // result and ready multiplexer + int i; + always_comb + for (i=0; i1 cycle to respond + + // account for subword read/write circuitry + // -- Note GPIO registers are 32 bits no matter what; access them with LW SW. + // (At least that's what I think when FE310 spec says "only naturally aligned 32-bit accesses are supported") + if (`XLEN == 64) begin + assign Din = entry[2] ? PWDATA[63:32] : PWDATA[31:0]; + assign PRDATA = entry[2] ? {Dout,32'b0} : {32'b0,Dout}; + end else begin // 32-bit + assign Din = PWDATA[31:0]; + assign PRDATA = Dout; + end + + // register access + always_ff @(posedge PCLK, negedge PRESETn) + if (~PRESETn) begin // asynch reset + input_en <= 0; + output_en <= 0; + // *** synch reset not yet implemented [DH: can we delete this comment? Check if a sync reset is required] + output_val <= #1 0; + rise_ie <= #1 0; + rise_ip <= #1 0; + fall_ie <= #1 0; + fall_ip <= #1 0; + high_ie <= #1 0; + high_ip <= #1 0; + low_ie <= #1 0; + low_ip <= #1 0; + end else begin // writes + // According to FE310 spec: Once the interrupt is pending, it will remain set until a 1 is written to the *_ip register at that bit. 
+ /* verilator lint_off CASEINCOMPLETE */ + if (memwrite) + case(entry) + 8'h04: input_en <= #1 Din; + 8'h08: output_en <= #1 Din; + 8'h0C: output_val <= #1 Din; + 8'h18: rise_ie <= #1 Din; + 8'h20: fall_ie <= #1 Din; + 8'h28: high_ie <= #1 Din; + 8'h30: low_ie <= #1 Din; + 8'h40: output_val <= #1 output_val ^ Din; // OUT_XOR + endcase + /* verilator lint_on CASEINCOMPLETE */ + + // interrupts can be cleared by writing corresponding bits to a register + if (memwrite & entry == 8'h1C) rise_ip <= rise_ip & ~Din; + else rise_ip <= rise_ip | (input2d & ~input3d); + if (memwrite & (entry == 8'h24)) fall_ip <= fall_ip & ~Din; + else fall_ip <= fall_ip | (~input2d & input3d); + if (memwrite & (entry == 8'h2C)) high_ip <= high_ip & ~Din; + else high_ip <= high_ip | input3d; + if (memwrite & (entry == 8'h34)) low_ip <= low_ip & ~Din; + else low_ip <= low_ip | ~input3d; + + case(entry) // flop to sample inputs + 8'h00: Dout <= #1 input_val; + 8'h04: Dout <= #1 input_en; + 8'h08: Dout <= #1 output_en; + 8'h0C: Dout <= #1 output_val; + 8'h18: Dout <= #1 rise_ie; + 8'h1C: Dout <= #1 rise_ip; + 8'h20: Dout <= #1 fall_ie; + 8'h24: Dout <= #1 fall_ip; + 8'h28: Dout <= #1 high_ie; + 8'h2C: Dout <= #1 high_ip; + 8'h30: Dout <= #1 low_ie; + 8'h34: Dout <= #1 low_ip; + 8'h40: Dout <= #1 0; // OUT_XOR reads as 0 + default: Dout <= #1 0; + endcase + end + + // chip i/o + // connect OUT to IN for loopback testing + if (`GPIO_LOOPBACK_TEST) assign input0d = ((output_en & GPIOPinsOut) | (~output_en & GPIOPinsIn)) & input_en; + else assign input0d = GPIOPinsIn & input_en; + + // synchronizer for inputs + flop #(32) sync1(PCLK,input0d,input1d); + flop #(32) sync2(PCLK,input1d,input2d); + flop #(32) sync3(PCLK,input2d,input3d); + assign input_val = input3d; + assign GPIOPinsOut = output_val; + assign GPIOPinsEn = output_en; + + assign GPIOIntr = |{(rise_ip & rise_ie),(fall_ip & fall_ie),(high_ip & high_ie),(low_ip & low_ie)}; +endmodule + diff --git a/pipelined/src/uncore/ram.sv 
b/pipelined/src/uncore/ram.sv index 442bfc50..c53d1f48 100644 --- a/pipelined/src/uncore/ram.sv +++ b/pipelined/src/uncore/ram.sv @@ -63,6 +63,7 @@ module ram #(parameter BASE=0, RANGE = 65535) ( // *** this seems like a weird way to use reset flopenr #(1) memwritereg(HCLK, 1'b0, initTrans | ~HRESETn, HSELRam & HWRITE, memwrite); flopenr #(32) haddrreg(HCLK, 1'b0, initTrans | ~HRESETn, HADDR, A); + // busy FSM to extend READY signal always @(posedge HCLK, negedge HRESETn) if (~HRESETn) begin diff --git a/pipelined/src/uncore/ram_orig.sv b/pipelined/src/uncore/ram_orig.sv new file mode 100644 index 00000000..e40da7b3 --- /dev/null +++ b/pipelined/src/uncore/ram_orig.sv @@ -0,0 +1,107 @@ +/////////////////////////////////////////// +// ram_orig.sv +// +// Written: David_Harris@hmc.edu 9 January 2021 +// Modified: +// +// Purpose: On-chip RAM, external to core +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module ram_orig #(parameter BASE=0, RANGE = 65535) ( + input logic HCLK, HRESETn, + input logic HSELRam, + input logic [31:0] HADDR, + input logic HWRITE, + input logic HREADY, + input logic [1:0] HTRANS, + input logic [`XLEN-1:0] HWDATA, + input logic [3:0] HSIZED, + output logic [`XLEN-1:0] HREADRam, + output logic HRESPRam, HREADYRam +); + + // Desired changes. + // 1. find a way to merge read and write address into 1 port. + // 2. remove all unnecessary latencies. (HREADY needs to be able to constant high.) + // 3. implement burst. + // 4. remove the configurable latency. + + logic [`XLEN/8-1:0] ByteMaskM; + logic [31:0] HWADDR, A; + logic prevHREADYRam, risingHREADYRam; + logic initTrans; + logic memwrite; + logic [3:0] busycount; + + swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HWADDR[2:0]), .ByteMask(ByteMaskM)); + + assign initTrans = HREADY & HSELRam & (HTRANS != 2'b00); + + // *** this seems like a weird way to use reset + flopenr #(1) memwritereg(HCLK, 1'b0, initTrans | ~HRESETn, HSELRam & HWRITE, memwrite); + flopenr #(32) haddrreg(HCLK, 1'b0, initTrans | ~HRESETn, HADDR, A); + + // busy FSM to extend READY signal + always @(posedge HCLK, negedge HRESETn) + if (~HRESETn) begin + busycount <= 0; + HREADYRam <= #1 0; + end else begin + if (initTrans) begin + busycount <= 0; + HREADYRam <= #1 0; + end else if (~HREADYRam) begin + if (busycount == 0) begin // Ram latency, for testing purposes. 
*** test with different values such as 2 + HREADYRam <= #1 1; + end else begin + busycount <= busycount + 1; + end + end + end + assign HRESPRam = 0; // OK + + localparam ADDR_WDITH = $clog2(RANGE/8); + localparam OFFSET = $clog2(`XLEN/8); + + // Rising HREADY edge detector + // Indicates when ram is finishing up + // Needed because HREADY may go high for other reasons, + // and we only want to write data when finishing up. + flopenr #(1) prevhreadyRamreg(HCLK,~HRESETn, 1'b1, HREADYRam,prevHREADYRam); + assign risingHREADYRam = HREADYRam & ~prevHREADYRam; + + always @(posedge HCLK) + HWADDR <= #1 A; + + bram2p1r1w #(`XLEN/8, 8, ADDR_WDITH, `FPGA) + memory(.clk(HCLK), .enaA(1'b1), + .addrA(A[ADDR_WDITH+OFFSET-1:OFFSET]), .doutA(HREADRam), + .enaB(memwrite & risingHREADYRam), .weB(ByteMaskM), + .addrB(HWADDR[ADDR_WDITH+OFFSET-1:OFFSET]), .dinB(HWDATA)); + + +endmodule + diff --git a/pipelined/src/uncore/uncore.sv b/pipelined/src/uncore/uncore.sv index c6728294..d1a97bad 100644 --- a/pipelined/src/uncore/uncore.sv +++ b/pipelined/src/uncore/uncore.sv @@ -194,7 +194,7 @@ module uncore ( ({`XLEN{HSELSDCD}} & HREADSDC); assign HRESP = HSELRamD & HRESPRam | - HSELEXTD & HRESPEXT | + HSELEXTD & HRESPEXT | HSELCLINTD & HRESPCLINT | HSELPLICD & HRESPPLIC | HSELGPIOD & HRESPGPIO | @@ -203,7 +203,7 @@ module uncore ( HSELSDC & HRESPSDC; assign HREADY = HSELRamD & HREADYRam | - HSELEXTD & HREADYEXT | + HSELEXTD & HREADYEXT | HSELCLINTD & HREADYCLINT | HSELPLICD & HREADYPLIC | HSELGPIOD & HREADYGPIO | From 3c8eafc8eefc1ade967ad6ea49409301f019d3e8 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 8 Jun 2022 01:39:44 +0000 Subject: [PATCH 4/7] Cleaned bram interface --- pipelined/src/generic/flop/bram1p1rw.sv | 8 ++++---- pipelined/src/generic/flop/bram2p1r1w.sv | 12 +++++------ pipelined/src/generic/flop/simpleram.sv | 2 +- pipelined/src/uncore/ram.sv | 26 +++++++++++++++++++++--- pipelined/src/uncore/ram_orig.sv | 4 ++-- 5 files changed, 36 insertions(+), 16 deletions(-) 
diff --git a/pipelined/src/generic/flop/bram1p1rw.sv b/pipelined/src/generic/flop/bram1p1rw.sv index cccf1f1f..d0d3c40a 100644 --- a/pipelined/src/generic/flop/bram1p1rw.sv +++ b/pipelined/src/generic/flop/bram1p1rw.sv @@ -44,8 +44,8 @@ module bram1p1rw //---------------------------------------------------------------------- ) ( input logic clk, - input logic en, - input logic [NUM_COL-1:0] we, + input logic we, + input logic [NUM_COL-1:0] bwe, input logic [ADDR_WIDTH-1:0] addr, output logic [DATA_WIDTH-1:0] dout, input logic [DATA_WIDTH-1:0] din @@ -60,9 +60,9 @@ module bram1p1rw always @ (posedge clk) begin dout <= RAM[addr]; - if(en) begin + if(we) begin for(i=0;i Date: Wed, 8 Jun 2022 02:06:00 +0000 Subject: [PATCH 5/7] Modified RAM for single-cycle latency --- pipelined/src/uncore/ram.sv | 72 +++++++++++++++------------------- pipelined/src/uncore/uncore.sv | 4 +- 2 files changed, 34 insertions(+), 42 deletions(-) diff --git a/pipelined/src/uncore/ram.sv b/pipelined/src/uncore/ram.sv index 40bf81eb..2aa0df36 100644 --- a/pipelined/src/uncore/ram.sv +++ b/pipelined/src/uncore/ram.sv @@ -49,22 +49,24 @@ module ram #(parameter BASE=0, RANGE = 65535) ( // 3. implement burst. // 4. remove the configurable latency. 
- logic [`XLEN/8-1:0] ByteMaskM; - logic [31:0] HWADDR, A; - logic prevHREADYRam, risingHREADYRam; + logic [`XLEN/8-1:0] ByteMask; + logic [31:0] HADDRD, RamAddr; + //logic prevHREADYRam, risingHREADYRam; logic initTrans; - logic memwrite; - logic [3:0] busycount; + logic memwrite, memwriteD; + logic nextHREADYRam; + //logic [3:0] busycount; - swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HWADDR[2:0]), .ByteMask(ByteMaskM)); + swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask)); - assign initTrans = HREADY & HSELRam & (HTRANS != 2'b00); + assign initTrans = HREADY & HSELRam & (HTRANS != 2'b00); // *** add burst support, or disable on busy + assign memwrite = initTrans & HWRITE; // *** this seems like a weird way to use reset - flopenr #(1) memwritereg(HCLK, 1'b0, initTrans | ~HRESETn, HSELRam & HWRITE, memwrite); - flopenr #(32) haddrreg(HCLK, 1'b0, initTrans | ~HRESETn, HADDR, A); + flopen #(1) memwritereg(HCLK, initTrans | ~HRESETn, memwrite, memwriteD); // probably drop ~HRESETn in all this + flopen #(32) haddrreg(HCLK, initTrans | ~HRESETn, HADDR, HADDRD); - // busy FSM to extend READY signal +/* // busy FSM to extend READY signal always @(posedge HCLK, negedge HRESETn) if (~HRESETn) begin busycount <= 0; @@ -80,48 +82,38 @@ module ram #(parameter BASE=0, RANGE = 65535) ( busycount <= busycount + 1; end end - end + end */ + + + assign nextHREADYRam = ~(memwriteD & ~memwrite); + flopr #(1) readyreg(HCLK, ~HRESETn, nextHREADYRam, HREADYRam); +// assign HREADYRam = ~(memwriteD & ~memwrite); assign HRESPRam = 0; // OK - localparam ADDR_WDITH = $clog2(RANGE/8); + localparam ADDR_WIDTH = $clog2(RANGE/8); localparam OFFSET = $clog2(`XLEN/8); - // Rising HREADY edge detector +/* // Rising HREADY edge detector // Indicates when ram is finishing up // Needed because HREADY may go high for other reasons, // and we only want to write data when finishing up. 
flopenr #(1) prevhreadyRamreg(HCLK,~HRESETn, 1'b1, HREADYRam,prevHREADYRam); - assign risingHREADYRam = HREADYRam & ~prevHREADYRam; + assign risingHREADYRam = HREADYRam & ~prevHREADYRam;*/ - always @(posedge HCLK) - HWADDR <= #1 A; - - bram2p1r1w #(`XLEN/8, 8, ADDR_WDITH, `FPGA) +/* + bram2p1r1w #(`XLEN/8, 8, ADDR_WDITH, `FPGA) memory(.clk(HCLK), .reA(1'b1), .addrA(A[ADDR_WDITH+OFFSET-1:OFFSET]), .doutA(HREADRam), .weB(memwrite & risingHREADYRam), .bweB(ByteMaskM), - .addrB(HWADDR[ADDR_WDITH+OFFSET-1:OFFSET]), .dinB(HWDATA)); -/* - bram1p1r1w #(`XLEN/8, 8, ADDR_WDITH) - memory(.clk(HCLK), .we(memwrite), .bwe(ByteMaskM), . addr(A***), .dout(HREADRam), .din(HWDATA)); - - #( - //-------------------------------------------------------------------------- - parameter NUM_COL = 8, - parameter COL_WIDTH = 8, - parameter ADDR_WIDTH = 10, - // Addr Width in bits : 2 *ADDR_WIDTH = RAM Depth - parameter DATA_WIDTH = NUM_COL*COL_WIDTH // Data Width in bits - //---------------------------------------------------------------------- - ) ( - input logic clk, - input logic ena, - input logic [NUM_COL-1:0] we, - input logic [ADDR_WIDTH-1:0] addr, - output logic [DATA_WIDTH-1:0] dout, - input logic [DATA_WIDTH-1:0] din - );*/ + .addrB(HWADDR[ADDR_WDITH+OFFSET-1:OFFSET]), .dinB(HWDATA)); */ - + + + // On writes, use address delayed by one cycle to sync with HWDATA + mux2 #(32) adrmux(HADDR, HADDRD, memwriteD, RamAddr); + + // single-ported RAM + bram1p1rw #(`XLEN/8, 8, ADDR_WIDTH) + memory(.clk(HCLK), .we(memwriteD), .bwe(ByteMask), .addr(RamAddr[ADDR_WIDTH+OFFSET-1:OFFSET]), .dout(HREADRam), .din(HWDATA)); endmodule diff --git a/pipelined/src/uncore/uncore.sv b/pipelined/src/uncore/uncore.sv index d1a97bad..488a61e0 100644 --- a/pipelined/src/uncore/uncore.sv +++ b/pipelined/src/uncore/uncore.sv @@ -92,7 +92,7 @@ module uncore ( // generate // on-chip RAM if (`RAM_SUPPORTED) begin : ram - ram #( + ram_orig #( .BASE(`RAM_BASE), .RANGE(`RAM_RANGE)) ram ( .HCLK, .HRESETn, .HSELRam, 
.HADDR, @@ -102,7 +102,7 @@ module uncore ( end if (`BOOTROM_SUPPORTED) begin : bootrom - ram #(.BASE(`BOOTROM_BASE), .RANGE(`BOOTROM_RANGE)) + ram_orig #(.BASE(`BOOTROM_BASE), .RANGE(`BOOTROM_RANGE)) bootrom( .HCLK, .HRESETn, .HSELRam(HSELBootRom), .HADDR, From 69449963290bdc07ec0dd9b7192684413062a42f Mon Sep 17 00:00:00 2001 From: DTowersM Date: Wed, 8 Jun 2022 16:28:09 +0000 Subject: [PATCH 6/7] added #1 delays to Stalls and Flushes in hazard unit --- pipelined/src/hazard/hazard.sv | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pipelined/src/hazard/hazard.sv b/pipelined/src/hazard/hazard.sv index b9163d6b..3c641180 100644 --- a/pipelined/src/hazard/hazard.sv +++ b/pipelined/src/hazard/hazard.sv @@ -69,11 +69,11 @@ module hazard( assign StallMCause = wfiM & (~TrapM & ~IntPendingM); assign StallWCause = LSUStallM | IFUStallF; - assign StallF = StallFCause | StallD; - assign StallD = StallDCause | StallE; - assign StallE = StallECause | StallM; - assign StallM = StallMCause | StallW; - assign StallW = StallWCause; + assign #1 StallF = StallFCause | StallD; + assign #1 StallD = StallDCause | StallE; + assign #1 StallE = StallECause | StallM; + assign #1 StallM = StallMCause | StallW; + assign #1 StallW = StallWCause; assign FirstUnstalledD = ~StallD & StallF; assign FirstUnstalledE = ~StallE & StallD; @@ -81,11 +81,11 @@ module hazard( assign FirstUnstalledW = ~StallW & StallM; // Each stage flushes if the previous stage is the last one stalled (for cause) or the system has reason to flush - assign FlushF = BPPredWrongE; - assign FlushD = FirstUnstalledD | TrapM | RetM | BPPredWrongE; - assign FlushE = FirstUnstalledE | TrapM | RetM | BPPredWrongE; // *** why is BPPredWrongE here, but not needed in simple processor - assign FlushM = FirstUnstalledM | TrapM | RetM; + assign #1 FlushF = BPPredWrongE; + assign #1 FlushD = FirstUnstalledD | TrapM | RetM | BPPredWrongE; + assign #1 FlushE = FirstUnstalledE | TrapM | RetM | 
BPPredWrongE; // *** why is BPPredWrongE here, but not needed in simple processor + assign #1 FlushM = FirstUnstalledM | TrapM | RetM; // on Trap the memory stage should be flushed going into the W stage, // except if the instruction causing the Trap is an ecall or ebreak. - assign FlushW = FirstUnstalledW | (TrapM & ~(BreakpointFaultM | EcallFaultM)); + assign #1 FlushW = FirstUnstalledW | (TrapM & ~(BreakpointFaultM | EcallFaultM)); endmodule From eda8bb732b18d9ca5fe35495c7db26483d001bbd Mon Sep 17 00:00:00 2001 From: DTowersM Date: Wed, 8 Jun 2022 17:27:16 +0000 Subject: [PATCH 7/7] Added my name to the makefile --- benchmarks/embench/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile index 1525a2b8..c6ac3b67 100644 --- a/benchmarks/embench/Makefile +++ b/benchmarks/embench/Makefile @@ -1,5 +1,5 @@ # Makefile added 1/20/22 David_Harris@hmc.edu -# Expanded and developed by dtorres@hmc.edu +# Expanded and developed by Daniel Torres dtorres@hmc.edu # Compile Embench for Wally all: sim size