diff --git a/.gitmodules b/.gitmodules index 65e1e71c..e69de29b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "sky130/sky130_osu_sc_t12"] - path = sky130/sky130_osu_sc_t12 - url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/ diff --git a/sky130/sky130_osu_sc_t12 b/sky130/sky130_osu_sc_t12 deleted file mode 160000 index f60f2d03..00000000 --- a/sky130/sky130_osu_sc_t12 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f60f2d0395053c4df362a97d7e2099721b6face6 diff --git a/wally-pipelined/config/buildroot/wally-config.vh b/wally-pipelined/config/buildroot/wally-config.vh index c6f67880..e1e4f300 100644 --- a/wally-pipelined/config/buildroot/wally-config.vh +++ b/wally-pipelined/config/buildroot/wally-config.vh @@ -62,25 +62,25 @@ // Peripheral memory space extends from BASE to BASE+RANGE // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +`define BOOTTIM_RANGE 56'h00003FFF +//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +//`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 56'h80000000 +`define TIM_RANGE 56'h07FFFFFF `define 
CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 56'h02000000 +`define CLINT_RANGE 56'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 56'h10012000 +`define GPIO_RANGE 56'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 56'h10000000 +`define UART_RANGE 56'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 56'h0C000000 +`define PLIC_RANGE 56'h03FFFFFF // Bus Interface width `define AHBW 64 diff --git a/wally-pipelined/config/busybear/wally-config.vh b/wally-pipelined/config/busybear/wally-config.vh index 0db13778..58c1c8a0 100644 --- a/wally-pipelined/config/busybear/wally-config.vh +++ b/wally-pipelined/config/busybear/wally-config.vh @@ -31,6 +31,7 @@ `define BUSYBEAR 1 `define LINUX_FIX_READ {'h10000005} `define LINUX_TEST_VECTORS "/courses/e190ax/busybear_boot/" +//`define LINUX_TEST_VECTORS "../../../busybear_boot/" // RV32 or RV64: XLEN = 32 or 64 `define XLEN 64 @@ -63,25 +64,25 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +`define BOOTTIM_RANGE 56'h00003FFF +//`define BOOTTIM_BASE 56'h00001000 // spec had 
been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +//`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 56'h80000000 +`define TIM_RANGE 56'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 56'h02000000 +`define CLINT_RANGE 56'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 56'h10012000 +`define GPIO_RANGE 56'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 56'h10000000 +`define UART_RANGE 56'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 56'h0C000000 +`define PLIC_RANGE 56'h03FFFFFF // Bus Interface width `define AHBW 64 diff --git a/wally-pipelined/config/rv32ic/wally-config.vh b/wally-pipelined/config/rv32ic/wally-config.vh index b6878061..29cd973a 100644 --- a/wally-pipelined/config/rv32ic/wally-config.vh +++ b/wally-pipelined/config/rv32ic/wally-config.vh @@ -61,26 +61,27 @@ // Peripheral memory space extends from BASE to BASE+RANGE // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits +// *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? 
`define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_BASE 34'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +`define BOOTTIM_RANGE 34'h00003FFF +//`define BOOTTIM_BASE 34'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +//`define BOOTTIM_RANGE 34'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 34'h80000000 +`define TIM_RANGE 34'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 34'h02000000 +`define CLINT_RANGE 34'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 34'h10012000 +`define GPIO_RANGE 34'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 34'h10000000 +`define UART_RANGE 34'h00000007 `define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 34'h0C000000 +`define PLIC_RANGE 34'h03FFFFFF // Bus Interface width `define AHBW 32 diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index 954e126b..a15ef18b 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -53,7 +53,7 @@ `define 
DTLB_ENTRY_BITS 5 // Legal number of PMP entries are 0, 16, or 64 -`define PMP_ENTRIES 16 +`define PMP_ENTRIES 64 // Address space `define RESET_VECTOR 64'h0000000080000000 @@ -65,26 +65,27 @@ // Peripheral memory space extends from BASE to BASE+RANGE // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits +// *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? `define BOOTTIM_SUPPORTED 1'b1 -`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTTIM_RANGE 32'h00003FFF -//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -//`define BOOTTIM_RANGE 32'h00000FFF +`define BOOTTIM_RANGE 56'h00003FFF +`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +//`define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 -`define TIM_BASE 32'h80000000 -`define TIM_RANGE 32'h07FFFFFF +`define TIM_BASE 56'h80000000 +`define TIM_RANGE 56'h07FFFFFF `define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 32'h02000000 -`define CLINT_RANGE 32'h0000FFFF +`define CLINT_BASE 56'h02000000 +`define CLINT_RANGE 56'h0000FFFF `define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 32'h10012000 -`define GPIO_RANGE 32'h000000FF +`define GPIO_BASE 56'h10012000 +`define GPIO_RANGE 56'h000000FF `define UART_SUPPORTED 1'b1 -`define UART_BASE 32'h10000000 -`define UART_RANGE 32'h00000007 +`define UART_BASE 56'h10000000 +`define UART_RANGE 56'h00000007 `define PLIC_SUPPORTED 1'b1 -`define 
PLIC_BASE 32'h0C000000 -`define PLIC_RANGE 32'h03FFFFFF +`define PLIC_BASE 56'h0C000000 +`define PLIC_RANGE 56'h03FFFFFF // Test modes diff --git a/wally-pipelined/regression/wally-busybear-batch.do b/wally-pipelined/regression/wally-busybear-batch.do index a4a80eb7..e2817dfa 100644 --- a/wally-pipelined/regression/wally-busybear-batch.do +++ b/wally-pipelined/regression/wally-busybear-batch.do @@ -35,5 +35,6 @@ vopt work_busybear.testbench -o workopt_busybear vsim workopt_busybear -suppress 8852,12070 +run -all run -all quit diff --git a/wally-pipelined/regression/wally-busybear.do b/wally-pipelined/regression/wally-busybear.do index 11876dde..204d1c4e 100644 --- a/wally-pipelined/regression/wally-busybear.do +++ b/wally-pipelined/regression/wally-busybear.do @@ -35,9 +35,10 @@ vopt +acc work.testbench -o workopt vsim workopt -suppress 8852,12070 -do ./wave-dos/linux-waves.do #-- Run the Simulation run -all +do ./wave-dos/linux-waves.do +run -all ##quit diff --git a/wally-pipelined/regression/wally-pipelined-ross.do b/wally-pipelined/regression/wally-pipelined-ross.do index 90a4f5c2..15a515de 100644 --- a/wally-pipelined/regression/wally-pipelined-ross.do +++ b/wally-pipelined/regression/wally-pipelined-ross.do @@ -35,8 +35,8 @@ switch $argc { } # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals -vopt +acc -gDEBUG=1 work.testbench -o workopt -vsim workopt +vopt -fsmdebug +acc -gDEBUG=1 work.testbench -o workopt +vsim workopt -fsmdebug do wave.do diff --git a/wally-pipelined/regression/wave-dos/linux-waves.do b/wally-pipelined/regression/wave-dos/linux-waves.do index 63623891..a891c206 100644 --- a/wally-pipelined/regression/wave-dos/linux-waves.do +++ b/wally-pipelined/regression/wave-dos/linux-waves.do @@ -122,11 +122,11 @@ add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UEPC_REGW add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UTVEC_REGW add wave -hex 
sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UIP_REGW add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UIE_REGW -add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG01_REGW -add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG23_REGW -add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPADDR_ARRAY_REGW -add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/MISA_REGW -add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csru/FRM_REGW +#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG01_REGW +#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG23_REGW +#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPADDR_ARRAY_REGW +#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/MISA_REGW +#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csru/FRM_REGW add wave -divider add wave -hex -r /testbench/* diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 9210a1a9..213b5cee 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -7,32 +7,32 @@ add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/Func add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM -add wave -noupdate -expand -group HDU 
-group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/BPPredWrongE -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/RetM -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/TrapM -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/ICacheStallF -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/DataStall -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/MulDivStallD -add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF -add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD -add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE -add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushM -add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushW +add wave -noupdate -expand -group 
HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/EcallFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StorePageFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InterruptM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF +add wave -noupdate -expand -group HDU -expand -group 
hazards /testbench/dut/hart/hzu/DCacheStall +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushD +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushE +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushM +add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushW add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallF add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallD add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE @@ -89,6 +89,7 @@ add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrFName add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrD add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrE add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrM +add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrW add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNextF add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCF add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCPlus2or4F @@ -104,7 +105,7 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/c/RegWriteD add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/RdD add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs1D add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs2D -add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rf +add wave -noupdate -group RegFile 
-expand /testbench/dut/hart/ieu/dp/regf/rf add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a1 add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a2 add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a3 @@ -117,31 +118,18 @@ add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/a -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/b -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/result -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/flags -add wave -noupdate -group alu -divider internals -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/overflow -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/carry -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/zero -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/neg -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/lt -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ltu -add wave -noupdate -group {dcache memory} /testbench/dut/hart/dmem/MemReadM -add wave -noupdate -group {dcache memory} /testbench/dut/hart/dmem/MemWriteM -add wave -noupdate -group {dcache memory} /testbench/dut/hart/dmem/MemAckW -add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/dmem/MemRWM -add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/dmem/AtomicM -add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/MemAdrM -add wave -noupdate -group dcache -expand 
-group {cpu request} /testbench/dut/hart/dmem/ReadDataW -add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/WriteDataM -add wave -noupdate -group dcache -color Gray90 /testbench/dut/hart/dmem/CurrState -add wave -noupdate -group dcache /testbench/dut/hart/MemPAdrM -add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAccessM -add wave -noupdate -group dcache /testbench/dut/hart/dmem/AtomicMaskedM -add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAckW +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/a +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/b +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/result +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/flags +add wave -noupdate -expand -group alu -divider internals +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/overflow +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/carry +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/zero +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/neg +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/lt +add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/ltu add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E @@ -184,48 +172,60 @@ add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/N add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/D add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/Q add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/rem0 -add wave -noupdate -expand -group icache -color Orange 
/testbench/dut/hart/ifu/icache/controller/CurrState -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/controller/NextState -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn -add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/NUMLINES -add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKLEN -add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned 
/testbench/dut/hart/ifu/icache/cachemem/BLOCKBYTELEN -add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/OFFSETLEN -add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/INDEXLEN -add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/TAGLEN -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData -add wave -noupdate -expand -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit -add wave -noupdate -expand -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData -add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable -add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine -add wave -noupdate 
-expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData -add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF -add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/AlignedInstrRawD -add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FlushDLastCyclen -add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/InstrRawD -add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF -add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF -add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPFinalF -add wave -noupdate -group AHB /testbench/dut/hart/ebu/BusState +add wave -noupdate -group icache -color Orange /testbench/dut/hart/ifu/icache/controller/CurrState +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/controller/NextState +add wave -noupdate -group icache /testbench/dut/hart/ifu/ITLBMissF +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/ITLBWriteF +add wave -noupdate -group icache -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit +add wave -noupdate -group icache -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF +add wave -noupdate -group icache -group {fsm out and control} 
/testbench/dut/hart/ifu/icache/controller/SavePC +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn +add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn +add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/NUMLINES +add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKLEN +add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKBYTELEN +add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/OFFSETLEN +add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/INDEXLEN +add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/TAGLEN +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF +add wave -noupdate -group icache -expand -group memory 
/testbench/dut/hart/ifu/icache/controller/InstrReadF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData +add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable +add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine +add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData +add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF +add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF +add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF +add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATA +add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATAMasked +add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATANext +add wave -noupdate -group AHB -color Gold /testbench/dut/hart/ebu/BusState +add wave -noupdate -group AHB /testbench/dut/hart/ebu/ProposedNextBusState +add wave -noupdate -group AHB /testbench/dut/hart/ebu/NextBusState +add wave -noupdate -group AHB /testbench/dut/hart/ebu/DSquashBusAccessM +add wave -noupdate -group AHB /testbench/dut/hart/ebu/ISquashBusAccessF +add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/AtomicMaskedM +add wave -noupdate -group AHB -expand -group {input 
requests} /testbench/dut/hart/ebu/MemReadM +add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemWriteM +add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/InstrReadF +add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemSizeM add wave -noupdate -group AHB /testbench/dut/hart/ebu/HCLK +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRESETn add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRDATA +add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRDATANext add wave -noupdate -group AHB /testbench/dut/hart/ebu/HREADY add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRESP add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDR @@ -239,12 +239,136 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED -add wave -noupdate -group csr -color Aquamarine -label {br executed} -radix unsigned {/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[5]} -add wave -noupdate -group csr -color Aquamarine -label {br miss predicted} -radix unsigned {/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[4]} -add wave -noupdate -group csr -childformat {{{/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[5]} -radix unsigned} {{/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[4]} -radix unsigned}} -subitemconfig {{/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[5]} {-height 16 -radix unsigned} {/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[4]} {-height 16 -radix unsigned}} /testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW +add wave -noupdate -group AHB /testbench/dut/hart/ebu/StallW +add wave -noupdate -expand -group lsu -color 
Gold /testbench/dut/hart/lsu/CurrState +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DisableTranslation +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemRWM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DataStall +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAdrM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemPAdrM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/ReadDataW +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/WriteDataM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/AtomicMaskedM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DSquashBusAccessM +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/HRDATAW +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAckW +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/StallW +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HCLK +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HSELPLIC +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HADDR +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HWRITE +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HREADY +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HTRANS +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HWDATA +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/UARTIntr +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/GPIOIntr +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HREADPLIC +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HRESPPLIC +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HREADYPLIC +add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/ExtIntM +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HCLK +add wave -noupdate -group 
GPIO /testbench/dut/uncore/genblk3/gpio/HSELGPIO +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HADDR +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HWDATA +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HWRITE +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HREADY +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HTRANS +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HREADGPIO +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HRESPGPIO +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HREADYGPIO +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOPinsIn +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOPinsOut +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOPinsEn +add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOIntr +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HCLK +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HSELCLINT +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HADDR +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HWRITE +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HWDATA +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HREADY +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HTRANS +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HREADCLINT +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HRESPCLINT +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HREADYCLINT +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIME +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIMECMP +add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/TimerIntM +add wave -noupdate -group CLINT 
/testbench/dut/uncore/genblk1/clint/SwIntM +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUTranslate +add wave -noupdate -expand -group ptwalker -color Gold /testbench/dut/hart/pagetablewalker/WalkerState +add wave -noupdate -expand -group ptwalker -color Salmon /testbench/dut/hart/pagetablewalker/HPTWStall +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/HPTWRead +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUStall +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/EndWalk +add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/MMUReadPTE +add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/PRegEn +add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/CurrentPTE +add wave -noupdate -expand -group ptwalker -divider data +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/TranslationPAdr +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/ValidPTE +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/LeafPTE +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUStall +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/TranslationPAdr +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/PageTableEntry +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/PageType +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/ITLBWriteF +add wave -noupdate -expand -group ptwalker -group {fsm outputs} 
/testbench/dut/hart/pagetablewalker/DTLBWriteM +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerInstrPageFaultF +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerLoadPageFaultM +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerStorePageFaultM +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/MMUStall +add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/EndWalk +add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr +add wave -noupdate -expand -group {LSU ARB} -color Gold /testbench/dut/hart/arbiter/CurrState +add wave -noupdate -expand -group {LSU ARB} -color {Medium Orchid} /testbench/dut/hart/arbiter/SelPTW +add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/pagetablewalker/MMUStall +add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWTranslate +add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWRead +add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWPAdr +add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReadPTE +add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReady +add wave -noupdate -expand -group {LSU ARB} -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU +add wave -noupdate /testbench/dut/hart/lsu/DataStall +add wave -noupdate -group csr /testbench/dut/hart/priv/csr/MIP_REGW +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HCLK +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HRESETn +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HSELUART +add wave -noupdate 
-group uart /testbench/dut/uncore/genblk4/uart/HADDR +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HWRITE +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HWDATA +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HREADUART +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HRESPUART +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HREADYUART +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/SIN +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/DSRb +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/DCDb +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/CTSb +add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/RIb +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/SOUT +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/RTSb +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/DTRb +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/OUT1b +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/OUT2b +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/INTR +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/TXRDYb +add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/RXRDYb +add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/TLBMiss +add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/tlb/TLBWrite +add wave -noupdate -group itlb /testbench/dut/hart/ifu/ITLBMissF +add wave -noupdate /testbench/dut/hart/pagetablewalker/StartWalk +add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/DisableTranslation +add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/VirtualAddress +add wave 
-noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/CAMHit +add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/VPNIndex +add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/HitPageType +add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/VirtualPageNumber +add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/TLBWrite +add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/PTEWriteVal +add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/WriteLines TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {12105831 ns} 0} -quietly wave cursor active 2 +WaveRestoreCursors {{Cursor 8} {4545 ns} 0} {{Cursor 3} {3377 ns} 0} {{Cursor 4} {3215 ns} 0} +quietly wave cursor active 1 configure wave -namecolwidth 250 configure wave -valuecolwidth 189 configure wave -justifyvalue left @@ -259,4 +383,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {0 ns} {30754715 ns} +WaveRestoreZoom {4209 ns} {4657 ns} diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index f290f0ad..748b3f5e 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -25,53 +25,57 @@ `include "wally-config.vh" -module ICacheCntrl #(parameter BLOCKLEN = 256) ( - // Inputs from pipeline - input logic clk, reset, - input logic StallF, StallD, - input logic FlushD, +module ICacheCntrl #(parameter BLOCKLEN = 256) + ( + // Inputs from pipeline + input logic clk, reset, + input logic StallF, StallD, + input logic FlushD, - // Input the address to read - // The upper bits of the physical pc - input logic [`PA_BITS-1:0] PCNextF, - input logic [`PA_BITS-1:0] PCPF, - // Signals to/from cache memory - // The read coming out of it - input logic [31:0] ICacheMemReadData, - input logic 
ICacheMemReadValid, - // The address at which we want to search the cache memory - output logic [`PA_BITS-1:0] PCTagF, - output logic [`PA_BITS-1:0] PCNextIndexF, - output logic ICacheReadEn, - // Load data into the cache - output logic ICacheMemWriteEnable, - output logic [BLOCKLEN-1:0] ICacheMemWriteData, + // Input the address to read + // The upper bits of the physical pc + input logic [`PA_BITS-1:0] PCNextF, + input logic [`PA_BITS-1:0] PCPF, + // Signals to/from cache memory + // The read coming out of it + input logic [31:0] ICacheMemReadData, + input logic ICacheMemReadValid, + // The address at which we want to search the cache memory + output logic [`PA_BITS-1:0] PCTagF, + output logic [`PA_BITS-1:0] PCNextIndexF, + output logic ICacheReadEn, + // Load data into the cache + output logic ICacheMemWriteEnable, + output logic [BLOCKLEN-1:0] ICacheMemWriteData, - // Outputs to rest of ifu - // High if the instruction in the fetch stage is compressed - output logic CompressedF, - // The instruction that was requested - // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros - output logic [31:0] FinalInstrRawF, + // Outputs to rest of ifu + // High if the instruction in the fetch stage is compressed + output logic CompressedF, + // The instruction that was requested + // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros + output logic [31:0] FinalInstrRawF, - // Outputs to pipeline control stuff - output logic ICacheStallF, EndFetchState, + // Outputs to pipeline control stuff + output logic ICacheStallF, EndFetchState, + input logic ITLBMissF, + input logic ITLBWriteF, + input logic WalkerInstrPageFaultF, - // Signals to/from ahblite interface - // A read containing the requested data - input logic [`XLEN-1:0] InstrInF, - input logic InstrAckF, - // The read we request from main memory - output logic [`PA_BITS-1:0] InstrPAdrF, - output logic InstrReadF -); + // Signals to/from ahblite 
interface + // A read containing the requested data + input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, + // The read we request from main memory + output logic [`PA_BITS-1:0] InstrPAdrF, + output logic InstrReadF + ); // FSM states - localparam STATE_READY = 0; - localparam STATE_HIT_SPILL = 1; // spill, block 0 hit - localparam STATE_HIT_SPILL_MISS_FETCH_WDV = 2; // block 1 miss, issue read to AHB and wait data. - localparam STATE_HIT_SPILL_MISS_FETCH_DONE = 3; // write data into SRAM/LUT - localparam STATE_HIT_SPILL_MERGE = 4; // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL. + localparam STATE_READY = 'h0; + localparam STATE_HIT_SPILL = 'h1; // spill, block 0 hit + localparam STATE_HIT_SPILL_MISS_FETCH_WDV = 'h2; // block 1 miss, issue read to AHB and wait data. + localparam STATE_HIT_SPILL_MISS_FETCH_DONE = 'h3; // write data into SRAM/LUT + localparam STATE_HIT_SPILL_MERGE = 'h4; // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL. // a challenge is the spill signal gets us out of the ready state and moves us to // 1 of the 2 spill branches. However the original fsm design had us return to @@ -87,28 +91,32 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( // between CPU stalling and that register. // Picking option 1. - localparam STATE_HIT_SPILL_FINAL = 5; // this state replicates STATE_READY's replay of the + localparam STATE_HIT_SPILL_FINAL = 'h5; // this state replicates STATE_READY's replay of the // spill access but does nto consider spill. It also does not do another operation. - localparam STATE_MISS_FETCH_WDV = 6; // aligned miss, issue read to AHB and wait for data. - localparam STATE_MISS_FETCH_DONE = 7; // write data into SRAM/LUT - localparam STATE_MISS_READ = 8; // read block 1 from SRAM/LUT + localparam STATE_MISS_FETCH_WDV = 'h6; // aligned miss, issue read to AHB and wait for data. 
+ localparam STATE_MISS_FETCH_DONE = 'h7; // write data into SRAM/LUT + localparam STATE_MISS_READ = 'h8; // read block 1 from SRAM/LUT - localparam STATE_MISS_SPILL_FETCH_WDV = 9; // spill, miss on block 0, issue read to AHB and wait - localparam STATE_MISS_SPILL_FETCH_DONE = 10; // write data into SRAM/LUT - localparam STATE_MISS_SPILL_READ1 = 11; // read block 0 from SRAM/LUT - localparam STATE_MISS_SPILL_2 = 12; // return to ready if hit or do second block update. - localparam STATE_MISS_SPILL_2_START = 13; // return to ready if hit or do second block update. - localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 14; // miss on block 1, issue read to AHB and wait - localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 15; // write data to SRAM/LUT - localparam STATE_MISS_SPILL_MERGE = 16; // read block 0 of CPU access, + localparam STATE_MISS_SPILL_FETCH_WDV = 'h9; // spill, miss on block 0, issue read to AHB and wait + localparam STATE_MISS_SPILL_FETCH_DONE = 'ha; // write data into SRAM/LUT + localparam STATE_MISS_SPILL_READ1 = 'hb; // read block 0 from SRAM/LUT + localparam STATE_MISS_SPILL_2 = 'hc; // return to ready if hit or do second block update. + localparam STATE_MISS_SPILL_2_START = 'hd; // return to ready if hit or do second block update. + localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 'he; // miss on block 1, issue read to AHB and wait + localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 'hf; // write data to SRAM/LUT + localparam STATE_MISS_SPILL_MERGE = 'h10; // read block 0 of CPU access, - localparam STATE_MISS_SPILL_FINAL = 17; // this state replicates STATE_READY's replay of the + localparam STATE_MISS_SPILL_FINAL = 'h11; // this state replicates STATE_READY's replay of the // spill access but does nto consider spill. It also does not do another operation. - localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? invalidate by cache block or address? + localparam STATE_INVALIDATE = 'h12; // *** not sure if invalidate or evict? 
invalidate by cache block or address? + localparam STATE_TLB_MISS = 'h13; + localparam STATE_TLB_MISS_DONE = 'h14; + + localparam AHBByteLength = `XLEN / 8; localparam AHBOFFETWIDTH = $clog2(AHBByteLength); @@ -119,39 +127,39 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( localparam WORDSPERLINE = BLOCKLEN/`XLEN; localparam LOGWPL = $clog2(WORDSPERLINE); - localparam integer PA_WIDTH = `PA_BITS - 2; + localparam integer PA_WIDTH = `PA_BITS - 2; - logic [4:0] CurrState, NextState; - logic hit, spill; - logic SavePC; - logic [1:0] PCMux; - logic CntReset; - logic PreCntEn, CntEn; - logic spillSave; - logic UnalignedSelect; - logic FetchCountFlag; + logic [4:0] CurrState, NextState; + logic hit, spill; + logic SavePC; + logic [1:0] PCMux; + logic CntReset; + logic PreCntEn, CntEn; + logic spillSave; + logic UnalignedSelect; + logic FetchCountFlag; localparam FetchCountThreshold = WORDSPERLINE - 1; - logic [LOGWPL:0] FetchCount, NextFetchCount; + logic [LOGWPL:0] FetchCount, NextFetchCount; - logic [`PA_BITS-1:0] PCPreFinalF, PCPSpillF; + logic [`PA_BITS-1:0] PCPreFinalF, PCPSpillF; logic [`PA_BITS-1:OFFSETWIDTH] PCPTrunkF; - logic [15:0] SpillDataBlock0; + logic [15:0] SpillDataBlock0; localparam [31:0] NOP = 32'h13; - logic reset_q; - logic [1:0] PCMux_q; + logic reset_q; + logic [1:0] PCMux_q; - // Misaligned signals - //logic [`XLEN:0] MisalignedInstrRawF; - //logic MisalignedStall; - // Cache fault signals - //logic FaultStall; + // Misaligned signals + //logic [`XLEN:0] MisalignedInstrRawF; + //logic MisalignedStall; + // Cache fault signals + //logic FaultStall; // on spill we want to get the first 2 bytes of the next cache block. // the spill only occurs if the PCPF mod BlockByteLength == -2. 
Therefore we can @@ -175,7 +183,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( // truncate the offset from PCPF for memory address generation assign PCPTrunkF = PCTagF[`PA_BITS-1:OFFSETWIDTH]; - // Detect if the instruction is compressed + // Detect if the instruction is compressed assign CompressedF = FinalInstrRawF[1:0] != 2'b11; @@ -205,167 +213,175 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( ICacheStallF = 1'b1; case (CurrState) - STATE_READY: begin - PCMux = 2'b00; - ICacheReadEn = 1'b1; - if (hit & ~spill) begin - SavePC = 1'b1; - ICacheStallF = 1'b0; - NextState = STATE_READY; - end else if (hit & spill) begin - spillSave = 1'b1; - PCMux = 2'b10; - NextState = STATE_HIT_SPILL; - end else if (~hit & ~spill) begin - CntReset = 1'b1; - NextState = STATE_MISS_FETCH_WDV; - end else if (~hit & spill) begin - CntReset = 1'b1; - PCMux = 2'b01; - NextState = STATE_MISS_SPILL_FETCH_WDV; - end else begin + PCMux = 2'b00; + ICacheReadEn = 1'b1; + if (ITLBMissF) begin + NextState = STATE_TLB_MISS; + end else if (hit & ~spill) begin + SavePC = 1'b1; + ICacheStallF = 1'b0; NextState = STATE_READY; - end + end else if (hit & spill) begin + spillSave = 1'b1; + PCMux = 2'b10; + NextState = STATE_HIT_SPILL; + end else if (~hit & ~spill) begin + CntReset = 1'b1; + NextState = STATE_MISS_FETCH_WDV; + end else if (~hit & spill) begin + CntReset = 1'b1; + PCMux = 2'b01; + NextState = STATE_MISS_SPILL_FETCH_WDV; + end else begin + NextState = STATE_READY; + end end - // branch 1, hit spill and 2, miss spill hit STATE_HIT_SPILL: begin - PCMux = 2'b10; - UnalignedSelect = 1'b1; - ICacheReadEn = 1'b1; - if (hit) begin + PCMux = 2'b10; + UnalignedSelect = 1'b1; + ICacheReadEn = 1'b1; + if (hit) begin NextState = STATE_HIT_SPILL_FINAL; - end else begin - CntReset = 1'b1; + end else begin + CntReset = 1'b1; NextState = STATE_HIT_SPILL_MISS_FETCH_WDV; - end + end end STATE_HIT_SPILL_MISS_FETCH_WDV: begin - PCMux = 2'b10; - //InstrReadF = 1'b1; - PreCntEn = 1'b1; - if 
(FetchCountFlag & InstrAckF) begin - NextState = STATE_HIT_SPILL_MISS_FETCH_DONE; - end else begin - NextState = STATE_HIT_SPILL_MISS_FETCH_WDV; - end + PCMux = 2'b10; + //InstrReadF = 1'b1; + PreCntEn = 1'b1; + if (FetchCountFlag & InstrAckF) begin + NextState = STATE_HIT_SPILL_MISS_FETCH_DONE; + end else begin + NextState = STATE_HIT_SPILL_MISS_FETCH_WDV; + end end STATE_HIT_SPILL_MISS_FETCH_DONE: begin - PCMux = 2'b10; - ICacheMemWriteEnable = 1'b1; + PCMux = 2'b10; + ICacheMemWriteEnable = 1'b1; NextState = STATE_HIT_SPILL_MERGE; end STATE_HIT_SPILL_MERGE: begin - PCMux = 2'b10; - UnalignedSelect = 1'b1; - ICacheReadEn = 1'b1; + PCMux = 2'b10; + UnalignedSelect = 1'b1; + ICacheReadEn = 1'b1; NextState = STATE_HIT_SPILL_FINAL; end STATE_HIT_SPILL_FINAL: begin - ICacheReadEn = 1'b1; - PCMux = 2'b00; - UnalignedSelect = 1'b1; - SavePC = 1'b1; - NextState = STATE_READY; - ICacheStallF = 1'b0; + ICacheReadEn = 1'b1; + PCMux = 2'b00; + UnalignedSelect = 1'b1; + SavePC = 1'b1; + NextState = STATE_READY; + ICacheStallF = 1'b0; end - // branch 3 miss no spill STATE_MISS_FETCH_WDV: begin - PCMux = 2'b01; - //InstrReadF = 1'b1; - PreCntEn = 1'b1; - if (FetchCountFlag & InstrAckF) begin - NextState = STATE_MISS_FETCH_DONE; - end else begin - NextState = STATE_MISS_FETCH_WDV; - end + PCMux = 2'b01; + //InstrReadF = 1'b1; + PreCntEn = 1'b1; + if (FetchCountFlag & InstrAckF) begin + NextState = STATE_MISS_FETCH_DONE; + end else begin + NextState = STATE_MISS_FETCH_WDV; + end end STATE_MISS_FETCH_DONE: begin - PCMux = 2'b01; - ICacheMemWriteEnable = 1'b1; + PCMux = 2'b01; + ICacheMemWriteEnable = 1'b1; NextState = STATE_MISS_READ; end STATE_MISS_READ: begin - PCMux = 2'b01; - ICacheReadEn = 1'b1; - NextState = STATE_READY; + PCMux = 2'b01; + ICacheReadEn = 1'b1; + NextState = STATE_READY; end - // branch 4 miss spill hit, and 5 miss spill miss STATE_MISS_SPILL_FETCH_WDV: begin - PCMux = 2'b01; - PreCntEn = 1'b1; - //InstrReadF = 1'b1; - if (FetchCountFlag & InstrAckF) begin - 
NextState = STATE_MISS_SPILL_FETCH_DONE; - end else begin - NextState = STATE_MISS_SPILL_FETCH_WDV; - end + PCMux = 2'b01; + PreCntEn = 1'b1; + //InstrReadF = 1'b1; + if (FetchCountFlag & InstrAckF) begin + NextState = STATE_MISS_SPILL_FETCH_DONE; + end else begin + NextState = STATE_MISS_SPILL_FETCH_WDV; + end end STATE_MISS_SPILL_FETCH_DONE: begin - PCMux = 2'b01; - ICacheMemWriteEnable = 1'b1; - NextState = STATE_MISS_SPILL_READ1; + PCMux = 2'b01; + ICacheMemWriteEnable = 1'b1; + NextState = STATE_MISS_SPILL_READ1; end STATE_MISS_SPILL_READ1: begin // always be a hit as we just wrote that cache block. - PCMux = 2'b01; // there is a 1 cycle delay after setting the address before the date arrives. - ICacheReadEn = 1'b1; - NextState = STATE_MISS_SPILL_2; + PCMux = 2'b01; // there is a 1 cycle delay after setting the address before the date arrives. + ICacheReadEn = 1'b1; + NextState = STATE_MISS_SPILL_2; end STATE_MISS_SPILL_2: begin - PCMux = 2'b10; - UnalignedSelect = 1'b1; - spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm. - ICacheReadEn = 1'b1; - NextState = STATE_MISS_SPILL_2_START; + PCMux = 2'b10; + UnalignedSelect = 1'b1; + spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm. 
+ ICacheReadEn = 1'b1; + NextState = STATE_MISS_SPILL_2_START; end STATE_MISS_SPILL_2_START: begin - if (~hit) begin - CntReset = 1'b1; - NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; - end else begin - NextState = STATE_READY; - ICacheReadEn = 1'b1; - PCMux = 2'b00; - UnalignedSelect = 1'b1; - SavePC = 1'b1; - ICacheStallF = 1'b0; - end + if (~hit) begin + CntReset = 1'b1; + NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; + end else begin + NextState = STATE_READY; + ICacheReadEn = 1'b1; + PCMux = 2'b00; + UnalignedSelect = 1'b1; + SavePC = 1'b1; + ICacheStallF = 1'b0; + end end STATE_MISS_SPILL_MISS_FETCH_WDV: begin - PCMux = 2'b10; - PreCntEn = 1'b1; - //InstrReadF = 1'b1; - if (FetchCountFlag & InstrAckF) begin - NextState = STATE_MISS_SPILL_MISS_FETCH_DONE; - end else begin - NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; - end + PCMux = 2'b10; + PreCntEn = 1'b1; + //InstrReadF = 1'b1; + if (FetchCountFlag & InstrAckF) begin + NextState = STATE_MISS_SPILL_MISS_FETCH_DONE; + end else begin + NextState = STATE_MISS_SPILL_MISS_FETCH_WDV; + end end STATE_MISS_SPILL_MISS_FETCH_DONE: begin - PCMux = 2'b10; - ICacheMemWriteEnable = 1'b1; - NextState = STATE_MISS_SPILL_MERGE; + PCMux = 2'b10; + ICacheMemWriteEnable = 1'b1; + NextState = STATE_MISS_SPILL_MERGE; end STATE_MISS_SPILL_MERGE: begin - PCMux = 2'b10; - UnalignedSelect = 1'b1; - ICacheReadEn = 1'b1; + PCMux = 2'b10; + UnalignedSelect = 1'b1; + ICacheReadEn = 1'b1; NextState = STATE_MISS_SPILL_FINAL; end STATE_MISS_SPILL_FINAL: begin - ICacheReadEn = 1'b1; - PCMux = 2'b00; - UnalignedSelect = 1'b1; - SavePC = 1'b1; - ICacheStallF = 1'b0; - NextState = STATE_READY; + ICacheReadEn = 1'b1; + PCMux = 2'b00; + UnalignedSelect = 1'b1; + SavePC = 1'b1; + ICacheStallF = 1'b0; + NextState = STATE_READY; + end + STATE_TLB_MISS: begin + if (ITLBWriteF | WalkerInstrPageFaultF) begin + NextState = STATE_TLB_MISS_DONE; + end else begin + NextState = STATE_TLB_MISS; + end + end + STATE_TLB_MISS_DONE : begin + NextState = 
STATE_READY; end default: begin - PCMux = 2'b01; - NextState = STATE_READY; + PCMux = 2'b01; + NextState = STATE_READY; end // *** add in error handling and invalidate/evict endcase @@ -407,7 +423,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) ( // store read data from memory interface before writing into SRAM. - genvar i; + genvar i; generate for (i = 0; i < WORDSPERLINE; i++) begin flopenr #(`XLEN) flop(.clk(clk), diff --git a/wally-pipelined/src/cache/icache.sv b/wally-pipelined/src/cache/icache.sv index abf828fc..943ab1b8 100644 --- a/wally-pipelined/src/cache/icache.sv +++ b/wally-pipelined/src/cache/icache.sv @@ -28,24 +28,28 @@ module icache ( // Basic pipeline stuff - input logic clk, reset, - input logic StallF, StallD, - input logic FlushD, + input logic clk, reset, + input logic StallF, StallD, + input logic FlushD, input logic [`PA_BITS-1:0] PCNextF, input logic [`PA_BITS-1:0] PCPF, // Data read in from the ebu unit - input logic [`XLEN-1:0] InstrInF, - input logic InstrAckF, + input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, // Read requested from the ebu unit output logic [`PA_BITS-1:0] InstrPAdrF, - output logic InstrReadF, + output logic InstrReadF, // High if the instruction currently in the fetch stage is compressed - output logic CompressedF, + output logic CompressedF, // High if the icache is requesting a stall - output logic ICacheStallF, + output logic ICacheStallF, + input logic ITLBMissF, + input logic ITLBWriteF, + input logic WalkerInstrPageFaultF, + // The raw (not decompressed) instruction that was requested // If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros - output logic [31:0] FinalInstrRawF + output logic [31:0] FinalInstrRawF ); // Configuration parameters diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index c59dfa9b..84e8f3b6 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -51,18 +51,20 @@ 
module ahblite ( input logic MemReadM, MemWriteM, input logic [`XLEN-1:0] WriteDataM, input logic [1:0] MemSizeM, + //output logic DataStall, // Signals from MMU +/* -----\/----- EXCLUDED -----\/----- input logic MMUStall, input logic [`XLEN-1:0] MMUPAdr, input logic MMUTranslate, output logic [`XLEN-1:0] MMUReadPTE, output logic MMUReady, + -----/\----- EXCLUDED -----/\----- */ // Signals from PMA checker input logic DSquashBusAccessM, ISquashBusAccessF, // Signals to PMA checker (metadata of proposed access) - output logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // Return from bus - output logic [`XLEN-1:0] ReadDataW, + output logic [`XLEN-1:0] HRDATAW, // AHB-Lite external signals input logic [`AHBW-1:0] HRDATA, input logic HREADY, HRESP, @@ -80,14 +82,13 @@ module ahblite ( output logic [3:0] HSIZED, output logic HWRITED, // Stalls - output logic /*InstrUpdate, */DataStall, output logic CommitM, MemAckW ); logic GrantData; logic [31:0] AccessAddress; logic [2:0] AccessSize, PTESize, ISize; - logic [`AHBW-1:0] HRDATAMasked, ReadDataM, CapturedData, ReadDataWnext, WriteData; + logic [`AHBW-1:0] HRDATAMasked, ReadDataM, CapturedHRDATAMasked, HRDATANext, WriteData; logic IReady, DReady; logic CaptureDataM,CapturedDataAvailable; @@ -115,14 +116,16 @@ module ahblite ( // interface that might be used in place of the ahblite. 
always_comb case (BusState) - IDLE: if (MMUTranslate) ProposedNextBusState = MMUTRANSLATE; - else if (AtomicMaskedM[1]) ProposedNextBusState = ATOMICREAD; + IDLE: /*if (MMUTranslate) ProposedNextBusState = MMUTRANSLATE; + else*/ if (AtomicMaskedM[1]) ProposedNextBusState = ATOMICREAD; else if (MemReadM) ProposedNextBusState = MEMREAD; // Memory has priority over instructions else if (MemWriteM) ProposedNextBusState = MEMWRITE; else if (InstrReadF) ProposedNextBusState = INSTRREAD; else ProposedNextBusState = IDLE; +/* -----\/----- EXCLUDED -----\/----- MMUTRANSLATE: if (~HREADY) ProposedNextBusState = MMUTRANSLATE; else ProposedNextBusState = IDLE; + -----/\----- EXCLUDED -----/\----- */ ATOMICREAD: if (~HREADY) ProposedNextBusState = ATOMICREAD; else ProposedNextBusState = ATOMICWRITE; ATOMICWRITE: if (~HREADY) ProposedNextBusState = ATOMICWRITE; @@ -140,21 +143,21 @@ module ahblite ( endcase // Determine access type (important for determining whether to fault) - assign AtomicAccessM = (ProposedNextBusState == ATOMICREAD) || (ProposedNextBusState == ATOMICWRITE); - assign ExecuteAccessF = (ProposedNextBusState == INSTRREAD); - assign WriteAccessM = (ProposedNextBusState == MEMWRITE) || (ProposedNextBusState == ATOMICWRITE); - assign ReadAccessM = (ProposedNextBusState == MEMREAD) || (ProposedNextBusState == ATOMICREAD) || - (ProposedNextBusState == MMUTRANSLATE); +// (ProposedNextBusState == MMUTRANSLATE); // The PMA and PMP checkers can decide to squash the access + // *** this probably needs to be controlled by the caches rather than EBU dh 7/2/11 assign NextBusState = (DSquashBusAccessM || ISquashBusAccessF) ? IDLE : ProposedNextBusState; // stall signals // Note that we need to extend both stalls when MMUTRANSLATE goes to idle, // since translation might not be complete. 
+ // *** Ross Thompson remove this datastall +/* -----\/----- EXCLUDED -----\/----- assign #2 DataStall = ((NextBusState == MEMREAD) || (NextBusState == MEMWRITE) || - (NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE) || - MMUStall); + (NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE)); + -----/\----- EXCLUDED -----/\----- */ + //assign #1 InstrStall = ((NextBusState == INSTRREAD) || (NextBusState == INSTRREADC) || // MMUStall); @@ -163,14 +166,16 @@ module ahblite ( assign #1 GrantData = (ProposedNextBusState == MEMREAD) || (ProposedNextBusState == MEMWRITE) || (ProposedNextBusState == ATOMICREAD) || (ProposedNextBusState == ATOMICWRITE); assign #1 AccessAddress = (GrantData) ? MemPAdrM[31:0] : InstrPAdrF[31:0]; - assign #1 HADDR = (MMUTranslate) ? MMUPAdr[31:0] : AccessAddress; + //assign #1 HADDR = (MMUTranslate) ? MMUPAdr[31:0] : AccessAddress; + assign #1 HADDR = AccessAddress; generate if (`XLEN == 32) assign PTESize = 3'b010; // in rv32, PTEs are 4 bytes else assign PTESize = 3'b011; // in rv64, PTEs are 8 bytes endgenerate assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width; ignored on reads anyway assign #1 AccessSize = (GrantData) ? {1'b0, MemSizeM} : ISize; - assign #1 HSIZE = (MMUTranslate) ? PTESize : AccessSize; + //assign #1 HSIZE = (MMUTranslate) ? PTESize : AccessSize; + assign #1 HSIZE = AccessSize; assign HBURST = 3'b000; // Single burst only supported; consider generalizing for cache fillsfH assign HPROT = 4'b0011; // not used; see Section 3.7 assign HTRANS = (NextBusState != IDLE) ? 
2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise @@ -186,7 +191,7 @@ module ahblite ( // Route signals to Instruction and Data Caches // *** assumes AHBW = XLEN - assign MMUReady = (BusState == MMUTRANSLATE && HREADY); + //assign MMUReady = (BusState == MMUTRANSLATE && HREADY); assign InstrRData = HRDATA; assign InstrAckF = (BusState == INSTRREAD) && (NextBusState != INSTRREAD); @@ -194,15 +199,14 @@ module ahblite ( // *** Bracker 6/5/21: why is this W stage? assign MemAckW = (BusState == MEMREAD) && (NextBusState != MEMREAD) || (BusState == MEMWRITE) && (NextBusState != MEMWRITE) || ((BusState == ATOMICREAD) && (NextBusState != ATOMICREAD)) || ((BusState == ATOMICWRITE) && (NextBusState != ATOMICWRITE)); - assign MMUReadPTE = HRDATA; - assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021 + //assign MMUReadPTE = HRDATA; // Carefully decide when to update ReadDataW // ReadDataMstored holds the most recent memory read. // We need to wait until the pipeline actually advances before we can update the contents of ReadDataW // (or else the W stage will accidentally get the M stage's data when the pipeline does advance). assign CaptureDataM = ((BusState == MEMREAD) && (NextBusState != MEMREAD)) || ((BusState == ATOMICREAD) && (NextBusState != ATOMICREAD)); - flopenr #(`XLEN) ReadDataNewWReg(clk, reset, CaptureDataM, ReadDataM, CapturedData); + flopenr #(`XLEN) ReadDataNewWReg(clk, reset, CaptureDataM, HRDATAMasked, CapturedHRDATAMasked); always @(posedge HCLK, negedge HRESETn) if (~HRESETn) @@ -211,11 +215,11 @@ module ahblite ( CapturedDataAvailable <= #1 (StallW) ? 
(CaptureDataM | CapturedDataAvailable) : 1'b0; always_comb casez({StallW && (BusState != ATOMICREAD),CapturedDataAvailable}) - 2'b00: ReadDataWnext = ReadDataM; - 2'b01: ReadDataWnext = CapturedData; - 2'b1?: ReadDataWnext = ReadDataW; + 2'b00: HRDATANext = HRDATAMasked; + 2'b01: HRDATANext = CapturedHRDATAMasked; + 2'b1?: HRDATANext = HRDATAW; endcase - flopr #(`XLEN) ReadDataOldWReg(clk, reset, ReadDataWnext, ReadDataW); + flopr #(`XLEN) ReadDataOldWReg(clk, reset, HRDATANext, HRDATAW); // Extract and sign-extend subwords if necessary subwordread swr(.*); @@ -226,7 +230,7 @@ module ahblite ( logic [`XLEN-1:0] AMOResult; // amoalu amoalu(.a(HRDATA), .b(WriteDataM), .funct(Funct7M), .width(MemSizeM), // .result(AMOResult)); - amoalu amoalu(.srca(ReadDataW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM), + amoalu amoalu(.srca(HRDATAW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM), .result(AMOResult)); mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, AtomicMaskedM[1], WriteData); end else diff --git a/wally-pipelined/src/fpu/bk128.sv b/wally-pipelined/src/fpu/bk128.sv deleted file mode 100755 index a302a031..00000000 --- a/wally-pipelined/src/fpu/bk128.sv +++ /dev/null @@ -1,599 +0,0 @@ -// Brent-Kung Carry-save Prefix Adder - -module bk128 (cout, sum, a, b, cin); - - input [127:0] a, b; - input cin; - - output [127:0] sum; - output cout; - - wire [128:0] p,g,t; - wire [127:0] c; - - // pre-computation - assign p={a^b,1'b0}; - assign g={a&b, cin}; - assign t[1]=p[1]; - assign t[2]=p[2]; - assign t[3]=p[3]^g[2]; - assign t[4]=p[4]; - assign t[5]=p[5]^g[4]; - assign t[6]=p[6]; - assign t[7]=p[7]^g[6]; - assign t[8]=p[8]; - assign t[9]=p[9]^g[8]; - assign t[10]=p[10]; - assign t[11]=p[11]^g[10]; - assign t[12]=p[12]; - assign t[13]=p[13]^g[12]; - assign t[14]=p[14]; - assign t[15]=p[15]^g[14]; - assign t[16]=p[16]; - assign t[17]=p[17]^g[16]; - assign t[18]=p[18]; - assign t[19]=p[19]^g[18]; - assign t[20]=p[20]; - assign t[21]=p[21]^g[20]; - assign 
t[22]=p[22]; - assign t[23]=p[23]^g[22]; - assign t[24]=p[24]; - assign t[25]=p[25]^g[24]; - assign t[26]=p[26]; - assign t[27]=p[27]^g[26]; - assign t[28]=p[28]; - assign t[29]=p[29]^g[28]; - assign t[30]=p[30]; - assign t[31]=p[31]^g[30]; - assign t[32]=p[32]; - assign t[33]=p[33]^g[32]; - assign t[34]=p[34]; - assign t[35]=p[35]^g[34]; - assign t[36]=p[36]; - assign t[37]=p[37]^g[36]; - assign t[38]=p[38]; - assign t[39]=p[39]^g[38]; - assign t[40]=p[40]; - assign t[41]=p[41]^g[40]; - assign t[42]=p[42]; - assign t[43]=p[43]^g[42]; - assign t[44]=p[44]; - assign t[45]=p[45]^g[44]; - assign t[46]=p[46]; - assign t[47]=p[47]^g[46]; - assign t[48]=p[48]; - assign t[49]=p[49]^g[48]; - assign t[50]=p[50]; - assign t[51]=p[51]^g[50]; - assign t[52]=p[52]; - assign t[53]=p[53]^g[52]; - assign t[54]=p[54]; - assign t[55]=p[55]^g[54]; - assign t[56]=p[56]; - assign t[57]=p[57]^g[56]; - assign t[58]=p[58]; - assign t[59]=p[59]^g[58]; - assign t[60]=p[60]; - assign t[61]=p[61]^g[60]; - assign t[62]=p[62]; - assign t[63]=p[63]^g[62]; - assign t[64]=p[64]; - assign t[65]=p[65]^g[64]; - assign t[66]=p[66]; - assign t[67]=p[67]^g[66]; - assign t[68]=p[68]; - assign t[69]=p[69]^g[68]; - assign t[70]=p[70]; - assign t[71]=p[71]^g[70]; - assign t[72]=p[72]; - assign t[73]=p[73]^g[72]; - assign t[74]=p[74]; - assign t[75]=p[75]^g[74]; - assign t[76]=p[76]; - assign t[77]=p[77]^g[76]; - assign t[78]=p[78]; - assign t[79]=p[79]^g[78]; - assign t[80]=p[80]; - assign t[81]=p[81]^g[80]; - assign t[82]=p[82]; - assign t[83]=p[83]^g[82]; - assign t[84]=p[84]; - assign t[85]=p[85]^g[84]; - assign t[86]=p[86]; - assign t[87]=p[87]^g[86]; - assign t[88]=p[88]; - assign t[89]=p[89]^g[88]; - assign t[90]=p[90]; - assign t[91]=p[91]^g[90]; - assign t[92]=p[92]; - assign t[93]=p[93]^g[92]; - assign t[94]=p[94]; - assign t[95]=p[95]^g[94]; - assign t[96]=p[96]; - assign t[97]=p[97]^g[96]; - assign t[98]=p[98]; - assign t[99]=p[99]^g[98]; - assign t[100]=p[100]; - assign t[101]=p[101]^g[100]; - 
assign t[102]=p[102]; - assign t[103]=p[103]^g[102]; - assign t[104]=p[104]; - assign t[105]=p[105]^g[104]; - assign t[106]=p[106]; - assign t[107]=p[107]^g[106]; - assign t[108]=p[108]; - assign t[109]=p[109]^g[108]; - assign t[110]=p[110]; - assign t[111]=p[111]^g[110]; - assign t[112]=p[112]; - assign t[113]=p[113]^g[112]; - assign t[114]=p[114]; - assign t[115]=p[115]^g[114]; - assign t[116]=p[116]; - assign t[117]=p[117]^g[116]; - assign t[118]=p[118]; - assign t[119]=p[119]^g[118]; - assign t[120]=p[120]; - assign t[121]=p[121]^g[120]; - assign t[122]=p[122]; - assign t[123]=p[123]^g[122]; - assign t[124]=p[124]; - assign t[125]=p[125]^g[124]; - assign t[126]=p[126]; - assign t[127]=p[127]^g[126]; - assign t[128]=p[128]; - - // prefix tree - brent_kung_cs128 prefix_tree(c, p[127:0], g[127:0]); - - // post-computation - assign sum=p[128:1]^c; - assign cout=g[128]|(p[128]&c[127]); - -endmodule - -module brent_kung_cs128 (c, p, g); - - input [127:0] p; - input [127:0] g; - output [128:1] c; - - - // parallel-prefix, Brent-Kung - - // Stage 1: Generates G/P pairs that span 1 bits - grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); - black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); - black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); - black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]}); - black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]}); - black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]}); - black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); - black b_15_14 (G_15_14, P_15_14, {g[15],g[14]}, {p[15],p[14]}); - - black b_17_16 (G_17_16, P_17_16, {g[17],g[16]}, {p[17],p[16]}); - black b_19_18 (G_19_18, P_19_18, {g[19],g[18]}, {p[19],p[18]}); - black b_21_20 (G_21_20, P_21_20, {g[21],g[20]}, {p[21],p[20]}); - black b_23_22 (G_23_22, P_23_22, {g[23],g[22]}, {p[23],p[22]}); - black b_25_24 (G_25_24, P_25_24, {g[25],g[24]}, {p[25],p[24]}); - black b_27_26 (G_27_26, P_27_26, {g[27],g[26]}, {p[27],p[26]}); - black b_29_28 (G_29_28, P_29_28, 
{g[29],g[28]}, {p[29],p[28]}); - black b_31_30 (G_31_30, P_31_30, {g[31],g[30]}, {p[31],p[30]}); - - black b_33_32 (G_33_32, P_33_32, {g[33],g[32]}, {p[33],p[32]}); - black b_35_34 (G_35_34, P_35_34, {g[35],g[34]}, {p[35],p[34]}); - black b_37_36 (G_37_36, P_37_36, {g[37],g[36]}, {p[37],p[36]}); - black b_39_38 (G_39_38, P_39_38, {g[39],g[38]}, {p[39],p[38]}); - black b_41_40 (G_41_40, P_41_40, {g[41],g[40]}, {p[41],p[40]}); - black b_43_42 (G_43_42, P_43_42, {g[43],g[42]}, {p[43],p[42]}); - black b_45_44 (G_45_44, P_45_44, {g[45],g[44]}, {p[45],p[44]}); - black b_47_46 (G_47_46, P_47_46, {g[47],g[46]}, {p[47],p[46]}); - - black b_49_48 (G_49_48, P_49_48, {g[49],g[48]}, {p[49],p[48]}); - black b_51_50 (G_51_50, P_51_50, {g[51],g[50]}, {p[51],p[50]}); - black b_53_52 (G_53_52, P_53_52, {g[53],g[52]}, {p[53],p[52]}); - black b_55_54 (G_55_54, P_55_54, {g[55],g[54]}, {p[55],p[54]}); - black b_57_56 (G_57_56, P_57_56, {g[57],g[56]}, {p[57],p[56]}); - black b_59_58 (G_59_58, P_59_58, {g[59],g[58]}, {p[59],p[58]}); - black b_61_60 (G_61_60, P_61_60, {g[61],g[60]}, {p[61],p[60]}); - black b_63_62 (G_63_62, P_63_62, {g[63],g[62]}, {p[63],p[62]}); - - black b_65_64 (G_65_64, P_65_64, {g[65],g[64]}, {p[65],p[64]}); - black b_67_66 (G_67_66, P_67_66, {g[67],g[66]}, {p[67],p[66]}); - black b_69_68 (G_69_68, P_69_68, {g[69],g[68]}, {p[69],p[68]}); - black b_71_70 (G_71_70, P_71_70, {g[71],g[70]}, {p[71],p[70]}); - black b_73_72 (G_73_72, P_73_72, {g[73],g[72]}, {p[73],p[72]}); - black b_75_74 (G_75_74, P_75_74, {g[75],g[74]}, {p[75],p[74]}); - black b_77_76 (G_77_76, P_77_76, {g[77],g[76]}, {p[77],p[76]}); - black b_79_78 (G_79_78, P_79_78, {g[79],g[78]}, {p[79],p[78]}); - - black b_81_80 (G_81_80, P_81_80, {g[81],g[80]}, {p[81],p[80]}); - black b_83_82 (G_83_82, P_83_82, {g[83],g[82]}, {p[83],p[82]}); - black b_85_84 (G_85_84, P_85_84, {g[85],g[84]}, {p[85],p[84]}); - black b_87_86 (G_87_86, P_87_86, {g[87],g[86]}, {p[87],p[86]}); - black b_89_88 (G_89_88, P_89_88, 
{g[89],g[88]}, {p[89],p[88]}); - black b_91_90 (G_91_90, P_91_90, {g[91],g[90]}, {p[91],p[90]}); - black b_93_92 (G_93_92, P_93_92, {g[93],g[92]}, {p[93],p[92]}); - black b_95_94 (G_95_94, P_95_94, {g[95],g[94]}, {p[95],p[94]}); - - black b_97_96 (G_97_96, P_97_96, {g[97],g[96]}, {p[97],p[96]}); - black b_99_98 (G_99_98, P_99_98, {g[99],g[98]}, {p[99],p[98]}); - black b_101_100 (G_101_100, P_101_100, {g[101],g[100]}, {p[101],p[100]}); - black b_103_102 (G_103_102, P_103_102, {g[103],g[102]}, {p[103],p[102]}); - black b_105_104 (G_105_104, P_105_104, {g[105],g[104]}, {p[105],p[104]}); - black b_107_106 (G_107_106, P_107_106, {g[107],g[106]}, {p[107],p[106]}); - black b_109_108 (G_109_108, P_109_108, {g[109],g[108]}, {p[109],p[108]}); - black b_111_110 (G_111_110, P_111_110, {g[111],g[110]}, {p[111],p[110]}); - - black b_113_112 (G_113_112, P_113_112, {g[113],g[112]}, {p[113],p[112]}); - black b_115_114 (G_115_114, P_115_114, {g[115],g[114]}, {p[115],p[114]}); - black b_117_116 (G_117_116, P_117_116, {g[117],g[116]}, {p[117],p[116]}); - black b_119_118 (G_119_118, P_119_118, {g[119],g[118]}, {p[119],p[118]}); - black b_121_120 (G_121_120, P_121_120, {g[121],g[120]}, {p[121],p[120]}); - black b_123_122 (G_123_122, P_123_122, {g[123],g[122]}, {p[123],p[122]}); - black b_125_124 (G_125_124, P_125_124, {g[125],g[124]}, {p[125],p[124]}); - black b_127_126 (G_127_126, P_127_126, {g[127],g[126]}, {p[127],p[126]}); - - - // Stage 2: Generates G/P pairs that span 2 bits - grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); - black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); - black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8}); - black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12}); - black b_19_16 (G_19_16, P_19_16, {G_19_18,G_17_16}, {P_19_18,P_17_16}); - black b_23_20 (G_23_20, P_23_20, {G_23_22,G_21_20}, {P_23_22,P_21_20}); - black b_27_24 (G_27_24, P_27_24, {G_27_26,G_25_24}, {P_27_26,P_25_24}); - black b_31_28 (G_31_28, P_31_28, 
{G_31_30,G_29_28}, {P_31_30,P_29_28}); - - black b_35_32 (G_35_32, P_35_32, {G_35_34,G_33_32}, {P_35_34,P_33_32}); - black b_39_36 (G_39_36, P_39_36, {G_39_38,G_37_36}, {P_39_38,P_37_36}); - black b_43_40 (G_43_40, P_43_40, {G_43_42,G_41_40}, {P_43_42,P_41_40}); - black b_47_44 (G_47_44, P_47_44, {G_47_46,G_45_44}, {P_47_46,P_45_44}); - black b_51_48 (G_51_48, P_51_48, {G_51_50,G_49_48}, {P_51_50,P_49_48}); - black b_55_52 (G_55_52, P_55_52, {G_55_54,G_53_52}, {P_55_54,P_53_52}); - black b_59_56 (G_59_56, P_59_56, {G_59_58,G_57_56}, {P_59_58,P_57_56}); - black b_63_60 (G_63_60, P_63_60, {G_63_62,G_61_60}, {P_63_62,P_61_60}); - - black b_67_64 (G_67_64, P_67_64, {G_67_66,G_65_64}, {P_67_66,P_65_64}); - black b_71_68 (G_71_68, P_71_68, {G_71_70,G_69_68}, {P_71_70,P_69_68}); - black b_75_72 (G_75_72, P_75_72, {G_75_74,G_73_72}, {P_75_74,P_73_72}); - black b_79_76 (G_79_76, P_79_76, {G_79_78,G_77_76}, {P_79_78,P_77_76}); - black b_83_80 (G_83_80, P_83_80, {G_83_82,G_81_80}, {P_83_82,P_81_80}); - black b_87_84 (G_87_84, P_87_84, {G_87_86,G_85_84}, {P_87_86,P_85_84}); - black b_91_88 (G_91_88, P_91_88, {G_91_90,G_89_88}, {P_91_90,P_89_88}); - black b_95_92 (G_95_92, P_95_92, {G_95_94,G_93_92}, {P_95_94,P_93_92}); - - black b_99_96 (G_99_96, P_99_96, {G_99_98,G_97_96}, {P_99_98,P_97_96}); - black b_103_100 (G_103_100, P_103_100, {G_103_102,G_101_100}, {P_103_102,P_101_100}); - black b_107_104 (G_107_104, P_107_104, {G_107_106,G_105_104}, {P_107_106,P_105_104}); - black b_111_108 (G_111_108, P_111_108, {G_111_110,G_109_108}, {P_111_110,P_109_108}); - black b_115_112 (G_115_112, P_115_112, {G_115_114,G_113_112}, {P_115_114,P_113_112}); - black b_119_116 (G_119_116, P_119_116, {G_119_118,G_117_116}, {P_119_118,P_117_116}); - black b_123_120 (G_123_120, P_123_120, {G_123_122,G_121_120}, {P_123_122,P_121_120}); - black b_127_124 (G_127_124, P_127_124, {G_127_126,G_125_124}, {P_127_126,P_125_124}); - - - // Stage 3: Generates G/P pairs that span 4 bits - grey g_7_0 (G_7_0, 
{G_7_4,G_3_0}, P_7_4); - black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8}); - black b_23_16 (G_23_16, P_23_16, {G_23_20,G_19_16}, {P_23_20,P_19_16}); - black b_31_24 (G_31_24, P_31_24, {G_31_28,G_27_24}, {P_31_28,P_27_24}); - black b_39_32 (G_39_32, P_39_32, {G_39_36,G_35_32}, {P_39_36,P_35_32}); - black b_47_40 (G_47_40, P_47_40, {G_47_44,G_43_40}, {P_47_44,P_43_40}); - black b_55_48 (G_55_48, P_55_48, {G_55_52,G_51_48}, {P_55_52,P_51_48}); - black b_63_56 (G_63_56, P_63_56, {G_63_60,G_59_56}, {P_63_60,P_59_56}); - - black b_71_64 (G_71_64, P_71_64, {G_71_68,G_67_64}, {P_71_68,P_67_64}); - black b_79_72 (G_79_72, P_79_72, {G_79_76,G_75_72}, {P_79_76,P_75_72}); - black b_87_80 (G_87_80, P_87_80, {G_87_84,G_83_80}, {P_87_84,P_83_80}); - black b_95_88 (G_95_88, P_95_88, {G_95_92,G_91_88}, {P_95_92,P_91_88}); - black b_103_96 (G_103_96, P_103_96, {G_103_100,G_99_96}, {P_103_100,P_99_96}); - black b_111_104 (G_111_104, P_111_104, {G_111_108,G_107_104}, {P_111_108,P_107_104}); - black b_119_112 (G_119_112, P_119_112, {G_119_116,G_115_112}, {P_119_116,P_115_112}); - black b_127_120 (G_127_120, P_127_120, {G_127_124,G_123_120}, {P_127_124,P_123_120}); - - - // Stage 4: Generates G/P pairs that span 8 bits - grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8); - black b_31_16 (G_31_16, P_31_16, {G_31_24,G_23_16}, {P_31_24,P_23_16}); - black b_47_32 (G_47_32, P_47_32, {G_47_40,G_39_32}, {P_47_40,P_39_32}); - black b_63_48 (G_63_48, P_63_48, {G_63_56,G_55_48}, {P_63_56,P_55_48}); - black b_79_64 (G_79_64, P_79_64, {G_79_72,G_71_64}, {P_79_72,P_71_64}); - black b_95_80 (G_95_80, P_95_80, {G_95_88,G_87_80}, {P_95_88,P_87_80}); - black b_111_96 (G_111_96, P_111_96, {G_111_104,G_103_96}, {P_111_104,P_103_96}); - black b_127_112 (G_127_112, P_127_112, {G_127_120,G_119_112}, {P_127_120,P_119_112}); - - - // Stage 5: Generates G/P pairs that span 16 bits - grey g_31_0 (G_31_0, {G_31_16,G_15_0}, P_31_16); - black b_63_32 (G_63_32, P_63_32, {G_63_48,G_47_32}, 
{P_63_48,P_47_32}); - black b_95_64 (G_95_64, P_95_64, {G_95_80,G_79_64}, {P_95_80,P_79_64}); - black b_127_96 (G_127_96, P_127_96, {G_127_112,G_111_96}, {P_127_112,P_111_96}); - - // Stage 6: Generates G/P pairs that span 32 bits - grey g_63_0 (G_63_0, {G_63_32,G_31_0}, P_63_32); - black b_127_64 (G_127_64, P_127_64, {G_127_96,G_95_64}, {P_127_96,P_95_64}); - - // Stage 7: Generates G/P pairs that span 64 bits - grey g_127_0 (G_127_0, {G_127_64,G_63_0}, P_127_64); - - // Stage 8: Generates G/P pairs that span 32 bits - grey g_95_0 (G_95_0, {G_95_64,G_63_0}, P_95_64); - - // Stage 9: Generates G/P pairs that span 16 bits - grey g_47_0 (G_47_0, {G_47_32,G_31_0}, P_47_32); - grey g_79_0 (G_79_0, {G_79_64,G_63_0}, P_79_64); - grey g_111_0 (G_111_0, {G_111_96,G_95_0}, P_111_96); - - // Stage 10: Generates G/P pairs that span 8 bits - grey g_23_0 (G_23_0, {G_23_16,G_15_0}, P_23_16); - grey g_39_0 (G_39_0, {G_39_32,G_31_0}, P_39_32); - grey g_55_0 (G_55_0, {G_55_48,G_47_0}, P_55_48); - grey g_71_0 (G_71_0, {G_71_64,G_63_0}, P_71_64); - grey g_87_0 (G_87_0, {G_87_80,G_79_0}, P_87_80); - grey g_103_0 (G_103_0, {G_103_96,G_95_0}, P_103_96); - grey g_119_0 (G_119_0, {G_119_112,G_111_0}, P_119_112); - - // Stage 11: Generates G/P pairs that span 4 bits - grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); - grey g_19_0 (G_19_0, {G_19_16,G_15_0}, P_19_16); - grey g_27_0 (G_27_0, {G_27_24,G_23_0}, P_27_24); - grey g_35_0 (G_35_0, {G_35_32,G_31_0}, P_35_32); - grey g_43_0 (G_43_0, {G_43_40,G_39_0}, P_43_40); - grey g_51_0 (G_51_0, {G_51_48,G_47_0}, P_51_48); - grey g_59_0 (G_59_0, {G_59_56,G_55_0}, P_59_56); - grey g_67_0 (G_67_0, {G_67_64,G_63_0}, P_67_64); - grey g_75_0 (G_75_0, {G_75_72,G_71_0}, P_75_72); - grey g_83_0 (G_83_0, {G_83_80,G_79_0}, P_83_80); - grey g_91_0 (G_91_0, {G_91_88,G_87_0}, P_91_88); - grey g_99_0 (G_99_0, {G_99_96,G_95_0}, P_99_96); - grey g_107_0 (G_107_0, {G_107_104,G_103_0}, P_107_104); - grey g_115_0 (G_115_0, {G_115_112,G_111_0}, P_115_112); - grey 
g_123_0 (G_123_0, {G_123_120,G_119_0}, P_123_120); - - // Stage 12: Generates G/P pairs that span 2 bits - grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); - grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); - grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12); - grey g_17_0 (G_17_0, {G_17_16,G_15_0}, P_17_16); - grey g_21_0 (G_21_0, {G_21_20,G_19_0}, P_21_20); - grey g_25_0 (G_25_0, {G_25_24,G_23_0}, P_25_24); - grey g_29_0 (G_29_0, {G_29_28,G_27_0}, P_29_28); - grey g_33_0 (G_33_0, {G_33_32,G_31_0}, P_33_32); - grey g_37_0 (G_37_0, {G_37_36,G_35_0}, P_37_36); - grey g_41_0 (G_41_0, {G_41_40,G_39_0}, P_41_40); - grey g_45_0 (G_45_0, {G_45_44,G_43_0}, P_45_44); - grey g_49_0 (G_49_0, {G_49_48,G_47_0}, P_49_48); - grey g_53_0 (G_53_0, {G_53_52,G_51_0}, P_53_52); - grey g_57_0 (G_57_0, {G_57_56,G_55_0}, P_57_56); - grey g_61_0 (G_61_0, {G_61_60,G_59_0}, P_61_60); - grey g_65_0 (G_65_0, {G_65_64,G_63_0}, P_65_64); - grey g_69_0 (G_69_0, {G_69_68,G_67_0}, P_69_68); - grey g_73_0 (G_73_0, {G_73_72,G_71_0}, P_73_72); - grey g_77_0 (G_77_0, {G_77_76,G_75_0}, P_77_76); - grey g_81_0 (G_81_0, {G_81_80,G_79_0}, P_81_80); - grey g_85_0 (G_85_0, {G_85_84,G_83_0}, P_85_84); - grey g_89_0 (G_89_0, {G_89_88,G_87_0}, P_89_88); - grey g_93_0 (G_93_0, {G_93_92,G_91_0}, P_93_92); - grey g_97_0 (G_97_0, {G_97_96,G_95_0}, P_97_96); - grey g_101_0 (G_101_0, {G_101_100,G_99_0}, P_101_100); - grey g_105_0 (G_105_0, {G_105_104,G_103_0}, P_105_104); - grey g_109_0 (G_109_0, {G_109_108,G_107_0}, P_109_108); - grey g_113_0 (G_113_0, {G_113_112,G_111_0}, P_113_112); - grey g_117_0 (G_117_0, {G_117_116,G_115_0}, P_117_116); - grey g_121_0 (G_121_0, {G_121_120,G_119_0}, P_121_120); - grey g_125_0 (G_125_0, {G_125_124,G_123_0}, P_125_124); - - // Last grey cell stage - grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]); - grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]); - grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]); - grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]); - grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]); - grey g_12_0 (G_12_0, 
{g[12],G_11_0}, p[12]); - grey g_14_0 (G_14_0, {g[14],G_13_0}, p[14]); - grey g_16_0 (G_16_0, {g[16],G_15_0}, p[16]); - grey g_18_0 (G_18_0, {g[18],G_17_0}, p[18]); - grey g_20_0 (G_20_0, {g[20],G_19_0}, p[20]); - grey g_22_0 (G_22_0, {g[22],G_21_0}, p[22]); - grey g_24_0 (G_24_0, {g[24],G_23_0}, p[24]); - grey g_26_0 (G_26_0, {g[26],G_25_0}, p[26]); - grey g_28_0 (G_28_0, {g[28],G_27_0}, p[28]); - grey g_30_0 (G_30_0, {g[30],G_29_0}, p[30]); - grey g_32_0 (G_32_0, {g[32],G_31_0}, p[32]); - grey g_34_0 (G_34_0, {g[34],G_33_0}, p[34]); - grey g_36_0 (G_36_0, {g[36],G_35_0}, p[36]); - grey g_38_0 (G_38_0, {g[38],G_37_0}, p[38]); - grey g_40_0 (G_40_0, {g[40],G_39_0}, p[40]); - grey g_42_0 (G_42_0, {g[42],G_41_0}, p[42]); - grey g_44_0 (G_44_0, {g[44],G_43_0}, p[44]); - grey g_46_0 (G_46_0, {g[46],G_45_0}, p[46]); - grey g_48_0 (G_48_0, {g[48],G_47_0}, p[48]); - grey g_50_0 (G_50_0, {g[50],G_49_0}, p[50]); - grey g_52_0 (G_52_0, {g[52],G_51_0}, p[52]); - grey g_54_0 (G_54_0, {g[54],G_53_0}, p[54]); - grey g_56_0 (G_56_0, {g[56],G_55_0}, p[56]); - grey g_58_0 (G_58_0, {g[58],G_57_0}, p[58]); - grey g_60_0 (G_60_0, {g[60],G_59_0}, p[60]); - grey g_62_0 (G_62_0, {g[62],G_61_0}, p[62]); - grey g_64_0 (G_64_0, {g[64],G_63_0}, p[64]); - grey g_66_0 (G_66_0, {g[66],G_65_0}, p[66]); - grey g_68_0 (G_68_0, {g[68],G_67_0}, p[68]); - grey g_70_0 (G_70_0, {g[70],G_69_0}, p[70]); - grey g_72_0 (G_72_0, {g[72],G_71_0}, p[72]); - grey g_74_0 (G_74_0, {g[74],G_73_0}, p[74]); - grey g_76_0 (G_76_0, {g[76],G_75_0}, p[76]); - grey g_78_0 (G_78_0, {g[78],G_77_0}, p[78]); - grey g_80_0 (G_80_0, {g[80],G_79_0}, p[80]); - grey g_82_0 (G_82_0, {g[82],G_81_0}, p[82]); - grey g_84_0 (G_84_0, {g[84],G_83_0}, p[84]); - grey g_86_0 (G_86_0, {g[86],G_85_0}, p[86]); - grey g_88_0 (G_88_0, {g[88],G_87_0}, p[88]); - grey g_90_0 (G_90_0, {g[90],G_89_0}, p[90]); - grey g_92_0 (G_92_0, {g[92],G_91_0}, p[92]); - grey g_94_0 (G_94_0, {g[94],G_93_0}, p[94]); - grey g_96_0 (G_96_0, {g[96],G_95_0}, p[96]); - 
grey g_98_0 (G_98_0, {g[98],G_97_0}, p[98]); - grey g_100_0 (G_100_0, {g[100],G_99_0}, p[100]); - grey g_102_0 (G_102_0, {g[102],G_101_0}, p[102]); - grey g_104_0 (G_104_0, {g[104],G_103_0}, p[104]); - grey g_106_0 (G_106_0, {g[106],G_105_0}, p[106]); - grey g_108_0 (G_108_0, {g[108],G_107_0}, p[108]); - grey g_110_0 (G_110_0, {g[110],G_109_0}, p[110]); - grey g_112_0 (G_112_0, {g[112],G_111_0}, p[112]); - grey g_114_0 (G_114_0, {g[114],G_113_0}, p[114]); - grey g_116_0 (G_116_0, {g[116],G_115_0}, p[116]); - grey g_118_0 (G_118_0, {g[118],G_117_0}, p[118]); - grey g_120_0 (G_120_0, {g[120],G_119_0}, p[120]); - grey g_122_0 (G_122_0, {g[122],G_121_0}, p[122]); - grey g_124_0 (G_124_0, {g[124],G_123_0}, p[124]); - grey g_126_0 (G_126_0, {g[126],G_125_0}, p[126]); - - // Final Stage: Apply c_k+1=G_k_0 - assign c[1]=g[0]; - assign c[2]=G_1_0; - assign c[3]=G_2_0; - assign c[4]=G_3_0; - assign c[5]=G_4_0; - assign c[6]=G_5_0; - assign c[7]=G_6_0; - assign c[8]=G_7_0; - assign c[9]=G_8_0; - - assign c[10]=G_9_0; - assign c[11]=G_10_0; - assign c[12]=G_11_0; - assign c[13]=G_12_0; - assign c[14]=G_13_0; - assign c[15]=G_14_0; - assign c[16]=G_15_0; - assign c[17]=G_16_0; - - assign c[18]=G_17_0; - assign c[19]=G_18_0; - assign c[20]=G_19_0; - assign c[21]=G_20_0; - assign c[22]=G_21_0; - assign c[23]=G_22_0; - assign c[24]=G_23_0; - assign c[25]=G_24_0; - - assign c[26]=G_25_0; - assign c[27]=G_26_0; - assign c[28]=G_27_0; - assign c[29]=G_28_0; - assign c[30]=G_29_0; - assign c[31]=G_30_0; - assign c[32]=G_31_0; - assign c[33]=G_32_0; - - assign c[34]=G_33_0; - assign c[35]=G_34_0; - assign c[36]=G_35_0; - assign c[37]=G_36_0; - assign c[38]=G_37_0; - assign c[39]=G_38_0; - assign c[40]=G_39_0; - assign c[41]=G_40_0; - - assign c[42]=G_41_0; - assign c[43]=G_42_0; - assign c[44]=G_43_0; - assign c[45]=G_44_0; - assign c[46]=G_45_0; - assign c[47]=G_46_0; - assign c[48]=G_47_0; - assign c[49]=G_48_0; - - assign c[50]=G_49_0; - assign c[51]=G_50_0; - assign c[52]=G_51_0; - 
assign c[53]=G_52_0; - assign c[54]=G_53_0; - assign c[55]=G_54_0; - assign c[56]=G_55_0; - assign c[57]=G_56_0; - - assign c[58]=G_57_0; - assign c[59]=G_58_0; - assign c[60]=G_59_0; - assign c[61]=G_60_0; - assign c[62]=G_61_0; - assign c[63]=G_62_0; - assign c[64]=G_63_0; - assign c[65]=G_64_0; - - assign c[66]=G_65_0; - assign c[67]=G_66_0; - assign c[68]=G_67_0; - assign c[69]=G_68_0; - assign c[70]=G_69_0; - assign c[71]=G_70_0; - assign c[72]=G_71_0; - assign c[73]=G_72_0; - - assign c[74]=G_73_0; - assign c[75]=G_74_0; - assign c[76]=G_75_0; - assign c[77]=G_76_0; - assign c[78]=G_77_0; - assign c[79]=G_78_0; - assign c[80]=G_79_0; - assign c[81]=G_80_0; - - assign c[82]=G_81_0; - assign c[83]=G_82_0; - assign c[84]=G_83_0; - assign c[85]=G_84_0; - assign c[86]=G_85_0; - assign c[87]=G_86_0; - assign c[88]=G_87_0; - assign c[89]=G_88_0; - - assign c[90]=G_89_0; - assign c[91]=G_90_0; - assign c[92]=G_91_0; - assign c[93]=G_92_0; - assign c[94]=G_93_0; - assign c[95]=G_94_0; - assign c[96]=G_95_0; - assign c[97]=G_96_0; - - assign c[98]=G_97_0; - assign c[99]=G_98_0; - assign c[100]=G_99_0; - assign c[101]=G_100_0; - assign c[102]=G_101_0; - assign c[103]=G_102_0; - assign c[104]=G_103_0; - assign c[105]=G_104_0; - - assign c[106]=G_105_0; - assign c[107]=G_106_0; - assign c[108]=G_107_0; - assign c[109]=G_108_0; - assign c[110]=G_109_0; - assign c[111]=G_110_0; - assign c[112]=G_111_0; - assign c[113]=G_112_0; - - assign c[114]=G_113_0; - assign c[115]=G_114_0; - assign c[116]=G_115_0; - assign c[117]=G_116_0; - assign c[118]=G_117_0; - assign c[119]=G_118_0; - assign c[120]=G_119_0; - assign c[121]=G_120_0; - - assign c[122]=G_121_0; - assign c[123]=G_122_0; - assign c[124]=G_123_0; - assign c[125]=G_124_0; - assign c[126]=G_125_0; - assign c[127]=G_126_0; - assign c[128]=G_127_0; - -endmodule // brent_kung_cs - - diff --git a/wally-pipelined/src/fpu/bk13.sv b/wally-pipelined/src/fpu/bk13.sv deleted file mode 100755 index 84158db9..00000000 --- 
a/wally-pipelined/src/fpu/bk13.sv +++ /dev/null @@ -1,97 +0,0 @@ -// Brent-Kung Carry-save Prefix Adder - -module bk13 (cout, sum, a, b, cin); - input [12:0] a, b; - input cin; - output [12:0] sum; - output cout; - - wire [13:0] p,g,t; - wire [12:0] c; - -// pre-computation - assign p={a^b,1'b0}; - assign g={a&b, cin}; - assign t[1]=p[1]; - assign t[2]=p[2]; - assign t[3]=p[3]^g[2]; - assign t[4]=p[4]; - assign t[5]=p[5]^g[4]; - assign t[6]=p[6]; - assign t[7]=p[7]^g[6]; - assign t[8]=p[8]; - assign t[9]=p[9]^g[8]; - assign t[10]=p[10]; - assign t[11]=p[11]^g[10]; - assign t[12]=p[12]; - assign t[13]=p[13]; - -// prefix tree - brent_kung_cs13 prefix_tree(c, p[12:0], g[12:0]); - -// post-computation - assign sum=p[13:1]^c; - assign cout=g[13]|(p[13]&c[12]); - -endmodule - -module brent_kung_cs13 (c, p, g); - - input [13:0] p; - input [13:0] g; - output [13:1] c; - - - // parallel-prefix, Brent-Kung - - // Stage 1: Generates G/P pairs that span 1 bits - grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); - black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); - black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); - black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]}); - black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]}); - black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]}); - black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); - - // Stage 2: Generates G/P pairs that span 2 bits - grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); - black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); - black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8}); - - // Stage 3: Generates G/P pairs that span 4 bits - grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4); - - // Stage 4: Generates G/P pairs that span 8 bits - - // Stage 5: Generates G/P pairs that span 4 bits - grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); - - // Stage 6: Generates G/P pairs that span 2 bits - grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); - grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); - - // Last grey cell 
stage - grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]); - grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]); - grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]); - grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]); - grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]); - grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]); - - // Final Stage: Apply c_k+1=G_k_0 - assign c[1]=g[0]; - assign c[2]=G_1_0; - assign c[3]=G_2_0; - assign c[4]=G_3_0; - assign c[5]=G_4_0; - assign c[6]=G_5_0; - assign c[7]=G_6_0; - assign c[8]=G_7_0; - assign c[9]=G_8_0; - - assign c[10]=G_9_0; - assign c[11]=G_10_0; - assign c[12]=G_11_0; - assign c[13]=G_12_0; - -endmodule diff --git a/wally-pipelined/src/fpu/bk14.sv b/wally-pipelined/src/fpu/bk14.sv deleted file mode 100755 index 46872167..00000000 --- a/wally-pipelined/src/fpu/bk14.sv +++ /dev/null @@ -1,86 +0,0 @@ -// Brent-Kung Prefix Adder - -module bk14 (cout, sum, a, b, cin); - input [13:0] a, b; - input cin; - output [13:0] sum; - output cout; - - wire [14:0] p,g; - wire [13:0] c; - -// pre-computation - assign p={a^b,1'b0}; - assign g={a&b, cin}; - -// prefix tree - brent_kung14 prefix_tree(c, p[13:0], g[13:0]); - -// post-computation - assign sum=p[14:1]^c; - assign cout=g[14]|(p[14]&c[13]); - -endmodule - -module brent_kung14 (c, p, g); - - input [13:0] p; - input [13:0] g; - output [14:1] c; - - - // parallel-prefix, Brent-Kung - - // Stage 1: Generates G/P pairs that span 1 bits - grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); - black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); - black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); - black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]}); - black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]}); - black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]}); - black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); - - // Stage 2: Generates G/P pairs that span 2 bits - grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); - black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); - black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, 
{P_11_10,P_9_8}); - - // Stage 3: Generates G/P pairs that span 4 bits - grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4); - - // Stage 4: Generates G/P pairs that span 8 bits - - // Stage 5: Generates G/P pairs that span 4 bits - grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); - - // Stage 6: Generates G/P pairs that span 2 bits - grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); - grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); - grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12); - - // Last grey cell stage - grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]); - grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]); - grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]); - grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]); - grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]); - grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]); - - // Final Stage: Apply c_k+1=G_k_0 - assign c[1]=g[0]; - assign c[2]=G_1_0; - assign c[3]=G_2_0; - assign c[4]=G_3_0; - assign c[5]=G_4_0; - assign c[6]=G_5_0; - assign c[7]=G_6_0; - assign c[8]=G_7_0; - assign c[9]=G_8_0; - - assign c[10]=G_9_0; - assign c[11]=G_10_0; - assign c[12]=G_11_0; - assign c[13]=G_12_0; - assign c[14]=G_13_0; - -endmodule diff --git a/wally-pipelined/src/fpu/csa.sv b/wally-pipelined/src/fpu/csa.sv deleted file mode 100644 index 1e5682cf..00000000 --- a/wally-pipelined/src/fpu/csa.sv +++ /dev/null @@ -1,70 +0,0 @@ -module ha (C, S, A, B) ; - - input A, B; - output S, C; - - assign S = A^B; - assign C = A&B; - -endmodule // HA - -// module fa (input logic a, b, c, output logic sum, carry); - -// assign sum = a^b^c; -// assign carry = a&b|a&c|b&c; - -// endmodule // fa - -// module csa #(parameter WIDTH=8) (a, b,c, sum, carry, cout); - -// input logic [WIDTH-1:0] a, b, c; - -// output logic [WIDTH-1:0] sum, carry; -// output logic cout; - -// logic [WIDTH:0] carry_temp; -// genvar i; -// generate -// for (i=0;i fp // fmv.w.x = ???0 // fmv.w.d = ???1 diff --git a/wally-pipelined/src/fpu/fpdiv.sv b/wally-pipelined/src/fpu/fdivsqrt.sv similarity index 93% rename from wally-pipelined/src/fpu/fpdiv.sv 
rename to wally-pipelined/src/fpu/fdivsqrt.sv index 8c305f3e..6d8da23f 100755 --- a/wally-pipelined/src/fpu/fpdiv.sv +++ b/wally-pipelined/src/fpu/fdivsqrt.sv @@ -23,7 +23,7 @@ // // `timescale 1ps/1ps -module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn, +module fdivsqrt (FDivSqrtDoneE, FDivResultM, FDivSqrtFlgM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn, FDivStartE, reset, clk, FDivBusyE, HoldInputs); input [63:0] DivInput1E; // 1st input operand (A) @@ -39,8 +39,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di input clk; output [63:0] FDivResultM; // Result of operation - output [4:0] FDivFlagsM; // IEEE exception flags - output DivDenormM; // DivDenormM on input or output + output [4:0] FDivSqrtFlgM; // IEEE exception flags output FDivSqrtDoneE; output FDivBusyE, HoldInputs; @@ -51,6 +50,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di wire [63:0] Float2; wire [63:0] IntValue; + wire DivDenormM; // DivDenormM on input or output wire [12:0] exp1, exp2, expF; wire [12:0] exp_diff, bias; wire [13:0] exp_sqrt; @@ -103,7 +103,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di convert_inputs_div divconv1 (Float1, Float2, DivInput1E, DivInput2E, DivOpType, FmtE); // Test for exceptions and return the "Invalid Operation" and - // "Denormalized" Input FDivFlagsM. The "sel_inv" is used in + // "Denormalized" Input FDivSqrtFlgM. The "sel_inv" is used in // the third pipeline stage to select the result. Also, op1_Norm // and op2_Norm are one if DivInput1E and DivInput2E are not zero or denormalized. // sub is one if the effective operation is subtaction. 
@@ -120,12 +120,12 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di // bias : DP = 2^{11-1}-1 = 1023 assign bias = {3'h0, 10'h3FF}; // Divide exponent - csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); - exp_add explogic1 (exp_cout1, {open, exp_diff}, + csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); //***adder + exp_add explogic1 (exp_cout1, {open, exp_diff}, //***adder? {vss, exp_s}, {vss, exp_c}, 1'b1); // Sqrt exponent (check if exponent is odd) assign exp_odd = Float1[52] ? vss : vdd; - exp_add explogic2 (exp_cout2, exp_sqrt, + exp_add explogic2 (exp_cout2, exp_sqrt, //***adder? {vss, exp1}, {4'h0, 10'h3ff}, exp_odd); // Choose correct exponent assign expF = DivOpType ? exp_sqrt[13:1] : exp_diff; @@ -156,7 +156,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di // Store the final result and the exception flags in registers. flopenr #(64) rega (clk, reset, FDivSqrtDoneE, Result, FDivResultM); flopenr #(1) regb (clk, reset, FDivSqrtDoneE, DenormIO, DivDenormM); - flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivFlagsM); + flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivSqrtFlgM); endmodule // fpadd diff --git a/wally-pipelined/src/fpu/fpuhazard.sv b/wally-pipelined/src/fpu/fhazard.sv similarity index 99% rename from wally-pipelined/src/fpu/fpuhazard.sv rename to wally-pipelined/src/fpu/fhazard.sv index 4d0895a7..53f7dde2 100644 --- a/wally-pipelined/src/fpu/fpuhazard.sv +++ b/wally-pipelined/src/fpu/fhazard.sv @@ -25,7 +25,7 @@ `include "wally-config.vh" -module fpuhazard( +module fhazard( input logic [4:0] Adr1E, Adr2E, Adr3E, input logic FWriteEnM, FWriteEnW, input logic [4:0] RdM, RdW, diff --git a/wally-pipelined/src/fpu/fma2.sv b/wally-pipelined/src/fpu/fma2.sv index 131f9839..518b7a76 100644 --- a/wally-pipelined/src/fpu/fma2.sv +++ b/wally-pipelined/src/fpu/fma2.sv @@ -16,8 +16,8 @@ module fma2( input logic XZeroM, YZeroM, ZZeroM, // inputs are zero input logic 
XInfM, YInfM, ZInfM, // inputs are infinity input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN - output logic [63:0] FmaResultM, // FMA final result - output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} + output logic [63:0] FMAResM, // FMA final result + output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} @@ -57,7 +57,7 @@ module fma2( logic [12:0] MaxExp; // maximum value of the exponent logic [12:0] FracLen; // length of the fraction logic SigNaN; // is an input a signaling NaN - logic UnderflowFlag; // Underflow singal used in FmaFlagsM (used to avoid a circular depencency) + logic UnderflowFlag; // Underflow singal used in FMAFlgM (used to avoid a circular depencency) logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results @@ -316,7 +316,7 @@ module fma2( // Combine flags // - FMA can't set the Divide by zero flag // - Don't set the underflow flag if the result was rounded up to a normal number - assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact}; + assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact}; @@ -337,7 +337,7 @@ module fma2( assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0}; assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0}; assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0}; - assign FmaResultM = XNaNM ? XNaNResult : + assign FMAResM = XNaNM ? XNaNResult : YNaNM ? YNaNResult : ZNaNM ? ZNaNResult : Invalid ? 
InvalidResult : // has to be before inf diff --git a/wally-pipelined/src/fpu/fpadd_denorm.sv b/wally-pipelined/src/fpu/fpadd_denorm.sv index eabfcd3a..43de3087 100755 --- a/wally-pipelined/src/fpu/fpadd_denorm.sv +++ b/wally-pipelined/src/fpu/fpadd_denorm.sv @@ -229,11 +229,11 @@ module fpadd (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn); assign corr_sign = ~op_type[2]&~op_type[1]&op_type[0]&swap; // 64-bit Mantissa Adder/Subtractor - cla64 add1 (sum, mantissaA3, mantissaB3, sub); + cla64 add1 (sum, mantissaA3, mantissaB3, sub); //***adder // 64-bit Mantissa Subtractor - to get the two's complement of the // result when the sign from the adder/subtractor is negative. - cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3); + cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3); //***adder // Determine the correct sign of the result assign sign_corr = ((corr_sign ^ signA) & ~convert) ^ sum[63]; diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 5c15268e..ff29dfd7 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -34,7 +34,7 @@ module fpu ( input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg input logic StallE, StallM, StallW, input logic FlushE, FlushM, FlushW, - output logic FStallD, // Stall the decode stage if Div/Sqrt instruction + output logic FStallD, // Stall the decode stage output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory output logic [`XLEN-1:0] FIntResM, @@ -42,48 +42,38 @@ module fpu ( output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic [4:0] SetFflagsM, // FPU flags output logic [`XLEN-1:0] FPUResultW); // FPU result - +// *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS // control logic signal instantiation logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable - logic [2:0] FrmD, FrmE, FrmM, 
FrmW; // FP rounding mode + logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double logic FDivStartD, FDivStartE; // Start division logic FWriteIntD; // Write to integer register - logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction - logic [1:0] FMemRWD; // Read and write enable for memory - logic [1:0] ForwardXD, ForwardXE; // Input1 forwarding mux control signal - logic [1:0] ForwardYD, ForwardYE; // Input2 forwarding mux control signal - logic [1:0] ForwardZD, ForwardZE; // Input3 forwarding mux control signal - logic SrcYUsedD; // Is input 2 used - logic SrcZUsedD; // Is input 3 used + logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result - logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component - logic [1:0] FResSelD, FResSelE, FResSelM; - logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; + logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component + logic [1:0] FResSelD, FResSelE, FResSelM; + logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; logic [4:0] Adr1E, Adr2E, Adr3E; // regfile signals logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining - logic [63:0] FWDM; // Write data for FP register logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [63:0] SrcXE, SrcXM, SrcXW; // Input 1 to the various units (after forwarding) logic [`XLEN-1:0] SrcXMAligned; - logic [63:0] SrcYE, SrcYM, SrcYW; // Input 2 to the various units (after forwarding) + logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding) + logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding) logic [63:0] SrcZE, 
SrcZM; // Input 3 to the various units (after forwarding) - logic [63:0] FLoadResultW, FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions // div/sqrt signals - logic DivDenormE, DivDenormM, DivDenormW; - logic DivOvEn, DivUnEn; - logic [63:0] FDivResultE, FDivResultM, FDivResultW; - logic [4:0] FDivFlagsE, FDivFlagsM, FDivFlagsW; - logic FDivSqrtDoneE, FDivSqrtDoneM; + logic [63:0] FDivResultM, FDivResultW; + logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; + logic FDivSqrtDoneE; logic [63:0] DivInput1E, DivInput2E; logic HoldInputs; // keep forwarded inputs arround durring division // FMA signals - logic [105:0] ProdManE, ProdManM; + logic [105:0] ProdManE, ProdManM; ///*** put pipline stages in units logic [161:0] AlignedAddendE, AlignedAddendM; logic [12:0] ProdExpE, ProdExpM; logic AddendStickyE, AddendStickyM; @@ -91,93 +81,112 @@ module fpu ( logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM; logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM; logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM; - logic [63:0] FmaResultM, FmaResultW; - logic [4:0] FmaFlagsM, FmaFlagsW; + logic [63:0] FMAResM, FMAResW; + logic [4:0] FMAFlgM, FMAFlgW; // add/cvt signals - logic [63:0] AddSumE, AddSumTcE; - logic [3:0] AddSelInvE; - logic [10:0] AddExpPostSumE; - logic AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE; - logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE; - logic AddConvertE; - logic [63:0] AddFloat1E, AddFloat2E; - logic [11:0] AddExp1DenormE, AddExp2DenormE; - logic [10:0] AddExponentE; - logic [2:0] AddRmE; - logic [3:0] AddOpTypeE; - logic AddPE, AddOvEnE, AddUnEnE; - logic AddDenormM; - logic [63:0] AddSumM, AddSumTcM; - logic [3:0] AddSelInvM; - logic [10:0] AddExpPostSumM; - logic AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM; - logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM; - logic AddConvertM, AddSignM; - logic [63:0] AddFloat1M, AddFloat2M; - 
logic [11:0] AddExp1DenormM, AddExp2DenormM; - logic [10:0] AddExponentM; - logic [63:0] AddOp1M, AddOp2M; - logic [2:0] AddRmM; - logic [3:0] AddOpTypeM; - logic AddPM, AddOvEnM, AddUnEnM; - logic [63:0] FAddResultM, FAddResultW; - logic [4:0] FAddFlagsM, FAddFlagsW; + logic [63:0] AddSumE, AddSumM; + logic [63:0] AddSumTcE, AddSumTcM; + logic [3:0] AddSelInvE, AddSelInvM; + logic [10:0] AddExpPostSumE,AddExpPostSumM; + logic AddCorrSignE, AddCorrSignM; + logic AddOp1NormE, AddOp1NormM; + logic AddOp2NormE, AddOp2NormM; + logic AddOpANormE, AddOpANormM; + logic AddOpBNormE, AddOpBNormM; + logic AddInvalidE, AddInvalidM; + logic AddDenormInE, AddDenormInM; + logic AddSwapE, AddSwapM; + logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2 + logic AddSignAE, AddSignAM; + logic AddConvertE, AddConvertM; + logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M; + logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM; + logic [10:0] AddExponentE, AddExponentM; + logic [63:0] FAddResM, FAddResW; + logic [4:0] FAddFlgM, FAddFlgW; // cmp signals - logic CmpInvalidE, CmpInvalidM, CmpInvalidW; - logic [63:0] FCmpResultE, FCmpResultM, FCmpResultW; + logic CmpNVE, CmpNVM, CmpNVW; + logic [63:0] CmpResE, CmpResM, CmpResW; // fsgn signals - logic [63:0] SgnResultE, SgnResultM, SgnResultW; - logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW; + logic [63:0] SgnResE, SgnResM; + logic SgnNVE, SgnNVM, SgnNVW; logic [63:0] FResM, FResW; - logic FFlgM, FFlgW; + logic FFlgM, FFlgW; // instantiation of W stage regfile signals - logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW; + logic [63:0] AlignedSrcAM; // classify signals - logic [63:0] ClassResultE, ClassResultM, ClassResultW; + logic [63:0] ClassResE, ClassResM; // 64-bit FPU result - logic [63:0] FPUResult64W, FPUResult64E; + logic [63:0] FPUResult64W; logic [4:0] FPUFlagsW; + + + + + + + //DECODE STAGE // top-level controller for FPU - fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), 
.Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*); + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), + .FRM_REGW, .IllegalFPUInstrD, .FWriteEnD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, + .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); // regfile instantiation - FPregfile fpregfile (clk, reset, FWriteEnW, + fregfile fregfile (clk, reset, FWriteEnW, InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, FPUResult64W, FRD1D, FRD2D, FRD3D); + + + + + + + + //***************** - // fpregfile D/E pipe registers + // D/E pipe registers //***************** flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - - //***************** - // other D/E pipe registers - //***************** - flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); - flopenrc #(15) CtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); + flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, {Adr1E, Adr2E, Adr3E}); - flopenrc #(22) DECtrlReg(clk, reset, FlushE, ~StallE, + flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE, {FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD}, {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); + + + + + + + + + + + + + //EXECUTION STAGE // Hazard unit for FPU - fpuhazard hazard(.*); + fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FWriteEnM, .FWriteEnW, .RdM, .RdW, .FResultSelM, .FStallD, + .ForwardXE, .ForwardYE, .ForwardZE); // forwarding muxs mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); @@ -186,7 +195,9 @@ module fpu ( // first of two-stage instance of floating-point fused multiply-add unit 
- fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]),.*); + fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, .AlignedAddendE, + .ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, + .XNaNE, .YNaNE, .ZNaNE ); // first and only instance of floating-point divider logic fpdivClk; @@ -204,174 +215,140 @@ module fpu ( .en(~HoldInputs), .clear(FDivSqrtDoneE), .reset(reset), .clk(clk)); - fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .*); + fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, + .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, + .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); // first of two-stage instance of floating-point add/cvt unit - fpuaddcvt1 fpadd1 (.*); + fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE, + .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE, + .AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, + .AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE); - // first of two-stage instance of floating-point comparator - fpucmp1 fpcmp1 (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpInvalidE, FCmpResultE); + // first and only instance of floating-point comparator + fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE); // first and only instance of floating-point sign converter - fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*); + fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE); // first and only instance of floating-point classify unit - fpuclassify fpuclass (.*); + fclassify fclassify (.SrcXE, .FmtE, .ClassResE); // output for store instructions assign FWriteDataE = FmtE ? 
SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; - + //***swap to mux + + + + + + + + + + //***************** - //fpregfile D/E pipe registers + // E/M pipe registers //***************** flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM); flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM); flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM); - //***************** - // fma E/M pipe registers - //***************** - flopenrc #(106) EMRegFma3(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); - flopenrc #(162) EMRegFma4(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); - flopenrc #(13) EMRegFma6(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); - flopenrc #(1) EMRegFma7(clk, reset, FlushM, ~StallM, AddendStickyE, AddendStickyM); - flopenrc #(1) EMRegFma8(clk, reset, FlushM, ~StallM, KillProdE, KillProdM); - flopenrc #(1) EMRegFma10(clk, reset, FlushM, ~StallM, XZeroE, XZeroM); - flopenrc #(1) EMRegFma11(clk, reset, FlushM, ~StallM, YZeroE, YZeroM); - flopenrc #(1) EMRegFma12(clk, reset, FlushM, ~StallM, ZZeroE, ZZeroM); - flopenrc #(1) EMRegFma16(clk, reset, FlushM, ~StallM, XInfE, XInfM); - flopenrc #(1) EMRegFma17(clk, reset, FlushM, ~StallM, YInfE, YInfM); - flopenrc #(1) EMRegFma18(clk, reset, FlushM, ~StallM, ZInfE, ZInfM); - flopenrc #(1) EMRegFma19(clk, reset, FlushM, ~StallM, XNaNE, XNaNM); - flopenrc #(1) EMRegFma20(clk, reset, FlushM, ~StallM, YNaNE, YNaNM); - flopenrc #(1) EMRegFma21(clk, reset, FlushM, ~StallM, ZNaNE, ZNaNM); + flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); + flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); + flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); + flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM, + {AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE}, + {AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, 
XNaNM, YNaNM, ZNaNM}); - //***************** - // fpadd E/M pipe registers - //***************** flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); - flopenrc #(4) EMRegAdd3(clk, reset, FlushM, ~StallM, AddSelInvE, AddSelInvM); - flopenrc #(11) EMRegAdd4(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); - flopenrc #(1) EMRegAdd5(clk, reset, FlushM, ~StallM, AddCorrSignE, AddCorrSignM); - flopenrc #(1) EMRegAdd6(clk, reset, FlushM, ~StallM, AddOp1NormE, AddOp1NormM); - flopenrc #(1) EMRegAdd7(clk, reset, FlushM, ~StallM, AddOp2NormE, AddOp2NormM); - flopenrc #(1) EMRegAdd8(clk, reset, FlushM, ~StallM, AddOpANormE, AddOpANormM); - flopenrc #(1) EMRegAdd9(clk, reset, FlushM, ~StallM, AddOpBNormE, AddOpBNormM); - flopenrc #(1) EMRegAdd10(clk, reset, FlushM, ~StallM, AddInvalidE, AddInvalidM); - flopenrc #(1) EMRegAdd11(clk, reset, FlushM, ~StallM, AddDenormInE, AddDenormInM); - flopenrc #(1) EMRegAdd12(clk, reset, FlushM, ~StallM, AddConvertE, AddConvertM); - flopenrc #(1) EMRegAdd13(clk, reset, FlushM, ~StallM, AddSwapE, AddSwapM); - flopenrc #(1) EMRegAdd14(clk, reset, FlushM, ~StallM, AddNormOvflowE, AddNormOvflowM); - flopenrc #(1) EMRegAdd15(clk, reset, FlushM, ~StallM, AddSignAE, AddSignAM); - flopenrc #(64) EMRegAdd16(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); - flopenrc #(64) EMRegAdd17(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); - flopenrc #(12) EMRegAdd18(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); - flopenrc #(12) EMRegAdd19(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); - flopenrc #(11) EMRegAdd20(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); - flopenrc #(3) EMRegAdd23(clk, reset, FlushM, ~StallM, AddRmE, AddRmM); - flopenrc #(4) EMRegAdd24(clk, reset, FlushM, ~StallM, AddOpTypeE, AddOpTypeM); - flopenrc #(1) EMRegAdd25(clk, reset, FlushM, ~StallM, AddPE, AddPM); - flopenrc #(1) 
EMRegAdd26(clk, reset, FlushM, ~StallM, AddOvEnE, AddOvEnM); - flopenrc #(1) EMRegAdd27(clk, reset, FlushM, ~StallM, AddUnEnE, AddUnEnM); + flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); + flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); + flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); + flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); + flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); + flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); + flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM, + {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE}, + {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM}); + + flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); + flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); - //***************** - // fpcmp E/M pipe registers - //***************** - flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpInvalidE, CmpInvalidM); - flopenrc #(64) EMRegCmp3(clk, reset, FlushM, ~StallM, FCmpResultE, FCmpResultM); + flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); + flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); - //***************** - // fpsgn E/M pipe registers - //***************** - flopenrc #(64) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnResultE, SgnResultM); - flopenrc #(5) EMRegSgn3(clk, reset, FlushM, ~StallM, SgnFlagsE, SgnFlagsM); - - //***************** - // other E/M pipe registers - //***************** flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM, {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, 
FWriteIntE}, {FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); + + flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); - //***************** - // fpuclassify E/M pipe registers - //***************** - flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResultE, ClassResultM); - + + + + + + + //BEGIN MEMORY STAGE - mux3 #(64) FResMux(AlignedSrcAM, SgnResultM, FCmpResultM, FResSelM, FResM); - assign FFlgM = CmpInvalidM & FResSelM[1]; + mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM); + mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM); + //***change to mux assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; - mux3 #(`XLEN) IntResMux(FCmpResultM[`XLEN-1:0], SrcXMAligned, ClassResultM[`XLEN-1:0], FIntResSelM, FIntResM); + mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM); // second instance of two-stage FMA unit - fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .*); + fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM, + .ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM, + .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, + .FMAResM, .FMAFlgM); // second instance of two-stage floating-point add/cvt unit - fpuaddcvt2 fpadd2 (.*); + fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M, + .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, + .AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, + .AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM); // Align SrcA to MSB when single precicion mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); - + + + + + + + //***************** - //fpregfile M/W pipe registers + // M/W pipe registers 
//***************** - flopenrc #(64) MWFpReg1(clk, reset, FlushW, ~StallW, SrcXM, SrcXW); - flopenrc #(64) MWFpReg2(clk, reset, FlushW, ~StallW, SrcYM, SrcYW); + flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); + flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW); - //***************** - // fma M/W pipe registers - //***************** - flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FmaResultM, FmaResultW); - flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FmaFlagsM, FmaFlagsW); - - //***************** - // fpdiv M/W pipe registers - //***************** flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); - flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivFlagsM, FDivFlagsW); - flopenrc #(1) MWRegDiv3(clk, reset, FlushW, ~StallW, DivDenormM, DivDenormW); + flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW); - //***************** - // fpadd M/W pipe registers - //***************** - flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResultM, FAddResultW); - flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlagsM, FAddFlagsW); + flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); + flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW); - //***************** - // fpcmp M/W pipe registers - //***************** - flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpInvalidM, CmpInvalidW); - // flopenrc #(2) MWRegCmp2(clk, reset, FlushW, ~StallW, CmpFCCM, CmpFCCW); - flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, FCmpResultM, FCmpResultW); + flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW); + flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); + + flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); - //***************** - // fpsgn M/W pipe registers - 
//***************** - flopenrc #(64) MWRegSgn1(clk, reset, FlushW, ~StallW, SgnResultM, SgnResultW); - flopenrc #(5) MWRegSgn2(clk, reset, FlushW, ~StallW, SgnFlagsM, SgnFlagsW); - - //***************** - // other M/W pipe registers - //***************** flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW, {FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM}, {FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW}); - //***************** - // fpuclassify M/W pipe registers - //***************** - flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, ClassResultM, ClassResultW); - flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); - flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); @@ -385,13 +362,13 @@ module fpu ( - +//***turn into muxs always_comb begin case (FResultSelW) 3'b000 : FPUFlagsW = 5'b0; - 3'b001 : FPUFlagsW = FmaFlagsW; - 3'b010 : FPUFlagsW = FAddFlagsW; - 3'b011 : FPUFlagsW = FDivFlagsW; + 3'b001 : FPUFlagsW = FMAFlgW; + 3'b010 : FPUFlagsW = FAddFlgW; + 3'b011 : FPUFlagsW = FDivSqrtFlgW; 3'b100 : FPUFlagsW = {4'b0,FFlgW}; default : FPUFlagsW = 5'bxxxxx; endcase @@ -400,8 +377,8 @@ module fpu ( always_comb begin case (FResultSelW) 3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0}; - 3'b001 : FPUResult64W = FmaResultW; - 3'b010 : FPUResult64W = FAddResultW; + 3'b001 : FPUResult64W = FMAResW; + 3'b010 : FPUResult64W = FAddResW; 3'b011 : FPUResult64W = FDivResultW; 3'b100 : FPUResult64W = FResW; default : FPUResult64W = 64'bxxxxx; @@ -415,7 +392,9 @@ module fpu ( // define offsets for LSB zero extension or truncation always_comb begin // zero extension +//***turn into mux FPUResultW = FmtW ? 
FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]}; + //*** put into mem stage SetFflagsM = FPUFlagsW; end diff --git a/wally-pipelined/src/fpu/fpuaddcvt1.sv b/wally-pipelined/src/fpu/fpuaddcvt1.sv index 8f045dcd..1b86b198 100755 --- a/wally-pipelined/src/fpu/fpuaddcvt1.sv +++ b/wally-pipelined/src/fpu/fpuaddcvt1.sv @@ -183,11 +183,11 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE; // 64-bit Mantissa Adder/Subtractor - cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); + cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder // 64-bit Mantissa Subtractor - to get the two's complement of the // result when the sign from the adder/subtractor is negative. - cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); + cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); //***adder // Finds normal underflow result to determine whether to round final exponent down //***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be diff --git a/wally-pipelined/src/fpu/fpuaddcvt2.sv b/wally-pipelined/src/fpu/fpuaddcvt2.sv index 46eac200..1fe8ac65 100755 --- a/wally-pipelined/src/fpu/fpuaddcvt2.sv +++ b/wally-pipelined/src/fpu/fpuaddcvt2.sv @@ -27,7 +27,7 @@ // -module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM); +module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM); input [2:0] FrmM; // Rounding mode - specify values input [3:0] 
FOpCtrlM; // Function opcode @@ -51,9 +51,9 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS input AddSwapM; // input AddNormOvflowM; - output [63:0] FAddResultM; // Result of operation - output [4:0] FAddFlagsM; // IEEE exception flags - output AddDenormM; // AddDenormM on input or output + output [63:0] FAddResM; // Result of operation + output [4:0] FAddFlgM; // IEEE exception flags + wire AddDenormM; // AddDenormM on input or output wire P; assign P = ~FmtM | FOpCtrlM[2]; @@ -145,7 +145,7 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS // exactly where the rounding point is. The rounding units also // handles special cases and set the exception flags. - // Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlagsM in order to + // Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlgM in order to // help in processor reservation station detection of load/stores. In // other words, the processor would like to know ahead of time that // if the result is an exception then don't load or store. @@ -155,8 +155,8 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM); // Store the final result and the exception flags in registers. 
- assign FAddResultM = Result; - assign {AddDenormM, FAddFlagsM} = {DenormIO, FlagsIn}; + assign FAddResM = Result; + assign {AddDenormM, FAddFlgM} = {DenormIO, FlagsIn}; endmodule // fpadd diff --git a/wally-pipelined/src/fpu/fpuclassify.sv b/wally-pipelined/src/fpu/fpuclassify.sv deleted file mode 100644 index b320b2f0..00000000 --- a/wally-pipelined/src/fpu/fpuclassify.sv +++ /dev/null @@ -1,50 +0,0 @@ - -`include "wally-config.vh" - -module fpuclassify ( - input logic [63:0] SrcXE, - input logic FmtE, // 0-single 1-double - output logic [63:0] ClassResultE - ); - - logic [31:0] single; - logic [63:0] double; - logic sign; - logic infinity, NaN, zero, normal, subnormal; - logic ExpNotZero, ExpOnes, ManNotZero, ExpZero, ManZero, FirstBitMan; - - // single and double precision layouts - assign single = SrcXE[63:32]; - assign double = SrcXE; - assign sign = SrcXE[63]; - - // basic calculations for readabillity - assign ExpNotZero = FmtE ? |double[62:52] : |single[30:23]; - assign ExpZero = ~ExpNotZero; - assign ExpOnes = FmtE ? &double[62:52] : &single[30:23]; - assign ManNotZero = FmtE ? |double[51:0] : |single[22:0]; - assign ManZero = ~ManNotZero; - assign FirstBitMan = FmtE ? 
double[51] : single[22]; - - // determine the type of number - assign NaN = ExpOnes & ManNotZero; - assign infinity = ExpOnes & ManZero; - assign zero = ExpZero & ManZero; - assign subnormal= ExpZero & ManNotZero; - assign normal = ExpNotZero; - - // determine sub category and combine into the result - // bit 0 - -infinity - // bit 1 - -normal - // bit 2 - -subnormal - // bit 3 - -zero - // bit 4 - +zero - // bit 5 - +subnormal - // bit 6 - +normal - // bit 7 - +infinity - // bit 8 - signaling NaN - // bit 9 - quiet NaN - assign ClassResultE = {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, - ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity}; - -endmodule diff --git a/wally-pipelined/src/fpu/fpucmp2.sv b/wally-pipelined/src/fpu/fpucmp2.sv deleted file mode 100755 index ee14afb9..00000000 --- a/wally-pipelined/src/fpu/fpucmp2.sv +++ /dev/null @@ -1,243 +0,0 @@ -// // -// // File name : fpcomp.v -// // Title : Floating-Point Comparator -// // project : FPU -// // Library : fpcomp -// // Author(s) : James E. Stine -// // Purpose : definition of main unit to floating-point comparator -// // notes : -// // -// // Copyright Oklahoma State University -// // -// // Floating Point Comparator (Algorithm) -// // -// // 1.) Performs sign-extension if the inputs are 32-bit integers. -// // 2.) Perform a magnitude comparison on the lower 63 bits of the inputs -// // 3.) Check for special cases (+0=-0, unordered, and infinite values) -// // and correct for sign bits -// // -// // This module takes 64-bits inputs op1 and op2, VSS, and VDD -// // signals, and a 2-bit signal Sel that indicates the type of -// // operands being compared as indicated below. 
-// // Sel Description -// // 00 double precision numbers -// // 01 single precision numbers -// // 10 half precision numbers -// // 11 (unused) -// // -// // The comparator produces a 2-bit signal FCC, which -// // indicates the result of the comparison: -// // -// // fcc decscription -// // 00 A = B -// // 01 A < B -// // 10 A > B -// // 11 A and B are unordered (i.e., A or B is NaN) -// // -// // It also produces an invalid operation flag, which is one -// // if either of the input operands is a signaling NaN per 754 - -// module fpucmp2 ( -// input logic [63:0] op1, -// input logic [63:0] op2, -// input logic [1:0] Sel, -// input logic [7:0] w, x, -// input logic ANaN, BNaN, -// input logic Azero, Bzero, -// input logic [3:0] FOpCtrlM, -// input logic FmtM, - -// output logic Invalid, // Invalid Operation -// output logic [1:0] FCC, // Condition Codes -// output logic [63:0] FCmpResultM); - -// logic LT; // magnitude op1 < magnitude op2 -// logic EQ; // magnitude op1 = magnitude op2 - -// // Perform magnitude comparison between the 63 least signficant bits -// // of the input operands. Only LT and EQ are returned, since GT can -// // be determined from these values. -// magcompare64b_2 magcomp2 (LT, EQ, w, x); - -// // Determine final values based on output of magnitude comparison, -// // sign bits, and special case testing. 
-// exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*); - - -// endmodule // fpcomp - -// /*module magcompare2b (LT, GT, A, B); - -// input logic [1:0] A; -// input logic [1:0] B; - -// output logic LT; -// output logic GT; - -// // Determine if A < B using a minimized sum-of-products expression -// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; -// // Determine if A > B using a minimized sum-of-products expression -// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; - -// endmodule*/ // magcompare2b - -// // 2-bit magnitude comparator -// // This module compares two 2-bit values A and B. LT is '1' if A < B -// // and GT is '1'if A > B. LT and GT are both '0' if A = B. However, -// // this version actually incorporates don't cares into the equation to -// // simplify the optimization - -// // module magcompare2c (LT, GT, A, B); - -// // input logic [1:0] A; -// // input logic [1:0] B; - -// // output logic LT; -// // output logic GT; - -// // assign LT = B[1] | (!A[1]&B[0]); -// // assign GT = A[1] | (!B[1]&A[0]); - -// // endmodule // magcompare2b - -// // This module compares two 64-bit values A and B. LT is '1' if A < B -// // and EQ is '1'if A = B. LT and GT are both '0' if A > B. -// // This structure was modified so -// // that it only does a strict magnitdude comparison, and only -// // returns flags for less than (LT) and eqaual to (EQ). It uses a tree -// // of 63 2-bit magnitude comparators, followed by one OR gates. -// // -// // J. E. Stine and M. J. Schulte, "A combined two's complement and -// // floating-point comparator," 2005 IEEE International Symposium on -// // Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
-// // doi: 10.1109/ISCAS.2005.1464531 - -// module magcompare64b_2 (LT, EQ, w, x); - -// input logic [7:0] w; -// input logic [7:0] x; -// logic [3:0] y; -// logic [3:0] z; -// logic [1:0] a; -// logic [1:0] b; -// logic GT; - -// output logic LT; -// output logic EQ; - -// magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); -// magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); -// magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]); -// magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]); - -// magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); -// magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); - -// magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); - -// assign EQ = ~(LT | GT); - -// endmodule // magcompare64b - -// // This module takes 64-bits inputs A and B, two magnitude comparison -// // flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of -// // operands being compared as indicated below. -// // Sel Description -// // 00 double precision numbers -// // 01 single precision numbers -// // 10 half precision numbers -// // 11 bfloat precision numbers -// // -// // The comparator produces a 2-bit signal fcc, which -// // indicates the result of the comparison as follows: -// // fcc decscription -// // 00 A = B -// // 01 A < B -// // 10 A > B -// // 11 A and B are unordered (i.e., A or B is NaN) -// // It also produces a invalid operation flag, which is one -// // if either of the input operands is a signaling NaN. 
- -// module exception_cmp_2 ( -// input logic [63:0] A, -// input logic [63:0] B, -// input logic FmtM, -// input logic LT_mag, -// input logic EQ_mag, -// input logic [1:0] Sel, -// input logic [3:0] FOpCtrlM, - -// output logic invalid, -// output logic [1:0] fcc, -// output logic [63:0] FCmpResultM, - -// input logic Azero, -// input logic Bzero, -// input logic ANaN, -// input logic BNaN); - -// logic dp; -// logic sp; -// logic hp; -// logic ASNaN; -// logic BSNaN; -// logic UO; -// logic GT; -// logic LT; -// logic EQ; -// logic [62:0] sixtythreezeros = 63'h0; - -// assign dp = !Sel[1]&!Sel[0]; -// assign sp = !Sel[1]&Sel[0]; -// assign hp = Sel[1]&!Sel[0]; - -// // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating -// // point comparison is being performed. -// assign UO = (ANaN | BNaN); - -// // Test if A or B is a signaling NaN. -// assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]); -// assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]); - -// // If either A or B is a signaling NaN the "Invalid Operation" -// // exception flag is set to one; otherwise it is zero. -// assign invalid = (ASNaN | BSNaN); - -// // A and B are equal if (their magnitudes are equal) AND ((their signs are -// // equal) or (their magnitudes are zero AND they are floating point -// // numbers)). Also, A and B are not equal if they are unordered. -// assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO); - -// // A is less than B if (A is negative and B is posiive) OR -// // (A and B are positive and the magnitude of A is less than -// // the magnitude of B) or (A and B are negative integers and -// // the magnitude of A is less than the magnitude of B) or -// // (A and B are negative floating point numbers and -// // the magnitude of A is greater than the magnitude of B). -// // Also, A is not less than B if A and B are equal or unordered. 
-// assign LT = ((~LT_mag & A[63] & B[63]) | -// (LT_mag & ~(A[63] & B[63])))&~EQ&~UO; - -// // A is greater than B when LT, EQ, and UO are are false. -// assign GT = ~(LT | EQ | UO); - -// // Note: it may be possible to optimize the setting of fcc -// // a little more, but it is probably not worth the effort. - -// // Set the bits of fcc based on LT, GT, EQ, and UO -// assign fcc[0] = LT | UO; -// assign fcc[1] = GT | UO; - -// always_comb begin -// case (FOpCtrlM[2:0]) -// 3'b111: FCmpResultM = LT ? A : B;//min -// 3'b101: FCmpResultM = GT ? A : B;//max -// 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal -// 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than -// 3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal -// default: FCmpResultM = 64'b0; -// endcase -// end - - -// endmodule // exception_cmp diff --git a/wally-pipelined/src/fpu/freg.sv b/wally-pipelined/src/fpu/freg.sv deleted file mode 100755 index b7e16713..00000000 --- a/wally-pipelined/src/fpu/freg.sv +++ /dev/null @@ -1,515 +0,0 @@ - -`include "wally-config.vh" -// `include "../../config/rv64icfd/wally-config.vh" //debug - -module freg1adr ( - input logic FmtW, - input logic reset, - input logic clear, - input logic clk, - input logic [4:0] rd, - input logic write, - input logic [4:0] adr1, - input logic [`XLEN-1:0] writeData, - output logic [`XLEN-1:0] readData); - - //note - not word aligning based on precision of - //operation (FmtW) - - //reg number should remain static, but it doesn't hurt - //to parameterize - parameter numRegs = 32; - - //intermediary signals - useful for debugging - //and easy instatiation of generated modules - logic [`XLEN-1:0] [numRegs-1:0] regInput; - logic [`XLEN-1:0] [numRegs-1:0] regOutput; - - //generate fp registers themselves - genvar i; - generate - for (i = 0; i < numRegs; i = i + 1) begin:register - - floprc #(`XLEN) freg[i](.clk(clk), .reset(reset), .clear(clear), 
.d(regInput[i][`XLEN-1:0]), .q(regOutput[i][`XLEN-1:0])); - end - - endgenerate - - //this could be done with: - // - //assign readData = regOutput[adr1]; - // - //but always_comb allows for finer control - - - //address decoder - //only 1 for this fp register set - //used with fpsign - //defaults to outputting zeroes - always_comb begin - case(adr1) - 5'b00000 : readData = regOutput[0]; - 5'b00001 : readData = regOutput[1]; - 5'b00010 : readData = regOutput[2]; - 5'b00011 : readData = regOutput[3]; - 5'b00100 : readData = regOutput[4]; - 5'b00101 : readData = regOutput[5]; - 5'b00110 : readData = regOutput[6]; - 5'b00111 : readData = regOutput[7]; - 5'b01000 : readData = regOutput[8]; - 5'b01001 : readData = regOutput[9]; - 5'b01010 : readData = regOutput[10]; - 5'b01011 : readData = regOutput[11]; - 5'b01100 : readData = regOutput[12]; - 5'b01101 : readData = regOutput[13]; - 5'b01110 : readData = regOutput[14]; - 5'b01111 : readData = regOutput[15]; - 5'b10000 : readData = regOutput[16]; - 5'b10001 : readData = regOutput[17]; - 5'b10010 : readData = regOutput[18]; - 5'b10011 : readData = regOutput[19]; - 5'b10100 : readData = regOutput[20]; - 5'b10101 : readData = regOutput[21]; - 5'b10110 : readData = regOutput[22]; - 5'b10111 : readData = regOutput[23]; - 5'b11000 : readData = regOutput[24]; - 5'b11001 : readData = regOutput[25]; - 5'b11010 : readData = regOutput[26]; - 5'b11011 : readData = regOutput[27]; - 5'b11100 : readData = regOutput[28]; - 5'b11101 : readData = regOutput[29]; - 5'b11110 : readData = regOutput[30]; - 5'b11111 : readData = regOutput[31]; - default : readData = `XLEN'h0; - endcase - end - - //destination register decoder - //only change input values on write - //defaults to undefined with invalid address - // - //note - this is an intermediary signal, so - //this is not asynch assignment. 
FF in flopr - //will not update data until clk pulse - always_comb begin - if(write) begin - case(rd) - 5'b00000 : regInput[0] = writeData; - 5'b00001 : regInput[1] = writeData; - 5'b00010 : regInput[2] = writeData; - 5'b00011 : regInput[3] = writeData; - 5'b00100 : regInput[4] = writeData; - 5'b00101 : regInput[5] = writeData; - 5'b00110 : regInput[6] = writeData; - 5'b00111 : regInput[7] = writeData; - 5'b01000 : regInput[8] = writeData; - 5'b01000 : regInput[9] = writeData; - 5'b01001 : regInput[10] = writeData; - 5'b01010 : regInput[11] = writeData; - 5'b01111 : regInput[12] = writeData; - 5'b01101 : regInput[13] = writeData; - 5'b01110 : regInput[14] = writeData; - 5'b01111 : regInput[15] = writeData; - 5'b10000 : regInput[16] = writeData; - 5'b10001 : regInput[17] = writeData; - 5'b10010 : regInput[18] = writeData; - 5'b10011 : regInput[19] = writeData; - 5'b10100 : regInput[20] = writeData; - 5'b10101 : regInput[21] = writeData; - 5'b10110 : regInput[22] = writeData; - 5'b10111 : regInput[23] = writeData; - 5'b11000 : regInput[24] = writeData; - 5'b11000 : regInput[25] = writeData; - 5'b11001 : regInput[26] = writeData; - 5'b11010 : regInput[27] = writeData; - 5'b11111 : regInput[28] = writeData; - 5'b11101 : regInput[29] = writeData; - 5'b11110 : regInput[30] = writeData; - 5'b11111 : regInput[31] = writeData; - default : regInput[0] = `XLEN'hx; - endcase - end - end - -endmodule - -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//******** -//formatting separation -//******** -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -module freg2adr ( - input logic FmtW, - input logic reset, - input logic clear, - input logic clk, - input logic [4:0] rd, - input logic write, - input logic [4:0] adr1, - input logic [4:0] adr2, - input logic [`XLEN-1:0] writeData, - output logic [`XLEN-1:0] readData1, - output logic 
[`XLEN-1:0] readData2); - - //note - not word aligning based on precision of - //operation (FmtW) - - //reg number should remain static, but it doesn't hurt - //to parameterize - parameter numRegs = 32; - - //intermediary signals - useful for debugging - //and easy instatiation of generated modules - logic [`XLEN-1:0] [numRegs-1:0] regInput; - logic [`XLEN-1:0] [numRegs-1:0] regOutput; - - //generate fp registers themselves - genvar i; - generate - for (i = 0; i < numRegs; i = i + 1) begin:register - - floprc #(`XLEN) freg[i](.clk(clk), .reset(reset), .clear(clear), .d(regInput[i][`XLEN-1:0]), .q(regOutput[i][`XLEN-1:0])); - end - - endgenerate - - //address decoder - //2 are used for this fp register set - //used with fpadd/cvt, fpdiv/sqrt, and fpcmp - //defaults to outputting zeroes - always_comb begin - - //adderss 1 decoder - case(adr1) - 5'b00000 : readData1 = regOutput[0]; - 5'b00001 : readData1 = regOutput[1]; - 5'b00010 : readData1 = regOutput[2]; - 5'b00011 : readData1 = regOutput[3]; - 5'b00100 : readData1 = regOutput[4]; - 5'b00101 : readData1 = regOutput[5]; - 5'b00110 : readData1 = regOutput[6]; - 5'b00111 : readData1 = regOutput[7]; - 5'b01000 : readData1 = regOutput[8]; - 5'b01001 : readData1 = regOutput[9]; - 5'b01010 : readData1 = regOutput[10]; - 5'b01011 : readData1 = regOutput[11]; - 5'b01100 : readData1 = regOutput[12]; - 5'b01101 : readData1 = regOutput[13]; - 5'b01110 : readData1 = regOutput[14]; - 5'b01111 : readData1 = regOutput[15]; - 5'b10000 : readData1 = regOutput[16]; - 5'b10001 : readData1 = regOutput[17]; - 5'b10010 : readData1 = regOutput[18]; - 5'b10011 : readData1 = regOutput[19]; - 5'b10100 : readData1 = regOutput[20]; - 5'b10101 : readData1 = regOutput[21]; - 5'b10110 : readData1 = regOutput[22]; - 5'b10111 : readData1 = regOutput[23]; - 5'b11000 : readData1 = regOutput[24]; - 5'b11001 : readData1 = regOutput[25]; - 5'b11010 : readData1 = regOutput[26]; - 5'b11011 : readData1 = regOutput[27]; - 5'b11100 : readData1 = 
regOutput[28]; - 5'b11101 : readData1 = regOutput[29]; - 5'b11110 : readData1 = regOutput[30]; - 5'b11111 : readData1 = regOutput[31]; - default : readData1 = `XLEN'h0; - endcase - - //address 2 decoder - case(adr2) - 5'b00000 : readData2 = regOutput[0]; - 5'b00001 : readData2 = regOutput[1]; - 5'b00010 : readData2 = regOutput[2]; - 5'b00011 : readData2 = regOutput[3]; - 5'b00100 : readData2 = regOutput[4]; - 5'b00101 : readData2 = regOutput[5]; - 5'b00110 : readData2 = regOutput[6]; - 5'b00111 : readData2 = regOutput[7]; - 5'b01000 : readData2 = regOutput[8]; - 5'b01001 : readData2 = regOutput[9]; - 5'b01010 : readData2 = regOutput[10]; - 5'b01011 : readData2 = regOutput[11]; - 5'b01100 : readData2 = regOutput[12]; - 5'b01101 : readData2 = regOutput[13]; - 5'b01110 : readData2 = regOutput[14]; - 5'b01111 : readData2 = regOutput[15]; - 5'b10000 : readData2 = regOutput[16]; - 5'b10001 : readData2 = regOutput[17]; - 5'b10010 : readData2 = regOutput[18]; - 5'b10011 : readData2 = regOutput[19]; - 5'b10100 : readData2 = regOutput[20]; - 5'b10101 : readData2 = regOutput[21]; - 5'b10110 : readData2 = regOutput[22]; - 5'b10111 : readData2 = regOutput[23]; - 5'b11000 : readData2 = regOutput[24]; - 5'b11001 : readData2 = regOutput[25]; - 5'b11010 : readData2 = regOutput[26]; - 5'b11011 : readData2 = regOutput[27]; - 5'b11100 : readData2 = regOutput[28]; - 5'b11101 : readData2 = regOutput[29]; - 5'b11110 : readData2 = regOutput[30]; - 5'b11111 : readData2 = regOutput[31]; - default : readData2 = `XLEN'h0; - endcase - end - - //destination register decoder - //only change input values on write - //defaults to undefined with invalid address - // - //note - this is an intermediary signal, so - //this is not asynch assignment. 
FF in flopr - //will not update data until clk pulse - always_comb begin - if(write) begin - case(rd) - 5'b00000 : regInput[0] = writeData; - 5'b00001 : regInput[1] = writeData; - 5'b00010 : regInput[2] = writeData; - 5'b00011 : regInput[3] = writeData; - 5'b00100 : regInput[4] = writeData; - 5'b00101 : regInput[5] = writeData; - 5'b00110 : regInput[6] = writeData; - 5'b00111 : regInput[7] = writeData; - 5'b01000 : regInput[8] = writeData; - 5'b01000 : regInput[9] = writeData; - 5'b01001 : regInput[10] = writeData; - 5'b01010 : regInput[11] = writeData; - 5'b01111 : regInput[12] = writeData; - 5'b01101 : regInput[13] = writeData; - 5'b01110 : regInput[14] = writeData; - 5'b01111 : regInput[15] = writeData; - 5'b10000 : regInput[16] = writeData; - 5'b10001 : regInput[17] = writeData; - 5'b10010 : regInput[18] = writeData; - 5'b10011 : regInput[19] = writeData; - 5'b10100 : regInput[20] = writeData; - 5'b10101 : regInput[21] = writeData; - 5'b10110 : regInput[22] = writeData; - 5'b10111 : regInput[23] = writeData; - 5'b11000 : regInput[24] = writeData; - 5'b11000 : regInput[25] = writeData; - 5'b11001 : regInput[26] = writeData; - 5'b11010 : regInput[27] = writeData; - 5'b11111 : regInput[28] = writeData; - 5'b11101 : regInput[29] = writeData; - 5'b11110 : regInput[30] = writeData; - 5'b11111 : regInput[31] = writeData; - default : regInput[0] = `XLEN'hx; - endcase - end - end - -endmodule - -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//******** -//formatting separation -//******** -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -module freg3adr ( - input logic FmtW, - input logic reset, - input logic clear, - input logic clk, - input logic [4:0] rd, - input logic write, - input logic [4:0] adr1, - input logic [4:0] adr2, - input logic [4:0] adr3, - input logic [`XLEN-1:0] writeData, - output logic [`XLEN-1:0] 
readData1, - output logic [`XLEN-1:0] readData2, - output logic [`XLEN-1:0] readData3); - - //note - not word aligning based on precision of - //operation (FmtW) - - //reg number should remain static, but it doesn't hurt - //to parameterize - parameter numRegs = 32; - - //intermediary signals - useful for debugging - //and easy instatiation of generated modules - logic [numRegs-1:0] [`XLEN-1:0] regInput; - logic [numRegs-1:0] [`XLEN-1:0] regOutput; - - //generate fp registers themselves - genvar i; - generate - for (i = 0; i < numRegs; i = i + 1) begin:register - - floprc #(`XLEN) freg(.clk(clk), .reset(reset), .clear(clear), .d(regInput[i][`XLEN-1:0]), .q(regOutput[i][`XLEN-1:0])); - end - - endgenerate - - //address decoder - //3 are used for this fp register set - //used exclusively for fma - //defaults to outputting zeroes - always_comb begin - - //adderss 1 decoder - case(adr1) - 5'b00000 : readData1 = regOutput[0]; - 5'b00001 : readData1 = regOutput[1]; - 5'b00010 : readData1 = regOutput[2]; - 5'b00011 : readData1 = regOutput[3]; - 5'b00100 : readData1 = regOutput[4]; - 5'b00101 : readData1 = regOutput[5]; - 5'b00110 : readData1 = regOutput[6]; - 5'b00111 : readData1 = regOutput[7]; - 5'b01000 : readData1 = regOutput[8]; - 5'b01001 : readData1 = regOutput[9]; - 5'b01010 : readData1 = regOutput[10]; - 5'b01011 : readData1 = regOutput[11]; - 5'b01100 : readData1 = regOutput[12]; - 5'b01101 : readData1 = regOutput[13]; - 5'b01110 : readData1 = regOutput[14]; - 5'b01111 : readData1 = regOutput[15]; - 5'b10000 : readData1 = regOutput[16]; - 5'b10001 : readData1 = regOutput[17]; - 5'b10010 : readData1 = regOutput[18]; - 5'b10011 : readData1 = regOutput[19]; - 5'b10100 : readData1 = regOutput[20]; - 5'b10101 : readData1 = regOutput[21]; - 5'b10110 : readData1 = regOutput[22]; - 5'b10111 : readData1 = regOutput[23]; - 5'b11000 : readData1 = regOutput[24]; - 5'b11001 : readData1 = regOutput[25]; - 5'b11010 : readData1 = regOutput[26]; - 5'b11011 : readData1 = 
regOutput[27]; - 5'b11100 : readData1 = regOutput[28]; - 5'b11101 : readData1 = regOutput[29]; - 5'b11110 : readData1 = regOutput[30]; - 5'b11111 : readData1 = regOutput[31]; - default : readData1 = `XLEN'h0; - endcase - - //address 2 decoder - case(adr2) - 5'b00000 : readData2 = regOutput[0]; - 5'b00001 : readData2 = regOutput[1]; - 5'b00010 : readData2 = regOutput[2]; - 5'b00011 : readData2 = regOutput[3]; - 5'b00100 : readData2 = regOutput[4]; - 5'b00101 : readData2 = regOutput[5]; - 5'b00110 : readData2 = regOutput[6]; - 5'b00111 : readData2 = regOutput[7]; - 5'b01000 : readData2 = regOutput[8]; - 5'b01001 : readData2 = regOutput[9]; - 5'b01010 : readData2 = regOutput[10]; - 5'b01011 : readData2 = regOutput[11]; - 5'b01100 : readData2 = regOutput[12]; - 5'b01101 : readData2 = regOutput[13]; - 5'b01110 : readData2 = regOutput[14]; - 5'b01111 : readData2 = regOutput[15]; - 5'b10000 : readData2 = regOutput[16]; - 5'b10001 : readData2 = regOutput[17]; - 5'b10010 : readData2 = regOutput[18]; - 5'b10011 : readData2 = regOutput[19]; - 5'b10100 : readData2 = regOutput[20]; - 5'b10101 : readData2 = regOutput[21]; - 5'b10110 : readData2 = regOutput[22]; - 5'b10111 : readData2 = regOutput[23]; - 5'b11000 : readData2 = regOutput[24]; - 5'b11001 : readData2 = regOutput[25]; - 5'b11010 : readData2 = regOutput[26]; - 5'b11011 : readData2 = regOutput[27]; - 5'b11100 : readData2 = regOutput[28]; - 5'b11101 : readData2 = regOutput[29]; - 5'b11110 : readData2 = regOutput[30]; - 5'b11111 : readData2 = regOutput[31]; - default : readData2 = `XLEN'h0; - endcase - - //address 3 decoder - case(adr3) - 5'b00000 : readData3 = regOutput[0]; - 5'b00001 : readData3 = regOutput[1]; - 5'b00010 : readData3 = regOutput[2]; - 5'b00011 : readData3 = regOutput[3]; - 5'b00100 : readData3 = regOutput[4]; - 5'b00101 : readData3 = regOutput[5]; - 5'b00110 : readData3 = regOutput[6]; - 5'b00111 : readData3 = regOutput[7]; - 5'b01000 : readData3 = regOutput[8]; - 5'b01001 : readData3 = regOutput[9]; - 
5'b01010 : readData3 = regOutput[10]; - 5'b01011 : readData3 = regOutput[11]; - 5'b01100 : readData3 = regOutput[12]; - 5'b01101 : readData3 = regOutput[13]; - 5'b01110 : readData3 = regOutput[14]; - 5'b01111 : readData3 = regOutput[15]; - 5'b10000 : readData3 = regOutput[16]; - 5'b10001 : readData3 = regOutput[17]; - 5'b10010 : readData3 = regOutput[18]; - 5'b10011 : readData3 = regOutput[19]; - 5'b10100 : readData3 = regOutput[20]; - 5'b10101 : readData3 = regOutput[21]; - 5'b10110 : readData3 = regOutput[22]; - 5'b10111 : readData3 = regOutput[23]; - 5'b11000 : readData3 = regOutput[24]; - 5'b11001 : readData3 = regOutput[25]; - 5'b11010 : readData3 = regOutput[26]; - 5'b11011 : readData3 = regOutput[27]; - 5'b11100 : readData3 = regOutput[28]; - 5'b11101 : readData3 = regOutput[29]; - 5'b11110 : readData3 = regOutput[30]; - 5'b11111 : readData3 = regOutput[31]; - default : readData3 = `XLEN'h0; - endcase - end - - //destination register decoder - //only change input values on write - //defaults to undefined with invalid address - // - //note - this is an intermediary signal, so - //this is not asynch assignment. 
FF in flopr - //will not update data until clk pulse - always_comb begin - if(write) begin - case(rd) - 5'b00000 : regInput[0] = writeData; - 5'b00001 : regInput[1] = writeData; - 5'b00010 : regInput[2] = writeData; - 5'b00011 : regInput[3] = writeData; - 5'b00100 : regInput[4] = writeData; - 5'b00101 : regInput[5] = writeData; - 5'b00110 : regInput[6] = writeData; - 5'b00111 : regInput[7] = writeData; - 5'b01000 : regInput[8] = writeData; - 5'b01001 : regInput[9] = writeData; - 5'b01010 : regInput[10] = writeData; - 5'b01011 : regInput[11] = writeData; - 5'b01100 : regInput[12] = writeData; - 5'b01101 : regInput[13] = writeData; - 5'b01110 : regInput[14] = writeData; - 5'b01111 : regInput[15] = writeData; - 5'b10000 : regInput[16] = writeData; - 5'b10001 : regInput[17] = writeData; - 5'b10010 : regInput[18] = writeData; - 5'b10011 : regInput[19] = writeData; - 5'b10100 : regInput[20] = writeData; - 5'b10101 : regInput[21] = writeData; - 5'b10110 : regInput[22] = writeData; - 5'b10111 : regInput[23] = writeData; - 5'b11000 : regInput[24] = writeData; - 5'b11001 : regInput[25] = writeData; - 5'b11010 : regInput[26] = writeData; - 5'b11011 : regInput[27] = writeData; - 5'b11100 : regInput[28] = writeData; - 5'b11101 : regInput[29] = writeData; - 5'b11110 : regInput[30] = writeData; - 5'b11111 : regInput[31] = writeData; - default : regInput[0] = `XLEN'hx; - endcase - end - end - -endmodule diff --git a/wally-pipelined/src/fpu/FPregfile.sv b/wally-pipelined/src/fpu/fregfile.sv similarity index 99% rename from wally-pipelined/src/fpu/FPregfile.sv rename to wally-pipelined/src/fpu/fregfile.sv index 99d18bce..78c24b3e 100644 --- a/wally-pipelined/src/fpu/FPregfile.sv +++ b/wally-pipelined/src/fpu/fregfile.sv @@ -25,7 +25,7 @@ `include "wally-config.vh" -module FPregfile ( +module fregfile ( input logic clk, reset, input logic we4, input logic [ 4:0] a1, a2, a3, a4, diff --git a/wally-pipelined/src/fpu/fsgn.sv b/wally-pipelined/src/fpu/fsgn.sv index 62d0e7d7..7df9386c 
100755 --- a/wally-pipelined/src/fpu/fsgn.sv +++ b/wally-pipelined/src/fpu/fsgn.sv @@ -1,13 +1,12 @@ //performs the fsgnj/fsgnjn/fsgnjx RISCV instructions -module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE); +module fsgn ( + input logic [63:0] SrcXE, SrcYE, + input logic [1:0] SgnOpCodeE, + output logic [63:0] SgnResE, + output logic SgnNVE); - input [63:0] SrcXE, SrcYE; - input [1:0] SgnOpCodeE; - output [63:0] SgnResultE; - output [4:0] SgnFlagsE; - - wire AonesExp; + logic AonesExp; //op code designation: // @@ -16,8 +15,8 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE); //10 - fsgnjx - XOR sign values of SrcXE & SrcYE // - assign SgnResultE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]); - assign SgnResultE[62:0] = SrcXE[62:0]; + assign SgnResE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]); + assign SgnResE[62:0] = SrcXE[62:0]; //If the exponent is all ones, then the value is either Inf or NaN, //both of which will produce a QNaN/SNaN value of some sort. 
This will @@ -26,6 +25,6 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE); //the only flag that can occur during this operation is invalid //due to changing sign on already existing NaN - assign SgnFlagsE = {AonesExp & SgnResultE[63], 1'b0, 1'b0, 1'b0, 1'b0}; + assign SgnNVE = AonesExp & SgnResE[63]; endmodule diff --git a/wally-pipelined/src/fpu/ling_bk13.sv b/wally-pipelined/src/fpu/ling_bk13.sv deleted file mode 100755 index a35c7a8f..00000000 --- a/wally-pipelined/src/fpu/ling_bk13.sv +++ /dev/null @@ -1,89 +0,0 @@ -// Brent-Kung Prefix Adder - -module ling_bk13 (cout, sum, a, b, cin); - input [12:0] a, b; - input cin; - output [12:0] sum; - output cout; - - wire [13:0] p,g; - wire [13:1] h,c; - -// pre-computation - assign p={a|b,1'b1}; - assign g={a&b, cin}; - -// prefix tree - ling_brent_kung prefix_tree(h, c, p[12:0], g[12:0]); - -// post-computation - assign h[13]=g[13]|c[13]; - assign sum=p[13:1]^h|g[13:1]&c; - assign cout=p[13]&h[13]; - -endmodule - -module ling_brent_kung (h, c, p, g); - - input [12:0] p; - input [13:0] g; - output [13:1] h; - output [13:1] c; - - - // parallel-prefix, Brent-Kung - - // Stage 1: Generates H/I pairs that span 1 bits - rgry g_1_0 (H_1_0, {g[1],g[0]}); - rblk b_3_2 (H_3_2, I_3_2, {g[3],g[2]}, {p[2],p[1]}); - rblk b_5_4 (H_5_4, I_5_4, {g[5],g[4]}, {p[4],p[3]}); - rblk b_7_6 (H_7_6, I_7_6, {g[7],g[6]}, {p[6],p[5]}); - rblk b_9_8 (H_9_8, I_9_8, {g[9],g[8]}, {p[8],p[7]}); - rblk b_11_10 (H_11_10, I_11_10, {g[11],g[10]}, {p[10],p[9]}); - rblk b_13_12 (H_13_12, I_13_12, {g[13],g[12]}, {p[12],p[11]}); - - // Stage 2: Generates H/I pairs that span 2 bits - grey g_3_0 (H_3_0, {H_3_2,H_1_0}, I_3_2); - black b_7_4 (H_7_4, I_7_4, {H_7_6,H_5_4}, {I_7_6,I_5_4}); - black b_11_8 (H_11_8, I_11_8, {H_11_10,H_9_8}, {I_11_10,I_9_8}); - - // Stage 3: Generates H/I pairs that span 4 bits - grey g_7_0 (H_7_0, {H_7_4,H_3_0}, I_7_4); - - // Stage 4: Generates H/I pairs that span 8 bits - - // Stage 5: Generates H/I pairs that 
span 4 bits - grey g_11_0 (H_11_0, {H_11_8,H_7_0}, I_11_8); - - // Stage 6: Generates H/I pairs that span 2 bits - grey g_5_0 (H_5_0, {H_5_4,H_3_0}, I_5_4); - grey g_9_0 (H_9_0, {H_9_8,H_7_0}, I_9_8); - - // Last grey cell stage - grey g_2_0 (H_2_0, {g[2],H_1_0}, p[1]); - grey g_4_0 (H_4_0, {g[4],H_3_0}, p[3]); - grey g_6_0 (H_6_0, {g[6],H_5_0}, p[5]); - grey g_8_0 (H_8_0, {g[8],H_7_0}, p[7]); - grey g_10_0 (H_10_0, {g[10],H_9_0}, p[9]); - grey g_12_0 (H_12_0, {g[12],H_11_0}, p[11]); - - // Final Stage: Apply c_k+1=p_k&H_k_0 - assign c[1]=g[0]; - - assign h[1]=H_1_0; assign c[2]=p[1]&H_1_0; - assign h[2]=H_2_0; assign c[3]=p[2]&H_2_0; - assign h[3]=H_3_0; assign c[4]=p[3]&H_3_0; - assign h[4]=H_4_0; assign c[5]=p[4]&H_4_0; - assign h[5]=H_5_0; assign c[6]=p[5]&H_5_0; - assign h[6]=H_6_0; assign c[7]=p[6]&H_6_0; - assign h[7]=H_7_0; assign c[8]=p[7]&H_7_0; - assign h[8]=H_8_0; assign c[9]=p[8]&H_8_0; - - assign h[9]=H_9_0; assign c[10]=p[9]&H_9_0; - assign h[10]=H_10_0; assign c[11]=p[10]&H_10_0; - assign h[11]=H_11_0; assign c[12]=p[11]&H_11_0; - assign h[12]=H_12_0; assign c[13]=p[12]&H_12_0; - -endmodule - - diff --git a/wally-pipelined/src/fpu/lzd_denorm.sv b/wally-pipelined/src/fpu/lzd_denorm.sv index 21efbf5f..860a3381 100755 --- a/wally-pipelined/src/fpu/lzd_denorm.sv +++ b/wally-pipelined/src/fpu/lzd_denorm.sv @@ -168,3 +168,4 @@ module lz52 (ZP, ZV, B); endmodule // lz52 + diff --git a/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv b/wally-pipelined/src/fpu/mult_R4_64_64_cs.sv old mode 100755 new mode 100644 diff --git a/wally-pipelined/src/fpu/rounder_denorm.sv b/wally-pipelined/src/fpu/rounder_denorm.sv index 70df0656..b6793594 100755 --- a/wally-pipelined/src/fpu/rounder_denorm.sv +++ b/wally-pipelined/src/fpu/rounder_denorm.sv @@ -115,11 +115,11 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn, assign B_12_overflow = {8'h0, 3'b0, normal_overflow}; assign B_12_underflow = {8'h0, 3'b0, normal_underflow}; - cla52 add1(Tmant, Cout, A[62:11], B); + cla52 
add1(Tmant, Cout, A[62:11], B); //***adder - cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow); + cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow); //***adder - cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow); + cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow); //***adder // Now that rounding is done, we compute the final exponent // and test for special cases. diff --git a/wally-pipelined/src/fpu/sbtm_a4.sv b/wally-pipelined/src/fpu/sbtm_a4.sv deleted file mode 100755 index 7ffe4c61..00000000 --- a/wally-pipelined/src/fpu/sbtm_a4.sv +++ /dev/null @@ -1,204 +0,0 @@ -module sbtm_a4 (input logic [7:0] a, - output logic [13:0] y); - always_comb - case(a) - 8'b01000000: y = 14'b10110100010111; - 8'b01000001: y = 14'b10110010111111; - 8'b01000010: y = 14'b10110001101000; - 8'b01000011: y = 14'b10110000010011; - 8'b01000100: y = 14'b10101111000001; - 8'b01000101: y = 14'b10101101110000; - 8'b01000110: y = 14'b10101100100001; - 8'b01000111: y = 14'b10101011010011; - 8'b01001000: y = 14'b10101010000111; - 8'b01001001: y = 14'b10101000111101; - 8'b01001010: y = 14'b10100111110100; - 8'b01001011: y = 14'b10100110101101; - 8'b01001100: y = 14'b10100101100111; - 8'b01001101: y = 14'b10100100100010; - 8'b01001110: y = 14'b10100011011111; - 8'b01001111: y = 14'b10100010011101; - 8'b01010000: y = 14'b10100001011100; - 8'b01010001: y = 14'b10100000011100; - 8'b01010010: y = 14'b10011111011110; - 8'b01010011: y = 14'b10011110100001; - 8'b01010100: y = 14'b10011101100100; - 8'b01010101: y = 14'b10011100101001; - 8'b01010110: y = 14'b10011011101111; - 8'b01010111: y = 14'b10011010110110; - 8'b01011000: y = 14'b10011001111110; - 8'b01011001: y = 14'b10011001000110; - 8'b01011010: y = 14'b10011000010000; - 8'b01011011: y = 14'b10010111011011; - 8'b01011100: y = 14'b10010110100110; - 8'b01011101: y = 14'b10010101110011; - 8'b01011110: y = 14'b10010101000000; - 8'b01011111: y = 14'b10010100001110; - 8'b01100000: y = 14'b10010011011100; - 
8'b01100001: y = 14'b10010010101100; - 8'b01100010: y = 14'b10010001111100; - 8'b01100011: y = 14'b10010001001101; - 8'b01100100: y = 14'b10010000011111; - 8'b01100101: y = 14'b10001111110001; - 8'b01100110: y = 14'b10001111000100; - 8'b01100111: y = 14'b10001110011000; - 8'b01101000: y = 14'b10001101101100; - 8'b01101001: y = 14'b10001101000001; - 8'b01101010: y = 14'b10001100010110; - 8'b01101011: y = 14'b10001011101100; - 8'b01101100: y = 14'b10001011000011; - 8'b01101101: y = 14'b10001010011010; - 8'b01101110: y = 14'b10001001110010; - 8'b01101111: y = 14'b10001001001010; - 8'b01110000: y = 14'b10001000100011; - 8'b01110001: y = 14'b10000111111101; - 8'b01110010: y = 14'b10000111010111; - 8'b01110011: y = 14'b10000110110001; - 8'b01110100: y = 14'b10000110001100; - 8'b01110101: y = 14'b10000101100111; - 8'b01110110: y = 14'b10000101000011; - 8'b01110111: y = 14'b10000100011111; - 8'b01111000: y = 14'b10000011111100; - 8'b01111001: y = 14'b10000011011001; - 8'b01111010: y = 14'b10000010110111; - 8'b01111011: y = 14'b10000010010101; - 8'b01111100: y = 14'b10000001110011; - 8'b01111101: y = 14'b10000001010010; - 8'b01111110: y = 14'b10000000110001; - 8'b01111111: y = 14'b10000000010001; - 8'b10000000: y = 14'b01111111110001; - 8'b10000001: y = 14'b01111111010001; - 8'b10000010: y = 14'b01111110110010; - 8'b10000011: y = 14'b01111110010011; - 8'b10000100: y = 14'b01111101110101; - 8'b10000101: y = 14'b01111101010110; - 8'b10000110: y = 14'b01111100111001; - 8'b10000111: y = 14'b01111100011011; - 8'b10001000: y = 14'b01111011111110; - 8'b10001001: y = 14'b01111011100001; - 8'b10001010: y = 14'b01111011000100; - 8'b10001011: y = 14'b01111010101000; - 8'b10001100: y = 14'b01111010001100; - 8'b10001101: y = 14'b01111001110000; - 8'b10001110: y = 14'b01111001010101; - 8'b10001111: y = 14'b01111000111010; - 8'b10010000: y = 14'b01111000011111; - 8'b10010001: y = 14'b01111000000100; - 8'b10010010: y = 14'b01110111101010; - 8'b10010011: y = 14'b01110111010000; - 
8'b10010100: y = 14'b01110110110110; - 8'b10010101: y = 14'b01110110011101; - 8'b10010110: y = 14'b01110110000100; - 8'b10010111: y = 14'b01110101101011; - 8'b10011000: y = 14'b01110101010010; - 8'b10011001: y = 14'b01110100111001; - 8'b10011010: y = 14'b01110100100001; - 8'b10011011: y = 14'b01110100001001; - 8'b10011100: y = 14'b01110011110001; - 8'b10011101: y = 14'b01110011011010; - 8'b10011110: y = 14'b01110011000010; - 8'b10011111: y = 14'b01110010101011; - 8'b10100000: y = 14'b01110010010100; - 8'b10100001: y = 14'b01110001111110; - 8'b10100010: y = 14'b01110001100111; - 8'b10100011: y = 14'b01110001010001; - 8'b10100100: y = 14'b01110000111011; - 8'b10100101: y = 14'b01110000100101; - 8'b10100110: y = 14'b01110000001111; - 8'b10100111: y = 14'b01101111111010; - 8'b10101000: y = 14'b01101111100101; - 8'b10101001: y = 14'b01101111010000; - 8'b10101010: y = 14'b01101110111011; - 8'b10101011: y = 14'b01101110100110; - 8'b10101100: y = 14'b01101110010001; - 8'b10101101: y = 14'b01101101111101; - 8'b10101110: y = 14'b01101101101001; - 8'b10101111: y = 14'b01101101010101; - 8'b10110000: y = 14'b01101101000001; - 8'b10110001: y = 14'b01101100101101; - 8'b10110010: y = 14'b01101100011010; - 8'b10110011: y = 14'b01101100000110; - 8'b10110100: y = 14'b01101011110011; - 8'b10110101: y = 14'b01101011100000; - 8'b10110110: y = 14'b01101011001101; - 8'b10110111: y = 14'b01101010111010; - 8'b10111000: y = 14'b01101010101000; - 8'b10111001: y = 14'b01101010010101; - 8'b10111010: y = 14'b01101010000011; - 8'b10111011: y = 14'b01101001110001; - 8'b10111100: y = 14'b01101001011111; - 8'b10111101: y = 14'b01101001001101; - 8'b10111110: y = 14'b01101000111100; - 8'b10111111: y = 14'b01101000101010; - 8'b11000000: y = 14'b01101000011001; - 8'b11000001: y = 14'b01101000000111; - 8'b11000010: y = 14'b01100111110110; - 8'b11000011: y = 14'b01100111100101; - 8'b11000100: y = 14'b01100111010100; - 8'b11000101: y = 14'b01100111000011; - 8'b11000110: y = 14'b01100110110011; - 
8'b11000111: y = 14'b01100110100010; - 8'b11001000: y = 14'b01100110010010; - 8'b11001001: y = 14'b01100110000010; - 8'b11001010: y = 14'b01100101110010; - 8'b11001011: y = 14'b01100101100001; - 8'b11001100: y = 14'b01100101010010; - 8'b11001101: y = 14'b01100101000010; - 8'b11001110: y = 14'b01100100110010; - 8'b11001111: y = 14'b01100100100011; - 8'b11010000: y = 14'b01100100010011; - 8'b11010001: y = 14'b01100100000100; - 8'b11010010: y = 14'b01100011110101; - 8'b11010011: y = 14'b01100011100101; - 8'b11010100: y = 14'b01100011010110; - 8'b11010101: y = 14'b01100011000111; - 8'b11010110: y = 14'b01100010111001; - 8'b11010111: y = 14'b01100010101010; - 8'b11011000: y = 14'b01100010011011; - 8'b11011001: y = 14'b01100010001101; - 8'b11011010: y = 14'b01100001111110; - 8'b11011011: y = 14'b01100001110000; - 8'b11011100: y = 14'b01100001100010; - 8'b11011101: y = 14'b01100001010100; - 8'b11011110: y = 14'b01100001000110; - 8'b11011111: y = 14'b01100000111000; - 8'b11100000: y = 14'b01100000101010; - 8'b11100001: y = 14'b01100000011100; - 8'b11100010: y = 14'b01100000001111; - 8'b11100011: y = 14'b01100000000001; - 8'b11100100: y = 14'b01011111110100; - 8'b11100101: y = 14'b01011111100110; - 8'b11100110: y = 14'b01011111011001; - 8'b11100111: y = 14'b01011111001100; - 8'b11101000: y = 14'b01011110111111; - 8'b11101001: y = 14'b01011110110010; - 8'b11101010: y = 14'b01011110100101; - 8'b11101011: y = 14'b01011110011000; - 8'b11101100: y = 14'b01011110001011; - 8'b11101101: y = 14'b01011101111110; - 8'b11101110: y = 14'b01011101110010; - 8'b11101111: y = 14'b01011101100101; - 8'b11110000: y = 14'b01011101011001; - 8'b11110001: y = 14'b01011101001100; - 8'b11110010: y = 14'b01011101000000; - 8'b11110011: y = 14'b01011100110100; - 8'b11110100: y = 14'b01011100101000; - 8'b11110101: y = 14'b01011100011100; - 8'b11110110: y = 14'b01011100010000; - 8'b11110111: y = 14'b01011100000100; - 8'b11111000: y = 14'b01011011111000; - 8'b11111001: y = 14'b01011011101100; - 
8'b11111010: y = 14'b01011011100000; - 8'b11111011: y = 14'b01011011010101; - 8'b11111100: y = 14'b01011011001001; - 8'b11111101: y = 14'b01011010111101; - 8'b11111110: y = 14'b01011010110010; - 8'b11111111: y = 14'b01011010100111; - default: y = 14'bxxxxxxxxxxxxxx; - endcase // case (a) - -endmodule // sbtm_a0 - - - - \ No newline at end of file diff --git a/wally-pipelined/src/fpu/sk14.sv b/wally-pipelined/src/fpu/sk14.sv deleted file mode 100755 index 8d6aadb5..00000000 --- a/wally-pipelined/src/fpu/sk14.sv +++ /dev/null @@ -1,90 +0,0 @@ -// Sklansky Prefix Adder - -module sk14 (cout, sum, a, b, cin); - input [13:0] a, b; - input cin; - output [13:0] sum; - output cout; - - wire [14:0] p,g; - wire [13:0] c; - -// pre-computation - assign p={a^b,1'b0}; - assign g={a&b, cin}; - -// prefix tree - sklansky prefix_tree(c, p[13:0], g[13:0]); - -// post-computation - assign sum=p[14:1]^c; - assign cout=g[14]|(p[14]&c[13]); - -endmodule - -module sklansky (c, p, g); - - input [14:0] p; - input [14:0] g; - output [14:1] c; - - - // parallel-prefix, Sklansky - // Stage 1: Generates G/P pairs that span 1 bits - grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); - black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); - black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); - black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]}); - black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]}); - black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]}); - black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); - // Stage 2: Generates G/P pairs that span 2 bits - grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]); - grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); - black b_6_4 (G_6_4, P_6_4, {g[6],G_5_4}, {p[6],P_5_4}); - black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); - black b_10_8 (G_10_8, P_10_8, {g[10],G_9_8}, {p[10],P_9_8}); - black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8}); - black b_14_12 (G_14_12, P_14_12, {g[14],G_13_12}, {p[14],P_13_12}); - black b_15_12 (G_15_12, 
P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12}); - - // Stage 3: Generates G/P pairs that span 4 bits - grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]); - grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); - grey g_6_0 (G_6_0, {G_6_4,G_3_0}, P_6_4); - grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4); - black b_12_8 (G_12_8, P_12_8, {g[12],G_11_8}, {p[12],P_11_8}); - black b_13_8 (G_13_8, P_13_8, {G_13_12,G_11_8}, {P_13_12,P_11_8}); - black b_14_8 (G_14_8, P_14_8, {G_14_12,G_11_8}, {P_14_12,P_11_8}); - black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8}); - - // Stage 4: Generates G/P pairs that span 8 bits - grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]); - grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); - grey g_10_0 (G_10_0, {G_10_8,G_7_0}, P_10_8); - grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); - grey g_12_0 (G_12_0, {G_12_8,G_7_0}, P_12_8); - grey g_13_0 (G_13_0, {G_13_8,G_7_0}, P_13_8); - grey g_14_0 (G_14_0, {G_14_8,G_7_0}, P_14_8); - grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8); - - - // Final Stage: Apply c_k+1=G_k_0 - assign c[1]=g[0]; - assign c[2]=G_1_0; - assign c[3]=G_2_0; - assign c[4]=G_3_0; - assign c[5]=G_4_0; - assign c[6]=G_5_0; - assign c[7]=G_6_0; - assign c[8]=G_7_0; - assign c[9]=G_8_0; - - assign c[10]=G_9_0; - assign c[11]=G_10_0; - assign c[12]=G_11_0; - assign c[13]=G_12_0; - assign c[14]=G_13_0; - -endmodule - diff --git a/wally-pipelined/src/generic/lzd.sv~ b/wally-pipelined/src/generic/lzd.sv~ deleted file mode 100755 index bfffe5e5..00000000 --- a/wally-pipelined/src/generic/lzd.sv~ +++ /dev/null @@ -1,195 +0,0 @@ -/////////////////////////////////////////// -// lzd.sv -// -// Written: James.Stine@okstate.edu 1 February 2021 -// Modified: -// -// Purpose: Integer Divide instructions -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -`include "wally-config.vh" -/* verilator lint_off DECLFILENAME */ - -// Original idea came from V. G. Oklobdzija, "An algorithmic and novel -// design of a leading zero detector circuit: comparison with logic -// synthesis," in IEEE Transactions on Very Large Scale Integration -// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi: -// 10.1109/92.273153. 
- -// Modified to be more hierarchical - -module lz2 (P, V, B); - - input logic [1:0] B; - - output logic P; - output logic V; - - assign V = B[0] | B[1]; - assign P = B[0] & ~B[1]; - -endmodule // lz2 - -module lzd_hier #(parameter WIDTH=8) - (input logic [WIDTH-1:0] B, - output logic [$clog2(WIDTH)-1:0] ZP, - output logic ZV); - - if (WIDTH == 128) - lz128 lzd127 (ZP, ZV, B); - else if (WIDTH == 64) - lz64 lzd64 (ZP, ZV, B); - else if (WIDTH == 32) - lz32 lzd32 (ZP, ZV, B); - else if (WIDTH == 16) - lz16 lzd16 (ZP, ZV, B); - else if (WIDTH == 8) - lz8 lzd8 (ZP, ZV, B); - else if (WIDTH == 4) - lz4 lzd4 (ZP, ZV, B); - -endmodule // lzd_hier - -module lz4 (ZP, ZV, B); - - input logic [3:0] B; - - logic ZPa; - logic ZPb; - logic ZVa; - logic ZVb; - - output logic [1:0] ZP; - output logic ZV; - - lz2 l1(ZPa, ZVa, B[1:0]); - lz2 l2(ZPb, ZVb, B[3:2]); - - assign ZP[0:0] = ZVb ? ZPb : ZPa; - assign ZP[1] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule - -module lz8 (ZP, ZV, B); - - input logic [7:0] B; - - logic [1:0] ZPa; - logic [1:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [2:0] ZP; - output logic ZV; - - lz4 l1(ZPa, ZVa, B[3:0]); - lz4 l2(ZPb, ZVb, B[7:4]); - - assign ZP[1:0] = ZVb ? ZPb : ZPa; - assign ZP[2] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule - -module lz16 (ZP, ZV, B); - - input logic [15:0] B; - - logic [2:0] ZPa; - logic [2:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [3:0] ZP; - output logic ZV; - - lz8 l1(ZPa, ZVa, B[7:0]); - lz8 l2(ZPb, ZVb, B[15:8]); - - assign ZP[2:0] = ZVb ? ZPb : ZPa; - assign ZP[3] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz16 - -module lz32 (ZP, ZV, B); - - input logic [31:0] B; - - logic [3:0] ZPa; - logic [3:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [4:0] ZP; - output logic ZV; - - lz16 l1(ZPa, ZVa, B[15:0]); - lz16 l2(ZPb, ZVb, B[31:16]); - - assign ZP[3:0] = ZVb ? 
ZPb : ZPa; - assign ZP[4] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz32 - -module lz64 (ZP, ZV, B); - - input logic [63:0] B; - - logic [4:0] ZPa; - logic [4:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [5:0] ZP; - output logic ZV; - - lz32 l1(ZPa, ZVa, B[31:0]); - lz32 l2(ZPb, ZVb, B[63:32]); - - assign ZP[4:0] = ZVb ? ZPb : ZPa; - assign ZP[5] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz64 - -module lz128 (ZP, ZV, B); - - input logic [127:0] B; - - logic [5:0] ZPa; - logic [5:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [6:0] ZP; - output logic ZV; - - lz64 l1(ZPa, ZVa, B[64:0]); - lz64 l2(ZPb, ZVb, B[127:63]); - - assign ZP[5:0] = ZVb ? ZPb : ZPa; - assign ZP[6] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz128 - -/* verilator lint_on DECLFILENAME */ diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 356574d0..f5552106 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -31,8 +31,8 @@ module hazard( // Detect hazards input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM, input logic LoadStallD, MulDivStallD, CSRRdStallD, - input logic DataStall, ICacheStallF, - input logic FPUStallD, FStallD, + input logic DCacheStall, ICacheStallF, + input logic FPUStallD, FStallD, input logic DivBusyE,FDivBusyE, // Stall & flush outputs output logic StallF, StallD, StallE, StallM, StallW, @@ -55,16 +55,16 @@ module hazard( // A stage must stall if the next stage is stalled // If any stages are stalled, the first stage that isn't stalled must flush. 
- assign StallFCause = CSRWritePendingDEM && ~(TrapM || RetM || BPPredWrongE); - assign StallDCause = (LoadStallD || MulDivStallD || CSRRdStallD || FPUStallD || FStallD) && ~(TrapM || RetM || BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous - assign StallECause = DivBusyE || FDivBusyE; + assign StallFCause = CSRWritePendingDEM && ~(TrapM | RetM | BPPredWrongE); + assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous + assign StallECause = DivBusyE | FDivBusyE; assign StallMCause = 0; - assign StallWCause = DataStall || ICacheStallF; + assign StallWCause = DCacheStall | ICacheStallF; - assign StallF = StallFCause || StallD; - assign StallD = StallDCause || StallE; - assign StallE = StallECause || StallM; - assign StallM = StallMCause || StallW; + assign StallF = StallFCause | StallD; + assign StallD = StallDCause | StallE; + assign StallE = StallECause | StallM; + assign StallM = StallMCause | StallW; assign StallW = StallWCause; //assign FirstUnstalledD = (~StallD & StallF & ~MulDivStallD); @@ -76,8 +76,8 @@ module hazard( // Each stage flushes if the previous stage is the last one stalled (for cause) or the system has reason to flush assign FlushF = BPPredWrongE; - assign FlushD = FirstUnstalledD || TrapM || RetM || BPPredWrongE; - assign FlushE = FirstUnstalledE || TrapM || RetM || BPPredWrongE; - assign FlushM = FirstUnstalledM || TrapM || RetM; - assign FlushW = FirstUnstalledW || TrapM; + assign FlushD = FirstUnstalledD | TrapM | RetM | BPPredWrongE; + assign FlushE = FirstUnstalledE | TrapM | RetM | BPPredWrongE; + assign FlushM = FirstUnstalledM | TrapM | RetM; + assign FlushW = FirstUnstalledW | TrapM; endmodule diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index 50bf79e8..87e21d79 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ 
b/wally-pipelined/src/ieu/ieu.sv @@ -31,31 +31,34 @@ module ieu ( input logic [31:0] InstrD, input logic IllegalIEUInstrFaultD, output logic IllegalBaseInstrFaultD, - output logic RegWriteD, + output logic RegWriteD, // Execute Stage interface input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCLinkE, - input logic FWriteIntE, - input logic IllegalFPUInstrE, - input logic [`XLEN-1:0] FWriteDataE, + input logic FWriteIntE, + input logic IllegalFPUInstrE, + input logic [`XLEN-1:0] FWriteDataE, output logic [`XLEN-1:0] PCTargetE, output logic MulDivE, W64E, output logic [2:0] Funct3E, output logic [`XLEN-1:0] SrcAE, SrcBE, + input logic FWriteIntM, + // Memory stage interface - input logic DataMisalignedM, + input logic DataMisalignedM, // from LSU + input logic SquashSCW, // from LSU + output logic [1:0] MemRWM, // read/write control goes to LSU + output logic [1:0] AtomicM, // atomic control goes to LSU + output logic [`XLEN-1:0] MemAdrM, WriteDataM, // Address and write data to LSU + + output logic [2:0] Funct3M, // size and signedness to LSU + output logic [`XLEN-1:0] SrcAM, // to privilege and fpu input logic DataAccessFaultM, - input logic FWriteIntM, - input logic [`XLEN-1:0] FIntResM, - output logic [1:0] MemRWM, - output logic [1:0] AtomicM, - output logic [`XLEN-1:0] MemAdrM, WriteDataM, - output logic [`XLEN-1:0] SrcAM, - output logic [2:0] Funct3M, + input logic [`XLEN-1:0] FIntResM, + // Writeback stage input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW, - input logic FWriteIntW, - input logic SquashSCW, + input logic FWriteIntW, // input logic [`XLEN-1:0] PCLinkW, output logic InstrValidM, InstrValidW, // hazards diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index afae5ff4..ddfd88cc 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -27,64 +27,62 @@ `include "wally-config.vh" module ifu ( - input logic clk, reset, - input logic StallF, StallD, StallE, StallM, StallW, - input 
logic FlushF, FlushD, FlushE, FlushM, FlushW, + input logic clk, reset, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushF, FlushD, FlushE, FlushM, FlushW, // Fetch - input logic [`XLEN-1:0] InstrInF, - input logic InstrAckF, - output logic [`XLEN-1:0] PCF, + input logic [`XLEN-1:0] InstrInF, + input logic InstrAckF, + output logic [`XLEN-1:0] PCF, output logic [`PA_BITS-1:0] InstrPAdrF, - output logic InstrReadF, - output logic ICacheStallF, + output logic InstrReadF, + output logic ICacheStallF, // Decode - output logic [`XLEN-1:0] PCD, + output logic [`XLEN-1:0] PCD, // Execute - output logic [`XLEN-1:0] PCLinkE, - input logic PCSrcE, - input logic [`XLEN-1:0] PCTargetE, - output logic [`XLEN-1:0] PCE, - output logic BPPredWrongE, + output logic [`XLEN-1:0] PCLinkE, + input logic PCSrcE, + input logic [`XLEN-1:0] PCTargetE, + output logic [`XLEN-1:0] PCE, + output logic BPPredWrongE, // Mem - input logic RetM, TrapM, - input logic [`XLEN-1:0] PrivilegedNextPCM, - output logic [31:0] InstrD, InstrE, InstrM, InstrW, - output logic [`XLEN-1:0] PCM, - output logic [4:0] InstrClassM, - output logic BPPredDirWrongM, - output logic BTBPredPCWrongM, - output logic RASPredPCWrongM, - output logic BPPredClassNonCFIWrongM, + input logic RetM, TrapM, + input logic [`XLEN-1:0] PrivilegedNextPCM, + output logic [31:0] InstrD, InstrE, InstrM, InstrW, + output logic [`XLEN-1:0] PCM, + output logic [4:0] InstrClassM, + output logic BPPredDirWrongM, + output logic BTBPredPCWrongM, + output logic RASPredPCWrongM, + output logic BPPredClassNonCFIWrongM, // Writeback // output logic [`XLEN-1:0] PCLinkW, // Faults - input logic IllegalBaseInstrFaultD, - output logic ITLBInstrPageFaultF, - output logic IllegalIEUInstrFaultD, - output logic InstrMisalignedFaultM, - output logic [`XLEN-1:0] InstrMisalignedAdrM, + input logic IllegalBaseInstrFaultD, + output logic ITLBInstrPageFaultF, + output logic IllegalIEUInstrFaultD, + output logic InstrMisalignedFaultM, + 
output logic [`XLEN-1:0] InstrMisalignedAdrM, // mmu management - input logic [1:0] PrivilegeModeW, - input logic [`XLEN-1:0] PageTableEntryF, - input logic [1:0] PageTypeF, - input logic [`XLEN-1:0] SATP_REGW, - input logic STATUS_MXR, STATUS_SUM, - input logic ITLBWriteF, ITLBFlushF, - output logic ITLBMissF, ITLBHitF, + input logic [1:0] PrivilegeModeW, + input logic [`XLEN-1:0] PageTableEntryF, + input logic [1:0] PageTypeF, + input logic [`XLEN-1:0] SATP_REGW, + input logic STATUS_MXR, STATUS_SUM, + input logic ITLBWriteF, ITLBFlushF, + input logic WalkerInstrPageFaultF, + + output logic ITLBMissF, ITLBHitF, // pmp/pma (inside mmu) signals. *** temporarily from AHB bus but eventually replace with internal versions pre H - input logic [31:0] HADDR, - input logic [2:0] HSIZE, HBURST, - input logic HWRITE, - input logic ExecuteAccessF, //read, write, and atomic access are all set to zero because this mmu is onlt working with instructinos in the F stage. - input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so they're gonna have to come over into ifu and dmem + input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], - output logic PMPInstrAccessFaultF, PMAInstrAccessFaultF, - output logic ISquashBusAccessF, - output logic [5:0] IHSELRegionsF + output logic PMPInstrAccessFaultF, PMAInstrAccessFaultF, + output logic ISquashBusAccessF +// output logic [5:0] IHSELRegionsF ); @@ -105,24 +103,38 @@ module ifu ( logic PMPLoadAccessFaultM, PMPStoreAccessFaultM; // *** these are just so that the mmu has somewhere to put these outputs, they're unused in this stage // if you're allowed to parameterize outputs/ inputs existence, these are an easy delete. - logic [`PA_BITS-1:0] PCPFmmu; + logic [`PA_BITS-1:0] PCPFmmu, PCNextFPhys; // used to either truncate or expand PCPF and PCNextF into `PA_BITS width. 
generate - if (`XLEN==32) + if (`XLEN==32) begin assign PCPF = PCPFmmu[31:0]; - else + assign PCNextFPhys = {{(`PA_BITS-`XLEN){1'b0}}, PCNextF}; + end else begin assign PCPF = {8'b0, PCPFmmu}; + assign PCNextFPhys = PCNextF[`PA_BITS-1:0]; + end endgenerate - mmu #(.ENTRY_BITS(`ITLB_ENTRY_BITS), .IMMU(1)) itlb(.TLBAccessType(2'b10), .VirtualAddress(PCF), .Size(2'b10), - .PTEWriteVal(PageTableEntryF), .PageTypeWriteVal(PageTypeF), - .TLBWrite(ITLBWriteF), .TLBFlush(ITLBFlushF), - .PhysicalAddress(PCPFmmu), .TLBMiss(ITLBMissF), - .TLBHit(ITLBHitF), .TLBPageFault(ITLBInstrPageFaultF), - - .AtomicAccessM(1'b0), .WriteAccessM(1'b0), .ReadAccessM(1'b0), // *** is this the right way force these bits constant? should they be someething else? - .SquashBusAccess(ISquashBusAccessF), .HSELRegions(IHSELRegionsF), - .*); + mmu #(.ENTRY_BITS(`ITLB_ENTRY_BITS), .IMMU(1)) + itlb(.TLBAccessType(2'b10), + .VirtualAddress(PCF), + .Size(2'b10), + .PTEWriteVal(PageTableEntryF), + .PageTypeWriteVal(PageTypeF), + .TLBWrite(ITLBWriteF), + .TLBFlush(ITLBFlushF), + .PhysicalAddress(PCPFmmu), + .TLBMiss(ITLBMissF), + .TLBHit(ITLBHitF), + .TLBPageFault(ITLBInstrPageFaultF), + .ExecuteAccessF(1'b1), // ***dh -- this should eventually change to only true if an instruction fetch is occurring + .AtomicAccessM(1'b0), + .ReadAccessM(1'b0), + .WriteAccessM(1'b0), + .SquashBusAccess(ISquashBusAccessF), +// .HSELRegions(IHSELRegionsF), + .DisableTranslation(1'b0), + .*); // branch predictor signals @@ -137,8 +149,9 @@ module ifu ( // assign InstrReadF = 1; // *** & ICacheMissF; add later icache icache(.*, - .PCNextF(PCNextF[`PA_BITS-1:0]), - .PCPF(PCPFmmu)); + .PCNextF(PCNextFPhys), + .PCPF(PCPFmmu), + .WalkerInstrPageFaultF(WalkerInstrPageFaultF)); flopenl #(32) AlignedInstrRawDFlop(clk, reset | reset_q, ~StallD, FlushD ? 
nop : FinalInstrRawF, nop, InstrRawD); diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index ffa79adf..36d4715f 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -29,104 +29,142 @@ // *** Ross Thompson amo misalignment check? module lsu ( - input logic clk, reset, - input logic StallM, FlushM, StallW, FlushW, - //output logic DataStall, + input logic clk, reset, + input logic StallM, FlushM, StallW, FlushW, + output logic DataStall, + output logic HPTWReady, // Memory Stage - input logic [1:0] MemRWM, - input logic [`XLEN-1:0] MemAdrM, - input logic [2:0] Funct3M, - //input logic [`XLEN-1:0] ReadDataW, - input logic [`XLEN-1:0] WriteDataM, - input logic [1:0] AtomicM, - input logic CommitM, - output logic [`PA_BITS-1:0] MemPAdrM, - output logic MemReadM, MemWriteM, - output logic [1:0] AtomicMaskedM, - output logic DataMisalignedM, - output logic CommittedM, - // Writeback Stage - input logic MemAckW, - input logic [`XLEN-1:0] ReadDataW, - output logic SquashSCW, + + // connected to cpu (controls) + input logic [1:0] MemRWM, + input logic [2:0] Funct3M, + input logic [1:0] AtomicM, + output logic CommittedM, + output logic SquashSCW, + output logic DataMisalignedM, + + // address and write data + input logic [`XLEN-1:0] MemAdrM, + input logic [`XLEN-1:0] WriteDataM, + output logic [`XLEN-1:0] ReadDataW, + + // cpu privilege + input logic [1:0] PrivilegeModeW, + input logic DTLBFlushM, // faults - input logic NonBusTrapM, - input logic DataAccessFaultM, - output logic DTLBLoadPageFaultM, DTLBStorePageFaultM, - output logic LoadMisalignedFaultM, LoadAccessFaultM, - output logic StoreMisalignedFaultM, StoreAccessFaultM, - + input logic NonBusTrapM, + output logic DTLBLoadPageFaultM, DTLBStorePageFaultM, + output logic LoadMisalignedFaultM, LoadAccessFaultM, + // cpu hazard unit (trap) + output logic StoreMisalignedFaultM, StoreAccessFaultM, + + // connect to ahb + input logic CommitM, // should this be 
generated in the abh interface? + output logic [`PA_BITS-1:0] MemPAdrM, // to ahb + output logic MemReadM, MemWriteM, + output logic [1:0] AtomicMaskedM, + input logic MemAckW, // from ahb + input logic [`XLEN-1:0] HRDATAW, // from ahb + output logic [2:0] Funct3MfromLSU, + output logic StallWfromLSU, + + // mmu management - input logic [1:0] PrivilegeModeW, - input logic [`XLEN-1:0] PageTableEntryM, - input logic [1:0] PageTypeM, - input logic [`XLEN-1:0] SATP_REGW, - input logic STATUS_MXR, STATUS_SUM, - input logic DTLBWriteM, DTLBFlushM, - output logic DTLBMissM, DTLBHitM, + + // page table walker + input logic [`XLEN-1:0] PageTableEntryM, + input logic [1:0] PageTypeM, + input logic [`XLEN-1:0] SATP_REGW, // from csr + input logic STATUS_MXR, STATUS_SUM, // from csr + input logic DTLBWriteM, + output logic DTLBMissM, + input logic DisableTranslation, // used to stop intermediate PTE physical addresses being saved to TLB. + + + + output logic DTLBHitM, // not connected // PMA/PMP (inside mmu) signals input logic [31:0] HADDR, // *** replace all of these H inputs with physical adress once pma checkers have been edited to use paddr as well. input logic [2:0] HSIZE, HBURST, input logic HWRITE, - input logic AtomicAccessM, WriteAccessM, ReadAccessM, // execute access is hardwired to zero in this mmu because we're only working with data in the M stage. - input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem + input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker. output logic PMALoadAccessFaultM, PMAStoreAccessFaultM, output logic PMPLoadAccessFaultM, PMPStoreAccessFaultM, // *** can these be parameterized? we dont need the m stage ones for the immu and vice versa. 
- output logic DSquashBusAccessM, - output logic [5:0] DHSELRegionsM + output logic DSquashBusAccessM +// output logic [5:0] DHSELRegionsM ); logic SquashSCM; logic DTLBPageFaultM; logic MemAccessM; - logic [1:0] CurrState, NextState; + logic preCommittedM; - localparam STATE_READY = 0; - localparam STATE_FETCH = 1; - localparam STATE_FETCH_AMO = 2; - localparam STATE_STALLED = 3; + typedef enum {STATE_READY, + STATE_FETCH, + STATE_FETCH_AMO_1, + STATE_FETCH_AMO_2, + STATE_STALLED, + STATE_PTW_READY, + STATE_PTW_FETCH, + STATE_PTW_DONE} statetype; + statetype CurrState, NextState; + logic PMPInstrAccessFaultF, PMAInstrAccessFaultF; // *** these are just so that the mmu has somewhere to put these outputs since they aren't used in dmem // *** if you're allowed to parameterize outputs/ inputs existence, these are an easy delete. - - mmu #(.ENTRY_BITS(`DTLB_ENTRY_BITS), .IMMU(0)) dmmu(.TLBAccessType(MemRWM), .VirtualAddress(MemAdrM), .Size(Funct3M[1:0]), - .PTEWriteVal(PageTableEntryM), .PageTypeWriteVal(PageTypeM), - .TLBWrite(DTLBWriteM), .TLBFlush(DTLBFlushM), - .PhysicalAddress(MemPAdrM), .TLBMiss(DTLBMissM), - .TLBHit(DTLBHitM), .TLBPageFault(DTLBPageFaultM), - .ExecuteAccessF(1'b0), - .SquashBusAccess(DSquashBusAccessM), .HSELRegions(DHSELRegionsM), - .*); // *** the pma/pmp instruction acess faults don't really matter here. is it possible to parameterize which outputs exist? + // for time being until we have a dcache the AHB Lite read bus HRDATAW will be connected to the + // CPU's read data input ReadDataW. 
+ assign ReadDataW = HRDATAW; + + mmu #(.ENTRY_BITS(`DTLB_ENTRY_BITS), .IMMU(0)) + dmmu(.TLBAccessType(MemRWM), + .VirtualAddress(MemAdrM), + .Size(Funct3M[1:0]), + .PTEWriteVal(PageTableEntryM), + .PageTypeWriteVal(PageTypeM), + .TLBWrite(DTLBWriteM), + .TLBFlush(DTLBFlushM), + .PhysicalAddress(MemPAdrM), + .TLBMiss(DTLBMissM), + .TLBHit(DTLBHitM), + .TLBPageFault(DTLBPageFaultM), + .ExecuteAccessF(1'b0), + .AtomicAccessM(AtomicMaskedM[1]), + .WriteAccessM(MemRWM[0]), + .ReadAccessM(MemRWM[1]), + .SquashBusAccess(DSquashBusAccessM), +// .SelRegions(DHSELRegionsM), + .*); // *** the pma/pmp instruction acess faults don't really matter here. is it possible to parameterize which outputs exist? // Specify which type of page fault is occurring assign DTLBLoadPageFaultM = DTLBPageFaultM & MemRWM[1]; assign DTLBStorePageFaultM = DTLBPageFaultM & MemRWM[0]; - // Determine if an Unaligned access is taking place - always_comb - case(Funct3M[1:0]) - 2'b00: DataMisalignedM = 0; // lb, sb, lbu - 2'b01: DataMisalignedM = MemAdrM[0]; // lh, sh, lhu - 2'b10: DataMisalignedM = MemAdrM[1] | MemAdrM[0]; // lw, sw, flw, fsw, lwu - 2'b11: DataMisalignedM = |MemAdrM[2:0]; // ld, sd, fld, fsd - endcase + // Determine if an Unaligned access is taking place + always_comb + case(Funct3M[1:0]) + 2'b00: DataMisalignedM = 0; // lb, sb, lbu + 2'b01: DataMisalignedM = MemAdrM[0]; // lh, sh, lhu + 2'b10: DataMisalignedM = MemAdrM[1] | MemAdrM[0]; // lw, sw, flw, fsw, lwu + 2'b11: DataMisalignedM = |MemAdrM[2:0]; // ld, sd, fld, fsd + endcase // Squash unaligned data accesses and failed store conditionals // *** this is also the place to squash if the cache is hit // Changed DataMisalignedM to a larger combination of trap sources // NonBusTrapM is anything that the bus doesn't contribute to producing // By contrast, using TrapM results in circular logic errors - assign MemReadM = MemRWM[1] & ~NonBusTrapM & CurrState != STATE_STALLED; - assign MemWriteM = MemRWM[0] & ~NonBusTrapM && ~SquashSCM & 
CurrState != STATE_STALLED; + assign MemReadM = MemRWM[1] & ~NonBusTrapM & ~DTLBMissM & CurrState != STATE_STALLED; + assign MemWriteM = MemRWM[0] & ~NonBusTrapM & ~DTLBMissM & ~SquashSCM & CurrState != STATE_STALLED; assign AtomicMaskedM = CurrState != STATE_STALLED ? AtomicM : 2'b00 ; - assign MemAccessM = |MemRWM; + assign MemAccessM = MemReadM | MemWriteM; // Determine if M stage committed // Reset whenever unstalled. Set when access successfully occurs @@ -135,9 +173,9 @@ module lsu ( // Determine if address is valid assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1]; - assign LoadAccessFaultM = DataAccessFaultM & MemRWM[1]; + assign LoadAccessFaultM = MemRWM[1]; assign StoreMisalignedFaultM = DataMisalignedM & MemRWM[0]; - assign StoreAccessFaultM = DataAccessFaultM & MemRWM[0]; + assign StoreAccessFaultM = MemRWM[0]; // Handle atomic load reserved / store conditional generate @@ -165,33 +203,111 @@ module lsu ( endgenerate // Data stall - //assign DataStall = 0; + //assign DataStall = (NextState == STATE_FETCH) || (NextState == STATE_FETCH_AMO_1) || (NextState == STATE_FETCH_AMO_2); + assign HPTWReady = (CurrState == STATE_READY); + // Ross Thompson April 22, 2021 // for now we need to handle the issue where the data memory interface repeately // requests data from memory rather than issuing a single request. 
- flopr #(2) stateReg(.clk(clk), - .reset(reset), - .d(NextState), - .q(CurrState)); + flopenl #(.TYPE(statetype)) stateReg(.clk(clk), // state register: .load(reset) with .val(STATE_READY) -- presumably returns the FSM to STATE_READY on reset + .load(reset), + .en(1'b1), + .d(NextState), + .val(STATE_READY), + .q(CurrState)); always_comb begin case (CurrState) - STATE_READY: if (MemRWM[1] & MemRWM[0]) NextState = STATE_FETCH_AMO; // *** should be some misalign check - else if (MemAccessM & ~DataMisalignedM) NextState = STATE_FETCH; - else NextState = STATE_READY; - STATE_FETCH_AMO: if (MemAckW) NextState = STATE_FETCH; - else NextState = STATE_FETCH_AMO; - STATE_FETCH: if (MemAckW & ~StallW) NextState = STATE_READY; - else if (MemAckW & StallW) NextState = STATE_STALLED; - else NextState = STATE_FETCH; - STATE_STALLED: if (~StallW) NextState = STATE_READY; - else NextState = STATE_STALLED; - default: NextState = STATE_READY; - endcase // case (CurrState) - end + STATE_READY: // idle: a DTLB miss has priority, then the two atomic paths, then an ordinary aligned access + if (DTLBMissM) begin + NextState = STATE_PTW_READY; + DataStall = 1'b1; + end else if (AtomicMaskedM[1]) begin + NextState = STATE_FETCH_AMO_1; // *** should be some misalign check + DataStall = 1'b1; + end else if((MemReadM & AtomicM[0]) | (MemWriteM & AtomicM[0])) begin + NextState = STATE_FETCH_AMO_2; + DataStall = 1'b1; + end else if (MemAccessM & ~DataMisalignedM) begin + NextState = STATE_FETCH; + DataStall = 1'b1; + end else begin + NextState = STATE_READY; + DataStall = 1'b0; + end + STATE_FETCH_AMO_1: begin // first bus access of the atomic sequence; wait for its acknowledge + DataStall = 1'b1; + if (MemAckW) begin + NextState = STATE_FETCH_AMO_2; + end else begin + NextState = STATE_FETCH_AMO_1; + end + end + STATE_FETCH_AMO_2: begin + DataStall = 1'b1; + if (MemAckW & ~StallW) begin + NextState = STATE_READY; // acknowledged and W not stalled: finished, same exit as STATE_FETCH (was a self-loop identical to the final else) + end else if (MemAckW & StallW) begin + NextState = STATE_STALLED; + end else begin + NextState = STATE_FETCH_AMO_2; + end + end + STATE_FETCH: begin + DataStall = 1'b1; + if (MemAckW & ~StallW) begin + NextState = STATE_READY; + end else if (MemAckW & StallW) begin + NextState = STATE_STALLED; + end else begin + NextState = STATE_FETCH; + end
+ end + STATE_STALLED: begin // access already acknowledged; hold here until StallW drops + DataStall = 1'b0; + if (~StallW) begin + NextState = STATE_READY; + end else begin + NextState = STATE_STALLED; + end + end + STATE_PTW_READY: begin // entered on a DTLB miss; leaves when the TLB is written or a read is requested + DataStall = 1'b0; + if (DTLBWriteM) begin + NextState = STATE_READY; + end else if (MemReadM & ~DataMisalignedM) begin + NextState = STATE_PTW_FETCH; + end else begin + NextState = STATE_PTW_READY; + end + end + STATE_PTW_FETCH : begin + DataStall = 1'b1; + if (MemAckW & ~DTLBWriteM) begin + NextState = STATE_PTW_READY; + end else if (MemAckW & DTLBWriteM) begin + NextState = STATE_READY; + end else begin + NextState = STATE_PTW_FETCH; + end + end + STATE_PTW_DONE: begin // no transition in this FSM targets this state + DataStall = 1'b0; NextState = STATE_READY; // DataStall was left unassigned here; drive it so every always_comb path assigns it (no inferred latch) + end + default: begin + DataStall = 1'b0; + NextState = STATE_READY; + end + endcase + end // always_comb + + // *** for now just pass through size + assign Funct3MfromLSU = Funct3M; + assign StallWfromLSU = StallW; + endmodule diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv new file mode 100644 index 00000000..76d89798 --- /dev/null +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -0,0 +1,178 @@ +/////////////////////////////////////////// +// lsuArb.sv +// +// Written: Ross Thompson and Kip Macsai-Goren +// Modified: kmacsaigoren@hmc.edu June 23, 2021 +// +// Purpose: LSU arbiter between the CPU's demand request for data memory and +// the page table walker +// +// A component of the Wally configurable RISC-V project.
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" + +module lsuArb + (input logic clk, reset, + + // from page table walker + input logic HPTWTranslate, + input logic HPTWRead, + input logic [`XLEN-1:0] HPTWPAdr, + // to page table walker. 
+ output logic [`XLEN-1:0] HPTWReadPTE, + output logic HPTWReady, + output logic HPTWStall, + + // from CPU + input logic [1:0] MemRWM, + input logic [2:0] Funct3M, + input logic [1:0] AtomicM, + input logic [`XLEN-1:0] MemAdrM, + input logic [`XLEN-1:0] WriteDataM, + input logic StallW, + // to CPU + output logic [`XLEN-1:0] ReadDataW, + output logic CommittedM, + output logic SquashSCW, + output logic DataMisalignedM, + output logic DCacheStall, + + // to LSU + output logic DisableTranslation, + output logic [1:0] MemRWMtoLSU, + output logic [2:0] Funct3MtoLSU, + output logic [1:0] AtomicMtoLSU, + output logic [`XLEN-1:0] MemAdrMtoLSU, + output logic [`XLEN-1:0] WriteDataMtoLSU, + output logic StallWtoLSU, + // from LSU + input logic CommittedMfromLSU, + input logic SquashSCWfromLSU, + input logic DataMisalignedMfromLSU, + input logic [`XLEN-1:0] ReadDataWFromLSU, + input logic HPTWReadyfromLSU, + input logic DataStall + + ); + + // HPTWTranslate is the request for memory by the page table walker. When + // this is high the page table walker gains priority over the CPU's data + // input. Note the ptw only makes a request after an instruction or data + // tlb miss. It is entirely possible the dcache is currently processing + // a data cache miss when an instruction tlb miss occurs. If an instruction + // in the E stage causes a d cache miss, the d cache will immediately start + // processing the request. Simultaneously the ITLB misses. By the time + // the TLB miss causes the page table walker to issue the first request + // to data memory the d cache is already busy. We can interlock by + // leveraging Stall as a d cache busy. We will need an FSM to handle this. 
+ + typedef enum{StateReady, + StatePTWPending, + StatePTWActive} statetype; + + + statetype CurrState, NextState; + logic SelPTW; + logic HPTWStallD; + + + flopenl #(.TYPE(statetype)) StateReg(.clk(clk), + .load(reset), + .en(1'b1), + .d(NextState), + .val(StateReady), + .q(CurrState)); + + always_comb begin + case(CurrState) + StateReady: + if (HPTWTranslate) NextState = StatePTWActive; + else NextState = StateReady; + StatePTWActive: + if (HPTWTranslate) NextState = StatePTWActive; + else NextState = StateReady; + default: NextState = StateReady; + endcase + end + +/* -----\/----- EXCLUDED -----\/----- + + always_comb begin + case(CurrState) + StateReady: + /-* -----\/----- EXCLUDED -----\/----- + if (HPTWTranslate & DataStall) NextState = StatePTWPending; + else + -----/\----- EXCLUDED -----/\----- *-/ + if (HPTWTranslate) NextState = StatePTWActive; + else NextState = StateReady; + StatePTWPending: + if (HPTWTranslate & ~DataStall) NextState = StatePTWActive; + else if (HPTWTranslate & DataStall) NextState = StatePTWPending; + else NextState = StateReady; + StatePTWActive: + if (HPTWTranslate) NextState = StatePTWActive; + else NextState = StateReady; + default: NextState = StateReady; + endcase + end + + -----/\----- EXCLUDED -----/\----- */ + + // multiplex the outputs to LSU + assign DisableTranslation = SelPTW; // change names between SelPTW would be confusing in DTLB. + assign SelPTW = (CurrState == StatePTWActive && HPTWTranslate) || (CurrState == StateReady && HPTWTranslate); + assign MemRWMtoLSU = SelPTW ? {HPTWRead, 1'b0} : MemRWM; + + generate + if (`XLEN == 32) begin + assign Funct3MtoLSU = SelPTW ? 3'b010 : Funct3M; + end else begin + assign Funct3MtoLSU = SelPTW ? 3'b011 : Funct3M; + end + endgenerate + + assign AtomicMtoLSU = SelPTW ? 2'b00 : AtomicM; + assign MemAdrMtoLSU = SelPTW ? HPTWPAdr : MemAdrM; + assign WriteDataMtoLSU = SelPTW ? `XLEN'b0 : WriteDataM; + assign StallWtoLSU = SelPTW ? 
1'b0 : StallW; + + // demux the inputs from LSU to walker or cpu's data port. + + assign ReadDataW = SelPTW ? `XLEN'b0 : ReadDataWFromLSU; // probably can avoid this demux + assign HPTWReadPTE = SelPTW ? ReadDataWFromLSU : `XLEN'b0 ; // probably can avoid this demux + assign CommittedM = SelPTW ? 1'b0 : CommittedMfromLSU; + assign SquashSCW = SelPTW ? 1'b0 : SquashSCWfromLSU; + assign DataMisalignedM = SelPTW ? 1'b0 : DataMisalignedMfromLSU; + assign HPTWReady = HPTWReadyfromLSU; + // *** need to rename DcacheStall and Datastall. + // not clear at all. I think it should be LSUStall from the LSU, + // which is demuxed to HPTWStall and CPUDataStall? (not sure on this last one). + assign HPTWStall = SelPTW ? DataStall : 1'b1; + //assign HPTWStallD = SelPTW ? DataStall : 1'b1; +/* -----\/----- EXCLUDED -----\/----- + assign HPTWStallD = SelPTW ? DataStall : 1'b1; + flopr #(1) HPTWStallReg (.clk(clk), + .reset(reset), + .d(HPTWStallD), + .q(HPTWStall)); + -----/\----- EXCLUDED -----/\----- */ + + assign DCacheStall = SelPTW ? 1'b1 : DataStall; // *** this is probably going to change. 
+ +endmodule diff --git a/wally-pipelined/src/mmu/adrdec.sv b/wally-pipelined/src/mmu/adrdec.sv index e2c63731..5995d8e3 100644 --- a/wally-pipelined/src/mmu/adrdec.sv +++ b/wally-pipelined/src/mmu/adrdec.sv @@ -26,13 +26,13 @@ `include "wally-config.vh" module adrdec ( - input logic [31:0] HADDR, - input logic [31:0] Base, Range, - input logic Supported, - input logic AccessValid, - input logic [2:0] Size, - input logic [3:0] SizeMask, - output logic HSEL + input logic [`PA_BITS-1:0] PhysicalAddress, + input logic [`PA_BITS-1:0] Base, Range, + input logic Supported, + input logic AccessValid, + input logic [1:0] Size, + input logic [3:0] SizeMask, + output logic Sel ); logic Match; @@ -41,12 +41,12 @@ module adrdec ( // determine if an address is in a range starting at the base // for example, if Base = 0x04002000 and range = 0x00000FFF, // then anything address between 0x04002000 and 0x04002FFF should match (HSEL=1) - assign Match = &((HADDR ~^ Base) | Range); + assign Match = &((PhysicalAddress ~^ Base) | Range); // determine if legal size of access is being made (byte, halfword, word, doubleword) - assign SizeValid = SizeMask[Size[1:0]]; + assign SizeValid = SizeMask[Size]; - assign HSEL = Match && Supported && AccessValid && SizeValid; + assign Sel = Match && Supported && AccessValid && SizeValid; endmodule diff --git a/wally-pipelined/src/mmu/adrdecs.sv b/wally-pipelined/src/mmu/adrdecs.sv index 17f78d51..8585a4ee 100644 --- a/wally-pipelined/src/mmu/adrdecs.sv +++ b/wally-pipelined/src/mmu/adrdecs.sv @@ -26,19 +26,20 @@ `include "wally-config.vh" module adrdecs ( - input logic [31:0] HADDR, // *** will need to use PAdr in mmu, stick with HADDR in uncore - input logic AccessRW, AccessRX, AccessRWX, - input logic [2:0] HSIZE, - output logic [5:0] HSELRegions + input logic [`PA_BITS-1:0] PhysicalAddress, + input logic AccessRW, AccessRX, AccessRWX, + input logic [1:0] Size, + output logic [5:0] SelRegions ); // Determine which region of physical memory (if any) 
is being accessed // *** eventually uncomment Access signals - adrdec boottimdec(HADDR, `BOOTTIM_BASE, `BOOTTIM_RANGE, `BOOTTIM_SUPPORTED, 1'b1/*AccessRX*/, HSIZE, 4'b1111, HSELRegions[5]); - adrdec timdec(HADDR, `TIM_BASE, `TIM_RANGE, `TIM_SUPPORTED, 1'b1/*AccessRWX*/, HSIZE, 4'b1111, HSELRegions[4]); - adrdec clintdec(HADDR, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, HSIZE, 4'b1111, HSELRegions[3]); - adrdec gpiodec(HADDR, `GPIO_BASE, `GPIO_RANGE, `GPIO_SUPPORTED, AccessRW, HSIZE, 4'b0100, HSELRegions[2]); - adrdec uartdec(HADDR, `UART_BASE, `UART_RANGE, `UART_SUPPORTED, AccessRW, HSIZE, 4'b0001, HSELRegions[1]); - adrdec plicdec(HADDR, `PLIC_BASE, `PLIC_RANGE, `PLIC_SUPPORTED, AccessRW, HSIZE, 4'b0100, HSELRegions[0]); + adrdec boottimdec(PhysicalAddress, `BOOTTIM_BASE, `BOOTTIM_RANGE, `BOOTTIM_SUPPORTED, 1'b1/*AccessRX*/, Size, 4'b1111, SelRegions[5]); + adrdec timdec(PhysicalAddress, `TIM_BASE, `TIM_RANGE, `TIM_SUPPORTED, 1'b1/*AccessRWX*/, Size, 4'b1111, SelRegions[4]); + adrdec clintdec(PhysicalAddress, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, Size, 4'b1111, SelRegions[3]); + adrdec gpiodec(PhysicalAddress, `GPIO_BASE, `GPIO_RANGE, `GPIO_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[2]); + adrdec uartdec(PhysicalAddress, `UART_BASE, `UART_RANGE, `UART_SUPPORTED, AccessRW, Size, 4'b0001, SelRegions[1]); + adrdec plicdec(PhysicalAddress, `PLIC_BASE, `PLIC_RANGE, `PLIC_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[0]); + endmodule diff --git a/wally-pipelined/src/mmu/mmu.sv b/wally-pipelined/src/mmu/mmu.sv index ff315f12..8082d01a 100644 --- a/wally-pipelined/src/mmu/mmu.sv +++ b/wally-pipelined/src/mmu/mmu.sv @@ -44,6 +44,7 @@ module mmu #(parameter ENTRY_BITS = 3, // x1 - TLB is accessed for a write // 11 - TLB is accessed for both read and write input logic [1:0] TLBAccessType, + input logic DisableTranslation, // Virtual address input input logic [`XLEN-1:0] VirtualAddress, @@ -66,17 +67,14 @@ module mmu #(parameter ENTRY_BITS = 
3, output logic TLBPageFault, // PMA checker signals - input logic [31:0] HADDR, - input logic [2:0] HSIZE, HBURST, - input logic HWRITE, input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, - input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], + input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], + input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], output logic SquashBusAccess, // *** send to privileged unit output logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM, - output logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM, - output logic [5:0] HSELRegions + output logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM +// output logic [5:0] SelRegions ); @@ -96,4 +94,4 @@ module mmu #(parameter ENTRY_BITS = 3, assign SquashBusAccess = PMASquashBusAccess || PMPSquashBusAccess; -endmodule \ No newline at end of file +endmodule diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv index 70ca1ac3..d0d2152f 100644 --- a/wally-pipelined/src/mmu/pagetablewalker.sv +++ b/wally-pipelined/src/mmu/pagetablewalker.sv @@ -30,61 +30,92 @@ `include "wally-config.vh" /* *** - TO-DO: - - Implement faults on accessed/dirty behavior -*/ + TO-DO: + - Implement faults on accessed/dirty behavior + */ -module pagetablewalker ( - // Control signals - input logic HCLK, HRESETn, - input logic [`XLEN-1:0] SATP_REGW, +module pagetablewalker + ( + // Control signals + input logic clk, reset, + input logic [`XLEN-1:0] SATP_REGW, - // Signals from TLBs (addresses to translate) - input logic [`XLEN-1:0] PCF, MemAdrM, - input logic ITLBMissF, DTLBMissM, - input logic [1:0] MemRWM, + // Signals from TLBs (addresses to translate) + input logic [`XLEN-1:0] PCF, MemAdrM, + input logic 
ITLBMissF, DTLBMissM, + input logic [1:0] MemRWM, - // Outputs to the TLBs (PTEs to write) - output logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM, - output logic [1:0] PageTypeF, PageTypeM, - output logic ITLBWriteF, DTLBWriteM, + // Outputs to the TLBs (PTEs to write) + output logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM, + output logic [1:0] PageTypeF, PageTypeM, + output logic ITLBWriteF, DTLBWriteM, - // Signals from ahblite (PTEs from memory) - input logic [`XLEN-1:0] MMUReadPTE, - input logic MMUReady, - // Signals to ahblite (memory addresses to access) - output logic [`XLEN-1:0] MMUPAdr, - output logic MMUTranslate, - // Stall signal - output logic MMUStall, - // Faults - output logic WalkerInstrPageFaultF, - output logic WalkerLoadPageFaultM, - output logic WalkerStorePageFaultM -); + // *** modify to send to LSU // *** KMG: These are inputs/results from the ahblite whose addresses should have already been checked, so I don't think they need to be sent through the LSU + input logic [`XLEN-1:0] MMUReadPTE, + input logic MMUReady, + input logic HPTWStall, + + // *** modify to send to LSU + output logic [`XLEN-1:0] MMUPAdr, // this probalby should be `PA_BITS wide + output logic MMUTranslate, // *** rename to HPTWReq + output logic HPTWRead, + + + + + // Stall signal + output logic MMUStall, + + // Faults + output logic WalkerInstrPageFaultF, + output logic WalkerLoadPageFaultM, + output logic WalkerStorePageFaultM + ); // Internal signals - logic [`PPN_BITS-1:0] BasePageTablePPN; - logic [`XLEN-1:0] TranslationVAdr; - logic [`XLEN-1:0] SavedPTE, CurrentPTE; - logic [`PA_BITS-1:0] TranslationPAdr; - logic [`PPN_BITS-1:0] CurrentPPN; + // register TLBs translation miss requests + logic [`XLEN-1:0] TranslationVAdrQ; + logic ITLBMissFQ, DTLBMissMQ; + + logic [`PPN_BITS-1:0] BasePageTablePPN; + logic [`XLEN-1:0] TranslationVAdr; + logic [`XLEN-1:0] SavedPTE, CurrentPTE; + logic [`PA_BITS-1:0] TranslationPAdr; + logic [`PPN_BITS-1:0] CurrentPPN; logic 
[`SVMODE_BITS-1:0] SvMode; - logic MemStore; + logic MemStore; // PTE Control Bits - logic Dirty, Accessed, Global, User, - Executable, Writable, Readable, Valid; + logic Dirty, Accessed, Global, User, + Executable, Writable, Readable, Valid; // PTE descriptions - logic ValidPTE, AccessAlert, MegapageMisaligned, BadMegapage, LeafPTE; + logic ValidPTE, AccessAlert, MegapageMisaligned, BadMegapage, LeafPTE; // Outputs of walker - logic [`XLEN-1:0] PageTableEntry; - logic [1:0] PageType; + logic [`XLEN-1:0] PageTableEntry; + logic [1:0] PageType; + logic StartWalk; + logic EndWalk; + + typedef enum {LEVEL0_WDV, + LEVEL0, + LEVEL1_WDV, + LEVEL1, + LEVEL2_WDV, + LEVEL2, + LEVEL3_WDV, + LEVEL3, + LEAF, + IDLE, + FAULT} statetype; + statetype WalkerState, NextWalkerState; + + logic PRegEn; + assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0]; @@ -92,8 +123,41 @@ module pagetablewalker ( assign MemStore = MemRWM[0]; // Prefer data address translations over instruction address translations - assign TranslationVAdr = (DTLBMissM) ? MemAdrM : PCF; - assign MMUTranslate = DTLBMissM || ITLBMissF; + assign TranslationVAdr = (DTLBMissM) ? 
MemAdrM : PCF; // *** need to register TranslationVAdr + flopenr #(`XLEN) + TranslationVAdrReg(.clk(clk), + .reset(reset), + .en(StartWalk), // *** use enable later to save power + .d(TranslationVAdr), + .q(TranslationVAdrQ)); + + flopenrc #(1) + DTLBMissMReg(.clk(clk), + .reset(reset), + .en(StartWalk | EndWalk), + .clear(EndWalk), + .d(DTLBMissM), + .q(DTLBMissMQ)); + + flopenrc #(1) + ITLBMissMReg(.clk(clk), + .reset(reset), + .en(StartWalk | EndWalk), + .clear(EndWalk), + .d(ITLBMissF), + .q(ITLBMissFQ)); + + + assign StartWalk = WalkerState == IDLE && (DTLBMissM | ITLBMissF); + assign EndWalk = WalkerState == LEAF || + //(WalkerState == LEVEL0 && ValidPTE && LeafPTE && ~AccessAlert) || + (WalkerState == LEVEL1 && ValidPTE && LeafPTE && ~AccessAlert) || + (WalkerState == LEVEL2 && ValidPTE && LeafPTE && ~AccessAlert) || + (WalkerState == LEVEL3 && ValidPTE && LeafPTE && ~AccessAlert) || + (WalkerState == FAULT); + + assign MMUTranslate = (DTLBMissMQ | ITLBMissFQ) & ~EndWalk; + //assign MMUTranslate = DTLBMissM | ITLBMissF; // unswizzle PTE bits assign {Dirty, Accessed, Global, User, @@ -102,7 +166,7 @@ module pagetablewalker ( // Assign PTE descriptors common across all XLEN values assign LeafPTE = Executable | Writable | Readable; assign ValidPTE = Valid && ~(Writable && ~Readable); - assign AccessAlert = ~Accessed || (MemStore && ~Dirty); + assign AccessAlert = ~Accessed | (MemStore & ~Dirty); // Assign specific outputs to general outputs assign PageTableEntryF = PageTableEntry; @@ -110,43 +174,113 @@ module pagetablewalker ( assign PageTypeF = PageType; assign PageTypeM = PageType; -localparam LEVEL0 = 3'h0; - localparam LEVEL1 = 3'h1; - // space left for more levels - localparam LEAF = 3'h5; - localparam IDLE = 3'h6; - localparam FAULT = 3'h7; - - logic [2:0] WalkerState, NextWalkerState; generate if (`XLEN == 32) begin logic [9:0] VPN1, VPN0; - flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState); + flopenl #(.TYPE(statetype)) 
mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); + +/* -----\/----- EXCLUDED -----\/----- + assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV) && ~HPTWStall; + -----/\----- EXCLUDED -----/\----- */ // State transition logic always_comb begin + PRegEn = 1'b0; + TranslationPAdr = '0; + HPTWRead = 1'b0; + MMUStall = 1'b1; + PageTableEntry = '0; + PageType = '0; + DTLBWriteM = '0; + ITLBWriteF = '0; + + WalkerInstrPageFaultF = 1'b0; + WalkerLoadPageFaultM = 1'b0; + WalkerStorePageFaultM = 1'b0; + case (WalkerState) - IDLE: if (MMUTranslate) NextWalkerState = LEVEL1; - else NextWalkerState = IDLE; - LEVEL1: if (~MMUReady) NextWalkerState = LEVEL1; - // *** According to the architecture, we should - // fault upon finding a superpage that is misaligned or has 0 - // access bit. The following commented line of code is - // supposed to perform that check. However, it is untested. - else if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF; - // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. 
- else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0; - else NextWalkerState = FAULT; - LEVEL0: if (~MMUReady) NextWalkerState = LEVEL0; - else if (ValidPTE && LeafPTE && ~AccessAlert) - NextWalkerState = LEAF; - else NextWalkerState = FAULT; - LEAF: if (MMUTranslate) NextWalkerState = LEVEL1; - else NextWalkerState = IDLE; - FAULT: if (MMUTranslate) NextWalkerState = LEVEL1; - else NextWalkerState = IDLE; + IDLE: begin + if (MMUTranslate && SvMode == `SV32) begin // *** Added SvMode + NextWalkerState = LEVEL1_WDV; + TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; + HPTWRead = 1'b1; + end else begin + NextWalkerState = IDLE; + TranslationPAdr = '0; + MMUStall = 1'b0; + end + end + + LEVEL1_WDV: begin + TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; + if (HPTWStall) begin + NextWalkerState = LEVEL1_WDV; + end else begin + NextWalkerState = LEVEL1; + PRegEn = 1'b1; + end + end + + LEVEL1: begin + // *** According to the architecture, we should + // fault upon finding a superpage that is misaligned or has 0 + // access bit. The following commented line of code is + // supposed to perform that check. However, it is untested. + if (ValidPTE && LeafPTE && ~BadMegapage) begin + NextWalkerState = LEAF; + PageTableEntry = CurrentPTE; + PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00; // *** not sure about this mux? + DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions + TranslationPAdr = {2'b00, TranslationVAdrQ[31:0]}; + end + // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. 
+ else if (ValidPTE && ~LeafPTE) begin + NextWalkerState = LEVEL0_WDV; + TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; + HPTWRead = 1'b1; + end else begin + NextWalkerState = FAULT; + end + end + + LEVEL0_WDV: begin + TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; + if (HPTWStall) begin + NextWalkerState = LEVEL0_WDV; + end else begin + NextWalkerState = LEVEL0; + PRegEn = 1'b1; + end + end + + LEVEL0: begin + if (ValidPTE & LeafPTE & ~AccessAlert) begin + NextWalkerState = LEAF; + PageTableEntry = CurrentPTE; + PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00; + DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions + TranslationPAdr = {2'b00, TranslationVAdrQ[31:0]}; + end else begin + NextWalkerState = FAULT; + end + end + + LEAF: begin + NextWalkerState = IDLE; + MMUStall = 1'b0; + end + FAULT: begin + NextWalkerState = IDLE; + WalkerInstrPageFaultF = ~DTLBMissMQ; + WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; + WalkerStorePageFaultM = DTLBMissMQ && MemStore; + MMUStall = 1'b0; + end + // Default case should never happen, but is included for linter. 
default: NextWalkerState = IDLE; endcase @@ -156,56 +290,23 @@ localparam LEVEL0 = 3'h0; assign MegapageMisaligned = |(CurrentPPN[9:0]); assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme - assign VPN1 = TranslationVAdr[31:22]; - assign VPN0 = TranslationVAdr[21:12]; + assign VPN1 = TranslationVAdrQ[31:22]; + assign VPN0 = TranslationVAdrQ[21:12]; - // Assign combinational outputs - always_comb begin - // default values - TranslationPAdr = '0; - PageTableEntry = '0; - PageType ='0; - DTLBWriteM = '0; - ITLBWriteF = '0; - WalkerInstrPageFaultF = '0; - WalkerLoadPageFaultM = '0; - WalkerStorePageFaultM = '0; - MMUStall = '1; + //assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || + // WalkerState == LEVEL2 || WalkerState == LEVEL1; + - case (NextWalkerState) - IDLE: begin - MMUStall = '0; - end - LEVEL1: begin - TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00}; - end - LEVEL0: begin - TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; - end - LEAF: begin - // Keep physical address alive to prevent HADDR dropping to 0 - TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; - PageTableEntry = CurrentPTE; - PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00; - DTLBWriteM = DTLBMissM; - ITLBWriteF = ~DTLBMissM; // Prefer data over instructions - end - FAULT: begin - TranslationPAdr = {CurrentPPN, VPN0, 2'b00}; - WalkerInstrPageFaultF = ~DTLBMissM; - WalkerLoadPageFaultM = DTLBMissM && ~MemStore; - WalkerStorePageFaultM = DTLBMissM && MemStore; - MMUStall = '0; // Drop the stall early to enter trap handling code - end - default: begin - // nothing - end - endcase - end - - // Capture page table entry from ahblite - flopenr #(32) ptereg(HCLK, ~HRESETn, MMUReady, MMUReadPTE, SavedPTE); - mux2 #(32) ptemux(SavedPTE, MMUReadPTE, MMUReady, CurrentPTE); + // Capture page table entry from data cache + // *** may need to delay reading this value until the next clock cycle. 
+ // The clk to q latency of the SRAM in the data cache will be long. + // I cannot see directly using this value. This is no different than + // a load delay hazard. This will require rewriting the walker fsm. + // also need a new signal to save. Should be a mealy output of the fsm + // request followed by ~stall. + flopenr #(32) ptereg(clk, reset, PRegEn, MMUReadPTE, SavedPTE); + //mux2 #(32) ptemux(SavedPTE, MMUReadPTE, PRegEn, CurrentPTE); + assign CurrentPTE = SavedPTE; assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10]; // Assign outputs to ahblite @@ -214,64 +315,206 @@ localparam LEVEL0 = 3'h0; assign MMUPAdr = TranslationPAdr[31:0]; end else begin - localparam LEVEL2 = 3'h2; - localparam LEVEL3 = 3'h3; logic [8:0] VPN3, VPN2, VPN1, VPN0; - logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage; + logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage; - flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState); + flopenl #(.TYPE(statetype)) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); + + /* -----\/----- EXCLUDED -----\/----- + assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV || + WalkerState == LEVEL2_WDV || WalkerState == LEVEL3_WDV) && ~HPTWStall; + -----/\----- EXCLUDED -----/\----- */ + + //assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || WalkerState == LEVEL3 || + // WalkerState == LEVEL2 || WalkerState == LEVEL1; + always_comb begin + PRegEn = 1'b0; + TranslationPAdr = '0; + HPTWRead = 1'b0; + MMUStall = 1'b1; + PageTableEntry = '0; + PageType = '0; + DTLBWriteM = '0; + ITLBWriteF = '0; + + WalkerInstrPageFaultF = 1'b0; + WalkerLoadPageFaultM = 1'b0; + WalkerStorePageFaultM = 1'b0; + case (WalkerState) - IDLE: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3; - else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2; - else NextWalkerState = IDLE; + IDLE: begin + if (MMUTranslate && SvMode == `SV48) begin + NextWalkerState = 
LEVEL3_WDV; + TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; + HPTWRead = 1'b1; + end else if (MMUTranslate && SvMode == `SV39) begin + NextWalkerState = LEVEL2_WDV; + TranslationPAdr = {BasePageTablePPN, VPN2, 3'b000}; + HPTWRead = 1'b1; + end else begin + NextWalkerState = IDLE; + TranslationPAdr = '0; + MMUStall = 1'b0; + end + end - LEVEL3: if (~MMUReady) NextWalkerState = LEVEL3; - // *** According to the architecture, we should - // fault upon finding a superpage that is misaligned or has 0 - // access bit. The following commented line of code is - // supposed to perform that check. However, it is untested. - else if (ValidPTE && LeafPTE && ~BadTerapage) NextWalkerState = LEAF; - // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. - else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL2; - else NextWalkerState = FAULT; + LEVEL3_WDV: begin + TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; + if (HPTWStall) begin + NextWalkerState = LEVEL3_WDV; + end else begin + NextWalkerState = LEVEL3; + PRegEn = 1'b1; + end + end + + LEVEL3: begin + // *** According to the architecture, we should + // fault upon finding a superpage that is misaligned or has 0 + // access bit. The following commented line of code is + // supposed to perform that check. However, it is untested. + if (ValidPTE && LeafPTE && ~BadTerapage) begin + NextWalkerState = LEAF; + PageTableEntry = CurrentPTE; + PageType = (WalkerState == LEVEL3) ? 2'b11 : // *** not sure about this mux? + ((WalkerState == LEVEL2) ? 2'b10 : + ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); + DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions + TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; + end + // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. 
+ else if (ValidPTE && ~LeafPTE) begin + NextWalkerState = LEVEL2_WDV; + TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; + HPTWRead = 1'b1; + end else begin + NextWalkerState = FAULT; + end - LEVEL2: if (~MMUReady) NextWalkerState = LEVEL2; - // *** According to the architecture, we should - // fault upon finding a superpage that is misaligned or has 0 - // access bit. The following commented line of code is - // supposed to perform that check. However, it is untested. - else if (ValidPTE && LeafPTE && ~BadGigapage) NextWalkerState = LEAF; - // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. - else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL1; - else NextWalkerState = FAULT; + end - LEVEL1: if (~MMUReady) NextWalkerState = LEVEL1; - // *** According to the architecture, we should - // fault upon finding a superpage that is misaligned or has 0 - // access bit. The following commented line of code is - // supposed to perform that check. However, it is untested. - else if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF; - // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. - else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0; - else NextWalkerState = FAULT; + LEVEL2_WDV: begin + TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; + //HPTWRead = 1'b1; + if (HPTWStall) begin + NextWalkerState = LEVEL2_WDV; + end else begin + NextWalkerState = LEVEL2; + PRegEn = 1'b1; + end + end + + LEVEL2: begin + // *** According to the architecture, we should + // fault upon finding a superpage that is misaligned or has 0 + // access bit. The following commented line of code is + // supposed to perform that check. However, it is untested. 
+ if (ValidPTE && LeafPTE && ~BadGigapage) begin + NextWalkerState = LEAF; + PageTableEntry = CurrentPTE; + PageType = (WalkerState == LEVEL3) ? 2'b11 : + ((WalkerState == LEVEL2) ? 2'b10 : + ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); + DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions + TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; + end + // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. + else if (ValidPTE && ~LeafPTE) begin + NextWalkerState = LEVEL1_WDV; + TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; + HPTWRead = 1'b1; + end else begin + NextWalkerState = FAULT; + end - LEVEL0: if (~MMUReady) NextWalkerState = LEVEL0; - else if (ValidPTE && LeafPTE && ~AccessAlert) NextWalkerState = LEAF; - else NextWalkerState = FAULT; - - LEAF: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3; - else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2; - else NextWalkerState = IDLE; + end + + LEVEL1_WDV: begin + TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; + //HPTWRead = 1'b1; + if (HPTWStall) begin + NextWalkerState = LEVEL1_WDV; + end else begin + NextWalkerState = LEVEL1; + PRegEn = 1'b1; + end + end + + LEVEL1: begin + // *** According to the architecture, we should + // fault upon finding a superpage that is misaligned or has 0 + // access bit. The following commented line of code is + // supposed to perform that check. However, it is untested. + if (ValidPTE && LeafPTE && ~BadMegapage) begin + NextWalkerState = LEAF; + PageTableEntry = CurrentPTE; + PageType = (WalkerState == LEVEL3) ? 2'b11 : + ((WalkerState == LEVEL2) ? 2'b10 : + ((WalkerState == LEVEL1) ? 
2'b01 : 2'b00)); + DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions + TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; + + end + // else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line. + else if (ValidPTE && ~LeafPTE) begin + NextWalkerState = LEVEL0_WDV; + TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; + HPTWRead = 1'b1; + end else begin + NextWalkerState = FAULT; + end + end + + LEVEL0_WDV: begin + TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; + if (HPTWStall) begin + NextWalkerState = LEVEL0_WDV; + end else begin + NextWalkerState = LEVEL0; + PRegEn = 1'b1; + end + end + + LEVEL0: begin + if (ValidPTE && LeafPTE && ~AccessAlert) begin + NextWalkerState = LEAF; + PageTableEntry = CurrentPTE; + PageType = (WalkerState == LEVEL3) ? 2'b11 : + ((WalkerState == LEVEL2) ? 2'b10 : + ((WalkerState == LEVEL1) ? 2'b01 : 2'b00)); + DTLBWriteM = DTLBMissMQ; + ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions + TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0]; + end else begin + NextWalkerState = FAULT; + end + end + + LEAF: begin + NextWalkerState = IDLE; + MMUStall = 1'b0; + end + + FAULT: begin + NextWalkerState = IDLE; + WalkerInstrPageFaultF = ~DTLBMissMQ; + WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore; + WalkerStorePageFaultM = DTLBMissMQ && MemStore; + MMUStall = 1'b0; + end + + // Default case should never happen + default: begin + NextWalkerState = IDLE; + end - FAULT: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3; - else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2; - else NextWalkerState = IDLE; - // Default case should never happen, but is included for linter. 
- default: NextWalkerState = IDLE; endcase end @@ -288,76 +531,22 @@ localparam LEVEL0 = 3'h0; assign BadGigapage = GigapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme - assign VPN3 = TranslationVAdr[47:39]; - assign VPN2 = TranslationVAdr[38:30]; - assign VPN1 = TranslationVAdr[29:21]; - assign VPN0 = TranslationVAdr[20:12]; + assign VPN3 = TranslationVAdrQ[47:39]; + assign VPN2 = TranslationVAdrQ[38:30]; + assign VPN1 = TranslationVAdrQ[29:21]; + assign VPN0 = TranslationVAdrQ[20:12]; - always_comb begin - // default values - TranslationPAdr = '0; - PageTableEntry = '0; - PageType = '0; - DTLBWriteM = '0; - ITLBWriteF = '0; - WalkerInstrPageFaultF = '0; - WalkerLoadPageFaultM = '0; - WalkerStorePageFaultM = '0; - - // The MMU defaults to stalling the processor - MMUStall = '1; - - case (NextWalkerState) - IDLE: begin - MMUStall = '0; - end - LEVEL3: begin - TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000}; - // *** this is a huge breaking point. if we're going through level3 every time, even when sv48 is off, - // what should translationPAdr be when level3 is just off? - end - LEVEL2: begin - TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000}; - end - LEVEL1: begin - TranslationPAdr = {CurrentPPN, VPN1, 3'b000}; - end - LEVEL0: begin - TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; - end - LEAF: begin - // Keep physical address alive to prevent HADDR dropping to 0 - TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; - PageTableEntry = CurrentPTE; - PageType = (WalkerState == LEVEL3) ? 2'b11 : - ((WalkerState == LEVEL2) ? 2'b10 : - ((WalkerState == LEVEL1) ? 
2'b01 : 2'b00)); - DTLBWriteM = DTLBMissM; - ITLBWriteF = ~DTLBMissM; // Prefer data over instructions - end - FAULT: begin - // Keep physical address alive to prevent HADDR dropping to 0 - TranslationPAdr = {CurrentPPN, VPN0, 3'b000}; - WalkerInstrPageFaultF = ~DTLBMissM; - WalkerLoadPageFaultM = DTLBMissM && ~MemStore; - WalkerStorePageFaultM = DTLBMissM && MemStore; - MMUStall = '0; // Drop the stall early to enter trap handling code - end - default: begin - // nothing - end - endcase - end // Capture page table entry from ahblite - flopenr #(`XLEN) ptereg(HCLK, ~HRESETn, MMUReady, MMUReadPTE, SavedPTE); - mux2 #(`XLEN) ptemux(SavedPTE, MMUReadPTE, MMUReady, CurrentPTE); + flopenr #(`XLEN) ptereg(clk, reset, PRegEn, MMUReadPTE, SavedPTE); + //mux2 #(`XLEN) ptemux(SavedPTE, MMUReadPTE, PRegEn, CurrentPTE); + assign CurrentPTE = SavedPTE; assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10]; // Assign outputs to ahblite // *** Currently truncate address to 32 bits. This must be changed if // we support larger physical address spaces - assign MMUPAdr = {{(`XLEN-32){1'b0}}, TranslationPAdr[31:0]}; + assign MMUPAdr = {{(`XLEN-`PA_BITS){1'b0}}, TranslationPAdr[`PA_BITS-1:0]}; end endgenerate diff --git a/wally-pipelined/src/mmu/pmachecker.sv b/wally-pipelined/src/mmu/pmachecker.sv index 1d8cc3ee..26d8ac87 100644 --- a/wally-pipelined/src/mmu/pmachecker.sv +++ b/wally-pipelined/src/mmu/pmachecker.sv @@ -32,25 +32,20 @@ module pmachecker ( input logic [`PA_BITS-1:0] PhysicalAddress, input logic [1:0] Size, - input logic [31:0] HADDR, - input logic [2:0] HSIZE, -// input logic [2:0] HBURST, // *** in AHBlite, HBURST is hardwired to zero for single bursts only allowed. consider removing from this module if unused. input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // *** atomicaccessM is unused but might want to stay in for future use. 
output logic Cacheable, Idempotent, AtomicAllowed, output logic PMASquashBusAccess, - output logic [5:0] HSELRegions, - output logic PMAInstrAccessFaultF, output logic PMALoadAccessFaultM, output logic PMAStoreAccessFaultM ); - // logic BootTim, Tim, CLINT, GPIO, UART, PLIC; logic PMAAccessFault; logic AccessRW, AccessRWX, AccessRX; + logic [5:0] SelRegions; // Determine what type of access is being made assign AccessRW = ReadAccessM | WriteAccessM; @@ -58,17 +53,18 @@ module pmachecker ( assign AccessRX = ReadAccessM | ExecuteAccessF; // Determine which region of physical memory (if any) is being accessed - adrdecs adrdecs(HADDR, AccessRW, AccessRX, AccessRWX, HSIZE, HSELRegions); + adrdecs adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWX, Size, SelRegions); // Only RAM memory regions are cacheable - assign Cacheable = HSELRegions[5] | HSELRegions[4]; - assign Idempotent = HSELRegions[4]; - assign AtomicAllowed = HSELRegions[4]; + assign Cacheable = SelRegions[5] | SelRegions[4]; + assign Idempotent = SelRegions[4]; + assign AtomicAllowed = SelRegions[4]; // Detect access faults - assign PMAAccessFault = (~|HSELRegions) && AccessRWX; + assign PMAAccessFault = (~|SelRegions) & AccessRWX; assign PMAInstrAccessFaultF = ExecuteAccessF && PMAAccessFault; assign PMALoadAccessFaultM = ReadAccessM && PMAAccessFault; assign PMAStoreAccessFaultM = WriteAccessM && PMAAccessFault; assign PMASquashBusAccess = PMAAccessFault; endmodule + diff --git a/wally-pipelined/src/mmu/pmpadrdec.sv b/wally-pipelined/src/mmu/pmpadrdec.sv index 87f5d8f1..50d399ae 100644 --- a/wally-pipelined/src/mmu/pmpadrdec.sv +++ b/wally-pipelined/src/mmu/pmpadrdec.sv @@ -30,35 +30,43 @@ `include "wally-config.vh" module pmpadrdec ( - input logic [31:0] HADDR, // *** replace with PAdr - input logic [1:0] AdrMode, - input logic [`XLEN-1:0] CurrentPMPAdr, - input logic AdrAtLeastPreviousPMP, - output logic AdrAtLeastCurrentPMP, - output logic Match + input logic [`PA_BITS-1:0] PhysicalAddress, + input 
logic [7:0] PMPCfg, + input logic [`XLEN-1:0] PMPAdr, + input logic PAgePMPAdrIn, + input logic NoLowerMatchIn, + output logic PAgePMPAdrOut, + output logic NoLowerMatchOut, + output logic Match, Active, + output logic L, X, W, R ); + localparam TOR = 2'b01; localparam NA4 = 2'b10; localparam NAPOT = 2'b11; logic TORMatch, NAMatch; - logic AdrBelowCurrentPMP; + logic PAltPMPAdr; + logic FirstMatch; logic [`PA_BITS-1:0] CurrentAdrFull; - logic [`PA_BITS-1:0] FakePhysAdr; + logic [1:0] AdrMode; - // ***replace this when the true physical address from MMU is available - assign FakePhysAdr = {{(`PA_BITS-32){1'b0}}, HADDR}; + + assign AdrMode = PMPCfg[4:3]; + + // The two lsb of the physical address don't matter for this checking. + // The following code includes them, but hardwires the PMP checker lsbs to 00 + // and masks them later. Logic synthesis should optimize away these bottom bits. // Top-of-range (TOR) // Append two implicit trailing 0's to PMPAdr value - assign CurrentAdrFull = {CurrentPMPAdr[`PA_BITS-3:0], 2'b00}; - assign AdrBelowCurrentPMP = /*HADDR */FakePhysAdr < CurrentAdrFull; // *** make sure unsigned comparison works correctly - assign AdrAtLeastCurrentPMP = ~AdrBelowCurrentPMP; - assign TORMatch = AdrAtLeastPreviousPMP && AdrBelowCurrentPMP; + assign CurrentAdrFull = {PMPAdr[`PA_BITS-3:0], 2'b00}; + assign PAltPMPAdr = {1'b0, PhysicalAddress} < {1'b0, CurrentAdrFull}; // unsigned comparison + assign PAgePMPAdrOut = ~PAltPMPAdr; + assign TORMatch = PAgePMPAdrIn && PAltPMPAdr; // Naturally aligned regions - // *** should be able to optimize away bottom 2 bits // verilator lint_off UNOPTFLAT logic [`PA_BITS-1:0] Mask; @@ -69,69 +77,22 @@ module pmpadrdec ( assign Mask[1:0] = 2'b11; assign Mask[2] = (AdrMode == NAPOT); // mask has 0s in upper bis for NA4 region for (i=3; i < `PA_BITS; i=i+1) - assign Mask[i] = Mask[i-1] & CurrentPMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore + assign Mask[i] = Mask[i-1] & PMPAdr[i-3]; // NAPOT mask: 1's 
indicate bits to ignore endgenerate // verilator lint_on UNOPTFLAT - assign NAMatch = &((FakePhysAdr ~^ CurrentAdrFull) | Mask); - - /* generate - if (`XLEN == 32 || `XLEN == 64) begin // ***redo for various sizes - // priority encoder to translate address to range - // *** We'd like to replace this with a better priority encoder - // *** We should not be truncating 64 bit physical addresses to 32 bits... - // *** there is an easy combinatinoal way to do this with a cascade of AND gates O(32) rather than O(32^2) dh - always_comb - if (AdrMode == NA4) Range = (2**2) - 1; - else casez (CurrentPMPAdr[31:0]) // NAPOT regions - 32'b???????????????????????????????0: Range = (2**3) - 1; - 32'b??????????????????????????????01: Range = (2**4) - 1; - 32'b?????????????????????????????011: Range = (2**5) - 1; - 32'b????????????????????????????0111: Range = (2**6) - 1; - 32'b???????????????????????????01111: Range = (2**7) - 1; - 32'b??????????????????????????011111: Range = (2**8) - 1; - 32'b?????????????????????????0111111: Range = (2**9) - 1; - 32'b????????????????????????01111111: Range = (2**10) - 1; - 32'b???????????????????????011111111: Range = (2**11) - 1; - 32'b??????????????????????0111111111: Range = (2**12) - 1; - 32'b?????????????????????01111111111: Range = (2**13) - 1; - 32'b????????????????????011111111111: Range = (2**14) - 1; - 32'b???????????????????0111111111111: Range = (2**15) - 1; - 32'b??????????????????01111111111111: Range = (2**16) - 1; - 32'b?????????????????011111111111111: Range = (2**17) - 1; - 32'b????????????????0111111111111111: Range = (2**18) - 1; - 32'b???????????????01111111111111111: Range = (2**19) - 1; - 32'b??????????????011111111111111111: Range = (2**20) - 1; - 32'b?????????????0111111111111111111: Range = (2**21) - 1; - 32'b????????????01111111111111111111: Range = (2**22) - 1; - 32'b???????????011111111111111111111: Range = (2**23) - 1; - 32'b??????????0111111111111111111111: Range = (2**24) - 1; - 
32'b?????????01111111111111111111111: Range = (2**25) - 1; - 32'b????????011111111111111111111111: Range = (2**26) - 1; - 32'b???????0111111111111111111111111: Range = (2**27) - 1; - 32'b??????01111111111111111111111111: Range = (2**28) - 1; - 32'b?????011111111111111111111111111: Range = (2**29) - 1; - 32'b????0111111111111111111111111111: Range = (2**30) - 1; - 32'b???01111111111111111111111111111: Range = (2**31) - 1; - 32'b??011111111111111111111111111111: Range = (2**32) - 1; - 32'b?0111111111111111111111111111111: Range = (2**33) - 1; - 32'b01111111111111111111111111111111: Range = (2**34) - 1; - 32'b11111111111111111111111111111111: Range = (2**35) - 1; - default: Range = '0; - endcase - end else begin - assign Range = '0; - end - endgenerate - - // *** Range should not be truncated... but our physical address space is - // currently only 32 bits wide. - // with a bit of combining of range selection, this could be shared with NA4Match *** - assign NAMatch = &((HADDR ~^ CurrentAdrFull) | Range[31:0]);*/ + assign NAMatch = &((PhysicalAddress ~^ CurrentAdrFull) | Mask); assign Match = (AdrMode == TOR) ? TORMatch : (AdrMode == NA4 || AdrMode == NAPOT) ? NAMatch : 0; -endmodule + assign FirstMatch = NoLowerMatchIn & Match; + assign NoLowerMatchOut = NoLowerMatchIn & ~Match; + assign L = PMPCfg[7] & FirstMatch; + assign X = PMPCfg[2] & FirstMatch; + assign W = PMPCfg[1] & FirstMatch; + assign R = PMPCfg[0] & FirstMatch; + assign Active = |PMPCfg[4:3]; + endmodule diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv index f88d56fa..27c7e508 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -29,13 +29,8 @@ `include "wally-config.vh" module pmpchecker ( -// input logic clk, reset, //*** it seems like clk, reset is also not needed here? 
- - input logic [31:0] HADDR, - - input logic [1:0] PrivilegeModeW, - - input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, + input logic [`PA_BITS-1:0] PhysicalAddress, + input logic [1:0] PrivilegeModeW, // *** ModelSim has a switch -svinputport which controls whether input ports // are nets (wires) or vars by default. The default setting of this switch is @@ -44,10 +39,7 @@ module pmpchecker ( // this will be understood as a var. However, if we don't supply the `var` // keyword, the compiler warns us that it's interpreting the signal as a var, // which we might not intend. - // However, it's still bad form to pass 512 or 1024 signals across a module - // boundary. It would be better to store the PMP address registers in a module - // somewhere in the CSR hierarchy and do PMP checking _within_ that module, so - // we don't have to pass around 16 whole registers. + input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], input logic ExecuteAccessF, WriteAccessM, ReadAccessM, @@ -59,98 +51,50 @@ module pmpchecker ( output logic PMPStoreAccessFaultM ); + // verilator lint_off UNOPTFLAT + // Bit i is high when the address falls in PMP region i - logic [15:0] Regions; - logic [3:0] MatchedRegion; - logic Match, EnforcePMP; + logic EnforcePMP; + logic [7:0] PMPCFG [`PMP_ENTRIES-1:0]; + logic [`PMP_ENTRIES-1:0] Match; // PMP Entry matches + logic [`PMP_ENTRIES-1:0] Active; // PMP register i is non-null + logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set + logic [`PMP_ENTRIES:0] NoLowerMatch; // None of the lower PMP entries match + logic [`PMP_ENTRIES:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] + genvar i,j; - logic [7:0] PMPCFG [15:0]; + assign PAgePMPAdr[0] = 1'b1; + assign NoLowerMatch[0] = 1'b1; + + generate + // verilator lint_off WIDTH + for (j=0; j<`PMP_ENTRIES; j = j+8) + assign {PMPCFG[j+7], PMPCFG[j+6], PMPCFG[j+5], PMPCFG[j+4], + PMPCFG[j+3], 
PMPCFG[j+2], PMPCFG[j+1], PMPCFG[j]} = PMPCFG_ARRAY_REGW[j/8]; + // verilator lint_on WIDTH + for (i=0; i<`PMP_ENTRIES; i++) + pmpadrdec pmpadrdec(.PhysicalAddress, + .PMPCfg(PMPCFG[i]), + .PMPAdr(PMPADDR_ARRAY_REGW[i]), + .PAgePMPAdrIn(PAgePMPAdr[i]), + .PAgePMPAdrOut(PAgePMPAdr[i+1]), + .NoLowerMatchIn(NoLowerMatch[i]), + .NoLowerMatchOut(NoLowerMatch[i+1]), + .Match(Match[i]), + .Active(Active[i]), + .L(L[i]), .X(X[i]), .W(W[i]), .R(R[i]) + ); - // Bit i is high when the address is greater than or equal to PMPADR[i] - // Used for determining whether TOR PMP regions match - logic [15:0] AboveRegion; - - // Bit i is high if PMP register i is non-null - logic [15:0] ActiveRegion; - - logic L_Bit, X_Bit, W_Bit, R_Bit; - logic InvalidExecute, InvalidWrite, InvalidRead; - - // *** extend to optionally 64 configurations - - assign {PMPCFG[15], PMPCFG[14], PMPCFG[13], PMPCFG[12], - PMPCFG[11], PMPCFG[10], PMPCFG[9], PMPCFG[8]} = PMPCFG23_REGW; - - assign {PMPCFG[7], PMPCFG[6], PMPCFG[5], PMPCFG[4], - PMPCFG[3], PMPCFG[2], PMPCFG[1], PMPCFG[0]} = PMPCFG01_REGW; - - pmpadrdec pmpadrdec(.HADDR(HADDR), .AdrMode(PMPCFG[0][4:3]), - .CurrentPMPAdr(PMPADDR_ARRAY_REGW[0]), - .AdrAtLeastPreviousPMP(1'b1), - .AdrAtLeastCurrentPMP(AboveRegion[0]), - .Match(Regions[0])); - assign ActiveRegion[0] = |PMPCFG[0][4:3]; - - generate // *** only for PMP_ENTRIES > 0 - genvar i; - for (i = 1; i < `PMP_ENTRIES; i++) begin - pmpadrdec pmpadrdec(.HADDR(HADDR), .AdrMode(PMPCFG[i][4:3]), - .CurrentPMPAdr(PMPADDR_ARRAY_REGW[i]), - .AdrAtLeastPreviousPMP(AboveRegion[i-1]), - .AdrAtLeastCurrentPMP(AboveRegion[i]), - .Match(Regions[i])); - - assign ActiveRegion[i] = |PMPCFG[i][4:3]; - end + // verilator lint_on UNOPTFLAT endgenerate - assign Match = |Regions; + // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region + assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? 
|L : |Active; - // Only enforce PMP checking for S and U modes when at least one PMP is active - assign EnforcePMP = |ActiveRegion; + assign PMPInstrAccessFaultF = EnforcePMP && ExecuteAccessF && ~|X; + assign PMPStoreAccessFaultM = EnforcePMP && WriteAccessM && ~|W; + assign PMPLoadAccessFaultM = EnforcePMP && ReadAccessM && ~|R; - // *** extend to up to 64, fold bit extraction to avoid need for binary encoding of region - always_comb - casez (Regions) - 16'b???????????????1: MatchedRegion = 0; - 16'b??????????????10: MatchedRegion = 1; - 16'b?????????????100: MatchedRegion = 2; - 16'b????????????1000: MatchedRegion = 3; - 16'b???????????10000: MatchedRegion = 4; - 16'b??????????100000: MatchedRegion = 5; - 16'b?????????1000000: MatchedRegion = 6; - 16'b????????10000000: MatchedRegion = 7; - 16'b???????100000000: MatchedRegion = 8; - 16'b??????1000000000: MatchedRegion = 9; - 16'b?????10000000000: MatchedRegion = 10; - 16'b????100000000000: MatchedRegion = 11; - 16'b???1000000000000: MatchedRegion = 12; - 16'b??10000000000000: MatchedRegion = 13; - 16'b?100000000000000: MatchedRegion = 14; - 16'b1000000000000000: MatchedRegion = 15; - default: MatchedRegion = 0; // Should only occur if there is no match - endcase - - assign L_Bit = PMPCFG[MatchedRegion][7] && Match; - assign X_Bit = PMPCFG[MatchedRegion][2] && Match; - assign W_Bit = PMPCFG[MatchedRegion][1] && Match; - assign R_Bit = PMPCFG[MatchedRegion][0] && Match; - - assign InvalidExecute = ExecuteAccessF && ~X_Bit; - assign InvalidWrite = WriteAccessM && ~W_Bit; - assign InvalidRead = ReadAccessM && ~R_Bit; - - // *** don't cause faults when there are no PMPs - assign PMPInstrAccessFaultF = (PrivilegeModeW == `M_MODE) ? - Match && L_Bit && InvalidExecute : - EnforcePMP && InvalidExecute; - assign PMPStoreAccessFaultM = (PrivilegeModeW == `M_MODE) ? - Match && L_Bit && InvalidWrite : - EnforcePMP && InvalidWrite; - assign PMPLoadAccessFaultM = (PrivilegeModeW == `M_MODE) ? 
- Match && L_Bit && InvalidRead : - EnforcePMP && InvalidRead; - - assign PMPSquashBusAccess = PMPInstrAccessFaultF || PMPLoadAccessFaultM || PMPStoreAccessFaultM; + assign PMPSquashBusAccess = PMPInstrAccessFaultF | PMPLoadAccessFaultM | PMPStoreAccessFaultM; endmodule diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index 127dc5a5..1cf63906 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -65,6 +65,7 @@ module tlb #(parameter ENTRY_BITS = 3, // x1 - TLB is accessed for a write // 11 - TLB is accessed for both read and write input logic [1:0] TLBAccessType, + input logic DisableTranslation, // Virtual address input input logic [`XLEN-1:0] VirtualAddress, @@ -135,7 +136,7 @@ module tlb #(parameter ENTRY_BITS = 3, endgenerate // Whether translation should occur - assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE); + assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE) & ~ DisableTranslation; // Determine how the TLB is currently being used // Note that we use ReadAccess for both loads and instruction fetches diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv index 213bcde3..dfac5571 100644 --- a/wally-pipelined/src/privileged/csr.sv +++ b/wally-pipelined/src/privileged/csr.sv @@ -60,7 +60,7 @@ module csr #(parameter output logic STATUS_MIE, STATUS_SIE, output logic STATUS_MXR, STATUS_SUM, output logic STATUS_MPRV, - output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, + output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], input logic [4:0] SetFflagsM, output logic [2:0] FRM_REGW, diff --git a/wally-pipelined/src/privileged/csrm.sv b/wally-pipelined/src/privileged/csrm.sv index 33b903a8..8c74b951 100644 --- a/wally-pipelined/src/privileged/csrm.sv +++ b/wally-pipelined/src/privileged/csrm.sv @@ -48,25 +48,9 @@ module csrm #(parameter MTVAL = 
12'h343, MIP = 12'h344, PMPCFG0 = 12'h3A0, - PMPCFG1 = 12'h3A1, - PMPCFG2 = 12'h3A2, - PMPCFG3 = 12'h3A3, + // .. up to 15 more at consecutive addresses PMPADDR0 = 12'h3B0, - PMPADDR1 = 12'h3B1, - PMPADDR2 = 12'h3B2, - PMPADDR3 = 12'h3B3, - PMPADDR4 = 12'h3B4, - PMPADDR5 = 12'h3B5, - PMPADDR6 = 12'h3B6, - PMPADDR7 = 12'h3B7, - PMPADDR8 = 12'h3B8, - PMPADDR9 = 12'h3B9, - PMPADDR10 = 12'h3BA, - PMPADDR11 = 12'h3BB, - PMPADDR12 = 12'h3BC, - PMPADDR13 = 12'h3BD, - PMPADDR14 = 12'h3BE, - PMPADDR15 = 12'h3BF, + // ... up to 63 more at consecutive addresses TSELECT = 12'h7A0, TDATA1 = 12'h7A1, TDATA2 = 12'h7A2, @@ -90,7 +74,7 @@ module csrm #(parameter output logic [31:0] MCOUNTEREN_REGW, MCOUNTINHIBIT_REGW, output logic [`XLEN-1:0] MEDELEG_REGW, MIDELEG_REGW, // 64-bit registers in RV64, or two 32-bit registers in RV32 - output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, + output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], input logic [11:0] MIP_REGW, MIE_REGW, output logic WriteMSTATUSM, @@ -103,8 +87,8 @@ module csrm #(parameter logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM; logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM; logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM; - logic WritePMPCFG0M, WritePMPCFG2M; - logic WritePMPADDRM [15:0]; + logic [`PMP_ENTRIES/8-1:0] WritePMPCFGM, WritePMPCFGHM ; + logic [`PMP_ENTRIES-1:0] WritePMPADDRM ; localparam MISA_26 = (`MISA) & 32'h03ffffff; @@ -120,7 +104,7 @@ module csrm #(parameter assign WriteMEPCM = MTrapM | (CSRMWriteM && (CSRAdrM == MEPC)) && ~StallW; assign WriteMCAUSEM = MTrapM | (CSRMWriteM && (CSRAdrM == MCAUSE)) && ~StallW; assign WriteMTVALM = MTrapM | (CSRMWriteM && (CSRAdrM == MTVAL)) && ~StallW; - assign WritePMPCFG0M = (CSRMWriteM && (CSRAdrM == PMPCFG0)) && ~StallW; +/* assign WritePMPCFG0M = (CSRMWriteM && (CSRAdrM == PMPCFG0)) && ~StallW; assign WritePMPCFG2M = (CSRMWriteM && (CSRAdrM == PMPCFG2)) && ~StallW; assign 
WritePMPADDRM[0] = (CSRMWriteM && (CSRAdrM == PMPADDR0)) && ~StallW; assign WritePMPADDRM[1] = (CSRMWriteM && (CSRAdrM == PMPADDR1)) && ~StallW; @@ -137,10 +121,13 @@ module csrm #(parameter assign WritePMPADDRM[12] = (CSRMWriteM && (CSRAdrM == PMPADDR12)) && ~StallW; assign WritePMPADDRM[13] = (CSRMWriteM && (CSRAdrM == PMPADDR13)) && ~StallW; assign WritePMPADDRM[14] = (CSRMWriteM && (CSRAdrM == PMPADDR14)) && ~StallW; - assign WritePMPADDRM[15] = (CSRMWriteM && (CSRAdrM == PMPADDR15)) && ~StallW; + assign WritePMPADDRM[15] = (CSRMWriteM && (CSRAdrM == PMPADDR15)) && ~StallW; */ assign WriteMCOUNTERENM = CSRMWriteM && (CSRAdrM == MCOUNTEREN) && ~StallW; assign WriteMCOUNTINHIBITM = CSRMWriteM && (CSRAdrM == MCOUNTINHIBIT) && ~StallW; + + + assign IllegalCSRMWriteReadonlyM = CSRMWriteM && (CSRAdrM == MVENDORID || CSRAdrM == MARCHID || CSRAdrM == MIMPID || CSRAdrM == MHARTID); // CSRs @@ -172,33 +159,39 @@ module csrm #(parameter flopenl #(32) MCOUNTINHIBITreg(clk, reset, WriteMCOUNTINHIBITM, CSRWriteValM[31:0], 32'hFFFFFFFF, MCOUNTINHIBIT_REGW); // There are PMP_ENTRIES = 0, 16, or 64 PMPADDR registers, each of which has its own flop + + // *** need to add support for locked PMPCFG and PMPADR + genvar i; generate - genvar i; - for (i = 0; i < `PMP_ENTRIES; i++) begin: pmp_flop + for(i=0; i<`PMP_ENTRIES; i++) begin + assign WritePMPADDRM[i] = (CSRMWriteM && (CSRAdrM == PMPADDR0+i)) && ~StallW; flopenr #(`XLEN) PMPADDRreg(clk, reset, WritePMPADDRM[i], CSRWriteValM, PMPADDR_ARRAY_REGW[i]); end + for (i=0; i<`PMP_ENTRIES/8; i++) begin + if (`XLEN==64) begin + assign WritePMPCFGM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i)) && ~StallW; + flopenr #(`XLEN) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i]); + end else begin + assign WritePMPCFGM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i)) && ~StallW; + assign WritePMPCFGHM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i+1)) && ~StallW; + flopenr #(`XLEN) PMPCFGreg(clk, reset, WritePMPCFGM[i], 
CSRWriteValM, PMPCFG_ARRAY_REGW[i][31:0]); + flopenr #(`XLEN) PMPCFGHreg(clk, reset, WritePMPCFGHM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][63:32]); + end + end endgenerate - // PMPCFG registers are a pair of 64-bit in RV64 and four 32-bit in RV32 - generate - if (`XLEN==64) begin - flopenr #(`XLEN) PMPCFG01reg(clk, reset, WritePMPCFG0M, CSRWriteValM, PMPCFG01_REGW); - flopenr #(`XLEN) PMPCFG23reg(clk, reset, WritePMPCFG2M, CSRWriteValM, PMPCFG23_REGW); - end else begin - logic WritePMPCFG1M, WritePMPCFG3M; - assign WritePMPCFG1M = MTrapM | (CSRMWriteM && (CSRAdrM == PMPCFG1)); - assign WritePMPCFG3M = MTrapM | (CSRMWriteM && (CSRAdrM == PMPCFG3)); - flopenr #(`XLEN) PMPCFG0reg(clk, reset, WritePMPCFG0M, CSRWriteValM, PMPCFG01_REGW[31:0]); - flopenr #(`XLEN) PMPCFG1reg(clk, reset, WritePMPCFG1M, CSRWriteValM, PMPCFG01_REGW[63:32]); - flopenr #(`XLEN) PMPCFG2reg(clk, reset, WritePMPCFG2M, CSRWriteValM, PMPCFG23_REGW[31:0]); - flopenr #(`XLEN) PMPCFG3reg(clk, reset, WritePMPCFG3M, CSRWriteValM, PMPCFG23_REGW[63:32]); - end - endgenerate // Read machine mode CSRs + // verilator lint_off WIDTH always_comb begin IllegalCSRMAccessM = !(`S_SUPPORTED | `U_SUPPORTED & `N_SUPPORTED) && (CSRAdrM == MEDELEG || CSRAdrM == MIDELEG); // trap on DELEG register access when no S or N-mode - case (CSRAdrM) + if (CSRAdrM >= PMPADDR0 && CSRAdrM < PMPADDR0 + `PMP_ENTRIES) // reading a PMP entry + CSRMReadValM = PMPADDR_ARRAY_REGW[CSRAdrM - PMPADDR0]; + else if (CSRAdrM >= PMPCFG0 && CSRAdrM < PMPCFG0 + `PMP_ENTRIES/4) begin + if (~CSRAdrM[0]) CSRMReadValM = PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0)/2][`XLEN-1:0]; + else CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0-1)/2][63:32]}; + end + else case (CSRAdrM) MISA_ADR: CSRMReadValM = MISA_REGW; MVENDORID: CSRMReadValM = 0; MARCHID: CSRMReadValM = 0; @@ -219,7 +212,7 @@ module csrm #(parameter MTVAL: CSRMReadValM = MTVAL_REGW; MCOUNTEREN:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTEREN_REGW}; MCOUNTINHIBIT:CSRMReadValM = 
{{(`XLEN-32){1'b0}}, MCOUNTINHIBIT_REGW}; - PMPCFG0: CSRMReadValM = PMPCFG01_REGW[`XLEN-1:0]; +/* PMPCFG0: CSRMReadValM = PMPCFG01_REGW[`XLEN-1:0]; PMPCFG1: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG01_REGW[63:32]}; PMPCFG2: CSRMReadValM = PMPCFG23_REGW[`XLEN-1:0]; PMPCFG3: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG23_REGW[63:32]}; @@ -238,11 +231,12 @@ module csrm #(parameter PMPADDR12: CSRMReadValM = PMPADDR_ARRAY_REGW[12]; PMPADDR13: CSRMReadValM = PMPADDR_ARRAY_REGW[13]; PMPADDR14: CSRMReadValM = PMPADDR_ARRAY_REGW[14]; - PMPADDR15: CSRMReadValM = PMPADDR_ARRAY_REGW[15]; + PMPADDR15: CSRMReadValM = PMPADDR_ARRAY_REGW[15]; */ default: begin CSRMReadValM = 0; IllegalCSRMAccessM = 1; end endcase end + // verilator lint_on WIDTH endmodule diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index 1275cd4b..5ed8c880 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -68,7 +68,7 @@ module privileged ( output logic [1:0] PrivilegeModeW, output logic [`XLEN-1:0] SATP_REGW, output logic STATUS_MXR, STATUS_SUM, - output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, + output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0], output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], output logic [2:0] FRM_REGW ); diff --git a/wally-pipelined/src/uncore/dtim.sv b/wally-pipelined/src/uncore/dtim.sv index 6b474dae..40864567 100644 --- a/wally-pipelined/src/uncore/dtim.sv +++ b/wally-pipelined/src/uncore/dtim.sv @@ -37,8 +37,8 @@ module dtim #(parameter BASE=0, RANGE = 65535) ( output logic HRESPTim, HREADYTim ); - localparam integer MemStartAddr = BASE>>(1+`XLEN/32); - localparam integer MemEndAddr = (RANGE+BASE)>>1+(`XLEN/32); + localparam MemStartAddr = BASE>>(1+`XLEN/32); + localparam MemEndAddr = (RANGE+BASE)>>1+(`XLEN/32); logic [`XLEN-1:0] RAM[BASE>>(1+`XLEN/32):(RANGE+BASE)>>1+(`XLEN/32)]; logic [31:0] HWADDR, A; diff --git 
a/wally-pipelined/src/uncore/uncore.sv b/wally-pipelined/src/uncore/uncore.sv index cb0a8c2a..79f7a0e8 100644 --- a/wally-pipelined/src/uncore/uncore.sv +++ b/wally-pipelined/src/uncore/uncore.sv @@ -74,7 +74,8 @@ module uncore ( // Determine which region of physical memory (if any) is being accessed // Use a trimmed down portion of the PMA checker - only the address decoders - adrdecs adrdecs(HADDR, 1'b1, 1'b1, 1'b1, HSIZE, HSELRegions); + // Set access types to all 1 as don't cares because the MMU has already done access checking + adrdecs adrdecs({{(`PA_BITS-32){1'b0}}, HADDR}, 1'b1, 1'b1, 1'b1, HSIZE[1:0], HSELRegions); // unswizzle HSEL signals assign {HSELBootTim, HSELTim, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC} = HSELRegions; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index fe1f057c..978f747f 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -26,167 +26,238 @@ `include "wally-config.vh" /* verilator lint_on UNUSED */ -module wallypipelinedhart ( - input logic clk, reset, - output logic [`XLEN-1:0] PCF, -// input logic [31:0] InstrF, - // Privileged - input logic TimerIntM, ExtIntM, SwIntM, - input logic InstrAccessFaultF, - input logic DataAccessFaultM, - input logic [63:0] MTIME_CLINT, MTIMECMP_CLINT, - // Bus Interface - input logic [15:0] rd2, // bogus, delete when real multicycle fetch works - input logic [`AHBW-1:0] HRDATA, - input logic HREADY, HRESP, - output logic HCLK, HRESETn, - output logic [31:0] HADDR, - output logic [`AHBW-1:0] HWDATA, - output logic HWRITE, - output logic [2:0] HSIZE, - output logic [2:0] HBURST, - output logic [3:0] HPROT, - output logic [1:0] HTRANS, - output logic HMASTLOCK, - output logic [5:0] HSELRegions, - // Delayed signals for subword write - output logic [2:0] HADDRD, - output logic [3:0] HSIZED, - output logic HWRITED -); +module wallypipelinedhart + ( + input logic clk, reset, + 
output logic [`XLEN-1:0] PCF, + // input logic [31:0] InstrF, + // Privileged + input logic TimerIntM, ExtIntM, SwIntM, + input logic InstrAccessFaultF, + input logic DataAccessFaultM, + input logic [63:0] MTIME_CLINT, MTIMECMP_CLINT, + // Bus Interface + input logic [15:0] rd2, // bogus, delete when real multicycle fetch works + input logic [`AHBW-1:0] HRDATA, + input logic HREADY, HRESP, + output logic HCLK, HRESETn, + output logic [31:0] HADDR, + output logic [`AHBW-1:0] HWDATA, + output logic HWRITE, + output logic [2:0] HSIZE, + output logic [2:0] HBURST, + output logic [3:0] HPROT, + output logic [1:0] HTRANS, + output logic HMASTLOCK, + output logic [5:0] HSELRegions, + // Delayed signals for subword write + output logic [2:0] HADDRD, + output logic [3:0] HSIZED, + output logic HWRITED + ); - // logic [1:0] ForwardAE, ForwardBE; - logic StallF, StallD, StallE, StallM, StallW; - logic FlushF, FlushD, FlushE, FlushM, FlushW; - logic RetM, TrapM, NonBusTrapM; + // logic [1:0] ForwardAE, ForwardBE; + logic StallF, StallD, StallE, StallM, StallW; + logic FlushF, FlushD, FlushE, FlushM, FlushW; + logic RetM, TrapM, NonBusTrapM; // new signals that must connect through DP - logic MulDivE, W64E; - logic CSRReadM, CSRWriteM, PrivilegedM; - logic [1:0] AtomicM; - logic [`XLEN-1:0] SrcAE, SrcBE; - logic [`XLEN-1:0] SrcAM; - logic [2:0] Funct3E; + logic MulDivE, W64E; + logic CSRReadM, CSRWriteM, PrivilegedM; + logic [1:0] AtomicM; + logic [`XLEN-1:0] SrcAE, SrcBE; + logic [`XLEN-1:0] SrcAM; + logic [2:0] Funct3E; // logic [31:0] InstrF; - logic [31:0] InstrD, InstrE, InstrM, InstrW; - logic [`XLEN-1:0] PCD, PCE, PCM, PCLinkE, PCLinkW; - logic [`XLEN-1:0] PCTargetE; - logic [`XLEN-1:0] CSRReadValW, MulDivResultW; - logic [`XLEN-1:0] PrivilegedNextPCM; - logic [1:0] MemRWM; - logic InstrValidM, InstrValidW; - logic InstrMisalignedFaultM; - logic DataMisalignedM; - logic IllegalBaseInstrFaultD, IllegalIEUInstrFaultD; - logic ITLBInstrPageFaultF, DTLBLoadPageFaultM, 
DTLBStorePageFaultM; - logic WalkerInstrPageFaultF, WalkerLoadPageFaultM, WalkerStorePageFaultM; - logic LoadMisalignedFaultM, LoadAccessFaultM; - logic StoreMisalignedFaultM, StoreAccessFaultM; - logic [`XLEN-1:0] InstrMisalignedAdrM; - - logic PCSrcE; - logic CSRWritePendingDEM; - logic LoadStallD, MulDivStallD, CSRRdStallD; - logic DivDoneE; - logic DivBusyE; - logic DivDoneW; - logic RegWriteD; - logic SquashSCM, SquashSCW; + logic [31:0] InstrD, InstrE, InstrM, InstrW; + logic [`XLEN-1:0] PCD, PCE, PCM, PCLinkE, PCLinkW; + logic [`XLEN-1:0] PCTargetE; + logic [`XLEN-1:0] CSRReadValW, MulDivResultW; + logic [`XLEN-1:0] PrivilegedNextPCM; + logic [1:0] MemRWM; + logic InstrValidM, InstrValidW; + logic InstrMisalignedFaultM; + logic DataMisalignedM; + logic IllegalBaseInstrFaultD, IllegalIEUInstrFaultD; + logic ITLBInstrPageFaultF, DTLBLoadPageFaultM, DTLBStorePageFaultM; + logic WalkerInstrPageFaultF, WalkerLoadPageFaultM, WalkerStorePageFaultM; + logic LoadMisalignedFaultM, LoadAccessFaultM; + logic StoreMisalignedFaultM, StoreAccessFaultM; + logic [`XLEN-1:0] InstrMisalignedAdrM; + logic PCSrcE; + logic CSRWritePendingDEM; + logic DivDoneE; + logic DivBusyE; + logic RegWriteD; + logic LoadStallD, MulDivStallD, CSRRdStallD; + logic SquashSCM, SquashSCW; // floating point unit signals - logic [2:0] FRM_REGW; - logic [1:0] FMemRWM, FMemRWE; - logic FStallD; - logic FWriteIntE, FWriteIntM, FWriteIntW; - logic [`XLEN-1:0] FWriteDataE; - logic [`XLEN-1:0] FIntResM; - logic FDivBusyE; - logic IllegalFPUInstrD, IllegalFPUInstrE; - logic FloatRegWriteW; - logic FPUStallD; - logic [4:0] SetFflagsM; - logic [`XLEN-1:0] FPUResultW; + logic [2:0] FRM_REGW; + logic [1:0] FMemRWM, FMemRWE; + logic FStallD; + logic FWriteIntE, FWriteIntM, FWriteIntW; + logic [`XLEN-1:0] FWriteDataE; + logic [`XLEN-1:0] FIntResM; + logic FDivBusyE; + logic IllegalFPUInstrD, IllegalFPUInstrE; + logic FloatRegWriteW; + logic FPUStallD; + logic [4:0] SetFflagsM; + logic [`XLEN-1:0] FPUResultW; // 
memory management unit signals - logic ITLBWriteF, DTLBWriteM; - logic ITLBFlushF, DTLBFlushM; - logic ITLBMissF, ITLBHitF; - logic DTLBMissM, DTLBHitM; - logic [`XLEN-1:0] SATP_REGW; - logic STATUS_MXR, STATUS_SUM; - logic [1:0] PrivilegeModeW; - logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM; - logic [1:0] PageTypeF, PageTypeM; + logic ITLBWriteF, DTLBWriteM; + logic ITLBFlushF, DTLBFlushM; + logic ITLBMissF, ITLBHitF; + logic DTLBMissM, DTLBHitM; + logic [`XLEN-1:0] SATP_REGW; + logic STATUS_MXR, STATUS_SUM; + logic [1:0] PrivilegeModeW; + logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM; + logic [1:0] PageTypeF, PageTypeM; // PMA checker signals - logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM; - logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM; - logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM; - logic DSquashBusAccessM, ISquashBusAccessF; - logic [5:0] DHSELRegionsM, IHSELRegionsF; + + logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM; + logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM; + logic DSquashBusAccessM, ISquashBusAccessF; var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0]; - logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW; // signals being sent from privileged unit to pmp/pma in dmem and ifu. - assign HSELRegions = ExecuteAccessF ? IHSELRegionsF : DHSELRegionsM; // *** this is a pure guess on how one of these should be selected. it passes tests, but is it the right way to do this? 
+ var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0]; // IMem stalls - logic ICacheStallF; - logic [`XLEN-1:0] MMUPAdr, MMUReadPTE; - logic MMUStall; - logic MMUTranslate, MMUReady; + logic ICacheStallF; + logic DCacheStall; + logic [`XLEN-1:0] MMUPAdr, MMUReadPTE; + logic MMUStall; + logic MMUTranslate, MMUReady; + logic HPTWRead; + logic HPTWReadyfromLSU; + logic HPTWStall; + // bus interface to dmem - logic MemReadM, MemWriteM; - logic [1:0] AtomicMaskedM; - logic [2:0] Funct3M; - logic [`XLEN-1:0] MemAdrM, WriteDataM; - logic [`PA_BITS-1:0] MemPAdrM; - logic [`XLEN-1:0] ReadDataW; - logic [`PA_BITS-1:0] InstrPAdrF; - logic [`XLEN-1:0] InstrRData; - logic InstrReadF; - logic DataStall; - logic InstrAckF, MemAckW; - logic CommitM, CommittedM; + logic MemReadM, MemWriteM; + logic [1:0] AtomicMaskedM; + logic [2:0] Funct3M; + logic [`XLEN-1:0] MemAdrM, WriteDataM; + logic [`PA_BITS-1:0] MemPAdrM; + logic [`XLEN-1:0] ReadDataW; + logic [`PA_BITS-1:0] InstrPAdrF; + logic [`XLEN-1:0] InstrRData; + logic InstrReadF; + logic DataStall; + logic InstrAckF, MemAckW; + logic CommitM, CommittedM; - logic BPPredWrongE; - logic BPPredDirWrongM; - logic BTBPredPCWrongM; - logic RASPredPCWrongM; - logic BPPredClassNonCFIWrongM; + logic BPPredWrongE; + logic BPPredDirWrongM; + logic BTBPredPCWrongM; + logic RASPredPCWrongM; + logic BPPredClassNonCFIWrongM; - logic[`XLEN-1:0] WriteDatatmpM; + logic [`XLEN-1:0] WriteDatatmpM; + + logic [4:0] InstrClassM; + + logic [`XLEN-1:0] HRDATAW; + + // IEU vs HPTW arbitration signals to send to LSU + logic DisableTranslation; + logic [1:0] MemRWMtoLSU; + logic [2:0] Funct3MtoLSU; + logic [1:0] AtomicMtoLSU; + logic [`XLEN-1:0] MemAdrMtoLSU; + logic [`XLEN-1:0] WriteDataMtoLSU; + logic [`XLEN-1:0] ReadDataWFromLSU; + logic CommittedMfromLSU; + logic SquashSCWfromLSU; + logic DataMisalignedMfromLSU; + logic StallWtoLSU; + logic StallWfromLSU; + logic [2:0] Funct3MfromLSU; - logic [4:0] InstrClassM; - - ifu ifu(.InstrInF(InstrRData), .*); // 
instruction fetch unit: PC, branch prediction, instruction cache + ifu ifu(.InstrInF(InstrRData), + .WalkerInstrPageFaultF(WalkerInstrPageFaultF), + .*); // instruction fetch unit: PC, branch prediction, instruction cache ieu ieu(.*); // integer execution unit: integer register file, datapath and controller // mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM); - lsu lsu(.*); // data cache unit + + pagetablewalker pagetablewalker(.HPTWRead(HPTWRead), + .*); // can send addresses to ahblite, send out pagetablestall + // arbiter between IEU and pagetablewalker + lsuArb arbiter(// HPTW connection + .HPTWTranslate(MMUTranslate), + .HPTWRead(HPTWRead), + .HPTWPAdr(MMUPAdr), + .HPTWReadPTE(MMUReadPTE), + .HPTWReady(MMUReady), + .HPTWStall(HPTWStall), + // CPU connection + .MemRWM(MemRWM), + .Funct3M(Funct3M), + .AtomicM(AtomicM), + .MemAdrM(MemAdrM), + .StallW(StallW), + .WriteDataM(WriteDataM), + .ReadDataW(ReadDataW), + .CommittedM(CommittedM), + .SquashSCW(SquashSCW), + .DataMisalignedM(DataMisalignedM), + .DCacheStall(DCacheStall), + // LSU + .DisableTranslation(DisableTranslation), + .MemRWMtoLSU(MemRWMtoLSU), + .Funct3MtoLSU(Funct3MtoLSU), + .AtomicMtoLSU(AtomicMtoLSU), + .MemAdrMtoLSU(MemAdrMtoLSU), + .WriteDataMtoLSU(WriteDataMtoLSU), + .StallWtoLSU(StallWtoLSU), + .CommittedMfromLSU(CommittedMfromLSU), + .SquashSCWfromLSU(SquashSCWfromLSU), + .DataMisalignedMfromLSU(DataMisalignedMfromLSU), + .ReadDataWFromLSU(ReadDataWFromLSU), + .HPTWReadyfromLSU(HPTWReadyfromLSU), + .DataStall(DataStall), + .*); + + + lsu lsu(.MemRWM(MemRWMtoLSU), + .Funct3M(Funct3MtoLSU), + .AtomicM(AtomicMtoLSU), + .MemAdrM(MemAdrMtoLSU), + .WriteDataM(WriteDataMtoLSU), + .ReadDataW(ReadDataWFromLSU), + .StallW(StallWtoLSU), + + .CommittedM(CommittedMfromLSU), + .SquashSCW(SquashSCWfromLSU), + .DataMisalignedM(DataMisalignedMfromLSU), + .DisableTranslation(DisableTranslation), + + .DataStall(DataStall), + .HPTWReady(HPTWReadyfromLSU), + 
.Funct3MfromLSU(Funct3MfromLSU), + .StallWfromLSU(StallWfromLSU), +// .DataStall(LSUStall), + .* ); // data cache unit ahblite ebu( - //.InstrReadF(1'b0), - //.InstrRData(InstrF), // hook up InstrF later - .ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking - .WriteDataM(WriteDataM), - .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), - .Funct7M(InstrM[31:25]), - .*); + //.InstrReadF(1'b0), + //.InstrRData(InstrF), // hook up InstrF later + .ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking + .WriteDataM(WriteDataM), + .MemSizeM(Funct3MfromLSU[1:0]), .UnsignedLoadM(Funct3MfromLSU[2]), + .Funct7M(InstrM[31:25]), + .HRDATAW(HRDATAW), + .StallW(StallWfromLSU), + .*); - pagetablewalker pagetablewalker(.*); // can send addresses to ahblite, send out pagetablestall - // *** can connect to hazard unit - // changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed. - // Would need to insertinstruction as InstrD, not InstrF - /*ahblite ebu( - .InstrReadF(1'b0), - .InstrRData(), // hook up InstrF later - .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), - .*); */ - - + muldiv mdu(.*); // multiply and divide unit hazard hzu(.*); // global stall and flush control @@ -200,5 +271,5 @@ module wallypipelinedhart ( // presently stub out SetFlagsM and FloatRegWriteW //assign SetFflagsM = 0; //assign FloatRegWriteW = 0; - + endmodule diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index 6676d1a7..8f8a5d44 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -27,8 +27,8 @@ module testbench(); - parameter waveOnICount = 2657000; // # of instructions at which to turn on waves in graphical sim - + parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*2400000; // # of instructions at which to turn on waves in graphical sim + parameter stopICount = 
`BUSYBEAR*143898 + `BUILDROOT*0000000; // # instructions at which to halt sim completely (set to 0 to let it run as far as it can) /////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////// DUT ///////////////////////////////////// @@ -248,6 +248,9 @@ module testbench(); if (instrs == waveOnICount) begin $display("turning on waves at %0d instructions", instrs); $stop; + end else if (instrs == stopICount && stopICount != 0) begin + $display("Ending sim at %0d instructions (set stopICount to 0 to let the sim go on)", instrs); + $stop; end // Check if PCD is going to be flushed due to a branch or jump