This commit is contained in:
Kip Macsai-Goren 2021-07-03 16:32:27 -04:00
commit 9566daccaa
61 changed files with 2130 additions and 3692 deletions

3
.gitmodules vendored
View File

@ -1,3 +0,0 @@
[submodule "sky130/sky130_osu_sc_t12"]
path = sky130/sky130_osu_sc_t12
url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/

@ -1 +0,0 @@
Subproject commit f60f2d0395053c4df362a97d7e2099721b6face6

View File

@ -62,25 +62,25 @@
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
`define BOOTTIM_SUPPORTED 1'b1
`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
`define BOOTTIM_RANGE 32'h00003FFF
//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
//`define BOOTTIM_RANGE 32'h00000FFF
`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
`define BOOTTIM_RANGE 56'h00003FFF
//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
//`define BOOTTIM_RANGE 56'h00000FFF
`define TIM_SUPPORTED 1'b1
`define TIM_BASE 32'h80000000
`define TIM_RANGE 32'h07FFFFFF
`define TIM_BASE 56'h80000000
`define TIM_RANGE 56'h07FFFFFF
`define CLINT_SUPPORTED 1'b1
`define CLINT_BASE 32'h02000000
`define CLINT_RANGE 32'h0000FFFF
`define CLINT_BASE 56'h02000000
`define CLINT_RANGE 56'h0000FFFF
`define GPIO_SUPPORTED 1'b1
`define GPIO_BASE 32'h10012000
`define GPIO_RANGE 32'h000000FF
`define GPIO_BASE 56'h10012000
`define GPIO_RANGE 56'h000000FF
`define UART_SUPPORTED 1'b1
`define UART_BASE 32'h10000000
`define UART_RANGE 32'h00000007
`define UART_BASE 56'h10000000
`define UART_RANGE 56'h00000007
`define PLIC_SUPPORTED 1'b1
`define PLIC_BASE 32'h0C000000
`define PLIC_RANGE 32'h03FFFFFF
`define PLIC_BASE 56'h0C000000
`define PLIC_RANGE 56'h03FFFFFF
// Bus Interface width
`define AHBW 64

View File

@ -31,6 +31,7 @@
`define BUSYBEAR 1
`define LINUX_FIX_READ {'h10000005}
`define LINUX_TEST_VECTORS "/courses/e190ax/busybear_boot/"
//`define LINUX_TEST_VECTORS "../../../busybear_boot/"
// RV32 or RV64: XLEN = 32 or 64
`define XLEN 64
@ -63,25 +64,25 @@
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
`define BOOTTIM_SUPPORTED 1'b1
`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
`define BOOTTIM_RANGE 32'h00003FFF
//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
//`define BOOTTIM_RANGE 32'h00000FFF
`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
`define BOOTTIM_RANGE 56'h00003FFF
//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
//`define BOOTTIM_RANGE 56'h00000FFF
`define TIM_SUPPORTED 1'b1
`define TIM_BASE 32'h80000000
`define TIM_RANGE 32'h07FFFFFF
`define TIM_BASE 56'h80000000
`define TIM_RANGE 56'h07FFFFFF
`define CLINT_SUPPORTED 1'b1
`define CLINT_BASE 32'h02000000
`define CLINT_RANGE 32'h0000FFFF
`define CLINT_BASE 56'h02000000
`define CLINT_RANGE 56'h0000FFFF
`define GPIO_SUPPORTED 1'b1
`define GPIO_BASE 32'h10012000
`define GPIO_RANGE 32'h000000FF
`define GPIO_BASE 56'h10012000
`define GPIO_RANGE 56'h000000FF
`define UART_SUPPORTED 1'b1
`define UART_BASE 32'h10000000
`define UART_RANGE 32'h00000007
`define UART_BASE 56'h10000000
`define UART_RANGE 56'h00000007
`define PLIC_SUPPORTED 1'b1
`define PLIC_BASE 32'h0C000000
`define PLIC_RANGE 32'h03FFFFFF
`define PLIC_BASE 56'h0C000000
`define PLIC_RANGE 56'h03FFFFFF
// Bus Interface width
`define AHBW 64

View File

@ -61,26 +61,27 @@
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
// *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file?
`define BOOTTIM_SUPPORTED 1'b1
`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
`define BOOTTIM_RANGE 32'h00003FFF
//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
//`define BOOTTIM_RANGE 32'h00000FFF
`define BOOTTIM_BASE 34'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
`define BOOTTIM_RANGE 34'h00003FFF
//`define BOOTTIM_BASE 34'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
//`define BOOTTIM_RANGE 34'h00000FFF
`define TIM_SUPPORTED 1'b1
`define TIM_BASE 32'h80000000
`define TIM_RANGE 32'h07FFFFFF
`define TIM_BASE 34'h80000000
`define TIM_RANGE 34'h07FFFFFF
`define CLINT_SUPPORTED 1'b1
`define CLINT_BASE 32'h02000000
`define CLINT_RANGE 32'h0000FFFF
`define CLINT_BASE 34'h02000000
`define CLINT_RANGE 34'h0000FFFF
`define GPIO_SUPPORTED 1'b1
`define GPIO_BASE 32'h10012000
`define GPIO_RANGE 32'h000000FF
`define GPIO_BASE 34'h10012000
`define GPIO_RANGE 34'h000000FF
`define UART_SUPPORTED 1'b1
`define UART_BASE 32'h10000000
`define UART_RANGE 32'h00000007
`define UART_BASE 34'h10000000
`define UART_RANGE 34'h00000007
`define PLIC_SUPPORTED 1'b1
`define PLIC_BASE 32'h0C000000
`define PLIC_RANGE 32'h03FFFFFF
`define PLIC_BASE 34'h0C000000
`define PLIC_RANGE 34'h03FFFFFF
// Bus Interface width
`define AHBW 32

View File

@ -53,7 +53,7 @@
`define DTLB_ENTRY_BITS 5
// Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 16
`define PMP_ENTRIES 64
// Address space
`define RESET_VECTOR 64'h0000000080000000
@ -65,26 +65,27 @@
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
// *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file?
`define BOOTTIM_SUPPORTED 1'b1
`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
`define BOOTTIM_RANGE 32'h00003FFF
//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
//`define BOOTTIM_RANGE 32'h00000FFF
`define BOOTTIM_RANGE 56'h00003FFF
`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
//`define BOOTTIM_RANGE 56'h00000FFF
`define TIM_SUPPORTED 1'b1
`define TIM_BASE 32'h80000000
`define TIM_RANGE 32'h07FFFFFF
`define TIM_BASE 56'h80000000
`define TIM_RANGE 56'h07FFFFFF
`define CLINT_SUPPORTED 1'b1
`define CLINT_BASE 32'h02000000
`define CLINT_RANGE 32'h0000FFFF
`define CLINT_BASE 56'h02000000
`define CLINT_RANGE 56'h0000FFFF
`define GPIO_SUPPORTED 1'b1
`define GPIO_BASE 32'h10012000
`define GPIO_RANGE 32'h000000FF
`define GPIO_BASE 56'h10012000
`define GPIO_RANGE 56'h000000FF
`define UART_SUPPORTED 1'b1
`define UART_BASE 32'h10000000
`define UART_RANGE 32'h00000007
`define UART_BASE 56'h10000000
`define UART_RANGE 56'h00000007
`define PLIC_SUPPORTED 1'b1
`define PLIC_BASE 32'h0C000000
`define PLIC_RANGE 32'h03FFFFFF
`define PLIC_BASE 56'h0C000000
`define PLIC_RANGE 56'h03FFFFFF
// Test modes

View File

@ -35,5 +35,6 @@ vopt work_busybear.testbench -o workopt_busybear
vsim workopt_busybear -suppress 8852,12070
run -all
run -all
quit

View File

@ -35,9 +35,10 @@ vopt +acc work.testbench -o workopt
vsim workopt -suppress 8852,12070
do ./wave-dos/linux-waves.do
#-- Run the Simulation
run -all
do ./wave-dos/linux-waves.do
run -all
##quit

View File

@ -35,8 +35,8 @@ switch $argc {
}
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
vopt +acc -gDEBUG=1 work.testbench -o workopt
vsim workopt
vopt -fsmdebug +acc -gDEBUG=1 work.testbench -o workopt
vsim workopt -fsmdebug
do wave.do

View File

@ -122,11 +122,11 @@ add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UEPC_REGW
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UTVEC_REGW
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UIP_REGW
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UIE_REGW
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG01_REGW
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG23_REGW
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPADDR_ARRAY_REGW
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/MISA_REGW
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csru/FRM_REGW
#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG01_REGW
#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG23_REGW
#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPADDR_ARRAY_REGW
#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/MISA_REGW
#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csru/FRM_REGW
add wave -divider
add wave -hex -r /testbench/*

View File

@ -7,32 +7,32 @@ add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/Func
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE
add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/BPPredWrongE
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/RetM
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/TrapM
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/ICacheStallF
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/DataStall
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/MulDivStallD
add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF
add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD
add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE
add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushM
add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushW
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/EcallFaultM
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StorePageFaultM
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InterruptM
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/DCacheStall
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushD
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushE
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushM
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushW
add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallF
add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallD
add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE
@ -89,6 +89,7 @@ add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrFName
add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrD
add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrE
add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrM
add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrW
add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNextF
add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCF
add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCPlus2or4F
@ -104,7 +105,7 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/c/RegWriteD
add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/RdD
add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs1D
add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs2D
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rf
add wave -noupdate -group RegFile -expand /testbench/dut/hart/ieu/dp/regf/rf
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a1
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a2
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a3
@ -117,31 +118,18 @@ add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/a
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/b
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/result
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/flags
add wave -noupdate -group alu -divider internals
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/overflow
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/carry
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/zero
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/neg
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/lt
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ltu
add wave -noupdate -group {dcache memory} /testbench/dut/hart/dmem/MemReadM
add wave -noupdate -group {dcache memory} /testbench/dut/hart/dmem/MemWriteM
add wave -noupdate -group {dcache memory} /testbench/dut/hart/dmem/MemAckW
add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/dmem/MemRWM
add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/dmem/AtomicM
add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/MemAdrM
add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/dmem/ReadDataW
add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/WriteDataM
add wave -noupdate -group dcache -color Gray90 /testbench/dut/hart/dmem/CurrState
add wave -noupdate -group dcache /testbench/dut/hart/MemPAdrM
add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAccessM
add wave -noupdate -group dcache /testbench/dut/hart/dmem/AtomicMaskedM
add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAckW
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/a
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/b
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/result
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/flags
add wave -noupdate -expand -group alu -divider internals
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/overflow
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/carry
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/zero
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/neg
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/lt
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/ltu
add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D
add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D
add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E
@ -184,48 +172,60 @@ add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/N
add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/D
add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/Q
add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/rem0
add wave -noupdate -expand -group icache -color Orange /testbench/dut/hart/ifu/icache/controller/CurrState
add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/controller/NextState
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn
add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/NUMLINES
add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKLEN
add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKBYTELEN
add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/OFFSETLEN
add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/INDEXLEN
add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/TAGLEN
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData
add wave -noupdate -expand -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit
add wave -noupdate -expand -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData
add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable
add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine
add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData
add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF
add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/AlignedInstrRawD
add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FlushDLastCyclen
add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/InstrRawD
add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF
add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF
add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPFinalF
add wave -noupdate -group AHB /testbench/dut/hart/ebu/BusState
add wave -noupdate -group icache -color Orange /testbench/dut/hart/ifu/icache/controller/CurrState
add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/controller/NextState
add wave -noupdate -group icache /testbench/dut/hart/ifu/ITLBMissF
add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/ITLBWriteF
add wave -noupdate -group icache -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit
add wave -noupdate -group icache -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn
add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/NUMLINES
add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKLEN
add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKBYTELEN
add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/OFFSETLEN
add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/INDEXLEN
add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/TAGLEN
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData
add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable
add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine
add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData
add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF
add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF
add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF
add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATA
add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATAMasked
add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATANext
add wave -noupdate -group AHB -color Gold /testbench/dut/hart/ebu/BusState
add wave -noupdate -group AHB /testbench/dut/hart/ebu/ProposedNextBusState
add wave -noupdate -group AHB /testbench/dut/hart/ebu/NextBusState
add wave -noupdate -group AHB /testbench/dut/hart/ebu/DSquashBusAccessM
add wave -noupdate -group AHB /testbench/dut/hart/ebu/ISquashBusAccessF
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/AtomicMaskedM
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemReadM
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemWriteM
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/InstrReadF
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemSizeM
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HCLK
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRESETn
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRDATA
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRDATANext
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HREADY
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRESP
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDR
@ -239,12 +239,136 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED
add wave -noupdate -group csr -color Aquamarine -label {br executed} -radix unsigned {/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[5]}
add wave -noupdate -group csr -color Aquamarine -label {br miss predicted} -radix unsigned {/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[4]}
add wave -noupdate -group csr -childformat {{{/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[5]} -radix unsigned} {{/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[4]} -radix unsigned}} -subitemconfig {{/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[5]} {-height 16 -radix unsigned} {/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[4]} {-height 16 -radix unsigned}} /testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW
add wave -noupdate -group AHB /testbench/dut/hart/ebu/StallW
add wave -noupdate -expand -group lsu -color Gold /testbench/dut/hart/lsu/CurrState
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DisableTranslation
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemRWM
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DataStall
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAdrM
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemPAdrM
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/ReadDataW
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/WriteDataM
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/AtomicMaskedM
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DSquashBusAccessM
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/HRDATAW
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAckW
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/StallW
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HCLK
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HSELPLIC
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HADDR
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HWRITE
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HREADY
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HTRANS
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HWDATA
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/UARTIntr
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/GPIOIntr
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HREADPLIC
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HRESPPLIC
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HREADYPLIC
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/ExtIntM
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HCLK
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HSELGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HADDR
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HWDATA
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HWRITE
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HREADY
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HTRANS
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HREADGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HRESPGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HREADYGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOPinsIn
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOPinsOut
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOPinsEn
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOIntr
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HCLK
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HSELCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HADDR
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HWRITE
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HWDATA
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HREADY
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HTRANS
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HREADCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HRESPCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HREADYCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIME
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIMECMP
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/TimerIntM
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/SwIntM
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUTranslate
add wave -noupdate -expand -group ptwalker -color Gold /testbench/dut/hart/pagetablewalker/WalkerState
add wave -noupdate -expand -group ptwalker -color Salmon /testbench/dut/hart/pagetablewalker/HPTWStall
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/HPTWRead
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUStall
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/EndWalk
add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/MMUReadPTE
add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/PRegEn
add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/CurrentPTE
add wave -noupdate -expand -group ptwalker -divider data
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/TranslationPAdr
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/ValidPTE
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/LeafPTE
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUStall
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/TranslationPAdr
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/PageTableEntry
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/PageType
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/ITLBWriteF
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/DTLBWriteM
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerInstrPageFaultF
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerLoadPageFaultM
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerStorePageFaultM
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/MMUStall
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/EndWalk
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr
add wave -noupdate -expand -group {LSU ARB} -color Gold /testbench/dut/hart/arbiter/CurrState
add wave -noupdate -expand -group {LSU ARB} -color {Medium Orchid} /testbench/dut/hart/arbiter/SelPTW
add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/pagetablewalker/MMUStall
add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWTranslate
add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWRead
add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWPAdr
add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReadPTE
add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReady
add wave -noupdate -expand -group {LSU ARB} -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU
add wave -noupdate /testbench/dut/hart/lsu/DataStall
add wave -noupdate -group csr /testbench/dut/hart/priv/csr/MIP_REGW
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HCLK
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HRESETn
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HSELUART
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HADDR
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HWRITE
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HWDATA
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HREADUART
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HRESPUART
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HREADYUART
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/SIN
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/DSRb
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/DCDb
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/CTSb
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/RIb
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/SOUT
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/RTSb
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/DTRb
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/OUT1b
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/OUT2b
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/INTR
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/TXRDYb
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/RXRDYb
add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/TLBMiss
add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/tlb/TLBWrite
add wave -noupdate -group itlb /testbench/dut/hart/ifu/ITLBMissF
add wave -noupdate /testbench/dut/hart/pagetablewalker/StartWalk
add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/DisableTranslation
add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/VirtualAddress
add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/CAMHit
add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/VPNIndex
add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/HitPageType
add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/VirtualPageNumber
add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/TLBWrite
add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/PTEWriteVal
add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/WriteLines
TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {12105831 ns} 0}
quietly wave cursor active 2
WaveRestoreCursors {{Cursor 8} {4545 ns} 0} {{Cursor 3} {3377 ns} 0} {{Cursor 4} {3215 ns} 0}
quietly wave cursor active 1
configure wave -namecolwidth 250
configure wave -valuecolwidth 189
configure wave -justifyvalue left
@ -259,4 +383,4 @@ configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ns
update
WaveRestoreZoom {0 ns} {30754715 ns}
WaveRestoreZoom {4209 ns} {4657 ns}

View File

@ -25,53 +25,57 @@
`include "wally-config.vh"
module ICacheCntrl #(parameter BLOCKLEN = 256) (
// Inputs from pipeline
input logic clk, reset,
input logic StallF, StallD,
input logic FlushD,
module ICacheCntrl #(parameter BLOCKLEN = 256)
(
// Inputs from pipeline
input logic clk, reset,
input logic StallF, StallD,
input logic FlushD,
// Input the address to read
// The upper bits of the physical pc
input logic [`PA_BITS-1:0] PCNextF,
input logic [`PA_BITS-1:0] PCPF,
// Signals to/from cache memory
// The read coming out of it
input logic [31:0] ICacheMemReadData,
input logic ICacheMemReadValid,
// The address at which we want to search the cache memory
output logic [`PA_BITS-1:0] PCTagF,
output logic [`PA_BITS-1:0] PCNextIndexF,
output logic ICacheReadEn,
// Load data into the cache
output logic ICacheMemWriteEnable,
output logic [BLOCKLEN-1:0] ICacheMemWriteData,
// Input the address to read
// The upper bits of the physical pc
input logic [`PA_BITS-1:0] PCNextF,
input logic [`PA_BITS-1:0] PCPF,
// Signals to/from cache memory
// The read coming out of it
input logic [31:0] ICacheMemReadData,
input logic ICacheMemReadValid,
// The address at which we want to search the cache memory
output logic [`PA_BITS-1:0] PCTagF,
output logic [`PA_BITS-1:0] PCNextIndexF,
output logic ICacheReadEn,
// Load data into the cache
output logic ICacheMemWriteEnable,
output logic [BLOCKLEN-1:0] ICacheMemWriteData,
// Outputs to rest of ifu
// High if the instruction in the fetch stage is compressed
output logic CompressedF,
// The instruction that was requested
// If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros
output logic [31:0] FinalInstrRawF,
// Outputs to rest of ifu
// High if the instruction in the fetch stage is compressed
output logic CompressedF,
// The instruction that was requested
// If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros
output logic [31:0] FinalInstrRawF,
// Outputs to pipeline control stuff
output logic ICacheStallF, EndFetchState,
// Outputs to pipeline control stuff
output logic ICacheStallF, EndFetchState,
input logic ITLBMissF,
input logic ITLBWriteF,
input logic WalkerInstrPageFaultF,
// Signals to/from ahblite interface
// A read containing the requested data
input logic [`XLEN-1:0] InstrInF,
input logic InstrAckF,
// The read we request from main memory
output logic [`PA_BITS-1:0] InstrPAdrF,
output logic InstrReadF
);
// Signals to/from ahblite interface
// A read containing the requested data
input logic [`XLEN-1:0] InstrInF,
input logic InstrAckF,
// The read we request from main memory
output logic [`PA_BITS-1:0] InstrPAdrF,
output logic InstrReadF
);
// FSM states
localparam STATE_READY = 0;
localparam STATE_HIT_SPILL = 1; // spill, block 0 hit
localparam STATE_HIT_SPILL_MISS_FETCH_WDV = 2; // block 1 miss, issue read to AHB and wait data.
localparam STATE_HIT_SPILL_MISS_FETCH_DONE = 3; // write data into SRAM/LUT
localparam STATE_HIT_SPILL_MERGE = 4; // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL.
localparam STATE_READY = 'h0;
localparam STATE_HIT_SPILL = 'h1; // spill, block 0 hit
localparam STATE_HIT_SPILL_MISS_FETCH_WDV = 'h2; // block 1 miss, issue read to AHB and wait data.
localparam STATE_HIT_SPILL_MISS_FETCH_DONE = 'h3; // write data into SRAM/LUT
localparam STATE_HIT_SPILL_MERGE = 'h4; // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL.
// a challenge is the spill signal gets us out of the ready state and moves us to
// 1 of the 2 spill branches. However the original fsm design had us return to
@ -87,28 +91,32 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) (
// between CPU stalling and that register.
// Picking option 1.
localparam STATE_HIT_SPILL_FINAL = 5; // this state replicates STATE_READY's replay of the
localparam STATE_HIT_SPILL_FINAL = 'h5; // this state replicates STATE_READY's replay of the
// spill access but does nto consider spill. It also does not do another operation.
localparam STATE_MISS_FETCH_WDV = 6; // aligned miss, issue read to AHB and wait for data.
localparam STATE_MISS_FETCH_DONE = 7; // write data into SRAM/LUT
localparam STATE_MISS_READ = 8; // read block 1 from SRAM/LUT
localparam STATE_MISS_FETCH_WDV = 'h6; // aligned miss, issue read to AHB and wait for data.
localparam STATE_MISS_FETCH_DONE = 'h7; // write data into SRAM/LUT
localparam STATE_MISS_READ = 'h8; // read block 1 from SRAM/LUT
localparam STATE_MISS_SPILL_FETCH_WDV = 9; // spill, miss on block 0, issue read to AHB and wait
localparam STATE_MISS_SPILL_FETCH_DONE = 10; // write data into SRAM/LUT
localparam STATE_MISS_SPILL_READ1 = 11; // read block 0 from SRAM/LUT
localparam STATE_MISS_SPILL_2 = 12; // return to ready if hit or do second block update.
localparam STATE_MISS_SPILL_2_START = 13; // return to ready if hit or do second block update.
localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 14; // miss on block 1, issue read to AHB and wait
localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 15; // write data to SRAM/LUT
localparam STATE_MISS_SPILL_MERGE = 16; // read block 0 of CPU access,
localparam STATE_MISS_SPILL_FETCH_WDV = 'h9; // spill, miss on block 0, issue read to AHB and wait
localparam STATE_MISS_SPILL_FETCH_DONE = 'ha; // write data into SRAM/LUT
localparam STATE_MISS_SPILL_READ1 = 'hb; // read block 0 from SRAM/LUT
localparam STATE_MISS_SPILL_2 = 'hc; // return to ready if hit or do second block update.
localparam STATE_MISS_SPILL_2_START = 'hd; // return to ready if hit or do second block update.
localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 'he; // miss on block 1, issue read to AHB and wait
localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 'hf; // write data to SRAM/LUT
localparam STATE_MISS_SPILL_MERGE = 'h10; // read block 0 of CPU access,
localparam STATE_MISS_SPILL_FINAL = 17; // this state replicates STATE_READY's replay of the
localparam STATE_MISS_SPILL_FINAL = 'h11; // this state replicates STATE_READY's replay of the
// spill access but does nto consider spill. It also does not do another operation.
localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? invalidate by cache block or address?
localparam STATE_INVALIDATE = 'h12; // *** not sure if invalidate or evict? invalidate by cache block or address?
localparam STATE_TLB_MISS = 'h13;
localparam STATE_TLB_MISS_DONE = 'h14;
localparam AHBByteLength = `XLEN / 8;
localparam AHBOFFETWIDTH = $clog2(AHBByteLength);
@ -119,39 +127,39 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) (
localparam WORDSPERLINE = BLOCKLEN/`XLEN;
localparam LOGWPL = $clog2(WORDSPERLINE);
localparam integer PA_WIDTH = `PA_BITS - 2;
localparam integer PA_WIDTH = `PA_BITS - 2;
logic [4:0] CurrState, NextState;
logic hit, spill;
logic SavePC;
logic [1:0] PCMux;
logic CntReset;
logic PreCntEn, CntEn;
logic spillSave;
logic UnalignedSelect;
logic FetchCountFlag;
logic [4:0] CurrState, NextState;
logic hit, spill;
logic SavePC;
logic [1:0] PCMux;
logic CntReset;
logic PreCntEn, CntEn;
logic spillSave;
logic UnalignedSelect;
logic FetchCountFlag;
localparam FetchCountThreshold = WORDSPERLINE - 1;
logic [LOGWPL:0] FetchCount, NextFetchCount;
logic [LOGWPL:0] FetchCount, NextFetchCount;
logic [`PA_BITS-1:0] PCPreFinalF, PCPSpillF;
logic [`PA_BITS-1:0] PCPreFinalF, PCPSpillF;
logic [`PA_BITS-1:OFFSETWIDTH] PCPTrunkF;
logic [15:0] SpillDataBlock0;
logic [15:0] SpillDataBlock0;
localparam [31:0] NOP = 32'h13;
logic reset_q;
logic [1:0] PCMux_q;
logic reset_q;
logic [1:0] PCMux_q;
// Misaligned signals
//logic [`XLEN:0] MisalignedInstrRawF;
//logic MisalignedStall;
// Cache fault signals
//logic FaultStall;
// Misaligned signals
//logic [`XLEN:0] MisalignedInstrRawF;
//logic MisalignedStall;
// Cache fault signals
//logic FaultStall;
// on spill we want to get the first 2 bytes of the next cache block.
// the spill only occurs if the PCPF mod BlockByteLength == -2. Therefore we can
@ -175,7 +183,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) (
// truncate the offset from PCPF for memory address generation
assign PCPTrunkF = PCTagF[`PA_BITS-1:OFFSETWIDTH];
// Detect if the instruction is compressed
// Detect if the instruction is compressed
assign CompressedF = FinalInstrRawF[1:0] != 2'b11;
@ -205,167 +213,175 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) (
ICacheStallF = 1'b1;
case (CurrState)
STATE_READY: begin
PCMux = 2'b00;
ICacheReadEn = 1'b1;
if (hit & ~spill) begin
SavePC = 1'b1;
ICacheStallF = 1'b0;
NextState = STATE_READY;
end else if (hit & spill) begin
spillSave = 1'b1;
PCMux = 2'b10;
NextState = STATE_HIT_SPILL;
end else if (~hit & ~spill) begin
CntReset = 1'b1;
NextState = STATE_MISS_FETCH_WDV;
end else if (~hit & spill) begin
CntReset = 1'b1;
PCMux = 2'b01;
NextState = STATE_MISS_SPILL_FETCH_WDV;
end else begin
PCMux = 2'b00;
ICacheReadEn = 1'b1;
if (ITLBMissF) begin
NextState = STATE_TLB_MISS;
end else if (hit & ~spill) begin
SavePC = 1'b1;
ICacheStallF = 1'b0;
NextState = STATE_READY;
end
end else if (hit & spill) begin
spillSave = 1'b1;
PCMux = 2'b10;
NextState = STATE_HIT_SPILL;
end else if (~hit & ~spill) begin
CntReset = 1'b1;
NextState = STATE_MISS_FETCH_WDV;
end else if (~hit & spill) begin
CntReset = 1'b1;
PCMux = 2'b01;
NextState = STATE_MISS_SPILL_FETCH_WDV;
end else begin
NextState = STATE_READY;
end
end
// branch 1, hit spill and 2, miss spill hit
STATE_HIT_SPILL: begin
PCMux = 2'b10;
UnalignedSelect = 1'b1;
ICacheReadEn = 1'b1;
if (hit) begin
PCMux = 2'b10;
UnalignedSelect = 1'b1;
ICacheReadEn = 1'b1;
if (hit) begin
NextState = STATE_HIT_SPILL_FINAL;
end else begin
CntReset = 1'b1;
end else begin
CntReset = 1'b1;
NextState = STATE_HIT_SPILL_MISS_FETCH_WDV;
end
end
end
STATE_HIT_SPILL_MISS_FETCH_WDV: begin
PCMux = 2'b10;
//InstrReadF = 1'b1;
PreCntEn = 1'b1;
if (FetchCountFlag & InstrAckF) begin
NextState = STATE_HIT_SPILL_MISS_FETCH_DONE;
end else begin
NextState = STATE_HIT_SPILL_MISS_FETCH_WDV;
end
PCMux = 2'b10;
//InstrReadF = 1'b1;
PreCntEn = 1'b1;
if (FetchCountFlag & InstrAckF) begin
NextState = STATE_HIT_SPILL_MISS_FETCH_DONE;
end else begin
NextState = STATE_HIT_SPILL_MISS_FETCH_WDV;
end
end
STATE_HIT_SPILL_MISS_FETCH_DONE: begin
PCMux = 2'b10;
ICacheMemWriteEnable = 1'b1;
PCMux = 2'b10;
ICacheMemWriteEnable = 1'b1;
NextState = STATE_HIT_SPILL_MERGE;
end
STATE_HIT_SPILL_MERGE: begin
PCMux = 2'b10;
UnalignedSelect = 1'b1;
ICacheReadEn = 1'b1;
PCMux = 2'b10;
UnalignedSelect = 1'b1;
ICacheReadEn = 1'b1;
NextState = STATE_HIT_SPILL_FINAL;
end
STATE_HIT_SPILL_FINAL: begin
ICacheReadEn = 1'b1;
PCMux = 2'b00;
UnalignedSelect = 1'b1;
SavePC = 1'b1;
NextState = STATE_READY;
ICacheStallF = 1'b0;
ICacheReadEn = 1'b1;
PCMux = 2'b00;
UnalignedSelect = 1'b1;
SavePC = 1'b1;
NextState = STATE_READY;
ICacheStallF = 1'b0;
end
// branch 3 miss no spill
STATE_MISS_FETCH_WDV: begin
PCMux = 2'b01;
//InstrReadF = 1'b1;
PreCntEn = 1'b1;
if (FetchCountFlag & InstrAckF) begin
NextState = STATE_MISS_FETCH_DONE;
end else begin
NextState = STATE_MISS_FETCH_WDV;
end
PCMux = 2'b01;
//InstrReadF = 1'b1;
PreCntEn = 1'b1;
if (FetchCountFlag & InstrAckF) begin
NextState = STATE_MISS_FETCH_DONE;
end else begin
NextState = STATE_MISS_FETCH_WDV;
end
end
STATE_MISS_FETCH_DONE: begin
PCMux = 2'b01;
ICacheMemWriteEnable = 1'b1;
PCMux = 2'b01;
ICacheMemWriteEnable = 1'b1;
NextState = STATE_MISS_READ;
end
STATE_MISS_READ: begin
PCMux = 2'b01;
ICacheReadEn = 1'b1;
NextState = STATE_READY;
PCMux = 2'b01;
ICacheReadEn = 1'b1;
NextState = STATE_READY;
end
// branch 4 miss spill hit, and 5 miss spill miss
STATE_MISS_SPILL_FETCH_WDV: begin
PCMux = 2'b01;
PreCntEn = 1'b1;
//InstrReadF = 1'b1;
if (FetchCountFlag & InstrAckF) begin
NextState = STATE_MISS_SPILL_FETCH_DONE;
end else begin
NextState = STATE_MISS_SPILL_FETCH_WDV;
end
PCMux = 2'b01;
PreCntEn = 1'b1;
//InstrReadF = 1'b1;
if (FetchCountFlag & InstrAckF) begin
NextState = STATE_MISS_SPILL_FETCH_DONE;
end else begin
NextState = STATE_MISS_SPILL_FETCH_WDV;
end
end
STATE_MISS_SPILL_FETCH_DONE: begin
PCMux = 2'b01;
ICacheMemWriteEnable = 1'b1;
NextState = STATE_MISS_SPILL_READ1;
PCMux = 2'b01;
ICacheMemWriteEnable = 1'b1;
NextState = STATE_MISS_SPILL_READ1;
end
STATE_MISS_SPILL_READ1: begin // always be a hit as we just wrote that cache block.
PCMux = 2'b01; // there is a 1 cycle delay after setting the address before the date arrives.
ICacheReadEn = 1'b1;
NextState = STATE_MISS_SPILL_2;
PCMux = 2'b01; // there is a 1 cycle delay after setting the address before the date arrives.
ICacheReadEn = 1'b1;
NextState = STATE_MISS_SPILL_2;
end
STATE_MISS_SPILL_2: begin
PCMux = 2'b10;
UnalignedSelect = 1'b1;
spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm.
ICacheReadEn = 1'b1;
NextState = STATE_MISS_SPILL_2_START;
PCMux = 2'b10;
UnalignedSelect = 1'b1;
spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm.
ICacheReadEn = 1'b1;
NextState = STATE_MISS_SPILL_2_START;
end
STATE_MISS_SPILL_2_START: begin
if (~hit) begin
CntReset = 1'b1;
NextState = STATE_MISS_SPILL_MISS_FETCH_WDV;
end else begin
NextState = STATE_READY;
ICacheReadEn = 1'b1;
PCMux = 2'b00;
UnalignedSelect = 1'b1;
SavePC = 1'b1;
ICacheStallF = 1'b0;
end
if (~hit) begin
CntReset = 1'b1;
NextState = STATE_MISS_SPILL_MISS_FETCH_WDV;
end else begin
NextState = STATE_READY;
ICacheReadEn = 1'b1;
PCMux = 2'b00;
UnalignedSelect = 1'b1;
SavePC = 1'b1;
ICacheStallF = 1'b0;
end
end
STATE_MISS_SPILL_MISS_FETCH_WDV: begin
PCMux = 2'b10;
PreCntEn = 1'b1;
//InstrReadF = 1'b1;
if (FetchCountFlag & InstrAckF) begin
NextState = STATE_MISS_SPILL_MISS_FETCH_DONE;
end else begin
NextState = STATE_MISS_SPILL_MISS_FETCH_WDV;
end
PCMux = 2'b10;
PreCntEn = 1'b1;
//InstrReadF = 1'b1;
if (FetchCountFlag & InstrAckF) begin
NextState = STATE_MISS_SPILL_MISS_FETCH_DONE;
end else begin
NextState = STATE_MISS_SPILL_MISS_FETCH_WDV;
end
end
STATE_MISS_SPILL_MISS_FETCH_DONE: begin
PCMux = 2'b10;
ICacheMemWriteEnable = 1'b1;
NextState = STATE_MISS_SPILL_MERGE;
PCMux = 2'b10;
ICacheMemWriteEnable = 1'b1;
NextState = STATE_MISS_SPILL_MERGE;
end
STATE_MISS_SPILL_MERGE: begin
PCMux = 2'b10;
UnalignedSelect = 1'b1;
ICacheReadEn = 1'b1;
PCMux = 2'b10;
UnalignedSelect = 1'b1;
ICacheReadEn = 1'b1;
NextState = STATE_MISS_SPILL_FINAL;
end
STATE_MISS_SPILL_FINAL: begin
ICacheReadEn = 1'b1;
PCMux = 2'b00;
UnalignedSelect = 1'b1;
SavePC = 1'b1;
ICacheStallF = 1'b0;
NextState = STATE_READY;
ICacheReadEn = 1'b1;
PCMux = 2'b00;
UnalignedSelect = 1'b1;
SavePC = 1'b1;
ICacheStallF = 1'b0;
NextState = STATE_READY;
end
STATE_TLB_MISS: begin
if (ITLBWriteF | WalkerInstrPageFaultF) begin
NextState = STATE_TLB_MISS_DONE;
end else begin
NextState = STATE_TLB_MISS;
end
end
STATE_TLB_MISS_DONE : begin
NextState = STATE_READY;
end
default: begin
PCMux = 2'b01;
NextState = STATE_READY;
PCMux = 2'b01;
NextState = STATE_READY;
end
// *** add in error handling and invalidate/evict
endcase
@ -407,7 +423,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) (
// store read data from memory interface before writing into SRAM.
genvar i;
genvar i;
generate
for (i = 0; i < WORDSPERLINE; i++) begin
flopenr #(`XLEN) flop(.clk(clk),

View File

@ -28,24 +28,28 @@
module icache
(
// Basic pipeline stuff
input logic clk, reset,
input logic StallF, StallD,
input logic FlushD,
input logic clk, reset,
input logic StallF, StallD,
input logic FlushD,
input logic [`PA_BITS-1:0] PCNextF,
input logic [`PA_BITS-1:0] PCPF,
// Data read in from the ebu unit
input logic [`XLEN-1:0] InstrInF,
input logic InstrAckF,
input logic [`XLEN-1:0] InstrInF,
input logic InstrAckF,
// Read requested from the ebu unit
output logic [`PA_BITS-1:0] InstrPAdrF,
output logic InstrReadF,
output logic InstrReadF,
// High if the instruction currently in the fetch stage is compressed
output logic CompressedF,
output logic CompressedF,
// High if the icache is requesting a stall
output logic ICacheStallF,
output logic ICacheStallF,
input logic ITLBMissF,
input logic ITLBWriteF,
input logic WalkerInstrPageFaultF,
// The raw (not decompressed) instruction that was requested
// If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros
output logic [31:0] FinalInstrRawF
output logic [31:0] FinalInstrRawF
);
// Configuration parameters

View File

@ -51,18 +51,20 @@ module ahblite (
input logic MemReadM, MemWriteM,
input logic [`XLEN-1:0] WriteDataM,
input logic [1:0] MemSizeM,
//output logic DataStall,
// Signals from MMU
/* -----\/----- EXCLUDED -----\/-----
input logic MMUStall,
input logic [`XLEN-1:0] MMUPAdr,
input logic MMUTranslate,
output logic [`XLEN-1:0] MMUReadPTE,
output logic MMUReady,
-----/\----- EXCLUDED -----/\----- */
// Signals from PMA checker
input logic DSquashBusAccessM, ISquashBusAccessF,
// Signals to PMA checker (metadata of proposed access)
output logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM,
// Return from bus
output logic [`XLEN-1:0] ReadDataW,
output logic [`XLEN-1:0] HRDATAW,
// AHB-Lite external signals
input logic [`AHBW-1:0] HRDATA,
input logic HREADY, HRESP,
@ -80,14 +82,13 @@ module ahblite (
output logic [3:0] HSIZED,
output logic HWRITED,
// Stalls
output logic /*InstrUpdate, */DataStall,
output logic CommitM, MemAckW
);
logic GrantData;
logic [31:0] AccessAddress;
logic [2:0] AccessSize, PTESize, ISize;
logic [`AHBW-1:0] HRDATAMasked, ReadDataM, CapturedData, ReadDataWnext, WriteData;
logic [`AHBW-1:0] HRDATAMasked, ReadDataM, CapturedHRDATAMasked, HRDATANext, WriteData;
logic IReady, DReady;
logic CaptureDataM,CapturedDataAvailable;
@ -115,14 +116,16 @@ module ahblite (
// interface that might be used in place of the ahblite.
always_comb
case (BusState)
IDLE: if (MMUTranslate) ProposedNextBusState = MMUTRANSLATE;
else if (AtomicMaskedM[1]) ProposedNextBusState = ATOMICREAD;
IDLE: /*if (MMUTranslate) ProposedNextBusState = MMUTRANSLATE;
else*/ if (AtomicMaskedM[1]) ProposedNextBusState = ATOMICREAD;
else if (MemReadM) ProposedNextBusState = MEMREAD; // Memory has priority over instructions
else if (MemWriteM) ProposedNextBusState = MEMWRITE;
else if (InstrReadF) ProposedNextBusState = INSTRREAD;
else ProposedNextBusState = IDLE;
/* -----\/----- EXCLUDED -----\/-----
MMUTRANSLATE: if (~HREADY) ProposedNextBusState = MMUTRANSLATE;
else ProposedNextBusState = IDLE;
-----/\----- EXCLUDED -----/\----- */
ATOMICREAD: if (~HREADY) ProposedNextBusState = ATOMICREAD;
else ProposedNextBusState = ATOMICWRITE;
ATOMICWRITE: if (~HREADY) ProposedNextBusState = ATOMICWRITE;
@ -140,21 +143,21 @@ module ahblite (
endcase
// Determine access type (important for determining whether to fault)
assign AtomicAccessM = (ProposedNextBusState == ATOMICREAD) || (ProposedNextBusState == ATOMICWRITE);
assign ExecuteAccessF = (ProposedNextBusState == INSTRREAD);
assign WriteAccessM = (ProposedNextBusState == MEMWRITE) || (ProposedNextBusState == ATOMICWRITE);
assign ReadAccessM = (ProposedNextBusState == MEMREAD) || (ProposedNextBusState == ATOMICREAD) ||
(ProposedNextBusState == MMUTRANSLATE);
// (ProposedNextBusState == MMUTRANSLATE);
// The PMA and PMP checkers can decide to squash the access
// *** this probably needs to be controlled by the caches rather than EBU dh 7/2/11
assign NextBusState = (DSquashBusAccessM || ISquashBusAccessF) ? IDLE : ProposedNextBusState;
// stall signals
// Note that we need to extend both stalls when MMUTRANSLATE goes to idle,
// since translation might not be complete.
// *** Ross Thompson remove this datastall
/* -----\/----- EXCLUDED -----\/-----
assign #2 DataStall = ((NextBusState == MEMREAD) || (NextBusState == MEMWRITE) ||
(NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE) ||
MMUStall);
(NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE));
-----/\----- EXCLUDED -----/\----- */
//assign #1 InstrStall = ((NextBusState == INSTRREAD) || (NextBusState == INSTRREADC) ||
// MMUStall);
@ -163,14 +166,16 @@ module ahblite (
assign #1 GrantData = (ProposedNextBusState == MEMREAD) || (ProposedNextBusState == MEMWRITE) ||
(ProposedNextBusState == ATOMICREAD) || (ProposedNextBusState == ATOMICWRITE);
assign #1 AccessAddress = (GrantData) ? MemPAdrM[31:0] : InstrPAdrF[31:0];
assign #1 HADDR = (MMUTranslate) ? MMUPAdr[31:0] : AccessAddress;
//assign #1 HADDR = (MMUTranslate) ? MMUPAdr[31:0] : AccessAddress;
assign #1 HADDR = AccessAddress;
generate
if (`XLEN == 32) assign PTESize = 3'b010; // in rv32, PTEs are 4 bytes
else assign PTESize = 3'b011; // in rv64, PTEs are 8 bytes
endgenerate
assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width; ignored on reads anyway
assign #1 AccessSize = (GrantData) ? {1'b0, MemSizeM} : ISize;
assign #1 HSIZE = (MMUTranslate) ? PTESize : AccessSize;
//assign #1 HSIZE = (MMUTranslate) ? PTESize : AccessSize;
assign #1 HSIZE = AccessSize;
assign HBURST = 3'b000; // Single burst only supported; consider generalizing for cache fillsfH
assign HPROT = 4'b0011; // not used; see Section 3.7
assign HTRANS = (NextBusState != IDLE) ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise
@ -186,7 +191,7 @@ module ahblite (
// Route signals to Instruction and Data Caches
// *** assumes AHBW = XLEN
assign MMUReady = (BusState == MMUTRANSLATE && HREADY);
//assign MMUReady = (BusState == MMUTRANSLATE && HREADY);
assign InstrRData = HRDATA;
assign InstrAckF = (BusState == INSTRREAD) && (NextBusState != INSTRREAD);
@ -194,15 +199,14 @@ module ahblite (
// *** Bracker 6/5/21: why is this W stage?
assign MemAckW = (BusState == MEMREAD) && (NextBusState != MEMREAD) || (BusState == MEMWRITE) && (NextBusState != MEMWRITE) ||
((BusState == ATOMICREAD) && (NextBusState != ATOMICREAD)) || ((BusState == ATOMICWRITE) && (NextBusState != ATOMICWRITE));
assign MMUReadPTE = HRDATA;
assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021
//assign MMUReadPTE = HRDATA;
// Carefully decide when to update ReadDataW
// ReadDataMstored holds the most recent memory read.
// We need to wait until the pipeline actually advances before we can update the contents of ReadDataW
// (or else the W stage will accidentally get the M stage's data when the pipeline does advance).
assign CaptureDataM = ((BusState == MEMREAD) && (NextBusState != MEMREAD)) ||
((BusState == ATOMICREAD) && (NextBusState != ATOMICREAD));
flopenr #(`XLEN) ReadDataNewWReg(clk, reset, CaptureDataM, ReadDataM, CapturedData);
flopenr #(`XLEN) ReadDataNewWReg(clk, reset, CaptureDataM, HRDATAMasked, CapturedHRDATAMasked);
always @(posedge HCLK, negedge HRESETn)
if (~HRESETn)
@ -211,11 +215,11 @@ module ahblite (
CapturedDataAvailable <= #1 (StallW) ? (CaptureDataM | CapturedDataAvailable) : 1'b0;
always_comb
casez({StallW && (BusState != ATOMICREAD),CapturedDataAvailable})
2'b00: ReadDataWnext = ReadDataM;
2'b01: ReadDataWnext = CapturedData;
2'b1?: ReadDataWnext = ReadDataW;
2'b00: HRDATANext = HRDATAMasked;
2'b01: HRDATANext = CapturedHRDATAMasked;
2'b1?: HRDATANext = HRDATAW;
endcase
flopr #(`XLEN) ReadDataOldWReg(clk, reset, ReadDataWnext, ReadDataW);
flopr #(`XLEN) ReadDataOldWReg(clk, reset, HRDATANext, HRDATAW);
// Extract and sign-extend subwords if necessary
subwordread swr(.*);
@ -226,7 +230,7 @@ module ahblite (
logic [`XLEN-1:0] AMOResult;
// amoalu amoalu(.a(HRDATA), .b(WriteDataM), .funct(Funct7M), .width(MemSizeM),
// .result(AMOResult));
amoalu amoalu(.srca(ReadDataW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM),
amoalu amoalu(.srca(HRDATAW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM),
.result(AMOResult));
mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, AtomicMaskedM[1], WriteData);
end else

View File

@ -1,599 +0,0 @@
// Brent-Kung Carry-save Prefix Adder
module bk128 (cout, sum, a, b, cin);
input [127:0] a, b;
input cin;
output [127:0] sum;
output cout;
wire [128:0] p,g,t;
wire [127:0] c;
// pre-computation
assign p={a^b,1'b0};
assign g={a&b, cin};
assign t[1]=p[1];
assign t[2]=p[2];
assign t[3]=p[3]^g[2];
assign t[4]=p[4];
assign t[5]=p[5]^g[4];
assign t[6]=p[6];
assign t[7]=p[7]^g[6];
assign t[8]=p[8];
assign t[9]=p[9]^g[8];
assign t[10]=p[10];
assign t[11]=p[11]^g[10];
assign t[12]=p[12];
assign t[13]=p[13]^g[12];
assign t[14]=p[14];
assign t[15]=p[15]^g[14];
assign t[16]=p[16];
assign t[17]=p[17]^g[16];
assign t[18]=p[18];
assign t[19]=p[19]^g[18];
assign t[20]=p[20];
assign t[21]=p[21]^g[20];
assign t[22]=p[22];
assign t[23]=p[23]^g[22];
assign t[24]=p[24];
assign t[25]=p[25]^g[24];
assign t[26]=p[26];
assign t[27]=p[27]^g[26];
assign t[28]=p[28];
assign t[29]=p[29]^g[28];
assign t[30]=p[30];
assign t[31]=p[31]^g[30];
assign t[32]=p[32];
assign t[33]=p[33]^g[32];
assign t[34]=p[34];
assign t[35]=p[35]^g[34];
assign t[36]=p[36];
assign t[37]=p[37]^g[36];
assign t[38]=p[38];
assign t[39]=p[39]^g[38];
assign t[40]=p[40];
assign t[41]=p[41]^g[40];
assign t[42]=p[42];
assign t[43]=p[43]^g[42];
assign t[44]=p[44];
assign t[45]=p[45]^g[44];
assign t[46]=p[46];
assign t[47]=p[47]^g[46];
assign t[48]=p[48];
assign t[49]=p[49]^g[48];
assign t[50]=p[50];
assign t[51]=p[51]^g[50];
assign t[52]=p[52];
assign t[53]=p[53]^g[52];
assign t[54]=p[54];
assign t[55]=p[55]^g[54];
assign t[56]=p[56];
assign t[57]=p[57]^g[56];
assign t[58]=p[58];
assign t[59]=p[59]^g[58];
assign t[60]=p[60];
assign t[61]=p[61]^g[60];
assign t[62]=p[62];
assign t[63]=p[63]^g[62];
assign t[64]=p[64];
assign t[65]=p[65]^g[64];
assign t[66]=p[66];
assign t[67]=p[67]^g[66];
assign t[68]=p[68];
assign t[69]=p[69]^g[68];
assign t[70]=p[70];
assign t[71]=p[71]^g[70];
assign t[72]=p[72];
assign t[73]=p[73]^g[72];
assign t[74]=p[74];
assign t[75]=p[75]^g[74];
assign t[76]=p[76];
assign t[77]=p[77]^g[76];
assign t[78]=p[78];
assign t[79]=p[79]^g[78];
assign t[80]=p[80];
assign t[81]=p[81]^g[80];
assign t[82]=p[82];
assign t[83]=p[83]^g[82];
assign t[84]=p[84];
assign t[85]=p[85]^g[84];
assign t[86]=p[86];
assign t[87]=p[87]^g[86];
assign t[88]=p[88];
assign t[89]=p[89]^g[88];
assign t[90]=p[90];
assign t[91]=p[91]^g[90];
assign t[92]=p[92];
assign t[93]=p[93]^g[92];
assign t[94]=p[94];
assign t[95]=p[95]^g[94];
assign t[96]=p[96];
assign t[97]=p[97]^g[96];
assign t[98]=p[98];
assign t[99]=p[99]^g[98];
assign t[100]=p[100];
assign t[101]=p[101]^g[100];
assign t[102]=p[102];
assign t[103]=p[103]^g[102];
assign t[104]=p[104];
assign t[105]=p[105]^g[104];
assign t[106]=p[106];
assign t[107]=p[107]^g[106];
assign t[108]=p[108];
assign t[109]=p[109]^g[108];
assign t[110]=p[110];
assign t[111]=p[111]^g[110];
assign t[112]=p[112];
assign t[113]=p[113]^g[112];
assign t[114]=p[114];
assign t[115]=p[115]^g[114];
assign t[116]=p[116];
assign t[117]=p[117]^g[116];
assign t[118]=p[118];
assign t[119]=p[119]^g[118];
assign t[120]=p[120];
assign t[121]=p[121]^g[120];
assign t[122]=p[122];
assign t[123]=p[123]^g[122];
assign t[124]=p[124];
assign t[125]=p[125]^g[124];
assign t[126]=p[126];
assign t[127]=p[127]^g[126];
assign t[128]=p[128];
// prefix tree
brent_kung_cs128 prefix_tree(c, p[127:0], g[127:0]);
// post-computation
assign sum=p[128:1]^c;
assign cout=g[128]|(p[128]&c[127]);
endmodule
module brent_kung_cs128 (c, p, g);
input [127:0] p;
input [127:0] g;
output [128:1] c;
// parallel-prefix, Brent-Kung
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
black b_15_14 (G_15_14, P_15_14, {g[15],g[14]}, {p[15],p[14]});
black b_17_16 (G_17_16, P_17_16, {g[17],g[16]}, {p[17],p[16]});
black b_19_18 (G_19_18, P_19_18, {g[19],g[18]}, {p[19],p[18]});
black b_21_20 (G_21_20, P_21_20, {g[21],g[20]}, {p[21],p[20]});
black b_23_22 (G_23_22, P_23_22, {g[23],g[22]}, {p[23],p[22]});
black b_25_24 (G_25_24, P_25_24, {g[25],g[24]}, {p[25],p[24]});
black b_27_26 (G_27_26, P_27_26, {g[27],g[26]}, {p[27],p[26]});
black b_29_28 (G_29_28, P_29_28, {g[29],g[28]}, {p[29],p[28]});
black b_31_30 (G_31_30, P_31_30, {g[31],g[30]}, {p[31],p[30]});
black b_33_32 (G_33_32, P_33_32, {g[33],g[32]}, {p[33],p[32]});
black b_35_34 (G_35_34, P_35_34, {g[35],g[34]}, {p[35],p[34]});
black b_37_36 (G_37_36, P_37_36, {g[37],g[36]}, {p[37],p[36]});
black b_39_38 (G_39_38, P_39_38, {g[39],g[38]}, {p[39],p[38]});
black b_41_40 (G_41_40, P_41_40, {g[41],g[40]}, {p[41],p[40]});
black b_43_42 (G_43_42, P_43_42, {g[43],g[42]}, {p[43],p[42]});
black b_45_44 (G_45_44, P_45_44, {g[45],g[44]}, {p[45],p[44]});
black b_47_46 (G_47_46, P_47_46, {g[47],g[46]}, {p[47],p[46]});
black b_49_48 (G_49_48, P_49_48, {g[49],g[48]}, {p[49],p[48]});
black b_51_50 (G_51_50, P_51_50, {g[51],g[50]}, {p[51],p[50]});
black b_53_52 (G_53_52, P_53_52, {g[53],g[52]}, {p[53],p[52]});
black b_55_54 (G_55_54, P_55_54, {g[55],g[54]}, {p[55],p[54]});
black b_57_56 (G_57_56, P_57_56, {g[57],g[56]}, {p[57],p[56]});
black b_59_58 (G_59_58, P_59_58, {g[59],g[58]}, {p[59],p[58]});
black b_61_60 (G_61_60, P_61_60, {g[61],g[60]}, {p[61],p[60]});
black b_63_62 (G_63_62, P_63_62, {g[63],g[62]}, {p[63],p[62]});
black b_65_64 (G_65_64, P_65_64, {g[65],g[64]}, {p[65],p[64]});
black b_67_66 (G_67_66, P_67_66, {g[67],g[66]}, {p[67],p[66]});
black b_69_68 (G_69_68, P_69_68, {g[69],g[68]}, {p[69],p[68]});
black b_71_70 (G_71_70, P_71_70, {g[71],g[70]}, {p[71],p[70]});
black b_73_72 (G_73_72, P_73_72, {g[73],g[72]}, {p[73],p[72]});
black b_75_74 (G_75_74, P_75_74, {g[75],g[74]}, {p[75],p[74]});
black b_77_76 (G_77_76, P_77_76, {g[77],g[76]}, {p[77],p[76]});
black b_79_78 (G_79_78, P_79_78, {g[79],g[78]}, {p[79],p[78]});
black b_81_80 (G_81_80, P_81_80, {g[81],g[80]}, {p[81],p[80]});
black b_83_82 (G_83_82, P_83_82, {g[83],g[82]}, {p[83],p[82]});
black b_85_84 (G_85_84, P_85_84, {g[85],g[84]}, {p[85],p[84]});
black b_87_86 (G_87_86, P_87_86, {g[87],g[86]}, {p[87],p[86]});
black b_89_88 (G_89_88, P_89_88, {g[89],g[88]}, {p[89],p[88]});
black b_91_90 (G_91_90, P_91_90, {g[91],g[90]}, {p[91],p[90]});
black b_93_92 (G_93_92, P_93_92, {g[93],g[92]}, {p[93],p[92]});
black b_95_94 (G_95_94, P_95_94, {g[95],g[94]}, {p[95],p[94]});
black b_97_96 (G_97_96, P_97_96, {g[97],g[96]}, {p[97],p[96]});
black b_99_98 (G_99_98, P_99_98, {g[99],g[98]}, {p[99],p[98]});
black b_101_100 (G_101_100, P_101_100, {g[101],g[100]}, {p[101],p[100]});
black b_103_102 (G_103_102, P_103_102, {g[103],g[102]}, {p[103],p[102]});
black b_105_104 (G_105_104, P_105_104, {g[105],g[104]}, {p[105],p[104]});
black b_107_106 (G_107_106, P_107_106, {g[107],g[106]}, {p[107],p[106]});
black b_109_108 (G_109_108, P_109_108, {g[109],g[108]}, {p[109],p[108]});
black b_111_110 (G_111_110, P_111_110, {g[111],g[110]}, {p[111],p[110]});
black b_113_112 (G_113_112, P_113_112, {g[113],g[112]}, {p[113],p[112]});
black b_115_114 (G_115_114, P_115_114, {g[115],g[114]}, {p[115],p[114]});
black b_117_116 (G_117_116, P_117_116, {g[117],g[116]}, {p[117],p[116]});
black b_119_118 (G_119_118, P_119_118, {g[119],g[118]}, {p[119],p[118]});
black b_121_120 (G_121_120, P_121_120, {g[121],g[120]}, {p[121],p[120]});
black b_123_122 (G_123_122, P_123_122, {g[123],g[122]}, {p[123],p[122]});
black b_125_124 (G_125_124, P_125_124, {g[125],g[124]}, {p[125],p[124]});
black b_127_126 (G_127_126, P_127_126, {g[127],g[126]}, {p[127],p[126]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12});
black b_19_16 (G_19_16, P_19_16, {G_19_18,G_17_16}, {P_19_18,P_17_16});
black b_23_20 (G_23_20, P_23_20, {G_23_22,G_21_20}, {P_23_22,P_21_20});
black b_27_24 (G_27_24, P_27_24, {G_27_26,G_25_24}, {P_27_26,P_25_24});
black b_31_28 (G_31_28, P_31_28, {G_31_30,G_29_28}, {P_31_30,P_29_28});
black b_35_32 (G_35_32, P_35_32, {G_35_34,G_33_32}, {P_35_34,P_33_32});
black b_39_36 (G_39_36, P_39_36, {G_39_38,G_37_36}, {P_39_38,P_37_36});
black b_43_40 (G_43_40, P_43_40, {G_43_42,G_41_40}, {P_43_42,P_41_40});
black b_47_44 (G_47_44, P_47_44, {G_47_46,G_45_44}, {P_47_46,P_45_44});
black b_51_48 (G_51_48, P_51_48, {G_51_50,G_49_48}, {P_51_50,P_49_48});
black b_55_52 (G_55_52, P_55_52, {G_55_54,G_53_52}, {P_55_54,P_53_52});
black b_59_56 (G_59_56, P_59_56, {G_59_58,G_57_56}, {P_59_58,P_57_56});
black b_63_60 (G_63_60, P_63_60, {G_63_62,G_61_60}, {P_63_62,P_61_60});
black b_67_64 (G_67_64, P_67_64, {G_67_66,G_65_64}, {P_67_66,P_65_64});
black b_71_68 (G_71_68, P_71_68, {G_71_70,G_69_68}, {P_71_70,P_69_68});
black b_75_72 (G_75_72, P_75_72, {G_75_74,G_73_72}, {P_75_74,P_73_72});
black b_79_76 (G_79_76, P_79_76, {G_79_78,G_77_76}, {P_79_78,P_77_76});
black b_83_80 (G_83_80, P_83_80, {G_83_82,G_81_80}, {P_83_82,P_81_80});
black b_87_84 (G_87_84, P_87_84, {G_87_86,G_85_84}, {P_87_86,P_85_84});
black b_91_88 (G_91_88, P_91_88, {G_91_90,G_89_88}, {P_91_90,P_89_88});
black b_95_92 (G_95_92, P_95_92, {G_95_94,G_93_92}, {P_95_94,P_93_92});
black b_99_96 (G_99_96, P_99_96, {G_99_98,G_97_96}, {P_99_98,P_97_96});
black b_103_100 (G_103_100, P_103_100, {G_103_102,G_101_100}, {P_103_102,P_101_100});
black b_107_104 (G_107_104, P_107_104, {G_107_106,G_105_104}, {P_107_106,P_105_104});
black b_111_108 (G_111_108, P_111_108, {G_111_110,G_109_108}, {P_111_110,P_109_108});
black b_115_112 (G_115_112, P_115_112, {G_115_114,G_113_112}, {P_115_114,P_113_112});
black b_119_116 (G_119_116, P_119_116, {G_119_118,G_117_116}, {P_119_118,P_117_116});
black b_123_120 (G_123_120, P_123_120, {G_123_122,G_121_120}, {P_123_122,P_121_120});
black b_127_124 (G_127_124, P_127_124, {G_127_126,G_125_124}, {P_127_126,P_125_124});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8});
black b_23_16 (G_23_16, P_23_16, {G_23_20,G_19_16}, {P_23_20,P_19_16});
black b_31_24 (G_31_24, P_31_24, {G_31_28,G_27_24}, {P_31_28,P_27_24});
black b_39_32 (G_39_32, P_39_32, {G_39_36,G_35_32}, {P_39_36,P_35_32});
black b_47_40 (G_47_40, P_47_40, {G_47_44,G_43_40}, {P_47_44,P_43_40});
black b_55_48 (G_55_48, P_55_48, {G_55_52,G_51_48}, {P_55_52,P_51_48});
black b_63_56 (G_63_56, P_63_56, {G_63_60,G_59_56}, {P_63_60,P_59_56});
black b_71_64 (G_71_64, P_71_64, {G_71_68,G_67_64}, {P_71_68,P_67_64});
black b_79_72 (G_79_72, P_79_72, {G_79_76,G_75_72}, {P_79_76,P_75_72});
black b_87_80 (G_87_80, P_87_80, {G_87_84,G_83_80}, {P_87_84,P_83_80});
black b_95_88 (G_95_88, P_95_88, {G_95_92,G_91_88}, {P_95_92,P_91_88});
black b_103_96 (G_103_96, P_103_96, {G_103_100,G_99_96}, {P_103_100,P_99_96});
black b_111_104 (G_111_104, P_111_104, {G_111_108,G_107_104}, {P_111_108,P_107_104});
black b_119_112 (G_119_112, P_119_112, {G_119_116,G_115_112}, {P_119_116,P_115_112});
black b_127_120 (G_127_120, P_127_120, {G_127_124,G_123_120}, {P_127_124,P_123_120});
// Stage 4: Generates G/P pairs that span 8 bits
grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8);
black b_31_16 (G_31_16, P_31_16, {G_31_24,G_23_16}, {P_31_24,P_23_16});
black b_47_32 (G_47_32, P_47_32, {G_47_40,G_39_32}, {P_47_40,P_39_32});
black b_63_48 (G_63_48, P_63_48, {G_63_56,G_55_48}, {P_63_56,P_55_48});
black b_79_64 (G_79_64, P_79_64, {G_79_72,G_71_64}, {P_79_72,P_71_64});
black b_95_80 (G_95_80, P_95_80, {G_95_88,G_87_80}, {P_95_88,P_87_80});
black b_111_96 (G_111_96, P_111_96, {G_111_104,G_103_96}, {P_111_104,P_103_96});
black b_127_112 (G_127_112, P_127_112, {G_127_120,G_119_112}, {P_127_120,P_119_112});
// Stage 5: Generates G/P pairs that span 16 bits
grey g_31_0 (G_31_0, {G_31_16,G_15_0}, P_31_16);
black b_63_32 (G_63_32, P_63_32, {G_63_48,G_47_32}, {P_63_48,P_47_32});
black b_95_64 (G_95_64, P_95_64, {G_95_80,G_79_64}, {P_95_80,P_79_64});
black b_127_96 (G_127_96, P_127_96, {G_127_112,G_111_96}, {P_127_112,P_111_96});
// Stage 6: Generates G/P pairs that span 32 bits
grey g_63_0 (G_63_0, {G_63_32,G_31_0}, P_63_32);
black b_127_64 (G_127_64, P_127_64, {G_127_96,G_95_64}, {P_127_96,P_95_64});
// Stage 7: Generates G/P pairs that span 64 bits
grey g_127_0 (G_127_0, {G_127_64,G_63_0}, P_127_64);
// Stage 8: Generates G/P pairs that span 32 bits
grey g_95_0 (G_95_0, {G_95_64,G_63_0}, P_95_64);
// Stage 9: Generates G/P pairs that span 16 bits
grey g_47_0 (G_47_0, {G_47_32,G_31_0}, P_47_32);
grey g_79_0 (G_79_0, {G_79_64,G_63_0}, P_79_64);
grey g_111_0 (G_111_0, {G_111_96,G_95_0}, P_111_96);
// Stage 10: Generates G/P pairs that span 8 bits
grey g_23_0 (G_23_0, {G_23_16,G_15_0}, P_23_16);
grey g_39_0 (G_39_0, {G_39_32,G_31_0}, P_39_32);
grey g_55_0 (G_55_0, {G_55_48,G_47_0}, P_55_48);
grey g_71_0 (G_71_0, {G_71_64,G_63_0}, P_71_64);
grey g_87_0 (G_87_0, {G_87_80,G_79_0}, P_87_80);
grey g_103_0 (G_103_0, {G_103_96,G_95_0}, P_103_96);
grey g_119_0 (G_119_0, {G_119_112,G_111_0}, P_119_112);
// Stage 11: Generates G/P pairs that span 4 bits
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
grey g_19_0 (G_19_0, {G_19_16,G_15_0}, P_19_16);
grey g_27_0 (G_27_0, {G_27_24,G_23_0}, P_27_24);
grey g_35_0 (G_35_0, {G_35_32,G_31_0}, P_35_32);
grey g_43_0 (G_43_0, {G_43_40,G_39_0}, P_43_40);
grey g_51_0 (G_51_0, {G_51_48,G_47_0}, P_51_48);
grey g_59_0 (G_59_0, {G_59_56,G_55_0}, P_59_56);
grey g_67_0 (G_67_0, {G_67_64,G_63_0}, P_67_64);
grey g_75_0 (G_75_0, {G_75_72,G_71_0}, P_75_72);
grey g_83_0 (G_83_0, {G_83_80,G_79_0}, P_83_80);
grey g_91_0 (G_91_0, {G_91_88,G_87_0}, P_91_88);
grey g_99_0 (G_99_0, {G_99_96,G_95_0}, P_99_96);
grey g_107_0 (G_107_0, {G_107_104,G_103_0}, P_107_104);
grey g_115_0 (G_115_0, {G_115_112,G_111_0}, P_115_112);
grey g_123_0 (G_123_0, {G_123_120,G_119_0}, P_123_120);
// Stage 12: Generates G/P pairs that span 2 bits
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);
grey g_17_0 (G_17_0, {G_17_16,G_15_0}, P_17_16);
grey g_21_0 (G_21_0, {G_21_20,G_19_0}, P_21_20);
grey g_25_0 (G_25_0, {G_25_24,G_23_0}, P_25_24);
grey g_29_0 (G_29_0, {G_29_28,G_27_0}, P_29_28);
grey g_33_0 (G_33_0, {G_33_32,G_31_0}, P_33_32);
grey g_37_0 (G_37_0, {G_37_36,G_35_0}, P_37_36);
grey g_41_0 (G_41_0, {G_41_40,G_39_0}, P_41_40);
grey g_45_0 (G_45_0, {G_45_44,G_43_0}, P_45_44);
grey g_49_0 (G_49_0, {G_49_48,G_47_0}, P_49_48);
grey g_53_0 (G_53_0, {G_53_52,G_51_0}, P_53_52);
grey g_57_0 (G_57_0, {G_57_56,G_55_0}, P_57_56);
grey g_61_0 (G_61_0, {G_61_60,G_59_0}, P_61_60);
grey g_65_0 (G_65_0, {G_65_64,G_63_0}, P_65_64);
grey g_69_0 (G_69_0, {G_69_68,G_67_0}, P_69_68);
grey g_73_0 (G_73_0, {G_73_72,G_71_0}, P_73_72);
grey g_77_0 (G_77_0, {G_77_76,G_75_0}, P_77_76);
grey g_81_0 (G_81_0, {G_81_80,G_79_0}, P_81_80);
grey g_85_0 (G_85_0, {G_85_84,G_83_0}, P_85_84);
grey g_89_0 (G_89_0, {G_89_88,G_87_0}, P_89_88);
grey g_93_0 (G_93_0, {G_93_92,G_91_0}, P_93_92);
grey g_97_0 (G_97_0, {G_97_96,G_95_0}, P_97_96);
grey g_101_0 (G_101_0, {G_101_100,G_99_0}, P_101_100);
grey g_105_0 (G_105_0, {G_105_104,G_103_0}, P_105_104);
grey g_109_0 (G_109_0, {G_109_108,G_107_0}, P_109_108);
grey g_113_0 (G_113_0, {G_113_112,G_111_0}, P_113_112);
grey g_117_0 (G_117_0, {G_117_116,G_115_0}, P_117_116);
grey g_121_0 (G_121_0, {G_121_120,G_119_0}, P_121_120);
grey g_125_0 (G_125_0, {G_125_124,G_123_0}, P_125_124);
// Last grey cell stage
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
grey g_14_0 (G_14_0, {g[14],G_13_0}, p[14]);
grey g_16_0 (G_16_0, {g[16],G_15_0}, p[16]);
grey g_18_0 (G_18_0, {g[18],G_17_0}, p[18]);
grey g_20_0 (G_20_0, {g[20],G_19_0}, p[20]);
grey g_22_0 (G_22_0, {g[22],G_21_0}, p[22]);
grey g_24_0 (G_24_0, {g[24],G_23_0}, p[24]);
grey g_26_0 (G_26_0, {g[26],G_25_0}, p[26]);
grey g_28_0 (G_28_0, {g[28],G_27_0}, p[28]);
grey g_30_0 (G_30_0, {g[30],G_29_0}, p[30]);
grey g_32_0 (G_32_0, {g[32],G_31_0}, p[32]);
grey g_34_0 (G_34_0, {g[34],G_33_0}, p[34]);
grey g_36_0 (G_36_0, {g[36],G_35_0}, p[36]);
grey g_38_0 (G_38_0, {g[38],G_37_0}, p[38]);
grey g_40_0 (G_40_0, {g[40],G_39_0}, p[40]);
grey g_42_0 (G_42_0, {g[42],G_41_0}, p[42]);
grey g_44_0 (G_44_0, {g[44],G_43_0}, p[44]);
grey g_46_0 (G_46_0, {g[46],G_45_0}, p[46]);
grey g_48_0 (G_48_0, {g[48],G_47_0}, p[48]);
grey g_50_0 (G_50_0, {g[50],G_49_0}, p[50]);
grey g_52_0 (G_52_0, {g[52],G_51_0}, p[52]);
grey g_54_0 (G_54_0, {g[54],G_53_0}, p[54]);
grey g_56_0 (G_56_0, {g[56],G_55_0}, p[56]);
grey g_58_0 (G_58_0, {g[58],G_57_0}, p[58]);
grey g_60_0 (G_60_0, {g[60],G_59_0}, p[60]);
grey g_62_0 (G_62_0, {g[62],G_61_0}, p[62]);
grey g_64_0 (G_64_0, {g[64],G_63_0}, p[64]);
grey g_66_0 (G_66_0, {g[66],G_65_0}, p[66]);
grey g_68_0 (G_68_0, {g[68],G_67_0}, p[68]);
grey g_70_0 (G_70_0, {g[70],G_69_0}, p[70]);
grey g_72_0 (G_72_0, {g[72],G_71_0}, p[72]);
grey g_74_0 (G_74_0, {g[74],G_73_0}, p[74]);
grey g_76_0 (G_76_0, {g[76],G_75_0}, p[76]);
grey g_78_0 (G_78_0, {g[78],G_77_0}, p[78]);
grey g_80_0 (G_80_0, {g[80],G_79_0}, p[80]);
grey g_82_0 (G_82_0, {g[82],G_81_0}, p[82]);
grey g_84_0 (G_84_0, {g[84],G_83_0}, p[84]);
grey g_86_0 (G_86_0, {g[86],G_85_0}, p[86]);
grey g_88_0 (G_88_0, {g[88],G_87_0}, p[88]);
grey g_90_0 (G_90_0, {g[90],G_89_0}, p[90]);
grey g_92_0 (G_92_0, {g[92],G_91_0}, p[92]);
grey g_94_0 (G_94_0, {g[94],G_93_0}, p[94]);
grey g_96_0 (G_96_0, {g[96],G_95_0}, p[96]);
grey g_98_0 (G_98_0, {g[98],G_97_0}, p[98]);
grey g_100_0 (G_100_0, {g[100],G_99_0}, p[100]);
grey g_102_0 (G_102_0, {g[102],G_101_0}, p[102]);
grey g_104_0 (G_104_0, {g[104],G_103_0}, p[104]);
grey g_106_0 (G_106_0, {g[106],G_105_0}, p[106]);
grey g_108_0 (G_108_0, {g[108],G_107_0}, p[108]);
grey g_110_0 (G_110_0, {g[110],G_109_0}, p[110]);
grey g_112_0 (G_112_0, {g[112],G_111_0}, p[112]);
grey g_114_0 (G_114_0, {g[114],G_113_0}, p[114]);
grey g_116_0 (G_116_0, {g[116],G_115_0}, p[116]);
grey g_118_0 (G_118_0, {g[118],G_117_0}, p[118]);
grey g_120_0 (G_120_0, {g[120],G_119_0}, p[120]);
grey g_122_0 (G_122_0, {g[122],G_121_0}, p[122]);
grey g_124_0 (G_124_0, {g[124],G_123_0}, p[124]);
grey g_126_0 (G_126_0, {g[126],G_125_0}, p[126]);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
assign c[14]=G_13_0;
assign c[15]=G_14_0;
assign c[16]=G_15_0;
assign c[17]=G_16_0;
assign c[18]=G_17_0;
assign c[19]=G_18_0;
assign c[20]=G_19_0;
assign c[21]=G_20_0;
assign c[22]=G_21_0;
assign c[23]=G_22_0;
assign c[24]=G_23_0;
assign c[25]=G_24_0;
assign c[26]=G_25_0;
assign c[27]=G_26_0;
assign c[28]=G_27_0;
assign c[29]=G_28_0;
assign c[30]=G_29_0;
assign c[31]=G_30_0;
assign c[32]=G_31_0;
assign c[33]=G_32_0;
assign c[34]=G_33_0;
assign c[35]=G_34_0;
assign c[36]=G_35_0;
assign c[37]=G_36_0;
assign c[38]=G_37_0;
assign c[39]=G_38_0;
assign c[40]=G_39_0;
assign c[41]=G_40_0;
assign c[42]=G_41_0;
assign c[43]=G_42_0;
assign c[44]=G_43_0;
assign c[45]=G_44_0;
assign c[46]=G_45_0;
assign c[47]=G_46_0;
assign c[48]=G_47_0;
assign c[49]=G_48_0;
assign c[50]=G_49_0;
assign c[51]=G_50_0;
assign c[52]=G_51_0;
assign c[53]=G_52_0;
assign c[54]=G_53_0;
assign c[55]=G_54_0;
assign c[56]=G_55_0;
assign c[57]=G_56_0;
assign c[58]=G_57_0;
assign c[59]=G_58_0;
assign c[60]=G_59_0;
assign c[61]=G_60_0;
assign c[62]=G_61_0;
assign c[63]=G_62_0;
assign c[64]=G_63_0;
assign c[65]=G_64_0;
assign c[66]=G_65_0;
assign c[67]=G_66_0;
assign c[68]=G_67_0;
assign c[69]=G_68_0;
assign c[70]=G_69_0;
assign c[71]=G_70_0;
assign c[72]=G_71_0;
assign c[73]=G_72_0;
assign c[74]=G_73_0;
assign c[75]=G_74_0;
assign c[76]=G_75_0;
assign c[77]=G_76_0;
assign c[78]=G_77_0;
assign c[79]=G_78_0;
assign c[80]=G_79_0;
assign c[81]=G_80_0;
assign c[82]=G_81_0;
assign c[83]=G_82_0;
assign c[84]=G_83_0;
assign c[85]=G_84_0;
assign c[86]=G_85_0;
assign c[87]=G_86_0;
assign c[88]=G_87_0;
assign c[89]=G_88_0;
assign c[90]=G_89_0;
assign c[91]=G_90_0;
assign c[92]=G_91_0;
assign c[93]=G_92_0;
assign c[94]=G_93_0;
assign c[95]=G_94_0;
assign c[96]=G_95_0;
assign c[97]=G_96_0;
assign c[98]=G_97_0;
assign c[99]=G_98_0;
assign c[100]=G_99_0;
assign c[101]=G_100_0;
assign c[102]=G_101_0;
assign c[103]=G_102_0;
assign c[104]=G_103_0;
assign c[105]=G_104_0;
assign c[106]=G_105_0;
assign c[107]=G_106_0;
assign c[108]=G_107_0;
assign c[109]=G_108_0;
assign c[110]=G_109_0;
assign c[111]=G_110_0;
assign c[112]=G_111_0;
assign c[113]=G_112_0;
assign c[114]=G_113_0;
assign c[115]=G_114_0;
assign c[116]=G_115_0;
assign c[117]=G_116_0;
assign c[118]=G_117_0;
assign c[119]=G_118_0;
assign c[120]=G_119_0;
assign c[121]=G_120_0;
assign c[122]=G_121_0;
assign c[123]=G_122_0;
assign c[124]=G_123_0;
assign c[125]=G_124_0;
assign c[126]=G_125_0;
assign c[127]=G_126_0;
assign c[128]=G_127_0;
endmodule // brent_kung_cs

View File

@ -1,97 +0,0 @@
// Brent-Kung Carry-save Prefix Adder
module bk13 (cout, sum, a, b, cin);
input [12:0] a, b;
input cin;
output [12:0] sum;
output cout;
wire [13:0] p,g,t;
wire [12:0] c;
// pre-computation
assign p={a^b,1'b0};
assign g={a&b, cin};
assign t[1]=p[1];
assign t[2]=p[2];
assign t[3]=p[3]^g[2];
assign t[4]=p[4];
assign t[5]=p[5]^g[4];
assign t[6]=p[6];
assign t[7]=p[7]^g[6];
assign t[8]=p[8];
assign t[9]=p[9]^g[8];
assign t[10]=p[10];
assign t[11]=p[11]^g[10];
assign t[12]=p[12];
assign t[13]=p[13];
// prefix tree
brent_kung_cs13 prefix_tree(c, p[12:0], g[12:0]);
// post-computation
assign sum=p[13:1]^c;
assign cout=g[13]|(p[13]&c[12]);
endmodule
module brent_kung_cs13 (c, p, g);
input [13:0] p;
input [13:0] g;
output [13:1] c;
// parallel-prefix, Brent-Kung
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
// Stage 4: Generates G/P pairs that span 8 bits
// Stage 5: Generates G/P pairs that span 4 bits
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
// Stage 6: Generates G/P pairs that span 2 bits
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
// Last grey cell stage
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
endmodule

View File

@ -1,86 +0,0 @@
// Brent-Kung Prefix Adder
module bk14 (cout, sum, a, b, cin);
input [13:0] a, b;
input cin;
output [13:0] sum;
output cout;
wire [14:0] p,g;
wire [13:0] c;
// pre-computation
assign p={a^b,1'b0};
assign g={a&b, cin};
// prefix tree
brent_kung14 prefix_tree(c, p[13:0], g[13:0]);
// post-computation
assign sum=p[14:1]^c;
assign cout=g[14]|(p[14]&c[13]);
endmodule
module brent_kung14 (c, p, g);
input [13:0] p;
input [13:0] g;
output [14:1] c;
// parallel-prefix, Brent-Kung
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
// Stage 4: Generates G/P pairs that span 8 bits
// Stage 5: Generates G/P pairs that span 4 bits
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
// Stage 6: Generates G/P pairs that span 2 bits
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);
// Last grey cell stage
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
assign c[14]=G_13_0;
endmodule

View File

@ -1,70 +0,0 @@
module ha (C, S, A, B) ;
input A, B;
output S, C;
assign S = A^B;
assign C = A&B;
endmodule // HA
// module fa (input logic a, b, c, output logic sum, carry);
// assign sum = a^b^c;
// assign carry = a&b|a&c|b&c;
// endmodule // fa
// module csa #(parameter WIDTH=8) (a, b,c, sum, carry, cout);
// input logic [WIDTH-1:0] a, b, c;
// output logic [WIDTH-1:0] sum, carry;
// output logic cout;
// logic [WIDTH:0] carry_temp;
// genvar i;
// generate
// for (i=0;i<WIDTH;i=i+1)
// begin : genbit
// fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
// end
// endgenerate
// assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};
// assign cout = carry_temp[WIDTH];
// endmodule // csa
module FA_array (S, C, A, B, Ci) ;
parameter n = 32;
input [n-1:0] A;
input [n-1:0] B;
input [n-1:0] Ci;
output [n-1:0] S;
output [n-1:0] C;
wire [n-1:0] n0;
wire [n-1:0] n1;
wire [n-1:0] n2;
genvar i;
generate
for (i = 0; i < n; i = i + 1) begin : index
fa FA1(.sum(S[i]), .carry(C[i]), .a(A[i]), .b(B[i]), .c(Ci[i]));
end
endgenerate
endmodule // FA_array
module HA_array (S, C, A, B) ;
parameter n = 32;
input [n-1:0] A, B;
output [n-1:0] S, C;
genvar i;
generate
for (i = 0; i < n; i = i + 1) begin : index
ha ha1(.S(S[i]), .C(C[i]), .A(A[i]), .B(B[i]));
end
endgenerate
endmodule // HA_array

View File

@ -68,9 +68,9 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
mux2 #(64) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier);
mux2 #(64) mx6 (muxa_out, mcand_q, sel_muxr, mcand);
// TDM multiplier (carry/save)
multiplier mult1 (mcand, mplier, Sum, Carry);
multiplier mult1 (mcand, mplier, Sum, Carry); // ***multiply
// Q*D - N (reversed but changed in rounder.v to account for sign reversal)
csa #(128) csa1 (Sum, Carry, constant, Sum2, Carry2);
csa #(128) csa1 (Sum, Carry, constant, Sum2, Carry2); //***adder
// Add ulp for subtraction in remainder
mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out);
@ -80,15 +80,15 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
mux2 #(64) mxA ({64'hFFFF_FFFF_FFFF_F9FF}, {64'hFFFF_FF3F_FFFF_FFFF}, P, qm_const);
// CPA (from CSA)/Remainder addition/subtraction
ldf128 cpa1 (cout1, mul_out, Sum2, Carry2, muxr_out);
ldf128 cpa1 (cout1, mul_out, Sum2, Carry2, muxr_out); //***adder
// Assuming [1,2) - q1
ldf64 cpa2 (cout2, q_out1, regb_out, q_const, 1'b0);
ldf64 cpa3 (cout3, qp_out1, regb_out, qp_const, 1'b0);
ldf64 cpa4 (cout4, qm_out1, regb_out, qm_const, 1'b1);
ldf64 cpa2 (cout2, q_out1, regb_out, q_const, 1'b0); //***adder
ldf64 cpa3 (cout3, qp_out1, regb_out, qp_const, 1'b0); //***adder
ldf64 cpa4 (cout4, qm_out1, regb_out, qm_const, 1'b1); //***adder
// Assuming [0.5,1) - q0
ldf64 cpa5 (cout5, q_out0, {regb_out[62:0], vss}, q_const, 1'b0);
ldf64 cpa6 (cout6, qp_out0, {regb_out[62:0], vss}, qp_const, 1'b0);
ldf64 cpa7 (cout7, qm_out0, {regb_out[62:0], vss}, qm_const, 1'b1);
ldf64 cpa5 (cout5, q_out0, {regb_out[62:0], vss}, q_const, 1'b0); //***adder
ldf64 cpa6 (cout6, qp_out0, {regb_out[62:0], vss}, qp_const, 1'b0); //***adder
ldf64 cpa7 (cout7, qm_out0, {regb_out[62:0], vss}, qm_const, 1'b1); //***adder
// One's complement instead of two's complement (for hw efficiency)
assign three = {~mul_out[126], mul_out[126], ~mul_out[125:63]};
mux2 #(64) mxTC (~mul_out[126:63], three[64:1], op_type, twocmp_out);

View File

@ -0,0 +1,62 @@
`include "wally-config.vh"
module fclassify (
input logic [63:0] SrcXE,
input logic FmtE, // 0-Single 1-Double
output logic [63:0] ClassResE
);
logic [31:0] Single;
logic [63:0] Double;
logic Sgn;
logic Inf, NaN, Zero, Norm, Denorm;
logic PInf, QNaN, PZero, PNorm, PDenorm;
logic NInf, SNaN, NZero, NNorm, NDenorm;
logic MaxExp, ExpZero, ManZero, FirstBitFrac;
// Single and Double precision layouts
assign Single = SrcXE[63:32];
assign Double = SrcXE;
assign Sgn = SrcXE[63];
// basic calculations for readabillity
assign ExpZero = FmtE ? ~|Double[62:52] : ~|Single[30:23];
assign MaxExp = FmtE ? &Double[62:52] : &Single[30:23];
assign ManZero = FmtE ? ~|Double[51:0] : ~|Single[22:0];
assign FirstBitFrac = FmtE ? Double[51] : Single[22];
// determine the type of number
assign NaN = MaxExp & ~ManZero;
assign Inf = MaxExp & ManZero;
assign Zero = ExpZero & ManZero;
assign Denorm= ExpZero & ~ManZero;
assign Norm = ~ExpZero;
// determine the sub categories
assign QNaN = FirstBitFrac&NaN;
assign SNaN = ~FirstBitFrac&NaN;
assign PInf = ~Sgn&Inf;
assign NInf = Sgn&Inf;
assign PNorm = ~Sgn&Norm;
assign NNorm = Sgn&Norm;
assign PDenorm = ~Sgn&Denorm;
assign NDenorm = Sgn&Denorm;
assign PZero = ~Sgn&Zero;
assign NZero = Sgn&Zero;
// determine sub category and combine into the result
// bit 0 - -Inf
// bit 1 - -Norm
// bit 2 - -Denorm
// bit 3 - -Zero
// bit 4 - +Zero
// bit 5 - +Denorm
// bit 6 - +Norm
// bit 7 - +Inf
// bit 8 - signaling NaN
// bit 9 - quiet NaN
assign ClassResE = {{54{1'b0}}, QNaN, SNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
endmodule

View File

@ -39,7 +39,7 @@
// if either of the input operands is a signaling NaN per 754
`include "wally-config.vh"
module fpucmp1 (
module fcmp (
input logic [63:0] op1,
input logic [63:0] op2,
input logic [2:0] FOpCtrlE,
@ -48,7 +48,7 @@ module fpucmp1 (
output logic Invalid, // Invalid Operation
// output logic [1:0] FCC, // Condition Codes
output logic [63:0] FCmpResultE);
output logic [63:0] CmpResE);
// Perform magnitude comparison between the 63 least signficant bits
// of the input operands. Only LT and EQ are returned, since GT can
// be determined from these values.
@ -392,7 +392,7 @@ module exception_cmp_2 (
output logic invalid,
output logic [1:0] fcc,
output logic [63:0] FCmpResultE,
output logic [63:0] CmpResE,
input logic Azero,
input logic Bzero,
@ -453,12 +453,12 @@ module exception_cmp_2 (
always_comb begin
case (FOpCtrlE[2:0])
3'b111: FCmpResultE = LT ? A : B;//min
3'b101: FCmpResultE = GT ? A : B;//max
3'b010: FCmpResultE = {63'b0, EQ};//equal
3'b001: FCmpResultE = {63'b0, LT};//less than
3'b011: FCmpResultE = {63'b0, LT|EQ};//less than or equal
default: FCmpResultE = 64'b0;
3'b111: CmpResE = LT ? A : B;//min
3'b101: CmpResE = GT ? A : B;//max
3'b010: CmpResE = {63'b0, EQ};//equal
3'b001: CmpResE = {63'b0, LT};//less than
3'b011: CmpResE = {63'b0, LT|EQ};//less than or equal
default: CmpResE = 64'b0;
endcase
end

View File

@ -64,30 +64,38 @@ module fctrl (
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d
else ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
7'b1100000: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b0_1_010_0110_00_00_0_0; // fcvt.s.w
1'b1: ControlsD = `FCTRLW'b0_1_010_0101_00_00_0_0; // fcvt.s.wu
7'b1100000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // fcvt.s.w
2'b01: ControlsD = `FCTRLW'b0_1_100_0101_00_00_0_0; // fcvt.s.wu
2'b10: ControlsD = `FCTRLW'b0_1_100_1001_00_00_0_0; // fcvt.s.l
2'b11: ControlsD = `FCTRLW'b0_1_100_1101_00_00_0_0; // fcvt.s.lu
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1101000: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b1_1_010_0100_00_00_0_0; // fcvt.w.s
1'b1: ControlsD = `FCTRLW'b1_1_010_0101_00_00_0_0; // fcvt.wu.s
7'b1101000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_1_100_0010_00_00_0_0; // fcvt.w.s
2'b01: ControlsD = `FCTRLW'b1_1_100_0110_00_00_0_0; // fcvt.wu.s
2'b10: ControlsD = `FCTRLW'b1_1_100_1010_00_00_0_0; // fcvt.l.s
2'b11: ControlsD = `FCTRLW'b1_1_100_1110_00_00_0_0; // fcvt.lu.s
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0010_00_00_0_0; // fcvt.s.d
7'b1100001: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b0_1_010_1110_00_00_0_0; // fcvt.d.w
1'b1: ControlsD = `FCTRLW'b0_1_010_1111_00_00_0_0; // fcvt.d.wu
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fcvt.s.d
7'b1100001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // fcvt.d.w
2'b01: ControlsD = `FCTRLW'b0_1_100_0101_00_00_0_0; // fcvt.d.wu
2'b10: ControlsD = `FCTRLW'b0_1_100_1001_00_00_0_0; // fcvt.d.l
2'b11: ControlsD = `FCTRLW'b0_1_100_1101_00_00_0_0; // fcvt.d.lu
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1101001: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b1_0_010_1100_00_00_0_0; // fcvt.w.d
1'b1: ControlsD = `FCTRLW'b1_0_010_1101_00_00_0_0; // fcvt.wu.d
7'b1101001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_100_0010_00_00_0_0; // fcvt.w.d
2'b01: ControlsD = `FCTRLW'b1_0_100_0110_00_00_0_0; // fcvt.wu.d
2'b10: ControlsD = `FCTRLW'b1_0_100_1010_00_00_0_0; // fcvt.l.d
2'b11: ControlsD = `FCTRLW'b1_0_100_1110_00_00_0_0; // fcvt.lu.d
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x
7'b0100001: ControlsD = `FCTRLW'b1_0_010_1000_00_00_0_0; // fcvt.d.s
7'b0100001: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fcvt.d.s
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
@ -130,17 +138,26 @@ module fctrl (
// add/sub/cnvt
// fadd = 0000
// fsub = 0001
// fcvt.w.s = 0100
// fcvt.wu.s = 0101
// fcvt.s.w = 0110
// fcvt.s.wu = 0111
// fcvt.s.d = 0010
// fcvt.w.d = 1100
// fcvt.wu.d = 1101
// fcvt.d.w = 1110
// fcvt.d.wu = 1111
// fcvt.d.s = 1000
// { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub}
// cnvt
// fcvt.w.s = 0010
// fcvt.wu.s = 0110
// fcvt.s.w = 0001
// fcvt.s.wu = 0101
// fcvt.s.d = 0000
// fcvt.l.s = 1010
// fcvt.lu.s = 1110
// fcvt.s.l = 1001
// fcvt.s.lu = 1101
// fcvt.w.d = 0010
// fcvt.wu.d = 0110
// fcvt.d.w = 0001
// fcvt.d.wu = 0101
// fcvt.d.s = 0000
// fcvt.l.d = 1010
// fcvt.lu.d = 1110
// fcvt.d.l = 1001
// fcvt.d.lu = 1101
// {long, unsigned, to int, from int} Fmt controls the output for fp -> fp
// fmv.w.x = ???0
// fmv.w.d = ???1

View File

@ -23,7 +23,7 @@
//
// `timescale 1ps/1ps
module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
module fdivsqrt (FDivSqrtDoneE, FDivResultM, FDivSqrtFlgM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
FDivStartE, reset, clk, FDivBusyE, HoldInputs);
input [63:0] DivInput1E; // 1st input operand (A)
@ -39,8 +39,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di
input clk;
output [63:0] FDivResultM; // Result of operation
output [4:0] FDivFlagsM; // IEEE exception flags
output DivDenormM; // DivDenormM on input or output
output [4:0] FDivSqrtFlgM; // IEEE exception flags
output FDivSqrtDoneE;
output FDivBusyE, HoldInputs;
@ -51,6 +50,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di
wire [63:0] Float2;
wire [63:0] IntValue;
wire DivDenormM; // DivDenormM on input or output
wire [12:0] exp1, exp2, expF;
wire [12:0] exp_diff, bias;
wire [13:0] exp_sqrt;
@ -103,7 +103,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di
convert_inputs_div divconv1 (Float1, Float2, DivInput1E, DivInput2E, DivOpType, FmtE);
// Test for exceptions and return the "Invalid Operation" and
// "Denormalized" Input FDivFlagsM. The "sel_inv" is used in
// "Denormalized" Input FDivSqrtFlgM. The "sel_inv" is used in
// the third pipeline stage to select the result. Also, op1_Norm
// and op2_Norm are one if DivInput1E and DivInput2E are not zero or denormalized.
// sub is one if the effective operation is subtaction.
@ -120,12 +120,12 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di
// bias : DP = 2^{11-1}-1 = 1023
assign bias = {3'h0, 10'h3FF};
// Divide exponent
csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c);
exp_add explogic1 (exp_cout1, {open, exp_diff},
csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); //***adder
exp_add explogic1 (exp_cout1, {open, exp_diff}, //***adder?
{vss, exp_s}, {vss, exp_c}, 1'b1);
// Sqrt exponent (check if exponent is odd)
assign exp_odd = Float1[52] ? vss : vdd;
exp_add explogic2 (exp_cout2, exp_sqrt,
exp_add explogic2 (exp_cout2, exp_sqrt, //***adder?
{vss, exp1}, {4'h0, 10'h3ff}, exp_odd);
// Choose correct exponent
assign expF = DivOpType ? exp_sqrt[13:1] : exp_diff;
@ -156,7 +156,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di
// Store the final result and the exception flags in registers.
flopenr #(64) rega (clk, reset, FDivSqrtDoneE, Result, FDivResultM);
flopenr #(1) regb (clk, reset, FDivSqrtDoneE, DenormIO, DivDenormM);
flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivFlagsM);
flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivSqrtFlgM);
endmodule // fpadd

View File

@ -25,7 +25,7 @@
`include "wally-config.vh"
module fpuhazard(
module fhazard(
input logic [4:0] Adr1E, Adr2E, Adr3E,
input logic FWriteEnM, FWriteEnW,
input logic [4:0] RdM, RdW,

View File

@ -16,8 +16,8 @@ module fma2(
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
output logic [63:0] FmaResultM, // FMA final result
output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
output logic [63:0] FMAResM, // FMA final result
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
@ -57,7 +57,7 @@ module fma2(
logic [12:0] MaxExp; // maximum value of the exponent
logic [12:0] FracLen; // length of the fraction
logic SigNaN; // is an input a signaling NaN
logic UnderflowFlag; // Underflow singal used in FmaFlagsM (used to avoid a circular depencency)
logic UnderflowFlag; // Underflow singal used in FMAFlgM (used to avoid a circular depencency)
logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
@ -316,7 +316,7 @@ module fma2(
// Combine flags
// - FMA can't set the Divide by zero flag
// - Don't set the underflow flag if the result was rounded up to a normal number
assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
@ -337,7 +337,7 @@ module fma2(
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0};
assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0};
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0};
assign FmaResultM = XNaNM ? XNaNResult :
assign FMAResM = XNaNM ? XNaNResult :
YNaNM ? YNaNResult :
ZNaNM ? ZNaNResult :
Invalid ? InvalidResult : // has to be before inf

View File

@ -229,11 +229,11 @@ module fpadd (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);
assign corr_sign = ~op_type[2]&~op_type[1]&op_type[0]&swap;
// 64-bit Mantissa Adder/Subtractor
cla64 add1 (sum, mantissaA3, mantissaB3, sub);
cla64 add1 (sum, mantissaA3, mantissaB3, sub); //***adder
// 64-bit Mantissa Subtractor - to get the two's complement of the
// result when the sign from the adder/subtractor is negative.
cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3);
cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3); //***adder
// Determine the correct sign of the result
assign sign_corr = ((corr_sign ^ signA) & ~convert) ^ sum[63];

View File

@ -34,7 +34,7 @@ module fpu (
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW,
output logic FStallD, // Stall the decode stage if Div/Sqrt instruction
output logic FStallD, // Stall the decode stage
output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM,
@ -42,48 +42,38 @@ module fpu (
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic [4:0] SetFflagsM, // FPU flags
output logic [`XLEN-1:0] FPUResultW); // FPU result
// *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS
// control logic signal instantiation
logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM, FrmW; // FP rounding mode
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division
logic FWriteIntD; // Write to integer register
logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction
logic [1:0] FMemRWD; // Read and write enable for memory
logic [1:0] ForwardXD, ForwardXE; // Input1 forwarding mux control signal
logic [1:0] ForwardYD, ForwardYE; // Input2 forwarding mux control signal
logic [1:0] ForwardZD, ForwardZE; // Input3 forwarding mux control signal
logic SrcYUsedD; // Is input 2 used
logic SrcZUsedD; // Is input 3 used
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM;
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM;
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;
logic [4:0] Adr1E, Adr2E, Adr3E;
// regfile signals
logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining
logic [63:0] FWDM; // Write data for FP register
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [63:0] SrcXE, SrcXM, SrcXW; // Input 1 to the various units (after forwarding)
logic [`XLEN-1:0] SrcXMAligned;
logic [63:0] SrcYE, SrcYM, SrcYW; // Input 2 to the various units (after forwarding)
logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding)
logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding)
logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding)
logic [63:0] FLoadResultW, FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions
// div/sqrt signals
logic DivDenormE, DivDenormM, DivDenormW;
logic DivOvEn, DivUnEn;
logic [63:0] FDivResultE, FDivResultM, FDivResultW;
logic [4:0] FDivFlagsE, FDivFlagsM, FDivFlagsW;
logic FDivSqrtDoneE, FDivSqrtDoneM;
logic [63:0] FDivResultM, FDivResultW;
logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW;
logic FDivSqrtDoneE;
logic [63:0] DivInput1E, DivInput2E;
logic HoldInputs; // keep forwarded inputs arround durring division
// FMA signals
logic [105:0] ProdManE, ProdManM;
logic [105:0] ProdManE, ProdManM; ///*** put pipline stages in units
logic [161:0] AlignedAddendE, AlignedAddendM;
logic [12:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
@ -91,93 +81,112 @@ module fpu (
logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM;
logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM;
logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM;
logic [63:0] FmaResultM, FmaResultW;
logic [4:0] FmaFlagsM, FmaFlagsW;
logic [63:0] FMAResM, FMAResW;
logic [4:0] FMAFlgM, FMAFlgW;
// add/cvt signals
logic [63:0] AddSumE, AddSumTcE;
logic [3:0] AddSelInvE;
logic [10:0] AddExpPostSumE;
logic AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE;
logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE;
logic AddConvertE;
logic [63:0] AddFloat1E, AddFloat2E;
logic [11:0] AddExp1DenormE, AddExp2DenormE;
logic [10:0] AddExponentE;
logic [2:0] AddRmE;
logic [3:0] AddOpTypeE;
logic AddPE, AddOvEnE, AddUnEnE;
logic AddDenormM;
logic [63:0] AddSumM, AddSumTcM;
logic [3:0] AddSelInvM;
logic [10:0] AddExpPostSumM;
logic AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM;
logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM;
logic AddConvertM, AddSignM;
logic [63:0] AddFloat1M, AddFloat2M;
logic [11:0] AddExp1DenormM, AddExp2DenormM;
logic [10:0] AddExponentM;
logic [63:0] AddOp1M, AddOp2M;
logic [2:0] AddRmM;
logic [3:0] AddOpTypeM;
logic AddPM, AddOvEnM, AddUnEnM;
logic [63:0] FAddResultM, FAddResultW;
logic [4:0] FAddFlagsM, FAddFlagsW;
logic [63:0] AddSumE, AddSumM;
logic [63:0] AddSumTcE, AddSumTcM;
logic [3:0] AddSelInvE, AddSelInvM;
logic [10:0] AddExpPostSumE,AddExpPostSumM;
logic AddCorrSignE, AddCorrSignM;
logic AddOp1NormE, AddOp1NormM;
logic AddOp2NormE, AddOp2NormM;
logic AddOpANormE, AddOpANormM;
logic AddOpBNormE, AddOpBNormM;
logic AddInvalidE, AddInvalidM;
logic AddDenormInE, AddDenormInM;
logic AddSwapE, AddSwapM;
logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2
logic AddSignAE, AddSignAM;
logic AddConvertE, AddConvertM;
logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M;
logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM;
logic [10:0] AddExponentE, AddExponentM;
logic [63:0] FAddResM, FAddResW;
logic [4:0] FAddFlgM, FAddFlgW;
// cmp signals
logic CmpInvalidE, CmpInvalidM, CmpInvalidW;
logic [63:0] FCmpResultE, FCmpResultM, FCmpResultW;
logic CmpNVE, CmpNVM, CmpNVW;
logic [63:0] CmpResE, CmpResM, CmpResW;
// fsgn signals
logic [63:0] SgnResultE, SgnResultM, SgnResultW;
logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW;
logic [63:0] SgnResE, SgnResM;
logic SgnNVE, SgnNVM, SgnNVW;
logic [63:0] FResM, FResW;
logic FFlgM, FFlgW;
logic FFlgM, FFlgW;
// instantiation of W stage regfile signals
logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW;
logic [63:0] AlignedSrcAM;
// classify signals
logic [63:0] ClassResultE, ClassResultM, ClassResultW;
logic [63:0] ClassResE, ClassResM;
// 64-bit FPU result
logic [63:0] FPUResult64W, FPUResult64E;
logic [63:0] FPUResult64W;
logic [4:0] FPUFlagsW;
//DECODE STAGE
// top-level controller for FPU
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
.FRM_REGW, .IllegalFPUInstrD, .FWriteEnD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
// regfile instantiation
FPregfile fpregfile (clk, reset, FWriteEnW,
fregfile fregfile (clk, reset, FWriteEnW,
InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
FPUResult64W,
FRD1D, FRD2D, FRD3D);
//*****************
// fpregfile D/E pipe registers
// D/E pipe registers
//*****************
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
//*****************
// other D/E pipe registers
//*****************
flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
flopenrc #(15) CtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(22) DECtrlReg(clk, reset, FlushE, ~StallE,
flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD},
{FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE});
//EXECUTION STAGE
// Hazard unit for FPU
fpuhazard hazard(.*);
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FWriteEnM, .FWriteEnW, .RdM, .RdW, .FResultSelM, .FStallD,
.ForwardXE, .ForwardYE, .ForwardZE);
// forwarding muxs
mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE);
@ -186,7 +195,9 @@ module fpu (
// first of two-stage instance of floating-point fused multiply-add unit
fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]),.*);
fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, .AlignedAddendE,
.ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
.XNaNE, .YNaNE, .ZNaNE );
// first and only instance of floating-point divider
logic fpdivClk;
@ -204,174 +215,140 @@ module fpu (
.en(~HoldInputs), .clear(FDivSqrtDoneE),
.reset(reset), .clk(clk));
fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .*);
fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E,
.FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM,
.FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset);
// first of two-stage instance of floating-point add/cvt unit
fpuaddcvt1 fpadd1 (.*);
fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE,
.AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE,
.AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE,
.AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE);
// first of two-stage instance of floating-point comparator
fpucmp1 fpcmp1 (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpInvalidE, FCmpResultE);
// first and only instance of floating-point comparator
fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE);
// first and only instance of floating-point sign converter
fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*);
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE);
// first and only instance of floating-point classify unit
fpuclassify fpuclass (.*);
fclassify fclassify (.SrcXE, .FmtE, .ClassResE);
// output for store instructions
assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]};
//***swap to mux
//*****************
//fpregfile D/E pipe registers
// E/M pipe registers
//*****************
flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM);
flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM);
flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM);
//*****************
// fma E/M pipe registers
//*****************
flopenrc #(106) EMRegFma3(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
flopenrc #(162) EMRegFma4(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
flopenrc #(13) EMRegFma6(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #(1) EMRegFma7(clk, reset, FlushM, ~StallM, AddendStickyE, AddendStickyM);
flopenrc #(1) EMRegFma8(clk, reset, FlushM, ~StallM, KillProdE, KillProdM);
flopenrc #(1) EMRegFma10(clk, reset, FlushM, ~StallM, XZeroE, XZeroM);
flopenrc #(1) EMRegFma11(clk, reset, FlushM, ~StallM, YZeroE, YZeroM);
flopenrc #(1) EMRegFma12(clk, reset, FlushM, ~StallM, ZZeroE, ZZeroM);
flopenrc #(1) EMRegFma16(clk, reset, FlushM, ~StallM, XInfE, XInfM);
flopenrc #(1) EMRegFma17(clk, reset, FlushM, ~StallM, YInfE, YInfM);
flopenrc #(1) EMRegFma18(clk, reset, FlushM, ~StallM, ZInfE, ZInfM);
flopenrc #(1) EMRegFma19(clk, reset, FlushM, ~StallM, XNaNE, XNaNM);
flopenrc #(1) EMRegFma20(clk, reset, FlushM, ~StallM, YNaNE, YNaNM);
flopenrc #(1) EMRegFma21(clk, reset, FlushM, ~StallM, ZNaNE, ZNaNM);
flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE},
{AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM});
//*****************
// fpadd E/M pipe registers
//*****************
flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM);
flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM);
flopenrc #(4) EMRegAdd3(clk, reset, FlushM, ~StallM, AddSelInvE, AddSelInvM);
flopenrc #(11) EMRegAdd4(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
flopenrc #(1) EMRegAdd5(clk, reset, FlushM, ~StallM, AddCorrSignE, AddCorrSignM);
flopenrc #(1) EMRegAdd6(clk, reset, FlushM, ~StallM, AddOp1NormE, AddOp1NormM);
flopenrc #(1) EMRegAdd7(clk, reset, FlushM, ~StallM, AddOp2NormE, AddOp2NormM);
flopenrc #(1) EMRegAdd8(clk, reset, FlushM, ~StallM, AddOpANormE, AddOpANormM);
flopenrc #(1) EMRegAdd9(clk, reset, FlushM, ~StallM, AddOpBNormE, AddOpBNormM);
flopenrc #(1) EMRegAdd10(clk, reset, FlushM, ~StallM, AddInvalidE, AddInvalidM);
flopenrc #(1) EMRegAdd11(clk, reset, FlushM, ~StallM, AddDenormInE, AddDenormInM);
flopenrc #(1) EMRegAdd12(clk, reset, FlushM, ~StallM, AddConvertE, AddConvertM);
flopenrc #(1) EMRegAdd13(clk, reset, FlushM, ~StallM, AddSwapE, AddSwapM);
flopenrc #(1) EMRegAdd14(clk, reset, FlushM, ~StallM, AddNormOvflowE, AddNormOvflowM);
flopenrc #(1) EMRegAdd15(clk, reset, FlushM, ~StallM, AddSignAE, AddSignAM);
flopenrc #(64) EMRegAdd16(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M);
flopenrc #(64) EMRegAdd17(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M);
flopenrc #(12) EMRegAdd18(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(12) EMRegAdd19(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(11) EMRegAdd20(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
flopenrc #(3) EMRegAdd23(clk, reset, FlushM, ~StallM, AddRmE, AddRmM);
flopenrc #(4) EMRegAdd24(clk, reset, FlushM, ~StallM, AddOpTypeE, AddOpTypeM);
flopenrc #(1) EMRegAdd25(clk, reset, FlushM, ~StallM, AddPE, AddPM);
flopenrc #(1) EMRegAdd26(clk, reset, FlushM, ~StallM, AddOvEnE, AddOvEnM);
flopenrc #(1) EMRegAdd27(clk, reset, FlushM, ~StallM, AddUnEnE, AddUnEnM);
flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M);
flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M);
flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM,
{AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE},
{AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM});
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM);
flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM);
//*****************
// fpcmp E/M pipe registers
//*****************
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpInvalidE, CmpInvalidM);
flopenrc #(64) EMRegCmp3(clk, reset, FlushM, ~StallM, FCmpResultE, FCmpResultM);
flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
//*****************
// fpsgn E/M pipe registers
//*****************
flopenrc #(64) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnResultE, SgnResultM);
flopenrc #(5) EMRegSgn3(clk, reset, FlushM, ~StallM, SgnFlagsE, SgnFlagsM);
//*****************
// other E/M pipe registers
//*****************
flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM,
{FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE},
{FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM});
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
//*****************
// fpuclassify E/M pipe registers
//*****************
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResultE, ClassResultM);
//BEGIN MEMORY STAGE
mux3 #(64) FResMux(AlignedSrcAM, SgnResultM, FCmpResultM, FResSelM, FResM);
assign FFlgM = CmpInvalidM & FResSelM[1];
mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM);
mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM);
//***change to mux
assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]};
mux3 #(`XLEN) IntResMux(FCmpResultM[`XLEN-1:0], SrcXMAligned, ClassResultM[`XLEN-1:0], FIntResSelM, FIntResM);
mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM);
// second instance of two-stage FMA unit
fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .*);
fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM,
.ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM,
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM,
.FMAResM, .FMAFlgM);
// second instance of two-stage floating-point add/cvt unit
fpuaddcvt2 fpadd2 (.*);
fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M,
.AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM,
.AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM,
.AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM);
// Align SrcA to MSB when single precicion
mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM);
//*****************
//fpregfile M/W pipe registers
// M/W pipe registers
//*****************
flopenrc #(64) MWFpReg1(clk, reset, FlushW, ~StallW, SrcXM, SrcXW);
flopenrc #(64) MWFpReg2(clk, reset, FlushW, ~StallW, SrcYM, SrcYW);
flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW);
//*****************
// fma M/W pipe registers
//*****************
flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FmaResultM, FmaResultW);
flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FmaFlagsM, FmaFlagsW);
//*****************
// fpdiv M/W pipe registers
//*****************
flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW);
flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivFlagsM, FDivFlagsW);
flopenrc #(1) MWRegDiv3(clk, reset, FlushW, ~StallW, DivDenormM, DivDenormW);
flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW);
//*****************
// fpadd M/W pipe registers
//*****************
flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResultM, FAddResultW);
flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlagsM, FAddFlagsW);
flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW);
flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW);
//*****************
// fpcmp M/W pipe registers
//*****************
flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpInvalidM, CmpInvalidW);
// flopenrc #(2) MWRegCmp2(clk, reset, FlushW, ~StallW, CmpFCCM, CmpFCCW);
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, FCmpResultM, FCmpResultW);
flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW);
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW);
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW);
//*****************
// fpsgn M/W pipe registers
//*****************
flopenrc #(64) MWRegSgn1(clk, reset, FlushW, ~StallW, SgnResultM, SgnResultW);
flopenrc #(5) MWRegSgn2(clk, reset, FlushW, ~StallW, SgnFlagsM, SgnFlagsW);
//*****************
// other M/W pipe registers
//*****************
flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM},
{FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW});
//*****************
// fpuclassify M/W pipe registers
//*****************
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, ClassResultM, ClassResultW);
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW);
@ -385,13 +362,13 @@ module fpu (
//***turn into muxs
always_comb begin
case (FResultSelW)
3'b000 : FPUFlagsW = 5'b0;
3'b001 : FPUFlagsW = FmaFlagsW;
3'b010 : FPUFlagsW = FAddFlagsW;
3'b011 : FPUFlagsW = FDivFlagsW;
3'b001 : FPUFlagsW = FMAFlgW;
3'b010 : FPUFlagsW = FAddFlgW;
3'b011 : FPUFlagsW = FDivSqrtFlgW;
3'b100 : FPUFlagsW = {4'b0,FFlgW};
default : FPUFlagsW = 5'bxxxxx;
endcase
@ -400,8 +377,8 @@ module fpu (
always_comb begin
case (FResultSelW)
3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0};
3'b001 : FPUResult64W = FmaResultW;
3'b010 : FPUResult64W = FAddResultW;
3'b001 : FPUResult64W = FMAResW;
3'b010 : FPUResult64W = FAddResW;
3'b011 : FPUResult64W = FDivResultW;
3'b100 : FPUResult64W = FResW;
default : FPUResult64W = 64'bxxxxx;
@ -415,7 +392,9 @@ module fpu (
// define offsets for LSB zero extension or truncation
always_comb begin
// zero extension
//***turn into mux
FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]};
//*** put into mem stage
SetFflagsM = FPUFlagsW;
end

View File

@ -183,11 +183,11 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;
// 64-bit Mantissa Adder/Subtractor
cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub);
cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder
// 64-bit Mantissa Subtractor - to get the two's complement of the
// result when the sign from the adder/subtractor is negative.
cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3);
cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); //***adder
// Finds normal underflow result to determine whether to round final exponent down
//***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be

View File

@ -27,7 +27,7 @@
//
module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);
module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);
input [2:0] FrmM; // Rounding mode - specify values
input [3:0] FOpCtrlM; // Function opcode
@ -51,9 +51,9 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS
input AddSwapM;
// input AddNormOvflowM;
output [63:0] FAddResultM; // Result of operation
output [4:0] FAddFlagsM; // IEEE exception flags
output AddDenormM; // AddDenormM on input or output
output [63:0] FAddResM; // Result of operation
output [4:0] FAddFlgM; // IEEE exception flags
wire AddDenormM; // AddDenormM on input or output
wire P;
assign P = ~FmtM | FOpCtrlM[2];
@ -145,7 +145,7 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS
// exactly where the rounding point is. The rounding units also
// handles special cases and set the exception flags.
// Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlagsM in order to
// Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlgM in order to
// help in processor reservation station detection of load/stores. In
// other words, the processor would like to know ahead of time that
// if the result is an exception then don't load or store.
@ -155,8 +155,8 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS
AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM);
// Store the final result and the exception flags in registers.
assign FAddResultM = Result;
assign {AddDenormM, FAddFlagsM} = {DenormIO, FlagsIn};
assign FAddResM = Result;
assign {AddDenormM, FAddFlgM} = {DenormIO, FlagsIn};
endmodule // fpadd

View File

@ -1,50 +0,0 @@
`include "wally-config.vh"
module fpuclassify (
input logic [63:0] SrcXE,
input logic FmtE, // 0-single 1-double
output logic [63:0] ClassResultE
);
logic [31:0] single;
logic [63:0] double;
logic sign;
logic infinity, NaN, zero, normal, subnormal;
logic ExpNotZero, ExpOnes, ManNotZero, ExpZero, ManZero, FirstBitMan;
// single and double precision layouts
assign single = SrcXE[63:32];
assign double = SrcXE;
assign sign = SrcXE[63];
// basic calculations for readabillity
assign ExpNotZero = FmtE ? |double[62:52] : |single[30:23];
assign ExpZero = ~ExpNotZero;
assign ExpOnes = FmtE ? &double[62:52] : &single[30:23];
assign ManNotZero = FmtE ? |double[51:0] : |single[22:0];
assign ManZero = ~ManNotZero;
assign FirstBitMan = FmtE ? double[51] : single[22];
// determine the type of number
assign NaN = ExpOnes & ManNotZero;
assign infinity = ExpOnes & ManZero;
assign zero = ExpZero & ManZero;
assign subnormal= ExpZero & ManNotZero;
assign normal = ExpNotZero;
// determine sub category and combine into the result
// bit 0 - -infinity
// bit 1 - -normal
// bit 2 - -subnormal
// bit 3 - -zero
// bit 4 - +zero
// bit 5 - +subnormal
// bit 6 - +normal
// bit 7 - +infinity
// bit 8 - signaling NaN
// bit 9 - quiet NaN
assign ClassResultE = {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal,
~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity};
endmodule

View File

@ -1,243 +0,0 @@
// //
// // File name : fpcomp.v
// // Title : Floating-Point Comparator
// // project : FPU
// // Library : fpcomp
// // Author(s) : James E. Stine
// // Purpose : definition of main unit to floating-point comparator
// // notes :
// //
// // Copyright Oklahoma State University
// //
// // Floating Point Comparator (Algorithm)
// //
// // 1.) Performs sign-extension if the inputs are 32-bit integers.
// // 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
// // 3.) Check for special cases (+0=-0, unordered, and infinite values)
// // and correct for sign bits
// //
// // This module takes 64-bits inputs op1 and op2, VSS, and VDD
// // signals, and a 2-bit signal Sel that indicates the type of
// // operands being compared as indicated below.
// // Sel Description
// // 00 double precision numbers
// // 01 single precision numbers
// // 10 half precision numbers
// // 11 (unused)
// //
// // The comparator produces a 2-bit signal FCC, which
// // indicates the result of the comparison:
// //
// // fcc decscription
// // 00 A = B
// // 01 A < B
// // 10 A > B
// // 11 A and B are unordered (i.e., A or B is NaN)
// //
// // It also produces an invalid operation flag, which is one
// // if either of the input operands is a signaling NaN per 754
// module fpucmp2 (
// input logic [63:0] op1,
// input logic [63:0] op2,
// input logic [1:0] Sel,
// input logic [7:0] w, x,
// input logic ANaN, BNaN,
// input logic Azero, Bzero,
// input logic [3:0] FOpCtrlM,
// input logic FmtM,
// output logic Invalid, // Invalid Operation
// output logic [1:0] FCC, // Condition Codes
// output logic [63:0] FCmpResultM);
// logic LT; // magnitude op1 < magnitude op2
// logic EQ; // magnitude op1 = magnitude op2
// // Perform magnitude comparison between the 63 least signficant bits
// // of the input operands. Only LT and EQ are returned, since GT can
// // be determined from these values.
// magcompare64b_2 magcomp2 (LT, EQ, w, x);
// // Determine final values based on output of magnitude comparison,
// // sign bits, and special case testing.
// exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*);
// endmodule // fpcomp
// /*module magcompare2b (LT, GT, A, B);
// input logic [1:0] A;
// input logic [1:0] B;
// output logic LT;
// output logic GT;
// // Determine if A < B using a minimized sum-of-products expression
// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// // Determine if A > B using a minimized sum-of-products expression
// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
// endmodule*/ // magcompare2b
// // 2-bit magnitude comparator
// // This module compares two 2-bit values A and B. LT is '1' if A < B
// // and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
// // this version actually incorporates don't cares into the equation to
// // simplify the optimization
// // module magcompare2c (LT, GT, A, B);
// // input logic [1:0] A;
// // input logic [1:0] B;
// // output logic LT;
// // output logic GT;
// // assign LT = B[1] | (!A[1]&B[0]);
// // assign GT = A[1] | (!B[1]&A[0]);
// // endmodule // magcompare2b
// // This module compares two 64-bit values A and B. LT is '1' if A < B
// // and EQ is '1'if A = B. LT and GT are both '0' if A > B.
// // This structure was modified so
// // that it only does a strict magnitdude comparison, and only
// // returns flags for less than (LT) and eqaual to (EQ). It uses a tree
// // of 63 2-bit magnitude comparators, followed by one OR gates.
// //
// // J. E. Stine and M. J. Schulte, "A combined two's complement and
// // floating-point comparator," 2005 IEEE International Symposium on
// // Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
// // doi: 10.1109/ISCAS.2005.1464531
// module magcompare64b_2 (LT, EQ, w, x);
// input logic [7:0] w;
// input logic [7:0] x;
// logic [3:0] y;
// logic [3:0] z;
// logic [1:0] a;
// logic [1:0] b;
// logic GT;
// output logic LT;
// output logic EQ;
// magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]);
// magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]);
// magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]);
// magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]);
// magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]);
// magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]);
// magcompare2c mag3F(LT, GT, b[1:0], a[1:0]);
// assign EQ = ~(LT | GT);
// endmodule // magcompare64b
// // This module takes 64-bits inputs A and B, two magnitude comparison
// // flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of
// // operands being compared as indicated below.
// // Sel Description
// // 00 double precision numbers
// // 01 single precision numbers
// // 10 half precision numbers
// // 11 bfloat precision numbers
// //
// // The comparator produces a 2-bit signal fcc, which
// // indicates the result of the comparison as follows:
// // fcc decscription
// // 00 A = B
// // 01 A < B
// // 10 A > B
// // 11 A and B are unordered (i.e., A or B is NaN)
// // It also produces a invalid operation flag, which is one
// // if either of the input operands is a signaling NaN.
// module exception_cmp_2 (
// input logic [63:0] A,
// input logic [63:0] B,
// input logic FmtM,
// input logic LT_mag,
// input logic EQ_mag,
// input logic [1:0] Sel,
// input logic [3:0] FOpCtrlM,
// output logic invalid,
// output logic [1:0] fcc,
// output logic [63:0] FCmpResultM,
// input logic Azero,
// input logic Bzero,
// input logic ANaN,
// input logic BNaN);
// logic dp;
// logic sp;
// logic hp;
// logic ASNaN;
// logic BSNaN;
// logic UO;
// logic GT;
// logic LT;
// logic EQ;
// logic [62:0] sixtythreezeros = 63'h0;
// assign dp = !Sel[1]&!Sel[0];
// assign sp = !Sel[1]&Sel[0];
// assign hp = Sel[1]&!Sel[0];
// // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating
// // point comparison is being performed.
// assign UO = (ANaN | BNaN);
// // Test if A or B is a signaling NaN.
// assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]);
// assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]);
// // If either A or B is a signaling NaN the "Invalid Operation"
// // exception flag is set to one; otherwise it is zero.
// assign invalid = (ASNaN | BSNaN);
// // A and B are equal if (their magnitudes are equal) AND ((their signs are
// // equal) or (their magnitudes are zero AND they are floating point
// // numbers)). Also, A and B are not equal if they are unordered.
// assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO);
// // A is less than B if (A is negative and B is posiive) OR
// // (A and B are positive and the magnitude of A is less than
// // the magnitude of B) or (A and B are negative integers and
// // the magnitude of A is less than the magnitude of B) or
// // (A and B are negative floating point numbers and
// // the magnitude of A is greater than the magnitude of B).
// // Also, A is not less than B if A and B are equal or unordered.
// assign LT = ((~LT_mag & A[63] & B[63]) |
// (LT_mag & ~(A[63] & B[63])))&~EQ&~UO;
// // A is greater than B when LT, EQ, and UO are are false.
// assign GT = ~(LT | EQ | UO);
// // Note: it may be possible to optimize the setting of fcc
// // a little more, but it is probably not worth the effort.
// // Set the bits of fcc based on LT, GT, EQ, and UO
// assign fcc[0] = LT | UO;
// assign fcc[1] = GT | UO;
// always_comb begin
// case (FOpCtrlM[2:0])
// 3'b111: FCmpResultM = LT ? A : B;//min
// 3'b101: FCmpResultM = GT ? A : B;//max
// 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal
// 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than
// 3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal
// default: FCmpResultM = 64'b0;
// endcase
// end
// endmodule // exception_cmp

View File

@ -1,515 +0,0 @@
`include "wally-config.vh"
// `include "../../config/rv64icfd/wally-config.vh" //debug
module freg1adr (
input logic FmtW,
input logic reset,
input logic clear,
input logic clk,
input logic [4:0] rd,
input logic write,
input logic [4:0] adr1,
input logic [`XLEN-1:0] writeData,
output logic [`XLEN-1:0] readData);
//note - not word aligning based on precision of
//operation (FmtW)
//reg number should remain static, but it doesn't hurt
//to parameterize
parameter numRegs = 32;
//intermediary signals - useful for debugging
//and easy instatiation of generated modules
logic [`XLEN-1:0] [numRegs-1:0] regInput;
logic [`XLEN-1:0] [numRegs-1:0] regOutput;
//generate fp registers themselves
genvar i;
generate
for (i = 0; i < numRegs; i = i + 1) begin:register
floprc #(`XLEN) freg[i](.clk(clk), .reset(reset), .clear(clear), .d(regInput[i][`XLEN-1:0]), .q(regOutput[i][`XLEN-1:0]));
end
endgenerate
//this could be done with:
//
//assign readData = regOutput[adr1];
//
//but always_comb allows for finer control
//address decoder
//only 1 for this fp register set
//used with fpsign
//defaults to outputting zeroes
always_comb begin
case(adr1)
5'b00000 : readData = regOutput[0];
5'b00001 : readData = regOutput[1];
5'b00010 : readData = regOutput[2];
5'b00011 : readData = regOutput[3];
5'b00100 : readData = regOutput[4];
5'b00101 : readData = regOutput[5];
5'b00110 : readData = regOutput[6];
5'b00111 : readData = regOutput[7];
5'b01000 : readData = regOutput[8];
5'b01001 : readData = regOutput[9];
5'b01010 : readData = regOutput[10];
5'b01011 : readData = regOutput[11];
5'b01100 : readData = regOutput[12];
5'b01101 : readData = regOutput[13];
5'b01110 : readData = regOutput[14];
5'b01111 : readData = regOutput[15];
5'b10000 : readData = regOutput[16];
5'b10001 : readData = regOutput[17];
5'b10010 : readData = regOutput[18];
5'b10011 : readData = regOutput[19];
5'b10100 : readData = regOutput[20];
5'b10101 : readData = regOutput[21];
5'b10110 : readData = regOutput[22];
5'b10111 : readData = regOutput[23];
5'b11000 : readData = regOutput[24];
5'b11001 : readData = regOutput[25];
5'b11010 : readData = regOutput[26];
5'b11011 : readData = regOutput[27];
5'b11100 : readData = regOutput[28];
5'b11101 : readData = regOutput[29];
5'b11110 : readData = regOutput[30];
5'b11111 : readData = regOutput[31];
default : readData = `XLEN'h0;
endcase
end
//destination register decoder
//only change input values on write
//defaults to undefined with invalid address
//
//note - this is an intermediary signal, so
//this is not asynch assignment. FF in flopr
//will not update data until clk pulse
always_comb begin
if(write) begin
case(rd)
5'b00000 : regInput[0] = writeData;
5'b00001 : regInput[1] = writeData;
5'b00010 : regInput[2] = writeData;
5'b00011 : regInput[3] = writeData;
5'b00100 : regInput[4] = writeData;
5'b00101 : regInput[5] = writeData;
5'b00110 : regInput[6] = writeData;
5'b00111 : regInput[7] = writeData;
5'b01000 : regInput[8] = writeData;
5'b01000 : regInput[9] = writeData;
5'b01001 : regInput[10] = writeData;
5'b01010 : regInput[11] = writeData;
5'b01111 : regInput[12] = writeData;
5'b01101 : regInput[13] = writeData;
5'b01110 : regInput[14] = writeData;
5'b01111 : regInput[15] = writeData;
5'b10000 : regInput[16] = writeData;
5'b10001 : regInput[17] = writeData;
5'b10010 : regInput[18] = writeData;
5'b10011 : regInput[19] = writeData;
5'b10100 : regInput[20] = writeData;
5'b10101 : regInput[21] = writeData;
5'b10110 : regInput[22] = writeData;
5'b10111 : regInput[23] = writeData;
5'b11000 : regInput[24] = writeData;
5'b11000 : regInput[25] = writeData;
5'b11001 : regInput[26] = writeData;
5'b11010 : regInput[27] = writeData;
5'b11111 : regInput[28] = writeData;
5'b11101 : regInput[29] = writeData;
5'b11110 : regInput[30] = writeData;
5'b11111 : regInput[31] = writeData;
default : regInput[0] = `XLEN'hx;
endcase
end
end
endmodule
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//********
//formatting separation
//********
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
module freg2adr (
input logic FmtW,
input logic reset,
input logic clear,
input logic clk,
input logic [4:0] rd,
input logic write,
input logic [4:0] adr1,
input logic [4:0] adr2,
input logic [`XLEN-1:0] writeData,
output logic [`XLEN-1:0] readData1,
output logic [`XLEN-1:0] readData2);
//note - not word aligning based on precision of
//operation (FmtW)
//reg number should remain static, but it doesn't hurt
//to parameterize
parameter numRegs = 32;
//intermediary signals - useful for debugging
//and easy instatiation of generated modules
logic [`XLEN-1:0] [numRegs-1:0] regInput;
logic [`XLEN-1:0] [numRegs-1:0] regOutput;
//generate fp registers themselves
genvar i;
generate
for (i = 0; i < numRegs; i = i + 1) begin:register
floprc #(`XLEN) freg[i](.clk(clk), .reset(reset), .clear(clear), .d(regInput[i][`XLEN-1:0]), .q(regOutput[i][`XLEN-1:0]));
end
endgenerate
//address decoder
//2 are used for this fp register set
//used with fpadd/cvt, fpdiv/sqrt, and fpcmp
//defaults to outputting zeroes
always_comb begin
//adderss 1 decoder
case(adr1)
5'b00000 : readData1 = regOutput[0];
5'b00001 : readData1 = regOutput[1];
5'b00010 : readData1 = regOutput[2];
5'b00011 : readData1 = regOutput[3];
5'b00100 : readData1 = regOutput[4];
5'b00101 : readData1 = regOutput[5];
5'b00110 : readData1 = regOutput[6];
5'b00111 : readData1 = regOutput[7];
5'b01000 : readData1 = regOutput[8];
5'b01001 : readData1 = regOutput[9];
5'b01010 : readData1 = regOutput[10];
5'b01011 : readData1 = regOutput[11];
5'b01100 : readData1 = regOutput[12];
5'b01101 : readData1 = regOutput[13];
5'b01110 : readData1 = regOutput[14];
5'b01111 : readData1 = regOutput[15];
5'b10000 : readData1 = regOutput[16];
5'b10001 : readData1 = regOutput[17];
5'b10010 : readData1 = regOutput[18];
5'b10011 : readData1 = regOutput[19];
5'b10100 : readData1 = regOutput[20];
5'b10101 : readData1 = regOutput[21];
5'b10110 : readData1 = regOutput[22];
5'b10111 : readData1 = regOutput[23];
5'b11000 : readData1 = regOutput[24];
5'b11001 : readData1 = regOutput[25];
5'b11010 : readData1 = regOutput[26];
5'b11011 : readData1 = regOutput[27];
5'b11100 : readData1 = regOutput[28];
5'b11101 : readData1 = regOutput[29];
5'b11110 : readData1 = regOutput[30];
5'b11111 : readData1 = regOutput[31];
default : readData1 = `XLEN'h0;
endcase
//address 2 decoder
case(adr2)
5'b00000 : readData2 = regOutput[0];
5'b00001 : readData2 = regOutput[1];
5'b00010 : readData2 = regOutput[2];
5'b00011 : readData2 = regOutput[3];
5'b00100 : readData2 = regOutput[4];
5'b00101 : readData2 = regOutput[5];
5'b00110 : readData2 = regOutput[6];
5'b00111 : readData2 = regOutput[7];
5'b01000 : readData2 = regOutput[8];
5'b01001 : readData2 = regOutput[9];
5'b01010 : readData2 = regOutput[10];
5'b01011 : readData2 = regOutput[11];
5'b01100 : readData2 = regOutput[12];
5'b01101 : readData2 = regOutput[13];
5'b01110 : readData2 = regOutput[14];
5'b01111 : readData2 = regOutput[15];
5'b10000 : readData2 = regOutput[16];
5'b10001 : readData2 = regOutput[17];
5'b10010 : readData2 = regOutput[18];
5'b10011 : readData2 = regOutput[19];
5'b10100 : readData2 = regOutput[20];
5'b10101 : readData2 = regOutput[21];
5'b10110 : readData2 = regOutput[22];
5'b10111 : readData2 = regOutput[23];
5'b11000 : readData2 = regOutput[24];
5'b11001 : readData2 = regOutput[25];
5'b11010 : readData2 = regOutput[26];
5'b11011 : readData2 = regOutput[27];
5'b11100 : readData2 = regOutput[28];
5'b11101 : readData2 = regOutput[29];
5'b11110 : readData2 = regOutput[30];
5'b11111 : readData2 = regOutput[31];
default : readData2 = `XLEN'h0;
endcase
end
//destination register decoder
//only change input values on write
//defaults to undefined with invalid address
//
//note - this is an intermediary signal, so
//this is not asynch assignment. FF in flopr
//will not update data until clk pulse
always_comb begin
if(write) begin
case(rd)
5'b00000 : regInput[0] = writeData;
5'b00001 : regInput[1] = writeData;
5'b00010 : regInput[2] = writeData;
5'b00011 : regInput[3] = writeData;
5'b00100 : regInput[4] = writeData;
5'b00101 : regInput[5] = writeData;
5'b00110 : regInput[6] = writeData;
5'b00111 : regInput[7] = writeData;
5'b01000 : regInput[8] = writeData;
5'b01000 : regInput[9] = writeData;
5'b01001 : regInput[10] = writeData;
5'b01010 : regInput[11] = writeData;
5'b01111 : regInput[12] = writeData;
5'b01101 : regInput[13] = writeData;
5'b01110 : regInput[14] = writeData;
5'b01111 : regInput[15] = writeData;
5'b10000 : regInput[16] = writeData;
5'b10001 : regInput[17] = writeData;
5'b10010 : regInput[18] = writeData;
5'b10011 : regInput[19] = writeData;
5'b10100 : regInput[20] = writeData;
5'b10101 : regInput[21] = writeData;
5'b10110 : regInput[22] = writeData;
5'b10111 : regInput[23] = writeData;
5'b11000 : regInput[24] = writeData;
5'b11000 : regInput[25] = writeData;
5'b11001 : regInput[26] = writeData;
5'b11010 : regInput[27] = writeData;
5'b11111 : regInput[28] = writeData;
5'b11101 : regInput[29] = writeData;
5'b11110 : regInput[30] = writeData;
5'b11111 : regInput[31] = writeData;
default : regInput[0] = `XLEN'hx;
endcase
end
end
endmodule
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//********
//formatting separation
//********
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
module freg3adr (
input logic FmtW,
input logic reset,
input logic clear,
input logic clk,
input logic [4:0] rd,
input logic write,
input logic [4:0] adr1,
input logic [4:0] adr2,
input logic [4:0] adr3,
input logic [`XLEN-1:0] writeData,
output logic [`XLEN-1:0] readData1,
output logic [`XLEN-1:0] readData2,
output logic [`XLEN-1:0] readData3);
//note - not word aligning based on precision of
//operation (FmtW)
//reg number should remain static, but it doesn't hurt
//to parameterize
parameter numRegs = 32;
//intermediary signals - useful for debugging
//and easy instatiation of generated modules
logic [numRegs-1:0] [`XLEN-1:0] regInput;
logic [numRegs-1:0] [`XLEN-1:0] regOutput;
//generate fp registers themselves
genvar i;
generate
for (i = 0; i < numRegs; i = i + 1) begin:register
floprc #(`XLEN) freg(.clk(clk), .reset(reset), .clear(clear), .d(regInput[i][`XLEN-1:0]), .q(regOutput[i][`XLEN-1:0]));
end
endgenerate
//address decoder
//3 are used for this fp register set
//used exclusively for fma
//defaults to outputting zeroes
always_comb begin
//adderss 1 decoder
case(adr1)
5'b00000 : readData1 = regOutput[0];
5'b00001 : readData1 = regOutput[1];
5'b00010 : readData1 = regOutput[2];
5'b00011 : readData1 = regOutput[3];
5'b00100 : readData1 = regOutput[4];
5'b00101 : readData1 = regOutput[5];
5'b00110 : readData1 = regOutput[6];
5'b00111 : readData1 = regOutput[7];
5'b01000 : readData1 = regOutput[8];
5'b01001 : readData1 = regOutput[9];
5'b01010 : readData1 = regOutput[10];
5'b01011 : readData1 = regOutput[11];
5'b01100 : readData1 = regOutput[12];
5'b01101 : readData1 = regOutput[13];
5'b01110 : readData1 = regOutput[14];
5'b01111 : readData1 = regOutput[15];
5'b10000 : readData1 = regOutput[16];
5'b10001 : readData1 = regOutput[17];
5'b10010 : readData1 = regOutput[18];
5'b10011 : readData1 = regOutput[19];
5'b10100 : readData1 = regOutput[20];
5'b10101 : readData1 = regOutput[21];
5'b10110 : readData1 = regOutput[22];
5'b10111 : readData1 = regOutput[23];
5'b11000 : readData1 = regOutput[24];
5'b11001 : readData1 = regOutput[25];
5'b11010 : readData1 = regOutput[26];
5'b11011 : readData1 = regOutput[27];
5'b11100 : readData1 = regOutput[28];
5'b11101 : readData1 = regOutput[29];
5'b11110 : readData1 = regOutput[30];
5'b11111 : readData1 = regOutput[31];
default : readData1 = `XLEN'h0;
endcase
//address 2 decoder
case(adr2)
5'b00000 : readData2 = regOutput[0];
5'b00001 : readData2 = regOutput[1];
5'b00010 : readData2 = regOutput[2];
5'b00011 : readData2 = regOutput[3];
5'b00100 : readData2 = regOutput[4];
5'b00101 : readData2 = regOutput[5];
5'b00110 : readData2 = regOutput[6];
5'b00111 : readData2 = regOutput[7];
5'b01000 : readData2 = regOutput[8];
5'b01001 : readData2 = regOutput[9];
5'b01010 : readData2 = regOutput[10];
5'b01011 : readData2 = regOutput[11];
5'b01100 : readData2 = regOutput[12];
5'b01101 : readData2 = regOutput[13];
5'b01110 : readData2 = regOutput[14];
5'b01111 : readData2 = regOutput[15];
5'b10000 : readData2 = regOutput[16];
5'b10001 : readData2 = regOutput[17];
5'b10010 : readData2 = regOutput[18];
5'b10011 : readData2 = regOutput[19];
5'b10100 : readData2 = regOutput[20];
5'b10101 : readData2 = regOutput[21];
5'b10110 : readData2 = regOutput[22];
5'b10111 : readData2 = regOutput[23];
5'b11000 : readData2 = regOutput[24];
5'b11001 : readData2 = regOutput[25];
5'b11010 : readData2 = regOutput[26];
5'b11011 : readData2 = regOutput[27];
5'b11100 : readData2 = regOutput[28];
5'b11101 : readData2 = regOutput[29];
5'b11110 : readData2 = regOutput[30];
5'b11111 : readData2 = regOutput[31];
default : readData2 = `XLEN'h0;
endcase
//address 3 decoder
case(adr3)
5'b00000 : readData3 = regOutput[0];
5'b00001 : readData3 = regOutput[1];
5'b00010 : readData3 = regOutput[2];
5'b00011 : readData3 = regOutput[3];
5'b00100 : readData3 = regOutput[4];
5'b00101 : readData3 = regOutput[5];
5'b00110 : readData3 = regOutput[6];
5'b00111 : readData3 = regOutput[7];
5'b01000 : readData3 = regOutput[8];
5'b01001 : readData3 = regOutput[9];
5'b01010 : readData3 = regOutput[10];
5'b01011 : readData3 = regOutput[11];
5'b01100 : readData3 = regOutput[12];
5'b01101 : readData3 = regOutput[13];
5'b01110 : readData3 = regOutput[14];
5'b01111 : readData3 = regOutput[15];
5'b10000 : readData3 = regOutput[16];
5'b10001 : readData3 = regOutput[17];
5'b10010 : readData3 = regOutput[18];
5'b10011 : readData3 = regOutput[19];
5'b10100 : readData3 = regOutput[20];
5'b10101 : readData3 = regOutput[21];
5'b10110 : readData3 = regOutput[22];
5'b10111 : readData3 = regOutput[23];
5'b11000 : readData3 = regOutput[24];
5'b11001 : readData3 = regOutput[25];
5'b11010 : readData3 = regOutput[26];
5'b11011 : readData3 = regOutput[27];
5'b11100 : readData3 = regOutput[28];
5'b11101 : readData3 = regOutput[29];
5'b11110 : readData3 = regOutput[30];
5'b11111 : readData3 = regOutput[31];
default : readData3 = `XLEN'h0;
endcase
end
//destination register decoder
//only change input values on write
//defaults to undefined with invalid address
//
//note - this is an intermediary signal, so
//this is not asynch assignment. FF in flopr
//will not update data until clk pulse
always_comb begin
if(write) begin
case(rd)
5'b00000 : regInput[0] = writeData;
5'b00001 : regInput[1] = writeData;
5'b00010 : regInput[2] = writeData;
5'b00011 : regInput[3] = writeData;
5'b00100 : regInput[4] = writeData;
5'b00101 : regInput[5] = writeData;
5'b00110 : regInput[6] = writeData;
5'b00111 : regInput[7] = writeData;
5'b01000 : regInput[8] = writeData;
5'b01001 : regInput[9] = writeData;
5'b01010 : regInput[10] = writeData;
5'b01011 : regInput[11] = writeData;
5'b01100 : regInput[12] = writeData;
5'b01101 : regInput[13] = writeData;
5'b01110 : regInput[14] = writeData;
5'b01111 : regInput[15] = writeData;
5'b10000 : regInput[16] = writeData;
5'b10001 : regInput[17] = writeData;
5'b10010 : regInput[18] = writeData;
5'b10011 : regInput[19] = writeData;
5'b10100 : regInput[20] = writeData;
5'b10101 : regInput[21] = writeData;
5'b10110 : regInput[22] = writeData;
5'b10111 : regInput[23] = writeData;
5'b11000 : regInput[24] = writeData;
5'b11001 : regInput[25] = writeData;
5'b11010 : regInput[26] = writeData;
5'b11011 : regInput[27] = writeData;
5'b11100 : regInput[28] = writeData;
5'b11101 : regInput[29] = writeData;
5'b11110 : regInput[30] = writeData;
5'b11111 : regInput[31] = writeData;
default : regInput[0] = `XLEN'hx;
endcase
end
end
endmodule

View File

@ -25,7 +25,7 @@
`include "wally-config.vh"
module FPregfile (
module fregfile (
input logic clk, reset,
input logic we4,
input logic [ 4:0] a1, a2, a3, a4,

View File

@ -1,13 +1,12 @@
//performs the fsgnj/fsgnjn/fsgnjx RISCV instructions
module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE);
module fsgn (
input logic [63:0] SrcXE, SrcYE,
input logic [1:0] SgnOpCodeE,
output logic [63:0] SgnResE,
output logic SgnNVE);
input [63:0] SrcXE, SrcYE;
input [1:0] SgnOpCodeE;
output [63:0] SgnResultE;
output [4:0] SgnFlagsE;
wire AonesExp;
logic AonesExp;
//op code designation:
//
@ -16,8 +15,8 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE);
//10 - fsgnjx - XOR sign values of SrcXE & SrcYE
//
assign SgnResultE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]);
assign SgnResultE[62:0] = SrcXE[62:0];
assign SgnResE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]);
assign SgnResE[62:0] = SrcXE[62:0];
//If the exponent is all ones, then the value is either Inf or NaN,
//both of which will produce a QNaN/SNaN value of some sort. This will
@ -26,6 +25,6 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE);
//the only flag that can occur during this operation is invalid
//due to changing sign on already existing NaN
assign SgnFlagsE = {AonesExp & SgnResultE[63], 1'b0, 1'b0, 1'b0, 1'b0};
assign SgnNVE = AonesExp & SgnResE[63];
endmodule

View File

@ -1,89 +0,0 @@
// Brent-Kung Prefix Adder
module ling_bk13 (cout, sum, a, b, cin);
input [12:0] a, b;
input cin;
output [12:0] sum;
output cout;
wire [13:0] p,g;
wire [13:1] h,c;
// pre-computation
assign p={a|b,1'b1};
assign g={a&b, cin};
// prefix tree
ling_brent_kung prefix_tree(h, c, p[12:0], g[12:0]);
// post-computation
assign h[13]=g[13]|c[13];
assign sum=p[13:1]^h|g[13:1]&c;
assign cout=p[13]&h[13];
endmodule
module ling_brent_kung (h, c, p, g);
input [12:0] p;
input [13:0] g;
output [13:1] h;
output [13:1] c;
// parallel-prefix, Brent-Kung
// Stage 1: Generates H/I pairs that span 1 bits
rgry g_1_0 (H_1_0, {g[1],g[0]});
rblk b_3_2 (H_3_2, I_3_2, {g[3],g[2]}, {p[2],p[1]});
rblk b_5_4 (H_5_4, I_5_4, {g[5],g[4]}, {p[4],p[3]});
rblk b_7_6 (H_7_6, I_7_6, {g[7],g[6]}, {p[6],p[5]});
rblk b_9_8 (H_9_8, I_9_8, {g[9],g[8]}, {p[8],p[7]});
rblk b_11_10 (H_11_10, I_11_10, {g[11],g[10]}, {p[10],p[9]});
rblk b_13_12 (H_13_12, I_13_12, {g[13],g[12]}, {p[12],p[11]});
// Stage 2: Generates H/I pairs that span 2 bits
grey g_3_0 (H_3_0, {H_3_2,H_1_0}, I_3_2);
black b_7_4 (H_7_4, I_7_4, {H_7_6,H_5_4}, {I_7_6,I_5_4});
black b_11_8 (H_11_8, I_11_8, {H_11_10,H_9_8}, {I_11_10,I_9_8});
// Stage 3: Generates H/I pairs that span 4 bits
grey g_7_0 (H_7_0, {H_7_4,H_3_0}, I_7_4);
// Stage 4: Generates H/I pairs that span 8 bits
// Stage 5: Generates H/I pairs that span 4 bits
grey g_11_0 (H_11_0, {H_11_8,H_7_0}, I_11_8);
// Stage 6: Generates H/I pairs that span 2 bits
grey g_5_0 (H_5_0, {H_5_4,H_3_0}, I_5_4);
grey g_9_0 (H_9_0, {H_9_8,H_7_0}, I_9_8);
// Last grey cell stage
grey g_2_0 (H_2_0, {g[2],H_1_0}, p[1]);
grey g_4_0 (H_4_0, {g[4],H_3_0}, p[3]);
grey g_6_0 (H_6_0, {g[6],H_5_0}, p[5]);
grey g_8_0 (H_8_0, {g[8],H_7_0}, p[7]);
grey g_10_0 (H_10_0, {g[10],H_9_0}, p[9]);
grey g_12_0 (H_12_0, {g[12],H_11_0}, p[11]);
// Final Stage: Apply c_k+1=p_k&H_k_0
assign c[1]=g[0];
assign h[1]=H_1_0; assign c[2]=p[1]&H_1_0;
assign h[2]=H_2_0; assign c[3]=p[2]&H_2_0;
assign h[3]=H_3_0; assign c[4]=p[3]&H_3_0;
assign h[4]=H_4_0; assign c[5]=p[4]&H_4_0;
assign h[5]=H_5_0; assign c[6]=p[5]&H_5_0;
assign h[6]=H_6_0; assign c[7]=p[6]&H_6_0;
assign h[7]=H_7_0; assign c[8]=p[7]&H_7_0;
assign h[8]=H_8_0; assign c[9]=p[8]&H_8_0;
assign h[9]=H_9_0; assign c[10]=p[9]&H_9_0;
assign h[10]=H_10_0; assign c[11]=p[10]&H_10_0;
assign h[11]=H_11_0; assign c[12]=p[11]&H_11_0;
assign h[12]=H_12_0; assign c[13]=p[12]&H_12_0;
endmodule

View File

@ -168,3 +168,4 @@ module lz52 (ZP, ZV, B);
endmodule // lz52

0
wally-pipelined/src/fpu/mult_R4_64_64_cs.sv Executable file → Normal file
View File

View File

@ -115,11 +115,11 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
assign B_12_overflow = {8'h0, 3'b0, normal_overflow};
assign B_12_underflow = {8'h0, 3'b0, normal_underflow};
cla52 add1(Tmant, Cout, A[62:11], B);
cla52 add1(Tmant, Cout, A[62:11], B); //***adder
cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow);
cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow); //***adder
cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow);
cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow); //***adder
// Now that rounding is done, we compute the final exponent
// and test for special cases.

View File

@ -1,204 +0,0 @@
module sbtm_a4 (input logic [7:0] a,
output logic [13:0] y);
always_comb
case(a)
8'b01000000: y = 14'b10110100010111;
8'b01000001: y = 14'b10110010111111;
8'b01000010: y = 14'b10110001101000;
8'b01000011: y = 14'b10110000010011;
8'b01000100: y = 14'b10101111000001;
8'b01000101: y = 14'b10101101110000;
8'b01000110: y = 14'b10101100100001;
8'b01000111: y = 14'b10101011010011;
8'b01001000: y = 14'b10101010000111;
8'b01001001: y = 14'b10101000111101;
8'b01001010: y = 14'b10100111110100;
8'b01001011: y = 14'b10100110101101;
8'b01001100: y = 14'b10100101100111;
8'b01001101: y = 14'b10100100100010;
8'b01001110: y = 14'b10100011011111;
8'b01001111: y = 14'b10100010011101;
8'b01010000: y = 14'b10100001011100;
8'b01010001: y = 14'b10100000011100;
8'b01010010: y = 14'b10011111011110;
8'b01010011: y = 14'b10011110100001;
8'b01010100: y = 14'b10011101100100;
8'b01010101: y = 14'b10011100101001;
8'b01010110: y = 14'b10011011101111;
8'b01010111: y = 14'b10011010110110;
8'b01011000: y = 14'b10011001111110;
8'b01011001: y = 14'b10011001000110;
8'b01011010: y = 14'b10011000010000;
8'b01011011: y = 14'b10010111011011;
8'b01011100: y = 14'b10010110100110;
8'b01011101: y = 14'b10010101110011;
8'b01011110: y = 14'b10010101000000;
8'b01011111: y = 14'b10010100001110;
8'b01100000: y = 14'b10010011011100;
8'b01100001: y = 14'b10010010101100;
8'b01100010: y = 14'b10010001111100;
8'b01100011: y = 14'b10010001001101;
8'b01100100: y = 14'b10010000011111;
8'b01100101: y = 14'b10001111110001;
8'b01100110: y = 14'b10001111000100;
8'b01100111: y = 14'b10001110011000;
8'b01101000: y = 14'b10001101101100;
8'b01101001: y = 14'b10001101000001;
8'b01101010: y = 14'b10001100010110;
8'b01101011: y = 14'b10001011101100;
8'b01101100: y = 14'b10001011000011;
8'b01101101: y = 14'b10001010011010;
8'b01101110: y = 14'b10001001110010;
8'b01101111: y = 14'b10001001001010;
8'b01110000: y = 14'b10001000100011;
8'b01110001: y = 14'b10000111111101;
8'b01110010: y = 14'b10000111010111;
8'b01110011: y = 14'b10000110110001;
8'b01110100: y = 14'b10000110001100;
8'b01110101: y = 14'b10000101100111;
8'b01110110: y = 14'b10000101000011;
8'b01110111: y = 14'b10000100011111;
8'b01111000: y = 14'b10000011111100;
8'b01111001: y = 14'b10000011011001;
8'b01111010: y = 14'b10000010110111;
8'b01111011: y = 14'b10000010010101;
8'b01111100: y = 14'b10000001110011;
8'b01111101: y = 14'b10000001010010;
8'b01111110: y = 14'b10000000110001;
8'b01111111: y = 14'b10000000010001;
8'b10000000: y = 14'b01111111110001;
8'b10000001: y = 14'b01111111010001;
8'b10000010: y = 14'b01111110110010;
8'b10000011: y = 14'b01111110010011;
8'b10000100: y = 14'b01111101110101;
8'b10000101: y = 14'b01111101010110;
8'b10000110: y = 14'b01111100111001;
8'b10000111: y = 14'b01111100011011;
8'b10001000: y = 14'b01111011111110;
8'b10001001: y = 14'b01111011100001;
8'b10001010: y = 14'b01111011000100;
8'b10001011: y = 14'b01111010101000;
8'b10001100: y = 14'b01111010001100;
8'b10001101: y = 14'b01111001110000;
8'b10001110: y = 14'b01111001010101;
8'b10001111: y = 14'b01111000111010;
8'b10010000: y = 14'b01111000011111;
8'b10010001: y = 14'b01111000000100;
8'b10010010: y = 14'b01110111101010;
8'b10010011: y = 14'b01110111010000;
8'b10010100: y = 14'b01110110110110;
8'b10010101: y = 14'b01110110011101;
8'b10010110: y = 14'b01110110000100;
8'b10010111: y = 14'b01110101101011;
8'b10011000: y = 14'b01110101010010;
8'b10011001: y = 14'b01110100111001;
8'b10011010: y = 14'b01110100100001;
8'b10011011: y = 14'b01110100001001;
8'b10011100: y = 14'b01110011110001;
8'b10011101: y = 14'b01110011011010;
8'b10011110: y = 14'b01110011000010;
8'b10011111: y = 14'b01110010101011;
8'b10100000: y = 14'b01110010010100;
8'b10100001: y = 14'b01110001111110;
8'b10100010: y = 14'b01110001100111;
8'b10100011: y = 14'b01110001010001;
8'b10100100: y = 14'b01110000111011;
8'b10100101: y = 14'b01110000100101;
8'b10100110: y = 14'b01110000001111;
8'b10100111: y = 14'b01101111111010;
8'b10101000: y = 14'b01101111100101;
8'b10101001: y = 14'b01101111010000;
8'b10101010: y = 14'b01101110111011;
8'b10101011: y = 14'b01101110100110;
8'b10101100: y = 14'b01101110010001;
8'b10101101: y = 14'b01101101111101;
8'b10101110: y = 14'b01101101101001;
8'b10101111: y = 14'b01101101010101;
8'b10110000: y = 14'b01101101000001;
8'b10110001: y = 14'b01101100101101;
8'b10110010: y = 14'b01101100011010;
8'b10110011: y = 14'b01101100000110;
8'b10110100: y = 14'b01101011110011;
8'b10110101: y = 14'b01101011100000;
8'b10110110: y = 14'b01101011001101;
8'b10110111: y = 14'b01101010111010;
8'b10111000: y = 14'b01101010101000;
8'b10111001: y = 14'b01101010010101;
8'b10111010: y = 14'b01101010000011;
8'b10111011: y = 14'b01101001110001;
8'b10111100: y = 14'b01101001011111;
8'b10111101: y = 14'b01101001001101;
8'b10111110: y = 14'b01101000111100;
8'b10111111: y = 14'b01101000101010;
8'b11000000: y = 14'b01101000011001;
8'b11000001: y = 14'b01101000000111;
8'b11000010: y = 14'b01100111110110;
8'b11000011: y = 14'b01100111100101;
8'b11000100: y = 14'b01100111010100;
8'b11000101: y = 14'b01100111000011;
8'b11000110: y = 14'b01100110110011;
8'b11000111: y = 14'b01100110100010;
8'b11001000: y = 14'b01100110010010;
8'b11001001: y = 14'b01100110000010;
8'b11001010: y = 14'b01100101110010;
8'b11001011: y = 14'b01100101100001;
8'b11001100: y = 14'b01100101010010;
8'b11001101: y = 14'b01100101000010;
8'b11001110: y = 14'b01100100110010;
8'b11001111: y = 14'b01100100100011;
8'b11010000: y = 14'b01100100010011;
8'b11010001: y = 14'b01100100000100;
8'b11010010: y = 14'b01100011110101;
8'b11010011: y = 14'b01100011100101;
8'b11010100: y = 14'b01100011010110;
8'b11010101: y = 14'b01100011000111;
8'b11010110: y = 14'b01100010111001;
8'b11010111: y = 14'b01100010101010;
8'b11011000: y = 14'b01100010011011;
8'b11011001: y = 14'b01100010001101;
8'b11011010: y = 14'b01100001111110;
8'b11011011: y = 14'b01100001110000;
8'b11011100: y = 14'b01100001100010;
8'b11011101: y = 14'b01100001010100;
8'b11011110: y = 14'b01100001000110;
8'b11011111: y = 14'b01100000111000;
8'b11100000: y = 14'b01100000101010;
8'b11100001: y = 14'b01100000011100;
8'b11100010: y = 14'b01100000001111;
8'b11100011: y = 14'b01100000000001;
8'b11100100: y = 14'b01011111110100;
8'b11100101: y = 14'b01011111100110;
8'b11100110: y = 14'b01011111011001;
8'b11100111: y = 14'b01011111001100;
8'b11101000: y = 14'b01011110111111;
8'b11101001: y = 14'b01011110110010;
8'b11101010: y = 14'b01011110100101;
8'b11101011: y = 14'b01011110011000;
8'b11101100: y = 14'b01011110001011;
8'b11101101: y = 14'b01011101111110;
8'b11101110: y = 14'b01011101110010;
8'b11101111: y = 14'b01011101100101;
8'b11110000: y = 14'b01011101011001;
8'b11110001: y = 14'b01011101001100;
8'b11110010: y = 14'b01011101000000;
8'b11110011: y = 14'b01011100110100;
8'b11110100: y = 14'b01011100101000;
8'b11110101: y = 14'b01011100011100;
8'b11110110: y = 14'b01011100010000;
8'b11110111: y = 14'b01011100000100;
8'b11111000: y = 14'b01011011111000;
8'b11111001: y = 14'b01011011101100;
8'b11111010: y = 14'b01011011100000;
8'b11111011: y = 14'b01011011010101;
8'b11111100: y = 14'b01011011001001;
8'b11111101: y = 14'b01011010111101;
8'b11111110: y = 14'b01011010110010;
8'b11111111: y = 14'b01011010100111;
default: y = 14'bxxxxxxxxxxxxxx;
endcase // case (a)
endmodule // sbtm_a0

View File

@ -1,90 +0,0 @@
// Sklansky Prefix Adder
module sk14 (cout, sum, a, b, cin);
input [13:0] a, b;
input cin;
output [13:0] sum;
output cout;
wire [14:0] p,g;
wire [13:0] c;
// pre-computation
assign p={a^b,1'b0};
assign g={a&b, cin};
// prefix tree
sklansky prefix_tree(c, p[13:0], g[13:0]);
// post-computation
assign sum=p[14:1]^c;
assign cout=g[14]|(p[14]&c[13]);
endmodule
module sklansky (c, p, g);
input [14:0] p;
input [14:0] g;
output [14:1] c;
// parallel-prefix, Sklansky
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_6_4 (G_6_4, P_6_4, {g[6],G_5_4}, {p[6],P_5_4});
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_10_8 (G_10_8, P_10_8, {g[10],G_9_8}, {p[10],P_9_8});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
black b_14_12 (G_14_12, P_14_12, {g[14],G_13_12}, {p[14],P_13_12});
black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_6_0 (G_6_0, {G_6_4,G_3_0}, P_6_4);
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
black b_12_8 (G_12_8, P_12_8, {g[12],G_11_8}, {p[12],P_11_8});
black b_13_8 (G_13_8, P_13_8, {G_13_12,G_11_8}, {P_13_12,P_11_8});
black b_14_8 (G_14_8, P_14_8, {G_14_12,G_11_8}, {P_14_12,P_11_8});
black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8});
// Stage 4: Generates G/P pairs that span 8 bits
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
grey g_10_0 (G_10_0, {G_10_8,G_7_0}, P_10_8);
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
grey g_12_0 (G_12_0, {G_12_8,G_7_0}, P_12_8);
grey g_13_0 (G_13_0, {G_13_8,G_7_0}, P_13_8);
grey g_14_0 (G_14_0, {G_14_8,G_7_0}, P_14_8);
grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
assign c[14]=G_13_0;
endmodule

View File

@ -1,195 +0,0 @@
///////////////////////////////////////////
// lzd.sv
//
// Written: James.Stine@okstate.edu 1 February 2021
// Modified:
//
// Purpose: Integer Divide instructions
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
/* verilator lint_off DECLFILENAME */
// Original idea came from V. G. Oklobdzija, "An algorithmic and novel
// design of a leading zero detector circuit: comparison with logic
// synthesis," in IEEE Transactions on Very Large Scale Integration
// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi:
// 10.1109/92.273153.
// Modified to be more hierarchical
module lz2 (P, V, B);
input logic [1:0] B;
output logic P;
output logic V;
assign V = B[0] | B[1];
assign P = B[0] & ~B[1];
endmodule // lz2
module lzd_hier #(parameter WIDTH=8)
(input logic [WIDTH-1:0] B,
output logic [$clog2(WIDTH)-1:0] ZP,
output logic ZV);
if (WIDTH == 128)
lz128 lzd127 (ZP, ZV, B);
else if (WIDTH == 64)
lz64 lzd64 (ZP, ZV, B);
else if (WIDTH == 32)
lz32 lzd32 (ZP, ZV, B);
else if (WIDTH == 16)
lz16 lzd16 (ZP, ZV, B);
else if (WIDTH == 8)
lz8 lzd8 (ZP, ZV, B);
else if (WIDTH == 4)
lz4 lzd4 (ZP, ZV, B);
endmodule // lzd_hier
module lz4 (ZP, ZV, B);
input logic [3:0] B;
logic ZPa;
logic ZPb;
logic ZVa;
logic ZVb;
output logic [1:0] ZP;
output logic ZV;
lz2 l1(ZPa, ZVa, B[1:0]);
lz2 l2(ZPb, ZVb, B[3:2]);
assign ZP[0:0] = ZVb ? ZPb : ZPa;
assign ZP[1] = ~ZVb;
assign ZV = ZVa | ZVb;
endmodule
module lz8 (ZP, ZV, B);
input logic [7:0] B;
logic [1:0] ZPa;
logic [1:0] ZPb;
logic ZVa;
logic ZVb;
output logic [2:0] ZP;
output logic ZV;
lz4 l1(ZPa, ZVa, B[3:0]);
lz4 l2(ZPb, ZVb, B[7:4]);
assign ZP[1:0] = ZVb ? ZPb : ZPa;
assign ZP[2] = ~ZVb;
assign ZV = ZVa | ZVb;
endmodule
module lz16 (ZP, ZV, B);
input logic [15:0] B;
logic [2:0] ZPa;
logic [2:0] ZPb;
logic ZVa;
logic ZVb;
output logic [3:0] ZP;
output logic ZV;
lz8 l1(ZPa, ZVa, B[7:0]);
lz8 l2(ZPb, ZVb, B[15:8]);
assign ZP[2:0] = ZVb ? ZPb : ZPa;
assign ZP[3] = ~ZVb;
assign ZV = ZVa | ZVb;
endmodule // lz16
module lz32 (ZP, ZV, B);
input logic [31:0] B;
logic [3:0] ZPa;
logic [3:0] ZPb;
logic ZVa;
logic ZVb;
output logic [4:0] ZP;
output logic ZV;
lz16 l1(ZPa, ZVa, B[15:0]);
lz16 l2(ZPb, ZVb, B[31:16]);
assign ZP[3:0] = ZVb ? ZPb : ZPa;
assign ZP[4] = ~ZVb;
assign ZV = ZVa | ZVb;
endmodule // lz32
module lz64 (ZP, ZV, B);
input logic [63:0] B;
logic [4:0] ZPa;
logic [4:0] ZPb;
logic ZVa;
logic ZVb;
output logic [5:0] ZP;
output logic ZV;
lz32 l1(ZPa, ZVa, B[31:0]);
lz32 l2(ZPb, ZVb, B[63:32]);
assign ZP[4:0] = ZVb ? ZPb : ZPa;
assign ZP[5] = ~ZVb;
assign ZV = ZVa | ZVb;
endmodule // lz64
module lz128 (ZP, ZV, B);
input logic [127:0] B;
logic [5:0] ZPa;
logic [5:0] ZPb;
logic ZVa;
logic ZVb;
output logic [6:0] ZP;
output logic ZV;
lz64 l1(ZPa, ZVa, B[64:0]);
lz64 l2(ZPb, ZVb, B[127:63]);
assign ZP[5:0] = ZVb ? ZPb : ZPa;
assign ZP[6] = ~ZVb;
assign ZV = ZVa | ZVb;
endmodule // lz128
/* verilator lint_on DECLFILENAME */

View File

@ -31,8 +31,8 @@ module hazard(
// Detect hazards
input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM,
input logic LoadStallD, MulDivStallD, CSRRdStallD,
input logic DataStall, ICacheStallF,
input logic FPUStallD, FStallD,
input logic DCacheStall, ICacheStallF,
input logic FPUStallD, FStallD,
input logic DivBusyE,FDivBusyE,
// Stall & flush outputs
output logic StallF, StallD, StallE, StallM, StallW,
@ -55,16 +55,16 @@ module hazard(
// A stage must stall if the next stage is stalled
// If any stages are stalled, the first stage that isn't stalled must flush.
assign StallFCause = CSRWritePendingDEM && ~(TrapM || RetM || BPPredWrongE);
assign StallDCause = (LoadStallD || MulDivStallD || CSRRdStallD || FPUStallD || FStallD) && ~(TrapM || RetM || BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous
assign StallECause = DivBusyE || FDivBusyE;
assign StallFCause = CSRWritePendingDEM && ~(TrapM | RetM | BPPredWrongE);
assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous
assign StallECause = DivBusyE | FDivBusyE;
assign StallMCause = 0;
assign StallWCause = DataStall || ICacheStallF;
assign StallWCause = DCacheStall | ICacheStallF;
assign StallF = StallFCause || StallD;
assign StallD = StallDCause || StallE;
assign StallE = StallECause || StallM;
assign StallM = StallMCause || StallW;
assign StallF = StallFCause | StallD;
assign StallD = StallDCause | StallE;
assign StallE = StallECause | StallM;
assign StallM = StallMCause | StallW;
assign StallW = StallWCause;
//assign FirstUnstalledD = (~StallD & StallF & ~MulDivStallD);
@ -76,8 +76,8 @@ module hazard(
// Each stage flushes if the previous stage is the last one stalled (for cause) or the system has reason to flush
assign FlushF = BPPredWrongE;
assign FlushD = FirstUnstalledD || TrapM || RetM || BPPredWrongE;
assign FlushE = FirstUnstalledE || TrapM || RetM || BPPredWrongE;
assign FlushM = FirstUnstalledM || TrapM || RetM;
assign FlushW = FirstUnstalledW || TrapM;
assign FlushD = FirstUnstalledD | TrapM | RetM | BPPredWrongE;
assign FlushE = FirstUnstalledE | TrapM | RetM | BPPredWrongE;
assign FlushM = FirstUnstalledM | TrapM | RetM;
assign FlushW = FirstUnstalledW | TrapM;
endmodule

View File

@ -31,31 +31,34 @@ module ieu (
input logic [31:0] InstrD,
input logic IllegalIEUInstrFaultD,
output logic IllegalBaseInstrFaultD,
output logic RegWriteD,
output logic RegWriteD,
// Execute Stage interface
input logic [`XLEN-1:0] PCE,
input logic [`XLEN-1:0] PCLinkE,
input logic FWriteIntE,
input logic IllegalFPUInstrE,
input logic [`XLEN-1:0] FWriteDataE,
input logic FWriteIntE,
input logic IllegalFPUInstrE,
input logic [`XLEN-1:0] FWriteDataE,
output logic [`XLEN-1:0] PCTargetE,
output logic MulDivE, W64E,
output logic [2:0] Funct3E,
output logic [`XLEN-1:0] SrcAE, SrcBE,
input logic FWriteIntM,
// Memory stage interface
input logic DataMisalignedM,
input logic DataMisalignedM, // from LSU
input logic SquashSCW, // from LSU
output logic [1:0] MemRWM, // read/write control goes to LSU
output logic [1:0] AtomicM, // atomic control goes to LSU
output logic [`XLEN-1:0] MemAdrM, WriteDataM, // Address and write data to LSU
output logic [2:0] Funct3M, // size and signedness to LSU
output logic [`XLEN-1:0] SrcAM, // to privilege and fpu
input logic DataAccessFaultM,
input logic FWriteIntM,
input logic [`XLEN-1:0] FIntResM,
output logic [1:0] MemRWM,
output logic [1:0] AtomicM,
output logic [`XLEN-1:0] MemAdrM, WriteDataM,
output logic [`XLEN-1:0] SrcAM,
output logic [2:0] Funct3M,
input logic [`XLEN-1:0] FIntResM,
// Writeback stage
input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW,
input logic FWriteIntW,
input logic SquashSCW,
input logic FWriteIntW,
// input logic [`XLEN-1:0] PCLinkW,
output logic InstrValidM, InstrValidW,
// hazards

View File

@ -27,64 +27,62 @@
`include "wally-config.vh"
module ifu (
input logic clk, reset,
input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushF, FlushD, FlushE, FlushM, FlushW,
input logic clk, reset,
input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushF, FlushD, FlushE, FlushM, FlushW,
// Fetch
input logic [`XLEN-1:0] InstrInF,
input logic InstrAckF,
output logic [`XLEN-1:0] PCF,
input logic [`XLEN-1:0] InstrInF,
input logic InstrAckF,
output logic [`XLEN-1:0] PCF,
output logic [`PA_BITS-1:0] InstrPAdrF,
output logic InstrReadF,
output logic ICacheStallF,
output logic InstrReadF,
output logic ICacheStallF,
// Decode
output logic [`XLEN-1:0] PCD,
output logic [`XLEN-1:0] PCD,
// Execute
output logic [`XLEN-1:0] PCLinkE,
input logic PCSrcE,
input logic [`XLEN-1:0] PCTargetE,
output logic [`XLEN-1:0] PCE,
output logic BPPredWrongE,
output logic [`XLEN-1:0] PCLinkE,
input logic PCSrcE,
input logic [`XLEN-1:0] PCTargetE,
output logic [`XLEN-1:0] PCE,
output logic BPPredWrongE,
// Mem
input logic RetM, TrapM,
input logic [`XLEN-1:0] PrivilegedNextPCM,
output logic [31:0] InstrD, InstrE, InstrM, InstrW,
output logic [`XLEN-1:0] PCM,
output logic [4:0] InstrClassM,
output logic BPPredDirWrongM,
output logic BTBPredPCWrongM,
output logic RASPredPCWrongM,
output logic BPPredClassNonCFIWrongM,
input logic RetM, TrapM,
input logic [`XLEN-1:0] PrivilegedNextPCM,
output logic [31:0] InstrD, InstrE, InstrM, InstrW,
output logic [`XLEN-1:0] PCM,
output logic [4:0] InstrClassM,
output logic BPPredDirWrongM,
output logic BTBPredPCWrongM,
output logic RASPredPCWrongM,
output logic BPPredClassNonCFIWrongM,
// Writeback
// output logic [`XLEN-1:0] PCLinkW,
// Faults
input logic IllegalBaseInstrFaultD,
output logic ITLBInstrPageFaultF,
output logic IllegalIEUInstrFaultD,
output logic InstrMisalignedFaultM,
output logic [`XLEN-1:0] InstrMisalignedAdrM,
input logic IllegalBaseInstrFaultD,
output logic ITLBInstrPageFaultF,
output logic IllegalIEUInstrFaultD,
output logic InstrMisalignedFaultM,
output logic [`XLEN-1:0] InstrMisalignedAdrM,
// mmu management
input logic [1:0] PrivilegeModeW,
input logic [`XLEN-1:0] PageTableEntryF,
input logic [1:0] PageTypeF,
input logic [`XLEN-1:0] SATP_REGW,
input logic STATUS_MXR, STATUS_SUM,
input logic ITLBWriteF, ITLBFlushF,
output logic ITLBMissF, ITLBHitF,
input logic [1:0] PrivilegeModeW,
input logic [`XLEN-1:0] PageTableEntryF,
input logic [1:0] PageTypeF,
input logic [`XLEN-1:0] SATP_REGW,
input logic STATUS_MXR, STATUS_SUM,
input logic ITLBWriteF, ITLBFlushF,
input logic WalkerInstrPageFaultF,
output logic ITLBMissF, ITLBHitF,
// pmp/pma (inside mmu) signals. *** temporarily from AHB bus but eventually replace with internal versions pre H
input logic [31:0] HADDR,
input logic [2:0] HSIZE, HBURST,
input logic HWRITE,
input logic ExecuteAccessF, //read, write, and atomic access are all set to zero because this mmu is onlt working with instructinos in the F stage.
input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so they're gonna have to come over into ifu and dmem
input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
output logic PMPInstrAccessFaultF, PMAInstrAccessFaultF,
output logic ISquashBusAccessF,
output logic [5:0] IHSELRegionsF
output logic PMPInstrAccessFaultF, PMAInstrAccessFaultF,
output logic ISquashBusAccessF
// output logic [5:0] IHSELRegionsF
);
@ -105,24 +103,38 @@ module ifu (
logic PMPLoadAccessFaultM, PMPStoreAccessFaultM; // *** these are just so that the mmu has somewhere to put these outputs, they're unused in this stage
// if you're allowed to parameterize outputs/ inputs existence, these are an easy delete.
logic [`PA_BITS-1:0] PCPFmmu;
logic [`PA_BITS-1:0] PCPFmmu, PCNextFPhys; // used to either truncate or expand PCPF and PCNextF into `PA_BITS width.
generate
if (`XLEN==32)
if (`XLEN==32) begin
assign PCPF = PCPFmmu[31:0];
else
assign PCNextFPhys = {{(`PA_BITS-`XLEN){1'b0}}, PCNextF};
end else begin
assign PCPF = {8'b0, PCPFmmu};
assign PCNextFPhys = PCNextF[`PA_BITS-1:0];
end
endgenerate
mmu #(.ENTRY_BITS(`ITLB_ENTRY_BITS), .IMMU(1)) itlb(.TLBAccessType(2'b10), .VirtualAddress(PCF), .Size(2'b10),
.PTEWriteVal(PageTableEntryF), .PageTypeWriteVal(PageTypeF),
.TLBWrite(ITLBWriteF), .TLBFlush(ITLBFlushF),
.PhysicalAddress(PCPFmmu), .TLBMiss(ITLBMissF),
.TLBHit(ITLBHitF), .TLBPageFault(ITLBInstrPageFaultF),
.AtomicAccessM(1'b0), .WriteAccessM(1'b0), .ReadAccessM(1'b0), // *** is this the right way force these bits constant? should they be someething else?
.SquashBusAccess(ISquashBusAccessF), .HSELRegions(IHSELRegionsF),
.*);
mmu #(.ENTRY_BITS(`ITLB_ENTRY_BITS), .IMMU(1))
itlb(.TLBAccessType(2'b10),
.VirtualAddress(PCF),
.Size(2'b10),
.PTEWriteVal(PageTableEntryF),
.PageTypeWriteVal(PageTypeF),
.TLBWrite(ITLBWriteF),
.TLBFlush(ITLBFlushF),
.PhysicalAddress(PCPFmmu),
.TLBMiss(ITLBMissF),
.TLBHit(ITLBHitF),
.TLBPageFault(ITLBInstrPageFaultF),
.ExecuteAccessF(1'b1), // ***dh -- this should eventually change to only true if an instruction fetch is occurring
.AtomicAccessM(1'b0),
.ReadAccessM(1'b0),
.WriteAccessM(1'b0),
.SquashBusAccess(ISquashBusAccessF),
// .HSELRegions(IHSELRegionsF),
.DisableTranslation(1'b0),
.*);
// branch predictor signals
@ -137,8 +149,9 @@ module ifu (
// assign InstrReadF = 1; // *** & ICacheMissF; add later
icache icache(.*,
.PCNextF(PCNextF[`PA_BITS-1:0]),
.PCPF(PCPFmmu));
.PCNextF(PCNextFPhys),
.PCPF(PCPFmmu),
.WalkerInstrPageFaultF(WalkerInstrPageFaultF));
flopenl #(32) AlignedInstrRawDFlop(clk, reset | reset_q, ~StallD, FlushD ? nop : FinalInstrRawF, nop, InstrRawD);

View File

@ -29,104 +29,142 @@
// *** Ross Thompson amo misalignment check?
module lsu (
input logic clk, reset,
input logic StallM, FlushM, StallW, FlushW,
//output logic DataStall,
input logic clk, reset,
input logic StallM, FlushM, StallW, FlushW,
output logic DataStall,
output logic HPTWReady,
// Memory Stage
input logic [1:0] MemRWM,
input logic [`XLEN-1:0] MemAdrM,
input logic [2:0] Funct3M,
//input logic [`XLEN-1:0] ReadDataW,
input logic [`XLEN-1:0] WriteDataM,
input logic [1:0] AtomicM,
input logic CommitM,
output logic [`PA_BITS-1:0] MemPAdrM,
output logic MemReadM, MemWriteM,
output logic [1:0] AtomicMaskedM,
output logic DataMisalignedM,
output logic CommittedM,
// Writeback Stage
input logic MemAckW,
input logic [`XLEN-1:0] ReadDataW,
output logic SquashSCW,
// connected to cpu (controls)
input logic [1:0] MemRWM,
input logic [2:0] Funct3M,
input logic [1:0] AtomicM,
output logic CommittedM,
output logic SquashSCW,
output logic DataMisalignedM,
// address and write data
input logic [`XLEN-1:0] MemAdrM,
input logic [`XLEN-1:0] WriteDataM,
output logic [`XLEN-1:0] ReadDataW,
// cpu privilege
input logic [1:0] PrivilegeModeW,
input logic DTLBFlushM,
// faults
input logic NonBusTrapM,
input logic DataAccessFaultM,
output logic DTLBLoadPageFaultM, DTLBStorePageFaultM,
output logic LoadMisalignedFaultM, LoadAccessFaultM,
output logic StoreMisalignedFaultM, StoreAccessFaultM,
input logic NonBusTrapM,
output logic DTLBLoadPageFaultM, DTLBStorePageFaultM,
output logic LoadMisalignedFaultM, LoadAccessFaultM,
// cpu hazard unit (trap)
output logic StoreMisalignedFaultM, StoreAccessFaultM,
// connect to ahb
input logic CommitM, // should this be generated in the abh interface?
output logic [`PA_BITS-1:0] MemPAdrM, // to ahb
output logic MemReadM, MemWriteM,
output logic [1:0] AtomicMaskedM,
input logic MemAckW, // from ahb
input logic [`XLEN-1:0] HRDATAW, // from ahb
output logic [2:0] Funct3MfromLSU,
output logic StallWfromLSU,
// mmu management
input logic [1:0] PrivilegeModeW,
input logic [`XLEN-1:0] PageTableEntryM,
input logic [1:0] PageTypeM,
input logic [`XLEN-1:0] SATP_REGW,
input logic STATUS_MXR, STATUS_SUM,
input logic DTLBWriteM, DTLBFlushM,
output logic DTLBMissM, DTLBHitM,
// page table walker
input logic [`XLEN-1:0] PageTableEntryM,
input logic [1:0] PageTypeM,
input logic [`XLEN-1:0] SATP_REGW, // from csr
input logic STATUS_MXR, STATUS_SUM, // from csr
input logic DTLBWriteM,
output logic DTLBMissM,
input logic DisableTranslation, // used to stop intermediate PTE physical addresses being saved to TLB.
output logic DTLBHitM, // not connected
// PMA/PMP (inside mmu) signals
input logic [31:0] HADDR, // *** replace all of these H inputs with physical adress once pma checkers have been edited to use paddr as well.
input logic [2:0] HSIZE, HBURST,
input logic HWRITE,
input logic AtomicAccessM, WriteAccessM, ReadAccessM, // execute access is hardwired to zero in this mmu because we're only working with data in the M stage.
input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem
input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker.
output logic PMALoadAccessFaultM, PMAStoreAccessFaultM,
output logic PMPLoadAccessFaultM, PMPStoreAccessFaultM, // *** can these be parameterized? we dont need the m stage ones for the immu and vice versa.
output logic DSquashBusAccessM,
output logic [5:0] DHSELRegionsM
output logic DSquashBusAccessM
// output logic [5:0] DHSELRegionsM
);
logic SquashSCM;
logic DTLBPageFaultM;
logic MemAccessM;
logic [1:0] CurrState, NextState;
logic preCommittedM;
localparam STATE_READY = 0;
localparam STATE_FETCH = 1;
localparam STATE_FETCH_AMO = 2;
localparam STATE_STALLED = 3;
typedef enum {STATE_READY,
STATE_FETCH,
STATE_FETCH_AMO_1,
STATE_FETCH_AMO_2,
STATE_STALLED,
STATE_PTW_READY,
STATE_PTW_FETCH,
STATE_PTW_DONE} statetype;
statetype CurrState, NextState;
logic PMPInstrAccessFaultF, PMAInstrAccessFaultF; // *** these are just so that the mmu has somewhere to put these outputs since they aren't used in dmem
// *** if you're allowed to parameterize outputs/ inputs existence, these are an easy delete.
mmu #(.ENTRY_BITS(`DTLB_ENTRY_BITS), .IMMU(0)) dmmu(.TLBAccessType(MemRWM), .VirtualAddress(MemAdrM), .Size(Funct3M[1:0]),
.PTEWriteVal(PageTableEntryM), .PageTypeWriteVal(PageTypeM),
.TLBWrite(DTLBWriteM), .TLBFlush(DTLBFlushM),
.PhysicalAddress(MemPAdrM), .TLBMiss(DTLBMissM),
.TLBHit(DTLBHitM), .TLBPageFault(DTLBPageFaultM),
.ExecuteAccessF(1'b0),
.SquashBusAccess(DSquashBusAccessM), .HSELRegions(DHSELRegionsM),
.*); // *** the pma/pmp instruction acess faults don't really matter here. is it possible to parameterize which outputs exist?
// for time being until we have a dcache the AHB Lite read bus HRDATAW will be connected to the
// CPU's read data input ReadDataW.
assign ReadDataW = HRDATAW;
mmu #(.ENTRY_BITS(`DTLB_ENTRY_BITS), .IMMU(0))
dmmu(.TLBAccessType(MemRWM),
.VirtualAddress(MemAdrM),
.Size(Funct3M[1:0]),
.PTEWriteVal(PageTableEntryM),
.PageTypeWriteVal(PageTypeM),
.TLBWrite(DTLBWriteM),
.TLBFlush(DTLBFlushM),
.PhysicalAddress(MemPAdrM),
.TLBMiss(DTLBMissM),
.TLBHit(DTLBHitM),
.TLBPageFault(DTLBPageFaultM),
.ExecuteAccessF(1'b0),
.AtomicAccessM(AtomicMaskedM[1]),
.WriteAccessM(MemRWM[0]),
.ReadAccessM(MemRWM[1]),
.SquashBusAccess(DSquashBusAccessM),
// .SelRegions(DHSELRegionsM),
.*); // *** the pma/pmp instruction acess faults don't really matter here. is it possible to parameterize which outputs exist?
// Specify which type of page fault is occurring
assign DTLBLoadPageFaultM = DTLBPageFaultM & MemRWM[1];
assign DTLBStorePageFaultM = DTLBPageFaultM & MemRWM[0];
// Determine if an Unaligned access is taking place
always_comb
case(Funct3M[1:0])
2'b00: DataMisalignedM = 0; // lb, sb, lbu
2'b01: DataMisalignedM = MemAdrM[0]; // lh, sh, lhu
2'b10: DataMisalignedM = MemAdrM[1] | MemAdrM[0]; // lw, sw, flw, fsw, lwu
2'b11: DataMisalignedM = |MemAdrM[2:0]; // ld, sd, fld, fsd
endcase
// Determine if an Unaligned access is taking place
always_comb
case(Funct3M[1:0])
2'b00: DataMisalignedM = 0; // lb, sb, lbu
2'b01: DataMisalignedM = MemAdrM[0]; // lh, sh, lhu
2'b10: DataMisalignedM = MemAdrM[1] | MemAdrM[0]; // lw, sw, flw, fsw, lwu
2'b11: DataMisalignedM = |MemAdrM[2:0]; // ld, sd, fld, fsd
endcase
// Squash unaligned data accesses and failed store conditionals
// *** this is also the place to squash if the cache is hit
// Changed DataMisalignedM to a larger combination of trap sources
// NonBusTrapM is anything that the bus doesn't contribute to producing
// By contrast, using TrapM results in circular logic errors
assign MemReadM = MemRWM[1] & ~NonBusTrapM & CurrState != STATE_STALLED;
assign MemWriteM = MemRWM[0] & ~NonBusTrapM && ~SquashSCM & CurrState != STATE_STALLED;
assign MemReadM = MemRWM[1] & ~NonBusTrapM & ~DTLBMissM & CurrState != STATE_STALLED;
assign MemWriteM = MemRWM[0] & ~NonBusTrapM & ~DTLBMissM & ~SquashSCM & CurrState != STATE_STALLED;
assign AtomicMaskedM = CurrState != STATE_STALLED ? AtomicM : 2'b00 ;
assign MemAccessM = |MemRWM;
assign MemAccessM = MemReadM | MemWriteM;
// Determine if M stage committed
// Reset whenever unstalled. Set when access successfully occurs
@ -135,9 +173,9 @@ module lsu (
// Determine if address is valid
assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1];
assign LoadAccessFaultM = DataAccessFaultM & MemRWM[1];
assign LoadAccessFaultM = MemRWM[1];
assign StoreMisalignedFaultM = DataMisalignedM & MemRWM[0];
assign StoreAccessFaultM = DataAccessFaultM & MemRWM[0];
assign StoreAccessFaultM = MemRWM[0];
// Handle atomic load reserved / store conditional
generate
@ -165,33 +203,111 @@ module lsu (
endgenerate
// Data stall
//assign DataStall = 0;
//assign DataStall = (NextState == STATE_FETCH) || (NextState == STATE_FETCH_AMO_1) || (NextState == STATE_FETCH_AMO_2);
assign HPTWReady = (CurrState == STATE_READY);
// Ross Thompson April 22, 2021
// for now we need to handle the issue where the data memory interface repeately
// requests data from memory rather than issuing a single request.
flopr #(2) stateReg(.clk(clk),
.reset(reset),
.d(NextState),
.q(CurrState));
flopenl #(.TYPE(statetype)) stateReg(.clk(clk),
.load(reset),
.en(1'b1),
.d(NextState),
.val(STATE_READY),
.q(CurrState));
always_comb begin
case (CurrState)
STATE_READY: if (MemRWM[1] & MemRWM[0]) NextState = STATE_FETCH_AMO; // *** should be some misalign check
else if (MemAccessM & ~DataMisalignedM) NextState = STATE_FETCH;
else NextState = STATE_READY;
STATE_FETCH_AMO: if (MemAckW) NextState = STATE_FETCH;
else NextState = STATE_FETCH_AMO;
STATE_FETCH: if (MemAckW & ~StallW) NextState = STATE_READY;
else if (MemAckW & StallW) NextState = STATE_STALLED;
else NextState = STATE_FETCH;
STATE_STALLED: if (~StallW) NextState = STATE_READY;
else NextState = STATE_STALLED;
default: NextState = STATE_READY;
endcase // case (CurrState)
end
STATE_READY:
if (DTLBMissM) begin
NextState = STATE_PTW_READY;
DataStall = 1'b1;
end else if (AtomicMaskedM[1]) begin
NextState = STATE_FETCH_AMO_1; // *** should be some misalign check
DataStall = 1'b1;
end else if((MemReadM & AtomicM[0]) | (MemWriteM & AtomicM[0])) begin
NextState = STATE_FETCH_AMO_2;
DataStall = 1'b1;
end else if (MemAccessM & ~DataMisalignedM) begin
NextState = STATE_FETCH;
DataStall = 1'b1;
end else begin
NextState = STATE_READY;
DataStall = 1'b0;
end
STATE_FETCH_AMO_1: begin
DataStall = 1'b1;
if (MemAckW) begin
NextState = STATE_FETCH_AMO_2;
end else begin
NextState = STATE_FETCH_AMO_1;
end
end
STATE_FETCH_AMO_2: begin
DataStall = 1'b1;
if (MemAckW & ~StallW) begin
NextState = STATE_FETCH_AMO_2;
end else if (MemAckW & StallW) begin
NextState = STATE_STALLED;
end else begin
NextState = STATE_FETCH_AMO_2;
end
end
STATE_FETCH: begin
DataStall = 1'b1;
if (MemAckW & ~StallW) begin
NextState = STATE_READY;
end else if (MemAckW & StallW) begin
NextState = STATE_STALLED;
end else begin
NextState = STATE_FETCH;
end
end
STATE_STALLED: begin
DataStall = 1'b0;
if (~StallW) begin
NextState = STATE_READY;
end else begin
NextState = STATE_STALLED;
end
end
STATE_PTW_READY: begin
DataStall = 1'b0;
if (DTLBWriteM) begin
NextState = STATE_READY;
end else if (MemReadM & ~DataMisalignedM) begin
NextState = STATE_PTW_FETCH;
end else begin
NextState = STATE_PTW_READY;
end
end
STATE_PTW_FETCH : begin
DataStall = 1'b1;
if (MemAckW & ~DTLBWriteM) begin
NextState = STATE_PTW_READY;
end else if (MemAckW & DTLBWriteM) begin
NextState = STATE_READY;
end else begin
NextState = STATE_PTW_FETCH;
end
end
STATE_PTW_DONE: begin
NextState = STATE_READY;
end
default: begin
DataStall = 1'b0;
NextState = STATE_READY;
end
endcase
end // always_comb
// *** for now just pass through size
assign Funct3MfromLSU = Funct3M;
assign StallWfromLSU = StallW;
endmodule

View File

@ -0,0 +1,178 @@
///////////////////////////////////////////
// lsuArb.sv
//
// Written: Ross THompson and Kip Macsai-Goren
// Modified: kmacsaigoren@hmc.edu June 23, 2021
//
// Purpose: LSU arbiter between the CPU's demand request for data memory and
// the page table walker
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
module lsuArb
(input logic clk, reset,
// from page table walker
input logic HPTWTranslate,
input logic HPTWRead,
input logic [`XLEN-1:0] HPTWPAdr,
// to page table walker.
output logic [`XLEN-1:0] HPTWReadPTE,
output logic HPTWReady,
output logic HPTWStall,
// from CPU
input logic [1:0] MemRWM,
input logic [2:0] Funct3M,
input logic [1:0] AtomicM,
input logic [`XLEN-1:0] MemAdrM,
input logic [`XLEN-1:0] WriteDataM,
input logic StallW,
// to CPU
output logic [`XLEN-1:0] ReadDataW,
output logic CommittedM,
output logic SquashSCW,
output logic DataMisalignedM,
output logic DCacheStall,
// to LSU
output logic DisableTranslation,
output logic [1:0] MemRWMtoLSU,
output logic [2:0] Funct3MtoLSU,
output logic [1:0] AtomicMtoLSU,
output logic [`XLEN-1:0] MemAdrMtoLSU,
output logic [`XLEN-1:0] WriteDataMtoLSU,
output logic StallWtoLSU,
// from LSU
input logic CommittedMfromLSU,
input logic SquashSCWfromLSU,
input logic DataMisalignedMfromLSU,
input logic [`XLEN-1:0] ReadDataWFromLSU,
input logic HPTWReadyfromLSU,
input logic DataStall
);
// HPTWTranslate is the request for memory by the page table walker. When
// this is high the page table walker gains priority over the CPU's data
// input. Note the ptw only makes a request after an instruction or data
// tlb miss. It is entirely possible the dcache is currently processing
// a data cache miss when an instruction tlb miss occurs. If an instruction
// in the E stage causes a d cache miss, the d cache will immediately start
// processing the request. Simultaneously the ITLB misses. By the time
// the TLB miss causes the page table walker to issue the first request
// to data memory the d cache is already busy. We can interlock by
// leveraging Stall as a d cache busy. We will need an FSM to handle this.
typedef enum{StateReady,
StatePTWPending,
StatePTWActive} statetype;
statetype CurrState, NextState;
logic SelPTW;
logic HPTWStallD;
flopenl #(.TYPE(statetype)) StateReg(.clk(clk),
.load(reset),
.en(1'b1),
.d(NextState),
.val(StateReady),
.q(CurrState));
always_comb begin
case(CurrState)
StateReady:
if (HPTWTranslate) NextState = StatePTWActive;
else NextState = StateReady;
StatePTWActive:
if (HPTWTranslate) NextState = StatePTWActive;
else NextState = StateReady;
default: NextState = StateReady;
endcase
end
/* -----\/----- EXCLUDED -----\/-----
always_comb begin
case(CurrState)
StateReady:
/-* -----\/----- EXCLUDED -----\/-----
if (HPTWTranslate & DataStall) NextState = StatePTWPending;
else
-----/\----- EXCLUDED -----/\----- *-/
if (HPTWTranslate) NextState = StatePTWActive;
else NextState = StateReady;
StatePTWPending:
if (HPTWTranslate & ~DataStall) NextState = StatePTWActive;
else if (HPTWTranslate & DataStall) NextState = StatePTWPending;
else NextState = StateReady;
StatePTWActive:
if (HPTWTranslate) NextState = StatePTWActive;
else NextState = StateReady;
default: NextState = StateReady;
endcase
end
-----/\----- EXCLUDED -----/\----- */
// multiplex the outputs to LSU
assign DisableTranslation = SelPTW; // change names between SelPTW would be confusing in DTLB.
assign SelPTW = (CurrState == StatePTWActive && HPTWTranslate) || (CurrState == StateReady && HPTWTranslate);
assign MemRWMtoLSU = SelPTW ? {HPTWRead, 1'b0} : MemRWM;
generate
if (`XLEN == 32) begin
assign Funct3MtoLSU = SelPTW ? 3'b010 : Funct3M;
end else begin
assign Funct3MtoLSU = SelPTW ? 3'b011 : Funct3M;
end
endgenerate
assign AtomicMtoLSU = SelPTW ? 2'b00 : AtomicM;
assign MemAdrMtoLSU = SelPTW ? HPTWPAdr : MemAdrM;
assign WriteDataMtoLSU = SelPTW ? `XLEN'b0 : WriteDataM;
assign StallWtoLSU = SelPTW ? 1'b0 : StallW;
// demux the inputs from LSU to walker or cpu's data port.
assign ReadDataW = SelPTW ? `XLEN'b0 : ReadDataWFromLSU; // probably can avoid this demux
assign HPTWReadPTE = SelPTW ? ReadDataWFromLSU : `XLEN'b0 ; // probably can avoid this demux
assign CommittedM = SelPTW ? 1'b0 : CommittedMfromLSU;
assign SquashSCW = SelPTW ? 1'b0 : SquashSCWfromLSU;
assign DataMisalignedM = SelPTW ? 1'b0 : DataMisalignedMfromLSU;
assign HPTWReady = HPTWReadyfromLSU;
// *** need to rename DcacheStall and Datastall.
// not clear at all. I think it should be LSUStall from the LSU,
// which is demuxed to HPTWStall and CPUDataStall? (not sure on this last one).
assign HPTWStall = SelPTW ? DataStall : 1'b1;
//assign HPTWStallD = SelPTW ? DataStall : 1'b1;
/* -----\/----- EXCLUDED -----\/-----
assign HPTWStallD = SelPTW ? DataStall : 1'b1;
flopr #(1) HPTWStallReg (.clk(clk),
.reset(reset),
.d(HPTWStallD),
.q(HPTWStall));
-----/\----- EXCLUDED -----/\----- */
assign DCacheStall = SelPTW ? 1'b1 : DataStall; // *** this is probably going to change.
endmodule

View File

@ -26,13 +26,13 @@
`include "wally-config.vh"
module adrdec (
input logic [31:0] HADDR,
input logic [31:0] Base, Range,
input logic Supported,
input logic AccessValid,
input logic [2:0] Size,
input logic [3:0] SizeMask,
output logic HSEL
input logic [`PA_BITS-1:0] PhysicalAddress,
input logic [`PA_BITS-1:0] Base, Range,
input logic Supported,
input logic AccessValid,
input logic [1:0] Size,
input logic [3:0] SizeMask,
output logic Sel
);
logic Match;
@ -41,12 +41,12 @@ module adrdec (
// determine if an address is in a range starting at the base
// for example, if Base = 0x04002000 and range = 0x00000FFF,
// then anything address between 0x04002000 and 0x04002FFF should match (HSEL=1)
assign Match = &((HADDR ~^ Base) | Range);
assign Match = &((PhysicalAddress ~^ Base) | Range);
// determine if legal size of access is being made (byte, halfword, word, doubleword)
assign SizeValid = SizeMask[Size[1:0]];
assign SizeValid = SizeMask[Size];
assign HSEL = Match && Supported && AccessValid && SizeValid;
assign Sel = Match && Supported && AccessValid && SizeValid;
endmodule

View File

@ -26,19 +26,20 @@
`include "wally-config.vh"
module adrdecs (
input logic [31:0] HADDR, // *** will need to use PAdr in mmu, stick with HADDR in uncore
input logic AccessRW, AccessRX, AccessRWX,
input logic [2:0] HSIZE,
output logic [5:0] HSELRegions
input logic [`PA_BITS-1:0] PhysicalAddress,
input logic AccessRW, AccessRX, AccessRWX,
input logic [1:0] Size,
output logic [5:0] SelRegions
);
// Determine which region of physical memory (if any) is being accessed
// *** eventually uncomment Access signals
adrdec boottimdec(HADDR, `BOOTTIM_BASE, `BOOTTIM_RANGE, `BOOTTIM_SUPPORTED, 1'b1/*AccessRX*/, HSIZE, 4'b1111, HSELRegions[5]);
adrdec timdec(HADDR, `TIM_BASE, `TIM_RANGE, `TIM_SUPPORTED, 1'b1/*AccessRWX*/, HSIZE, 4'b1111, HSELRegions[4]);
adrdec clintdec(HADDR, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, HSIZE, 4'b1111, HSELRegions[3]);
adrdec gpiodec(HADDR, `GPIO_BASE, `GPIO_RANGE, `GPIO_SUPPORTED, AccessRW, HSIZE, 4'b0100, HSELRegions[2]);
adrdec uartdec(HADDR, `UART_BASE, `UART_RANGE, `UART_SUPPORTED, AccessRW, HSIZE, 4'b0001, HSELRegions[1]);
adrdec plicdec(HADDR, `PLIC_BASE, `PLIC_RANGE, `PLIC_SUPPORTED, AccessRW, HSIZE, 4'b0100, HSELRegions[0]);
adrdec boottimdec(PhysicalAddress, `BOOTTIM_BASE, `BOOTTIM_RANGE, `BOOTTIM_SUPPORTED, 1'b1/*AccessRX*/, Size, 4'b1111, SelRegions[5]);
adrdec timdec(PhysicalAddress, `TIM_BASE, `TIM_RANGE, `TIM_SUPPORTED, 1'b1/*AccessRWX*/, Size, 4'b1111, SelRegions[4]);
adrdec clintdec(PhysicalAddress, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, Size, 4'b1111, SelRegions[3]);
adrdec gpiodec(PhysicalAddress, `GPIO_BASE, `GPIO_RANGE, `GPIO_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[2]);
adrdec uartdec(PhysicalAddress, `UART_BASE, `UART_RANGE, `UART_SUPPORTED, AccessRW, Size, 4'b0001, SelRegions[1]);
adrdec plicdec(PhysicalAddress, `PLIC_BASE, `PLIC_RANGE, `PLIC_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[0]);
endmodule

View File

@ -44,6 +44,7 @@ module mmu #(parameter ENTRY_BITS = 3,
// x1 - TLB is accessed for a write
// 11 - TLB is accessed for both read and write
input logic [1:0] TLBAccessType,
input logic DisableTranslation,
// Virtual address input
input logic [`XLEN-1:0] VirtualAddress,
@ -66,17 +67,14 @@ module mmu #(parameter ENTRY_BITS = 3,
output logic TLBPageFault,
// PMA checker signals
input logic [31:0] HADDR,
input logic [2:0] HSIZE, HBURST,
input logic HWRITE,
input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM,
input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
output logic SquashBusAccess, // *** send to privileged unit
output logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM,
output logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM,
output logic [5:0] HSELRegions
output logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM
// output logic [5:0] SelRegions
);
@ -96,4 +94,4 @@ module mmu #(parameter ENTRY_BITS = 3,
assign SquashBusAccess = PMASquashBusAccess || PMPSquashBusAccess;
endmodule
endmodule

View File

@ -30,61 +30,92 @@
`include "wally-config.vh"
/* ***
TO-DO:
- Implement faults on accessed/dirty behavior
*/
TO-DO:
- Implement faults on accessed/dirty behavior
*/
module pagetablewalker (
// Control signals
input logic HCLK, HRESETn,
input logic [`XLEN-1:0] SATP_REGW,
module pagetablewalker
(
// Control signals
input logic clk, reset,
input logic [`XLEN-1:0] SATP_REGW,
// Signals from TLBs (addresses to translate)
input logic [`XLEN-1:0] PCF, MemAdrM,
input logic ITLBMissF, DTLBMissM,
input logic [1:0] MemRWM,
// Signals from TLBs (addresses to translate)
input logic [`XLEN-1:0] PCF, MemAdrM,
input logic ITLBMissF, DTLBMissM,
input logic [1:0] MemRWM,
// Outputs to the TLBs (PTEs to write)
output logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM,
output logic [1:0] PageTypeF, PageTypeM,
output logic ITLBWriteF, DTLBWriteM,
// Outputs to the TLBs (PTEs to write)
output logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM,
output logic [1:0] PageTypeF, PageTypeM,
output logic ITLBWriteF, DTLBWriteM,
// Signals from ahblite (PTEs from memory)
input logic [`XLEN-1:0] MMUReadPTE,
input logic MMUReady,
// Signals to ahblite (memory addresses to access)
output logic [`XLEN-1:0] MMUPAdr,
output logic MMUTranslate,
// Stall signal
output logic MMUStall,
// Faults
output logic WalkerInstrPageFaultF,
output logic WalkerLoadPageFaultM,
output logic WalkerStorePageFaultM
);
// *** modify to send to LSU // *** KMG: These are inputs/results from the ahblite whose addresses should have already been checked, so I don't think they need to be sent through the LSU
input logic [`XLEN-1:0] MMUReadPTE,
input logic MMUReady,
input logic HPTWStall,
// *** modify to send to LSU
output logic [`XLEN-1:0] MMUPAdr, // this probalby should be `PA_BITS wide
output logic MMUTranslate, // *** rename to HPTWReq
output logic HPTWRead,
// Stall signal
output logic MMUStall,
// Faults
output logic WalkerInstrPageFaultF,
output logic WalkerLoadPageFaultM,
output logic WalkerStorePageFaultM
);
// Internal signals
logic [`PPN_BITS-1:0] BasePageTablePPN;
logic [`XLEN-1:0] TranslationVAdr;
logic [`XLEN-1:0] SavedPTE, CurrentPTE;
logic [`PA_BITS-1:0] TranslationPAdr;
logic [`PPN_BITS-1:0] CurrentPPN;
// register TLBs translation miss requests
logic [`XLEN-1:0] TranslationVAdrQ;
logic ITLBMissFQ, DTLBMissMQ;
logic [`PPN_BITS-1:0] BasePageTablePPN;
logic [`XLEN-1:0] TranslationVAdr;
logic [`XLEN-1:0] SavedPTE, CurrentPTE;
logic [`PA_BITS-1:0] TranslationPAdr;
logic [`PPN_BITS-1:0] CurrentPPN;
logic [`SVMODE_BITS-1:0] SvMode;
logic MemStore;
logic MemStore;
// PTE Control Bits
logic Dirty, Accessed, Global, User,
Executable, Writable, Readable, Valid;
logic Dirty, Accessed, Global, User,
Executable, Writable, Readable, Valid;
// PTE descriptions
logic ValidPTE, AccessAlert, MegapageMisaligned, BadMegapage, LeafPTE;
logic ValidPTE, AccessAlert, MegapageMisaligned, BadMegapage, LeafPTE;
// Outputs of walker
logic [`XLEN-1:0] PageTableEntry;
logic [1:0] PageType;
logic [`XLEN-1:0] PageTableEntry;
logic [1:0] PageType;
logic StartWalk;
logic EndWalk;
typedef enum {LEVEL0_WDV,
LEVEL0,
LEVEL1_WDV,
LEVEL1,
LEVEL2_WDV,
LEVEL2,
LEVEL3_WDV,
LEVEL3,
LEAF,
IDLE,
FAULT} statetype;
statetype WalkerState, NextWalkerState;
logic PRegEn;
assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS];
assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0];
@ -92,8 +123,41 @@ module pagetablewalker (
assign MemStore = MemRWM[0];
// Prefer data address translations over instruction address translations
assign TranslationVAdr = (DTLBMissM) ? MemAdrM : PCF;
assign MMUTranslate = DTLBMissM || ITLBMissF;
assign TranslationVAdr = (DTLBMissM) ? MemAdrM : PCF; // *** need to register TranslationVAdr
flopenr #(`XLEN)
TranslationVAdrReg(.clk(clk),
.reset(reset),
.en(StartWalk), // *** use enable later to save power
.d(TranslationVAdr),
.q(TranslationVAdrQ));
flopenrc #(1)
DTLBMissMReg(.clk(clk),
.reset(reset),
.en(StartWalk | EndWalk),
.clear(EndWalk),
.d(DTLBMissM),
.q(DTLBMissMQ));
flopenrc #(1)
ITLBMissMReg(.clk(clk),
.reset(reset),
.en(StartWalk | EndWalk),
.clear(EndWalk),
.d(ITLBMissF),
.q(ITLBMissFQ));
assign StartWalk = WalkerState == IDLE && (DTLBMissM | ITLBMissF);
assign EndWalk = WalkerState == LEAF ||
//(WalkerState == LEVEL0 && ValidPTE && LeafPTE && ~AccessAlert) ||
(WalkerState == LEVEL1 && ValidPTE && LeafPTE && ~AccessAlert) ||
(WalkerState == LEVEL2 && ValidPTE && LeafPTE && ~AccessAlert) ||
(WalkerState == LEVEL3 && ValidPTE && LeafPTE && ~AccessAlert) ||
(WalkerState == FAULT);
assign MMUTranslate = (DTLBMissMQ | ITLBMissFQ) & ~EndWalk;
//assign MMUTranslate = DTLBMissM | ITLBMissF;
// unswizzle PTE bits
assign {Dirty, Accessed, Global, User,
@ -102,7 +166,7 @@ module pagetablewalker (
// Assign PTE descriptors common across all XLEN values
assign LeafPTE = Executable | Writable | Readable;
assign ValidPTE = Valid && ~(Writable && ~Readable);
assign AccessAlert = ~Accessed || (MemStore && ~Dirty);
assign AccessAlert = ~Accessed | (MemStore & ~Dirty);
// Assign specific outputs to general outputs
assign PageTableEntryF = PageTableEntry;
@ -110,43 +174,113 @@ module pagetablewalker (
assign PageTypeF = PageType;
assign PageTypeM = PageType;
localparam LEVEL0 = 3'h0;
localparam LEVEL1 = 3'h1;
// space left for more levels
localparam LEAF = 3'h5;
localparam IDLE = 3'h6;
localparam FAULT = 3'h7;
logic [2:0] WalkerState, NextWalkerState;
generate
if (`XLEN == 32) begin
logic [9:0] VPN1, VPN0;
flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
flopenl #(.TYPE(statetype)) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState);
/* -----\/----- EXCLUDED -----\/-----
assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV) && ~HPTWStall;
-----/\----- EXCLUDED -----/\----- */
// State transition logic
always_comb begin
PRegEn = 1'b0;
TranslationPAdr = '0;
HPTWRead = 1'b0;
MMUStall = 1'b1;
PageTableEntry = '0;
PageType = '0;
DTLBWriteM = '0;
ITLBWriteF = '0;
WalkerInstrPageFaultF = 1'b0;
WalkerLoadPageFaultM = 1'b0;
WalkerStorePageFaultM = 1'b0;
case (WalkerState)
IDLE: if (MMUTranslate) NextWalkerState = LEVEL1;
else NextWalkerState = IDLE;
LEVEL1: if (~MMUReady) NextWalkerState = LEVEL1;
// *** <FUTURE WORK> According to the architecture, we should
// fault upon finding a superpage that is misaligned or has 0
// access bit. The following commented line of code is
// supposed to perform that check. However, it is untested.
else if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF;
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0;
else NextWalkerState = FAULT;
LEVEL0: if (~MMUReady) NextWalkerState = LEVEL0;
else if (ValidPTE && LeafPTE && ~AccessAlert)
NextWalkerState = LEAF;
else NextWalkerState = FAULT;
LEAF: if (MMUTranslate) NextWalkerState = LEVEL1;
else NextWalkerState = IDLE;
FAULT: if (MMUTranslate) NextWalkerState = LEVEL1;
else NextWalkerState = IDLE;
IDLE: begin
if (MMUTranslate && SvMode == `SV32) begin // *** Added SvMode
NextWalkerState = LEVEL1_WDV;
TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00};
HPTWRead = 1'b1;
end else begin
NextWalkerState = IDLE;
TranslationPAdr = '0;
MMUStall = 1'b0;
end
end
LEVEL1_WDV: begin
TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00};
if (HPTWStall) begin
NextWalkerState = LEVEL1_WDV;
end else begin
NextWalkerState = LEVEL1;
PRegEn = 1'b1;
end
end
LEVEL1: begin
// *** <FUTURE WORK> According to the architecture, we should
// fault upon finding a superpage that is misaligned or has 0
// access bit. The following commented line of code is
// supposed to perform that check. However, it is untested.
if (ValidPTE && LeafPTE && ~BadMegapage) begin
NextWalkerState = LEAF;
PageTableEntry = CurrentPTE;
PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00; // *** not sure about this mux?
DTLBWriteM = DTLBMissMQ;
ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions
TranslationPAdr = {2'b00, TranslationVAdrQ[31:0]};
end
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
else if (ValidPTE && ~LeafPTE) begin
NextWalkerState = LEVEL0_WDV;
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
HPTWRead = 1'b1;
end else begin
NextWalkerState = FAULT;
end
end
LEVEL0_WDV: begin
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
if (HPTWStall) begin
NextWalkerState = LEVEL0_WDV;
end else begin
NextWalkerState = LEVEL0;
PRegEn = 1'b1;
end
end
LEVEL0: begin
if (ValidPTE & LeafPTE & ~AccessAlert) begin
NextWalkerState = LEAF;
PageTableEntry = CurrentPTE;
PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00;
DTLBWriteM = DTLBMissMQ;
ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions
TranslationPAdr = {2'b00, TranslationVAdrQ[31:0]};
end else begin
NextWalkerState = FAULT;
end
end
LEAF: begin
NextWalkerState = IDLE;
MMUStall = 1'b0;
end
FAULT: begin
NextWalkerState = IDLE;
WalkerInstrPageFaultF = ~DTLBMissMQ;
WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore;
WalkerStorePageFaultM = DTLBMissMQ && MemStore;
MMUStall = 1'b0;
end
// Default case should never happen, but is included for linter.
default: NextWalkerState = IDLE;
endcase
@ -156,56 +290,23 @@ localparam LEVEL0 = 3'h0;
assign MegapageMisaligned = |(CurrentPPN[9:0]);
assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme
assign VPN1 = TranslationVAdr[31:22];
assign VPN0 = TranslationVAdr[21:12];
assign VPN1 = TranslationVAdrQ[31:22];
assign VPN0 = TranslationVAdrQ[21:12];
// Assign combinational outputs
always_comb begin
// default values
TranslationPAdr = '0;
PageTableEntry = '0;
PageType ='0;
DTLBWriteM = '0;
ITLBWriteF = '0;
WalkerInstrPageFaultF = '0;
WalkerLoadPageFaultM = '0;
WalkerStorePageFaultM = '0;
MMUStall = '1;
//assign HPTWRead = (WalkerState == IDLE && MMUTranslate) ||
// WalkerState == LEVEL2 || WalkerState == LEVEL1;
case (NextWalkerState)
IDLE: begin
MMUStall = '0;
end
LEVEL1: begin
TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00};
end
LEVEL0: begin
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
end
LEAF: begin
// Keep physical address alive to prevent HADDR dropping to 0
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
PageTableEntry = CurrentPTE;
PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00;
DTLBWriteM = DTLBMissM;
ITLBWriteF = ~DTLBMissM; // Prefer data over instructions
end
FAULT: begin
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
WalkerInstrPageFaultF = ~DTLBMissM;
WalkerLoadPageFaultM = DTLBMissM && ~MemStore;
WalkerStorePageFaultM = DTLBMissM && MemStore;
MMUStall = '0; // Drop the stall early to enter trap handling code
end
default: begin
// nothing
end
endcase
end
// Capture page table entry from ahblite
flopenr #(32) ptereg(HCLK, ~HRESETn, MMUReady, MMUReadPTE, SavedPTE);
mux2 #(32) ptemux(SavedPTE, MMUReadPTE, MMUReady, CurrentPTE);
// Capture page table entry from data cache
// *** may need to delay reading this value until the next clock cycle.
// The clk to q latency of the SRAM in the data cache will be long.
// I cannot see directly using this value. This is no different than
// a load delay hazard. This will require rewriting the walker fsm.
// also need a new signal to save. Should be a mealy output of the fsm
// request followed by ~stall.
flopenr #(32) ptereg(clk, reset, PRegEn, MMUReadPTE, SavedPTE);
//mux2 #(32) ptemux(SavedPTE, MMUReadPTE, PRegEn, CurrentPTE);
assign CurrentPTE = SavedPTE;
assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10];
// Assign outputs to ahblite
@ -214,64 +315,206 @@ localparam LEVEL0 = 3'h0;
assign MMUPAdr = TranslationPAdr[31:0];
end else begin
localparam LEVEL2 = 3'h2;
localparam LEVEL3 = 3'h3;
logic [8:0] VPN3, VPN2, VPN1, VPN0;
logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage;
logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage;
flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
flopenl #(.TYPE(statetype)) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState);
/* -----\/----- EXCLUDED -----\/-----
assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV ||
WalkerState == LEVEL2_WDV || WalkerState == LEVEL3_WDV) && ~HPTWStall;
-----/\----- EXCLUDED -----/\----- */
//assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || WalkerState == LEVEL3 ||
// WalkerState == LEVEL2 || WalkerState == LEVEL1;
always_comb begin
PRegEn = 1'b0;
TranslationPAdr = '0;
HPTWRead = 1'b0;
MMUStall = 1'b1;
PageTableEntry = '0;
PageType = '0;
DTLBWriteM = '0;
ITLBWriteF = '0;
WalkerInstrPageFaultF = 1'b0;
WalkerLoadPageFaultM = 1'b0;
WalkerStorePageFaultM = 1'b0;
case (WalkerState)
IDLE: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3;
else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2;
else NextWalkerState = IDLE;
IDLE: begin
if (MMUTranslate && SvMode == `SV48) begin
NextWalkerState = LEVEL3_WDV;
TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000};
HPTWRead = 1'b1;
end else if (MMUTranslate && SvMode == `SV39) begin
NextWalkerState = LEVEL2_WDV;
TranslationPAdr = {BasePageTablePPN, VPN2, 3'b000};
HPTWRead = 1'b1;
end else begin
NextWalkerState = IDLE;
TranslationPAdr = '0;
MMUStall = 1'b0;
end
end
LEVEL3: if (~MMUReady) NextWalkerState = LEVEL3;
// *** <FUTURE WORK> According to the architecture, we should
// fault upon finding a superpage that is misaligned or has 0
// access bit. The following commented line of code is
// supposed to perform that check. However, it is untested.
else if (ValidPTE && LeafPTE && ~BadTerapage) NextWalkerState = LEAF;
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL2;
else NextWalkerState = FAULT;
LEVEL3_WDV: begin
TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000};
if (HPTWStall) begin
NextWalkerState = LEVEL3_WDV;
end else begin
NextWalkerState = LEVEL3;
PRegEn = 1'b1;
end
end
LEVEL3: begin
// *** <FUTURE WORK> According to the architecture, we should
// fault upon finding a superpage that is misaligned or has 0
// access bit. The following commented line of code is
// supposed to perform that check. However, it is untested.
if (ValidPTE && LeafPTE && ~BadTerapage) begin
NextWalkerState = LEAF;
PageTableEntry = CurrentPTE;
PageType = (WalkerState == LEVEL3) ? 2'b11 : // *** not sure about this mux?
((WalkerState == LEVEL2) ? 2'b10 :
((WalkerState == LEVEL1) ? 2'b01 : 2'b00));
DTLBWriteM = DTLBMissMQ;
ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions
TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0];
end
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
else if (ValidPTE && ~LeafPTE) begin
NextWalkerState = LEVEL2_WDV;
TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000};
HPTWRead = 1'b1;
end else begin
NextWalkerState = FAULT;
end
LEVEL2: if (~MMUReady) NextWalkerState = LEVEL2;
// *** <FUTURE WORK> According to the architecture, we should
// fault upon finding a superpage that is misaligned or has 0
// access bit. The following commented line of code is
// supposed to perform that check. However, it is untested.
else if (ValidPTE && LeafPTE && ~BadGigapage) NextWalkerState = LEAF;
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL1;
else NextWalkerState = FAULT;
end
LEVEL1: if (~MMUReady) NextWalkerState = LEVEL1;
// *** <FUTURE WORK> According to the architecture, we should
// fault upon finding a superpage that is misaligned or has 0
// access bit. The following commented line of code is
// supposed to perform that check. However, it is untested.
else if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF;
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0;
else NextWalkerState = FAULT;
LEVEL2_WDV: begin
TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000};
//HPTWRead = 1'b1;
if (HPTWStall) begin
NextWalkerState = LEVEL2_WDV;
end else begin
NextWalkerState = LEVEL2;
PRegEn = 1'b1;
end
end
LEVEL2: begin
// *** <FUTURE WORK> According to the architecture, we should
// fault upon finding a superpage that is misaligned or has 0
// access bit. The following commented line of code is
// supposed to perform that check. However, it is untested.
if (ValidPTE && LeafPTE && ~BadGigapage) begin
NextWalkerState = LEAF;
PageTableEntry = CurrentPTE;
PageType = (WalkerState == LEVEL3) ? 2'b11 :
((WalkerState == LEVEL2) ? 2'b10 :
((WalkerState == LEVEL1) ? 2'b01 : 2'b00));
DTLBWriteM = DTLBMissMQ;
ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions
TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0];
end
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
else if (ValidPTE && ~LeafPTE) begin
NextWalkerState = LEVEL1_WDV;
TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
HPTWRead = 1'b1;
end else begin
NextWalkerState = FAULT;
end
LEVEL0: if (~MMUReady) NextWalkerState = LEVEL0;
else if (ValidPTE && LeafPTE && ~AccessAlert) NextWalkerState = LEAF;
else NextWalkerState = FAULT;
LEAF: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3;
else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2;
else NextWalkerState = IDLE;
end
LEVEL1_WDV: begin
TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
//HPTWRead = 1'b1;
if (HPTWStall) begin
NextWalkerState = LEVEL1_WDV;
end else begin
NextWalkerState = LEVEL1;
PRegEn = 1'b1;
end
end
LEVEL1: begin
// *** <FUTURE WORK> According to the architecture, we should
// fault upon finding a superpage that is misaligned or has 0
// access bit. The following commented line of code is
// supposed to perform that check. However, it is untested.
if (ValidPTE && LeafPTE && ~BadMegapage) begin
NextWalkerState = LEAF;
PageTableEntry = CurrentPTE;
PageType = (WalkerState == LEVEL3) ? 2'b11 :
((WalkerState == LEVEL2) ? 2'b10 :
((WalkerState == LEVEL1) ? 2'b01 : 2'b00));
DTLBWriteM = DTLBMissMQ;
ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions
TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0];
end
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
else if (ValidPTE && ~LeafPTE) begin
NextWalkerState = LEVEL0_WDV;
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
HPTWRead = 1'b1;
end else begin
NextWalkerState = FAULT;
end
end
LEVEL0_WDV: begin
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
if (HPTWStall) begin
NextWalkerState = LEVEL0_WDV;
end else begin
NextWalkerState = LEVEL0;
PRegEn = 1'b1;
end
end
LEVEL0: begin
if (ValidPTE && LeafPTE && ~AccessAlert) begin
NextWalkerState = LEAF;
PageTableEntry = CurrentPTE;
PageType = (WalkerState == LEVEL3) ? 2'b11 :
((WalkerState == LEVEL2) ? 2'b10 :
((WalkerState == LEVEL1) ? 2'b01 : 2'b00));
DTLBWriteM = DTLBMissMQ;
ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions
TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0];
end else begin
NextWalkerState = FAULT;
end
end
LEAF: begin
NextWalkerState = IDLE;
MMUStall = 1'b0;
end
FAULT: begin
NextWalkerState = IDLE;
WalkerInstrPageFaultF = ~DTLBMissMQ;
WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore;
WalkerStorePageFaultM = DTLBMissMQ && MemStore;
MMUStall = 1'b0;
end
// Default case should never happen
default: begin
NextWalkerState = IDLE;
end
FAULT: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3;
else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2;
else NextWalkerState = IDLE;
// Default case should never happen, but is included for linter.
default: NextWalkerState = IDLE;
endcase
end
@ -288,76 +531,22 @@ localparam LEVEL0 = 3'h0;
assign BadGigapage = GigapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme
assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme
assign VPN3 = TranslationVAdr[47:39];
assign VPN2 = TranslationVAdr[38:30];
assign VPN1 = TranslationVAdr[29:21];
assign VPN0 = TranslationVAdr[20:12];
assign VPN3 = TranslationVAdrQ[47:39];
assign VPN2 = TranslationVAdrQ[38:30];
assign VPN1 = TranslationVAdrQ[29:21];
assign VPN0 = TranslationVAdrQ[20:12];
always_comb begin
// default values
TranslationPAdr = '0;
PageTableEntry = '0;
PageType = '0;
DTLBWriteM = '0;
ITLBWriteF = '0;
WalkerInstrPageFaultF = '0;
WalkerLoadPageFaultM = '0;
WalkerStorePageFaultM = '0;
// The MMU defaults to stalling the processor
MMUStall = '1;
case (NextWalkerState)
IDLE: begin
MMUStall = '0;
end
LEVEL3: begin
TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000};
// *** this is a huge breaking point. if we're going through level3 every time, even when sv48 is off,
// what should translationPAdr be when level3 is just off?
end
LEVEL2: begin
TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000};
end
LEVEL1: begin
TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
end
LEVEL0: begin
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
end
LEAF: begin
// Keep physical address alive to prevent HADDR dropping to 0
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
PageTableEntry = CurrentPTE;
PageType = (WalkerState == LEVEL3) ? 2'b11 :
((WalkerState == LEVEL2) ? 2'b10 :
((WalkerState == LEVEL1) ? 2'b01 : 2'b00));
DTLBWriteM = DTLBMissM;
ITLBWriteF = ~DTLBMissM; // Prefer data over instructions
end
FAULT: begin
// Keep physical address alive to prevent HADDR dropping to 0
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
WalkerInstrPageFaultF = ~DTLBMissM;
WalkerLoadPageFaultM = DTLBMissM && ~MemStore;
WalkerStorePageFaultM = DTLBMissM && MemStore;
MMUStall = '0; // Drop the stall early to enter trap handling code
end
default: begin
// nothing
end
endcase
end
// Capture page table entry from ahblite
flopenr #(`XLEN) ptereg(HCLK, ~HRESETn, MMUReady, MMUReadPTE, SavedPTE);
mux2 #(`XLEN) ptemux(SavedPTE, MMUReadPTE, MMUReady, CurrentPTE);
flopenr #(`XLEN) ptereg(clk, reset, PRegEn, MMUReadPTE, SavedPTE);
//mux2 #(`XLEN) ptemux(SavedPTE, MMUReadPTE, PRegEn, CurrentPTE);
assign CurrentPTE = SavedPTE;
assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10];
// Assign outputs to ahblite
// *** Currently truncate address to 32 bits. This must be changed if
// we support larger physical address spaces
assign MMUPAdr = {{(`XLEN-32){1'b0}}, TranslationPAdr[31:0]};
assign MMUPAdr = {{(`XLEN-`PA_BITS){1'b0}}, TranslationPAdr[`PA_BITS-1:0]};
end
endgenerate

View File

@ -32,25 +32,20 @@ module pmachecker (
input logic [`PA_BITS-1:0] PhysicalAddress,
input logic [1:0] Size,
input logic [31:0] HADDR,
input logic [2:0] HSIZE,
// input logic [2:0] HBURST, // *** in AHBlite, HBURST is hardwired to zero for single bursts only allowed. consider removing from this module if unused.
input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // *** atomicaccessM is unused but might want to stay in for future use.
output logic Cacheable, Idempotent, AtomicAllowed,
output logic PMASquashBusAccess,
output logic [5:0] HSELRegions,
output logic PMAInstrAccessFaultF,
output logic PMALoadAccessFaultM,
output logic PMAStoreAccessFaultM
);
// logic BootTim, Tim, CLINT, GPIO, UART, PLIC;
logic PMAAccessFault;
logic AccessRW, AccessRWX, AccessRX;
logic [5:0] SelRegions;
// Determine what type of access is being made
assign AccessRW = ReadAccessM | WriteAccessM;
@ -58,17 +53,18 @@ module pmachecker (
assign AccessRX = ReadAccessM | ExecuteAccessF;
// Determine which region of physical memory (if any) is being accessed
adrdecs adrdecs(HADDR, AccessRW, AccessRX, AccessRWX, HSIZE, HSELRegions);
adrdecs adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWX, Size, SelRegions);
// Only RAM memory regions are cacheable
assign Cacheable = HSELRegions[5] | HSELRegions[4];
assign Idempotent = HSELRegions[4];
assign AtomicAllowed = HSELRegions[4];
assign Cacheable = SelRegions[5] | SelRegions[4];
assign Idempotent = SelRegions[4];
assign AtomicAllowed = SelRegions[4];
// Detect access faults
assign PMAAccessFault = (~|HSELRegions) && AccessRWX;
assign PMAAccessFault = (~|SelRegions) & AccessRWX;
assign PMAInstrAccessFaultF = ExecuteAccessF && PMAAccessFault;
assign PMALoadAccessFaultM = ReadAccessM && PMAAccessFault;
assign PMAStoreAccessFaultM = WriteAccessM && PMAAccessFault;
assign PMASquashBusAccess = PMAAccessFault;
endmodule

View File

@ -30,35 +30,43 @@
`include "wally-config.vh"
module pmpadrdec (
input logic [31:0] HADDR, // *** replace with PAdr
input logic [1:0] AdrMode,
input logic [`XLEN-1:0] CurrentPMPAdr,
input logic AdrAtLeastPreviousPMP,
output logic AdrAtLeastCurrentPMP,
output logic Match
input logic [`PA_BITS-1:0] PhysicalAddress,
input logic [7:0] PMPCfg,
input logic [`XLEN-1:0] PMPAdr,
input logic PAgePMPAdrIn,
input logic NoLowerMatchIn,
output logic PAgePMPAdrOut,
output logic NoLowerMatchOut,
output logic Match, Active,
output logic L, X, W, R
);
localparam TOR = 2'b01;
localparam NA4 = 2'b10;
localparam NAPOT = 2'b11;
logic TORMatch, NAMatch;
logic AdrBelowCurrentPMP;
logic PAltPMPAdr;
logic FirstMatch;
logic [`PA_BITS-1:0] CurrentAdrFull;
logic [`PA_BITS-1:0] FakePhysAdr;
logic [1:0] AdrMode;
// ***replace this when the true physical address from MMU is available
assign FakePhysAdr = {{(`PA_BITS-32){1'b0}}, HADDR};
assign AdrMode = PMPCfg[4:3];
// The two lsb of the physical address don't matter for this checking.
// The following code includes them, but hardwires the PMP checker lsbs to 00
// and masks them later. Logic synthesis should optimize away these bottom bits.
// Top-of-range (TOR)
// Append two implicit trailing 0's to PMPAdr value
assign CurrentAdrFull = {CurrentPMPAdr[`PA_BITS-3:0], 2'b00};
assign AdrBelowCurrentPMP = /*HADDR */FakePhysAdr < CurrentAdrFull; // *** make sure unsigned comparison works correctly
assign AdrAtLeastCurrentPMP = ~AdrBelowCurrentPMP;
assign TORMatch = AdrAtLeastPreviousPMP && AdrBelowCurrentPMP;
assign CurrentAdrFull = {PMPAdr[`PA_BITS-3:0], 2'b00};
assign PAltPMPAdr = {1'b0, PhysicalAddress} < {1'b0, CurrentAdrFull}; // unsigned comparison
assign PAgePMPAdrOut = ~PAltPMPAdr;
assign TORMatch = PAgePMPAdrIn && PAltPMPAdr;
// Naturally aligned regions
// *** should be able to optimize away bottom 2 bits
// verilator lint_off UNOPTFLAT
logic [`PA_BITS-1:0] Mask;
@ -69,69 +77,22 @@ module pmpadrdec (
assign Mask[1:0] = 2'b11;
assign Mask[2] = (AdrMode == NAPOT); // mask has 0s in upper bis for NA4 region
for (i=3; i < `PA_BITS; i=i+1)
assign Mask[i] = Mask[i-1] & CurrentPMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore
assign Mask[i] = Mask[i-1] & PMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore
endgenerate
// verilator lint_on UNOPTFLAT
assign NAMatch = &((FakePhysAdr ~^ CurrentAdrFull) | Mask);
/* generate
if (`XLEN == 32 || `XLEN == 64) begin // ***redo for various sizes
// priority encoder to translate address to range
// *** We'd like to replace this with a better priority encoder
// *** We should not be truncating 64 bit physical addresses to 32 bits...
// *** there is an easy combinatinoal way to do this with a cascade of AND gates O(32) rather than O(32^2) dh
always_comb
if (AdrMode == NA4) Range = (2**2) - 1;
else casez (CurrentPMPAdr[31:0]) // NAPOT regions
32'b???????????????????????????????0: Range = (2**3) - 1;
32'b??????????????????????????????01: Range = (2**4) - 1;
32'b?????????????????????????????011: Range = (2**5) - 1;
32'b????????????????????????????0111: Range = (2**6) - 1;
32'b???????????????????????????01111: Range = (2**7) - 1;
32'b??????????????????????????011111: Range = (2**8) - 1;
32'b?????????????????????????0111111: Range = (2**9) - 1;
32'b????????????????????????01111111: Range = (2**10) - 1;
32'b???????????????????????011111111: Range = (2**11) - 1;
32'b??????????????????????0111111111: Range = (2**12) - 1;
32'b?????????????????????01111111111: Range = (2**13) - 1;
32'b????????????????????011111111111: Range = (2**14) - 1;
32'b???????????????????0111111111111: Range = (2**15) - 1;
32'b??????????????????01111111111111: Range = (2**16) - 1;
32'b?????????????????011111111111111: Range = (2**17) - 1;
32'b????????????????0111111111111111: Range = (2**18) - 1;
32'b???????????????01111111111111111: Range = (2**19) - 1;
32'b??????????????011111111111111111: Range = (2**20) - 1;
32'b?????????????0111111111111111111: Range = (2**21) - 1;
32'b????????????01111111111111111111: Range = (2**22) - 1;
32'b???????????011111111111111111111: Range = (2**23) - 1;
32'b??????????0111111111111111111111: Range = (2**24) - 1;
32'b?????????01111111111111111111111: Range = (2**25) - 1;
32'b????????011111111111111111111111: Range = (2**26) - 1;
32'b???????0111111111111111111111111: Range = (2**27) - 1;
32'b??????01111111111111111111111111: Range = (2**28) - 1;
32'b?????011111111111111111111111111: Range = (2**29) - 1;
32'b????0111111111111111111111111111: Range = (2**30) - 1;
32'b???01111111111111111111111111111: Range = (2**31) - 1;
32'b??011111111111111111111111111111: Range = (2**32) - 1;
32'b?0111111111111111111111111111111: Range = (2**33) - 1;
32'b01111111111111111111111111111111: Range = (2**34) - 1;
32'b11111111111111111111111111111111: Range = (2**35) - 1;
default: Range = '0;
endcase
end else begin
assign Range = '0;
end
endgenerate
// *** Range should not be truncated... but our physical address space is
// currently only 32 bits wide.
// with a bit of combining of range selection, this could be shared with NA4Match ***
assign NAMatch = &((HADDR ~^ CurrentAdrFull) | Range[31:0]);*/
assign NAMatch = &((PhysicalAddress ~^ CurrentAdrFull) | Mask);
assign Match = (AdrMode == TOR) ? TORMatch :
(AdrMode == NA4 || AdrMode == NAPOT) ? NAMatch :
0;
endmodule
assign FirstMatch = NoLowerMatchIn & Match;
assign NoLowerMatchOut = NoLowerMatchIn & ~Match;
assign L = PMPCfg[7] & FirstMatch;
assign X = PMPCfg[2] & FirstMatch;
assign W = PMPCfg[1] & FirstMatch;
assign R = PMPCfg[0] & FirstMatch;
assign Active = |PMPCfg[4:3];
endmodule

View File

@ -29,13 +29,8 @@
`include "wally-config.vh"
module pmpchecker (
// input logic clk, reset, //*** it seems like clk, reset is also not needed here?
input logic [31:0] HADDR,
input logic [1:0] PrivilegeModeW,
input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW,
input logic [`PA_BITS-1:0] PhysicalAddress,
input logic [1:0] PrivilegeModeW,
// *** ModelSim has a switch -svinputport which controls whether input ports
// are nets (wires) or vars by default. The default setting of this switch is
@ -44,10 +39,7 @@ module pmpchecker (
// this will be understood as a var. However, if we don't supply the `var`
// keyword, the compiler warns us that it's interpreting the signal as a var,
// which we might not intend.
// However, it's still bad form to pass 512 or 1024 signals across a module
// boundary. It would be better to store the PMP address registers in a module
// somewhere in the CSR hierarchy and do PMP checking _within_ that module, so
// we don't have to pass around 16 whole registers.
input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
input logic ExecuteAccessF, WriteAccessM, ReadAccessM,
@ -59,98 +51,50 @@ module pmpchecker (
output logic PMPStoreAccessFaultM
);
// verilator lint_off UNOPTFLAT
// Bit i is high when the address falls in PMP region i
logic [15:0] Regions;
logic [3:0] MatchedRegion;
logic Match, EnforcePMP;
logic EnforcePMP;
logic [7:0] PMPCFG [`PMP_ENTRIES-1:0];
logic [`PMP_ENTRIES-1:0] Match; // PMP Entry matches
logic [`PMP_ENTRIES-1:0] Active; // PMP register i is non-null
logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set
logic [`PMP_ENTRIES:0] NoLowerMatch; // None of the lower PMP entries match
logic [`PMP_ENTRIES:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i]
genvar i,j;
logic [7:0] PMPCFG [15:0];
assign PAgePMPAdr[0] = 1'b1;
assign NoLowerMatch[0] = 1'b1;
generate
// verilator lint_off WIDTH
for (j=0; j<`PMP_ENTRIES; j = j+8)
assign {PMPCFG[j+7], PMPCFG[j+6], PMPCFG[j+5], PMPCFG[j+4],
PMPCFG[j+3], PMPCFG[j+2], PMPCFG[j+1], PMPCFG[j]} = PMPCFG_ARRAY_REGW[j/8];
// verilator lint_on WIDTH
for (i=0; i<`PMP_ENTRIES; i++)
pmpadrdec pmpadrdec(.PhysicalAddress,
.PMPCfg(PMPCFG[i]),
.PMPAdr(PMPADDR_ARRAY_REGW[i]),
.PAgePMPAdrIn(PAgePMPAdr[i]),
.PAgePMPAdrOut(PAgePMPAdr[i+1]),
.NoLowerMatchIn(NoLowerMatch[i]),
.NoLowerMatchOut(NoLowerMatch[i+1]),
.Match(Match[i]),
.Active(Active[i]),
.L(L[i]), .X(X[i]), .W(W[i]), .R(R[i])
);
// Bit i is high when the address is greater than or equal to PMPADR[i]
// Used for determining whether TOR PMP regions match
logic [15:0] AboveRegion;
// Bit i is high if PMP register i is non-null
logic [15:0] ActiveRegion;
logic L_Bit, X_Bit, W_Bit, R_Bit;
logic InvalidExecute, InvalidWrite, InvalidRead;
// *** extend to optionally 64 configurations
assign {PMPCFG[15], PMPCFG[14], PMPCFG[13], PMPCFG[12],
PMPCFG[11], PMPCFG[10], PMPCFG[9], PMPCFG[8]} = PMPCFG23_REGW;
assign {PMPCFG[7], PMPCFG[6], PMPCFG[5], PMPCFG[4],
PMPCFG[3], PMPCFG[2], PMPCFG[1], PMPCFG[0]} = PMPCFG01_REGW;
pmpadrdec pmpadrdec(.HADDR(HADDR), .AdrMode(PMPCFG[0][4:3]),
.CurrentPMPAdr(PMPADDR_ARRAY_REGW[0]),
.AdrAtLeastPreviousPMP(1'b1),
.AdrAtLeastCurrentPMP(AboveRegion[0]),
.Match(Regions[0]));
assign ActiveRegion[0] = |PMPCFG[0][4:3];
generate // *** only for PMP_ENTRIES > 0
genvar i;
for (i = 1; i < `PMP_ENTRIES; i++) begin
pmpadrdec pmpadrdec(.HADDR(HADDR), .AdrMode(PMPCFG[i][4:3]),
.CurrentPMPAdr(PMPADDR_ARRAY_REGW[i]),
.AdrAtLeastPreviousPMP(AboveRegion[i-1]),
.AdrAtLeastCurrentPMP(AboveRegion[i]),
.Match(Regions[i]));
assign ActiveRegion[i] = |PMPCFG[i][4:3];
end
// verilator lint_on UNOPTFLAT
endgenerate
assign Match = |Regions;
// Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region
assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L : |Active;
// Only enforce PMP checking for S and U modes when at least one PMP is active
assign EnforcePMP = |ActiveRegion;
assign PMPInstrAccessFaultF = EnforcePMP && ExecuteAccessF && ~|X;
assign PMPStoreAccessFaultM = EnforcePMP && WriteAccessM && ~|W;
assign PMPLoadAccessFaultM = EnforcePMP && ReadAccessM && ~|R;
// *** extend to up to 64, fold bit extraction to avoid need for binary encoding of region
always_comb
casez (Regions)
16'b???????????????1: MatchedRegion = 0;
16'b??????????????10: MatchedRegion = 1;
16'b?????????????100: MatchedRegion = 2;
16'b????????????1000: MatchedRegion = 3;
16'b???????????10000: MatchedRegion = 4;
16'b??????????100000: MatchedRegion = 5;
16'b?????????1000000: MatchedRegion = 6;
16'b????????10000000: MatchedRegion = 7;
16'b???????100000000: MatchedRegion = 8;
16'b??????1000000000: MatchedRegion = 9;
16'b?????10000000000: MatchedRegion = 10;
16'b????100000000000: MatchedRegion = 11;
16'b???1000000000000: MatchedRegion = 12;
16'b??10000000000000: MatchedRegion = 13;
16'b?100000000000000: MatchedRegion = 14;
16'b1000000000000000: MatchedRegion = 15;
default: MatchedRegion = 0; // Should only occur if there is no match
endcase
assign L_Bit = PMPCFG[MatchedRegion][7] && Match;
assign X_Bit = PMPCFG[MatchedRegion][2] && Match;
assign W_Bit = PMPCFG[MatchedRegion][1] && Match;
assign R_Bit = PMPCFG[MatchedRegion][0] && Match;
assign InvalidExecute = ExecuteAccessF && ~X_Bit;
assign InvalidWrite = WriteAccessM && ~W_Bit;
assign InvalidRead = ReadAccessM && ~R_Bit;
// *** don't cause faults when there are no PMPs
assign PMPInstrAccessFaultF = (PrivilegeModeW == `M_MODE) ?
Match && L_Bit && InvalidExecute :
EnforcePMP && InvalidExecute;
assign PMPStoreAccessFaultM = (PrivilegeModeW == `M_MODE) ?
Match && L_Bit && InvalidWrite :
EnforcePMP && InvalidWrite;
assign PMPLoadAccessFaultM = (PrivilegeModeW == `M_MODE) ?
Match && L_Bit && InvalidRead :
EnforcePMP && InvalidRead;
assign PMPSquashBusAccess = PMPInstrAccessFaultF || PMPLoadAccessFaultM || PMPStoreAccessFaultM;
assign PMPSquashBusAccess = PMPInstrAccessFaultF | PMPLoadAccessFaultM | PMPStoreAccessFaultM;
endmodule

View File

@ -65,6 +65,7 @@ module tlb #(parameter ENTRY_BITS = 3,
// x1 - TLB is accessed for a write
// 11 - TLB is accessed for both read and write
input logic [1:0] TLBAccessType,
input logic DisableTranslation,
// Virtual address input
input logic [`XLEN-1:0] VirtualAddress,
@ -135,7 +136,7 @@ module tlb #(parameter ENTRY_BITS = 3,
endgenerate
// Whether translation should occur
assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE);
assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE) & ~ DisableTranslation;
// Determine how the TLB is currently being used
// Note that we use ReadAccess for both loads and instruction fetches

View File

@ -60,7 +60,7 @@ module csr #(parameter
output logic STATUS_MIE, STATUS_SIE,
output logic STATUS_MXR, STATUS_SUM,
output logic STATUS_MPRV,
output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW,
output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
input logic [4:0] SetFflagsM,
output logic [2:0] FRM_REGW,

View File

@ -48,25 +48,9 @@ module csrm #(parameter
MTVAL = 12'h343,
MIP = 12'h344,
PMPCFG0 = 12'h3A0,
PMPCFG1 = 12'h3A1,
PMPCFG2 = 12'h3A2,
PMPCFG3 = 12'h3A3,
// .. up to 15 more at consecutive addresses
PMPADDR0 = 12'h3B0,
PMPADDR1 = 12'h3B1,
PMPADDR2 = 12'h3B2,
PMPADDR3 = 12'h3B3,
PMPADDR4 = 12'h3B4,
PMPADDR5 = 12'h3B5,
PMPADDR6 = 12'h3B6,
PMPADDR7 = 12'h3B7,
PMPADDR8 = 12'h3B8,
PMPADDR9 = 12'h3B9,
PMPADDR10 = 12'h3BA,
PMPADDR11 = 12'h3BB,
PMPADDR12 = 12'h3BC,
PMPADDR13 = 12'h3BD,
PMPADDR14 = 12'h3BE,
PMPADDR15 = 12'h3BF,
// ... up to 63 more at consecutive addresses
TSELECT = 12'h7A0,
TDATA1 = 12'h7A1,
TDATA2 = 12'h7A2,
@ -90,7 +74,7 @@ module csrm #(parameter
output logic [31:0] MCOUNTEREN_REGW, MCOUNTINHIBIT_REGW,
output logic [`XLEN-1:0] MEDELEG_REGW, MIDELEG_REGW,
// 64-bit registers in RV64, or two 32-bit registers in RV32
output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW,
output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
input logic [11:0] MIP_REGW, MIE_REGW,
output logic WriteMSTATUSM,
@ -103,8 +87,8 @@ module csrm #(parameter
logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM;
logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM;
logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM;
logic WritePMPCFG0M, WritePMPCFG2M;
logic WritePMPADDRM [15:0];
logic [`PMP_ENTRIES/8-1:0] WritePMPCFGM, WritePMPCFGHM ;
logic [`PMP_ENTRIES-1:0] WritePMPADDRM ;
localparam MISA_26 = (`MISA) & 32'h03ffffff;
@ -120,7 +104,7 @@ module csrm #(parameter
assign WriteMEPCM = MTrapM | (CSRMWriteM && (CSRAdrM == MEPC)) && ~StallW;
assign WriteMCAUSEM = MTrapM | (CSRMWriteM && (CSRAdrM == MCAUSE)) && ~StallW;
assign WriteMTVALM = MTrapM | (CSRMWriteM && (CSRAdrM == MTVAL)) && ~StallW;
assign WritePMPCFG0M = (CSRMWriteM && (CSRAdrM == PMPCFG0)) && ~StallW;
/* assign WritePMPCFG0M = (CSRMWriteM && (CSRAdrM == PMPCFG0)) && ~StallW;
assign WritePMPCFG2M = (CSRMWriteM && (CSRAdrM == PMPCFG2)) && ~StallW;
assign WritePMPADDRM[0] = (CSRMWriteM && (CSRAdrM == PMPADDR0)) && ~StallW;
assign WritePMPADDRM[1] = (CSRMWriteM && (CSRAdrM == PMPADDR1)) && ~StallW;
@ -137,10 +121,13 @@ module csrm #(parameter
assign WritePMPADDRM[12] = (CSRMWriteM && (CSRAdrM == PMPADDR12)) && ~StallW;
assign WritePMPADDRM[13] = (CSRMWriteM && (CSRAdrM == PMPADDR13)) && ~StallW;
assign WritePMPADDRM[14] = (CSRMWriteM && (CSRAdrM == PMPADDR14)) && ~StallW;
assign WritePMPADDRM[15] = (CSRMWriteM && (CSRAdrM == PMPADDR15)) && ~StallW;
assign WritePMPADDRM[15] = (CSRMWriteM && (CSRAdrM == PMPADDR15)) && ~StallW; */
assign WriteMCOUNTERENM = CSRMWriteM && (CSRAdrM == MCOUNTEREN) && ~StallW;
assign WriteMCOUNTINHIBITM = CSRMWriteM && (CSRAdrM == MCOUNTINHIBIT) && ~StallW;
assign IllegalCSRMWriteReadonlyM = CSRMWriteM && (CSRAdrM == MVENDORID || CSRAdrM == MARCHID || CSRAdrM == MIMPID || CSRAdrM == MHARTID);
// CSRs
@ -172,33 +159,39 @@ module csrm #(parameter
flopenl #(32) MCOUNTINHIBITreg(clk, reset, WriteMCOUNTINHIBITM, CSRWriteValM[31:0], 32'hFFFFFFFF, MCOUNTINHIBIT_REGW);
// There are PMP_ENTRIES = 0, 16, or 64 PMPADDR registers, each of which has its own flop
// *** need to add support for locked PMPCFG and PMPADR
genvar i;
generate
genvar i;
for (i = 0; i < `PMP_ENTRIES; i++) begin: pmp_flop
for(i=0; i<`PMP_ENTRIES; i++) begin
assign WritePMPADDRM[i] = (CSRMWriteM && (CSRAdrM == PMPADDR0+i)) && ~StallW;
flopenr #(`XLEN) PMPADDRreg(clk, reset, WritePMPADDRM[i], CSRWriteValM, PMPADDR_ARRAY_REGW[i]);
end
for (i=0; i<`PMP_ENTRIES/8; i++) begin
if (`XLEN==64) begin
assign WritePMPCFGM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i)) && ~StallW;
flopenr #(`XLEN) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i]);
end else begin
assign WritePMPCFGM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i)) && ~StallW;
assign WritePMPCFGHM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i+1)) && ~StallW;
flopenr #(`XLEN) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][31:0]);
flopenr #(`XLEN) PMPCFGHreg(clk, reset, WritePMPCFGHM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][63:32]);
end
end
endgenerate
// PMPCFG registers are a pair of 64-bit in RV64 and four 32-bit in RV32
generate
if (`XLEN==64) begin
flopenr #(`XLEN) PMPCFG01reg(clk, reset, WritePMPCFG0M, CSRWriteValM, PMPCFG01_REGW);
flopenr #(`XLEN) PMPCFG23reg(clk, reset, WritePMPCFG2M, CSRWriteValM, PMPCFG23_REGW);
end else begin
logic WritePMPCFG1M, WritePMPCFG3M;
assign WritePMPCFG1M = MTrapM | (CSRMWriteM && (CSRAdrM == PMPCFG1));
assign WritePMPCFG3M = MTrapM | (CSRMWriteM && (CSRAdrM == PMPCFG3));
flopenr #(`XLEN) PMPCFG0reg(clk, reset, WritePMPCFG0M, CSRWriteValM, PMPCFG01_REGW[31:0]);
flopenr #(`XLEN) PMPCFG1reg(clk, reset, WritePMPCFG1M, CSRWriteValM, PMPCFG01_REGW[63:32]);
flopenr #(`XLEN) PMPCFG2reg(clk, reset, WritePMPCFG2M, CSRWriteValM, PMPCFG23_REGW[31:0]);
flopenr #(`XLEN) PMPCFG3reg(clk, reset, WritePMPCFG3M, CSRWriteValM, PMPCFG23_REGW[63:32]);
end
endgenerate
// Read machine mode CSRs
// verilator lint_off WIDTH
always_comb begin
IllegalCSRMAccessM = !(`S_SUPPORTED | `U_SUPPORTED & `N_SUPPORTED) &&
(CSRAdrM == MEDELEG || CSRAdrM == MIDELEG); // trap on DELEG register access when no S or N-mode
case (CSRAdrM)
if (CSRAdrM >= PMPADDR0 && CSRAdrM < PMPADDR0 + `PMP_ENTRIES) // reading a PMP entry
CSRMReadValM = PMPADDR_ARRAY_REGW[CSRAdrM - PMPADDR0];
else if (CSRAdrM >= PMPCFG0 && CSRAdrM < PMPCFG0 + `PMP_ENTRIES/4) begin
if (~CSRAdrM[0]) CSRMReadValM = PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0)/2][`XLEN-1:0];
else CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0-1)/2][63:32]};
end
else case (CSRAdrM)
MISA_ADR: CSRMReadValM = MISA_REGW;
MVENDORID: CSRMReadValM = 0;
MARCHID: CSRMReadValM = 0;
@ -219,7 +212,7 @@ module csrm #(parameter
MTVAL: CSRMReadValM = MTVAL_REGW;
MCOUNTEREN:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTEREN_REGW};
MCOUNTINHIBIT:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTINHIBIT_REGW};
PMPCFG0: CSRMReadValM = PMPCFG01_REGW[`XLEN-1:0];
/* PMPCFG0: CSRMReadValM = PMPCFG01_REGW[`XLEN-1:0];
PMPCFG1: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG01_REGW[63:32]};
PMPCFG2: CSRMReadValM = PMPCFG23_REGW[`XLEN-1:0];
PMPCFG3: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG23_REGW[63:32]};
@ -238,11 +231,12 @@ module csrm #(parameter
PMPADDR12: CSRMReadValM = PMPADDR_ARRAY_REGW[12];
PMPADDR13: CSRMReadValM = PMPADDR_ARRAY_REGW[13];
PMPADDR14: CSRMReadValM = PMPADDR_ARRAY_REGW[14];
PMPADDR15: CSRMReadValM = PMPADDR_ARRAY_REGW[15];
PMPADDR15: CSRMReadValM = PMPADDR_ARRAY_REGW[15]; */
default: begin
CSRMReadValM = 0;
IllegalCSRMAccessM = 1;
end
endcase
end
// verilator lint_on WIDTH
endmodule

View File

@ -68,7 +68,7 @@ module privileged (
output logic [1:0] PrivilegeModeW,
output logic [`XLEN-1:0] SATP_REGW,
output logic STATUS_MXR, STATUS_SUM,
output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW,
output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
output logic [2:0] FRM_REGW
);

View File

@ -37,8 +37,8 @@ module dtim #(parameter BASE=0, RANGE = 65535) (
output logic HRESPTim, HREADYTim
);
localparam integer MemStartAddr = BASE>>(1+`XLEN/32);
localparam integer MemEndAddr = (RANGE+BASE)>>1+(`XLEN/32);
localparam MemStartAddr = BASE>>(1+`XLEN/32);
localparam MemEndAddr = (RANGE+BASE)>>1+(`XLEN/32);
logic [`XLEN-1:0] RAM[BASE>>(1+`XLEN/32):(RANGE+BASE)>>1+(`XLEN/32)];
logic [31:0] HWADDR, A;

View File

@ -74,7 +74,8 @@ module uncore (
// Determine which region of physical memory (if any) is being accessed
// Use a trimmed down portion of the PMA checker - only the address decoders
adrdecs adrdecs(HADDR, 1'b1, 1'b1, 1'b1, HSIZE, HSELRegions);
// Set access types to all 1 as don't cares because the MMU has already done access checking
adrdecs adrdecs({{(`PA_BITS-32){1'b0}}, HADDR}, 1'b1, 1'b1, 1'b1, HSIZE[1:0], HSELRegions);
// unswizzle HSEL signals
assign {HSELBootTim, HSELTim, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC} = HSELRegions;

View File

@ -26,167 +26,238 @@
`include "wally-config.vh"
/* verilator lint_on UNUSED */
module wallypipelinedhart (
input logic clk, reset,
output logic [`XLEN-1:0] PCF,
// input logic [31:0] InstrF,
// Privileged
input logic TimerIntM, ExtIntM, SwIntM,
input logic InstrAccessFaultF,
input logic DataAccessFaultM,
input logic [63:0] MTIME_CLINT, MTIMECMP_CLINT,
// Bus Interface
input logic [15:0] rd2, // bogus, delete when real multicycle fetch works
input logic [`AHBW-1:0] HRDATA,
input logic HREADY, HRESP,
output logic HCLK, HRESETn,
output logic [31:0] HADDR,
output logic [`AHBW-1:0] HWDATA,
output logic HWRITE,
output logic [2:0] HSIZE,
output logic [2:0] HBURST,
output logic [3:0] HPROT,
output logic [1:0] HTRANS,
output logic HMASTLOCK,
output logic [5:0] HSELRegions,
// Delayed signals for subword write
output logic [2:0] HADDRD,
output logic [3:0] HSIZED,
output logic HWRITED
);
module wallypipelinedhart
(
input logic clk, reset,
output logic [`XLEN-1:0] PCF,
// input logic [31:0] InstrF,
// Privileged
input logic TimerIntM, ExtIntM, SwIntM,
input logic InstrAccessFaultF,
input logic DataAccessFaultM,
input logic [63:0] MTIME_CLINT, MTIMECMP_CLINT,
// Bus Interface
input logic [15:0] rd2, // bogus, delete when real multicycle fetch works
input logic [`AHBW-1:0] HRDATA,
input logic HREADY, HRESP,
output logic HCLK, HRESETn,
output logic [31:0] HADDR,
output logic [`AHBW-1:0] HWDATA,
output logic HWRITE,
output logic [2:0] HSIZE,
output logic [2:0] HBURST,
output logic [3:0] HPROT,
output logic [1:0] HTRANS,
output logic HMASTLOCK,
output logic [5:0] HSELRegions,
// Delayed signals for subword write
output logic [2:0] HADDRD,
output logic [3:0] HSIZED,
output logic HWRITED
);
// logic [1:0] ForwardAE, ForwardBE;
logic StallF, StallD, StallE, StallM, StallW;
logic FlushF, FlushD, FlushE, FlushM, FlushW;
logic RetM, TrapM, NonBusTrapM;
// logic [1:0] ForwardAE, ForwardBE;
logic StallF, StallD, StallE, StallM, StallW;
logic FlushF, FlushD, FlushE, FlushM, FlushW;
logic RetM, TrapM, NonBusTrapM;
// new signals that must connect through DP
logic MulDivE, W64E;
logic CSRReadM, CSRWriteM, PrivilegedM;
logic [1:0] AtomicM;
logic [`XLEN-1:0] SrcAE, SrcBE;
logic [`XLEN-1:0] SrcAM;
logic [2:0] Funct3E;
logic MulDivE, W64E;
logic CSRReadM, CSRWriteM, PrivilegedM;
logic [1:0] AtomicM;
logic [`XLEN-1:0] SrcAE, SrcBE;
logic [`XLEN-1:0] SrcAM;
logic [2:0] Funct3E;
// logic [31:0] InstrF;
logic [31:0] InstrD, InstrE, InstrM, InstrW;
logic [`XLEN-1:0] PCD, PCE, PCM, PCLinkE, PCLinkW;
logic [`XLEN-1:0] PCTargetE;
logic [`XLEN-1:0] CSRReadValW, MulDivResultW;
logic [`XLEN-1:0] PrivilegedNextPCM;
logic [1:0] MemRWM;
logic InstrValidM, InstrValidW;
logic InstrMisalignedFaultM;
logic DataMisalignedM;
logic IllegalBaseInstrFaultD, IllegalIEUInstrFaultD;
logic ITLBInstrPageFaultF, DTLBLoadPageFaultM, DTLBStorePageFaultM;
logic WalkerInstrPageFaultF, WalkerLoadPageFaultM, WalkerStorePageFaultM;
logic LoadMisalignedFaultM, LoadAccessFaultM;
logic StoreMisalignedFaultM, StoreAccessFaultM;
logic [`XLEN-1:0] InstrMisalignedAdrM;
logic PCSrcE;
logic CSRWritePendingDEM;
logic LoadStallD, MulDivStallD, CSRRdStallD;
logic DivDoneE;
logic DivBusyE;
logic DivDoneW;
logic RegWriteD;
logic SquashSCM, SquashSCW;
logic [31:0] InstrD, InstrE, InstrM, InstrW;
logic [`XLEN-1:0] PCD, PCE, PCM, PCLinkE, PCLinkW;
logic [`XLEN-1:0] PCTargetE;
logic [`XLEN-1:0] CSRReadValW, MulDivResultW;
logic [`XLEN-1:0] PrivilegedNextPCM;
logic [1:0] MemRWM;
logic InstrValidM, InstrValidW;
logic InstrMisalignedFaultM;
logic DataMisalignedM;
logic IllegalBaseInstrFaultD, IllegalIEUInstrFaultD;
logic ITLBInstrPageFaultF, DTLBLoadPageFaultM, DTLBStorePageFaultM;
logic WalkerInstrPageFaultF, WalkerLoadPageFaultM, WalkerStorePageFaultM;
logic LoadMisalignedFaultM, LoadAccessFaultM;
logic StoreMisalignedFaultM, StoreAccessFaultM;
logic [`XLEN-1:0] InstrMisalignedAdrM;
logic PCSrcE;
logic CSRWritePendingDEM;
logic DivDoneE;
logic DivBusyE;
logic RegWriteD;
logic LoadStallD, MulDivStallD, CSRRdStallD;
logic SquashSCM, SquashSCW;
// floating point unit signals
logic [2:0] FRM_REGW;
logic [1:0] FMemRWM, FMemRWE;
logic FStallD;
logic FWriteIntE, FWriteIntM, FWriteIntW;
logic [`XLEN-1:0] FWriteDataE;
logic [`XLEN-1:0] FIntResM;
logic FDivBusyE;
logic IllegalFPUInstrD, IllegalFPUInstrE;
logic FloatRegWriteW;
logic FPUStallD;
logic [4:0] SetFflagsM;
logic [`XLEN-1:0] FPUResultW;
logic [2:0] FRM_REGW;
logic [1:0] FMemRWM, FMemRWE;
logic FStallD;
logic FWriteIntE, FWriteIntM, FWriteIntW;
logic [`XLEN-1:0] FWriteDataE;
logic [`XLEN-1:0] FIntResM;
logic FDivBusyE;
logic IllegalFPUInstrD, IllegalFPUInstrE;
logic FloatRegWriteW;
logic FPUStallD;
logic [4:0] SetFflagsM;
logic [`XLEN-1:0] FPUResultW;
// memory management unit signals
logic ITLBWriteF, DTLBWriteM;
logic ITLBFlushF, DTLBFlushM;
logic ITLBMissF, ITLBHitF;
logic DTLBMissM, DTLBHitM;
logic [`XLEN-1:0] SATP_REGW;
logic STATUS_MXR, STATUS_SUM;
logic [1:0] PrivilegeModeW;
logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM;
logic [1:0] PageTypeF, PageTypeM;
logic ITLBWriteF, DTLBWriteM;
logic ITLBFlushF, DTLBFlushM;
logic ITLBMissF, ITLBHitF;
logic DTLBMissM, DTLBHitM;
logic [`XLEN-1:0] SATP_REGW;
logic STATUS_MXR, STATUS_SUM;
logic [1:0] PrivilegeModeW;
logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM;
logic [1:0] PageTypeF, PageTypeM;
// PMA checker signals
logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM;
logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM;
logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM;
logic DSquashBusAccessM, ISquashBusAccessF;
logic [5:0] DHSELRegionsM, IHSELRegionsF;
logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM;
logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM;
logic DSquashBusAccessM, ISquashBusAccessF;
var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0];
logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW; // signals being sent from privileged unit to pmp/pma in dmem and ifu.
assign HSELRegions = ExecuteAccessF ? IHSELRegionsF : DHSELRegionsM; // *** this is a pure guess on how one of these should be selected. it passes tests, but is it the right way to do this?
var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0];
// IMem stalls
logic ICacheStallF;
logic [`XLEN-1:0] MMUPAdr, MMUReadPTE;
logic MMUStall;
logic MMUTranslate, MMUReady;
logic ICacheStallF;
logic DCacheStall;
logic [`XLEN-1:0] MMUPAdr, MMUReadPTE;
logic MMUStall;
logic MMUTranslate, MMUReady;
logic HPTWRead;
logic HPTWReadyfromLSU;
logic HPTWStall;
// bus interface to dmem
logic MemReadM, MemWriteM;
logic [1:0] AtomicMaskedM;
logic [2:0] Funct3M;
logic [`XLEN-1:0] MemAdrM, WriteDataM;
logic [`PA_BITS-1:0] MemPAdrM;
logic [`XLEN-1:0] ReadDataW;
logic [`PA_BITS-1:0] InstrPAdrF;
logic [`XLEN-1:0] InstrRData;
logic InstrReadF;
logic DataStall;
logic InstrAckF, MemAckW;
logic CommitM, CommittedM;
logic MemReadM, MemWriteM;
logic [1:0] AtomicMaskedM;
logic [2:0] Funct3M;
logic [`XLEN-1:0] MemAdrM, WriteDataM;
logic [`PA_BITS-1:0] MemPAdrM;
logic [`XLEN-1:0] ReadDataW;
logic [`PA_BITS-1:0] InstrPAdrF;
logic [`XLEN-1:0] InstrRData;
logic InstrReadF;
logic DataStall;
logic InstrAckF, MemAckW;
logic CommitM, CommittedM;
logic BPPredWrongE;
logic BPPredDirWrongM;
logic BTBPredPCWrongM;
logic RASPredPCWrongM;
logic BPPredClassNonCFIWrongM;
logic BPPredWrongE;
logic BPPredDirWrongM;
logic BTBPredPCWrongM;
logic RASPredPCWrongM;
logic BPPredClassNonCFIWrongM;
logic[`XLEN-1:0] WriteDatatmpM;
logic [`XLEN-1:0] WriteDatatmpM;
logic [4:0] InstrClassM;
logic [`XLEN-1:0] HRDATAW;
// IEU vs HPTW arbitration signals to send to LSU
logic DisableTranslation;
logic [1:0] MemRWMtoLSU;
logic [2:0] Funct3MtoLSU;
logic [1:0] AtomicMtoLSU;
logic [`XLEN-1:0] MemAdrMtoLSU;
logic [`XLEN-1:0] WriteDataMtoLSU;
logic [`XLEN-1:0] ReadDataWFromLSU;
logic CommittedMfromLSU;
logic SquashSCWfromLSU;
logic DataMisalignedMfromLSU;
logic StallWtoLSU;
logic StallWfromLSU;
logic [2:0] Funct3MfromLSU;
logic [4:0] InstrClassM;
ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache
ifu ifu(.InstrInF(InstrRData),
.WalkerInstrPageFaultF(WalkerInstrPageFaultF),
.*); // instruction fetch unit: PC, branch prediction, instruction cache
ieu ieu(.*); // integer execution unit: integer register file, datapath and controller
// mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM);
lsu lsu(.*); // data cache unit
pagetablewalker pagetablewalker(.HPTWRead(HPTWRead),
.*); // can send addresses to ahblite, send out pagetablestall
// arbiter between IEU and pagetablewalker
lsuArb arbiter(// HPTW connection
.HPTWTranslate(MMUTranslate),
.HPTWRead(HPTWRead),
.HPTWPAdr(MMUPAdr),
.HPTWReadPTE(MMUReadPTE),
.HPTWReady(MMUReady),
.HPTWStall(HPTWStall),
// CPU connection
.MemRWM(MemRWM),
.Funct3M(Funct3M),
.AtomicM(AtomicM),
.MemAdrM(MemAdrM),
.StallW(StallW),
.WriteDataM(WriteDataM),
.ReadDataW(ReadDataW),
.CommittedM(CommittedM),
.SquashSCW(SquashSCW),
.DataMisalignedM(DataMisalignedM),
.DCacheStall(DCacheStall),
// LSU
.DisableTranslation(DisableTranslation),
.MemRWMtoLSU(MemRWMtoLSU),
.Funct3MtoLSU(Funct3MtoLSU),
.AtomicMtoLSU(AtomicMtoLSU),
.MemAdrMtoLSU(MemAdrMtoLSU),
.WriteDataMtoLSU(WriteDataMtoLSU),
.StallWtoLSU(StallWtoLSU),
.CommittedMfromLSU(CommittedMfromLSU),
.SquashSCWfromLSU(SquashSCWfromLSU),
.DataMisalignedMfromLSU(DataMisalignedMfromLSU),
.ReadDataWFromLSU(ReadDataWFromLSU),
.HPTWReadyfromLSU(HPTWReadyfromLSU),
.DataStall(DataStall),
.*);
lsu lsu(.MemRWM(MemRWMtoLSU),
.Funct3M(Funct3MtoLSU),
.AtomicM(AtomicMtoLSU),
.MemAdrM(MemAdrMtoLSU),
.WriteDataM(WriteDataMtoLSU),
.ReadDataW(ReadDataWFromLSU),
.StallW(StallWtoLSU),
.CommittedM(CommittedMfromLSU),
.SquashSCW(SquashSCWfromLSU),
.DataMisalignedM(DataMisalignedMfromLSU),
.DisableTranslation(DisableTranslation),
.DataStall(DataStall),
.HPTWReady(HPTWReadyfromLSU),
.Funct3MfromLSU(Funct3MfromLSU),
.StallWfromLSU(StallWfromLSU),
// .DataStall(LSUStall),
.* ); // data cache unit
ahblite ebu(
//.InstrReadF(1'b0),
//.InstrRData(InstrF), // hook up InstrF later
.ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking
.WriteDataM(WriteDataM),
.MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]),
.Funct7M(InstrM[31:25]),
.*);
//.InstrReadF(1'b0),
//.InstrRData(InstrF), // hook up InstrF later
.ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking
.WriteDataM(WriteDataM),
.MemSizeM(Funct3MfromLSU[1:0]), .UnsignedLoadM(Funct3MfromLSU[2]),
.Funct7M(InstrM[31:25]),
.HRDATAW(HRDATAW),
.StallW(StallWfromLSU),
.*);
pagetablewalker pagetablewalker(.*); // can send addresses to ahblite, send out pagetablestall
// *** can connect to hazard unit
// changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed.
// Would need to insertinstruction as InstrD, not InstrF
/*ahblite ebu(
.InstrReadF(1'b0),
.InstrRData(), // hook up InstrF later
.MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]),
.*); */
muldiv mdu(.*); // multiply and divide unit
hazard hzu(.*); // global stall and flush control
@ -200,5 +271,5 @@ module wallypipelinedhart (
// presently stub out SetFlagsM and FloatRegWriteW
//assign SetFflagsM = 0;
//assign FloatRegWriteW = 0;
endmodule

View File

@ -27,8 +27,8 @@
module testbench();
parameter waveOnICount = 2657000; // # of instructions at which to turn on waves in graphical sim
parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*2400000; // # of instructions at which to turn on waves in graphical sim
parameter stopICount = `BUSYBEAR*143898 + `BUILDROOT*0000000; // # instructions at which to halt sim completely (set to 0 to let it run as far as it can)
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////// DUT /////////////////////////////////////
@ -248,6 +248,9 @@ module testbench();
if (instrs == waveOnICount) begin
$display("turning on waves at %0d instructions", instrs);
$stop;
end else if (instrs == stopICount && stopICount != 0) begin
$display("Ending sim at %0d instructions (set stopICount to 0 to let the sim go on)", instrs);
$stop;
end
// Check if PCD is going to be flushed due to a branch or jump