forked from Github_Repos/cvw
Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally into main
This commit is contained in:
commit
9566daccaa
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -1,3 +0,0 @@
|
||||
[submodule "sky130/sky130_osu_sc_t12"]
|
||||
path = sky130/sky130_osu_sc_t12
|
||||
url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/
|
@ -1 +0,0 @@
|
||||
Subproject commit f60f2d0395053c4df362a97d7e2099721b6face6
|
@ -62,25 +62,25 @@
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
`define BOOTTIM_SUPPORTED 1'b1
|
||||
`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
`define BOOTTIM_RANGE 32'h00003FFF
|
||||
//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
//`define BOOTTIM_RANGE 32'h00000FFF
|
||||
`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
`define BOOTTIM_RANGE 56'h00003FFF
|
||||
//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
//`define BOOTTIM_RANGE 56'h00000FFF
|
||||
`define TIM_SUPPORTED 1'b1
|
||||
`define TIM_BASE 32'h80000000
|
||||
`define TIM_RANGE 32'h07FFFFFF
|
||||
`define TIM_BASE 56'h80000000
|
||||
`define TIM_RANGE 56'h07FFFFFF
|
||||
`define CLINT_SUPPORTED 1'b1
|
||||
`define CLINT_BASE 32'h02000000
|
||||
`define CLINT_RANGE 32'h0000FFFF
|
||||
`define CLINT_BASE 56'h02000000
|
||||
`define CLINT_RANGE 56'h0000FFFF
|
||||
`define GPIO_SUPPORTED 1'b1
|
||||
`define GPIO_BASE 32'h10012000
|
||||
`define GPIO_RANGE 32'h000000FF
|
||||
`define GPIO_BASE 56'h10012000
|
||||
`define GPIO_RANGE 56'h000000FF
|
||||
`define UART_SUPPORTED 1'b1
|
||||
`define UART_BASE 32'h10000000
|
||||
`define UART_RANGE 32'h00000007
|
||||
`define UART_BASE 56'h10000000
|
||||
`define UART_RANGE 56'h00000007
|
||||
`define PLIC_SUPPORTED 1'b1
|
||||
`define PLIC_BASE 32'h0C000000
|
||||
`define PLIC_RANGE 32'h03FFFFFF
|
||||
`define PLIC_BASE 56'h0C000000
|
||||
`define PLIC_RANGE 56'h03FFFFFF
|
||||
|
||||
// Bus Interface width
|
||||
`define AHBW 64
|
||||
|
@ -31,6 +31,7 @@
|
||||
`define BUSYBEAR 1
|
||||
`define LINUX_FIX_READ {'h10000005}
|
||||
`define LINUX_TEST_VECTORS "/courses/e190ax/busybear_boot/"
|
||||
//`define LINUX_TEST_VECTORS "../../../busybear_boot/"
|
||||
// RV32 or RV64: XLEN = 32 or 64
|
||||
`define XLEN 64
|
||||
|
||||
@ -63,25 +64,25 @@
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
|
||||
`define BOOTTIM_SUPPORTED 1'b1
|
||||
`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
`define BOOTTIM_RANGE 32'h00003FFF
|
||||
//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
//`define BOOTTIM_RANGE 32'h00000FFF
|
||||
`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
`define BOOTTIM_RANGE 56'h00003FFF
|
||||
//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
//`define BOOTTIM_RANGE 56'h00000FFF
|
||||
`define TIM_SUPPORTED 1'b1
|
||||
`define TIM_BASE 32'h80000000
|
||||
`define TIM_RANGE 32'h07FFFFFF
|
||||
`define TIM_BASE 56'h80000000
|
||||
`define TIM_RANGE 56'h07FFFFFF
|
||||
`define CLINT_SUPPORTED 1'b1
|
||||
`define CLINT_BASE 32'h02000000
|
||||
`define CLINT_RANGE 32'h0000FFFF
|
||||
`define CLINT_BASE 56'h02000000
|
||||
`define CLINT_RANGE 56'h0000FFFF
|
||||
`define GPIO_SUPPORTED 1'b1
|
||||
`define GPIO_BASE 32'h10012000
|
||||
`define GPIO_RANGE 32'h000000FF
|
||||
`define GPIO_BASE 56'h10012000
|
||||
`define GPIO_RANGE 56'h000000FF
|
||||
`define UART_SUPPORTED 1'b1
|
||||
`define UART_BASE 32'h10000000
|
||||
`define UART_RANGE 32'h00000007
|
||||
`define UART_BASE 56'h10000000
|
||||
`define UART_RANGE 56'h00000007
|
||||
`define PLIC_SUPPORTED 1'b1
|
||||
`define PLIC_BASE 32'h0C000000
|
||||
`define PLIC_RANGE 32'h03FFFFFF
|
||||
`define PLIC_BASE 56'h0C000000
|
||||
`define PLIC_RANGE 56'h03FFFFFF
|
||||
|
||||
// Bus Interface width
|
||||
`define AHBW 64
|
||||
|
@ -61,26 +61,27 @@
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
|
||||
// *** Each of these is `PA_BITS wide. Is this parameterizable INSIDE the config file?
|
||||
`define BOOTTIM_SUPPORTED 1'b1
|
||||
`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
`define BOOTTIM_RANGE 32'h00003FFF
|
||||
//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
//`define BOOTTIM_RANGE 32'h00000FFF
|
||||
`define BOOTTIM_BASE 34'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
`define BOOTTIM_RANGE 34'h00003FFF
|
||||
//`define BOOTTIM_BASE 34'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
//`define BOOTTIM_RANGE 34'h00000FFF
|
||||
`define TIM_SUPPORTED 1'b1
|
||||
`define TIM_BASE 32'h80000000
|
||||
`define TIM_RANGE 32'h07FFFFFF
|
||||
`define TIM_BASE 34'h80000000
|
||||
`define TIM_RANGE 34'h07FFFFFF
|
||||
`define CLINT_SUPPORTED 1'b1
|
||||
`define CLINT_BASE 32'h02000000
|
||||
`define CLINT_RANGE 32'h0000FFFF
|
||||
`define CLINT_BASE 34'h02000000
|
||||
`define CLINT_RANGE 34'h0000FFFF
|
||||
`define GPIO_SUPPORTED 1'b1
|
||||
`define GPIO_BASE 32'h10012000
|
||||
`define GPIO_RANGE 32'h000000FF
|
||||
`define GPIO_BASE 34'h10012000
|
||||
`define GPIO_RANGE 34'h000000FF
|
||||
`define UART_SUPPORTED 1'b1
|
||||
`define UART_BASE 32'h10000000
|
||||
`define UART_RANGE 32'h00000007
|
||||
`define UART_BASE 34'h10000000
|
||||
`define UART_RANGE 34'h00000007
|
||||
`define PLIC_SUPPORTED 1'b1
|
||||
`define PLIC_BASE 32'h0C000000
|
||||
`define PLIC_RANGE 32'h03FFFFFF
|
||||
`define PLIC_BASE 34'h0C000000
|
||||
`define PLIC_RANGE 34'h03FFFFFF
|
||||
|
||||
// Bus Interface width
|
||||
`define AHBW 32
|
||||
|
@ -53,7 +53,7 @@
|
||||
`define DTLB_ENTRY_BITS 5
|
||||
|
||||
// Legal numbers of PMP entries are 0, 16, or 64
|
||||
`define PMP_ENTRIES 16
|
||||
`define PMP_ENTRIES 64
|
||||
|
||||
// Address space
|
||||
`define RESET_VECTOR 64'h0000000080000000
|
||||
@ -65,26 +65,27 @@
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
|
||||
// *** Each of these is `PA_BITS wide. Is this parameterizable INSIDE the config file?
|
||||
`define BOOTTIM_SUPPORTED 1'b1
|
||||
`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
`define BOOTTIM_RANGE 32'h00003FFF
|
||||
//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
//`define BOOTTIM_RANGE 32'h00000FFF
|
||||
`define BOOTTIM_RANGE 56'h00003FFF
|
||||
`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
//`define BOOTTIM_RANGE 56'h00000FFF
|
||||
`define TIM_SUPPORTED 1'b1
|
||||
`define TIM_BASE 32'h80000000
|
||||
`define TIM_RANGE 32'h07FFFFFF
|
||||
`define TIM_BASE 56'h80000000
|
||||
`define TIM_RANGE 56'h07FFFFFF
|
||||
`define CLINT_SUPPORTED 1'b1
|
||||
`define CLINT_BASE 32'h02000000
|
||||
`define CLINT_RANGE 32'h0000FFFF
|
||||
`define CLINT_BASE 56'h02000000
|
||||
`define CLINT_RANGE 56'h0000FFFF
|
||||
`define GPIO_SUPPORTED 1'b1
|
||||
`define GPIO_BASE 32'h10012000
|
||||
`define GPIO_RANGE 32'h000000FF
|
||||
`define GPIO_BASE 56'h10012000
|
||||
`define GPIO_RANGE 56'h000000FF
|
||||
`define UART_SUPPORTED 1'b1
|
||||
`define UART_BASE 32'h10000000
|
||||
`define UART_RANGE 32'h00000007
|
||||
`define UART_BASE 56'h10000000
|
||||
`define UART_RANGE 56'h00000007
|
||||
`define PLIC_SUPPORTED 1'b1
|
||||
`define PLIC_BASE 32'h0C000000
|
||||
`define PLIC_RANGE 32'h03FFFFFF
|
||||
`define PLIC_BASE 56'h0C000000
|
||||
`define PLIC_RANGE 56'h03FFFFFF
|
||||
|
||||
// Test modes
|
||||
|
||||
|
@ -35,5 +35,6 @@ vopt work_busybear.testbench -o workopt_busybear
|
||||
|
||||
vsim workopt_busybear -suppress 8852,12070
|
||||
|
||||
run -all
|
||||
run -all
|
||||
quit
|
||||
|
@ -35,9 +35,10 @@ vopt +acc work.testbench -o workopt
|
||||
|
||||
vsim workopt -suppress 8852,12070
|
||||
|
||||
do ./wave-dos/linux-waves.do
|
||||
|
||||
|
||||
#-- Run the Simulation
|
||||
run -all
|
||||
do ./wave-dos/linux-waves.do
|
||||
run -all
|
||||
##quit
|
||||
|
@ -35,8 +35,8 @@ switch $argc {
|
||||
}
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
vopt +acc -gDEBUG=1 work.testbench -o workopt
|
||||
vsim workopt
|
||||
vopt -fsmdebug +acc -gDEBUG=1 work.testbench -o workopt
|
||||
vsim workopt -fsmdebug
|
||||
|
||||
|
||||
do wave.do
|
||||
|
@ -122,11 +122,11 @@ add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UEPC_REGW
|
||||
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UTVEC_REGW
|
||||
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UIP_REGW
|
||||
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrn/UIE_REGW
|
||||
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG01_REGW
|
||||
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG23_REGW
|
||||
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPADDR_ARRAY_REGW
|
||||
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/MISA_REGW
|
||||
add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csru/FRM_REGW
|
||||
#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG01_REGW
|
||||
#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG23_REGW
|
||||
#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/PMPADDR_ARRAY_REGW
|
||||
#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csrm/MISA_REGW
|
||||
#add wave -hex sim:/testbench/dut/hart/priv/csr/genblk1/csru/FRM_REGW
|
||||
|
||||
add wave -divider
|
||||
add wave -hex -r /testbench/*
|
||||
|
@ -7,32 +7,32 @@ add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/Func
|
||||
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE
|
||||
add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName
|
||||
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM
|
||||
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/BPPredWrongE
|
||||
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM
|
||||
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/RetM
|
||||
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/TrapM
|
||||
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD
|
||||
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/ICacheStallF
|
||||
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/DataStall
|
||||
add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/MulDivStallD
|
||||
add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF
|
||||
add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD
|
||||
add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE
|
||||
add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushM
|
||||
add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushW
|
||||
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM
|
||||
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM
|
||||
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM
|
||||
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM
|
||||
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM
|
||||
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM
|
||||
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM
|
||||
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM
|
||||
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/EcallFaultM
|
||||
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM
|
||||
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM
|
||||
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StorePageFaultM
|
||||
add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InterruptM
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/DCacheStall
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD
|
||||
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF
|
||||
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushD
|
||||
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushE
|
||||
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushM
|
||||
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/hart/FlushW
|
||||
add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallF
|
||||
add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallD
|
||||
add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE
|
||||
@ -89,6 +89,7 @@ add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrFName
|
||||
add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrD
|
||||
add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrE
|
||||
add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrM
|
||||
add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrW
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNextF
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCF
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCPlus2or4F
|
||||
@ -104,7 +105,7 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/c/RegWriteD
|
||||
add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/RdD
|
||||
add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs1D
|
||||
add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs2D
|
||||
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rf
|
||||
add wave -noupdate -group RegFile -expand /testbench/dut/hart/ieu/dp/regf/rf
|
||||
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a1
|
||||
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a2
|
||||
add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a3
|
||||
@ -117,31 +118,18 @@ add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart
|
||||
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW
|
||||
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW
|
||||
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultW
|
||||
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/a
|
||||
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/b
|
||||
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol
|
||||
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/result
|
||||
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/flags
|
||||
add wave -noupdate -group alu -divider internals
|
||||
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/overflow
|
||||
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/carry
|
||||
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/zero
|
||||
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/neg
|
||||
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/lt
|
||||
add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ltu
|
||||
add wave -noupdate -group {dcache memory} /testbench/dut/hart/dmem/MemReadM
|
||||
add wave -noupdate -group {dcache memory} /testbench/dut/hart/dmem/MemWriteM
|
||||
add wave -noupdate -group {dcache memory} /testbench/dut/hart/dmem/MemAckW
|
||||
add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/dmem/MemRWM
|
||||
add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/dmem/AtomicM
|
||||
add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/MemAdrM
|
||||
add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/dmem/ReadDataW
|
||||
add wave -noupdate -group dcache -expand -group {cpu request} /testbench/dut/hart/WriteDataM
|
||||
add wave -noupdate -group dcache -color Gray90 /testbench/dut/hart/dmem/CurrState
|
||||
add wave -noupdate -group dcache /testbench/dut/hart/MemPAdrM
|
||||
add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAccessM
|
||||
add wave -noupdate -group dcache /testbench/dut/hart/dmem/AtomicMaskedM
|
||||
add wave -noupdate -group dcache /testbench/dut/hart/dmem/MemAckW
|
||||
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/a
|
||||
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/b
|
||||
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/alucontrol
|
||||
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/result
|
||||
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/flags
|
||||
add wave -noupdate -expand -group alu -divider internals
|
||||
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/overflow
|
||||
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/carry
|
||||
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/zero
|
||||
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/neg
|
||||
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/lt
|
||||
add wave -noupdate -expand -group alu /testbench/dut/hart/ieu/dp/alu/ltu
|
||||
add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D
|
||||
add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D
|
||||
add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1E
|
||||
@ -184,48 +172,60 @@ add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/N
|
||||
add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/D
|
||||
add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/Q
|
||||
add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/rem0
|
||||
add wave -noupdate -expand -group icache -color Orange /testbench/dut/hart/ifu/icache/controller/CurrState
|
||||
add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/controller/NextState
|
||||
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit
|
||||
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill
|
||||
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF
|
||||
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC
|
||||
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave
|
||||
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect
|
||||
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux
|
||||
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave
|
||||
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset
|
||||
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn
|
||||
add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn
|
||||
add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/NUMLINES
|
||||
add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKLEN
|
||||
add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKBYTELEN
|
||||
add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/OFFSETLEN
|
||||
add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/INDEXLEN
|
||||
add wave -noupdate -expand -group icache -expand -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/TAGLEN
|
||||
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag
|
||||
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount
|
||||
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF
|
||||
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF
|
||||
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF
|
||||
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF
|
||||
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable
|
||||
add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData
|
||||
add wave -noupdate -expand -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit
|
||||
add wave -noupdate -expand -group icache -expand -group memory -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData
|
||||
add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable
|
||||
add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine
|
||||
add wave -noupdate -expand -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData
|
||||
add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF
|
||||
add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/AlignedInstrRawD
|
||||
add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FlushDLastCyclen
|
||||
add wave -noupdate -expand -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/InstrRawD
|
||||
add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF
|
||||
add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF
|
||||
add wave -noupdate -expand -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPFinalF
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/BusState
|
||||
add wave -noupdate -group icache -color Orange /testbench/dut/hart/ifu/icache/controller/CurrState
|
||||
add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/controller/NextState
|
||||
add wave -noupdate -group icache /testbench/dut/hart/ifu/ITLBMissF
|
||||
add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/ITLBWriteF
|
||||
add wave -noupdate -group icache -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/DataValidBit
|
||||
add wave -noupdate -group icache -group {tag read} /testbench/dut/hart/ifu/icache/cachemem/cachetags/ReadData
|
||||
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit
|
||||
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill
|
||||
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF
|
||||
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC
|
||||
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave
|
||||
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect
|
||||
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PCMux
|
||||
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave
|
||||
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset
|
||||
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn
|
||||
add wave -noupdate -group icache -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn
|
||||
add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/NUMLINES
|
||||
add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKLEN
|
||||
add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/BLOCKBYTELEN
|
||||
add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/OFFSETLEN
|
||||
add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/INDEXLEN
|
||||
add wave -noupdate -group icache -group {icache parameters} -radix unsigned /testbench/dut/hart/ifu/icache/cachemem/TAGLEN
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCount
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrPAdrF
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrInF
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteData
|
||||
add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteEnable
|
||||
add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/WriteLine
|
||||
add wave -noupdate -group icache -expand -group memory -group {tag write} /testbench/dut/hart/ifu/icache/cachemem/cachetags/StoredData
|
||||
add wave -noupdate -group icache -expand -group {instr to cpu} /testbench/dut/hart/ifu/icache/controller/FinalInstrRawF
|
||||
add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPF
|
||||
add wave -noupdate -group icache -expand -group pc /testbench/dut/hart/ifu/icache/controller/PCPreFinalF
|
||||
add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATA
|
||||
add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATAMasked
|
||||
add wave -noupdate -group AHB -expand -group read /testbench/dut/hart/ebu/HRDATANext
|
||||
add wave -noupdate -group AHB -color Gold /testbench/dut/hart/ebu/BusState
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/ProposedNextBusState
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/NextBusState
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/DSquashBusAccessM
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/ISquashBusAccessF
|
||||
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/AtomicMaskedM
|
||||
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemReadM
|
||||
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemWriteM
|
||||
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/InstrReadF
|
||||
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/MemSizeM
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HCLK
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRESETn
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRDATA
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRDATANext
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HREADY
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HRESP
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDR
|
||||
@ -239,12 +239,136 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED
|
||||
add wave -noupdate -group csr -color Aquamarine -label {br executed} -radix unsigned {/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[5]}
|
||||
add wave -noupdate -group csr -color Aquamarine -label {br miss predicted} -radix unsigned {/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[4]}
|
||||
add wave -noupdate -group csr -childformat {{{/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[5]} -radix unsigned} {{/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[4]} -radix unsigned}} -subitemconfig {{/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[5]} {-height 16 -radix unsigned} {/testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW[4]} {-height 16 -radix unsigned}} /testbench/dut/hart/priv/csr/genblk1/counters/genblk2/HPMCOUNTER_REGW
|
||||
add wave -noupdate -group AHB /testbench/dut/hart/ebu/StallW
|
||||
add wave -noupdate -expand -group lsu -color Gold /testbench/dut/hart/lsu/CurrState
|
||||
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DisableTranslation
|
||||
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemRWM
|
||||
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DataStall
|
||||
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAdrM
|
||||
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemPAdrM
|
||||
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/ReadDataW
|
||||
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/WriteDataM
|
||||
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/AtomicMaskedM
|
||||
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/DSquashBusAccessM
|
||||
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/HRDATAW
|
||||
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/MemAckW
|
||||
add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/StallW
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HCLK
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HSELPLIC
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HADDR
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HWRITE
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HREADY
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HTRANS
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HWDATA
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/UARTIntr
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/GPIOIntr
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HREADPLIC
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HRESPPLIC
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/HREADYPLIC
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/genblk2/plic/ExtIntM
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HCLK
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HSELGPIO
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HADDR
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HWDATA
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HWRITE
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HREADY
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HTRANS
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HREADGPIO
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HRESPGPIO
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/HREADYGPIO
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOPinsIn
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOPinsOut
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOPinsEn
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/genblk3/gpio/GPIOIntr
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HCLK
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HSELCLINT
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HADDR
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HWRITE
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HWDATA
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HREADY
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HTRANS
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HREADCLINT
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HRESPCLINT
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/HREADYCLINT
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIME
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/MTIMECMP
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/TimerIntM
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/genblk1/clint/SwIntM
|
||||
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUTranslate
|
||||
add wave -noupdate -expand -group ptwalker -color Gold /testbench/dut/hart/pagetablewalker/WalkerState
|
||||
add wave -noupdate -expand -group ptwalker -color Salmon /testbench/dut/hart/pagetablewalker/HPTWStall
|
||||
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/HPTWRead
|
||||
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr
|
||||
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUStall
|
||||
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/EndWalk
|
||||
add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/MMUReadPTE
|
||||
add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/PRegEn
|
||||
add wave -noupdate -expand -group ptwalker -expand -group pte /testbench/dut/hart/pagetablewalker/CurrentPTE
|
||||
add wave -noupdate -expand -group ptwalker -divider data
|
||||
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/TranslationPAdr
|
||||
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/ValidPTE
|
||||
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/LeafPTE
|
||||
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUStall
|
||||
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/TranslationPAdr
|
||||
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/PageTableEntry
|
||||
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/PageType
|
||||
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/ITLBWriteF
|
||||
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/DTLBWriteM
|
||||
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerInstrPageFaultF
|
||||
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerLoadPageFaultM
|
||||
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/WalkerStorePageFaultM
|
||||
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/MMUStall
|
||||
add wave -noupdate -expand -group ptwalker -group {fsm outputs} /testbench/dut/hart/pagetablewalker/EndWalk
|
||||
add wave -noupdate -expand -group ptwalker /testbench/dut/hart/pagetablewalker/MMUPAdr
|
||||
add wave -noupdate -expand -group {LSU ARB} -color Gold /testbench/dut/hart/arbiter/CurrState
|
||||
add wave -noupdate -expand -group {LSU ARB} -color {Medium Orchid} /testbench/dut/hart/arbiter/SelPTW
|
||||
add wave -noupdate -expand -group {LSU ARB} /testbench/dut/hart/pagetablewalker/MMUStall
|
||||
add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWTranslate
|
||||
add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWRead
|
||||
add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWPAdr
|
||||
add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReadPTE
|
||||
add wave -noupdate -expand -group {LSU ARB} -expand -group hptw /testbench/dut/hart/arbiter/HPTWReady
|
||||
add wave -noupdate -expand -group {LSU ARB} -group toLSU /testbench/dut/hart/arbiter/MemAdrMtoLSU
|
||||
add wave -noupdate /testbench/dut/hart/lsu/DataStall
|
||||
add wave -noupdate -group csr /testbench/dut/hart/priv/csr/MIP_REGW
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HCLK
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HRESETn
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HSELUART
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HADDR
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HWRITE
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HWDATA
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HREADUART
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HRESPUART
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/HREADYUART
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/SIN
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/DSRb
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/DCDb
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/CTSb
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/genblk4/uart/RIb
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/SOUT
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/RTSb
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/DTRb
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/OUT1b
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/OUT2b
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/INTR
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/TXRDYb
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/genblk4/uart/RXRDYb
|
||||
add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/TLBMiss
|
||||
add wave -noupdate -group dtlb /testbench/dut/hart/lsu/dmmu/tlb/TLBWrite
|
||||
add wave -noupdate -group itlb /testbench/dut/hart/ifu/ITLBMissF
|
||||
add wave -noupdate /testbench/dut/hart/pagetablewalker/StartWalk
|
||||
add wave -noupdate /testbench/dut/hart/lsu/dmmu/tlb/DisableTranslation
|
||||
add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/VirtualAddress
|
||||
add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/CAMHit
|
||||
add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/VPNIndex
|
||||
add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/HitPageType
|
||||
add wave -noupdate -group tlbread /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/VirtualPageNumber
|
||||
add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/TLBWrite
|
||||
add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/PTEWriteVal
|
||||
add wave -noupdate -group tlbwrite /testbench/dut/hart/lsu/dmmu/tlb/tlbcam/WriteLines
|
||||
TreeUpdate [SetDefaultTree]
|
||||
WaveRestoreCursors {{Cursor 4} {32648010 ns} 0} {{Cursor 5} {12105831 ns} 0}
|
||||
quietly wave cursor active 2
|
||||
WaveRestoreCursors {{Cursor 8} {4545 ns} 0} {{Cursor 3} {3377 ns} 0} {{Cursor 4} {3215 ns} 0}
|
||||
quietly wave cursor active 1
|
||||
configure wave -namecolwidth 250
|
||||
configure wave -valuecolwidth 189
|
||||
configure wave -justifyvalue left
|
||||
@ -259,4 +383,4 @@ configure wave -griddelta 40
|
||||
configure wave -timeline 0
|
||||
configure wave -timelineunits ns
|
||||
update
|
||||
WaveRestoreZoom {0 ns} {30754715 ns}
|
||||
WaveRestoreZoom {4209 ns} {4657 ns}
|
||||
|
398
wally-pipelined/src/cache/ICacheCntrl.sv
vendored
398
wally-pipelined/src/cache/ICacheCntrl.sv
vendored
@ -25,53 +25,57 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module ICacheCntrl #(parameter BLOCKLEN = 256) (
|
||||
// Inputs from pipeline
|
||||
input logic clk, reset,
|
||||
input logic StallF, StallD,
|
||||
input logic FlushD,
|
||||
module ICacheCntrl #(parameter BLOCKLEN = 256)
|
||||
(
|
||||
// Inputs from pipeline
|
||||
input logic clk, reset,
|
||||
input logic StallF, StallD,
|
||||
input logic FlushD,
|
||||
|
||||
// Input the address to read
|
||||
// The upper bits of the physical pc
|
||||
input logic [`PA_BITS-1:0] PCNextF,
|
||||
input logic [`PA_BITS-1:0] PCPF,
|
||||
// Signals to/from cache memory
|
||||
// The read coming out of it
|
||||
input logic [31:0] ICacheMemReadData,
|
||||
input logic ICacheMemReadValid,
|
||||
// The address at which we want to search the cache memory
|
||||
output logic [`PA_BITS-1:0] PCTagF,
|
||||
output logic [`PA_BITS-1:0] PCNextIndexF,
|
||||
output logic ICacheReadEn,
|
||||
// Load data into the cache
|
||||
output logic ICacheMemWriteEnable,
|
||||
output logic [BLOCKLEN-1:0] ICacheMemWriteData,
|
||||
// Input the address to read
|
||||
// The upper bits of the physical pc
|
||||
input logic [`PA_BITS-1:0] PCNextF,
|
||||
input logic [`PA_BITS-1:0] PCPF,
|
||||
// Signals to/from cache memory
|
||||
// The read coming out of it
|
||||
input logic [31:0] ICacheMemReadData,
|
||||
input logic ICacheMemReadValid,
|
||||
// The address at which we want to search the cache memory
|
||||
output logic [`PA_BITS-1:0] PCTagF,
|
||||
output logic [`PA_BITS-1:0] PCNextIndexF,
|
||||
output logic ICacheReadEn,
|
||||
// Load data into the cache
|
||||
output logic ICacheMemWriteEnable,
|
||||
output logic [BLOCKLEN-1:0] ICacheMemWriteData,
|
||||
|
||||
// Outputs to rest of ifu
|
||||
// High if the instruction in the fetch stage is compressed
|
||||
output logic CompressedF,
|
||||
// The instruction that was requested
|
||||
// If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros
|
||||
output logic [31:0] FinalInstrRawF,
|
||||
// Outputs to rest of ifu
|
||||
// High if the instruction in the fetch stage is compressed
|
||||
output logic CompressedF,
|
||||
// The instruction that was requested
|
||||
// If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros
|
||||
output logic [31:0] FinalInstrRawF,
|
||||
|
||||
// Outputs to pipeline control stuff
|
||||
output logic ICacheStallF, EndFetchState,
|
||||
// Outputs to pipeline control stuff
|
||||
output logic ICacheStallF, EndFetchState,
|
||||
input logic ITLBMissF,
|
||||
input logic ITLBWriteF,
|
||||
input logic WalkerInstrPageFaultF,
|
||||
|
||||
// Signals to/from ahblite interface
|
||||
// A read containing the requested data
|
||||
input logic [`XLEN-1:0] InstrInF,
|
||||
input logic InstrAckF,
|
||||
// The read we request from main memory
|
||||
output logic [`PA_BITS-1:0] InstrPAdrF,
|
||||
output logic InstrReadF
|
||||
);
|
||||
// Signals to/from ahblite interface
|
||||
// A read containing the requested data
|
||||
input logic [`XLEN-1:0] InstrInF,
|
||||
input logic InstrAckF,
|
||||
// The read we request from main memory
|
||||
output logic [`PA_BITS-1:0] InstrPAdrF,
|
||||
output logic InstrReadF
|
||||
);
|
||||
|
||||
// FSM states
|
||||
localparam STATE_READY = 0;
|
||||
localparam STATE_HIT_SPILL = 1; // spill, block 0 hit
|
||||
localparam STATE_HIT_SPILL_MISS_FETCH_WDV = 2; // block 1 miss, issue read to AHB and wait data.
|
||||
localparam STATE_HIT_SPILL_MISS_FETCH_DONE = 3; // write data into SRAM/LUT
|
||||
localparam STATE_HIT_SPILL_MERGE = 4; // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL.
|
||||
localparam STATE_READY = 'h0;
|
||||
localparam STATE_HIT_SPILL = 'h1; // spill, block 0 hit
|
||||
localparam STATE_HIT_SPILL_MISS_FETCH_WDV = 'h2; // block 1 miss, issue read to AHB and wait data.
|
||||
localparam STATE_HIT_SPILL_MISS_FETCH_DONE = 'h3; // write data into SRAM/LUT
|
||||
localparam STATE_HIT_SPILL_MERGE = 'h4; // Read block 0 of CPU access, should be able to optimize into STATE_HIT_SPILL.
|
||||
|
||||
// a challenge is the spill signal gets us out of the ready state and moves us to
|
||||
// 1 of the 2 spill branches. However the original fsm design had us return to
|
||||
@ -87,28 +91,32 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) (
|
||||
// between CPU stalling and that register.
|
||||
// Picking option 1.
|
||||
|
||||
localparam STATE_HIT_SPILL_FINAL = 5; // this state replicates STATE_READY's replay of the
|
||||
localparam STATE_HIT_SPILL_FINAL = 'h5; // this state replicates STATE_READY's replay of the
|
||||
// spill access but does not consider spill. It also does not do another operation.
|
||||
|
||||
|
||||
localparam STATE_MISS_FETCH_WDV = 6; // aligned miss, issue read to AHB and wait for data.
|
||||
localparam STATE_MISS_FETCH_DONE = 7; // write data into SRAM/LUT
|
||||
localparam STATE_MISS_READ = 8; // read block 1 from SRAM/LUT
|
||||
localparam STATE_MISS_FETCH_WDV = 'h6; // aligned miss, issue read to AHB and wait for data.
|
||||
localparam STATE_MISS_FETCH_DONE = 'h7; // write data into SRAM/LUT
|
||||
localparam STATE_MISS_READ = 'h8; // read block 1 from SRAM/LUT
|
||||
|
||||
localparam STATE_MISS_SPILL_FETCH_WDV = 9; // spill, miss on block 0, issue read to AHB and wait
|
||||
localparam STATE_MISS_SPILL_FETCH_DONE = 10; // write data into SRAM/LUT
|
||||
localparam STATE_MISS_SPILL_READ1 = 11; // read block 0 from SRAM/LUT
|
||||
localparam STATE_MISS_SPILL_2 = 12; // return to ready if hit or do second block update.
|
||||
localparam STATE_MISS_SPILL_2_START = 13; // return to ready if hit or do second block update.
|
||||
localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 14; // miss on block 1, issue read to AHB and wait
|
||||
localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 15; // write data to SRAM/LUT
|
||||
localparam STATE_MISS_SPILL_MERGE = 16; // read block 0 of CPU access,
|
||||
localparam STATE_MISS_SPILL_FETCH_WDV = 'h9; // spill, miss on block 0, issue read to AHB and wait
|
||||
localparam STATE_MISS_SPILL_FETCH_DONE = 'ha; // write data into SRAM/LUT
|
||||
localparam STATE_MISS_SPILL_READ1 = 'hb; // read block 0 from SRAM/LUT
|
||||
localparam STATE_MISS_SPILL_2 = 'hc; // return to ready if hit or do second block update.
|
||||
localparam STATE_MISS_SPILL_2_START = 'hd; // return to ready if hit or do second block update.
|
||||
localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 'he; // miss on block 1, issue read to AHB and wait
|
||||
localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 'hf; // write data to SRAM/LUT
|
||||
localparam STATE_MISS_SPILL_MERGE = 'h10; // read block 0 of CPU access,
|
||||
|
||||
localparam STATE_MISS_SPILL_FINAL = 17; // this state replicates STATE_READY's replay of the
|
||||
localparam STATE_MISS_SPILL_FINAL = 'h11; // this state replicates STATE_READY's replay of the
|
||||
// spill access but does not consider spill. It also does not do another operation.
|
||||
|
||||
|
||||
localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? invalidate by cache block or address?
|
||||
localparam STATE_INVALIDATE = 'h12; // *** not sure if invalidate or evict? invalidate by cache block or address?
|
||||
localparam STATE_TLB_MISS = 'h13;
|
||||
localparam STATE_TLB_MISS_DONE = 'h14;
|
||||
|
||||
|
||||
|
||||
localparam AHBByteLength = `XLEN / 8;
|
||||
localparam AHBOFFETWIDTH = $clog2(AHBByteLength);
|
||||
@ -119,39 +127,39 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) (
|
||||
|
||||
localparam WORDSPERLINE = BLOCKLEN/`XLEN;
|
||||
localparam LOGWPL = $clog2(WORDSPERLINE);
|
||||
localparam integer PA_WIDTH = `PA_BITS - 2;
|
||||
localparam integer PA_WIDTH = `PA_BITS - 2;
|
||||
|
||||
|
||||
logic [4:0] CurrState, NextState;
|
||||
logic hit, spill;
|
||||
logic SavePC;
|
||||
logic [1:0] PCMux;
|
||||
logic CntReset;
|
||||
logic PreCntEn, CntEn;
|
||||
logic spillSave;
|
||||
logic UnalignedSelect;
|
||||
logic FetchCountFlag;
|
||||
logic [4:0] CurrState, NextState;
|
||||
logic hit, spill;
|
||||
logic SavePC;
|
||||
logic [1:0] PCMux;
|
||||
logic CntReset;
|
||||
logic PreCntEn, CntEn;
|
||||
logic spillSave;
|
||||
logic UnalignedSelect;
|
||||
logic FetchCountFlag;
|
||||
localparam FetchCountThreshold = WORDSPERLINE - 1;
|
||||
|
||||
logic [LOGWPL:0] FetchCount, NextFetchCount;
|
||||
logic [LOGWPL:0] FetchCount, NextFetchCount;
|
||||
|
||||
logic [`PA_BITS-1:0] PCPreFinalF, PCPSpillF;
|
||||
logic [`PA_BITS-1:0] PCPreFinalF, PCPSpillF;
|
||||
logic [`PA_BITS-1:OFFSETWIDTH] PCPTrunkF;
|
||||
|
||||
|
||||
logic [15:0] SpillDataBlock0;
|
||||
logic [15:0] SpillDataBlock0;
|
||||
|
||||
localparam [31:0] NOP = 32'h13;
|
||||
|
||||
logic reset_q;
|
||||
logic [1:0] PCMux_q;
|
||||
logic reset_q;
|
||||
logic [1:0] PCMux_q;
|
||||
|
||||
|
||||
// Misaligned signals
|
||||
//logic [`XLEN:0] MisalignedInstrRawF;
|
||||
//logic MisalignedStall;
|
||||
// Cache fault signals
|
||||
//logic FaultStall;
|
||||
// Misaligned signals
|
||||
//logic [`XLEN:0] MisalignedInstrRawF;
|
||||
//logic MisalignedStall;
|
||||
// Cache fault signals
|
||||
//logic FaultStall;
|
||||
|
||||
// on spill we want to get the first 2 bytes of the next cache block.
|
||||
// the spill only occurs if the PCPF mod BlockByteLength == -2. Therefore we can
|
||||
@ -175,7 +183,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) (
|
||||
// truncate the offset from PCPF for memory address generation
|
||||
assign PCPTrunkF = PCTagF[`PA_BITS-1:OFFSETWIDTH];
|
||||
|
||||
// Detect if the instruction is compressed
|
||||
// Detect if the instruction is compressed
|
||||
assign CompressedF = FinalInstrRawF[1:0] != 2'b11;
|
||||
|
||||
|
||||
@ -205,167 +213,175 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) (
|
||||
ICacheStallF = 1'b1;
|
||||
|
||||
case (CurrState)
|
||||
|
||||
STATE_READY: begin
|
||||
PCMux = 2'b00;
|
||||
ICacheReadEn = 1'b1;
|
||||
if (hit & ~spill) begin
|
||||
SavePC = 1'b1;
|
||||
ICacheStallF = 1'b0;
|
||||
NextState = STATE_READY;
|
||||
end else if (hit & spill) begin
|
||||
spillSave = 1'b1;
|
||||
PCMux = 2'b10;
|
||||
NextState = STATE_HIT_SPILL;
|
||||
end else if (~hit & ~spill) begin
|
||||
CntReset = 1'b1;
|
||||
NextState = STATE_MISS_FETCH_WDV;
|
||||
end else if (~hit & spill) begin
|
||||
CntReset = 1'b1;
|
||||
PCMux = 2'b01;
|
||||
NextState = STATE_MISS_SPILL_FETCH_WDV;
|
||||
end else begin
|
||||
PCMux = 2'b00;
|
||||
ICacheReadEn = 1'b1;
|
||||
if (ITLBMissF) begin
|
||||
NextState = STATE_TLB_MISS;
|
||||
end else if (hit & ~spill) begin
|
||||
SavePC = 1'b1;
|
||||
ICacheStallF = 1'b0;
|
||||
NextState = STATE_READY;
|
||||
end
|
||||
end else if (hit & spill) begin
|
||||
spillSave = 1'b1;
|
||||
PCMux = 2'b10;
|
||||
NextState = STATE_HIT_SPILL;
|
||||
end else if (~hit & ~spill) begin
|
||||
CntReset = 1'b1;
|
||||
NextState = STATE_MISS_FETCH_WDV;
|
||||
end else if (~hit & spill) begin
|
||||
CntReset = 1'b1;
|
||||
PCMux = 2'b01;
|
||||
NextState = STATE_MISS_SPILL_FETCH_WDV;
|
||||
end else begin
|
||||
NextState = STATE_READY;
|
||||
end
|
||||
end
|
||||
|
||||
// branch 1, hit spill and 2, miss spill hit
|
||||
STATE_HIT_SPILL: begin
|
||||
PCMux = 2'b10;
|
||||
UnalignedSelect = 1'b1;
|
||||
ICacheReadEn = 1'b1;
|
||||
if (hit) begin
|
||||
PCMux = 2'b10;
|
||||
UnalignedSelect = 1'b1;
|
||||
ICacheReadEn = 1'b1;
|
||||
if (hit) begin
|
||||
NextState = STATE_HIT_SPILL_FINAL;
|
||||
end else begin
|
||||
CntReset = 1'b1;
|
||||
end else begin
|
||||
CntReset = 1'b1;
|
||||
NextState = STATE_HIT_SPILL_MISS_FETCH_WDV;
|
||||
end
|
||||
end
|
||||
end
|
||||
STATE_HIT_SPILL_MISS_FETCH_WDV: begin
|
||||
PCMux = 2'b10;
|
||||
//InstrReadF = 1'b1;
|
||||
PreCntEn = 1'b1;
|
||||
if (FetchCountFlag & InstrAckF) begin
|
||||
NextState = STATE_HIT_SPILL_MISS_FETCH_DONE;
|
||||
end else begin
|
||||
NextState = STATE_HIT_SPILL_MISS_FETCH_WDV;
|
||||
end
|
||||
PCMux = 2'b10;
|
||||
//InstrReadF = 1'b1;
|
||||
PreCntEn = 1'b1;
|
||||
if (FetchCountFlag & InstrAckF) begin
|
||||
NextState = STATE_HIT_SPILL_MISS_FETCH_DONE;
|
||||
end else begin
|
||||
NextState = STATE_HIT_SPILL_MISS_FETCH_WDV;
|
||||
end
|
||||
end
|
||||
STATE_HIT_SPILL_MISS_FETCH_DONE: begin
|
||||
PCMux = 2'b10;
|
||||
ICacheMemWriteEnable = 1'b1;
|
||||
PCMux = 2'b10;
|
||||
ICacheMemWriteEnable = 1'b1;
|
||||
NextState = STATE_HIT_SPILL_MERGE;
|
||||
end
|
||||
STATE_HIT_SPILL_MERGE: begin
|
||||
PCMux = 2'b10;
|
||||
UnalignedSelect = 1'b1;
|
||||
ICacheReadEn = 1'b1;
|
||||
PCMux = 2'b10;
|
||||
UnalignedSelect = 1'b1;
|
||||
ICacheReadEn = 1'b1;
|
||||
NextState = STATE_HIT_SPILL_FINAL;
|
||||
end
|
||||
STATE_HIT_SPILL_FINAL: begin
|
||||
ICacheReadEn = 1'b1;
|
||||
PCMux = 2'b00;
|
||||
UnalignedSelect = 1'b1;
|
||||
SavePC = 1'b1;
|
||||
NextState = STATE_READY;
|
||||
ICacheStallF = 1'b0;
|
||||
ICacheReadEn = 1'b1;
|
||||
PCMux = 2'b00;
|
||||
UnalignedSelect = 1'b1;
|
||||
SavePC = 1'b1;
|
||||
NextState = STATE_READY;
|
||||
ICacheStallF = 1'b0;
|
||||
end
|
||||
|
||||
// branch 3 miss no spill
|
||||
STATE_MISS_FETCH_WDV: begin
|
||||
PCMux = 2'b01;
|
||||
//InstrReadF = 1'b1;
|
||||
PreCntEn = 1'b1;
|
||||
if (FetchCountFlag & InstrAckF) begin
|
||||
NextState = STATE_MISS_FETCH_DONE;
|
||||
end else begin
|
||||
NextState = STATE_MISS_FETCH_WDV;
|
||||
end
|
||||
PCMux = 2'b01;
|
||||
//InstrReadF = 1'b1;
|
||||
PreCntEn = 1'b1;
|
||||
if (FetchCountFlag & InstrAckF) begin
|
||||
NextState = STATE_MISS_FETCH_DONE;
|
||||
end else begin
|
||||
NextState = STATE_MISS_FETCH_WDV;
|
||||
end
|
||||
end
|
||||
STATE_MISS_FETCH_DONE: begin
|
||||
PCMux = 2'b01;
|
||||
ICacheMemWriteEnable = 1'b1;
|
||||
PCMux = 2'b01;
|
||||
ICacheMemWriteEnable = 1'b1;
|
||||
NextState = STATE_MISS_READ;
|
||||
end
|
||||
STATE_MISS_READ: begin
|
||||
PCMux = 2'b01;
|
||||
ICacheReadEn = 1'b1;
|
||||
NextState = STATE_READY;
|
||||
PCMux = 2'b01;
|
||||
ICacheReadEn = 1'b1;
|
||||
NextState = STATE_READY;
|
||||
end
|
||||
|
||||
// branch 4 miss spill hit, and 5 miss spill miss
|
||||
STATE_MISS_SPILL_FETCH_WDV: begin
|
||||
PCMux = 2'b01;
|
||||
PreCntEn = 1'b1;
|
||||
//InstrReadF = 1'b1;
|
||||
if (FetchCountFlag & InstrAckF) begin
|
||||
NextState = STATE_MISS_SPILL_FETCH_DONE;
|
||||
end else begin
|
||||
NextState = STATE_MISS_SPILL_FETCH_WDV;
|
||||
end
|
||||
PCMux = 2'b01;
|
||||
PreCntEn = 1'b1;
|
||||
//InstrReadF = 1'b1;
|
||||
if (FetchCountFlag & InstrAckF) begin
|
||||
NextState = STATE_MISS_SPILL_FETCH_DONE;
|
||||
end else begin
|
||||
NextState = STATE_MISS_SPILL_FETCH_WDV;
|
||||
end
|
||||
end
|
||||
STATE_MISS_SPILL_FETCH_DONE: begin
|
||||
PCMux = 2'b01;
|
||||
ICacheMemWriteEnable = 1'b1;
|
||||
NextState = STATE_MISS_SPILL_READ1;
|
||||
PCMux = 2'b01;
|
||||
ICacheMemWriteEnable = 1'b1;
|
||||
NextState = STATE_MISS_SPILL_READ1;
|
||||
end
|
||||
STATE_MISS_SPILL_READ1: begin // always be a hit as we just wrote that cache block.
|
||||
PCMux = 2'b01; // there is a 1 cycle delay after setting the address before the data arrives.
|
||||
ICacheReadEn = 1'b1;
|
||||
NextState = STATE_MISS_SPILL_2;
|
||||
PCMux = 2'b01; // there is a 1 cycle delay after setting the address before the data arrives.
|
||||
ICacheReadEn = 1'b1;
|
||||
NextState = STATE_MISS_SPILL_2;
|
||||
end
|
||||
STATE_MISS_SPILL_2: begin
|
||||
PCMux = 2'b10;
|
||||
UnalignedSelect = 1'b1;
|
||||
spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm.
|
||||
ICacheReadEn = 1'b1;
|
||||
NextState = STATE_MISS_SPILL_2_START;
|
||||
PCMux = 2'b10;
|
||||
UnalignedSelect = 1'b1;
|
||||
spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm.
|
||||
ICacheReadEn = 1'b1;
|
||||
NextState = STATE_MISS_SPILL_2_START;
|
||||
end
|
||||
STATE_MISS_SPILL_2_START: begin
|
||||
if (~hit) begin
|
||||
CntReset = 1'b1;
|
||||
NextState = STATE_MISS_SPILL_MISS_FETCH_WDV;
|
||||
end else begin
|
||||
NextState = STATE_READY;
|
||||
ICacheReadEn = 1'b1;
|
||||
PCMux = 2'b00;
|
||||
UnalignedSelect = 1'b1;
|
||||
SavePC = 1'b1;
|
||||
ICacheStallF = 1'b0;
|
||||
end
|
||||
if (~hit) begin
|
||||
CntReset = 1'b1;
|
||||
NextState = STATE_MISS_SPILL_MISS_FETCH_WDV;
|
||||
end else begin
|
||||
NextState = STATE_READY;
|
||||
ICacheReadEn = 1'b1;
|
||||
PCMux = 2'b00;
|
||||
UnalignedSelect = 1'b1;
|
||||
SavePC = 1'b1;
|
||||
ICacheStallF = 1'b0;
|
||||
end
|
||||
end
|
||||
STATE_MISS_SPILL_MISS_FETCH_WDV: begin
|
||||
PCMux = 2'b10;
|
||||
PreCntEn = 1'b1;
|
||||
//InstrReadF = 1'b1;
|
||||
if (FetchCountFlag & InstrAckF) begin
|
||||
NextState = STATE_MISS_SPILL_MISS_FETCH_DONE;
|
||||
end else begin
|
||||
NextState = STATE_MISS_SPILL_MISS_FETCH_WDV;
|
||||
end
|
||||
PCMux = 2'b10;
|
||||
PreCntEn = 1'b1;
|
||||
//InstrReadF = 1'b1;
|
||||
if (FetchCountFlag & InstrAckF) begin
|
||||
NextState = STATE_MISS_SPILL_MISS_FETCH_DONE;
|
||||
end else begin
|
||||
NextState = STATE_MISS_SPILL_MISS_FETCH_WDV;
|
||||
end
|
||||
end
|
||||
STATE_MISS_SPILL_MISS_FETCH_DONE: begin
|
||||
PCMux = 2'b10;
|
||||
ICacheMemWriteEnable = 1'b1;
|
||||
NextState = STATE_MISS_SPILL_MERGE;
|
||||
PCMux = 2'b10;
|
||||
ICacheMemWriteEnable = 1'b1;
|
||||
NextState = STATE_MISS_SPILL_MERGE;
|
||||
end
|
||||
STATE_MISS_SPILL_MERGE: begin
|
||||
PCMux = 2'b10;
|
||||
UnalignedSelect = 1'b1;
|
||||
ICacheReadEn = 1'b1;
|
||||
PCMux = 2'b10;
|
||||
UnalignedSelect = 1'b1;
|
||||
ICacheReadEn = 1'b1;
|
||||
NextState = STATE_MISS_SPILL_FINAL;
|
||||
end
|
||||
STATE_MISS_SPILL_FINAL: begin
|
||||
ICacheReadEn = 1'b1;
|
||||
PCMux = 2'b00;
|
||||
UnalignedSelect = 1'b1;
|
||||
SavePC = 1'b1;
|
||||
ICacheStallF = 1'b0;
|
||||
NextState = STATE_READY;
|
||||
ICacheReadEn = 1'b1;
|
||||
PCMux = 2'b00;
|
||||
UnalignedSelect = 1'b1;
|
||||
SavePC = 1'b1;
|
||||
ICacheStallF = 1'b0;
|
||||
NextState = STATE_READY;
|
||||
end
|
||||
STATE_TLB_MISS: begin
|
||||
if (ITLBWriteF | WalkerInstrPageFaultF) begin
|
||||
NextState = STATE_TLB_MISS_DONE;
|
||||
end else begin
|
||||
NextState = STATE_TLB_MISS;
|
||||
end
|
||||
end
|
||||
STATE_TLB_MISS_DONE : begin
|
||||
NextState = STATE_READY;
|
||||
end
|
||||
default: begin
|
||||
PCMux = 2'b01;
|
||||
NextState = STATE_READY;
|
||||
PCMux = 2'b01;
|
||||
NextState = STATE_READY;
|
||||
end
|
||||
// *** add in error handling and invalidate/evict
|
||||
endcase
|
||||
@ -407,7 +423,7 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) (
|
||||
|
||||
|
||||
// store read data from memory interface before writing into SRAM.
|
||||
genvar i;
|
||||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < WORDSPERLINE; i++) begin
|
||||
flopenr #(`XLEN) flop(.clk(clk),
|
||||
|
22
wally-pipelined/src/cache/icache.sv
vendored
22
wally-pipelined/src/cache/icache.sv
vendored
@ -28,24 +28,28 @@
|
||||
module icache
|
||||
(
|
||||
// Basic pipeline stuff
|
||||
input logic clk, reset,
|
||||
input logic StallF, StallD,
|
||||
input logic FlushD,
|
||||
input logic clk, reset,
|
||||
input logic StallF, StallD,
|
||||
input logic FlushD,
|
||||
input logic [`PA_BITS-1:0] PCNextF,
|
||||
input logic [`PA_BITS-1:0] PCPF,
|
||||
// Data read in from the ebu unit
|
||||
input logic [`XLEN-1:0] InstrInF,
|
||||
input logic InstrAckF,
|
||||
input logic [`XLEN-1:0] InstrInF,
|
||||
input logic InstrAckF,
|
||||
// Read requested from the ebu unit
|
||||
output logic [`PA_BITS-1:0] InstrPAdrF,
|
||||
output logic InstrReadF,
|
||||
output logic InstrReadF,
|
||||
// High if the instruction currently in the fetch stage is compressed
|
||||
output logic CompressedF,
|
||||
output logic CompressedF,
|
||||
// High if the icache is requesting a stall
|
||||
output logic ICacheStallF,
|
||||
output logic ICacheStallF,
|
||||
input logic ITLBMissF,
|
||||
input logic ITLBWriteF,
|
||||
input logic WalkerInstrPageFaultF,
|
||||
|
||||
// The raw (not decompressed) instruction that was requested
|
||||
// If this instruction is compressed, upper 16 bits may be the next 16 bits or may be zeros
|
||||
output logic [31:0] FinalInstrRawF
|
||||
output logic [31:0] FinalInstrRawF
|
||||
);
|
||||
|
||||
// Configuration parameters
|
||||
|
@ -51,18 +51,20 @@ module ahblite (
|
||||
input logic MemReadM, MemWriteM,
|
||||
input logic [`XLEN-1:0] WriteDataM,
|
||||
input logic [1:0] MemSizeM,
|
||||
//output logic DataStall,
|
||||
// Signals from MMU
|
||||
/* -----\/----- EXCLUDED -----\/-----
|
||||
input logic MMUStall,
|
||||
input logic [`XLEN-1:0] MMUPAdr,
|
||||
input logic MMUTranslate,
|
||||
output logic [`XLEN-1:0] MMUReadPTE,
|
||||
output logic MMUReady,
|
||||
-----/\----- EXCLUDED -----/\----- */
|
||||
// Signals from PMA checker
|
||||
input logic DSquashBusAccessM, ISquashBusAccessF,
|
||||
// Signals to PMA checker (metadata of proposed access)
|
||||
output logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM,
|
||||
// Return from bus
|
||||
output logic [`XLEN-1:0] ReadDataW,
|
||||
output logic [`XLEN-1:0] HRDATAW,
|
||||
// AHB-Lite external signals
|
||||
input logic [`AHBW-1:0] HRDATA,
|
||||
input logic HREADY, HRESP,
|
||||
@ -80,14 +82,13 @@ module ahblite (
|
||||
output logic [3:0] HSIZED,
|
||||
output logic HWRITED,
|
||||
// Stalls
|
||||
output logic /*InstrUpdate, */DataStall,
|
||||
output logic CommitM, MemAckW
|
||||
);
|
||||
|
||||
logic GrantData;
|
||||
logic [31:0] AccessAddress;
|
||||
logic [2:0] AccessSize, PTESize, ISize;
|
||||
logic [`AHBW-1:0] HRDATAMasked, ReadDataM, CapturedData, ReadDataWnext, WriteData;
|
||||
logic [`AHBW-1:0] HRDATAMasked, ReadDataM, CapturedHRDATAMasked, HRDATANext, WriteData;
|
||||
logic IReady, DReady;
|
||||
logic CaptureDataM,CapturedDataAvailable;
|
||||
|
||||
@ -115,14 +116,16 @@ module ahblite (
|
||||
// interface that might be used in place of the ahblite.
|
||||
always_comb
|
||||
case (BusState)
|
||||
IDLE: if (MMUTranslate) ProposedNextBusState = MMUTRANSLATE;
|
||||
else if (AtomicMaskedM[1]) ProposedNextBusState = ATOMICREAD;
|
||||
IDLE: /*if (MMUTranslate) ProposedNextBusState = MMUTRANSLATE;
|
||||
else*/ if (AtomicMaskedM[1]) ProposedNextBusState = ATOMICREAD;
|
||||
else if (MemReadM) ProposedNextBusState = MEMREAD; // Memory has priority over instructions
|
||||
else if (MemWriteM) ProposedNextBusState = MEMWRITE;
|
||||
else if (InstrReadF) ProposedNextBusState = INSTRREAD;
|
||||
else ProposedNextBusState = IDLE;
|
||||
/* -----\/----- EXCLUDED -----\/-----
|
||||
MMUTRANSLATE: if (~HREADY) ProposedNextBusState = MMUTRANSLATE;
|
||||
else ProposedNextBusState = IDLE;
|
||||
-----/\----- EXCLUDED -----/\----- */
|
||||
ATOMICREAD: if (~HREADY) ProposedNextBusState = ATOMICREAD;
|
||||
else ProposedNextBusState = ATOMICWRITE;
|
||||
ATOMICWRITE: if (~HREADY) ProposedNextBusState = ATOMICWRITE;
|
||||
@ -140,21 +143,21 @@ module ahblite (
|
||||
endcase
|
||||
|
||||
// Determine access type (important for determining whether to fault)
|
||||
assign AtomicAccessM = (ProposedNextBusState == ATOMICREAD) || (ProposedNextBusState == ATOMICWRITE);
|
||||
assign ExecuteAccessF = (ProposedNextBusState == INSTRREAD);
|
||||
assign WriteAccessM = (ProposedNextBusState == MEMWRITE) || (ProposedNextBusState == ATOMICWRITE);
|
||||
assign ReadAccessM = (ProposedNextBusState == MEMREAD) || (ProposedNextBusState == ATOMICREAD) ||
|
||||
(ProposedNextBusState == MMUTRANSLATE);
|
||||
// (ProposedNextBusState == MMUTRANSLATE);
|
||||
|
||||
// The PMA and PMP checkers can decide to squash the access
|
||||
// *** this probably needs to be controlled by the caches rather than EBU dh 7/2/11
|
||||
assign NextBusState = (DSquashBusAccessM || ISquashBusAccessF) ? IDLE : ProposedNextBusState;
|
||||
|
||||
// stall signals
|
||||
// Note that we need to extend both stalls when MMUTRANSLATE goes to idle,
|
||||
// since translation might not be complete.
|
||||
// *** Ross Thompson remove this datastall
|
||||
/* -----\/----- EXCLUDED -----\/-----
|
||||
assign #2 DataStall = ((NextBusState == MEMREAD) || (NextBusState == MEMWRITE) ||
|
||||
(NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE) ||
|
||||
MMUStall);
|
||||
(NextBusState == ATOMICREAD) || (NextBusState == ATOMICWRITE));
|
||||
-----/\----- EXCLUDED -----/\----- */
|
||||
|
||||
|
||||
//assign #1 InstrStall = ((NextBusState == INSTRREAD) || (NextBusState == INSTRREADC) ||
|
||||
// MMUStall);
|
||||
@ -163,14 +166,16 @@ module ahblite (
|
||||
assign #1 GrantData = (ProposedNextBusState == MEMREAD) || (ProposedNextBusState == MEMWRITE) ||
|
||||
(ProposedNextBusState == ATOMICREAD) || (ProposedNextBusState == ATOMICWRITE);
|
||||
assign #1 AccessAddress = (GrantData) ? MemPAdrM[31:0] : InstrPAdrF[31:0];
|
||||
assign #1 HADDR = (MMUTranslate) ? MMUPAdr[31:0] : AccessAddress;
|
||||
//assign #1 HADDR = (MMUTranslate) ? MMUPAdr[31:0] : AccessAddress;
|
||||
assign #1 HADDR = AccessAddress;
|
||||
generate
|
||||
if (`XLEN == 32) assign PTESize = 3'b010; // in rv32, PTEs are 4 bytes
|
||||
else assign PTESize = 3'b011; // in rv64, PTEs are 8 bytes
|
||||
endgenerate
|
||||
assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width; ignored on reads anyway
|
||||
assign #1 AccessSize = (GrantData) ? {1'b0, MemSizeM} : ISize;
|
||||
assign #1 HSIZE = (MMUTranslate) ? PTESize : AccessSize;
|
||||
//assign #1 HSIZE = (MMUTranslate) ? PTESize : AccessSize;
|
||||
assign #1 HSIZE = AccessSize;
|
||||
assign HBURST = 3'b000; // Single burst only supported; consider generalizing for cache fills
|
||||
assign HPROT = 4'b0011; // not used; see Section 3.7
|
||||
assign HTRANS = (NextBusState != IDLE) ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise
|
||||
@ -186,7 +191,7 @@ module ahblite (
|
||||
// Route signals to Instruction and Data Caches
|
||||
// *** assumes AHBW = XLEN
|
||||
|
||||
assign MMUReady = (BusState == MMUTRANSLATE && HREADY);
|
||||
//assign MMUReady = (BusState == MMUTRANSLATE && HREADY);
|
||||
|
||||
assign InstrRData = HRDATA;
|
||||
assign InstrAckF = (BusState == INSTRREAD) && (NextBusState != INSTRREAD);
|
||||
@ -194,15 +199,14 @@ module ahblite (
|
||||
// *** Bracker 6/5/21: why is this W stage?
|
||||
assign MemAckW = (BusState == MEMREAD) && (NextBusState != MEMREAD) || (BusState == MEMWRITE) && (NextBusState != MEMWRITE) ||
|
||||
((BusState == ATOMICREAD) && (NextBusState != ATOMICREAD)) || ((BusState == ATOMICWRITE) && (NextBusState != ATOMICWRITE));
|
||||
assign MMUReadPTE = HRDATA;
|
||||
assign ReadDataM = HRDATAMasked; // changed from W to M dh 2/7/2021
|
||||
//assign MMUReadPTE = HRDATA;
|
||||
// Carefully decide when to update ReadDataW
|
||||
// ReadDataMstored holds the most recent memory read.
|
||||
// We need to wait until the pipeline actually advances before we can update the contents of ReadDataW
|
||||
// (or else the W stage will accidentally get the M stage's data when the pipeline does advance).
|
||||
assign CaptureDataM = ((BusState == MEMREAD) && (NextBusState != MEMREAD)) ||
|
||||
((BusState == ATOMICREAD) && (NextBusState != ATOMICREAD));
|
||||
flopenr #(`XLEN) ReadDataNewWReg(clk, reset, CaptureDataM, ReadDataM, CapturedData);
|
||||
flopenr #(`XLEN) ReadDataNewWReg(clk, reset, CaptureDataM, HRDATAMasked, CapturedHRDATAMasked);
|
||||
|
||||
always @(posedge HCLK, negedge HRESETn)
|
||||
if (~HRESETn)
|
||||
@ -211,11 +215,11 @@ module ahblite (
|
||||
CapturedDataAvailable <= #1 (StallW) ? (CaptureDataM | CapturedDataAvailable) : 1'b0;
|
||||
always_comb
|
||||
casez({StallW && (BusState != ATOMICREAD),CapturedDataAvailable})
|
||||
2'b00: ReadDataWnext = ReadDataM;
|
||||
2'b01: ReadDataWnext = CapturedData;
|
||||
2'b1?: ReadDataWnext = ReadDataW;
|
||||
2'b00: HRDATANext = HRDATAMasked;
|
||||
2'b01: HRDATANext = CapturedHRDATAMasked;
|
||||
2'b1?: HRDATANext = HRDATAW;
|
||||
endcase
|
||||
flopr #(`XLEN) ReadDataOldWReg(clk, reset, ReadDataWnext, ReadDataW);
|
||||
flopr #(`XLEN) ReadDataOldWReg(clk, reset, HRDATANext, HRDATAW);
|
||||
|
||||
// Extract and sign-extend subwords if necessary
|
||||
subwordread swr(.*);
|
||||
@ -226,7 +230,7 @@ module ahblite (
|
||||
logic [`XLEN-1:0] AMOResult;
|
||||
// amoalu amoalu(.a(HRDATA), .b(WriteDataM), .funct(Funct7M), .width(MemSizeM),
|
||||
// .result(AMOResult));
|
||||
amoalu amoalu(.srca(ReadDataW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM),
|
||||
amoalu amoalu(.srca(HRDATAW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM),
|
||||
.result(AMOResult));
|
||||
mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, AtomicMaskedM[1], WriteData);
|
||||
end else
|
||||
|
@ -1,599 +0,0 @@
|
||||
// Brent-Kung Carry-save Prefix Adder
//
// bk128: 128-bit adder built around the brent_kung_cs128 prefix tree.
//
// Ports
//   a, b  : 128-bit operands
//   cin   : carry in
//   sum   : 128-bit sum output
//   cout  : carry out of the most significant bit
//
// Bit numbering
//   p and g are 129 bits wide. Index 0 is the carry-in slot
//   (p[0] = 0, g[0] = cin); index i (1..128) holds the propagate/generate
//   signal of operand bit i-1.
//   The prefix tree declares its carry output as [128:1]. It is connected
//   positionally to the local net c[127:0], so bit k of the tree's port
//   drives c[k-1] here. That is why sum pairs p[128:1] with c[127:0].

module bk128 (cout, sum, a, b, cin);

   input  [127:0] a, b;
   input          cin;

   output [127:0] sum;
   output         cout;

   wire [128:0]   p, g;
   wire [127:0]   c;

   // pre-computation: bitwise propagate/generate with cin folded in at bit 0
   assign p = {a^b, 1'b0};
   assign g = {a&b, cin};

   // prefix tree: only the low 128 p/g bits feed the tree; p[128]/g[128]
   // are consumed directly by the cout expression below
   brent_kung_cs128 prefix_tree(c, p[127:0], g[127:0]);

   // post-computation
   assign sum  = p[128:1]^c;
   assign cout = g[128]|(p[128]&c[127]);

endmodule
|
||||
|
||||
module brent_kung_cs128 (c, p, g);
|
||||
|
||||
input [127:0] p;
|
||||
input [127:0] g;
|
||||
output [128:1] c;
|
||||
|
||||
|
||||
// parallel-prefix, Brent-Kung
|
||||
|
||||
// Stage 1: Generates G/P pairs that span 1 bits
|
||||
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
|
||||
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
|
||||
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
|
||||
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
|
||||
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
|
||||
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
|
||||
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
|
||||
black b_15_14 (G_15_14, P_15_14, {g[15],g[14]}, {p[15],p[14]});
|
||||
|
||||
black b_17_16 (G_17_16, P_17_16, {g[17],g[16]}, {p[17],p[16]});
|
||||
black b_19_18 (G_19_18, P_19_18, {g[19],g[18]}, {p[19],p[18]});
|
||||
black b_21_20 (G_21_20, P_21_20, {g[21],g[20]}, {p[21],p[20]});
|
||||
black b_23_22 (G_23_22, P_23_22, {g[23],g[22]}, {p[23],p[22]});
|
||||
black b_25_24 (G_25_24, P_25_24, {g[25],g[24]}, {p[25],p[24]});
|
||||
black b_27_26 (G_27_26, P_27_26, {g[27],g[26]}, {p[27],p[26]});
|
||||
black b_29_28 (G_29_28, P_29_28, {g[29],g[28]}, {p[29],p[28]});
|
||||
black b_31_30 (G_31_30, P_31_30, {g[31],g[30]}, {p[31],p[30]});
|
||||
|
||||
black b_33_32 (G_33_32, P_33_32, {g[33],g[32]}, {p[33],p[32]});
|
||||
black b_35_34 (G_35_34, P_35_34, {g[35],g[34]}, {p[35],p[34]});
|
||||
black b_37_36 (G_37_36, P_37_36, {g[37],g[36]}, {p[37],p[36]});
|
||||
black b_39_38 (G_39_38, P_39_38, {g[39],g[38]}, {p[39],p[38]});
|
||||
black b_41_40 (G_41_40, P_41_40, {g[41],g[40]}, {p[41],p[40]});
|
||||
black b_43_42 (G_43_42, P_43_42, {g[43],g[42]}, {p[43],p[42]});
|
||||
black b_45_44 (G_45_44, P_45_44, {g[45],g[44]}, {p[45],p[44]});
|
||||
black b_47_46 (G_47_46, P_47_46, {g[47],g[46]}, {p[47],p[46]});
|
||||
|
||||
black b_49_48 (G_49_48, P_49_48, {g[49],g[48]}, {p[49],p[48]});
|
||||
black b_51_50 (G_51_50, P_51_50, {g[51],g[50]}, {p[51],p[50]});
|
||||
black b_53_52 (G_53_52, P_53_52, {g[53],g[52]}, {p[53],p[52]});
|
||||
black b_55_54 (G_55_54, P_55_54, {g[55],g[54]}, {p[55],p[54]});
|
||||
black b_57_56 (G_57_56, P_57_56, {g[57],g[56]}, {p[57],p[56]});
|
||||
black b_59_58 (G_59_58, P_59_58, {g[59],g[58]}, {p[59],p[58]});
|
||||
black b_61_60 (G_61_60, P_61_60, {g[61],g[60]}, {p[61],p[60]});
|
||||
black b_63_62 (G_63_62, P_63_62, {g[63],g[62]}, {p[63],p[62]});
|
||||
|
||||
black b_65_64 (G_65_64, P_65_64, {g[65],g[64]}, {p[65],p[64]});
|
||||
black b_67_66 (G_67_66, P_67_66, {g[67],g[66]}, {p[67],p[66]});
|
||||
black b_69_68 (G_69_68, P_69_68, {g[69],g[68]}, {p[69],p[68]});
|
||||
black b_71_70 (G_71_70, P_71_70, {g[71],g[70]}, {p[71],p[70]});
|
||||
black b_73_72 (G_73_72, P_73_72, {g[73],g[72]}, {p[73],p[72]});
|
||||
black b_75_74 (G_75_74, P_75_74, {g[75],g[74]}, {p[75],p[74]});
|
||||
black b_77_76 (G_77_76, P_77_76, {g[77],g[76]}, {p[77],p[76]});
|
||||
black b_79_78 (G_79_78, P_79_78, {g[79],g[78]}, {p[79],p[78]});
|
||||
|
||||
black b_81_80 (G_81_80, P_81_80, {g[81],g[80]}, {p[81],p[80]});
|
||||
black b_83_82 (G_83_82, P_83_82, {g[83],g[82]}, {p[83],p[82]});
|
||||
black b_85_84 (G_85_84, P_85_84, {g[85],g[84]}, {p[85],p[84]});
|
||||
black b_87_86 (G_87_86, P_87_86, {g[87],g[86]}, {p[87],p[86]});
|
||||
black b_89_88 (G_89_88, P_89_88, {g[89],g[88]}, {p[89],p[88]});
|
||||
black b_91_90 (G_91_90, P_91_90, {g[91],g[90]}, {p[91],p[90]});
|
||||
black b_93_92 (G_93_92, P_93_92, {g[93],g[92]}, {p[93],p[92]});
|
||||
black b_95_94 (G_95_94, P_95_94, {g[95],g[94]}, {p[95],p[94]});
|
||||
|
||||
black b_97_96 (G_97_96, P_97_96, {g[97],g[96]}, {p[97],p[96]});
|
||||
black b_99_98 (G_99_98, P_99_98, {g[99],g[98]}, {p[99],p[98]});
|
||||
black b_101_100 (G_101_100, P_101_100, {g[101],g[100]}, {p[101],p[100]});
|
||||
black b_103_102 (G_103_102, P_103_102, {g[103],g[102]}, {p[103],p[102]});
|
||||
black b_105_104 (G_105_104, P_105_104, {g[105],g[104]}, {p[105],p[104]});
|
||||
black b_107_106 (G_107_106, P_107_106, {g[107],g[106]}, {p[107],p[106]});
|
||||
black b_109_108 (G_109_108, P_109_108, {g[109],g[108]}, {p[109],p[108]});
|
||||
black b_111_110 (G_111_110, P_111_110, {g[111],g[110]}, {p[111],p[110]});
|
||||
|
||||
black b_113_112 (G_113_112, P_113_112, {g[113],g[112]}, {p[113],p[112]});
|
||||
black b_115_114 (G_115_114, P_115_114, {g[115],g[114]}, {p[115],p[114]});
|
||||
black b_117_116 (G_117_116, P_117_116, {g[117],g[116]}, {p[117],p[116]});
|
||||
black b_119_118 (G_119_118, P_119_118, {g[119],g[118]}, {p[119],p[118]});
|
||||
black b_121_120 (G_121_120, P_121_120, {g[121],g[120]}, {p[121],p[120]});
|
||||
black b_123_122 (G_123_122, P_123_122, {g[123],g[122]}, {p[123],p[122]});
|
||||
black b_125_124 (G_125_124, P_125_124, {g[125],g[124]}, {p[125],p[124]});
|
||||
black b_127_126 (G_127_126, P_127_126, {g[127],g[126]}, {p[127],p[126]});
|
||||
|
||||
|
||||
// Stage 2: Generates G/P pairs that span 2 bits
|
||||
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
|
||||
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
|
||||
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
|
||||
black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12});
|
||||
black b_19_16 (G_19_16, P_19_16, {G_19_18,G_17_16}, {P_19_18,P_17_16});
|
||||
black b_23_20 (G_23_20, P_23_20, {G_23_22,G_21_20}, {P_23_22,P_21_20});
|
||||
black b_27_24 (G_27_24, P_27_24, {G_27_26,G_25_24}, {P_27_26,P_25_24});
|
||||
black b_31_28 (G_31_28, P_31_28, {G_31_30,G_29_28}, {P_31_30,P_29_28});
|
||||
|
||||
black b_35_32 (G_35_32, P_35_32, {G_35_34,G_33_32}, {P_35_34,P_33_32});
|
||||
black b_39_36 (G_39_36, P_39_36, {G_39_38,G_37_36}, {P_39_38,P_37_36});
|
||||
black b_43_40 (G_43_40, P_43_40, {G_43_42,G_41_40}, {P_43_42,P_41_40});
|
||||
black b_47_44 (G_47_44, P_47_44, {G_47_46,G_45_44}, {P_47_46,P_45_44});
|
||||
black b_51_48 (G_51_48, P_51_48, {G_51_50,G_49_48}, {P_51_50,P_49_48});
|
||||
black b_55_52 (G_55_52, P_55_52, {G_55_54,G_53_52}, {P_55_54,P_53_52});
|
||||
black b_59_56 (G_59_56, P_59_56, {G_59_58,G_57_56}, {P_59_58,P_57_56});
|
||||
black b_63_60 (G_63_60, P_63_60, {G_63_62,G_61_60}, {P_63_62,P_61_60});
|
||||
|
||||
black b_67_64 (G_67_64, P_67_64, {G_67_66,G_65_64}, {P_67_66,P_65_64});
|
||||
black b_71_68 (G_71_68, P_71_68, {G_71_70,G_69_68}, {P_71_70,P_69_68});
|
||||
black b_75_72 (G_75_72, P_75_72, {G_75_74,G_73_72}, {P_75_74,P_73_72});
|
||||
black b_79_76 (G_79_76, P_79_76, {G_79_78,G_77_76}, {P_79_78,P_77_76});
|
||||
black b_83_80 (G_83_80, P_83_80, {G_83_82,G_81_80}, {P_83_82,P_81_80});
|
||||
black b_87_84 (G_87_84, P_87_84, {G_87_86,G_85_84}, {P_87_86,P_85_84});
|
||||
black b_91_88 (G_91_88, P_91_88, {G_91_90,G_89_88}, {P_91_90,P_89_88});
|
||||
black b_95_92 (G_95_92, P_95_92, {G_95_94,G_93_92}, {P_95_94,P_93_92});
|
||||
|
||||
black b_99_96 (G_99_96, P_99_96, {G_99_98,G_97_96}, {P_99_98,P_97_96});
|
||||
black b_103_100 (G_103_100, P_103_100, {G_103_102,G_101_100}, {P_103_102,P_101_100});
|
||||
black b_107_104 (G_107_104, P_107_104, {G_107_106,G_105_104}, {P_107_106,P_105_104});
|
||||
black b_111_108 (G_111_108, P_111_108, {G_111_110,G_109_108}, {P_111_110,P_109_108});
|
||||
black b_115_112 (G_115_112, P_115_112, {G_115_114,G_113_112}, {P_115_114,P_113_112});
|
||||
black b_119_116 (G_119_116, P_119_116, {G_119_118,G_117_116}, {P_119_118,P_117_116});
|
||||
black b_123_120 (G_123_120, P_123_120, {G_123_122,G_121_120}, {P_123_122,P_121_120});
|
||||
black b_127_124 (G_127_124, P_127_124, {G_127_126,G_125_124}, {P_127_126,P_125_124});
|
||||
|
||||
|
||||
// Stage 3: Generates G/P pairs that span 4 bits
|
||||
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
|
||||
black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8});
|
||||
black b_23_16 (G_23_16, P_23_16, {G_23_20,G_19_16}, {P_23_20,P_19_16});
|
||||
black b_31_24 (G_31_24, P_31_24, {G_31_28,G_27_24}, {P_31_28,P_27_24});
|
||||
black b_39_32 (G_39_32, P_39_32, {G_39_36,G_35_32}, {P_39_36,P_35_32});
|
||||
black b_47_40 (G_47_40, P_47_40, {G_47_44,G_43_40}, {P_47_44,P_43_40});
|
||||
black b_55_48 (G_55_48, P_55_48, {G_55_52,G_51_48}, {P_55_52,P_51_48});
|
||||
black b_63_56 (G_63_56, P_63_56, {G_63_60,G_59_56}, {P_63_60,P_59_56});
|
||||
|
||||
black b_71_64 (G_71_64, P_71_64, {G_71_68,G_67_64}, {P_71_68,P_67_64});
|
||||
black b_79_72 (G_79_72, P_79_72, {G_79_76,G_75_72}, {P_79_76,P_75_72});
|
||||
black b_87_80 (G_87_80, P_87_80, {G_87_84,G_83_80}, {P_87_84,P_83_80});
|
||||
black b_95_88 (G_95_88, P_95_88, {G_95_92,G_91_88}, {P_95_92,P_91_88});
|
||||
black b_103_96 (G_103_96, P_103_96, {G_103_100,G_99_96}, {P_103_100,P_99_96});
|
||||
black b_111_104 (G_111_104, P_111_104, {G_111_108,G_107_104}, {P_111_108,P_107_104});
|
||||
black b_119_112 (G_119_112, P_119_112, {G_119_116,G_115_112}, {P_119_116,P_115_112});
|
||||
black b_127_120 (G_127_120, P_127_120, {G_127_124,G_123_120}, {P_127_124,P_123_120});
|
||||
|
||||
|
||||
// Stage 4: Generates G/P pairs that span 8 bits
|
||||
grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8);
|
||||
black b_31_16 (G_31_16, P_31_16, {G_31_24,G_23_16}, {P_31_24,P_23_16});
|
||||
black b_47_32 (G_47_32, P_47_32, {G_47_40,G_39_32}, {P_47_40,P_39_32});
|
||||
black b_63_48 (G_63_48, P_63_48, {G_63_56,G_55_48}, {P_63_56,P_55_48});
|
||||
black b_79_64 (G_79_64, P_79_64, {G_79_72,G_71_64}, {P_79_72,P_71_64});
|
||||
black b_95_80 (G_95_80, P_95_80, {G_95_88,G_87_80}, {P_95_88,P_87_80});
|
||||
black b_111_96 (G_111_96, P_111_96, {G_111_104,G_103_96}, {P_111_104,P_103_96});
|
||||
black b_127_112 (G_127_112, P_127_112, {G_127_120,G_119_112}, {P_127_120,P_119_112});
|
||||
|
||||
|
||||
// Stage 5: Generates G/P pairs that span 16 bits
|
||||
grey g_31_0 (G_31_0, {G_31_16,G_15_0}, P_31_16);
|
||||
black b_63_32 (G_63_32, P_63_32, {G_63_48,G_47_32}, {P_63_48,P_47_32});
|
||||
black b_95_64 (G_95_64, P_95_64, {G_95_80,G_79_64}, {P_95_80,P_79_64});
|
||||
black b_127_96 (G_127_96, P_127_96, {G_127_112,G_111_96}, {P_127_112,P_111_96});
|
||||
|
||||
// Stage 6: Generates G/P pairs that span 32 bits
|
||||
grey g_63_0 (G_63_0, {G_63_32,G_31_0}, P_63_32);
|
||||
black b_127_64 (G_127_64, P_127_64, {G_127_96,G_95_64}, {P_127_96,P_95_64});
|
||||
|
||||
// Stage 7: Generates G/P pairs that span 64 bits
|
||||
grey g_127_0 (G_127_0, {G_127_64,G_63_0}, P_127_64);
|
||||
|
||||
// Stage 8: Generates G/P pairs that span 32 bits
|
||||
grey g_95_0 (G_95_0, {G_95_64,G_63_0}, P_95_64);
|
||||
|
||||
// Stage 9: Generates G/P pairs that span 16 bits
|
||||
grey g_47_0 (G_47_0, {G_47_32,G_31_0}, P_47_32);
|
||||
grey g_79_0 (G_79_0, {G_79_64,G_63_0}, P_79_64);
|
||||
grey g_111_0 (G_111_0, {G_111_96,G_95_0}, P_111_96);
|
||||
|
||||
// Stage 10: Generates G/P pairs that span 8 bits
|
||||
grey g_23_0 (G_23_0, {G_23_16,G_15_0}, P_23_16);
|
||||
grey g_39_0 (G_39_0, {G_39_32,G_31_0}, P_39_32);
|
||||
grey g_55_0 (G_55_0, {G_55_48,G_47_0}, P_55_48);
|
||||
grey g_71_0 (G_71_0, {G_71_64,G_63_0}, P_71_64);
|
||||
grey g_87_0 (G_87_0, {G_87_80,G_79_0}, P_87_80);
|
||||
grey g_103_0 (G_103_0, {G_103_96,G_95_0}, P_103_96);
|
||||
grey g_119_0 (G_119_0, {G_119_112,G_111_0}, P_119_112);
|
||||
|
||||
// Stage 11: Generates G/P pairs that span 4 bits
|
||||
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
|
||||
grey g_19_0 (G_19_0, {G_19_16,G_15_0}, P_19_16);
|
||||
grey g_27_0 (G_27_0, {G_27_24,G_23_0}, P_27_24);
|
||||
grey g_35_0 (G_35_0, {G_35_32,G_31_0}, P_35_32);
|
||||
grey g_43_0 (G_43_0, {G_43_40,G_39_0}, P_43_40);
|
||||
grey g_51_0 (G_51_0, {G_51_48,G_47_0}, P_51_48);
|
||||
grey g_59_0 (G_59_0, {G_59_56,G_55_0}, P_59_56);
|
||||
grey g_67_0 (G_67_0, {G_67_64,G_63_0}, P_67_64);
|
||||
grey g_75_0 (G_75_0, {G_75_72,G_71_0}, P_75_72);
|
||||
grey g_83_0 (G_83_0, {G_83_80,G_79_0}, P_83_80);
|
||||
grey g_91_0 (G_91_0, {G_91_88,G_87_0}, P_91_88);
|
||||
grey g_99_0 (G_99_0, {G_99_96,G_95_0}, P_99_96);
|
||||
grey g_107_0 (G_107_0, {G_107_104,G_103_0}, P_107_104);
|
||||
grey g_115_0 (G_115_0, {G_115_112,G_111_0}, P_115_112);
|
||||
grey g_123_0 (G_123_0, {G_123_120,G_119_0}, P_123_120);
|
||||
|
||||
// Stage 12: Generates G/P pairs that span 2 bits
|
||||
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
|
||||
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
|
||||
grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);
|
||||
grey g_17_0 (G_17_0, {G_17_16,G_15_0}, P_17_16);
|
||||
grey g_21_0 (G_21_0, {G_21_20,G_19_0}, P_21_20);
|
||||
grey g_25_0 (G_25_0, {G_25_24,G_23_0}, P_25_24);
|
||||
grey g_29_0 (G_29_0, {G_29_28,G_27_0}, P_29_28);
|
||||
grey g_33_0 (G_33_0, {G_33_32,G_31_0}, P_33_32);
|
||||
grey g_37_0 (G_37_0, {G_37_36,G_35_0}, P_37_36);
|
||||
grey g_41_0 (G_41_0, {G_41_40,G_39_0}, P_41_40);
|
||||
grey g_45_0 (G_45_0, {G_45_44,G_43_0}, P_45_44);
|
||||
grey g_49_0 (G_49_0, {G_49_48,G_47_0}, P_49_48);
|
||||
grey g_53_0 (G_53_0, {G_53_52,G_51_0}, P_53_52);
|
||||
grey g_57_0 (G_57_0, {G_57_56,G_55_0}, P_57_56);
|
||||
grey g_61_0 (G_61_0, {G_61_60,G_59_0}, P_61_60);
|
||||
grey g_65_0 (G_65_0, {G_65_64,G_63_0}, P_65_64);
|
||||
grey g_69_0 (G_69_0, {G_69_68,G_67_0}, P_69_68);
|
||||
grey g_73_0 (G_73_0, {G_73_72,G_71_0}, P_73_72);
|
||||
grey g_77_0 (G_77_0, {G_77_76,G_75_0}, P_77_76);
|
||||
grey g_81_0 (G_81_0, {G_81_80,G_79_0}, P_81_80);
|
||||
grey g_85_0 (G_85_0, {G_85_84,G_83_0}, P_85_84);
|
||||
grey g_89_0 (G_89_0, {G_89_88,G_87_0}, P_89_88);
|
||||
grey g_93_0 (G_93_0, {G_93_92,G_91_0}, P_93_92);
|
||||
grey g_97_0 (G_97_0, {G_97_96,G_95_0}, P_97_96);
|
||||
grey g_101_0 (G_101_0, {G_101_100,G_99_0}, P_101_100);
|
||||
grey g_105_0 (G_105_0, {G_105_104,G_103_0}, P_105_104);
|
||||
grey g_109_0 (G_109_0, {G_109_108,G_107_0}, P_109_108);
|
||||
grey g_113_0 (G_113_0, {G_113_112,G_111_0}, P_113_112);
|
||||
grey g_117_0 (G_117_0, {G_117_116,G_115_0}, P_117_116);
|
||||
grey g_121_0 (G_121_0, {G_121_120,G_119_0}, P_121_120);
|
||||
grey g_125_0 (G_125_0, {G_125_124,G_123_0}, P_125_124);
|
||||
|
||||
// Last grey cell stage
|
||||
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
|
||||
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
|
||||
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
|
||||
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
|
||||
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
|
||||
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
|
||||
grey g_14_0 (G_14_0, {g[14],G_13_0}, p[14]);
|
||||
grey g_16_0 (G_16_0, {g[16],G_15_0}, p[16]);
|
||||
grey g_18_0 (G_18_0, {g[18],G_17_0}, p[18]);
|
||||
grey g_20_0 (G_20_0, {g[20],G_19_0}, p[20]);
|
||||
grey g_22_0 (G_22_0, {g[22],G_21_0}, p[22]);
|
||||
grey g_24_0 (G_24_0, {g[24],G_23_0}, p[24]);
|
||||
grey g_26_0 (G_26_0, {g[26],G_25_0}, p[26]);
|
||||
grey g_28_0 (G_28_0, {g[28],G_27_0}, p[28]);
|
||||
grey g_30_0 (G_30_0, {g[30],G_29_0}, p[30]);
|
||||
grey g_32_0 (G_32_0, {g[32],G_31_0}, p[32]);
|
||||
grey g_34_0 (G_34_0, {g[34],G_33_0}, p[34]);
|
||||
grey g_36_0 (G_36_0, {g[36],G_35_0}, p[36]);
|
||||
grey g_38_0 (G_38_0, {g[38],G_37_0}, p[38]);
|
||||
grey g_40_0 (G_40_0, {g[40],G_39_0}, p[40]);
|
||||
grey g_42_0 (G_42_0, {g[42],G_41_0}, p[42]);
|
||||
grey g_44_0 (G_44_0, {g[44],G_43_0}, p[44]);
|
||||
grey g_46_0 (G_46_0, {g[46],G_45_0}, p[46]);
|
||||
grey g_48_0 (G_48_0, {g[48],G_47_0}, p[48]);
|
||||
grey g_50_0 (G_50_0, {g[50],G_49_0}, p[50]);
|
||||
grey g_52_0 (G_52_0, {g[52],G_51_0}, p[52]);
|
||||
grey g_54_0 (G_54_0, {g[54],G_53_0}, p[54]);
|
||||
grey g_56_0 (G_56_0, {g[56],G_55_0}, p[56]);
|
||||
grey g_58_0 (G_58_0, {g[58],G_57_0}, p[58]);
|
||||
grey g_60_0 (G_60_0, {g[60],G_59_0}, p[60]);
|
||||
grey g_62_0 (G_62_0, {g[62],G_61_0}, p[62]);
|
||||
grey g_64_0 (G_64_0, {g[64],G_63_0}, p[64]);
|
||||
grey g_66_0 (G_66_0, {g[66],G_65_0}, p[66]);
|
||||
grey g_68_0 (G_68_0, {g[68],G_67_0}, p[68]);
|
||||
grey g_70_0 (G_70_0, {g[70],G_69_0}, p[70]);
|
||||
grey g_72_0 (G_72_0, {g[72],G_71_0}, p[72]);
|
||||
grey g_74_0 (G_74_0, {g[74],G_73_0}, p[74]);
|
||||
grey g_76_0 (G_76_0, {g[76],G_75_0}, p[76]);
|
||||
grey g_78_0 (G_78_0, {g[78],G_77_0}, p[78]);
|
||||
grey g_80_0 (G_80_0, {g[80],G_79_0}, p[80]);
|
||||
grey g_82_0 (G_82_0, {g[82],G_81_0}, p[82]);
|
||||
grey g_84_0 (G_84_0, {g[84],G_83_0}, p[84]);
|
||||
grey g_86_0 (G_86_0, {g[86],G_85_0}, p[86]);
|
||||
grey g_88_0 (G_88_0, {g[88],G_87_0}, p[88]);
|
||||
grey g_90_0 (G_90_0, {g[90],G_89_0}, p[90]);
|
||||
grey g_92_0 (G_92_0, {g[92],G_91_0}, p[92]);
|
||||
grey g_94_0 (G_94_0, {g[94],G_93_0}, p[94]);
|
||||
grey g_96_0 (G_96_0, {g[96],G_95_0}, p[96]);
|
||||
grey g_98_0 (G_98_0, {g[98],G_97_0}, p[98]);
|
||||
grey g_100_0 (G_100_0, {g[100],G_99_0}, p[100]);
|
||||
grey g_102_0 (G_102_0, {g[102],G_101_0}, p[102]);
|
||||
grey g_104_0 (G_104_0, {g[104],G_103_0}, p[104]);
|
||||
grey g_106_0 (G_106_0, {g[106],G_105_0}, p[106]);
|
||||
grey g_108_0 (G_108_0, {g[108],G_107_0}, p[108]);
|
||||
grey g_110_0 (G_110_0, {g[110],G_109_0}, p[110]);
|
||||
grey g_112_0 (G_112_0, {g[112],G_111_0}, p[112]);
|
||||
grey g_114_0 (G_114_0, {g[114],G_113_0}, p[114]);
|
||||
grey g_116_0 (G_116_0, {g[116],G_115_0}, p[116]);
|
||||
grey g_118_0 (G_118_0, {g[118],G_117_0}, p[118]);
|
||||
grey g_120_0 (G_120_0, {g[120],G_119_0}, p[120]);
|
||||
grey g_122_0 (G_122_0, {g[122],G_121_0}, p[122]);
|
||||
grey g_124_0 (G_124_0, {g[124],G_123_0}, p[124]);
|
||||
grey g_126_0 (G_126_0, {g[126],G_125_0}, p[126]);
|
||||
|
||||
// Final Stage: Apply c_k+1=G_k_0
|
||||
assign c[1]=g[0];
|
||||
assign c[2]=G_1_0;
|
||||
assign c[3]=G_2_0;
|
||||
assign c[4]=G_3_0;
|
||||
assign c[5]=G_4_0;
|
||||
assign c[6]=G_5_0;
|
||||
assign c[7]=G_6_0;
|
||||
assign c[8]=G_7_0;
|
||||
assign c[9]=G_8_0;
|
||||
|
||||
assign c[10]=G_9_0;
|
||||
assign c[11]=G_10_0;
|
||||
assign c[12]=G_11_0;
|
||||
assign c[13]=G_12_0;
|
||||
assign c[14]=G_13_0;
|
||||
assign c[15]=G_14_0;
|
||||
assign c[16]=G_15_0;
|
||||
assign c[17]=G_16_0;
|
||||
|
||||
assign c[18]=G_17_0;
|
||||
assign c[19]=G_18_0;
|
||||
assign c[20]=G_19_0;
|
||||
assign c[21]=G_20_0;
|
||||
assign c[22]=G_21_0;
|
||||
assign c[23]=G_22_0;
|
||||
assign c[24]=G_23_0;
|
||||
assign c[25]=G_24_0;
|
||||
|
||||
assign c[26]=G_25_0;
|
||||
assign c[27]=G_26_0;
|
||||
assign c[28]=G_27_0;
|
||||
assign c[29]=G_28_0;
|
||||
assign c[30]=G_29_0;
|
||||
assign c[31]=G_30_0;
|
||||
assign c[32]=G_31_0;
|
||||
assign c[33]=G_32_0;
|
||||
|
||||
assign c[34]=G_33_0;
|
||||
assign c[35]=G_34_0;
|
||||
assign c[36]=G_35_0;
|
||||
assign c[37]=G_36_0;
|
||||
assign c[38]=G_37_0;
|
||||
assign c[39]=G_38_0;
|
||||
assign c[40]=G_39_0;
|
||||
assign c[41]=G_40_0;
|
||||
|
||||
assign c[42]=G_41_0;
|
||||
assign c[43]=G_42_0;
|
||||
assign c[44]=G_43_0;
|
||||
assign c[45]=G_44_0;
|
||||
assign c[46]=G_45_0;
|
||||
assign c[47]=G_46_0;
|
||||
assign c[48]=G_47_0;
|
||||
assign c[49]=G_48_0;
|
||||
|
||||
assign c[50]=G_49_0;
|
||||
assign c[51]=G_50_0;
|
||||
assign c[52]=G_51_0;
|
||||
assign c[53]=G_52_0;
|
||||
assign c[54]=G_53_0;
|
||||
assign c[55]=G_54_0;
|
||||
assign c[56]=G_55_0;
|
||||
assign c[57]=G_56_0;
|
||||
|
||||
assign c[58]=G_57_0;
|
||||
assign c[59]=G_58_0;
|
||||
assign c[60]=G_59_0;
|
||||
assign c[61]=G_60_0;
|
||||
assign c[62]=G_61_0;
|
||||
assign c[63]=G_62_0;
|
||||
assign c[64]=G_63_0;
|
||||
assign c[65]=G_64_0;
|
||||
|
||||
assign c[66]=G_65_0;
|
||||
assign c[67]=G_66_0;
|
||||
assign c[68]=G_67_0;
|
||||
assign c[69]=G_68_0;
|
||||
assign c[70]=G_69_0;
|
||||
assign c[71]=G_70_0;
|
||||
assign c[72]=G_71_0;
|
||||
assign c[73]=G_72_0;
|
||||
|
||||
assign c[74]=G_73_0;
|
||||
assign c[75]=G_74_0;
|
||||
assign c[76]=G_75_0;
|
||||
assign c[77]=G_76_0;
|
||||
assign c[78]=G_77_0;
|
||||
assign c[79]=G_78_0;
|
||||
assign c[80]=G_79_0;
|
||||
assign c[81]=G_80_0;
|
||||
|
||||
assign c[82]=G_81_0;
|
||||
assign c[83]=G_82_0;
|
||||
assign c[84]=G_83_0;
|
||||
assign c[85]=G_84_0;
|
||||
assign c[86]=G_85_0;
|
||||
assign c[87]=G_86_0;
|
||||
assign c[88]=G_87_0;
|
||||
assign c[89]=G_88_0;
|
||||
|
||||
assign c[90]=G_89_0;
|
||||
assign c[91]=G_90_0;
|
||||
assign c[92]=G_91_0;
|
||||
assign c[93]=G_92_0;
|
||||
assign c[94]=G_93_0;
|
||||
assign c[95]=G_94_0;
|
||||
assign c[96]=G_95_0;
|
||||
assign c[97]=G_96_0;
|
||||
|
||||
assign c[98]=G_97_0;
|
||||
assign c[99]=G_98_0;
|
||||
assign c[100]=G_99_0;
|
||||
assign c[101]=G_100_0;
|
||||
assign c[102]=G_101_0;
|
||||
assign c[103]=G_102_0;
|
||||
assign c[104]=G_103_0;
|
||||
assign c[105]=G_104_0;
|
||||
|
||||
assign c[106]=G_105_0;
|
||||
assign c[107]=G_106_0;
|
||||
assign c[108]=G_107_0;
|
||||
assign c[109]=G_108_0;
|
||||
assign c[110]=G_109_0;
|
||||
assign c[111]=G_110_0;
|
||||
assign c[112]=G_111_0;
|
||||
assign c[113]=G_112_0;
|
||||
|
||||
assign c[114]=G_113_0;
|
||||
assign c[115]=G_114_0;
|
||||
assign c[116]=G_115_0;
|
||||
assign c[117]=G_116_0;
|
||||
assign c[118]=G_117_0;
|
||||
assign c[119]=G_118_0;
|
||||
assign c[120]=G_119_0;
|
||||
assign c[121]=G_120_0;
|
||||
|
||||
assign c[122]=G_121_0;
|
||||
assign c[123]=G_122_0;
|
||||
assign c[124]=G_123_0;
|
||||
assign c[125]=G_124_0;
|
||||
assign c[126]=G_125_0;
|
||||
assign c[127]=G_126_0;
|
||||
assign c[128]=G_127_0;
|
||||
|
||||
endmodule // brent_kung_cs
|
||||
|
||||
|
@ -1,97 +0,0 @@
|
||||
// Brent-Kung Carry-save Prefix Adder
|
||||
|
||||
// bk13: 13-bit adder built around the brent_kung_cs13 prefix tree.
//   cout : carry out of the most significant bit
//   sum  : 13-bit sum of a + b + cin
//   a, b : 13-bit addends
//   cin  : carry in
module bk13 (cout, sum, a, b, cin);

   input [12:0]  a, b;
   input         cin;
   output [12:0] sum;
   output        cout;

   // Position 0 of p/g is the carry-in slot; position i+1 belongs to operand bit i.
   wire [13:0]   p, g;
   // c[i] is the carry into operand bit i (c[0] carries cin through from g[0]).
   wire [12:0]   c;

   // pre-computation
   assign p = {a^b, 1'b0};
   assign g = {a&b, cin};

   // prefix tree: brent_kung_cs13 declares 14-bit p/g ports, so the full
   // vectors are connected rather than 13-bit slices. Position 13 only
   // feeds a cell whose outputs are unused, so the carries are unaffected.
   brent_kung_cs13 prefix_tree (c, p, g);

   // post-computation
   assign sum  = p[13:1] ^ c;
   assign cout = g[13] | (p[13] & c[12]);

endmodule
|
||||
|
||||
// brent_kung_cs13: Brent-Kung parallel-prefix carry network.
//   p, g : per-position propagate / generate inputs (position 0 is lowest)
//   c    : c[k+1] is the group generate accumulated over positions k..0
// Uses the `grey` and `black` prefix cells, which are defined outside this
// module (presumably grey yields a group generate only, black yields a
// group generate/propagate pair -- confirm against their definitions).
module brent_kung_cs13 (c, p, g);

   input  [13:0] p;
   input  [13:0] g;
   output [13:1] c;

   // Nets are named <G|P>_<hi>_<lo> after the range of positions they cover.
   wire G_1_0, G_2_0, G_3_0, G_4_0,  G_5_0,  G_6_0;
   wire G_7_0, G_8_0, G_9_0, G_10_0, G_11_0, G_12_0;
   wire G_3_2,   P_3_2;
   wire G_5_4,   P_5_4;
   wire G_7_6,   P_7_6;
   wire G_9_8,   P_9_8;
   wire G_11_10, P_11_10;
   wire G_13_12, P_13_12;   // produced but not consumed in this module
   wire G_7_4,   P_7_4;
   wire G_11_8,  P_11_8;

   // Up-sweep level 1: combine adjacent positions into 2-wide groups
   grey  b_1_0   (G_1_0,            {g[1],  g[0]},  p[1]);
   black b_3_2   (G_3_2,   P_3_2,   {g[3],  g[2]},  {p[3],  p[2]});
   black b_5_4   (G_5_4,   P_5_4,   {g[5],  g[4]},  {p[5],  p[4]});
   black b_7_6   (G_7_6,   P_7_6,   {g[7],  g[6]},  {p[7],  p[6]});
   black b_9_8   (G_9_8,   P_9_8,   {g[9],  g[8]},  {p[9],  p[8]});
   black b_11_10 (G_11_10, P_11_10, {g[11], g[10]}, {p[11], p[10]});
   black b_13_12 (G_13_12, P_13_12, {g[13], g[12]}, {p[13], p[12]});

   // Up-sweep level 2: combine 2-wide groups into 4-wide groups
   grey  g_3_0   (G_3_0,           {G_3_2,   G_1_0}, P_3_2);
   black b_7_4   (G_7_4,  P_7_4,   {G_7_6,   G_5_4}, {P_7_6,   P_5_4});
   black b_11_8  (G_11_8, P_11_8,  {G_11_10, G_9_8}, {P_11_10, P_9_8});

   // Up-sweep level 3: 8-wide group anchored at position 0
   grey  g_7_0   (G_7_0,  {G_7_4,  G_3_0}, P_7_4);

   // Down-sweep: extend the anchored prefixes to the remaining odd positions
   grey  g_11_0  (G_11_0, {G_11_8, G_7_0}, P_11_8);
   grey  g_5_0   (G_5_0,  {G_5_4,  G_3_0}, P_5_4);
   grey  g_9_0   (G_9_0,  {G_9_8,  G_7_0}, P_9_8);

   // Last grey level: fill in the even positions from their odd neighbour
   grey  g_2_0   (G_2_0,  {g[2],  G_1_0},  p[2]);
   grey  g_4_0   (G_4_0,  {g[4],  G_3_0},  p[4]);
   grey  g_6_0   (G_6_0,  {g[6],  G_5_0},  p[6]);
   grey  g_8_0   (G_8_0,  {g[8],  G_7_0},  p[8]);
   grey  g_10_0  (G_10_0, {g[10], G_9_0},  p[10]);
   grey  g_12_0  (G_12_0, {g[12], G_11_0}, p[12]);

   // Carry outputs: c[1] = g[0], and c[k+1] = G_k_0 for k = 1..12
   assign c = {G_12_0, G_11_0, G_10_0, G_9_0, G_8_0, G_7_0,
               G_6_0,  G_5_0,  G_4_0,  G_3_0, G_2_0, G_1_0, g[0]};

endmodule
|
@ -1,86 +0,0 @@
|
||||
// Brent-Kung Prefix Adder
|
||||
|
||||
// bk14: 14-bit adder built around the brent_kung14 prefix tree.
//   cout : carry out of the most significant bit
//   sum  : 14-bit sum of a + b + cin
//   a, b : 14-bit addends
//   cin  : carry in
module bk14 (
   output wire        cout,
   output wire [13:0] sum,
   input  wire [13:0] a,
   input  wire [13:0] b,
   input  wire        cin
   );

   // Position 0 is the carry-in slot; position i+1 belongs to operand bit i.
   wire [14:0] prop;
   wire [14:0] gen;
   // carry[i] is the carry into operand bit i.
   wire [13:0] carry;

   // pre-computation: propagate is a XOR b, generate is a AND b,
   // with the carry-in injected as generate at position 0
   assign prop = {a ^ b, 1'b0};
   assign gen  = {a & b, cin};

   // prefix tree operates on the low 14 positions
   brent_kung14 prefix_tree (carry, prop[13:0], gen[13:0]);

   // post-computation: the top position is resolved here to form cout
   assign cout = gen[14] | (prop[14] & carry[13]);
   assign sum  = prop[14:1] ^ carry;

endmodule
|
||||
|
||||
// brent_kung14: Brent-Kung parallel-prefix carry network for 14 positions.
//   p, g : per-position propagate / generate inputs (position 0 is lowest)
//   c    : c[k+1] is the group generate accumulated over positions k..0
// Uses the `grey` and `black` prefix cells, which are defined outside this
// module (presumably grey yields a group generate only, black yields a
// group generate/propagate pair -- confirm against their definitions).
module brent_kung14 (c, p, g);

   input  [13:0] p;
   input  [13:0] g;
   output [14:1] c;

   // Nets are named <G|P>_<hi>_<lo> after the range of positions they cover.
   wire G_1_0, G_2_0, G_3_0,  G_4_0,  G_5_0,  G_6_0, G_7_0;
   wire G_8_0, G_9_0, G_10_0, G_11_0, G_12_0, G_13_0;
   wire G_3_2,   P_3_2;
   wire G_5_4,   P_5_4;
   wire G_7_6,   P_7_6;
   wire G_9_8,   P_9_8;
   wire G_11_10, P_11_10;
   wire G_13_12, P_13_12;
   wire G_7_4,   P_7_4;
   wire G_11_8,  P_11_8;

   // Up-sweep level 1: combine adjacent positions into 2-wide groups
   grey  b_1_0   (G_1_0,            {g[1],  g[0]},  p[1]);
   black b_3_2   (G_3_2,   P_3_2,   {g[3],  g[2]},  {p[3],  p[2]});
   black b_5_4   (G_5_4,   P_5_4,   {g[5],  g[4]},  {p[5],  p[4]});
   black b_7_6   (G_7_6,   P_7_6,   {g[7],  g[6]},  {p[7],  p[6]});
   black b_9_8   (G_9_8,   P_9_8,   {g[9],  g[8]},  {p[9],  p[8]});
   black b_11_10 (G_11_10, P_11_10, {g[11], g[10]}, {p[11], p[10]});
   black b_13_12 (G_13_12, P_13_12, {g[13], g[12]}, {p[13], p[12]});

   // Up-sweep level 2: combine 2-wide groups into 4-wide groups
   grey  g_3_0   (G_3_0,           {G_3_2,   G_1_0}, P_3_2);
   black b_7_4   (G_7_4,  P_7_4,   {G_7_6,   G_5_4}, {P_7_6,   P_5_4});
   black b_11_8  (G_11_8, P_11_8,  {G_11_10, G_9_8}, {P_11_10, P_9_8});

   // Up-sweep level 3: 8-wide group anchored at position 0
   grey  g_7_0   (G_7_0,  {G_7_4,  G_3_0}, P_7_4);

   // Down-sweep: extend the anchored prefixes to the remaining odd positions
   grey  g_11_0  (G_11_0, {G_11_8,  G_7_0},  P_11_8);
   grey  g_5_0   (G_5_0,  {G_5_4,   G_3_0},  P_5_4);
   grey  g_9_0   (G_9_0,  {G_9_8,   G_7_0},  P_9_8);
   grey  g_13_0  (G_13_0, {G_13_12, G_11_0}, P_13_12);

   // Last grey level: fill in the even positions from their odd neighbour
   grey  g_2_0   (G_2_0,  {g[2],  G_1_0},  p[2]);
   grey  g_4_0   (G_4_0,  {g[4],  G_3_0},  p[4]);
   grey  g_6_0   (G_6_0,  {g[6],  G_5_0},  p[6]);
   grey  g_8_0   (G_8_0,  {g[8],  G_7_0},  p[8]);
   grey  g_10_0  (G_10_0, {g[10], G_9_0},  p[10]);
   grey  g_12_0  (G_12_0, {g[12], G_11_0}, p[12]);

   // Carry outputs: c[1] = g[0], and c[k+1] = G_k_0 for k = 1..13
   assign c = {G_13_0, G_12_0, G_11_0, G_10_0, G_9_0, G_8_0, G_7_0,
               G_6_0,  G_5_0,  G_4_0,  G_3_0,  G_2_0, G_1_0, g[0]};

endmodule
|
@ -1,70 +0,0 @@
|
||||
// ha: single-bit half adder.
//   C : carry out (A AND B)
//   S : sum bit   (A XOR B)
module ha (
   output wire C,
   output wire S,
   input  wire A,
   input  wire B
   );

   assign C = A & B;
   assign S = A ^ B;

endmodule // ha
|
||||
|
||||
// module fa (input logic a, b, c, output logic sum, carry);
|
||||
|
||||
// assign sum = a^b^c;
|
||||
// assign carry = a&b|a&c|b&c;
|
||||
|
||||
// endmodule // fa
|
||||
|
||||
// module csa #(parameter WIDTH=8) (a, b,c, sum, carry, cout);
|
||||
|
||||
// input logic [WIDTH-1:0] a, b, c;
|
||||
|
||||
// output logic [WIDTH-1:0] sum, carry;
|
||||
// output logic cout;
|
||||
|
||||
// logic [WIDTH:0] carry_temp;
|
||||
// genvar i;
|
||||
// generate
|
||||
// for (i=0;i<WIDTH;i=i+1)
|
||||
// begin : genbit
|
||||
// fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
|
||||
// end
|
||||
// endgenerate
|
||||
// assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};
|
||||
// assign cout = carry_temp[WIDTH];
|
||||
|
||||
// endmodule // csa
|
||||
|
||||
// FA_array: n independent full adders operating bitwise on three vectors.
//   S  : per-bit sum outputs
//   C  : per-bit carry outputs (not shifted; bit i comes from adder i)
//   A, B, Ci : the three n-bit operands; bit i of each feeds adder i
// Parameter n sets the vector width (default 32).
// Instantiates the `fa` cell, which must be provided elsewhere in the design.
module FA_array (S, C, A, B, Ci) ;
   parameter n = 32;
   input [n-1:0]  A;
   input [n-1:0]  B;
   input [n-1:0]  Ci;
   output [n-1:0] S;
   output [n-1:0] C;

   // One full adder per bit position; there is no carry chain between them.
   genvar i;
   generate
      for (i = 0; i < n; i = i + 1) begin : index
         fa FA1(.sum(S[i]), .carry(C[i]), .a(A[i]), .b(B[i]), .c(Ci[i]));
      end
   endgenerate

endmodule // FA_array
|
||||
|
||||
// HA_array: n independent half adders operating bitwise on two vectors.
//   S : per-bit sum outputs
//   C : per-bit carry outputs (not shifted; bit i comes from adder i)
//   A, B : the two n-bit operands; bit i of each feeds adder i
// Parameter n sets the vector width (default 32).
module HA_array (S, C, A, B) ;
   parameter n = 32;

   input  [n-1:0] A;
   input  [n-1:0] B;
   output [n-1:0] S;
   output [n-1:0] C;

   // One `ha` cell per bit position; there is no carry chain between them.
   genvar bit_idx;
   generate
      for (bit_idx = 0; bit_idx < n; bit_idx = bit_idx + 1) begin : index
         ha ha1 (
            .C (C[bit_idx]),
            .S (S[bit_idx]),
            .A (A[bit_idx]),
            .B (B[bit_idx])
         );
      end
   endgenerate

endmodule // HA_array
|
@ -68,9 +68,9 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
|
||||
mux2 #(64) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier);
|
||||
mux2 #(64) mx6 (muxa_out, mcand_q, sel_muxr, mcand);
|
||||
// TDM multiplier (carry/save)
|
||||
multiplier mult1 (mcand, mplier, Sum, Carry);
|
||||
multiplier mult1 (mcand, mplier, Sum, Carry); // ***multiply
|
||||
// Q*D - N (reversed but changed in rounder.v to account for sign reversal)
|
||||
csa #(128) csa1 (Sum, Carry, constant, Sum2, Carry2);
|
||||
csa #(128) csa1 (Sum, Carry, constant, Sum2, Carry2); //***adder
|
||||
// Add ulp for subtraction in remainder
|
||||
mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out);
|
||||
|
||||
@ -80,15 +80,15 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0,
|
||||
mux2 #(64) mxA ({64'hFFFF_FFFF_FFFF_F9FF}, {64'hFFFF_FF3F_FFFF_FFFF}, P, qm_const);
|
||||
|
||||
// CPA (from CSA)/Remainder addition/subtraction
|
||||
ldf128 cpa1 (cout1, mul_out, Sum2, Carry2, muxr_out);
|
||||
ldf128 cpa1 (cout1, mul_out, Sum2, Carry2, muxr_out); //***adder
|
||||
// Assuming [1,2) - q1
|
||||
ldf64 cpa2 (cout2, q_out1, regb_out, q_const, 1'b0);
|
||||
ldf64 cpa3 (cout3, qp_out1, regb_out, qp_const, 1'b0);
|
||||
ldf64 cpa4 (cout4, qm_out1, regb_out, qm_const, 1'b1);
|
||||
ldf64 cpa2 (cout2, q_out1, regb_out, q_const, 1'b0); //***adder
|
||||
ldf64 cpa3 (cout3, qp_out1, regb_out, qp_const, 1'b0); //***adder
|
||||
ldf64 cpa4 (cout4, qm_out1, regb_out, qm_const, 1'b1); //***adder
|
||||
// Assuming [0.5,1) - q0
|
||||
ldf64 cpa5 (cout5, q_out0, {regb_out[62:0], vss}, q_const, 1'b0);
|
||||
ldf64 cpa6 (cout6, qp_out0, {regb_out[62:0], vss}, qp_const, 1'b0);
|
||||
ldf64 cpa7 (cout7, qm_out0, {regb_out[62:0], vss}, qm_const, 1'b1);
|
||||
ldf64 cpa5 (cout5, q_out0, {regb_out[62:0], vss}, q_const, 1'b0); //***adder
|
||||
ldf64 cpa6 (cout6, qp_out0, {regb_out[62:0], vss}, qp_const, 1'b0); //***adder
|
||||
ldf64 cpa7 (cout7, qm_out0, {regb_out[62:0], vss}, qm_const, 1'b1); //***adder
|
||||
// One's complement instead of two's complement (for hw efficiency)
|
||||
assign three = {~mul_out[126], mul_out[126], ~mul_out[125:63]};
|
||||
mux2 #(64) mxTC (~mul_out[126:63], three[64:1], op_type, twocmp_out);
|
||||
|
62
wally-pipelined/src/fpu/fclassify.sv
Normal file
62
wally-pipelined/src/fpu/fclassify.sv
Normal file
@ -0,0 +1,62 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
// fclassify: classifies a floating-point operand into exactly one of ten
// categories and reports it as a one-hot mask in ClassResE[9:0].
//   SrcXE     : operand; a single-precision value is read from bits [63:32],
//               a double-precision value uses all 64 bits
//   FmtE      : 0 - single precision, 1 - double precision
//   ClassResE : classification mask (bit layout listed at the final assign),
//               upper 54 bits are zero
module fclassify (
    input  logic [63:0] SrcXE,
    input  logic        FmtE,           // 0-Single 1-Double
    output logic [63:0] ClassResE
    );

    logic [31:0] Single;
    logic [63:0] Double;
    logic Sgn;
    logic Inf, NaN, Zero, Norm, Denorm;
    logic PInf, QNaN, PZero, PNorm, PDenorm;
    logic NInf, SNaN, NZero, NNorm, NDenorm;
    logic MaxExp, ExpZero, ManZero, FirstBitFrac;

    // Single and Double precision layouts
    assign Single = SrcXE[63:32];
    assign Double = SrcXE;
    assign Sgn = SrcXE[63];

    // basic calculations for readability
    assign ExpZero = FmtE ? ~|Double[62:52] : ~|Single[30:23];   // exponent is all zeros
    assign MaxExp = FmtE ? &Double[62:52] : &Single[30:23];      // exponent is all ones
    assign ManZero = FmtE ? ~|Double[51:0] : ~|Single[22:0];     // fraction is all zeros
    assign FirstBitFrac = FmtE ? Double[51] : Single[22];        // most significant fraction bit

    // determine the type of number
    assign NaN = MaxExp & ~ManZero;
    assign Inf = MaxExp & ManZero;
    assign Zero = ExpZero & ManZero;
    assign Denorm= ExpZero & ~ManZero;
    // A normal number has an exponent that is neither all zeros nor all ones.
    // Excluding MaxExp keeps infinities and NaNs from also being flagged as
    // normal, so that exactly one result bit is set.
    assign Norm = ~ExpZero & ~MaxExp;

    // determine the sub categories
    assign QNaN = FirstBitFrac&NaN;
    assign SNaN = ~FirstBitFrac&NaN;
    assign PInf = ~Sgn&Inf;
    assign NInf = Sgn&Inf;
    assign PNorm = ~Sgn&Norm;
    assign NNorm = Sgn&Norm;
    assign PDenorm = ~Sgn&Denorm;
    assign NDenorm = Sgn&Denorm;
    assign PZero = ~Sgn&Zero;
    assign NZero = Sgn&Zero;

    // determine sub category and combine into the result
    //  bit 0 - -Inf
    //  bit 1 - -Norm
    //  bit 2 - -Denorm
    //  bit 3 - -Zero
    //  bit 4 - +Zero
    //  bit 5 - +Denorm
    //  bit 6 - +Norm
    //  bit 7 - +Inf
    //  bit 8 - signaling NaN
    //  bit 9 - quiet NaN
    assign ClassResE = {{54{1'b0}}, QNaN, SNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};

endmodule
|
@ -39,7 +39,7 @@
|
||||
// if either of the input operands is a signaling NaN per 754
|
||||
|
||||
`include "wally-config.vh"
|
||||
module fpucmp1 (
|
||||
module fcmp (
|
||||
input logic [63:0] op1,
|
||||
input logic [63:0] op2,
|
||||
input logic [2:0] FOpCtrlE,
|
||||
@ -48,7 +48,7 @@ module fpucmp1 (
|
||||
|
||||
output logic Invalid, // Invalid Operation
|
||||
// output logic [1:0] FCC, // Condition Codes
|
||||
output logic [63:0] FCmpResultE);
|
||||
output logic [63:0] CmpResE);
|
||||
// Perform magnitude comparison between the 63 least significant bits
|
||||
// of the input operands. Only LT and EQ are returned, since GT can
|
||||
// be determined from these values.
|
||||
@ -392,7 +392,7 @@ module exception_cmp_2 (
|
||||
|
||||
output logic invalid,
|
||||
output logic [1:0] fcc,
|
||||
output logic [63:0] FCmpResultE,
|
||||
output logic [63:0] CmpResE,
|
||||
|
||||
input logic Azero,
|
||||
input logic Bzero,
|
||||
@ -453,12 +453,12 @@ module exception_cmp_2 (
|
||||
|
||||
always_comb begin
|
||||
case (FOpCtrlE[2:0])
|
||||
3'b111: FCmpResultE = LT ? A : B;//min
|
||||
3'b101: FCmpResultE = GT ? A : B;//max
|
||||
3'b010: FCmpResultE = {63'b0, EQ};//equal
|
||||
3'b001: FCmpResultE = {63'b0, LT};//less than
|
||||
3'b011: FCmpResultE = {63'b0, LT|EQ};//less than or equal
|
||||
default: FCmpResultE = 64'b0;
|
||||
3'b111: CmpResE = LT ? A : B;//min
|
||||
3'b101: CmpResE = GT ? A : B;//max
|
||||
3'b010: CmpResE = {63'b0, EQ};//equal
|
||||
3'b001: CmpResE = {63'b0, LT};//less than
|
||||
3'b011: CmpResE = {63'b0, LT|EQ};//less than or equal
|
||||
default: CmpResE = 64'b0;
|
||||
endcase
|
||||
end
|
||||
|
@ -64,30 +64,38 @@ module fctrl (
|
||||
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w
|
||||
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d
|
||||
else ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
7'b1100000: case(Rs2D[0])
|
||||
1'b0: ControlsD = `FCTRLW'b0_1_010_0110_00_00_0_0; // fcvt.s.w
|
||||
1'b1: ControlsD = `FCTRLW'b0_1_010_0101_00_00_0_0; // fcvt.s.wu
|
||||
7'b1100000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // fcvt.s.w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_100_0101_00_00_0_0; // fcvt.s.wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_100_1001_00_00_0_0; // fcvt.s.l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_100_1101_00_00_0_0; // fcvt.s.lu
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1101000: case(Rs2D[0])
|
||||
1'b0: ControlsD = `FCTRLW'b1_1_010_0100_00_00_0_0; // fcvt.w.s
|
||||
1'b1: ControlsD = `FCTRLW'b1_1_010_0101_00_00_0_0; // fcvt.wu.s
|
||||
7'b1101000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_1_100_0010_00_00_0_0; // fcvt.w.s
|
||||
2'b01: ControlsD = `FCTRLW'b1_1_100_0110_00_00_0_0; // fcvt.wu.s
|
||||
2'b10: ControlsD = `FCTRLW'b1_1_100_1010_00_00_0_0; // fcvt.l.s
|
||||
2'b11: ControlsD = `FCTRLW'b1_1_100_1110_00_00_0_0; // fcvt.lu.s
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x
|
||||
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0010_00_00_0_0; // fcvt.s.d
|
||||
7'b1100001: case(Rs2D[0])
|
||||
1'b0: ControlsD = `FCTRLW'b0_1_010_1110_00_00_0_0; // fcvt.d.w
|
||||
1'b1: ControlsD = `FCTRLW'b0_1_010_1111_00_00_0_0; // fcvt.d.wu
|
||||
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fcvt.s.d
|
||||
7'b1100001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // fcvt.d.w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_100_0101_00_00_0_0; // fcvt.d.wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_100_1001_00_00_0_0; // fcvt.d.l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_100_1101_00_00_0_0; // fcvt.d.lu
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1101001: case(Rs2D[0])
|
||||
1'b0: ControlsD = `FCTRLW'b1_0_010_1100_00_00_0_0; // fcvt.w.d
|
||||
1'b1: ControlsD = `FCTRLW'b1_0_010_1101_00_00_0_0; // fcvt.wu.d
|
||||
7'b1101001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_100_0010_00_00_0_0; // fcvt.w.d
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_100_0110_00_00_0_0; // fcvt.wu.d
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_100_1010_00_00_0_0; // fcvt.l.d
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_100_1110_00_00_0_0; // fcvt.lu.d
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x
|
||||
7'b0100001: ControlsD = `FCTRLW'b1_0_010_1000_00_00_0_0; // fcvt.d.s
|
||||
7'b0100001: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fcvt.d.s
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
@ -130,17 +138,26 @@ module fctrl (
|
||||
// add/sub/cnvt
|
||||
// fadd = 0000
|
||||
// fsub = 0001
|
||||
// fcvt.w.s = 0100
|
||||
// fcvt.wu.s = 0101
|
||||
// fcvt.s.w = 0110
|
||||
// fcvt.s.wu = 0111
|
||||
// fcvt.s.d = 0010
|
||||
// fcvt.w.d = 1100
|
||||
// fcvt.wu.d = 1101
|
||||
// fcvt.d.w = 1110
|
||||
// fcvt.d.wu = 1111
|
||||
// fcvt.d.s = 1000
|
||||
// { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub}
|
||||
// cnvt
|
||||
// fcvt.w.s = 0010
|
||||
// fcvt.wu.s = 0110
|
||||
// fcvt.s.w = 0001
|
||||
// fcvt.s.wu = 0101
|
||||
// fcvt.s.d = 0000
|
||||
// fcvt.l.s = 1010
|
||||
// fcvt.lu.s = 1110
|
||||
// fcvt.s.l = 1001
|
||||
// fcvt.s.lu = 1101
|
||||
// fcvt.w.d = 0010
|
||||
// fcvt.wu.d = 0110
|
||||
// fcvt.d.w = 0001
|
||||
// fcvt.d.wu = 0101
|
||||
// fcvt.d.s = 0000
|
||||
// fcvt.l.d = 1010
|
||||
// fcvt.lu.d = 1110
|
||||
// fcvt.d.l = 1001
|
||||
// fcvt.d.lu = 1101
|
||||
// {long, unsigned, to int, from int} Fmt controls the output for fp -> fp
|
||||
|
||||
// fmv.w.x = ???0
|
||||
// fmv.w.d = ???1
|
||||
|
@ -23,7 +23,7 @@
|
||||
//
|
||||
|
||||
// `timescale 1ps/1ps
|
||||
module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
|
||||
module fdivsqrt (FDivSqrtDoneE, FDivResultM, FDivSqrtFlgM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
|
||||
FDivStartE, reset, clk, FDivBusyE, HoldInputs);
|
||||
|
||||
input [63:0] DivInput1E; // 1st input operand (A)
|
||||
@ -39,8 +39,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di
|
||||
input clk;
|
||||
|
||||
output [63:0] FDivResultM; // Result of operation
|
||||
output [4:0] FDivFlagsM; // IEEE exception flags
|
||||
output DivDenormM; // DivDenormM on input or output
|
||||
output [4:0] FDivSqrtFlgM; // IEEE exception flags
|
||||
output FDivSqrtDoneE;
|
||||
output FDivBusyE, HoldInputs;
|
||||
|
||||
@ -51,6 +50,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di
|
||||
wire [63:0] Float2;
|
||||
wire [63:0] IntValue;
|
||||
|
||||
wire DivDenormM; // DivDenormM on input or output
|
||||
wire [12:0] exp1, exp2, expF;
|
||||
wire [12:0] exp_diff, bias;
|
||||
wire [13:0] exp_sqrt;
|
||||
@ -103,7 +103,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di
|
||||
convert_inputs_div divconv1 (Float1, Float2, DivInput1E, DivInput2E, DivOpType, FmtE);
|
||||
|
||||
// Test for exceptions and return the "Invalid Operation" and
|
||||
// "Denormalized" Input FDivFlagsM. The "sel_inv" is used in
|
||||
// "Denormalized" Input FDivSqrtFlgM. The "sel_inv" is used in
|
||||
// the third pipeline stage to select the result. Also, op1_Norm
|
||||
// and op2_Norm are one if DivInput1E and DivInput2E are not zero or denormalized.
|
||||
// sub is one if the effective operation is subtraction.
|
||||
@ -120,12 +120,12 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di
|
||||
// bias : DP = 2^{11-1}-1 = 1023
|
||||
assign bias = {3'h0, 10'h3FF};
|
||||
// Divide exponent
|
||||
csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c);
|
||||
exp_add explogic1 (exp_cout1, {open, exp_diff},
|
||||
csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); //***adder
|
||||
exp_add explogic1 (exp_cout1, {open, exp_diff}, //***adder?
|
||||
{vss, exp_s}, {vss, exp_c}, 1'b1);
|
||||
// Sqrt exponent (check if exponent is odd)
|
||||
assign exp_odd = Float1[52] ? vss : vdd;
|
||||
exp_add explogic2 (exp_cout2, exp_sqrt,
|
||||
exp_add explogic2 (exp_cout2, exp_sqrt, //***adder?
|
||||
{vss, exp1}, {4'h0, 10'h3ff}, exp_odd);
|
||||
// Choose correct exponent
|
||||
assign expF = DivOpType ? exp_sqrt[13:1] : exp_diff;
|
||||
@ -156,7 +156,7 @@ module fpdiv (FDivSqrtDoneE, FDivResultM, FDivFlagsM, DivDenormM, DivInput1E, Di
|
||||
// Store the final result and the exception flags in registers.
|
||||
flopenr #(64) rega (clk, reset, FDivSqrtDoneE, Result, FDivResultM);
|
||||
flopenr #(1) regb (clk, reset, FDivSqrtDoneE, DenormIO, DivDenormM);
|
||||
flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivFlagsM);
|
||||
flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivSqrtFlgM);
|
||||
|
||||
endmodule // fpadd
|
||||
|
@ -25,7 +25,7 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fpuhazard(
|
||||
module fhazard(
|
||||
input logic [4:0] Adr1E, Adr2E, Adr3E,
|
||||
input logic FWriteEnM, FWriteEnW,
|
||||
input logic [4:0] RdM, RdW,
|
@ -16,8 +16,8 @@ module fma2(
|
||||
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
output logic [63:0] FmaResultM, // FMA final result
|
||||
output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
|
||||
output logic [63:0] FMAResM, // FMA final result
|
||||
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
|
||||
|
||||
|
||||
|
||||
@ -57,7 +57,7 @@ module fma2(
|
||||
logic [12:0] MaxExp; // maximum value of the exponent
|
||||
logic [12:0] FracLen; // length of the fraction
|
||||
logic SigNaN; // is an input a signaling NaN
|
||||
logic UnderflowFlag; // Underflow singal used in FmaFlagsM (used to avoid a circular depencency)
|
||||
logic UnderflowFlag; // Underflow singal used in FMAFlgM (used to avoid a circular depencency)
|
||||
logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
|
||||
|
||||
|
||||
@ -316,7 +316,7 @@ module fma2(
|
||||
// Combine flags
|
||||
// - FMA can't set the Divide by zero flag
|
||||
// - Don't set the underflow flag if the result was rounded up to a normal number
|
||||
assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
|
||||
assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
|
||||
|
||||
|
||||
|
||||
@ -337,7 +337,7 @@ module fma2(
|
||||
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0};
|
||||
assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0};
|
||||
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0};
|
||||
assign FmaResultM = XNaNM ? XNaNResult :
|
||||
assign FMAResM = XNaNM ? XNaNResult :
|
||||
YNaNM ? YNaNResult :
|
||||
ZNaNM ? ZNaNResult :
|
||||
Invalid ? InvalidResult : // has to be before inf
|
||||
|
@ -229,11 +229,11 @@ module fpadd (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn);
|
||||
assign corr_sign = ~op_type[2]&~op_type[1]&op_type[0]&swap;
|
||||
|
||||
// 64-bit Mantissa Adder/Subtractor
|
||||
cla64 add1 (sum, mantissaA3, mantissaB3, sub);
|
||||
cla64 add1 (sum, mantissaA3, mantissaB3, sub); //***adder
|
||||
|
||||
// 64-bit Mantissa Subtractor - to get the two's complement of the
|
||||
// result when the sign from the adder/subtractor is negative.
|
||||
cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3);
|
||||
cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3); //***adder
|
||||
|
||||
// Determine the correct sign of the result
|
||||
assign sign_corr = ((corr_sign ^ signA) & ~convert) ^ sum[63];
|
||||
|
@ -34,7 +34,7 @@ module fpu (
|
||||
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
|
||||
input logic StallE, StallM, StallW,
|
||||
input logic FlushE, FlushM, FlushW,
|
||||
output logic FStallD, // Stall the decode stage if Div/Sqrt instruction
|
||||
output logic FStallD, // Stall the decode stage
|
||||
output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
|
||||
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
|
||||
output logic [`XLEN-1:0] FIntResM,
|
||||
@ -42,48 +42,38 @@ module fpu (
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
output logic [4:0] SetFflagsM, // FPU flags
|
||||
output logic [`XLEN-1:0] FPUResultW); // FPU result
|
||||
|
||||
// *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS
|
||||
// control logic signal instantiation
|
||||
logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable
|
||||
logic [2:0] FrmD, FrmE, FrmM, FrmW; // FP rounding mode
|
||||
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
|
||||
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
|
||||
logic FDivStartD, FDivStartE; // Start division
|
||||
logic FWriteIntD; // Write to integer register
|
||||
logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction
|
||||
logic [1:0] FMemRWD; // Read and write enable for memory
|
||||
logic [1:0] ForwardXD, ForwardXE; // Input1 forwarding mux control signal
|
||||
logic [1:0] ForwardYD, ForwardYE; // Input2 forwarding mux control signal
|
||||
logic [1:0] ForwardZD, ForwardZE; // Input3 forwarding mux control signal
|
||||
logic SrcYUsedD; // Is input 2 used
|
||||
logic SrcZUsedD; // Is input 3 used
|
||||
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal
|
||||
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
|
||||
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component
|
||||
logic [1:0] FResSelD, FResSelE, FResSelM;
|
||||
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;
|
||||
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
|
||||
logic [1:0] FResSelD, FResSelE, FResSelM;
|
||||
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;
|
||||
logic [4:0] Adr1E, Adr2E, Adr3E;
|
||||
|
||||
// regfile signals
|
||||
logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining
|
||||
logic [63:0] FWDM; // Write data for FP register
|
||||
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [63:0] SrcXE, SrcXM, SrcXW; // Input 1 to the various units (after forwarding)
|
||||
logic [`XLEN-1:0] SrcXMAligned;
|
||||
logic [63:0] SrcYE, SrcYM, SrcYW; // Input 2 to the various units (after forwarding)
|
||||
logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding)
|
||||
logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding)
|
||||
logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding)
|
||||
logic [63:0] FLoadResultW, FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions
|
||||
|
||||
// div/sqrt signals
|
||||
logic DivDenormE, DivDenormM, DivDenormW;
|
||||
logic DivOvEn, DivUnEn;
|
||||
logic [63:0] FDivResultE, FDivResultM, FDivResultW;
|
||||
logic [4:0] FDivFlagsE, FDivFlagsM, FDivFlagsW;
|
||||
logic FDivSqrtDoneE, FDivSqrtDoneM;
|
||||
logic [63:0] FDivResultM, FDivResultW;
|
||||
logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW;
|
||||
logic FDivSqrtDoneE;
|
||||
logic [63:0] DivInput1E, DivInput2E;
|
||||
logic HoldInputs; // keep forwarded inputs arround durring division
|
||||
|
||||
// FMA signals
|
||||
logic [105:0] ProdManE, ProdManM;
|
||||
logic [105:0] ProdManE, ProdManM; ///*** put pipline stages in units
|
||||
logic [161:0] AlignedAddendE, AlignedAddendM;
|
||||
logic [12:0] ProdExpE, ProdExpM;
|
||||
logic AddendStickyE, AddendStickyM;
|
||||
@ -91,93 +81,112 @@ module fpu (
|
||||
logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM;
|
||||
logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM;
|
||||
logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM;
|
||||
logic [63:0] FmaResultM, FmaResultW;
|
||||
logic [4:0] FmaFlagsM, FmaFlagsW;
|
||||
logic [63:0] FMAResM, FMAResW;
|
||||
logic [4:0] FMAFlgM, FMAFlgW;
|
||||
|
||||
// add/cvt signals
|
||||
logic [63:0] AddSumE, AddSumTcE;
|
||||
logic [3:0] AddSelInvE;
|
||||
logic [10:0] AddExpPostSumE;
|
||||
logic AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE;
|
||||
logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE;
|
||||
logic AddConvertE;
|
||||
logic [63:0] AddFloat1E, AddFloat2E;
|
||||
logic [11:0] AddExp1DenormE, AddExp2DenormE;
|
||||
logic [10:0] AddExponentE;
|
||||
logic [2:0] AddRmE;
|
||||
logic [3:0] AddOpTypeE;
|
||||
logic AddPE, AddOvEnE, AddUnEnE;
|
||||
logic AddDenormM;
|
||||
logic [63:0] AddSumM, AddSumTcM;
|
||||
logic [3:0] AddSelInvM;
|
||||
logic [10:0] AddExpPostSumM;
|
||||
logic AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM;
|
||||
logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM;
|
||||
logic AddConvertM, AddSignM;
|
||||
logic [63:0] AddFloat1M, AddFloat2M;
|
||||
logic [11:0] AddExp1DenormM, AddExp2DenormM;
|
||||
logic [10:0] AddExponentM;
|
||||
logic [63:0] AddOp1M, AddOp2M;
|
||||
logic [2:0] AddRmM;
|
||||
logic [3:0] AddOpTypeM;
|
||||
logic AddPM, AddOvEnM, AddUnEnM;
|
||||
logic [63:0] FAddResultM, FAddResultW;
|
||||
logic [4:0] FAddFlagsM, FAddFlagsW;
|
||||
logic [63:0] AddSumE, AddSumM;
|
||||
logic [63:0] AddSumTcE, AddSumTcM;
|
||||
logic [3:0] AddSelInvE, AddSelInvM;
|
||||
logic [10:0] AddExpPostSumE,AddExpPostSumM;
|
||||
logic AddCorrSignE, AddCorrSignM;
|
||||
logic AddOp1NormE, AddOp1NormM;
|
||||
logic AddOp2NormE, AddOp2NormM;
|
||||
logic AddOpANormE, AddOpANormM;
|
||||
logic AddOpBNormE, AddOpBNormM;
|
||||
logic AddInvalidE, AddInvalidM;
|
||||
logic AddDenormInE, AddDenormInM;
|
||||
logic AddSwapE, AddSwapM;
|
||||
logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2
|
||||
logic AddSignAE, AddSignAM;
|
||||
logic AddConvertE, AddConvertM;
|
||||
logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M;
|
||||
logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM;
|
||||
logic [10:0] AddExponentE, AddExponentM;
|
||||
logic [63:0] FAddResM, FAddResW;
|
||||
logic [4:0] FAddFlgM, FAddFlgW;
|
||||
|
||||
// cmp signals
|
||||
logic CmpInvalidE, CmpInvalidM, CmpInvalidW;
|
||||
logic [63:0] FCmpResultE, FCmpResultM, FCmpResultW;
|
||||
logic CmpNVE, CmpNVM, CmpNVW;
|
||||
logic [63:0] CmpResE, CmpResM, CmpResW;
|
||||
|
||||
// fsgn signals
|
||||
logic [63:0] SgnResultE, SgnResultM, SgnResultW;
|
||||
logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW;
|
||||
logic [63:0] SgnResE, SgnResM;
|
||||
logic SgnNVE, SgnNVM, SgnNVW;
|
||||
logic [63:0] FResM, FResW;
|
||||
logic FFlgM, FFlgW;
|
||||
logic FFlgM, FFlgW;
|
||||
|
||||
// instantiation of W stage regfile signals
|
||||
logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW;
|
||||
logic [63:0] AlignedSrcAM;
|
||||
|
||||
// classify signals
|
||||
logic [63:0] ClassResultE, ClassResultM, ClassResultW;
|
||||
logic [63:0] ClassResE, ClassResM;
|
||||
|
||||
// 64-bit FPU result
|
||||
logic [63:0] FPUResult64W, FPUResult64E;
|
||||
logic [63:0] FPUResult64W;
|
||||
logic [4:0] FPUFlagsW;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//DECODE STAGE
|
||||
|
||||
|
||||
// top-level controller for FPU
|
||||
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
|
||||
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
|
||||
.FRM_REGW, .IllegalFPUInstrD, .FWriteEnD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
|
||||
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
|
||||
|
||||
// regfile instantiation
|
||||
FPregfile fpregfile (clk, reset, FWriteEnW,
|
||||
fregfile fregfile (clk, reset, FWriteEnW,
|
||||
InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
|
||||
FPUResult64W,
|
||||
FRD1D, FRD2D, FRD3D);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//*****************
|
||||
// fpregfile D/E pipe registers
|
||||
// D/E pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
|
||||
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
|
||||
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
|
||||
|
||||
//*****************
|
||||
// other D/E pipe registers
|
||||
//*****************
|
||||
flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
|
||||
flopenrc #(15) CtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
|
||||
flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
|
||||
flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
|
||||
{Adr1E, Adr2E, Adr3E});
|
||||
flopenrc #(22) DECtrlReg(clk, reset, FlushE, ~StallE,
|
||||
flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD},
|
||||
{FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE});
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//EXECUTION STAGE
|
||||
|
||||
// Hazard unit for FPU
|
||||
fpuhazard hazard(.*);
|
||||
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FWriteEnM, .FWriteEnW, .RdM, .RdW, .FResultSelM, .FStallD,
|
||||
.ForwardXE, .ForwardYE, .ForwardZE);
|
||||
|
||||
// forwarding muxs
|
||||
mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE);
|
||||
@ -186,7 +195,9 @@ module fpu (
|
||||
|
||||
|
||||
// first of two-stage instance of floating-point fused multiply-add unit
|
||||
fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]),.*);
|
||||
fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, .AlignedAddendE,
|
||||
.ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
|
||||
.XNaNE, .YNaNE, .ZNaNE );
|
||||
|
||||
// first and only instance of floating-point divider
|
||||
logic fpdivClk;
|
||||
@ -204,174 +215,140 @@ module fpu (
|
||||
.en(~HoldInputs), .clear(FDivSqrtDoneE),
|
||||
.reset(reset), .clk(clk));
|
||||
|
||||
fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .*);
|
||||
fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E,
|
||||
.FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM,
|
||||
.FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset);
|
||||
|
||||
|
||||
|
||||
// first of two-stage instance of floating-point add/cvt unit
|
||||
fpuaddcvt1 fpadd1 (.*);
|
||||
fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE,
|
||||
.AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE,
|
||||
.AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE,
|
||||
.AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE);
|
||||
|
||||
// first of two-stage instance of floating-point comparator
|
||||
fpucmp1 fpcmp1 (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpInvalidE, FCmpResultE);
|
||||
// first and only instance of floating-point comparator
|
||||
fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE);
|
||||
|
||||
// first and only instance of floating-point sign converter
|
||||
fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*);
|
||||
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE);
|
||||
|
||||
// first and only instance of floating-point classify unit
|
||||
fpuclassify fpuclass (.*);
|
||||
fclassify fclassify (.SrcXE, .FmtE, .ClassResE);
|
||||
|
||||
// output for store instructions
|
||||
assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]};
|
||||
|
||||
//***swap to mux
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//*****************
|
||||
//fpregfile D/E pipe registers
|
||||
// E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM);
|
||||
flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM);
|
||||
flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM);
|
||||
|
||||
//*****************
|
||||
// fma E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(106) EMRegFma3(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
|
||||
flopenrc #(162) EMRegFma4(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
|
||||
flopenrc #(13) EMRegFma6(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
|
||||
flopenrc #(1) EMRegFma7(clk, reset, FlushM, ~StallM, AddendStickyE, AddendStickyM);
|
||||
flopenrc #(1) EMRegFma8(clk, reset, FlushM, ~StallM, KillProdE, KillProdM);
|
||||
flopenrc #(1) EMRegFma10(clk, reset, FlushM, ~StallM, XZeroE, XZeroM);
|
||||
flopenrc #(1) EMRegFma11(clk, reset, FlushM, ~StallM, YZeroE, YZeroM);
|
||||
flopenrc #(1) EMRegFma12(clk, reset, FlushM, ~StallM, ZZeroE, ZZeroM);
|
||||
flopenrc #(1) EMRegFma16(clk, reset, FlushM, ~StallM, XInfE, XInfM);
|
||||
flopenrc #(1) EMRegFma17(clk, reset, FlushM, ~StallM, YInfE, YInfM);
|
||||
flopenrc #(1) EMRegFma18(clk, reset, FlushM, ~StallM, ZInfE, ZInfM);
|
||||
flopenrc #(1) EMRegFma19(clk, reset, FlushM, ~StallM, XNaNE, XNaNM);
|
||||
flopenrc #(1) EMRegFma20(clk, reset, FlushM, ~StallM, YNaNE, YNaNM);
|
||||
flopenrc #(1) EMRegFma21(clk, reset, FlushM, ~StallM, ZNaNE, ZNaNM);
|
||||
flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
|
||||
flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
|
||||
flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
|
||||
flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
{AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE},
|
||||
{AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM});
|
||||
|
||||
//*****************
|
||||
// fpadd E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM);
|
||||
flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM);
|
||||
flopenrc #(4) EMRegAdd3(clk, reset, FlushM, ~StallM, AddSelInvE, AddSelInvM);
|
||||
flopenrc #(11) EMRegAdd4(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
|
||||
flopenrc #(1) EMRegAdd5(clk, reset, FlushM, ~StallM, AddCorrSignE, AddCorrSignM);
|
||||
flopenrc #(1) EMRegAdd6(clk, reset, FlushM, ~StallM, AddOp1NormE, AddOp1NormM);
|
||||
flopenrc #(1) EMRegAdd7(clk, reset, FlushM, ~StallM, AddOp2NormE, AddOp2NormM);
|
||||
flopenrc #(1) EMRegAdd8(clk, reset, FlushM, ~StallM, AddOpANormE, AddOpANormM);
|
||||
flopenrc #(1) EMRegAdd9(clk, reset, FlushM, ~StallM, AddOpBNormE, AddOpBNormM);
|
||||
flopenrc #(1) EMRegAdd10(clk, reset, FlushM, ~StallM, AddInvalidE, AddInvalidM);
|
||||
flopenrc #(1) EMRegAdd11(clk, reset, FlushM, ~StallM, AddDenormInE, AddDenormInM);
|
||||
flopenrc #(1) EMRegAdd12(clk, reset, FlushM, ~StallM, AddConvertE, AddConvertM);
|
||||
flopenrc #(1) EMRegAdd13(clk, reset, FlushM, ~StallM, AddSwapE, AddSwapM);
|
||||
flopenrc #(1) EMRegAdd14(clk, reset, FlushM, ~StallM, AddNormOvflowE, AddNormOvflowM);
|
||||
flopenrc #(1) EMRegAdd15(clk, reset, FlushM, ~StallM, AddSignAE, AddSignAM);
|
||||
flopenrc #(64) EMRegAdd16(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M);
|
||||
flopenrc #(64) EMRegAdd17(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M);
|
||||
flopenrc #(12) EMRegAdd18(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
|
||||
flopenrc #(12) EMRegAdd19(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
|
||||
flopenrc #(11) EMRegAdd20(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
|
||||
flopenrc #(3) EMRegAdd23(clk, reset, FlushM, ~StallM, AddRmE, AddRmM);
|
||||
flopenrc #(4) EMRegAdd24(clk, reset, FlushM, ~StallM, AddOpTypeE, AddOpTypeM);
|
||||
flopenrc #(1) EMRegAdd25(clk, reset, FlushM, ~StallM, AddPE, AddPM);
|
||||
flopenrc #(1) EMRegAdd26(clk, reset, FlushM, ~StallM, AddOvEnE, AddOvEnM);
|
||||
flopenrc #(1) EMRegAdd27(clk, reset, FlushM, ~StallM, AddUnEnE, AddUnEnM);
|
||||
flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
|
||||
flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M);
|
||||
flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M);
|
||||
flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
|
||||
flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
|
||||
flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
|
||||
flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM,
|
||||
{AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE},
|
||||
{AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM});
|
||||
|
||||
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM);
|
||||
flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM);
|
||||
|
||||
//*****************
|
||||
// fpcmp E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpInvalidE, CmpInvalidM);
|
||||
flopenrc #(64) EMRegCmp3(clk, reset, FlushM, ~StallM, FCmpResultE, FCmpResultM);
|
||||
flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
|
||||
flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
|
||||
|
||||
//*****************
|
||||
// fpsgn E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnResultE, SgnResultM);
|
||||
flopenrc #(5) EMRegSgn3(clk, reset, FlushM, ~StallM, SgnFlagsE, SgnFlagsM);
|
||||
|
||||
//*****************
|
||||
// other E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM,
|
||||
{FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE},
|
||||
{FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM});
|
||||
|
||||
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
|
||||
|
||||
//*****************
|
||||
// fpuclassify E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResultE, ClassResultM);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//BEGIN MEMORY STAGE
|
||||
|
||||
mux3 #(64) FResMux(AlignedSrcAM, SgnResultM, FCmpResultM, FResSelM, FResM);
|
||||
assign FFlgM = CmpInvalidM & FResSelM[1];
|
||||
mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM);
|
||||
mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM);
|
||||
|
||||
//***change to mux
|
||||
assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]};
|
||||
mux3 #(`XLEN) IntResMux(FCmpResultM[`XLEN-1:0], SrcXMAligned, ClassResultM[`XLEN-1:0], FIntResSelM, FIntResM);
|
||||
mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM);
|
||||
|
||||
// second instance of two-stage FMA unit
|
||||
fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .*);
|
||||
fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM,
|
||||
.ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM,
|
||||
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM,
|
||||
.FMAResM, .FMAFlgM);
|
||||
|
||||
// second instance of two-stage floating-point add/cvt unit
|
||||
fpuaddcvt2 fpadd2 (.*);
|
||||
fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M,
|
||||
.AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM,
|
||||
.AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM,
|
||||
.AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM);
|
||||
|
||||
// Align SrcA to MSB when single precision
|
||||
mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//*****************
|
||||
//fpregfile M/W pipe registers
|
||||
// M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) MWFpReg1(clk, reset, FlushW, ~StallW, SrcXM, SrcXW);
|
||||
flopenrc #(64) MWFpReg2(clk, reset, FlushW, ~StallW, SrcYM, SrcYW);
|
||||
flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
|
||||
flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW);
|
||||
|
||||
//*****************
|
||||
// fma M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FmaResultM, FmaResultW);
|
||||
flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FmaFlagsM, FmaFlagsW);
|
||||
|
||||
//*****************
|
||||
// fpdiv M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW);
|
||||
flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivFlagsM, FDivFlagsW);
|
||||
flopenrc #(1) MWRegDiv3(clk, reset, FlushW, ~StallW, DivDenormM, DivDenormW);
|
||||
flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW);
|
||||
|
||||
//*****************
|
||||
// fpadd M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResultM, FAddResultW);
|
||||
flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlagsM, FAddFlagsW);
|
||||
flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW);
|
||||
flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW);
|
||||
|
||||
//*****************
|
||||
// fpcmp M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpInvalidM, CmpInvalidW);
|
||||
// flopenrc #(2) MWRegCmp2(clk, reset, FlushW, ~StallW, CmpFCCM, CmpFCCW);
|
||||
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, FCmpResultM, FCmpResultW);
|
||||
flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW);
|
||||
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW);
|
||||
|
||||
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
|
||||
flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW);
|
||||
|
||||
//*****************
|
||||
// fpsgn M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) MWRegSgn1(clk, reset, FlushW, ~StallW, SgnResultM, SgnResultW);
|
||||
flopenrc #(5) MWRegSgn2(clk, reset, FlushW, ~StallW, SgnFlagsM, SgnFlagsW);
|
||||
|
||||
//*****************
|
||||
// other M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW,
|
||||
{FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM},
|
||||
{FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW});
|
||||
|
||||
//*****************
|
||||
// fpuclassify M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, ClassResultM, ClassResultW);
|
||||
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
|
||||
flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW);
|
||||
|
||||
|
||||
|
||||
@ -385,13 +362,13 @@ module fpu (
|
||||
|
||||
|
||||
|
||||
|
||||
//***turn into muxs
|
||||
always_comb begin
|
||||
case (FResultSelW)
|
||||
3'b000 : FPUFlagsW = 5'b0;
|
||||
3'b001 : FPUFlagsW = FmaFlagsW;
|
||||
3'b010 : FPUFlagsW = FAddFlagsW;
|
||||
3'b011 : FPUFlagsW = FDivFlagsW;
|
||||
3'b001 : FPUFlagsW = FMAFlgW;
|
||||
3'b010 : FPUFlagsW = FAddFlgW;
|
||||
3'b011 : FPUFlagsW = FDivSqrtFlgW;
|
||||
3'b100 : FPUFlagsW = {4'b0,FFlgW};
|
||||
default : FPUFlagsW = 5'bxxxxx;
|
||||
endcase
|
||||
@ -400,8 +377,8 @@ module fpu (
|
||||
always_comb begin
|
||||
case (FResultSelW)
|
||||
3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0};
|
||||
3'b001 : FPUResult64W = FmaResultW;
|
||||
3'b010 : FPUResult64W = FAddResultW;
|
||||
3'b001 : FPUResult64W = FMAResW;
|
||||
3'b010 : FPUResult64W = FAddResW;
|
||||
3'b011 : FPUResult64W = FDivResultW;
|
||||
3'b100 : FPUResult64W = FResW;
|
||||
default : FPUResult64W = 64'bxxxxx;
|
||||
@ -415,7 +392,9 @@ module fpu (
|
||||
// define offsets for LSB zero extension or truncation
|
||||
always_comb begin
|
||||
// zero extension
|
||||
//***turn into mux
|
||||
FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]};
|
||||
//*** put into mem stage
|
||||
SetFflagsM = FPUFlagsW;
|
||||
end
|
||||
|
||||
|
@ -183,11 +183,11 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
|
||||
assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;
|
||||
|
||||
// 64-bit Mantissa Adder/Subtractor
|
||||
cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub);
|
||||
cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder
|
||||
|
||||
// 64-bit Mantissa Subtractor - to get the two's complement of the
|
||||
// result when the sign from the adder/subtractor is negative.
|
||||
cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3);
|
||||
cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); //***adder
|
||||
|
||||
// Finds normal underflow result to determine whether to round final exponent down
|
||||
//***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be
|
||||
|
@ -27,7 +27,7 @@
|
||||
//
|
||||
|
||||
|
||||
module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);
|
||||
module fpuaddcvt2 (FAddResM, FAddFlgM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);
|
||||
|
||||
input [2:0] FrmM; // Rounding mode - specify values
|
||||
input [3:0] FOpCtrlM; // Function opcode
|
||||
@ -51,9 +51,9 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS
|
||||
input AddSwapM;
|
||||
// input AddNormOvflowM;
|
||||
|
||||
output [63:0] FAddResultM; // Result of operation
|
||||
output [4:0] FAddFlagsM; // IEEE exception flags
|
||||
output AddDenormM; // AddDenormM on input or output
|
||||
output [63:0] FAddResM; // Result of operation
|
||||
output [4:0] FAddFlgM; // IEEE exception flags
|
||||
wire AddDenormM; // AddDenormM on input or output
|
||||
|
||||
wire P;
|
||||
assign P = ~FmtM | FOpCtrlM[2];
|
||||
@ -145,7 +145,7 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS
|
||||
// exactly where the rounding point is. The rounding units also
|
||||
// handles special cases and set the exception flags.
|
||||
|
||||
// Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlagsM in order to
|
||||
// Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlgM in order to
|
||||
// help in processor reservation station detection of load/stores. In
|
||||
// other words, the processor would like to know ahead of time that
|
||||
// if the result is an exception then don't load or store.
|
||||
@ -155,8 +155,8 @@ module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddS
|
||||
AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM);
|
||||
|
||||
// Store the final result and the exception flags in registers.
|
||||
assign FAddResultM = Result;
|
||||
assign {AddDenormM, FAddFlagsM} = {DenormIO, FlagsIn};
|
||||
assign FAddResM = Result;
|
||||
assign {AddDenormM, FAddFlgM} = {DenormIO, FlagsIn};
|
||||
|
||||
endmodule // fpadd
|
||||
|
||||
|
@ -1,50 +0,0 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
// Floating-point classify unit.
// Inspects the operand SrcXE and produces a 10-bit class mask (zero-extended
// to 64 bits) identifying which kind of floating-point value it holds.
// Exactly one of the ten class bits is set for any input.
module fpuclassify (
  input  logic [63:0] SrcXE,        // operand; a single-precision value is read from bits [63:32]
  input  logic        FmtE,         // 0-single 1-double
  output logic [63:0] ClassResultE  // class mask in bits [9:0]; bits [63:10] are zero
  );

  logic [31:0] single;
  logic [63:0] double;
  logic        sign;
  logic        infinity, NaN, zero, normal, subnormal;
  logic        ExpNotZero, ExpOnes, ManNotZero, ExpZero, ManZero, FirstBitMan;

  // single and double precision layouts
  assign single = SrcXE[63:32];
  assign double = SrcXE;
  assign sign   = SrcXE[63];

  // basic calculations for readability
  assign ExpNotZero  = FmtE ? |double[62:52] : |single[30:23];
  assign ExpZero     = ~ExpNotZero;
  assign ExpOnes     = FmtE ? &double[62:52] : &single[30:23];
  assign ManNotZero  = FmtE ? |double[51:0] : |single[22:0];
  assign ManZero     = ~ManNotZero;
  assign FirstBitMan = FmtE ? double[51] : single[22];   // MSB of the mantissa field

  // determine the type of number
  assign NaN       = ExpOnes & ManNotZero;
  assign infinity  = ExpOnes & ManZero;
  assign zero      = ExpZero & ManZero;
  assign subnormal = ExpZero & ManNotZero;
  // A normal number has an exponent that is neither all zeros nor all ones.
  // Excluding ExpOnes keeps infinities and NaNs from also being flagged as
  // normal, so the class mask stays one-hot.
  assign normal    = ExpNotZero & ~ExpOnes;

  // determine sub category and combine into the result
  //  bit 0 - -infinity
  //  bit 1 - -normal
  //  bit 2 - -subnormal
  //  bit 3 - -zero
  //  bit 4 - +zero
  //  bit 5 - +subnormal
  //  bit 6 - +normal
  //  bit 7 - +infinity
  //  bit 8 - signaling NaN (mantissa MSB clear)
  //  bit 9 - quiet NaN     (mantissa MSB set)
  assign ClassResultE = {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal,
                         ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity};

endmodule
|
@ -1,243 +0,0 @@
|
||||
// //
|
||||
// // File name : fpcomp.v
|
||||
// // Title : Floating-Point Comparator
|
||||
// // project : FPU
|
||||
// // Library : fpcomp
|
||||
// // Author(s) : James E. Stine
|
||||
// // Purpose : definition of main unit to floating-point comparator
|
||||
// // notes :
|
||||
// //
|
||||
// // Copyright Oklahoma State University
|
||||
// //
|
||||
// // Floating Point Comparator (Algorithm)
|
||||
// //
|
||||
// // 1.) Performs sign-extension if the inputs are 32-bit integers.
|
||||
// // 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
|
||||
// // 3.) Check for special cases (+0=-0, unordered, and infinite values)
|
||||
// // and correct for sign bits
|
||||
// //
|
||||
// // This module takes 64-bits inputs op1 and op2, VSS, and VDD
|
||||
// // signals, and a 2-bit signal Sel that indicates the type of
|
||||
// // operands being compared as indicated below.
|
||||
// // Sel Description
|
||||
// // 00 double precision numbers
|
||||
// // 01 single precision numbers
|
||||
// // 10 half precision numbers
|
||||
// // 11 (unused)
|
||||
// //
|
||||
// // The comparator produces a 2-bit signal FCC, which
|
||||
// // indicates the result of the comparison:
|
||||
// //
|
||||
// // fcc description
|
||||
// // 00 A = B
|
||||
// // 01 A < B
|
||||
// // 10 A > B
|
||||
// // 11 A and B are unordered (i.e., A or B is NaN)
|
||||
// //
|
||||
// // It also produces an invalid operation flag, which is one
|
||||
// // if either of the input operands is a signaling NaN per 754
|
||||
|
||||
// module fpucmp2 (
|
||||
// input logic [63:0] op1,
|
||||
// input logic [63:0] op2,
|
||||
// input logic [1:0] Sel,
|
||||
// input logic [7:0] w, x,
|
||||
// input logic ANaN, BNaN,
|
||||
// input logic Azero, Bzero,
|
||||
// input logic [3:0] FOpCtrlM,
|
||||
// input logic FmtM,
|
||||
|
||||
// output logic Invalid, // Invalid Operation
|
||||
// output logic [1:0] FCC, // Condition Codes
|
||||
// output logic [63:0] FCmpResultM);
|
||||
|
||||
// logic LT; // magnitude op1 < magnitude op2
|
||||
// logic EQ; // magnitude op1 = magnitude op2
|
||||
|
||||
// // Perform magnitude comparison between the 63 least significant bits
|
||||
// // of the input operands. Only LT and EQ are returned, since GT can
|
||||
// // be determined from these values.
|
||||
// magcompare64b_2 magcomp2 (LT, EQ, w, x);
|
||||
|
||||
// // Determine final values based on output of magnitude comparison,
|
||||
// // sign bits, and special case testing.
|
||||
// exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*);
|
||||
|
||||
|
||||
// endmodule // fpcomp
|
||||
|
||||
// /*module magcompare2b (LT, GT, A, B);
|
||||
|
||||
// input logic [1:0] A;
|
||||
// input logic [1:0] B;
|
||||
|
||||
// output logic LT;
|
||||
// output logic GT;
|
||||
|
||||
// // Determine if A < B using a minimized sum-of-products expression
|
||||
// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
|
||||
// // Determine if A > B using a minimized sum-of-products expression
|
||||
// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
|
||||
|
||||
// endmodule*/ // magcompare2b
|
||||
|
||||
// // 2-bit magnitude comparator
|
||||
// // This module compares two 2-bit values A and B. LT is '1' if A < B
|
||||
// // and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
|
||||
// // this version actually incorporates don't cares into the equation to
|
||||
// // simplify the optimization
|
||||
|
||||
// // module magcompare2c (LT, GT, A, B);
|
||||
|
||||
// // input logic [1:0] A;
|
||||
// // input logic [1:0] B;
|
||||
|
||||
// // output logic LT;
|
||||
// // output logic GT;
|
||||
|
||||
// // assign LT = B[1] | (!A[1]&B[0]);
|
||||
// // assign GT = A[1] | (!B[1]&A[0]);
|
||||
|
||||
// // endmodule // magcompare2b
|
||||
|
||||
// // This module compares two 64-bit values A and B. LT is '1' if A < B
|
||||
// // and EQ is '1' if A = B. LT and EQ are both '0' if A > B.
|
||||
// // This structure was modified so
|
||||
// // that it only does a strict magnitude comparison, and only
|
||||
// // returns flags for less than (LT) and equal to (EQ). It uses a tree
|
||||
// // of 63 2-bit magnitude comparators, followed by one OR gates.
|
||||
// //
|
||||
// // J. E. Stine and M. J. Schulte, "A combined two's complement and
|
||||
// // floating-point comparator," 2005 IEEE International Symposium on
|
||||
// // Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
|
||||
// // doi: 10.1109/ISCAS.2005.1464531
|
||||
|
||||
// module magcompare64b_2 (LT, EQ, w, x);
|
||||
|
||||
// input logic [7:0] w;
|
||||
// input logic [7:0] x;
|
||||
// logic [3:0] y;
|
||||
// logic [3:0] z;
|
||||
// logic [1:0] a;
|
||||
// logic [1:0] b;
|
||||
// logic GT;
|
||||
|
||||
// output logic LT;
|
||||
// output logic EQ;
|
||||
|
||||
// magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]);
|
||||
// magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]);
|
||||
// magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]);
|
||||
// magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]);
|
||||
|
||||
// magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]);
|
||||
// magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]);
|
||||
|
||||
// magcompare2c mag3F(LT, GT, b[1:0], a[1:0]);
|
||||
|
||||
// assign EQ = ~(LT | GT);
|
||||
|
||||
// endmodule // magcompare64b
|
||||
|
||||
// // This module takes 64-bits inputs A and B, two magnitude comparison
|
||||
// // flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of
|
||||
// // operands being compared as indicated below.
|
||||
// // Sel Description
|
||||
// // 00 double precision numbers
|
||||
// // 01 single precision numbers
|
||||
// // 10 half precision numbers
|
||||
// // 11 bfloat precision numbers
|
||||
// //
|
||||
// // The comparator produces a 2-bit signal fcc, which
|
||||
// // indicates the result of the comparison as follows:
|
||||
// // fcc description
|
||||
// // 00 A = B
|
||||
// // 01 A < B
|
||||
// // 10 A > B
|
||||
// // 11 A and B are unordered (i.e., A or B is NaN)
|
||||
// // It also produces a invalid operation flag, which is one
|
||||
// // if either of the input operands is a signaling NaN.
|
||||
|
||||
// module exception_cmp_2 (
|
||||
// input logic [63:0] A,
|
||||
// input logic [63:0] B,
|
||||
// input logic FmtM,
|
||||
// input logic LT_mag,
|
||||
// input logic EQ_mag,
|
||||
// input logic [1:0] Sel,
|
||||
// input logic [3:0] FOpCtrlM,
|
||||
|
||||
// output logic invalid,
|
||||
// output logic [1:0] fcc,
|
||||
// output logic [63:0] FCmpResultM,
|
||||
|
||||
// input logic Azero,
|
||||
// input logic Bzero,
|
||||
// input logic ANaN,
|
||||
// input logic BNaN);
|
||||
|
||||
// logic dp;
|
||||
// logic sp;
|
||||
// logic hp;
|
||||
// logic ASNaN;
|
||||
// logic BSNaN;
|
||||
// logic UO;
|
||||
// logic GT;
|
||||
// logic LT;
|
||||
// logic EQ;
|
||||
// logic [62:0] sixtythreezeros = 63'h0;
|
||||
|
||||
// assign dp = !Sel[1]&!Sel[0];
|
||||
// assign sp = !Sel[1]&Sel[0];
|
||||
// assign hp = Sel[1]&!Sel[0];
|
||||
|
||||
// // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating
|
||||
// // point comparison is being performed.
|
||||
// assign UO = (ANaN | BNaN);
|
||||
|
||||
// // Test if A or B is a signaling NaN.
|
||||
// assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]);
|
||||
// assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]);
|
||||
|
||||
// // If either A or B is a signaling NaN the "Invalid Operation"
|
||||
// // exception flag is set to one; otherwise it is zero.
|
||||
// assign invalid = (ASNaN | BSNaN);
|
||||
|
||||
// // A and B are equal if (their magnitudes are equal) AND ((their signs are
|
||||
// // equal) or (their magnitudes are zero AND they are floating point
|
||||
// // numbers)). Also, A and B are not equal if they are unordered.
|
||||
// assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO);
|
||||
|
||||
// // A is less than B if (A is negative and B is positive) OR
|
||||
// // (A and B are positive and the magnitude of A is less than
|
||||
// // the magnitude of B) or (A and B are negative integers and
|
||||
// // the magnitude of A is less than the magnitude of B) or
|
||||
// // (A and B are negative floating point numbers and
|
||||
// // the magnitude of A is greater than the magnitude of B).
|
||||
// // Also, A is not less than B if A and B are equal or unordered.
|
||||
// assign LT = ((~LT_mag & A[63] & B[63]) |
|
||||
// (LT_mag & ~(A[63] & B[63])))&~EQ&~UO;
|
||||
|
||||
// // A is greater than B when LT, EQ, and UO are false.
|
||||
// assign GT = ~(LT | EQ | UO);
|
||||
|
||||
// // Note: it may be possible to optimize the setting of fcc
|
||||
// // a little more, but it is probably not worth the effort.
|
||||
|
||||
// // Set the bits of fcc based on LT, GT, EQ, and UO
|
||||
// assign fcc[0] = LT | UO;
|
||||
// assign fcc[1] = GT | UO;
|
||||
|
||||
// always_comb begin
|
||||
// case (FOpCtrlM[2:0])
|
||||
// 3'b111: FCmpResultM = LT ? A : B;//min
|
||||
// 3'b101: FCmpResultM = GT ? A : B;//max
|
||||
// 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal
|
||||
// 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than
|
||||
// 3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal
|
||||
// default: FCmpResultM = 64'b0;
|
||||
// endcase
|
||||
// end
|
||||
|
||||
|
||||
// endmodule // exception_cmp
|
@ -1,515 +0,0 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
// `include "../../config/rv64icfd/wally-config.vh" //debug
|
||||
|
||||
// freg1adr: floating-point register file with one read port and one write port.
//
//   FmtW      - precision of the operation; currently unused (no word alignment
//               is performed based on it)
//   reset     - passed to the .reset port of every floprc register
//   clear     - passed to the .clear port of every floprc register
//   clk       - clock for every floprc register
//   rd        - destination register number for a write
//   write     - when high, writeData is presented to register rd
//   adr1      - register number driving the read port
//   writeData - value to store into register rd
//   readData  - current contents of register adr1
//
// Fixes relative to the previous revision:
//   * the write decoder had duplicated / shifted case labels (5'b01000,
//     5'b01111, 5'b11000 and 5'b11111 each appeared twice), so registers
//     9-12 and 25-28 were addressed incorrectly;
//   * regInput/regOutput had their packed dimensions swapped, giving XLEN
//     entries of numRegs bits instead of numRegs entries of XLEN bits;
//   * regInput was only assigned inside "if (write)", which infers latches
//     from always_comb; every entry now has a default value;
//   * the generated flop instance was named freg[i], which is not a legal
//     instance name inside a generate loop.
module freg1adr (
  input  logic             FmtW,
  input  logic             reset,
  input  logic             clear,
  input  logic             clk,
  input  logic [4:0]       rd,
  input  logic             write,
  input  logic [4:0]       adr1,
  input  logic [`XLEN-1:0] writeData,
  output logic [`XLEN-1:0] readData);

  // The register count should remain static, but it does not hurt to
  // parameterize it.
  parameter numRegs = 32;

  // Intermediate signals: numRegs entries of XLEN bits each.
  // regInput[i] feeds the d input of register i, regOutput[i] is its q output.
  logic [numRegs-1:0] [`XLEN-1:0] regInput;
  logic [numRegs-1:0] [`XLEN-1:0] regOutput;

  // Generate the floating-point registers themselves.
  genvar i;
  generate
    for (i = 0; i < numRegs; i = i + 1) begin:register
      floprc #(`XLEN) freg(.clk(clk), .reset(reset), .clear(clear),
                           .d(regInput[i]), .q(regOutput[i]));
    end
  endgenerate

  // Read port: select the register addressed by adr1.
  // NOTE(review): the old case statement returned zero for an X/Z address;
  // a direct index propagates X instead. Every defined 5-bit address behaves
  // the same as before.
  always_comb
    readData = regOutput[adr1];

  // Write port: by default every register reloads its own value; on a write
  // only the register selected by rd is presented with writeData. This is an
  // intermediate signal, so the stored value changes only when the flop
  // captures it.
  always_comb begin
    regInput = regOutput;
    if (write)
      regInput[rd] = writeData;
  end

endmodule
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//********
|
||||
//formatting separation
|
||||
//********
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// freg2adr: floating-point register file with two read ports and one write
// port. The original comments say it is used with fpadd/cvt, fpdiv/sqrt and
// fpcmp.
//
//   FmtW       - precision of the operation; currently unused (no word
//                alignment is performed based on it)
//   reset      - passed to the .reset port of every floprc register
//   clear      - passed to the .clear port of every floprc register
//   clk        - clock for every floprc register
//   rd         - destination register number for a write
//   write      - when high, writeData is presented to register rd
//   adr1, adr2 - register numbers driving read ports 1 and 2
//   writeData  - value to store into register rd
//   readData1  - current contents of register adr1
//   readData2  - current contents of register adr2
//
// Fixes relative to the previous revision:
//   * the write decoder had duplicated / shifted case labels (5'b01000,
//     5'b01111, 5'b11000 and 5'b11111 each appeared twice), so registers
//     9-12 and 25-28 were addressed incorrectly;
//   * regInput/regOutput had their packed dimensions swapped, giving XLEN
//     entries of numRegs bits instead of numRegs entries of XLEN bits;
//   * regInput was only assigned inside "if (write)", which infers latches
//     from always_comb; every entry now has a default value;
//   * the generated flop instance was named freg[i], which is not a legal
//     instance name inside a generate loop.
module freg2adr (
  input  logic             FmtW,
  input  logic             reset,
  input  logic             clear,
  input  logic             clk,
  input  logic [4:0]       rd,
  input  logic             write,
  input  logic [4:0]       adr1,
  input  logic [4:0]       adr2,
  input  logic [`XLEN-1:0] writeData,
  output logic [`XLEN-1:0] readData1,
  output logic [`XLEN-1:0] readData2);

  // The register count should remain static, but it does not hurt to
  // parameterize it.
  parameter numRegs = 32;

  // Intermediate signals: numRegs entries of XLEN bits each.
  // regInput[i] feeds the d input of register i, regOutput[i] is its q output.
  logic [numRegs-1:0] [`XLEN-1:0] regInput;
  logic [numRegs-1:0] [`XLEN-1:0] regOutput;

  // Generate the floating-point registers themselves.
  genvar i;
  generate
    for (i = 0; i < numRegs; i = i + 1) begin:register
      floprc #(`XLEN) freg(.clk(clk), .reset(reset), .clear(clear),
                           .d(regInput[i]), .q(regOutput[i]));
    end
  endgenerate

  // Read ports: each selects the register addressed by its own adr input.
  // NOTE(review): the old case statements returned zero for an X/Z address;
  // a direct index propagates X instead. Every defined 5-bit address behaves
  // the same as before.
  always_comb begin
    readData1 = regOutput[adr1];
    readData2 = regOutput[adr2];
  end

  // Write port: by default every register reloads its own value; on a write
  // only the register selected by rd is presented with writeData. This is an
  // intermediate signal, so the stored value changes only when the flop
  // captures it.
  always_comb begin
    regInput = regOutput;
    if (write)
      regInput[rd] = writeData;
  end

endmodule
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//********
|
||||
//formatting separation
|
||||
//********
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// freg3adr: floating-point register file with three read ports and one write
// port. The original comments say it is used exclusively for fma.
//
//   FmtW             - precision of the operation; currently unused (no word
//                      alignment is performed based on it)
//   reset            - passed to the .reset port of every floprc register
//   clear            - passed to the .clear port of every floprc register
//   clk              - clock for every floprc register
//   rd               - destination register number for a write
//   write            - when high, writeData is presented to register rd
//   adr1, adr2, adr3 - register numbers driving read ports 1, 2 and 3
//   writeData        - value to store into register rd
//   readData1..3     - current contents of registers adr1, adr2 and adr3
//
// Fix relative to the previous revision: regInput was only assigned inside
// "if (write)", one entry per case item, so always_comb inferred a latch for
// every entry, and the default branch drove X onto register 0. Every entry
// now defaults to the register's current output.
module freg3adr (
  input  logic             FmtW,
  input  logic             reset,
  input  logic             clear,
  input  logic             clk,
  input  logic [4:0]       rd,
  input  logic             write,
  input  logic [4:0]       adr1,
  input  logic [4:0]       adr2,
  input  logic [4:0]       adr3,
  input  logic [`XLEN-1:0] writeData,
  output logic [`XLEN-1:0] readData1,
  output logic [`XLEN-1:0] readData2,
  output logic [`XLEN-1:0] readData3);

  // The register count should remain static, but it does not hurt to
  // parameterize it.
  parameter numRegs = 32;

  // Intermediate signals: numRegs entries of XLEN bits each.
  // regInput[i] feeds the d input of register i, regOutput[i] is its q output.
  logic [numRegs-1:0] [`XLEN-1:0] regInput;
  logic [numRegs-1:0] [`XLEN-1:0] regOutput;

  // Generate the floating-point registers themselves.
  genvar i;
  generate
    for (i = 0; i < numRegs; i = i + 1) begin:register
      floprc #(`XLEN) freg(.clk(clk), .reset(reset), .clear(clear),
                           .d(regInput[i]), .q(regOutput[i]));
    end
  endgenerate

  // Read ports: each selects the register addressed by its own adr input.
  // NOTE(review): the old case statements returned zero for an X/Z address;
  // a direct index propagates X instead. Every defined 5-bit address behaves
  // the same as before.
  always_comb begin
    readData1 = regOutput[adr1];
    readData2 = regOutput[adr2];
    readData3 = regOutput[adr3];
  end

  // Write port: by default every register reloads its own value; on a write
  // only the register selected by rd is presented with writeData. This is an
  // intermediate signal, so the stored value changes only when the flop
  // captures it.
  always_comb begin
    regInput = regOutput;
    if (write)
      regInput[rd] = writeData;
  end

endmodule
|
@ -25,7 +25,7 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module FPregfile (
|
||||
module fregfile (
|
||||
input logic clk, reset,
|
||||
input logic we4,
|
||||
input logic [ 4:0] a1, a2, a3, a4,
|
@ -1,13 +1,12 @@
|
||||
//performs the fsgnj/fsgnjn/fsgnjx RISCV instructions
|
||||
|
||||
module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE);
|
||||
module fsgn (
|
||||
input logic [63:0] SrcXE, SrcYE,
|
||||
input logic [1:0] SgnOpCodeE,
|
||||
output logic [63:0] SgnResE,
|
||||
output logic SgnNVE);
|
||||
|
||||
input [63:0] SrcXE, SrcYE;
|
||||
input [1:0] SgnOpCodeE;
|
||||
output [63:0] SgnResultE;
|
||||
output [4:0] SgnFlagsE;
|
||||
|
||||
wire AonesExp;
|
||||
logic AonesExp;
|
||||
|
||||
//op code designation:
|
||||
//
|
||||
@ -16,8 +15,8 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE);
|
||||
//10 - fsgnjx - XOR sign values of SrcXE & SrcYE
|
||||
//
|
||||
|
||||
assign SgnResultE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]);
|
||||
assign SgnResultE[62:0] = SrcXE[62:0];
|
||||
assign SgnResE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]);
|
||||
assign SgnResE[62:0] = SrcXE[62:0];
|
||||
|
||||
//If the exponent is all ones, then the value is either Inf or NaN,
|
||||
//both of which will produce a QNaN/SNaN value of some sort. This will
|
||||
@ -26,6 +25,6 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE);
|
||||
|
||||
//the only flag that can occur during this operation is invalid
|
||||
//due to changing sign on already existing NaN
|
||||
assign SgnFlagsE = {AonesExp & SgnResultE[63], 1'b0, 1'b0, 1'b0, 1'b0};
|
||||
assign SgnNVE = AonesExp & SgnResE[63];
|
||||
|
||||
endmodule
|
||||
|
@ -1,89 +0,0 @@
|
||||
// Brent-Kung Prefix Adder
|
||||
|
||||
// 13-bit adder built around the ling_brent_kung prefix tree.
//   a, b - 13-bit operands
//   cin  - carry in
//   sum  - 13-bit result
//   cout - carry out
module ling_bk13 (
  output        cout,
  output [12:0] sum,
  input  [12:0] a,
  input  [12:0] b,
  input         cin);

  wire [13:0] p;   // per-bit a|b, with a constant 1 in bit 0
  wire [13:0] g;   // per-bit a&b, with cin in bit 0
  wire [13:1] h;   // h outputs of the prefix tree; bit 13 is formed below
  wire [13:1] c;   // c outputs of the prefix tree

  // Pre-computation: bit 0 of each vector carries the carry-in term.
  assign p = {a | b, 1'b1};
  assign g = {a & b, cin};

  // Prefix tree: only the low 13 bits of p and g are passed in.
  ling_brent_kung prefix_tree (
    .h(h),
    .c(c),
    .p(p[12:0]),
    .g(g[12:0]));

  // Post-computation.
  assign h[13] = g[13] | c[13];
  assign sum   = (p[13:1] ^ h) | (g[13:1] & c);
  assign cout  = p[13] & h[13];

endmodule
|
||||
|
||||
// Brent-Kung style parallel-prefix tree used by ling_bk13.
//   p - 13-bit input, combined with the prefix terms to form c
//   g - 14-bit input feeding the first row of cells
//   h - prefix terms H_k_0 on bits 12:1 (bit 13 is not driven here)
//   c - c[1] = g[0], and c[k+1] = p[k] & H_k_0 for k = 1..12
// The rgry, rblk, grey and black cells are defined elsewhere; their ports
// are connected by position exactly as before.
module ling_brent_kung (
  output [13:1] h,
  output [13:1] c,
  input  [12:0] p,
  input  [13:0] g);

  // First-row cell outputs (H/I pairs over adjacent bit pairs).
  wire H_1_0;
  wire H_3_2,   I_3_2;
  wire H_5_4,   I_5_4;
  wire H_7_6,   I_7_6;
  wire H_9_8,   I_9_8;
  wire H_11_10, I_11_10;
  wire H_13_12, I_13_12;   // produced but not used further in this module

  // Second-row black-cell outputs.
  wire H_7_4,  I_7_4;
  wire H_11_8, I_11_8;

  // Prefix terms H_k_0 for k = 2..12.
  wire H_2_0, H_3_0, H_4_0,  H_5_0,  H_6_0, H_7_0;
  wire H_8_0, H_9_0, H_10_0, H_11_0, H_12_0;

  // All prefix terms gathered by bit index, so prefix[k] = H_k_0.
  wire [12:1] prefix;

  // Row 1: cells over adjacent bit pairs.
  rgry g_1_0   (H_1_0, {g[1], g[0]});
  rblk b_3_2   (H_3_2,   I_3_2,   {g[3],  g[2]},  {p[2],  p[1]});
  rblk b_5_4   (H_5_4,   I_5_4,   {g[5],  g[4]},  {p[4],  p[3]});
  rblk b_7_6   (H_7_6,   I_7_6,   {g[7],  g[6]},  {p[6],  p[5]});
  rblk b_9_8   (H_9_8,   I_9_8,   {g[9],  g[8]},  {p[8],  p[7]});
  rblk b_11_10 (H_11_10, I_11_10, {g[11], g[10]}, {p[10], p[9]});
  rblk b_13_12 (H_13_12, I_13_12, {g[13], g[12]}, {p[12], p[11]});

  // Row 2: merge neighbouring pairs.
  grey  g_3_0  (H_3_0, {H_3_2, H_1_0}, I_3_2);
  black b_7_4  (H_7_4,  I_7_4,  {H_7_6,   H_5_4}, {I_7_6,   I_5_4});
  black b_11_8 (H_11_8, I_11_8, {H_11_10, H_9_8}, {I_11_10, I_9_8});

  // Row 3: prefix for bits 7:0.
  grey g_7_0 (H_7_0, {H_7_4, H_3_0}, I_7_4);

  // Fan-out rows: build the remaining odd-index prefixes from the ones above.
  grey g_11_0 (H_11_0, {H_11_8, H_7_0}, I_11_8);
  grey g_5_0  (H_5_0,  {H_5_4,  H_3_0}, I_5_4);
  grey g_9_0  (H_9_0,  {H_9_8,  H_7_0}, I_9_8);

  // Last grey-cell row: even-index prefixes.
  grey g_2_0  (H_2_0,  {g[2],  H_1_0},  p[1]);
  grey g_4_0  (H_4_0,  {g[4],  H_3_0},  p[3]);
  grey g_6_0  (H_6_0,  {g[6],  H_5_0},  p[5]);
  grey g_8_0  (H_8_0,  {g[8],  H_7_0},  p[7]);
  grey g_10_0 (H_10_0, {g[10], H_9_0},  p[9]);
  grey g_12_0 (H_12_0, {g[12], H_11_0}, p[11]);

  assign prefix = {H_12_0, H_11_0, H_10_0, H_9_0, H_8_0, H_7_0,
                   H_6_0,  H_5_0,  H_4_0,  H_3_0, H_2_0, H_1_0};

  // Final stage: h[k] = H_k_0 and c[k+1] = p[k] & H_k_0, with c[1] = g[0].
  assign h[12:1] = prefix;
  assign c       = {p[12:1] & prefix, g[0]};

endmodule
|
||||
|
||||
|
@ -168,3 +168,4 @@ module lz52 (ZP, ZV, B);
|
||||
|
||||
endmodule // lz52
|
||||
|
||||
|
||||
|
0
wally-pipelined/src/fpu/mult_R4_64_64_cs.sv
Executable file → Normal file
0
wally-pipelined/src/fpu/mult_R4_64_64_cs.sv
Executable file → Normal file
@ -115,11 +115,11 @@ module rounder (Result, DenormIO, Flags, rm, P, OvEn,
|
||||
assign B_12_overflow = {8'h0, 3'b0, normal_overflow};
|
||||
assign B_12_underflow = {8'h0, 3'b0, normal_underflow};
|
||||
|
||||
cla52 add1(Tmant, Cout, A[62:11], B);
|
||||
cla52 add1(Tmant, Cout, A[62:11], B); //***adder
|
||||
|
||||
cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow);
|
||||
cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow); //***adder
|
||||
|
||||
cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow);
|
||||
cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow); //***adder
|
||||
|
||||
// Now that rounding is done, we compute the final exponent
|
||||
// and test for special cases.
|
||||
|
@ -1,204 +0,0 @@
|
||||
module sbtm_a4 (input logic [7:0] a,
|
||||
output logic [13:0] y);
|
||||
always_comb
|
||||
case(a)
|
||||
8'b01000000: y = 14'b10110100010111;
|
||||
8'b01000001: y = 14'b10110010111111;
|
||||
8'b01000010: y = 14'b10110001101000;
|
||||
8'b01000011: y = 14'b10110000010011;
|
||||
8'b01000100: y = 14'b10101111000001;
|
||||
8'b01000101: y = 14'b10101101110000;
|
||||
8'b01000110: y = 14'b10101100100001;
|
||||
8'b01000111: y = 14'b10101011010011;
|
||||
8'b01001000: y = 14'b10101010000111;
|
||||
8'b01001001: y = 14'b10101000111101;
|
||||
8'b01001010: y = 14'b10100111110100;
|
||||
8'b01001011: y = 14'b10100110101101;
|
||||
8'b01001100: y = 14'b10100101100111;
|
||||
8'b01001101: y = 14'b10100100100010;
|
||||
8'b01001110: y = 14'b10100011011111;
|
||||
8'b01001111: y = 14'b10100010011101;
|
||||
8'b01010000: y = 14'b10100001011100;
|
||||
8'b01010001: y = 14'b10100000011100;
|
||||
8'b01010010: y = 14'b10011111011110;
|
||||
8'b01010011: y = 14'b10011110100001;
|
||||
8'b01010100: y = 14'b10011101100100;
|
||||
8'b01010101: y = 14'b10011100101001;
|
||||
8'b01010110: y = 14'b10011011101111;
|
||||
8'b01010111: y = 14'b10011010110110;
|
||||
8'b01011000: y = 14'b10011001111110;
|
||||
8'b01011001: y = 14'b10011001000110;
|
||||
8'b01011010: y = 14'b10011000010000;
|
||||
8'b01011011: y = 14'b10010111011011;
|
||||
8'b01011100: y = 14'b10010110100110;
|
||||
8'b01011101: y = 14'b10010101110011;
|
||||
8'b01011110: y = 14'b10010101000000;
|
||||
8'b01011111: y = 14'b10010100001110;
|
||||
8'b01100000: y = 14'b10010011011100;
|
||||
8'b01100001: y = 14'b10010010101100;
|
||||
8'b01100010: y = 14'b10010001111100;
|
||||
8'b01100011: y = 14'b10010001001101;
|
||||
8'b01100100: y = 14'b10010000011111;
|
||||
8'b01100101: y = 14'b10001111110001;
|
||||
8'b01100110: y = 14'b10001111000100;
|
||||
8'b01100111: y = 14'b10001110011000;
|
||||
8'b01101000: y = 14'b10001101101100;
|
||||
8'b01101001: y = 14'b10001101000001;
|
||||
8'b01101010: y = 14'b10001100010110;
|
||||
8'b01101011: y = 14'b10001011101100;
|
||||
8'b01101100: y = 14'b10001011000011;
|
||||
8'b01101101: y = 14'b10001010011010;
|
||||
8'b01101110: y = 14'b10001001110010;
|
||||
8'b01101111: y = 14'b10001001001010;
|
||||
8'b01110000: y = 14'b10001000100011;
|
||||
8'b01110001: y = 14'b10000111111101;
|
||||
8'b01110010: y = 14'b10000111010111;
|
||||
8'b01110011: y = 14'b10000110110001;
|
||||
8'b01110100: y = 14'b10000110001100;
|
||||
8'b01110101: y = 14'b10000101100111;
|
||||
8'b01110110: y = 14'b10000101000011;
|
||||
8'b01110111: y = 14'b10000100011111;
|
||||
8'b01111000: y = 14'b10000011111100;
|
||||
8'b01111001: y = 14'b10000011011001;
|
||||
8'b01111010: y = 14'b10000010110111;
|
||||
8'b01111011: y = 14'b10000010010101;
|
||||
8'b01111100: y = 14'b10000001110011;
|
||||
8'b01111101: y = 14'b10000001010010;
|
||||
8'b01111110: y = 14'b10000000110001;
|
||||
8'b01111111: y = 14'b10000000010001;
|
||||
8'b10000000: y = 14'b01111111110001;
|
||||
8'b10000001: y = 14'b01111111010001;
|
||||
8'b10000010: y = 14'b01111110110010;
|
||||
8'b10000011: y = 14'b01111110010011;
|
||||
8'b10000100: y = 14'b01111101110101;
|
||||
8'b10000101: y = 14'b01111101010110;
|
||||
8'b10000110: y = 14'b01111100111001;
|
||||
8'b10000111: y = 14'b01111100011011;
|
||||
8'b10001000: y = 14'b01111011111110;
|
||||
8'b10001001: y = 14'b01111011100001;
|
||||
8'b10001010: y = 14'b01111011000100;
|
||||
8'b10001011: y = 14'b01111010101000;
|
||||
8'b10001100: y = 14'b01111010001100;
|
||||
8'b10001101: y = 14'b01111001110000;
|
||||
8'b10001110: y = 14'b01111001010101;
|
||||
8'b10001111: y = 14'b01111000111010;
|
||||
8'b10010000: y = 14'b01111000011111;
|
||||
8'b10010001: y = 14'b01111000000100;
|
||||
8'b10010010: y = 14'b01110111101010;
|
||||
8'b10010011: y = 14'b01110111010000;
|
||||
8'b10010100: y = 14'b01110110110110;
|
||||
8'b10010101: y = 14'b01110110011101;
|
||||
8'b10010110: y = 14'b01110110000100;
|
||||
8'b10010111: y = 14'b01110101101011;
|
||||
8'b10011000: y = 14'b01110101010010;
|
||||
8'b10011001: y = 14'b01110100111001;
|
||||
8'b10011010: y = 14'b01110100100001;
|
||||
8'b10011011: y = 14'b01110100001001;
|
||||
8'b10011100: y = 14'b01110011110001;
|
||||
8'b10011101: y = 14'b01110011011010;
|
||||
8'b10011110: y = 14'b01110011000010;
|
||||
8'b10011111: y = 14'b01110010101011;
|
||||
8'b10100000: y = 14'b01110010010100;
|
||||
8'b10100001: y = 14'b01110001111110;
|
||||
8'b10100010: y = 14'b01110001100111;
|
||||
8'b10100011: y = 14'b01110001010001;
|
||||
8'b10100100: y = 14'b01110000111011;
|
||||
8'b10100101: y = 14'b01110000100101;
|
||||
8'b10100110: y = 14'b01110000001111;
|
||||
8'b10100111: y = 14'b01101111111010;
|
||||
8'b10101000: y = 14'b01101111100101;
|
||||
8'b10101001: y = 14'b01101111010000;
|
||||
8'b10101010: y = 14'b01101110111011;
|
||||
8'b10101011: y = 14'b01101110100110;
|
||||
8'b10101100: y = 14'b01101110010001;
|
||||
8'b10101101: y = 14'b01101101111101;
|
||||
8'b10101110: y = 14'b01101101101001;
|
||||
8'b10101111: y = 14'b01101101010101;
|
||||
8'b10110000: y = 14'b01101101000001;
|
||||
8'b10110001: y = 14'b01101100101101;
|
||||
8'b10110010: y = 14'b01101100011010;
|
||||
8'b10110011: y = 14'b01101100000110;
|
||||
8'b10110100: y = 14'b01101011110011;
|
||||
8'b10110101: y = 14'b01101011100000;
|
||||
8'b10110110: y = 14'b01101011001101;
|
||||
8'b10110111: y = 14'b01101010111010;
|
||||
8'b10111000: y = 14'b01101010101000;
|
||||
8'b10111001: y = 14'b01101010010101;
|
||||
8'b10111010: y = 14'b01101010000011;
|
||||
8'b10111011: y = 14'b01101001110001;
|
||||
8'b10111100: y = 14'b01101001011111;
|
||||
8'b10111101: y = 14'b01101001001101;
|
||||
8'b10111110: y = 14'b01101000111100;
|
||||
8'b10111111: y = 14'b01101000101010;
|
||||
8'b11000000: y = 14'b01101000011001;
|
||||
8'b11000001: y = 14'b01101000000111;
|
||||
8'b11000010: y = 14'b01100111110110;
|
||||
8'b11000011: y = 14'b01100111100101;
|
||||
8'b11000100: y = 14'b01100111010100;
|
||||
8'b11000101: y = 14'b01100111000011;
|
||||
8'b11000110: y = 14'b01100110110011;
|
||||
8'b11000111: y = 14'b01100110100010;
|
||||
8'b11001000: y = 14'b01100110010010;
|
||||
8'b11001001: y = 14'b01100110000010;
|
||||
8'b11001010: y = 14'b01100101110010;
|
||||
8'b11001011: y = 14'b01100101100001;
|
||||
8'b11001100: y = 14'b01100101010010;
|
||||
8'b11001101: y = 14'b01100101000010;
|
||||
8'b11001110: y = 14'b01100100110010;
|
||||
8'b11001111: y = 14'b01100100100011;
|
||||
8'b11010000: y = 14'b01100100010011;
|
||||
8'b11010001: y = 14'b01100100000100;
|
||||
8'b11010010: y = 14'b01100011110101;
|
||||
8'b11010011: y = 14'b01100011100101;
|
||||
8'b11010100: y = 14'b01100011010110;
|
||||
8'b11010101: y = 14'b01100011000111;
|
||||
8'b11010110: y = 14'b01100010111001;
|
||||
8'b11010111: y = 14'b01100010101010;
|
||||
8'b11011000: y = 14'b01100010011011;
|
||||
8'b11011001: y = 14'b01100010001101;
|
||||
8'b11011010: y = 14'b01100001111110;
|
||||
8'b11011011: y = 14'b01100001110000;
|
||||
8'b11011100: y = 14'b01100001100010;
|
||||
8'b11011101: y = 14'b01100001010100;
|
||||
8'b11011110: y = 14'b01100001000110;
|
||||
8'b11011111: y = 14'b01100000111000;
|
||||
8'b11100000: y = 14'b01100000101010;
|
||||
8'b11100001: y = 14'b01100000011100;
|
||||
8'b11100010: y = 14'b01100000001111;
|
||||
8'b11100011: y = 14'b01100000000001;
|
||||
8'b11100100: y = 14'b01011111110100;
|
||||
8'b11100101: y = 14'b01011111100110;
|
||||
8'b11100110: y = 14'b01011111011001;
|
||||
8'b11100111: y = 14'b01011111001100;
|
||||
8'b11101000: y = 14'b01011110111111;
|
||||
8'b11101001: y = 14'b01011110110010;
|
||||
8'b11101010: y = 14'b01011110100101;
|
||||
8'b11101011: y = 14'b01011110011000;
|
||||
8'b11101100: y = 14'b01011110001011;
|
||||
8'b11101101: y = 14'b01011101111110;
|
||||
8'b11101110: y = 14'b01011101110010;
|
||||
8'b11101111: y = 14'b01011101100101;
|
||||
8'b11110000: y = 14'b01011101011001;
|
||||
8'b11110001: y = 14'b01011101001100;
|
||||
8'b11110010: y = 14'b01011101000000;
|
||||
8'b11110011: y = 14'b01011100110100;
|
||||
8'b11110100: y = 14'b01011100101000;
|
||||
8'b11110101: y = 14'b01011100011100;
|
||||
8'b11110110: y = 14'b01011100010000;
|
||||
8'b11110111: y = 14'b01011100000100;
|
||||
8'b11111000: y = 14'b01011011111000;
|
||||
8'b11111001: y = 14'b01011011101100;
|
||||
8'b11111010: y = 14'b01011011100000;
|
||||
8'b11111011: y = 14'b01011011010101;
|
||||
8'b11111100: y = 14'b01011011001001;
|
||||
8'b11111101: y = 14'b01011010111101;
|
||||
8'b11111110: y = 14'b01011010110010;
|
||||
8'b11111111: y = 14'b01011010100111;
|
||||
default: y = 14'bxxxxxxxxxxxxxx;
|
||||
endcase // case (a)
|
||||
|
||||
endmodule // sbtm_a4
|
||||
|
||||
|
||||
|
||||
|
@ -1,90 +0,0 @@
|
||||
// Sklansky Prefix Adder
|
||||
|
||||
// sk14: 14-bit adder built around the Sklansky parallel-prefix carry tree.
//   a, b : 14-bit addends
//   cin  : carry in
//   sum  : 14-bit sum
//   cout : carry out of the most-significant bit
module sk14 (cout, sum, a, b, cin);

   output        cout;
   output [13:0] sum;
   input  [13:0] a, b;
   input         cin;

   // Position 0 of the prefix vectors represents the carry-in (it generates cin and
   // never propagates); positions 14:1 hold the per-bit propagate/generate of a and b.
   wire [14:0]   prop, gen;
   wire [13:0]   carry;      // carry[i] is the carry into sum bit i

   // pre-computation
   assign gen  = {a & b, cin};
   assign prop = {a ^ b, 1'b0};

   // prefix tree (only positions 13:0 are handed over, exactly as before;
   // the sklansky ports themselves are declared 15 bits wide)
   sklansky prefix_tree (.c(carry), .p(prop[13:0]), .g(gen[13:0]));

   // post-computation
   assign cout = (carry[13] & prop[14]) | gen[14];
   assign sum  = carry ^ prop[14:1];

endmodule // sk14
|
||||
|
||||
// sklansky: Sklansky parallel-prefix carry tree.
//   p, g : per-position propagate / generate inputs
//   c    : c[k+1] is the group generate over positions k..0, for k = 0..13
//
// Only g[13:0] and p[13:1] contribute to c. The ports stay 15 bits wide so that
// existing instantiations keep compiling, but bit 14 is not read any more.
//
// Cleanup relative to the previous version: the cells b_14_12, b_15_12, b_14_8,
// b_15_8, g_14_0 and g_15_0 were removed. None of their outputs reached c, and
// b_15_12 read G_15_14 / P_15_14, which were never driven by anything.
// The internal nets are now declared explicitly instead of relying on implicit nets.
//
// NOTE(review): grey and black are prefix cells defined elsewhere in the project;
// presumably grey produces only a group generate and black produces group generate
// and group propagate - confirm against their definitions.
module sklansky (c, p, g);

   input [14:0]  p;
   input [14:0]  g;
   output [14:1] c;

   // Stage 1 nets
   wire G_1_0;
   wire G_3_2, P_3_2;
   wire G_5_4, P_5_4;
   wire G_7_6, P_7_6;
   wire G_9_8, P_9_8;
   wire G_11_10, P_11_10;
   wire G_13_12, P_13_12;
   // Stage 2 nets
   wire G_2_0, G_3_0;
   wire G_6_4, P_6_4;
   wire G_7_4, P_7_4;
   wire G_10_8, P_10_8;
   wire G_11_8, P_11_8;
   // Stage 3 nets
   wire G_4_0, G_5_0, G_6_0, G_7_0;
   wire G_12_8, P_12_8;
   wire G_13_8, P_13_8;
   // Stage 4 nets
   wire G_8_0, G_9_0, G_10_0, G_11_0, G_12_0, G_13_0;

   // parallel-prefix, Sklansky
   // Stage 1: Generates G/P pairs that span 1 bits
   grey  b_1_0   (G_1_0, {g[1],g[0]}, p[1]);
   black b_3_2   (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
   black b_5_4   (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
   black b_7_6   (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
   black b_9_8   (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
   black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
   black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});

   // Stage 2: Generates G/P pairs that span 2 bits
   grey  g_2_0   (G_2_0, {g[2],G_1_0}, p[2]);
   grey  g_3_0   (G_3_0, {G_3_2,G_1_0}, P_3_2);
   black b_6_4   (G_6_4, P_6_4, {g[6],G_5_4}, {p[6],P_5_4});
   black b_7_4   (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
   black b_10_8  (G_10_8, P_10_8, {g[10],G_9_8}, {p[10],P_9_8});
   black b_11_8  (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});

   // Stage 3: Generates G/P pairs that span 4 bits
   grey  g_4_0   (G_4_0, {g[4],G_3_0}, p[4]);
   grey  g_5_0   (G_5_0, {G_5_4,G_3_0}, P_5_4);
   grey  g_6_0   (G_6_0, {G_6_4,G_3_0}, P_6_4);
   grey  g_7_0   (G_7_0, {G_7_4,G_3_0}, P_7_4);
   black b_12_8  (G_12_8, P_12_8, {g[12],G_11_8}, {p[12],P_11_8});
   black b_13_8  (G_13_8, P_13_8, {G_13_12,G_11_8}, {P_13_12,P_11_8});

   // Stage 4: Generates G/P pairs that span 8 bits
   grey  g_8_0   (G_8_0, {g[8],G_7_0}, p[8]);
   grey  g_9_0   (G_9_0, {G_9_8,G_7_0}, P_9_8);
   grey  g_10_0  (G_10_0, {G_10_8,G_7_0}, P_10_8);
   grey  g_11_0  (G_11_0, {G_11_8,G_7_0}, P_11_8);
   grey  g_12_0  (G_12_0, {G_12_8,G_7_0}, P_12_8);
   grey  g_13_0  (G_13_0, {G_13_8,G_7_0}, P_13_8);

   // Final Stage: Apply c_k+1=G_k_0
   assign c[1]=g[0];
   assign c[2]=G_1_0;
   assign c[3]=G_2_0;
   assign c[4]=G_3_0;
   assign c[5]=G_4_0;
   assign c[6]=G_5_0;
   assign c[7]=G_6_0;
   assign c[8]=G_7_0;
   assign c[9]=G_8_0;

   assign c[10]=G_9_0;
   assign c[11]=G_10_0;
   assign c[12]=G_11_0;
   assign c[13]=G_12_0;
   assign c[14]=G_13_0;

endmodule // sklansky
|
||||
|
@ -1,195 +0,0 @@
|
||||
///////////////////////////////////////////
|
||||
// lzd.sv
|
||||
//
|
||||
// Written: James.Stine@okstate.edu 1 February 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Hierarchical leading-zero detector (LZD) modules for 2- to 128-bit inputs
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
/* verilator lint_off DECLFILENAME */
|
||||
|
||||
// Original idea came from V. G. Oklobdzija, "An algorithmic and novel
|
||||
// design of a leading zero detector circuit: comparison with logic
|
||||
// synthesis," in IEEE Transactions on Very Large Scale Integration
|
||||
// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi:
|
||||
// 10.1109/92.273153.
|
||||
|
||||
// Modified to be more hierarchical
|
||||
|
||||
// lz2: base cell of the hierarchical leading-zero detector.
//   B : 2-bit input, B[1] is the more significant bit
//   V : 1 when B contains at least one 1
//   P : 1 when the only set bit is B[0], i.e. one zero sits above the highest 1
module lz2 (P, V, B);

   output logic       P;
   output logic       V;
   input  logic [1:0] B;

   assign V = |B;
   assign P = ~B[1] & B[0];

endmodule // lz2
|
||||
|
||||
// lzd_hier: width-parameterized wrapper that selects the fixed-width
// leading-zero detector matching WIDTH.
//   WIDTH : input width; must be one of 4, 8, 16, 32, 64 or 128
//   B     : vector to examine
//   ZP    : output of the selected detector's ZP port ($clog2(WIDTH) bits)
//   ZV    : output of the selected detector's ZV port
//
// Any other WIDTH used to instantiate nothing and silently leave ZP/ZV undriven;
// it is now rejected at elaboration time.
module lzd_hier #(parameter WIDTH=8)
  (input  logic [WIDTH-1:0]         B,
   output logic [$clog2(WIDTH)-1:0] ZP,
   output logic                     ZV);

   if (WIDTH == 128)
     lz128 lzd127 (ZP, ZV, B);   // instance name kept as-is so hierarchical paths do not change
   else if (WIDTH == 64)
     lz64 lzd64 (ZP, ZV, B);
   else if (WIDTH == 32)
     lz32 lzd32 (ZP, ZV, B);
   else if (WIDTH == 16)
     lz16 lzd16 (ZP, ZV, B);
   else if (WIDTH == 8)
     lz8 lzd8 (ZP, ZV, B);
   else if (WIDTH == 4)
     lz4 lzd4 (ZP, ZV, B);
   else
     $error("lzd_hier: unsupported WIDTH=%0d (expected 4, 8, 16, 32, 64 or 128)", WIDTH);

endmodule // lzd_hier
|
||||
|
||||
// lz4: 4-bit leading-zero detector assembled from two lz2 cells.
//   B  : vector to examine, B[3] is the most-significant bit
//   ZP : number of zero bits above the highest set bit of B (meaningful when ZV = 1)
//   ZV : 1 when at least one bit of B is set
module lz4 (ZP, ZV, B);

   output logic [1:0] ZP;
   output logic       ZV;
   input  logic [3:0] B;

   logic              PosLo, PosHi;       // counts reported by the low / high pair
   logic              ValidLo, ValidHi;   // "contains a 1" flag of the low / high pair

   lz2 l1 (.P(PosLo), .V(ValidLo), .B(B[1:0]));
   lz2 l2 (.P(PosHi), .V(ValidHi), .B(B[3:2]));

   // The high pair decides when it holds a 1; otherwise the top bit of ZP is set
   // and the low pair supplies the remaining bit.
   assign ZP = {~ValidHi, ValidHi ? PosHi : PosLo};
   assign ZV = ValidHi | ValidLo;

endmodule // lz4
|
||||
|
||||
// lz8: 8-bit leading-zero detector assembled from two lz4 halves.
//   B  : vector to examine, B[7] is the most-significant bit
//   ZP : number of zero bits above the highest set bit of B (meaningful when ZV = 1)
//   ZV : 1 when at least one bit of B is set
module lz8 (ZP, ZV, B);

   output logic [2:0] ZP;
   output logic       ZV;
   input  logic [7:0] B;

   logic [1:0]        PosLo, PosHi;       // counts reported by the low / high half
   logic              ValidLo, ValidHi;   // "contains a 1" flag of the low / high half

   lz4 l1 (.ZP(PosLo), .ZV(ValidLo), .B(B[3:0]));
   lz4 l2 (.ZP(PosHi), .ZV(ValidHi), .B(B[7:4]));

   // The high half decides when it holds a 1; otherwise the top bit of ZP is set
   // and the low half supplies the remaining bits.
   assign ZP = {~ValidHi, ValidHi ? PosHi : PosLo};
   assign ZV = ValidHi | ValidLo;

endmodule // lz8
|
||||
|
||||
// lz16: 16-bit leading-zero detector assembled from two lz8 halves.
//   B  : vector to examine, B[15] is the most-significant bit
//   ZP : number of zero bits above the highest set bit of B (meaningful when ZV = 1)
//   ZV : 1 when at least one bit of B is set
module lz16 (ZP, ZV, B);

   output logic [3:0]  ZP;
   output logic        ZV;
   input  logic [15:0] B;

   logic [2:0]         PosLo, PosHi;       // counts reported by the low / high half
   logic               ValidLo, ValidHi;   // "contains a 1" flag of the low / high half

   lz8 l1 (.ZP(PosLo), .ZV(ValidLo), .B(B[7:0]));
   lz8 l2 (.ZP(PosHi), .ZV(ValidHi), .B(B[15:8]));

   // The high half decides when it holds a 1; otherwise the top bit of ZP is set
   // and the low half supplies the remaining bits.
   assign ZP = {~ValidHi, ValidHi ? PosHi : PosLo};
   assign ZV = ValidHi | ValidLo;

endmodule // lz16
|
||||
|
||||
// lz32: 32-bit leading-zero detector assembled from two lz16 halves.
//   B  : vector to examine, B[31] is the most-significant bit
//   ZP : number of zero bits above the highest set bit of B (meaningful when ZV = 1)
//   ZV : 1 when at least one bit of B is set
module lz32 (ZP, ZV, B);

   output logic [4:0]  ZP;
   output logic        ZV;
   input  logic [31:0] B;

   logic [3:0]         PosLo, PosHi;       // counts reported by the low / high half
   logic               ValidLo, ValidHi;   // "contains a 1" flag of the low / high half

   lz16 l1 (.ZP(PosLo), .ZV(ValidLo), .B(B[15:0]));
   lz16 l2 (.ZP(PosHi), .ZV(ValidHi), .B(B[31:16]));

   // The high half decides when it holds a 1; otherwise the top bit of ZP is set
   // and the low half supplies the remaining bits.
   assign ZP = {~ValidHi, ValidHi ? PosHi : PosLo};
   assign ZV = ValidHi | ValidLo;

endmodule // lz32
|
||||
|
||||
// lz64: 64-bit leading-zero detector assembled from two lz32 halves.
//   B  : vector to examine, B[63] is the most-significant bit
//   ZP : number of zero bits above the highest set bit of B (meaningful when ZV = 1)
//   ZV : 1 when at least one bit of B is set
module lz64 (ZP, ZV, B);

   output logic [5:0]  ZP;
   output logic        ZV;
   input  logic [63:0] B;

   logic [4:0]         PosLo, PosHi;       // counts reported by the low / high half
   logic               ValidLo, ValidHi;   // "contains a 1" flag of the low / high half

   lz32 l1 (.ZP(PosLo), .ZV(ValidLo), .B(B[31:0]));
   lz32 l2 (.ZP(PosHi), .ZV(ValidHi), .B(B[63:32]));

   // The high half decides when it holds a 1; otherwise the top bit of ZP is set
   // and the low half supplies the remaining bits.
   assign ZP = {~ValidHi, ValidHi ? PosHi : PosLo};
   assign ZV = ValidHi | ValidLo;

endmodule // lz64
|
||||
|
||||
// lz128: 128-bit leading-zero detector assembled from two lz64 halves.
//   B  : vector to examine, B[127] is the most-significant bit
//   ZP : number of zero bits above the highest set bit of B (meaningful when ZV = 1)
//   ZV : 1 when at least one bit of B is set
//
// Fix: the halves were previously sliced as B[64:0] and B[127:63], which are
// 65 bits wide on 64-bit ports. After truncation to the port width the upper
// detector examined B[126:63], so B[127] was ignored and B[63] was seen by both
// halves. The slices are now the two disjoint 64-bit halves.
module lz128 (ZP, ZV, B);

   input logic [127:0] B;

   logic [5:0]         ZPa;   // count from the low half,  B[63:0]
   logic [5:0]         ZPb;   // count from the high half, B[127:64]
   logic               ZVa;   // low half contains a 1
   logic               ZVb;   // high half contains a 1

   output logic [6:0]  ZP;
   output logic        ZV;

   lz64 l1(ZPa, ZVa, B[63:0]);
   lz64 l2(ZPb, ZVb, B[127:64]);

   // The high half decides when it holds a 1; otherwise ZP[6] is set and the
   // low half supplies the remaining bits.
   assign ZP[5:0] = ZVb ? ZPb : ZPa;
   assign ZP[6] = ~ZVb;
   assign ZV = ZVa | ZVb;

endmodule // lz128
|
||||
|
||||
/* verilator lint_on DECLFILENAME */
|
@ -31,8 +31,8 @@ module hazard(
|
||||
// Detect hazards
|
||||
input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM,
|
||||
input logic LoadStallD, MulDivStallD, CSRRdStallD,
|
||||
input logic DataStall, ICacheStallF,
|
||||
input logic FPUStallD, FStallD,
|
||||
input logic DCacheStall, ICacheStallF,
|
||||
input logic FPUStallD, FStallD,
|
||||
input logic DivBusyE,FDivBusyE,
|
||||
// Stall & flush outputs
|
||||
output logic StallF, StallD, StallE, StallM, StallW,
|
||||
@ -55,16 +55,16 @@ module hazard(
|
||||
// A stage must stall if the next stage is stalled
|
||||
// If any stages are stalled, the first stage that isn't stalled must flush.
|
||||
|
||||
assign StallFCause = CSRWritePendingDEM && ~(TrapM || RetM || BPPredWrongE);
|
||||
assign StallDCause = (LoadStallD || MulDivStallD || CSRRdStallD || FPUStallD || FStallD) && ~(TrapM || RetM || BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous
|
||||
assign StallECause = DivBusyE || FDivBusyE;
|
||||
assign StallFCause = CSRWritePendingDEM && ~(TrapM | RetM | BPPredWrongE);
|
||||
assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous
|
||||
assign StallECause = DivBusyE | FDivBusyE;
|
||||
assign StallMCause = 0;
|
||||
assign StallWCause = DataStall || ICacheStallF;
|
||||
assign StallWCause = DCacheStall | ICacheStallF;
|
||||
|
||||
assign StallF = StallFCause || StallD;
|
||||
assign StallD = StallDCause || StallE;
|
||||
assign StallE = StallECause || StallM;
|
||||
assign StallM = StallMCause || StallW;
|
||||
assign StallF = StallFCause | StallD;
|
||||
assign StallD = StallDCause | StallE;
|
||||
assign StallE = StallECause | StallM;
|
||||
assign StallM = StallMCause | StallW;
|
||||
assign StallW = StallWCause;
|
||||
|
||||
//assign FirstUnstalledD = (~StallD & StallF & ~MulDivStallD);
|
||||
@ -76,8 +76,8 @@ module hazard(
|
||||
|
||||
// Each stage flushes if the previous stage is the last one stalled (for cause) or the system has reason to flush
|
||||
assign FlushF = BPPredWrongE;
|
||||
assign FlushD = FirstUnstalledD || TrapM || RetM || BPPredWrongE;
|
||||
assign FlushE = FirstUnstalledE || TrapM || RetM || BPPredWrongE;
|
||||
assign FlushM = FirstUnstalledM || TrapM || RetM;
|
||||
assign FlushW = FirstUnstalledW || TrapM;
|
||||
assign FlushD = FirstUnstalledD | TrapM | RetM | BPPredWrongE;
|
||||
assign FlushE = FirstUnstalledE | TrapM | RetM | BPPredWrongE;
|
||||
assign FlushM = FirstUnstalledM | TrapM | RetM;
|
||||
assign FlushW = FirstUnstalledW | TrapM;
|
||||
endmodule
|
||||
|
@ -31,31 +31,34 @@ module ieu (
|
||||
input logic [31:0] InstrD,
|
||||
input logic IllegalIEUInstrFaultD,
|
||||
output logic IllegalBaseInstrFaultD,
|
||||
output logic RegWriteD,
|
||||
output logic RegWriteD,
|
||||
// Execute Stage interface
|
||||
input logic [`XLEN-1:0] PCE,
|
||||
input logic [`XLEN-1:0] PCLinkE,
|
||||
input logic FWriteIntE,
|
||||
input logic IllegalFPUInstrE,
|
||||
input logic [`XLEN-1:0] FWriteDataE,
|
||||
input logic FWriteIntE,
|
||||
input logic IllegalFPUInstrE,
|
||||
input logic [`XLEN-1:0] FWriteDataE,
|
||||
output logic [`XLEN-1:0] PCTargetE,
|
||||
output logic MulDivE, W64E,
|
||||
output logic [2:0] Funct3E,
|
||||
output logic [`XLEN-1:0] SrcAE, SrcBE,
|
||||
input logic FWriteIntM,
|
||||
|
||||
// Memory stage interface
|
||||
input logic DataMisalignedM,
|
||||
input logic DataMisalignedM, // from LSU
|
||||
input logic SquashSCW, // from LSU
|
||||
output logic [1:0] MemRWM, // read/write control goes to LSU
|
||||
output logic [1:0] AtomicM, // atomic control goes to LSU
|
||||
output logic [`XLEN-1:0] MemAdrM, WriteDataM, // Address and write data to LSU
|
||||
|
||||
output logic [2:0] Funct3M, // size and signedness to LSU
|
||||
output logic [`XLEN-1:0] SrcAM, // to privilege and fpu
|
||||
input logic DataAccessFaultM,
|
||||
input logic FWriteIntM,
|
||||
input logic [`XLEN-1:0] FIntResM,
|
||||
output logic [1:0] MemRWM,
|
||||
output logic [1:0] AtomicM,
|
||||
output logic [`XLEN-1:0] MemAdrM, WriteDataM,
|
||||
output logic [`XLEN-1:0] SrcAM,
|
||||
output logic [2:0] Funct3M,
|
||||
input logic [`XLEN-1:0] FIntResM,
|
||||
|
||||
// Writeback stage
|
||||
input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW,
|
||||
input logic FWriteIntW,
|
||||
input logic SquashSCW,
|
||||
input logic FWriteIntW,
|
||||
// input logic [`XLEN-1:0] PCLinkW,
|
||||
output logic InstrValidM, InstrValidW,
|
||||
// hazards
|
||||
|
@ -27,64 +27,62 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module ifu (
|
||||
input logic clk, reset,
|
||||
input logic StallF, StallD, StallE, StallM, StallW,
|
||||
input logic FlushF, FlushD, FlushE, FlushM, FlushW,
|
||||
input logic clk, reset,
|
||||
input logic StallF, StallD, StallE, StallM, StallW,
|
||||
input logic FlushF, FlushD, FlushE, FlushM, FlushW,
|
||||
// Fetch
|
||||
input logic [`XLEN-1:0] InstrInF,
|
||||
input logic InstrAckF,
|
||||
output logic [`XLEN-1:0] PCF,
|
||||
input logic [`XLEN-1:0] InstrInF,
|
||||
input logic InstrAckF,
|
||||
output logic [`XLEN-1:0] PCF,
|
||||
output logic [`PA_BITS-1:0] InstrPAdrF,
|
||||
output logic InstrReadF,
|
||||
output logic ICacheStallF,
|
||||
output logic InstrReadF,
|
||||
output logic ICacheStallF,
|
||||
// Decode
|
||||
output logic [`XLEN-1:0] PCD,
|
||||
output logic [`XLEN-1:0] PCD,
|
||||
// Execute
|
||||
output logic [`XLEN-1:0] PCLinkE,
|
||||
input logic PCSrcE,
|
||||
input logic [`XLEN-1:0] PCTargetE,
|
||||
output logic [`XLEN-1:0] PCE,
|
||||
output logic BPPredWrongE,
|
||||
output logic [`XLEN-1:0] PCLinkE,
|
||||
input logic PCSrcE,
|
||||
input logic [`XLEN-1:0] PCTargetE,
|
||||
output logic [`XLEN-1:0] PCE,
|
||||
output logic BPPredWrongE,
|
||||
// Mem
|
||||
input logic RetM, TrapM,
|
||||
input logic [`XLEN-1:0] PrivilegedNextPCM,
|
||||
output logic [31:0] InstrD, InstrE, InstrM, InstrW,
|
||||
output logic [`XLEN-1:0] PCM,
|
||||
output logic [4:0] InstrClassM,
|
||||
output logic BPPredDirWrongM,
|
||||
output logic BTBPredPCWrongM,
|
||||
output logic RASPredPCWrongM,
|
||||
output logic BPPredClassNonCFIWrongM,
|
||||
input logic RetM, TrapM,
|
||||
input logic [`XLEN-1:0] PrivilegedNextPCM,
|
||||
output logic [31:0] InstrD, InstrE, InstrM, InstrW,
|
||||
output logic [`XLEN-1:0] PCM,
|
||||
output logic [4:0] InstrClassM,
|
||||
output logic BPPredDirWrongM,
|
||||
output logic BTBPredPCWrongM,
|
||||
output logic RASPredPCWrongM,
|
||||
output logic BPPredClassNonCFIWrongM,
|
||||
// Writeback
|
||||
// output logic [`XLEN-1:0] PCLinkW,
|
||||
// Faults
|
||||
input logic IllegalBaseInstrFaultD,
|
||||
output logic ITLBInstrPageFaultF,
|
||||
output logic IllegalIEUInstrFaultD,
|
||||
output logic InstrMisalignedFaultM,
|
||||
output logic [`XLEN-1:0] InstrMisalignedAdrM,
|
||||
input logic IllegalBaseInstrFaultD,
|
||||
output logic ITLBInstrPageFaultF,
|
||||
output logic IllegalIEUInstrFaultD,
|
||||
output logic InstrMisalignedFaultM,
|
||||
output logic [`XLEN-1:0] InstrMisalignedAdrM,
|
||||
|
||||
|
||||
// mmu management
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic [`XLEN-1:0] PageTableEntryF,
|
||||
input logic [1:0] PageTypeF,
|
||||
input logic [`XLEN-1:0] SATP_REGW,
|
||||
input logic STATUS_MXR, STATUS_SUM,
|
||||
input logic ITLBWriteF, ITLBFlushF,
|
||||
output logic ITLBMissF, ITLBHitF,
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic [`XLEN-1:0] PageTableEntryF,
|
||||
input logic [1:0] PageTypeF,
|
||||
input logic [`XLEN-1:0] SATP_REGW,
|
||||
input logic STATUS_MXR, STATUS_SUM,
|
||||
input logic ITLBWriteF, ITLBFlushF,
|
||||
input logic WalkerInstrPageFaultF,
|
||||
|
||||
output logic ITLBMissF, ITLBHitF,
|
||||
|
||||
// pmp/pma (inside mmu) signals. *** temporarily from AHB bus but eventually replace with internal versions pre H
|
||||
input logic [31:0] HADDR,
|
||||
input logic [2:0] HSIZE, HBURST,
|
||||
input logic HWRITE,
|
||||
input logic ExecuteAccessF, // read, write, and atomic access are all set to zero because this mmu is only working with instructions in the F stage.
|
||||
input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so they're gonna have to come over into ifu and dmem
|
||||
input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
|
||||
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
|
||||
|
||||
output logic PMPInstrAccessFaultF, PMAInstrAccessFaultF,
|
||||
output logic ISquashBusAccessF,
|
||||
output logic [5:0] IHSELRegionsF
|
||||
output logic PMPInstrAccessFaultF, PMAInstrAccessFaultF,
|
||||
output logic ISquashBusAccessF
|
||||
// output logic [5:0] IHSELRegionsF
|
||||
|
||||
);
|
||||
|
||||
@ -105,24 +103,38 @@ module ifu (
|
||||
logic PMPLoadAccessFaultM, PMPStoreAccessFaultM; // *** these are just so that the mmu has somewhere to put these outputs, they're unused in this stage
|
||||
// if you're allowed to parameterize outputs/ inputs existence, these are an easy delete.
|
||||
|
||||
logic [`PA_BITS-1:0] PCPFmmu;
|
||||
logic [`PA_BITS-1:0] PCPFmmu, PCNextFPhys; // used to either truncate or expand PCPF and PCNextF into `PA_BITS width.
|
||||
|
||||
generate
|
||||
if (`XLEN==32)
|
||||
if (`XLEN==32) begin
|
||||
assign PCPF = PCPFmmu[31:0];
|
||||
else
|
||||
assign PCNextFPhys = {{(`PA_BITS-`XLEN){1'b0}}, PCNextF};
|
||||
end else begin
|
||||
assign PCPF = {8'b0, PCPFmmu};
|
||||
assign PCNextFPhys = PCNextF[`PA_BITS-1:0];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
mmu #(.ENTRY_BITS(`ITLB_ENTRY_BITS), .IMMU(1)) itlb(.TLBAccessType(2'b10), .VirtualAddress(PCF), .Size(2'b10),
|
||||
.PTEWriteVal(PageTableEntryF), .PageTypeWriteVal(PageTypeF),
|
||||
.TLBWrite(ITLBWriteF), .TLBFlush(ITLBFlushF),
|
||||
.PhysicalAddress(PCPFmmu), .TLBMiss(ITLBMissF),
|
||||
.TLBHit(ITLBHitF), .TLBPageFault(ITLBInstrPageFaultF),
|
||||
|
||||
.AtomicAccessM(1'b0), .WriteAccessM(1'b0), .ReadAccessM(1'b0), // *** is this the right way to force these bits constant? should they be something else?
|
||||
.SquashBusAccess(ISquashBusAccessF), .HSELRegions(IHSELRegionsF),
|
||||
.*);
|
||||
mmu #(.ENTRY_BITS(`ITLB_ENTRY_BITS), .IMMU(1))
|
||||
itlb(.TLBAccessType(2'b10),
|
||||
.VirtualAddress(PCF),
|
||||
.Size(2'b10),
|
||||
.PTEWriteVal(PageTableEntryF),
|
||||
.PageTypeWriteVal(PageTypeF),
|
||||
.TLBWrite(ITLBWriteF),
|
||||
.TLBFlush(ITLBFlushF),
|
||||
.PhysicalAddress(PCPFmmu),
|
||||
.TLBMiss(ITLBMissF),
|
||||
.TLBHit(ITLBHitF),
|
||||
.TLBPageFault(ITLBInstrPageFaultF),
|
||||
.ExecuteAccessF(1'b1), // ***dh -- this should eventually change to only true if an instruction fetch is occurring
|
||||
.AtomicAccessM(1'b0),
|
||||
.ReadAccessM(1'b0),
|
||||
.WriteAccessM(1'b0),
|
||||
.SquashBusAccess(ISquashBusAccessF),
|
||||
// .HSELRegions(IHSELRegionsF),
|
||||
.DisableTranslation(1'b0),
|
||||
.*);
|
||||
|
||||
|
||||
// branch predictor signals
|
||||
@ -137,8 +149,9 @@ module ifu (
|
||||
// assign InstrReadF = 1; // *** & ICacheMissF; add later
|
||||
|
||||
icache icache(.*,
|
||||
.PCNextF(PCNextF[`PA_BITS-1:0]),
|
||||
.PCPF(PCPFmmu));
|
||||
.PCNextF(PCNextFPhys),
|
||||
.PCPF(PCPFmmu),
|
||||
.WalkerInstrPageFaultF(WalkerInstrPageFaultF));
|
||||
|
||||
flopenl #(32) AlignedInstrRawDFlop(clk, reset | reset_q, ~StallD, FlushD ? nop : FinalInstrRawF, nop, InstrRawD);
|
||||
|
||||
|
@ -29,104 +29,142 @@
|
||||
|
||||
// *** Ross Thompson amo misalignment check?
|
||||
module lsu (
|
||||
input logic clk, reset,
|
||||
input logic StallM, FlushM, StallW, FlushW,
|
||||
//output logic DataStall,
|
||||
input logic clk, reset,
|
||||
input logic StallM, FlushM, StallW, FlushW,
|
||||
output logic DataStall,
|
||||
output logic HPTWReady,
|
||||
// Memory Stage
|
||||
input logic [1:0] MemRWM,
|
||||
input logic [`XLEN-1:0] MemAdrM,
|
||||
input logic [2:0] Funct3M,
|
||||
//input logic [`XLEN-1:0] ReadDataW,
|
||||
input logic [`XLEN-1:0] WriteDataM,
|
||||
input logic [1:0] AtomicM,
|
||||
input logic CommitM,
|
||||
output logic [`PA_BITS-1:0] MemPAdrM,
|
||||
output logic MemReadM, MemWriteM,
|
||||
output logic [1:0] AtomicMaskedM,
|
||||
output logic DataMisalignedM,
|
||||
output logic CommittedM,
|
||||
// Writeback Stage
|
||||
input logic MemAckW,
|
||||
input logic [`XLEN-1:0] ReadDataW,
|
||||
output logic SquashSCW,
|
||||
|
||||
// connected to cpu (controls)
|
||||
input logic [1:0] MemRWM,
|
||||
input logic [2:0] Funct3M,
|
||||
input logic [1:0] AtomicM,
|
||||
output logic CommittedM,
|
||||
output logic SquashSCW,
|
||||
output logic DataMisalignedM,
|
||||
|
||||
// address and write data
|
||||
input logic [`XLEN-1:0] MemAdrM,
|
||||
input logic [`XLEN-1:0] WriteDataM,
|
||||
output logic [`XLEN-1:0] ReadDataW,
|
||||
|
||||
// cpu privilege
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic DTLBFlushM,
|
||||
// faults
|
||||
input logic NonBusTrapM,
|
||||
input logic DataAccessFaultM,
|
||||
output logic DTLBLoadPageFaultM, DTLBStorePageFaultM,
|
||||
output logic LoadMisalignedFaultM, LoadAccessFaultM,
|
||||
output logic StoreMisalignedFaultM, StoreAccessFaultM,
|
||||
|
||||
input logic NonBusTrapM,
|
||||
output logic DTLBLoadPageFaultM, DTLBStorePageFaultM,
|
||||
output logic LoadMisalignedFaultM, LoadAccessFaultM,
|
||||
// cpu hazard unit (trap)
|
||||
output logic StoreMisalignedFaultM, StoreAccessFaultM,
|
||||
|
||||
// connect to ahb
|
||||
input logic CommitM, // should this be generated in the ahb interface?
|
||||
output logic [`PA_BITS-1:0] MemPAdrM, // to ahb
|
||||
output logic MemReadM, MemWriteM,
|
||||
output logic [1:0] AtomicMaskedM,
|
||||
input logic MemAckW, // from ahb
|
||||
input logic [`XLEN-1:0] HRDATAW, // from ahb
|
||||
output logic [2:0] Funct3MfromLSU,
|
||||
output logic StallWfromLSU,
|
||||
|
||||
|
||||
// mmu management
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
input logic [`XLEN-1:0] PageTableEntryM,
|
||||
input logic [1:0] PageTypeM,
|
||||
input logic [`XLEN-1:0] SATP_REGW,
|
||||
input logic STATUS_MXR, STATUS_SUM,
|
||||
input logic DTLBWriteM, DTLBFlushM,
|
||||
output logic DTLBMissM, DTLBHitM,
|
||||
|
||||
// page table walker
|
||||
input logic [`XLEN-1:0] PageTableEntryM,
|
||||
input logic [1:0] PageTypeM,
|
||||
input logic [`XLEN-1:0] SATP_REGW, // from csr
|
||||
input logic STATUS_MXR, STATUS_SUM, // from csr
|
||||
input logic DTLBWriteM,
|
||||
output logic DTLBMissM,
|
||||
input logic DisableTranslation, // used to stop intermediate PTE physical addresses being saved to TLB.
|
||||
|
||||
|
||||
|
||||
output logic DTLBHitM, // not connected
|
||||
|
||||
// PMA/PMP (inside mmu) signals
|
||||
input logic [31:0] HADDR, // *** replace all of these H inputs with physical adress once pma checkers have been edited to use paddr as well.
|
||||
input logic [2:0] HSIZE, HBURST,
|
||||
input logic HWRITE,
|
||||
input logic AtomicAccessM, WriteAccessM, ReadAccessM, // execute access is hardwired to zero in this mmu because we're only working with data in the M stage.
|
||||
input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem
|
||||
input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
|
||||
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], // *** this one especially has a large note attached to it in pmpchecker.
|
||||
|
||||
output logic PMALoadAccessFaultM, PMAStoreAccessFaultM,
|
||||
output logic PMPLoadAccessFaultM, PMPStoreAccessFaultM, // *** can these be parameterized? we dont need the m stage ones for the immu and vice versa.
|
||||
|
||||
output logic DSquashBusAccessM,
|
||||
output logic [5:0] DHSELRegionsM
|
||||
output logic DSquashBusAccessM
|
||||
// output logic [5:0] DHSELRegionsM
|
||||
|
||||
);
|
||||
|
||||
logic SquashSCM;
|
||||
logic DTLBPageFaultM;
|
||||
logic MemAccessM;
|
||||
logic [1:0] CurrState, NextState;
|
||||
|
||||
logic preCommittedM;
|
||||
|
||||
localparam STATE_READY = 0;
|
||||
localparam STATE_FETCH = 1;
|
||||
localparam STATE_FETCH_AMO = 2;
|
||||
localparam STATE_STALLED = 3;
|
||||
typedef enum {STATE_READY,
|
||||
STATE_FETCH,
|
||||
STATE_FETCH_AMO_1,
|
||||
STATE_FETCH_AMO_2,
|
||||
STATE_STALLED,
|
||||
STATE_PTW_READY,
|
||||
STATE_PTW_FETCH,
|
||||
STATE_PTW_DONE} statetype;
|
||||
statetype CurrState, NextState;
|
||||
|
||||
|
||||
logic PMPInstrAccessFaultF, PMAInstrAccessFaultF; // *** these are just so that the mmu has somewhere to put these outputs since they aren't used in dmem
|
||||
// *** if you're allowed to parameterize outputs/ inputs existence, these are an easy delete.
|
||||
|
||||
mmu #(.ENTRY_BITS(`DTLB_ENTRY_BITS), .IMMU(0)) dmmu(.TLBAccessType(MemRWM), .VirtualAddress(MemAdrM), .Size(Funct3M[1:0]),
|
||||
.PTEWriteVal(PageTableEntryM), .PageTypeWriteVal(PageTypeM),
|
||||
.TLBWrite(DTLBWriteM), .TLBFlush(DTLBFlushM),
|
||||
.PhysicalAddress(MemPAdrM), .TLBMiss(DTLBMissM),
|
||||
.TLBHit(DTLBHitM), .TLBPageFault(DTLBPageFaultM),
|
||||
|
||||
.ExecuteAccessF(1'b0),
|
||||
.SquashBusAccess(DSquashBusAccessM), .HSELRegions(DHSELRegionsM),
|
||||
.*); // *** the pma/pmp instruction acess faults don't really matter here. is it possible to parameterize which outputs exist?
|
||||
// for the time being, until we have a dcache, the AHB Lite read bus HRDATAW will be connected to the
|
||||
// CPU's read data input ReadDataW.
|
||||
assign ReadDataW = HRDATAW;
|
||||
|
||||
mmu #(.ENTRY_BITS(`DTLB_ENTRY_BITS), .IMMU(0))
|
||||
dmmu(.TLBAccessType(MemRWM),
|
||||
.VirtualAddress(MemAdrM),
|
||||
.Size(Funct3M[1:0]),
|
||||
.PTEWriteVal(PageTableEntryM),
|
||||
.PageTypeWriteVal(PageTypeM),
|
||||
.TLBWrite(DTLBWriteM),
|
||||
.TLBFlush(DTLBFlushM),
|
||||
.PhysicalAddress(MemPAdrM),
|
||||
.TLBMiss(DTLBMissM),
|
||||
.TLBHit(DTLBHitM),
|
||||
.TLBPageFault(DTLBPageFaultM),
|
||||
.ExecuteAccessF(1'b0),
|
||||
.AtomicAccessM(AtomicMaskedM[1]),
|
||||
.WriteAccessM(MemRWM[0]),
|
||||
.ReadAccessM(MemRWM[1]),
|
||||
.SquashBusAccess(DSquashBusAccessM),
|
||||
// .SelRegions(DHSELRegionsM),
|
||||
.*); // *** the pma/pmp instruction acess faults don't really matter here. is it possible to parameterize which outputs exist?
|
||||
|
||||
// Specify which type of page fault is occurring
|
||||
assign DTLBLoadPageFaultM = DTLBPageFaultM & MemRWM[1];
|
||||
assign DTLBStorePageFaultM = DTLBPageFaultM & MemRWM[0];
|
||||
|
||||
// Determine if an Unaligned access is taking place
|
||||
always_comb
|
||||
case(Funct3M[1:0])
|
||||
2'b00: DataMisalignedM = 0; // lb, sb, lbu
|
||||
2'b01: DataMisalignedM = MemAdrM[0]; // lh, sh, lhu
|
||||
2'b10: DataMisalignedM = MemAdrM[1] | MemAdrM[0]; // lw, sw, flw, fsw, lwu
|
||||
2'b11: DataMisalignedM = |MemAdrM[2:0]; // ld, sd, fld, fsd
|
||||
endcase
|
||||
// Determine if an Unaligned access is taking place
|
||||
always_comb
|
||||
case(Funct3M[1:0])
|
||||
2'b00: DataMisalignedM = 0; // lb, sb, lbu
|
||||
2'b01: DataMisalignedM = MemAdrM[0]; // lh, sh, lhu
|
||||
2'b10: DataMisalignedM = MemAdrM[1] | MemAdrM[0]; // lw, sw, flw, fsw, lwu
|
||||
2'b11: DataMisalignedM = |MemAdrM[2:0]; // ld, sd, fld, fsd
|
||||
endcase
|
||||
|
||||
// Squash unaligned data accesses and failed store conditionals
|
||||
// *** this is also the place to squash if the cache is hit
|
||||
// Changed DataMisalignedM to a larger combination of trap sources
|
||||
// NonBusTrapM is anything that the bus doesn't contribute to producing
|
||||
// By contrast, using TrapM results in circular logic errors
|
||||
assign MemReadM = MemRWM[1] & ~NonBusTrapM & CurrState != STATE_STALLED;
|
||||
assign MemWriteM = MemRWM[0] & ~NonBusTrapM && ~SquashSCM & CurrState != STATE_STALLED;
|
||||
assign MemReadM = MemRWM[1] & ~NonBusTrapM & ~DTLBMissM & CurrState != STATE_STALLED;
|
||||
assign MemWriteM = MemRWM[0] & ~NonBusTrapM & ~DTLBMissM & ~SquashSCM & CurrState != STATE_STALLED;
|
||||
assign AtomicMaskedM = CurrState != STATE_STALLED ? AtomicM : 2'b00 ;
|
||||
assign MemAccessM = |MemRWM;
|
||||
assign MemAccessM = MemReadM | MemWriteM;
|
||||
|
||||
// Determine if M stage committed
|
||||
// Reset whenever unstalled. Set when access successfully occurs
|
||||
@ -135,9 +173,9 @@ module lsu (
|
||||
|
||||
// Determine if address is valid
|
||||
assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1];
|
||||
assign LoadAccessFaultM = DataAccessFaultM & MemRWM[1];
|
||||
assign LoadAccessFaultM = MemRWM[1];
|
||||
assign StoreMisalignedFaultM = DataMisalignedM & MemRWM[0];
|
||||
assign StoreAccessFaultM = DataAccessFaultM & MemRWM[0];
|
||||
assign StoreAccessFaultM = MemRWM[0];
|
||||
|
||||
// Handle atomic load reserved / store conditional
|
||||
generate
|
||||
@ -165,33 +203,111 @@ module lsu (
|
||||
endgenerate
|
||||
|
||||
// Data stall
|
||||
//assign DataStall = 0;
|
||||
//assign DataStall = (NextState == STATE_FETCH) || (NextState == STATE_FETCH_AMO_1) || (NextState == STATE_FETCH_AMO_2);
|
||||
assign HPTWReady = (CurrState == STATE_READY);
|
||||
|
||||
|
||||
// Ross Thompson April 22, 2021
|
||||
// for now we need to handle the issue where the data memory interface repeatedly
|
||||
// requests data from memory rather than issuing a single request.
|
||||
|
||||
|
||||
flopr #(2) stateReg(.clk(clk),
|
||||
.reset(reset),
|
||||
.d(NextState),
|
||||
.q(CurrState));
|
||||
flopenl #(.TYPE(statetype)) stateReg(.clk(clk),
|
||||
.load(reset),
|
||||
.en(1'b1),
|
||||
.d(NextState),
|
||||
.val(STATE_READY),
|
||||
.q(CurrState));
|
||||
|
||||
always_comb begin
|
||||
case (CurrState)
|
||||
STATE_READY: if (MemRWM[1] & MemRWM[0]) NextState = STATE_FETCH_AMO; // *** should be some misalign check
|
||||
else if (MemAccessM & ~DataMisalignedM) NextState = STATE_FETCH;
|
||||
else NextState = STATE_READY;
|
||||
STATE_FETCH_AMO: if (MemAckW) NextState = STATE_FETCH;
|
||||
else NextState = STATE_FETCH_AMO;
|
||||
STATE_FETCH: if (MemAckW & ~StallW) NextState = STATE_READY;
|
||||
else if (MemAckW & StallW) NextState = STATE_STALLED;
|
||||
else NextState = STATE_FETCH;
|
||||
STATE_STALLED: if (~StallW) NextState = STATE_READY;
|
||||
else NextState = STATE_STALLED;
|
||||
default: NextState = STATE_READY;
|
||||
endcase // case (CurrState)
|
||||
end
|
||||
STATE_READY:
|
||||
if (DTLBMissM) begin
|
||||
NextState = STATE_PTW_READY;
|
||||
DataStall = 1'b1;
|
||||
end else if (AtomicMaskedM[1]) begin
|
||||
NextState = STATE_FETCH_AMO_1; // *** should be some misalign check
|
||||
DataStall = 1'b1;
|
||||
end else if((MemReadM & AtomicM[0]) | (MemWriteM & AtomicM[0])) begin
|
||||
NextState = STATE_FETCH_AMO_2;
|
||||
DataStall = 1'b1;
|
||||
end else if (MemAccessM & ~DataMisalignedM) begin
|
||||
NextState = STATE_FETCH;
|
||||
DataStall = 1'b1;
|
||||
end else begin
|
||||
NextState = STATE_READY;
|
||||
DataStall = 1'b0;
|
||||
end
|
||||
STATE_FETCH_AMO_1: begin
|
||||
DataStall = 1'b1;
|
||||
if (MemAckW) begin
|
||||
NextState = STATE_FETCH_AMO_2;
|
||||
end else begin
|
||||
NextState = STATE_FETCH_AMO_1;
|
||||
end
|
||||
end
|
||||
STATE_FETCH_AMO_2: begin
|
||||
DataStall = 1'b1;
|
||||
if (MemAckW & ~StallW) begin
|
||||
NextState = STATE_FETCH_AMO_2;
|
||||
end else if (MemAckW & StallW) begin
|
||||
NextState = STATE_STALLED;
|
||||
end else begin
|
||||
NextState = STATE_FETCH_AMO_2;
|
||||
end
|
||||
end
|
||||
STATE_FETCH: begin
|
||||
DataStall = 1'b1;
|
||||
if (MemAckW & ~StallW) begin
|
||||
NextState = STATE_READY;
|
||||
end else if (MemAckW & StallW) begin
|
||||
NextState = STATE_STALLED;
|
||||
end else begin
|
||||
NextState = STATE_FETCH;
|
||||
end
|
||||
end
|
||||
STATE_STALLED: begin
|
||||
DataStall = 1'b0;
|
||||
if (~StallW) begin
|
||||
NextState = STATE_READY;
|
||||
end else begin
|
||||
NextState = STATE_STALLED;
|
||||
end
|
||||
end
|
||||
STATE_PTW_READY: begin
|
||||
DataStall = 1'b0;
|
||||
if (DTLBWriteM) begin
|
||||
NextState = STATE_READY;
|
||||
end else if (MemReadM & ~DataMisalignedM) begin
|
||||
NextState = STATE_PTW_FETCH;
|
||||
end else begin
|
||||
NextState = STATE_PTW_READY;
|
||||
end
|
||||
end
|
||||
STATE_PTW_FETCH : begin
|
||||
DataStall = 1'b1;
|
||||
if (MemAckW & ~DTLBWriteM) begin
|
||||
NextState = STATE_PTW_READY;
|
||||
end else if (MemAckW & DTLBWriteM) begin
|
||||
NextState = STATE_READY;
|
||||
end else begin
|
||||
NextState = STATE_PTW_FETCH;
|
||||
end
|
||||
end
|
||||
STATE_PTW_DONE: begin
|
||||
NextState = STATE_READY;
|
||||
end
|
||||
default: begin
|
||||
DataStall = 1'b0;
|
||||
NextState = STATE_READY;
|
||||
end
|
||||
endcase
|
||||
end // always_comb
|
||||
|
||||
// *** for now just pass through size
|
||||
assign Funct3MfromLSU = Funct3M;
|
||||
assign StallWfromLSU = StallW;
|
||||
|
||||
|
||||
endmodule
|
||||
|
||||
|
178
wally-pipelined/src/lsu/lsuArb.sv
Normal file
178
wally-pipelined/src/lsu/lsuArb.sv
Normal file
@ -0,0 +1,178 @@
|
||||
///////////////////////////////////////////
|
||||
// lsuArb.sv
|
||||
//
|
||||
// Written: Ross Thompson and Kip Macsai-Goren
|
||||
// Modified: kmacsaigoren@hmc.edu June 23, 2021
|
||||
//
|
||||
// Purpose: LSU arbiter between the CPU's demand request for data memory and
|
||||
// the page table walker
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
// lsuArb: selects which requester drives the LSU-facing signals (the
// "...toLSU" outputs). When SelPTW is high the page table walker's request
// (HPTWRead / HPTWPAdr) is forwarded; otherwise the CPU's M-stage signals are
// passed through. Results coming back from the LSU ("...fromLSU" inputs) are
// steered to the walker or to the CPU with the same select.
module lsuArb
|
||||
(input logic clk, reset,
|
||||
|
||||
// from page table walker
|
||||
input logic HPTWTranslate,
|
||||
input logic HPTWRead,
|
||||
input logic [`XLEN-1:0] HPTWPAdr,
|
||||
// to page table walker.
|
||||
output logic [`XLEN-1:0] HPTWReadPTE,
|
||||
output logic HPTWReady,
|
||||
output logic HPTWStall,
|
||||
|
||||
// from CPU
|
||||
input logic [1:0] MemRWM,
|
||||
input logic [2:0] Funct3M,
|
||||
input logic [1:0] AtomicM,
|
||||
input logic [`XLEN-1:0] MemAdrM,
|
||||
input logic [`XLEN-1:0] WriteDataM,
|
||||
input logic StallW,
|
||||
// to CPU
|
||||
output logic [`XLEN-1:0] ReadDataW,
|
||||
output logic CommittedM,
|
||||
output logic SquashSCW,
|
||||
output logic DataMisalignedM,
|
||||
output logic DCacheStall,
|
||||
|
||||
// to LSU
|
||||
output logic DisableTranslation,
|
||||
output logic [1:0] MemRWMtoLSU,
|
||||
output logic [2:0] Funct3MtoLSU,
|
||||
output logic [1:0] AtomicMtoLSU,
|
||||
output logic [`XLEN-1:0] MemAdrMtoLSU,
|
||||
output logic [`XLEN-1:0] WriteDataMtoLSU,
|
||||
output logic StallWtoLSU,
|
||||
// from LSU
|
||||
input logic CommittedMfromLSU,
|
||||
input logic SquashSCWfromLSU,
|
||||
input logic DataMisalignedMfromLSU,
|
||||
input logic [`XLEN-1:0] ReadDataWFromLSU,
|
||||
input logic HPTWReadyfromLSU,
|
||||
input logic DataStall
|
||||
|
||||
);
|
||||
|
||||
// HPTWTranslate is the request for memory by the page table walker. When
|
||||
// this is high the page table walker gains priority over the CPU's data
|
||||
// input. Note the ptw only makes a request after an instruction or data
|
||||
// tlb miss. It is entirely possible the dcache is currently processing
|
||||
// a data cache miss when an instruction tlb miss occurs. If an instruction
|
||||
// in the E stage causes a d cache miss, the d cache will immediately start
|
||||
// processing the request. Simultaneously the ITLB misses. By the time
|
||||
// the TLB miss causes the page table walker to issue the first request
|
||||
// to data memory the d cache is already busy. We can interlock by
|
||||
// leveraging Stall as a d cache busy. We will need an FSM to handle this.
|
||||
|
||||
// Arbiter states. StatePTWPending is only a transition target inside the
// EXCLUDED (commented-out) FSM below; the active FSM never enters it.
typedef enum{StateReady,
|
||||
StatePTWPending,
|
||||
StatePTWActive} statetype;
|
||||
|
||||
|
||||
statetype CurrState, NextState;
|
||||
logic SelPTW;
|
||||
// NOTE: HPTWStallD is only referenced from commented-out code in this module.
logic HPTWStallD;
|
||||
|
||||
|
||||
// State register: CurrState follows NextState, always enabled (.en tied high).
// NOTE(review): presumably flopenl loads .val (StateReady) while .load (reset)
// is asserted -- confirm against the flopenl definition.
flopenl #(.TYPE(statetype)) StateReg(.clk(clk),
|
||||
.load(reset),
|
||||
.en(1'b1),
|
||||
.d(NextState),
|
||||
.val(StateReady),
|
||||
.q(CurrState));
|
||||
|
||||
// Next-state logic: the next state is StatePTWActive whenever HPTWTranslate
// is high and StateReady otherwise, from every state.
always_comb begin
|
||||
case(CurrState)
|
||||
StateReady:
|
||||
if (HPTWTranslate) NextState = StatePTWActive;
|
||||
else NextState = StateReady;
|
||||
StatePTWActive:
|
||||
if (HPTWTranslate) NextState = StatePTWActive;
|
||||
else NextState = StateReady;
|
||||
default: NextState = StateReady;
|
||||
endcase
|
||||
end
|
||||
|
||||
/* -----\/----- EXCLUDED -----\/-----
|
||||
|
||||
always_comb begin
|
||||
case(CurrState)
|
||||
StateReady:
|
||||
/-* -----\/----- EXCLUDED -----\/-----
|
||||
if (HPTWTranslate & DataStall) NextState = StatePTWPending;
|
||||
else
|
||||
-----/\----- EXCLUDED -----/\----- *-/
|
||||
if (HPTWTranslate) NextState = StatePTWActive;
|
||||
else NextState = StateReady;
|
||||
StatePTWPending:
|
||||
if (HPTWTranslate & ~DataStall) NextState = StatePTWActive;
|
||||
else if (HPTWTranslate & DataStall) NextState = StatePTWPending;
|
||||
else NextState = StateReady;
|
||||
StatePTWActive:
|
||||
if (HPTWTranslate) NextState = StatePTWActive;
|
||||
else NextState = StateReady;
|
||||
default: NextState = StateReady;
|
||||
endcase
|
||||
end
|
||||
|
||||
-----/\----- EXCLUDED -----/\----- */
|
||||
|
||||
// multiplex the outputs to LSU
|
||||
assign DisableTranslation = SelPTW; // change names between SelPTW would be confusing in DTLB.
|
||||
// SelPTW is high when HPTWTranslate is asserted and the FSM is in StateReady
// or StatePTWActive (i.e. in every state the active FSM can reach).
assign SelPTW = (CurrState == StatePTWActive && HPTWTranslate) || (CurrState == StateReady && HPTWTranslate);
|
||||
// Walker requests drive only the read bit (MemRW[1]); the write bit is forced to 0.
assign MemRWMtoLSU = SelPTW ? {HPTWRead, 1'b0} : MemRWM;
|
||||
|
||||
// Walker accesses use a fixed Funct3: 3'b010 when XLEN is 32, 3'b011 otherwise.
// NOTE(review): presumably word / doubleword PTE reads -- confirm against the
// LSU's Funct3 size decoding.
generate
|
||||
if (`XLEN == 32) begin
|
||||
assign Funct3MtoLSU = SelPTW ? 3'b010 : Funct3M;
|
||||
end else begin
|
||||
assign Funct3MtoLSU = SelPTW ? 3'b011 : Funct3M;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// While the walker is selected: no atomic operation, the address comes from
// HPTWPAdr, write data is zero, and the StallW seen by the LSU is forced low.
assign AtomicMtoLSU = SelPTW ? 2'b00 : AtomicM;
|
||||
assign MemAdrMtoLSU = SelPTW ? HPTWPAdr : MemAdrM;
|
||||
assign WriteDataMtoLSU = SelPTW ? `XLEN'b0 : WriteDataM;
|
||||
assign StallWtoLSU = SelPTW ? 1'b0 : StallW;
|
||||
|
||||
// demux the inputs from LSU to walker or cpu's data port.
|
||||
|
||||
// The requester that is not selected sees zeros on its data and status outputs.
assign ReadDataW = SelPTW ? `XLEN'b0 : ReadDataWFromLSU; // probably can avoid this demux
|
||||
assign HPTWReadPTE = SelPTW ? ReadDataWFromLSU : `XLEN'b0 ; // probably can avoid this demux
|
||||
assign CommittedM = SelPTW ? 1'b0 : CommittedMfromLSU;
|
||||
assign SquashSCW = SelPTW ? 1'b0 : SquashSCWfromLSU;
|
||||
assign DataMisalignedM = SelPTW ? 1'b0 : DataMisalignedMfromLSU;
|
||||
// HPTWReady is passed straight through from the LSU, independent of SelPTW.
assign HPTWReady = HPTWReadyfromLSU;
|
||||
// *** need to rename DcacheStall and Datastall.
|
||||
// not clear at all. I think it should be LSUStall from the LSU,
|
||||
// which is demuxed to HPTWStall and CPUDataStall? (not sure on this last one).
|
||||
// The walker sees the LSU's DataStall only while it is selected; otherwise
// HPTWStall is held high.
assign HPTWStall = SelPTW ? DataStall : 1'b1;
|
||||
//assign HPTWStallD = SelPTW ? DataStall : 1'b1;
|
||||
/* -----\/----- EXCLUDED -----\/-----
|
||||
assign HPTWStallD = SelPTW ? DataStall : 1'b1;
|
||||
flopr #(1) HPTWStallReg (.clk(clk),
|
||||
.reset(reset),
|
||||
.d(HPTWStallD),
|
||||
.q(HPTWStall));
|
||||
-----/\----- EXCLUDED -----/\----- */
|
||||
|
||||
// The CPU-facing stall is held high while the walker is selected; otherwise it
// mirrors the LSU's DataStall.
assign DCacheStall = SelPTW ? 1'b1 : DataStall; // *** this is probably going to change.
|
||||
|
||||
endmodule
|
@ -26,13 +26,13 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module adrdec (
|
||||
input logic [31:0] HADDR,
|
||||
input logic [31:0] Base, Range,
|
||||
input logic Supported,
|
||||
input logic AccessValid,
|
||||
input logic [2:0] Size,
|
||||
input logic [3:0] SizeMask,
|
||||
output logic HSEL
|
||||
input logic [`PA_BITS-1:0] PhysicalAddress,
|
||||
input logic [`PA_BITS-1:0] Base, Range,
|
||||
input logic Supported,
|
||||
input logic AccessValid,
|
||||
input logic [1:0] Size,
|
||||
input logic [3:0] SizeMask,
|
||||
output logic Sel
|
||||
);
|
||||
|
||||
logic Match;
|
||||
@ -41,12 +41,12 @@ module adrdec (
|
||||
// determine if an address is in a range starting at the base
|
||||
// for example, if Base = 0x04002000 and range = 0x00000FFF,
|
||||
// then any address between 0x04002000 and 0x04002FFF should match (HSEL=1)
|
||||
assign Match = &((HADDR ~^ Base) | Range);
|
||||
assign Match = &((PhysicalAddress ~^ Base) | Range);
|
||||
|
||||
// determine if legal size of access is being made (byte, halfword, word, doubleword)
|
||||
assign SizeValid = SizeMask[Size[1:0]];
|
||||
assign SizeValid = SizeMask[Size];
|
||||
|
||||
assign HSEL = Match && Supported && AccessValid && SizeValid;
|
||||
assign Sel = Match && Supported && AccessValid && SizeValid;
|
||||
|
||||
endmodule
|
||||
|
||||
|
@ -26,19 +26,20 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module adrdecs (
|
||||
input logic [31:0] HADDR, // *** will need to use PAdr in mmu, stick with HADDR in uncore
|
||||
input logic AccessRW, AccessRX, AccessRWX,
|
||||
input logic [2:0] HSIZE,
|
||||
output logic [5:0] HSELRegions
|
||||
input logic [`PA_BITS-1:0] PhysicalAddress,
|
||||
input logic AccessRW, AccessRX, AccessRWX,
|
||||
input logic [1:0] Size,
|
||||
output logic [5:0] SelRegions
|
||||
);
|
||||
|
||||
// Determine which region of physical memory (if any) is being accessed
|
||||
// *** eventually uncomment Access signals
|
||||
adrdec boottimdec(HADDR, `BOOTTIM_BASE, `BOOTTIM_RANGE, `BOOTTIM_SUPPORTED, 1'b1/*AccessRX*/, HSIZE, 4'b1111, HSELRegions[5]);
|
||||
adrdec timdec(HADDR, `TIM_BASE, `TIM_RANGE, `TIM_SUPPORTED, 1'b1/*AccessRWX*/, HSIZE, 4'b1111, HSELRegions[4]);
|
||||
adrdec clintdec(HADDR, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, HSIZE, 4'b1111, HSELRegions[3]);
|
||||
adrdec gpiodec(HADDR, `GPIO_BASE, `GPIO_RANGE, `GPIO_SUPPORTED, AccessRW, HSIZE, 4'b0100, HSELRegions[2]);
|
||||
adrdec uartdec(HADDR, `UART_BASE, `UART_RANGE, `UART_SUPPORTED, AccessRW, HSIZE, 4'b0001, HSELRegions[1]);
|
||||
adrdec plicdec(HADDR, `PLIC_BASE, `PLIC_RANGE, `PLIC_SUPPORTED, AccessRW, HSIZE, 4'b0100, HSELRegions[0]);
|
||||
adrdec boottimdec(PhysicalAddress, `BOOTTIM_BASE, `BOOTTIM_RANGE, `BOOTTIM_SUPPORTED, 1'b1/*AccessRX*/, Size, 4'b1111, SelRegions[5]);
|
||||
adrdec timdec(PhysicalAddress, `TIM_BASE, `TIM_RANGE, `TIM_SUPPORTED, 1'b1/*AccessRWX*/, Size, 4'b1111, SelRegions[4]);
|
||||
adrdec clintdec(PhysicalAddress, `CLINT_BASE, `CLINT_RANGE, `CLINT_SUPPORTED, AccessRW, Size, 4'b1111, SelRegions[3]);
|
||||
adrdec gpiodec(PhysicalAddress, `GPIO_BASE, `GPIO_RANGE, `GPIO_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[2]);
|
||||
adrdec uartdec(PhysicalAddress, `UART_BASE, `UART_RANGE, `UART_SUPPORTED, AccessRW, Size, 4'b0001, SelRegions[1]);
|
||||
adrdec plicdec(PhysicalAddress, `PLIC_BASE, `PLIC_RANGE, `PLIC_SUPPORTED, AccessRW, Size, 4'b0100, SelRegions[0]);
|
||||
|
||||
endmodule
|
||||
|
||||
|
@ -44,6 +44,7 @@ module mmu #(parameter ENTRY_BITS = 3,
|
||||
// x1 - TLB is accessed for a write
|
||||
// 11 - TLB is accessed for both read and write
|
||||
input logic [1:0] TLBAccessType,
|
||||
input logic DisableTranslation,
|
||||
|
||||
// Virtual address input
|
||||
input logic [`XLEN-1:0] VirtualAddress,
|
||||
@ -66,17 +67,14 @@ module mmu #(parameter ENTRY_BITS = 3,
|
||||
output logic TLBPageFault,
|
||||
|
||||
// PMA checker signals
|
||||
input logic [31:0] HADDR,
|
||||
input logic [2:0] HSIZE, HBURST,
|
||||
input logic HWRITE,
|
||||
input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM,
|
||||
input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW, // *** all of these come from the privileged unit, so thwyre gonna have to come over into ifu and dmem
|
||||
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
|
||||
input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
|
||||
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
|
||||
|
||||
output logic SquashBusAccess, // *** send to privileged unit
|
||||
output logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM,
|
||||
output logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM,
|
||||
output logic [5:0] HSELRegions
|
||||
output logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM
|
||||
// output logic [5:0] SelRegions
|
||||
|
||||
);
|
||||
|
||||
@ -96,4 +94,4 @@ module mmu #(parameter ENTRY_BITS = 3,
|
||||
|
||||
assign SquashBusAccess = PMASquashBusAccess || PMPSquashBusAccess;
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
@ -30,61 +30,92 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
/* ***
|
||||
TO-DO:
|
||||
- Implement faults on accessed/dirty behavior
|
||||
*/
|
||||
TO-DO:
|
||||
- Implement faults on accessed/dirty behavior
|
||||
*/
|
||||
|
||||
module pagetablewalker (
|
||||
// Control signals
|
||||
input logic HCLK, HRESETn,
|
||||
input logic [`XLEN-1:0] SATP_REGW,
|
||||
module pagetablewalker
|
||||
(
|
||||
// Control signals
|
||||
input logic clk, reset,
|
||||
input logic [`XLEN-1:0] SATP_REGW,
|
||||
|
||||
// Signals from TLBs (addresses to translate)
|
||||
input logic [`XLEN-1:0] PCF, MemAdrM,
|
||||
input logic ITLBMissF, DTLBMissM,
|
||||
input logic [1:0] MemRWM,
|
||||
// Signals from TLBs (addresses to translate)
|
||||
input logic [`XLEN-1:0] PCF, MemAdrM,
|
||||
input logic ITLBMissF, DTLBMissM,
|
||||
input logic [1:0] MemRWM,
|
||||
|
||||
// Outputs to the TLBs (PTEs to write)
|
||||
output logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM,
|
||||
output logic [1:0] PageTypeF, PageTypeM,
|
||||
output logic ITLBWriteF, DTLBWriteM,
|
||||
// Outputs to the TLBs (PTEs to write)
|
||||
output logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM,
|
||||
output logic [1:0] PageTypeF, PageTypeM,
|
||||
output logic ITLBWriteF, DTLBWriteM,
|
||||
|
||||
// Signals from ahblite (PTEs from memory)
|
||||
input logic [`XLEN-1:0] MMUReadPTE,
|
||||
input logic MMUReady,
|
||||
|
||||
// Signals to ahblite (memory addresses to access)
|
||||
output logic [`XLEN-1:0] MMUPAdr,
|
||||
output logic MMUTranslate,
|
||||
|
||||
// Stall signal
|
||||
output logic MMUStall,
|
||||
|
||||
// Faults
|
||||
output logic WalkerInstrPageFaultF,
|
||||
output logic WalkerLoadPageFaultM,
|
||||
output logic WalkerStorePageFaultM
|
||||
);
|
||||
// *** modify to send to LSU // *** KMG: These are inputs/results from the ahblite whose addresses should have already been checked, so I don't think they need to be sent through the LSU
|
||||
input logic [`XLEN-1:0] MMUReadPTE,
|
||||
input logic MMUReady,
|
||||
input logic HPTWStall,
|
||||
|
||||
// *** modify to send to LSU
|
||||
output logic [`XLEN-1:0] MMUPAdr, // this probalby should be `PA_BITS wide
|
||||
output logic MMUTranslate, // *** rename to HPTWReq
|
||||
output logic HPTWRead,
|
||||
|
||||
|
||||
|
||||
|
||||
// Stall signal
|
||||
output logic MMUStall,
|
||||
|
||||
// Faults
|
||||
output logic WalkerInstrPageFaultF,
|
||||
output logic WalkerLoadPageFaultM,
|
||||
output logic WalkerStorePageFaultM
|
||||
);
|
||||
|
||||
// Internal signals
|
||||
logic [`PPN_BITS-1:0] BasePageTablePPN;
|
||||
logic [`XLEN-1:0] TranslationVAdr;
|
||||
logic [`XLEN-1:0] SavedPTE, CurrentPTE;
|
||||
logic [`PA_BITS-1:0] TranslationPAdr;
|
||||
logic [`PPN_BITS-1:0] CurrentPPN;
|
||||
// register TLBs translation miss requests
|
||||
logic [`XLEN-1:0] TranslationVAdrQ;
|
||||
logic ITLBMissFQ, DTLBMissMQ;
|
||||
|
||||
logic [`PPN_BITS-1:0] BasePageTablePPN;
|
||||
logic [`XLEN-1:0] TranslationVAdr;
|
||||
logic [`XLEN-1:0] SavedPTE, CurrentPTE;
|
||||
logic [`PA_BITS-1:0] TranslationPAdr;
|
||||
logic [`PPN_BITS-1:0] CurrentPPN;
|
||||
logic [`SVMODE_BITS-1:0] SvMode;
|
||||
logic MemStore;
|
||||
logic MemStore;
|
||||
|
||||
// PTE Control Bits
|
||||
logic Dirty, Accessed, Global, User,
|
||||
Executable, Writable, Readable, Valid;
|
||||
logic Dirty, Accessed, Global, User,
|
||||
Executable, Writable, Readable, Valid;
|
||||
// PTE descriptions
|
||||
logic ValidPTE, AccessAlert, MegapageMisaligned, BadMegapage, LeafPTE;
|
||||
logic ValidPTE, AccessAlert, MegapageMisaligned, BadMegapage, LeafPTE;
|
||||
|
||||
// Outputs of walker
|
||||
logic [`XLEN-1:0] PageTableEntry;
|
||||
logic [1:0] PageType;
|
||||
logic [`XLEN-1:0] PageTableEntry;
|
||||
logic [1:0] PageType;
|
||||
logic StartWalk;
|
||||
logic EndWalk;
|
||||
|
||||
typedef enum {LEVEL0_WDV,
|
||||
LEVEL0,
|
||||
LEVEL1_WDV,
|
||||
LEVEL1,
|
||||
LEVEL2_WDV,
|
||||
LEVEL2,
|
||||
LEVEL3_WDV,
|
||||
LEVEL3,
|
||||
LEAF,
|
||||
IDLE,
|
||||
FAULT} statetype;
|
||||
|
||||
statetype WalkerState, NextWalkerState;
|
||||
|
||||
logic PRegEn;
|
||||
|
||||
assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS];
|
||||
|
||||
assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0];
|
||||
@ -92,8 +123,41 @@ module pagetablewalker (
|
||||
assign MemStore = MemRWM[0];
|
||||
|
||||
// Prefer data address translations over instruction address translations
|
||||
assign TranslationVAdr = (DTLBMissM) ? MemAdrM : PCF;
|
||||
assign MMUTranslate = DTLBMissM || ITLBMissF;
|
||||
assign TranslationVAdr = (DTLBMissM) ? MemAdrM : PCF; // *** need to register TranslationVAdr
|
||||
flopenr #(`XLEN)
|
||||
TranslationVAdrReg(.clk(clk),
|
||||
.reset(reset),
|
||||
.en(StartWalk), // *** use enable later to save power
|
||||
.d(TranslationVAdr),
|
||||
.q(TranslationVAdrQ));
|
||||
|
||||
flopenrc #(1)
|
||||
DTLBMissMReg(.clk(clk),
|
||||
.reset(reset),
|
||||
.en(StartWalk | EndWalk),
|
||||
.clear(EndWalk),
|
||||
.d(DTLBMissM),
|
||||
.q(DTLBMissMQ));
|
||||
|
||||
flopenrc #(1)
|
||||
ITLBMissMReg(.clk(clk),
|
||||
.reset(reset),
|
||||
.en(StartWalk | EndWalk),
|
||||
.clear(EndWalk),
|
||||
.d(ITLBMissF),
|
||||
.q(ITLBMissFQ));
|
||||
|
||||
|
||||
assign StartWalk = WalkerState == IDLE && (DTLBMissM | ITLBMissF);
|
||||
assign EndWalk = WalkerState == LEAF ||
|
||||
//(WalkerState == LEVEL0 && ValidPTE && LeafPTE && ~AccessAlert) ||
|
||||
(WalkerState == LEVEL1 && ValidPTE && LeafPTE && ~AccessAlert) ||
|
||||
(WalkerState == LEVEL2 && ValidPTE && LeafPTE && ~AccessAlert) ||
|
||||
(WalkerState == LEVEL3 && ValidPTE && LeafPTE && ~AccessAlert) ||
|
||||
(WalkerState == FAULT);
|
||||
|
||||
assign MMUTranslate = (DTLBMissMQ | ITLBMissFQ) & ~EndWalk;
|
||||
//assign MMUTranslate = DTLBMissM | ITLBMissF;
|
||||
|
||||
// unswizzle PTE bits
|
||||
assign {Dirty, Accessed, Global, User,
|
||||
@ -102,7 +166,7 @@ module pagetablewalker (
|
||||
// Assign PTE descriptors common across all XLEN values
|
||||
assign LeafPTE = Executable | Writable | Readable;
|
||||
assign ValidPTE = Valid && ~(Writable && ~Readable);
|
||||
assign AccessAlert = ~Accessed || (MemStore && ~Dirty);
|
||||
assign AccessAlert = ~Accessed | (MemStore & ~Dirty);
|
||||
|
||||
// Assign specific outputs to general outputs
|
||||
assign PageTableEntryF = PageTableEntry;
|
||||
@ -110,43 +174,113 @@ module pagetablewalker (
|
||||
assign PageTypeF = PageType;
|
||||
assign PageTypeM = PageType;
|
||||
|
||||
localparam LEVEL0 = 3'h0;
|
||||
localparam LEVEL1 = 3'h1;
|
||||
// space left for more levels
|
||||
localparam LEAF = 3'h5;
|
||||
localparam IDLE = 3'h6;
|
||||
localparam FAULT = 3'h7;
|
||||
|
||||
logic [2:0] WalkerState, NextWalkerState;
|
||||
|
||||
generate
|
||||
if (`XLEN == 32) begin
|
||||
logic [9:0] VPN1, VPN0;
|
||||
|
||||
flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
|
||||
flopenl #(.TYPE(statetype)) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState);
|
||||
|
||||
/* -----\/----- EXCLUDED -----\/-----
|
||||
assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV) && ~HPTWStall;
|
||||
-----/\----- EXCLUDED -----/\----- */
|
||||
|
||||
// State transition logic
|
||||
always_comb begin
|
||||
PRegEn = 1'b0;
|
||||
TranslationPAdr = '0;
|
||||
HPTWRead = 1'b0;
|
||||
MMUStall = 1'b1;
|
||||
PageTableEntry = '0;
|
||||
PageType = '0;
|
||||
DTLBWriteM = '0;
|
||||
ITLBWriteF = '0;
|
||||
|
||||
WalkerInstrPageFaultF = 1'b0;
|
||||
WalkerLoadPageFaultM = 1'b0;
|
||||
WalkerStorePageFaultM = 1'b0;
|
||||
|
||||
case (WalkerState)
|
||||
IDLE: if (MMUTranslate) NextWalkerState = LEVEL1;
|
||||
else NextWalkerState = IDLE;
|
||||
LEVEL1: if (~MMUReady) NextWalkerState = LEVEL1;
|
||||
// *** <FUTURE WORK> According to the architecture, we should
|
||||
// fault upon finding a superpage that is misaligned or has 0
|
||||
// access bit. The following commented line of code is
|
||||
// supposed to perform that check. However, it is untested.
|
||||
else if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF;
|
||||
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
|
||||
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0;
|
||||
else NextWalkerState = FAULT;
|
||||
LEVEL0: if (~MMUReady) NextWalkerState = LEVEL0;
|
||||
else if (ValidPTE && LeafPTE && ~AccessAlert)
|
||||
NextWalkerState = LEAF;
|
||||
else NextWalkerState = FAULT;
|
||||
LEAF: if (MMUTranslate) NextWalkerState = LEVEL1;
|
||||
else NextWalkerState = IDLE;
|
||||
FAULT: if (MMUTranslate) NextWalkerState = LEVEL1;
|
||||
else NextWalkerState = IDLE;
|
||||
IDLE: begin
|
||||
if (MMUTranslate && SvMode == `SV32) begin // *** Added SvMode
|
||||
NextWalkerState = LEVEL1_WDV;
|
||||
TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00};
|
||||
HPTWRead = 1'b1;
|
||||
end else begin
|
||||
NextWalkerState = IDLE;
|
||||
TranslationPAdr = '0;
|
||||
MMUStall = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
LEVEL1_WDV: begin
|
||||
TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00};
|
||||
if (HPTWStall) begin
|
||||
NextWalkerState = LEVEL1_WDV;
|
||||
end else begin
|
||||
NextWalkerState = LEVEL1;
|
||||
PRegEn = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
LEVEL1: begin
|
||||
// *** <FUTURE WORK> According to the architecture, we should
|
||||
// fault upon finding a superpage that is misaligned or has 0
|
||||
// access bit. The following commented line of code is
|
||||
// supposed to perform that check. However, it is untested.
|
||||
if (ValidPTE && LeafPTE && ~BadMegapage) begin
|
||||
NextWalkerState = LEAF;
|
||||
PageTableEntry = CurrentPTE;
|
||||
PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00; // *** not sure about this mux?
|
||||
DTLBWriteM = DTLBMissMQ;
|
||||
ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions
|
||||
TranslationPAdr = {2'b00, TranslationVAdrQ[31:0]};
|
||||
end
|
||||
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
|
||||
else if (ValidPTE && ~LeafPTE) begin
|
||||
NextWalkerState = LEVEL0_WDV;
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
|
||||
HPTWRead = 1'b1;
|
||||
end else begin
|
||||
NextWalkerState = FAULT;
|
||||
end
|
||||
end
|
||||
|
||||
LEVEL0_WDV: begin
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
|
||||
if (HPTWStall) begin
|
||||
NextWalkerState = LEVEL0_WDV;
|
||||
end else begin
|
||||
NextWalkerState = LEVEL0;
|
||||
PRegEn = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
LEVEL0: begin
|
||||
if (ValidPTE & LeafPTE & ~AccessAlert) begin
|
||||
NextWalkerState = LEAF;
|
||||
PageTableEntry = CurrentPTE;
|
||||
PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00;
|
||||
DTLBWriteM = DTLBMissMQ;
|
||||
ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions
|
||||
TranslationPAdr = {2'b00, TranslationVAdrQ[31:0]};
|
||||
end else begin
|
||||
NextWalkerState = FAULT;
|
||||
end
|
||||
end
|
||||
|
||||
LEAF: begin
|
||||
NextWalkerState = IDLE;
|
||||
MMUStall = 1'b0;
|
||||
end
|
||||
FAULT: begin
|
||||
NextWalkerState = IDLE;
|
||||
WalkerInstrPageFaultF = ~DTLBMissMQ;
|
||||
WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore;
|
||||
WalkerStorePageFaultM = DTLBMissMQ && MemStore;
|
||||
MMUStall = 1'b0;
|
||||
end
|
||||
|
||||
// Default case should never happen, but is included for linter.
|
||||
default: NextWalkerState = IDLE;
|
||||
endcase
|
||||
@ -156,56 +290,23 @@ localparam LEVEL0 = 3'h0;
|
||||
assign MegapageMisaligned = |(CurrentPPN[9:0]);
|
||||
assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme
|
||||
|
||||
assign VPN1 = TranslationVAdr[31:22];
|
||||
assign VPN0 = TranslationVAdr[21:12];
|
||||
assign VPN1 = TranslationVAdrQ[31:22];
|
||||
assign VPN0 = TranslationVAdrQ[21:12];
|
||||
|
||||
// Assign combinational outputs
|
||||
always_comb begin
|
||||
// default values
|
||||
TranslationPAdr = '0;
|
||||
PageTableEntry = '0;
|
||||
PageType ='0;
|
||||
DTLBWriteM = '0;
|
||||
ITLBWriteF = '0;
|
||||
WalkerInstrPageFaultF = '0;
|
||||
WalkerLoadPageFaultM = '0;
|
||||
WalkerStorePageFaultM = '0;
|
||||
MMUStall = '1;
|
||||
//assign HPTWRead = (WalkerState == IDLE && MMUTranslate) ||
|
||||
// WalkerState == LEVEL2 || WalkerState == LEVEL1;
|
||||
|
||||
|
||||
case (NextWalkerState)
|
||||
IDLE: begin
|
||||
MMUStall = '0;
|
||||
end
|
||||
LEVEL1: begin
|
||||
TranslationPAdr = {BasePageTablePPN, VPN1, 2'b00};
|
||||
end
|
||||
LEVEL0: begin
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
|
||||
end
|
||||
LEAF: begin
|
||||
// Keep physical address alive to prevent HADDR dropping to 0
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
|
||||
PageTableEntry = CurrentPTE;
|
||||
PageType = (WalkerState == LEVEL1) ? 2'b01 : 2'b00;
|
||||
DTLBWriteM = DTLBMissM;
|
||||
ITLBWriteF = ~DTLBMissM; // Prefer data over instructions
|
||||
end
|
||||
FAULT: begin
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 2'b00};
|
||||
WalkerInstrPageFaultF = ~DTLBMissM;
|
||||
WalkerLoadPageFaultM = DTLBMissM && ~MemStore;
|
||||
WalkerStorePageFaultM = DTLBMissM && MemStore;
|
||||
MMUStall = '0; // Drop the stall early to enter trap handling code
|
||||
end
|
||||
default: begin
|
||||
// nothing
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// Capture page table entry from ahblite
|
||||
flopenr #(32) ptereg(HCLK, ~HRESETn, MMUReady, MMUReadPTE, SavedPTE);
|
||||
mux2 #(32) ptemux(SavedPTE, MMUReadPTE, MMUReady, CurrentPTE);
|
||||
// Capture page table entry from data cache
|
||||
// *** may need to delay reading this value until the next clock cycle.
|
||||
// The clk to q latency of the SRAM in the data cache will be long.
|
||||
// I cannot see directly using this value. This is no different than
|
||||
// a load delay hazard. This will require rewriting the walker fsm.
|
||||
// also need a new signal to save. Should be a mealy output of the fsm
|
||||
// request followed by ~stall.
|
||||
flopenr #(32) ptereg(clk, reset, PRegEn, MMUReadPTE, SavedPTE);
|
||||
//mux2 #(32) ptemux(SavedPTE, MMUReadPTE, PRegEn, CurrentPTE);
|
||||
assign CurrentPTE = SavedPTE;
|
||||
assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10];
|
||||
|
||||
// Assign outputs to ahblite
|
||||
@ -214,64 +315,206 @@ localparam LEVEL0 = 3'h0;
|
||||
assign MMUPAdr = TranslationPAdr[31:0];
|
||||
|
||||
end else begin
|
||||
localparam LEVEL2 = 3'h2;
|
||||
localparam LEVEL3 = 3'h3;
|
||||
|
||||
logic [8:0] VPN3, VPN2, VPN1, VPN0;
|
||||
|
||||
logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage;
|
||||
logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage;
|
||||
|
||||
flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
|
||||
flopenl #(.TYPE(statetype)) mmureg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState);
|
||||
|
||||
/* -----\/----- EXCLUDED -----\/-----
|
||||
assign PRegEn = (WalkerState == LEVEL1_WDV || WalkerState == LEVEL0_WDV ||
|
||||
WalkerState == LEVEL2_WDV || WalkerState == LEVEL3_WDV) && ~HPTWStall;
|
||||
-----/\----- EXCLUDED -----/\----- */
|
||||
|
||||
//assign HPTWRead = (WalkerState == IDLE && MMUTranslate) || WalkerState == LEVEL3 ||
|
||||
// WalkerState == LEVEL2 || WalkerState == LEVEL1;
|
||||
|
||||
|
||||
always_comb begin
|
||||
PRegEn = 1'b0;
|
||||
TranslationPAdr = '0;
|
||||
HPTWRead = 1'b0;
|
||||
MMUStall = 1'b1;
|
||||
PageTableEntry = '0;
|
||||
PageType = '0;
|
||||
DTLBWriteM = '0;
|
||||
ITLBWriteF = '0;
|
||||
|
||||
WalkerInstrPageFaultF = 1'b0;
|
||||
WalkerLoadPageFaultM = 1'b0;
|
||||
WalkerStorePageFaultM = 1'b0;
|
||||
|
||||
case (WalkerState)
|
||||
IDLE: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3;
|
||||
else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2;
|
||||
else NextWalkerState = IDLE;
|
||||
IDLE: begin
|
||||
if (MMUTranslate && SvMode == `SV48) begin
|
||||
NextWalkerState = LEVEL3_WDV;
|
||||
TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000};
|
||||
HPTWRead = 1'b1;
|
||||
end else if (MMUTranslate && SvMode == `SV39) begin
|
||||
NextWalkerState = LEVEL2_WDV;
|
||||
TranslationPAdr = {BasePageTablePPN, VPN2, 3'b000};
|
||||
HPTWRead = 1'b1;
|
||||
end else begin
|
||||
NextWalkerState = IDLE;
|
||||
TranslationPAdr = '0;
|
||||
MMUStall = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
LEVEL3: if (~MMUReady) NextWalkerState = LEVEL3;
|
||||
// *** <FUTURE WORK> According to the architecture, we should
|
||||
// fault upon finding a superpage that is misaligned or has 0
|
||||
// access bit. The following commented line of code is
|
||||
// supposed to perform that check. However, it is untested.
|
||||
else if (ValidPTE && LeafPTE && ~BadTerapage) NextWalkerState = LEAF;
|
||||
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
|
||||
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL2;
|
||||
else NextWalkerState = FAULT;
|
||||
LEVEL3_WDV: begin
|
||||
TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000};
|
||||
if (HPTWStall) begin
|
||||
NextWalkerState = LEVEL3_WDV;
|
||||
end else begin
|
||||
NextWalkerState = LEVEL3;
|
||||
PRegEn = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
LEVEL3: begin
|
||||
// *** <FUTURE WORK> According to the architecture, we should
|
||||
// fault upon finding a superpage that is misaligned or has 0
|
||||
// access bit. The following commented line of code is
|
||||
// supposed to perform that check. However, it is untested.
|
||||
if (ValidPTE && LeafPTE && ~BadTerapage) begin
|
||||
NextWalkerState = LEAF;
|
||||
PageTableEntry = CurrentPTE;
|
||||
PageType = (WalkerState == LEVEL3) ? 2'b11 : // *** not sure about this mux?
|
||||
((WalkerState == LEVEL2) ? 2'b10 :
|
||||
((WalkerState == LEVEL1) ? 2'b01 : 2'b00));
|
||||
DTLBWriteM = DTLBMissMQ;
|
||||
ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions
|
||||
TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0];
|
||||
end
|
||||
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
|
||||
else if (ValidPTE && ~LeafPTE) begin
|
||||
NextWalkerState = LEVEL2_WDV;
|
||||
TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000};
|
||||
HPTWRead = 1'b1;
|
||||
end else begin
|
||||
NextWalkerState = FAULT;
|
||||
end
|
||||
|
||||
LEVEL2: if (~MMUReady) NextWalkerState = LEVEL2;
|
||||
// *** <FUTURE WORK> According to the architecture, we should
|
||||
// fault upon finding a superpage that is misaligned or has 0
|
||||
// access bit. The following commented line of code is
|
||||
// supposed to perform that check. However, it is untested.
|
||||
else if (ValidPTE && LeafPTE && ~BadGigapage) NextWalkerState = LEAF;
|
||||
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
|
||||
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL1;
|
||||
else NextWalkerState = FAULT;
|
||||
end
|
||||
|
||||
LEVEL1: if (~MMUReady) NextWalkerState = LEVEL1;
|
||||
// *** <FUTURE WORK> According to the architecture, we should
|
||||
// fault upon finding a superpage that is misaligned or has 0
|
||||
// access bit. The following commented line of code is
|
||||
// supposed to perform that check. However, it is untested.
|
||||
else if (ValidPTE && LeafPTE && ~BadMegapage) NextWalkerState = LEAF;
|
||||
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
|
||||
else if (ValidPTE && ~LeafPTE) NextWalkerState = LEVEL0;
|
||||
else NextWalkerState = FAULT;
|
||||
LEVEL2_WDV: begin
|
||||
TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000};
|
||||
//HPTWRead = 1'b1;
|
||||
if (HPTWStall) begin
|
||||
NextWalkerState = LEVEL2_WDV;
|
||||
end else begin
|
||||
NextWalkerState = LEVEL2;
|
||||
PRegEn = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
LEVEL2: begin
|
||||
// *** <FUTURE WORK> According to the architecture, we should
|
||||
// fault upon finding a superpage that is misaligned or has 0
|
||||
// access bit. The following commented line of code is
|
||||
// supposed to perform that check. However, it is untested.
|
||||
if (ValidPTE && LeafPTE && ~BadGigapage) begin
|
||||
NextWalkerState = LEAF;
|
||||
PageTableEntry = CurrentPTE;
|
||||
PageType = (WalkerState == LEVEL3) ? 2'b11 :
|
||||
((WalkerState == LEVEL2) ? 2'b10 :
|
||||
((WalkerState == LEVEL1) ? 2'b01 : 2'b00));
|
||||
DTLBWriteM = DTLBMissMQ;
|
||||
ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions
|
||||
TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0];
|
||||
end
|
||||
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
|
||||
else if (ValidPTE && ~LeafPTE) begin
|
||||
NextWalkerState = LEVEL1_WDV;
|
||||
TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
|
||||
HPTWRead = 1'b1;
|
||||
end else begin
|
||||
NextWalkerState = FAULT;
|
||||
end
|
||||
|
||||
LEVEL0: if (~MMUReady) NextWalkerState = LEVEL0;
|
||||
else if (ValidPTE && LeafPTE && ~AccessAlert) NextWalkerState = LEAF;
|
||||
else NextWalkerState = FAULT;
|
||||
|
||||
LEAF: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3;
|
||||
else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2;
|
||||
else NextWalkerState = IDLE;
|
||||
end
|
||||
|
||||
LEVEL1_WDV: begin
|
||||
TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
|
||||
//HPTWRead = 1'b1;
|
||||
if (HPTWStall) begin
|
||||
NextWalkerState = LEVEL1_WDV;
|
||||
end else begin
|
||||
NextWalkerState = LEVEL1;
|
||||
PRegEn = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
LEVEL1: begin
|
||||
// *** <FUTURE WORK> According to the architecture, we should
|
||||
// fault upon finding a superpage that is misaligned or has 0
|
||||
// access bit. The following commented line of code is
|
||||
// supposed to perform that check. However, it is untested.
|
||||
if (ValidPTE && LeafPTE && ~BadMegapage) begin
|
||||
NextWalkerState = LEAF;
|
||||
PageTableEntry = CurrentPTE;
|
||||
PageType = (WalkerState == LEVEL3) ? 2'b11 :
|
||||
((WalkerState == LEVEL2) ? 2'b10 :
|
||||
((WalkerState == LEVEL1) ? 2'b01 : 2'b00));
|
||||
DTLBWriteM = DTLBMissMQ;
|
||||
ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions
|
||||
TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0];
|
||||
|
||||
end
|
||||
// else if (ValidPTE && LeafPTE) NextWalkerState = LEAF; // *** Once the above line is properly tested, delete this line.
|
||||
else if (ValidPTE && ~LeafPTE) begin
|
||||
NextWalkerState = LEVEL0_WDV;
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
|
||||
HPTWRead = 1'b1;
|
||||
end else begin
|
||||
NextWalkerState = FAULT;
|
||||
end
|
||||
end
|
||||
|
||||
LEVEL0_WDV: begin
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
|
||||
if (HPTWStall) begin
|
||||
NextWalkerState = LEVEL0_WDV;
|
||||
end else begin
|
||||
NextWalkerState = LEVEL0;
|
||||
PRegEn = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
LEVEL0: begin
|
||||
if (ValidPTE && LeafPTE && ~AccessAlert) begin
|
||||
NextWalkerState = LEAF;
|
||||
PageTableEntry = CurrentPTE;
|
||||
PageType = (WalkerState == LEVEL3) ? 2'b11 :
|
||||
((WalkerState == LEVEL2) ? 2'b10 :
|
||||
((WalkerState == LEVEL1) ? 2'b01 : 2'b00));
|
||||
DTLBWriteM = DTLBMissMQ;
|
||||
ITLBWriteF = ~DTLBMissMQ; // Prefer data over instructions
|
||||
TranslationPAdr = TranslationVAdrQ[`PA_BITS-1:0];
|
||||
end else begin
|
||||
NextWalkerState = FAULT;
|
||||
end
|
||||
end
|
||||
|
||||
LEAF: begin
|
||||
NextWalkerState = IDLE;
|
||||
MMUStall = 1'b0;
|
||||
end
|
||||
|
||||
FAULT: begin
|
||||
NextWalkerState = IDLE;
|
||||
WalkerInstrPageFaultF = ~DTLBMissMQ;
|
||||
WalkerLoadPageFaultM = DTLBMissMQ && ~MemStore;
|
||||
WalkerStorePageFaultM = DTLBMissMQ && MemStore;
|
||||
MMUStall = 1'b0;
|
||||
end
|
||||
|
||||
// Default case should never happen
|
||||
default: begin
|
||||
NextWalkerState = IDLE;
|
||||
end
|
||||
|
||||
FAULT: if (MMUTranslate && SvMode == `SV48) NextWalkerState = LEVEL3;
|
||||
else if (MMUTranslate && SvMode == `SV39) NextWalkerState = LEVEL2;
|
||||
else NextWalkerState = IDLE;
|
||||
// Default case should never happen, but is included for linter.
|
||||
default: NextWalkerState = IDLE;
|
||||
endcase
|
||||
end
|
||||
|
||||
@ -288,76 +531,22 @@ localparam LEVEL0 = 3'h0;
|
||||
assign BadGigapage = GigapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme
|
||||
assign BadMegapage = MegapageMisaligned || AccessAlert; // *** Implement better access/dirty scheme
|
||||
|
||||
assign VPN3 = TranslationVAdr[47:39];
|
||||
assign VPN2 = TranslationVAdr[38:30];
|
||||
assign VPN1 = TranslationVAdr[29:21];
|
||||
assign VPN0 = TranslationVAdr[20:12];
|
||||
assign VPN3 = TranslationVAdrQ[47:39];
|
||||
assign VPN2 = TranslationVAdrQ[38:30];
|
||||
assign VPN1 = TranslationVAdrQ[29:21];
|
||||
assign VPN0 = TranslationVAdrQ[20:12];
|
||||
|
||||
always_comb begin
|
||||
// default values
|
||||
TranslationPAdr = '0;
|
||||
PageTableEntry = '0;
|
||||
PageType = '0;
|
||||
DTLBWriteM = '0;
|
||||
ITLBWriteF = '0;
|
||||
WalkerInstrPageFaultF = '0;
|
||||
WalkerLoadPageFaultM = '0;
|
||||
WalkerStorePageFaultM = '0;
|
||||
|
||||
// The MMU defaults to stalling the processor
|
||||
MMUStall = '1;
|
||||
|
||||
case (NextWalkerState)
|
||||
IDLE: begin
|
||||
MMUStall = '0;
|
||||
end
|
||||
LEVEL3: begin
|
||||
TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000};
|
||||
// *** this is a huge breaking point. if we're going through level3 every time, even when sv48 is off,
|
||||
// what should translationPAdr be when level3 is just off?
|
||||
end
|
||||
LEVEL2: begin
|
||||
TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000};
|
||||
end
|
||||
LEVEL1: begin
|
||||
TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
|
||||
end
|
||||
LEVEL0: begin
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
|
||||
end
|
||||
LEAF: begin
|
||||
// Keep physical address alive to prevent HADDR dropping to 0
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
|
||||
PageTableEntry = CurrentPTE;
|
||||
PageType = (WalkerState == LEVEL3) ? 2'b11 :
|
||||
((WalkerState == LEVEL2) ? 2'b10 :
|
||||
((WalkerState == LEVEL1) ? 2'b01 : 2'b00));
|
||||
DTLBWriteM = DTLBMissM;
|
||||
ITLBWriteF = ~DTLBMissM; // Prefer data over instructions
|
||||
end
|
||||
FAULT: begin
|
||||
// Keep physical address alive to prevent HADDR dropping to 0
|
||||
TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
|
||||
WalkerInstrPageFaultF = ~DTLBMissM;
|
||||
WalkerLoadPageFaultM = DTLBMissM && ~MemStore;
|
||||
WalkerStorePageFaultM = DTLBMissM && MemStore;
|
||||
MMUStall = '0; // Drop the stall early to enter trap handling code
|
||||
end
|
||||
default: begin
|
||||
// nothing
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// Capture page table entry from ahblite
|
||||
flopenr #(`XLEN) ptereg(HCLK, ~HRESETn, MMUReady, MMUReadPTE, SavedPTE);
|
||||
mux2 #(`XLEN) ptemux(SavedPTE, MMUReadPTE, MMUReady, CurrentPTE);
|
||||
flopenr #(`XLEN) ptereg(clk, reset, PRegEn, MMUReadPTE, SavedPTE);
|
||||
//mux2 #(`XLEN) ptemux(SavedPTE, MMUReadPTE, PRegEn, CurrentPTE);
|
||||
assign CurrentPTE = SavedPTE;
|
||||
assign CurrentPPN = CurrentPTE[`PPN_BITS+9:10];
|
||||
|
||||
// Assign outputs to ahblite
|
||||
// *** Currently truncate address to 32 bits. This must be changed if
|
||||
// we support larger physical address spaces
|
||||
assign MMUPAdr = {{(`XLEN-32){1'b0}}, TranslationPAdr[31:0]};
|
||||
assign MMUPAdr = {{(`XLEN-`PA_BITS){1'b0}}, TranslationPAdr[`PA_BITS-1:0]};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
@ -32,25 +32,20 @@ module pmachecker (
|
||||
|
||||
input logic [`PA_BITS-1:0] PhysicalAddress,
|
||||
input logic [1:0] Size,
|
||||
input logic [31:0] HADDR,
|
||||
input logic [2:0] HSIZE,
|
||||
// input logic [2:0] HBURST, // *** in AHBlite, HBURST is hardwired to zero for single bursts only allowed. consider removing from this module if unused.
|
||||
|
||||
input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // *** atomicaccessM is unused but might want to stay in for future use.
|
||||
|
||||
output logic Cacheable, Idempotent, AtomicAllowed,
|
||||
output logic PMASquashBusAccess,
|
||||
|
||||
output logic [5:0] HSELRegions,
|
||||
|
||||
output logic PMAInstrAccessFaultF,
|
||||
output logic PMALoadAccessFaultM,
|
||||
output logic PMAStoreAccessFaultM
|
||||
);
|
||||
|
||||
// logic BootTim, Tim, CLINT, GPIO, UART, PLIC;
|
||||
logic PMAAccessFault;
|
||||
logic AccessRW, AccessRWX, AccessRX;
|
||||
logic [5:0] SelRegions;
|
||||
|
||||
// Determine what type of access is being made
|
||||
assign AccessRW = ReadAccessM | WriteAccessM;
|
||||
@ -58,17 +53,18 @@ module pmachecker (
|
||||
assign AccessRX = ReadAccessM | ExecuteAccessF;
|
||||
|
||||
// Determine which region of physical memory (if any) is being accessed
|
||||
adrdecs adrdecs(HADDR, AccessRW, AccessRX, AccessRWX, HSIZE, HSELRegions);
|
||||
adrdecs adrdecs(PhysicalAddress, AccessRW, AccessRX, AccessRWX, Size, SelRegions);
|
||||
|
||||
// Only RAM memory regions are cacheable
|
||||
assign Cacheable = HSELRegions[5] | HSELRegions[4];
|
||||
assign Idempotent = HSELRegions[4];
|
||||
assign AtomicAllowed = HSELRegions[4];
|
||||
assign Cacheable = SelRegions[5] | SelRegions[4];
|
||||
assign Idempotent = SelRegions[4];
|
||||
assign AtomicAllowed = SelRegions[4];
|
||||
|
||||
// Detect access faults
|
||||
assign PMAAccessFault = (~|HSELRegions) && AccessRWX;
|
||||
assign PMAAccessFault = (~|SelRegions) & AccessRWX;
|
||||
assign PMAInstrAccessFaultF = ExecuteAccessF && PMAAccessFault;
|
||||
assign PMALoadAccessFaultM = ReadAccessM && PMAAccessFault;
|
||||
assign PMAStoreAccessFaultM = WriteAccessM && PMAAccessFault;
|
||||
assign PMASquashBusAccess = PMAAccessFault;
|
||||
endmodule
|
||||
|
||||
|
@ -30,35 +30,43 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module pmpadrdec (
|
||||
input logic [31:0] HADDR, // *** replace with PAdr
|
||||
input logic [1:0] AdrMode,
|
||||
input logic [`XLEN-1:0] CurrentPMPAdr,
|
||||
input logic AdrAtLeastPreviousPMP,
|
||||
output logic AdrAtLeastCurrentPMP,
|
||||
output logic Match
|
||||
input logic [`PA_BITS-1:0] PhysicalAddress,
|
||||
input logic [7:0] PMPCfg,
|
||||
input logic [`XLEN-1:0] PMPAdr,
|
||||
input logic PAgePMPAdrIn,
|
||||
input logic NoLowerMatchIn,
|
||||
output logic PAgePMPAdrOut,
|
||||
output logic NoLowerMatchOut,
|
||||
output logic Match, Active,
|
||||
output logic L, X, W, R
|
||||
);
|
||||
|
||||
|
||||
localparam TOR = 2'b01;
|
||||
localparam NA4 = 2'b10;
|
||||
localparam NAPOT = 2'b11;
|
||||
|
||||
logic TORMatch, NAMatch;
|
||||
logic AdrBelowCurrentPMP;
|
||||
logic PAltPMPAdr;
|
||||
logic FirstMatch;
|
||||
logic [`PA_BITS-1:0] CurrentAdrFull;
|
||||
logic [`PA_BITS-1:0] FakePhysAdr;
|
||||
logic [1:0] AdrMode;
|
||||
|
||||
// ***replace this when the true physical address from MMU is available
|
||||
assign FakePhysAdr = {{(`PA_BITS-32){1'b0}}, HADDR};
|
||||
|
||||
assign AdrMode = PMPCfg[4:3];
|
||||
|
||||
// The two lsb of the physical address don't matter for this checking.
|
||||
// The following code includes them, but hardwires the PMP checker lsbs to 00
|
||||
// and masks them later. Logic synthesis should optimize away these bottom bits.
|
||||
|
||||
// Top-of-range (TOR)
|
||||
// Append two implicit trailing 0's to PMPAdr value
|
||||
assign CurrentAdrFull = {CurrentPMPAdr[`PA_BITS-3:0], 2'b00};
|
||||
assign AdrBelowCurrentPMP = /*HADDR */FakePhysAdr < CurrentAdrFull; // *** make sure unsigned comparison works correctly
|
||||
assign AdrAtLeastCurrentPMP = ~AdrBelowCurrentPMP;
|
||||
assign TORMatch = AdrAtLeastPreviousPMP && AdrBelowCurrentPMP;
|
||||
assign CurrentAdrFull = {PMPAdr[`PA_BITS-3:0], 2'b00};
|
||||
assign PAltPMPAdr = {1'b0, PhysicalAddress} < {1'b0, CurrentAdrFull}; // unsigned comparison
|
||||
assign PAgePMPAdrOut = ~PAltPMPAdr;
|
||||
assign TORMatch = PAgePMPAdrIn && PAltPMPAdr;
|
||||
|
||||
// Naturally aligned regions
|
||||
// *** should be able to optimize away bottom 2 bits
|
||||
|
||||
// verilator lint_off UNOPTFLAT
|
||||
logic [`PA_BITS-1:0] Mask;
|
||||
@ -69,69 +77,22 @@ module pmpadrdec (
|
||||
assign Mask[1:0] = 2'b11;
|
||||
assign Mask[2] = (AdrMode == NAPOT); // mask has 0s in upper bis for NA4 region
|
||||
for (i=3; i < `PA_BITS; i=i+1)
|
||||
assign Mask[i] = Mask[i-1] & CurrentPMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore
|
||||
assign Mask[i] = Mask[i-1] & PMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore
|
||||
endgenerate
|
||||
// verilator lint_on UNOPTFLAT
|
||||
|
||||
assign NAMatch = &((FakePhysAdr ~^ CurrentAdrFull) | Mask);
|
||||
|
||||
/* generate
|
||||
if (`XLEN == 32 || `XLEN == 64) begin // ***redo for various sizes
|
||||
// priority encoder to translate address to range
|
||||
// *** We'd like to replace this with a better priority encoder
|
||||
// *** We should not be truncating 64 bit physical addresses to 32 bits...
|
||||
// *** there is an easy combinatinoal way to do this with a cascade of AND gates O(32) rather than O(32^2) dh
|
||||
always_comb
|
||||
if (AdrMode == NA4) Range = (2**2) - 1;
|
||||
else casez (CurrentPMPAdr[31:0]) // NAPOT regions
|
||||
32'b???????????????????????????????0: Range = (2**3) - 1;
|
||||
32'b??????????????????????????????01: Range = (2**4) - 1;
|
||||
32'b?????????????????????????????011: Range = (2**5) - 1;
|
||||
32'b????????????????????????????0111: Range = (2**6) - 1;
|
||||
32'b???????????????????????????01111: Range = (2**7) - 1;
|
||||
32'b??????????????????????????011111: Range = (2**8) - 1;
|
||||
32'b?????????????????????????0111111: Range = (2**9) - 1;
|
||||
32'b????????????????????????01111111: Range = (2**10) - 1;
|
||||
32'b???????????????????????011111111: Range = (2**11) - 1;
|
||||
32'b??????????????????????0111111111: Range = (2**12) - 1;
|
||||
32'b?????????????????????01111111111: Range = (2**13) - 1;
|
||||
32'b????????????????????011111111111: Range = (2**14) - 1;
|
||||
32'b???????????????????0111111111111: Range = (2**15) - 1;
|
||||
32'b??????????????????01111111111111: Range = (2**16) - 1;
|
||||
32'b?????????????????011111111111111: Range = (2**17) - 1;
|
||||
32'b????????????????0111111111111111: Range = (2**18) - 1;
|
||||
32'b???????????????01111111111111111: Range = (2**19) - 1;
|
||||
32'b??????????????011111111111111111: Range = (2**20) - 1;
|
||||
32'b?????????????0111111111111111111: Range = (2**21) - 1;
|
||||
32'b????????????01111111111111111111: Range = (2**22) - 1;
|
||||
32'b???????????011111111111111111111: Range = (2**23) - 1;
|
||||
32'b??????????0111111111111111111111: Range = (2**24) - 1;
|
||||
32'b?????????01111111111111111111111: Range = (2**25) - 1;
|
||||
32'b????????011111111111111111111111: Range = (2**26) - 1;
|
||||
32'b???????0111111111111111111111111: Range = (2**27) - 1;
|
||||
32'b??????01111111111111111111111111: Range = (2**28) - 1;
|
||||
32'b?????011111111111111111111111111: Range = (2**29) - 1;
|
||||
32'b????0111111111111111111111111111: Range = (2**30) - 1;
|
||||
32'b???01111111111111111111111111111: Range = (2**31) - 1;
|
||||
32'b??011111111111111111111111111111: Range = (2**32) - 1;
|
||||
32'b?0111111111111111111111111111111: Range = (2**33) - 1;
|
||||
32'b01111111111111111111111111111111: Range = (2**34) - 1;
|
||||
32'b11111111111111111111111111111111: Range = (2**35) - 1;
|
||||
default: Range = '0;
|
||||
endcase
|
||||
end else begin
|
||||
assign Range = '0;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// *** Range should not be truncated... but our physical address space is
|
||||
// currently only 32 bits wide.
|
||||
// with a bit of combining of range selection, this could be shared with NA4Match ***
|
||||
assign NAMatch = &((HADDR ~^ CurrentAdrFull) | Range[31:0]);*/
|
||||
assign NAMatch = &((PhysicalAddress ~^ CurrentAdrFull) | Mask);
|
||||
|
||||
assign Match = (AdrMode == TOR) ? TORMatch :
|
||||
(AdrMode == NA4 || AdrMode == NAPOT) ? NAMatch :
|
||||
0;
|
||||
|
||||
endmodule
|
||||
assign FirstMatch = NoLowerMatchIn & Match;
|
||||
assign NoLowerMatchOut = NoLowerMatchIn & ~Match;
|
||||
assign L = PMPCfg[7] & FirstMatch;
|
||||
assign X = PMPCfg[2] & FirstMatch;
|
||||
assign W = PMPCfg[1] & FirstMatch;
|
||||
assign R = PMPCfg[0] & FirstMatch;
|
||||
assign Active = |PMPCfg[4:3];
|
||||
endmodule
|
||||
|
||||
|
@ -29,13 +29,8 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module pmpchecker (
|
||||
// input logic clk, reset, //*** it seems like clk, reset is also not needed here?
|
||||
|
||||
input logic [31:0] HADDR,
|
||||
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
|
||||
input logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW,
|
||||
input logic [`PA_BITS-1:0] PhysicalAddress,
|
||||
input logic [1:0] PrivilegeModeW,
|
||||
|
||||
// *** ModelSim has a switch -svinputport which controls whether input ports
|
||||
// are nets (wires) or vars by default. The default setting of this switch is
|
||||
@ -44,10 +39,7 @@ module pmpchecker (
|
||||
// this will be understood as a var. However, if we don't supply the `var`
|
||||
// keyword, the compiler warns us that it's interpreting the signal as a var,
|
||||
// which we might not intend.
|
||||
// However, it's still bad form to pass 512 or 1024 signals across a module
|
||||
// boundary. It would be better to store the PMP address registers in a module
|
||||
// somewhere in the CSR hierarchy and do PMP checking _within_ that module, so
|
||||
// we don't have to pass around 16 whole registers.
|
||||
input var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
|
||||
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
|
||||
|
||||
input logic ExecuteAccessF, WriteAccessM, ReadAccessM,
|
||||
@ -59,98 +51,50 @@ module pmpchecker (
|
||||
output logic PMPStoreAccessFaultM
|
||||
);
|
||||
|
||||
// verilator lint_off UNOPTFLAT
|
||||
|
||||
// Bit i is high when the address falls in PMP region i
|
||||
logic [15:0] Regions;
|
||||
logic [3:0] MatchedRegion;
|
||||
logic Match, EnforcePMP;
|
||||
logic EnforcePMP;
|
||||
logic [7:0] PMPCFG [`PMP_ENTRIES-1:0];
|
||||
logic [`PMP_ENTRIES-1:0] Match; // PMP Entry matches
|
||||
logic [`PMP_ENTRIES-1:0] Active; // PMP register i is non-null
|
||||
logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set
|
||||
logic [`PMP_ENTRIES:0] NoLowerMatch; // None of the lower PMP entries match
|
||||
logic [`PMP_ENTRIES:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i]
|
||||
genvar i,j;
|
||||
|
||||
logic [7:0] PMPCFG [15:0];
|
||||
assign PAgePMPAdr[0] = 1'b1;
|
||||
assign NoLowerMatch[0] = 1'b1;
|
||||
|
||||
generate
|
||||
// verilator lint_off WIDTH
|
||||
for (j=0; j<`PMP_ENTRIES; j = j+8)
|
||||
assign {PMPCFG[j+7], PMPCFG[j+6], PMPCFG[j+5], PMPCFG[j+4],
|
||||
PMPCFG[j+3], PMPCFG[j+2], PMPCFG[j+1], PMPCFG[j]} = PMPCFG_ARRAY_REGW[j/8];
|
||||
// verilator lint_on WIDTH
|
||||
for (i=0; i<`PMP_ENTRIES; i++)
|
||||
pmpadrdec pmpadrdec(.PhysicalAddress,
|
||||
.PMPCfg(PMPCFG[i]),
|
||||
.PMPAdr(PMPADDR_ARRAY_REGW[i]),
|
||||
.PAgePMPAdrIn(PAgePMPAdr[i]),
|
||||
.PAgePMPAdrOut(PAgePMPAdr[i+1]),
|
||||
.NoLowerMatchIn(NoLowerMatch[i]),
|
||||
.NoLowerMatchOut(NoLowerMatch[i+1]),
|
||||
.Match(Match[i]),
|
||||
.Active(Active[i]),
|
||||
.L(L[i]), .X(X[i]), .W(W[i]), .R(R[i])
|
||||
);
|
||||
|
||||
// Bit i is high when the address is greater than or equal to PMPADR[i]
|
||||
// Used for determining whether TOR PMP regions match
|
||||
logic [15:0] AboveRegion;
|
||||
|
||||
// Bit i is high if PMP register i is non-null
|
||||
logic [15:0] ActiveRegion;
|
||||
|
||||
logic L_Bit, X_Bit, W_Bit, R_Bit;
|
||||
logic InvalidExecute, InvalidWrite, InvalidRead;
|
||||
|
||||
// *** extend to optionally 64 configurations
|
||||
|
||||
assign {PMPCFG[15], PMPCFG[14], PMPCFG[13], PMPCFG[12],
|
||||
PMPCFG[11], PMPCFG[10], PMPCFG[9], PMPCFG[8]} = PMPCFG23_REGW;
|
||||
|
||||
assign {PMPCFG[7], PMPCFG[6], PMPCFG[5], PMPCFG[4],
|
||||
PMPCFG[3], PMPCFG[2], PMPCFG[1], PMPCFG[0]} = PMPCFG01_REGW;
|
||||
|
||||
pmpadrdec pmpadrdec(.HADDR(HADDR), .AdrMode(PMPCFG[0][4:3]),
|
||||
.CurrentPMPAdr(PMPADDR_ARRAY_REGW[0]),
|
||||
.AdrAtLeastPreviousPMP(1'b1),
|
||||
.AdrAtLeastCurrentPMP(AboveRegion[0]),
|
||||
.Match(Regions[0]));
|
||||
assign ActiveRegion[0] = |PMPCFG[0][4:3];
|
||||
|
||||
generate // *** only for PMP_ENTRIES > 0
|
||||
genvar i;
|
||||
for (i = 1; i < `PMP_ENTRIES; i++) begin
|
||||
pmpadrdec pmpadrdec(.HADDR(HADDR), .AdrMode(PMPCFG[i][4:3]),
|
||||
.CurrentPMPAdr(PMPADDR_ARRAY_REGW[i]),
|
||||
.AdrAtLeastPreviousPMP(AboveRegion[i-1]),
|
||||
.AdrAtLeastCurrentPMP(AboveRegion[i]),
|
||||
.Match(Regions[i]));
|
||||
|
||||
assign ActiveRegion[i] = |PMPCFG[i][4:3];
|
||||
end
|
||||
// verilator lint_on UNOPTFLAT
|
||||
endgenerate
|
||||
|
||||
assign Match = |Regions;
|
||||
// Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region
|
||||
assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L : |Active;
|
||||
|
||||
// Only enforce PMP checking for S and U modes when at least one PMP is active
|
||||
assign EnforcePMP = |ActiveRegion;
|
||||
assign PMPInstrAccessFaultF = EnforcePMP && ExecuteAccessF && ~|X;
|
||||
assign PMPStoreAccessFaultM = EnforcePMP && WriteAccessM && ~|W;
|
||||
assign PMPLoadAccessFaultM = EnforcePMP && ReadAccessM && ~|R;
|
||||
|
||||
// *** extend to up to 64, fold bit extraction to avoid need for binary encoding of region
|
||||
always_comb
|
||||
casez (Regions)
|
||||
16'b???????????????1: MatchedRegion = 0;
|
||||
16'b??????????????10: MatchedRegion = 1;
|
||||
16'b?????????????100: MatchedRegion = 2;
|
||||
16'b????????????1000: MatchedRegion = 3;
|
||||
16'b???????????10000: MatchedRegion = 4;
|
||||
16'b??????????100000: MatchedRegion = 5;
|
||||
16'b?????????1000000: MatchedRegion = 6;
|
||||
16'b????????10000000: MatchedRegion = 7;
|
||||
16'b???????100000000: MatchedRegion = 8;
|
||||
16'b??????1000000000: MatchedRegion = 9;
|
||||
16'b?????10000000000: MatchedRegion = 10;
|
||||
16'b????100000000000: MatchedRegion = 11;
|
||||
16'b???1000000000000: MatchedRegion = 12;
|
||||
16'b??10000000000000: MatchedRegion = 13;
|
||||
16'b?100000000000000: MatchedRegion = 14;
|
||||
16'b1000000000000000: MatchedRegion = 15;
|
||||
default: MatchedRegion = 0; // Should only occur if there is no match
|
||||
endcase
|
||||
|
||||
assign L_Bit = PMPCFG[MatchedRegion][7] && Match;
|
||||
assign X_Bit = PMPCFG[MatchedRegion][2] && Match;
|
||||
assign W_Bit = PMPCFG[MatchedRegion][1] && Match;
|
||||
assign R_Bit = PMPCFG[MatchedRegion][0] && Match;
|
||||
|
||||
assign InvalidExecute = ExecuteAccessF && ~X_Bit;
|
||||
assign InvalidWrite = WriteAccessM && ~W_Bit;
|
||||
assign InvalidRead = ReadAccessM && ~R_Bit;
|
||||
|
||||
// *** don't cause faults when there are no PMPs
|
||||
assign PMPInstrAccessFaultF = (PrivilegeModeW == `M_MODE) ?
|
||||
Match && L_Bit && InvalidExecute :
|
||||
EnforcePMP && InvalidExecute;
|
||||
assign PMPStoreAccessFaultM = (PrivilegeModeW == `M_MODE) ?
|
||||
Match && L_Bit && InvalidWrite :
|
||||
EnforcePMP && InvalidWrite;
|
||||
assign PMPLoadAccessFaultM = (PrivilegeModeW == `M_MODE) ?
|
||||
Match && L_Bit && InvalidRead :
|
||||
EnforcePMP && InvalidRead;
|
||||
|
||||
assign PMPSquashBusAccess = PMPInstrAccessFaultF || PMPLoadAccessFaultM || PMPStoreAccessFaultM;
|
||||
assign PMPSquashBusAccess = PMPInstrAccessFaultF | PMPLoadAccessFaultM | PMPStoreAccessFaultM;
|
||||
|
||||
endmodule
|
||||
|
@ -65,6 +65,7 @@ module tlb #(parameter ENTRY_BITS = 3,
|
||||
// x1 - TLB is accessed for a write
|
||||
// 11 - TLB is accessed for both read and write
|
||||
input logic [1:0] TLBAccessType,
|
||||
input logic DisableTranslation,
|
||||
|
||||
// Virtual address input
|
||||
input logic [`XLEN-1:0] VirtualAddress,
|
||||
@ -135,7 +136,7 @@ module tlb #(parameter ENTRY_BITS = 3,
|
||||
endgenerate
|
||||
|
||||
// Whether translation should occur
|
||||
assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE);
|
||||
assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE) & ~ DisableTranslation;
|
||||
|
||||
// Determine how the TLB is currently being used
|
||||
// Note that we use ReadAccess for both loads and instruction fetches
|
||||
|
@ -60,7 +60,7 @@ module csr #(parameter
|
||||
output logic STATUS_MIE, STATUS_SIE,
|
||||
output logic STATUS_MXR, STATUS_SUM,
|
||||
output logic STATUS_MPRV,
|
||||
output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW,
|
||||
output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
|
||||
output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
|
||||
input logic [4:0] SetFflagsM,
|
||||
output logic [2:0] FRM_REGW,
|
||||
|
@ -48,25 +48,9 @@ module csrm #(parameter
|
||||
MTVAL = 12'h343,
|
||||
MIP = 12'h344,
|
||||
PMPCFG0 = 12'h3A0,
|
||||
PMPCFG1 = 12'h3A1,
|
||||
PMPCFG2 = 12'h3A2,
|
||||
PMPCFG3 = 12'h3A3,
|
||||
// .. up to 15 more at consecutive addresses
|
||||
PMPADDR0 = 12'h3B0,
|
||||
PMPADDR1 = 12'h3B1,
|
||||
PMPADDR2 = 12'h3B2,
|
||||
PMPADDR3 = 12'h3B3,
|
||||
PMPADDR4 = 12'h3B4,
|
||||
PMPADDR5 = 12'h3B5,
|
||||
PMPADDR6 = 12'h3B6,
|
||||
PMPADDR7 = 12'h3B7,
|
||||
PMPADDR8 = 12'h3B8,
|
||||
PMPADDR9 = 12'h3B9,
|
||||
PMPADDR10 = 12'h3BA,
|
||||
PMPADDR11 = 12'h3BB,
|
||||
PMPADDR12 = 12'h3BC,
|
||||
PMPADDR13 = 12'h3BD,
|
||||
PMPADDR14 = 12'h3BE,
|
||||
PMPADDR15 = 12'h3BF,
|
||||
// ... up to 63 more at consecutive addresses
|
||||
TSELECT = 12'h7A0,
|
||||
TDATA1 = 12'h7A1,
|
||||
TDATA2 = 12'h7A2,
|
||||
@ -90,7 +74,7 @@ module csrm #(parameter
|
||||
output logic [31:0] MCOUNTEREN_REGW, MCOUNTINHIBIT_REGW,
|
||||
output logic [`XLEN-1:0] MEDELEG_REGW, MIDELEG_REGW,
|
||||
// 64-bit registers in RV64, or two 32-bit registers in RV32
|
||||
output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW,
|
||||
output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
|
||||
output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
|
||||
input logic [11:0] MIP_REGW, MIE_REGW,
|
||||
output logic WriteMSTATUSM,
|
||||
@ -103,8 +87,8 @@ module csrm #(parameter
|
||||
logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM;
|
||||
logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM;
|
||||
logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM;
|
||||
logic WritePMPCFG0M, WritePMPCFG2M;
|
||||
logic WritePMPADDRM [15:0];
|
||||
logic [`PMP_ENTRIES/8-1:0] WritePMPCFGM, WritePMPCFGHM ;
|
||||
logic [`PMP_ENTRIES-1:0] WritePMPADDRM ;
|
||||
|
||||
localparam MISA_26 = (`MISA) & 32'h03ffffff;
|
||||
|
||||
@ -120,7 +104,7 @@ module csrm #(parameter
|
||||
assign WriteMEPCM = MTrapM | (CSRMWriteM && (CSRAdrM == MEPC)) && ~StallW;
|
||||
assign WriteMCAUSEM = MTrapM | (CSRMWriteM && (CSRAdrM == MCAUSE)) && ~StallW;
|
||||
assign WriteMTVALM = MTrapM | (CSRMWriteM && (CSRAdrM == MTVAL)) && ~StallW;
|
||||
assign WritePMPCFG0M = (CSRMWriteM && (CSRAdrM == PMPCFG0)) && ~StallW;
|
||||
/* assign WritePMPCFG0M = (CSRMWriteM && (CSRAdrM == PMPCFG0)) && ~StallW;
|
||||
assign WritePMPCFG2M = (CSRMWriteM && (CSRAdrM == PMPCFG2)) && ~StallW;
|
||||
assign WritePMPADDRM[0] = (CSRMWriteM && (CSRAdrM == PMPADDR0)) && ~StallW;
|
||||
assign WritePMPADDRM[1] = (CSRMWriteM && (CSRAdrM == PMPADDR1)) && ~StallW;
|
||||
@ -137,10 +121,13 @@ module csrm #(parameter
|
||||
assign WritePMPADDRM[12] = (CSRMWriteM && (CSRAdrM == PMPADDR12)) && ~StallW;
|
||||
assign WritePMPADDRM[13] = (CSRMWriteM && (CSRAdrM == PMPADDR13)) && ~StallW;
|
||||
assign WritePMPADDRM[14] = (CSRMWriteM && (CSRAdrM == PMPADDR14)) && ~StallW;
|
||||
assign WritePMPADDRM[15] = (CSRMWriteM && (CSRAdrM == PMPADDR15)) && ~StallW;
|
||||
assign WritePMPADDRM[15] = (CSRMWriteM && (CSRAdrM == PMPADDR15)) && ~StallW; */
|
||||
assign WriteMCOUNTERENM = CSRMWriteM && (CSRAdrM == MCOUNTEREN) && ~StallW;
|
||||
assign WriteMCOUNTINHIBITM = CSRMWriteM && (CSRAdrM == MCOUNTINHIBIT) && ~StallW;
|
||||
|
||||
|
||||
|
||||
|
||||
assign IllegalCSRMWriteReadonlyM = CSRMWriteM && (CSRAdrM == MVENDORID || CSRAdrM == MARCHID || CSRAdrM == MIMPID || CSRAdrM == MHARTID);
|
||||
|
||||
// CSRs
|
||||
@ -172,33 +159,39 @@ module csrm #(parameter
|
||||
flopenl #(32) MCOUNTINHIBITreg(clk, reset, WriteMCOUNTINHIBITM, CSRWriteValM[31:0], 32'hFFFFFFFF, MCOUNTINHIBIT_REGW);
|
||||
|
||||
// There are PMP_ENTRIES = 0, 16, or 64 PMPADDR registers, each of which has its own flop
|
||||
|
||||
// *** need to add support for locked PMPCFG and PMPADR
|
||||
genvar i;
|
||||
generate
|
||||
genvar i;
|
||||
for (i = 0; i < `PMP_ENTRIES; i++) begin: pmp_flop
|
||||
for(i=0; i<`PMP_ENTRIES; i++) begin
|
||||
assign WritePMPADDRM[i] = (CSRMWriteM && (CSRAdrM == PMPADDR0+i)) && ~StallW;
|
||||
flopenr #(`XLEN) PMPADDRreg(clk, reset, WritePMPADDRM[i], CSRWriteValM, PMPADDR_ARRAY_REGW[i]);
|
||||
end
|
||||
for (i=0; i<`PMP_ENTRIES/8; i++) begin
|
||||
if (`XLEN==64) begin
|
||||
assign WritePMPCFGM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i)) && ~StallW;
|
||||
flopenr #(`XLEN) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i]);
|
||||
end else begin
|
||||
assign WritePMPCFGM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i)) && ~StallW;
|
||||
assign WritePMPCFGHM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i+1)) && ~StallW;
|
||||
flopenr #(`XLEN) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][31:0]);
|
||||
flopenr #(`XLEN) PMPCFGHreg(clk, reset, WritePMPCFGHM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][63:32]);
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// PMPCFG registers are a pair of 64-bit registers in RV64 and four 32-bit registers in RV32
|
||||
generate
|
||||
if (`XLEN==64) begin
|
||||
flopenr #(`XLEN) PMPCFG01reg(clk, reset, WritePMPCFG0M, CSRWriteValM, PMPCFG01_REGW);
|
||||
flopenr #(`XLEN) PMPCFG23reg(clk, reset, WritePMPCFG2M, CSRWriteValM, PMPCFG23_REGW);
|
||||
end else begin
|
||||
logic WritePMPCFG1M, WritePMPCFG3M;
|
||||
assign WritePMPCFG1M = MTrapM | (CSRMWriteM && (CSRAdrM == PMPCFG1));
|
||||
assign WritePMPCFG3M = MTrapM | (CSRMWriteM && (CSRAdrM == PMPCFG3));
|
||||
flopenr #(`XLEN) PMPCFG0reg(clk, reset, WritePMPCFG0M, CSRWriteValM, PMPCFG01_REGW[31:0]);
|
||||
flopenr #(`XLEN) PMPCFG1reg(clk, reset, WritePMPCFG1M, CSRWriteValM, PMPCFG01_REGW[63:32]);
|
||||
flopenr #(`XLEN) PMPCFG2reg(clk, reset, WritePMPCFG2M, CSRWriteValM, PMPCFG23_REGW[31:0]);
|
||||
flopenr #(`XLEN) PMPCFG3reg(clk, reset, WritePMPCFG3M, CSRWriteValM, PMPCFG23_REGW[63:32]);
|
||||
end
|
||||
endgenerate
|
||||
// Read machine mode CSRs
|
||||
// verilator lint_off WIDTH
|
||||
always_comb begin
|
||||
IllegalCSRMAccessM = !(`S_SUPPORTED | `U_SUPPORTED & `N_SUPPORTED) &&
|
||||
(CSRAdrM == MEDELEG || CSRAdrM == MIDELEG); // trap on DELEG register access when no S or N-mode
|
||||
case (CSRAdrM)
|
||||
if (CSRAdrM >= PMPADDR0 && CSRAdrM < PMPADDR0 + `PMP_ENTRIES) // reading a PMP entry
|
||||
CSRMReadValM = PMPADDR_ARRAY_REGW[CSRAdrM - PMPADDR0];
|
||||
else if (CSRAdrM >= PMPCFG0 && CSRAdrM < PMPCFG0 + `PMP_ENTRIES/4) begin
|
||||
if (~CSRAdrM[0]) CSRMReadValM = PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0)/2][`XLEN-1:0];
|
||||
else CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG_ARRAY_REGW[(CSRAdrM - PMPCFG0-1)/2][63:32]};
|
||||
end
|
||||
else case (CSRAdrM)
|
||||
MISA_ADR: CSRMReadValM = MISA_REGW;
|
||||
MVENDORID: CSRMReadValM = 0;
|
||||
MARCHID: CSRMReadValM = 0;
|
||||
@ -219,7 +212,7 @@ module csrm #(parameter
|
||||
MTVAL: CSRMReadValM = MTVAL_REGW;
|
||||
MCOUNTEREN:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTEREN_REGW};
|
||||
MCOUNTINHIBIT:CSRMReadValM = {{(`XLEN-32){1'b0}}, MCOUNTINHIBIT_REGW};
|
||||
PMPCFG0: CSRMReadValM = PMPCFG01_REGW[`XLEN-1:0];
|
||||
/* PMPCFG0: CSRMReadValM = PMPCFG01_REGW[`XLEN-1:0];
|
||||
PMPCFG1: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG01_REGW[63:32]};
|
||||
PMPCFG2: CSRMReadValM = PMPCFG23_REGW[`XLEN-1:0];
|
||||
PMPCFG3: CSRMReadValM = {{(`XLEN-32){1'b0}}, PMPCFG23_REGW[63:32]};
|
||||
@ -238,11 +231,12 @@ module csrm #(parameter
|
||||
PMPADDR12: CSRMReadValM = PMPADDR_ARRAY_REGW[12];
|
||||
PMPADDR13: CSRMReadValM = PMPADDR_ARRAY_REGW[13];
|
||||
PMPADDR14: CSRMReadValM = PMPADDR_ARRAY_REGW[14];
|
||||
PMPADDR15: CSRMReadValM = PMPADDR_ARRAY_REGW[15];
|
||||
PMPADDR15: CSRMReadValM = PMPADDR_ARRAY_REGW[15]; */
|
||||
default: begin
|
||||
CSRMReadValM = 0;
|
||||
IllegalCSRMAccessM = 1;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
// verilator lint_on WIDTH
|
||||
endmodule
|
||||
|
@ -68,7 +68,7 @@ module privileged (
|
||||
output logic [1:0] PrivilegeModeW,
|
||||
output logic [`XLEN-1:0] SATP_REGW,
|
||||
output logic STATUS_MXR, STATUS_SUM,
|
||||
output logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW,
|
||||
output var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0],
|
||||
output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
|
||||
output logic [2:0] FRM_REGW
|
||||
);
|
||||
|
@ -37,8 +37,8 @@ module dtim #(parameter BASE=0, RANGE = 65535) (
|
||||
output logic HRESPTim, HREADYTim
|
||||
);
|
||||
|
||||
localparam integer MemStartAddr = BASE>>(1+`XLEN/32);
|
||||
localparam integer MemEndAddr = (RANGE+BASE)>>1+(`XLEN/32);
|
||||
localparam MemStartAddr = BASE>>(1+`XLEN/32);
|
||||
localparam MemEndAddr = (RANGE+BASE)>>1+(`XLEN/32);
|
||||
|
||||
logic [`XLEN-1:0] RAM[BASE>>(1+`XLEN/32):(RANGE+BASE)>>1+(`XLEN/32)];
|
||||
logic [31:0] HWADDR, A;
|
||||
|
@ -74,7 +74,8 @@ module uncore (
|
||||
|
||||
// Determine which region of physical memory (if any) is being accessed
|
||||
// Use a trimmed down portion of the PMA checker - only the address decoders
|
||||
adrdecs adrdecs(HADDR, 1'b1, 1'b1, 1'b1, HSIZE, HSELRegions);
|
||||
// Set access types to all 1 as don't cares because the MMU has already done access checking
|
||||
adrdecs adrdecs({{(`PA_BITS-32){1'b0}}, HADDR}, 1'b1, 1'b1, 1'b1, HSIZE[1:0], HSELRegions);
|
||||
|
||||
// unswizzle HSEL signals
|
||||
assign {HSELBootTim, HSELTim, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC} = HSELRegions;
|
||||
|
@ -26,167 +26,238 @@
|
||||
`include "wally-config.vh"
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
module wallypipelinedhart (
|
||||
input logic clk, reset,
|
||||
output logic [`XLEN-1:0] PCF,
|
||||
// input logic [31:0] InstrF,
|
||||
// Privileged
|
||||
input logic TimerIntM, ExtIntM, SwIntM,
|
||||
input logic InstrAccessFaultF,
|
||||
input logic DataAccessFaultM,
|
||||
input logic [63:0] MTIME_CLINT, MTIMECMP_CLINT,
|
||||
// Bus Interface
|
||||
input logic [15:0] rd2, // bogus, delete when real multicycle fetch works
|
||||
input logic [`AHBW-1:0] HRDATA,
|
||||
input logic HREADY, HRESP,
|
||||
output logic HCLK, HRESETn,
|
||||
output logic [31:0] HADDR,
|
||||
output logic [`AHBW-1:0] HWDATA,
|
||||
output logic HWRITE,
|
||||
output logic [2:0] HSIZE,
|
||||
output logic [2:0] HBURST,
|
||||
output logic [3:0] HPROT,
|
||||
output logic [1:0] HTRANS,
|
||||
output logic HMASTLOCK,
|
||||
output logic [5:0] HSELRegions,
|
||||
// Delayed signals for subword write
|
||||
output logic [2:0] HADDRD,
|
||||
output logic [3:0] HSIZED,
|
||||
output logic HWRITED
|
||||
);
|
||||
module wallypipelinedhart
|
||||
(
|
||||
input logic clk, reset,
|
||||
output logic [`XLEN-1:0] PCF,
|
||||
// input logic [31:0] InstrF,
|
||||
// Privileged
|
||||
input logic TimerIntM, ExtIntM, SwIntM,
|
||||
input logic InstrAccessFaultF,
|
||||
input logic DataAccessFaultM,
|
||||
input logic [63:0] MTIME_CLINT, MTIMECMP_CLINT,
|
||||
// Bus Interface
|
||||
input logic [15:0] rd2, // bogus, delete when real multicycle fetch works
|
||||
input logic [`AHBW-1:0] HRDATA,
|
||||
input logic HREADY, HRESP,
|
||||
output logic HCLK, HRESETn,
|
||||
output logic [31:0] HADDR,
|
||||
output logic [`AHBW-1:0] HWDATA,
|
||||
output logic HWRITE,
|
||||
output logic [2:0] HSIZE,
|
||||
output logic [2:0] HBURST,
|
||||
output logic [3:0] HPROT,
|
||||
output logic [1:0] HTRANS,
|
||||
output logic HMASTLOCK,
|
||||
output logic [5:0] HSELRegions,
|
||||
// Delayed signals for subword write
|
||||
output logic [2:0] HADDRD,
|
||||
output logic [3:0] HSIZED,
|
||||
output logic HWRITED
|
||||
);
|
||||
|
||||
// logic [1:0] ForwardAE, ForwardBE;
|
||||
logic StallF, StallD, StallE, StallM, StallW;
|
||||
logic FlushF, FlushD, FlushE, FlushM, FlushW;
|
||||
logic RetM, TrapM, NonBusTrapM;
|
||||
// logic [1:0] ForwardAE, ForwardBE;
|
||||
logic StallF, StallD, StallE, StallM, StallW;
|
||||
logic FlushF, FlushD, FlushE, FlushM, FlushW;
|
||||
logic RetM, TrapM, NonBusTrapM;
|
||||
|
||||
// new signals that must connect through DP
|
||||
logic MulDivE, W64E;
|
||||
logic CSRReadM, CSRWriteM, PrivilegedM;
|
||||
logic [1:0] AtomicM;
|
||||
logic [`XLEN-1:0] SrcAE, SrcBE;
|
||||
logic [`XLEN-1:0] SrcAM;
|
||||
logic [2:0] Funct3E;
|
||||
logic MulDivE, W64E;
|
||||
logic CSRReadM, CSRWriteM, PrivilegedM;
|
||||
logic [1:0] AtomicM;
|
||||
logic [`XLEN-1:0] SrcAE, SrcBE;
|
||||
logic [`XLEN-1:0] SrcAM;
|
||||
logic [2:0] Funct3E;
|
||||
// logic [31:0] InstrF;
|
||||
logic [31:0] InstrD, InstrE, InstrM, InstrW;
|
||||
logic [`XLEN-1:0] PCD, PCE, PCM, PCLinkE, PCLinkW;
|
||||
logic [`XLEN-1:0] PCTargetE;
|
||||
logic [`XLEN-1:0] CSRReadValW, MulDivResultW;
|
||||
logic [`XLEN-1:0] PrivilegedNextPCM;
|
||||
logic [1:0] MemRWM;
|
||||
logic InstrValidM, InstrValidW;
|
||||
logic InstrMisalignedFaultM;
|
||||
logic DataMisalignedM;
|
||||
logic IllegalBaseInstrFaultD, IllegalIEUInstrFaultD;
|
||||
logic ITLBInstrPageFaultF, DTLBLoadPageFaultM, DTLBStorePageFaultM;
|
||||
logic WalkerInstrPageFaultF, WalkerLoadPageFaultM, WalkerStorePageFaultM;
|
||||
logic LoadMisalignedFaultM, LoadAccessFaultM;
|
||||
logic StoreMisalignedFaultM, StoreAccessFaultM;
|
||||
logic [`XLEN-1:0] InstrMisalignedAdrM;
|
||||
|
||||
logic PCSrcE;
|
||||
logic CSRWritePendingDEM;
|
||||
logic LoadStallD, MulDivStallD, CSRRdStallD;
|
||||
logic DivDoneE;
|
||||
logic DivBusyE;
|
||||
logic DivDoneW;
|
||||
logic RegWriteD;
|
||||
logic SquashSCM, SquashSCW;
|
||||
logic [31:0] InstrD, InstrE, InstrM, InstrW;
|
||||
logic [`XLEN-1:0] PCD, PCE, PCM, PCLinkE, PCLinkW;
|
||||
logic [`XLEN-1:0] PCTargetE;
|
||||
logic [`XLEN-1:0] CSRReadValW, MulDivResultW;
|
||||
logic [`XLEN-1:0] PrivilegedNextPCM;
|
||||
logic [1:0] MemRWM;
|
||||
logic InstrValidM, InstrValidW;
|
||||
logic InstrMisalignedFaultM;
|
||||
logic DataMisalignedM;
|
||||
logic IllegalBaseInstrFaultD, IllegalIEUInstrFaultD;
|
||||
logic ITLBInstrPageFaultF, DTLBLoadPageFaultM, DTLBStorePageFaultM;
|
||||
logic WalkerInstrPageFaultF, WalkerLoadPageFaultM, WalkerStorePageFaultM;
|
||||
logic LoadMisalignedFaultM, LoadAccessFaultM;
|
||||
logic StoreMisalignedFaultM, StoreAccessFaultM;
|
||||
logic [`XLEN-1:0] InstrMisalignedAdrM;
|
||||
|
||||
logic PCSrcE;
|
||||
logic CSRWritePendingDEM;
|
||||
logic DivDoneE;
|
||||
logic DivBusyE;
|
||||
logic RegWriteD;
|
||||
logic LoadStallD, MulDivStallD, CSRRdStallD;
|
||||
logic SquashSCM, SquashSCW;
|
||||
// floating point unit signals
|
||||
logic [2:0] FRM_REGW;
|
||||
logic [1:0] FMemRWM, FMemRWE;
|
||||
logic FStallD;
|
||||
logic FWriteIntE, FWriteIntM, FWriteIntW;
|
||||
logic [`XLEN-1:0] FWriteDataE;
|
||||
logic [`XLEN-1:0] FIntResM;
|
||||
logic FDivBusyE;
|
||||
logic IllegalFPUInstrD, IllegalFPUInstrE;
|
||||
logic FloatRegWriteW;
|
||||
logic FPUStallD;
|
||||
logic [4:0] SetFflagsM;
|
||||
logic [`XLEN-1:0] FPUResultW;
|
||||
logic [2:0] FRM_REGW;
|
||||
logic [1:0] FMemRWM, FMemRWE;
|
||||
logic FStallD;
|
||||
logic FWriteIntE, FWriteIntM, FWriteIntW;
|
||||
logic [`XLEN-1:0] FWriteDataE;
|
||||
logic [`XLEN-1:0] FIntResM;
|
||||
logic FDivBusyE;
|
||||
logic IllegalFPUInstrD, IllegalFPUInstrE;
|
||||
logic FloatRegWriteW;
|
||||
logic FPUStallD;
|
||||
logic [4:0] SetFflagsM;
|
||||
logic [`XLEN-1:0] FPUResultW;
|
||||
|
||||
// memory management unit signals
|
||||
logic ITLBWriteF, DTLBWriteM;
|
||||
logic ITLBFlushF, DTLBFlushM;
|
||||
logic ITLBMissF, ITLBHitF;
|
||||
logic DTLBMissM, DTLBHitM;
|
||||
logic [`XLEN-1:0] SATP_REGW;
|
||||
logic STATUS_MXR, STATUS_SUM;
|
||||
logic [1:0] PrivilegeModeW;
|
||||
logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM;
|
||||
logic [1:0] PageTypeF, PageTypeM;
|
||||
logic ITLBWriteF, DTLBWriteM;
|
||||
logic ITLBFlushF, DTLBFlushM;
|
||||
logic ITLBMissF, ITLBHitF;
|
||||
logic DTLBMissM, DTLBHitM;
|
||||
logic [`XLEN-1:0] SATP_REGW;
|
||||
logic STATUS_MXR, STATUS_SUM;
|
||||
logic [1:0] PrivilegeModeW;
|
||||
logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM;
|
||||
logic [1:0] PageTypeF, PageTypeM;
|
||||
|
||||
// PMA checker signals
|
||||
logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM;
|
||||
logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM;
|
||||
logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM;
|
||||
logic DSquashBusAccessM, ISquashBusAccessF;
|
||||
logic [5:0] DHSELRegionsM, IHSELRegionsF;
|
||||
|
||||
logic PMPInstrAccessFaultF, PMPLoadAccessFaultM, PMPStoreAccessFaultM;
|
||||
logic PMAInstrAccessFaultF, PMALoadAccessFaultM, PMAStoreAccessFaultM;
|
||||
logic DSquashBusAccessM, ISquashBusAccessF;
|
||||
var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0];
|
||||
logic [63:0] PMPCFG01_REGW, PMPCFG23_REGW; // signals being sent from privileged unit to pmp/pma in dmem and ifu.
|
||||
assign HSELRegions = ExecuteAccessF ? IHSELRegionsF : DHSELRegionsM; // *** this is a pure guess on how one of these should be selected. it passes tests, but is it the right way to do this?
|
||||
var logic [63:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES/8-1:0];
|
||||
|
||||
// IMem stalls
|
||||
logic ICacheStallF;
|
||||
logic [`XLEN-1:0] MMUPAdr, MMUReadPTE;
|
||||
logic MMUStall;
|
||||
logic MMUTranslate, MMUReady;
|
||||
logic ICacheStallF;
|
||||
logic DCacheStall;
|
||||
logic [`XLEN-1:0] MMUPAdr, MMUReadPTE;
|
||||
logic MMUStall;
|
||||
logic MMUTranslate, MMUReady;
|
||||
logic HPTWRead;
|
||||
logic HPTWReadyfromLSU;
|
||||
logic HPTWStall;
|
||||
|
||||
|
||||
// bus interface to dmem
|
||||
logic MemReadM, MemWriteM;
|
||||
logic [1:0] AtomicMaskedM;
|
||||
logic [2:0] Funct3M;
|
||||
logic [`XLEN-1:0] MemAdrM, WriteDataM;
|
||||
logic [`PA_BITS-1:0] MemPAdrM;
|
||||
logic [`XLEN-1:0] ReadDataW;
|
||||
logic [`PA_BITS-1:0] InstrPAdrF;
|
||||
logic [`XLEN-1:0] InstrRData;
|
||||
logic InstrReadF;
|
||||
logic DataStall;
|
||||
logic InstrAckF, MemAckW;
|
||||
logic CommitM, CommittedM;
|
||||
logic MemReadM, MemWriteM;
|
||||
logic [1:0] AtomicMaskedM;
|
||||
logic [2:0] Funct3M;
|
||||
logic [`XLEN-1:0] MemAdrM, WriteDataM;
|
||||
logic [`PA_BITS-1:0] MemPAdrM;
|
||||
logic [`XLEN-1:0] ReadDataW;
|
||||
logic [`PA_BITS-1:0] InstrPAdrF;
|
||||
logic [`XLEN-1:0] InstrRData;
|
||||
logic InstrReadF;
|
||||
logic DataStall;
|
||||
logic InstrAckF, MemAckW;
|
||||
logic CommitM, CommittedM;
|
||||
|
||||
logic BPPredWrongE;
|
||||
logic BPPredDirWrongM;
|
||||
logic BTBPredPCWrongM;
|
||||
logic RASPredPCWrongM;
|
||||
logic BPPredClassNonCFIWrongM;
|
||||
logic BPPredWrongE;
|
||||
logic BPPredDirWrongM;
|
||||
logic BTBPredPCWrongM;
|
||||
logic RASPredPCWrongM;
|
||||
logic BPPredClassNonCFIWrongM;
|
||||
|
||||
logic[`XLEN-1:0] WriteDatatmpM;
|
||||
logic [`XLEN-1:0] WriteDatatmpM;
|
||||
|
||||
logic [4:0] InstrClassM;
|
||||
|
||||
logic [`XLEN-1:0] HRDATAW;
|
||||
|
||||
// IEU vs HPTW arbitration signals to send to LSU
|
||||
logic DisableTranslation;
|
||||
logic [1:0] MemRWMtoLSU;
|
||||
logic [2:0] Funct3MtoLSU;
|
||||
logic [1:0] AtomicMtoLSU;
|
||||
logic [`XLEN-1:0] MemAdrMtoLSU;
|
||||
logic [`XLEN-1:0] WriteDataMtoLSU;
|
||||
logic [`XLEN-1:0] ReadDataWFromLSU;
|
||||
logic CommittedMfromLSU;
|
||||
logic SquashSCWfromLSU;
|
||||
logic DataMisalignedMfromLSU;
|
||||
logic StallWtoLSU;
|
||||
logic StallWfromLSU;
|
||||
logic [2:0] Funct3MfromLSU;
|
||||
|
||||
logic [4:0] InstrClassM;
|
||||
|
||||
|
||||
ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache
|
||||
ifu ifu(.InstrInF(InstrRData),
|
||||
.WalkerInstrPageFaultF(WalkerInstrPageFaultF),
|
||||
.*); // instruction fetch unit: PC, branch prediction, instruction cache
|
||||
|
||||
ieu ieu(.*); // integer execution unit: integer register file, datapath and controller
|
||||
|
||||
|
||||
// mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM);
|
||||
lsu lsu(.*); // data cache unit
|
||||
|
||||
pagetablewalker pagetablewalker(.HPTWRead(HPTWRead),
|
||||
.*); // can send addresses to ahblite, send out pagetablestall
|
||||
// arbiter between IEU and pagetablewalker
|
||||
lsuArb arbiter(// HPTW connection
|
||||
.HPTWTranslate(MMUTranslate),
|
||||
.HPTWRead(HPTWRead),
|
||||
.HPTWPAdr(MMUPAdr),
|
||||
.HPTWReadPTE(MMUReadPTE),
|
||||
.HPTWReady(MMUReady),
|
||||
.HPTWStall(HPTWStall),
|
||||
// CPU connection
|
||||
.MemRWM(MemRWM),
|
||||
.Funct3M(Funct3M),
|
||||
.AtomicM(AtomicM),
|
||||
.MemAdrM(MemAdrM),
|
||||
.StallW(StallW),
|
||||
.WriteDataM(WriteDataM),
|
||||
.ReadDataW(ReadDataW),
|
||||
.CommittedM(CommittedM),
|
||||
.SquashSCW(SquashSCW),
|
||||
.DataMisalignedM(DataMisalignedM),
|
||||
.DCacheStall(DCacheStall),
|
||||
// LSU
|
||||
.DisableTranslation(DisableTranslation),
|
||||
.MemRWMtoLSU(MemRWMtoLSU),
|
||||
.Funct3MtoLSU(Funct3MtoLSU),
|
||||
.AtomicMtoLSU(AtomicMtoLSU),
|
||||
.MemAdrMtoLSU(MemAdrMtoLSU),
|
||||
.WriteDataMtoLSU(WriteDataMtoLSU),
|
||||
.StallWtoLSU(StallWtoLSU),
|
||||
.CommittedMfromLSU(CommittedMfromLSU),
|
||||
.SquashSCWfromLSU(SquashSCWfromLSU),
|
||||
.DataMisalignedMfromLSU(DataMisalignedMfromLSU),
|
||||
.ReadDataWFromLSU(ReadDataWFromLSU),
|
||||
.HPTWReadyfromLSU(HPTWReadyfromLSU),
|
||||
.DataStall(DataStall),
|
||||
.*);
|
||||
|
||||
|
||||
lsu lsu(.MemRWM(MemRWMtoLSU),
|
||||
.Funct3M(Funct3MtoLSU),
|
||||
.AtomicM(AtomicMtoLSU),
|
||||
.MemAdrM(MemAdrMtoLSU),
|
||||
.WriteDataM(WriteDataMtoLSU),
|
||||
.ReadDataW(ReadDataWFromLSU),
|
||||
.StallW(StallWtoLSU),
|
||||
|
||||
.CommittedM(CommittedMfromLSU),
|
||||
.SquashSCW(SquashSCWfromLSU),
|
||||
.DataMisalignedM(DataMisalignedMfromLSU),
|
||||
.DisableTranslation(DisableTranslation),
|
||||
|
||||
.DataStall(DataStall),
|
||||
.HPTWReady(HPTWReadyfromLSU),
|
||||
.Funct3MfromLSU(Funct3MfromLSU),
|
||||
.StallWfromLSU(StallWfromLSU),
|
||||
// .DataStall(LSUStall),
|
||||
.* ); // data cache unit
|
||||
|
||||
ahblite ebu(
|
||||
//.InstrReadF(1'b0),
|
||||
//.InstrRData(InstrF), // hook up InstrF later
|
||||
.ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking
|
||||
.WriteDataM(WriteDataM),
|
||||
.MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]),
|
||||
.Funct7M(InstrM[31:25]),
|
||||
.*);
|
||||
//.InstrReadF(1'b0),
|
||||
//.InstrRData(InstrF), // hook up InstrF later
|
||||
.ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking
|
||||
.WriteDataM(WriteDataM),
|
||||
.MemSizeM(Funct3MfromLSU[1:0]), .UnsignedLoadM(Funct3MfromLSU[2]),
|
||||
.Funct7M(InstrM[31:25]),
|
||||
.HRDATAW(HRDATAW),
|
||||
.StallW(StallWfromLSU),
|
||||
.*);
|
||||
|
||||
pagetablewalker pagetablewalker(.*); // can send addresses to ahblite, send out pagetablestall
|
||||
// *** can connect to hazard unit
|
||||
// changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed.
|
||||
// Would need to insert instruction as InstrD, not InstrF
|
||||
/*ahblite ebu(
|
||||
.InstrReadF(1'b0),
|
||||
.InstrRData(), // hook up InstrF later
|
||||
.MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]),
|
||||
.*); */
|
||||
|
||||
|
||||
|
||||
muldiv mdu(.*); // multiply and divide unit
|
||||
|
||||
hazard hzu(.*); // global stall and flush control
|
||||
@ -200,5 +271,5 @@ module wallypipelinedhart (
|
||||
// presently stub out SetFlagsM and FloatRegWriteW
|
||||
//assign SetFflagsM = 0;
|
||||
//assign FloatRegWriteW = 0;
|
||||
|
||||
|
||||
endmodule
|
||||
|
@ -27,8 +27,8 @@
|
||||
|
||||
module testbench();
|
||||
|
||||
parameter waveOnICount = 2657000; // # of instructions at which to turn on waves in graphical sim
|
||||
|
||||
parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*2400000; // # of instructions at which to turn on waves in graphical sim
|
||||
parameter stopICount = `BUSYBEAR*143898 + `BUILDROOT*0000000; // # instructions at which to halt sim completely (set to 0 to let it run as far as it can)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////// DUT /////////////////////////////////////
|
||||
@ -248,6 +248,9 @@ module testbench();
|
||||
if (instrs == waveOnICount) begin
|
||||
$display("turning on waves at %0d instructions", instrs);
|
||||
$stop;
|
||||
end else if (instrs == stopICount && stopICount != 0) begin
|
||||
$display("Ending sim at %0d instructions (set stopICount to 0 to let the sim go on)", instrs);
|
||||
$stop;
|
||||
end
|
||||
|
||||
// Check if PCD is going to be flushed due to a branch or jump
|
||||
|
Loading…
Reference in New Issue
Block a user