This commit is contained in:
ushakya22 2021-03-30 15:25:07 -04:00
parent fbed5d658e
commit 6b9ae41302
43 changed files with 6684 additions and 1104 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,100 @@
//////////////////////////////////////////
// wally-config.vh
//
// Written: David_Harris@hmc.edu 4 January 2021
// Modified:
//
// Purpose: Specify which features are configured
// Macros to determine which modes are supported based on MISA
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// RV32 or RV64: XLEN = 32 or 64
`define XLEN 64
//`define MISA (32'h00000105)
`define MISA (32'h00001104 | 1<<5 | 1<<18 | 1 << 20 | 1 << 12 | 1 << 0)
`define A_SUPPORTED ((`MISA >> 0) % 2 == 1)
`define C_SUPPORTED ((`MISA >> 2) % 2 == 1)
`define D_SUPPORTED ((`MISA >> 3) % 2 == 1)
`define F_SUPPORTED ((`MISA >> 5) % 2 == 1)
`define M_SUPPORTED ((`MISA >> 12) % 2 == 1)
`define S_SUPPORTED ((`MISA >> 18) % 2 == 1)
`define U_SUPPORTED ((`MISA >> 20) % 2 == 1)
`define ZCSR_SUPPORTED 1
`define COUNTERS 31
`define ZCOUNTERS_SUPPORTED 1
// N-mode user-level interrupts are depricated per Andrew Waterman 1/13/21
//`define N_SUPPORTED ((MISA >> 13) % 2 == 1)
`define N_SUPPORTED 0
`define M_MODE (2'b11)
`define S_MODE (2'b01)
`define U_MODE (2'b00)
// Microarchitectural Features
`define UARCH_PIPELINED 1
`define UARCH_SUPERSCALR 0
`define UARCH_SINGLECYCLE 0
`define MEM_DCACHE 0
`define MEM_DTIM 1
`define MEM_ICACHE 0
`define MEM_VIRTMEM 0
// Address space
`define RESET_VECTOR 64'h0000000080000000
// Bus Interface width
`define AHBW 64
// Peripheral Physiccal Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
`define BOOTTIMBASE 32'h00000000
`define BOOTTIMRANGE 32'h00003FFF
`define TIMBASE 32'h80000000
`define TIMRANGE 32'h0007FFFF
`define CLINTBASE 32'h02000000
`define CLINTRANGE 32'h0000FFFF
`define GPIOBASE 32'h10012000
`define GPIORANGE 32'h000000FF
`define UARTBASE 32'h10000000
`define UARTRANGE 32'h00000007
`define PLICBASE 32'h0C000000
`define PLICRANGE 32'h03FFFFFF
// Test modes
// Tie GPIO outputs back to inputs
`define GPIO_LOOPBACK_TEST 0
// Busybear special CSR config to match OVPSim
`define OVPSIM_CSR_CONFIG 0
// Hardware configuration
`define UART_PRESCALE 1
/* verilator lint_off STMTDLY */
/* verilator lint_off WIDTH */
/* verilator lint_off ASSIGNDLY */
/* verilator lint_off PINCONNECTEMPTY */
`define TWO_BIT_PRELOAD "../config/rv64ic/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv64ic/BTBPredictor.txt"
`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE

View File

@ -0,0 +1,31 @@
//////////////////////////////////////////
// wally-constants.vh
//
// Written: tfleming@hmc.edu 4 March 2021
// Modified:
//
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
// These macros should not be changed, except in the event of an
// update to the architecture or particularly special circumstances.
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// Virtual Memory Constants (sv39)
`define VPN_BITS 27
`define PPN_BITS 44
`define PA_BITS 56

View File

@ -0,0 +1,3 @@
#!/bin/sh
vsim -do $1

View File

@ -0,0 +1,3 @@
vsim -c <<!
do wally-pipelined-batch.do ../config/rv64imc rv64imc
!

View File

@ -45,13 +45,15 @@ add wave /testbench_busybear/reset
add wave -divider
add wave -hex /testbench_busybear/PCtext
add wave -hex /testbench_busybear/pcExpected
add wave -hex /testbench_busybear/dut/hart/ifu/PCF
add wave -hex /testbench_busybear/dut/hart/ifu/InstrF
add wave -hex /testbench_busybear/dut/hart/ifu/PCD
add wave -hex /testbench_busybear/dut/hart/ifu/InstrD
add wave -hex /testbench_busybear/dut/hart/ifu/StallD
add wave -hex /testbench_busybear/dut/hart/ifu/FlushD
add wave -hex /testbench_busybear/dut/hart/ifu/StallE
add wave -hex /testbench_busybear/dut/hart/ifu/FlushE
add wave -hex /testbench_busybear/dut/hart/ifu/InstrRawD
add wave /testbench_busybear/CheckInstrF
add wave /testbench_busybear/lastCheckInstrF
add wave /testbench_busybear/CheckInstrD
add wave /testbench_busybear/lastCheckInstrD
add wave /testbench_busybear/speculative
add wave /testbench_busybear/lastPC2
add wave -divider
@ -136,7 +138,7 @@ add wave /testbench_busybear/InstrMName
#add wave -hex /testbench_busybear/dut/hart/dmem/AdrM
#add wave -hex /testbench_busybear/dut/hart/dmem/WriteDataM
#add wave -divider
add wave -hex /testbench_busybear/dut/hart/ifu/PCW
add wave -hex /testbench_busybear/PCW
##add wave -hex /testbench_busybear/dut/hart/ifu/InstrW
add wave /testbench_busybear/InstrWName
#add wave /testbench_busybear/dut/hart/ieu/dp/RegWriteW

View File

@ -69,8 +69,8 @@ add wave -hex /testbench/dut/hart/ifu/PCM
add wave -hex /testbench/dut/hart/ifu/InstrM
add wave /testbench/InstrMName
add wave -divider Write
add wave -hex /testbench/dut/hart/ifu/PCW
add wave -hex /testbench/dut/hart/ifu/InstrW
add wave -hex /testbench/PCW
add wave -hex /testbench/InstrW
add wave /testbench/InstrWName
#add wave -hex /testbench/dut/hart/ieu/dp/SrcAE
#add wave -hex /testbench/dut/hart/ieu/dp/SrcBE
@ -81,7 +81,7 @@ add wave -divider Regfile_signals
#add wave -hex /testbench/dut/uncore/HADDR
#add wave -hex /testbench/dut/uncore/HWDATA
#add wave -divider
#add wave -hex /testbench/dut/hart/ifu/PCW
#add wave -hex /testbench/PCW
#add wave /testbench/InstrWName
#add wave /testbench/dut/hart/ieu/dp/RegWriteW
#add wave -hex /testbench/dut/hart/ieu/dp/ResultW

View File

@ -69,8 +69,8 @@ add wave -hex /testbench/dut/hart/ifu/PCM
add wave -hex /testbench/dut/hart/ifu/InstrM
add wave /testbench/InstrMName
add wave -divider Write
add wave -hex /testbench/dut/hart/ifu/PCW
add wave -hex /testbench/dut/hart/ifu/InstrW
add wave -hex /testbench/PCW
add wave -hex /testbench/InstrW
add wave /testbench/InstrWName
#add wave -hex /testbench/dut/hart/ieu/dp/SrcAE
#add wave -hex /testbench/dut/hart/ieu/dp/SrcBE
@ -81,7 +81,7 @@ add wave -divider Regfile_signals
#add wave -hex /testbench/dut/uncore/HADDR
#add wave -hex /testbench/dut/uncore/HWDATA
#add wave -divider
#add wave -hex /testbench/dut/hart/ifu/PCW
#add wave -hex /testbench/PCW
#add wave /testbench/InstrWName
#add wave /testbench/dut/hart/ieu/dp/RegWriteW
#add wave -hex /testbench/dut/hart/ieu/dp/ResultW

View File

@ -0,0 +1,43 @@
# wally-pipelined-batch.do
#
# Modification by Oklahoma State University & Harvey Mudd College
# Use with Testbench
# James Stine, 2008; David Harris 2021
# Go Cowboys!!!!!!
#
# Takes 1:10 to run RV64IC tests using gui
# Use this wally-pipelined-batch.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined-batch.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined-batch.do -c
# (omit the "-c" to see the GUI while running from the shell)
onbreak {resume}
# create library
if [file exists work_$2] {
vdel -lib work_$2 -all
}
vlib work_$2
# compile source files
# suppress spurious warnngs about
# "Extra checking for conflicts with always_comb done at vopt time"
# because vsim will run vopt
# default to config/rv64ic, but allow this to be overridden at the command line. For example:
# do wally-pipelined-batch.do ../config/rv32ic rv32ic
switch $argc {
0 {vlog +incdir+../config/rv64imc ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
1 {vlog +incdir+$1 ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
2 {vlog -work work_$2 +incdir+$1 ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
}
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
vopt work_$2.testbench -work work_$2 -o workopt_$2
vsim -lib work_$2 workopt_$2
run -all
quit

View File

@ -42,7 +42,7 @@ vsim workopt
view wave
-- display input and output signals as hexidecimal values
do ./wave-dos/default-waves.do
do ./wave-dos/ahb-waves.do
-- Set Wave Output Items
TreeUpdate [SetDefaultTree]

View File

@ -45,7 +45,7 @@ add wave -noupdate /testbench/dut/uncore/dtim/memwrite
add wave -noupdate -radix hexadecimal /testbench/dut/uncore/HADDR
add wave -noupdate -radix hexadecimal /testbench/dut/uncore/HWDATA
add wave -noupdate -divider <NULL>
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/PCW
add wave -noupdate -radix hexadecimal /testbench/PCW
add wave -noupdate /testbench/InstrWName
add wave -noupdate /testbench/dut/hart/ieu/dp/RegWriteW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/dp/ResultW
@ -219,7 +219,7 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/IllegalCompInstrD
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/PCPlusUpperF
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/PCPlus2or4F
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/PCD
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/PCW
add wave -noupdate -radix hexadecimal /testbench/PCW
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/PCLinkD
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/PCLinkE
add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/PCLinkM

View File

@ -14,14 +14,21 @@ add wave /testbench/dut/hart/FlushD
add wave /testbench/dut/hart/FlushE
add wave /testbench/dut/hart/FlushM
add wave /testbench/dut/hart/FlushW
add wave -divider
add wave -divider
add wave -hex /testbench/dut/hart/ifu/PCF
add wave -hex /testbench/dut/hart/ifu/InstrF
add wave /testbench/InstrFName
add wave -hex /testbench/dut/hart/ifu/PCD
add wave -hex /testbench/dut/hart/ifu/InstrD
add wave /testbench/InstrDName
add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD
add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD
add wave -divider
add wave -hex /testbench/dut/hart/ifu/ic/InstrPAdrF
add wave /testbench/dut/hart/ifu/ic/DelayF
add wave /testbench/dut/hart/ifu/ic/DelaySideF
add wave /testbench/dut/hart/ifu/ic/DelayD
add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD
add wave -divider
add wave -hex /testbench/dut/hart/ifu/PCE
@ -55,6 +62,9 @@ add wave -hex /testbench/dut/hart/ebu/CaptureDataM
add wave -hex /testbench/dut/hart/ebu/InstrStall
add wave -divider
add wave -hex /testbench/dut/uncore/dtim/*
add wave -divider
add wave -hex /testbench/dut/hart/ifu/PCW
add wave -hex /testbench/dut/hart/ifu/InstrW
add wave /testbench/InstrWName
@ -67,4 +77,4 @@ add wave -divider
add wave -hex /testbench/dut/uncore/dtim/*
add wave -divider
add wave -hex -r /testbench/*
add wave -hex -r /testbench/*

View File

@ -19,11 +19,15 @@ add wave /testbench/dut/hart/FlushW
add wave -divider
add wave -hex /testbench/dut/hart/ifu/PCF
add wave -hex /testbench/dut/hart/ifu/InstrF
add wave /testbench/InstrFName
add wave -hex /testbench/dut/hart/ifu/PCD
add wave -hex /testbench/dut/hart/ifu/InstrD
add wave /testbench/InstrDName
add wave -hex /testbench/dut/hart/ifu/ic/InstrRawD
add wave -hex /testbench/dut/hart/ifu/ic/AlignedInstrD
add wave /testbench/dut/hart/ifu/ic/DelayF
add wave /testbench/dut/hart/ifu/ic/DelaySideF
add wave /testbench/dut/hart/ifu/ic/DelayD
add wave -hex /testbench/dut/hart/ifu/ic/MisalignedHalfInstrD
add wave -divider
add wave -hex /testbench/dut/hart/ifu/PCE
add wave -hex /testbench/dut/hart/ifu/InstrE
@ -40,12 +44,12 @@ add wave /testbench/dut/uncore/dtim/memwrite
add wave -hex /testbench/dut/uncore/HADDR
add wave -hex /testbench/dut/uncore/HWDATA
add wave -divider
add wave -hex /testbench/dut/hart/ifu/PCW
add wave -hex /testbench/dut/hart/ifu/InstrW
add wave -hex /testbench/PCW
add wave -hex /testbench/InstrW
add wave /testbench/InstrWName
add wave /testbench/dut/hart/ieu/dp/RegWriteW
add wave -hex /testbench/dut/hart/ieu/dp/ResultW
add wave -hex /testbench/dut/hart/ieu/dp/RdW
add wave -divider
#add ww
add wave -hex -r /testbench/*
add wave -hex -r /testbench/*

View File

@ -0,0 +1,75 @@
# wally-pipelined.do
#
# Modification by Oklahoma State University & Harvey Mudd College
# Use with Testbench
# James Stine, 2008; David Harris 2021
# Go Cowboys!!!!!!
#
# Takes 1:10 to run RV64IC tests using gui
# Use this wally-pipelined.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined.do -c
# (omit the "-c" to see the GUI while running from the shell)
onbreak {resume}
# create library
if [file exists work] {
vdel -all
}
vlib work
# compile source files
# suppress spurious warnngs about
# "Extra checking for conflicts with always_comb done at vopt time"
# because vsim will run vopt
# default to config/rv64ic, but allow this to be overridden at the command line. For example:
# do wally-pipelined.do ../config/rv32ic
switch $argc {
0 {vlog +incdir+../config/rv64ic ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
1 {vlog +incdir+$1 ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
}
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
vopt +acc work.testbench -o workopt
vsim workopt
view wave
-- display input and output signals as hexidecimal values
# Diplays All Signals recursively
-- display input and output signals as hexidecimal values
# Diplays All Signals recursively
add wave /testbench/clk
add wave /testbench/reset
add wave -noupdate -divider -height 32 "Datapath"
add wave -hex /testbench/dut/hart/ieu/dp/*
add wave -noupdate -divider -height 32 "RF"
add wave -hex /testbench/dut/hart/ieu/dp/regf/*
add wave -hex /testbench/dut/hart/ieu/dp/regf/rf
add wave -noupdate -divider -height 32 "Control"
add wave -hex /testbench/dut/hart/ieu/c/*
add wave -noupdate -divider -height 32 "Multiply/Divide"
add wave -hex /testbench/dut/hart/mdu/*
-- Set Wave Output Items
TreeUpdate [SetDefaultTree]
WaveRestoreZoom {0 ps} {100 ps}
configure wave -namecolwidth 250
configure wave -valuecolwidth 120
configure wave -justifyvalue left
configure wave -signalnamewidth 0
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
set DefaultRadix hexadecimal
-- Run the Simulation
#run 1000
run -all
#quit

View File

@ -125,7 +125,7 @@ add wave -noupdate -expand -group PCS /testbench/dut/hart/PCF
add wave -noupdate -expand -group PCS /testbench/dut/hart/ifu/PCD
add wave -noupdate -expand -group PCS /testbench/dut/hart/PCE
add wave -noupdate -expand -group PCS /testbench/dut/hart/PCM
add wave -noupdate -expand -group PCS /testbench/dut/hart/ifu/PCW
add wave -noupdate -expand -group PCS /testbench/PCW
add wave -noupdate -group {function radix debug} /testbench/functionRadix/function_radix/FunctionAddr
add wave -noupdate -group {function radix debug} -radix unsigned /testbench/functionRadix/function_radix/ProgramAddrIndex
add wave -noupdate -group {function radix debug} /testbench/functionRadix/function_radix/reset

93
wally-pipelined/src/cache/dmapped.sv vendored Normal file
View File

@ -0,0 +1,93 @@
///////////////////////////////////////////
// dmapped.sv
//
// Written: jaallen@g.hmc.edu 2021-03-23
// Modified:
//
// Purpose: An implementation of a direct-mapped cache memory
// This cache is read-only, so "write"s to the memory are loading new data
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
module rodirectmappedmem #(parameter LINESIZE = 256, parameter NUMLINES = 512, parameter WORDSIZE = `XLEN) (
// Pipeline stuff
input logic clk,
input logic reset,
// If flush is high, invalidate the entire cache
input logic flush,
// Select which address to read (broken for efficiency's sake)
input logic [`XLEN-1:12] ReadUpperPAdr,
input logic [11:0] ReadLowerAdr,
// Write new data to the cache
input logic WriteEnable,
input logic [LINESIZE-1:0] WriteLine,
input logic [`XLEN-1:0] WritePAdr,
// Output the word, as well as if it is valid
output logic [WORDSIZE-1:0] DataWord,
output logic DataValid
);
localparam integer SETWIDTH = $clog2(NUMLINES);
localparam integer OFFSETWIDTH = $clog2(LINESIZE/8);
localparam integer TAGWIDTH = `XLEN-SETWIDTH-OFFSETWIDTH;
logic [NUMLINES-1:0][WORDSIZE-1:0] LineOutputs;
logic [NUMLINES-1:0] ValidOutputs;
logic [NUMLINES-1:0][TAGWIDTH-1:0] TagOutputs;
logic [OFFSETWIDTH-1:0] WordSelect;
logic [`XLEN-1:0] ReadPAdr;
logic [SETWIDTH-1:0] ReadSet, WriteSet;
logic [TAGWIDTH-1:0] ReadTag, WriteTag;
// Swizzle bits to get the offset, set, and tag out of the read and write addresses
always_comb begin
// Read address
assign WordSelect = ReadLowerAdr[OFFSETWIDTH-1:0];
assign ReadPAdr = {ReadUpperPAdr, ReadLowerAdr};
assign ReadSet = ReadPAdr[SETWIDTH+OFFSETWIDTH-1:OFFSETWIDTH];
assign ReadTag = ReadPAdr[`XLEN-1:SETWIDTH+OFFSETWIDTH];
// Write address
assign WriteSet = WritePAdr[SETWIDTH+OFFSETWIDTH-1:OFFSETWIDTH];
assign WriteTag = WritePAdr[`XLEN-1:SETWIDTH+OFFSETWIDTH];
end
genvar i;
generate
for (i=0; i < NUMLINES; i++) begin
rocacheline #(LINESIZE, TAGWIDTH, WORDSIZE) lines (
.*,
.WriteEnable(WriteEnable & (WriteSet == i)),
.WriteData(WriteLine),
.WriteTag(WriteTag),
.DataWord(LineOutputs[i]),
.DataTag(TagOutputs[i]),
.DataValid(ValidOutputs[i])
);
end
endgenerate
// Get the data and valid out of the lines
always_comb begin
assign DataWord = LineOutputs[ReadSet];
assign DataValid = ValidOutputs[ReadSet] & (TagOutputs[ReadSet] == ReadTag);
end
endmodule

68
wally-pipelined/src/cache/line.sv vendored Normal file
View File

@ -0,0 +1,68 @@
///////////////////////////////////////////
// line.sv
//
// Written: jaallen@g.hmc.edu 2021-03-23
// Modified:
//
// Purpose: An implementation of a single cache line
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// A read-only cache line ("write"ing to this line is loading new data, not writing to memory
module rocacheline #(parameter LINESIZE = 256, parameter TAGSIZE = 32, parameter WORDSIZE = `XLEN) (
// Pipeline stuff
input logic clk,
input logic reset,
// If flush is high, invalidate this word
input logic flush,
// Select which word within the line
input logic [$clog2(LINESIZE/8)-1:0] WordSelect,
// Write new data to the line
input logic WriteEnable,
input logic [LINESIZE-1:0] WriteData,
input logic [TAGSIZE-1:0] WriteTag,
// Output the word, as well as the tag and if it is valid
output logic [WORDSIZE-1:0] DataWord,
output logic [TAGSIZE-1:0] DataTag,
output logic DataValid
);
localparam integer OFFSETSIZE = $clog2(LINESIZE/8);
localparam integer NUMWORDS = LINESIZE/WORDSIZE;
logic [NUMWORDS-1:0][WORDSIZE-1:0] DataLinesIn, DataLinesOut;
flopenr #(1) ValidBitFlop(clk, reset, WriteEnable | flush, ~flush, DataValid);
flopenr #(TAGSIZE) TagFlop(clk, reset, WriteEnable, WriteTag, DataTag);
genvar i;
generate
for (i=0; i < NUMWORDS; i++) begin
assign DataLinesIn[i] = WriteData[NUMWORDS*i+WORDSIZE-1:NUMWORDS*i];
flopenr #(LINESIZE) LineFlop(clk, reset, WriteEnable, DataLinesIn[i], DataLinesOut[i]);
end
endgenerate
always_comb begin
assign DataWord = DataLinesOut[WordSelect[OFFSETSIZE-1:$clog2(WORDSIZE)]];
end
endmodule

View File

@ -29,7 +29,7 @@ module hazard(
// Detect hazards
input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM,
input logic LoadStallD, MulDivStallD, CSRRdStallD,
input logic InstrStall, DataStall,
input logic InstrStall, DataStall, ICacheStallF,
// Stall & flush outputs
output logic StallF, StallD, StallE, StallM, StallW,
output logic FlushF, FlushD, FlushE, FlushM, FlushW

View File

@ -27,12 +27,13 @@
module forward(
// Detect hazards
input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW,
input logic MemReadE, MulDivE, CSRReadE,
input logic RegWriteM, RegWriteW,
input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW,
input logic MemReadE, MulDivE, CSRReadE,
input logic RegWriteM, RegWriteW,
input logic DivDoneW,
// Forwarding controls
output logic [1:0] ForwardAE, ForwardBE,
output logic LoadStallD, MulDivStallD, CSRRdStallD
output logic LoadStallD, MulDivStallD, CSRRdStallD
);
always_comb begin
@ -48,8 +49,8 @@ module forward(
end
// Stall on dependent operations that finish in Mem Stage and can't bypass in time
assign LoadStallD = MemReadE & ((Rs1D == RdE) | (Rs2D == RdE));
assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)); // *** extend with stalls for divide
assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE));
assign LoadStallD = MemReadE & ((Rs1D == RdE) | (Rs2D == RdE));
assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) | MulDivE&~DivDoneW; // *** extend with stalls for divide
assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE));
endmodule

View File

@ -26,39 +26,40 @@
`include "wally-config.vh"
module ieu (
input logic clk, reset,
input logic clk, reset,
// Decode Stage interface
input logic [31:0] InstrD,
input logic IllegalIEUInstrFaultD,
output logic IllegalBaseInstrFaultD,
input logic [31:0] InstrD,
input logic IllegalIEUInstrFaultD,
output logic IllegalBaseInstrFaultD,
// Execute Stage interface
input logic [`XLEN-1:0] PCE,
input logic [`XLEN-1:0] PCLinkE,
input logic [`XLEN-1:0] PCE,
input logic [`XLEN-1:0] PCLinkE,
output logic [`XLEN-1:0] PCTargetE,
output logic MulDivE, W64E,
output logic [2:0] Funct3E,
output logic MulDivE, W64E,
output logic [2:0] Funct3E,
output logic [`XLEN-1:0] SrcAE, SrcBE,
// Memory stage interface
input logic DataMisalignedM,
input logic DataAccessFaultM,
input logic SquashSCW,
output logic [1:0] MemRWM,
output logic [1:0] AtomicM,
input logic DataMisalignedM,
input logic DataAccessFaultM,
input logic SquashSCW,
output logic [1:0] MemRWM,
output logic [1:0] AtomicM,
output logic [`XLEN-1:0] MemAdrM, WriteDataM,
output logic [`XLEN-1:0] SrcAM,
output logic [2:0] Funct3M,
output logic [2:0] Funct3M,
// Writeback stage
input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW,
input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW,
// input logic [`XLEN-1:0] PCLinkW,
output logic InstrValidW,
output logic InstrValidW,
// hazards
input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW,
output logic LoadStallD, MulDivStallD, CSRRdStallD,
output logic PCSrcE,
input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW,
output logic LoadStallD, MulDivStallD, CSRRdStallD,
output logic PCSrcE,
input logic DivDoneW,
output logic CSRReadM, CSRWriteM, PrivilegedM,
output logic CSRWritePendingDEM
output logic CSRReadM, CSRWriteM, PrivilegedM,
output logic CSRWritePendingDEM
);
logic [2:0] ImmSrcD;
@ -78,5 +79,6 @@ module ieu (
controller c(.*);
datapath dp(.*);
forward fw(.*);
endmodule

View File

@ -0,0 +1,138 @@
///////////////////////////////////////////
// icache.sv
//
// Written: jaallen@g.hmc.edu 2021-03-02
// Modified:
//
// Purpose: Cache instructions for the ifu so it can access memory less often, saving cycles
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
module icache(
// Basic pipeline stuff
input logic clk, reset,
input logic StallF, StallD,
input logic FlushD,
// Upper bits of physical address for PC
input logic [`XLEN-1:12] UpperPCPF,
// Lower 12 bits of virtual PC address, since it's faster this way
input logic [11:0] LowerPCF,
// Data read in from the ebu unit
input logic [`XLEN-1:0] InstrInF,
// Read requested from the ebu unit
output logic [`XLEN-1:0] InstrPAdrF,
output logic InstrReadF,
// High if the instruction currently in the fetch stage is compressed
output logic CompressedF,
// High if the icache is requesting a stall
output logic ICacheStallF,
// The raw (not decompressed) instruction that was requested
// If the next instruction is compressed, the upper 16 bits may be anything
output logic [31:0] InstrRawD
);
logic DelayF, DelaySideF, FlushDLastCyclen, DelayD;
logic [1:0] InstrDMuxChoice;
logic [15:0] MisalignedHalfInstrF, MisalignedHalfInstrD;
logic [31:0] InstrF, AlignedInstrD;
// Buffer the last read, for ease of accessing it again
logic LastReadDataValidF;
logic [`XLEN-1:0] LastReadDataF, LastReadAdrF, InDataF;
// instruction for NOP
logic [31:0] nop = 32'h00000013;
// Temporary change to bridge the new interface to old behaviors
logic [`XLEN-1:0] PCPF;
assign PCPF = {UpperPCPF, LowerPCF};
// This flop doesn't stall if StallF is high because we should output a nop
// when FlushD happens, even if the pipeline is also stalled.
flopr #(1) flushDLastCycleFlop(clk, reset, ~FlushD & (FlushDLastCyclen | ~StallF), FlushDLastCyclen);
flopenr #(1) delayDFlop(clk, reset, ~StallF, DelayF & ~CompressedF, DelayD);
flopenrc#(1) delayStateFlop(clk, reset, FlushD, ~StallF, DelayF & ~DelaySideF, DelaySideF);
// This flop stores the first half of a misaligned instruction while waiting for the other half
flopenr #(16) halfInstrFlop(clk, reset, DelayF & ~StallF, MisalignedHalfInstrF, MisalignedHalfInstrD);
// This flop is here to simulate pulling data out of the cache, which is edge-triggered
flopenr #(32) instrFlop(clk, reset, ~StallF, InstrF, AlignedInstrD);
// These flops cache the previous read, to accelerate things
flopenr #(`XLEN) lastReadDataFlop(clk, reset, InstrReadF & ~StallF, InstrInF, LastReadDataF);
flopenr #(1) lastReadDataVFlop(clk, reset, InstrReadF & ~StallF, 1'b1, LastReadDataValidF);
flopenr #(`XLEN) lastReadAdrFlop(clk, reset, InstrReadF & ~StallF, InstrPAdrF, LastReadAdrF);
// Decide which address needs to be fetched and sent out over InstrPAdrF
// If the requested address fits inside one read from memory, we fetch that
// address, adjusted to the bit width. Otherwise, we request the lower word
// and then the upper word, in that order.
generate
if (`XLEN == 32) begin
assign InstrPAdrF = PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[31:2], 2'b00} : {PCPF[31:2], 2'b00}) : PCPF;
end else begin
assign InstrPAdrF = PCPF[2] ? (PCPF[1] ? ((DelaySideF & ~CompressedF) ? {PCPF[63:3]+1, 3'b000} : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000}) : {PCPF[63:3], 3'b000};
end
endgenerate
// Read from memory if we don't have the address we want
always_comb if (LastReadDataValidF & (InstrPAdrF == LastReadAdrF)) begin
assign InstrReadF = 0;
end else begin
assign InstrReadF = 1;
end
// Pick from the memory input or from the previous read, as appropriate
mux2 #(`XLEN) inDataMux(LastReadDataF, InstrInF, InstrReadF, InDataF);
// If the instruction fits in one memory read, then we put the right bits
// into InstrF. Otherwise, we activate DelayF to signal the rest of the
// machinery to swizzle bits.
generate
if (`XLEN == 32) begin
assign InstrF = PCPF[1] ? {16'b0, InDataF[31:16]} : InDataF;
assign DelayF = PCPF[1];
assign MisalignedHalfInstrF = InDataF[31:16];
end else begin
assign InstrF = PCPF[2] ? (PCPF[1] ? {16'b0, InDataF[63:48]} : InDataF[63:32]) : (PCPF[1] ? InDataF[47:16] : InDataF[31:0]);
assign DelayF = PCPF[1] && PCPF[2];
assign MisalignedHalfInstrF = InDataF[63:48];
end
endgenerate
// We will likely need to stall later, but stalls are handled by the rest of the pipeline for now
assign ICacheStallF = 0;
// Detect if the instruction is compressed
assign CompressedF = InstrF[1:0] != 2'b11;
// Pick the correct output, depending on whether we have to assemble this
// instruction from two reads or not.
// Output the requested instruction (we don't need to worry if the read is
// incomplete, since the pipeline stalls for us when it isn't), or a NOP for
// the cycle when the first of two reads comes in.
always_comb if (~FlushDLastCyclen) begin
assign InstrDMuxChoice = 2'b10;
end else if (DelayD & (MisalignedHalfInstrD[1:0] != 2'b11)) begin
assign InstrDMuxChoice = 2'b11;
end else begin
assign InstrDMuxChoice = {1'b0, DelayD};
end
mux4 #(32) instrDMux (AlignedInstrD, {InstrInF[15:0], MisalignedHalfInstrD}, nop, {16'b0, MisalignedHalfInstrD}, InstrDMuxChoice, InstrRawD);
endmodule

View File

@ -2,7 +2,7 @@
// ifu.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
// Modified:
//
// Purpose: Instrunction Fetch Unit
// PC, branch prediction, instruction cache
@ -35,6 +35,7 @@ module ifu (
output logic [`XLEN-1:0] PCF,
output logic [`XLEN-1:0] InstrPAdrF,
output logic InstrReadF,
output logic ICacheStallF,
// Decode
// Execute
output logic [`XLEN-1:0] PCLinkE,
@ -61,27 +62,25 @@ module ifu (
input logic [`XLEN-1:0] PageTableEntryF,
input logic [`XLEN-1:0] SATP_REGW,
input logic ITLBWriteF, // ITLBFlushF,
output logic ITLBMissF, ITLBHitF,
// bogus
input logic [15:0] rd2
output logic ITLBMissF, ITLBHitF
);
logic [`XLEN-1:0] UnalignedPCNextF, PCNextF;
logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM;
logic PrivilegedChangePCM;
logic IllegalCompInstrD;
logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkM;
logic CompressedF;
logic [31:0] InstrF, InstrRawD, InstrE, InstrW;
logic [31:0] nop = 32'h00000013; // instruction for NOP
logic misaligned, BranchMisalignedFaultE, BranchMisalignedFaultM, TrapMisalignedFaultM;
logic PrivilegedChangePCM;
logic IllegalCompInstrD;
logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkM, PCPF;
logic CompressedF;
logic [31:0] InstrRawD, InstrE, InstrW;
logic [31:0] nop = 32'h00000013; // instruction for NOP
logic [`XLEN-1:0] ITLBInstrPAdrF, ICacheInstrPAdrF;
// *** temporary hack until walker is hooked up -- Thomas F
// logic [`XLEN-1:0] PageTableEntryF = '0;
logic ITLBFlushF = '0;
// logic ITLBWriteF = '0;
tlb #(3) itlb(clk, reset, SATP_REGW, PrivilegeModeW, PCF, PageTableEntryF, ITLBWriteF, ITLBFlushF,
InstrPAdrF, ITLBMissF, ITLBHitF);
ITLBInstrPAdrF, ITLBMissF, ITLBHitF);
// branch predictor signals
logic SelBPPredF;
@ -92,11 +91,21 @@ module ifu (
// *** put memory interface on here, InstrF becomes output
//assign InstrPAdrF = PCF; // *** no MMU
//assign InstrReadF = ~StallD; // *** & ICacheMissF; add later
assign InstrReadF = 1; // *** & ICacheMissF; add later
// assign InstrReadF = 1; // *** & ICacheMissF; add later
// jarred 2021-03-14 Add instrution cache block to remove rd2
assign PCPF = PCF; // Temporary workaround until iTLB is live
icache ic(
.*,
.InstrPAdrF(ICacheInstrPAdrF),
.UpperPCPF(PCPF[`XLEN-1:12]),
.LowerPCF(PCF[11:0])
);
// Prioritize the iTLB for reads if it wants one
mux2 #(`XLEN) instrPAdrMux(ICacheInstrPAdrF, ITLBInstrPAdrF, ITLBMissF, InstrPAdrF);
assign PrivilegedChangePCM = RetM | TrapM;
//mux3 #(`XLEN) pcmux(PCPlus2or4F, PCCorrectE, PrivilegedNextPCM, {PrivilegedChangePCM, BPPredWrongE}, UnalignedPCNextF);
mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F),
.d1(BPPredPCF),
@ -114,7 +123,7 @@ module ifu (
.y(UnalignedPCNextF));
assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment
flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF);
flopenl #(`XLEN) pcreg(clk, reset, ~StallF & ~ICacheStallF, PCNextF, `RESET_VECTOR, PCF);
// branch and jump predictor
// I am making the port connection explicit for now as I want to see them and they will be changing.
@ -141,9 +150,7 @@ module ifu (
// pcadder
// add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32
assign CompressedF = (InstrF[1:0] != 2'b11); // is it a 16-bit compressed instruction?
assign PCPlusUpperF = PCF[`XLEN-1:2] + 1; // add 4 to PC
// choose PC+2 or PC+4
always_comb
if (CompressedF) // add 2
@ -151,18 +158,7 @@ module ifu (
else PCPlus2or4F = {PCF[`XLEN-1:2], 2'b10};
else PCPlus2or4F = {PCPlusUpperF, PCF[1:0]}; // add 4
// harris 2/23/21 Add code to fetch instruction split across two words
generate
if (`XLEN==32) begin
assign InstrF = PCF[1] ? {rd2[15:0], InstrInF[31:16]} : InstrInF;
end else begin
assign InstrF = PCF[2] ? (PCF[1] ? {rd2[15:0], InstrInF[63:48]} : InstrInF[63:32])
: (PCF[1] ? InstrInF[47:16] : InstrInF[31:0]);
end
endgenerate
// Decode stage pipeline register and logic
flopenl #(32) InstrDReg(clk, reset, ~StallD | FlushD, (FlushD ? nop : InstrF), nop, InstrRawD);
flopenrc #(`XLEN) PCDReg(clk, reset, FlushD, ~StallD, PCF, PCD);
// expand 16-bit compressed instructions to 32 bits
@ -196,10 +192,10 @@ module ifu (
flopenr #(32) InstrEReg(clk, reset, ~StallE, FlushE ? nop : InstrD, InstrE);
flopenr #(32) InstrMReg(clk, reset, ~StallM, FlushM ? nop : InstrE, InstrM);
flopenr #(32) InstrWReg(clk, reset, ~StallW, FlushW ? nop : InstrM, InstrW); // just for testbench, delete later
// flopenr #(32) InstrWReg(clk, reset, ~StallW, FlushW ? nop : InstrM, InstrW); // just for testbench, delete later
flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, PCD, PCE);
flopenr #(`XLEN) PCMReg(clk, reset, ~StallM, PCE, PCM);
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); // *** probably not needed; delete later
// flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); // *** probably not needed; delete later
flopenrc #(4) InstrClassRegE(.clk(clk),
.reset(reset),

1535
wally-pipelined/src/muldiv/div.sv Executable file

File diff suppressed because it is too large Load Diff

1921
wally-pipelined/src/muldiv/div.sv~ Executable file

File diff suppressed because it is too large Load Diff

View File

@ -50,8 +50,9 @@ module int32div (Q, done, divdone, rem0, div0, N, D, clk, reset, start);
// #iter: N = m+v+s = m+(s+2) = m+2+s (mod k = 0)
// v = 2 since \rho < 1 (add 4 to make sure its a ceil)
bk6 cpa1 (co1, Num, {1'b0, P},
{3'h0, shiftResult, ~shiftResult,1'b0}, 1'b0);
adder #(6) cpa1 ({1'b0, P},
{3'h0, shiftResult, ~shiftResult,1'b0},
Num);
// Determine whether need to add just Q/Rem
assign shiftResult = P[0];
@ -160,10 +161,10 @@ module divide4x32 (Q, rem0, quotient, op1, op2, clk, reset, state0,
otfzero, enable, Qstar, QMstar);
// Correction and generation of Remainder
add36 cpa2 (cout1, rem1, SumN2[35:0], CarryN2[35:0], 1'b0);
adder #(36) cpa2 (SumN2[35:0], CarryN2[35:0], rem1);
// Add back +D as correction
csa #(36) csa2 (CarryN2[35:0], SumN2[35:0], divi1, SumR, CarryR);
add36 cpa3 (cout2, rem2, SumR, CarryR, 1'b0);
adder #(36) cpa3 (SumR, CarryR, rem2);
// Choose remainder (Rem or Rem+D)
mux2 #(36) mx6 (rem1, rem2, rem1[35], rem3);
// Choose correct Q or QM
@ -349,306 +350,7 @@ module floprc #(parameter WIDTH = 8)
endmodule // qst4
// Ladner-Fischer Prefix Adder
module add36 (cout, sum, a, b, cin);
input logic [35:0] a, b;
input logic cin;
output logic [35:0] sum;
output logic cout;
logic [36:0] p,g;
logic [35:0] c;
// pre-computation
assign p={a^b,1'b0};
assign g={a&b, cin};
// prefix tree
ladner_fischer36 prefix_tree(c, p[35:0], g[35:0]);
// post-computation
assign sum=p[36:1]^c;
assign cout=g[36]|(p[36]&c[35]);
endmodule // add36
module ladner_fischer36 (c, p, g);
input logic [35:0] p;
input logic [35:0] g;
output logic [36:1] c;
// parallel-prefix, Ladner-Fischer
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
black b_15_14 (G_15_14, P_15_14, {g[15],g[14]}, {p[15],p[14]});
black b_17_16 (G_17_16, P_17_16, {g[17],g[16]}, {p[17],p[16]});
black b_19_18 (G_19_18, P_19_18, {g[19],g[18]}, {p[19],p[18]});
black b_21_20 (G_21_20, P_21_20, {g[21],g[20]}, {p[21],p[20]});
black b_23_22 (G_23_22, P_23_22, {g[23],g[22]}, {p[23],p[22]});
black b_25_24 (G_25_24, P_25_24, {g[25],g[24]}, {p[25],p[24]});
black b_27_26 (G_27_26, P_27_26, {g[27],g[26]}, {p[27],p[26]});
black b_29_28 (G_29_28, P_29_28, {g[29],g[28]}, {p[29],p[28]});
black b_31_30 (G_31_30, P_31_30, {g[31],g[30]}, {p[31],p[30]});
black b_33_32 (G_33_32, P_33_32, {g[33],g[32]}, {p[33],p[32]});
black b_35_34 (G_35_34, P_35_34, {g[35],g[34]}, {p[35],p[34]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12});
black b_19_16 (G_19_16, P_19_16, {G_19_18,G_17_16}, {P_19_18,P_17_16});
black b_23_20 (G_23_20, P_23_20, {G_23_22,G_21_20}, {P_23_22,P_21_20});
black b_27_24 (G_27_24, P_27_24, {G_27_26,G_25_24}, {P_27_26,P_25_24});
black b_31_28 (G_31_28, P_31_28, {G_31_30,G_29_28}, {P_31_30,P_29_28});
black b_35_32 (G_35_32, P_35_32, {G_35_34,G_33_32}, {P_35_34,P_33_32});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
black b_13_8 (G_13_8, P_13_8, {G_13_12,G_11_8}, {P_13_12,P_11_8});
black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8});
black b_21_16 (G_21_16, P_21_16, {G_21_20,G_19_16}, {P_21_20,P_19_16});
black b_23_16 (G_23_16, P_23_16, {G_23_20,G_19_16}, {P_23_20,P_19_16});
black b_29_24 (G_29_24, P_29_24, {G_29_28,G_27_24}, {P_29_28,P_27_24});
black b_31_24 (G_31_24, P_31_24, {G_31_28,G_27_24}, {P_31_28,P_27_24});
black b_37_32 (G_37_32, P_37_32, {G_37_36,G_35_32}, {P_37_36,P_35_32});
black b_39_32 (G_39_32, P_39_32, {G_39_36,G_35_32}, {P_39_36,P_35_32});
// Stage 4: Generates G/P pairs that span 8 bits
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
grey g_13_0 (G_13_0, {G_13_8,G_7_0}, P_13_8);
grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8);
black b_25_16 (G_25_16, P_25_16, {G_25_24,G_23_16}, {P_25_24,P_23_16});
black b_27_16 (G_27_16, P_27_16, {G_27_24,G_23_16}, {P_27_24,P_23_16});
black b_29_16 (G_29_16, P_29_16, {G_29_24,G_23_16}, {P_29_24,P_23_16});
black b_31_16 (G_31_16, P_31_16, {G_31_24,G_23_16}, {P_31_24,P_23_16});
black b_41_32 (G_41_32, P_41_32, {G_41_40,G_39_32}, {P_41_40,P_39_32});
black b_43_32 (G_43_32, P_43_32, {G_43_40,G_39_32}, {P_43_40,P_39_32});
black b_45_32 (G_45_32, P_45_32, {G_45_40,G_39_32}, {P_45_40,P_39_32});
black b_47_32 (G_47_32, P_47_32, {G_47_40,G_39_32}, {P_47_40,P_39_32});
// Stage 5: Generates G/P pairs that span 16 bits
grey g_17_0 (G_17_0, {G_17_16,G_15_0}, P_17_16);
grey g_19_0 (G_19_0, {G_19_16,G_15_0}, P_19_16);
grey g_21_0 (G_21_0, {G_21_16,G_15_0}, P_21_16);
grey g_23_0 (G_23_0, {G_23_16,G_15_0}, P_23_16);
grey g_25_0 (G_25_0, {G_25_16,G_15_0}, P_25_16);
grey g_27_0 (G_27_0, {G_27_16,G_15_0}, P_27_16);
grey g_29_0 (G_29_0, {G_29_16,G_15_0}, P_29_16);
grey g_31_0 (G_31_0, {G_31_16,G_15_0}, P_31_16);
black b_49_32 (G_49_32, P_49_32, {G_49_48,G_47_32}, {P_49_48,P_47_32});
black b_51_32 (G_51_32, P_51_32, {G_51_48,G_47_32}, {P_51_48,P_47_32});
black b_53_32 (G_53_32, P_53_32, {G_53_48,G_47_32}, {P_53_48,P_47_32});
black b_55_32 (G_55_32, P_55_32, {G_55_48,G_47_32}, {P_55_48,P_47_32});
black b_57_32 (G_57_32, P_57_32, {G_57_48,G_47_32}, {P_57_48,P_47_32});
black b_59_32 (G_59_32, P_59_32, {G_59_48,G_47_32}, {P_59_48,P_47_32});
black b_61_32 (G_61_32, P_61_32, {G_61_48,G_47_32}, {P_61_48,P_47_32});
black b_63_32 (G_63_32, P_63_32, {G_63_48,G_47_32}, {P_63_48,P_47_32});
// Stage 6: Generates G/P pairs that span 32 bits
grey g_33_0 (G_33_0, {G_33_32,G_31_0}, P_33_32);
grey g_35_0 (G_35_0, {G_35_32,G_31_0}, P_35_32);
grey g_37_0 (G_37_0, {G_37_32,G_31_0}, P_37_32);
grey g_39_0 (G_39_0, {G_39_32,G_31_0}, P_39_32);
grey g_41_0 (G_41_0, {G_41_32,G_31_0}, P_41_32);
grey g_43_0 (G_43_0, {G_43_32,G_31_0}, P_43_32);
grey g_45_0 (G_45_0, {G_45_32,G_31_0}, P_45_32);
grey g_47_0 (G_47_0, {G_47_32,G_31_0}, P_47_32);
grey g_49_0 (G_49_0, {G_49_32,G_31_0}, P_49_32);
grey g_51_0 (G_51_0, {G_51_32,G_31_0}, P_51_32);
grey g_53_0 (G_53_0, {G_53_32,G_31_0}, P_53_32);
grey g_55_0 (G_55_0, {G_55_32,G_31_0}, P_55_32);
grey g_57_0 (G_57_0, {G_57_32,G_31_0}, P_57_32);
grey g_59_0 (G_59_0, {G_59_32,G_31_0}, P_59_32);
grey g_61_0 (G_61_0, {G_61_32,G_31_0}, P_61_32);
grey g_63_0 (G_63_0, {G_63_32,G_31_0}, P_63_32);
// Extra grey cell stage
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
grey g_14_0 (G_14_0, {g[14],G_13_0}, p[14]);
grey g_16_0 (G_16_0, {g[16],G_15_0}, p[16]);
grey g_18_0 (G_18_0, {g[18],G_17_0}, p[18]);
grey g_20_0 (G_20_0, {g[20],G_19_0}, p[20]);
grey g_22_0 (G_22_0, {g[22],G_21_0}, p[22]);
grey g_24_0 (G_24_0, {g[24],G_23_0}, p[24]);
grey g_26_0 (G_26_0, {g[26],G_25_0}, p[26]);
grey g_28_0 (G_28_0, {g[28],G_27_0}, p[28]);
grey g_30_0 (G_30_0, {g[30],G_29_0}, p[30]);
grey g_32_0 (G_32_0, {g[32],G_31_0}, p[32]);
grey g_34_0 (G_34_0, {g[34],G_33_0}, p[34]);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
assign c[14]=G_13_0;
assign c[15]=G_14_0;
assign c[16]=G_15_0;
assign c[17]=G_16_0;
assign c[18]=G_17_0;
assign c[19]=G_18_0;
assign c[20]=G_19_0;
assign c[21]=G_20_0;
assign c[22]=G_21_0;
assign c[23]=G_22_0;
assign c[24]=G_23_0;
assign c[25]=G_24_0;
assign c[26]=G_25_0;
assign c[27]=G_26_0;
assign c[28]=G_27_0;
assign c[29]=G_28_0;
assign c[30]=G_29_0;
assign c[31]=G_30_0;
assign c[32]=G_31_0;
assign c[33]=G_32_0;
assign c[34]=G_33_0;
assign c[35]=G_34_0;
assign c[36]=G_35_0;
endmodule // ladner_fischer36
// Brent-Kung Prefix Adder
module bk6 (cout, sum, a, b, cin);
input logic [5:0] a, b;
input logic cin;
output logic [5:0] sum;
output logic cout;
logic [6:0] p,g;
logic [5:0] c;
// pre-computation
assign p={a^b,1'b0};
assign g={a&b, cin};
// prefix tree
brent_kung prefix_tree(c, p[5:0], g[5:0]);
// post-computation
assign sum=p[6:1]^c;
assign cout=g[6]|(p[6]&c[5]);
endmodule // bk6
module brent_kung (c, p, g);
input logic [5:0] p;
input logic [5:0] g;
output logic [6:1] c;
// parallel-prefix, Brent-Kung
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
// Stage 3: Generates G/P pairs that span 4 bits
// Stage 4: Generates G/P pairs that span 2 bits
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
// Last grey cell stage
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
endmodule // brent_kung
// Black cell
module black (gout, pout, gin, pin);
input logic [1:0] gin, pin;
output logic gout, pout;
assign pout=pin[1]&pin[0];
assign gout=gin[1]|(pin[1]&gin[0]);
endmodule // black
// Grey cell
module grey (gout, gin, pin);
input logic [1:0] gin;
input logic pin;
output logic gout;
assign gout=gin[1]|(pin&gin[0]);
endmodule // grey
// reduced Black cell
module rblk (hout, iout, gin, pin);
input logic [1:0] gin, pin;
output logic hout, iout;
assign iout=pin[1]&pin[0];
assign hout=gin[1]|gin[0];
endmodule
// reduced Grey cell
module rgry (hout, gin);
input logic [1:0] gin;
output logic hout;
assign hout=gin[1]|gin[0];
endmodule // rgry
// LZD
module lz2 (P, V, B0, B1);
input logic B0;
@ -754,6 +456,8 @@ module lz32 (ZP, ZV, B);
endmodule // lz32
// FSM Control for Integer Divider
module fsm32 (en, state0, done, divdone, otfzero,
start, error, NumIter, clk, reset);

View File

@ -51,8 +51,9 @@ module int64div (Q, done, divdone, rem0, div0, N, D, clk, reset, start);
// #iter: N = m+v+s = m+(s+2) = m+2+s (mod k = 0)
// v = 2 since \rho < 1 (add 4 to make sure its a ceil)
bk8 cpa1 (co1, Num, {2'b0, P},
{5'h0, shiftResult, ~shiftResult, 1'b0}, 1'b0);
adder #(8) cpa1 ({2'b0, P},
{5'h0, shiftResult, ~shiftResult, 1'b0},
Num);
// Determine whether need to add just Q/Rem
assign shiftResult = P[0];
@ -71,7 +72,7 @@ module int64div (Q, done, divdone, rem0, div0, N, D, clk, reset, start);
flopr #(1) regc (~clk, reset, otfzerov, otfzero);
flopr #(1) regd (~clk, reset, enablev, enable);
flopr #(1) rege (~clk, reset, state0v, state0);
// To obtain a correct remainder the last bit of the
// quotient has to be aligned with a radix-r boundary.
// Since the quotient is in the range 1/2 < q < 2 (one
@ -161,10 +162,10 @@ module divide4x64 (Q, rem0, quotient, op1, op2, clk, reset, state0,
otfzero, enable, Qstar, QMstar);
// Correction and generation of Remainder
add68 cpa2 (cout1, rem1, SumN2[67:0], CarryN2[67:0], 1'b0);
adder #(68) cpa2 (SumN2[67:0], CarryN2[67:0], rem1);
// Add back +D as correction
csa #(68) csa2 (CarryN2[67:0], SumN2[67:0], divi1, SumR, CarryR);
add68 cpa3 (cout2, rem2, SumR, CarryR, 1'b0);
adder #(68) cpa3 (SumR, CarryR, rem2);
// Choose remainder (Rem or Rem+D)
mux2 #(68) mx6 (rem1, rem2, rem1[67], rem3);
// Choose correct Q or QM
@ -177,22 +178,25 @@ endmodule // divide4x64
module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM);
input logic [3:0] quot;
input logic [3:0] quot;
output logic [1:0] Qin;
output logic [1:0] QMin;
output logic CshiftQ;
output logic CshiftQM;
output logic [1:0] Qin;
output logic [1:0] QMin;
output logic CshiftQ;
output logic CshiftQM;
assign Qin[1] = (quot[1]) | (quot[3]) | (quot[0]);
assign Qin[0] = (quot[1]) | (quot[2]);
assign QMin[1] = (quot[1]) | (!quot[3]&!quot[2]&!quot[1]&!quot[0]);
assign QMin[0] = (quot[3]) | (quot[0]) |
(!quot[3]&!quot[2]&!quot[1]&!quot[0]);
assign CshiftQ = (quot[1]) | (quot[0]);
assign CshiftQM = (quot[3]) | (quot[2]);
// Load/Store Control for OTF
assign Qin[1] = (quot[1]) | (quot[3]) | (quot[0]);
assign Qin[0] = (quot[1]) | (quot[2]);
assign QMin[1] = (quot[1]) | (!quot[3]&!quot[2]&!quot[1]&!quot[0]);
assign QMin[0] = (quot[3]) | (quot[0]) |
(!quot[3]&!quot[2]&!quot[1]&!quot[0]);
assign CshiftQ = (quot[1]) | (quot[0]);
assign CshiftQM = (quot[3]) | (quot[2]);
endmodule
endmodule
// On-the-fly Conversion per Ercegovac/Lang
module otf #(parameter WIDTH=8)
(Qin, QMin, CshiftQ, CshiftQM, clk, reset, enable, R2Q, R1Q);
@ -219,44 +223,44 @@ module otf #(parameter WIDTH=8)
assign Qstar = R2Q;
assign QMstar = R1Q;
endmodule // otf8
endmodule // otf8
module adder #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b,
output logic [WIDTH-1:0] y);
module adder #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b,
output logic [WIDTH-1:0] y);
assign y = a + b;
assign y = a + b;
endmodule // adder
endmodule // adder
module fa (input logic a, b, c, output logic sum, carry);
module fa (input logic a, b, c, output logic sum, carry);
assign sum = a^b^c;
assign carry = a&b|a&c|b&c;
assign sum = a^b^c;
assign carry = a&b|a&c|b&c;
endmodule // fa
endmodule // fa
module csa #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, c,
output logic [WIDTH-1:0] sum, carry);
module csa #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, c,
output logic [WIDTH-1:0] sum, carry);
logic [WIDTH:0] carry_temp;
genvar i;
generate
for (i=0;i<WIDTH;i=i+1)
begin : genbit
fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
end
endgenerate
assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};
logic [WIDTH:0] carry_temp;
genvar i;
generate
for (i=0;i<WIDTH;i=i+1)
begin : genbit
fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
end
endgenerate
assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};
endmodule // adder
endmodule // adder
module flopenr #(parameter WIDTH = 8)
(input logic clk, reset, en,
input logic [WIDTH-1:0] d, output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else if (en) q <= d;
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else if (en) q <= d;
endmodule // flopenr
@ -264,9 +268,9 @@ module flopr #(parameter WIDTH = 8)
(input logic clk, reset, input
logic [WIDTH-1:0] d, output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else q <= d;
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else q <= d;
endmodule // flopr
@ -274,12 +278,12 @@ module flopenrc #(parameter WIDTH = 8)
(input logic clk, reset, en, clear,
input logic [WIDTH-1:0] d, output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else
if (en)
if (clear) q <= 0;
else q <= d;
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else
if (en)
if (clear) q <= 0;
else q <= d;
endmodule // flopenrc
@ -287,13 +291,13 @@ module floprc #(parameter WIDTH = 8)
(input logic clk, reset, clear,
input logic [WIDTH-1:0] d, output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else
if (clear) q <= 0;
else q <= d;
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else
if (clear) q <= 0;
else q <= d;
endmodule // floprc
endmodule // floprc
module eqcmp #(parameter WIDTH = 8)
(input logic [WIDTH-1:0] a, b,
@ -349,483 +353,7 @@ module qst4 (input logic [6:0] s, input logic [2:0] d,
endmodule // qst4
// Ladner-Fischer Prefix Adder
module add68 (cout, sum, a, b, cin);
input logic [67:0] a, b;
input logic cin;
output logic [67:0] sum;
output logic cout;
logic [68:0] p,g;
logic [67:0] c;
// pre-computation
assign p={a^b, 1'b0};
assign g={a&b, cin};
// prefix tree
ladner_fischer68 prefix_tree(c, p[67:0], g[67:0]);
// post-computation
assign sum=p[68:1]^c;
assign cout=g[68]|(p[68]&c[67]);
endmodule
module ladner_fischer68 (c, p, g);
input logic [67:0] p;
input logic [67:0] g;
output logic [68:1] c;
// parallel-prefix, Ladner-Fischer
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
black b_15_14 (G_15_14, P_15_14, {g[15],g[14]}, {p[15],p[14]});
black b_17_16 (G_17_16, P_17_16, {g[17],g[16]}, {p[17],p[16]});
black b_19_18 (G_19_18, P_19_18, {g[19],g[18]}, {p[19],p[18]});
black b_21_20 (G_21_20, P_21_20, {g[21],g[20]}, {p[21],p[20]});
black b_23_22 (G_23_22, P_23_22, {g[23],g[22]}, {p[23],p[22]});
black b_25_24 (G_25_24, P_25_24, {g[25],g[24]}, {p[25],p[24]});
black b_27_26 (G_27_26, P_27_26, {g[27],g[26]}, {p[27],p[26]});
black b_29_28 (G_29_28, P_29_28, {g[29],g[28]}, {p[29],p[28]});
black b_31_30 (G_31_30, P_31_30, {g[31],g[30]}, {p[31],p[30]});
black b_33_32 (G_33_32, P_33_32, {g[33],g[32]}, {p[33],p[32]});
black b_35_34 (G_35_34, P_35_34, {g[35],g[34]}, {p[35],p[34]});
black b_37_36 (G_37_36, P_37_36, {g[37],g[36]}, {p[37],p[36]});
black b_39_38 (G_39_38, P_39_38, {g[39],g[38]}, {p[39],p[38]});
black b_41_40 (G_41_40, P_41_40, {g[41],g[40]}, {p[41],p[40]});
black b_43_42 (G_43_42, P_43_42, {g[43],g[42]}, {p[43],p[42]});
black b_45_44 (G_45_44, P_45_44, {g[45],g[44]}, {p[45],p[44]});
black b_47_46 (G_47_46, P_47_46, {g[47],g[46]}, {p[47],p[46]});
black b_49_48 (G_49_48, P_49_48, {g[49],g[48]}, {p[49],p[48]});
black b_51_50 (G_51_50, P_51_50, {g[51],g[50]}, {p[51],p[50]});
black b_53_52 (G_53_52, P_53_52, {g[53],g[52]}, {p[53],p[52]});
black b_55_54 (G_55_54, P_55_54, {g[55],g[54]}, {p[55],p[54]});
black b_57_56 (G_57_56, P_57_56, {g[57],g[56]}, {p[57],p[56]});
black b_59_58 (G_59_58, P_59_58, {g[59],g[58]}, {p[59],p[58]});
black b_61_60 (G_61_60, P_61_60, {g[61],g[60]}, {p[61],p[60]});
black b_63_62 (G_63_62, P_63_62, {g[63],g[62]}, {p[63],p[62]});
black b_65_64 (G_65_64, P_65_64, {g[65],g[64]}, {p[65],p[64]});
black b_67_66 (G_67_66, P_67_66, {g[67],g[66]}, {p[67],p[66]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12});
black b_19_16 (G_19_16, P_19_16, {G_19_18,G_17_16}, {P_19_18,P_17_16});
black b_23_20 (G_23_20, P_23_20, {G_23_22,G_21_20}, {P_23_22,P_21_20});
black b_27_24 (G_27_24, P_27_24, {G_27_26,G_25_24}, {P_27_26,P_25_24});
black b_31_28 (G_31_28, P_31_28, {G_31_30,G_29_28}, {P_31_30,P_29_28});
black b_35_32 (G_35_32, P_35_32, {G_35_34,G_33_32}, {P_35_34,P_33_32});
black b_39_36 (G_39_36, P_39_36, {G_39_38,G_37_36}, {P_39_38,P_37_36});
black b_43_40 (G_43_40, P_43_40, {G_43_42,G_41_40}, {P_43_42,P_41_40});
black b_47_44 (G_47_44, P_47_44, {G_47_46,G_45_44}, {P_47_46,P_45_44});
black b_51_48 (G_51_48, P_51_48, {G_51_50,G_49_48}, {P_51_50,P_49_48});
black b_55_52 (G_55_52, P_55_52, {G_55_54,G_53_52}, {P_55_54,P_53_52});
black b_59_56 (G_59_56, P_59_56, {G_59_58,G_57_56}, {P_59_58,P_57_56});
black b_63_60 (G_63_60, P_63_60, {G_63_62,G_61_60}, {P_63_62,P_61_60});
black b_67_64 (G_67_64, P_67_64, {G_67_66,G_65_64}, {P_67_66,P_65_64});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
black b_13_8 (G_13_8, P_13_8, {G_13_12,G_11_8}, {P_13_12,P_11_8});
black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8});
black b_21_16 (G_21_16, P_21_16, {G_21_20,G_19_16}, {P_21_20,P_19_16});
black b_23_16 (G_23_16, P_23_16, {G_23_20,G_19_16}, {P_23_20,P_19_16});
black b_29_24 (G_29_24, P_29_24, {G_29_28,G_27_24}, {P_29_28,P_27_24});
black b_31_24 (G_31_24, P_31_24, {G_31_28,G_27_24}, {P_31_28,P_27_24});
black b_37_32 (G_37_32, P_37_32, {G_37_36,G_35_32}, {P_37_36,P_35_32});
black b_39_32 (G_39_32, P_39_32, {G_39_36,G_35_32}, {P_39_36,P_35_32});
black b_45_40 (G_45_40, P_45_40, {G_45_44,G_43_40}, {P_45_44,P_43_40});
black b_47_40 (G_47_40, P_47_40, {G_47_44,G_43_40}, {P_47_44,P_43_40});
black b_53_48 (G_53_48, P_53_48, {G_53_52,G_51_48}, {P_53_52,P_51_48});
black b_55_48 (G_55_48, P_55_48, {G_55_52,G_51_48}, {P_55_52,P_51_48});
black b_61_56 (G_61_56, P_61_56, {G_61_60,G_59_56}, {P_61_60,P_59_56});
black b_63_56 (G_63_56, P_63_56, {G_63_60,G_59_56}, {P_63_60,P_59_56});
black b_69_64 (G_69_64, P_69_64, {G_69_68,G_67_64}, {P_69_68,P_67_64});
black b_71_64 (G_71_64, P_71_64, {G_71_68,G_67_64}, {P_71_68,P_67_64});
// Stage 4: Generates G/P pairs that span 8 bits
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
grey g_13_0 (G_13_0, {G_13_8,G_7_0}, P_13_8);
grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8);
black b_25_16 (G_25_16, P_25_16, {G_25_24,G_23_16}, {P_25_24,P_23_16});
black b_27_16 (G_27_16, P_27_16, {G_27_24,G_23_16}, {P_27_24,P_23_16});
black b_29_16 (G_29_16, P_29_16, {G_29_24,G_23_16}, {P_29_24,P_23_16});
black b_31_16 (G_31_16, P_31_16, {G_31_24,G_23_16}, {P_31_24,P_23_16});
black b_41_32 (G_41_32, P_41_32, {G_41_40,G_39_32}, {P_41_40,P_39_32});
black b_43_32 (G_43_32, P_43_32, {G_43_40,G_39_32}, {P_43_40,P_39_32});
black b_45_32 (G_45_32, P_45_32, {G_45_40,G_39_32}, {P_45_40,P_39_32});
black b_47_32 (G_47_32, P_47_32, {G_47_40,G_39_32}, {P_47_40,P_39_32});
black b_57_48 (G_57_48, P_57_48, {G_57_56,G_55_48}, {P_57_56,P_55_48});
black b_59_48 (G_59_48, P_59_48, {G_59_56,G_55_48}, {P_59_56,P_55_48});
black b_61_48 (G_61_48, P_61_48, {G_61_56,G_55_48}, {P_61_56,P_55_48});
black b_63_48 (G_63_48, P_63_48, {G_63_56,G_55_48}, {P_63_56,P_55_48});
black b_73_64 (G_73_64, P_73_64, {G_73_72,G_71_64}, {P_73_72,P_71_64});
black b_75_64 (G_75_64, P_75_64, {G_75_72,G_71_64}, {P_75_72,P_71_64});
black b_77_64 (G_77_64, P_77_64, {G_77_72,G_71_64}, {P_77_72,P_71_64});
black b_79_64 (G_79_64, P_79_64, {G_79_72,G_71_64}, {P_79_72,P_71_64});
// Stage 5: Generates G/P pairs that span 16 bits
grey g_17_0 (G_17_0, {G_17_16,G_15_0}, P_17_16);
grey g_19_0 (G_19_0, {G_19_16,G_15_0}, P_19_16);
grey g_21_0 (G_21_0, {G_21_16,G_15_0}, P_21_16);
grey g_23_0 (G_23_0, {G_23_16,G_15_0}, P_23_16);
grey g_25_0 (G_25_0, {G_25_16,G_15_0}, P_25_16);
grey g_27_0 (G_27_0, {G_27_16,G_15_0}, P_27_16);
grey g_29_0 (G_29_0, {G_29_16,G_15_0}, P_29_16);
grey g_31_0 (G_31_0, {G_31_16,G_15_0}, P_31_16);
black b_49_32 (G_49_32, P_49_32, {G_49_48,G_47_32}, {P_49_48,P_47_32});
black b_51_32 (G_51_32, P_51_32, {G_51_48,G_47_32}, {P_51_48,P_47_32});
black b_53_32 (G_53_32, P_53_32, {G_53_48,G_47_32}, {P_53_48,P_47_32});
black b_55_32 (G_55_32, P_55_32, {G_55_48,G_47_32}, {P_55_48,P_47_32});
black b_57_32 (G_57_32, P_57_32, {G_57_48,G_47_32}, {P_57_48,P_47_32});
black b_59_32 (G_59_32, P_59_32, {G_59_48,G_47_32}, {P_59_48,P_47_32});
black b_61_32 (G_61_32, P_61_32, {G_61_48,G_47_32}, {P_61_48,P_47_32});
black b_63_32 (G_63_32, P_63_32, {G_63_48,G_47_32}, {P_63_48,P_47_32});
black b_81_64 (G_81_64, P_81_64, {G_81_80,G_79_64}, {P_81_80,P_79_64});
black b_83_64 (G_83_64, P_83_64, {G_83_80,G_79_64}, {P_83_80,P_79_64});
black b_85_64 (G_85_64, P_85_64, {G_85_80,G_79_64}, {P_85_80,P_79_64});
black b_87_64 (G_87_64, P_87_64, {G_87_80,G_79_64}, {P_87_80,P_79_64});
black b_89_64 (G_89_64, P_89_64, {G_89_80,G_79_64}, {P_89_80,P_79_64});
black b_91_64 (G_91_64, P_91_64, {G_91_80,G_79_64}, {P_91_80,P_79_64});
black b_93_64 (G_93_64, P_93_64, {G_93_80,G_79_64}, {P_93_80,P_79_64});
black b_95_64 (G_95_64, P_95_64, {G_95_80,G_79_64}, {P_95_80,P_79_64});
// Stage 6: Generates G/P pairs that span 32 bits
grey g_33_0 (G_33_0, {G_33_32,G_31_0}, P_33_32);
grey g_35_0 (G_35_0, {G_35_32,G_31_0}, P_35_32);
grey g_37_0 (G_37_0, {G_37_32,G_31_0}, P_37_32);
grey g_39_0 (G_39_0, {G_39_32,G_31_0}, P_39_32);
grey g_41_0 (G_41_0, {G_41_32,G_31_0}, P_41_32);
grey g_43_0 (G_43_0, {G_43_32,G_31_0}, P_43_32);
grey g_45_0 (G_45_0, {G_45_32,G_31_0}, P_45_32);
grey g_47_0 (G_47_0, {G_47_32,G_31_0}, P_47_32);
grey g_49_0 (G_49_0, {G_49_32,G_31_0}, P_49_32);
grey g_51_0 (G_51_0, {G_51_32,G_31_0}, P_51_32);
grey g_53_0 (G_53_0, {G_53_32,G_31_0}, P_53_32);
grey g_55_0 (G_55_0, {G_55_32,G_31_0}, P_55_32);
grey g_57_0 (G_57_0, {G_57_32,G_31_0}, P_57_32);
grey g_59_0 (G_59_0, {G_59_32,G_31_0}, P_59_32);
grey g_61_0 (G_61_0, {G_61_32,G_31_0}, P_61_32);
grey g_63_0 (G_63_0, {G_63_32,G_31_0}, P_63_32);
black b_97_64 (G_97_64, P_97_64, {G_97_96,G_95_64}, {P_97_96,P_95_64});
black b_99_64 (G_99_64, P_99_64, {G_99_96,G_95_64}, {P_99_96,P_95_64});
black b_101_64 (G_101_64, P_101_64, {G_101_96,G_95_64}, {P_101_96,P_95_64});
black b_103_64 (G_103_64, P_103_64, {G_103_96,G_95_64}, {P_103_96,P_95_64});
black b_105_64 (G_105_64, P_105_64, {G_105_96,G_95_64}, {P_105_96,P_95_64});
black b_107_64 (G_107_64, P_107_64, {G_107_96,G_95_64}, {P_107_96,P_95_64});
black b_109_64 (G_109_64, P_109_64, {G_109_96,G_95_64}, {P_109_96,P_95_64});
black b_111_64 (G_111_64, P_111_64, {G_111_96,G_95_64}, {P_111_96,P_95_64});
black b_113_64 (G_113_64, P_113_64, {G_113_96,G_95_64}, {P_113_96,P_95_64});
black b_115_64 (G_115_64, P_115_64, {G_115_96,G_95_64}, {P_115_96,P_95_64});
black b_117_64 (G_117_64, P_117_64, {G_117_96,G_95_64}, {P_117_96,P_95_64});
black b_119_64 (G_119_64, P_119_64, {G_119_96,G_95_64}, {P_119_96,P_95_64});
black b_121_64 (G_121_64, P_121_64, {G_121_96,G_95_64}, {P_121_96,P_95_64});
black b_123_64 (G_123_64, P_123_64, {G_123_96,G_95_64}, {P_123_96,P_95_64});
black b_125_64 (G_125_64, P_125_64, {G_125_96,G_95_64}, {P_125_96,P_95_64});
black b_127_64 (G_127_64, P_127_64, {G_127_96,G_95_64}, {P_127_96,P_95_64});
// Stage 7: Generates G/P pairs that span 64 bits
grey g_65_0 (G_65_0, {G_65_64,G_63_0}, P_65_64);
grey g_67_0 (G_67_0, {G_67_64,G_63_0}, P_67_64);
grey g_69_0 (G_69_0, {G_69_64,G_63_0}, P_69_64);
grey g_71_0 (G_71_0, {G_71_64,G_63_0}, P_71_64);
grey g_73_0 (G_73_0, {G_73_64,G_63_0}, P_73_64);
grey g_75_0 (G_75_0, {G_75_64,G_63_0}, P_75_64);
grey g_77_0 (G_77_0, {G_77_64,G_63_0}, P_77_64);
grey g_79_0 (G_79_0, {G_79_64,G_63_0}, P_79_64);
grey g_81_0 (G_81_0, {G_81_64,G_63_0}, P_81_64);
grey g_83_0 (G_83_0, {G_83_64,G_63_0}, P_83_64);
grey g_85_0 (G_85_0, {G_85_64,G_63_0}, P_85_64);
grey g_87_0 (G_87_0, {G_87_64,G_63_0}, P_87_64);
grey g_89_0 (G_89_0, {G_89_64,G_63_0}, P_89_64);
grey g_91_0 (G_91_0, {G_91_64,G_63_0}, P_91_64);
grey g_93_0 (G_93_0, {G_93_64,G_63_0}, P_93_64);
grey g_95_0 (G_95_0, {G_95_64,G_63_0}, P_95_64);
grey g_97_0 (G_97_0, {G_97_64,G_63_0}, P_97_64);
grey g_99_0 (G_99_0, {G_99_64,G_63_0}, P_99_64);
grey g_101_0 (G_101_0, {G_101_64,G_63_0}, P_101_64);
grey g_103_0 (G_103_0, {G_103_64,G_63_0}, P_103_64);
grey g_105_0 (G_105_0, {G_105_64,G_63_0}, P_105_64);
grey g_107_0 (G_107_0, {G_107_64,G_63_0}, P_107_64);
grey g_109_0 (G_109_0, {G_109_64,G_63_0}, P_109_64);
grey g_111_0 (G_111_0, {G_111_64,G_63_0}, P_111_64);
grey g_113_0 (G_113_0, {G_113_64,G_63_0}, P_113_64);
grey g_115_0 (G_115_0, {G_115_64,G_63_0}, P_115_64);
grey g_117_0 (G_117_0, {G_117_64,G_63_0}, P_117_64);
grey g_119_0 (G_119_0, {G_119_64,G_63_0}, P_119_64);
grey g_121_0 (G_121_0, {G_121_64,G_63_0}, P_121_64);
grey g_123_0 (G_123_0, {G_123_64,G_63_0}, P_123_64);
grey g_125_0 (G_125_0, {G_125_64,G_63_0}, P_125_64);
grey g_127_0 (G_127_0, {G_127_64,G_63_0}, P_127_64);
// Extra grey cell stage
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
grey g_14_0 (G_14_0, {g[14],G_13_0}, p[14]);
grey g_16_0 (G_16_0, {g[16],G_15_0}, p[16]);
grey g_18_0 (G_18_0, {g[18],G_17_0}, p[18]);
grey g_20_0 (G_20_0, {g[20],G_19_0}, p[20]);
grey g_22_0 (G_22_0, {g[22],G_21_0}, p[22]);
grey g_24_0 (G_24_0, {g[24],G_23_0}, p[24]);
grey g_26_0 (G_26_0, {g[26],G_25_0}, p[26]);
grey g_28_0 (G_28_0, {g[28],G_27_0}, p[28]);
grey g_30_0 (G_30_0, {g[30],G_29_0}, p[30]);
grey g_32_0 (G_32_0, {g[32],G_31_0}, p[32]);
grey g_34_0 (G_34_0, {g[34],G_33_0}, p[34]);
grey g_36_0 (G_36_0, {g[36],G_35_0}, p[36]);
grey g_38_0 (G_38_0, {g[38],G_37_0}, p[38]);
grey g_40_0 (G_40_0, {g[40],G_39_0}, p[40]);
grey g_42_0 (G_42_0, {g[42],G_41_0}, p[42]);
grey g_44_0 (G_44_0, {g[44],G_43_0}, p[44]);
grey g_46_0 (G_46_0, {g[46],G_45_0}, p[46]);
grey g_48_0 (G_48_0, {g[48],G_47_0}, p[48]);
grey g_50_0 (G_50_0, {g[50],G_49_0}, p[50]);
grey g_52_0 (G_52_0, {g[52],G_51_0}, p[52]);
grey g_54_0 (G_54_0, {g[54],G_53_0}, p[54]);
grey g_56_0 (G_56_0, {g[56],G_55_0}, p[56]);
grey g_58_0 (G_58_0, {g[58],G_57_0}, p[58]);
grey g_60_0 (G_60_0, {g[60],G_59_0}, p[60]);
grey g_62_0 (G_62_0, {g[62],G_61_0}, p[62]);
grey g_64_0 (G_64_0, {g[64],G_63_0}, p[64]);
grey g_66_0 (G_66_0, {g[66],G_65_0}, p[66]);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
assign c[14]=G_13_0;
assign c[15]=G_14_0;
assign c[16]=G_15_0;
assign c[17]=G_16_0;
assign c[18]=G_17_0;
assign c[19]=G_18_0;
assign c[20]=G_19_0;
assign c[21]=G_20_0;
assign c[22]=G_21_0;
assign c[23]=G_22_0;
assign c[24]=G_23_0;
assign c[25]=G_24_0;
assign c[26]=G_25_0;
assign c[27]=G_26_0;
assign c[28]=G_27_0;
assign c[29]=G_28_0;
assign c[30]=G_29_0;
assign c[31]=G_30_0;
assign c[32]=G_31_0;
assign c[33]=G_32_0;
assign c[34]=G_33_0;
assign c[35]=G_34_0;
assign c[36]=G_35_0;
assign c[37]=G_36_0;
assign c[38]=G_37_0;
assign c[39]=G_38_0;
assign c[40]=G_39_0;
assign c[41]=G_40_0;
assign c[42]=G_41_0;
assign c[43]=G_42_0;
assign c[44]=G_43_0;
assign c[45]=G_44_0;
assign c[46]=G_45_0;
assign c[47]=G_46_0;
assign c[48]=G_47_0;
assign c[49]=G_48_0;
assign c[50]=G_49_0;
assign c[51]=G_50_0;
assign c[52]=G_51_0;
assign c[53]=G_52_0;
assign c[54]=G_53_0;
assign c[55]=G_54_0;
assign c[56]=G_55_0;
assign c[57]=G_56_0;
assign c[58]=G_57_0;
assign c[59]=G_58_0;
assign c[60]=G_59_0;
assign c[61]=G_60_0;
assign c[62]=G_61_0;
assign c[63]=G_62_0;
assign c[64]=G_63_0;
assign c[65]=G_64_0;
assign c[66]=G_65_0;
assign c[67]=G_66_0;
assign c[68]=G_67_0;
endmodule // ladner_fischer68
// Brent-Kung Carry-save Prefix Adder
module bk8 (cout, sum, a, b, cin);
input logic [7:0] a, b;
input logic cin;
output logic [7:0] sum;
output logic cout;
logic [8:0] p,g,t;
logic [7:0] c;
// pre-computation
assign p={a^b,1'b0};
assign g={a&b, cin};
assign t[1]=p[1];
assign t[2]=p[2];
assign t[3]=p[3]^g[2];
assign t[4]=p[4];
assign t[5]=p[5]^g[4];
assign t[6]=p[6];
assign t[7]=p[7]^g[6];
assign t[8]=p[8];
// prefix tree
brent_kung8 prefix_tree(c, p[7:0], g[7:0]);
// post-computation
assign sum=p[8:1]^c;
assign cout=g[8]|(p[8]&c[7]);
endmodule // bk8
module brent_kung8 (c, p, g);
input logic [7:0] p;
input logic [7:0] g;
output logic [8:1] c;
// parallel-prefix, Brent-Kung
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
// Stage 4: Generates G/P pairs that span 2 bits
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
// Last grey cell stage
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
endmodule // brent_kung8
// Black cell
module black (gout, pout, gin, pin);
input logic [1:0] gin, pin;
output logic gout, pout;
assign pout=pin[1]&pin[0];
assign gout=gin[1]|(pin[1]&gin[0]);
endmodule // black
// Grey cell
module grey (gout, gin, pin);
input logic [1:0] gin;
input logic pin;
output logic gout;
assign gout=gin[1]|(pin&gin[0]);
endmodule // grey
// reduced Black cell
module rblk (hout, iout, gin, pin);
input logic [1:0] gin, pin;
output logic hout, iout;
assign iout=pin[1]&pin[0];
assign hout=gin[1]|gin[0];
endmodule
// reduced Grey cell
module rgry (hout, gin);
input logic [1:0] gin;
output logic hout;
assign hout=gin[1]|gin[0];
endmodule // rgry
// LZD
module lz2 (P, V, B0, B1);
@ -953,6 +481,8 @@ module lz64 (ZP, ZV, B);
endmodule // lz64
// FSM Control for Integer Divider
module fsm64 (en, state0, done, divdone, otfzero,
start, error, NumIter, clk, reset);

View File

@ -27,7 +27,7 @@ if [file exists work] {
vlib work
# compile source files
vlog muxs.sv shifters.sv divide4x32.sv test_int32div.sv
vlog mux_div.sv shifters_div.sv divide4x32.sv test_int32div.sv
# start and run simulation
vsim -voptargs=+acc work.tb
@ -109,6 +109,6 @@ configure wave -rowmargin 4
configure wave -childrowmargin 2
-- Run the Simulation
run 138ns
run 338ns

View File

@ -27,7 +27,7 @@ if [file exists work] {
vlib work
# compile source files
vlog muxs.sv shifters.sv divide4x64.sv test_int64div.sv
vlog mux_div.sv shifters_div.sv divide4x64.sv test_int64div.sv
# start and run simulation
vsim -voptargs=+acc work.tb
@ -109,6 +109,6 @@ configure wave -rowmargin 4
configure wave -childrowmargin 2
-- Run the Simulation
run 138ns
run 338ns

View File

@ -27,7 +27,7 @@ if [file exists work] {
vlib work
# compile source files
vlog muxs.sv shifters.sv divide4x32.sv test_iter32.sv
vlog mux_div.sv shifters_div.sv divide4x32.sv test_iter32.sv
# start and run simulation
vsim -voptargs=+acc work.tb

View File

@ -27,7 +27,7 @@ if [file exists work] {
vlib work
# compile source files
vlog muxs.sv shifters.sv divide4x64.sv test_iter64.sv
vlog mux_div.sv shifters_div.sv divide4x64.sv test_iter64.sv
# start and run simulation
vsim -voptargs=+acc work.tb

View File

@ -0,0 +1,106 @@
module shifter_l64 (Z, A, Shift);
input logic [63:0] A;
input logic [5:0] Shift;
logic [63:0] stage1;
logic [63:0] stage2;
logic [63:0] stage3;
logic [63:0] stage4;
logic [63:0] stage5;
logic [31:0] thirtytwozeros = 32'h0;
logic [15:0] sixteenzeros = 16'h0;
logic [ 7:0] eightzeros = 8'h0;
logic [ 3:0] fourzeros = 4'h0;
logic [ 1:0] twozeros = 2'b00;
logic onezero = 1'b0;
output logic [63:0] Z;
mux2 #(64) mx01(A, {A[31:0], thirtytwozeros}, Shift[5], stage1);
mux2 #(64) mx02(stage1, {stage1[47:0], sixteenzeros}, Shift[4], stage2);
mux2 #(64) mx03(stage2, {stage2[55:0], eightzeros}, Shift[3], stage3);
mux2 #(64) mx04(stage3, {stage3[59:0], fourzeros}, Shift[2], stage4);
mux2 #(64) mx05(stage4, {stage4[61:0], twozeros}, Shift[1], stage5);
mux2 #(64) mx06(stage5, {stage5[62:0], onezero}, Shift[0], Z);
endmodule // shifter_l64
module shifter_r64 (Z, A, Shift);
input logic [63:0] A;
input logic [5:0] Shift;
logic [63:0] stage1;
logic [63:0] stage2;
logic [63:0] stage3;
logic [63:0] stage4;
logic [63:0] stage5;
logic [31:0] thirtytwozeros = 32'h0;
logic [15:0] sixteenzeros = 16'h0;
logic [ 7:0] eightzeros = 8'h0;
logic [ 3:0] fourzeros = 4'h0;
logic [ 1:0] twozeros = 2'b00;
logic onezero = 1'b0;
output logic [63:0] Z;
mux2 #(64) mx01(A, {thirtytwozeros, A[63:32]}, Shift[5], stage1);
mux2 #(64) mx02(stage1, {sixteenzeros, stage1[63:16]}, Shift[4], stage2);
mux2 #(64) mx03(stage2, {eightzeros, stage2[63:8]}, Shift[3], stage3);
mux2 #(64) mx04(stage3, {fourzeros, stage3[63:4]}, Shift[2], stage4);
mux2 #(64) mx05(stage4, {twozeros, stage4[63:2]}, Shift[1], stage5);
mux2 #(64) mx06(stage5, {onezero, stage5[63:1]}, Shift[0], Z);
endmodule // shifter_r64
module shifter_l32 (Z, A, Shift);
input logic [31:0] A;
input logic [4:0] Shift;
logic [31:0] stage1;
logic [31:0] stage2;
logic [31:0] stage3;
logic [31:0] stage4;
logic [15:0] sixteenzeros = 16'h0;
logic [ 7:0] eightzeros = 8'h0;
logic [ 3:0] fourzeros = 4'h0;
logic [ 1:0] twozeros = 2'b00;
logic onezero = 1'b0;
output logic [31:0] Z;
mux2 #(32) mx01(A, {A[15:0], sixteenzeros}, Shift[4], stage1);
mux2 #(32) mx02(stage1, {stage1[23:0], eightzeros}, Shift[3], stage2);
mux2 #(32) mx03(stage2, {stage2[27:0], fourzeros}, Shift[2], stage3);
mux2 #(32) mx04(stage3, {stage3[29:0], twozeros}, Shift[1], stage4);
mux2 #(32) mx05(stage4, {stage4[30:0], onezero}, Shift[0], Z);
endmodule // shifter_l32
module shifter_r32 (Z, A, Shift);
input logic [31:0] A;
input logic [4:0] Shift;
logic [31:0] stage1;
logic [31:0] stage2;
logic [31:0] stage3;
logic [31:0] stage4;
logic [15:0] sixteenzeros = 16'h0;
logic [ 7:0] eightzeros = 8'h0;
logic [ 3:0] fourzeros = 4'h0;
logic [ 1:0] twozeros = 2'b00;
logic onezero = 1'b0;
output logic [31:0] Z;
mux2 #(32) mx01(A, {sixteenzeros, A[31:16]}, Shift[4], stage1);
mux2 #(32) mx02(stage1, {eightzeros, stage1[31:8]}, Shift[3], stage2);
mux2 #(32) mx03(stage2, {fourzeros, stage2[31:4]}, Shift[2], stage3);
mux2 #(32) mx04(stage3, {twozeros, stage3[31:2]}, Shift[1], stage4);
mux2 #(32) mx05(stage4, {onezero, stage4[31:1]}, Shift[0], Z);
endmodule // shifter_r32

View File

@ -38,10 +38,10 @@ module tb;
#0 start = 1'b0;
#0 reset = 1'b1;
#22 reset = 1'b0;
//#25 N = 32'h9830_07C0;
//#0 D = 32'h0000_000C;
#25 N = 32'h06b9_7b0d;
#0 D = 32'h46df_998d;
#25 N = 32'h9830_07C0;
#0 D = 32'h0000_000C;
//#25 N = 32'h06b9_7b0d;
//#0 D = 32'h46df_998d;
#0 start = 1'b1;
#50 start = 1'b0;

View File

@ -26,54 +26,61 @@
`include "wally-config.vh"
module muldiv (
input logic clk, reset,
// Decode Stage interface
input logic [31:0] InstrD,
// Execute Stage interface
input logic [`XLEN-1:0] SrcAE, SrcBE,
input logic [2:0] Funct3E,
input logic MulDivE, W64E,
// Writeback stage
output logic [`XLEN-1:0] MulDivResultW,
// hazards
input logic StallM, StallW, FlushM, FlushW
);
input logic clk, reset,
// Decode Stage interface
input logic [31:0] InstrD,
// Execute Stage interface
input logic [`XLEN-1:0] SrcAE, SrcBE,
input logic [2:0] Funct3E,
input logic MulDivE, W64E,
// Writeback stage
output logic [`XLEN-1:0] MulDivResultW,
// Divide Done
output logic DivDoneW,
// hazards
input logic StallM, StallW, FlushM, FlushW
);
generate
if (`M_SUPPORTED) begin
logic [`XLEN-1:0] MulDivResultE, MulDivResultM;
logic [`XLEN-1:0] PrelimResultE;
logic [`XLEN-1:0] QuotE, RemE;
logic [`XLEN*2-1:0] ProdE;
generate
if (`M_SUPPORTED) begin
logic [`XLEN-1:0] MulDivResultE, MulDivResultM;
logic [`XLEN-1:0] PrelimResultE;
logic [`XLEN-1:0] QuotE, RemE;
logic [`XLEN*2-1:0] ProdE;
// Multiplier
mul mul(.*);
// Multiplier
mul mul(.*);
// Divide
div div (QuotE, RemE, DivDoneE, div0error, SrcAE, SrcBE, clk, reset, MulDivE);
// Select result
always_comb
case (Funct3E)
3'b000: PrelimResultE = ProdE[`XLEN-1:0];
3'b001: PrelimResultE = ProdE[`XLEN*2-1:`XLEN];
3'b010: PrelimResultE = ProdE[`XLEN*2-1:`XLEN];
3'b011: PrelimResultE = ProdE[`XLEN*2-1:`XLEN];
3'b100: PrelimResultE = QuotE;
3'b101: PrelimResultE = QuotE;
3'b110: PrelimResultE = RemE;
3'b111: PrelimResultE = RemE;
endcase
// Handle sign extension for W-type instructions
if (`XLEN == 64) begin // RV64 has W-type instructions
assign MulDivResultE = W64E ? {{32{PrelimResultE[31]}}, PrelimResultE[31:0]} : PrelimResultE;
end else begin // RV32 has no W-type instructions
assign MulDivResultE = PrelimResultE;
// Select result
always_comb
case (Funct3E)
3'b000: PrelimResultE = ProdE[`XLEN-1:0];
3'b001: PrelimResultE = ProdE[`XLEN*2-1:`XLEN];
3'b010: PrelimResultE = ProdE[`XLEN*2-1:`XLEN];
3'b011: PrelimResultE = ProdE[`XLEN*2-1:`XLEN];
3'b100: PrelimResultE = QuotE;
3'b101: PrelimResultE = QuotE;
3'b110: PrelimResultE = RemE;
3'b111: PrelimResultE = RemE;
endcase
// Handle sign extension for W-type instructions
if (`XLEN == 64) begin // RV64 has W-type instructions
assign MulDivResultE = W64E ? {{32{PrelimResultE[31]}}, PrelimResultE[31:0]} : PrelimResultE;
end else begin // RV32 has no W-type instructions
assign MulDivResultE = PrelimResultE;
end
flopenrc #(`XLEN) MulDivResultMReg(clk, reset, FlushM, ~StallM, MulDivResultE, MulDivResultM);
flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW);
end else begin // no M instructions supported
assign MulDivResultW = 0;
end
endgenerate
endmodule // muldiv
flopenrc #(`XLEN) MulDivResultMReg(clk, reset, FlushM, ~StallM, MulDivResultE, MulDivResultM);
flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW);
end else begin // no M instructions supported
assign MulDivResultW = 0;
end
endgenerate
endmodule

View File

@ -53,7 +53,7 @@ module wallypipelinedhart (
output logic HWRITED
);
// logic [1:0] ForwardAE, ForwardBE;
// logic [1:0] ForwardAE, ForwardBE;
logic StallF, StallD, StallE, StallM, StallW;
logic FlushF, FlushD, FlushE, FlushM, FlushW;
logic RetM, TrapM;
@ -65,7 +65,7 @@ module wallypipelinedhart (
logic [`XLEN-1:0] SrcAE, SrcBE;
logic [`XLEN-1:0] SrcAM;
logic [2:0] Funct3E;
// logic [31:0] InstrF;
// logic [31:0] InstrF;
logic [31:0] InstrD, InstrM;
logic [`XLEN-1:0] PCE, PCM, PCLinkE, PCLinkW;
logic [`XLEN-1:0] PCTargetE;
@ -84,6 +84,7 @@ module wallypipelinedhart (
logic PCSrcE;
logic CSRWritePendingDEM;
logic LoadStallD, MulDivStallD, CSRRdStallD;
logic DivDoneW;
logic [4:0] SetFflagsM;
logic [2:0] FRM_REGW;
logic FloatRegWriteW;
@ -98,6 +99,8 @@ module wallypipelinedhart (
logic [`XLEN-1:0] PageTableEntryF, PageTableEntryM;
// IMem stalls
logic ICacheStallF;
logic [`XLEN-1:0] MMUPAdr, MMUReadPTE;
logic MMUTranslate, MMUTranslationComplete, MMUReady;
@ -128,12 +131,12 @@ module wallypipelinedhart (
.Funct7M(InstrM[31:25]),
.*);
pagetablewalker pagetablewalker(.*); // can send addresses to ahblite, send out pagetablestall
// *** can connect to hazard unit
// changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed.
// Would need to insertinstruction as InstrD, not InstrF
/*ahblite ebu(
.InstrReadF(1'b0),
pagetablewalker pagetablewalker(.*); // can send addresses to ahblite, send out pagetablestall
// *** can connect to hazard unit
// changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed.
// Would need to insertinstruction as InstrD, not InstrF
/*ahblite ebu(
.InstrReadF(1'b0),
.InstrRData(), // hook up InstrF later
.MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]),
.*); */

View File

@ -7,7 +7,7 @@ module testbench_busybear();
logic [31:0] GPIOPinsOut, GPIOPinsEn;
// instantiate device to be tested
logic [31:0] CheckInstrF;
logic [31:0] CheckInstrD;
logic [`AHBW-1:0] HRDATA;
logic [31:0] HADDR;
@ -143,6 +143,9 @@ module testbench_busybear();
logic [63:0] pcExpected;
logic [63:0] regExpected;
integer regNumExpected;
logic [`XLEN-1:0] PCW;
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, dut.hart.ifu.PCM, PCW);
genvar i;
generate
@ -191,8 +194,8 @@ module testbench_busybear();
logic [`XLEN-1:0] readAdrExpected;
always @(dut.HRDATA) begin
#1;
if (dut.hart.MemRWM[1] && ~HWRITE && HADDR != dut.PCF && dut.HRDATA !== {64{1'bx}}) begin
#2;
if (dut.hart.MemRWM[1] && ~HWRITE && HADDR[31:3] != dut.PCF[31:3] && dut.HRDATA !== {64{1'bx}}) begin
//$display("%0t", $time);
if($feof(data_file_memR)) begin
$display("no more memR data to read");
@ -262,7 +265,7 @@ module testbench_busybear();
end
always @(dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW) begin
if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 2 && instrs != 0) begin
if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 2 && instrs > 1) begin
$display("!!!!!! illegal instruction !!!!!!!!!!");
$display("(as a reminder, MCAUSE and MEPC are set by this)");
$display("at %0t ps, instr %0d, HADDR %x", $time, instrs, HADDR);
@ -334,7 +337,7 @@ module testbench_busybear();
`CHECK_CSR(STVEC)
initial begin //this is temporary until the bug can be fixed!!!
#18909760;
#11130100;
force dut.hart.ieu.dp.regf.rf[5] = 64'h0000000080000004;
#100;
release dut.hart.ieu.dp.regf.rf[5];
@ -344,13 +347,13 @@ module testbench_busybear();
initial begin
speculative = 0;
end
logic [63:0] lastCheckInstrF, lastPC, lastPC2;
logic [63:0] lastCheckInstrD, lastPC, lastPC2;
string PCtextW, PCtext2W;
logic [31:0] InstrWExpected;
logic [63:0] PCWExpected;
always @(dut.hart.ifu.PCW or dut.hart.ieu.InstrValidW) begin
if(dut.hart.ieu.InstrValidW && dut.hart.ifu.PCW != 0) begin
always @(PCW or dut.hart.ieu.InstrValidW) begin
if(dut.hart.ieu.InstrValidW && PCW != 0) begin
if($feof(data_file_PCW)) begin
$display("no more PC data to read");
`ERROR
@ -363,8 +366,8 @@ module testbench_busybear();
scan_file_PCW = $fscanf(data_file_PCW, "%x\n", InstrWExpected);
// then expected PC value
scan_file_PCW = $fscanf(data_file_PCW, "%x\n", PCWExpected);
if(~equal(dut.hart.ifu.PCW,PCWExpected,2)) begin
$display("%0t ps, instr %0d: PCW does not equal PCW expected: %x, %x", $time, instrs, dut.hart.ifu.PCW, PCWExpected);
if(~equal(PCW,PCWExpected,2)) begin
$display("%0t ps, instr %0d: PCW does not equal PCW expected: %x, %x", $time, instrs, PCW, PCWExpected);
`ERROR
end
//if(it.InstrW != InstrWExpected) begin
@ -379,109 +382,110 @@ module testbench_busybear();
end
logic [31:0] InstrMask;
logic forcedInstr;
logic [63:0] lastPCF;
always @(dut.PCF or dut.hart.ifu.InstrF or reset) begin
logic [63:0] lastPCD;
always @(dut.hart.ifu.PCD or dut.hart.ifu.InstrRawD or reset or negedge dut.hart.ifu.StallE) begin
if(~HWRITE) begin
#3;
if (~reset && dut.hart.ifu.InstrF[15:0] !== {16{1'bx}} && ~dut.hart.StallD) begin
if (dut.PCF !== lastPCF) begin
lastCheckInstrF = CheckInstrF;
lastPC <= dut.PCF;
lastPC2 <= lastPC;
if (speculative && (lastPC != pcExpected)) begin
speculative = ~equal(dut.PCF,pcExpected,3);
if(dut.PCF===pcExpected) begin
if(dut.hart.ifu.InstrF[6:0] == 7'b1010011) begin // for now, NOP out any float instrs
force CheckInstrF = 32'b0010011;
release CheckInstrF;
force dut.hart.ifu.InstrF = 32'b0010011;
#7;
release dut.hart.ifu.InstrF;
$display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.PCF, instrs, $time);
warningCount += 1;
forcedInstr = 1;
end
else begin
forcedInstr = 0;
#2;
if (~reset && dut.hart.ifu.InstrRawD[15:0] !== {16{1'bx}} && dut.hart.ifu.PCD !== 64'h0 && ~dut.hart.ifu.StallE) begin
if (dut.hart.ifu.PCD !== lastPCD) begin
lastCheckInstrD = CheckInstrD;
lastPC <= dut.hart.ifu.PCD;
lastPC2 <= lastPC;
if (speculative && (lastPC != pcExpected)) begin
speculative = ~equal(dut.hart.ifu.PCD,pcExpected,3);
if(dut.hart.ifu.PCD===pcExpected) begin
if(dut.hart.ifu.InstrRawD[6:0] == 7'b1010011) begin // for now, NOP out any float instrs
force CheckInstrD = 32'b0010011;
release CheckInstrD;
force dut.hart.ifu.InstrRawD = 32'b0010011;
#7;
release dut.hart.ifu.InstrRawD;
$display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.hart.ifu.PCD, instrs, $time);
warningCount += 1;
forcedInstr = 1;
end
else begin
forcedInstr = 0;
end
end
end
end
else begin
if($feof(data_file_PC)) begin
$display("no more PC data to read");
`ERROR
end
scan_file_PC = $fscanf(data_file_PC, "%s\n", PCtext);
if (PCtext != "ret" && PCtext != "fence" && PCtext != "nop" && PCtext != "mret" && PCtext != "sfence.vma" && PCtext != "unimp") begin
scan_file_PC = $fscanf(data_file_PC, "%s\n", PCtext2);
PCtext = {PCtext, " ", PCtext2};
end
scan_file_PC = $fscanf(data_file_PC, "%x\n", CheckInstrF);
if(dut.PCF === pcExpected) begin
if(dut.hart.ifu.InstrF[6:0] == 7'b1010011) begin // for now, NOP out any float instrs
force CheckInstrF = 32'b0010011;
release CheckInstrF;
force dut.hart.ifu.InstrF = 32'b0010011;
#7;
release dut.hart.ifu.InstrF;
$display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.PCF, instrs, $time);
warningCount += 1;
forcedInstr = 1;
else begin
if($feof(data_file_PC)) begin
$display("no more PC data to read");
`ERROR
end
else begin
forcedInstr = 0;
scan_file_PC = $fscanf(data_file_PC, "%s\n", PCtext);
if (PCtext != "ret" && PCtext != "fence" && PCtext != "nop" && PCtext != "mret" && PCtext != "sfence.vma" && PCtext != "unimp") begin
scan_file_PC = $fscanf(data_file_PC, "%s\n", PCtext2);
PCtext = {PCtext, " ", PCtext2};
end
end
// then expected PC value
scan_file_PC = $fscanf(data_file_PC, "%x\n", pcExpected);
if (instrs <= 10 || (instrs <= 100 && instrs % 10 == 0) ||
(instrs <= 1000 && instrs % 100 == 0) || (instrs <= 10000 && instrs % 1000 == 0) ||
(instrs <= 100000 && instrs % 10000 == 0) || (instrs <= 1000000 && instrs % 100000 == 0)) begin
$display("loaded %0d instructions", instrs);
end
instrs += 1;
// are we at a branch/jump?
casex (lastCheckInstrF[31:0])
32'b00000000001000000000000001110011, // URET
32'b00010000001000000000000001110011, // SRET
32'b00110000001000000000000001110011, // MRET
32'bXXXXXXXXXXXXXXXXXXXXXXXXX1101111, // JAL
32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100111, // JALR
32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100011, // B
32'bXXXXXXXXXXXXXXXX110XXXXXXXXXXX01, // C.BEQZ
32'bXXXXXXXXXXXXXXXX111XXXXXXXXXXX01, // C.BNEZ
32'bXXXXXXXXXXXXXXXX101XXXXXXXXXXX01: // C.J
speculative = 1;
32'bXXXXXXXXXXXXXXXX1001000000000010: // C.EBREAK:
speculative = 0; // tbh don't really know what should happen here
32'bXXXXXXXXXXXXXXXX1000XXXXX0000010, // C.JR
32'bXXXXXXXXXXXXXXXX1001XXXXX0000010: // C.JALR //this is RV64 only so no C.JAL
speculative = 1;
default:
speculative = 0;
endcase
scan_file_PC = $fscanf(data_file_PC, "%x\n", CheckInstrD);
if(dut.hart.ifu.PCD === pcExpected) begin
if(dut.hart.ifu.InstrRawD[6:0] == 7'b1010011) begin // for now, NOP out any float instrs
force CheckInstrD = 32'b0010011;
release CheckInstrD;
force dut.hart.ifu.InstrRawD = 32'b0010011;
#7;
release dut.hart.ifu.InstrRawD;
$display("warning: NOPing out %s at PC=%0x, instr %0d, time %0t", PCtext, dut.hart.ifu.PCD, instrs, $time);
warningCount += 1;
forcedInstr = 1;
end
else begin
forcedInstr = 0;
end
end
// then expected PC value
scan_file_PC = $fscanf(data_file_PC, "%x\n", pcExpected);
if (instrs <= 10 || (instrs <= 100 && instrs % 10 == 0) ||
(instrs <= 1000 && instrs % 100 == 0) || (instrs <= 10000 && instrs % 1000 == 0) ||
(instrs <= 100000 && instrs % 10000 == 0) || (instrs <= 1000000 && instrs % 100000 == 0)) begin
$display("loaded %0d instructions", instrs);
end
instrs += 1;
// are we at a branch/jump?
casex (lastCheckInstrD[31:0])
32'b00000000001000000000000001110011, // URET
32'b00010000001000000000000001110011, // SRET
32'b00110000001000000000000001110011, // MRET
32'bXXXXXXXXXXXXXXXXXXXXXXXXX1101111, // JAL
32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100111, // JALR
32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100011, // B
32'bXXXXXXXXXXXXXXXX110XXXXXXXXXXX01, // C.BEQZ
32'bXXXXXXXXXXXXXXXX111XXXXXXXXXXX01, // C.BNEZ
32'bXXXXXXXXXXXXXXXX101XXXXXXXXXXX01: // C.J
speculative = 1;
32'bXXXXXXXXXXXXXXXX1001000000000010: // C.EBREAK:
speculative = 0; // tbh don't really know what should happen here
32'bXXXXXXXXXXXXXXXX1000XXXXX0000010, // C.JR
32'bXXXXXXXXXXXXXXXX1001XXXXX0000010: // C.JALR //this is RV64 only so no C.JAL
speculative = 1;
default:
speculative = 0;
endcase
//check things!
if ((~speculative) && (~equal(dut.PCF,pcExpected,3))) begin
$display("%0t ps, instr %0d: PC does not equal PC expected: %x, %x", $time, instrs, dut.PCF, pcExpected);
`ERROR
end
InstrMask = CheckInstrF[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF;
if ((~forcedInstr) && (~speculative) && ((InstrMask & dut.hart.ifu.InstrF) !== (InstrMask & CheckInstrF))) begin
$display("%0t ps, instr %0d: InstrF does not equal CheckInstrF: %x, %x, PC: %x", $time, instrs, dut.hart.ifu.InstrF, CheckInstrF, dut.PCF);
`ERROR
//check things!
if ((~speculative) && (~equal(dut.hart.ifu.PCD,pcExpected,3))) begin
$display("%0t ps, instr %0d: PC does not equal PC expected: %x, %x", $time, instrs, dut.hart.ifu.PCD, pcExpected);
`ERROR
end
InstrMask = CheckInstrD[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF;
if ((~forcedInstr) && (~speculative) && ((InstrMask & dut.hart.ifu.InstrRawD) !== (InstrMask & CheckInstrD))) begin
$display("%0t ps, instr %0d: InstrD does not equal CheckInstrD: %x, %x, PC: %x", $time, instrs, dut.hart.ifu.InstrRawD, CheckInstrD, dut.hart.ifu.PCD);
`ERROR
end
end
end
lastPCD = dut.hart.ifu.PCD;
end
lastPCF = dut.PCF;
end
end
end
// Track names of instructions
string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
logic [31:0] InstrW;
instrNameDecTB dec(dut.hart.ifu.InstrF, InstrFName);
flopenr #(32) InstrWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.InstrM, InstrW);
instrNameDecTB dec(dut.hart.ifu.ic.InstrF, InstrFName);
instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE,
dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
dut.hart.ifu.InstrM, InstrW,
@ -494,3 +498,128 @@ module testbench_busybear();
end
endmodule
module instrTrackerTB(
input logic clk, reset, FlushE,
input logic [31:0] InstrD,
input logic [31:0] InstrE, InstrM,
output logic [31:0] InstrW,
output string InstrDName, InstrEName, InstrMName, InstrWName);
// stage Instr to Writeback for visualization
//flopr #(32) InstrWReg(clk, reset, InstrM, InstrW);
instrNameDecTB ddec(InstrD, InstrDName);
instrNameDecTB edec(InstrE, InstrEName);
instrNameDecTB mdec(InstrM, InstrMName);
instrNameDecTB wdec(InstrW, InstrWName);
endmodule
// decode the instruction name, to help the test bench
module instrNameDecTB(
input logic [31:0] instr,
output string name);
logic [6:0] op;
logic [2:0] funct3;
logic [6:0] funct7;
logic [11:0] imm;
assign op = instr[6:0];
assign funct3 = instr[14:12];
assign funct7 = instr[31:25];
assign imm = instr[31:20];
// it would be nice to add the operands to the name
// create another variable called decoded
always_comb
casez({op, funct3})
10'b0000000_000: name = "BAD";
10'b0000011_000: name = "LB";
10'b0000011_001: name = "LH";
10'b0000011_010: name = "LW";
10'b0000011_011: name = "LD";
10'b0000011_100: name = "LBU";
10'b0000011_101: name = "LHU";
10'b0000011_110: name = "LWU";
10'b0010011_000: if (instr[31:15] == 0 && instr[11:7] ==0) name = "NOP/FLUSH";
else name = "ADDI";
10'b0010011_001: if (funct7[6:1] == 6'b000000) name = "SLLI";
else name = "ILLEGAL";
10'b0010011_010: name = "SLTI";
10'b0010011_011: name = "SLTIU";
10'b0010011_100: name = "XORI";
10'b0010011_101: if (funct7[6:1] == 6'b000000) name = "SRLI";
else if (funct7[6:1] == 6'b010000) name = "SRAI";
else name = "ILLEGAL";
10'b0010011_110: name = "ORI";
10'b0010011_111: name = "ANDI";
10'b0010111_???: name = "AUIPC";
10'b0100011_000: name = "SB";
10'b0100011_001: name = "SH";
10'b0100011_010: name = "SW";
10'b0100011_011: name = "SD";
10'b0011011_000: name = "ADDIW";
10'b0011011_001: name = "SLLIW";
10'b0011011_101: if (funct7 == 7'b0000000) name = "SRLIW";
else if (funct7 == 7'b0100000) name = "SRAIW";
else name = "ILLEGAL";
10'b0111011_000: if (funct7 == 7'b0000000) name = "ADDW";
else if (funct7 == 7'b0100000) name = "SUBW";
else name = "ILLEGAL";
10'b0111011_001: name = "SLLW";
10'b0111011_101: if (funct7 == 7'b0000000) name = "SRLW";
else if (funct7 == 7'b0100000) name = "SRAW";
else name = "ILLEGAL";
10'b0110011_000: if (funct7 == 7'b0000000) name = "ADD";
else if (funct7 == 7'b0000001) name = "MUL";
else if (funct7 == 7'b0100000) name = "SUB";
else name = "ILLEGAL";
10'b0110011_001: if (funct7 == 7'b0000000) name = "SLL";
else if (funct7 == 7'b0000001) name = "MULH";
else name = "ILLEGAL";
10'b0110011_010: if (funct7 == 7'b0000000) name = "SLT";
else if (funct7 == 7'b0000001) name = "MULHSU";
else name = "ILLEGAL";
10'b0110011_011: if (funct7 == 7'b0000000) name = "SLTU";
else if (funct7 == 7'b0000001) name = "DIV";
else name = "ILLEGAL";
10'b0110011_100: if (funct7 == 7'b0000000) name = "XOR";
else if (funct7 == 7'b0000001) name = "MUL";
else name = "ILLEGAL";
10'b0110011_101: if (funct7 == 7'b0000000) name = "SRL";
else if (funct7 == 7'b0000001) name = "DIVU";
else if (funct7 == 7'b0100000) name = "SRA";
else name = "ILLEGAL";
10'b0110011_110: if (funct7 == 7'b0000000) name = "OR";
else if (funct7 == 7'b0000001) name = "REM";
else name = "ILLEGAL";
10'b0110011_111: if (funct7 == 7'b0000000) name = "AND";
else if (funct7 == 7'b0000001) name = "REMU";
else name = "ILLEGAL";
10'b0110111_???: name = "LUI";
10'b1100011_000: name = "BEQ";
10'b1100011_001: name = "BNE";
10'b1100011_100: name = "BLT";
10'b1100011_101: name = "BGE";
10'b1100011_110: name = "BLTU";
10'b1100011_111: name = "BGEU";
10'b1100111_000: name = "JALR";
10'b1101111_???: name = "JAL";
10'b1110011_000: if (imm == 0) name = "ECALL";
else if (imm == 1) name = "EBREAK";
else if (imm == 2) name = "URET";
else if (imm == 258) name = "SRET";
else if (imm == 770) name = "MRET";
else name = "ILLEGAL";
10'b1110011_001: name = "CSRRW";
10'b1110011_010: name = "CSRRS";
10'b1110011_011: name = "CSRRC";
10'b1110011_101: name = "CSRRWI";
10'b1110011_110: name = "CSRRSI";
10'b1110011_111: name = "CSRRCI";
10'b0001111_???: name = "FENCE";
default: name = "ILLEGAL";
endcase
endmodule

View File

@ -60,12 +60,18 @@ module testbench();
assign HRDATAEXT = 0;
wallypipelinedsoc dut(.*);
// Track names of instructions
logic [31:0] InstrW;
flopenr #(32) InstrWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.InstrM, InstrW);
instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE,
dut.hart.ifu.InstrF,
dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
dut.hart.ifu.InstrM, dut.hart.ifu.InstrW,
dut.hart.ifu.InstrM, InstrW,
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
// initialize tests
logic [`XLEN-1:0] PCW;
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, dut.hart.ifu.PCM, PCW);
integer j;
initial
begin

View File

@ -59,12 +59,20 @@ module testbench();
assign HRESPEXT = 0;
assign HRDATAEXT = 0;
wallypipelinedsoc dut(.*);
logic [31:0] InstrW;
flopenr #(32) InstrWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.InstrM, InstrW);
// Track names of instructions
instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE,
dut.hart.ifu.InstrF,
dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
dut.hart.ifu.InstrM, dut.hart.ifu.InstrW,
dut.hart.ifu.InstrM, InstrW,
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
logic [`XLEN-1:0] PCW;
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, dut.hart.ifu.PCM, PCW);
// initialize tests
integer j;
initial

View File

@ -38,8 +38,13 @@ module testbench();
logic [`XLEN-1:0] signature[0:10000];
logic [`XLEN-1:0] testadr;
string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
//logic [31:0] InstrW;
logic [31:0] InstrW;
logic [`XLEN-1:0] meminit;
string tests64f[] = '{
"rv64f/I-FADD-S-01", "2000",
"rv64f/I-FCLASS-S-01", "2000"
};
string tests64a[] = '{
"rv64a/WALLY-AMO", "2110",
"rv64a/WALLY-LRSC", "2110"
@ -51,7 +56,7 @@ module testbench();
"rv64m/I-MULHU-01", "3000",
"rv64m/I-MULW-01", "3000"
// "rv64m/I-DIV-01", "3000",
// "rv64m/I-DIVU-01", "3000",
// "rv64m/I-DIVU-01", "3000"
// "rv64m/I-DIVUW-01", "3000",
// "rv64m/I-DIVW-01", "3000",
// "rv64m/I-REM-01", "3000",
@ -60,7 +65,6 @@ module testbench();
// "rv64m/I-REMW-01", "3000"
};
string tests64ic[] = '{
"rv64ic/I-C-ADD-01", "3000",
"rv64ic/I-C-ADDI-01", "3000",
"rv64ic/I-C-ADDIW-01", "3000",
@ -332,19 +336,23 @@ string tests32i[] = {
logic [1:0] HTRANS;
logic HMASTLOCK;
logic HCLK, HRESETn;
logic [`XLEN-1:0] PCW;
flopenr #(`XLEN) PCWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.PCM, PCW);
flopenr #(32) InstrWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.InstrM, InstrW);
// pick tests based on modes supported
initial
if (`XLEN == 64) begin // RV64
if(TESTSBP) begin
tests = testsBP64;
if (TESTSBP) begin
tests = testsBP64;
end else begin
tests = {tests64i};
if (`C_SUPPORTED) tests = {tests, tests64ic};
else tests = {tests, tests64iNOc};
if (`M_SUPPORTED) tests = {tests, tests64m};
if (`A_SUPPORTED) tests = {tests, tests64a};
tests = {tests64i};
if (`C_SUPPORTED) tests = {tests, tests64ic};
else tests = {tests, tests64iNOc};
if (`M_SUPPORTED) tests = {tests, tests64m};
// if (`F_SUPPORTED) tests = {tests64f, tests};
// if (`D_SUPPORTED) tests = {tests64d, tests};
if (`A_SUPPORTED) tests = {tests, tests64a};
end
// tests = {tests64a, tests};
end else begin // RV32
@ -353,6 +361,7 @@ string tests32i[] = {
if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic};
else tests = {tests, tests32iNOc};
if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m};
// if (`F_SUPPORTED) tests = {tests32f, tests};
if (`A_SUPPORTED) tests = {tests, tests32a};
end
string signame, memfilename;
@ -371,9 +380,9 @@ string tests32i[] = {
// Track names of instructions
instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE,
dut.hart.ifu.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
dut.hart.ifu.InstrM, dut.hart.ifu.InstrW,
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
dut.hart.ifu.ic.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
dut.hart.ifu.InstrM, InstrW, InstrFName, InstrDName,
InstrEName, InstrMName, InstrWName);
// initialize tests
initial

View File

@ -73,13 +73,15 @@ module testbench();
assign HRDATAEXT = 0;
wallypipelinedsoc dut(.*);
flopenr #(32) InstrWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.InstrM, InstrW);
// Track names of instructions
instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE,
dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
dut.hart.ifu.InstrM, InstrW,
InstrDName, InstrEName, InstrMName, InstrWName);
logic [`XLEN-1:0] PCW;
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, dut.hart.ifu.PCM, PCW);
// initialize tests
initial
begin
@ -187,7 +189,7 @@ module instrTrackerTB(
output string InstrDName, InstrEName, InstrMName, InstrWName);
// stage Instr to Writeback for visualization
flopr #(32) InstrWReg(clk, reset, InstrM, InstrW);
//flopr #(32) InstrWReg(clk, reset, InstrM, InstrW);
instrNameDecTB ddec(InstrD, InstrDName);
instrNameDecTB edec(InstrE, InstrEName);

View File

@ -92,13 +92,15 @@ module testbench();
assign HRDATAEXT = 0;
wallypipelinedsoc dut(.*);
flopenr #(32) InstrWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.InstrM, InstrW);
// Track names of instructions
instrTrackerTBPriv it(clk, reset, dut.hart.ieu.dp.FlushE,
dut.hart.ifu.InstrF, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE,
dut.hart.ifu.InstrM, dut.hart.ifu.InstrW,
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
logic [`XLEN-1:0] PCW;
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, dut.hart.ifu.PCM, PCW);
// initialize tests
initial
begin
@ -226,7 +228,8 @@ module instrTrackerTBPriv(
output string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
// stage Instr to Writeback for visualization
// flopr #(32) InstrWReg(clk, reset, InstrM, InstrW);
//flopr #(32) InstrWReg(clk, reset, InstrM, InstrW);
instrNameDecTB fdec(InstrF, InstrFName);
instrNameDecTB ddec(InstrD, InstrDName);