forked from Github_Repos/cvw
Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally into main
This commit is contained in:
commit
74e67df080
1024
wally-pipelined/config/rv32icfd/BTBPredictor.txt
Normal file
1024
wally-pipelined/config/rv32icfd/BTBPredictor.txt
Normal file
File diff suppressed because it is too large
Load Diff
1024
wally-pipelined/config/rv32icfd/twoBitPredictor.txt
Normal file
1024
wally-pipelined/config/rv32icfd/twoBitPredictor.txt
Normal file
File diff suppressed because it is too large
Load Diff
106
wally-pipelined/config/rv32icfd/wally-config.vh
Normal file
106
wally-pipelined/config/rv32icfd/wally-config.vh
Normal file
@ -0,0 +1,106 @@
|
||||
//////////////////////////////////////////
|
||||
// wally-config.vh
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 4 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Specify which features are configured
|
||||
// Macros to determine which modes are supported based on MISA
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
// include shared configuration
|
||||
`include "wally-shared.vh"
|
||||
|
||||
`define BUILDROOT 0
|
||||
`define BUSYBEAR 0
|
||||
|
||||
// RV32 or RV64: XLEN = 32 or 64
|
||||
`define XLEN 32
|
||||
|
||||
`define MISA (32'h00000104 | 1 << 5 | 1 << 20 | 1 << 18 | 1 << 12)
|
||||
`define ZCSR_SUPPORTED 1
|
||||
`define COUNTERS 32
|
||||
`define ZCOUNTERS_SUPPORTED 1
|
||||
|
||||
// Microarchitectural Features
|
||||
`define UARCH_PIPELINED 1
|
||||
`define UARCH_SUPERSCALR 0
|
||||
`define UARCH_SINGLECYCLE 0
|
||||
`define MEM_DCACHE 0
|
||||
`define MEM_DTIM 1
|
||||
`define MEM_ICACHE 0
|
||||
`define MEM_VIRTMEM 1
|
||||
`define VECTORED_INTERRUPTS_SUPPORTED 1
|
||||
|
||||
`define ITLB_ENTRIES 32
|
||||
`define DTLB_ENTRIES 32
|
||||
|
||||
// Legal numbers of PMP entries are 0, 16, or 64
|
||||
`define PMP_ENTRIES 16
|
||||
|
||||
// Address space
|
||||
`define RESET_VECTOR 32'h80000000
|
||||
|
||||
// Peripheral Addresses
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
|
||||
// *** each of these is `PA_BITS wide. Is this parameterizable INSIDE the config file?
|
||||
`define BOOTTIM_SUPPORTED 1'b1
|
||||
`define BOOTTIM_BASE 34'h00001000
|
||||
`define BOOTTIM_RANGE 34'h00000FFF
|
||||
`define TIM_SUPPORTED 1'b1
|
||||
`define TIM_BASE 34'h80000000
|
||||
`define TIM_RANGE 34'h07FFFFFF
|
||||
`define CLINT_SUPPORTED 1'b1
|
||||
`define CLINT_BASE 34'h02000000
|
||||
`define CLINT_RANGE 34'h0000FFFF
|
||||
`define GPIO_SUPPORTED 1'b1
|
||||
`define GPIO_BASE 34'h10012000
|
||||
`define GPIO_RANGE 34'h000000FF
|
||||
`define UART_SUPPORTED 1'b1
|
||||
`define UART_BASE 34'h10000000
|
||||
`define UART_RANGE 34'h00000007
|
||||
`define PLIC_SUPPORTED 1'b1
|
||||
`define PLIC_BASE 34'h0C000000
|
||||
`define PLIC_RANGE 34'h03FFFFFF
|
||||
|
||||
// Bus Interface width
|
||||
`define AHBW 32
|
||||
|
||||
// Test modes
|
||||
|
||||
// Tie GPIO outputs back to inputs
|
||||
`define GPIO_LOOPBACK_TEST 1
|
||||
|
||||
// Hardware configuration
|
||||
`define UART_PRESCALE 1
|
||||
|
||||
// Interrupt configuration
|
||||
`define PLIC_NUM_SRC 4
|
||||
// comment out the following if >=32 sources
|
||||
`define PLIC_NUM_SRC_LT_32
|
||||
`define PLIC_GPIO_ID 3
|
||||
`define PLIC_UART_ID 4
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/rv32icfd/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/rv32icfd/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 1
|
||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define TESTSBP 0
|
@ -1,109 +1,3 @@
|
||||
// //////////////////////////////////////////
|
||||
// // wally-config.vh
|
||||
// //
|
||||
// // Written: David_Harris@hmc.edu 4 January 2021
|
||||
// // Modified:
|
||||
// //
|
||||
// // Purpose: Specify which features are configured
|
||||
// // Macros to determine which modes are supported based on MISA
|
||||
// //
|
||||
// // A component of the Wally configurable RISC-V project.
|
||||
// //
|
||||
// // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
// //
|
||||
// // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// // is furnished to do so, subject to the following conditions:
|
||||
// //
|
||||
// // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
// //
|
||||
// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
// ///////////////////////////////////////////
|
||||
|
||||
// // include shared configuration
|
||||
// `include "wally-shared.vh"
|
||||
|
||||
// `define BUILDROOT 0
|
||||
// `define BUSYBEAR 0
|
||||
|
||||
// // RV32 or RV64: XLEN = 32 or 64
|
||||
// `define XLEN 32
|
||||
|
||||
// `define MISA (32'h00000104 | 1 << 5 | 1 << 20 | 1 << 18 | 1 << 12)
|
||||
// `define ZCSR_SUPPORTED 1
|
||||
// `define COUNTERS 32
|
||||
// `define ZCOUNTERS_SUPPORTED 1
|
||||
|
||||
// // Microarchitectural Features
|
||||
// `define UARCH_PIPELINED 1
|
||||
// `define UARCH_SUPERSCALR 0
|
||||
// `define UARCH_SINGLECYCLE 0
|
||||
// `define MEM_DCACHE 0
|
||||
// `define MEM_DTIM 1
|
||||
// `define MEM_ICACHE 0
|
||||
// `define MEM_VIRTMEM 1
|
||||
// `define VECTORED_INTERRUPTS_SUPPORTED 1
|
||||
|
||||
// `define ITLB_ENTRIES 32
|
||||
// `define DTLB_ENTRIES 32
|
||||
|
||||
// // Legal number of PMP entries are 0, 16, or 64
|
||||
// `define PMP_ENTRIES 16
|
||||
|
||||
// // Address space
|
||||
// `define RESET_VECTOR 32'h80000000
|
||||
|
||||
// // Peripheral Addresses
|
||||
// // Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
|
||||
// // *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file?
|
||||
// `define BOOTTIM_SUPPORTED 1'b1
|
||||
// `define BOOTTIM_BASE 34'h00001000
|
||||
// `define BOOTTIM_RANGE 34'h00000FFF
|
||||
// `define TIM_SUPPORTED 1'b1
|
||||
// `define TIM_BASE 34'h80000000
|
||||
// `define TIM_RANGE 34'h07FFFFFF
|
||||
// `define CLINT_SUPPORTED 1'b1
|
||||
// `define CLINT_BASE 34'h02000000
|
||||
// `define CLINT_RANGE 34'h0000FFFF
|
||||
// `define GPIO_SUPPORTED 1'b1
|
||||
// `define GPIO_BASE 34'h10012000
|
||||
// `define GPIO_RANGE 34'h000000FF
|
||||
// `define UART_SUPPORTED 1'b1
|
||||
// `define UART_BASE 34'h10000000
|
||||
// `define UART_RANGE 34'h00000007
|
||||
// `define PLIC_SUPPORTED 1'b1
|
||||
// `define PLIC_BASE 34'h0C000000
|
||||
// `define PLIC_RANGE 34'h03FFFFFF
|
||||
|
||||
// // Bus Interface width
|
||||
// `define AHBW 32
|
||||
|
||||
// // Test modes
|
||||
|
||||
// // Tie GPIO outputs back to inputs
|
||||
// `define GPIO_LOOPBACK_TEST 1
|
||||
|
||||
// // Hardware configuration
|
||||
// `define UART_PRESCALE 1
|
||||
|
||||
// // Interrupt configuration
|
||||
// `define PLIC_NUM_SRC 4
|
||||
// // comment out the following if >=32 sources
|
||||
// `define PLIC_NUM_SRC_LT_32
|
||||
// `define PLIC_GPIO_ID 3
|
||||
// `define PLIC_UART_ID 4
|
||||
|
||||
// `define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt"
|
||||
// `define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt"
|
||||
// `define BPRED_ENABLED 1
|
||||
// `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
// `define TESTSBP 0
|
||||
//////////////////////////////////////////
|
||||
// wally-config.vh
|
||||
//
|
||||
|
3
wally-pipelined/regression/sim-wally-batch-rv32icfd
Executable file
3
wally-pipelined/regression/sim-wally-batch-rv32icfd
Executable file
@ -0,0 +1,3 @@
|
||||
vsim -c <<!
|
||||
do wally-pipelined-batch-rv32icfd.do ../config/rv32icfd rv32icfd
|
||||
!
|
1
wally-pipelined/regression/sim-wally-rv32icfd
Executable file
1
wally-pipelined/regression/sim-wally-rv32icfd
Executable file
@ -0,0 +1 @@
|
||||
vsim -do wally-pipelined-rv32icfd.do
|
42
wally-pipelined/regression/wally-pipelined-batch-rv32icfd.do
Normal file
42
wally-pipelined/regression/wally-pipelined-batch-rv32icfd.do
Normal file
@ -0,0 +1,42 @@
|
||||
# wally-pipelined-batch.do
|
||||
#
|
||||
# Modification by Oklahoma State University & Harvey Mudd College
|
||||
# Use with Testbench
|
||||
# James Stine, 2008; David Harris 2021
|
||||
# Go Cowboys!!!!!!
|
||||
#
|
||||
# Takes 1:10 to run RV64IC tests using gui
|
||||
|
||||
# Use this wally-pipelined-batch.do file to run this example.
|
||||
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
|
||||
# do wally-pipelined-batch.do
|
||||
# or, to run from a shell, type the following at the shell prompt:
|
||||
# vsim -do wally-pipelined-batch.do -c
|
||||
# (omit the "-c" to see the GUI while running from the shell)
|
||||
|
||||
onbreak {resume}
|
||||
|
||||
# create library
|
||||
if [file exists work_$2] {
|
||||
vdel -lib work_$2 -all
|
||||
}
|
||||
vlib work_$2
|
||||
|
||||
# compile source files
|
||||
# suppress spurious warnings about
|
||||
# "Extra checking for conflicts with always_comb done at vopt time"
|
||||
# because vsim will run vopt
|
||||
|
||||
# default to config/rv32icfd, but allow this to be overridden at the command line. For example: do wally-pipelined-batch-rv32icfd.do ../config/rv32icfd rv32icfd
|
||||
switch $argc {
|
||||
0 {vlog +incdir+../config/rv32icfd +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
|
||||
1 {vlog +incdir+$1 +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
|
||||
2 {vlog -work work_$2 +incdir+$1 +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
|
||||
}
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
vopt work_$2.testbench -work work_$2 -o workopt_$2
|
||||
vsim -lib work_$2 workopt_$2
|
||||
|
||||
run -all
|
||||
quit
|
50
wally-pipelined/regression/wally-pipelined-rv32icfd.do
Normal file
50
wally-pipelined/regression/wally-pipelined-rv32icfd.do
Normal file
@ -0,0 +1,50 @@
|
||||
# wally-pipelined.do
|
||||
#
|
||||
# Modification by Oklahoma State University & Harvey Mudd College
|
||||
# Use with Testbench
|
||||
# James Stine, 2008; David Harris 2021
|
||||
# Go Cowboys!!!!!!
|
||||
#
|
||||
# Takes 1:10 to run RV64IC tests using gui
|
||||
|
||||
# Use this wally-pipelined.do file to run this example.
|
||||
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
|
||||
# do wally-pipelined.do
|
||||
# or, to run from a shell, type the following at the shell prompt:
|
||||
# vsim -do wally-pipelined.do -c
|
||||
# (omit the "-c" to see the GUI while running from the shell)
|
||||
|
||||
onbreak {resume}
|
||||
|
||||
# create library
|
||||
if [file exists work] {
|
||||
vdel -all
|
||||
}
|
||||
vlib work
|
||||
|
||||
# compile source files
|
||||
# suppress spurious warnings about
|
||||
# "Extra checking for conflicts with always_comb done at vopt time"
|
||||
# because vsim will run vopt
|
||||
|
||||
# default to config/rv32icfd, but allow this to be overridden at the command line. For example:
|
||||
# do wally-pipelined.do ../config/rv32ic
|
||||
switch $argc {
|
||||
0 {vlog +incdir+../config/rv32icfd +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
|
||||
1 {vlog +incdir+$1 +incdir+../config/shared ../testbench/testbench-imperas.sv ../testbench/function_radix.sv ../src/*/*.sv -suppress 2583}
|
||||
}
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
vopt +acc work.testbench -o workopt
|
||||
vsim workopt
|
||||
|
||||
view wave
|
||||
-- display input and output signals as hexadecimal values
|
||||
do ./wave-dos/default-waves.do
|
||||
|
||||
-- Run the Simulation
|
||||
#run 5000
|
||||
run -all
|
||||
#quit
|
||||
noview ../testbench/testbench-imperas.sv
|
||||
view wave
|
@ -1,4 +1,3 @@
|
||||
`timescale 1ps/1ps
|
||||
module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out,
|
||||
regr_out, d, n, sel_muxa, sel_muxb, sel_muxr, reset, clk, load_rega, load_regb,
|
||||
load_regc, load_regd, load_regr, load_regs, P, op_type, exp_odd);
|
||||
@ -106,123 +105,123 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_o
|
||||
|
||||
endmodule // divconv
|
||||
|
||||
module adder #(parameter WIDTH=8)
|
||||
(input logic [WIDTH-1:0] a, b,
|
||||
input logic cin,
|
||||
output logic [WIDTH-1:0] y,
|
||||
output logic cout);
|
||||
// module adder #(parameter WIDTH=8)
|
||||
// (input logic [WIDTH-1:0] a, b,
|
||||
// input logic cin,
|
||||
// output logic [WIDTH-1:0] y,
|
||||
// output logic cout);
|
||||
|
||||
assign {cout, y} = a + b + cin;
|
||||
// assign {cout, y} = a + b + cin;
|
||||
|
||||
endmodule // adder
|
||||
// endmodule // adder
|
||||
|
||||
module flopenr #(parameter WIDTH = 8)
|
||||
(input logic clk, reset, en,
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
// module flopenr #(parameter WIDTH = 8)
|
||||
// (input logic clk, reset, en,
|
||||
// input logic [WIDTH-1:0] d,
|
||||
// output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk, posedge reset)
|
||||
if (reset) q <= #10 0;
|
||||
else if (en) q <= #10 d;
|
||||
// always_ff @(posedge clk, posedge reset)
|
||||
// if (reset) q <= #10 0;
|
||||
// else if (en) q <= #10 d;
|
||||
|
||||
endmodule // flopenr
|
||||
// endmodule // flopenr
|
||||
|
||||
module flopr #(parameter WIDTH = 8)
|
||||
(input logic clk, reset,
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
// module flopr #(parameter WIDTH = 8)
|
||||
// (input logic clk, reset,
|
||||
// input logic [WIDTH-1:0] d,
|
||||
// output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk, posedge reset)
|
||||
if (reset) q <= #10 0;
|
||||
else q <= #10 d;
|
||||
// always_ff @(posedge clk, posedge reset)
|
||||
// if (reset) q <= #10 0;
|
||||
// else q <= #10 d;
|
||||
|
||||
endmodule // flopr
|
||||
// endmodule // flopr
|
||||
|
||||
module flopenrc #(parameter WIDTH = 8)
|
||||
(input logic clk, reset, en, clear,
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
// module flopenrc #(parameter WIDTH = 8)
|
||||
// (input logic clk, reset, en, clear,
|
||||
// input logic [WIDTH-1:0] d,
|
||||
// output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk, posedge reset)
|
||||
if (reset) q <= #10 0;
|
||||
else if (en)
|
||||
if (clear) q <= #10 0;
|
||||
else q <= #10 d;
|
||||
// always_ff @(posedge clk, posedge reset)
|
||||
// if (reset) q <= #10 0;
|
||||
// else if (en)
|
||||
// if (clear) q <= #10 0;
|
||||
// else q <= #10 d;
|
||||
|
||||
endmodule // flopenrc
|
||||
// endmodule // flopenrc
|
||||
|
||||
module floprc #(parameter WIDTH = 8)
|
||||
(input logic clk, reset, clear,
|
||||
input logic [WIDTH-1:0] d,
|
||||
output logic [WIDTH-1:0] q);
|
||||
// module floprc #(parameter WIDTH = 8)
|
||||
// (input logic clk, reset, clear,
|
||||
// input logic [WIDTH-1:0] d,
|
||||
// output logic [WIDTH-1:0] q);
|
||||
|
||||
always_ff @(posedge clk, posedge reset)
|
||||
if (reset) q <= #10 0;
|
||||
else
|
||||
if (clear) q <= #10 0;
|
||||
else q <= #10 d;
|
||||
// always_ff @(posedge clk, posedge reset)
|
||||
// if (reset) q <= #10 0;
|
||||
// else
|
||||
// if (clear) q <= #10 0;
|
||||
// else q <= #10 d;
|
||||
|
||||
endmodule // floprc
|
||||
// endmodule // floprc
|
||||
|
||||
module mux2 #(parameter WIDTH = 8)
|
||||
(input logic [WIDTH-1:0] d0, d1,
|
||||
input logic s,
|
||||
output logic [WIDTH-1:0] y);
|
||||
// module mux2 #(parameter WIDTH = 8)
|
||||
// (input logic [WIDTH-1:0] d0, d1,
|
||||
// input logic s,
|
||||
// output logic [WIDTH-1:0] y);
|
||||
|
||||
assign y = s ? d1 : d0;
|
||||
// assign y = s ? d1 : d0;
|
||||
|
||||
endmodule // mux2
|
||||
// endmodule // mux2
|
||||
|
||||
module mux3 #(parameter WIDTH = 8)
|
||||
(input logic [WIDTH-1:0] d0, d1, d2,
|
||||
input logic [1:0] s,
|
||||
output logic [WIDTH-1:0] y);
|
||||
// module mux3 #(parameter WIDTH = 8)
|
||||
// (input logic [WIDTH-1:0] d0, d1, d2,
|
||||
// input logic [1:0] s,
|
||||
// output logic [WIDTH-1:0] y);
|
||||
|
||||
assign y = s[1] ? d2 : (s[0] ? d1 : d0);
|
||||
// assign y = s[1] ? d2 : (s[0] ? d1 : d0);
|
||||
|
||||
endmodule // mux3
|
||||
// endmodule // mux3
|
||||
|
||||
module mux4 #(parameter WIDTH = 8)
|
||||
(input logic [WIDTH-1:0] d0, d1, d2, d3,
|
||||
input logic [1:0] s,
|
||||
output logic [WIDTH-1:0] y);
|
||||
// module mux4 #(parameter WIDTH = 8)
|
||||
// (input logic [WIDTH-1:0] d0, d1, d2, d3,
|
||||
// input logic [1:0] s,
|
||||
// output logic [WIDTH-1:0] y);
|
||||
|
||||
assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0);
|
||||
// assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0);
|
||||
|
||||
endmodule // mux4
|
||||
// endmodule // mux4
|
||||
|
||||
module mux5 #(parameter WIDTH = 8)
|
||||
(input logic [WIDTH-1:0] d0, d1, d2, d3, d4,
|
||||
input logic [2:0] s,
|
||||
output logic [WIDTH-1:0] y);
|
||||
// module mux5 #(parameter WIDTH = 8)
|
||||
// (input logic [WIDTH-1:0] d0, d1, d2, d3, d4,
|
||||
// input logic [2:0] s,
|
||||
// output logic [WIDTH-1:0] y);
|
||||
|
||||
always_comb
|
||||
casez (s)
|
||||
3'b000 : y = d0;
|
||||
3'b001 : y = d1;
|
||||
3'b010 : y = d2;
|
||||
3'b011 : y = d3;
|
||||
3'b1?? : y = d4;
|
||||
endcase // casez (s)
|
||||
// always_comb
|
||||
// casez (s)
|
||||
// 3'b000 : y = d0;
|
||||
// 3'b001 : y = d1;
|
||||
// 3'b010 : y = d2;
|
||||
// 3'b011 : y = d3;
|
||||
// 3'b1?? : y = d4;
|
||||
// endcase // casez (s)
|
||||
|
||||
endmodule // mux5
|
||||
// endmodule // mux5
|
||||
|
||||
module mux6 #(parameter WIDTH = 8)
|
||||
(input logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5,
|
||||
input logic [2:0] s,
|
||||
output logic [WIDTH-1:0] y);
|
||||
// module mux6 #(parameter WIDTH = 8)
|
||||
// (input logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5,
|
||||
// input logic [2:0] s,
|
||||
// output logic [WIDTH-1:0] y);
|
||||
|
||||
always_comb
|
||||
casez (s)
|
||||
3'b000 : y = d0;
|
||||
3'b001 : y = d1;
|
||||
3'b010 : y = d2;
|
||||
3'b011 : y = d3;
|
||||
3'b10? : y = d4;
|
||||
3'b11? : y = d5;
|
||||
endcase // casez (s)
|
||||
// always_comb
|
||||
// casez (s)
|
||||
// 3'b000 : y = d0;
|
||||
// 3'b001 : y = d1;
|
||||
// 3'b010 : y = d2;
|
||||
// 3'b011 : y = d3;
|
||||
// 3'b10? : y = d4;
|
||||
// 3'b11? : y = d5;
|
||||
// endcase // casez (s)
|
||||
|
||||
endmodule // mux6
|
||||
// endmodule // mux6
|
||||
|
||||
module eqcmp #(parameter WIDTH = 8)
|
||||
(input logic [WIDTH-1:0] a, b,
|
||||
@ -232,25 +231,25 @@ module eqcmp #(parameter WIDTH = 8)
|
||||
|
||||
endmodule // eqcmp
|
||||
|
||||
module fa (input logic a, b, c, output logic sum, carry);
|
||||
// module fa (input logic a, b, c, output logic sum, carry);
|
||||
|
||||
assign sum = a^b^c;
|
||||
assign carry = a&b|a&c|b&c;
|
||||
// assign sum = a^b^c;
|
||||
// assign carry = a&b|a&c|b&c;
|
||||
|
||||
endmodule // fa
|
||||
// endmodule // fa
|
||||
|
||||
module csa #(parameter WIDTH=8)
|
||||
(input logic [WIDTH-1:0] a, b, c,
|
||||
output logic [WIDTH-1:0] sum, carry);
|
||||
// module csa #(parameter WIDTH=8)
|
||||
// (input logic [WIDTH-1:0] a, b, c,
|
||||
// output logic [WIDTH-1:0] sum, carry);
|
||||
|
||||
logic [WIDTH:0] carry_temp;
|
||||
genvar i;
|
||||
generate
|
||||
for (i=0;i<WIDTH;i=i+1)
|
||||
begin : genbit
|
||||
fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
|
||||
end
|
||||
endgenerate
|
||||
assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};
|
||||
// logic [WIDTH:0] carry_temp;
|
||||
// genvar i;
|
||||
// generate
|
||||
// for (i=0;i<WIDTH;i=i+1)
|
||||
// begin : genbit
|
||||
// fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
|
||||
// end
|
||||
// endgenerate
|
||||
// assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};
|
||||
|
||||
endmodule // csa
|
||||
// endmodule // csa
|
||||
|
@ -31,8 +31,8 @@ module faddcvt(
|
||||
input logic reset,
|
||||
input logic FlushM,
|
||||
input logic StallM,
|
||||
input logic [63:0] SrcXE, // 1st input operand (A)
|
||||
input logic [63:0] SrcYE, // 2nd input operand (B)
|
||||
input logic [63:0] FSrcXE, // 1st input operand (A)
|
||||
input logic [63:0] FSrcYE, // 2nd input operand (B)
|
||||
input logic [3:0] FOpCtrlE, FOpCtrlM, // Function opcode
|
||||
input logic FmtE, FmtM, // Result Precision (1 for double, 0 for single)
|
||||
input logic [2:0] FrmM, // Rounding mode - specify values
|
||||
@ -59,7 +59,7 @@ module faddcvt(
|
||||
logic [10:0] AddExponentE, AddExponentM;
|
||||
|
||||
|
||||
fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE,
|
||||
fpuaddcvt1 fpadd1 (.FSrcXE, .FSrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE,
|
||||
.AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE,
|
||||
.AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE,
|
||||
.AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE);
|
||||
@ -83,10 +83,10 @@ module faddcvt(
|
||||
.AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM);
|
||||
endmodule
|
||||
|
||||
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, SrcXE, SrcYE, FOpCtrlE, FmtE);
|
||||
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FSrcXE, FSrcYE, FOpCtrlE, FmtE);
|
||||
|
||||
input logic [63:0] SrcXE; // 1st input operand (A)
|
||||
input logic [63:0] SrcYE; // 2nd input operand (B)
|
||||
input logic [63:0] FSrcXE; // 1st input operand (A)
|
||||
input logic [63:0] FSrcYE; // 2nd input operand (B)
|
||||
input logic [3:0] FOpCtrlE; // Function opcode
|
||||
input logic FmtE; // Result Precision (1 for double, 0 for single)
|
||||
|
||||
@ -137,12 +137,12 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
|
||||
// and the sign of the first operand is set appropriately based on
|
||||
// if the operation is absolute value or negation.
|
||||
|
||||
convert_inputs conv1 (AddFloat1E, AddFloat2E, SrcXE, SrcYE, FOpCtrlE, P);
|
||||
convert_inputs conv1 (AddFloat1E, AddFloat2E, FSrcXE, FSrcYE, FOpCtrlE, P);
|
||||
|
||||
// Test for exceptions and return the "Invalid Operation" and
|
||||
// "Denormalized" Input Flags. The "AddSelInvE" is used in
|
||||
// the third pipeline stage to select the result. Also, AddOp1NormE
|
||||
// and AddOp2NormE are one if SrcXE and SrcYE are not zero or denormalized.
|
||||
// and AddOp2NormE are one if FSrcXE and FSrcYE are not zero or denormalized.
|
||||
// sub is one if the effective operation is subtraction.
|
||||
|
||||
exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub,
|
||||
@ -215,8 +215,8 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
|
||||
|
||||
// Place either the sign-extended 32-bit value or the original 64-bit value
|
||||
// into IntValue (to be used for integer to floating point conversion)
|
||||
// assign IntValue [31:0] = SrcXE[31:0];
|
||||
// assign IntValue [63:32] = FOpCtrlE[0] ? {32{SrcXE[31]}} : SrcXE[63:32];
|
||||
// assign IntValue [31:0] = FSrcXE[31:0];
|
||||
// assign IntValue [63:32] = FOpCtrlE[0] ? {32{FSrcXE[31]}} : FSrcXE[63:32];
|
||||
|
||||
// If doing an integer to floating point conversion, mantissaA3 is set to
|
||||
// IntVal and the prenormalized exponent is set to 1084. Otherwise,
|
||||
|
@ -2,45 +2,52 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fclassify (
|
||||
input logic [63:0] SrcXE,
|
||||
input logic FmtE, // 0-Single 1-Double
|
||||
input logic XSgnE,
|
||||
input logic [51:0] XFracE,
|
||||
input logic XNaNE,
|
||||
input logic XSNaNE,
|
||||
input logic XNormE,
|
||||
input logic XDenormE,
|
||||
input logic XZeroE,
|
||||
input logic XInfE,
|
||||
// input logic FmtE, // 0-Single 1-Double
|
||||
output logic [63:0] ClassResE
|
||||
);
|
||||
|
||||
logic Sgn;
|
||||
logic Inf, NaN, Zero, Norm, Denorm;
|
||||
logic PInf, QNaN, PZero, PNorm, PDenorm;
|
||||
logic NInf, SNaN, NZero, NNorm, NDenorm;
|
||||
logic MaxExp, ExpZero, ManZero, FirstBitFrac;
|
||||
// logic XSgnE;
|
||||
// logic Inf, NaN, Zero, Norm, Denorm;
|
||||
logic PInf, PZero, PNorm, PDenorm;
|
||||
logic NInf, NZero, NNorm, NDenorm;
|
||||
// logic MaxExp, ExpZero, ManZero, FirstBitFrac;
|
||||
|
||||
// Single and Double precision layouts
|
||||
assign Sgn = FmtE ? SrcXE[63] : SrcXE[31];
|
||||
// assign XSgnE = FmtE ? FSrcXE[63] : FSrcXE[31];
|
||||
|
||||
// basic calculations for readability
|
||||
|
||||
assign ExpZero = FmtE ? ~|SrcXE[62:52] : ~|SrcXE[30:23];
|
||||
assign MaxExp = FmtE ? &SrcXE[62:52] : &SrcXE[30:23];
|
||||
assign ManZero = FmtE ? ~|SrcXE[51:0] : ~|SrcXE[22:0];
|
||||
assign FirstBitFrac = FmtE ? SrcXE[51] : SrcXE[22];
|
||||
// assign ExpZero = FmtE ? ~|FSrcXE[62:52] : ~|FSrcXE[30:23];
|
||||
// assign MaxExp = FmtE ? &FSrcXE[62:52] : &FSrcXE[30:23];
|
||||
// assign ManZero = FmtE ? ~|FSrcXE[51:0] : ~|FSrcXE[22:0];
|
||||
// assign FirstBitFrac = FmtE ? FSrcXE[51] : FSrcXE[22];
|
||||
|
||||
// determine the type of number
|
||||
assign NaN = MaxExp & ~ManZero;
|
||||
assign Inf = MaxExp & ManZero;
|
||||
assign Zero = ExpZero & ManZero;
|
||||
assign Denorm= ExpZero & ~ManZero;
|
||||
assign Norm = ~ExpZero;
|
||||
// assign NaN = MaxExp & ~ManZero;
|
||||
// assign Inf = MaxExp & ManZero;
|
||||
// assign Zero = ExpZero & ManZero;
|
||||
// assign Denorm= ExpZero & ~ManZero;
|
||||
// assign Norm = ~ExpZero;
|
||||
|
||||
// determine the sub categories
|
||||
assign QNaN = FirstBitFrac&NaN;
|
||||
assign SNaN = ~FirstBitFrac&NaN;
|
||||
assign PInf = ~Sgn&Inf;
|
||||
assign NInf = Sgn&Inf;
|
||||
assign PNorm = ~Sgn&Norm;
|
||||
assign NNorm = Sgn&Norm;
|
||||
assign PDenorm = ~Sgn&Denorm;
|
||||
assign NDenorm = Sgn&Denorm;
|
||||
assign PZero = ~Sgn&Zero;
|
||||
assign NZero = Sgn&Zero;
|
||||
// assign QNaN = FirstBitFrac&NaN;
|
||||
// assign SNaN = ~FirstBitFrac&NaN;
|
||||
assign PInf = ~XSgnE&XInfE;
|
||||
assign NInf = XSgnE&XInfE;
|
||||
assign PNorm = ~XSgnE&XNormE;
|
||||
assign NNorm = XSgnE&XNormE;
|
||||
assign PDenorm = ~XSgnE&XDenormE;
|
||||
assign NDenorm = XSgnE&XDenormE;
|
||||
assign PZero = ~XSgnE&XZeroE;
|
||||
assign NZero = XSgnE&XZeroE;
|
||||
|
||||
// determine sub category and combine into the result
|
||||
// bit 0 - -Inf
|
||||
@ -53,6 +60,6 @@ module fclassify (
|
||||
// bit 7 - +Inf
|
||||
// bit 8 - signaling NaN
|
||||
// bit 9 - quiet NaN
|
||||
assign ClassResE = {{54{1'b0}}, QNaN, SNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
|
||||
assign ClassResE = {{54{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
|
||||
|
||||
endmodule
|
||||
|
@ -42,31 +42,32 @@
|
||||
module fcmp (
|
||||
input logic [63:0] op1,
|
||||
input logic [63:0] op2,
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic [63:0] FSrcXE,
|
||||
input logic [63:0] FSrcYE,
|
||||
input logic [2:0] FOpCtrlE,
|
||||
input logic FmtE,
|
||||
|
||||
|
||||
output logic Invalid, // Invalid Operation
|
||||
// output logic [1:0] FCC, // Condition Codes
|
||||
output logic [63:0] CmpResE);
|
||||
|
||||
// Perform magnitude comparison between the 63 least significant bits
|
||||
// of the input operands. Only LT and EQ are returned, since GT can
|
||||
// be determined from these values.
|
||||
logic [1:0] FCC; // Condition Codes
|
||||
logic [7:0] w, x;
|
||||
logic ANaN, BNaN;
|
||||
logic Azero, Bzero;
|
||||
// logic ANaN, BNaN;
|
||||
// logic Azero, Bzero;
|
||||
logic LT; // magnitude op1 < magnitude op2
|
||||
logic EQ; // magnitude op1 = magnitude op2
|
||||
logic [63:0] PosOp1, PosOp2;
|
||||
|
||||
assign PosOp1 = FmtE ? {~op1[63], op1[62:0]} : {~op1[31], op1[30:0], 32'b0};
|
||||
assign PosOp2 = FmtE ? {~op2[63], op2[62:0]} : {~op2[31], op2[30:0], 32'b0};
|
||||
magcompare64b_1 magcomp1 (w, x, PosOp1, PosOp2);
|
||||
|
||||
|
||||
magcompare64b_1 magcomp1 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]});
|
||||
|
||||
// Determine final values based on output of magnitude comparison,
|
||||
// sign bits, and special case testing.
|
||||
exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, FOpCtrlE);
|
||||
|
||||
// Perform magnitude comparison between the 63 least significant bits
|
||||
// of the input operands. Only LT and EQ are returned, since GT can
|
||||
@ -75,24 +76,10 @@ module fcmp (
|
||||
|
||||
// Determine final values based on output of magnitude comparison,
|
||||
// sign bits, and special case testing.
|
||||
exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .*);
|
||||
exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(XNaNE), .BNaN(YNaNE), .Azero(XZeroE), .Bzero(YZeroE), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .FSrcXE, .FSrcYE, .*);
|
||||
|
||||
endmodule // fpcomp
|
||||
|
||||
// module magcompare2b (LT, GT, A, B);
|
||||
|
||||
// input logic [1:0] A;
|
||||
// input logic [1:0] B;
|
||||
|
||||
// output logic LT;
|
||||
// output logic GT;
|
||||
|
||||
// // Determine if A < B using a minimized sum-of-products expression
|
||||
// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
|
||||
// // Determine if A > B using a minimized sum-of-products expression
|
||||
// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
|
||||
|
||||
// endmodule // magcompare2b
|
||||
|
||||
// 2-bit magnitude comparator
|
||||
// This module compares two 2-bit values A and B. LT is '1' if A < B
|
||||
@ -198,135 +185,6 @@ module magcompare64b_1 (w, x, A, B);
|
||||
|
||||
endmodule // magcompare64b
|
||||
|
||||
// This module takes 64-bits inputs A and B, two magnitude comparison
|
||||
// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of
|
||||
// operands being compared as indicated below.
|
||||
// FOpCtrlE Description
|
||||
// 00 double precision numbers
|
||||
// 01 single precision numbers
|
||||
// 10 half precision numbers
|
||||
// 11 bfloat precision numbers
|
||||
//
|
||||
// The comparator produces a 2-bit signal fcc, which
|
||||
// indicates the result of the comparison as follows:
|
||||
// fcc description
|
||||
// 00 A = B
|
||||
// 01 A < B
|
||||
// 10 A > B
|
||||
// 11 A and B are unordered (i.e., A or B is NaN)
|
||||
// It also produces an invalid operation flag, which is one
|
||||
// if either of the input operands is a signaling NaN.
|
||||
|
||||
module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE);
// Operand classifier: for each of the 64-bit inputs A and B, reports whether
// it matches the NaN bit pattern for the selected format (ANaN/BNaN) and
// whether its low 63 bits are all zero (Azero/Bzero).
// Only FOpCtrlE[1:0] is read in this module; FOpCtrlE[2] is unused here.
|
||||
|
||||
input logic [63:0] A;
|
||||
input logic [63:0] B;
|
||||
input logic [2:0] FOpCtrlE;
|
||||
|
||||
// One-hot format selects decoded from FOpCtrlE[1:0] (see assigns below).
logic dp, sp, hp;
|
||||
|
||||
output logic ANaN;
|
||||
output logic BNaN;
|
||||
output logic Azero;
|
||||
output logic Bzero;
|
||||
|
||||
// Decode FOpCtrlE[1:0]: dp = 00, sp = 01, hp = 10.
// For 11 none of the selects is set, so ANaN and BNaN evaluate to 0.
assign dp = !FOpCtrlE[1]&!FOpCtrlE[0];
|
||||
assign sp = !FOpCtrlE[1]&FOpCtrlE[0];
|
||||
assign hp = FOpCtrlE[1]&!FOpCtrlE[0];
|
||||
|
||||
// Test if A or B is NaN.
// Bits [62:58] must all be 1 in every format; the selected format then adds
// its own term. Each term ORs only two bits below the all-ones field
// (e.g. [54]|[53] for sp, [51]|[50] for dp, [57]|[56] for hp), so lower
// bits of the operand are not examined by this test.
|
||||
assign ANaN = (A[62]&A[61]&A[60]&A[59]&A[58]) &
|
||||
((sp&A[57]&A[56]&A[55]&(A[54]|A[53])) |
|
||||
(dp&A[57]&A[56]&A[55]&A[54]&A[53]&A[52]&(A[51]|A[50])) |
|
||||
(hp&(A[57]|A[56])));
|
||||
|
||||
// Same test as ANaN, applied to B.
assign BNaN = (B[62]&B[61]&B[60]&B[59]&B[58]) &
|
||||
((sp&B[57]&B[56]&B[55]&(B[54]|B[53])) |
|
||||
(dp&B[57]&B[56]&B[55]&B[54]&B[53]&B[52]&(B[51]|B[50])) |
|
||||
(hp&(B[57]|B[56])));
|
||||
|
||||
// Test if A is +0 or -0 when viewed as a floating point number (i.e.,
|
||||
// the 63 least significant bits of A are zero).
|
||||
// Depending on how this synthesizes, it may work better to replace
|
||||
// this with assign Azero = ~(A[62] | A[61] | ... | A[0])
|
||||
// The sign bit [63] is excluded from both comparisons.
assign Azero = (A[62:0] == 63'h0);
|
||||
assign Bzero = (B[62:0] == 63'h0);
|
||||
|
||||
endmodule // exception_cmp_1
|
||||
//
|
||||
// File name : fpcomp.v
|
||||
// Title : Floating-Point Comparator
|
||||
// project : FPU
|
||||
// Library : fpcomp
|
||||
// Author(s) : James E. Stine
|
||||
// Purpose : definition of main unit to floating-point comparator
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
//
|
||||
// Floating Point Comparator (Algorithm)
|
||||
//
|
||||
// 1.) Performs sign-extension if the inputs are 32-bit integers.
|
||||
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
|
||||
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
|
||||
// and correct for sign bits
|
||||
//
|
||||
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
|
||||
// signals, and a 2-bit signal FOpCtrlE that indicates the type of
|
||||
// operands being compared as indicated below.
|
||||
// FOpCtrlE Description
|
||||
// 00 double precision numbers
|
||||
// 01 single precision numbers
|
||||
// 10 half precision numbers
|
||||
// 11 (unused)
|
||||
//
|
||||
// The comparator produces a 2-bit signal FCC, which
|
||||
// indicates the result of the comparison:
|
||||
//
|
||||
// fcc description
|
||||
// 00 A = B
|
||||
// 01 A < B
|
||||
// 10 A > B
|
||||
// 11 A and B are unordered (i.e., A or B is NaN)
|
||||
//
|
||||
// It also produces an invalid operation flag, which is one
|
||||
// if either of the input operands is a signaling NaN per 754
|
||||
|
||||
|
||||
/*module magcompare2b (LT, GT, A, B);
|
||||
|
||||
input logic [1:0] A;
|
||||
input logic [1:0] B;
|
||||
|
||||
output logic LT;
|
||||
output logic GT;
|
||||
|
||||
// Determine if A < B using a minimized sum-of-products expression
|
||||
assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
|
||||
// Determine if A > B using a minimized sum-of-products expression
|
||||
assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
|
||||
|
||||
endmodule*/ // magcompare2b
|
||||
|
||||
// 2-bit magnitude comparator
|
||||
// This module compares two 2-bit values A and B. LT is '1' if A < B
|
||||
// and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
|
||||
// this version actually incorporates don't cares into the equation to
|
||||
// simplify the optimization
|
||||
|
||||
// module magcompare2c (LT, GT, A, B);
|
||||
|
||||
// input logic [1:0] A;
|
||||
// input logic [1:0] B;
|
||||
|
||||
// output logic LT;
|
||||
// output logic GT;
|
||||
|
||||
// assign LT = B[1] | (!A[1]&B[0]);
|
||||
// assign GT = A[1] | (!B[1]&A[0]);
|
||||
|
||||
// endmodule // magcompare2b
|
||||
|
||||
// This module compares two 64-bit values A and B. LT is '1' if A < B
|
||||
// and EQ is '1'if A = B. LT and GT are both '0' if A > B.
|
||||
// This structure was modified so
|
||||
@ -388,6 +246,8 @@ endmodule // magcompare64b
|
||||
module exception_cmp_2 (
|
||||
input logic [63:0] A,
|
||||
input logic [63:0] B,
|
||||
input logic [63:0] FSrcXE,
|
||||
input logic [63:0] FSrcYE,
|
||||
input logic FmtE,
|
||||
input logic LT_mag,
|
||||
input logic EQ_mag,
|
||||
@ -456,8 +316,8 @@ module exception_cmp_2 (
|
||||
|
||||
always_comb begin
|
||||
case (FOpCtrlE[2:0])
|
||||
3'b111: CmpResE = LT ? A : B;//min
|
||||
3'b101: CmpResE = GT ? A : B;//max
|
||||
3'b111: CmpResE = LT ? FSrcXE : FSrcYE;//min
|
||||
3'b101: CmpResE = GT ? FSrcXE : FSrcYE;//max
|
||||
3'b010: CmpResE = {63'b0, EQ};//equal
|
||||
3'b001: CmpResE = {63'b0, LT};//less than
|
||||
3'b011: CmpResE = {63'b0, LT|EQ};//less than or equal
|
||||
|
@ -1,7 +1,15 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
module fcvt (
|
||||
input logic [63:0] X, // floating point input
|
||||
input logic XSgnE,
|
||||
input logic [10:0] XExpE,
|
||||
input logic [51:0] XFracE,
|
||||
input logic XAssumed1E,
|
||||
input logic XZeroE,
|
||||
input logic XNaNE,
|
||||
input logic XInfE,
|
||||
input logic XDenormE,
|
||||
input logic [10:0] BiasE,
|
||||
input logic [`XLEN-1:0] SrcAE, // integer input
|
||||
input logic [3:0] FOpCtrlE, // chooses which instruction is done (full list below)
|
||||
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
@ -9,15 +17,10 @@ module fcvt (
|
||||
output logic [63:0] CvtResE, // convert final result
|
||||
output logic [4:0] CvtFlgE); // convert flags {invalid, divide by zero, overflow, underflow, inexact}
|
||||
|
||||
logic XSgn; // FP input's sign
|
||||
logic [10:0] XExp; // FP input's exponent
|
||||
logic [51:0] XFrac; // FP input's fraction
|
||||
logic ResSgn; // FP result's sign
|
||||
logic [10:0] ResExp,TmpExp; // FP result's exponent
|
||||
logic [51:0] ResFrac; // FP result's fraction
|
||||
logic [5:0] LZResP; // lz output
|
||||
// logic LZResV;
|
||||
logic [11:0] Bias; // 1023 for double, 127 for single
|
||||
logic [7:0] Bits; // how many bits are in the integer result
|
||||
logic [7:0] SubBits; // subtract these bits from the exponent (FP result)
|
||||
logic [64+51:0] ShiftedManTmp; // Shifted mantissa
|
||||
@ -31,11 +34,7 @@ module fcvt (
|
||||
logic [64-1:0] PosInt; // absolute value of the integer input
|
||||
logic [63:0] CvtIntRes; // interger result from the fp -> int instructions
|
||||
logic [63:0] CvtFPRes; // floating point result from the int -> fp instructions
|
||||
logic XFracZero; // is the fraction of X zero?
|
||||
logic Of, Uf; // did the integer result underflow or overflow
|
||||
logic XExpZero; // is X's exponent zero
|
||||
logic XExpMax; // is the exponent all ones
|
||||
logic XNaN, XDenorm, XInf, XZero; // is X a special value
|
||||
logic Guard, Round, LSB, Sticky; // bits used to determine rounding
|
||||
logic Plus1,CalcPlus1; // do you add one for rounding
|
||||
logic SgnRes; // sign of the floating point result
|
||||
@ -62,31 +61,15 @@ module fcvt (
|
||||
// fcvt.d.lu = 1101
|
||||
// {long, unsigned, to int, from int}
|
||||
|
||||
// split the input into its various parts
|
||||
assign XSgn = FmtE ? X[63] : X[31];
|
||||
assign XExp = FmtE ? X[62:52] : {3'b0, X[30:23]};
|
||||
assign XFrac = FmtE ? X[51:0] : {X[23:0], 29'b0};
|
||||
|
||||
// determine if the exponent and fraction are all zero or ones
|
||||
assign XExpZero = ~|XExp;
|
||||
assign XFracZero = ~|XFrac;
|
||||
assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
|
||||
|
||||
// determine if X is a special value
|
||||
assign XNaN = XExpMax & ~XFracZero;
|
||||
assign XDenorm = XExpZero & ~XFracZero;
|
||||
assign XInf = XExpMax & XFracZero;
|
||||
assign XZero = XExpZero & XFracZero;
|
||||
|
||||
// calculate signals based off the input and output's size
|
||||
assign Bias = FmtE ? 12'h3ff : 12'h7f;
|
||||
// assign Bias = FmtE ? 12'h3ff : 12'h7f;
|
||||
assign Res64 = ((FOpCtrlE==4'b1010 || FOpCtrlE==4'b1110) | (FmtE&(FOpCtrlE==4'b0001 | FOpCtrlE==4'b0101 | FOpCtrlE==4'b0000 | FOpCtrlE==4'b1001 | FOpCtrlE==4'b1101)));
|
||||
assign In64 = ((FOpCtrlE==4'b1001 || FOpCtrlE==4'b1101) | (FmtE&(FOpCtrlE==4'b0010 | FOpCtrlE==4'b0110 | FOpCtrlE==4'b1010 | FOpCtrlE==4'b1110) | (FOpCtrlE==4'b1101 & ~FmtE)));
|
||||
assign SubBits = In64 ? 8'd64 : 8'd32;
|
||||
assign Bits = Res64 ? 8'd64 : 8'd32;
|
||||
|
||||
// calculate the unbiased exponent
|
||||
assign ExpVal = XExp - Bias + XDenorm;
|
||||
assign ExpVal = XExpE - BiasE + XDenormE;
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
|
||||
@ -97,11 +80,10 @@ module fcvt (
|
||||
// determine the integer's sign
|
||||
assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0;
|
||||
|
||||
// This did not work \/
|
||||
// generate
|
||||
// if(64 == 64)
|
||||
// if(`XLEN == 64)
|
||||
// lz64 lz(LZResP, LZResV, PosInt);
|
||||
// else if(64 == 32) begin
|
||||
// else if(`XLEN == 32) begin
|
||||
// assign LZResP[5] = 1'b0;
|
||||
// lz32 lz(LZResP[4:0], LZResV, PosInt);
|
||||
// end
|
||||
@ -111,12 +93,12 @@ module fcvt (
|
||||
logic [8:0] i;
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while (~PosInt[64-1-i] && i <= `XLEN) i = i+1; // search for leading one
|
||||
while (~PosInt[64-1-i] && i < `XLEN) i = i+1; // search for leading one
|
||||
LZResP = i+1; // compute shift count
|
||||
end
|
||||
|
||||
// if no one was found set to zero otherwise calculate the exponent
|
||||
assign TmpExp = i==`XLEN ? 0 : Bias + SubBits - LZResP;
|
||||
assign TmpExp = i==`XLEN ? 0 : BiasE + SubBits - LZResP;
|
||||
|
||||
|
||||
|
||||
@ -126,12 +108,12 @@ module fcvt (
|
||||
|
||||
// select the shift value and amount based on operation (to fp or int)
|
||||
assign ShiftCnt = FOpCtrlE[1] ? ExpVal : LZResP;
|
||||
assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, ~(XDenorm|XZero), XFrac} : {PosInt, 52'b0};
|
||||
assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, XAssumed1E, XFracE} : {PosInt, 52'b0};
|
||||
|
||||
// if shift = -1 then shift one bit right for guard bit (right shifting twice never rounds)
|
||||
// if the shift is negative add a bit for sticky bit calculation
|
||||
// otherwise shift left
|
||||
assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, ~(XDenorm|XZero), XFrac[51:1]} : ShiftCnt[12] ? {{64+51{1'b0}}, ~XZero} : ShiftVal << ShiftCnt;
|
||||
assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, XAssumed1E, XFracE[51:1]} : ShiftCnt[12] ? {{64+51{1'b0}}, ~XZeroE} : ShiftVal << ShiftCnt;
|
||||
|
||||
// truncate the shifted mantissa
|
||||
assign ShiftedMan = ShiftedManTmp[64+51:50];
|
||||
@ -139,7 +121,7 @@ module fcvt (
|
||||
// calculate sticky bit
|
||||
// - take into account the possible right shift from before
|
||||
// - the sticky bit calculation covers three different sizes depending on the operation
|
||||
assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFrac[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);
|
||||
assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFracE[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);
|
||||
|
||||
|
||||
// determine guard, round, and least significant bit of the result
|
||||
@ -152,23 +134,23 @@ module fcvt (
|
||||
case (FrmE)
|
||||
3'b000: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky&LSB));//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = (XSgn&FOpCtrlE[1]) | (ResSgn&FOpCtrlE[0]);//round down
|
||||
3'b011: CalcPlus1 = (~XSgn&FOpCtrlE[1]) | (~ResSgn&FOpCtrlE[0]);//round up
|
||||
3'b010: CalcPlus1 = (XSgnE&FOpCtrlE[1]) | (ResSgn&FOpCtrlE[0]);//round down
|
||||
3'b011: CalcPlus1 = (~XSgnE&FOpCtrlE[1]) | (~ResSgn&FOpCtrlE[0]);//round up
|
||||
3'b100: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky));//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
end
|
||||
|
||||
// don't round if the result is exact
|
||||
assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZero&FOpCtrlE[1]);
|
||||
assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZeroE&FOpCtrlE[1]);
|
||||
|
||||
// round the shifted mantissa
|
||||
assign RoundedTmp = ShiftedMan[64+1:2] + Plus1;
|
||||
assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + Plus1 : {{TmpExp, ShiftedMan[64+1:43]} + Plus1, 29'b0} ;
|
||||
|
||||
// fit the rounded result into the appropriate size and take the 2's complement if needed
|
||||
assign Rounded = Res64 ? XSgn&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] :
|
||||
XSgn ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]};
|
||||
assign Rounded = Res64 ? XSgnE&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] :
|
||||
XSgnE ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]};
|
||||
|
||||
// extract the MSB and Sign for later use (will be used to determine underflow and overflow)
|
||||
assign RoundMSB = Res64 ? RoundedTmp[64] : RoundedTmp[32];
|
||||
@ -176,10 +158,10 @@ module fcvt (
|
||||
|
||||
|
||||
// check if the result overflows
|
||||
assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgn&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN;
|
||||
assign Of = (~XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgnE&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgnE&XInfE) | XNaNE;
|
||||
|
||||
// check if the result underflows (this calculation changes if the result is signed or unsigned)
|
||||
assign Uf = FOpCtrlE[2] ? XSgn&~XZero | (XSgn&XInf) | (XSgn&~XZero&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgn&XInf) | (XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (XSgn&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgn | ShiftCnt[12]) ? {64{1'b0}} : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
|
||||
assign Uf = FOpCtrlE[2] ? XSgnE&~XZeroE | (XSgnE&XInfE) | (XSgnE&~XZeroE&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgnE&XInfE) | (XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (XSgnE&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgnE | ShiftCnt[12]) ? {64{1'b0}} : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
|
||||
|
||||
// calculate the result's sign
|
||||
assign SgnRes = ~FOpCtrlE[3] & FOpCtrlE[1];
|
||||
|
@ -31,36 +31,36 @@ module fhazard(
|
||||
input logic [4:0] RdM, RdW,
|
||||
input logic [2:0] FResultSelM,
|
||||
output logic FStallD,
|
||||
output logic [1:0] ForwardXE, ForwardYE, ForwardZE
|
||||
output logic [1:0] FForwardXE, FForwardYE, FForwardZE
|
||||
);
|
||||
|
||||
|
||||
always_comb begin
|
||||
// set ReadData as default
|
||||
ForwardXE = 2'b00; // choose FRD1E
|
||||
ForwardYE = 2'b00; // choose FRD2E
|
||||
ForwardZE = 2'b00; // choose FRD3E
|
||||
FForwardXE = 2'b00; // choose FRD1E
|
||||
FForwardYE = 2'b00; // choose FRD2E
|
||||
FForwardZE = 2'b00; // choose FRD3E
|
||||
FStallD = 0;
|
||||
|
||||
if ((Adr1E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM
|
||||
if(FResultSelM == 3'b100) ForwardXE = 2'b10; // choose FResM
|
||||
if(FResultSelM == 3'b100) FForwardXE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // if the result won't be ready stall
|
||||
else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W
|
||||
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
|
||||
|
||||
|
||||
if ((Adr2E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM
|
||||
if(FResultSelM == 3'b100) ForwardYE = 2'b10; // choose FResM
|
||||
if(FResultSelM == 3'b100) FForwardYE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // if the result won't be ready stall
|
||||
else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W
|
||||
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
|
||||
|
||||
|
||||
if ((Adr3E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM
|
||||
if(FResultSelM == 3'b100) ForwardZE = 2'b10; // choose FResM
|
||||
if(FResultSelM == 3'b100) FForwardZE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // if the result won't be ready stall
|
||||
else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W
|
||||
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
|
||||
|
||||
end
|
||||
|
||||
|
@ -3,12 +3,23 @@ module fma(
|
||||
input logic reset,
|
||||
input logic FlushM,
|
||||
input logic StallM,
|
||||
input logic [63:0] SrcXE, SrcXM, // X
|
||||
input logic [63:0] SrcYE, SrcYM, // Y
|
||||
input logic [63:0] SrcZE, SrcZM, // Z
|
||||
input logic FmtE, FmtM, // precision 1 = double 0 = single
|
||||
input logic [2:0] FOpCtrlM, FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic XSgnE, YSgnE, ZSgnE,
|
||||
input logic [10:0] XExpE, YExpE, ZExpE,
|
||||
input logic [51:0] XFracE, YFracE, ZFracE,
|
||||
input logic XSgnM, YSgnM, ZSgnM,
|
||||
input logic [10:0] XExpM, YExpM, ZExpM,
|
||||
input logic [51:0] XFracM, YFracM, ZFracM,
|
||||
input logic XAssumed1E, YAssumed1E, ZAssumed1E,
|
||||
input logic XDenormE, YDenormE, ZDenormE,
|
||||
input logic XZeroE, YZeroE, ZZeroE,
|
||||
input logic XNaNM, YNaNM, ZNaNM,
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM,
|
||||
input logic XZeroM, YZeroM, ZZeroM,
|
||||
input logic XInfM, YInfM, ZInfM,
|
||||
input logic [10:0] BiasE,
|
||||
output logic [63:0] FMAResM,
|
||||
output logic [4:0] FMAFlgM);
|
||||
|
||||
@ -18,24 +29,23 @@ module fma(
|
||||
logic [12:0] ProdExpE, ProdExpM;
|
||||
logic AddendStickyE, AddendStickyM;
|
||||
logic KillProdE, KillProdM;
|
||||
logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM;
|
||||
logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM;
|
||||
logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM;
|
||||
|
||||
fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE, .FmtE, .ProdManE, .AlignedAddendE,
|
||||
.ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
|
||||
.XNaNE, .YNaNE, .ZNaNE );
|
||||
fma1 fma1 (.XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE,
|
||||
.BiasE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
|
||||
.FOpCtrlE, .FmtE, .ProdManE, .AlignedAddendE,
|
||||
.ProdExpE, .AddendStickyE, .KillProdE);
|
||||
|
||||
flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
|
||||
flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
|
||||
flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
|
||||
flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
{AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE},
|
||||
{AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM});
|
||||
flopenrc #(2) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
{AddendStickyE, KillProdE},
|
||||
{AddendStickyM, KillProdM});
|
||||
|
||||
fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM, .FrmM, .FmtM,
|
||||
fma2 fma2(.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, .YFracM, .ZFracM,
|
||||
.FOpCtrlM, .FrmM, .FmtM,
|
||||
.ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM,
|
||||
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM,
|
||||
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM,
|
||||
.FMAResM, .FMAFlgM);
|
||||
|
||||
endmodule
|
||||
@ -43,98 +53,27 @@ endmodule
|
||||
|
||||
|
||||
module fma1(
|
||||
|
||||
input logic [63:0] X, // X
|
||||
input logic [63:0] Y, // Y
|
||||
input logic [63:0] Z, // Z
|
||||
// input logic XSgnE, YSgnE, ZSgnE,
|
||||
input logic [10:0] XExpE, YExpE, ZExpE,
|
||||
input logic [51:0] XFracE, YFracE, ZFracE,
|
||||
input logic XAssumed1E, YAssumed1E, ZAssumed1E,
|
||||
input logic XDenormE, YDenormE, ZDenormE,
|
||||
input logic XZeroE, YZeroE, ZZeroE,
|
||||
input logic [10:0] BiasE,
|
||||
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic FmtE, // precision 1 = double 0 = single
|
||||
output logic [105:0] ProdManE, // 1.X frac * 1.Y frac
|
||||
output logic [161:0] AlignedAddendE, // Z aligned for addition
|
||||
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias
|
||||
output logic AddendStickyE, // sticky bit that is calculated during alignment
|
||||
output logic KillProdE, // set the product to zero before addition if the product is too small to matter
|
||||
output logic XZeroE, YZeroE, ZZeroE, // inputs are zero
|
||||
output logic XInfE, YInfE, ZInfE, // inputs are infinity
|
||||
output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN
|
||||
output logic KillProdE // set the product to zero before addition if the product is too small to matter
|
||||
);
|
||||
|
||||
logic [51:0] XFrac,YFrac,ZFrac; // input fraction
|
||||
logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one)
|
||||
logic [12:0] XExp,YExp,ZExp; // input exponents
|
||||
logic XSgn,YSgn,ZSgn; // input signs
|
||||
logic [12:0] AlignCnt; // how far to shift the addend to align with the product
|
||||
logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit
|
||||
logic [213:0] ZManPreShifted; // input to the alignment shifter
|
||||
logic XDenorm, YDenorm, ZDenorm; // inputs are denormal
|
||||
logic [63:0] Addend; // value to add (Z or zero)
|
||||
logic [12:0] Bias; // 1023 for double, 127 for single
|
||||
logic XExpZero, YExpZero, ZExpZero; // input exponent zero
|
||||
logic XFracZero, YFracZero, ZFracZero; // input fraction zero
|
||||
logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// split inputs into the sign bit, fraction, and exponent to handle single or double precision
|
||||
// - single precision is in the top half of the inputs
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Set addend to zero if FMUL instruction
|
||||
assign Addend = FOpCtrlE[2] ? 64'b0 : Z;
|
||||
|
||||
assign XSgn = X[63];
|
||||
assign YSgn = Y[63];
|
||||
assign ZSgn = Addend[63];
|
||||
|
||||
assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]};
|
||||
assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]};
|
||||
assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]};
|
||||
|
||||
assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
|
||||
assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0};
|
||||
assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0};
|
||||
|
||||
assign XMan = {~XExpZero, XFrac};
|
||||
assign YMan = {~YExpZero, YFrac};
|
||||
assign ZMan = {~ZExpZero, ZFrac};
|
||||
|
||||
assign Bias = FmtE ? 13'h3ff : 13'h7f;
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// determine if an input is a special value
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign XExpZero = ~|XExp;
|
||||
assign YExpZero = ~|YExp;
|
||||
assign ZExpZero = ~|ZExp;
|
||||
|
||||
assign XFracZero = ~|XFrac;
|
||||
assign YFracZero = ~|YFrac;
|
||||
assign ZFracZero = ~|ZFrac;
|
||||
|
||||
assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
|
||||
assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0];
|
||||
assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0];
|
||||
|
||||
assign XNaNE = XExpMax & ~XFracZero;
|
||||
assign YNaNE = YExpMax & ~YFracZero;
|
||||
assign ZNaNE = ZExpMax & ~ZFracZero;
|
||||
|
||||
assign XDenorm = XExpZero & ~XFracZero;
|
||||
assign YDenorm = YExpZero & ~YFracZero;
|
||||
assign ZDenorm = ZExpZero & ~ZFracZero;
|
||||
|
||||
assign XInfE = XExpMax & XFracZero;
|
||||
assign YInfE = YExpMax & YFracZero;
|
||||
assign ZInfE = ZExpMax & ZFracZero;
|
||||
|
||||
assign XZeroE = XExpZero & XFracZero;
|
||||
assign YZeroE = YExpZero & YFracZero;
|
||||
assign ZZeroE = ZExpZero & ZFracZero;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Calculate the product
|
||||
// - When multiplying two fp numbers, add the exponents
|
||||
@ -145,11 +84,11 @@ module fma1(
|
||||
|
||||
// verilator lint_off WIDTH
|
||||
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
|
||||
XExp + YExp - Bias + XDenorm + YDenorm;
|
||||
XExpE + YExpE - BiasE + XDenormE + YDenormE;
|
||||
|
||||
// Calculate the product's mantissa
|
||||
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
|
||||
assign ProdManE = XMan * YMan;
|
||||
assign ProdManE = {XAssumed1E, XFracE} * {YAssumed1E, YFracE};
|
||||
|
||||
|
||||
|
||||
@ -168,7 +107,7 @@ module fma1(
|
||||
// - positive means the product is larger, so shift Z right
|
||||
// - Denormal numbers have an exponent value of 1, however they are
|
||||
// represented with an exponent of 0. add one to the exponent if it is a denormal number
|
||||
assign AlignCnt = ProdExpE - ZExp - ZDenorm;
|
||||
assign AlignCnt = ProdExpE - ZExpE - ZDenormE;
|
||||
// verilator lint_on WIDTH
|
||||
|
||||
|
||||
@ -177,7 +116,7 @@ module fma1(
|
||||
// |1'b0| addnend |
|
||||
|
||||
// the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
|
||||
assign ZManPreShifted = {55'b0, ZMan, 106'b0};
|
||||
assign ZManPreShifted = {55'b0, {ZAssumed1E, ZFracE}, 106'b0};
|
||||
always_comb
|
||||
begin
|
||||
|
||||
@ -187,7 +126,7 @@ module fma1(
|
||||
// | addnend |
|
||||
if ($signed(AlignCnt) <= $signed(-13'd56)) begin
|
||||
KillProdE = 1;
|
||||
ZManShifted = ZManPreShifted;//{107'b0, ZMan, 54'b0};
|
||||
ZManShifted = ZManPreShifted;//{107'b0, {~ZAssumed1E, ZFrac}, 54'b0};
|
||||
AddendStickyE = ~(XZeroE|YZeroE);
|
||||
|
||||
// If the Addend is shifted left (negative AlignCnt)
|
||||
@ -229,10 +168,10 @@ endmodule
|
||||
|
||||
|
||||
module fma2(
|
||||
|
||||
input logic [63:0] X, // X
|
||||
input logic [63:0] Y, // Y
|
||||
input logic [63:0] Z, // Z
|
||||
|
||||
input logic XSgnM, YSgnM, ZSgnM,
|
||||
input logic [10:0] XExpM, YExpM, ZExpM,
|
||||
input logic [51:0] XFracM, YFracM, ZFracM,
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic FmtM, // precision 1 = double 0 = single
|
||||
@ -244,6 +183,7 @@ module fma2(
|
||||
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
|
||||
output logic [63:0] FMAResM, // FMA final result
|
||||
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
|
||||
|
||||
@ -252,8 +192,6 @@ module fma2(
|
||||
logic [51:0] ResultFrac; // Result fraction
|
||||
logic [10:0] ResultExp; // Result exponent
|
||||
logic ResultSgn; // Result sign
|
||||
logic [10:0] ZExp; // input exponent
|
||||
logic XSgn, YSgn, ZSgn; // input sign
|
||||
logic PSgn; // product sign
|
||||
logic [105:0] ProdMan2; // product being added
|
||||
logic [162:0] AlignedAddend2; // possibly inverted aligned Z
|
||||
@ -289,28 +227,10 @@ module fma2(
|
||||
logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select input fields
|
||||
// The following logic duplicates fma1 because it's cheaper to recompute than provide registers
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Set addend to zero if FMUL instruction
|
||||
assign Addend = FOpCtrlM[2] ? 64'b0 : Z;
|
||||
|
||||
// split inputs into the sign bit, and exponent to handle single or double precision
|
||||
// - single precision is in the top half of the inputs
|
||||
assign XSgn = X[63];
|
||||
assign YSgn = Y[63];
|
||||
assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction
|
||||
|
||||
assign ZExp = FmtM ? Addend[62:52] : {3'b0, Addend[62:55]};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Calculate the product's sign
|
||||
// Negate product's sign if FNMADD or FNMSUB
|
||||
assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1];
|
||||
assign PSgn = XSgnM ^ YSgnM ^ FOpCtrlM[1];
|
||||
|
||||
|
||||
|
||||
@ -321,7 +241,7 @@ module fma2(
|
||||
// Negate Z when doing one of the following operations:
|
||||
// -prod + Z
|
||||
// prod - Z
|
||||
assign InvZ = ZSgn ^ PSgn;
|
||||
assign InvZ = ZSgnM ^ PSgn;
|
||||
|
||||
// Choose an inverted or non-inverted addend - the one is added later
|
||||
assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM};
|
||||
@ -376,7 +296,7 @@ module fma2(
|
||||
assign FracLen = FmtM ? 13'd52 : 13'd23;
|
||||
|
||||
// Determine if the result is denormal
|
||||
assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
|
||||
assign SumExpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
|
||||
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
|
||||
|
||||
// Determine the shift needed for denormal results
|
||||
@ -501,13 +421,13 @@ module fma2(
|
||||
// Determine the sign if the sum is zero
|
||||
// if cancelation then 0 unless round to -infinity
|
||||
// otherwise psign
|
||||
assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn;
|
||||
assign ZeroSgn = (PSgn^ZSgnM)&~Underflow ? FrmM == 3'b010 : PSgn;
|
||||
|
||||
// is the result negative
|
||||
// if p - z is the Sum negative
|
||||
// if -p + z is the Sum positive
|
||||
// if -p - z then the Sum is negative
|
||||
assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn);
|
||||
assign ResultSgnTmp = InvZ&(ZSgnM)&NegSum | InvZ&PSgn&~NegSum | ((ZSgnM)&PSgn);
|
||||
assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
|
||||
|
||||
|
||||
@ -525,9 +445,8 @@ module fma2(
|
||||
// 2) Inf - Inf (unless x or y is NaN)
|
||||
// 3) 0 * Inf
|
||||
assign MaxExp = FmtM ? 13'd2047 : 13'd255;
|
||||
assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) :
|
||||
(XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]);
|
||||
assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
|
||||
assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
|
||||
assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgnM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
|
||||
|
||||
// Set Overflow flag if the number is too big to be represented
|
||||
// - Don't set the overflow flag if an overflowed result isn't output
|
||||
@ -555,28 +474,28 @@ module fma2(
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select the result
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]};
|
||||
assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]};
|
||||
assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]};
|
||||
assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XFracM[50:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XFracM[50:29]};
|
||||
assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YFracM[50:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YFracM[50:29]};
|
||||
assign ZNaNResult = FmtM ? {ZSgnM, ZExpM, 1'b1, ZFracM[50:0]} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], 1'b1, ZFracM[50:29]};
|
||||
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} :
|
||||
{ResultSgn, 11'h7ff, 52'b0} :
|
||||
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} :
|
||||
{ResultSgn, 8'hff, 55'b0};
|
||||
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0};
|
||||
assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0};
|
||||
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0};
|
||||
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
|
||||
{{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
|
||||
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0};
|
||||
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZFracM} - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[7:0], ZFracM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}};
|
||||
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
// Select the final FMA result. Priority runs from top to bottom:
//   1. NaN operands propagate first (X, then Y, then Z).
//   2. Invalid is checked before the infinity cases.
//   3. An infinite X or Y passes through with the product sign; an infinite Z
//      passes through with the addend sign. Each arm tests its OWN operand's
//      infinity flag (previously all three arms tested XInfM, which made the
//      Y and Z arms unreachable).
//   4. Overflow, killed-product and underflow special results.
//   5. The normally computed result.
// When FmtM is 0 (single precision) the 32-bit result is placed in the lower
// word and the upper 32 bits are filled with ones.
assign FMAResM = XNaNM   ? XNaNResult :
                 YNaNM   ? YNaNResult :
                 ZNaNM   ? ZNaNResult :
                 Invalid ? InvalidResult : // has to be before inf
                 XInfM   ? (FmtM ? {PSgn,  XExpM, XFracM} : {{32{1'b1}}, PSgn,  XExpM[7:0], XFracM[51:29]}) :
                 YInfM   ? (FmtM ? {PSgn,  YExpM, YFracM} : {{32{1'b1}}, PSgn,  YExpM[7:0], YFracM[51:29]}) :
                 ZInfM   ? (FmtM ? {ZSgnM, ZExpM, ZFracM} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], ZFracM[51:29]}) :
                 Overflow ? OverflowResult :
                 KillProdM ? KillProdResult : // has to be after Underflow
                 Underflow & ~ResultDenorm ? UnderflowResult :
                 FmtM ? {ResultSgn, ResultExp, ResultFrac} :
                        {{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]};
|
||||
|
||||
|
||||
|
||||
|
@ -22,8 +22,8 @@
|
||||
// Step 7: Put quotient/remainder onto output.
|
||||
//
|
||||
|
||||
`timescale 1ps/1ps
|
||||
module fpdiv (done, AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn,
|
||||
// `timescale 1ps/1ps
|
||||
module fpdiv (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn,
|
||||
start, reset, clk);
|
||||
|
||||
input [63:0] op1; // 1st input operand (A)
|
||||
@ -40,7 +40,8 @@ module fpdiv (done, AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, Un
|
||||
output [63:0] AS_Result; // Result of operation
|
||||
output [4:0] Flags; // IEEE exception flags
|
||||
output Denorm; // Denorm on input or output
|
||||
output done;
|
||||
logic done;
|
||||
// output done;
|
||||
|
||||
supply1 vdd;
|
||||
supply0 vss;
|
||||
|
@ -34,6 +34,7 @@ module fpu (
|
||||
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
|
||||
input logic StallE, StallM, StallW,
|
||||
input logic FlushE, FlushM, FlushW,
|
||||
input logic [4:0] RdE, RdM, RdW,
|
||||
output logic FRegWriteM,
|
||||
output logic FStallD, // Stall the decode stage
|
||||
output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
|
||||
@ -52,7 +53,7 @@ module fpu (
|
||||
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
|
||||
logic FDivStartD, FDivStartE; // Start division
|
||||
logic FWriteIntD; // Write to integer register
|
||||
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal
|
||||
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // Input3 forwarding mux control signal
|
||||
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
|
||||
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
|
||||
logic [1:0] FResSelD, FResSelE, FResSelM;
|
||||
@ -60,13 +61,34 @@ module fpu (
|
||||
logic [4:0] Adr1E, Adr2E, Adr3E;
|
||||
|
||||
// regfile signals
|
||||
logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining
|
||||
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [`XLEN-1:0] SrcXMAligned;
|
||||
logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding)
|
||||
logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding)
|
||||
logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding)
|
||||
logic [`XLEN-1:0] FSrcXMAligned;
|
||||
logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding)
|
||||
logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding)
|
||||
logic [63:0] FSrcZE; // Input 3 to the various units (after forwarding)
|
||||
|
||||
// unpacking signals
|
||||
logic XSgnE, YSgnE, ZSgnE;
|
||||
logic [10:0] XExpE, YExpE, ZExpE;
|
||||
logic [51:0] XFracE, YFracE, ZFracE;
|
||||
logic XAssumed1E, YAssumed1E, ZAssumed1E;
|
||||
logic XNaNE, YNaNE, ZNaNE;
|
||||
logic XSNaNE, YSNaNE, ZSNaNE;
|
||||
logic XDenormE, YDenormE, ZDenormE;
|
||||
logic XZeroE, YZeroE, ZZeroE;
|
||||
logic [10:0] BiasE;
|
||||
logic XInfE, YInfE, ZInfE;
|
||||
logic XExpMaxE;
|
||||
logic XNormE;
|
||||
|
||||
logic XSgnM, YSgnM, ZSgnM;
|
||||
logic [10:0] XExpM, YExpM, ZExpM;
|
||||
logic [51:0] XFracM, YFracM, ZFracM;
|
||||
logic XNaNM, YNaNM, ZNaNM;
|
||||
logic XSNaNM, YSNaNM, ZSNaNM;
|
||||
logic XZeroM, YZeroM, ZZeroM;
|
||||
logic XInfM, YInfM, ZInfM;
|
||||
|
||||
// div/sqrt signals
|
||||
logic [63:0] FDivResultM, FDivResultW;
|
||||
@ -131,26 +153,28 @@ module fpu (
|
||||
flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
|
||||
flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
|
||||
{Adr1E, Adr2E, Adr3E});
|
||||
flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD},
|
||||
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE});
|
||||
flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
|
||||
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
|
||||
|
||||
|
||||
//EXECUTION STAGE
|
||||
|
||||
// Hazard unit for FPU
|
||||
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD,
|
||||
.ForwardXE, .ForwardYE, .ForwardZE);
|
||||
.FForwardXE, .FForwardYE, .FForwardZE);
|
||||
|
||||
// forwarding muxs
|
||||
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, ForwardXE, SrcXE);
|
||||
mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, ForwardYE, SrcYE);
|
||||
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, ForwardZE, SrcZE);
|
||||
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
|
||||
mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE);
|
||||
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FSrcZE);
|
||||
|
||||
|
||||
unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
|
||||
// first of two-stage instance of floating-point fused multiply-add unit
|
||||
fma fma (.clk, .reset, .FlushM, .StallM,
|
||||
.SrcXE, .SrcYE, .SrcZE, .SrcXM, .SrcYM, .SrcZM,
|
||||
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE,
|
||||
.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, .YFracM, .ZFracM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
|
||||
// .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM,
|
||||
.FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]),
|
||||
.FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM);
|
||||
|
||||
@ -163,43 +187,50 @@ module fpu (
|
||||
.ECLK(fpdivClk));
|
||||
|
||||
// capture the inputs for div/sqrt
|
||||
flopenrc #(64) reg_input1 (.d(SrcXE), .q(DivInput1E),
|
||||
flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E),
|
||||
.en(~HoldInputs), .clear(FDivSqrtDoneE),
|
||||
.reset(reset), .clk(clk));
|
||||
flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E),
|
||||
flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E),
|
||||
.en(~HoldInputs), .clear(FDivSqrtDoneE),
|
||||
.reset(reset), .clk(clk));
|
||||
|
||||
fpdiv fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E,
|
||||
.FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM,
|
||||
.FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset);
|
||||
|
||||
// fpdiv fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E,
|
||||
// .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM,
|
||||
// .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset);
|
||||
assign FDivBusyE = 0;
|
||||
// first of two-stage instance of floating-point add/cvt unit
|
||||
faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM,
|
||||
.SrcXE, .SrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM);
|
||||
.FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM);
|
||||
|
||||
// first and only instance of floating-point comparator
|
||||
fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE);
|
||||
fcmp fcmp (.op1({XSgnE,XExpE,XFracE}), .op2({YSgnE,YExpE,YFracE}), .FSrcXE, .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE);
|
||||
|
||||
// first and only instance of floating-point sign converter
|
||||
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE);
|
||||
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .XExpE, .XFracE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE);
|
||||
|
||||
// first and only instance of floating-point classify unit
|
||||
fclassify fclassify (.SrcXE, .FmtE, .ClassResE);
|
||||
fclassify fclassify (.XSgnE, .XFracE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE);
|
||||
|
||||
|
||||
fcvt fcvt (.X(SrcXE), .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
|
||||
fcvt fcvt (.XSgnE, .XExpE, .XFracE, .XAssumed1E, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
|
||||
|
||||
// output for store instructions
|
||||
// mux2 #(`XLEN) FWriteDataMux({{`XLEN-32{1'b0}}, SrcYE[63:32]}, SrcYE[63:64-`XLEN], FmtE, FWriteDataE);
|
||||
assign FWriteDataE = SrcYE[`XLEN-1:0];
|
||||
// mux2 #(`XLEN) FWriteDataMux({{`XLEN-32{1'b0}}, FSrcYE[63:32]}, FSrcYE[63:64-`XLEN], FmtE, FWriteDataE);
|
||||
assign FWriteDataE = FSrcYE[`XLEN-1:0];
|
||||
|
||||
//*****************
|
||||
// E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM);
|
||||
flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM);
|
||||
flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM);
|
||||
flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
|
||||
// flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM);
|
||||
// flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM);
|
||||
flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XFracE}, {XSgnM,XExpM,XFracM});
|
||||
flopenrc #(64) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YFracE}, {YSgnM,YExpM,YFracM});
|
||||
flopenrc #(64) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZFracE}, {ZSgnM,ZExpM,ZFracM});
|
||||
flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM,
|
||||
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
|
||||
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
|
||||
|
||||
|
||||
|
||||
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM);
|
||||
@ -211,9 +242,9 @@ module fpu (
|
||||
flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
|
||||
flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
|
||||
|
||||
flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM,
|
||||
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE},
|
||||
{FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM});
|
||||
flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM,
|
||||
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
|
||||
{FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
|
||||
|
||||
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
|
||||
|
||||
@ -221,8 +252,8 @@ module fpu (
|
||||
mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
|
||||
mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
|
||||
|
||||
// mux2 #(`XLEN) SrcXAlignedMux({{`XLEN-32{1'b0}}, SrcXM[63:32]}, SrcXM[63:64-`XLEN], FmtM, SrcXMAligned);
|
||||
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
|
||||
// mux2 #(`XLEN) FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned);
|
||||
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
|
||||
|
||||
// Align SrcA to MSB when single precision
|
||||
mux2 #(64) SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM);
|
||||
@ -241,9 +272,9 @@ module fpu (
|
||||
|
||||
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
|
||||
|
||||
flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW,
|
||||
{FRegWriteM, FResultSelM, RdM, FmtM, FWriteIntM},
|
||||
{FRegWriteW, FResultSelW, RdW, FmtW, FWriteIntW});
|
||||
flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW,
|
||||
{FRegWriteM, FResultSelM, FmtM, FWriteIntM},
|
||||
{FRegWriteW, FResultSelW, FmtW, FWriteIntW});
|
||||
|
||||
//#########################################
|
||||
// BEGIN WRITEBACK STAGE
|
||||
|
@ -1,30 +1,34 @@
|
||||
//performs the fsgnj/fsgnjn/fsgnjx RISCV instructions
|
||||
|
||||
// fsgn: sign-injection unit for the fsgnj / fsgnjn / fsgnjx instructions.
//
// Takes the already-unpacked sign, exponent and fraction of X plus the sign
// of Y, and re-packs X with a new sign bit.
//
// Inputs:
//   XSgnE, YSgnE - sign bits of the X and Y operands
//   XExpE        - exponent of X (low 8 bits are used when FmtE is 0)
//   XFracE       - fraction of X (top 23 bits are used when FmtE is 0)
//   XExpMaxE     - set when the exponent of X is all ones
//   FmtE         - 1 packs a 64-bit result; 0 packs a 32-bit result in the
//                  lower word with the upper 32 bits filled with ones
//   SgnOpCodeE   - operation select (see table below)
// Outputs:
//   SgnResE      - packed result
//   SgnNVE       - invalid flag
module fsgn (
    input  logic        XSgnE, YSgnE,
    input  logic [10:0] XExpE,
    input  logic [51:0] XFracE,
    input  logic        XExpMaxE,
    input  logic        FmtE,
    input  logic [1:0]  SgnOpCodeE,
    output logic [63:0] SgnResE,
    output logic        SgnNVE);

    logic ResSgn;   // sign bit of the result

    // op code designation:
    //
    //   00 - fsgnj  - directly copy over the sign of Y
    //   01 - fsgnjn - negate the sign of Y
    //   10 - fsgnjx - XOR the signs of X and Y
    //
    assign ResSgn = SgnOpCodeE[1] ? (XSgnE ^ YSgnE) : (YSgnE ^ SgnOpCodeE[0]);

    // Re-pack X with the new sign.
    assign SgnResE = FmtE ? {ResSgn, XExpE, XFracE}
                          : {{32{1'b1}}, ResSgn, XExpE[7:0], XFracE[51:29]};

    // If the exponent is all ones the value is either Inf or NaN. The only
    // flag this unit raises is invalid, when such a value ends up with its
    // sign bit set.
    // Use ResSgn rather than SgnResE[63]: for single precision SgnResE[63] is
    // one of the fill bits (always 1), not the sign of the result.
    // NOTE(review): the RISC-V spec states that sign-injection instructions do
    // not set exception flags - confirm whether SgnNVE should simply be 0.
    assign SgnNVE = XExpMaxE & ResSgn;

endmodule
|
||||
|
@ -1,3 +1,5 @@
|
||||
|
||||
`timescale 1ps/1ps
|
||||
module fsm_div (done, load_rega, load_regb, load_regc,
|
||||
load_regd, load_regr, load_regs,
|
||||
sel_muxa, sel_muxb, sel_muxr,
|
||||
|
@ -1,33 +1,33 @@
|
||||
module sbtm (input logic [11:0] a, output logic [10:0] ia_out);
|
||||
// module sbtm (input logic [11:0] a, output logic [10:0] ia_out);
|
||||
|
||||
// bit partitions
|
||||
logic [3:0] x0;
|
||||
logic [2:0] x1;
|
||||
logic [3:0] x2;
|
||||
logic [2:0] x2_1cmp;
|
||||
// mem outputs
|
||||
logic [12:0] y0;
|
||||
logic [4:0] y1;
|
||||
// input to CPA
|
||||
logic [14:0] op1;
|
||||
logic [14:0] op2;
|
||||
logic [14:0] p;
|
||||
// // bit partitions
|
||||
// logic [3:0] x0;
|
||||
// logic [2:0] x1;
|
||||
// logic [3:0] x2;
|
||||
// logic [2:0] x2_1cmp;
|
||||
// // mem outputs
|
||||
// logic [12:0] y0;
|
||||
// logic [4:0] y1;
|
||||
// // input to CPA
|
||||
// logic [14:0] op1;
|
||||
// logic [14:0] op2;
|
||||
// logic [14:0] p;
|
||||
|
||||
assign x0 = a[10:7];
|
||||
assign x1 = a[6:4];
|
||||
assign x2 = a[3:0];
|
||||
// assign x0 = a[10:7];
|
||||
// assign x1 = a[6:4];
|
||||
// assign x2 = a[3:0];
|
||||
|
||||
sbtm_a0 mem1 ({x0, x1}, y0);
|
||||
// 1s cmp per sbtm/stam
|
||||
assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0];
|
||||
sbtm_a1 mem2 ({x0, x2_1cmp}, y1);
|
||||
assign op1 = {1'b0, y0, 1'b0};
|
||||
// 1s cmp per sbtm/stam
|
||||
assign op2 = x2[3] ? {1'b1, {8{1'b1}}, ~y1, 1'b1} :
|
||||
{1'b0, 8'b0, y1, 1'b1};
|
||||
// CPA
|
||||
adder #(15) cp1 (op1, op2, 1'b0, p, cout);
|
||||
//assign ia_out = {p[14:4], {53{1'b0}}};
|
||||
assign ia_out = p[14:4];
|
||||
// sbtm_a0 mem1 ({x0, x1}, y0);
|
||||
// // 1s cmp per sbtm/stam
|
||||
// assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0];
|
||||
// sbtm_a1 mem2 ({x0, x2_1cmp}, y1);
|
||||
// assign op1 = {1'b0, y0, 1'b0};
|
||||
// // 1s cmp per sbtm/stam
|
||||
// assign op2 = x2[3] ? {1'b1, {8{1'b1}}, ~y1, 1'b1} :
|
||||
// {1'b0, 8'b0, y1, 1'b1};
|
||||
// // CPA
|
||||
// adder #(15) cp1 (op1, op2, 1'b0, p, cout);
|
||||
// //assign ia_out = {p[14:4], {53{1'b0}}};
|
||||
// assign ia_out = p[14:4];
|
||||
|
||||
endmodule // sbtm
|
||||
// endmodule // sbtm
|
||||
|
@ -1,39 +1,39 @@
|
||||
|
||||
module sbtm2 (input logic [11:0] a, output logic [10:0] y);
|
||||
// module sbtm2 (input logic [11:0] a, output logic [10:0] y);
|
||||
|
||||
// bit partitions
|
||||
logic [4:0] x0;
|
||||
logic [2:0] x1;
|
||||
logic [3:0] x2;
|
||||
logic [2:0] x2_1cmp;
|
||||
// mem outputs
|
||||
logic [12:0] y0;
|
||||
logic [5:0] y1;
|
||||
// input to CPA
|
||||
logic [14:0] op1;
|
||||
logic [14:0] op2;
|
||||
logic [14:0] p;
|
||||
logic cout;
|
||||
// // bit partitions
|
||||
// logic [4:0] x0;
|
||||
// logic [2:0] x1;
|
||||
// logic [3:0] x2;
|
||||
// logic [2:0] x2_1cmp;
|
||||
// // mem outputs
|
||||
// logic [12:0] y0;
|
||||
// logic [5:0] y1;
|
||||
// // input to CPA
|
||||
// logic [14:0] op1;
|
||||
// logic [14:0] op2;
|
||||
// logic [14:0] p;
|
||||
// logic cout;
|
||||
|
||||
assign x0 = a[11:7];
|
||||
assign x1 = a[6:4];
|
||||
assign x2 = a[3:0];
|
||||
// assign x0 = a[11:7];
|
||||
// assign x1 = a[6:4];
|
||||
// assign x2 = a[3:0];
|
||||
|
||||
sbtm_a2 mem1 ({x0[3:0], x1}, y0);
|
||||
assign op1 = {1'b0, y0, 1'b0};
|
||||
// sbtm_a2 mem1 ({x0[3:0], x1}, y0);
|
||||
// assign op1 = {1'b0, y0, 1'b0};
|
||||
|
||||
// 1s cmp per sbtm/stam
|
||||
assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0];
|
||||
sbtm_a3 mem2 ({x0, x2_1cmp}, y1);
|
||||
// 1s cmp per sbtm/stam
|
||||
assign op2 = x2[3] ? {{8{1'b1}}, ~y1, 1'b1} :
|
||||
{8'b0, y1, 1'b1};
|
||||
// // 1s cmp per sbtm/stam
|
||||
// assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0];
|
||||
// sbtm_a3 mem2 ({x0, x2_1cmp}, y1);
|
||||
// // 1s cmp per sbtm/stam
|
||||
// assign op2 = x2[3] ? {{8{1'b1}}, ~y1, 1'b1} :
|
||||
// {8'b0, y1, 1'b1};
|
||||
|
||||
// CPA
|
||||
bk15 cp1 (cout, p, op1, op2, 1'b0);
|
||||
assign y = p[14:4];
|
||||
// // CPA
|
||||
// bk15 cp1 (cout, p, op1, op2, 1'b0);
|
||||
// assign y = p[14:4];
|
||||
|
||||
endmodule // sbtm2
|
||||
// endmodule // sbtm2
|
||||
|
||||
|
||||
|
||||
|
@ -1,37 +1,37 @@
|
||||
module sbtm2 (input logic [11:0] a, output logic [10:0] y);
|
||||
// module sbtm2 (input logic [11:0] a, output logic [10:0] y);
|
||||
|
||||
// bit partitions
|
||||
logic [4:0] x0;
|
||||
logic [2:0] x1;
|
||||
logic [3:0] x2;
|
||||
logic [2:0] x2_1cmp;
|
||||
// mem outputs
|
||||
logic [13:0] y0;
|
||||
logic [5:0] y1;
|
||||
// input to CPA
|
||||
logic [14:0] op1;
|
||||
logic [14:0] op2;
|
||||
logic [14:0] p;
|
||||
// // bit partitions
|
||||
// logic [4:0] x0;
|
||||
// logic [2:0] x1;
|
||||
// logic [3:0] x2;
|
||||
// logic [2:0] x2_1cmp;
|
||||
// // mem outputs
|
||||
// logic [13:0] y0;
|
||||
// logic [5:0] y1;
|
||||
// // input to CPA
|
||||
// logic [14:0] op1;
|
||||
// logic [14:0] op2;
|
||||
// logic [14:0] p;
|
||||
|
||||
assign x0 = a[11:7];
|
||||
assign x1 = a[6:4];
|
||||
assign x2 = a[3:0];
|
||||
// assign x0 = a[11:7];
|
||||
// assign x1 = a[6:4];
|
||||
// assign x2 = a[3:0];
|
||||
|
||||
sbtm_a2 mem1 ({x0, x1}, y0);
|
||||
assign op1 = {y0, 1'b0};
|
||||
// sbtm_a2 mem1 ({x0, x1}, y0);
|
||||
// assign op1 = {y0, 1'b0};
|
||||
|
||||
// 1s cmp per sbtm/stam
|
||||
assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0];
|
||||
sbtm_a3 mem2 ({x0, x2_1cmp}, y1);
|
||||
// 1s cmp per sbtm/stam
|
||||
assign op2 = x2[3] ? {{8{1'b1}}, ~y1, 1'b1} :
|
||||
{8'b0, y1, 1'b1};
|
||||
// // 1s cmp per sbtm/stam
|
||||
// assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0];
|
||||
// sbtm_a3 mem2 ({x0, x2_1cmp}, y1);
|
||||
// // 1s cmp per sbtm/stam
|
||||
// assign op2 = x2[3] ? {{8{1'b1}}, ~y1, 1'b1} :
|
||||
// {8'b0, y1, 1'b1};
|
||||
|
||||
// CPA
|
||||
adder #(15) cp1 (op1, op2, 1'b0, p, cout);
|
||||
assign y = p[14:4];
|
||||
// // CPA
|
||||
// adder #(15) cp1 (op1, op2, 1'b0, p, cout);
|
||||
// assign y = p[14:4];
|
||||
|
||||
endmodule // sbtm2
|
||||
// endmodule // sbtm2
|
||||
|
||||
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
module sbtm_a2 (input logic [7:0] a,
|
||||
module sbtm_a4 (input logic [7:0] a,
|
||||
output logic [13:0] y);
|
||||
always_comb
|
||||
case(a)
|
||||
|
@ -1,4 +1,4 @@
|
||||
module sbtm_a3 (input logic [7:0] a,
|
||||
module sbtm_a5 (input logic [7:0] a,
|
||||
output logic [5:0] y);
|
||||
always_comb
|
||||
case(a)
|
||||
|
77
wally-pipelined/src/fpu/unpacking.sv
Normal file
77
wally-pipelined/src/fpu/unpacking.sv
Normal file
@ -0,0 +1,77 @@
|
||||
// unpacking: split raw floating-point operands into sign / exponent / fraction
// and classify them (NaN, signaling NaN, denormal, zero, infinity).
//
// FmtE selects the layout that is decoded:
//   FmtE = 1 : sign = bit 63, exponent = bits 62:52, fraction = bits 51:0
//   FmtE = 0 : sign = bit 31, exponent = bits 30:23, fraction = bits 22:0
//              (exponent is zero-extended to 11 bits, fraction is left-aligned
//               in 52 bits with 29 zero bits appended)
//
// Z is replaced by zero when FOpCtrlE[2] is set, and its sign is inverted when
// FOpCtrlE[0] is set.
module unpacking (
    input  logic [63:0] X, Y, Z,
    input  logic        FmtE,
    input  logic [2:0]  FOpCtrlE,
    output logic        XSgnE, YSgnE, ZSgnE,
    output logic [10:0] XExpE, YExpE, ZExpE,
    output logic [51:0] XFracE, YFracE, ZFracE,
    output logic        XAssumed1E, YAssumed1E, ZAssumed1E,
    output logic        XNormE,
    output logic        XNaNE, YNaNE, ZNaNE,
    output logic        XSNaNE, YSNaNE, ZSNaNE,
    output logic        XDenormE, YDenormE, ZDenormE,
    output logic        XZeroE, YZeroE, ZZeroE,
    output logic [10:0] BiasE,
    output logic        XInfE, YInfE, ZInfE,
    output logic        XExpMaxE
);

    logic        XFracZero, YFracZero, ZFracZero;   // input fraction zero
    logic        XExpZero, YExpZero, ZExpZero;      // input exponent zero
    logic [63:0] Addend;                            // value to add (Z or zero)
    logic        YExpMaxE, ZExpMaxE;                // input exponent all 1s

    // Z is only used in the FMA, and is set to zero for a multiply operation
    assign Addend = FOpCtrlE[2] ? 64'b0 : Z;

    // sign bits; the addend's sign is flipped when FOpCtrlE[0] is set
    assign XSgnE = FmtE ? X[63] : X[31];
    assign YSgnE = FmtE ? Y[63] : Y[31];
    assign ZSgnE = (FmtE ? Addend[63] : Addend[31]) ^ FOpCtrlE[0];

    // exponents, zero-extended to 11 bits for single precision
    assign XExpE = FmtE ? X[62:52]      : {3'b0, X[30:23]};
    assign YExpE = FmtE ? Y[62:52]      : {3'b0, Y[30:23]};
    assign ZExpE = FmtE ? Addend[62:52] : {3'b0, Addend[30:23]};

    // fractions, left-aligned in 52 bits for single precision
    assign XFracE = FmtE ? X[51:0]      : {X[22:0], 29'b0};
    assign YFracE = FmtE ? Y[51:0]      : {Y[22:0], 29'b0};
    assign ZFracE = FmtE ? Addend[51:0] : {Addend[22:0], 29'b0};

    // the implicit leading one is present whenever the exponent is non-zero
    assign XAssumed1E = |XExpE;
    assign YAssumed1E = |YExpE;
    assign ZAssumed1E = |ZExpE;

    assign XExpZero = ~XAssumed1E;
    assign YExpZero = ~YAssumed1E;
    assign ZExpZero = ~ZAssumed1E;

    assign XFracZero = ~|XFracE;
    assign YFracZero = ~|YFracE;
    assign ZFracZero = ~|ZFracE;

    // exponent all ones (only the low 8 bits matter for single precision)
    assign XExpMaxE = FmtE ? &XExpE[10:0] : &XExpE[7:0];
    assign YExpMaxE = FmtE ? &YExpE[10:0] : &YExpE[7:0];
    assign ZExpMaxE = FmtE ? &ZExpE[10:0] : &ZExpE[7:0];

    // X is normal when its exponent is neither all ones nor all zeros
    assign XNormE = ~(XExpMaxE | XExpZero);

    // NaN: exponent all ones, fraction non-zero
    assign XNaNE = XExpMaxE & ~XFracZero;
    assign YNaNE = YExpMaxE & ~YFracZero;
    assign ZNaNE = ZExpMaxE & ~ZFracZero;

    // Signaling NaN: a NaN whose most significant fraction bit is clear.
    // The fraction is left-aligned for both formats, so that bit is always
    // Frac[51]. (The exponent vectors are only 11 bits wide; indexing bit 51
    // of them is out of range.)
    assign XSNaNE = XNaNE & ~XFracE[51];
    assign YSNaNE = YNaNE & ~YFracE[51];
    assign ZSNaNE = ZNaNE & ~ZFracE[51];

    // denormal: exponent zero, fraction non-zero
    assign XDenormE = XExpZero & ~XFracZero;
    assign YDenormE = YExpZero & ~YFracZero;
    assign ZDenormE = ZExpZero & ~ZFracZero;

    // infinity: exponent all ones, fraction zero
    assign XInfE = XExpMaxE & XFracZero;
    assign YInfE = YExpMaxE & YFracZero;
    assign ZInfE = ZExpMaxE & ZFracZero;

    // zero: exponent zero, fraction zero
    assign XZeroE = XExpZero & XFracZero;
    assign YZeroE = YExpZero & YFracZero;
    assign ZZeroE = ZExpZero & ZFracZero;

    // exponent bias for the selected format (literals sized to the 11-bit port)
    assign BiasE = FmtE ? 11'h3ff : 11'h07f;

endmodule
|
@ -42,6 +42,7 @@ module ieu (
|
||||
output logic MulDivE, W64E,
|
||||
output logic [2:0] Funct3E,
|
||||
output logic [`XLEN-1:0] SrcAE, SrcBE,
|
||||
output logic [4:0] RdE,
|
||||
input logic FWriteIntM,
|
||||
|
||||
// Memory stage interface
|
||||
@ -53,12 +54,14 @@ module ieu (
|
||||
|
||||
output logic [2:0] Funct3M, // size and signedness to LSU
|
||||
output logic [`XLEN-1:0] SrcAM, // to privilege and fpu
|
||||
output logic [4:0] RdM,
|
||||
input logic DataAccessFaultM,
|
||||
input logic [`XLEN-1:0] FIntResM,
|
||||
|
||||
// Writeback stage
|
||||
input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW,
|
||||
input logic FWriteIntW,
|
||||
output logic [4:0] RdW,
|
||||
// input logic [`XLEN-1:0] PCLinkW,
|
||||
output logic InstrValidM,
|
||||
// hazards
|
||||
@ -82,7 +85,7 @@ module ieu (
|
||||
logic InstrValidW;
|
||||
|
||||
// forwarding signals
|
||||
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW;
|
||||
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E;
|
||||
logic [1:0] ForwardAE, ForwardBE;
|
||||
logic RegWriteM, RegWriteW;
|
||||
logic MemReadE, CSRReadE;
|
||||
|
@ -94,6 +94,7 @@ module wallypipelinedhart
|
||||
// floating point unit signals
|
||||
logic [2:0] FRM_REGW;
|
||||
logic [1:0] FMemRWM, FMemRWE;
|
||||
logic [4:0] RdE, RdM, RdW;
|
||||
logic FStallD;
|
||||
logic FWriteIntE, FWriteIntM, FWriteIntW;
|
||||
logic [`XLEN-1:0] FWriteDataE;
|
||||
|
@ -57,12 +57,8 @@ module testbench();
|
||||
string tests32f[] = '{
|
||||
"rv32f/I-FADD-S-01", "2000",
|
||||
"rv32f/I-FCLASS-S-01", "2000",
|
||||
"rv32f/I-FCVT-S-L-01", "2000",
|
||||
"rv32f/I-FCVT-S-LU-01", "2000",
|
||||
"rv32f/I-FCVT-S-W-01", "2000",
|
||||
"rv32f/I-FCVT-S-WU-01", "2000",
|
||||
"rv32f/I-FCVT-L-S-01", "2000",
|
||||
"rv32f/I-FCVT-LU-S-01", "2000",
|
||||
"rv32f/I-FCVT-W-S-01", "2000",
|
||||
"rv32f/I-FCVT-WU-S-01", "2000",
|
||||
// "rv32f/I-FDIV-S-01", "2000",
|
||||
|
Loading…
Reference in New Issue
Block a user