This commit is contained in:
Kip Macsai-Goren 2021-07-15 10:52:39 -04:00
commit 74e67df080
28 changed files with 2774 additions and 748 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,106 @@
//////////////////////////////////////////
// wally-config.vh
//
// Written: David_Harris@hmc.edu 4 January 2021
// Modified:
//
// Purpose: Specify which features are configured
// Macros to determine which modes are supported based on MISA
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// Configuration macros for one Wally build. Every setting is a plain `define that is
// picked up by whichever source files include this header.
// include shared configuration
`include "wally-shared.vh"
`define BUILDROOT 0
`define BUSYBEAR 0
// RV32 or RV64: XLEN = 32 or 64
`define XLEN 32
// MISA: the constant 32'h00000104 sets bits 2 and 8; bits 5, 20, 18 and 12 are OR-ed in.
// NOTE(review): per the RISC-V privileged spec these bits are C, I, F, U, S and M.
// Bit 3 (D) is not set although the preload paths below point at config/rv32icfd --
// confirm whether D should be enabled for this configuration.
`define MISA (32'h00000104 | 1 << 5 | 1 << 20 | 1 << 18 | 1 << 12)
`define ZCSR_SUPPORTED 1
`define COUNTERS 32
`define ZCOUNTERS_SUPPORTED 1
// Microarchitectural Features
// NOTE(review): UARCH_SUPERSCALR is spelled without the second "A"; renaming it would
// require changing every user of the macro, so it is left as-is here.
`define UARCH_PIPELINED 1
`define UARCH_SUPERSCALR 0
`define UARCH_SINGLECYCLE 0
`define MEM_DCACHE 0
`define MEM_DTIM 1
`define MEM_ICACHE 0
`define MEM_VIRTMEM 1
`define VECTORED_INTERRUPTS_SUPPORTED 1
`define ITLB_ENTRIES 32
`define DTLB_ENTRIES 32
// Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 16
// Address space
`define RESET_VECTOR 32'h80000000
// Peripheral Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
// The BASE/RANGE literals below are all written as 34-bit values.
// *** each of these is `PA_BITS wide. is this parameterizable INSIDE the config file?
`define BOOTTIM_SUPPORTED 1'b1
`define BOOTTIM_BASE 34'h00001000
`define BOOTTIM_RANGE 34'h00000FFF
`define TIM_SUPPORTED 1'b1
`define TIM_BASE 34'h80000000
`define TIM_RANGE 34'h07FFFFFF
`define CLINT_SUPPORTED 1'b1
`define CLINT_BASE 34'h02000000
`define CLINT_RANGE 34'h0000FFFF
`define GPIO_SUPPORTED 1'b1
`define GPIO_BASE 34'h10012000
`define GPIO_RANGE 34'h000000FF
`define UART_SUPPORTED 1'b1
`define UART_BASE 34'h10000000
`define UART_RANGE 34'h00000007
`define PLIC_SUPPORTED 1'b1
`define PLIC_BASE 34'h0C000000
`define PLIC_RANGE 34'h03FFFFFF
// Bus Interface width
`define AHBW 32
// Test modes
// Tie GPIO outputs back to inputs
`define GPIO_LOOPBACK_TEST 1
// Hardware configuration
`define UART_PRESCALE 1
// Interrupt configuration
// NOTE(review): PLIC_UART_ID equals PLIC_NUM_SRC (both 4); presumably source IDs are
// numbered from 1 so that 4 is the last valid ID -- confirm against the PLIC module.
`define PLIC_NUM_SRC 4
// comment out the following if >=32 sources
`define PLIC_NUM_SRC_LT_32
`define PLIC_GPIO_ID 3
`define PLIC_UART_ID 4
// Branch predictor settings: preload file paths, enable flag, and predictor type.
// The preload paths are relative; presumably they resolve from the simulation
// directory -- verify against the testbench.
`define TWO_BIT_PRELOAD "../config/rv32icfd/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv32icfd/BTBPredictor.txt"
`define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0

View File

@ -1,109 +1,3 @@
// //////////////////////////////////////////
// // wally-config.vh
// //
// // Written: David_Harris@hmc.edu 4 January 2021
// // Modified:
// //
// // Purpose: Specify which features are configured
// // Macros to determine which modes are supported based on MISA
// //
// // A component of the Wally configurable RISC-V project.
// //
// // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
// //
// // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// // is furnished to do so, subject to the following conditions:
// //
// // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
// //
// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
// ///////////////////////////////////////////
// // include shared configuration
// `include "wally-shared.vh"
// `define BUILDROOT 0
// `define BUSYBEAR 0
// // RV32 or RV64: XLEN = 32 or 64
// `define XLEN 32
// `define MISA (32'h00000104 | 1 << 5 | 1 << 20 | 1 << 18 | 1 << 12)
// `define ZCSR_SUPPORTED 1
// `define COUNTERS 32
// `define ZCOUNTERS_SUPPORTED 1
// // Microarchitectural Features
// `define UARCH_PIPELINED 1
// `define UARCH_SUPERSCALR 0
// `define UARCH_SINGLECYCLE 0
// `define MEM_DCACHE 0
// `define MEM_DTIM 1
// `define MEM_ICACHE 0
// `define MEM_VIRTMEM 1
// `define VECTORED_INTERRUPTS_SUPPORTED 1
// `define ITLB_ENTRIES 32
// `define DTLB_ENTRIES 32
// // Legal number of PMP entries are 0, 16, or 64
// `define PMP_ENTRIES 16
// // Address space
// `define RESET_VECTOR 32'h80000000
// // Peripheral Addresses
// // Peripheral memory space extends from BASE to BASE+RANGE
// // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
// // *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file?
// `define BOOTTIM_SUPPORTED 1'b1
// `define BOOTTIM_BASE 34'h00001000
// `define BOOTTIM_RANGE 34'h00000FFF
// `define TIM_SUPPORTED 1'b1
// `define TIM_BASE 34'h80000000
// `define TIM_RANGE 34'h07FFFFFF
// `define CLINT_SUPPORTED 1'b1
// `define CLINT_BASE 34'h02000000
// `define CLINT_RANGE 34'h0000FFFF
// `define GPIO_SUPPORTED 1'b1
// `define GPIO_BASE 34'h10012000
// `define GPIO_RANGE 34'h000000FF
// `define UART_SUPPORTED 1'b1
// `define UART_BASE 34'h10000000
// `define UART_RANGE 34'h00000007
// `define PLIC_SUPPORTED 1'b1
// `define PLIC_BASE 34'h0C000000
// `define PLIC_RANGE 34'h03FFFFFF
// // Bus Interface width
// `define AHBW 32
// // Test modes
// // Tie GPIO outputs back to inputs
// `define GPIO_LOOPBACK_TEST 1
// // Hardware configuration
// `define UART_PRESCALE 1
// // Interrupt configuration
// `define PLIC_NUM_SRC 4
// // comment out the following if >=32 sources
// `define PLIC_NUM_SRC_LT_32
// `define PLIC_GPIO_ID 3
// `define PLIC_UART_ID 4
// `define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt"
// `define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt"
// `define BPRED_ENABLED 1
// `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
// `define TESTSBP 0
//////////////////////////////////////////
// wally-config.vh
//

View File

@ -0,0 +1,3 @@
# Start vsim with -c and feed it one "do" command through a here-document
# (delimited by "!"). The command runs the rv32icfd batch script with two
# arguments: the configuration directory and the name rv32icfd.
vsim -c <<!
do wally-pipelined-batch-rv32icfd.do ../config/rv32icfd rv32icfd
!

View File

@ -0,0 +1 @@
# Start vsim and have it execute wally-pipelined-rv32icfd.do (no -c flag is passed).
vsim -do wally-pipelined-rv32icfd.do

View File

@ -0,0 +1,42 @@
# wally-pipelined-batch.do
# NOTE(review): this copy defaults to the rv32icfd configuration; it is presumably the
# file invoked as wally-pipelined-batch-rv32icfd.do -- confirm and update the names below.
#
# Modification by Oklahoma State University & Harvey Mudd College
# Use with Testbench
# James Stine, 2008; David Harris 2021
# Go Cowboys!!!!!!
#
# Takes 1:10 to run RV64IC tests using gui
# Use this wally-pipelined-batch.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined-batch.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined-batch.do -c
# (omit the "-c" to see the GUI while running from the shell)
#
# Arguments used by this script:
#   $1  include directory passed to vlog as +incdir+$1
#   $2  suffix used to name the library (work_$2) and the optimized design (workopt_$2)
#
# run the "resume" command whenever the simulation breaks
onbreak {resume}
# create library
# if a work_$2 library already exists, delete it, then create a fresh one
if [file exists work_$2] {
vdel -lib work_$2 -all
}
vlib work_$2
# compile source files
# suppress spurious warnings about
# "Extra checking for conflicts with always_comb done at vopt time"
# because vsim will run vopt
# With no arguments the include directory is ../config/rv32icfd; it can be overridden
# at the command line. For example:
# do wally-pipelined-batch-rv32icfd.do ../config/rv32icfd rv32icfd
# NOTE(review): only the two-argument case passes "-work work_$2" to vlog, while the
# library setup above and the vopt/vsim commands below always reference work_$2 --
# confirm whether the zero- and one-argument cases are still expected to work.
switch $argc {
0 {vlog +incdir+../config/rv32icfd +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
1 {vlog +incdir+$1 +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
2 {vlog -work work_$2 +incdir+$1 +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
}
# start and run simulation
# vopt is invoked without +acc here; add +acc only if internal signals must be accessed
vopt work_$2.testbench -work work_$2 -o workopt_$2
vsim -lib work_$2 workopt_$2
run -all
quit

View File

@ -0,0 +1,50 @@
# wally-pipelined.do
# NOTE(review): this copy defaults to the rv32icfd configuration; it is presumably the
# file invoked as wally-pipelined-rv32icfd.do -- confirm and update the names below.
#
# Modification by Oklahoma State University & Harvey Mudd College
# Use with Testbench
# James Stine, 2008; David Harris 2021
# Go Cowboys!!!!!!
#
# Takes 1:10 to run RV64IC tests using gui
# Use this wally-pipelined.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined.do -c
# (omit the "-c" to see the GUI while running from the shell)
#
# Optional argument:
#   $1  include directory passed to vlog as +incdir+$1
#
# run the "resume" command whenever the simulation breaks
onbreak {resume}
# create library
# if a "work" library already exists, delete its contents, then create it again
if [file exists work] {
vdel -all
}
vlib work
# compile source files
# suppress spurious warnings about
# "Extra checking for conflicts with always_comb done at vopt time"
# because vsim will run vopt
# With no arguments the include directory is ../config/rv32icfd; it can be overridden
# at the command line. For example:
# do wally-pipelined.do ../config/rv32ic
# NOTE(review): ../testbench/function_radix.sv is compiled only in the one-argument
# case, not in the default case -- confirm whether that difference is intended.
switch $argc {
0 {vlog +incdir+../config/rv32icfd +incdir+../config/shared ../testbench/testbench-imperas.sv ../src/*/*.sv -suppress 2583}
1 {vlog +incdir+$1 +incdir+../config/shared ../testbench/testbench-imperas.sv ../testbench/function_radix.sv ../src/*/*.sv -suppress 2583}
}
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
vopt +acc work.testbench -o workopt
vsim workopt
view wave
# NOTE(review): the two lines below that start with "--" are not Tcl "#" comments;
# confirm that the simulator tolerates them, or convert them to "#" comments.
-- display input and output signals as hexidecimal values
do ./wave-dos/default-waves.do
-- Run the Simulation
#run 5000
run -all
#quit
# after the run, hide the testbench source view and show the wave window again
noview ../testbench/testbench-imperas.sv
view wave

View File

@ -1,4 +1,3 @@
`timescale 1ps/1ps
module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out,
regr_out, d, n, sel_muxa, sel_muxb, sel_muxr, reset, clk, load_rega, load_regb,
load_regc, load_regd, load_regr, load_regs, P, op_type, exp_odd);
@ -106,123 +105,123 @@ module divconv (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_o
endmodule // divconv
module adder #(parameter WIDTH=8)
(input logic [WIDTH-1:0] a, b,
input logic cin,
output logic [WIDTH-1:0] y,
output logic cout);
// module adder #(parameter WIDTH=8)
// (input logic [WIDTH-1:0] a, b,
// input logic cin,
// output logic [WIDTH-1:0] y,
// output logic cout);
assign {cout, y} = a + b + cin;
// assign {cout, y} = a + b + cin;
endmodule // adder
// endmodule // adder
module flopenr #(parameter WIDTH = 8)
(input logic clk, reset, en,
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
// module flopenr #(parameter WIDTH = 8)
// (input logic clk, reset, en,
// input logic [WIDTH-1:0] d,
// output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= #10 0;
else if (en) q <= #10 d;
// always_ff @(posedge clk, posedge reset)
// if (reset) q <= #10 0;
// else if (en) q <= #10 d;
endmodule // flopenr
// endmodule // flopenr
module flopr #(parameter WIDTH = 8)
(input logic clk, reset,
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
// module flopr #(parameter WIDTH = 8)
// (input logic clk, reset,
// input logic [WIDTH-1:0] d,
// output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= #10 0;
else q <= #10 d;
// always_ff @(posedge clk, posedge reset)
// if (reset) q <= #10 0;
// else q <= #10 d;
endmodule // flopr
// endmodule // flopr
module flopenrc #(parameter WIDTH = 8)
(input logic clk, reset, en, clear,
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
// module flopenrc #(parameter WIDTH = 8)
// (input logic clk, reset, en, clear,
// input logic [WIDTH-1:0] d,
// output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= #10 0;
else if (en)
if (clear) q <= #10 0;
else q <= #10 d;
// always_ff @(posedge clk, posedge reset)
// if (reset) q <= #10 0;
// else if (en)
// if (clear) q <= #10 0;
// else q <= #10 d;
endmodule // flopenrc
// endmodule // flopenrc
module floprc #(parameter WIDTH = 8)
(input logic clk, reset, clear,
input logic [WIDTH-1:0] d,
output logic [WIDTH-1:0] q);
// module floprc #(parameter WIDTH = 8)
// (input logic clk, reset, clear,
// input logic [WIDTH-1:0] d,
// output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= #10 0;
else
if (clear) q <= #10 0;
else q <= #10 d;
// always_ff @(posedge clk, posedge reset)
// if (reset) q <= #10 0;
// else
// if (clear) q <= #10 0;
// else q <= #10 d;
endmodule // floprc
// endmodule // floprc
module mux2 #(parameter WIDTH = 8)
(input logic [WIDTH-1:0] d0, d1,
input logic s,
output logic [WIDTH-1:0] y);
// module mux2 #(parameter WIDTH = 8)
// (input logic [WIDTH-1:0] d0, d1,
// input logic s,
// output logic [WIDTH-1:0] y);
assign y = s ? d1 : d0;
// assign y = s ? d1 : d0;
endmodule // mux2
// endmodule // mux2
module mux3 #(parameter WIDTH = 8)
(input logic [WIDTH-1:0] d0, d1, d2,
input logic [1:0] s,
output logic [WIDTH-1:0] y);
// module mux3 #(parameter WIDTH = 8)
// (input logic [WIDTH-1:0] d0, d1, d2,
// input logic [1:0] s,
// output logic [WIDTH-1:0] y);
assign y = s[1] ? d2 : (s[0] ? d1 : d0);
// assign y = s[1] ? d2 : (s[0] ? d1 : d0);
endmodule // mux3
// endmodule // mux3
module mux4 #(parameter WIDTH = 8)
(input logic [WIDTH-1:0] d0, d1, d2, d3,
input logic [1:0] s,
output logic [WIDTH-1:0] y);
// module mux4 #(parameter WIDTH = 8)
// (input logic [WIDTH-1:0] d0, d1, d2, d3,
// input logic [1:0] s,
// output logic [WIDTH-1:0] y);
assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0);
// assign y = s[1] ? (s[0] ? d3 : d2) : (s[0] ? d1 : d0);
endmodule // mux4
// endmodule // mux4
module mux5 #(parameter WIDTH = 8)
(input logic [WIDTH-1:0] d0, d1, d2, d3, d4,
input logic [2:0] s,
output logic [WIDTH-1:0] y);
// module mux5 #(parameter WIDTH = 8)
// (input logic [WIDTH-1:0] d0, d1, d2, d3, d4,
// input logic [2:0] s,
// output logic [WIDTH-1:0] y);
always_comb
casez (s)
3'b000 : y = d0;
3'b001 : y = d1;
3'b010 : y = d2;
3'b011 : y = d3;
3'b1?? : y = d4;
endcase // casez (s)
// always_comb
// casez (s)
// 3'b000 : y = d0;
// 3'b001 : y = d1;
// 3'b010 : y = d2;
// 3'b011 : y = d3;
// 3'b1?? : y = d4;
// endcase // casez (s)
endmodule // mux5
// endmodule // mux5
module mux6 #(parameter WIDTH = 8)
(input logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5,
input logic [2:0] s,
output logic [WIDTH-1:0] y);
// module mux6 #(parameter WIDTH = 8)
// (input logic [WIDTH-1:0] d0, d1, d2, d3, d4, d5,
// input logic [2:0] s,
// output logic [WIDTH-1:0] y);
always_comb
casez (s)
3'b000 : y = d0;
3'b001 : y = d1;
3'b010 : y = d2;
3'b011 : y = d3;
3'b10? : y = d4;
3'b11? : y = d5;
endcase // casez (s)
// always_comb
// casez (s)
// 3'b000 : y = d0;
// 3'b001 : y = d1;
// 3'b010 : y = d2;
// 3'b011 : y = d3;
// 3'b10? : y = d4;
// 3'b11? : y = d5;
// endcase // casez (s)
endmodule // mux6
// endmodule // mux6
module eqcmp #(parameter WIDTH = 8)
(input logic [WIDTH-1:0] a, b,
@ -232,25 +231,25 @@ module eqcmp #(parameter WIDTH = 8)
endmodule // eqcmp
module fa (input logic a, b, c, output logic sum, carry);
// module fa (input logic a, b, c, output logic sum, carry);
assign sum = a^b^c;
assign carry = a&b|a&c|b&c;
// assign sum = a^b^c;
// assign carry = a&b|a&c|b&c;
endmodule // fa
// endmodule // fa
module csa #(parameter WIDTH=8)
(input logic [WIDTH-1:0] a, b, c,
output logic [WIDTH-1:0] sum, carry);
// module csa #(parameter WIDTH=8)
// (input logic [WIDTH-1:0] a, b, c,
// output logic [WIDTH-1:0] sum, carry);
logic [WIDTH:0] carry_temp;
genvar i;
generate
for (i=0;i<WIDTH;i=i+1)
begin : genbit
fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
end
endgenerate
assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};
// logic [WIDTH:0] carry_temp;
// genvar i;
// generate
// for (i=0;i<WIDTH;i=i+1)
// begin : genbit
// fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
// end
// endgenerate
// assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};
endmodule // csa
// endmodule // csa

View File

@ -31,8 +31,8 @@ module faddcvt(
input logic reset,
input logic FlushM,
input logic StallM,
input logic [63:0] SrcXE, // 1st input operand (A)
input logic [63:0] SrcYE, // 2nd input operand (B)
input logic [63:0] FSrcXE, // 1st input operand (A)
input logic [63:0] FSrcYE, // 2nd input operand (B)
input logic [3:0] FOpCtrlE, FOpCtrlM, // Function opcode
input logic FmtE, FmtM, // Result Precision (0 for double, 1 for single)
input logic [2:0] FrmM, // Rounding mode - specify values
@ -59,7 +59,7 @@ module faddcvt(
logic [10:0] AddExponentE, AddExponentM;
fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE,
fpuaddcvt1 fpadd1 (.FSrcXE, .FSrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE,
.AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE,
.AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE,
.AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE);
@ -83,10 +83,10 @@ module faddcvt(
.AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM);
endmodule
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, SrcXE, SrcYE, FOpCtrlE, FmtE);
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FSrcXE, FSrcYE, FOpCtrlE, FmtE);
input logic [63:0] SrcXE; // 1st input operand (A)
input logic [63:0] SrcYE; // 2nd input operand (B)
input logic [63:0] FSrcXE; // 1st input operand (A)
input logic [63:0] FSrcYE; // 2nd input operand (B)
input logic [3:0] FOpCtrlE; // Function opcode
input logic FmtE; // Result Precision (1 for double, 0 for single)
@ -137,12 +137,12 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
// and the sign of the first operand is set appropriately based on
// if the operation is absolute value or negation.
convert_inputs conv1 (AddFloat1E, AddFloat2E, SrcXE, SrcYE, FOpCtrlE, P);
convert_inputs conv1 (AddFloat1E, AddFloat2E, FSrcXE, FSrcYE, FOpCtrlE, P);
// Test for exceptions and return the "Invalid Operation" and
// "Denormalized" Input Flags. The "AddSelInvE" is used in
// the third pipeline stage to select the result. Also, AddOp1NormE
// and AddOp2NormE are one if SrcXE and SrcYE are not zero or denormalized.
// and AddOp2NormE are one if FSrcXE and FSrcYE are not zero or denormalized.
// sub is one if the effective operation is subtraction.
exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub,
@ -215,8 +215,8 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
// Place either the sign-extended 32-bit value or the original 64-bit value
// into IntValue (to be used for integer to floating point conversion)
// assign IntValue [31:0] = SrcXE[31:0];
// assign IntValue [63:32] = FOpCtrlE[0] ? {32{SrcXE[31]}} : SrcXE[63:32];
// assign IntValue [31:0] = FSrcXE[31:0];
// assign IntValue [63:32] = FOpCtrlE[0] ? {32{FSrcXE[31]}} : FSrcXE[63:32];
// If doing an integer to floating point conversion, mantissaA3 is set to
// IntVal and the prenomalized exponent is set to 1084. Otherwise,

View File

@ -2,45 +2,52 @@
`include "wally-config.vh"
module fclassify (
input logic [63:0] SrcXE,
input logic FmtE, // 0-Single 1-Double
input logic XSgnE,
input logic [51:0] XFracE,
input logic XNaNE,
input logic XSNaNE,
input logic XNormE,
input logic XDenormE,
input logic XZeroE,
input logic XInfE,
// input logic FmtE, // 0-Single 1-Double
output logic [63:0] ClassResE
);
logic Sgn;
logic Inf, NaN, Zero, Norm, Denorm;
logic PInf, QNaN, PZero, PNorm, PDenorm;
logic NInf, SNaN, NZero, NNorm, NDenorm;
logic MaxExp, ExpZero, ManZero, FirstBitFrac;
// logic XSgnE;
// logic Inf, NaN, Zero, Norm, Denorm;
logic PInf, PZero, PNorm, PDenorm;
logic NInf, NZero, NNorm, NDenorm;
// logic MaxExp, ExpZero, ManZero, FirstBitFrac;
// Single and Double precision layouts
assign Sgn = FmtE ? SrcXE[63] : SrcXE[31];
// assign XSgnE = FmtE ? FSrcXE[63] : FSrcXE[31];
// basic calculations for readability
assign ExpZero = FmtE ? ~|SrcXE[62:52] : ~|SrcXE[30:23];
assign MaxExp = FmtE ? &SrcXE[62:52] : &SrcXE[30:23];
assign ManZero = FmtE ? ~|SrcXE[51:0] : ~|SrcXE[22:0];
assign FirstBitFrac = FmtE ? SrcXE[51] : SrcXE[22];
// assign ExpZero = FmtE ? ~|FSrcXE[62:52] : ~|FSrcXE[30:23];
// assign MaxExp = FmtE ? &FSrcXE[62:52] : &FSrcXE[30:23];
// assign ManZero = FmtE ? ~|FSrcXE[51:0] : ~|FSrcXE[22:0];
// assign FirstBitFrac = FmtE ? FSrcXE[51] : FSrcXE[22];
// determine the type of number
assign NaN = MaxExp & ~ManZero;
assign Inf = MaxExp & ManZero;
assign Zero = ExpZero & ManZero;
assign Denorm= ExpZero & ~ManZero;
assign Norm = ~ExpZero;
// assign NaN = MaxExp & ~ManZero;
// assign Inf = MaxExp & ManZero;
// assign Zero = ExpZero & ManZero;
// assign Denorm= ExpZero & ~ManZero;
// assign Norm = ~ExpZero;
// determine the sub categories
assign QNaN = FirstBitFrac&NaN;
assign SNaN = ~FirstBitFrac&NaN;
assign PInf = ~Sgn&Inf;
assign NInf = Sgn&Inf;
assign PNorm = ~Sgn&Norm;
assign NNorm = Sgn&Norm;
assign PDenorm = ~Sgn&Denorm;
assign NDenorm = Sgn&Denorm;
assign PZero = ~Sgn&Zero;
assign NZero = Sgn&Zero;
// assign QNaN = FirstBitFrac&NaN;
// assign SNaN = ~FirstBitFrac&NaN;
assign PInf = ~XSgnE&XInfE;
assign NInf = XSgnE&XInfE;
assign PNorm = ~XSgnE&XNormE;
assign NNorm = XSgnE&XNormE;
assign PDenorm = ~XSgnE&XDenormE;
assign NDenorm = XSgnE&XDenormE;
assign PZero = ~XSgnE&XZeroE;
assign NZero = XSgnE&XZeroE;
// determine sub category and combine into the result
// bit 0 - -Inf
@ -53,6 +60,6 @@ module fclassify (
// bit 7 - +Inf
// bit 8 - signaling NaN
// bit 9 - quiet NaN
assign ClassResE = {{54{1'b0}}, QNaN, SNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
assign ClassResE = {{54{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
endmodule

View File

@ -42,31 +42,32 @@
module fcmp (
input logic [63:0] op1,
input logic [63:0] op2,
input logic XNaNE, YNaNE,
input logic XZeroE, YZeroE,
input logic [63:0] FSrcXE,
input logic [63:0] FSrcYE,
input logic [2:0] FOpCtrlE,
input logic FmtE,
output logic Invalid, // Invalid Operation
// output logic [1:0] FCC, // Condition Codes
output logic [63:0] CmpResE);
// Perform magnitude comparison between the 63 least signficant bits
// of the input operands. Only LT and EQ are returned, since GT can
// be determined from these values.
logic [1:0] FCC; // Condition Codes
logic [7:0] w, x;
logic ANaN, BNaN;
logic Azero, Bzero;
// logic ANaN, BNaN;
// logic Azero, Bzero;
logic LT; // magnitude op1 < magnitude op2
logic EQ; // magnitude op1 = magnitude op2
logic [63:0] PosOp1, PosOp2;
assign PosOp1 = FmtE ? {~op1[63], op1[62:0]} : {~op1[31], op1[30:0], 32'b0};
assign PosOp2 = FmtE ? {~op2[63], op2[62:0]} : {~op2[31], op2[30:0], 32'b0};
magcompare64b_1 magcomp1 (w, x, PosOp1, PosOp2);
magcompare64b_1 magcomp1 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]});
// Determine final values based on output of magnitude comparison,
// sign bits, and special case testing.
exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, FOpCtrlE);
// Perform magnitude comparison between the 63 least signficant bits
// of the input operands. Only LT and EQ are returned, since GT can
@ -75,24 +76,10 @@ module fcmp (
// Determine final values based on output of magnitude comparison,
// sign bits, and special case testing.
exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .*);
exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(XNaNE), .BNaN(YNaNE), .Azero(XZeroE), .Bzero(YZeroE), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .FSrcXE, .FSrcYE, .*);
endmodule // fpcomp
// module magcompare2b (LT, GT, A, B);
// input logic [1:0] A;
// input logic [1:0] B;
// output logic LT;
// output logic GT;
// // Determine if A < B using a minimized sum-of-products expression
// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// // Determine if A > B using a minimized sum-of-products expression
// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
// endmodule // magcompare2b
// 2-bit magnitude comparator
// This module compares two 2-bit values A and B. LT is '1' if A < B
@ -198,135 +185,6 @@ module magcompare64b_1 (w, x, A, B);
endmodule // magcompare64b
// This module takes 64-bits inputs A and B, two magnitude comparison
// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below.
// FOpCtrlE Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 bfloat precision numbers
//
// The comparator produces a 2-bit signal fcc, which
// indicates the result of the comparison as follows:
// fcc decscription
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
// It also produces a invalid operation flag, which is one
// if either of the input operands is a signaling NaN.
module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE);
input logic [63:0] A;
input logic [63:0] B;
input logic [2:0] FOpCtrlE;
logic dp, sp, hp;
output logic ANaN;
output logic BNaN;
output logic Azero;
output logic Bzero;
assign dp = !FOpCtrlE[1]&!FOpCtrlE[0];
assign sp = !FOpCtrlE[1]&FOpCtrlE[0];
assign hp = FOpCtrlE[1]&!FOpCtrlE[0];
// Test if A or B is NaN.
assign ANaN = (A[62]&A[61]&A[60]&A[59]&A[58]) &
((sp&A[57]&A[56]&A[55]&(A[54]|A[53])) |
(dp&A[57]&A[56]&A[55]&A[54]&A[53]&A[52]&(A[51]|A[50])) |
(hp&(A[57]|A[56])));
assign BNaN = (B[62]&B[61]&B[60]&B[59]&B[58]) &
((sp&B[57]&B[56]&B[55]&(B[54]|B[53])) |
(dp&B[57]&B[56]&B[55]&B[54]&B[53]&B[52]&(B[51]|B[50])) |
(hp&(B[57]|B[56])));
// Test if A is +0 or -0 when viewed as a floating point number (i.e,
// the 63 least siginficant bits of A are zero).
// Depending on how this synthesizes, it may work better to replace
// this with assign Azero = ~(A[62] | A[61] | ... | A[0])
assign Azero = (A[62:0] == 63'h0);
assign Bzero = (B[62:0] == 63'h0);
endmodule // exception_cmp
//
// File name : fpcomp.v
// Title : Floating-Point Comparator
// project : FPU
// Library : fpcomp
// Author(s) : James E. Stine
// Purpose : definition of main unit to floating-point comparator
// notes :
//
// Copyright Oklahoma State University
//
// Floating Point Comparator (Algorithm)
//
// 1.) Performs sign-extension if the inputs are 32-bit integers.
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
// and correct for sign bits
//
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
// signals, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below.
// FOpCtrlE Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 (unused)
//
// The comparator produces a 2-bit signal FCC, which
// indicates the result of the comparison:
//
// fcc decscription
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
//
// It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN per 754
/*module magcompare2b (LT, GT, A, B);
input logic [1:0] A;
input logic [1:0] B;
output logic LT;
output logic GT;
// Determine if A < B using a minimized sum-of-products expression
assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// Determine if A > B using a minimized sum-of-products expression
assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
endmodule*/ // magcompare2b
// 2-bit magnitude comparator
// This module compares two 2-bit values A and B. LT is '1' if A < B
// and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
// this version actually incorporates don't cares into the equation to
// simplify the optimization
// module magcompare2c (LT, GT, A, B);
// input logic [1:0] A;
// input logic [1:0] B;
// output logic LT;
// output logic GT;
// assign LT = B[1] | (!A[1]&B[0]);
// assign GT = A[1] | (!B[1]&A[0]);
// endmodule // magcompare2b
// This module compares two 64-bit values A and B. LT is '1' if A < B
// and EQ is '1'if A = B. LT and GT are both '0' if A > B.
// This structure was modified so
@ -388,6 +246,8 @@ endmodule // magcompare64b
module exception_cmp_2 (
input logic [63:0] A,
input logic [63:0] B,
input logic [63:0] FSrcXE,
input logic [63:0] FSrcYE,
input logic FmtE,
input logic LT_mag,
input logic EQ_mag,
@ -456,8 +316,8 @@ module exception_cmp_2 (
always_comb begin
case (FOpCtrlE[2:0])
3'b111: CmpResE = LT ? A : B;//min
3'b101: CmpResE = GT ? A : B;//max
3'b111: CmpResE = LT ? FSrcXE : FSrcYE;//min
3'b101: CmpResE = GT ? FSrcXE : FSrcYE;//max
3'b010: CmpResE = {63'b0, EQ};//equal
3'b001: CmpResE = {63'b0, LT};//less than
3'b011: CmpResE = {63'b0, LT|EQ};//less than or equal

View File

@ -1,7 +1,15 @@
`include "wally-config.vh"
module fcvt (
input logic [63:0] X, // floating point input
input logic XSgnE,
input logic [10:0] XExpE,
input logic [51:0] XFracE,
input logic XAssumed1E,
input logic XZeroE,
input logic XNaNE,
input logic XInfE,
input logic XDenormE,
input logic [10:0] BiasE,
input logic [`XLEN-1:0] SrcAE, // integer input
input logic [3:0] FOpCtrlE, // chooses which instruction is done (full list below)
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
@ -9,15 +17,10 @@ module fcvt (
output logic [63:0] CvtResE, // convert final result
output logic [4:0] CvtFlgE); // convert flags {invalid, divide by zero, overflow, underflow, inexact}
logic XSgn; // FP input's sign
logic [10:0] XExp; // FP input's exponent
logic [51:0] XFrac; // FP input's fraction
logic ResSgn; // FP result's sign
logic [10:0] ResExp,TmpExp; // FP result's exponent
logic [51:0] ResFrac; // FP result's fraction
logic [5:0] LZResP; // lz output
// logic LZResV;
logic [11:0] Bias; // 1023 for double, 127 for single
logic [7:0] Bits; // how many bits are in the integer result
logic [7:0] SubBits; // subtract these bits from the exponent (FP result)
logic [64+51:0] ShiftedManTmp; // Shifted mantissa
@ -31,11 +34,7 @@ module fcvt (
logic [64-1:0] PosInt; // absolute value of the integer input
logic [63:0] CvtIntRes; // interger result from the fp -> int instructions
logic [63:0] CvtFPRes; // floating point result from the int -> fp instructions
logic XFracZero; // is the fraction of X zero?
logic Of, Uf; // did the integer result underflow or overflow
logic XExpZero; // is X's exponent zero
logic XExpMax; // is the exponent all ones
logic XNaN, XDenorm, XInf, XZero; // is X a special value
logic Guard, Round, LSB, Sticky; // bits used to determine rounding
logic Plus1,CalcPlus1; // do you add one for rounding
logic SgnRes; // sign of the floating point result
@ -62,31 +61,15 @@ module fcvt (
// fcvt.d.lu = 1101
// {long, unsigned, to int, from int}
// split the input into it's various parts
assign XSgn = FmtE ? X[63] : X[31];
assign XExp = FmtE ? X[62:52] : {3'b0, X[30:23]};
assign XFrac = FmtE ? X[51:0] : {X[23:0], 29'b0};
// determine if the exponent and fraction are all zero or ones
assign XExpZero = ~|XExp;
assign XFracZero = ~|XFrac;
assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
// determine if X is a special value
assign XNaN = XExpMax & ~XFracZero;
assign XDenorm = XExpZero & ~XFracZero;
assign XInf = XExpMax & XFracZero;
assign XZero = XExpZero & XFracZero;
// calculate signals based off the input and output's size
assign Bias = FmtE ? 12'h3ff : 12'h7f;
// assign Bias = FmtE ? 12'h3ff : 12'h7f;
assign Res64 = ((FOpCtrlE==4'b1010 || FOpCtrlE==4'b1110) | (FmtE&(FOpCtrlE==4'b0001 | FOpCtrlE==4'b0101 | FOpCtrlE==4'b0000 | FOpCtrlE==4'b1001 | FOpCtrlE==4'b1101)));
assign In64 = ((FOpCtrlE==4'b1001 || FOpCtrlE==4'b1101) | (FmtE&(FOpCtrlE==4'b0010 | FOpCtrlE==4'b0110 | FOpCtrlE==4'b1010 | FOpCtrlE==4'b1110) | (FOpCtrlE==4'b1101 & ~FmtE)));
assign SubBits = In64 ? 8'd64 : 8'd32;
assign Bits = Res64 ? 8'd64 : 8'd32;
// calculate the unbiased exponent
assign ExpVal = XExp - Bias + XDenorm;
assign ExpVal = XExpE - BiasE + XDenormE;
////////////////////////////////////////////////////////
@ -97,11 +80,10 @@ module fcvt (
// determine the integer's sign
assign ResSgn = ~FOpCtrlE[2] ? IntIn[64-1] : 1'b0;
// This did not work \/
// generate
// if(64 == 64)
// if(`XLEN == 64)
// lz64 lz(LZResP, LZResV, PosInt);
// else if(64 == 32) begin
// else if(`XLEN == 32) begin
// assign LZResP[5] = 1'b0;
// lz32 lz(LZResP[4:0], LZResV, PosInt);
// end
@ -111,12 +93,12 @@ module fcvt (
logic [8:0] i;
always_comb begin
i = 0;
while (~PosInt[64-1-i] && i <= `XLEN) i = i+1; // search for leading one
while (~PosInt[64-1-i] && i < `XLEN) i = i+1; // search for leading one
LZResP = i+1; // compute shift count
end
// if no one was found set to zero otherwise calculate the exponent
assign TmpExp = i==`XLEN ? 0 : Bias + SubBits - LZResP;
assign TmpExp = i==`XLEN ? 0 : BiasE + SubBits - LZResP;
@ -126,12 +108,12 @@ module fcvt (
// select the shift value and amount based on operation (to fp or int)
assign ShiftCnt = FOpCtrlE[1] ? ExpVal : LZResP;
assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, ~(XDenorm|XZero), XFrac} : {PosInt, 52'b0};
assign ShiftVal = FOpCtrlE[1] ? {{64-2{1'b0}}, XAssumed1E, XFracE} : {PosInt, 52'b0};
// if shift = -1 then shift one bit right for guard bit (right shifting twice never rounds)
// if the shift is negative add a bit for sticky bit calculation
// otherwise shift left
assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, ~(XDenorm|XZero), XFrac[51:1]} : ShiftCnt[12] ? {{64+51{1'b0}}, ~XZero} : ShiftVal << ShiftCnt;
assign ShiftedManTmp = &ShiftCnt ? {{64-1{1'b0}}, XAssumed1E, XFracE[51:1]} : ShiftCnt[12] ? {{64+51{1'b0}}, ~XZeroE} : ShiftVal << ShiftCnt;
// truncate the shifted mantissa
assign ShiftedMan = ShiftedManTmp[64+51:50];
@ -139,7 +121,7 @@ module fcvt (
// calculate sticky bit
// - take into account the possible right shift from before
// - the sticky bit calculation covers three different sizes depending on the operation
assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFrac[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);
assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XFracE[0] | (FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]);
// determine guard, round, and least significant bit of the result
@ -152,23 +134,23 @@ module fcvt (
case (FrmE)
3'b000: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky&LSB));//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = (XSgn&FOpCtrlE[1]) | (ResSgn&FOpCtrlE[0]);//round down
3'b011: CalcPlus1 = (~XSgn&FOpCtrlE[1]) | (~ResSgn&FOpCtrlE[0]);//round up
3'b010: CalcPlus1 = (XSgnE&FOpCtrlE[1]) | (ResSgn&FOpCtrlE[0]);//round down
3'b011: CalcPlus1 = (~XSgnE&FOpCtrlE[1]) | (~ResSgn&FOpCtrlE[0]);//round up
3'b100: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky));//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
end
// don't round if the result is exact
assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZero&FOpCtrlE[1]);
assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZeroE&FOpCtrlE[1]);
// round the shifted mantissa
assign RoundedTmp = ShiftedMan[64+1:2] + Plus1;
assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + Plus1 : {{TmpExp, ShiftedMan[64+1:43]} + Plus1, 29'b0} ;
// fit the rounded result into the appropriate size and take the 2's complement if needed
assign Rounded = Res64 ? XSgn&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] :
XSgn ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]};
assign Rounded = Res64 ? XSgnE&FOpCtrlE[1] ? -RoundedTmp[63:0] : RoundedTmp[63:0] :
XSgnE ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]};
// extract the MSB and Sign for later use (will be used to determine underflow and overflow)
assign RoundMSB = Res64 ? RoundedTmp[64] : RoundedTmp[32];
@ -176,10 +158,10 @@ module fcvt (
// check if the result overflows
assign Of = (~XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgn&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgn&XInf) | XNaN;
assign Of = (~XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (~XSgnE&RoundSgn&~FOpCtrlE[2]) | (RoundMSB&(ShiftCnt==(Bits-1))) | (~XSgnE&XInfE) | XNaNE;
// check if the result underflows (this calculation changes if the result is signed or unsigned)
assign Uf = FOpCtrlE[2] ? XSgn&~XZero | (XSgn&XInf) | (XSgn&~XZero&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgn&XInf) | (XSgn&($signed(ShiftCnt) >= $signed(Bits))) | (XSgn&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgn | ShiftCnt[12]) ? {64{1'b0}} : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
assign Uf = FOpCtrlE[2] ? XSgnE&~XZeroE | (XSgnE&XInfE) | (XSgnE&~XZeroE&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgnE&XInfE) | (XSgnE&($signed(ShiftCnt) >= $signed(Bits))) | (XSgnE&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgnE | ShiftCnt[12]) ? {64{1'b0}} : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded;
// calculate the result's sign
assign SgnRes = ~FOpCtrlE[3] & FOpCtrlE[1];

View File

@ -31,36 +31,36 @@ module fhazard(
input logic [4:0] RdM, RdW,
input logic [2:0] FResultSelM,
output logic FStallD,
output logic [1:0] ForwardXE, ForwardYE, ForwardZE
output logic [1:0] FForwardXE, FForwardYE, FForwardZE
);
always_comb begin
// set ReadData as default
ForwardXE = 2'b00; // choose FRD1E
ForwardYE = 2'b00; // choose FRD2E
ForwardZE = 2'b00; // choose FRD3E
FForwardXE = 2'b00; // choose FRD1E
FForwardYE = 2'b00; // choose FRD2E
FForwardZE = 2'b00; // choose FRD3E
FStallD = 0;
if ((Adr1E == RdM) & FRegWriteM)
// if the result will be FResM
if(FResultSelM == 3'b100) ForwardXE = 2'b10; // choose FResM
if(FResultSelM == 3'b100) FForwardXE = 2'b10; // choose FResM
else FStallD = 1; // if the result won't be ready stall
else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
if ((Adr2E == RdM) & FRegWriteM)
// if the result will be FResM
if(FResultSelM == 3'b100) ForwardYE = 2'b10; // choose FResM
if(FResultSelM == 3'b100) FForwardYE = 2'b10; // choose FResM
else FStallD = 1; // if the result won't be ready stall
else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
if ((Adr3E == RdM) & FRegWriteM)
// if the result will be FResM
if(FResultSelM == 3'b100) ForwardZE = 2'b10; // choose FResM
if(FResultSelM == 3'b100) FForwardZE = 2'b10; // choose FResM
else FStallD = 1; // if the result won't be ready stall
else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
end

View File

@ -3,12 +3,23 @@ module fma(
input logic reset,
input logic FlushM,
input logic StallM,
input logic [63:0] SrcXE, SrcXM, // X
input logic [63:0] SrcYE, SrcYM, // Y
input logic [63:0] SrcZE, SrcZM, // Z
input logic FmtE, FmtM, // precision 1 = double 0 = single
input logic [2:0] FOpCtrlM, FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic XSgnE, YSgnE, ZSgnE,
input logic [10:0] XExpE, YExpE, ZExpE,
input logic [51:0] XFracE, YFracE, ZFracE,
input logic XSgnM, YSgnM, ZSgnM,
input logic [10:0] XExpM, YExpM, ZExpM,
input logic [51:0] XFracM, YFracM, ZFracM,
input logic XAssumed1E, YAssumed1E, ZAssumed1E,
input logic XDenormE, YDenormE, ZDenormE,
input logic XZeroE, YZeroE, ZZeroE,
input logic XNaNM, YNaNM, ZNaNM,
input logic XSNaNM, YSNaNM, ZSNaNM,
input logic XZeroM, YZeroM, ZZeroM,
input logic XInfM, YInfM, ZInfM,
input logic [10:0] BiasE,
output logic [63:0] FMAResM,
output logic [4:0] FMAFlgM);
@ -18,24 +29,23 @@ module fma(
logic [12:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic KillProdE, KillProdM;
logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM;
logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM;
logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM;
fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE, .FmtE, .ProdManE, .AlignedAddendE,
.ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
.XNaNE, .YNaNE, .ZNaNE );
fma1 fma1 (.XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE,
.BiasE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
.FOpCtrlE, .FmtE, .ProdManE, .AlignedAddendE,
.ProdExpE, .AddendStickyE, .KillProdE);
flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE},
{AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM});
flopenrc #(2) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE},
{AddendStickyM, KillProdM});
fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM, .FrmM, .FmtM,
fma2 fma2(.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, .YFracM, .ZFracM,
.FOpCtrlM, .FrmM, .FmtM,
.ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM,
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM,
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM,
.FMAResM, .FMAFlgM);
endmodule
@ -43,98 +53,27 @@ endmodule
module fma1(
input logic [63:0] X, // X
input logic [63:0] Y, // Y
input logic [63:0] Z, // Z
// input logic XSgnE, YSgnE, ZSgnE,
input logic [10:0] XExpE, YExpE, ZExpE,
input logic [51:0] XFracE, YFracE, ZFracE,
input logic XAssumed1E, YAssumed1E, ZAssumed1E,
input logic XDenormE, YDenormE, ZDenormE,
input logic XZeroE, YZeroE, ZZeroE,
input logic [10:0] BiasE,
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic FmtE, // precision 1 = double 0 = single
output logic [105:0] ProdManE, // 1.X frac * 1.Y frac
output logic [161:0] AlignedAddendE, // Z aligned for addition
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias
output logic AddendStickyE, // sticky bit that is calculated during alignment
output logic KillProdE, // set the product to zero before addition if the product is too small to matter
output logic XZeroE, YZeroE, ZZeroE, // inputs are zero
output logic XInfE, YInfE, ZInfE, // inputs are infinity
output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN
output logic KillProdE // set the product to zero before addition if the product is too small to matter
);
logic [51:0] XFrac,YFrac,ZFrac; // input fraction
logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one)
logic [12:0] XExp,YExp,ZExp; // input exponents
logic XSgn,YSgn,ZSgn; // input signs
logic [12:0] AlignCnt; // how far to shift the addend to align with the product
logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit
logic [213:0] ZManPreShifted; // input to the alignment shifter
logic XDenorm, YDenorm, ZDenorm; // inputs are denormal
logic [63:0] Addend; // value to add (Z or zero)
logic [12:0] Bias; // 1023 for double, 127 for single
logic XExpZero, YExpZero, ZExpZero; // input exponent zero
logic XFracZero, YFracZero, ZFracZero; // input fraction zero
logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s
///////////////////////////////////////////////////////////////////////////////
// split inputs into the sign bit, fraction, and exponent to handle single or double precision
// - single precision is in the top half of the inputs
///////////////////////////////////////////////////////////////////////////////
// Set addend to zero if FMUL instruction
assign Addend = FOpCtrlE[2] ? 64'b0 : Z;
assign XSgn = X[63];
assign YSgn = Y[63];
assign ZSgn = Addend[63];
assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]};
assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]};
assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]};
assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0};
assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0};
assign XMan = {~XExpZero, XFrac};
assign YMan = {~YExpZero, YFrac};
assign ZMan = {~ZExpZero, ZFrac};
assign Bias = FmtE ? 13'h3ff : 13'h7f;
///////////////////////////////////////////////////////////////////////////////
// determine if an input is a special value
///////////////////////////////////////////////////////////////////////////////
assign XExpZero = ~|XExp;
assign YExpZero = ~|YExp;
assign ZExpZero = ~|ZExp;
assign XFracZero = ~|XFrac;
assign YFracZero = ~|YFrac;
assign ZFracZero = ~|ZFrac;
assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0];
assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0];
assign XNaNE = XExpMax & ~XFracZero;
assign YNaNE = YExpMax & ~YFracZero;
assign ZNaNE = ZExpMax & ~ZFracZero;
assign XDenorm = XExpZero & ~XFracZero;
assign YDenorm = YExpZero & ~YFracZero;
assign ZDenorm = ZExpZero & ~ZFracZero;
assign XInfE = XExpMax & XFracZero;
assign YInfE = YExpMax & YFracZero;
assign ZInfE = ZExpMax & ZFracZero;
assign XZeroE = XExpZero & XFracZero;
assign YZeroE = YExpZero & YFracZero;
assign ZZeroE = ZExpZero & ZFracZero;
///////////////////////////////////////////////////////////////////////////////
// Calculate the product
// - When multiplying two fp numbers, add the exponents
@ -145,11 +84,11 @@ module fma1(
// verilator lint_off WIDTH
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
XExp + YExp - Bias + XDenorm + YDenorm;
XExpE + YExpE - BiasE + XDenormE + YDenormE;
// Calculate the product's mantissa
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
assign ProdManE = XMan * YMan;
assign ProdManE = {XAssumed1E, XFracE} * {YAssumed1E, YFracE};
@ -168,7 +107,7 @@ module fma1(
// - positive means the product is larger, so shift Z right
// - Denormal numbers have an exponent value of 1, however they are
// represented with an exponent of 0. add one to the exponent if it is a denormal number
assign AlignCnt = ProdExpE - ZExp - ZDenorm;
assign AlignCnt = ProdExpE - ZExpE - ZDenormE;
// verilator lint_on WIDTH
@ -177,7 +116,7 @@ module fma1(
// |1'b0| addend |
// the 1'b0 before the addend is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
assign ZManPreShifted = {55'b0, ZMan, 106'b0};
assign ZManPreShifted = {55'b0, {ZAssumed1E, ZFracE}, 106'b0};
always_comb
begin
@ -187,7 +126,7 @@ module fma1(
// | addend |
if ($signed(AlignCnt) <= $signed(-13'd56)) begin
KillProdE = 1;
ZManShifted = ZManPreShifted;//{107'b0, ZMan, 54'b0};
ZManShifted = ZManPreShifted;//{107'b0, {~ZAssumed1E, ZFrac}, 54'b0};
AddendStickyE = ~(XZeroE|YZeroE);
// If the Addend is shifted left (negative AlignCnt)
@ -229,10 +168,10 @@ endmodule
module fma2(
input logic [63:0] X, // X
input logic [63:0] Y, // Y
input logic [63:0] Z, // Z
input logic XSgnM, YSgnM, ZSgnM,
input logic [10:0] XExpM, YExpM, ZExpM,
input logic [51:0] XFracM, YFracM, ZFracM,
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic FmtM, // precision 1 = double 0 = single
@ -244,6 +183,7 @@ module fma2(
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
output logic [63:0] FMAResM, // FMA final result
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
@ -252,8 +192,6 @@ module fma2(
logic [51:0] ResultFrac; // Result fraction
logic [10:0] ResultExp; // Result exponent
logic ResultSgn; // Result sign
logic [10:0] ZExp; // input exponent
logic XSgn, YSgn, ZSgn; // input sign
logic PSgn; // product sign
logic [105:0] ProdMan2; // product being added
logic [162:0] AlignedAddend2; // possibly inverted aligned Z
@ -289,28 +227,10 @@ module fma2(
logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
///////////////////////////////////////////////////////////////////////////////
// Select input fields
// The following logic duplicates fma1 because it's cheaper to recompute than provide registers
///////////////////////////////////////////////////////////////////////////////
// Set addend to zero if FMUL instruction
assign Addend = FOpCtrlM[2] ? 64'b0 : Z;
// split inputs into the sign bit, and exponent to handle single or double precision
// - single precision is in the top half of the inputs
assign XSgn = X[63];
assign YSgn = Y[63];
assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction
assign ZExp = FmtM ? Addend[62:52] : {3'b0, Addend[62:55]};
// Calculate the product's sign
// Negate product's sign if FNMADD or FNMSUB
assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1];
assign PSgn = XSgnM ^ YSgnM ^ FOpCtrlM[1];
@ -321,7 +241,7 @@ module fma2(
// Negate Z when doing one of the following operations:
// -prod + Z
// prod - Z
assign InvZ = ZSgn ^ PSgn;
assign InvZ = ZSgnM ^ PSgn;
// Choose an inverted or non-inverted addend - the one is added later
assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM};
@ -376,7 +296,7 @@ module fma2(
assign FracLen = FmtM ? 13'd52 : 13'd23;
// Determine if the result is denormal
assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
assign SumExpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
// Determine the shift needed for denormal results
@ -501,13 +421,13 @@ module fma2(
// Determine the sign if the sum is zero
// if cancelation then 0 unless round to -infinity
// otherwise psign
assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn;
assign ZeroSgn = (PSgn^ZSgnM)&~Underflow ? FrmM == 3'b010 : PSgn;
// is the result negative
// if p - z is the Sum negative
// if -p + z is the Sum positive
// if -p - z then the Sum is negative
assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn);
assign ResultSgnTmp = InvZ&(ZSgnM)&NegSum | InvZ&PSgn&~NegSum | ((ZSgnM)&PSgn);
assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
@ -525,9 +445,8 @@ module fma2(
// 2) Inf - Inf (unless x or y is NaN)
// 3) 0 * Inf
assign MaxExp = FmtM ? 13'd2047 : 13'd255;
assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) :
(XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]);
assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgnM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
// Set Overflow flag if the number is too big to be represented
// - Don't set the overflow flag if an overflowed result isn't output
@ -555,28 +474,28 @@ module fma2(
///////////////////////////////////////////////////////////////////////////////
// Select the result
///////////////////////////////////////////////////////////////////////////////
assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]};
assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]};
assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]};
assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XFracM[50:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XFracM[50:29]};
assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YFracM[50:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YFracM[50:29]};
assign ZNaNResult = FmtM ? {ZSgnM, ZExpM, 1'b1, ZFracM[50:0]} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], 1'b1, ZFracM[50:29]};
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} :
{ResultSgn, 11'h7ff, 52'b0} :
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} :
{ResultSgn, 8'hff, 55'b0};
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0};
assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0};
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0};
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
{{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0};
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZFracM} - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[7:0], ZFracM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}};
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}};
// Final result priority select, highest priority first: NaN operands, invalid operation,
// infinite operands, overflow, killed product, underflow, then the normal rounded result.
// Each infinity arm must be guarded by its own operand's Inf flag (X, then Y, then Z);
// guarding all three with XInfM made the Y and Z arms unreachable.
assign FMAResM = XNaNM ? XNaNResult :
YNaNM ? YNaNResult :
ZNaNM ? ZNaNResult :
Invalid ? InvalidResult : // has to be before inf
XInfM ? {PSgn, X[62:0]} :
YInfM ? {PSgn, Y[62:0]} :
ZInfM ? {ZSgn, Addend[62:0]} :
XInfM ? FmtM ? {PSgn, XExpM, XFracM} : {{32{1'b1}}, PSgn, XExpM[7:0], XFracM[51:29]} :
YInfM ? FmtM ? {PSgn, YExpM, YFracM} : {{32{1'b1}}, PSgn, YExpM[7:0], YFracM[51:29]} :
ZInfM ? FmtM ? {ZSgnM, ZExpM, ZFracM} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], ZFracM[51:29]} :
Overflow ? OverflowResult :
KillProdM ? KillProdResult : // has to be after Underflow
Underflow & ~ResultDenorm ? UnderflowResult :
FmtM ? {ResultSgn, ResultExp, ResultFrac} :
{ResultSgn, ResultExp[7:0], ResultFrac, 3'b0};
{{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]};

View File

@ -22,8 +22,8 @@
// Step 7: Put quotient/remainder onto output.
//
`timescale 1ps/1ps
module fpdiv (done, AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn,
// `timescale 1ps/1ps
module fpdiv (AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn,
start, reset, clk);
input [63:0] op1; // 1st input operand (A)
@ -40,7 +40,8 @@ module fpdiv (done, AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, Un
output [63:0] AS_Result; // Result of operation
output [4:0] Flags; // IEEE exception flags
output Denorm; // Denorm on input or output
output done;
logic done;
// output done;
supply1 vdd;
supply0 vss;

View File

@ -34,6 +34,7 @@ module fpu (
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW,
input logic [4:0] RdE, RdM, RdW,
output logic FRegWriteM,
output logic FStallD, // Stall the decode stage
output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
@ -52,7 +53,7 @@ module fpu (
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division
logic FWriteIntD; // Write to integer register
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // Input3 forwarding mux control signal
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM;
@ -60,13 +61,34 @@ module fpu (
logic [4:0] Adr1E, Adr2E, Adr3E;
// regfile signals
logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`XLEN-1:0] SrcXMAligned;
logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding)
logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding)
logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding)
logic [`XLEN-1:0] FSrcXMAligned;
logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding)
logic [63:0] FSrcYE; // Input 2 to the various units (after forwarding)
logic [63:0] FSrcZE; // Input 3 to the various units (after forwarding)
// unpacking signals
logic XSgnE, YSgnE, ZSgnE;
logic [10:0] XExpE, YExpE, ZExpE;
logic [51:0] XFracE, YFracE, ZFracE;
logic XAssumed1E, YAssumed1E, ZAssumed1E;
logic XNaNE, YNaNE, ZNaNE;
logic XSNaNE, YSNaNE, ZSNaNE;
logic XDenormE, YDenormE, ZDenormE;
logic XZeroE, YZeroE, ZZeroE;
logic [10:0] BiasE;
logic XInfE, YInfE, ZInfE;
logic XExpMaxE;
logic XNormE;
logic XSgnM, YSgnM, ZSgnM;
logic [10:0] XExpM, YExpM, ZExpM;
logic [51:0] XFracM, YFracM, ZFracM;
logic XNaNM, YNaNM, ZNaNM;
logic XSNaNM, YSNaNM, ZSNaNM;
logic XZeroM, YZeroM, ZZeroM;
logic XInfM, YInfM, ZInfM;
// div/sqrt signals
logic [63:0] FDivResultM, FDivResultW;
@ -131,26 +153,28 @@ module fpu (
flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE});
flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
//EXECUTION STAGE
// Hazard unit for FPU
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, .FStallD,
.ForwardXE, .ForwardYE, .ForwardZE);
.FForwardXE, .FForwardYE, .FForwardZE);
// forwarding muxs
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, ForwardXE, SrcXE);
mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, ForwardYE, SrcYE);
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, ForwardZE, SrcZE);
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FSrcYE);
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FSrcZE);
unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
// first of two-stage instance of floating-point fused multiply-add unit
fma fma (.clk, .reset, .FlushM, .StallM,
.SrcXE, .SrcYE, .SrcZE, .SrcXM, .SrcYM, .SrcZM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XFracE, .YFracE, .ZFracE, .XAssumed1E, .YAssumed1E, .ZAssumed1E, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE,
.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XFracM, .YFracM, .ZFracM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
// .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM,
.FOpCtrlE(FOpCtrlE[2:0]), .FOpCtrlM(FOpCtrlM[2:0]),
.FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM);
@ -163,43 +187,50 @@ module fpu (
.ECLK(fpdivClk));
// capture the inputs for div/sqrt
flopenrc #(64) reg_input1 (.d(SrcXE), .q(DivInput1E),
flopenrc #(64) reg_input1 (.d(FSrcXE), .q(DivInput1E),
.en(~HoldInputs), .clear(FDivSqrtDoneE),
.reset(reset), .clk(clk));
flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E),
flopenrc #(64) reg_input2 (.d(FSrcYE), .q(DivInput2E),
.en(~HoldInputs), .clear(FDivSqrtDoneE),
.reset(reset), .clk(clk));
fpdiv fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E,
.FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM,
.FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset);
// fpdiv fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E,
// .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM,
// .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset);
assign FDivBusyE = 0;
// first of two-stage instance of floating-point add/cvt unit
faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM,
.SrcXE, .SrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM);
.FSrcXE, .FSrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM);
// first and only instance of floating-point comparator
fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE);
fcmp fcmp (.op1({XSgnE,XExpE,XFracE}), .op2({YSgnE,YExpE,YFracE}), .FSrcXE, .FSrcYE, .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .Invalid(CmpNVE), .CmpResE, .XNaNE, .YNaNE, .XZeroE, .YZeroE);
// first and only instance of floating-point sign converter
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE);
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .XExpE, .XFracE, .FmtE, .SgnResE, .SgnNVE, .XExpMaxE);
// first and only instance of floating-point classify unit
fclassify fclassify (.SrcXE, .FmtE, .ClassResE);
fclassify fclassify (.XSgnE, .XFracE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE);
fcvt fcvt (.X(SrcXE), .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
fcvt fcvt (.XSgnE, .XExpE, .XFracE, .XAssumed1E, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE);
// output for store instructions
// mux2 #(`XLEN) FWriteDataMux({{`XLEN-32{1'b0}}, SrcYE[63:32]}, SrcYE[63:64-`XLEN], FmtE, FWriteDataE);
assign FWriteDataE = SrcYE[`XLEN-1:0];
// mux2 #(`XLEN) FWriteDataMux({{`XLEN-32{1'b0}}, FSrcYE[63:32]}, FSrcYE[63:64-`XLEN], FmtE, FWriteDataE);
assign FWriteDataE = FSrcYE[`XLEN-1:0];
//*****************
// E/M pipe registers
//*****************
flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM);
flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM);
flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM);
flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
// flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FSrcYE, FSrcYM);
// flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FSrcZE, FSrcZM);
flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XFracE}, {XSgnM,XExpM,XFracM});
flopenrc #(64) EMFpReg5(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YFracE}, {YSgnM,YExpM,YFracM});
flopenrc #(64) EMFpReg6(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZFracE}, {ZSgnM,ZExpM,ZFracM});
flopenrc #(12) EMFpReg7(clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM);
@ -211,9 +242,9 @@ module fpu (
flopenrc #(64) EMRegCvt1(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
flopenrc #(5) EMRegCvt2(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM});
flopenrc #(17) EMCtrlReg(clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
@ -221,8 +252,8 @@ module fpu (
mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
// mux2 #(`XLEN) SrcXAlignedMux({{`XLEN-32{1'b0}}, SrcXM[63:32]}, SrcXM[63:64-`XLEN], FmtM, SrcXMAligned);
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
// mux2 #(`XLEN) FSrcXAlignedMux({{`XLEN-32{1'b0}}, FSrcXM[63:32]}, FSrcXM[63:64-`XLEN], FmtM, FSrcXMAligned);
mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
// Align SrcA to MSB when single precision
mux2 #(64) SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM);
@ -241,9 +272,9 @@ module fpu (
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, RdM, FmtM, FWriteIntM},
{FRegWriteW, FResultSelW, RdW, FmtW, FWriteIntW});
flopenrc #(6) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM, FWriteIntM},
{FRegWriteW, FResultSelW, FmtW, FWriteIntW});
//#########################################
// BEGIN WRITEBACK STAGE

View File

@ -1,30 +1,34 @@
//performs the fsgnj/fsgnjn/fsgnjx RISCV instructions
// NOTE(review): this listing is a diff hunk, so the removed SrcXE/SrcYE-based
// lines appear next to their XSgnE/XExpE/XFracE-based replacements.
// Inputs (replacement version): the sign bits of X and Y, the exponent and
// fraction of X, XExpMaxE, the format select FmtE and the 2-bit op code.
// Outputs: the 64-bit result SgnResE and the invalid flag SgnNVE.
module fsgn (
input logic [63:0] SrcXE, SrcYE,
module fsgn (
input logic XSgnE, YSgnE,
input logic [10:0] XExpE,
input logic [51:0] XFracE,
input logic XExpMaxE,
input logic FmtE,
input logic [1:0] SgnOpCodeE,
output logic [63:0] SgnResE,
output logic SgnNVE);
logic AonesExp;
logic ResSgn;
//op code designation:
//
//00 - fsgnj - directly copy over sign value of SrcYE
//01 - fsgnjn - negate sign value of SrcYE
//10 - fsgnjx - XOR sign values of SrcXE & SrcYE
//00 - fsgnj - directly copy over sign value of FSrcYE
//01 - fsgnjn - negate sign value of FSrcYE
//10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE
//
assign SgnResE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]);
assign SgnResE[62:0] = SrcXE[62:0];
// Result sign: XOR of both signs when SgnOpCodeE[1] is set; otherwise the
// sign of Y, inverted when SgnOpCodeE[0] is set.
assign ResSgn = SgnOpCodeE[1] ? (XSgnE ^ YSgnE) : (YSgnE ^ SgnOpCodeE[0]);
// FmtE = 1: repack {sign, 11-bit exponent, 52-bit fraction}.
// FmtE = 0: upper 32 bits are all ones, followed by the sign, the low 8
// exponent bits and the top 23 fraction bits.
assign SgnResE = FmtE ? {ResSgn, XExpE, XFracE} : {{32{1'b1}}, ResSgn, XExpE[7:0], XFracE[51:29]};
//If the exponent is all ones, then the value is either Inf or NaN,
//both of which will produce a QNaN/SNaN value of some sort. This will
//set the invalid flag high.
assign AonesExp = SrcXE[62]&SrcXE[61]&SrcXE[60]&SrcXE[59]&SrcXE[58]&SrcXE[57]&SrcXE[56]&SrcXE[55]&SrcXE[54]&SrcXE[53]&SrcXE[52];
//the only flag that can occur during this operation is invalid
//due to changing sign on already existing NaN
assign SgnNVE = AonesExp & SgnResE[63];
// NOTE(review): when FmtE = 0, SgnResE[63] is the top bit of the 32 ones
// above, so this reduces to XExpMaxE regardless of ResSgn. Confirm whether
// ResSgn was intended here.
assign SgnNVE = XExpMaxE & SgnResE[63];
endmodule

View File

@ -1,3 +1,5 @@
`timescale 1ps/1ps
module fsm_div (done, load_rega, load_regb, load_regc,
load_regd, load_regr, load_regs,
sel_muxa, sel_muxb, sel_muxr,

View File

@ -1,33 +1,33 @@
module sbtm (input logic [11:0] a, output logic [10:0] ia_out);
// module sbtm (input logic [11:0] a, output logic [10:0] ia_out);
// bit partitions
logic [3:0] x0;
logic [2:0] x1;
logic [3:0] x2;
logic [2:0] x2_1cmp;
// mem outputs
logic [12:0] y0;
logic [4:0] y1;
// input to CPA
logic [14:0] op1;
logic [14:0] op2;
logic [14:0] p;
// // bit partitions
// logic [3:0] x0;
// logic [2:0] x1;
// logic [3:0] x2;
// logic [2:0] x2_1cmp;
// // mem outputs
// logic [12:0] y0;
// logic [4:0] y1;
// // input to CPA
// logic [14:0] op1;
// logic [14:0] op2;
// logic [14:0] p;
assign x0 = a[10:7];
assign x1 = a[6:4];
assign x2 = a[3:0];
// assign x0 = a[10:7];
// assign x1 = a[6:4];
// assign x2 = a[3:0];
sbtm_a0 mem1 ({x0, x1}, y0);
// 1s cmp per sbtm/stam
assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0];
sbtm_a1 mem2 ({x0, x2_1cmp}, y1);
assign op1 = {1'b0, y0, 1'b0};
// 1s cmp per sbtm/stam
assign op2 = x2[3] ? {1'b1, {8{1'b1}}, ~y1, 1'b1} :
{1'b0, 8'b0, y1, 1'b1};
// CPA
adder #(15) cp1 (op1, op2, 1'b0, p, cout);
//assign ia_out = {p[14:4], {53{1'b0}}};
assign ia_out = p[14:4];
// sbtm_a0 mem1 ({x0, x1}, y0);
// // 1s cmp per sbtm/stam
// assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0];
// sbtm_a1 mem2 ({x0, x2_1cmp}, y1);
// assign op1 = {1'b0, y0, 1'b0};
// // 1s cmp per sbtm/stam
// assign op2 = x2[3] ? {1'b1, {8{1'b1}}, ~y1, 1'b1} :
// {1'b0, 8'b0, y1, 1'b1};
// // CPA
// adder #(15) cp1 (op1, op2, 1'b0, p, cout);
// //assign ia_out = {p[14:4], {53{1'b0}}};
// assign ia_out = p[14:4];
endmodule // sbtm
// endmodule // sbtm

View File

@ -1,39 +1,39 @@
module sbtm2 (input logic [11:0] a, output logic [10:0] y);
// module sbtm2 (input logic [11:0] a, output logic [10:0] y);
// bit partitions
logic [4:0] x0;
logic [2:0] x1;
logic [3:0] x2;
logic [2:0] x2_1cmp;
// mem outputs
logic [12:0] y0;
logic [5:0] y1;
// input to CPA
logic [14:0] op1;
logic [14:0] op2;
logic [14:0] p;
logic cout;
// // bit partitions
// logic [4:0] x0;
// logic [2:0] x1;
// logic [3:0] x2;
// logic [2:0] x2_1cmp;
// // mem outputs
// logic [12:0] y0;
// logic [5:0] y1;
// // input to CPA
// logic [14:0] op1;
// logic [14:0] op2;
// logic [14:0] p;
// logic cout;
assign x0 = a[11:7];
assign x1 = a[6:4];
assign x2 = a[3:0];
// assign x0 = a[11:7];
// assign x1 = a[6:4];
// assign x2 = a[3:0];
sbtm_a2 mem1 ({x0[3:0], x1}, y0);
assign op1 = {1'b0, y0, 1'b0};
// sbtm_a2 mem1 ({x0[3:0], x1}, y0);
// assign op1 = {1'b0, y0, 1'b0};
// 1s cmp per sbtm/stam
assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0];
sbtm_a3 mem2 ({x0, x2_1cmp}, y1);
// 1s cmp per sbtm/stam
assign op2 = x2[3] ? {{8{1'b1}}, ~y1, 1'b1} :
{8'b0, y1, 1'b1};
// // 1s cmp per sbtm/stam
// assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0];
// sbtm_a3 mem2 ({x0, x2_1cmp}, y1);
// // 1s cmp per sbtm/stam
// assign op2 = x2[3] ? {{8{1'b1}}, ~y1, 1'b1} :
// {8'b0, y1, 1'b1};
// CPA
bk15 cp1 (cout, p, op1, op2, 1'b0);
assign y = p[14:4];
// // CPA
// bk15 cp1 (cout, p, op1, op2, 1'b0);
// assign y = p[14:4];
endmodule // sbtm2
// endmodule // sbtm2

View File

@ -1,37 +1,37 @@
module sbtm2 (input logic [11:0] a, output logic [10:0] y);
// module sbtm2 (input logic [11:0] a, output logic [10:0] y);
// bit partitions
logic [4:0] x0;
logic [2:0] x1;
logic [3:0] x2;
logic [2:0] x2_1cmp;
// mem outputs
logic [13:0] y0;
logic [5:0] y1;
// input to CPA
logic [14:0] op1;
logic [14:0] op2;
logic [14:0] p;
// // bit partitions
// logic [4:0] x0;
// logic [2:0] x1;
// logic [3:0] x2;
// logic [2:0] x2_1cmp;
// // mem outputs
// logic [13:0] y0;
// logic [5:0] y1;
// // input to CPA
// logic [14:0] op1;
// logic [14:0] op2;
// logic [14:0] p;
assign x0 = a[11:7];
assign x1 = a[6:4];
assign x2 = a[3:0];
// assign x0 = a[11:7];
// assign x1 = a[6:4];
// assign x2 = a[3:0];
sbtm_a2 mem1 ({x0, x1}, y0);
assign op1 = {y0, 1'b0};
// sbtm_a2 mem1 ({x0, x1}, y0);
// assign op1 = {y0, 1'b0};
// 1s cmp per sbtm/stam
assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0];
sbtm_a3 mem2 ({x0, x2_1cmp}, y1);
// 1s cmp per sbtm/stam
assign op2 = x2[3] ? {{8{1'b1}}, ~y1, 1'b1} :
{8'b0, y1, 1'b1};
// // 1s cmp per sbtm/stam
// assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0];
// sbtm_a3 mem2 ({x0, x2_1cmp}, y1);
// // 1s cmp per sbtm/stam
// assign op2 = x2[3] ? {{8{1'b1}}, ~y1, 1'b1} :
// {8'b0, y1, 1'b1};
// CPA
adder #(15) cp1 (op1, op2, 1'b0, p, cout);
assign y = p[14:4];
// // CPA
// adder #(15) cp1 (op1, op2, 1'b0, p, cout);
// assign y = p[14:4];
endmodule // sbtm2
// endmodule // sbtm2

View File

@ -1,4 +1,4 @@
module sbtm_a2 (input logic [7:0] a,
module sbtm_a4 (input logic [7:0] a,
output logic [13:0] y);
always_comb
case(a)

View File

@ -1,4 +1,4 @@
module sbtm_a3 (input logic [7:0] a,
module sbtm_a5 (input logic [7:0] a,
output logic [5:0] y);
always_comb
case(a)

View File

@ -0,0 +1,77 @@
// unpacking: splits the raw operands X, Y and Z into sign, exponent and
// fraction fields and derives per-operand classification flags.
//
// Inputs
//   X, Y, Z   raw 64-bit operands
//   FmtE      1: fields are taken from bits [63], [62:52], [51:0]
//             0: fields are taken from bits [31], [30:23], [22:0]
//   FOpCtrlE  bit 2 replaces Z with zero (multiply; Z is only used by the FMA)
//             bit 0 inverts the sign reported for Z
// Outputs
//   *SgnE      sign bit
//   *ExpE      exponent, zero-extended to 11 bits when FmtE = 0
//   *FracE     fraction, left-aligned into 52 bits when FmtE = 0
//   *Assumed1E exponent is non-zero (implicit leading one is present)
//   XNormE     X exponent is neither all zeros nor all ones
//   *NaNE / *SNaNE / *DenormE / *ZeroE / *InfE  operand classification
//   XExpMaxE   X exponent is all ones for the selected format
//   BiasE      exponent bias for the selected format (0x3ff or 0x7f)
module unpacking (
    input  logic [63:0] X, Y, Z,
    input  logic        FmtE,
    input  logic [2:0]  FOpCtrlE,
    output logic        XSgnE, YSgnE, ZSgnE,
    output logic [10:0] XExpE, YExpE, ZExpE,
    output logic [51:0] XFracE, YFracE, ZFracE,
    output logic        XAssumed1E, YAssumed1E, ZAssumed1E,
    output logic        XNormE,
    output logic        XNaNE, YNaNE, ZNaNE,
    output logic        XSNaNE, YSNaNE, ZSNaNE,
    output logic        XDenormE, YDenormE, ZDenormE,
    output logic        XZeroE, YZeroE, ZZeroE,
    output logic [10:0] BiasE,
    output logic        XInfE, YInfE, ZInfE,
    output logic        XExpMaxE
);

    logic        XFracZero, YFracZero, ZFracZero; // input fraction zero
    logic        XExpZero, YExpZero, ZExpZero;    // input exponent zero
    logic [63:0] Addend;                          // value to add (Z or zero)
    logic        YExpMaxE, ZExpMaxE;              // input exponent all 1s

    // Z is only used in the FMA, and is set to zero if a multiply operation
    assign Addend = FOpCtrlE[2] ? 64'b0 : Z;

    // sign bits; the addend sign is additionally inverted by FOpCtrlE[0]
    assign XSgnE = FmtE ? X[63] : X[31];
    assign YSgnE = FmtE ? Y[63] : Y[31];
    assign ZSgnE = FmtE ? Addend[63]^FOpCtrlE[0] : Addend[31]^FOpCtrlE[0];

    // exponents; the 8-bit field is zero-extended to 11 bits
    assign XExpE = FmtE ? X[62:52]      : {3'b0, X[30:23]};
    assign YExpE = FmtE ? Y[62:52]      : {3'b0, Y[30:23]};
    assign ZExpE = FmtE ? Addend[62:52] : {3'b0, Addend[30:23]};

    // fractions; the 23-bit field is left-aligned so its MSB lands in bit 51
    assign XFracE = FmtE ? X[51:0]      : {X[22:0], 29'b0};
    assign YFracE = FmtE ? Y[51:0]      : {Y[22:0], 29'b0};
    assign ZFracE = FmtE ? Addend[51:0] : {Addend[22:0], 29'b0};

    // a non-zero exponent means the implicit leading one is present
    assign XAssumed1E = |XExpE;
    assign YAssumed1E = |YExpE;
    assign ZAssumed1E = |ZExpE;

    assign XExpZero = ~XAssumed1E;
    assign YExpZero = ~YAssumed1E;
    assign ZExpZero = ~ZAssumed1E;

    assign XFracZero = ~|XFracE;
    assign YFracZero = ~|YFracE;
    assign ZFracZero = ~|ZFracE;

    // only the low 8 exponent bits are significant when FmtE = 0
    assign XExpMaxE = FmtE ? &XExpE[10:0] : &XExpE[7:0];
    assign YExpMaxE = FmtE ? &YExpE[10:0] : &YExpE[7:0];
    assign ZExpMaxE = FmtE ? &ZExpE[10:0] : &ZExpE[7:0];

    assign XNormE = ~(XExpMaxE|XExpZero);

    assign XNaNE = XExpMaxE & ~XFracZero;
    assign YNaNE = YExpMaxE & ~YFracZero;
    assign ZNaNE = ZExpMaxE & ~ZFracZero;

    // A signaling NaN is a NaN whose fraction MSB (the quiet bit) is clear.
    // The fraction is left-aligned above, so bit 51 is the quiet bit for both
    // formats. The exponent vectors are only [10:0], so indexing them at 51
    // would be an out-of-range select and never test the quiet bit.
    assign XSNaNE = XNaNE & ~XFracE[51];
    assign YSNaNE = YNaNE & ~YFracE[51];
    assign ZSNaNE = ZNaNE & ~ZFracE[51];

    assign XDenormE = XExpZero & ~XFracZero;
    assign YDenormE = YExpZero & ~YFracZero;
    assign ZDenormE = ZExpZero & ~ZFracZero;

    assign XInfE = XExpMaxE & XFracZero;
    assign YInfE = YExpMaxE & YFracZero;
    assign ZInfE = ZExpMaxE & ZFracZero;

    assign XZeroE = XExpZero & XFracZero;
    assign YZeroE = YExpZero & YFracZero;
    assign ZZeroE = ZExpZero & ZFracZero;

    // literals sized to the 11-bit port to avoid implicit truncation
    assign BiasE = FmtE ? 11'h3ff : 11'h07f;

endmodule

View File

@ -42,6 +42,7 @@ module ieu (
output logic MulDivE, W64E,
output logic [2:0] Funct3E,
output logic [`XLEN-1:0] SrcAE, SrcBE,
output logic [4:0] RdE,
input logic FWriteIntM,
// Memory stage interface
@ -53,12 +54,14 @@ module ieu (
output logic [2:0] Funct3M, // size and signedness to LSU
output logic [`XLEN-1:0] SrcAM, // to privilege and fpu
output logic [4:0] RdM,
input logic DataAccessFaultM,
input logic [`XLEN-1:0] FIntResM,
// Writeback stage
input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW,
input logic FWriteIntW,
output logic [4:0] RdW,
// input logic [`XLEN-1:0] PCLinkW,
output logic InstrValidM,
// hazards
@ -82,7 +85,7 @@ module ieu (
logic InstrValidW;
// forwarding signals
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW;
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E;
logic [1:0] ForwardAE, ForwardBE;
logic RegWriteM, RegWriteW;
logic MemReadE, CSRReadE;

View File

@ -94,6 +94,7 @@ module wallypipelinedhart
// floating point unit signals
logic [2:0] FRM_REGW;
logic [1:0] FMemRWM, FMemRWE;
logic [4:0] RdE, RdM, RdW;
logic FStallD;
logic FWriteIntE, FWriteIntM, FWriteIntW;
logic [`XLEN-1:0] FWriteDataE;

View File

@ -57,12 +57,8 @@ module testbench();
string tests32f[] = '{
"rv32f/I-FADD-S-01", "2000",
"rv32f/I-FCLASS-S-01", "2000",
"rv32f/I-FCVT-S-L-01", "2000",
"rv32f/I-FCVT-S-LU-01", "2000",
"rv32f/I-FCVT-S-W-01", "2000",
"rv32f/I-FCVT-S-WU-01", "2000",
"rv32f/I-FCVT-L-S-01", "2000",
"rv32f/I-FCVT-LU-S-01", "2000",
"rv32f/I-FCVT-W-S-01", "2000",
"rv32f/I-FCVT-WU-S-01", "2000",
// "rv32f/I-FDIV-S-01", "2000",