mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
renamed top level FPU wires
This commit is contained in:
parent
33cd133a65
commit
e7190b0690
@ -6,16 +6,15 @@ module fctrl (
|
||||
input logic [2:0] Funct3D,
|
||||
input logic [2:0] FRM_REGW,
|
||||
output logic IllegalFPUInstrD,
|
||||
output logic FRegWriteD,
|
||||
output logic DivSqrtStartD,
|
||||
//output logic [2:0] regSelD,
|
||||
output logic FWriteEnD,
|
||||
output logic FDivStartD,
|
||||
output logic [2:0] FResultSelD,
|
||||
output logic [3:0] OpCtrlD,
|
||||
output logic [3:0] FOpCtrlD,
|
||||
output logic FmtD,
|
||||
output logic [2:0] FrmD,
|
||||
output logic [1:0] FMemRWD,
|
||||
output logic OutputInput2D,
|
||||
output logic In2UsedD, In3UsedD,
|
||||
output logic FOutputInput2D,
|
||||
output logic FInput2UsedD, FInput3UsedD,
|
||||
output logic FWriteIntD);
|
||||
|
||||
|
||||
@ -102,9 +101,9 @@ module fctrl (
|
||||
end
|
||||
end
|
||||
|
||||
assign OutputInput2D = OpD == 7'b0100111;
|
||||
assign FOutputInput2D = OpD == 7'b0100111;
|
||||
|
||||
assign FMemRWD[0] = OutputInput2D;
|
||||
assign FMemRWD[0] = FOutputInput2D;
|
||||
assign FMemRWD[1] = OpD == 7'b0000111;
|
||||
|
||||
|
||||
@ -131,7 +130,7 @@ module fctrl (
|
||||
//this value is used enough to be shorthand
|
||||
|
||||
//if op is div/sqrt - start div/sqrt
|
||||
assign DivSqrtStartD = ~|FResultSelD; // is FResultSelD == 000
|
||||
assign FDivStartD = ~|FResultSelD; // is FResultSelD == 000
|
||||
|
||||
//operation control for each fp operation
|
||||
//has to be expanded over standard to account for
|
||||
@ -144,7 +143,7 @@ module fctrl (
|
||||
//version I used for this repo
|
||||
|
||||
//let's do separate SOP for each type of operation
|
||||
// assign OpCtrlD[3] = 1'b0;
|
||||
// assign FOpCtrlD[3] = 1'b0;
|
||||
//
|
||||
//
|
||||
|
||||
@ -152,12 +151,12 @@ module fctrl (
|
||||
|
||||
always_comb begin
|
||||
IllegalFPUInstr1D = 0;
|
||||
In3UsedD = 0;
|
||||
FInput3UsedD = 0;
|
||||
case (FResultSelD)
|
||||
// div/sqrt
|
||||
// fdiv = ???0
|
||||
// fsqrt = ???1
|
||||
3'b000 : begin OpCtrlD = {3'b0, Funct7D[5]}; In2UsedD = ~Funct7D[5]; end
|
||||
3'b000 : begin FOpCtrlD = {3'b0, Funct7D[5]}; FInput2UsedD = ~Funct7D[5]; end
|
||||
// cmp
|
||||
// fmin = ?100
|
||||
// fmax = ?101
|
||||
@ -165,7 +164,7 @@ module fctrl (
|
||||
// flt = ?001
|
||||
// fle = ?011
|
||||
// {?, is min or max, is eq or le, is lt or le}
|
||||
3'b001 : begin OpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])}; In2UsedD = 1'b1; end
|
||||
3'b001 : begin FOpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])}; FInput2UsedD = 1'b1; end
|
||||
//fma/mult
|
||||
// fmadd = ?000
|
||||
// fmsub = ?001
|
||||
@ -173,12 +172,12 @@ module fctrl (
|
||||
// fnmsub = ?011
|
||||
// fmul = ?100
|
||||
// {?, is mul, is negative, is sub}
|
||||
3'b010 : begin OpCtrlD = {1'b0, OpD[4:2]}; In2UsedD = 1'b1; In3UsedD = ~OpD[4]; end
|
||||
3'b010 : begin FOpCtrlD = {1'b0, OpD[4:2]}; FInput2UsedD = 1'b1; FInput3UsedD = ~OpD[4]; end
|
||||
// sgn inj
|
||||
// fsgnj = ??00
|
||||
// fsgnjn = ??01
|
||||
// fsgnjx = ??10
|
||||
3'b011 : begin OpCtrlD = {2'b0, Funct3D[1:0]}; In2UsedD = 1'b1; end
|
||||
3'b011 : begin FOpCtrlD = {2'b0, Funct3D[1:0]}; FInput2UsedD = 1'b1; end
|
||||
// add/sub/cnvt
|
||||
// fadd = 0000
|
||||
// fsub = 0001
|
||||
@ -193,13 +192,13 @@ module fctrl (
|
||||
// fcvt.d.wu = 1111
|
||||
// fcvt.d.s = 1000
|
||||
// { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub
|
||||
3'b100 : begin OpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), Rs2D[0]|(Funct7D[2]&~Funct7D[5])}; In2UsedD = ~Funct7D[5]; end
|
||||
3'b100 : begin FOpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), Rs2D[0]|(Funct7D[2]&~Funct7D[5])}; FInput2UsedD = ~Funct7D[5]; end
|
||||
// classify {?, ?, ?, ?}
|
||||
3'b101 : begin OpCtrlD = 4'b0; In2UsedD = 1'b0; end
|
||||
3'b101 : begin FOpCtrlD = 4'b0; FInput2UsedD = 1'b0; end
|
||||
// output SrcAW
|
||||
// fmv.w.x = ???0
|
||||
// fmv.w.d = ???1
|
||||
3'b110 : begin OpCtrlD = {3'b0, Funct7D[0]}; In2UsedD = 1'b0; end
|
||||
3'b110 : begin FOpCtrlD = {3'b0, Funct7D[0]}; FInput2UsedD = 1'b0; end
|
||||
// output Input1
|
||||
// flw = ?000
|
||||
// fld = ?001
|
||||
@ -207,9 +206,9 @@ module fctrl (
|
||||
// fsd = ?011 // output Input2
|
||||
// fmv.x.w = ?100
|
||||
// fmv.x.d = ?101
|
||||
// {?, is mv, is store, is double or fcvt.d.w}
|
||||
3'b111 : begin OpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; In2UsedD = OpD[5]; end
|
||||
default : begin OpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; In2UsedD = 1'b0; end
|
||||
// {?, is mv, is store, is double or fmv}
|
||||
3'b111 : begin FOpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; FInput2UsedD = OpD[5]; end
|
||||
default : begin FOpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; FInput2UsedD = 1'b0; end
|
||||
endcase
|
||||
end
|
||||
|
||||
@ -219,5 +218,5 @@ module fctrl (
|
||||
// is add/cvt and is to int or is classify or is cmp and not max/min or is output ReadData1 and is mv
|
||||
assign FWriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b111)&OpD[6]);
|
||||
// if not writing to int reg and not a store function and not move
|
||||
assign FRegWriteD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & isFP;
|
||||
assign FWriteEnD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & isFP;
|
||||
endmodule
|
||||
|
@ -15,13 +15,13 @@
|
||||
// normalize Normalization shifter
|
||||
// round Rounding of result
|
||||
// exception Handles exceptional cases
|
||||
// bypass Handles bypass of result to Input1E or Input3E inputs
|
||||
// bypass Handles bypass of result to FInput1E or FInput3E inputs
|
||||
// sign One bit sign handling block
|
||||
// special Catch special cases (inputs = 0 / infinity / etc.)
|
||||
//
|
||||
// The FMAC computes FmaResultM=Input1E*Input2E+Input3E, rounded with the mode specified by
|
||||
// The FMAC computes FmaResultM=FInput1E*FInput2E+FInput3E, rounded with the mode specified by
|
||||
// RN, RZ, RM, or RP. The result is optionally bypassed back to
|
||||
// the Input1E or Input3E inputs for use on the next cycle. In addition, four signals
|
||||
// the FInput1E or FInput3E inputs for use on the next cycle. In addition, four signals
|
||||
// are produced: trap, overflow, underflow, and inexact. Trap indicates
|
||||
// an infinity, NaN, or denormalized number to be handled in software;
|
||||
// the other three signals are IEEE flags.
|
||||
@ -29,15 +29,15 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module fma1(Input1E, Input2E, Input3E, FrmE,
|
||||
module fma1(FInput1E, FInput2E, FInput3E, FrmE,
|
||||
rE, sE, tE, bsE, killprodE, sumshiftE, sumshiftzeroE, aligncntE, aeE
|
||||
, xzeroE, yzeroE, zzeroE, xnanE,ynanE, znanE, xdenormE, ydenormE, zdenormE,
|
||||
xinfE, yinfE, zinfE, nanE, prodinfE);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input logic [63:0] Input1E; // input 1
|
||||
input logic [63:0] Input2E; // input 2
|
||||
input logic [63:0] Input3E; // input 3
|
||||
input logic [63:0] FInput1E; // input 1
|
||||
input logic [63:0] FInput2E; // input 2
|
||||
input logic [63:0] FInput3E; // input 3
|
||||
input logic [2:0] FrmE; // Rounding mode
|
||||
output logic [12:0] aligncntE; // status flags
|
||||
output logic [105:0] rE; // one result of partial product sum
|
||||
@ -45,7 +45,7 @@ module fma1(Input1E, Input2E, Input3E, FrmE,
|
||||
output logic [163:0] tE; // output logic of alignment shifter
|
||||
output logic [12:0] aeE; // multiplier expoent
|
||||
output logic bsE; // sticky bit of addend
|
||||
output logic killprodE; // Input3E >> product
|
||||
output logic killprodE; // FInput3E >> product
|
||||
output logic xzeroE;
|
||||
output logic yzeroE;
|
||||
output logic zzeroE;
|
||||
@ -68,7 +68,7 @@ module fma1(Input1E, Input2E, Input3E, FrmE,
|
||||
// output logic [12:0] aligncntE; // shift count for alignment
|
||||
|
||||
|
||||
logic prodof; // Input1E*Input2E out of range
|
||||
logic prodof; // FInput1E*FInput2E out of range
|
||||
|
||||
|
||||
|
||||
@ -84,12 +84,12 @@ module fma1(Input1E, Input2E, Input3E, FrmE,
|
||||
|
||||
// Instantiate fraction datapath
|
||||
|
||||
multiply multiply(.xman(Input1E[51:0]), .yman(Input2E[51:0]), .*);
|
||||
align align(.zman(Input3E[51:0]),.*);
|
||||
multiply multiply(.xman(FInput1E[51:0]), .yman(FInput2E[51:0]), .*);
|
||||
align align(.zman(FInput3E[51:0]),.*);
|
||||
|
||||
// Instantiate exponent datapath
|
||||
|
||||
expgen1 expgen1(.xexp(Input1E[62:52]),.yexp(Input2E[62:52]),.zexp(Input3E[62:52]),.*);
|
||||
expgen1 expgen1(.xexp(FInput1E[62:52]),.yexp(FInput2E[62:52]),.zexp(FInput3E[62:52]),.*);
|
||||
// Instantiate special case detection across datapath & exponent path
|
||||
|
||||
special special(.*);
|
||||
|
@ -15,13 +15,13 @@
|
||||
// normalize Normalization shifter
|
||||
// round Rounding of result
|
||||
// exception Handles exceptional cases
|
||||
// bypass Handles bypass of result to Input1M or Input3M input logics
|
||||
// bypass Handles bypass of result to FInput1M or FInput3M inputs
|
||||
// sign One bit sign handling block
|
||||
// special Catch special cases (inputs = 0 / infinity / etc.)
|
||||
//
|
||||
// The FMAC computes FmaResultM=Input1M*Input2M+Input3M, rounded with the mode specified by
|
||||
// The FMAC computes FmaResultM=FInput1M*FInput2M+FInput3M, rounded with the mode specified by
|
||||
// RN, RZ, RM, or RP. The result is optionally bypassed back to
|
||||
// the Input1M or Input3M input logics for use on the next cycle. In addition, four signals
|
||||
// the FInput1M or FInput3M inputs for use on the next cycle. In addition, four signals
|
||||
// are produced: trap, overflow, underflow, and inexact. Trap indicates
|
||||
// an infinity, NaN, or denormalized number to be handled in software;
|
||||
// the other three signals are IEEE flags.
|
||||
@ -29,7 +29,7 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module fma2(Input1M, Input2M, Input3M, FrmM,
|
||||
module fma2(FInput1M, FInput2M, FInput3M, FrmM,
|
||||
FmaResultM, FmaFlagsM, aligncntM, rM, sM,
|
||||
tM, normcntM, aeM, bsM,killprodM,
|
||||
xzeroM, yzeroM,zzeroM,xdenormM,ydenormM,
|
||||
@ -39,9 +39,9 @@ module fma2(Input1M, Input2M, Input3M, FrmM,
|
||||
);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input logic [63:0] Input1M; // input logic 1
|
||||
input logic [63:0] Input2M; // input logic 2
|
||||
input logic [63:0] Input3M; // input logic 3
|
||||
input logic [63:0] FInput1M; // input logic 1
|
||||
input logic [63:0] FInput2M; // input logic 2
|
||||
input logic [63:0] FInput3M; // input logic 3
|
||||
input logic [2:0] FrmM; // Rounding mode
|
||||
input logic [12:0] aligncntM; // status flags
|
||||
input logic [105:0] rM; // one result of partial product sum
|
||||
@ -50,7 +50,7 @@ module fma2(Input1M, Input2M, Input3M, FrmM,
|
||||
input logic [8:0] normcntM; // shift count for normalizer
|
||||
input logic [12:0] aeM; // multiplier expoent
|
||||
input logic bsM; // sticky bit of addend
|
||||
input logic killprodM; // Input3M >> product
|
||||
input logic killprodM; // FInput3M >> product
|
||||
input logic prodinfM;
|
||||
input logic xzeroM;
|
||||
input logic yzeroM;
|
||||
@ -69,7 +69,7 @@ module fma2(Input1M, Input2M, Input3M, FrmM,
|
||||
input logic sumshiftzeroM;
|
||||
|
||||
|
||||
output logic [63:0] FmaResultM; // output FmaResultM=Input1M*Input2M+Input3M
|
||||
output logic [63:0] FmaResultM; // output FmaResultM=FInput1M*FInput2M+FInput3M
|
||||
output logic [4:0] FmaFlagsM; // status flags
|
||||
|
||||
|
||||
@ -120,18 +120,18 @@ module fma2(Input1M, Input2M, Input3M, FrmM,
|
||||
|
||||
add add(.*);
|
||||
lza lza(.*);
|
||||
normalize normalize(.zexp(Input3M[62:52]),.*);
|
||||
round round(.xman(Input1M[51:0]), .yman(Input2M[51:0]),.zman(Input3M[51:0]),.*);
|
||||
normalize normalize(.zexp(FInput3M[62:52]),.*);
|
||||
round round(.xman(FInput1M[51:0]), .yman(FInput2M[51:0]),.zman(FInput3M[51:0]),.*);
|
||||
|
||||
// Instantiate exponent datapath
|
||||
|
||||
expgen2 expgen2(.xexp(Input1M[62:52]),.yexp(Input2M[62:52]),.zexp(Input3M[62:52]),.*);
|
||||
expgen2 expgen2(.xexp(FInput1M[62:52]),.yexp(FInput2M[62:52]),.zexp(FInput3M[62:52]),.*);
|
||||
|
||||
|
||||
// Instantiate control logic
|
||||
|
||||
sign sign(.xsign(Input1M[63]),.ysign(Input2M[63]),.zsign(Input3M[63]),.*);
|
||||
flag2 flag2(.xsign(Input1M[63]),.ysign(Input2M[63]),.zsign(Input3M[63]),.vbits(v[1:0]),.*);
|
||||
sign sign(.xsign(FInput1M[63]),.ysign(FInput2M[63]),.zsign(FInput3M[63]),.*);
|
||||
flag2 flag2(.xsign(FInput1M[63]),.ysign(FInput2M[63]),.zsign(FInput3M[63]),.vbits(v[1:0]),.*);
|
||||
|
||||
assign FmaResultM = {wsign,wexp,wman};
|
||||
|
||||
|
@ -23,25 +23,25 @@
|
||||
//
|
||||
|
||||
// `timescale 1ps/1ps
|
||||
module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, DivFrm, DivOpType, DivP, DivOvEn, DivUnEn,
|
||||
DivStart, reset, clk, DivBusyM);
|
||||
module fpdiv (FDivSqrtDoneM, FDivResultM, FDivFlagsM, DivDenormM, FInput1E, FInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
|
||||
FDivStartE, reset, clk, DivBusyM);
|
||||
|
||||
input [63:0] DivOp1; // 1st input operand (A)
|
||||
input [63:0] DivOp2; // 2nd input operand (B)
|
||||
input [2:0] DivFrm; // Rounding mode - specify values
|
||||
input [63:0] FInput1E; // 1st input operand (A)
|
||||
input [63:0] FInput2E; // 2nd input operand (B)
|
||||
input [2:0] FrmE; // Rounding mode - specify values
|
||||
input DivOpType; // Function opcode
|
||||
input DivP; // Result Precision (0 for double, 1 for single)
|
||||
input FmtE; // Result Precision (0 for double, 1 for single)
|
||||
input DivOvEn; // Overflow trap enabled
|
||||
input DivUnEn; // Underflow trap enabled
|
||||
|
||||
input DivStart;
|
||||
input FDivStartE;
|
||||
input reset;
|
||||
input clk;
|
||||
|
||||
output [63:0] DivResultM; // Result of operation
|
||||
output [4:0] DivFlagsM; // IEEE exception flags
|
||||
output [63:0] FDivResultM; // Result of operation
|
||||
output [4:0] FDivFlagsM; // IEEE exception flags
|
||||
output DivDenormM; // DivDenormM on input or output
|
||||
output DivSqrtDone;
|
||||
output FDivSqrtDoneM;
|
||||
output DivBusyM;
|
||||
|
||||
supply1 vdd;
|
||||
@ -94,16 +94,16 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di
|
||||
|
||||
logic exp_cout1, exp_cout2, exp_odd, open;
|
||||
// Convert the input operands to their appropriate forms based on
|
||||
// the orignal operands, the DivOpType , and their precision DivP.
|
||||
// the original operands, the DivOpType, and their precision FmtE.
|
||||
// Single precision inputs are converted to double precision
|
||||
// and the sign of the first operand is set appropriately based on
|
||||
// if the operation is absolute value or negation.
|
||||
convert_inputs_div divconv1 (Float1, Float2, DivOp1, DivOp2, DivOpType, DivP);
|
||||
convert_inputs_div divconv1 (Float1, Float2, FInput1E, FInput2E, DivOpType, FmtE);
|
||||
|
||||
// Test for exceptions and return the "Invalid Operation" and
|
||||
// "Denormalized" Input DivFlagsM. The "sel_inv" is used in
|
||||
// "Denormalized" Input Flags. The "sel_inv" is used in
|
||||
// the third pipeline stage to select the result. Also, op1_Norm
|
||||
// and op2_Norm are one if DivOp1 and DivOp2 are not zero or denormalized.
|
||||
// and op2_Norm are one if FInput1E and FInput2E are not zero or denormalized.
|
||||
// sub is one if the effective operation is subtraction.
|
||||
exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm,
|
||||
Float1, Float2, DivOpType);
|
||||
@ -135,26 +135,26 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di
|
||||
sel_muxa, sel_muxb, sel_muxr,
|
||||
reset, clk,
|
||||
load_rega, load_regb, load_regc, load_regd,
|
||||
load_regr, load_regs, DivP, DivOpType, exp_odd);
|
||||
load_regr, load_regs, FmtE, DivOpType, exp_odd);
|
||||
|
||||
// FSM : control divider
|
||||
fsm control (DivSqrtDone, load_rega, load_regb, load_regc, load_regd,
|
||||
fsm control (FDivSqrtDoneM, load_rega, load_regb, load_regc, load_regd,
|
||||
load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
|
||||
clk, reset, DivStart, DivOpType, DivBusyM);
|
||||
clk, reset, FDivStartE, DivOpType, DivBusyM);
|
||||
|
||||
// Round the mantissa to a 52-bit value, with the leading one
|
||||
// removed. The rounding units also handles special cases and
|
||||
// set the exception flags.
|
||||
//***add max magnitude and swap negative and positive infinity
|
||||
rounder_div divround1 (Result, DenormIO, FlagsIn,
|
||||
DivFrm, DivP, DivOvEn, DivUnEn, expF,
|
||||
FrmE, FmtE, DivOvEn, DivUnEn, expF,
|
||||
sel_inv, Invalid, DenormIn, signResult,
|
||||
q1, qm1, qp1, q0, qm0, qp0, regr_out);
|
||||
|
||||
// Store the final result and the exception flags in registers.
|
||||
flopenr #(64) rega (clk, reset, DivSqrtDone, Result, DivResultM);
|
||||
flopenr #(1) regb (clk, reset, DivSqrtDone, DenormIO, DivDenormM);
|
||||
flopenr #(5) regc (clk, reset, DivSqrtDone, FlagsIn, DivFlagsM);
|
||||
flopenr #(64) rega (clk, reset, FDivSqrtDoneM, Result, FDivResultM);
|
||||
flopenr #(1) regb (clk, reset, FDivSqrtDoneM, DenormIO, DivDenormM);
|
||||
flopenr #(5) regc (clk, reset, FDivSqrtDoneM, FlagsIn, FDivFlagsM);
|
||||
|
||||
endmodule // fpadd
|
||||
|
||||
@ -198,7 +198,7 @@ module brent_kung (c, p, g);
|
||||
logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0;
|
||||
// parallel-prefix, Brent-Kung
|
||||
|
||||
// Stage 1: Generates G/DivP pairs that span 1 bits
|
||||
// Stage 1: Generates G/P pairs that span 1 bits
|
||||
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
|
||||
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
|
||||
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
|
||||
@ -207,20 +207,20 @@ module brent_kung (c, p, g);
|
||||
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
|
||||
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
|
||||
|
||||
// Stage 2: Generates G/DivP pairs that span 2 bits
|
||||
// Stage 2: Generates G/P pairs that span 2 bits
|
||||
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
|
||||
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
|
||||
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
|
||||
|
||||
// Stage 3: Generates G/DivP pairs that span 4 bits
|
||||
// Stage 3: Generates G/P pairs that span 4 bits
|
||||
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
|
||||
|
||||
// Stage 4: Generates G/DivP pairs that span 8 bits
|
||||
// Stage 4: Generates G/P pairs that span 8 bits
|
||||
|
||||
// Stage 5: Generates G/DivP pairs that span 4 bits
|
||||
// Stage 5: Generates G/P pairs that span 4 bits
|
||||
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
|
||||
|
||||
// Stage 6: Generates G/DivP pairs that span 2 bits
|
||||
// Stage 6: Generates G/P pairs that span 2 bits
|
||||
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
|
||||
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
|
||||
grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -27,16 +27,15 @@
|
||||
//
|
||||
|
||||
|
||||
module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, op2_Norm, opA_Norm, opB_Norm, Invalid, DenormIn, convert, swap, normal_overflow, signA, Float1, Float2, exp1_denorm, exp2_denorm, exponent, op1, op2, rm, op_type, Pin);
|
||||
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FInput1E, FInput2E, FOpCtrlE, FmtE);
|
||||
|
||||
input logic [63:0] op1; // 1st input operand (A)
|
||||
input logic [63:0] op2; // 2nd input operand (B)
|
||||
input logic [2:0] rm; // Rounding mode - specify values
|
||||
input logic [3:0] op_type; // Function opcode
|
||||
input logic Pin; // Result Precision (1 for double, 0 for single)
|
||||
input logic [63:0] FInput1E; // 1st input operand (A)
|
||||
input logic [63:0] FInput2E; // 2nd input operand (B)
|
||||
input logic [3:0] FOpCtrlE; // Function opcode
|
||||
input logic FmtE; // Result Precision (1 for double, 0 for single)
|
||||
|
||||
wire P;
|
||||
assign P = ~Pin | op_type[2];
|
||||
assign P = ~FmtE | FOpCtrlE[2];
|
||||
|
||||
wire [63:0] IntValue;
|
||||
wire [11:0] exp1, exp2;
|
||||
@ -54,44 +53,44 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
|
||||
wire zeroB;
|
||||
wire [5:0] align_shift;
|
||||
|
||||
output logic [63:0] Float1;
|
||||
output logic [63:0] Float2;
|
||||
output logic [10:0] exponent;
|
||||
output logic [10:0] exponent_postsum;
|
||||
output logic [11:0] exp1_denorm, exp2_denorm;//KEP used to be [10:0]
|
||||
output logic [63:0] sum, sum_tc;
|
||||
output logic [3:0] sel_inv;
|
||||
output logic corr_sign;
|
||||
output logic signA;
|
||||
output logic op1_Norm, op2_Norm;
|
||||
output logic opA_Norm, opB_Norm;
|
||||
output logic Invalid;
|
||||
output logic DenormIn;
|
||||
output logic [63:0] AddFloat1E;
|
||||
output logic [63:0] AddFloat2E;
|
||||
output logic [10:0] AddExponentE;
|
||||
output logic [10:0] AddExpPostSumE;
|
||||
output logic [11:0] AddExp1DenormE, AddExp2DenormE;//KEP used to be [10:0]
|
||||
output logic [63:0] AddSumE, AddSumTcE;
|
||||
output logic [3:0] AddSelInvE;
|
||||
output logic AddCorrSignE;
|
||||
output logic AddSignAE;
|
||||
output logic AddOp1NormE, AddOp2NormE;
|
||||
output logic AddOpANormE, AddOpBNormE;
|
||||
output logic AddInvalidE;
|
||||
output logic AddDenormInE;
|
||||
// output logic exp_valid;
|
||||
output logic convert;
|
||||
output logic swap;
|
||||
output logic normal_overflow;
|
||||
output logic AddConvertE;
|
||||
output logic AddSwapE;
|
||||
output logic AddNormOvflowE;
|
||||
wire [5:0] ZP_mantissaA;
|
||||
wire [5:0] ZP_mantissaB;
|
||||
wire ZV_mantissaA;
|
||||
wire ZV_mantissaB;
|
||||
|
||||
// Convert the input operands to their appropriate forms based on
|
||||
// the orignal operands, the op_type , and their precision P.
|
||||
// the original operands, the FOpCtrlE, and their precision P.
|
||||
// Single precision inputs are converted to double precision
|
||||
// and the sign of the first operand is set appropriately based on
|
||||
// if the operation is absolute value or negation.
|
||||
|
||||
convert_inputs conv1 (Float1, Float2, op1, op2, op_type, P);
|
||||
convert_inputs conv1 (AddFloat1E, AddFloat2E, FInput1E, FInput2E, FOpCtrlE, P);
|
||||
|
||||
// Test for exceptions and return the "Invalid Operation" and
|
||||
// "Denormalized" Input Flags. The "sel_inv" is used in
|
||||
// the third pipeline stage to select the result. Also, op1_Norm
|
||||
// and op2_Norm are one if op1 and op2 are not zero or denormalized.
|
||||
// "Denormalized" Input Flags. The "AddSelInvE" is used in
|
||||
// the third pipeline stage to select the result. Also, AddOp1NormE
|
||||
// and AddOp2NormE are one if FInput1E and FInput2E are not zero or denormalized.
|
||||
// sub is one if the effective operation is subtraction.
|
||||
|
||||
exception exc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, sub,
|
||||
Float1, Float2, op_type);
|
||||
exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub,
|
||||
AddFloat1E, AddFloat2E, FOpCtrlE);
|
||||
|
||||
// Perform Exponent Subtraction (used for alignment). For performance
|
||||
// both exponent subtractions are performed in parallel. This was
|
||||
@ -99,25 +98,25 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
|
||||
// the two parallel additions. The input values are zero-extended to 12
|
||||
// bits prior to performing the addition.
|
||||
|
||||
assign exp1 = {1'b0, Float1[62:52]};
|
||||
assign exp2 = {1'b0, Float2[62:52]};
|
||||
assign exp1 = {1'b0, AddFloat1E[62:52]};
|
||||
assign exp2 = {1'b0, AddFloat2E[62:52]};
|
||||
assign exp_diff1 = exp1 - exp2;
|
||||
assign exp_diff2 = DenormIn ? ({Float2[63], exp2[10:0]} - {Float1[63], exp1[10:0]}): exp2 - exp1;
|
||||
assign exp_diff2 = AddDenormInE ? ({AddFloat2E[63], exp2[10:0]} - {AddFloat1E[63], exp1[10:0]}): exp2 - exp1;
|
||||
|
||||
// The second operand (B) should be set to zero, if op_type does not
|
||||
// The second operand (B) should be set to zero, if FOpCtrlE does not
|
||||
// specify addition or subtraction
|
||||
assign zeroB = op_type[2] | op_type[1];
|
||||
assign zeroB = FOpCtrlE[2] | FOpCtrlE[1];
|
||||
|
||||
// Swapped operands if zeroB is not one and exp1 < exp2.
|
||||
// Swapping causes exp2 to be used for the result exponent.
|
||||
// Swapping causes exp2 to be used for the result exponent.
|
||||
// Only the exponent of the larger operand is used to determine
|
||||
// the final result.
|
||||
assign swap = exp_diff1[11] & ~zeroB;
|
||||
assign exponent = swap ? exp2[10:0] : exp1[10:0];
|
||||
assign exponent_postsum = swap ? exp2[10:0] : exp1[10:0];
|
||||
assign mantissaA = swap ? Float2[51:0] : Float1[51:0];
|
||||
assign mantissaB = swap ? Float1[51:0] : Float2[51:0];
|
||||
assign signA = swap ? Float2[63] : Float1[63];
|
||||
assign AddSwapE = exp_diff1[11] & ~zeroB;
|
||||
assign AddExponentE = AddSwapE ? exp2[10:0] : exp1[10:0];
|
||||
assign AddExpPostSumE = AddSwapE ? exp2[10:0] : exp1[10:0];
|
||||
assign mantissaA = AddSwapE ? AddFloat2E[51:0] : AddFloat1E[51:0];
|
||||
assign mantissaB = AddSwapE ? AddFloat1E[51:0] : AddFloat2E[51:0];
|
||||
assign AddSignAE = AddSwapE ? AddFloat2E[63] : AddFloat1E[63];
|
||||
|
||||
// Leading-Zero Detector. Determine the size of the shift needed for
|
||||
// normalization. If sum_corrected is all zeros, the exp_valid is
|
||||
@ -127,12 +126,12 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
|
||||
lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
|
||||
|
||||
// Denormalized exponents created by subtracting the leading zeroes from the original exponents
|
||||
assign exp1_denorm = swap ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa
|
||||
assign exp2_denorm = swap ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});
|
||||
assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa
|
||||
assign AddExp2DenormE = AddSwapE ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});
|
||||
|
||||
// Determine the alignment shift and limit it to 63. If any bit from
|
||||
// exp_shift[6] to exp_shift[11] is one, then shift is set to all ones.
|
||||
assign exp_shift = swap ? exp_diff2 : exp_diff1;
|
||||
assign exp_shift = AddSwapE ? exp_diff2 : exp_diff1;
|
||||
assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9]
|
||||
| exp_shift[8] | exp_shift[7] | exp_shift[6];
|
||||
assign align_shift = exp_shift[5:0] | {6{exp_gt63}}; //KEP used to be all of exp_shift
|
||||
@ -147,10 +146,10 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
|
||||
// and loss of sign information. The two bits to the right of the
|
||||
// original mantissa form the "guard" and "round" bits that are used
|
||||
// to round the result.
|
||||
assign opA_Norm = swap ? op2_Norm : op1_Norm;
|
||||
assign opB_Norm = swap ? op1_Norm : op2_Norm;
|
||||
assign mantissaA1 = {2'h0, opA_Norm, mantissaA[51:0]&{52{opA_Norm}}, 2'h0};
|
||||
assign mantissaB1 = {2'h0, opB_Norm, mantissaB[51:0]&{52{opB_Norm}}, 2'h0};
|
||||
assign AddOpANormE = AddSwapE ? AddOp2NormE : AddOp1NormE;
|
||||
assign AddOpBNormE = AddSwapE ? AddOp1NormE : AddOp2NormE;
|
||||
assign mantissaA1 = {2'h0, AddOpANormE, mantissaA[51:0]&{52{AddOpANormE}}, 2'h0};
|
||||
assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB[51:0]&{52{AddOpBNormE}}, 2'h0};
|
||||
|
||||
// Perform mantissa alignment using a 57-bit barrel shifter
|
||||
// If any of the bits shifted out are one, Sticky_out is set.
|
||||
@ -160,8 +159,8 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
|
||||
|
||||
// Place either the sign-extended 32-bit value or the original 64-bit value
|
||||
// into IntValue (to be used for integer to floating point conversion)
|
||||
assign IntValue [31:0] = op1[31:0];
|
||||
assign IntValue [63:32] = op_type[0] ? {32{op1[31]}} : op1[63:32];
|
||||
assign IntValue [31:0] = FInput1E[31:0];
|
||||
assign IntValue [63:32] = FOpCtrlE[0] ? {32{FInput1E[31]}} : FInput1E[63:32];
|
||||
|
||||
// If doing an integer to floating point conversion, mantissaA3 is set to
|
||||
// IntVal and the prenormalized exponent is set to 1084. Otherwise,
|
||||
@ -169,30 +168,30 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
|
||||
// and the exponent value is left unchanged.
|
||||
// Under denormalized cases, the exponent before the rounder is set to 1
|
||||
// if the normal shift value is 11.
|
||||
assign convert = ~op_type[2] & op_type[1];
|
||||
assign mantissaA3 = (op_type[3]) ? (op_type[0] ? Float1 : ~Float1) : (DenormIn ? ({12'h0, mantissaA}) : (convert ? IntValue : {mantissaA1, 7'h0}));
|
||||
assign AddConvertE = ~FOpCtrlE[2] & FOpCtrlE[1];
|
||||
assign mantissaA3 = (FOpCtrlE[3]) ? (FOpCtrlE[0] ? AddFloat1E : ~AddFloat1E) : (AddDenormInE ? ({12'h0, mantissaA}) : (AddConvertE ? IntValue : {mantissaA1, 7'h0}));
|
||||
|
||||
// Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to
|
||||
// 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six
|
||||
// zeros.
|
||||
assign mantissaB3[63:7] = (op_type[3]) ? (57'h0) : (DenormIn ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
|
||||
assign mantissaB3[6] = (op_type[3]) ? (1'b0) : (DenormIn ? mantissaB[6] : Sticky_out & ~zeroB);
|
||||
assign mantissaB3[5:0] = (op_type[3]) ? (6'h01) : (DenormIn ? mantissaB[5:0] : 6'h0);
|
||||
assign mantissaB3[63:7] = (FOpCtrlE[3]) ? (57'h0) : (AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
|
||||
assign mantissaB3[6] = (FOpCtrlE[3]) ? (1'b0) : (AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB);
|
||||
assign mantissaB3[5:0] = (FOpCtrlE[3]) ? (6'h01) : (AddDenormInE ? mantissaB[5:0] : 6'h0);
|
||||
|
||||
// The sign of the result needs to be corrected if the true
|
||||
// operation is subtraction and the input operands were swapped.
|
||||
assign corr_sign = ~op_type[2]&~op_type[1]&op_type[0]&swap;
|
||||
assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;
|
||||
|
||||
// 64-bit Mantissa Adder/Subtractor
|
||||
cla64 add1 (sum, mantissaA3, mantissaB3, sub);
|
||||
cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub);
|
||||
|
||||
// 64-bit Mantissa Subtractor - to get the two's complement of the
|
||||
// result when the sign from the adder/subtractor is negative.
|
||||
cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3);
|
||||
cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3);
|
||||
|
||||
// Finds normal underflow result to determine whether to round final exponent down
|
||||
//***KEP used to be (sum == 16'h0) I am unsure what it's supposed to be
|
||||
assign normal_overflow = (DenormIn & (sum == 64'h0) & (opA_Norm | opB_Norm) & ~op_type[0]) ? 1'b1 : (sum[63] ? sum_tc[52] : sum[52]);
|
||||
//***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be
|
||||
assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]);
|
||||
|
||||
endmodule // fpadd
|
||||
|
||||
|
@ -27,15 +27,13 @@
|
||||
//
|
||||
|
||||
|
||||
module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, AddOp1M, AddOp2M, AddRmM, AddOpTypeM, AddPM, AddOvEnM, AddUnEnM);
|
||||
module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);
|
||||
|
||||
input [63:0] AddOp1M; // 1st input operand (A)
|
||||
input [63:0] AddOp2M; // 2nd input operand (B)
|
||||
input [2:0] AddRmM; // Rounding mode - specify values
|
||||
input [3:0] AddOpTypeM; // Function opcode
|
||||
input AddPM; // Result Precision (0 for double, 1 for single)
|
||||
input AddOvEnM; // Overflow trap enabled
|
||||
input AddUnEnM; // Underflow trap enabled
|
||||
input [2:0] FrmM; // Rounding mode - specify values
|
||||
input [3:0] FOpCtrlM; // Function opcode
|
||||
input FmtM; // Result Precision (0 for double, 1 for single)
|
||||
// input AddOvEnM; // Overflow trap enabled
|
||||
// input AddUnEnM; // Underflow trap enabled
|
||||
input [63:0] AddSumM, AddSumTcM;
|
||||
input [63:0] AddFloat1M;
|
||||
input [63:0] AddFloat2M;
|
||||
@ -53,12 +51,12 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
|
||||
input AddSwapM;
|
||||
// input AddNormOvflowM;
|
||||
|
||||
output [63:0] AddResultM; // Result of operation
|
||||
output [4:0] AddFlagsM; // IEEE exception flags
|
||||
output [63:0] FAddResultM; // Result of operation
|
||||
output [4:0] FAddFlagsM; // IEEE exception flags
|
||||
output AddDenormM; // AddDenormM on input or output
|
||||
|
||||
wire P;
|
||||
assign P = AddPM | AddOpTypeM[2];
|
||||
assign P = FmtM | FOpCtrlM[2];
|
||||
|
||||
wire [10:0] exp_pre;
|
||||
wire [63:0] Result;
|
||||
@ -82,6 +80,12 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
|
||||
wire [63:0] sum_corr;
|
||||
logic AddNormOvflowM;
|
||||
|
||||
|
||||
logic AddOvEnM; // Overflow trap enabled
|
||||
logic AddUnEnM; // Underflow trap enabled
|
||||
|
||||
assign AddOvEnM = 1'b1;
|
||||
assign AddUnEnM = 1'b1;
|
||||
//AddExponentM value pre-rounding with considerations for denormalized
|
||||
//cases/conversion cases
|
||||
assign exp_pre = AddDenormInM ?
|
||||
@ -101,7 +105,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
|
||||
assign mantissa_comp_sum_tc = AddSwapM ? Float2_sum_tc_comp : Float1_sum_tc_comp;
|
||||
|
||||
// Determines the correct comparison result based on operation and sign of resulting AddSumM
|
||||
assign mantissa_comp = (AddOpTypeM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;
|
||||
assign mantissa_comp = (FOpCtrlM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;
|
||||
|
||||
// If the signs are different and both operands aren't denormalized
|
||||
// the normal underflow bit is needed and therefore updated.
|
||||
@ -113,12 +117,12 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
|
||||
// If AddSumM is negative, use its two's complement instead.
|
||||
// This value has to be 64-bits to correctly handle the
|
||||
// case 10...00
|
||||
assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & AddOpTypeM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~AddOpTypeM[0]) ))
|
||||
? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (AddOpTypeM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));
|
||||
assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & FOpCtrlM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~FOpCtrlM[0]) ))
|
||||
? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (FOpCtrlM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));
|
||||
|
||||
// Finds normal underflow result to determine whether to round final AddExponentM down
|
||||
//KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be
|
||||
assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~AddOpTypeM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);
|
||||
assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~FOpCtrlM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);
|
||||
|
||||
// Leading-Zero Detector. Determine the size of the shift needed for
|
||||
// normalization. If sum_corrected is all zeros, the exp_valid is
|
||||
@ -132,7 +136,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
|
||||
// be right shifted. It outputs the normalized AddSumM.
|
||||
barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);
|
||||
|
||||
assign sum_norm_w_bypass = (AddOpTypeM[3]) ? (AddOpTypeM[0] ? ~sum_corr : sum_corr) : (sum_norm);
|
||||
assign sum_norm_w_bypass = (FOpCtrlM[3]) ? (FOpCtrlM[0] ? ~sum_corr : sum_corr) : (sum_norm);
|
||||
|
||||
// Round the mantissa to a 52-bit value, with the leading one
|
||||
// removed. If the result is a single precision number, the actual
|
||||
@ -141,18 +145,18 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
|
||||
// exactly where the rounding point is. The rounding unit also
|
||||
// handles special cases and sets the exception flags.
|
||||
|
||||
// Changed DenormIO -> AddDenormM and FlagsIn -> AddFlagsM in order to
|
||||
// Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlagsM in order to
|
||||
// help in processor reservation station detection of load/stores. In
|
||||
// other words, the processor would like to know ahead of time that
|
||||
// if the result is an exception then don't load or store.
|
||||
rounder round1 (Result, DenormIO, FlagsIn, AddRmM, P, AddOvEnM, AddUnEnM, exp_valid,
|
||||
rounder round1 (Result, DenormIO, FlagsIn, FrmM, P, AddOvEnM, AddUnEnM, exp_valid,
|
||||
AddSelInvM, AddInvalidM, AddDenormInM, AddConvertM, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass,
|
||||
AddExpPostSumM, AddOp1NormM, AddOp2NormM, AddFloat1M[63:52], AddFloat2M[63:52],
|
||||
AddNormOvflowM, normal_underflow, AddSwapM, AddOpTypeM, AddSumM);
|
||||
AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM);
|
||||
|
||||
// Store the final result and the exception flags in registers.
|
||||
assign AddResultM = Result;
|
||||
assign {AddDenormM, AddFlagsM} = {DenormIO, FlagsIn};
|
||||
assign FAddResultM = Result;
|
||||
assign {AddDenormM, FAddFlagsM} = {DenormIO, FlagsIn};
|
||||
|
||||
endmodule // fpadd
|
||||
|
||||
|
@ -37,7 +37,7 @@
|
||||
// It also produces an invalid operation flag, which is one
|
||||
// if either of the input operands is a signaling NaN per 754
|
||||
|
||||
module fpucmp1 (w, x, ANaN, BNaN, Azero, Bzero, op1, op2, Sel);
|
||||
module fpucmp1 (w, x, ANaN, BNaN, Azero, Bzero, op1, op2, Sel);///***fix Sel to match spec
|
||||
|
||||
input logic [63:0] op1;
|
||||
input logic [63:0] op2;
|
||||
|
@ -27,45 +27,45 @@
|
||||
|
||||
module fpuhazard(
|
||||
input logic [4:0] Adr1, Adr2, Adr3,
|
||||
input logic FRegWriteE, FRegWriteM, FRegWriteW,
|
||||
input logic FWriteEnE, FWriteEnM, FWriteEnW,
|
||||
input logic [4:0] RdE, RdM, RdW,
|
||||
input logic DivBusyM,
|
||||
input logic DivBusyM,
|
||||
input logic RegWriteD,
|
||||
input logic [2:0] FResultSelD, FResultSelE,
|
||||
input logic IllegalFPUInstrD,
|
||||
input logic In2UsedD, In3UsedD,
|
||||
input logic FInput2UsedD, FInput3UsedD,
|
||||
// Stall outputs
|
||||
output logic FStallD,
|
||||
output logic [1:0] Input1MuxD, Input2MuxD,
|
||||
output logic Input3MuxD
|
||||
output logic [1:0] FForwardInput1D, FForwardInput2D,
|
||||
output logic FForwardInput3D
|
||||
);
|
||||
|
||||
|
||||
always_comb begin
|
||||
// set ReadData as default
|
||||
Input1MuxD = 2'b00;
|
||||
Input2MuxD = 2'b00;
|
||||
Input3MuxD = 1'b0;
|
||||
FForwardInput1D = 2'b00;
|
||||
FForwardInput2D = 2'b00;
|
||||
FForwardInput3D = 1'b0;
|
||||
FStallD = DivBusyM;
|
||||
if (~IllegalFPUInstrD) begin
|
||||
// if taking a value from int register
|
||||
if ((Adr1 == RdE) & (FRegWriteE | ((FResultSelE == 3'b110) & RegWriteD)))
|
||||
if (FResultSelE == 3'b110) Input1MuxD = 2'b11; // choose SrcAM
|
||||
if ((Adr1 == RdE) & (FWriteEnE | ((FResultSelE == 3'b110) & RegWriteD)))
|
||||
if (FResultSelE == 3'b110) FForwardInput1D = 2'b11; // choose SrcAM
|
||||
else FStallD = 1'b1; // otherwise stall
|
||||
else if ((Adr1 == RdM) & FRegWriteM) Input1MuxD = 2'b01; // choose FPUResultDirW
|
||||
else if ((Adr1 == RdW) & FRegWriteW) Input1MuxD = 2'b11; // choose FPUResultDirE
|
||||
else if ((Adr1 == RdM) & FWriteEnM) FForwardInput1D = 2'b01; // choose FPUResultDirW
|
||||
else if ((Adr1 == RdW) & FWriteEnW) FForwardInput1D = 2'b11; // choose FPUResultDirE
|
||||
|
||||
|
||||
if(In2UsedD)
|
||||
if ((Adr2 == RdE) & FRegWriteE) FStallD = 1'b1;
|
||||
else if ((Adr2 == RdM) & FRegWriteM) Input2MuxD = 2'b01; // choose FPUResultDirW
|
||||
else if ((Adr2 == RdW) & FRegWriteW) Input2MuxD = 2'b10; // choose FPUResultDirE
|
||||
if(FInput2UsedD)
|
||||
if ((Adr2 == RdE) & FWriteEnE) FStallD = 1'b1;
|
||||
else if ((Adr2 == RdM) & FWriteEnM) FForwardInput2D = 2'b01; // choose FPUResultDirW
|
||||
else if ((Adr2 == RdW) & FWriteEnW) FForwardInput2D = 2'b10; // choose FPUResultDirE
|
||||
|
||||
|
||||
if(In3UsedD)
|
||||
if ((Adr3 == RdE) & FRegWriteE) FStallD = 1'b1;
|
||||
else if ((Adr3 == RdM) & FRegWriteM) FStallD = 1'b1;
|
||||
else if ((Adr3 == RdW) & FRegWriteW) Input3MuxD = 1'b1; // choose FPUResultDirE
|
||||
if(FInput3UsedD)
|
||||
if ((Adr3 == RdE) & FWriteEnE) FStallD = 1'b1;
|
||||
else if ((Adr3 == RdM) & FWriteEnM) FStallD = 1'b1;
|
||||
else if ((Adr3 == RdW) & FWriteEnW) FForwardInput3D = 1'b1; // choose FPUResultDirE
|
||||
end
|
||||
|
||||
end
|
||||
|
@ -1,8 +1,8 @@
|
||||
//performs the fsgnj/fsgnjn/fsgnjx RISCV instructions
|
||||
|
||||
module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SgnOp1E, SgnOp2E);
|
||||
module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, FInput2E);
|
||||
|
||||
input [63:0] SgnOp1E, SgnOp2E;
|
||||
input [63:0] FInput1E, FInput2E;
|
||||
input [1:0] SgnOpCodeE;
|
||||
output [63:0] SgnResultE;
|
||||
output [4:0] SgnFlagsE;
|
||||
@ -11,18 +11,18 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SgnOp1E, SgnOp2E);
|
||||
|
||||
//op code designation:
|
||||
//
|
||||
//00 - fsgnj - directly copy over sign value of SgnOp2E
|
||||
//01 - fsgnjn - negate sign value of SgnOp2E
|
||||
//10 - fsgnjx - XOR sign values of SgnOp1E & SgnOp2E
|
||||
//00 - fsgnj - directly copy over sign value of FInput2E
|
||||
//01 - fsgnjn - negate sign value of FInput2E
|
||||
//10 - fsgnjx - XOR sign values of FInput1E & FInput2E
|
||||
//
|
||||
|
||||
assign SgnResultE[63] = SgnOpCodeE[1] ? (SgnOp1E[63] ^ SgnOp2E[63]) : (SgnOp2E[63] ^ SgnOpCodeE[0]);
|
||||
assign SgnResultE[62:0] = SgnOp1E[62:0];
|
||||
assign SgnResultE[63] = SgnOpCodeE[1] ? (FInput1E[63] ^ FInput2E[63]) : (FInput2E[63] ^ SgnOpCodeE[0]);
|
||||
assign SgnResultE[62:0] = FInput1E[62:0];
|
||||
|
||||
//If the exponent is all ones, then the value is either Inf or NaN,
|
||||
//both of which will produce a QNaN/SNaN value of some sort. This will
|
||||
//set the invalid flag high.
|
||||
assign AonesExp = SgnOp1E[62]&SgnOp1E[61]&SgnOp1E[60]&SgnOp1E[59]&SgnOp1E[58]&SgnOp1E[57]&SgnOp1E[56]&SgnOp1E[55]&SgnOp1E[54]&SgnOp1E[53]&SgnOp1E[52];
|
||||
assign AonesExp = FInput1E[62]&FInput1E[61]&FInput1E[60]&FInput1E[59]&FInput1E[58]&FInput1E[57]&FInput1E[56]&FInput1E[55]&FInput1E[54]&FInput1E[53]&FInput1E[52];
|
||||
|
||||
//the only flag that can occur during this operation is invalid
|
||||
//due to changing sign on already existing NaN
|
||||
|
@ -10,46 +10,46 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
module special(Input1E, Input2E, Input3E, xzeroE, yzeroE, zzeroE,
|
||||
module special(FInput1E, FInput2E, FInput3E, xzeroE, yzeroE, zzeroE,
|
||||
xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE);
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
input logic [63:0] Input1E; // Input Input1E
|
||||
input logic [63:0] Input2E; // Input Input2E
|
||||
input logic [63:0] Input3E; // Input Input3E
|
||||
output logic xzeroE; // Input Input1E = 0
|
||||
output logic yzeroE; // Input Input2E = 0
|
||||
output logic zzeroE; // Input Input3E = 0
|
||||
output logic xnanE; // Input1E is NaN
|
||||
output logic ynanE; // Input2E is NaN
|
||||
output logic znanE; // Input3E is NaN
|
||||
output logic xdenormE; // Input1E is denormalized
|
||||
output logic ydenormE; // Input2E is denormalized
|
||||
output logic zdenormE; // Input3E is denormalized
|
||||
output logic xinfE; // Input1E is infinity
|
||||
output logic yinfE; // Input2E is infinity
|
||||
output logic zinfE; // Input3E is infinity
|
||||
input logic [63:0] FInput1E; // Input FInput1E
|
||||
input logic [63:0] FInput2E; // Input FInput2E
|
||||
input logic [63:0] FInput3E; // Input FInput3E
|
||||
output logic xzeroE; // Input FInput1E = 0
|
||||
output logic yzeroE; // Input FInput2E = 0
|
||||
output logic zzeroE; // Input FInput3E = 0
|
||||
output logic xnanE; // FInput1E is NaN
|
||||
output logic ynanE; // FInput2E is NaN
|
||||
output logic znanE; // FInput3E is NaN
|
||||
output logic xdenormE; // FInput1E is denormalized
|
||||
output logic ydenormE; // FInput2E is denormalized
|
||||
output logic zdenormE; // FInput3E is denormalized
|
||||
output logic xinfE; // FInput1E is infinity
|
||||
output logic yinfE; // FInput2E is infinity
|
||||
output logic zinfE; // FInput3E is infinity
|
||||
|
||||
// In the actual circuit design, the gates looking at bits
|
||||
// 51:0 and at bits 62:52 should be shared among the various detectors.
|
||||
|
||||
// Check if input is NaN
|
||||
|
||||
assign xnanE = &Input1E[62:52] && |Input1E[51:0];
|
||||
assign ynanE = &Input2E[62:52] && |Input2E[51:0];
|
||||
assign znanE = &Input3E[62:52] && |Input3E[51:0];
|
||||
assign xnanE = &FInput1E[62:52] && |FInput1E[51:0];
|
||||
assign ynanE = &FInput2E[62:52] && |FInput2E[51:0];
|
||||
assign znanE = &FInput3E[62:52] && |FInput3E[51:0];
|
||||
|
||||
// Check if input is denormalized
|
||||
|
||||
assign xdenormE = ~(|Input1E[62:52]) && |Input1E[51:0];
|
||||
assign ydenormE = ~(|Input2E[62:52]) && |Input2E[51:0];
|
||||
assign zdenormE = ~(|Input3E[62:52]) && |Input3E[51:0];
|
||||
assign xdenormE = ~(|FInput1E[62:52]) && |FInput1E[51:0];
|
||||
assign ydenormE = ~(|FInput2E[62:52]) && |FInput2E[51:0];
|
||||
assign zdenormE = ~(|FInput3E[62:52]) && |FInput3E[51:0];
|
||||
|
||||
// Check if input is infinity
|
||||
|
||||
assign xinfE = &Input1E[62:52] && ~(|Input1E[51:0]);
|
||||
assign yinfE = &Input2E[62:52] && ~(|Input2E[51:0]);
|
||||
assign zinfE = &Input3E[62:52] && ~(|Input3E[51:0]);
|
||||
assign xinfE = &FInput1E[62:52] && ~(|FInput1E[51:0]);
|
||||
assign yinfE = &FInput2E[62:52] && ~(|FInput2E[51:0]);
|
||||
assign zinfE = &FInput3E[62:52] && ~(|FInput3E[51:0]);
|
||||
|
||||
// Check if inputs are all zero
|
||||
// Also forces denormalized inputs to zero.
|
||||
@ -57,11 +57,11 @@ module special(Input1E, Input2E, Input3E, xzeroE, yzeroE, zzeroE,
|
||||
// to just check if the exponent is zero.
|
||||
|
||||
// KATHERINE - commented following (21/01/11)
|
||||
// assign xzeroE = ~(|Input1E[62:0]) || xdenormE;
|
||||
// assign yzeroE = ~(|Input2E[62:0]) || ydenormE;
|
||||
// assign zzeroE = ~(|Input3E[62:0]) || zdenormE;
|
||||
// assign xzeroE = ~(|FInput1E[62:0]) || xdenormE;
|
||||
// assign yzeroE = ~(|FInput2E[62:0]) || ydenormE;
|
||||
// assign zzeroE = ~(|FInput3E[62:0]) || zdenormE;
|
||||
// KATHERINE - removed denorm to prevent outputting zero when computing with a denormalized number
|
||||
assign xzeroE = ~(|Input1E[62:0]);
|
||||
assign yzeroE = ~(|Input2E[62:0]);
|
||||
assign zzeroE = ~(|Input3E[62:0]);
|
||||
assign xzeroE = ~(|FInput1E[62:0]);
|
||||
assign yzeroE = ~(|FInput2E[62:0]);
|
||||
assign zzeroE = ~(|FInput3E[62:0]);
|
||||
endmodule
|
||||
|
@ -100,7 +100,7 @@ module wallypipelinedhart (
|
||||
logic FStallD;
|
||||
logic FWriteIntW, FWriteIntM;
|
||||
logic [31:0] FSROutW;
|
||||
logic DivSqrtDoneE;
|
||||
logic FDivSqrtDoneM;
|
||||
logic IllegalFPUInstrD, IllegalFPUInstrE;
|
||||
logic [`XLEN-1:0] FPUResultW;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user