Make changes to fpdiv - still working on clock issue with fsm that was changed from posedge to negedge - also updated fpdivsqrt rounding to handle testfloat

This commit is contained in:
James E. Stine 2021-10-06 08:26:09 -05:00
parent 5bcae393c9
commit a91c0c8fc7
13 changed files with 698 additions and 576 deletions

View File

@ -1,9 +1,26 @@
// This module takes as inputs two operands (op1 and op2) ///////////////////////////////////////////
// the operation type (op_type) and the result precision (P). //
// Based on the operation and precision , it conditionally // Written: James Stine
// converts single precision values to double precision values // Modified: 8/1/2018
// and modifies the sign of op1. The converted operands are Float1 //
// and Float2. // Purpose: Floating point divider/square root top unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module convert_inputs( module convert_inputs(
input [63:0] op1, // 1st input operand (A) input [63:0] op1, // 1st input operand (A)

View File

@ -23,9 +23,10 @@ module exception_div (
logic BNaN; // '1' if B is a not-a-number logic BNaN; // '1' if B is a not-a-number
logic ASNaN; // '1' if A is a signalling not-a-number logic ASNaN; // '1' if A is a signalling not-a-number
logic BSNaN; // '1' if B is a signalling not-a-number logic BSNaN; // '1' if B is a signalling not-a-number
logic ZQNaN; // '1' if result Z is a quiet NaN logic ZSNaN; // '1' if result Z is a quiet NaN
logic ZInf; // '1' if result Z is an infnity logic ZInf; // '1' if result Z is an infnity
logic Zero; // '1' if result is zero logic Zero; // '1' if result is zero
logic NegSqrt; // '1' if sqrt and operand is negative
//***take this module out and add more registers or just recalculate it all //***take this module out and add more registers or just recalculate it all
// Determine if mantissas are all zeros // Determine if mantissas are all zeros
@ -48,32 +49,34 @@ module exception_div (
assign AZero = AzeroE & AzeroM; assign AZero = AzeroE & AzeroM;
assign BZero = BzeroE & BzeroE; assign BZero = BzeroE & BzeroE;
// Result is a NaN if the operation is a sqrt and the operand is negative and nonzero
assign NegSqrt = (A[63] & op_type & ~AZero);
// An "Invalid Operation" exception occurs if (A or B is a signalling NaN) // An "Invalid Operation" exception occurs if (A or B is a signalling NaN)
// or (A and B are both Infinite) // or (A and B are both Infinite)
assign Invalid = ASNaN | BSNaN | (((AInf & BInf) | (AZero & BZero))&~op_type) | assign Invalid = ASNaN | BSNaN | (((AInf & BInf) | (AZero & BZero))&~op_type) |
(A[63] & op_type); NegSqrt;
// The result is a quiet NaN if (an "Invalid Operation" exception occurs) // The result is a quiet NaN if (an "Invalid Operation" exception occurs)
// or (A is a NaN) or (B is a NaN). // or (A is a NaN) or (B is a NaN).
assign ZQNaN = Invalid | ANaN | BNaN; assign ZSNaN = Invalid | ANaN | BNaN;
// The result is zero // The result is zero
assign Zero = (AZero | BInf)&~op_type | AZero&op_type; assign Zero = (AZero | BInf)&~op_type | AZero&op_type;
// The result is +Inf if ((A is Inf) or (B is 0)) and (the // The result is +Inf if ((A is Inf) or (B is 0)) and (the
// result is not a quiet NaN). // result is not a quiet NaN).
assign ZInf = (AInf | BZero)&~ZQNaN&~op_type | AInf&op_type&~ZQNaN; assign ZInf = (AInf | BZero)&~ZSNaN&~op_type | AInf&op_type&~ZSNaN;
// Set the type of the result as follows: // Set the type of the result as follows:
// Ztype Result // Ztype Result
// 000 Normal // 000 Normal
// 001 Quiet NaN
// 010 Infinity // 010 Infinity
// 011 Zero // 011 Zero
// 110 DivZero // 110 Div by 0
assign Ztype[0] = ZQNaN | Zero; // 111 SNaN
assign Ztype[1] = ZInf | Zero; assign Ztype[2] = (ZSNaN);
assign Ztype[2] = BZero&~op_type; assign Ztype[1] = (ZSNaN) | (Zero) | (ZInf);
assign Ztype[0] = (ZSNaN) | (Zero);
endmodule // exception endmodule // exception

View File

@ -1,92 +1,86 @@
///////////////////////////////////////////
// //
// File name : fpdiv // Written: James Stine
// Title : Floating-Point Divider/Square-Root // Modified: 8/1/2018
// project : FPU
// Library : fpdiv
// Author(s) : James E. Stine, Jr.
// Purpose : definition of main unit to floating-point div/sqrt
// notes :
// //
// Copyright Oklahoma State University // Purpose: Floating point divider/square root top unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
// //
// Basic Operations // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
// //
// Step 1: Load operands, set flags, and convert SP to DP // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Exponent Logic
// Step 4: Divide/Sqrt using Goldschmidt
// Step 5: Normalize the result.//
// Shift left until normalized. Normalized when the value to the
// left of the binrary point is 1.
// Step 6: Round the result.//
// Step 7: Put quotient/remainder onto output.
// //
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// `timescale 1ps/1ps // `timescale 1ps/1ps
module fpdiv ( module fpdiv (
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic start, input logic start,
input logic [63:0] op1, // 1st input operand (A) input logic [63:0] op1,
input logic [63:0] op2, // 2nd input operand (B) input logic [63:0] op2,
input logic [1:0] rm, // Rounding mode - specify values input logic [1:0] rm,
input logic op_type, // Function opcode input logic op_type,
input logic P, // Result Precision (0 for double, 1 for single) input logic P,
input logic OvEn, // Overflow trap enabled input logic OvEn,
input logic UnEn, // Underflow trap enabled input logic UnEn,
output logic done, input logic XNaNQ,
output logic FDivBusyE, input logic YNaNQ,
output logic [63:0] AS_Result, // Result of operation input logic XZeroQ,
output logic [4:0] Flags); // IEEE exception flags input logic YZeroQ,
input logic XInfQ,
input logic YInfQ,
output logic done,
logic [63:0] Float1; output logic FDivBusyE,
logic [63:0] Float2; output logic [63:0] AS_Result,
output logic [4:0] Flags);
logic [12:0] exp1, exp2, expF; logic [63:0] Float1;
logic [12:0] exp_diff, bias; logic [63:0] Float2;
logic [13:0] exp_sqrt;
logic [12:0] exp_s;
logic [12:0] exp_c;
logic [10:0] exponent; logic [12:0] exp1, exp2, expF;
logic [63:0] Result; logic [12:0] exp_diff, bias;
logic [52:0] mantissaA; logic [13:0] exp_sqrt;
logic [52:0] mantissaB; logic [63:0] Result;
logic [52:0] mantissaA;
logic [52:0] mantissaB;
logic [2:0] sel_inv; logic [2:0] sel_inv;
logic Invalid; logic Invalid;
logic [4:0] FlagsIn; logic [4:0] FlagsIn;
logic signResult; logic signResult;
logic convert; logic convert;
logic sub; logic sub;
logic [63:0] q1, qm1, qp1, q0, qm0, qp0; logic [63:0] q1, qm1, qp1, q0, qm0, qp0;
logic [63:0] rega_out, regb_out, regc_out, regd_out; logic [63:0] rega_out, regb_out, regc_out, regd_out;
logic [127:0] regr_out; logic [127:0] regr_out;
logic [2:0] sel_muxa, sel_muxb; logic [2:0] sel_muxa, sel_muxb;
logic sel_muxr; logic sel_muxr;
logic load_rega, load_regb, load_regc, load_regd, load_regr; logic load_rega, load_regb, load_regc, load_regd, load_regr;
logic load_regs;
logic exp_cout1, exp_cout2;
logic exp_odd, open;
// div/sqrt logic load_regs;
// fdiv = 0 logic exp_cout1, exp_cout2;
// fsqrt = 1 logic exp_odd, open;
// op_type : fdiv=0, fsqrt=1
assign Float1 = op1; assign Float1 = op1;
assign Float2 = op_type ? op1 : op2; assign Float2 = op_type ? op1 : op2;
// Test for exceptions and return the "Invalid Operation" and // Exception detection
// "Denormalized" Input Flags. The "sel_inv" is used in exception_div exc1 (.A(Float1), .B(Float2), .op_type, .Ztype(sel_inv), .Invalid);
// the third pipeline stage to select the result. Also, op1_Norm
// and op2_Norm are one if op1 and op2 are not zero or denormalized.
// sub is one if the effective operation is subtaction.
exception_div exc1 (.A(Float1), .B(Float2), .op_type,
// output:
.Ztype(sel_inv), .Invalid);
// Determine Sign/Mantissa // Determine Sign/Mantissa
assign signResult = (Float1[63]^Float2[63]); assign signResult = (Float1[63]^Float2[63]);
assign mantissaA = {1'b1, Float1[51:0]}; assign mantissaA = {1'b1, Float1[51:0]};
@ -103,29 +97,30 @@ module fpdiv (
assign {exp_cout2, exp_sqrt} = {1'b0, exp1} + {4'h0, 10'h3ff} + exp_odd; assign {exp_cout2, exp_sqrt} = {1'b0, exp1} + {4'h0, 10'h3ff} + exp_odd;
// Choose correct exponent // Choose correct exponent
assign expF = op_type ? exp_sqrt[13:1] : exp_diff; assign expF = op_type ? exp_sqrt[13:1] : exp_diff;
// Main Goldschmidt/Division Routine // Main Goldschmidt/Division Routine
divconv goldy (.q1, .qm1, .qp1, .q0, .qm0, .qp0, .rega_out, .regb_out, .regc_out, .regd_out, divconv goldy (.q1, .qm1, .qp1, .q0, .qm0, .qp0, .rega_out, .regb_out, .regc_out, .regd_out,
.regr_out, .d(mantissaB), .n(mantissaA), .sel_muxa, .sel_muxb, .sel_muxr, .regr_out, .d(mantissaB), .n(mantissaA), .sel_muxa, .sel_muxb, .sel_muxr,
.reset, .clk, .load_rega, .load_regb, .load_regc, .load_regd, .reset, .clk, .load_rega, .load_regb, .load_regc, .load_regd,
.load_regr, .load_regs, .P, .op_type, .exp_odd); .load_regr, .load_regs, .P, .op_type, .exp_odd);
// FSM : control divider // FSM : control divider
fsm control (.clk, .reset, .start, .op_type, fsm control (.clk, .reset, .start, .op_type,
// outputs: .done, .load_rega, .load_regb, .load_regc, .load_regd,
.done, .load_rega, .load_regb, .load_regc, .load_regd, .load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr,
.load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr, .divBusy(FDivBusyE));
.divBusy(FDivBusyE));
// Round the mantissa to a 52-bit value, with the leading one // Round the mantissa to a 52-bit value, with the leading one
// removed. The rounding units also handles special cases and // removed. The rounding units also handles special cases and
// set the exception flags. // set the exception flags.
rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF), rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF),
.sel_inv, .Invalid, .SignR(signResult), .sel_inv, .Invalid, .SignR(signResult),
.q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out, .Float1(op1), .Float2(op2),
// outputs: .XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ,
.Result, .Flags(FlagsIn)); .XInfQ, .YInfQ, .op_type,
.q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out,
.Result, .Flags(FlagsIn));
// Store the final result and the exception flags in registers. // Store the final result and the exception flags in registers.
flopenr #(64) rega (clk, reset, done, Result, AS_Result); flopenr #(64) rega (clk, reset, done, Result, AS_Result);
flopenr #(5) regc (clk, reset, done, FlagsIn, Flags); flopenr #(5) regc (clk, reset, done, FlagsIn, Flags);

View File

@ -1,6 +1,6 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// //
// Written: Katherine Parry, Bret Mathis // Written: Katherine Parry, James Stine, Brett Mathis
// Modified: 6/23/2021 // Modified: 6/23/2021
// //
// Purpose: FPU // Purpose: FPU
@ -25,24 +25,24 @@
`include "wally-config.vh" `include "wally-config.vh"
module fpu ( module fpu (
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic [31:0] InstrD, // instruction from IFU input logic [31:0] InstrD, // instruction from IFU
input logic [`XLEN-1:0] ReadDataW,// Read data from memory input logic [`XLEN-1:0] ReadDataW,// Read data from memory
input logic [`XLEN-1:0] SrcAE, // Integer input being processed (from IEU) input logic [`XLEN-1:0] SrcAE, // Integer input being processed (from IEU)
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg (from IEU) input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg (from IEU)
input logic StallE, StallM, StallW, // stall signals from HZU input logic StallE, StallM, StallW, // stall signals from HZU
input logic FlushE, FlushM, FlushW, // flush signals from HZU input logic FlushE, FlushM, FlushW, // flush signals from HZU
input logic [4:0] RdE, RdM, RdW, // which FP register to write to (from IEU) input logic [4:0] RdE, RdM, RdW, // which FP register to write to (from IEU)
output logic FRegWriteM, // FP register write enable output logic FRegWriteM, // FP register write enable
output logic FStallD, // Stall the decode stage output logic FStallD, // Stall the decode stage
output logic FWriteIntE, FWriteIntM, FWriteIntW, // integer register write enable output logic FWriteIntE, FWriteIntM, FWriteIntW, // integer register write enable
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic [4:0] SetFflagsM // FMA flags (to privileged unit) output logic [4:0] SetFflagsM // FMA flags (to privileged unit)
); );
//*** make everything FLEN at some point //*** make everything FLEN at some point
@ -59,338 +59,257 @@ module fpu (
generate if (`F_SUPPORTED | `D_SUPPORTED) begin : fpu generate if (`F_SUPPORTED | `D_SUPPORTED) begin : fpu
// control signals // control signals
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division or squareroot logic FDivStartD, FDivStartE; // Start division or squareroot
logic FWriteIntD; // Write to integer register logic FWriteIntD; // Write to integer register
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
logic [1:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register logic [1:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register
logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic [2:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage logic [2:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; // Select the result written to the integer resister logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; // Select the result written to the integer resister
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
// regfile signals
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding)
logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
// unpacking signals
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
logic XSgnM, YSgnM; // input's sign - memory stage
logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage
logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage
logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage
logic [10:0] BiasE; // bias based on precision (single=7f double=3ff - max expoent/2)
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XDenormE, YDenormE, ZDenormE; // is the input denormalized
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XExpMaxE; // is the exponent all ones (max value)
logic XNormE; // is normal
// result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
logic [4:0] FDivFlgM, FDivFlgW; // divide/squareroot flags
logic [63:0] FMAResM, FMAResW; // FMA/multiply result
logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply result
logic [63:0] ReadResW; // read result (load instruction)
logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result
logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags
logic [63:0] CvtResE, CvtResM; // FP <-> int convert result
logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this
logic [63:0] ClassResE, ClassResM; // classify result
logic [63:0] CmpResE, CmpResM; // compare result
logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid)
logic [63:0] SgnResE, SgnResM; // sign injection result
logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid)
logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage
logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage
logic [`XLEN-1:0] FIntResE;
logic [63:0] FPUResultW; // final FP result being written to the FP register
// other signals
logic FDivSqrtDoneE; // is divide done
logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit
logic FDivClk; // clock for divide/squareroot unit
logic [63:0] AlignedSrcAE; // align SrcA to the floating point format
// DECODE STAGE
// calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW,
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
// regfile signals // FP register file
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage // - can read 3 registers and write 1 register every cycle
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), .a4(RdW),
logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) .wd4(FPUResultW),
logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
// unpacking signals // D/E pipeline registers
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
logic XSgnM, YSgnM; // input's sign - memory stage flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage {Adr1E, Adr2E, Adr3E});
logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE,
logic [10:0] BiasE; // bias based on precision (single=7f double=3ff - max expoent/2) {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage // EXECUTION STAGE
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage // Hazard unit for FPU
logic XDenormE, YDenormE, ZDenormE; // is the input denormalized // - determines if any forwarding or stalls are needed
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM,
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage .FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage // forwarding muxs
logic XExpMaxE; // is the exponent all ones (max value) mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
logic XNormE; // is normal mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
mux3 #(64) fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0},
// result and flag signals {2'b0, {10{1'b1}}, 52'b0},
logic [63:0] FDivResM, FDivResW; // divide/squareroot result {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01)},
logic [4:0] FDivFlgM, FDivFlgW; // divide/squareroot flags FSrcYE); // Force Z to be 0 for multiply instructions
// Force Z to be 0 for multiply instructions
mux3 #(64) fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
// unpacking unit
// - splits FP inputs into their various parts
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infinity)
unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
// FMA
// - two stage FMA
// - execute stage - multiplication and addend shifting
// - memory stage - addition and rounding
// - handles FMA and multiply instructions
fma fma (.clk, .reset, .FlushM, .StallM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE,
.XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM,
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
.FOpCtrlE,
.FmtE, .FmtM, .FrmM,
.FMAFlgM, .FMAResM);
// clock gater
// - creates a clock that only runs during divide/sqrt instructions
// - using the separate clock gives the divide/sqrt unit some time to get set up
// *** the module says not to use in synthesis
clockgater fpdivclkg(.E(FDivStartE),
.SE(1'b0),
.CLK(clk),
.ECLK(FDivClk));
// capture the inputs for divide/sqrt
// - if not captured, any forwarded inputs will change during computation
// - this problem is caused by stalling the execute stage
// - the other units don't have this problem, only div/sqrt stalls the execute stage
flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E),
.en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(FDivBusyE));
flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E),
.en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(FDivBusyE));
flopenrc #(6) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE}),
.q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ}),
.en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(FDivBusyE));
// fpdivsqrt using Goldschmidt's iteration
fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
.reset, .clk(FDivClk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1),
.XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ,
.FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
// convert from single to double and vice versa
cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE);
// compare unit
// - computation is done in one stage
// - writes to FP file during min/max instructions
// - other comparisons write a 1 or 0 to the integer register
fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}),
.FSrcXE, .FSrcYE, .FOpCtrlE,
.FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE,
.Invalid(CmpNVE), .CmpResE);
// sign injection unit
// - computation is done in one stage
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE,
.SgnNVE, .SgnResE);
// classify
// - computation is done in one stage
// - most of the work is done in the unpacking unit
// - result is written to the integer register
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE,
.XSNaNE, .ClassResE);
fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE,
.CvtResE, .CvtFlgE);
// data to be stored in memory - to IEU
// - FP uses NaN-boxing format
// - if there are any unused bits the most significant bits are filled with 1s
assign FWriteDataE = FSrcYE[`XLEN-1:0];
// Align SrcA to MSB when single precision
mux2 #(64) SrcAMux({{32{1'b1}}, SrcAE[31:0]}, {{64-`XLEN{1'b1}}, SrcAE}, FmtE, AlignedSrcAE);
// select a result that may be written to the FP register
mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE);
mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE);
// select the result that may be written to the integer register - to IEU
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0],
CvtResE[`XLEN-1:0], FIntResSelE, FIntResE);
// E/M pipe registers
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM});
flopenrc #(65) EMFpReg3(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM});
flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
flopenrc #(12) EMFpReg5(clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, FResE, FResM);
flopenrc #(5) EMRegCmpFlg(clk, reset, FlushM, ~StallM, FFlgE, FFlgM);
flopenrc #(`XLEN) EMRegSgnRes(clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(11) EMCtrlReg(clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
// BEGIN MEMORY STAGE
// FPU flag selection - to privileged
mux4 #(5) FPUFlgMux(5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
logic [63:0] FMAResM, FMAResW; // FMA/multiply result // M/W pipe registers
logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply result flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
logic [63:0] ReadResW; // read result (load instruction) flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW);
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(5) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM, FWriteIntM},
{FRegWriteW, FResultSelW, FmtW, FWriteIntW});
// BEGIN WRITEBACK STAGE
// put ReadData into NaN-boxing format
// - if there are any unused bits the most significant bits are filled with 1s
// - for load instruction
mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
// select the result to be written to the FP register
mux4 #(64) FPUResultMux(ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW);
logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result
logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags
logic [63:0] CvtResE, CvtResM; // FP <-> int convert result
logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this
logic [63:0] ClassResE, ClassResM; // classify result
logic [63:0] CmpResE, CmpResM; // compare result
logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid)
logic [63:0] SgnResE, SgnResM; // sign injection result
logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid)
logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage
logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage
logic [`XLEN-1:0] FIntResE;
logic [63:0] FPUResultW; // final FP result being written to the FP register
// other signals
logic FDivSqrtDoneE; // is divide done
logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit
logic FDivClk; // clock for divide/squareroot unit
logic [63:0] AlignedSrcAE; // align SrcA to the floating point format
////////////////////////////////////////////////////////////////////////////////////////
//DECODE STAGE
////////////////////////////////////////////////////////////////////////////////////////
// calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW,
// outputs:
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
// FP register file
// - can read 3 registers and write 1 register every cycle
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
.a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), .a4(RdW),
.wd4(FPUResultW),
// outputs:
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
////////////////////////////////////////////////////////////////////////////////////////
// D/E pipeline registers
////////////////////////////////////////////////////////////////////////////////////////
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
////////////////////////////////////////////////////////////////////////////////////////
//EXECUTION STAGE
////////////////////////////////////////////////////////////////////////////////////////
// Hazard unit for FPU
// - determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM,
// outputs:
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
// forwarding muxes
// - each source operand is either the value read from the register file (FRD*E)
//   or a forwarded value (FPUResultW or FResM), chosen by FForwardXE/YE/ZE from fhazard
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
// operand substitution
// - fyaddmux: when FOpCtrlE[2:1] == 2'b11 and FResultSelE == 01, Y is replaced by the constant 1.0
//   (0xFFFFFFFF_3F800000 = single-precision 1.0 with the upper 32 bits set to 1s,
//    0x3FF00000_00000000 = double-precision 1.0); which constant is used depends on FmtE
// - fzmulmux: when FOpCtrlE[2:1] == 2'b10, Z is replaced by 0;
//   when FOpCtrlE[2:1] == 2'b11, Z is replaced by the original Y operand (FPreSrcYE)
// NOTE(review): presumably this lets add/sub run as X*1.0 + Y and multiply as X*Y + 0 - confirm
// NOTE(review): assumes mux3 picks its 2nd data input when s[0] is set and its 3rd when s[1] is set - confirm against mux3
mux3 #(64) fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, {2'b0, {10{1'b1}}, 52'b0}, {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01)}, FSrcYE); // Force Y to be 1.0 when FOpCtrlE[2:1] == 2'b11
mux3 #(64) fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); // Force Z to be 0 for multiply instructions, or Y when FOpCtrlE[2:1] == 2'b11
// unpacking unit
// - splits FP inputs into their various parts
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infinity)
unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE,
// outputs:
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
// FMA
// - two stage FMA
// - execute stage - multiplication and addend shifting
// - memory stage - addition and rounding
// - handles FMA and multiply instructions
// - contains some E/M pipeline registers
// *** currently handles FLEN and 32 bits (don't know if 32 works with 128 - easy to fix) - change to handle only the supported formats
fma fma (.clk, .reset, .FlushM, .StallM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE,
.XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM,
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
.FOpCtrlE,
.FmtE, .FmtM, .FrmM,
// outputs:
.FMAFlgM, .FMAResM);
// clock gater
// - creates a clock that only runs during divide/sqrt instructions
// - using the separate clock gives the divide/sqrt unit some time to get set up
// *** the module says not to use in synthesis
// - FDivClk is clk gated by FDivStartE; the SE input is tied low
clockgater fpdivclkg(.E(FDivStartE),
.SE(1'b0),
.CLK(clk),
.ECLK(FDivClk));
// capture the inputs for divide/sqrt
// - if not captured any forwarded inputs will change during computation
// - this problem is caused by stalling the execute stage
// - the other units don't have this problem, only div/sqrt stalls the execute stage
// - the captured value is {sign, exponent, mantissa[51:0]} of the unpacked X and Y operands
// NOTE(review): both registers are clocked by FDivBusyE (an output of fdivsqrt), not by clk or FDivClk,
//   are always enabled, and are cleared by FDivSqrtDoneE - confirm this clocking is intended
flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E),
.en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(FDivBusyE));
flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E),
.en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(FDivBusyE));
// output for store instructions
//*** change to use the unpacking unit if possible
fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
.reset, .clk(FDivClk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1),
// outputs:
.FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
// convert from single to double and vice versa
cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE);
// compare unit
// - computation is done in one stage
// - writes to FP file during min/max instructions
// - other comparisons write a 1 or 0 to the integer register
fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}),
.FSrcXE, .FSrcYE, .FOpCtrlE,
.FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE,
// outputs:
.Invalid(CmpNVE), .CmpResE);
// sign injection unit
// - computation is done in one stage
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE,
// outputs:
.SgnNVE, .SgnResE);
// classify
// - computation is done in one stage
// - most of the work is done in the unpacking unit
// - result is written to the integer register
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE,
// outputs:
.XSNaNE, .ClassResE);
fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE,
// outputs:
.CvtResE, .CvtFlgE);
// data to be stored in memory - to IEU
// - FP uses NaN-boxing format
// - if there are any unused bits the most significant bits are filled with 1s
assign FWriteDataE = FSrcYE[`XLEN-1:0];
// Align SrcA to MSB when single precision
mux2 #(64) SrcAMux({{32{1'b1}}, SrcAE[31:0]}, {{64-`XLEN{1'b1}}, SrcAE}, FmtE, AlignedSrcAE);
// select a result that may be written to the FP register
mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE);
mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE);
// select the result that may be written to the integer register - to IEU
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], CvtResE[`XLEN-1:0], FIntResSelE, FIntResE);
//***will synth remove registers of values that are always zero?
////////////////////////////////////////////////////////////////////////////////////////
// E/M pipe registers
////////////////////////////////////////////////////////////////////////////////////////
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM});
flopenrc #(65) EMFpReg3(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM});
flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
flopenrc #(12) EMFpReg5(clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, FResE, FResM);
flopenrc #(5) EMRegCmpFlg(clk, reset, FlushM, ~StallM, FFlgE, FFlgM);
flopenrc #(`XLEN) EMRegSgnRes(clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
// flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
//flopenrc #(64) EMRegCvtFpRes(clk, reset, FlushM, ~StallM, CvtFpResE, CvtFpResM);
//flopenrc #(5) EMRegCvtFpFlg(clk, reset, FlushM, ~StallM, CvtFpFlgE, CvtFpFlgM);
// flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
// flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
// flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
flopenrc #(11) EMCtrlReg(clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
////////////////////////////////////////////////////////////////////////////////////////
//BEGIN MEMORY STAGE
////////////////////////////////////////////////////////////////////////////////////////
// FPU flag selection - to privileged
// - chooses SetFflagsM from: no flags (5'b0), FMA flags, divide/sqrt flags, or the flags of the other units (FFlgM)
// NOTE(review): the data inputs are memory-stage signals (*FlgM) but the select is the writeback-stage
//   FResultSelW; FPUResultMux uses the same select on writeback-stage data - confirm whether FResultSelM was intended here
mux4 #(5) FPUFlgMux(5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
////////////////////////////////////////////////////////////////////////////////////////
// M/W pipe registers
////////////////////////////////////////////////////////////////////////////////////////
flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW);
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(5) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM, FWriteIntM},
{FRegWriteW, FResultSelW, FmtW, FWriteIntW});
////////////////////////////////////////////////////////////////////////////////////////
// BEGIN WRITEBACK STAGE
////////////////////////////////////////////////////////////////////////////////////////
// put ReadData into NaN-boxing format
// - if there are any unused bits the most significant bits are filled with 1s
// - for load instruction
mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
// select the result to be written to the FP register
mux4 #(64) FPUResultMux(ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW);
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
assign FStallD = 0; assign FStallD = 0;
assign FWriteIntE = 0; assign FWriteIntE = 0;
assign FWriteIntM = 0; assign FWriteIntM = 0;
assign FWriteIntW = 0; assign FWriteIntW = 0;
assign FWriteDataE = 0; assign FWriteDataE = 0;
assign FIntResM = 0; assign FIntResM = 0;
assign FDivBusyE = 0; assign FDivBusyE = 0;
assign IllegalFPUInstrD = 1; assign IllegalFPUInstrD = 1;
assign SetFflagsM = 0; assign SetFflagsM = 0;
end end
endgenerate endgenerate

View File

@ -1,10 +1,9 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// regfile.sv
// //
// Written: David_Harris@hmc.edu 9 January 2021 // Written: David_Harris@hmc.edu 9 January 2021
// Modified: // Modified: James Stine
// //
// Purpose: 4-port register file // Purpose: 3-port output register file
// //
// A component of the Wally configurable RISC-V project. // A component of the Wally configurable RISC-V project.
// //
@ -26,22 +25,20 @@
`include "wally-config.vh" `include "wally-config.vh"
module fregfile ( module fregfile (
input logic clk, reset, input logic clk, reset,
input logic we4, input logic we4,
input logic [ 4:0] a1, a2, a3, a4, input logic [4:0] a1, a2, a3, a4,
input logic [63:0] wd4, input logic [63:0] wd4,
output logic [63:0] rd1, rd2, rd3); output logic [63:0] rd1, rd2, rd3);
logic [63:0] rf[31:0]; logic [63:0] rf[31:0];
integer i; integer i;
// three ported register file // three ported register file
// read three ports combinationally (A1/RD1, A2/RD2, A3/RD3) // read three ports combinationally (A1/RD1, A2/RD2, A3/RD3)
// write fourth port on rising edge of clock (A4/WD4/WE4) // write fourth port on rising edge of clock (A4/WD4/WE4)
// write occurs on falling edge of clock // write occurs on falling edge of clock
// reset is intended for simulation only, not synthesis
always_ff @(negedge clk or posedge reset) always_ff @(negedge clk or posedge reset)
if (reset) for(i=0; i<32; i++) rf[i] <= 0; if (reset) for(i=0; i<32; i++) rf[i] <= 0;
else if (we4) rf[a4] <= wd4; else if (we4) rf[a4] <= wd4;

View File

@ -1,49 +1,63 @@
module fsm ( ///////////////////////////////////////////
//
// Written: James Stine
// Modified: 9/28/2021
//
// Purpose: FSM for floating point divider/square root unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
input logic clk, module fsm (
input logic reset, input logic clk,
input logic start, input logic reset,
input logic op_type, input logic start,
output logic done, // End of cycles input logic op_type,
output logic load_rega, // enable for regA output logic done,
output logic load_regb, // enable for regB output logic load_rega,
output logic load_regc, // enable for regC output logic load_regb,
output logic load_regd, // enable for regD output logic load_regc,
output logic load_regr, // enable for rem output logic load_regd,
output logic load_regs, // enable for q,qm,qp output logic load_regr,
output logic [2:0] sel_muxa, // Select muxA output logic load_regs,
output logic [2:0] sel_muxb, // Select muxB output logic [2:0] sel_muxa,
output logic sel_muxr, // Select rem mux output logic [2:0] sel_muxb,
output logic divBusy // calculation is happening output logic sel_muxr,
output logic divBusy
); );
typedef enum logic [4:0] {S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
reg [4:0] CURRENT_STATE; S10, S11, S12, S13, S14, S15, S16, S17, S18, S19,
reg [4:0] NEXT_STATE; S20, S21, S22, S23, S24, S25, S26, S27, S28, S29,
S30} statetype;
parameter [4:0]
S0=5'd0, S1=5'd1, S2=5'd2, statetype current_state, next_state;
S3=5'd3, S4=5'd4, S5=5'd5,
S6=5'd6, S7=5'd7, S8=5'd8,
S9=5'd9, S10=5'd10,
S13=5'd13, S14=5'd14, S15=5'd15,
S16=5'd16, S17=5'd17, S18=5'd18,
S19=5'd19, S20=5'd20, S21=5'd21,
S22=5'd22, S23=5'd23, S24=5'd24,
S25=5'd25, S26=5'd26, S27=5'd27,
S28=5'd28, S29=5'd29, S30=5'd30;
always @(negedge clk) always @(negedge clk)
begin begin
if(reset==1'b1) if (reset == 1'b1)
CURRENT_STATE=S0; current_state = S0;
else else
CURRENT_STATE=NEXT_STATE; current_state = next_state;
end end
always @(*) always @(*)
begin begin
case(CURRENT_STATE) case(current_state)
S0: // iteration 0 S0: // iteration 0
begin begin
if (start==1'b0) if (start==1'b0)
@ -59,7 +73,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S0; next_state = S0;
end end
else if (start==1'b1 && op_type==1'b0) else if (start==1'b1 && op_type==1'b0)
begin begin
@ -74,7 +88,7 @@ module fsm (
sel_muxa = 3'b001; sel_muxa = 3'b001;
sel_muxb = 3'b001; sel_muxb = 3'b001;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S1; next_state = S1;
end // if (start==1'b1 && op_type==1'b0) end // if (start==1'b1 && op_type==1'b0)
else if (start==1'b1 && op_type==1'b1) else if (start==1'b1 && op_type==1'b1)
begin begin
@ -89,7 +103,7 @@ module fsm (
sel_muxa = 3'b010; sel_muxa = 3'b010;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S13; next_state = S13;
end end
else else
begin begin
@ -104,7 +118,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S0; next_state = S0;
end end
end // case: S0 end // case: S0
S1: S1:
@ -120,7 +134,7 @@ module fsm (
sel_muxa = 3'b010; sel_muxa = 3'b010;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S2; next_state = S2;
end end
S2: // iteration 1 S2: // iteration 1
begin begin
@ -135,7 +149,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S3; next_state = S3;
end end
S3: S3:
begin begin
@ -150,7 +164,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S4; next_state = S4;
end end
S4: // iteration 2 S4: // iteration 2
begin begin
@ -165,7 +179,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S5; next_state = S5;
end end
S5: S5:
begin begin
@ -180,7 +194,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; // add sel_muxr = 1'b0; // add
NEXT_STATE = S6; next_state = S6;
end end
S6: // iteration 3 S6: // iteration 3
begin begin
@ -195,7 +209,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S8; next_state = S8;
end end
S7: S7:
begin begin
@ -210,7 +224,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S8; next_state = S8;
end // case: S7 end // case: S7
S8: // q,qm,qp S8: // q,qm,qp
begin begin
@ -225,7 +239,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S9; next_state = S9;
end end
S9: // rem S9: // rem
begin begin
@ -240,7 +254,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b1; sel_muxr = 1'b1;
NEXT_STATE = S10; next_state = S10;
end end
S10: // done S10: // done
begin begin
@ -255,7 +269,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S0; next_state = S0;
end end
S13: // start of sqrt path S13: // start of sqrt path
begin begin
@ -270,7 +284,7 @@ module fsm (
sel_muxa = 3'b010; sel_muxa = 3'b010;
sel_muxb = 3'b001; sel_muxb = 3'b001;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S14; next_state = S14;
end end
S14: S14:
begin begin
@ -285,7 +299,7 @@ module fsm (
sel_muxa = 3'b001; sel_muxa = 3'b001;
sel_muxb = 3'b100; sel_muxb = 3'b100;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S15; next_state = S15;
end end
S15: // iteration 1 S15: // iteration 1
begin begin
@ -300,7 +314,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S16; next_state = S16;
end end
S16: S16:
begin begin
@ -315,7 +329,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S17; next_state = S17;
end end
S17: S17:
begin begin
@ -330,7 +344,7 @@ module fsm (
sel_muxa = 3'b100; sel_muxa = 3'b100;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S18; next_state = S18;
end end
S18: // iteration 2 S18: // iteration 2
begin begin
@ -345,7 +359,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S19; next_state = S19;
end end
S19: S19:
begin begin
@ -360,7 +374,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S20; next_state = S20;
end end
S20: S20:
begin begin
@ -375,7 +389,7 @@ module fsm (
sel_muxa = 3'b100; sel_muxa = 3'b100;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S21; next_state = S21;
end end
S21: // iteration 3 S21: // iteration 3
begin begin
@ -390,7 +404,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S22; next_state = S22;
end end
S22: S22:
begin begin
@ -405,7 +419,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S23; next_state = S23;
end end
S23: S23:
begin begin
@ -420,7 +434,7 @@ module fsm (
sel_muxa = 3'b100; sel_muxa = 3'b100;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S24; next_state = S24;
end end
S24: // q,qm,qp S24: // q,qm,qp
begin begin
@ -435,7 +449,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S25; next_state = S25;
end end
S25: // rem S25: // rem
begin begin
@ -450,7 +464,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b110; sel_muxb = 3'b110;
sel_muxr = 1'b1; sel_muxr = 1'b1;
NEXT_STATE = S26; next_state = S26;
end end
S26: // done S26: // done
begin begin
@ -465,7 +479,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S0; next_state = S0;
end end
default: default:
begin begin
@ -480,9 +494,9 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S0; next_state = S0;
end end
endcase // case(CURRENT_STATE) endcase // case(current_state)
end // always @ (CURRENT_STATE or X) end // always @ (current_state or X)
endmodule // fsm endmodule // fsm

View File

@ -1,37 +1,55 @@
///////////////////////////////////////////
// //
// The rounder takes as inputs a 64-bit value to be rounded, A, the // Written: James Stine
// exponent of the value to be rounded, the sign of the final result, Sign, // Modified: 8/1/2018
// the precision of the results, P, and the two-bit rounding mode, rm.
// It produces a rounded 52-bit result, Z, the exponent of the rounded
// result, Z_exp, and a flag that indicates if the result was rounded,
// Inexact. The rounding mode has the following values.
// rm Mode
// 00 round-to-nearest-even
// 01 round-toward-zero
// 10 round-toward-plus infinity
// 11 round-toward-minus infinity
// //
// Purpose: Floating point divider/square root rounder unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module rounder_div ( module rounder_div (
input logic [1:0] rm, input logic [1:0] rm,
input logic P, input logic P,
input logic OvEn, input logic OvEn,
input logic UnEn, input logic UnEn,
input logic [12:0] exp_diff, input logic [12:0] exp_diff,
input logic [2:0] sel_inv, input logic [2:0] sel_inv,
input logic Invalid, input logic Invalid,
input logic SignR, input logic SignR,
input logic [63:0] Float1,
input logic [63:0] q1, input logic [63:0] Float2,
input logic [63:0] qm1, input logic XNaNQ,
input logic [63:0] qp1, input logic YNaNQ,
input logic [63:0] q0, input logic XZeroQ,
input logic [63:0] qm0, input logic YZeroQ,
input logic [63:0] qp0, input logic XInfQ,
input logic YInfQ,
input logic op_type,
input logic [63:0] q1,
input logic [63:0] qm1,
input logic [63:0] qp1,
input logic [63:0] q0,
input logic [63:0] qm0,
input logic [63:0] qp0,
input logic [127:0] regr_out, input logic [127:0] regr_out,
output logic [63:0] Result, output logic [63:0] Result,
output logic [4:0] Flags output logic [4:0] Flags
); );
logic Rsign; logic Rsign;
@ -56,11 +74,15 @@ module rounder_div (
logic Texp_l7z; logic Texp_l7z;
logic Texp_l7o; logic Texp_l7o;
logic OvCon; logic OvCon;
logic zero_rem; logic zero_rem;
logic [1:0] mux_mant; logic [1:0] mux_mant;
logic sign_rem; logic sign_rem;
logic [63:0] q, qm, qp; logic [63:0] q, qm, qp;
logic exp_ovf; logic exp_ovf;
logic [50:0] NaN_out;
logic NaN_Sign_out;
logic Sign_out;
// Remainder = 0? // Remainder = 0?
assign zero_rem = ~(|regr_out); assign zero_rem = ~(|regr_out);
@ -117,12 +139,11 @@ module rounder_div (
// the input was infinite or NaN or the output of the adder is zero. // the input was infinite or NaN or the output of the adder is zero.
// 00 = Valid // 00 = Valid
// 10 = NaN // 10 = NaN
assign Valid = (~sel_inv[2]&~sel_inv[1]&~sel_inv[0]); assign Valid = ~sel_inv[2]&~sel_inv[1]&~sel_inv[0];
assign NaN = ~sel_inv[1]& sel_inv[0]; assign NaN = sel_inv[2]&sel_inv[1]&sel_inv[0];
assign UnderFlow = (P & UnFlow_SP | UnFlow_DP) & Valid; assign UnderFlow = (P & UnFlow_SP | UnFlow_DP) & Valid;
assign OverFlow = (P & OvFlow_SP | OvFlow_DP) & Valid; assign OverFlow = (P & OvFlow_SP | OvFlow_DP) & Valid;
assign Div0 = sel_inv[2]&sel_inv[1]&~sel_inv[0]; assign Div0 = YZeroQ&~XZeroQ&~op_type&~NaN;
// The final result is Inexact if any rounding occurred ((i.e., R or S // The final result is Inexact if any rounding occurred ((i.e., R or S
// is one), or (if the result overflows ) or (if the result underflows and the // is one), or (if the result overflows ) or (if the result underflows and the
@ -161,18 +182,26 @@ module rounder_div (
// If the result is zero or infinity, the mantissa is all zeros. // If the result is zero or infinity, the mantissa is all zeros.
// If the result is NaN, the mantissa is 10...0 // If the result is NaN, the mantissa is 10...0
// If the result the largest floating point number, the mantissa // If the result the largest floating point number, the mantissa
// is all ones. Otherwise, the mantissa is not changed. // is all ones. Otherwise, the mantissa is not changed.
assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero); assign NaN_out = ~XNaNQ&YNaNQ ? Float2[50:0] : Float1[50:0];
assign Rmant[50:0] = {51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}); assign NaN_Sign_out = ~XNaNQ&YNaNQ ? Float2[63] : Float1[63];
assign Sign_out = (XZeroQ&YZeroQ | XInfQ&YInfQ)&~op_type | Rsign&~XNaNQ&~YNaNQ |
NaN_Sign_out&(XNaNQ|YNaNQ);
// FIXME (jes) - Imperas gives sNaN a Sign=0 where x86 gives Sign=1
// | Float1[63]&op_type;
assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero);
assign Rmant[50:0] = ({51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}) |
(NaN_out&{51{NaN}}))&({51{~(op_type&Float1[63]&~XZeroQ)}});
// For single precision, the 8 least significant bits of the exponent // For single precision, the 8 least significant bits of the exponent
// and 23 most significant bits of the mantissa contain bits used // and 23 most significant bits of the mantissa contain bits used
// for the final result. A double precision result is returned if // for the final result. A double precision result is returned if
// overflow has occurred, the overflow trap is enabled, and a conversion // overflow has occurred, the overflow trap is enabled, and a conversion
// is being performed. // is being performed.
assign OvCon = OverFlow & OvEn; assign OvCon = OverFlow & OvEn;
assign Result = (P&~OvCon) ? { {32{1'b1}}, Rsign, Rexp[7:0], Rmant[51:29]} assign Result = (P&~OvCon) ? { {32{1'b1}}, Sign_out, Rexp[7:0], Rmant[51:29]}
: {Rsign, Rexp, Rmant}; : {Sign_out, Rexp, Rmant};
endmodule // rounder endmodule // rounder

View File

@ -1,5 +1,30 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_a0 (input logic [6:0] a, module sbtm_a0 (input logic [6:0] a,
output logic [12:0] y); output logic [12:0] y);
always_comb always_comb
case(a) case(a)
7'b0000000: y = 13'b1111111100010; 7'b0000000: y = 13'b1111111100010;
@ -137,4 +162,4 @@ endmodule // sbtm_a0

View File

@ -1,5 +1,30 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_a1 (input logic [6:0] a, module sbtm_a1 (input logic [6:0] a,
output logic [4:0] y); output logic [4:0] y);
always_comb always_comb
case(a) case(a)
7'b0000000: y = 5'b11100; 7'b0000000: y = 5'b11100;
@ -137,4 +162,4 @@ endmodule // sbtm_a0

View File

@ -1,5 +1,30 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_a2 (input logic [7:0] a, module sbtm_a2 (input logic [7:0] a,
output logic [13:0] y); output logic [13:0] y);
always_comb always_comb
case(a) case(a)
8'b01000000: y = 14'b10110100010111; 8'b01000000: y = 14'b10110100010111;
@@ -201,4 +226,4 @@ endmodule // sbtm_a0

View File

@@ -1,5 +1,30 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_a3 (input logic [7:0] a, module sbtm_a3 (input logic [7:0] a,
output logic [5:0] y); output logic [5:0] y);
always_comb always_comb
case(a) case(a)
8'b01000000: y = 6'b100110; 8'b01000000: y = 6'b100110;

View File

@@ -1,3 +1,27 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup for divide portion of fpdivsqrt
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out); module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out);
// bit partitions // bit partitions

View File

@@ -1,3 +1,27 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup for sqrt part of fpdivsqrt
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y); module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y);
// bit partitions // bit partitions