Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

This commit is contained in:
kipmacsaigoren 2021-10-06 11:52:34 -05:00
commit 8db7ce002d
16 changed files with 776 additions and 580 deletions

View File

@ -1,9 +1,26 @@
// This module takes as inputs two operands (op1 and op2) ///////////////////////////////////////////
// the operation type (op_type) and the result precision (P). //
// Based on the operation and precision , it conditionally // Written: James Stine
// converts single precision values to double precision values // Modified: 8/1/2018
// and modifies the sign of op1. The converted operands are Float1 //
// and Float2. // Purpose: Floating point divider/square root top unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module convert_inputs( module convert_inputs(
input [63:0] op1, // 1st input operand (A) input [63:0] op1, // 1st input operand (A)

View File

@ -23,9 +23,10 @@ module exception_div (
logic BNaN; // '1' if B is a not-a-number logic BNaN; // '1' if B is a not-a-number
logic ASNaN; // '1' if A is a signalling not-a-number logic ASNaN; // '1' if A is a signalling not-a-number
logic BSNaN; // '1' if B is a signalling not-a-number logic BSNaN; // '1' if B is a signalling not-a-number
logic ZQNaN; // '1' if result Z is a quiet NaN logic ZSNaN; // '1' if result Z is a quiet NaN
logic ZInf; // '1' if result Z is an infinity logic ZInf; // '1' if result Z is an infinity
logic Zero; // '1' if result is zero logic Zero; // '1' if result is zero
logic NegSqrt; // '1' if sqrt and operand is negative
//***take this module out and add more registers or just recalculate it all //***take this module out and add more registers or just recalculate it all
// Determine if mantissas are all zeros // Determine if mantissas are all zeros
@ -48,32 +49,34 @@ module exception_div (
assign AZero = AzeroE & AzeroM; assign AZero = AzeroE & AzeroM;
assign BZero = BzeroE & BzeroE; assign BZero = BzeroE & BzeroE;
// Is NaN if operand is negative and its a sqrt
assign NegSqrt = (A[63] & op_type & ~AZero);
// An "Invalid Operation" exception occurs if (A or B is a signalling NaN) // An "Invalid Operation" exception occurs if (A or B is a signalling NaN)
// or (A and B are both Infinite) // or (A and B are both Infinite)
assign Invalid = ASNaN | BSNaN | (((AInf & BInf) | (AZero & BZero))&~op_type) | assign Invalid = ASNaN | BSNaN | (((AInf & BInf) | (AZero & BZero))&~op_type) |
(A[63] & op_type); NegSqrt;
// The result is a quiet NaN if (an "Invalid Operation" exception occurs) // The result is a quiet NaN if (an "Invalid Operation" exception occurs)
// or (A is a NaN) or (B is a NaN). // or (A is a NaN) or (B is a NaN).
assign ZQNaN = Invalid | ANaN | BNaN; assign ZSNaN = Invalid | ANaN | BNaN;
// The result is zero // The result is zero
assign Zero = (AZero | BInf)&~op_type | AZero&op_type; assign Zero = (AZero | BInf)&~op_type | AZero&op_type;
// The result is +Inf if ((A is Inf) or (B is 0)) and (the // The result is +Inf if ((A is Inf) or (B is 0)) and (the
// result is not a quiet NaN). // result is not a quiet NaN).
assign ZInf = (AInf | BZero)&~ZQNaN&~op_type | AInf&op_type&~ZQNaN; assign ZInf = (AInf | BZero)&~ZSNaN&~op_type | AInf&op_type&~ZSNaN;
// Set the type of the result as follows: // Set the type of the result as follows:
// Ztype Result // Ztype Result
// 000 Normal // 000 Normal
// 001 Quiet NaN
// 010 Infinity // 010 Infinity
// 011 Zero // 011 Zero
// 110 DivZero // 110 Div by 0
assign Ztype[0] = ZQNaN | Zero; // 111 SNaN
assign Ztype[1] = ZInf | Zero; assign Ztype[2] = (ZSNaN);
assign Ztype[2] = BZero&~op_type; assign Ztype[1] = (ZSNaN) | (Zero) | (ZInf);
assign Ztype[0] = (ZSNaN) | (Zero);
endmodule // exception endmodule // exception

View File

@ -1,91 +1,85 @@
///////////////////////////////////////////
// //
// File name : fpdiv // Written: James Stine
// Title : Floating-Point Divider/Square-Root // Modified: 8/1/2018
// project : FPU
// Library : fpdiv
// Author(s) : James E. Stine, Jr.
// Purpose : definition of main unit to floating-point div/sqrt
// notes :
// //
// Copyright Oklahoma State University // Purpose: Floating point divider/square root top unit (Goldschmidt)
// //
// Basic Operations // A component of the Wally configurable RISC-V project.
// //
// Step 1: Load operands, set flags, and convert SP to DP // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Exponent Logic
// Step 4: Divide/Sqrt using Goldschmidt
// Step 5: Normalize the result.//
// Shift left until normalized. Normalized when the value to the
// left of the binrary point is 1.
// Step 6: Round the result.//
// Step 7: Put quotient/remainder onto output.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// `timescale 1ps/1ps // `timescale 1ps/1ps
module fpdiv ( module fpdiv (
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic start, input logic start,
input logic [63:0] op1, // 1st input operand (A) input logic [63:0] op1,
input logic [63:0] op2, // 2nd input operand (B) input logic [63:0] op2,
input logic [1:0] rm, // Rounding mode - specify values input logic [1:0] rm,
input logic op_type, // Function opcode input logic op_type,
input logic P, // Result Precision (0 for double, 1 for single) input logic P,
input logic OvEn, // Overflow trap enabled input logic OvEn,
input logic UnEn, // Underflow trap enabled input logic UnEn,
output logic done, input logic XNaNQ,
output logic FDivBusyE, input logic YNaNQ,
output logic [63:0] AS_Result, // Result of operation input logic XZeroQ,
output logic [4:0] Flags); // IEEE exception flags input logic YZeroQ,
input logic XInfQ,
input logic YInfQ,
output logic done,
output logic FDivBusyE,
output logic [63:0] AS_Result,
output logic [4:0] Flags);
logic [63:0] Float1; logic [63:0] Float1;
logic [63:0] Float2; logic [63:0] Float2;
logic [12:0] exp1, exp2, expF; logic [12:0] exp1, exp2, expF;
logic [12:0] exp_diff, bias; logic [12:0] exp_diff, bias;
logic [13:0] exp_sqrt; logic [13:0] exp_sqrt;
logic [12:0] exp_s; logic [63:0] Result;
logic [12:0] exp_c; logic [52:0] mantissaA;
logic [52:0] mantissaB;
logic [10:0] exponent; logic [2:0] sel_inv;
logic [63:0] Result; logic Invalid;
logic [52:0] mantissaA; logic [4:0] FlagsIn;
logic [52:0] mantissaB;
logic [2:0] sel_inv;
logic Invalid;
logic [4:0] FlagsIn;
logic signResult; logic signResult;
logic convert; logic convert;
logic sub; logic sub;
logic [63:0] q1, qm1, qp1, q0, qm0, qp0; logic [63:0] q1, qm1, qp1, q0, qm0, qp0;
logic [63:0] rega_out, regb_out, regc_out, regd_out; logic [63:0] rega_out, regb_out, regc_out, regd_out;
logic [127:0] regr_out; logic [127:0] regr_out;
logic [2:0] sel_muxa, sel_muxb; logic [2:0] sel_muxa, sel_muxb;
logic sel_muxr; logic sel_muxr;
logic load_rega, load_regb, load_regc, load_regd, load_regr; logic load_rega, load_regb, load_regc, load_regd, load_regr;
logic load_regs; logic load_regs;
logic exp_cout1, exp_cout2; logic exp_cout1, exp_cout2;
logic exp_odd, open; logic exp_odd, open;
// div/sqrt // op_type : fdiv=0, fsqrt=1
// fdiv = 0
// fsqrt = 1
assign Float1 = op1; assign Float1 = op1;
assign Float2 = op_type ? op1 : op2; assign Float2 = op_type ? op1 : op2;
// Test for exceptions and return the "Invalid Operation" and // Exception detection
// "Denormalized" Input Flags. The "sel_inv" is used in exception_div exc1 (.A(Float1), .B(Float2), .op_type, .Ztype(sel_inv), .Invalid);
// the third pipeline stage to select the result. Also, op1_Norm
// and op2_Norm are one if op1 and op2 are not zero or denormalized.
// sub is one if the effective operation is subtaction.
exception_div exc1 (.A(Float1), .B(Float2), .op_type,
// output:
.Ztype(sel_inv), .Invalid);
// Determine Sign/Mantissa // Determine Sign/Mantissa
assign signResult = (Float1[63]^Float2[63]); assign signResult = (Float1[63]^Float2[63]);
@ -112,19 +106,20 @@ module fpdiv (
// FSM : control divider // FSM : control divider
fsm control (.clk, .reset, .start, .op_type, fsm control (.clk, .reset, .start, .op_type,
// outputs: .done, .load_rega, .load_regb, .load_regc, .load_regd,
.done, .load_rega, .load_regb, .load_regc, .load_regd, .load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr,
.load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr, .divBusy(FDivBusyE));
.divBusy(FDivBusyE));
// Round the mantissa to a 52-bit value, with the leading one // Round the mantissa to a 52-bit value, with the leading one
// removed. The rounding unit also handles special cases and // removed. The rounding unit also handles special cases and
// sets the exception flags. // sets the exception flags.
rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF), rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF),
.sel_inv, .Invalid, .SignR(signResult), .sel_inv, .Invalid, .SignR(signResult),
.q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out, .Float1(op1), .Float2(op2),
// outputs: .XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ,
.Result, .Flags(FlagsIn)); .XInfQ, .YInfQ, .op_type,
.q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out,
.Result, .Flags(FlagsIn));
// Store the final result and the exception flags in registers. // Store the final result and the exception flags in registers.
flopenr #(64) rega (clk, reset, done, Result, AS_Result); flopenr #(64) rega (clk, reset, done, Result, AS_Result);

View File

@ -1,6 +1,6 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// //
// Written: Katherine Parry, Bret Mathis // Written: Katherine Parry, James Stine, Brett Mathis
// Modified: 6/23/2021 // Modified: 6/23/2021
// //
// Purpose: FPU // Purpose: FPU
@ -25,24 +25,24 @@
`include "wally-config.vh" `include "wally-config.vh"
module fpu ( module fpu (
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic [31:0] InstrD, // instruction from IFU input logic [31:0] InstrD, // instruction from IFU
input logic [`XLEN-1:0] ReadDataW,// Read data from memory input logic [`XLEN-1:0] ReadDataW,// Read data from memory
input logic [`XLEN-1:0] SrcAE, // Integer input being processed (from IEU) input logic [`XLEN-1:0] SrcAE, // Integer input being processed (from IEU)
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg (from IEU) input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg (from IEU)
input logic StallE, StallM, StallW, // stall signals from HZU input logic StallE, StallM, StallW, // stall signals from HZU
input logic FlushE, FlushM, FlushW, // flush signals from HZU input logic FlushE, FlushM, FlushW, // flush signals from HZU
input logic [4:0] RdE, RdM, RdW, // which FP register to write to (from IEU) input logic [4:0] RdE, RdM, RdW, // which FP register to write to (from IEU)
output logic FRegWriteM, // FP register write enable output logic FRegWriteM, // FP register write enable
output logic FStallD, // Stall the decode stage output logic FStallD, // Stall the decode stage
output logic FWriteIntE, FWriteIntM, FWriteIntW, // integer register write enable output logic FWriteIntE, FWriteIntM, FWriteIntW, // integer register write enable
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic [4:0] SetFflagsM // FMA flags (to privileged unit) output logic [4:0] SetFflagsM // FMA flags (to privileged unit)
); );
//*** make everything FLEN at some point //*** make everything FLEN at some point
@ -59,338 +59,257 @@ module fpu (
generate if (`F_SUPPORTED | `D_SUPPORTED) begin : fpu generate if (`F_SUPPORTED | `D_SUPPORTED) begin : fpu
// control signals // control signals
logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division or squareroot logic FDivStartD, FDivStartE; // Start division or squareroot
logic FWriteIntD; // Write to integer register logic FWriteIntD; // Write to integer register
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
logic [1:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register logic [1:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select the result written to FP register
logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which operation to do in each component logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which operation to do in each component
logic [2:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage logic [2:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; // Select the result written to the integer register logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; // Select the result written to the integer register
logic [4:0] Adr1E, Adr2E, Adr3E; // addresses of each input logic [4:0] Adr1E, Adr2E, Adr3E; // addresses of each input
// regfile signals // regfile signals
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding) logic [63:0] FSrcXE, FSrcXM; // Input 1 to the various units (after forwarding)
logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
// unpacking signals // unpacking signals
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
logic XSgnM, YSgnM; // input's sign - memory stage logic XSgnM, YSgnM; // input's sign - memory stage
logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage
logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage
logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage
logic [10:0] BiasE; // bias based on precision (single=7f double=3ff - max exponent/2) logic [10:0] BiasE; // bias based on precision (single=7f double=3ff - max exponent/2)
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XDenormE, YDenormE, ZDenormE; // is the input denormalized logic XDenormE, YDenormE, ZDenormE; // is the input denormalized
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XExpMaxE; // is the exponent all ones (max value) logic XExpMaxE; // is the exponent all ones (max value)
logic XNormE; // is normal logic XNormE; // is normal
// result and flag signals
// result and flag signals logic [63:0] FDivResM, FDivResW; // divide/squareroot result
logic [63:0] FDivResM, FDivResW; // divide/squareroot result logic [4:0] FDivFlgM, FDivFlgW; // divide/squareroot flags
logic [4:0] FDivFlgM, FDivFlgW; // divide/squareroot flags logic [63:0] FMAResM, FMAResW; // FMA/multiply result
logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply flags
logic [63:0] FMAResM, FMAResW; // FMA/multiply result logic [63:0] ReadResW; // read result (load instruction)
logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply flags logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result
logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags
logic [63:0] ReadResW; // read result (load instruction) logic [63:0] CvtResE, CvtResM; // FP <-> int convert result
logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this
logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result logic [63:0] ClassResE, ClassResM; // classify result
logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags logic [63:0] CmpResE, CmpResM; // compare result
logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid)
logic [63:0] CvtResE, CvtResM; // FP <-> int convert result logic [63:0] SgnResE, SgnResM; // sign injection result
logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid)
logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage
logic [63:0] ClassResE, ClassResM; // classify result logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage
logic [`XLEN-1:0] FIntResE;
logic [63:0] CmpResE, CmpResM; // compare result logic [63:0] FPUResultW; // final FP result being written to the FP register
logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid)
// other signals
logic [63:0] SgnResE, SgnResM; // sign injection result logic FDivSqrtDoneE; // is divide done
logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid) logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit
logic FDivClk; // clock for divide/squareroot unit
logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage logic [63:0] AlignedSrcAE; // align SrcA to the floating point format
logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage
// DECODE STAGE
logic [`XLEN-1:0] FIntResE; // calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW,
logic [63:0] FPUResultW; // final FP result being written to the FP register .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
// other signals
logic FDivSqrtDoneE; // is divide done // FP register file
logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit // - can read 3 registers and write 1 register every cycle
logic FDivClk; // clock for divide/squareroot unit fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
logic [63:0] AlignedSrcAE; // align SrcA to the floating point format .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), .a4(RdW),
.wd4(FPUResultW),
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
// D/E pipeline registers
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
//////////////////////////////////////////////////////////////////////////////////////// flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
//DECODE STAGE flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
//////////////////////////////////////////////////////////////////////////////////////// flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
// calculate FP control signals {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW,
// outputs: // EXECUTION STAGE
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, // Hazard unit for FPU
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD); // - determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM,
// FP register file .FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
// - can read 3 registers and write 1 register every cycle
fregfile fregfile (.clk, .reset, .we4(FRegWriteW), // forwarding muxs
.a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), .a4(RdW), mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
.wd4(FPUResultW), mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
// outputs: mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); mux3 #(64) fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0},
{2'b0, {10{1'b1}}, 52'b0},
{FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01)},
FSrcYE); // Force Z to be 0 for multiply instructions
// Force Z to be 0 for multiply instructions
mux3 #(64) fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
////////////////////////////////////////////////////////////////////////////////////////
// D/E pipeline registers // unpacking unit
//////////////////////////////////////////////////////////////////////////////////////// // - splits FP inputs into their various parts
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infinity)
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE,
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
{Adr1E, Adr2E, Adr3E});
flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, // FMA
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD}, // - two stage FMA
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE}); // - execute stage - multiplication and addend shifting
// - memory stage - addition and rounding
// - handles FMA and multiply instructions
fma fma (.clk, .reset, .FlushM, .StallM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE,
.XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM,
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
//////////////////////////////////////////////////////////////////////////////////////// .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
//EXECUTION STAGE .FOpCtrlE,
//////////////////////////////////////////////////////////////////////////////////////// .FmtE, .FmtM, .FrmM,
.FMAFlgM, .FMAResM);
// Hazard unit for FPU // clock gater
// - determines if any forwarding or stalls are needed // - creates a clock that only runs during divide/sqrt instructions
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, // - using the separate clock gives the divide/sqrt unit some time to get set up
// outputs: // *** the module says not to use in synthesis
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE); clockgater fpdivclkg(.E(FDivStartE),
.SE(1'b0),
.CLK(clk),
// forwarding muxs .ECLK(FDivClk));
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE); // capture the inputs for divide/sqrt
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE); // - if not captured any forwarded inputs will change during computation
mux3 #(64) fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, {2'b0, {10{1'b1}}, 52'b0}, {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01)}, FSrcYE); // Force Z to be 0 for multiply instructions // - this problem is caused by stalling the execute stage
mux3 #(64) fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); // Force Z to be 0 for multiply instructions // - the other units don't have this problem, only div/sqrt stalls the execute stage
flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E),
.en(1'b1), .clear(FDivSqrtDoneE),
// unpacking unit .reset(reset), .clk(FDivBusyE));
// - splits FP inputs into their various parts flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E),
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infinity) .en(1'b1), .clear(FDivSqrtDoneE),
unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE, .reset(reset), .clk(FDivBusyE));
// outputs: flopenrc #(6) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE}),
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ}),
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .en(1'b1), .clear(FDivSqrtDoneE),
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); .reset(reset), .clk(FDivBusyE));
// FMA // fpdivsqrt using Goldschmidt's iteration
// - two stage FMA fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
// - execute stage - multiplication and addend shifting .reset, .clk(FDivClk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1),
// - memory stage - addition and rounding .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ,
// - handles FMA and multiply instructions .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
// - contains some E/M pipeline registers
// *** currently handles FLEN and 32 bits (don't know if 32 works with 128 - easy to fix) - change to handle only the supported formats // convert from single to double and vice versa
fma fma (.clk, .reset, .FlushM, .StallM, cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE);
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, // compare unit
.XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, // - computation is done in one stage
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, // - writes to FP file during min/max instructions
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, // - other comparisons write a 1 or 0 to the integer register
.FOpCtrlE, fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}),
.FmtE, .FmtM, .FrmM, .FSrcXE, .FSrcYE, .FOpCtrlE,
// outputs: .FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE,
.FMAFlgM, .FMAResM); .Invalid(CmpNVE), .CmpResE);
// clock gater // sign injection unit
// - creates a clock that only runs during divide/sqrt instructions // - computation is done in one stage
// - using the separate clock gives the divide/sqrt unit some time to get set up fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE,
// *** the module says not to use in synthesis .SgnNVE, .SgnResE);
clockgater fpdivclkg(.E(FDivStartE),
.SE(1'b0), // classify
.CLK(clk), // - computation is done in one stage
.ECLK(FDivClk)); // - most of the work is done in the unpacking unit
// - result is written to the integer register
// capture the inputs for divide/sqrt fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE,
// - if not captured any forwarded inputs will change during computation .XSNaNE, .ClassResE);
// - this problem is caused by stalling the execute stage
// - the other units don't have this problem, only div/sqrt stalls the execute stage fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE,
flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E), .CvtResE, .CvtFlgE);
.en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(FDivBusyE)); // data to be stored in memory - to IEU
flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), // - FP uses NaN-blocking format
.en(1'b1), .clear(FDivSqrtDoneE), // - if there are any unsused bits the most significant bits are filled with 1s
.reset(reset), .clk(FDivBusyE)); assign FWriteDataE = FSrcYE[`XLEN-1:0];
// output for store instructions // Align SrcA to MSB when single precision
//*** change to use the unpacking unit if possible mux2 #(64) SrcAMux({{32{1'b1}}, SrcAE[31:0]}, {{64-`XLEN{1'b1}}, SrcAE}, FmtE, AlignedSrcAE);
fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
.reset, .clk(FDivClk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1), // select a result that may be written to the FP register
// outputs: mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE);
.FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE);
// convert from single to double and vice versa // select the result that may be written to the integer register - to IEU
cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE); mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0],
CvtResE[`XLEN-1:0], FIntResSelE, FIntResE);
// compare unit
// - computation is done in one stage // E/M pipe registers
// - writes to FP file during min/max instructions
// - other comparisons write a 1 or 0 to the integer register // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM});
.FSrcXE, .FSrcYE, .FOpCtrlE, flopenrc #(65) EMFpReg3(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM});
.FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE, flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
// outputs: flopenrc #(12) EMFpReg5(clk, reset, FlushM, ~StallM,
.Invalid(CmpNVE), .CmpResE); {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
// sign injection unit flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, FResE, FResM);
// - computation is done in one stage flopenrc #(5) EMRegCmpFlg(clk, reset, FlushM, ~StallM, FFlgE, FFlgM);
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE, flopenrc #(`XLEN) EMRegSgnRes(clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
// outputs: flopenrc #(11) EMCtrlReg(clk, reset, FlushM, ~StallM,
.SgnNVE, .SgnResE); {FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
// classify
// - computation is done in one stage // BEGIN MEMORY STAGE
// - most of the work is done in the unpacking unit // FPU flag selection - to privileged
// - result is written to the integer register mux4 #(5) FPUFlgMux(5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE,
// outputs: // M/W pipe registers
.XSNaNE, .ClassResE); flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW);
// outputs: flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
.CvtResE, .CvtFlgE); flopenrc #(5) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM, FWriteIntM},
// data to be stored in memory - to IEU {FRegWriteW, FResultSelW, FmtW, FWriteIntW});
// - FP uses NaN-blocking format
// - if there are any unused bits the most significant bits are filled with 1s // BEGIN WRITEBACK STAGE
assign FWriteDataE = FSrcYE[`XLEN-1:0];
// put ReadData into NaN-blocking format
// - if there are any unused bits the most significant bits are filled with 1s
// Align SrcA to MSB when single precision // - for load instruction
mux2 #(64) SrcAMux({{32{1'b1}}, SrcAE[31:0]}, {{64-`XLEN{1'b1}}, SrcAE}, FmtE, AlignedSrcAE); mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
// select a result that may be written to the FP register // select the result to be written to the FP register
mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE); mux4 #(64) FPUResultMux(ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW);
mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE);
// select the result that may be written to the integer register - to IEU
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], CvtResE[`XLEN-1:0], FIntResSelE, FIntResE);
//***will synth remove registers of values that are always zero?
////////////////////////////////////////////////////////////////////////////////////////
// E/M pipe registers
////////////////////////////////////////////////////////////////////////////////////////
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM});
flopenrc #(65) EMFpReg3(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM});
flopenrc #(64) EMFpReg4(clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
flopenrc #(12) EMFpReg5(clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, FResE, FResM);
flopenrc #(5) EMRegCmpFlg(clk, reset, FlushM, ~StallM, FFlgE, FFlgM);
flopenrc #(`XLEN) EMRegSgnRes(clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
// flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
//flopenrc #(64) EMRegCvtFpRes(clk, reset, FlushM, ~StallM, CvtFpResE, CvtFpResM);
//flopenrc #(5) EMRegCvtFpFlg(clk, reset, FlushM, ~StallM, CvtFpFlgE, CvtFpFlgM);
// flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
// flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
// flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
flopenrc #(11) EMCtrlReg(clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
////////////////////////////////////////////////////////////////////////////////////////
//BEGIN MEMORY STAGE
////////////////////////////////////////////////////////////////////////////////////////
// FPU flag selection - to privileged
mux4 #(5) FPUFlgMux(5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
////////////////////////////////////////////////////////////////////////////////////////
// M/W pipe registers
////////////////////////////////////////////////////////////////////////////////////////
flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW);
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(5) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM, FWriteIntM},
{FRegWriteW, FResultSelW, FmtW, FWriteIntW});
////////////////////////////////////////////////////////////////////////////////////////
// BEGIN WRITEBACK STAGE
////////////////////////////////////////////////////////////////////////////////////////
// put ReadData into NaN-blocking format
// - if there are any unused bits the most significant bits are filled with 1s
// - for load instruction
mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
// select the result to be written to the FP register
mux4 #(64) FPUResultMux(ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW);
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
assign FStallD = 0; assign FStallD = 0;
assign FWriteIntE = 0; assign FWriteIntE = 0;
assign FWriteIntM = 0; assign FWriteIntM = 0;
assign FWriteIntW = 0; assign FWriteIntW = 0;
assign FWriteDataE = 0; assign FWriteDataE = 0;
assign FIntResM = 0; assign FIntResM = 0;
assign FDivBusyE = 0; assign FDivBusyE = 0;
assign IllegalFPUInstrD = 1; assign IllegalFPUInstrD = 1;
assign SetFflagsM = 0; assign SetFflagsM = 0;
end end
endgenerate endgenerate

View File

@ -1,10 +1,9 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// regfile.sv
// //
// Written: David_Harris@hmc.edu 9 January 2021 // Written: David_Harris@hmc.edu 9 January 2021
// Modified: // Modified: James Stine
// //
// Purpose: 4-port register file // Purpose: 3-port output register file
// //
// A component of the Wally configurable RISC-V project. // A component of the Wally configurable RISC-V project.
// //
@ -26,21 +25,19 @@
`include "wally-config.vh" `include "wally-config.vh"
module fregfile ( module fregfile (
input logic clk, reset, input logic clk, reset,
input logic we4, input logic we4,
input logic [ 4:0] a1, a2, a3, a4, input logic [4:0] a1, a2, a3, a4,
input logic [63:0] wd4, input logic [63:0] wd4,
output logic [63:0] rd1, rd2, rd3); output logic [63:0] rd1, rd2, rd3);
logic [63:0] rf[31:0]; logic [63:0] rf[31:0];
integer i; integer i;
// three ported register file // three ported register file
// read three ports combinationally (A1/RD1, A2/RD2, A3/RD3) // read three ports combinationally (A1/RD1, A2/RD2, A3/RD3)
// write fourth port on falling edge of clock (A4/WD4/WE4) // write fourth port on falling edge of clock (A4/WD4/WE4)
// write occurs on falling edge of clock // write occurs on falling edge of clock
// reset is intended for simulation only, not synthesis
always_ff @(negedge clk or posedge reset) always_ff @(negedge clk or posedge reset)
if (reset) for(i=0; i<32; i++) rf[i] <= 0; if (reset) for(i=0; i<32; i++) rf[i] <= 0;

View File

@ -1,49 +1,63 @@
module fsm ( ///////////////////////////////////////////
//
// Written: James Stine
// Modified: 9/28/2021
//
// Purpose: FSM for floating point divider/square root unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
input logic clk, module fsm (
input logic reset, input logic clk,
input logic start, input logic reset,
input logic op_type, input logic start,
output logic done, // End of cycles input logic op_type,
output logic load_rega, // enable for regA output logic done,
output logic load_regb, // enable for regB output logic load_rega,
output logic load_regc, // enable for regC output logic load_regb,
output logic load_regd, // enable for regD output logic load_regc,
output logic load_regr, // enable for rem output logic load_regd,
output logic load_regs, // enable for q,qm,qp output logic load_regr,
output logic [2:0] sel_muxa, // Select muxA output logic load_regs,
output logic [2:0] sel_muxb, // Select muxB output logic [2:0] sel_muxa,
output logic sel_muxr, // Select rem mux output logic [2:0] sel_muxb,
output logic divBusy // calculation is happening output logic sel_muxr,
output logic divBusy
); );
typedef enum logic [4:0] {S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
S10, S11, S12, S13, S14, S15, S16, S17, S18, S19,
S20, S21, S22, S23, S24, S25, S26, S27, S28, S29,
S30} statetype;
reg [4:0] CURRENT_STATE; statetype current_state, next_state;
reg [4:0] NEXT_STATE;
parameter [4:0]
S0=5'd0, S1=5'd1, S2=5'd2,
S3=5'd3, S4=5'd4, S5=5'd5,
S6=5'd6, S7=5'd7, S8=5'd8,
S9=5'd9, S10=5'd10,
S13=5'd13, S14=5'd14, S15=5'd15,
S16=5'd16, S17=5'd17, S18=5'd18,
S19=5'd19, S20=5'd20, S21=5'd21,
S22=5'd22, S23=5'd23, S24=5'd24,
S25=5'd25, S26=5'd26, S27=5'd27,
S28=5'd28, S29=5'd29, S30=5'd30;
always @(negedge clk) always @(negedge clk)
begin begin
if(reset==1'b1) if (reset == 1'b1)
CURRENT_STATE=S0; current_state = S0;
else else
CURRENT_STATE=NEXT_STATE; current_state = next_state;
end end
always @(*) always @(*)
begin begin
case(CURRENT_STATE) case(current_state)
S0: // iteration 0 S0: // iteration 0
begin begin
if (start==1'b0) if (start==1'b0)
@ -59,7 +73,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S0; next_state = S0;
end end
else if (start==1'b1 && op_type==1'b0) else if (start==1'b1 && op_type==1'b0)
begin begin
@ -74,7 +88,7 @@ module fsm (
sel_muxa = 3'b001; sel_muxa = 3'b001;
sel_muxb = 3'b001; sel_muxb = 3'b001;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S1; next_state = S1;
end // if (start==1'b1 && op_type==1'b0) end // if (start==1'b1 && op_type==1'b0)
else if (start==1'b1 && op_type==1'b1) else if (start==1'b1 && op_type==1'b1)
begin begin
@ -89,7 +103,7 @@ module fsm (
sel_muxa = 3'b010; sel_muxa = 3'b010;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S13; next_state = S13;
end end
else else
begin begin
@ -104,7 +118,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S0; next_state = S0;
end end
end // case: S0 end // case: S0
S1: S1:
@ -120,7 +134,7 @@ module fsm (
sel_muxa = 3'b010; sel_muxa = 3'b010;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S2; next_state = S2;
end end
S2: // iteration 1 S2: // iteration 1
begin begin
@ -135,7 +149,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S3; next_state = S3;
end end
S3: S3:
begin begin
@ -150,7 +164,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S4; next_state = S4;
end end
S4: // iteration 2 S4: // iteration 2
begin begin
@ -165,7 +179,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S5; next_state = S5;
end end
S5: S5:
begin begin
@ -180,7 +194,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; // add sel_muxr = 1'b0; // add
NEXT_STATE = S6; next_state = S6;
end end
S6: // iteration 3 S6: // iteration 3
begin begin
@ -195,7 +209,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S8; next_state = S8;
end end
S7: S7:
begin begin
@ -210,7 +224,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S8; next_state = S8;
end // case: S7 end // case: S7
S8: // q,qm,qp S8: // q,qm,qp
begin begin
@ -225,7 +239,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S9; next_state = S9;
end end
S9: // rem S9: // rem
begin begin
@ -240,7 +254,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b1; sel_muxr = 1'b1;
NEXT_STATE = S10; next_state = S10;
end end
S10: // done S10: // done
begin begin
@ -255,7 +269,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S0; next_state = S0;
end end
S13: // start of sqrt path S13: // start of sqrt path
begin begin
@ -270,7 +284,7 @@ module fsm (
sel_muxa = 3'b010; sel_muxa = 3'b010;
sel_muxb = 3'b001; sel_muxb = 3'b001;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S14; next_state = S14;
end end
S14: S14:
begin begin
@ -285,7 +299,7 @@ module fsm (
sel_muxa = 3'b001; sel_muxa = 3'b001;
sel_muxb = 3'b100; sel_muxb = 3'b100;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S15; next_state = S15;
end end
S15: // iteration 1 S15: // iteration 1
begin begin
@ -300,7 +314,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S16; next_state = S16;
end end
S16: S16:
begin begin
@ -315,7 +329,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S17; next_state = S17;
end end
S17: S17:
begin begin
@ -330,7 +344,7 @@ module fsm (
sel_muxa = 3'b100; sel_muxa = 3'b100;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S18; next_state = S18;
end end
S18: // iteration 2 S18: // iteration 2
begin begin
@ -345,7 +359,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S19; next_state = S19;
end end
S19: S19:
begin begin
@ -360,7 +374,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S20; next_state = S20;
end end
S20: S20:
begin begin
@ -375,7 +389,7 @@ module fsm (
sel_muxa = 3'b100; sel_muxa = 3'b100;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S21; next_state = S21;
end end
S21: // iteration 3 S21: // iteration 3
begin begin
@ -390,7 +404,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S22; next_state = S22;
end end
S22: S22:
begin begin
@ -405,7 +419,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S23; next_state = S23;
end end
S23: S23:
begin begin
@ -420,7 +434,7 @@ module fsm (
sel_muxa = 3'b100; sel_muxa = 3'b100;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S24; next_state = S24;
end end
S24: // q,qm,qp S24: // q,qm,qp
begin begin
@ -435,7 +449,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S25; next_state = S25;
end end
S25: // rem S25: // rem
begin begin
@ -450,7 +464,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b110; sel_muxb = 3'b110;
sel_muxr = 1'b1; sel_muxr = 1'b1;
NEXT_STATE = S26; next_state = S26;
end end
S26: // done S26: // done
begin begin
@ -465,7 +479,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S0; next_state = S0;
end end
default: default:
begin begin
@ -480,9 +494,9 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S0; next_state = S0;
end end
endcase // case(CURRENT_STATE) endcase // case(current_state)
end // always @ (CURRENT_STATE or X) end // always @ (current_state or X)
endmodule // fsm endmodule // fsm

View File

@ -1,37 +1,55 @@
///////////////////////////////////////////
// //
// The rounder takes as inputs a 64-bit value to be rounded, A, the // Written: James Stine
// exponent of the value to be rounded, the sign of the final result, Sign, // Modified: 8/1/2018
// the precision of the results, P, and the two-bit rounding mode, rm.
// It produces a rounded 52-bit result, Z, the exponent of the rounded
// result, Z_exp, and a flag that indicates if the result was rounded,
// Inexact. The rounding mode has the following values.
// rm Mode
// 00 round-to-nearest-even
// 01 round-toward-zero
// 10 round-toward-plus infinity
// 11 round-toward-minus infinity
// //
// Purpose: Floating point divider/square root rounder unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module rounder_div ( module rounder_div (
input logic [1:0] rm, input logic [1:0] rm,
input logic P, input logic P,
input logic OvEn, input logic OvEn,
input logic UnEn, input logic UnEn,
input logic [12:0] exp_diff, input logic [12:0] exp_diff,
input logic [2:0] sel_inv, input logic [2:0] sel_inv,
input logic Invalid, input logic Invalid,
input logic SignR, input logic SignR,
input logic [63:0] Float1,
input logic [63:0] q1, input logic [63:0] Float2,
input logic [63:0] qm1, input logic XNaNQ,
input logic [63:0] qp1, input logic YNaNQ,
input logic [63:0] q0, input logic XZeroQ,
input logic [63:0] qm0, input logic YZeroQ,
input logic [63:0] qp0, input logic XInfQ,
input logic YInfQ,
input logic op_type,
input logic [63:0] q1,
input logic [63:0] qm1,
input logic [63:0] qp1,
input logic [63:0] q0,
input logic [63:0] qm0,
input logic [63:0] qp0,
input logic [127:0] regr_out, input logic [127:0] regr_out,
output logic [63:0] Result, output logic [63:0] Result,
output logic [4:0] Flags output logic [4:0] Flags
); );
logic Rsign; logic Rsign;
@ -56,12 +74,16 @@ module rounder_div (
logic Texp_l7z; logic Texp_l7z;
logic Texp_l7o; logic Texp_l7o;
logic OvCon; logic OvCon;
logic zero_rem; logic zero_rem;
logic [1:0] mux_mant; logic [1:0] mux_mant;
logic sign_rem; logic sign_rem;
logic [63:0] q, qm, qp; logic [63:0] q, qm, qp;
logic exp_ovf; logic exp_ovf;
logic [50:0] NaN_out;
logic NaN_Sign_out;
logic Sign_out;
// Remainder = 0? // Remainder = 0?
assign zero_rem = ~(|regr_out); assign zero_rem = ~(|regr_out);
// Remainder Sign // Remainder Sign
@ -117,12 +139,11 @@ module rounder_div (
// the input was infinite or NaN or the output of the adder is zero. // the input was infinite or NaN or the output of the adder is zero.
// 00 = Valid // 00 = Valid
// 10 = NaN // 10 = NaN
assign Valid = (~sel_inv[2]&~sel_inv[1]&~sel_inv[0]); assign Valid = ~sel_inv[2]&~sel_inv[1]&~sel_inv[0];
assign NaN = ~sel_inv[1]& sel_inv[0]; assign NaN = sel_inv[2]&sel_inv[1]&sel_inv[0];
assign UnderFlow = (P & UnFlow_SP | UnFlow_DP) & Valid; assign UnderFlow = (P & UnFlow_SP | UnFlow_DP) & Valid;
assign OverFlow = (P & OvFlow_SP | OvFlow_DP) & Valid; assign OverFlow = (P & OvFlow_SP | OvFlow_DP) & Valid;
assign Div0 = sel_inv[2]&sel_inv[1]&~sel_inv[0]; assign Div0 = YZeroQ&~XZeroQ&~op_type&~NaN;
// The final result is Inexact if any rounding occurred ((i.e., R or S // The final result is Inexact if any rounding occurred ((i.e., R or S
// is one), or (if the result overflows ) or (if the result underflows and the // is one), or (if the result overflows ) or (if the result underflows and the
@ -162,8 +183,16 @@ module rounder_div (
// If the result is NaN, the mantissa is 10...0 // If the result is NaN, the mantissa is 10...0
// If the result is the largest floating point number, the mantissa // If the result is the largest floating point number, the mantissa
// is all ones. Otherwise, the mantissa is not changed. // is all ones. Otherwise, the mantissa is not changed.
assign NaN_out = ~XNaNQ&YNaNQ ? Float2[50:0] : Float1[50:0];
assign NaN_Sign_out = ~XNaNQ&YNaNQ ? Float2[63] : Float1[63];
assign Sign_out = (XZeroQ&YZeroQ | XInfQ&YInfQ)&~op_type | Rsign&~XNaNQ&~YNaNQ |
NaN_Sign_out&(XNaNQ|YNaNQ);
// FIXME (jes) - Imperas gives sNaN a Sign=0 where x86 gives Sign=1
// | Float1[63]&op_type;
assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero); assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero);
assign Rmant[50:0] = {51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}); assign Rmant[50:0] = ({51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}) |
(NaN_out&{51{NaN}}))&({51{~(op_type&Float1[63]&~XZeroQ)}});
// For single precision, the 8 least significant bits of the exponent // For single precision, the 8 least significant bits of the exponent
// and 23 most significant bits of the mantissa contain bits used // and 23 most significant bits of the mantissa contain bits used
@ -171,8 +200,8 @@ module rounder_div (
// overflow has occurred, the overflow trap is enabled, and a conversion // overflow has occurred, the overflow trap is enabled, and a conversion
// is being performed. // is being performed.
assign OvCon = OverFlow & OvEn; assign OvCon = OverFlow & OvEn;
assign Result = (P&~OvCon) ? { {32{1'b1}}, Rsign, Rexp[7:0], Rmant[51:29]} assign Result = (P&~OvCon) ? { {32{1'b1}}, Sign_out, Rexp[7:0], Rmant[51:29]}
: {Rsign, Rexp, Rmant}; : {Sign_out, Rexp, Rmant};
endmodule // rounder endmodule // rounder

View File

@ -1,5 +1,30 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_a0 (input logic [6:0] a, module sbtm_a0 (input logic [6:0] a,
output logic [12:0] y); output logic [12:0] y);
always_comb always_comb
case(a) case(a)
7'b0000000: y = 13'b1111111100010; 7'b0000000: y = 13'b1111111100010;

View File

@ -1,5 +1,30 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_a1 (input logic [6:0] a, module sbtm_a1 (input logic [6:0] a,
output logic [4:0] y); output logic [4:0] y);
always_comb always_comb
case(a) case(a)
7'b0000000: y = 5'b11100; 7'b0000000: y = 5'b11100;

View File

@ -1,5 +1,30 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_a2 (input logic [7:0] a, module sbtm_a2 (input logic [7:0] a,
output logic [13:0] y); output logic [13:0] y);
always_comb always_comb
case(a) case(a)
8'b01000000: y = 14'b10110100010111; 8'b01000000: y = 14'b10110100010111;

View File

@ -1,5 +1,30 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_a3 (input logic [7:0] a, module sbtm_a3 (input logic [7:0] a,
output logic [5:0] y); output logic [5:0] y);
always_comb always_comb
case(a) case(a)
8'b01000000: y = 6'b100110; 8'b01000000: y = 6'b100110;

View File

@ -1,3 +1,27 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup for divide portion of fpdivsqrt
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out); module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out);
// bit partitions // bit partitions

View File

@ -1,3 +1,27 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup for sqrt part of fpdivsqrt
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y); module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y);
// bit partitions // bit partitions

View File

@ -0,0 +1,30 @@
#!/bin/sh
# Generate single-precision (f32) test-vector files with ./testfloat_gen.
#
# Each entry below is "<function>:<tag>": <function> is appended to "f32_" to
# form the name passed to testfloat_gen, and <tag> is the label used in the
# output file name. They differ only for mulAdd, whose vectors are written to
# files tagged "fma".
#
# For every entry, testfloat_gen is invoked once per rounding flag and its
# standard output is redirected to f32_<tag>_<suffix>.tv:
#   -rnear_even -> rne    -rminMag -> rz    -rmax -> ru    -rmin -> rd
#
# The script must be run from the directory that contains testfloat_gen;
# the .tv files are created in that same directory.
for entry in add:add sub:sub mul:mul mulAdd:fma div:div sqrt:sqrt; do
    func="f32_${entry%%:*}"      # text before the colon: testfloat_gen function
    stem="f32_${entry##*:}"      # text after the colon: output file stem
    ./testfloat_gen -rnear_even "$func" > "${stem}_rne.tv"
    ./testfloat_gen -rminMag "$func" > "${stem}_rz.tv"
    ./testfloat_gen -rmax "$func" > "${stem}_ru.tv"
    ./testfloat_gen -rmin "$func" > "${stem}_rd.tv"
done

View File

@ -0,0 +1,30 @@
#!/bin/sh
# Generate double-precision (f64) test-vector files with ./testfloat_gen.
#
# Each entry below is "<function>:<tag>": <function> is appended to "f64_" to
# form the name passed to testfloat_gen, and <tag> is the label used in the
# output file name. They differ only for mulAdd, whose vectors are written to
# files tagged "fma".
#
# For every entry, testfloat_gen is invoked once per rounding flag and its
# standard output is redirected to f64_<tag>_<suffix>.tv:
#   -rnear_even -> rne    -rminMag -> rz    -rmax -> ru    -rmin -> rd
#
# The script must be run from the directory that contains testfloat_gen;
# the .tv files are created in that same directory.
for entry in add:add sub:sub mul:mul mulAdd:fma div:div sqrt:sqrt; do
    func="f64_${entry%%:*}"      # text before the colon: testfloat_gen function
    stem="f64_${entry##*:}"      # text after the colon: output file stem
    ./testfloat_gen -rnear_even "$func" > "${stem}_rne.tv"
    ./testfloat_gen -rminMag "$func" > "${stem}_rz.tv"
    ./testfloat_gen -rmax "$func" > "${stem}_ru.tv"
    ./testfloat_gen -rmin "$func" > "${stem}_rd.tv"
done

View File

@ -38,7 +38,7 @@
module testbench(); module testbench();
parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*8700000; // # of instructions at which to turn on waves in graphical sim parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*3100000; // # of instructions at which to turn on waves in graphical sim
string ProgramAddrMapFile, ProgramLabelMapFile; string ProgramAddrMapFile, ProgramLabelMapFile;
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
@ -137,6 +137,7 @@ module testbench();
integer NumCSRWIndex; integer NumCSRWIndex;
integer NumCSRPostWIndex; integer NumCSRPostWIndex;
logic [`XLEN-1:0] InstrCountW; logic [`XLEN-1:0] InstrCountW;
integer RequestDelayedMIP;
// ------ // ------
// Macros // Macros
@ -246,9 +247,16 @@ module testbench();
MarkerIndex += 2; MarkerIndex += 2;
// match MIP to QEMU's because interrupts are imprecise // match MIP to QEMU's because interrupts are imprecise
if(ExpectedCSRArrayM[NumCSRM].substr(0, 2) == "mip") begin if(ExpectedCSRArrayM[NumCSRM].substr(0, 2) == "mip") begin
$display("%tns: Updating MIP to %x",$time,ExpectedCSRArrayValueM[NumCSRM]); $display("%tn: ExpectedCSRArrayM[7] (MEPC) = %x",$time,ExpectedCSRArrayM[7]);
MIPexpected = ExpectedCSRArrayValueM[NumCSRM]; $display("%tn: ExpectedPCM = %x",$time,ExpectedPCM);
force dut.hart.priv.csr.genblk1.csri.MIP_REGW = MIPexpected; // if PC does not equal MEPC, request delayed MIP is True
if(ExpectedPCM != ExpectedCSRArrayM[7]) begin
RequestDelayedMIP = 1;
end else begin
$display("%tns: Updating MIP to %x",$time,ExpectedCSRArrayValueM[NumCSRM]);
MIPexpected = ExpectedCSRArrayValueM[NumCSRM];
force dut.hart.priv.csr.genblk1.csri.MIP_REGW = MIPexpected;
end
end end
NumCSRM++; NumCSRM++;
end end
@ -326,6 +334,12 @@ module testbench();
// step2: make all checks in the write back stage. // step2: make all checks in the write back stage.
always @(negedge clk) begin always @(negedge clk) begin
if(RequestDelayedMIP) begin
$display("%tns: Updating MIP to %x",$time,ExpectedCSRArrayValueW[NumCSRM]);
MIPexpected = ExpectedCSRArrayValueW[NumCSRM];
force dut.hart.priv.csr.genblk1.csri.MIP_REGW = MIPexpected;
RequestDelayedMIP = 0;
end
// always check PC, instruction bits // always check PC, instruction bits
if (checkInstrW) begin if (checkInstrW) begin
InstrCountW += 1; InstrCountW += 1;