Make changes to fpdiv - still working on the clock issue with the fsm, which was changed from posedge to negedge - also updated fpdivsqrt rounding to handle TestFloat

This commit is contained in:
James E. Stine 2021-10-06 08:26:09 -05:00
parent 5bcae393c9
commit a91c0c8fc7
13 changed files with 698 additions and 576 deletions

View File

@ -1,9 +1,26 @@
// This module takes as inputs two operands (op1 and op2) ///////////////////////////////////////////
// the operation type (op_type) and the result precision (P). //
// Based on the operation and precision , it conditionally // Written: James Stine
// converts single precision values to double precision values // Modified: 8/1/2018
// and modifies the sign of op1. The converted operands are Float1 //
// and Float2. // Purpose: Floating point divider/square root top unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module convert_inputs( module convert_inputs(
input [63:0] op1, // 1st input operand (A) input [63:0] op1, // 1st input operand (A)

View File

@ -23,9 +23,10 @@ module exception_div (
logic BNaN; // '1' if B is a not-a-number logic BNaN; // '1' if B is a not-a-number
logic ASNaN; // '1' if A is a signalling not-a-number logic ASNaN; // '1' if A is a signalling not-a-number
logic BSNaN; // '1' if B is a signalling not-a-number logic BSNaN; // '1' if B is a signalling not-a-number
logic ZQNaN; // '1' if result Z is a quiet NaN logic ZSNaN; // '1' if result Z is a quiet NaN
logic ZInf; // '1' if result Z is an infinity logic ZInf; // '1' if result Z is an infinity
logic Zero; // '1' if result is zero logic Zero; // '1' if result is zero
logic NegSqrt; // '1' if sqrt and operand is negative
//***take this module out and add more registers or just recalculate it all //***take this module out and add more registers or just recalculate it all
// Determine if mantissas are all zeros // Determine if mantissas are all zeros
@ -48,32 +49,34 @@ module exception_div (
assign AZero = AzeroE & AzeroM; assign AZero = AzeroE & AzeroM;
assign BZero = BzeroE & BzeroE; assign BZero = BzeroE & BzeroE;
// Is NaN if operand is negative and it's a sqrt
assign NegSqrt = (A[63] & op_type & ~AZero);
// An "Invalid Operation" exception occurs if (A or B is a signalling NaN) // An "Invalid Operation" exception occurs if (A or B is a signalling NaN)
// or (A and B are both Infinite) // or (A and B are both Infinite)
assign Invalid = ASNaN | BSNaN | (((AInf & BInf) | (AZero & BZero))&~op_type) | assign Invalid = ASNaN | BSNaN | (((AInf & BInf) | (AZero & BZero))&~op_type) |
(A[63] & op_type); NegSqrt;
// The result is a quiet NaN if (an "Invalid Operation" exception occurs) // The result is a quiet NaN if (an "Invalid Operation" exception occurs)
// or (A is a NaN) or (B is a NaN). // or (A is a NaN) or (B is a NaN).
assign ZQNaN = Invalid | ANaN | BNaN; assign ZSNaN = Invalid | ANaN | BNaN;
// The result is zero // The result is zero
assign Zero = (AZero | BInf)&~op_type | AZero&op_type; assign Zero = (AZero | BInf)&~op_type | AZero&op_type;
// The result is +Inf if ((A is Inf) or (B is 0)) and (the // The result is +Inf if ((A is Inf) or (B is 0)) and (the
// result is not a quiet NaN). // result is not a quiet NaN).
assign ZInf = (AInf | BZero)&~ZQNaN&~op_type | AInf&op_type&~ZQNaN; assign ZInf = (AInf | BZero)&~ZSNaN&~op_type | AInf&op_type&~ZSNaN;
// Set the type of the result as follows: // Set the type of the result as follows:
// Ztype Result // Ztype Result
// 000 Normal // 000 Normal
// 001 Quiet NaN
// 010 Infinity // 010 Infinity
// 011 Zero // 011 Zero
// 110 DivZero // 110 Div by 0
assign Ztype[0] = ZQNaN | Zero; // 111 SNaN
assign Ztype[1] = ZInf | Zero; assign Ztype[2] = (ZSNaN);
assign Ztype[2] = BZero&~op_type; assign Ztype[1] = (ZSNaN) | (Zero) | (ZInf);
assign Ztype[0] = (ZSNaN) | (Zero);
endmodule // exception endmodule // exception

View File

@ -1,44 +1,50 @@
///////////////////////////////////////////
// //
// File name : fpdiv // Written: James Stine
// Title : Floating-Point Divider/Square-Root // Modified: 8/1/2018
// project : FPU
// Library : fpdiv
// Author(s) : James E. Stine, Jr.
// Purpose : definition of main unit to floating-point div/sqrt
// notes :
// //
// Copyright Oklahoma State University // Purpose: Floating point divider/square root top unit (Goldschmidt)
// //
// Basic Operations // A component of the Wally configurable RISC-V project.
// //
// Step 1: Load operands, set flags, and convert SP to DP // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Exponent Logic
// Step 4: Divide/Sqrt using Goldschmidt
// Step 5: Normalize the result.//
// Shift left until normalized. Normalized when the value to the
// left of the binary point is 1.
// Step 6: Round the result.//
// Step 7: Put quotient/remainder onto output.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// `timescale 1ps/1ps // `timescale 1ps/1ps
module fpdiv ( module fpdiv (
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic start, input logic start,
input logic [63:0] op1, // 1st input operand (A) input logic [63:0] op1,
input logic [63:0] op2, // 2nd input operand (B) input logic [63:0] op2,
input logic [1:0] rm, // Rounding mode - specify values input logic [1:0] rm,
input logic op_type, // Function opcode input logic op_type,
input logic P, // Result Precision (0 for double, 1 for single) input logic P,
input logic OvEn, // Overflow trap enabled input logic OvEn,
input logic UnEn, // Underflow trap enabled input logic UnEn,
input logic XNaNQ,
input logic YNaNQ,
input logic XZeroQ,
input logic YZeroQ,
input logic XInfQ,
input logic YInfQ,
output logic done, output logic done,
output logic FDivBusyE, output logic FDivBusyE,
output logic [63:0] AS_Result, // Result of operation output logic [63:0] AS_Result,
output logic [4:0] Flags); // IEEE exception flags output logic [4:0] Flags);
logic [63:0] Float1; logic [63:0] Float1;
logic [63:0] Float2; logic [63:0] Float2;
@ -46,10 +52,6 @@ module fpdiv (
logic [12:0] exp1, exp2, expF; logic [12:0] exp1, exp2, expF;
logic [12:0] exp_diff, bias; logic [12:0] exp_diff, bias;
logic [13:0] exp_sqrt; logic [13:0] exp_sqrt;
logic [12:0] exp_s;
logic [12:0] exp_c;
logic [10:0] exponent;
logic [63:0] Result; logic [63:0] Result;
logic [52:0] mantissaA; logic [52:0] mantissaA;
logic [52:0] mantissaB; logic [52:0] mantissaB;
@ -72,20 +74,12 @@ module fpdiv (
logic exp_cout1, exp_cout2; logic exp_cout1, exp_cout2;
logic exp_odd, open; logic exp_odd, open;
// div/sqrt // op_type : fdiv=0, fsqrt=1
// fdiv = 0
// fsqrt = 1
assign Float1 = op1; assign Float1 = op1;
assign Float2 = op_type ? op1 : op2; assign Float2 = op_type ? op1 : op2;
// Test for exceptions and return the "Invalid Operation" and // Exception detection
// "Denormalized" Input Flags. The "sel_inv" is used in exception_div exc1 (.A(Float1), .B(Float2), .op_type, .Ztype(sel_inv), .Invalid);
// the third pipeline stage to select the result. Also, op1_Norm
// and op2_Norm are one if op1 and op2 are not zero or denormalized.
// sub is one if the effective operation is subtraction.
exception_div exc1 (.A(Float1), .B(Float2), .op_type,
// output:
.Ztype(sel_inv), .Invalid);
// Determine Sign/Mantissa // Determine Sign/Mantissa
assign signResult = (Float1[63]^Float2[63]); assign signResult = (Float1[63]^Float2[63]);
@ -112,7 +106,6 @@ module fpdiv (
// FSM : control divider // FSM : control divider
fsm control (.clk, .reset, .start, .op_type, fsm control (.clk, .reset, .start, .op_type,
// outputs:
.done, .load_rega, .load_regb, .load_regc, .load_regd, .done, .load_rega, .load_regb, .load_regc, .load_regd,
.load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr, .load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr,
.divBusy(FDivBusyE)); .divBusy(FDivBusyE));
@ -122,8 +115,10 @@ module fpdiv (
// set the exception flags. // set the exception flags.
rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF), rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF),
.sel_inv, .Invalid, .SignR(signResult), .sel_inv, .Invalid, .SignR(signResult),
.Float1(op1), .Float2(op2),
.XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ,
.XInfQ, .YInfQ, .op_type,
.q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out, .q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out,
// outputs:
.Result, .Flags(FlagsIn)); .Result, .Flags(FlagsIn));
// Store the final result and the exception flags in registers. // Store the final result and the exception flags in registers.

View File

@ -1,6 +1,6 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// //
// Written: Katherine Parry, Bret Mathis // Written: Katherine Parry, James Stine, Brett Mathis
// Modified: 6/23/2021 // Modified: 6/23/2021
// //
// Purpose: FPU // Purpose: FPU
@ -99,35 +99,24 @@ module fpu (
logic XExpMaxE; // is the exponent all ones (max value) logic XExpMaxE; // is the exponent all ones (max value)
logic XNormE; // is normal logic XNormE; // is normal
// result and flag signals // result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result logic [63:0] FDivResM, FDivResW; // divide/squareroot result
logic [4:0] FDivFlgM, FDivFlgW; // divide/squareroot flags logic [4:0] FDivFlgM, FDivFlgW; // divide/squareroot flags
logic [63:0] FMAResM, FMAResW; // FMA/multiply result logic [63:0] FMAResM, FMAResW; // FMA/multiply result
logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply result logic [4:0] FMAFlgM, FMAFlgW; // FMA/multiply result
logic [63:0] ReadResW; // read result (load instruction) logic [63:0] ReadResW; // read result (load instruction)
logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result logic [63:0] CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result
logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags logic [4:0] CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags
logic [63:0] CvtResE, CvtResM; // FP <-> int convert result logic [63:0] CvtResE, CvtResM; // FP <-> int convert result
logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this logic [4:0] CvtFlgE, CvtFlgM; // FP <-> int convert flags //*** trim this
logic [63:0] ClassResE, ClassResM; // classify result logic [63:0] ClassResE, ClassResM; // classify result
logic [63:0] CmpResE, CmpResM; // compare result logic [63:0] CmpResE, CmpResM; // compare result
logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid) logic CmpNVE, CmpNVM; // compare invalid flag (Not Valid)
logic [63:0] SgnResE, SgnResM; // sign injection result logic [63:0] SgnResE, SgnResM; // sign injection result
logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid) logic SgnNVE, SgnNVM; // sign injection invalid flag (Not Valid)
logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage
logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage
logic [`XLEN-1:0] FIntResE; logic [`XLEN-1:0] FIntResE;
logic [63:0] FPUResultW; // final FP result being written to the FP register logic [63:0] FPUResultW; // final FP result being written to the FP register
// other signals // other signals
@ -136,19 +125,9 @@ module fpu (
logic FDivClk; // clock for divide/squareroot unit logic FDivClk; // clock for divide/squareroot unit
logic [63:0] AlignedSrcAE; // align SrcA to the floating point format logic [63:0] AlignedSrcAE; // align SrcA to the floating point format
////////////////////////////////////////////////////////////////////////////////////////
// DECODE STAGE // DECODE STAGE
////////////////////////////////////////////////////////////////////////////////////////
// calculate FP control signals // calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW,
// outputs:
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD); .FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
@ -157,17 +136,9 @@ module fpu (
fregfile fregfile (.clk, .reset, .we4(FRegWriteW), fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
.a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), .a4(RdW), .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), .a4(RdW),
.wd4(FPUResultW), .wd4(FPUResultW),
// outputs:
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
////////////////////////////////////////////////////////////////////////////////////////
// D/E pipeline registers // D/E pipeline registers
////////////////////////////////////////////////////////////////////////////////////////
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
@ -177,37 +148,27 @@ module fpu (
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD}, {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE}); {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
////////////////////////////////////////////////////////////////////////////////////////
// EXECUTION STAGE // EXECUTION STAGE
////////////////////////////////////////////////////////////////////////////////////////
// Hazard unit for FPU // Hazard unit for FPU
// - determines if any forwarding or stalls are needed // - determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM,
// outputs:
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE); .FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
// forwarding muxs // forwarding muxs
mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); mux3 #(64) fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE); mux3 #(64) fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE); mux3 #(64) fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
mux3 #(64) fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, {2'b0, {10{1'b1}}, 52'b0}, {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01)}, FSrcYE); // Force Z to be 0 for multiply instructions mux3 #(64) fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0},
mux3 #(64) fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); // Force Z to be 0 for multiply instructions {2'b0, {10{1'b1}}, 52'b0},
{FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01)},
FSrcYE); // Force Z to be 0 for multiply instructions
// Force Z to be 0 for multiply instructions
mux3 #(64) fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
// unpacking unit // unpacking unit
// - splits FP inputs into their various parts // - splits FP inputs into their various parts
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infinity) // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infinity)
unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE, unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE,
// outputs:
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
@ -217,8 +178,6 @@ module fpu (
// - execute stage - multiplication and addend shifting // - execute stage - multiplication and addend shifting
// - memory stage - addition and rounding // - memory stage - addition and rounding
// - handles FMA and multiply instructions // - handles FMA and multiply instructions
// - contains some E/M pipeline registers
// *** currently handles FLEN and 32 bits(dont know if 32 works with 128 - easy to fix) - change to handle only the supported formats
fma fma (.clk, .reset, .FlushM, .StallM, fma fma (.clk, .reset, .FlushM, .StallM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE,
@ -227,7 +186,6 @@ module fpu (
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
.FOpCtrlE, .FOpCtrlE,
.FmtE, .FmtM, .FrmM, .FmtE, .FmtM, .FrmM,
// outputs:
.FMAFlgM, .FMAResM); .FMAFlgM, .FMAResM);
// clock gater // clock gater
@ -249,12 +207,15 @@ module fpu (
flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E),
.en(1'b1), .clear(FDivSqrtDoneE), .en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(FDivBusyE)); .reset(reset), .clk(FDivBusyE));
flopenrc #(6) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE}),
.q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ}),
.en(1'b1), .clear(FDivSqrtDoneE),
.reset(reset), .clk(FDivBusyE));
// output for store instructions // fpdivsqrt using Goldschmidt's iteration
//*** change to use the unpacking unit if possible
fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
.reset, .clk(FDivClk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1), .reset, .clk(FDivClk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1),
// outputs: .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ,
.FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
// convert from single to double and vice versa // convert from single to double and vice versa
@ -267,13 +228,11 @@ module fpu (
fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}),
.FSrcXE, .FSrcYE, .FOpCtrlE, .FSrcXE, .FSrcYE, .FOpCtrlE,
.FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE, .FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE,
// outputs:
.Invalid(CmpNVE), .CmpResE); .Invalid(CmpNVE), .CmpResE);
// sign injection unit // sign injection unit
// - computation is done in one stage // - computation is done in one stage
fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE, fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE,
// outputs:
.SgnNVE, .SgnResE); .SgnNVE, .SgnResE);
// classify // classify
@ -281,11 +240,9 @@ module fpu (
// - most of the work is done in the unpacking unit // - most of the work is done in the unpacking unit
// - result is written to the integer register // - result is written to the integer register
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE,
// outputs:
.XSNaNE, .ClassResE); .XSNaNE, .ClassResE);
fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE, fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE,
// outputs:
.CvtResE, .CvtFlgE); .CvtResE, .CvtFlgE);
// data to be stored in memory - to IEU // data to be stored in memory - to IEU
@ -293,7 +250,6 @@ module fpu (
// - if there are any unused bits the most significant bits are filled with 1s // - if there are any unused bits the most significant bits are filled with 1s
assign FWriteDataE = FSrcYE[`XLEN-1:0]; assign FWriteDataE = FSrcYE[`XLEN-1:0];
// Align SrcA to MSB when single precision // Align SrcA to MSB when single precision
mux2 #(64) SrcAMux({{32{1'b1}}, SrcAE[31:0]}, {{64-`XLEN{1'b1}}, SrcAE}, FmtE, AlignedSrcAE); mux2 #(64) SrcAMux({{32{1'b1}}, SrcAE[31:0]}, {{64-`XLEN{1'b1}}, SrcAE}, FmtE, AlignedSrcAE);
@ -302,14 +258,10 @@ module fpu (
mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE); mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE);
// select the result that may be written to the integer register - to IEU // select the result that may be written to the integer register - to IEU
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], CvtResE[`XLEN-1:0], FIntResSelE, FIntResE); mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0],
CvtResE[`XLEN-1:0], FIntResSelE, FIntResE);
//***will synth remove registers of values that are always zero?
////////////////////////////////////////////////////////////////////////////////////////
// E/M pipe registers // E/M pipe registers
////////////////////////////////////////////////////////////////////////////////////////
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM}); flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM});
@ -318,45 +270,18 @@ module fpu (
flopenrc #(12) EMFpReg5(clk, reset, FlushM, ~StallM, flopenrc #(12) EMFpReg5(clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, FResE, FResM); flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, FResE, FResM);
flopenrc #(5) EMRegCmpFlg(clk, reset, FlushM, ~StallM, FFlgE, FFlgM); flopenrc #(5) EMRegCmpFlg(clk, reset, FlushM, ~StallM, FFlgE, FFlgM);
flopenrc #(`XLEN) EMRegSgnRes(clk, reset, FlushM, ~StallM, FIntResE, FIntResM); flopenrc #(`XLEN) EMRegSgnRes(clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
// flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
//flopenrc #(64) EMRegCvtFpRes(clk, reset, FlushM, ~StallM, CvtFpResE, CvtFpResM);
//flopenrc #(5) EMRegCvtFpFlg(clk, reset, FlushM, ~StallM, CvtFpFlgE, CvtFpFlgM);
// flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
// flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
// flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
flopenrc #(11) EMCtrlReg(clk, reset, FlushM, ~StallM, flopenrc #(11) EMCtrlReg(clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, {FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); {FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
////////////////////////////////////////////////////////////////////////////////////////
// BEGIN MEMORY STAGE // BEGIN MEMORY STAGE
////////////////////////////////////////////////////////////////////////////////////////
// FPU flag selection - to privileged // FPU flag selection - to privileged
mux4 #(5) FPUFlgMux(5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM); mux4 #(5) FPUFlgMux(5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
////////////////////////////////////////////////////////////////////////////////////////
// M/W pipe registers // M/W pipe registers
////////////////////////////////////////////////////////////////////////////////////////
flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW); flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW);
@ -365,12 +290,7 @@ module fpu (
{FRegWriteM, FResultSelM, FmtM, FWriteIntM}, {FRegWriteM, FResultSelM, FmtM, FWriteIntM},
{FRegWriteW, FResultSelW, FmtW, FWriteIntW}); {FRegWriteW, FResultSelW, FmtW, FWriteIntW});
////////////////////////////////////////////////////////////////////////////////////////
// BEGIN WRITEBACK STAGE // BEGIN WRITEBACK STAGE
////////////////////////////////////////////////////////////////////////////////////////
// put ReadData into NaN-blocking format // put ReadData into NaN-blocking format
// - if there are any unused bits the most significant bits are filled with 1s // - if there are any unused bits the most significant bits are filled with 1s
@ -380,7 +300,6 @@ module fpu (
// select the result to be written to the FP register // select the result to be written to the FP register
mux4 #(64) FPUResultMux(ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW); mux4 #(64) FPUResultMux(ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW);
end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
assign FStallD = 0; assign FStallD = 0;
assign FWriteIntE = 0; assign FWriteIntE = 0;

View File

@ -1,10 +1,9 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// regfile.sv
// //
// Written: David_Harris@hmc.edu 9 January 2021 // Written: David_Harris@hmc.edu 9 January 2021
// Modified: // Modified: James Stine
// //
// Purpose: 4-port register file // Purpose: 3-port output register file
// //
// A component of the Wally configurable RISC-V project. // A component of the Wally configurable RISC-V project.
// //
@ -40,8 +39,6 @@ module fregfile (
// write fourth port on rising edge of clock (A4/WD4/WE4) // write fourth port on rising edge of clock (A4/WD4/WE4)
// write occurs on falling edge of clock // write occurs on falling edge of clock
// reset is intended for simulation only, not synthesis
always_ff @(negedge clk or posedge reset) always_ff @(negedge clk or posedge reset)
if (reset) for(i=0; i<32; i++) rf[i] <= 0; if (reset) for(i=0; i<32; i++) rf[i] <= 0;
else if (we4) rf[a4] <= wd4; else if (we4) rf[a4] <= wd4;

View File

@ -1,49 +1,63 @@
module fsm ( ///////////////////////////////////////////
//
// Written: James Stine
// Modified: 9/28/2021
//
// Purpose: FSM for floating point divider/square root unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module fsm (
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic start, input logic start,
input logic op_type, input logic op_type,
output logic done, // End of cycles output logic done,
output logic load_rega, // enable for regA output logic load_rega,
output logic load_regb, // enable for regB output logic load_regb,
output logic load_regc, // enable for regC output logic load_regc,
output logic load_regd, // enable for regD output logic load_regd,
output logic load_regr, // enable for rem output logic load_regr,
output logic load_regs, // enable for q,qm,qp output logic load_regs,
output logic [2:0] sel_muxa, // Select muxA output logic [2:0] sel_muxa,
output logic [2:0] sel_muxb, // Select muxB output logic [2:0] sel_muxb,
output logic sel_muxr, // Select rem mux output logic sel_muxr,
output logic divBusy // calculation is happening output logic divBusy
); );
typedef enum logic [4:0] {S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
S10, S11, S12, S13, S14, S15, S16, S17, S18, S19,
S20, S21, S22, S23, S24, S25, S26, S27, S28, S29,
S30} statetype;
reg [4:0] CURRENT_STATE; statetype current_state, next_state;
reg [4:0] NEXT_STATE;
parameter [4:0]
S0=5'd0, S1=5'd1, S2=5'd2,
S3=5'd3, S4=5'd4, S5=5'd5,
S6=5'd6, S7=5'd7, S8=5'd8,
S9=5'd9, S10=5'd10,
S13=5'd13, S14=5'd14, S15=5'd15,
S16=5'd16, S17=5'd17, S18=5'd18,
S19=5'd19, S20=5'd20, S21=5'd21,
S22=5'd22, S23=5'd23, S24=5'd24,
S25=5'd25, S26=5'd26, S27=5'd27,
S28=5'd28, S29=5'd29, S30=5'd30;
always @(negedge clk) always @(negedge clk)
begin begin
if (reset == 1'b1) if (reset == 1'b1)
CURRENT_STATE=S0; current_state = S0;
else else
CURRENT_STATE=NEXT_STATE; current_state = next_state;
end end
always @(*) always @(*)
begin begin
case(CURRENT_STATE) case(current_state)
S0: // iteration 0 S0: // iteration 0
begin begin
if (start==1'b0) if (start==1'b0)
@ -59,7 +73,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S0; next_state = S0;
end end
else if (start==1'b1 && op_type==1'b0) else if (start==1'b1 && op_type==1'b0)
begin begin
@ -74,7 +88,7 @@ module fsm (
sel_muxa = 3'b001; sel_muxa = 3'b001;
sel_muxb = 3'b001; sel_muxb = 3'b001;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S1; next_state = S1;
end // if (start==1'b1 && op_type==1'b0) end // if (start==1'b1 && op_type==1'b0)
else if (start==1'b1 && op_type==1'b1) else if (start==1'b1 && op_type==1'b1)
begin begin
@ -89,7 +103,7 @@ module fsm (
sel_muxa = 3'b010; sel_muxa = 3'b010;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S13; next_state = S13;
end end
else else
begin begin
@ -104,7 +118,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S0; next_state = S0;
end end
end // case: S0 end // case: S0
S1: S1:
@ -120,7 +134,7 @@ module fsm (
sel_muxa = 3'b010; sel_muxa = 3'b010;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S2; next_state = S2;
end end
S2: // iteration 1 S2: // iteration 1
begin begin
@ -135,7 +149,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S3; next_state = S3;
end end
S3: S3:
begin begin
@ -150,7 +164,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S4; next_state = S4;
end end
S4: // iteration 2 S4: // iteration 2
begin begin
@ -165,7 +179,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S5; next_state = S5;
end end
S5: S5:
begin begin
@ -180,7 +194,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; // add sel_muxr = 1'b0; // add
NEXT_STATE = S6; next_state = S6;
end end
S6: // iteration 3 S6: // iteration 3
begin begin
@ -195,7 +209,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S8; next_state = S8;
end end
S7: S7:
begin begin
@ -210,7 +224,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S8; next_state = S8;
end // case: S7 end // case: S7
S8: // q,qm,qp S8: // q,qm,qp
begin begin
@ -225,7 +239,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S9; next_state = S9;
end end
S9: // rem S9: // rem
begin begin
@ -240,7 +254,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b1; sel_muxr = 1'b1;
NEXT_STATE = S10; next_state = S10;
end end
S10: // done S10: // done
begin begin
@ -255,7 +269,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S0; next_state = S0;
end end
S13: // start of sqrt path S13: // start of sqrt path
begin begin
@ -270,7 +284,7 @@ module fsm (
sel_muxa = 3'b010; sel_muxa = 3'b010;
sel_muxb = 3'b001; sel_muxb = 3'b001;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S14; next_state = S14;
end end
S14: S14:
begin begin
@ -285,7 +299,7 @@ module fsm (
sel_muxa = 3'b001; sel_muxa = 3'b001;
sel_muxb = 3'b100; sel_muxb = 3'b100;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S15; next_state = S15;
end end
S15: // iteration 1 S15: // iteration 1
begin begin
@ -300,7 +314,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S16; next_state = S16;
end end
S16: S16:
begin begin
@ -315,7 +329,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S17; next_state = S17;
end end
S17: S17:
begin begin
@ -330,7 +344,7 @@ module fsm (
sel_muxa = 3'b100; sel_muxa = 3'b100;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S18; next_state = S18;
end end
S18: // iteration 2 S18: // iteration 2
begin begin
@ -345,7 +359,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S19; next_state = S19;
end end
S19: S19:
begin begin
@ -360,7 +374,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S20; next_state = S20;
end end
S20: S20:
begin begin
@ -375,7 +389,7 @@ module fsm (
sel_muxa = 3'b100; sel_muxa = 3'b100;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S21; next_state = S21;
end end
S21: // iteration 3 S21: // iteration 3
begin begin
@ -390,7 +404,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S22; next_state = S22;
end end
S22: S22:
begin begin
@ -405,7 +419,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b011; sel_muxb = 3'b011;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S23; next_state = S23;
end end
S23: S23:
begin begin
@ -420,7 +434,7 @@ module fsm (
sel_muxa = 3'b100; sel_muxa = 3'b100;
sel_muxb = 3'b010; sel_muxb = 3'b010;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S24; next_state = S24;
end end
S24: // q,qm,qp S24: // q,qm,qp
begin begin
@ -435,7 +449,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S25; next_state = S25;
end end
S25: // rem S25: // rem
begin begin
@ -450,7 +464,7 @@ module fsm (
sel_muxa = 3'b011; sel_muxa = 3'b011;
sel_muxb = 3'b110; sel_muxb = 3'b110;
sel_muxr = 1'b1; sel_muxr = 1'b1;
NEXT_STATE = S26; next_state = S26;
end end
S26: // done S26: // done
begin begin
@ -465,7 +479,7 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S0; next_state = S0;
end end
default: default:
begin begin
@ -480,9 +494,9 @@ module fsm (
sel_muxa = 3'b000; sel_muxa = 3'b000;
sel_muxb = 3'b000; sel_muxb = 3'b000;
sel_muxr = 1'b0; sel_muxr = 1'b0;
NEXT_STATE = S0; next_state = S0;
end end
endcase // case(CURRENT_STATE) endcase // case(current_state)
end // always @ (CURRENT_STATE or X) end // always @ (current_state or X)
endmodule // fsm endmodule // fsm

View File

@ -1,16 +1,26 @@
///////////////////////////////////////////
// //
// The rounder takes as inputs a 64-bit value to be rounded, A, the // Written: James Stine
// exponent of the value to be rounded, the sign of the final result, Sign, // Modified: 8/1/2018
// the precision of the results, P, and the two-bit rounding mode, rm.
// It produces a rounded 52-bit result, Z, the exponent of the rounded
// result, Z_exp, and a flag that indicates if the result was rounded,
// Inexact. The rounding mode has the following values.
// rm Mode
// 00 round-to-nearest-even
// 01 round-toward-zero
// 10 round-toward-plus infinity
// 11 round-toward-minus infinity
// //
// Purpose: Floating point divider/square root rounder unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module rounder_div ( module rounder_div (
input logic [1:0] rm, input logic [1:0] rm,
@ -21,7 +31,15 @@ module rounder_div (
input logic [2:0] sel_inv, input logic [2:0] sel_inv,
input logic Invalid, input logic Invalid,
input logic SignR, input logic SignR,
input logic [63:0] Float1,
input logic [63:0] Float2,
input logic XNaNQ,
input logic YNaNQ,
input logic XZeroQ,
input logic YZeroQ,
input logic XInfQ,
input logic YInfQ,
input logic op_type,
input logic [63:0] q1, input logic [63:0] q1,
input logic [63:0] qm1, input logic [63:0] qm1,
input logic [63:0] qp1, input logic [63:0] qp1,
@ -62,6 +80,10 @@ module rounder_div (
logic [63:0] q, qm, qp; logic [63:0] q, qm, qp;
logic exp_ovf; logic exp_ovf;
logic [50:0] NaN_out;
logic NaN_Sign_out;
logic Sign_out;
// Remainder = 0? // Remainder = 0?
assign zero_rem = ~(|regr_out); assign zero_rem = ~(|regr_out);
// Remainder Sign // Remainder Sign
@ -117,12 +139,11 @@ module rounder_div (
// the input was infinite or NaN or the output of the adder is zero. // the input was infinite or NaN or the output of the adder is zero.
// 00 = Valid // 00 = Valid
// 10 = NaN // 10 = NaN
assign Valid = (~sel_inv[2]&~sel_inv[1]&~sel_inv[0]); assign Valid = ~sel_inv[2]&~sel_inv[1]&~sel_inv[0];
assign NaN = ~sel_inv[1]& sel_inv[0]; assign NaN = sel_inv[2]&sel_inv[1]&sel_inv[0];
assign UnderFlow = (P & UnFlow_SP | UnFlow_DP) & Valid; assign UnderFlow = (P & UnFlow_SP | UnFlow_DP) & Valid;
assign OverFlow = (P & OvFlow_SP | OvFlow_DP) & Valid; assign OverFlow = (P & OvFlow_SP | OvFlow_DP) & Valid;
assign Div0 = sel_inv[2]&sel_inv[1]&~sel_inv[0]; assign Div0 = YZeroQ&~XZeroQ&~op_type&~NaN;
// The final result is Inexact if any rounding occurred (i.e., R or S // The final result is Inexact if any rounding occurred (i.e., R or S
// is one), or (if the result overflows) or (if the result underflows and the // is one), or (if the result overflows) or (if the result underflows and the
@ -162,8 +183,16 @@ module rounder_div (
// If the result is NaN, the mantissa is 10...0 // If the result is NaN, the mantissa is 10...0
// If the result is the largest floating point number, the mantissa // If the result is the largest floating point number, the mantissa
// is all ones. Otherwise, the mantissa is not changed. // is all ones. Otherwise, the mantissa is not changed.
assign NaN_out = ~XNaNQ&YNaNQ ? Float2[50:0] : Float1[50:0];
assign NaN_Sign_out = ~XNaNQ&YNaNQ ? Float2[63] : Float1[63];
assign Sign_out = (XZeroQ&YZeroQ | XInfQ&YInfQ)&~op_type | Rsign&~XNaNQ&~YNaNQ |
NaN_Sign_out&(XNaNQ|YNaNQ);
// FIXME (jes) - Imperas gives sNaN a Sign=0 where x86 gives Sign=1
// | Float1[63]&op_type;
assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero); assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero);
assign Rmant[50:0] = {51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}); assign Rmant[50:0] = ({51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}) |
(NaN_out&{51{NaN}}))&({51{~(op_type&Float1[63]&~XZeroQ)}});
// For single precision, the 8 least significant bits of the exponent // For single precision, the 8 least significant bits of the exponent
// and 23 most significant bits of the mantissa contain bits used // and 23 most significant bits of the mantissa contain bits used
@ -171,8 +200,8 @@ module rounder_div (
// overflow has occurred, the overflow trap is enabled, and a conversion // overflow has occurred, the overflow trap is enabled, and a conversion
// is being performed. // is being performed.
assign OvCon = OverFlow & OvEn; assign OvCon = OverFlow & OvEn;
assign Result = (P&~OvCon) ? { {32{1'b1}}, Rsign, Rexp[7:0], Rmant[51:29]} assign Result = (P&~OvCon) ? { {32{1'b1}}, Sign_out, Rexp[7:0], Rmant[51:29]}
: {Rsign, Rexp, Rmant}; : {Sign_out, Rexp, Rmant};
endmodule // rounder endmodule // rounder

View File

@ -1,5 +1,30 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_a0 (input logic [6:0] a, module sbtm_a0 (input logic [6:0] a,
output logic [12:0] y); output logic [12:0] y);
always_comb always_comb
case(a) case(a)
7'b0000000: y = 13'b1111111100010; 7'b0000000: y = 13'b1111111100010;

View File

@ -1,5 +1,30 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_a1 (input logic [6:0] a, module sbtm_a1 (input logic [6:0] a,
output logic [4:0] y); output logic [4:0] y);
always_comb always_comb
case(a) case(a)
7'b0000000: y = 5'b11100; 7'b0000000: y = 5'b11100;

View File

@ -1,5 +1,30 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_a2 (input logic [7:0] a, module sbtm_a2 (input logic [7:0] a,
output logic [13:0] y); output logic [13:0] y);
always_comb always_comb
case(a) case(a)
8'b01000000: y = 14'b10110100010111; 8'b01000000: y = 14'b10110100010111;

View File

@ -1,5 +1,30 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_a3 (input logic [7:0] a, module sbtm_a3 (input logic [7:0] a,
output logic [5:0] y); output logic [5:0] y);
always_comb always_comb
case(a) case(a)
8'b01000000: y = 6'b100110; 8'b01000000: y = 6'b100110;

View File

@ -1,3 +1,27 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup for divide portion of fpdivsqrt
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out); module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out);
// bit partitions // bit partitions

View File

@ -1,3 +1,27 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup for sqrt part of fpdivsqrt
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y); module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y);
// bit partitions // bit partitions