From 6216bd717242f8a946003603f620e4e31665206c Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 28 Jun 2021 18:53:58 -0400 Subject: [PATCH] FPU control signals changed and FMA works --- wally-pipelined/src/fpu/FMA/tbgen/tb.sv | 5 +- wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh | 2 +- wally-pipelined/src/fpu/fctrl.sv | 267 ++++----- wally-pipelined/src/fpu/fma1.sv | 281 +++++----- wally-pipelined/src/fpu/fma2.sv | 516 +++++++++--------- wally-pipelined/src/fpu/fpu.sv | 149 ++--- wally-pipelined/src/fpu/fpuhazard.sv | 6 +- wally-pipelined/src/ieu/datapath.sv | 8 +- wally-pipelined/src/ieu/ieu.sv | 3 +- .../src/wally/wallypipelinedhart.sv | 23 +- 10 files changed, 571 insertions(+), 689 deletions(-) diff --git a/wally-pipelined/src/fpu/FMA/tbgen/tb.sv b/wally-pipelined/src/fpu/FMA/tbgen/tb.sv index 4c93cd575..5a8e7a868 100644 --- a/wally-pipelined/src/fpu/FMA/tbgen/tb.sv +++ b/wally-pipelined/src/fpu/FMA/tbgen/tb.sv @@ -45,8 +45,8 @@ assign FOpCtrlE = 3'b0; // down - 010 // up - 011 // nearest max mag - 100 -assign FrmE = 3'b010; -assign FmtE = 1'b1; +assign FrmE = 3'b011; +assign FmtE = 1'b0; assign wnan = FmtE ? 
&FmaResultM[62:52] && |FmaResultM[51:0] : &FmaResultM[62:55] && |FmaResultM[54:32]; @@ -110,7 +110,6 @@ always @(posedge clk) if(ans >= 64'h7FF8000000000000 && ans <= 64'h7FFfffffffffffff ) $display( "ans=qutNaN "); if(ans >= 64'hFFF8000000000000 && ans <= 64'hFFFfffffffffffff ) $display( "ans=qutNaN "); errors = errors + 1; - if (errors == 20) $stop; end if((FmtE==1'b0)&(FmaFlagsM != flags[4:0] || (!wnan && (FmaResultM != ans)) || (wnan && ansnan && ~(((xnan && (FmaResultM[62:0] == {FInput1E[62:55],1'b1,FInput1E[53:0]})) || (ynan && (FmaResultM[62:0] == {FInput2E[62:55],1'b1,FInput2E[53:0]})) || (znan && (FmaResultM[62:0] == {FInput3E[62:55],1'b1,FInput3E[53:0]})) || (FmaResultM[62:0] == ans[62:0]))) ))) begin diff --git a/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh b/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh index dc9562b1a..5f12e143c 100755 --- a/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh +++ b/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh @@ -1,3 +1,3 @@ -testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rmin -seed 113355 -level 1 > testFloat +testfloat_gen f32_mulAdd -tininessafter -n 6133248 -rmax -seed 113355 -level 1 > testFloat tr -d ' ' < testFloat > testFloatNoSpace diff --git a/wally-pipelined/src/fpu/fctrl.sv b/wally-pipelined/src/fpu/fctrl.sv index a9fcb564e..3be9b281a 100755 --- a/wally-pipelined/src/fpu/fctrl.sv +++ b/wally-pipelined/src/fpu/fctrl.sv @@ -6,176 +6,128 @@ module fctrl ( input logic [2:0] Funct3D, input logic [2:0] FRM_REGW, output logic IllegalFPUInstrD, - output logic IsFPD, output logic FWriteEnD, output logic FDivStartD, output logic [2:0] FResultSelD, output logic [3:0] FOpCtrlD, + output logic [1:0] FResSelD, + output logic [1:0] FIntResSelD, output logic FmtD, output logic [2:0] FrmD, - output logic [1:0] FMemRWD, - output logic FOutputInput2D, - output logic FInput2UsedD, FInput3UsedD, output logic FWriteIntD); - - logic IllegalFPUInstr1D, IllegalFPUInstr2D; - // *** fix rounding for dynamic rounding + `define FCTRLW 
15 + logic [`FCTRLW-1:0] ControlsD; + // FPU Instruction Decoder: selects the packed control word ControlsD from OpD, Funct7D, Funct3D and Rs2D[0] + always_comb + case(OpD) + // FWriteEn_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr (field widths 1_1_3_4_2_2_1_1, 15 bits total) + 7'b0000111: case(Funct3D) + 3'b010: ControlsD = `FCTRLW'b1_0_000_0000_00_00_0_0; // flw + 3'b011: ControlsD = `FCTRLW'b1_0_000_0001_00_00_0_0; // fld + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b0100111: case(Funct3D) + 3'b010: ControlsD = `FCTRLW'b0_0_000_0010_00_00_0_0; // fsw + 3'b011: ControlsD = `FCTRLW'b0_0_000_0011_00_00_0_0; // fsd + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b1000011: ControlsD = `FCTRLW'b1_0_001_0000_00_00_0_0; // fmadd + 7'b1000111: ControlsD = `FCTRLW'b1_0_001_0001_00_00_0_0; // fmsub + 7'b1001011: ControlsD = `FCTRLW'b1_0_001_0010_00_00_0_0; // fnmsub + 7'b1001111: ControlsD = `FCTRLW'b1_0_001_0011_00_00_0_0; // fnmadd + 7'b1010011: casez(Funct7D) + 7'b00000??: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fadd + 7'b00001??: ControlsD = `FCTRLW'b1_0_010_0001_00_00_0_0; // fsub + 7'b00010??: ControlsD = `FCTRLW'b1_0_001_0100_00_00_0_0; // fmul + 7'b00011??: ControlsD = `FCTRLW'b1_0_011_0000_00_00_1_0; // fdiv + 7'b01011??: ControlsD = `FCTRLW'b1_0_011_0001_00_00_1_0; // fsqrt + 7'b00100??: case(Funct3D) + 3'b000: ControlsD = `FCTRLW'b1_0_100_0000_01_00_0_0; // fsgnj + 3'b001: ControlsD = `FCTRLW'b1_0_100_0001_01_00_0_0; // fsgnjn + 3'b010: ControlsD = `FCTRLW'b1_0_100_0010_01_00_0_0; // fsgnjx + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b00101??: case(Funct3D) + 3'b000: ControlsD = `FCTRLW'b1_0_100_0111_10_00_0_0; // fmin + 3'b001: ControlsD = `FCTRLW'b1_0_100_0101_10_00_0_0; // fmax + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b10100??: case(Funct3D) + 3'b010: ControlsD = `FCTRLW'b0_1_100_0010_00_00_0_0; // feq + 3'b001: ControlsD = 
`FCTRLW'b0_1_100_0001_00_00_0_0; // flt + 3'b000: ControlsD = `FCTRLW'b0_1_100_0011_00_00_0_0; // fle + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b11100??: if (Funct3D == 3'b001) + ControlsD = `FCTRLW'b0_1_100_0000_00_10_0_0; // fclass + else if ((Funct3D == 3'b000) & ~Funct7D[0]) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w (funct3=000; width comes from funct7[0], not funct3) + else if ((Funct3D == 3'b000) & Funct7D[0]) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d (funct3=000, funct7[0]=1) + else ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + 7'b1100000: case(Rs2D[0]) // NOTE(review): RISC-V assigns funct7=1100000 to fcvt.w[u].s (float->int); labels/FOpCtrl below look swapped with the 1101000 group - confirm against the convert unit + 1'b0: ControlsD = `FCTRLW'b0_1_010_0110_00_00_0_0; // fcvt.s.w + 1'b1: ControlsD = `FCTRLW'b0_1_010_0101_00_00_0_0; // fcvt.s.wu - NOTE(review): FOpCtrl 0101 is not the unsigned sibling of 0110 used above; confirm + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b1101000: case(Rs2D[0]) // NOTE(review): RISC-V assigns funct7=1101000 to fcvt.s.w[u] (int->float), so only the FP register file is written + 1'b0: ControlsD = `FCTRLW'b1_0_010_0100_00_00_0_0; // fcvt.w.s (label as in original; write enables fixed from 1_1 to 1_0) + 1'b1: ControlsD = `FCTRLW'b1_0_010_0101_00_00_0_0; // fcvt.wu.s (label as in original; write enables fixed from 1_1 to 1_0) + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x + 7'b0100000: ControlsD = `FCTRLW'b1_0_010_0010_00_00_0_0; // fcvt.s.d + 7'b1100001: case(Rs2D[0]) // NOTE(review): RISC-V assigns funct7=1100001 to fcvt.w[u].d (float->int); confirm labels/FOpCtrl + 1'b0: ControlsD = `FCTRLW'b0_1_010_1110_00_00_0_0; // fcvt.d.w + 1'b1: ControlsD = `FCTRLW'b0_1_010_1111_00_00_0_0; // fcvt.d.wu + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b1101001: case(Rs2D[0]) // NOTE(review): RISC-V assigns funct7=1101001 to fcvt.d.w[u] (int->float); confirm labels/FOpCtrl + 1'b0: ControlsD = `FCTRLW'b1_0_010_1100_00_00_0_0; // fcvt.w.d + 1'b1: ControlsD = `FCTRLW'b1_0_010_1101_00_00_0_0; // fcvt.wu.d + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x + 7'b0100001: ControlsD = `FCTRLW'b1_0_010_1000_00_00_0_0; // fcvt.d.s + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + default: ControlsD = 
`FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + // unswizzle control bits (same field order as the decoder table above) + assign {FWriteEnD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD; + + // if dynamic rounding, choose FRM_REGW assign FrmD = &Funct3D ? FRM_REGW : Funct3D; - //all subsequent logic is based on the table present - //in Section 5 of Wally Architecture Specification - - //write is enabled for all fp instruciton op codes - //sans fp load - always_comb begin - //case statement is easier to modify - //in case of errors - case(OpD) - //fp instructions sans load - 7'b1010011 : IsFPD = 1'b1; - 7'b1000011 : IsFPD = 1'b1; - 7'b1000111 : IsFPD = 1'b1; - 7'b1001011 : IsFPD = 1'b1; - 7'b1001111 : IsFPD = 1'b1; - 7'b0100111 : IsFPD = 1'b1; - 7'b0000111 : IsFPD = 1'b1;// KEP change 7'b1010011 to 7'b0000111 - default : IsFPD = 1'b0; - endcase - end - - - - //useful intermediary signals - // - //(mult only not supported in current datapath) - //set third FMA operand to zero in this case - //(or equivalent) - - always_comb begin - //checks all but FMA/store/load - IllegalFPUInstr2D = 0; - FDivStartD = 1'b0; - if(OpD == 7'b1010011) begin - casez(Funct7D) - //compare - 7'b10100?? : FResultSelD = 3'b001; - //div/sqrt - 7'b0?011?? : begin FResultSelD = 3'b000; FDivStartD = 1'b1; end - //add/sub - 7'b0000??? : FResultSelD = 3'b100; - //mult - 7'b00010?? : FResultSelD = 3'b010; - //convert (not precision) - 7'b110?0?? : FResultSelD = 3'b100; - //convert (precision) - 7'b010000? : FResultSelD = 3'b100; - //Min/Max - 7'b00101?? : FResultSelD = 3'b001; - //sign injection - 7'b00100?? : FResultSelD = 3'b011; - //classify //only if funct3 = 001 - 7'b11100?? : if(Funct3D == 3'b001) FResultSelD = 3'b101; - //output ReadData1 - else if (Funct7D[1] == 0) FResultSelD = 3'b111; - //output SrcW - 7'b111100? 
: FResultSelD = 3'b110; - default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end - endcase - end - //FMA/store/load - else begin - case(OpD) - //4 FMA instructions - 7'b1000011 : FResultSelD = 3'b010; - 7'b1000111 : FResultSelD = 3'b010; - 7'b1001011 : FResultSelD = 3'b010; - 7'b1001111 : FResultSelD = 3'b010; - //store - 7'b0100111 : FResultSelD = 3'b111; - //load - 7'b0000111 : FResultSelD = 3'b111; - default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end - endcase - end - end - - assign FOutputInput2D = OpD == 7'b0100111; - - assign FMemRWD[0] = FOutputInput2D; - assign FMemRWD[1] = OpD == 7'b0000111; - - - - //register is chosen based on operation performed - //---- - //write selection is chosen in the same way as - //register selection - // - - // reg/write sel logic and assignment - // - // 3'b000 = div/sqrt - // 3'b001 = cmp - // 3'b010 = fma/mult - // 3'b011 = sgn inj - // 3'b100 = add/sub/cnvt - // 3'b101 = classify - // 3'b110 = output SrcAW - // 3'b111 = output ReadData1 - // - //reg select - - //this value is used enough to be shorthand - - - //operation control for each fp operation - //has to be expanded over standard to account for - //integrated fpadd/cvt - // - //will integrate FMA opcodes into design later - // - //conversion instructions will - //also need to be added later as I find the opcode - //version I used for this repo - - //let's do separate SOP for each type of operation -// assign FOpCtrlD[3] = 1'b0; -// -// - - - - always_comb begin - IllegalFPUInstr1D = 0; - FInput3UsedD = 0; - case (FResultSelD) - // div/sqrt + // Precision + // 0-single + // 1-double + assign FmtD = FResultSelD == 3'b000 ? 
Funct3D[0] : Funct7D[0]; + // div/sqrt // fdiv = ???0 // fsqrt = ???1 - 3'b000 : begin FOpCtrlD = {3'b0, Funct7D[5]}; FInput2UsedD = ~Funct7D[5]; end - // cmp + + // cmp // fmin = ?111 // fmax = ?101 // feq = ?010 // flt = ?001 // fle = ?011 // {?, is min or max, is eq or le, is lt or le} - 3'b001 : begin FOpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])}; FInput2UsedD = 1'b1; end - //fma/mult + + //fma/mult // fmadd = ?000 // fmsub = ?001 // fnmsub = ?010 -(a*b)+c // fnmadd = ?011 -(a*b)-c // fmul = ?100 // {?, is mul, is negitive, is sub} - 3'b010 : begin FOpCtrlD = {1'b0, OpD[4:2]}; FInput2UsedD = 1'b1; FInput3UsedD = ~OpD[4]; end - // sgn inj + + // sgn inj // fsgnj = ??00 // fsgnjn = ??01 // fsgnjx = ??10 - 3'b011 : begin FOpCtrlD = {2'b0, Funct3D[1:0]}; FInput2UsedD = 1'b1; end - // add/sub/cnvt + + // add/sub/cnvt // fadd = 0000 // fsub = 0001 // fcvt.w.s = 0100 @@ -188,35 +140,18 @@ module fctrl ( // fcvt.d.w = 1110 // fcvt.d.wu = 1111 // fcvt.d.s = 1000 - // { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub - 3'b100 : begin FOpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), (Rs2D[0]&Funct7D[5])|(Funct7D[2]&~Funct7D[5])}; FInput2UsedD = ~Funct7D[5]; end - // classify {?, ?, ?, ?} - 3'b101 : begin FOpCtrlD = 4'b0; FInput2UsedD = 1'b0; end - // output SrcAW + // { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub} + // fmv.w.x = ???0 // fmv.w.d = ???1 - 3'b110 : begin FOpCtrlD = {3'b0, Funct7D[0]}; FInput2UsedD = 1'b0; end - // output Input1 + // flw = ?000 // fld = ?001 - // fsw = ?010 // output Input2 - // fsd = ?011 // output Input2 + // fsw = ?010 + // fsd = ?011 // fmv.x.w = ?100 // fmv.x.d = ?101 // {?, is mv, is store, is double or fmv} - 3'b111 : begin FOpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; FInput2UsedD = OpD[5]; end - default : begin FOpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; 
FInput2UsedD = 1'b0; end - endcase - end + - //precision - assign FmtD = (~&FResultSelD & Funct7D[0]) | (&FResultSelD & FOpCtrlD[0]); - - assign IllegalFPUInstrD = IllegalFPUInstr1D | IllegalFPUInstr2D; - //write to integer source if conv to int occurs - //AND of Funct7 for int results - // is add/cvt and is to int or is classify or is cmp and not max/min or is output ReadData1 and is mv - assign FWriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b111)&OpD[6]); - // if not writting to int reg and not a store function and not move - assign FWriteEnD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & IsFPD; endmodule diff --git a/wally-pipelined/src/fpu/fma1.sv b/wally-pipelined/src/fpu/fma1.sv index ab9d2bb17..76f7316ba 100644 --- a/wally-pipelined/src/fpu/fma1.sv +++ b/wally-pipelined/src/fpu/fma1.sv @@ -1,111 +1,111 @@ module fma1( - input logic [63:0] X, // X - input logic [63:0] Y, // Y - input logic [63:0] Z, // Z - input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) - input logic FmtE, // precision 1 = double 0 = single - output logic [105:0] ProdManE, // 1.X frac * 1.Y frac - output logic [161:0] AlignedAddendE, // Z aligned for addition - output logic [12:0] ProdExpE, // X exponent + Y exponent - bias - output logic AddendStickyE, // sticky bit that is calculated during alignment - output logic KillProdE, // set the product to zero before addition if the product is too small to matter - output logic XZeroE, YZeroE, ZZeroE, // inputs are zero - output logic XInfE, YInfE, ZInfE, // inputs are infinity - output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN + input logic [63:0] X, // X + input logic [63:0] Y, // Y + input logic [63:0] Z, // Z + input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) + 
input logic FmtE, // precision 1 = double 0 = single + output logic [105:0] ProdManE, // 1.X frac * 1.Y frac + output logic [161:0] AlignedAddendE, // Z aligned for addition + output logic [12:0] ProdExpE, // X exponent + Y exponent - bias + output logic AddendStickyE, // sticky bit that is calculated during alignment + output logic KillProdE, // set the product to zero before addition if the product is too small to matter + output logic XZeroE, YZeroE, ZZeroE, // inputs are zero + output logic XInfE, YInfE, ZInfE, // inputs are infinity + output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN - logic [51:0] XFrac,YFrac,ZFrac; // input fraction - logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one) - logic [12:0] XExp,YExp,ZExp; // input exponents - logic XSgn,YSgn,ZSgn; // input signs - logic [12:0] AlignCnt; // how far to shift the addend to align with the product - logic [211:0] ZManShifted; // output of the alignment shifter including sticky bit - logic [211:0] ZManPreShifted; // input to the alignment shifter - logic XDenorm, YDenorm, ZDenorm; // inputs are denormal - logic [63:0] Addend; // value to add (Z or zero) - logic [12:0] Bias; // 1023 for double, 127 for single - logic XExpZero, YExpZero, ZExpZero; // input exponent zero - logic XFracZero, YFracZero, ZFracZero; // input fraction zero - logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s + logic [51:0] XFrac,YFrac,ZFrac; // input fraction + logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one) + logic [12:0] XExp,YExp,ZExp; // input exponents + logic XSgn,YSgn,ZSgn; // input signs + logic [12:0] AlignCnt; // how far to shift the addend to align with the product + logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit + logic [213:0] ZManPreShifted; // input to the alignment shifter + logic XDenorm, YDenorm, ZDenorm; // inputs are denormal + logic [63:0] Addend; // value to add (Z or zero) + logic [12:0] Bias; // 1023 for double, 127 for single + 
logic XExpZero, YExpZero, ZExpZero; // input exponent zero + logic XFracZero, YFracZero, ZFracZero; // input fraction zero + logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s - /////////////////////////////////////////////////////////////////////////////// - // split inputs into the sign bit, fraction, and exponent to handle single or double precision - // - single precision is in the top half of the inputs - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // split inputs into the sign bit, fraction, and exponent to handle single or double precision + // - single precision is in the top half of the inputs + /////////////////////////////////////////////////////////////////////////////// - // Set addend to zero if FMUL instruction - assign Addend = FOpCtrlE[2] ? 64'b0 : Z; + // Set addend to zero if FMUL instruction + assign Addend = FOpCtrlE[2] ? 64'b0 : Z; - assign XSgn = X[63]; - assign YSgn = Y[63]; - assign ZSgn = Addend[63]; + assign XSgn = X[63]; + assign YSgn = Y[63]; + assign ZSgn = Addend[63]; - assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]}; - assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]}; - assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]}; + assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]}; + assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]}; + assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]}; - assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0}; - assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0}; - assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0}; - - assign XMan = {~XExpZero, XFrac}; - assign YMan = {~YExpZero, YFrac}; - assign ZMan = {~ZExpZero, ZFrac}; + assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0}; + assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0}; + assign ZFrac = FmtE ? 
Addend[51:0] : {Addend[54:32], 29'b0}; + + assign XMan = {~XExpZero, XFrac}; + assign YMan = {~YExpZero, YFrac}; + assign ZMan = {~ZExpZero, ZFrac}; - assign Bias = FmtE ? 13'h3ff : 13'h7f; + assign Bias = FmtE ? 13'h3ff : 13'h7f; - /////////////////////////////////////////////////////////////////////////////// - // determine if an input is a special value - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // determine if an input is a special value + /////////////////////////////////////////////////////////////////////////////// - assign XExpZero = ~|XExp; - assign YExpZero = ~|YExp; - assign ZExpZero = ~|ZExp; - - assign XFracZero = ~|XFrac; - assign YFracZero = ~|YFrac; - assign ZFracZero = ~|ZFrac; + assign XExpZero = ~|XExp; + assign YExpZero = ~|YExp; + assign ZExpZero = ~|ZExp; + + assign XFracZero = ~|XFrac; + assign YFracZero = ~|YFrac; + assign ZFracZero = ~|ZFrac; - assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0]; - assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0]; - assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0]; - - assign XNaNE = XExpMax & ~XFracZero; - assign YNaNE = YExpMax & ~YFracZero; - assign ZNaNE = ZExpMax & ~ZFracZero; + assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0]; + assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0]; + assign ZExpMax = FmtE ? 
&ZExp[10:0] : &ZExp[7:0]; + + assign XNaNE = XExpMax & ~XFracZero; + assign YNaNE = YExpMax & ~YFracZero; + assign ZNaNE = ZExpMax & ~ZFracZero; - assign XDenorm = XExpZero & ~XFracZero; - assign YDenorm = YExpZero & ~YFracZero; - assign ZDenorm = ZExpZero & ~ZFracZero; + assign XDenorm = XExpZero & ~XFracZero; + assign YDenorm = YExpZero & ~YFracZero; + assign ZDenorm = ZExpZero & ~ZFracZero; - assign XInfE = XExpMax & XFracZero; - assign YInfE = YExpMax & YFracZero; - assign ZInfE = ZExpMax & ZFracZero; + assign XInfE = XExpMax & XFracZero; + assign YInfE = YExpMax & YFracZero; + assign ZInfE = ZExpMax & ZFracZero; - assign XZeroE = XExpZero & XFracZero; - assign YZeroE = YExpZero & YFracZero; - assign ZZeroE = ZExpZero & ZFracZero; + assign XZeroE = XExpZero & XFracZero; + assign YZeroE = YExpZero & YFracZero; + assign ZZeroE = ZExpZero & ZFracZero; - /////////////////////////////////////////////////////////////////////////////// - // Calculate the product - // - When multipliying two fp numbers, add the exponents - // - Subtract the bias (XExp + YExp has two biases, one from each exponent) - // - Denormal numbers have an an exponent value of 1, however they are - // represented with an exponent of 0. add one if there is a denormal number - /////////////////////////////////////////////////////////////////////////////// - - // verilator lint_off WIDTH - assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 : - XExp + YExp - Bias + XDenorm + YDenorm; + /////////////////////////////////////////////////////////////////////////////// + // Calculate the product + // - When multipliying two fp numbers, add the exponents + // - Subtract the bias (XExp + YExp has two biases, one from each exponent) + // - Denormal numbers have an an exponent value of 1, however they are + // represented with an exponent of 0. 
add one if there is a denormal number + /////////////////////////////////////////////////////////////////////////////// + + // verilator lint_off WIDTH + assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 : + XExp + YExp - Bias + XDenorm + YDenorm; - // Calculate the product's mantissa - // - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one. - assign ProdManE = XMan * YMan; + // Calculate the product's mantissa + // - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one. + assign ProdManE = XMan * YMan; @@ -114,72 +114,71 @@ module fma1( - - /////////////////////////////////////////////////////////////////////////////// - // Alignment shifter - /////////////////////////////////////////////////////////////////////////////// + + /////////////////////////////////////////////////////////////////////////////// + // Alignment shifter + /////////////////////////////////////////////////////////////////////////////// - // determine the shift count for alignment - // - negitive means Z is larger, so shift Z left - // - positive means the product is larger, so shift Z right - // - Denormal numbers have an an exponent value of 1, however they are - // represented with an exponent of 0. add one to the exponent if it is a denormal number - assign AlignCnt = ProdExpE - ZExp - ZDenorm; - // verilator lint_on WIDTH + // determine the shift count for alignment + // - negitive means Z is larger, so shift Z left + // - positive means the product is larger, so shift Z right + // - Denormal numbers have an an exponent value of 1, however they are + // represented with an exponent of 0. 
add one to the exponent if it is a denormal number + assign AlignCnt = ProdExpE - ZExp - ZDenorm; + // verilator lint_on WIDTH - // Defualt Addition without shifting - // | 55'b0 | 106'b(product) | 2'b0 | - // |1'b0| addnend | + // Defualt Addition without shifting + // | 55'b0 | 106'b(product) | 2'b0 | + // |1'b0| addnend | - // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) - assign ZManPreShifted = {55'b0, ZMan, 104'b0}; - always_comb - begin - - // If the product is too small to effect the sum, kill the product + // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) + assign ZManPreShifted = {55'b0, ZMan, 106'b0}; + always_comb + begin + + // If the product is too small to effect the sum, kill the product - // | 55'b0 | 106'b(product) | 2'b0 | - // | addnend | - if ($signed(AlignCnt) <= $signed(-13'd56)) begin - KillProdE = 1; - ZManShifted = {107'b0, ZMan, 52'b0}; - AddendStickyE = ~(XZeroE|YZeroE); + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + if ($signed(AlignCnt) <= $signed(-13'd56)) begin + KillProdE = 1; + ZManShifted = ZManPreShifted;//{107'b0, ZMan, 54'b0}; + AddendStickyE = ~(XZeroE|YZeroE); - // If the Addend is shifted left (negitive AlignCnt) + // If the Addend is shifted left (negitive AlignCnt) - // | 55'b0 | 106'b(product) | 2'b0 | - // | addnend | - end else if($signed(AlignCnt) <= $signed(13'd0)) begin - KillProdE = 0; - ZManShifted = ZManPreShifted << -AlignCnt; - AddendStickyE = |(ZManShifted[49:0]); + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + end else if($signed(AlignCnt) <= $signed(13'd0)) begin + KillProdE = 0; + ZManShifted = ZManPreShifted << -AlignCnt; + AddendStickyE = |(ZManShifted[51:0]); - // If the Addend is shifted right (positive AlignCnt) + // If the Addend is shifted right (positive AlignCnt) - // | 55'b0 | 106'b(product) | 2'b0 | - // | addnend | - end else if 
($signed(AlignCnt)<=$signed(13'd104)) begin - KillProdE = 0; - ZManShifted = ZManPreShifted >> AlignCnt; - AddendStickyE = |(ZManShifted[49:0]); + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + end else if ($signed(AlignCnt)<=$signed(13'd106)) begin + KillProdE = 0; + ZManShifted = ZManPreShifted >> AlignCnt; + AddendStickyE = |(ZManShifted[51:0]); - // If the addend is too small to effect the addition - // - The addend has to shift two past the end of the addend to be considered too small - // - The 2 extra bits are needed for rounding + // If the addend is too small to effect the addition + // - The addend has to shift two past the end of the addend to be considered too small + // - The 2 extra bits are needed for rounding - // | 55'b0 | 106'b(product) | 2'b0 | - // | addnend | - end else begin - KillProdE = 0; - ZManShifted = 0; - AddendStickyE = ~ZZeroE; + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + end else begin + KillProdE = 0; + ZManShifted = 0; + AddendStickyE = ~ZZeroE; - end - end + end + end - - assign AlignedAddendE = ZManShifted[211:50]; - -endmodule + + assign AlignedAddendE = ZManShifted[213:52]; +endmodule \ No newline at end of file diff --git a/wally-pipelined/src/fpu/fma2.sv b/wally-pipelined/src/fpu/fma2.sv index f9efe93e8..131f98394 100644 --- a/wally-pipelined/src/fpu/fma2.sv +++ b/wally-pipelined/src/fpu/fma2.sv @@ -1,127 +1,131 @@ + + module fma2( - input logic [63:0] X, // X - input logic [63:0] Y, // Y - input logic [63:0] Z, // Z - input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) - input logic FmtM, // precision 1 = double 0 = single - input logic [105:0] ProdManM, // 1.X frac * 1.Y frac - input logic [161:0] AlignedAddendM, // Z aligned for addition - 
input logic [12:0] ProdExpM, // X exponent + Y exponent - bias - input logic AddendStickyM, // sticky bit that is calculated during alignment - input logic KillProdM, // set the product to zero before addition if the product is too small to matter - input logic XZeroM, YZeroM, ZZeroM, // inputs are zero - input logic XInfM, YInfM, ZInfM, // inputs are infinity - input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN - output logic [63:0] FmaResultM, // FMA final result - output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} - + input logic [63:0] X, // X + input logic [63:0] Y, // Y + input logic [63:0] Z, // Z + input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude + input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) + input logic FmtM, // precision 1 = double 0 = single + input logic [105:0] ProdManM, // 1.X frac * 1.Y frac + input logic [161:0] AlignedAddendM, // Z aligned for addition + input logic [12:0] ProdExpM, // X exponent + Y exponent - bias + input logic AddendStickyM, // sticky bit that is calculated during alignment + input logic KillProdM, // set the product to zero before addition if the product is too small to matter + input logic XZeroM, YZeroM, ZZeroM, // inputs are zero + input logic XInfM, YInfM, ZInfM, // inputs are infinity + input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN + output logic [63:0] FmaResultM, // FMA final result + output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} + - logic [51:0] ResultFrac; // Result fraction - logic [10:0] ResultExp; // Result exponent - logic ResultSgn; // Result sign - logic [10:0] ZExp; // input exponent - logic XSgn, YSgn, ZSgn; // input sign - logic PSgn; // product sign - logic [105:0] ProdMan2; // 
product being added - logic [162:0] AlignedAddend2; // possibly inverted aligned Z - logic [161:0] Sum; // positive sum - logic [162:0] PreSum; // possibly negitive sum - logic [12:0] SumExp; // exponent of the normalized sum - logic [12:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results - logic [12:0] SumExpTmpMinus1; // SumExpTmp-1 - logic [12:0] FullResultExp; // ResultExp with bits to determine sign and overflow - logic [53:0] NormSum; // normalized sum - logic [161:0] SumShifted; // sum shifted for normalization - logic [8:0] NormCnt; // output of the leading zero detector - logic NormSumSticky; // sticky bit calulated from the normalized sum - logic SumZero; // is the sum zero - logic NegSum; // is the sum negitive - logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z) - logic ResultDenorm; // is the result denormalized - logic Sticky; // Sticky bit - logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding - logic Invalid,Underflow,Overflow,Inexact; // flags - logic [8:0] DenormShift; // right shift if the result is denormalized - logic SubBySmallNum; // was there supposed to be a subtraction by a small number - logic [63:0] Addend; // value to add (Z or zero) - logic ZeroSgn; // the result's sign if the sum is zero - logic ResultSgnTmp; // the result's sign assuming the result is not zero - logic Guard, Round, LSBNormSum; // bits needed to determine rounding - logic [12:0] MaxExp; // maximum value of the exponent - logic [12:0] FracLen; // length of the fraction - logic SigNaN; // is an input a signaling NaN - logic UnderflowFlag; // Underflow singal used in FmaFlagsM (used to avoid a circular depencency) - logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results + logic [51:0] ResultFrac; // Result fraction + logic [10:0] ResultExp; // Result exponent + logic ResultSgn; // Result 
sign + logic [10:0] ZExp; // input exponent + logic XSgn, YSgn, ZSgn; // input sign + logic PSgn; // product sign + logic [105:0] ProdMan2; // product being added + logic [162:0] AlignedAddend2; // possibly inverted aligned Z + logic [161:0] Sum; // positive sum + logic [162:0] PreSum; // possibly negitive sum + logic [12:0] SumExp; // exponent of the normalized sum + logic [12:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results + logic [12:0] SumExpTmpMinus1; // SumExpTmp-1 + logic [12:0] FullResultExp; // ResultExp with bits to determine sign and overflow + logic [54:0] NormSum; // normalized sum + logic [161:0] SumShifted; // sum shifted for normalization + logic [8:0] NormCnt; // output of the leading zero detector + logic NormSumSticky; // sticky bit calulated from the normalized sum + logic SumZero; // is the sum zero + logic NegSum; // is the sum negitive + logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z) + logic ResultDenorm; // is the result denormalized + logic Sticky; // Sticky bit + logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding + logic UfPlus1, UfCalcPlus1; // do you add one (for determining underflow flag) + logic Invalid,Underflow,Overflow,Inexact; // flags + logic [8:0] DenormShift; // right shift if the result is denormalized + logic SubBySmallNum; // was there supposed to be a subtraction by a small number + logic [63:0] Addend; // value to add (Z or zero) + logic ZeroSgn; // the result's sign if the sum is zero + logic ResultSgnTmp; // the result's sign assuming the result is not zero + logic Guard, Round, LSBNormSum; // bits needed to determine rounding + logic UfGuard, UfRound, UfLSBNormSum; // bits needed to determine rounding for underflow flag + logic [12:0] MaxExp; // maximum value of the exponent + logic [12:0] FracLen; // length of the fraction + logic SigNaN; // is an input a signaling NaN + logic UnderflowFlag; // 
Underflow singal used in FmaFlagsM (used to avoid a circular depencency) + logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results - - /////////////////////////////////////////////////////////////////////////////// - // Select input fields - // The following logic duplicates fma1 because it's cheaper to recompute than provide registers - /////////////////////////////////////////////////////////////////////////////// + + /////////////////////////////////////////////////////////////////////////////// + // Select input fields + // The following logic duplicates fma1 because it's cheaper to recompute than provide registers + /////////////////////////////////////////////////////////////////////////////// - // Set addend to zero if FMUL instruction - assign Addend = FOpCtrlM[2] ? 64'b0 : Z; + // Set addend to zero if FMUL instruction + assign Addend = FOpCtrlM[2] ? 64'b0 : Z; - // split inputs into the sign bit, and exponent to handle single or double precision - // - single precision is in the top half of the inputs - assign XSgn = X[63]; - assign YSgn = Y[63]; - assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction + // split inputs into the sign bit, and exponent to handle single or double precision + // - single precision is in the top half of the inputs + assign XSgn = X[63]; + assign YSgn = Y[63]; + assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction - assign ZExp = FmtM ? Addend[62:52] : {3'b0, Addend[62:55]}; + assign ZExp = FmtM ? 
Addend[62:52] : {3'b0, Addend[62:55]}; - // Calculate the product's sign - // Negate product's sign if FNMADD or FNMSUB - assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1]; + // Calculate the product's sign + // Negate product's sign if FNMADD or FNMSUB + assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1]; - /////////////////////////////////////////////////////////////////////////////// - // Addition - /////////////////////////////////////////////////////////////////////////////// - - // Negate Z when doing one of the following opperations: - // -prod + Z - // prod - Z - assign InvZ = ZSgn ^ PSgn; + /////////////////////////////////////////////////////////////////////////////// + // Addition + /////////////////////////////////////////////////////////////////////////////// + + // Negate Z when doing one of the following opperations: + // -prod + Z + // prod - Z + assign InvZ = ZSgn ^ PSgn; - // Choose an inverted or non-inverted addend - the one is added later - assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM}; - // Kill the product if the product is too small to effect the addition (determined in fma1.sv) - assign ProdMan2 = KillProdM ? 106'b0 : ProdManM; + // Choose an inverted or non-inverted addend - the one is added later + assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM}; + // Kill the product if the product is too small to effect the addition (determined in fma1.sv) + assign ProdMan2 = KillProdM ? 106'b0 : ProdManM; - // Do the addition - // - add one to negate if the added was inverted - // - the 2 extra bits at the begining and end are needed for rounding - assign PreSum = AlignedAddend2 + {55'b0, ProdMan2, 2'b0} + {162'b0, InvZ}; - - // Is the sum negitive - assign NegSum = PreSum[162]; - // If the sum is negitive, negate the sum. - assign Sum = NegSum ? 
-PreSum[161:0] : PreSum[161:0]; + // Do the addition + // - add one to negate if the added was inverted + // - the 2 extra bits at the begining and end are needed for rounding + assign PreSum = AlignedAddend2 + {55'b0, ProdMan2, 2'b0} + {162'b0, InvZ}; + + // Is the sum negitive + assign NegSum = PreSum[162]; + // If the sum is negitive, negate the sum. + assign Sum = NegSum ? -PreSum[161:0] : PreSum[161:0]; - /////////////////////////////////////////////////////////////////////////////// - // Leading one detector - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Leading one detector + /////////////////////////////////////////////////////////////////////////////// - //*** replace with non-behavoral code - logic [8:0] i; - always_comb begin - i = 0; - while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one - NormCnt = i+1; // compute shift count - end + //*** replace with non-behavoral code + logic [8:0] i; + always_comb begin + i = 0; + while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one + NormCnt = i+1; // compute shift count + end @@ -133,112 +137,127 @@ module fma2( - /////////////////////////////////////////////////////////////////////////////// - // Normalization - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Normalization + /////////////////////////////////////////////////////////////////////////////// - // Determine if the sum is zero - assign SumZero = ~(|Sum); + // Determine if the sum is zero + assign SumZero = ~(|Sum); - // determine the length of the fraction based on precision - assign FracLen = FmtM ? 13'd52 : 13'd23; + // determine the length of the fraction based on precision + assign FracLen = FmtM ? 
13'd52 : 13'd23; - // Determine if the result is denormal - assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56); - assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero; + // Determine if the result is denormal + assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56); + assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero; - // Determine the shift needed for denormal results - assign SumExpTmpMinus1 = SumExpTmp-1; - assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0; + // Determine the shift needed for denormal results + assign SumExpTmpMinus1 = SumExpTmp-1; + assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0; - // Normalize the sum - assign SumShifted = SumZero ? 162'b0 : Sum << NormCnt+DenormShift; - assign NormSum = SumShifted[161:108]; - // Calculate the sticky bit - assign NormSumSticky = FmtM ? (|SumShifted[107:0]) : (|SumShifted[136:0]); - assign Sticky = AddendStickyM | NormSumSticky; + // Normalize the sum + assign SumShifted = SumZero ? 162'b0 : Sum << NormCnt+DenormShift; + assign NormSum = SumShifted[161:107]; + // Calculate the sticky bit + assign NormSumSticky = FmtM ? (|SumShifted[107:0]) : (|SumShifted[136:0]); + assign Sticky = AddendStickyM | NormSumSticky; - // Determine sum's exponent - assign SumExp = SumZero ? 13'b0 : - ResultDenorm ? 13'b0 : - SumExpTmp; + // Determine sum's exponent + assign SumExp = SumZero ? 13'b0 : + ResultDenorm ? 
13'b0 : + SumExpTmp; - /////////////////////////////////////////////////////////////////////////////// - // Rounding - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Rounding + /////////////////////////////////////////////////////////////////////////////// - // round to nearest even - // {Guard, Round, Sticky} - // 0xx - do nothing - // 100 - tie - Plus1 if result is odd (LSBNormSum = 1) - // - don't add 1 if a small number was supposed to be subtracted - // 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) - // 110/111 - Plus1 + // round to nearest even + // {Guard, Round, Sticky} + // 0xx - do nothing + // 100 - tie - Plus1 if result is odd (LSBNormSum = 1) + // - don't add 1 if a small number was supposed to be subtracted + // 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) + // 110/111 - Plus1 - // round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 + // round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 - // round to -infinity - // - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0 - // - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 + // round to -infinity + // - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0 + // - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 - // round to infinity - // - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0 - // - 
subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0 + // round to infinity + // - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0 + // - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0 - // round to nearest max magnitude - // {Guard, Round, Sticky} - // 0xx - do nothing - // 100 - tie - Plus1 - // - don't add 1 if a small number was supposed to be subtracted - // 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) - // 110/111 - Plus1 + // round to nearest max magnitude + // {Guard, Round, Sticky} + // 0xx - do nothing + // 100 - tie - Plus1 + // - don't add 1 if a small number was supposed to be subtracted + // 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) + // 110/111 - Plus1 - // determine guard, round, and least significant bit of the result - assign Guard = FmtM ? NormSum[1] : NormSum[30]; - assign Round = FmtM ? NormSum[0] : NormSum[29]; - assign LSBNormSum = FmtM ? NormSum[2] : NormSum[31]; + // determine guard, round, and least significant bit of the result + assign Guard = FmtM ? NormSum[2] : NormSum[31]; + assign Round = FmtM ? NormSum[1] : NormSum[30]; + assign LSBNormSum = FmtM ? NormSum[3] : NormSum[32]; - // Deterimine if a small number was supposed to be subtrated - assign SubBySmallNum = AddendStickyM&InvZ&~(NormSumSticky)&~ZZeroM; + // used to determine underflow flag + assign UfGuard = FmtM ? NormSum[1] : NormSum[30]; + assign UfRound = FmtM ? NormSum[0] : NormSum[29]; + assign UfLSBNormSum = FmtM ? 
NormSum[2] : NormSum[31]; - always_comb begin - // Determine if you add 1 - case (FrmM) - 3'b000: CalcPlus1 = Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&LSBNormSum&~SubBySmallNum));//round to nearest even - 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down - 3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up - 3'b100: CalcPlus1 = (Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&~SubBySmallNum)));//round to nearest max magnitude - default: CalcPlus1 = 1'bx; - endcase - // Determine if you subtract 1 - case (FrmM) - 3'b000: CalcMinus1 = 0;//round to nearest even - 3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero - 3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down - 3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up - 3'b100: CalcMinus1 = 0;//round to nearest max magnitude - default: CalcMinus1 = 1'bx; - endcase - - end + // Deterimine if a small number was supposed to be subtrated + assign SubBySmallNum = AddendStickyM&InvZ&~(NormSumSticky)&~ZZeroM; - // If an answer is exact don't round - assign Plus1 = CalcPlus1 & (Sticky | Guard | Round); - assign Minus1 = CalcMinus1 & (Sticky | Guard | Round); + always_comb begin + // Determine if you add 1 + case (FrmM) + 3'b000: CalcPlus1 = Guard & (Round | ((Sticky|UfGuard)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfGuard)&LSBNormSum&~SubBySmallNum));//round to nearest even + 3'b001: CalcPlus1 = 0;//round to zero + 3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down + 3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up + 3'b100: CalcPlus1 = (Guard & (Round | ((Sticky|UfGuard)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfGuard)&~SubBySmallNum)));//round to nearest max magnitude + default: CalcPlus1 = 1'bx; + endcase + // Determine if you add 1 (for 
underflow flag) + case (FrmM) + 3'b000: UfCalcPlus1 = UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&UfLSBNormSum&~SubBySmallNum));//round to nearest even + 3'b001: UfCalcPlus1 = 0;//round to zero + 3'b010: UfCalcPlus1 = ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round down + 3'b011: UfCalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round up + 3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&~SubBySmallNum)));//round to nearest max magnitude + default: UfCalcPlus1 = 1'bx; + endcase + // Determine if you subtract 1 + case (FrmM) + 3'b000: CalcMinus1 = 0;//round to nearest even + 3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero + 3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down + 3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up + 3'b100: CalcMinus1 = 0;//round to nearest max magnitude + default: CalcMinus1 = 1'bx; + endcase + + end - // Compute rounded result - logic [64:0] RoundAdd; - logic [51:0] NormSumTruncated; - assign RoundAdd = FmtM ? Minus1 ? {65{1'b1}} : {64'b0, Plus1} : - Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0}; - assign NormSumTruncated = FmtM ? NormSum[53:2] : {NormSum[53:31], 29'b0}; + // If an answer is exact don't round + assign Plus1 = CalcPlus1 & (Sticky | UfGuard | Guard | Round); + assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard | UfRound); + assign Minus1 = CalcMinus1 & (Sticky | UfGuard | Guard | Round); - assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd; + // Compute rounded result + logic [64:0] RoundAdd; + logic [51:0] NormSumTruncated; + assign RoundAdd = FmtM ? Minus1 ? {65{1'b1}} : {64'b0, Plus1} : + Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0}; + assign NormSumTruncated = FmtM ? 
NormSum[54:3] : {NormSum[54:32], 29'b0}; + + assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd; assign ResultExp = FullResultExp[10:0]; @@ -247,58 +266,57 @@ module fma2( - /////////////////////////////////////////////////////////////////////////////// - // Sign calculation - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Sign calculation + /////////////////////////////////////////////////////////////////////////////// - // Determine the sign if the sum is zero - // if cancelation then 0 unless round to -infinity - // otherwise psign - assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn; + // Determine the sign if the sum is zero + // if cancelation then 0 unless round to -infinity + // otherwise psign + assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn; - // is the result negitive - // if p - z is the Sum negitive - // if -p + z is the Sum positive - // if -p - z then the Sum is negitive - assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn); - assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp; + // is the result negitive + // if p - z is the Sum negitive + // if -p + z is the Sum positive + // if -p - z then the Sum is negitive + assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn); + assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp; - /////////////////////////////////////////////////////////////////////////////// - // Flags - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Flags + /////////////////////////////////////////////////////////////////////////////// - // Set Invalid flag for following cases: - // 1) Inf - Inf (unless x or y is NaN) - // 2) 0 * Inf - // 3) any input is a signaling NaN - assign MaxExp = FmtM ? 
13'd2047 : 13'd255; - assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) : - (XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]); - assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); - - // Set Overflow flag if the number is too big to be represented - // - Don't set the overflow flag if an overflowed result isn't outputed - assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[12]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + // Set Invalid flag for following cases: + // 1) any input is a signaling NaN + // 2) Inf - Inf (unless x or y is NaN) + // 3) 0 * Inf + assign MaxExp = FmtM ? 13'd2047 : 13'd255; + assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) : + (XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]); + assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); + + // Set Overflow flag if the number is too big to be represented + // - Don't set the overflow flag if an overflowed result isn't outputed + assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[12]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); - // Set Underflow flag if the number is too small to be represented in normal numbers - // - Don't set the underflow flag if the result is exact - assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); - //assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky)) & ~(FullResultExp == 1); - assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky)) & ~(FullResultExp == 1); - // Set Inexact flag if the result is diffrent from what would be outputed given infinite precision - // - Don't set the underflow flag if an underflowed result isn't outputed - assign Inexact = 
(Sticky|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + // Set Underflow flag if the number is too small to be represented in normal numbers + // - Don't set the underflow flag if the result is exact + assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky|UfGuard)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign UnderflowFlag = (FullResultExp[12] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + // Set Inexact flag if the result is diffrent from what would be outputed given infinite precision + // - Don't set the underflow flag if an underflowed result isn't outputed + assign Inexact = (Sticky|UfGuard|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); - // Combine flags - // - FMA can't set the Divide by zero flag - // - Don't set the underflow flag if the result was rounded up to a normal number - assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact}; + // Combine flags + // - FMA can't set the Divide by zero flag + // - Don't set the underflow flag if the result was rounded up to a normal number + assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact}; @@ -306,31 +324,31 @@ module fma2( - /////////////////////////////////////////////////////////////////////////////// - // Select the result - /////////////////////////////////////////////////////////////////////////////// - assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]}; - assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]}; - assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]}; - assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? 
{ResultSgn, 11'h7fe, {52{1'b1}}} : - {ResultSgn, 11'h7ff, 52'b0} : - ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} : - {ResultSgn, 8'hff, 55'b0}; - assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0}; - assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0}; - assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0}; - assign FmaResultM = XNaNM ? XNaNResult : - YNaNM ? YNaNResult : - ZNaNM ? ZNaNResult : - Invalid ? InvalidResult : // has to be before inf - XInfM ? {PSgn, X[62:0]} : - YInfM ? {PSgn, Y[62:0]} : - ZInfM ? {ZSgn, Addend[62:0]} : - Overflow ? OverflowResult : - KillProdM ? KillProdResult : // has to be after Underflow - Underflow & ~ResultDenorm ? UnderflowResult : - FmtM ? {ResultSgn, ResultExp, ResultFrac} : - {ResultSgn, ResultExp[7:0], ResultFrac, 3'b0}; + /////////////////////////////////////////////////////////////////////////////// + // Select the result + /////////////////////////////////////////////////////////////////////////////// + assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]}; + assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]}; + assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]}; + assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} : + {ResultSgn, 11'h7ff, 52'b0} : + ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? 
{ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} : + {ResultSgn, 8'hff, 55'b0}; + assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0}; + assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0}; + assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0}; + assign FmaResultM = XNaNM ? XNaNResult : + YNaNM ? YNaNResult : + ZNaNM ? ZNaNResult : + Invalid ? InvalidResult : // has to be before inf + XInfM ? {PSgn, X[62:0]} : + YInfM ? {PSgn, Y[62:0]} : + ZInfM ? {ZSgn, Addend[62:0]} : + Overflow ? OverflowResult : + KillProdM ? KillProdResult : // has to be after Underflow + Underflow & ~ResultDenorm ? UnderflowResult : + FmtM ? {ResultSgn, ResultExp, ResultFrac} : + {ResultSgn, ResultExp[7:0], ResultFrac, 3'b0}; diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 7f93d33a7..5c15268ed 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -34,7 +34,6 @@ module fpu ( input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg input logic StallE, StallM, StallW, input logic FlushE, FlushM, FlushW, - output logic IsFPD, IsFPE, // Read/write enable for memory {read, write} output logic FStallD, // Stall the decode stage if Div/Sqrt instruction output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory @@ -59,8 +58,8 @@ module fpu ( logic SrcZUsedD; // Is input 3 used logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component - logic SelLoadInputE, SelLoadInputM; // 
Select which adress to load when single precision - logic FInput2UsedD, FInput3UsedD; + logic [1:0] FResSelD, FResSelE, FResSelM; + logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; logic [4:0] Adr1E, Adr2E, Adr3E; // regfile signals @@ -132,7 +131,8 @@ module fpu ( // fsgn signals logic [63:0] SgnResultE, SgnResultM, SgnResultW; logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW; - logic [63:0] FResM; + logic [63:0] FResM, FResW; + logic FFlgM, FFlgW; // instantiation of W stage regfile signals logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW; @@ -167,38 +167,19 @@ module fpu ( //***************** // other D/E pipe registers //***************** - // flopenrc #(64) DEReg14(clk, reset, FlushE, ~StallE, FPUResult64W, FPUResult64E); - // flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FWriteEnD, FWriteEnE); - // flopenrc #(3) CtrlRegE2(clk, reset, FlushE, ~StallE, FResultSelD, FResultSelE); - // flopenrc #(3) CtrlRegE3(clk, reset, FlushE, ~StallE, FrmD, FrmE); - // flopenrc #(1) CtrlRegE4(clk, reset, FlushE, ~StallE, FmtD, FmtE); - // flopenrc #(5) CtrlRegE5(clk, reset, FlushE, ~StallE, InstrD[11:7], RdE); - // flopenrc #(4) CtrlRegE6(clk, reset, FlushE, ~StallE, FOpCtrlD, FOpCtrlE); flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); flopenrc #(15) CtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, - {Adr1E, Adr2E, Adr3E}); - // flopenrc #(1) CtrlRegE8(clk, reset, FlushE, ~StallE, FWriteIntD, FWriteIntE); - // flopenrc #(1) CtrlRegE9(clk, reset, FlushE, ~StallE, FOutputInput2D, FOutputInput2E); - // flopenrc #(2) CtrlRegE10(clk, reset, FlushE, ~StallE, FMemRWD, FMemRWE); - // flopenrc #(1) CtrlRegE11(clk, reset, FlushE, ~StallE, InstrD[15], SelLoadInputE); - flopenrc #(20) CtrlRegE(clk, reset, FlushE, ~StallE, - {FWriteEnD, FResultSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD, InstrD[15], IsFPD}, - {FWriteEnE, FResultSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE, SelLoadInputE, IsFPE}); + {Adr1E, Adr2E, 
Adr3E}); + flopenrc #(22) DECtrlReg(clk, reset, FlushE, ~StallE, + {FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD}, + {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); //EXECUTION STAGE - // input muxs for forwarding - // single vs double for SRCAM - // mux2 #(64) SrcAMuxForward({SrcAM[31:0], 32'b0}, {SrcAM, {64-`XLEN{1'b0}}}, FmtM, ForwardSrcAM); - // //input 1 forwarding mux - // mux4 #(64) SrcXEmux(FRD1E, FPUResult64W, FPUResult64E, ForwardSrcAM, ForwardXE, SrcXtmpE); - // mux3 #(64) SrcYEmux(FRD2E, FPUResult64W, FPUResult64E, ForwardYE, SrcYE); - // mux2 #(64) SrcZEmux(FRD3E, FPUResult64E, ForwardZE, SrcZE); - // mux2 #(64) FOutputInput2mux(SrcXtmpE, SrcYE, FOutputInput2E, SrcXE); - // Hazard unit for FPU fpuhazard hazard(.*); + // forwarding muxs mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE); mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE); @@ -225,6 +206,8 @@ module fpu ( fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .*); + + // first of two-stage instance of floating-point add/cvt unit fpuaddcvt1 fpadd1 (.*); @@ -236,6 +219,8 @@ module fpu ( // first and only instance of floating-point classify unit fpuclassify fpuclass (.*); + + // output for store instructions assign FWriteDataE = FmtE ? 
SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; //***************** @@ -295,17 +280,9 @@ module fpu ( //***************** // fpcmp E/M pipe registers //***************** - // flopenrc #(8) EMRegCmp1(clk, reset, FlushM, ~StallM, WE, WM); - // flopenrc #(8) EMRegCmp2(clk, reset, FlushM, ~StallM, XE, XM); - // flopenrc #(1) EMRegcmp3(clk, reset, FlushM, ~StallM, ANaNE, ANaNM); - // flopenrc #(1) EMRegCmp4(clk, reset, FlushM, ~StallM, BNaNE, BNaNM); - // flopenrc #(1) EMRegCmp5(clk, reset, FlushM, ~StallM, AzeroE, AzeroM); - // flopenrc #(1) EMRegCmp6(clk, reset, FlushM, ~StallM, BzeroE, BzeroM); flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpInvalidE, CmpInvalidM); - // flopenrc #(2) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpFCCE, CmpFCCM); flopenrc #(64) EMRegCmp3(clk, reset, FlushM, ~StallM, FCmpResultE, FCmpResultM); - // put this in for the event we want to delay fsgn - will otherwise bypass //***************** // fpsgn E/M pipe registers //***************** @@ -315,15 +292,9 @@ module fpu ( //***************** // other E/M pipe registers //***************** - flopenrc #(1) EMReg1(clk, reset, FlushM, ~StallM, FWriteEnE, FWriteEnM); - flopenrc #(3) EMReg2(clk, reset, FlushM, ~StallM, FResultSelE, FResultSelM); - flopenrc #(3) EMReg3(clk, reset, FlushM, ~StallM, FrmE, FrmM); - flopenrc #(1) EMReg4(clk, reset, FlushM, ~StallM, FmtE, FmtM); - flopenrc #(5) EMReg5(clk, reset, FlushM, ~StallM, RdE, RdM); - flopenrc #(4) EMReg6(clk, reset, FlushM, ~StallM, FOpCtrlE, FOpCtrlM); - flopenrc #(1) EMReg7(clk, reset, FlushM, ~StallM, FWriteIntE, FWriteIntM); - // flopenrc #(2) EMReg8(clk, reset, FlushM, ~StallM, FMemRWE, FMemRWM); - flopenrc #(1) EMReg9(clk, reset, FlushM, ~StallM, SelLoadInputE, SelLoadInputM); + flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM, + {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}, + {FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); 
//***************** // fpuclassify E/M pipe registers @@ -332,24 +303,18 @@ module fpu ( //BEGIN MEMORY STAGE - mux2 #(64) FResMux(AlignedSrcAM, SgnResultM, FResultSelM == 3'b011, FResM); - assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; - mux3 #(`XLEN) IntResMux(SrcXMAligned, FCmpResultM[`XLEN-1:0], ClassResultM[`XLEN-1:0], {FResultSelM == 3'b101, FResultSelM == 3'b001}, FIntResM); + mux3 #(64) FResMux(AlignedSrcAM, SgnResultM, FCmpResultM, FResSelM, FResM); + assign FFlgM = CmpInvalidM & FResSelM[1]; - //adjecent adress values are sent to the FPU, select the correct one - // -imm is 80000 most of the time vs the error one which is 00000 - // mux3 #(64) FLoadResultMux({HRDATA[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM); - // mux2 #(64) FLoadStoreResultMux(FLoadResultM, SrcXM, |FOpCtrlM[2:1], FLoadStoreResultM); - + assign SrcXMAligned = FmtM ? 
SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; + mux3 #(`XLEN) IntResMux(FCmpResultM[`XLEN-1:0], SrcXMAligned, ClassResultM[`XLEN-1:0], FIntResSelM, FIntResM); + + // second instance of two-stage FMA unit fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .*); // second instance of two-stage floating-point add/cvt unit fpuaddcvt2 fpadd2 (.*); - // second instance of two-stage floating-point comparator - // fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM), - // .Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(SrcXM), .op2(SrcYM), .*); - // Align SrcA to MSB when single precicion mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); @@ -397,19 +362,16 @@ module fpu ( //***************** // other M/W pipe registers //***************** - flopenrc #(1) MWReg1(clk, reset, FlushW, ~StallW, FWriteEnM, FWriteEnW); - flopenrc #(3) MWReg2(clk, reset, FlushW, ~StallW, FResultSelM, FResultSelW); - flopenrc #(1) MWReg3(clk, reset, FlushW, ~StallW, FmtM, FmtW); - flopenrc #(5) MWReg4(clk, reset, FlushW, ~StallW, RdM, RdW); - flopenrc #(64) MWReg5(clk, reset, FlushW, ~StallW, AlignedSrcAM, SrcAW); - // flopenrc #(64) MWReg6(clk, reset, FlushW, ~StallW, FLoadStoreResultM, FLoadStoreResultW); - flopenrc #(1) MWReg7(clk, reset, FlushW, ~StallW, FWriteIntM, FWriteIntW); - flopenrc #(4) MWReg6(clk, reset, FlushW, ~StallW, FOpCtrlM, FOpCtrlW); + flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM}, + {FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW}); //***************** // fpuclassify M/W pipe registers //***************** flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, ClassResultM, ClassResultW); + flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); @@ -418,14 +380,6 @@ module fpu ( 
//######################################### // BEGIN WRITEBACK STAGE //######################################### - - - // mux3 #(64) FLoadResultMux({ReadD[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM); - // mux2 #(64) FLoadStoreResultMux(FLoadResultM, SrcXM, |FOpCtrlM[2:1], FLoadStoreResultM); - //***RV32D needs to give two bus transactions - mux2 #(64) FLoadResultMux({ReadDataW[31:0], {32{1'b0}}}, {ReadDataW, {64-`XLEN{1'b0}}}, FmtW, FLoadResultW); - mux2 #(64) FLoadStoreResultMux(FLoadResultW, SrcYW, |FOpCtrlW[2:1], FLoadStoreResultW); - @@ -434,47 +388,26 @@ module fpu ( always_comb begin case (FResultSelW) - // div/sqrt - 3'b000 : FPUFlagsW = FDivFlagsW; - // cmp - 3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0}; - //fma/mult - 3'b010 : FPUFlagsW = FmaFlagsW; - // sgn inj - 3'b011 : FPUFlagsW = SgnFlagsW; - // add/sub/cnvt - 3'b100 : FPUFlagsW = FAddFlagsW; - // classify - 3'b101 : FPUFlagsW = 5'b0; - // output SrcAW - 3'b110 : FPUFlagsW = 5'b0; - // output FRD1 - 3'b111 : FPUFlagsW = 5'b0; + 3'b000 : FPUFlagsW = 5'b0; + 3'b001 : FPUFlagsW = FmaFlagsW; + 3'b010 : FPUFlagsW = FAddFlagsW; + 3'b011 : FPUFlagsW = FDivFlagsW; + 3'b100 : FPUFlagsW = {FFlgW, 4'b0}; /* FFlgW is the registered compare-invalid bit (CmpInvalidM & FResSelM[1]); keep it in the MSB (NV) as the removed {CmpInvalidW, 4'b0} arm did, not in bit 0 (NX) */ default : FPUFlagsW = 5'bxxxxx; endcase end - + always_comb begin case (FResultSelW) - // div/sqrt - 3'b000 : FPUResult64W = FDivResultW; - // cmp - 3'b001 : FPUResult64W = FCmpResultW; - //fma/mult - 3'b010 : FPUResult64W = FmaResultW; - // sgn inj - 3'b011 : FPUResult64W = SgnResultW; - // add/sub/cnvt - 3'b100 : FPUResult64W = FAddResultW; - // classify - 3'b101 : FPUResult64W = ClassResultW; - // output SrcAW - 3'b110 : FPUResult64W = SrcAW; - // Load/Store/Move to FP-register - 3'b111 : FPUResult64W = FLoadStoreResultW; - default : FPUResult64W = {64{1'bx}}; + 3'b000 : FPUResult64W = FmtW ? 
{ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0}; + 3'b001 : FPUResult64W = FmaResultW; + 3'b010 : FPUResult64W = FAddResultW; + 3'b011 : FPUResult64W = FDivResultW; + 3'b100 : FPUResult64W = FResW; + default : FPUResult64W = 64'bxxxxx; endcase - end // always_comb + end + // interface between XLEN size datapath and double-precision sized // floating-point results diff --git a/wally-pipelined/src/fpu/fpuhazard.sv b/wally-pipelined/src/fpu/fpuhazard.sv index 03667d84f..4d0895a77 100644 --- a/wally-pipelined/src/fpu/fpuhazard.sv +++ b/wally-pipelined/src/fpu/fpuhazard.sv @@ -44,21 +44,21 @@ module fpuhazard( if ((Adr1E == RdM) & FWriteEnM) // if the result will be FResM - if(FResultSelM == 3'b110 | FResultSelM == 3'b011) ForwardXE = 2'b10; // choose FResM + if(FResultSelM == 3'b100) ForwardXE = 2'b10; // choose FResM else FStallD = 1; // if the result won't be ready stall else if ((Adr1E == RdW) & FWriteEnW) ForwardXE = 2'b01; // choose FPUResult64W if ((Adr2E == RdM) & FWriteEnM) // if the result will be FResM - if(FResultSelM == 3'b110 | FResultSelM == 3'b011) ForwardYE = 2'b10; // choose FResM + if(FResultSelM == 3'b100) ForwardYE = 2'b10; // choose FResM else FStallD = 1; // if the result won't be ready stall else if ((Adr2E == RdW) & FWriteEnW) ForwardYE = 2'b01; // choose FPUResult64W if ((Adr3E == RdM) & FWriteEnM) // if the result will be FResM - if(FResultSelM == 3'b110 | FResultSelM == 3'b011) ForwardZE = 2'b10; // choose FResM + if(FResultSelM == 3'b100) ForwardZE = 2'b10; // choose FResM else FStallD = 1; // if the result won't be ready stall else if ((Adr3E == RdW) & FWriteEnW) ForwardZE = 2'b01; // choose FPUResult64W diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index c3303f9ac..44a40045a 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -37,7 +37,7 @@ module datapath ( input logic ALUSrcAE, ALUSrcBE, input logic TargetSrcE, input logic JumpE, - input logic 
IsFPE, + input logic IllegalFPUInstrE, input logic [1:0] MemRWE, input logic [`XLEN-1:0] FWriteDataE, input logic [`XLEN-1:0] PCE, @@ -105,9 +105,9 @@ module datapath ( flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); - mux3 #(`XLEN) faemux(RD1E, WriteDataW, ALUResultM, ForwardAE, PreSrcAE); - mux3 #(`XLEN) fbemux(RD2E, WriteDataW, ALUResultM, ForwardBE, PreSrcBE); - mux2 #(`XLEN) writedatamux(PreSrcBE, FWriteDataE, IsFPE, WriteDataE); + mux3 #(`XLEN) faemux(RD1E, WriteDataW, ResultM, ForwardAE, PreSrcAE); + mux3 #(`XLEN) fbemux(RD2E, WriteDataW, ResultM, ForwardBE, PreSrcBE); + mux2 #(`XLEN) writedatamux(PreSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE); mux2 #(`XLEN) srcamux(PreSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(`XLEN) srcamux2(SrcAE, PCLinkE, JumpE, SrcAE2); mux2 #(`XLEN) srcbmux(PreSrcBE, ExtImmE, ALUSrcBE, SrcBE); diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index 2515f3230..50bf79e80 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -36,8 +36,7 @@ module ieu ( input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCLinkE, input logic FWriteIntE, - input logic IsFPE, - //input logic [1:0] FMemRWE, + input logic IllegalFPUInstrE, input logic [`XLEN-1:0] FWriteDataE, output logic [`XLEN-1:0] PCTargetE, output logic MulDivE, W64E, diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index a77c3ab01..fe1f057ce 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -95,18 +95,17 @@ module wallypipelinedhart ( // floating point unit signals logic [2:0] FRM_REGW; - logic [1:0] FMemRWM, FMemRWE; - logic FStallD; - logic FWriteIntE, FWriteIntM, FWriteIntW; - logic [`XLEN-1:0] FWriteDataE; - logic [`XLEN-1:0] FIntResM; - logic FDivBusyE; - logic IsFPD, IsFPE; - logic IllegalFPUInstrD, IllegalFPUInstrE; - 
logic FloatRegWriteW; - logic FPUStallD; - logic [4:0] SetFflagsM; - logic [`XLEN-1:0] FPUResultW; + logic [1:0] FMemRWM, FMemRWE; + logic FStallD; + logic FWriteIntE, FWriteIntM, FWriteIntW; + logic [`XLEN-1:0] FWriteDataE; + logic [`XLEN-1:0] FIntResM; + logic FDivBusyE; + logic IllegalFPUInstrD, IllegalFPUInstrE; + logic FloatRegWriteW; + logic FPUStallD; + logic [4:0] SetFflagsM; + logic [`XLEN-1:0] FPUResultW; // memory management unit signals logic ITLBWriteF, DTLBWriteM;