From 7e026f3e78f20d1d64237c983398eca3ae8f06a6 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 18 Dec 2021 10:21:17 -0800 Subject: [PATCH 1/7] Simplified Shifter Right input --- wally-pipelined/src/ieu/alu.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wally-pipelined/src/ieu/alu.sv b/wally-pipelined/src/ieu/alu.sv index a2aa1a44..0558cbe1 100644 --- a/wally-pipelined/src/ieu/alu.sv +++ b/wally-pipelined/src/ieu/alu.sv @@ -51,8 +51,8 @@ module alu #(parameter WIDTH=32) ( assign {Carry, Sum} = A + CondInvB + {{(WIDTH-1){1'b0}}, SubArith}; // Shifts - assign Right = (Funct3[2:0] == 3'b101); // sra or srl - shifter sh(A, B[5:0], Right, SubArith, W64, Shift); + assign Right = Funct3[2]; // sra or srl + shifter sh(A, B[`LOG_XLEN-1:0], Right, SubArith, W64, Shift); // condition code flags based on add/subtract output // Overflow occurs when the numbers being added have the same sign From 721d0b5bcf59a48047512fc85091a3bbd8f11f55 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 18 Dec 2021 10:25:40 -0800 Subject: [PATCH 2/7] Simplified shifter right input --- wally-pipelined/src/ieu/alu.sv | 6 ++--- wally-pipelined/src/ieu/shifter.sv | 42 +++++++++++++++--------------- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/wally-pipelined/src/ieu/alu.sv b/wally-pipelined/src/ieu/alu.sv index 0558cbe1..827aeb8a 100644 --- a/wally-pipelined/src/ieu/alu.sv +++ b/wally-pipelined/src/ieu/alu.sv @@ -33,7 +33,6 @@ module alu #(parameter WIDTH=32) ( output logic [WIDTH-1:0] Sum); logic [WIDTH-1:0] CondInvB, Shift, SLT, SLTU, FullResult; - logic Right; logic Carry, Neg; logic LT, LTU; logic Overflow; @@ -51,9 +50,8 @@ module alu #(parameter WIDTH=32) ( assign {Carry, Sum} = A + CondInvB + {{(WIDTH-1){1'b0}}, SubArith}; // Shifts - assign Right = Funct3[2]; // sra or srl - shifter sh(A, B[`LOG_XLEN-1:0], Right, SubArith, W64, Shift); - + shifter sh(.A, .Amt(B[`LOG_XLEN-1:0]), .Right(Funct3[2]), .Arith(SubArith), .W64, .Y(Shift)); + // 
condition code flags based on add/subtract output // Overflow occurs when the numbers being added have the same sign // and the result has the opposite sign diff --git a/wally-pipelined/src/ieu/shifter.sv b/wally-pipelined/src/ieu/shifter.sv index fc170e75..232f7241 100644 --- a/wally-pipelined/src/ieu/shifter.sv +++ b/wally-pipelined/src/ieu/shifter.sv @@ -26,10 +26,10 @@ `include "wally-config.vh" module shifter ( - input logic [`XLEN-1:0] a, - input logic [`LOG_XLEN-1:0] amt, - input logic right, arith, w64, - output logic [`XLEN-1:0] y); + input logic [`XLEN-1:0] A, + input logic [`LOG_XLEN-1:0] Amt, + input logic Right, Arith, W64, + output logic [`XLEN-1:0] Y); logic [2*`XLEN-2:0] z, zshift; logic [`LOG_XLEN-1:0] amttrunc, offset; @@ -42,34 +42,34 @@ module shifter ( generate if (`XLEN==32) begin:shifter // RV32 always_comb // funnel mux - if (right) - if (arith) z = {{31{a[31]}}, a}; - else z = {31'b0, a}; - else z = {a, 31'b0}; - assign amttrunc = amt; // shift amount + if (Right) + if (Arith) z = {{31{A[31]}}, A}; + else z = {31'b0, A}; + else z = {A, 31'b0}; + assign amttrunc = Amt; // shift amount end else begin:shifter // RV64 always_comb // funnel mux - if (w64) begin // 32-bit shifts - if (right) - if (arith) z = {64'b0, {31{a[31]}}, a[31:0]}; - else z = {95'b0, a[31:0]}; - else z = {32'b0, a[31:0], 63'b0}; + if (W64) begin // 32-bit shifts + if (Right) + if (Arith) z = {64'b0, {31{A[31]}}, A[31:0]}; + else z = {95'b0, A[31:0]}; + else z = {32'b0, A[31:0], 63'b0}; end else begin - if (right) - if (arith) z = {{63{a[63]}}, a}; - else z = {63'b0, a}; - else z = {a, 63'b0}; + if (Right) + if (Arith) z = {{63{A[63]}}, A}; + else z = {63'b0, A}; + else z = {A, 63'b0}; end - assign amttrunc = w64 ? {1'b0, amt[4:0]} : amt; // 32 or 64-bit shift + assign amttrunc = W64 ? {1'b0, Amt[4:0]} : Amt; // 32 or 64-bit shift end endgenerate // opposite offset for right shfits - assign offset = right ? amttrunc : ~amttrunc; + assign offset = Right ? 
amttrunc : ~amttrunc; // funnel operation assign zshift = z >> offset; - assign y = zshift[`XLEN-1:0]; + assign Y = zshift[`XLEN-1:0]; endmodule From 67577d7c91ffaf574e96cdb28aee0aadf0ba813b Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 18 Dec 2021 21:26:00 -0800 Subject: [PATCH 3/7] Renamed RD1D to R1D, etc. --- wally-pipelined/src/ieu/alu.sv | 8 ++++---- wally-pipelined/src/ieu/datapath.sv | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/wally-pipelined/src/ieu/alu.sv b/wally-pipelined/src/ieu/alu.sv index 827aeb8a..a93ebd29 100644 --- a/wally-pipelined/src/ieu/alu.sv +++ b/wally-pipelined/src/ieu/alu.sv @@ -52,10 +52,10 @@ module alu #(parameter WIDTH=32) ( // Shifts shifter sh(.A, .Amt(B[`LOG_XLEN-1:0]), .Right(Funct3[2]), .Arith(SubArith), .W64, .Y(Shift)); - // condition code flags based on add/subtract output - // Overflow occurs when the numbers being added have the same sign - // and the result has the opposite sign - assign Overflow = (A[WIDTH-1] ~^ CondInvB[WIDTH-1]) & (A[WIDTH-1] ^ Sum[WIDTH-1]); + // condition code flags based on subtract output + // Overflow occurs when the numbers being subtracted have the opposite sign + // and the result has the opposite sign of A + assign Overflow = (A[WIDTH-1] ^ B[WIDTH-1]) & (A[WIDTH-1] ^ Sum[WIDTH-1]); assign Neg = Sum[WIDTH-1]; assign LT = Neg ^ Overflow; assign LTU = ~Carry; diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index c36077d2..9111a61f 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -66,11 +66,11 @@ module datapath ( // Fetch stage signals // Decode stage signals - logic [`XLEN-1:0] RD1D, RD2D; + logic [`XLEN-1:0] R1D, R2D; logic [`XLEN-1:0] ExtImmD; logic [4:0] RdD; // Execute stage signals - logic [`XLEN-1:0] RD1E, RD2E; + logic [`XLEN-1:0] R1E, R2E; logic [`XLEN-1:0] ExtImmE; // logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, SrcAE2, SrcBE2; // *** MAde 
forwardedsrcae an output to get rid of a mux in the critical path. @@ -91,19 +91,19 @@ module datapath ( assign Rs1D = InstrD[19:15]; assign Rs2D = InstrD[24:20]; assign RdD = InstrD[11:7]; - regfile regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, WriteDataW, RD1D, RD2D); + regfile regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, WriteDataW, R1D, R2D); extend ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ExtImmD); // Execute stage pipeline register and logic - flopenrc #(`XLEN) RD1EReg(clk, reset, FlushE, ~StallE, RD1D, RD1E); - flopenrc #(`XLEN) RD2EReg(clk, reset, FlushE, ~StallE, RD2D, RD2E); + flopenrc #(`XLEN) RD1EReg(clk, reset, FlushE, ~StallE, R1D, R1E); + flopenrc #(`XLEN) RD2EReg(clk, reset, FlushE, ~StallE, R2D, R2E); flopenrc #(`XLEN) ExtImmEReg(clk, reset, FlushE, ~StallE, ExtImmD, ExtImmE); flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E); flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); - mux3 #(`XLEN) faemux(RD1E, WriteDataW, ResultM, ForwardAE, ForwardedSrcAE); - mux3 #(`XLEN) fbemux(RD2E, WriteDataW, ResultM, ForwardBE, ForwardedSrcBE); + mux3 #(`XLEN) faemux(R1E, WriteDataW, ResultM, ForwardAE, ForwardedSrcAE); + mux3 #(`XLEN) fbemux(R2E, WriteDataW, ResultM, ForwardBE, ForwardedSrcBE); comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, FlagsE); mux2 #(`XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(`XLEN) srcbmux(ForwardedSrcBE, ExtImmE, ALUSrcBE, SrcBE); From 406f129bedd9a9dc1a1830a5d5cbaa473d1c8098 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 18 Dec 2021 22:08:23 -0800 Subject: [PATCH 4/7] Controller fix --- wally-pipelined/src/ieu/controller.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index 907aa650..94f3d65c 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -173,7 +173,7 @@ module 
controller( // ALU Decoding assign sltD = (Funct3D == 3'b010); assign sltuD = (Funct3D == 3'b011); - assign subD = (Funct3D == 3'b000 & Funct7D[5] & OpD[5]); + assign subD = (Funct3D == 3'b000 & Funct7D[5] & OpD[5]); // OpD[5] needed; ***explain why assign sraD = (Funct3D == 3'b101 & Funct7D[5]); assign SubArithD = ALUOpD & (subD | sraD | sltD | sltuD); // TRUE for R-type subtracts and sra, slt, sltu assign ALUControlD = {W64D, SubArithD, ALUOpD}; From f201af4bb7720b0344fe256ea9cf0ce148b17790 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 19 Dec 2021 11:49:15 -0800 Subject: [PATCH 5/7] Renamed zero to eq in flag generation --- wally-pipelined/src/ieu/comparator.sv | 6 +++--- wally-pipelined/src/ieu/controller.sv | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/wally-pipelined/src/ieu/comparator.sv b/wally-pipelined/src/ieu/comparator.sv index 14117274..0c161d6d 100644 --- a/wally-pipelined/src/ieu/comparator.sv +++ b/wally-pipelined/src/ieu/comparator.sv @@ -30,7 +30,7 @@ module comparator #(parameter WIDTH=32) ( output logic [2:0] flags); logic [WIDTH-1:0] bbar, diff; - logic carry, zero, neg, overflow, lt, ltu; + logic carry, eq, neg, overflow, lt, ltu; // NOTE: This can be replaced by some faster logic optimized // to just compute flags and not the difference. 
@@ -40,13 +40,13 @@ module comparator #(parameter WIDTH=32) ( assign {carry, diff} = a + bbar + 1; // condition code flags based on add/subtract output - assign zero = (diff == 0); + assign eq = (diff == 0); assign neg = diff[WIDTH-1]; // overflow occurs when the numbers being subtracted have the opposite sign // and the result has the opposite sign fron the first assign overflow = (a[WIDTH-1] ^ b[WIDTH-1]) & (a[WIDTH-1] ^ diff[WIDTH-1]); assign lt = neg ^ overflow; assign ltu = ~carry; - assign flags = {zero, lt, ltu}; + assign flags = {eq, lt, ltu}; endmodule diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index 94f3d65c..b081d40f 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -97,7 +97,7 @@ module controller( logic SubArithD; logic subD, sraD, sltD, sltuD; logic BranchTakenE; - logic zeroE, ltE, ltuE; + logic eqE, ltE, ltuE; logic unused; logic BranchFlagE; logic IEURegWriteE; @@ -202,8 +202,8 @@ module controller( {IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, MulDivE, AtomicE, InvalidateICacheE, FlushDCacheE, InstrValidE}); // Branch Logic - assign {zeroE, ltE, ltuE} = FlagsE; - mux4 #(1) branchflagmux(zeroE, 1'b0, ltE, ltuE, Funct3E[2:1], BranchFlagE); + assign {eqE, ltE, ltuE} = FlagsE; + mux4 #(1) branchflagmux(eqE, 1'b0, ltE, ltuE, Funct3E[2:1], BranchFlagE); assign BranchTakenE = BranchFlagE ^ Funct3E[0]; assign PCSrcE = JumpE | BranchE & BranchTakenE; From e3f2a252cdbb69e20c59d431dcea284c158782a5 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Sun, 19 Dec 2021 13:51:46 -0800 Subject: [PATCH 6/7] fixed some small errors in FMA --- wally-pipelined/src/fpu/fma.sv | 171 +++++++++++---------------------- 1 file changed, 56 insertions(+), 115 deletions(-) diff --git a/wally-pipelined/src/fpu/fma.sv b/wally-pipelined/src/fpu/fma.sv index 6ad3f986..a90848f5 100644 
--- a/wally-pipelined/src/fpu/fma.sv +++ b/wally-pipelined/src/fpu/fma.sv @@ -28,6 +28,7 @@ // `define NE 11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8) // `define NF 52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23) // `define XLEN 64 +`define NANPAYLOAD 1 module fma( input logic clk, input logic reset, @@ -117,9 +118,8 @@ module fma1( logic [3*`NF+6:0] AlignedAddendInv; // aligned addend possibly inverted logic [2*`NF+1:0] ProdManKilled; // the product's mantissa possibly killed logic [3*`NF+4:0] NegProdManKilled; // a negated ProdManKilled - logic [8:0] PNormCnt, NNormCnt; // the positive and nagitive LOA results logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum - + logic [`NE-1:0] XExpVal, YExpVal; // exponent value after taking into accound denormals /////////////////////////////////////////////////////////////////////////////// // Calculate the product // - When multipliying two fp numbers, add the exponents @@ -130,7 +130,7 @@ module fma1( // calculate the product's exponent - expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE, .XDenormE, .YDenormE, + expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE, .XDenormE, .YDenormE, .XExpVal, .YExpVal, .Denorm, .ProdExpE); // multiplication of the mantissa's @@ -140,7 +140,7 @@ module fma1( // Alignment shifter /////////////////////////////////////////////////////////////////////////////// - align align(.ZExpE, .ZManE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .Denorm, + align align(.ZExpE, .ZManE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .Denorm, .XExpVal, .YExpVal, .AlignedAddendE, .AddendStickyE, .KillProdE); // calculate the signs and take the opperation into account @@ -150,9 +150,9 @@ module fma1( // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, 
.XZeroE, .YZeroE); + add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE); - loa loa(.A(AlignedAddendInv+{162'b0,InvZE}), .P(ProdManKilled), .NegSumE, .NormCntE); + loa loa(.A(AlignedAddendInv+{162'b0,InvZE}), .P(ProdManKilled), .NormCntE); // Choose the positive sum and accompanying LZA result. assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; @@ -167,11 +167,11 @@ module expadd( input logic [`NE-1:0] XExpE, YExpE, // input exponents input logic XDenormE, YDenormE, // are the inputs denormalized input logic XZeroE, YZeroE, // are the inputs zero + output logic [`NE-1:0] XExpVal, YExpVal, // Exponent value after taking into account denormals output logic [`NE-1:0] Denorm, // value of denormalized exponent output logic [`NE+1:0] ProdExpE // product's exponent B^(1023)NE+2 ); - logic [`NE-1:0] XExpVal, YExpVal; // Exponent value after taking into account denormals // denormalized numbers have diffrent values depending on which precison it is. // double - 1 @@ -233,6 +233,7 @@ module align( input logic [`NF:0] ZManE, // fractions in U(0.NF) format] input logic ZDenormE, // is the input denormal input logic XZeroE, YZeroE, ZZeroE, // is the input zero + input logic [`NE-1:0] XExpVal, YExpVal, // Exponent value after taking into account denormals input logic [`NE+1:0] ProdExpE, // the product's exponent input logic [`NE-1:0] Denorm, // the biased value of a denormalized number output logic [3*`NF+5:0] AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1) @@ -254,7 +255,8 @@ module align( // - positive means the product is larger, so shift Z right // - Denormal numbers have a diffrent exponent value depending on the precision assign ZExpVal = ZDenormE ? Denorm : ZExpE; - assign AlignCnt = ProdExpE - {2'b0, ZExpVal} + (`NF+3); + // assign AlignCnt = ProdExpE - {2'b0, ZExpVal} + (`NF+3); + assign AlignCnt = XZeroE|YZeroE ? 
-1 : {2'b0, XExpVal} + {2'b0, YExpVal} - 1020+`NF - {2'b0, ZExpVal}; // Defualt Addition without shifting // | 54'b0 | 106'b(product) | 2'b0 | @@ -312,14 +314,14 @@ module add( input logic PSgnE, ZSgnEffE,// the product and modified Z signs input logic KillProdE, // should the product be set to 0 input logic XZeroE, YZeroE, // is the input zero - output logic [3*`NF+6:0] AlignedAddendInv, // aligned addend possibly inverted - output logic [2*`NF+1:0] ProdManKilled, // the product's mantissa possibly killed - output logic [3*`NF+4:0] NegProdManKilled, // a negated ProdManKilled + output logic [3*`NF+6:0] AlignedAddendInv, // aligned addend possibly inverted + output logic [2*`NF+1:0] ProdManKilled, // the product's mantissa possibly killed output logic NegSumE, // was the sum negitive output logic InvZE, // do you invert Z - output logic [3*`NF+6:0] PreSum, NegPreSum// possibly negitive sum + output logic [3*`NF+6:0] PreSum, NegPreSum// possibly negitive sum ); + logic [3*`NF+4:0] NegProdManKilled; // a negated ProdManKilled /////////////////////////////////////////////////////////////////////////////// // Addition /////////////////////////////////////////////////////////////////////////////// @@ -334,17 +336,17 @@ module add( // Kill the product if the product is too small to effect the addition (determined in fma1.sv) assign ProdManKilled = ProdManE&{2*`NF+2{~KillProdE}}; // Negate ProdMan for LZA and the negitive sum calculation - assign NegProdManKilled = {{`NF+3{~(XZeroE|YZeroE|KillProdE)}}, ~ProdManKilled&{2*`NF+2{~(XZeroE|YZeroE)}}}; + assign NegProdManKilled = {{`NF+3{~(XZeroE|YZeroE|KillProdE)}}, ~ProdManKilled&{2*`NF+2{~(XZeroE|YZeroE|KillProdE)}}}; - // Is the sum negitive - assign NegSumE = (AlignedAddendE > {54'b0, ProdManKilled, 2'b0})&InvZE; //***use this to avoid addition and final muxing??? 
// Do the addition // - calculate a positive and negitive sum in parallel assign PreSum = AlignedAddendInv + {55'b0, ProdManKilled, 2'b0} + {{3*`NF+6{1'b0}}, InvZE}; - assign NegPreSum = AlignedAddendE + {NegProdManKilled, 2'b0} + {{(3*`NF+3){1'b0}},~(XZeroE|YZeroE),2'b0}; + assign NegPreSum = AlignedAddendE + {NegProdManKilled, 2'b0} + {{(3*`NF+3){1'b0}},~(XZeroE|YZeroE|KillProdE),2'b0}; + // Is the sum negitive + assign NegSumE = PreSum[3*`NF+6]; endmodule @@ -352,28 +354,32 @@ endmodule module loa( //https://ieeexplore.ieee.org/abstract/document/930098 input logic [3*`NF+6:0] A, // addend input logic [2*`NF+1:0] P, // product - input logic NegSumE, // is the sum negitive output logic [8:0] NormCntE // normalization shift count for the positive result ); - logic [3*`NF+6:0] T; - logic [3*`NF+5:0] G; - logic [3*`NF+5:0] Z; + logic [3*`NF+6:0] G; + logic [3*`NF+6:0] Z; assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4]; - assign G[3*`NF+5:2*`NF+4] = 0; - assign Z[3*`NF+5:2*`NF+4] = ~A[3*`NF+5:2*`NF+4]; + assign G[3*`NF+6:2*`NF+4] = 0; + assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4]; assign T[2*`NF+3:2] = A[2*`NF+3:2]^P; assign G[2*`NF+3:2] = A[2*`NF+3:2]&P; assign Z[2*`NF+3:2] = ~A[2*`NF+3:2]&~P; assign T[1:0] = A[1:0]; assign G[1:0] = 0; assign Z[1:0] = ~A[1:0]; - + // Apply function to determine Leading pattern + // - note: the paper linked above uses the numbering system where 0 is the most significant bit + //f[n] = ~T[n]&T[n-1] note: n is the MSB + //f[i] = (T[i+1]&(G[i]&~Z[i-1] | Z[i]&~G[i-1])) | (~T[i+1]&(Z[i]&~Z[i-1] | G[i]&~G[i-1])) logic [3*`NF+6:0] f; - assign f = NegSumE ? 
T^{~G[3*`NF+5:0],1'b1} : T^{~Z[3*`NF+5:0], 1'b1}; + assign f[3*`NF+6] = ~T[3*`NF+6]&T[3*`NF+5]; + assign f[3*`NF+5:0] = (T[3*`NF+6:1]&(G[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | Z[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~T[3*`NF+6:1]&(Z[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); + + lzc lzc(.f, .NormCntE); @@ -426,7 +432,7 @@ module fma2( logic [`NF-1:0] ResultFrac; // Result fraction logic [`NE-1:0] ResultExp; // Result exponent - logic ResultSgn; // Result sign + logic ResultSgn, ResultSgnTmp; // Result sign logic [`NE+1:0] SumExp; // exponent of the normalized sum logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow logic [`NF+2:0] NormSum; // normalized sum @@ -464,7 +470,7 @@ module fma2( // round to infinity // round to nearest max magnitude - fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgn, .SumExp, + fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgnTmp, .SumExp, .CalcPlus1, .Plus1, .UfPlus1, .Minus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .UfLSBNormSum); @@ -476,7 +482,7 @@ module fma2( /////////////////////////////////////////////////////////////////////////////// - resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .Underflow, .InvZM, .NegSumM, .SumZero, .ResultSgn); + resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .Underflow, .InvZM, .NegSumM, .SumZero, .ResultSgnTmp, .ResultSgn); @@ -512,11 +518,12 @@ module resultsign( input logic InvZM, input logic NegSumM, input logic SumZero, + output logic ResultSgnTmp, output logic ResultSgn ); logic ZeroSgn; - logic ResultSgnTmp; + // logic ResultSgnTmp; // Determine the sign if the sum is zero // if cancelation then 0 unless round to -infinity @@ -554,15 +561,24 @@ module resultselect( ); logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, 
UnderflowResult; // possible results - assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]}; - assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]}; - assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]}; + generate if(`NANPAYLOAD) begin + assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]}; + assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]}; + assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]}; + end else begin + assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, 51'b0} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, 22'b0}; + assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, 51'b0} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, 22'b0}; + assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, 51'b0} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, 22'b0}; + end + endgenerate + + assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} : ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} : {{32{1'b1}}, ResultSgn, 8'hff, 23'b0}; assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0}; - assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} - {62'b0, (Minus1&AddendStickyM) + (Plus1&AddendStickyM)}} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}}; + assign KillProdResult = FmtM ? 
{ResultSgn, {ZExpM, ZManM[`NF-1:0]} - {62'b0, (Minus1&AddendStickyM)} + {62'b0, (Plus1&AddendStickyM)}} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}}; assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {63'b0,(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}}; assign FMAResM = XNaNM ? XNaNResult : YNaNM ? YNaNResult : @@ -579,81 +595,6 @@ module resultselect( endmodule - -// module normalize( -// input logic [3*`NF+5:0] SumM, // the positive sum -// input logic [`NE-1:0] ZExpM, // exponent of Z -// input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias -// input logic [8:0] NormCntM, // normalization shift count -// input logic FmtM, // precision 1 = double 0 = single -// input logic KillProdM, // is the product set to zero -// input logic AddendStickyM, // the sticky bit caclulated from the aligned addend -// input logic NegSumM, // was the sum negitive -// output logic [`NF+2:0] NormSum, // normalized sum -// output logic SumZero, // is the sum zero -// output logic NormSumSticky, UfSticky, // sticky bits -// output logic [`NE+1:0] SumExp, // exponent of the normalized sum -// output logic ResultDenorm // is the result denormalized -// ); -// logic [`NE+1:0] FracLen; // length of the fraction -// logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results -// logic [8:0] DenormShift; // right shift if the result is denormalized //***change this later -// logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction -// logic [3*`NF+7:0] SumShifted; // the shifted sum before LZA correction -// logic [`NE+1:0] SumExpTmpTmp; // the exponent of the normalized sum with the `FLEN bias -// logic PreResultDenorm; // is the result denormalized - calculated before LZA corection -// logic PreResultDenorm2; // is the result denormalized - 
calculated before LZA corection -// logic LZAPlus1; // add one to the sum's exponent due to LZA correction - -// /////////////////////////////////////////////////////////////////////////////// -// // Normalization -// /////////////////////////////////////////////////////////////////////////////// - -// // Determine if the sum is zero -// assign SumZero = ~(|SumM); - -// // determine the length of the fraction based on precision -// assign FracLen = FmtM ? `NF+1 : 13'd24; - -// // calculate the sum's exponent -// assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4)); // ****try moving this into previous stage -// assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}}; // ***move this ^ the subtraction by a constant isn't simplified - -// logic SumDLTEZ, SumDGEFL, SumSLTEZ, SumSGEFL; -// assign SumDLTEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp; -// assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd1))); -// assign SumSLTEZ = $signed(SumExpTmpTmp) <= $signed(13'd1023-13'd127); -// assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd24+13'd1023-13'd127)) | ~|SumExpTmpTmp; -// assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? SumDGEFL : SumSGEFL) & ~SumZero; //***make sure math good -// // always_comb begin -// // assert (PreResultDenorm == PreResultDenorm2) else $fatal ("PreResultDenorms not equal"); -// // end - - - -// // Determine if the result is denormal -// // assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero; - -// // Determine the shift needed for denormal results -// // - if not denorm add 1 to shift out the leading 1 -// assign DenormShift = PreResultDenorm2 ? 
SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation -// // Normalize the sum -// assign SumShifted = {2'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified -// // LZA correction -// assign LZAPlus1 = SumShifted[3*`NF+7]; -// assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0]; -// assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3]; -// // Calculate the sticky bit -// assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | (|CorrSumShifted[136:2*`NF+3]&~FmtM); -// assign UfSticky = AddendStickyM | NormSumSticky; - -// // Determine sum's exponent -// assign SumExp = (SumExpTmp+{12'b0, LZAPlus1}+{12'b0, ~|SumExpTmp&SumShifted[3*`NF+6]}) & {`NE+2{~(SumZero|ResultDenorm)}}; -// // recalculate if the result is denormalized -// assign ResultDenorm = PreResultDenorm2&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7]; - -// endmodule - module normalize( input logic [3*`NF+5:0] SumM, // the positive sum input logic [`NE-1:0] ZExpM, // exponent of Z @@ -733,7 +674,7 @@ module normalize( assign LZAPlus1 = SumShifted[3*`NF+7]; assign LZAPlus2 = SumShifted[3*`NF+8]; // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone - assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0]; + assign CorrSumShifted = LZAPlus1&~KillProdM ? 
SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0]; assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3]; // Calculate the sticky bit assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | (|CorrSumShifted[136:2*`NF+3]&~FmtM); @@ -757,7 +698,7 @@ module fmaround( input logic ZZeroM, // is Z zero input logic InvZM, // invert Z input logic [`NE+1:0] SumExp, // exponent of the normalized sum - input logic ResultSgn, // the result's sign + input logic ResultSgnTmp, // the result's sign output logic CalcPlus1, Plus1, UfPlus1, Minus1, // do you add or subtract on from the result output logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow output logic [`NF-1:0] ResultFrac, // Result fraction @@ -824,8 +765,8 @@ module fmaround( case (FrmM) 3'b000: CalcPlus1 = Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&LSBNormSum&~SubBySmallNum));//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down - 3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up + 3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round down + 3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round up 3'b100: CalcPlus1 = (Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&~SubBySmallNum)));//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase @@ -833,8 +774,8 @@ module fmaround( case (FrmM) 3'b000: UfCalcPlus1 = UfGuard & (UfRound | (UfSticky&UfRound|~UfSubBySmallNum) | (~Sticky&UfLSBNormSum&~UfSubBySmallNum));//round to nearest even 3'b001: UfCalcPlus1 = 0;//round to zero - 3'b010: UfCalcPlus1 = ResultSgn & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round down - 3'b011: UfCalcPlus1 = ~ResultSgn & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round up + 3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round down + 3'b011: UfCalcPlus1 = 
~ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round up 3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (UfSticky&~(~UfRound&UfSubBySmallNum)) | (~Sticky&~UfSubBySmallNum)));//round to nearest max magnitude default: UfCalcPlus1 = 1'bx; endcase @@ -842,8 +783,8 @@ module fmaround( case (FrmM) 3'b000: CalcMinus1 = 0;//round to nearest even 3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero - 3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down - 3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up + 3'b010: CalcMinus1 = ~ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round down + 3'b011: CalcMinus1 = ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round up 3'b100: CalcMinus1 = 0;//round to nearest max magnitude default: CalcMinus1 = 1'bx; endcase From 9e6c9c38c0e78eae6ad768fdd0b30a10c7f09a81 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 19 Dec 2021 13:53:45 -0800 Subject: [PATCH 7/7] ALUControl cleanup --- wally-pipelined/src/ieu/controller.sv | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index b081d40f..040fa018 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -170,10 +170,10 @@ module controller( assign CSRZeroSrcD = InstrD[14] ? (InstrD[19:15] == 0) : (Rs1D == 0); // Is a CSR instruction using zero as the source? 
assign CSRWriteD = CSRReadD & !(CSRZeroSrcD && InstrD[13]); // Don't write if setting or clearing zeros - // ALU Decoding + // ALU Decoding is lazy, only using funct7[5] to distinguish add/sub and srl/sra assign sltD = (Funct3D == 3'b010); assign sltuD = (Funct3D == 3'b011); - assign subD = (Funct3D == 3'b000 & Funct7D[5] & OpD[5]); // OpD[5] needed; ***explain why + assign subD = (Funct3D == 3'b000 & Funct7D[5] & OpD[5]); // OpD[5] needed to distinguish sub from addi assign sraD = (Funct3D == 3'b101 & Funct7D[5]); assign SubArithD = ALUOpD & (subD | sraD | sltD | sltuD); // TRUE for R-type subtracts and sra, slt, sltu assign ALUControlD = {W64D, SubArithD, ALUOpD}; @@ -205,12 +205,11 @@ module controller( assign {eqE, ltE, ltuE} = FlagsE; mux4 #(1) branchflagmux(eqE, 1'b0, ltE, ltuE, Funct3E[2:1], BranchFlagE); assign BranchTakenE = BranchFlagE ^ Funct3E[0]; - assign PCSrcE = JumpE | BranchE & BranchTakenE; + // Other execute stage controller signals assign MemReadE = MemRWE[1]; assign SCE = (ResultSrcE == 3'b100); - assign RegWriteE = IEURegWriteE | FWriteIntE; // IRF register writes could come from IEU or FPU controllers // Memory stage pipeline control register