diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 84d043817..be67c99bd 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 84d043817f75f752c9873326475e11f16e3a6f7c +Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 diff --git a/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv b/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv index de5e849d9..7d984dff1 100644 --- a/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv +++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv @@ -1,5 +1,10 @@ -`include "../../../config/rv64icfd/wally-config.vh" +//`include "../../../config/old/rv64icfd/wally-config.vh" + +`define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32) +`define NE 11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8) +`define NF 52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23) +`define XLEN 64 module testbench3(); logic [31:0] errors=0; @@ -174,8 +179,9 @@ always @(posedge clk) // check results on falling edge of clk always @(negedge clk) begin - // fp = $fopen("/home/kparry/riscv-wally/wally-pipelined/src/fpu/FMA/tbgen/results.dat","w"); if((FmtE==1'b1) & (FMAFlgM != flags[4:0] || (!wnan && (FMAResM != ans)) || (wnan && ansnan && ~((XNaNE && (FMAResM[`FLEN-2:0] == {XExpE,1'b1,X[`NF-2:0]})) || (YNaNE && (FMAResM[`FLEN-2:0] == {YExpE,1'b1,Y[`NF-2:0]})) || (ZNaNE && (FMAResM[`FLEN-2:0] == {ZExpE,1'b1,Z[`NF-2:0]})) || (FMAResM[`FLEN-2:0] == ans[`FLEN-2:0]))))) begin + // fp = $fopen("/home/kparry/riscv-wally/wally-pipelined/src/fpu/FMA/tbgen/results.dat","w"); + // if((FmtE==1'b1) & (FMAFlgM != flags[4:0] || (FMAResM != ans))) begin $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); if(FMAResM == 64'h8000000000000000) $display( "FMAResM=-zero "); if(XDenormE) $display( "xdenorm "); @@ -193,7 +199,7 @@ always @(posedge clk) if(ans[`FLEN-2:`NF] == {`NE{1'b1}} && ans[`NF-1:0] != 0 && ~ans[`NF-1]) $display( "ans=sigNaN "); if(ans[`FLEN-2:`NF] == {`NE{1'b1}} && ans[`NF-1:0] != 0 && ans[`NF-1]) $display( "ans=qutNaN "); errors = errors + 1; - + //if (errors == 10) $stop; end if((FmtE==1'b0)&(FMAFlgM != flags[4:0] || (!wnan && (FMAResM != ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[30:0] == {X[30:23],1'b1,X[21:0]})) || (YNaNE && (FMAResM[30:0] == {Y[30:23],1'b1,Y[21:0]})) || (ZNaNE && (FMAResM[30:0] == {Z[30:23],1'b1,Z[21:0]})) || (FMAResM[30:0] == ans[30:0]))) ))) begin diff --git a/wally-pipelined/regression/make-tests.sh b/wally-pipelined/regression/make-tests.sh index e64f94455..dfc7d6182 100755 --- a/wally-pipelined/regression/make-tests.sh +++ b/wally-pipelined/regression/make-tests.sh @@ -9,4 +9,6 @@ make allclean make make XLEN=32 exe2memfile.pl work/*/*/*.elf -cd ../../wally-pipelined/regression +cd ../linux-testgen/linux-testvectors +./tvLinker.sh +cd ../../../wally-pipelined/regression diff --git a/wally-pipelined/regression/regression-wally.py b/wally-pipelined/regression/regression-wally.py index 810e7ca9d..a0591d6ad 100755 --- a/wally-pipelined/regression/regression-wally.py +++ b/wally-pipelined/regression/regression-wally.py @@ -10,9 +10,11 @@ # output. # ################################## -import sys +import sys,os from collections import namedtuple +regressionDir = os.path.dirname(os.path.abspath(__file__)) +os.chdir(regressionDir) TestCase = namedtuple("TestCase", ['name', 'cmd', 'grepstr']) # name: the name of this test configuration (used in printing human-readable # output and picking logfile names) @@ -32,7 +34,7 @@ configs = [ ] def getBuildrootTC(short): INSTR_LIMIT = 100000 # multiple of 100000 - MAX_EXPECTED = 182000000 + MAX_EXPECTED = 246000000 if short: BRcmd="vsim > {} -c < {54'b0, ProdManKilled, 2'b0})&InvZE; //***use this to avoid addition and final muxing??? // Do the addition // - calculate a positive and negitive sum in parallel - assign PreSum = AlignedAddendInv + {55'b0, ProdManKilled, 2'b0}; - assign NegPreSum = AlignedAddendE + NegProdManKilled; + assign PreSum = AlignedAddendInv + {55'b0, ProdManKilled, 2'b0} + {{3*`NF+6{1'b0}}, InvZE}; + assign NegPreSum = AlignedAddendE + {NegProdManKilled, 2'b0} + {{(3*`NF+3){1'b0}},~(XZeroE|YZeroE),2'b0}; - // Is the sum negitive - assign NegSumE = PreSum[3*`NF+6]; endmodule -module loa( - input logic [3*`NF+5:0] AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1) - input logic [3*`NF+6:0] AlignedAddendInv, // aligned addend possibly inverted - input logic [2*`NF+1:0] ProdManKilled, // the product's mantissa possibly killed - input logic [3*`NF+6:0] NegProdManKilled, // a negated ProdManKilled - output logic [8:0] PNormCnt, NNormCnt // positive and negitive LOA result -); - - // LZAs one for the positive result and one for the negitive - // - the +1 from inverting causes problems for normalization - posloa posloa(AlignedAddendInv, ProdManKilled, PNormCnt); - negloa negloa({1'b0,AlignedAddendE}, NegProdManKilled, NNormCnt); - -endmodule - - -module posloa( +module loa( //https://ieeexplore.ieee.org/abstract/document/930098 input logic [3*`NF+6:0] A, // addend input logic [2*`NF+1:0] P, // product - output logic [8:0] PCnt // normalization shift count for the positive result + input logic NegSumE, // is the sum negitive + output logic [8:0] NormCntE // normalization shift count for the positive result ); - // calculate the propagate (T) and kill (Z) bits logic [3*`NF+6:0] T; + logic [3*`NF+5:0] G; logic [3*`NF+5:0] Z; assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4]; - assign Z[3*`NF+5:2*`NF+4] = A[3*`NF+5:2*`NF+4]; + assign G[3*`NF+5:2*`NF+4] = 0; + assign Z[3*`NF+5:2*`NF+4] = ~A[3*`NF+5:2*`NF+4]; assign T[2*`NF+3:2] = A[2*`NF+3:2]^P; - assign Z[2*`NF+3:2] = A[2*`NF+3:2]|P; + assign G[2*`NF+3:2] = A[2*`NF+3:2]&P; + assign Z[2*`NF+3:2] = ~A[2*`NF+3:2]&~P; assign T[1:0] = A[1:0]; - assign Z[1:0] = A[1:0]; + assign G[1:0] = 0; + assign Z[1:0] = ~A[1:0]; // Apply function to determine Leading pattern logic [3*`NF+6:0] f; - assign f = T^{Z[3*`NF+5:0], 1'b0}; + assign f = NegSumE ? T^{~G[3*`NF+5:0],1'b1} : T^{~Z[3*`NF+5:0], 1'b1}; - lzc lzc(.f, .Cnt(PCnt)); + lzc lzc(.f, .NormCntE); endmodule -module negloa( - input logic [3*`NF+6:0] A, // addend - input logic [3*`NF+6:0] P, // product - output logic [8:0] NCnt // normalization shift count for the negitive result - ); - - // calculate the propagate (T) and kill (Z) bits - logic [3*`NF+6:0] T; - logic [3*`NF+5:0] Z; - assign T = A^P; - assign Z = ~(A[3*`NF+5:0]|P[3*`NF+5:0]); - - - // Apply function to determine Leading pattern - logic [3*`NF+6:0] f; - assign f = T^{~Z, 1'b0}; - - lzc lzc(.f, .Cnt(NCnt)); - -endmodule - - module lzc( input logic [3*`NF+6:0] f, - output logic [8:0] Cnt // normalization shift count for the negitive result + output logic [8:0] NormCntE // normalization shift ); logic [8:0] i; always_comb begin i = 0; while (~f[3*`NF+6-i] && $unsigned(i) <= $unsigned(9'd3*9'd`NF+9'd6)) i = i+1; // search for leading one - Cnt = i; + NormCntE = i; end endmodule @@ -479,7 +448,7 @@ module fma2( // Normalization /////////////////////////////////////////////////////////////////////////////// - normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum, + normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum, .NegSumM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm); @@ -611,6 +580,80 @@ module resultselect( endmodule +// module normalize( +// input logic [3*`NF+5:0] SumM, // the positive sum +// input logic [`NE-1:0] ZExpM, // exponent of Z +// input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias +// input logic [8:0] NormCntM, // normalization shift count +// input logic FmtM, // precision 1 = double 0 = single +// input logic KillProdM, // is the product set to zero +// input logic AddendStickyM, // the sticky bit caclulated from the aligned addend +// input logic NegSumM, // was the sum negitive +// output logic [`NF+2:0] NormSum, // normalized sum +// output logic SumZero, // is the sum zero +// output logic NormSumSticky, UfSticky, // sticky bits +// output logic [`NE+1:0] SumExp, // exponent of the normalized sum +// output logic ResultDenorm // is the result denormalized +// ); +// logic [`NE+1:0] FracLen; // length of the fraction +// logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results +// logic [8:0] DenormShift; // right shift if the result is denormalized //***change this later +// logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction +// logic [3*`NF+7:0] SumShifted; // the shifted sum before LZA correction +// logic [`NE+1:0] SumExpTmpTmp; // the exponent of the normalized sum with the `FLEN bias +// logic PreResultDenorm; // is the result denormalized - calculated before LZA corection +// logic PreResultDenorm2; // is the result denormalized - calculated before LZA corection +// logic LZAPlus1; // add one to the sum's exponent due to LZA correction + +// /////////////////////////////////////////////////////////////////////////////// +// // Normalization +// /////////////////////////////////////////////////////////////////////////////// + +// // Determine if the sum is zero +// assign SumZero = ~(|SumM); + +// // determine the length of the fraction based on precision +// assign FracLen = FmtM ? `NF+1 : 13'd24; + +// // calculate the sum's exponent +// assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4)); // ****try moving this into previous stage +// assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}}; // ***move this ^ the subtraction by a constant isn't simplified + +// logic SumDLTEZ, SumDGEFL, SumSLTEZ, SumSGEFL; +// assign SumDLTEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp; +// assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd1))); +// assign SumSLTEZ = $signed(SumExpTmpTmp) <= $signed(13'd1023-13'd127); +// assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd24+13'd1023-13'd127)) | ~|SumExpTmpTmp; +// assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? SumDGEFL : SumSGEFL) & ~SumZero; //***make sure math good +// // always_comb begin +// // assert (PreResultDenorm == PreResultDenorm2) else $fatal ("PreResultDenorms not equal"); +// // end + + + +// // Determine if the result is denormal +// // assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero; + +// // Determine the shift needed for denormal results +// // - if not denorm add 1 to shift out the leading 1 +// assign DenormShift = PreResultDenorm2 ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation +// // Normalize the sum +// assign SumShifted = {2'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified +// // LZA correction +// assign LZAPlus1 = SumShifted[3*`NF+7]; +// assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0]; +// assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3]; +// // Calculate the sticky bit +// assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | (|CorrSumShifted[136:2*`NF+3]&~FmtM); +// assign UfSticky = AddendStickyM | NormSumSticky; + +// // Determine sum's exponent +// assign SumExp = (SumExpTmp+{12'b0, LZAPlus1}+{12'b0, ~|SumExpTmp&SumShifted[3*`NF+6]}) & {`NE+2{~(SumZero|ResultDenorm)}}; +// // recalculate if the result is denormalized +// assign ResultDenorm = PreResultDenorm2&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7]; + +// endmodule + module normalize( input logic [3*`NF+5:0] SumM, // the positive sum input logic [`NE-1:0] ZExpM, // exponent of Z @@ -619,6 +662,7 @@ module normalize( input logic FmtM, // precision 1 = double 0 = single input logic KillProdM, // is the product set to zero input logic AddendStickyM, // the sticky bit caclulated from the aligned addend + input logic NegSumM, // was the sum negitive output logic [`NF+2:0] NormSum, // normalized sum output logic SumZero, // is the sum zero output logic NormSumSticky, UfSticky, // sticky bits @@ -629,15 +673,29 @@ module normalize( logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results logic [8:0] DenormShift; // right shift if the result is denormalized //***change this later logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction - logic [3*`NF+7:0] SumShifted; // the shifted sum before LZA correction + logic [3*`NF+8:0] SumShifted; // the shifted sum before LZA correction logic [`NE+1:0] SumExpTmpTmp; // the exponent of the normalized sum with the `FLEN bias logic PreResultDenorm; // is the result denormalized - calculated before LZA corection - logic LZAPlus1; // add one to the sum's exponent due to LZA correction + logic PreResultDenorm2; // is the result denormalized - calculated before LZA corection + logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction /////////////////////////////////////////////////////////////////////////////// // Normalization /////////////////////////////////////////////////////////////////////////////// + + // logic [8:0] supposedNormCnt; + // logic [8:0] i; + // always_comb begin + // i = 0; + // while (~SumM[3*`NF+5-i] && $unsigned(i) <= $unsigned(3*`NF+5)) i = i+1; // search for leading one + // supposedNormCnt = i; // compute shift count + // end + + // always_comb begin + // assert (NormCntM == supposedNormCnt | NormCntM == supposedNormCnt+1 | NormCntM == supposedNormCnt+2) else $fatal ("normcnt not expected"); + // end + // Determine if the sum is zero assign SumZero = ~(|SumM); @@ -645,19 +703,36 @@ module normalize( assign FracLen = FmtM ? `NF+1 : 13'd24; // calculate the sum's exponent - assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4)); - assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}}; + assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4)); // ****try moving this into previous stage + assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}}; // ***move this ^ the subtraction by a constant isn't simplified + + logic SumDLTEZ, SumDGEFL, SumSLTEZ, SumSGEFL; + assign SumDLTEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp; + assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd1))); + assign SumSLTEZ = $signed(SumExpTmpTmp) <= $signed(13'd1023-13'd127); + assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd24+13'd1023-13'd127)) | ~|SumExpTmpTmp; + assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? SumDGEFL : SumSGEFL) & ~SumZero; //***make sure math good + // always_comb begin + // assert (PreResultDenorm == PreResultDenorm2) else $fatal ("PreResultDenorms not equal"); + // end + + // 010. when should be 001. + // - shift left one + // - add one from exp + // - if kill prod dont add to exp // Determine if the result is denormal - assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero; + // assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero; // Determine the shift needed for denormal results // - if not denorm add 1 to shift out the leading 1 - assign DenormShift = PreResultDenorm ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation + assign DenormShift = PreResultDenorm2 ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation // Normalize the sum - assign SumShifted = {2'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified + assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified // LZA correction assign LZAPlus1 = SumShifted[3*`NF+7]; + assign LZAPlus2 = SumShifted[3*`NF+8]; + // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0]; assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3]; // Calculate the sticky bit @@ -665,9 +740,10 @@ module normalize( assign UfSticky = AddendStickyM | NormSumSticky; // Determine sum's exponent - assign SumExp = (SumExpTmp+{12'b0, LZAPlus1}+{12'b0, ~|SumExpTmp&SumShifted[3*`NF+6]}) & {`NE+2{~(SumZero|ResultDenorm)}}; + // if plus1 If plus2 if said denorm but norm plus 1 if said denorm (-1 val) but norm plus 2 + assign SumExp = (SumExpTmp+{12'b0, LZAPlus1&~KillProdM}+{11'b0, LZAPlus2&~KillProdM, 1'b0}+{12'b0, ~|SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}+{11'b0, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM, 1'b0}) & {`NE+2{~(SumZero|ResultDenorm)}}; // recalculate if the result is denormalized - assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7]; + assign ResultDenorm = PreResultDenorm2&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7]; endmodule diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index 838d83a79..0679f8c35 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -96,7 +96,7 @@ module datapath ( //Mux for writting floating point regfile regf(clk, reset, {RegWriteW | FWriteIntW}, Rs1D, Rs2D, RdW, WriteDataW, RD1D, RD2D); - extend ext(.InstrD(InstrD[31:7]), .*); + extend ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ExtImmD); // Execute stage pipeline register and logic flopenrc #(`XLEN) RD1EReg(clk, reset, FlushE, ~StallE, RD1D, RD1E); diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index e9c6d1174..068495220 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -90,9 +90,69 @@ module ieu ( logic MemReadE, CSRReadE; logic JumpE; - controller c(.*); - datapath dp(.*); - forward fw(.*); + controller c( + .clk, .reset, + // Decode stage control signals + .StallD, .FlushD, .InstrD, .ImmSrcD, + .IllegalIEUInstrFaultD, .IllegalBaseInstrFaultD, + // Execute stage control signals + .StallE, .FlushE, .FlagsE, + .PCSrcE, // for datapath and Hazard Unit + .ALUControlE, .ALUSrcAE, .ALUSrcBE, + .TargetSrcE, + .MemReadE, .CSRReadE, // for Hazard Unit + .Funct3E, .MulDivE, .W64E, + .JumpE, + // Memory stage control signals + .StallM, .FlushM, .MemRWM, + .CSRReadM, .CSRWriteM, .PrivilegedM, + .SCE, .AtomicE, .AtomicM, .Funct3M, + .RegWriteM, // for Hazard Unit + .InvalidateICacheM, .FlushDCacheM, .InstrValidM, + // Writeback stage control signals + .StallW, .FlushW, + .RegWriteW, // for datapath and Hazard Unit + .ResultSrcW, + // Stall during CSRs + .CSRWritePendingDEM, + .StoreStallD + ); + + datapath dp( + .clk, .reset, + // Decode stage signals + .ImmSrcD, .InstrD, + // Execute stage signals + .StallE, .FlushE, .ForwardAE, .ForwardBE, + .ALUControlE, .ALUSrcAE, .ALUSrcBE, + .TargetSrcE, .JumpE, .IllegalFPUInstrE, + .FWriteDataE, .PCE, .PCLinkE, .FlagsE, + .PCTargetE, + .ForwardedSrcAE, .ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B + .SrcAE, .SrcBE, + // Memory stage signals + .StallM, .FlushM, .FWriteIntM, .FIntResM, + .SrcAM, .WriteDataM, .MemAdrM, .MemAdrE, + // Writeback stage signals + .StallW, .FlushW, .FWriteIntW, .RegWriteW, + .SquashSCW, .ResultSrcW, .ReadDataW, + // input logic [`XLEN-1:0] PCLinkW, + .CSRReadValW, .ReadDataM, .MulDivResultW, + // Hazard Unit signals + .Rs1D, .Rs2D, .Rs1E, .Rs2E, + .RdE, .RdM, .RdW + ); + + forward fw( + .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW, + .MemReadE, .MulDivE, .CSRReadE, + .RegWriteM, .RegWriteW, + .FWriteIntE, .FWriteIntM, .FWriteIntW, + .SCE, + // Forwarding controls + .ForwardAE, .ForwardBE, + .FPUStallD, .LoadStallD, .MulDivStallD, .CSRRdStallD + ); endmodule diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index a62502033..75ae4ccd3 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -187,7 +187,9 @@ module lsu .DCacheStall(DCacheStall)); mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) - dmmu(.PAdr(MemPAdrMtoDCache), + dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, + .PrivilegeModeW, .DisableTranslation(DisableTranslation), + .PAdr(MemPAdrMtoDCache), .VAdr(MemAdrM), .Size(Funct3MtoDCache[1:0]), .PTE(PTE), @@ -196,18 +198,16 @@ module lsu .TLBFlush(DTLBFlushM), .PhysicalAddress(MemPAdrM), .TLBMiss(DTLBMissM), - .TLBPageFault(DTLBPageFaultM), - .ExecuteAccessF(1'b0), - //.AtomicAccessM(AtomicMaskedM[1]), - .AtomicAccessM(1'b0), - .WriteAccessM(MemRWMtoLRSC[0]), - .ReadAccessM(MemRWMtoLRSC[1]), - .DisableTranslation(DisableTranslation), - .InstrAccessFaultF(), .Cacheable(CacheableM), .Idempotent(), .AtomicAllowed(), - .*); // *** the pma/pmp instruction access faults don't really matter here. is it possible to parameterize which outputs exist? + .TLBPageFault(DTLBPageFaultM), + .InstrAccessFaultF(), .LoadAccessFaultM, .StoreAccessFaultM, + .AtomicAccessM(1'b0), .ExecuteAccessF(1'b0), + .WriteAccessM(MemRWMtoLRSC[0]), .ReadAccessM(MemRWMtoLRSC[1]), + .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW + //.AtomicAccessM(AtomicMaskedM[1]), + ); // *** the pma/pmp instruction access faults don't really matter here. is it possible to parameterize which outputs exist? assign MemReadM = MemRWMtoLRSC[1] & ~(ExceptionM | PendingInterruptMtoDCache) & ~DTLBMissM; // & ~NonBusTrapM & ~DTLBMissM & CurrState != STATE_STALLED; diff --git a/wally-pipelined/src/uncore/uncore.sv b/wally-pipelined/src/uncore/uncore.sv index 8e3998e90..50941c325 100644 --- a/wally-pipelined/src/uncore/uncore.sv +++ b/wally-pipelined/src/uncore/uncore.sv @@ -85,40 +85,85 @@ module uncore ( assign {HSELEXT, HSELBootTim, HSELTim, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC, HSELSDC} = HSELRegions[7:0]; // subword accesses: converts HWDATAIN to HWDATA - subwordwrite sww(.*); + subwordwrite sww( + .HRDATA, + .HADDRD, .HSIZED, + .HWDATAIN, .HWDATA); generate // tightly integrated memory if (`TIM_SUPPORTED) begin : dtim - dtim #(.BASE(`TIM_BASE), .RANGE(`TIM_RANGE)) dtim (.*); + dtim #( + .BASE(`TIM_BASE), .RANGE(`TIM_RANGE)) dtim ( + .HCLK, .HRESETn, + .HSELTim, .HADDR, + .HWRITE, .HREADY, + .HTRANS, .HWDATA, .HREADTim, + .HRESPTim, .HREADYTim); end if (`BOOTTIM_SUPPORTED) begin : bootdtim dtim #(.BASE(`BOOTTIM_BASE), .RANGE(`BOOTTIM_RANGE), .PRELOAD("blink-led.mem")) - bootdtim(.HSELTim(HSELBootTim), .HREADTim(HREADBootTim), .HRESPTim(HRESPBootTim), .HREADYTim(HREADYBootTim), .*); + bootdtim( + .HCLK, .HRESETn, + .HSELTim(HSELBootTim), .HADDR, + .HWRITE, .HREADY, .HTRANS, + .HWDATA, + .HREADTim(HREADBootTim), .HRESPTim(HRESPBootTim), .HREADYTim(HREADYBootTim)); end // memory-mapped I/O peripherals if (`CLINT_SUPPORTED == 1) begin : clint - clint clint(.HADDR(HADDR[15:0]), .MTIME(MTIME_CLINT), .MTIMECMP(MTIMECMP_CLINT), .*); + clint clint( + .HCLK, .HRESETn, + .HSELCLINT, .HADDR(HADDR[15:0]), .HWRITE, + .HWDATA, .HREADY, .HTRANS, + .HREADCLINT, + .HRESPCLINT, .HREADYCLINT, + .MTIME(MTIME_CLINT), .MTIMECMP(MTIMECMP_CLINT), + .TimerIntM, .SwIntM); + end else begin : clint assign MTIME_CLINT = 0; assign MTIMECMP_CLINT = 0; assign TimerIntM = 0; assign SwIntM = 0; end if (`PLIC_SUPPORTED == 1) begin : plic - plic plic(.HADDR(HADDR[27:0]), .*); + plic plic( + .HCLK, .HRESETn, + .HSELPLIC, .HADDR(HADDR[27:0]), + .HWRITE, .HREADY, .HTRANS, .HWDATA, + .UARTIntr, .GPIOIntr, + .HREADPLIC, .HRESPPLIC, .HREADYPLIC, + .ExtIntM); end else begin : plic assign ExtIntM = 0; end if (`GPIO_SUPPORTED == 1) begin : gpio - gpio gpio(.HADDR(HADDR[7:0]), .*); + gpio gpio( + .HCLK, .HRESETn, .HSELGPIO, + .HADDR(HADDR[7:0]), + .HWDATA, + .HWRITE, .HREADY, + .HTRANS, + .HREADGPIO, + .HRESPGPIO, .HREADYGPIO, + .GPIOPinsIn, + .GPIOPinsOut, .GPIOPinsEn, + .GPIOIntr); + end else begin : gpio assign GPIOPinsOut = 0; assign GPIOPinsEn = 0; assign GPIOIntr = 0; end if (`UART_SUPPORTED == 1) begin : uart - uart uart(.HADDR(HADDR[2:0]), .TXRDYb(), .RXRDYb(), .INTR(UARTIntr), .SIN(UARTSin), .SOUT(UARTSout), - .DSRb(1'b1), .DCDb(1'b1), .CTSb(1'b0), .RIb(1'b1), - .RTSb(), .DTRb(), .OUT1b(), .OUT2b(), .*); + uart uart( + .HCLK, .HRESETn, + .HSELUART, + .HADDR(HADDR[2:0]), + .HWRITE, .HWDATA, + .HREADUART, .HRESPUART, .HREADYUART, + .SIN(UARTSin), .DSRb(1'b1), .DCDb(1'b1), .CTSb(1'b0), .RIb(1'b1), // from E1A driver from RS232 interface + .SOUT(UARTSout), .RTSb(), .DTRb(), // to E1A driver to RS232 interface + .OUT1b(), .OUT2b(), .INTR(UARTIntr), .TXRDYb(), .RXRDYb()); // to CPU end else begin : uart assign UARTSout = 0; assign UARTIntr = 0; end diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index 50559b9d0..a757e4f7d 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -48,7 +48,7 @@ module testbench(); /////////////////////////////////////////////////////////////////////////////// ////////////////////////////////// HARDWARE /////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// - logic clk, reset, reset_ext; + logic clk, reset_ext; initial begin reset_ext <= 1; # 22; reset_ext <= 0; end always begin clk <= 1; # 5; clk <= 0; # 5; end @@ -85,6 +85,9 @@ module testbench(); .UARTSin, .UARTSout, .SDCCLK, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn); + logic reset; + assign reset = dut.reset; + // Write Back stage signals not needed by Wally itself parameter nop = 'h13; logic [`XLEN-1:0] PCW;