diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index 88d3fd03..fab0a4df 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -78,8 +78,6 @@ // *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? `define BOOTTIM_SUPPORTED 1'b1 -//`define BOOTTIM_RANGE 56'h00003FFF -//`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder `define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder `define BOOTTIM_RANGE 56'h00000FFF `define TIM_SUPPORTED 1'b1 diff --git a/wally-pipelined/config/shared/wally-shared.vh b/wally-pipelined/config/shared/wally-shared.vh index c3709a56..b19ba908 100644 --- a/wally-pipelined/config/shared/wally-shared.vh +++ b/wally-pipelined/config/shared/wally-shared.vh @@ -49,9 +49,9 @@ `define PMPCFG_ENTRIES (`PMP_ENTRIES/8) // Floating point length FLEN and number of exponent (NE) and fraction (NF) bits -`define FLEN (`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32) -`define NE (`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8) -`define NF (`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23) +`define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32) +`define NE 11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8) +`define NF 52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23) // Disable spurious Verilator warnings diff --git a/wally-pipelined/linux-testgen/testvector-generation/debugBuildroot.sh b/wally-pipelined/linux-testgen/testvector-generation/debugBuildroot.sh index 8807bcdd..1ddb9aae 100755 --- a/wally-pipelined/linux-testgen/testvector-generation/debugBuildroot.sh +++ b/wally-pipelined/linux-testgen/testvector-generation/debugBuildroot.sh @@ -30,10 +30,11 @@ outDir="../linux-testvectors" # - Makes qemu_in_gdb_format.txt # - Splits qemu_in_gdb_format.txt into chunks of 100,000 instrs #cat $intermedDir/qemu_output.txt | ./parse_qemu.py >$intermedDir/qemu_in_gdb_format.txt -cd $intermedDir -split -d -l 5000000 ./qemu_in_gdb_format.txt --verbose -cd ../../testvector-generation +#cd $intermedDir +#split -d -l 5000000 ./qemu_in_gdb_format.txt --verbose +#cd ../../testvector-generation # Uncomment this version for parse_gdb_output.py debugging # - Uses qemu_in_gdb_format.txt -# - Makes testvectors#cat $intermedDir/qemu_in_gdb_format.txt | ./parse_gdb_output.py "$outDir" +# - Makes testvectors +#cat $intermedDir/qemu_in_gdb_format.txt | ./parse_gdb_output.py "$outDir" diff --git a/wally-pipelined/linux-testgen/testvector-generation/gdbinit_debug b/wally-pipelined/linux-testgen/testvector-generation/gdbinit_debug index 2cd32aad..1b25fbda 100644 --- a/wally-pipelined/linux-testgen/testvector-generation/gdbinit_debug +++ b/wally-pipelined/linux-testgen/testvector-generation/gdbinit_debug @@ -1,10 +1,12 @@ set pagination off file ../buildroot-image-output/fw_jump.elf target extended-remote :1237 -b irqchip_plic_warm_init +b *0x80200040 c file ../buildroot-image-output/vmlinux -b plic_init -c +#b irqchip_plic_warm_init +#c +#b plic_init +#c b do_idle c diff --git a/wally-pipelined/linux-testgen/testvector-generation/parse_qemu.py b/wally-pipelined/linux-testgen/testvector-generation/parse_qemu.py index 2e9f477a..1c44f70b 100755 --- a/wally-pipelined/linux-testgen/testvector-generation/parse_qemu.py +++ b/wally-pipelined/linux-testgen/testvector-generation/parse_qemu.py @@ -36,12 +36,15 @@ def printCSRs(): def parseCSRs(l): global parseState, inPageFault, CSRs, pageFaultCSRs, regs, pageFaultCSRs, instrs - if l.strip() and (not l.startswith("Disassembler")) and (not l.startswith("Please")) and not inPageFault: + if l.strip() and (not l.startswith("Disassembler")) and (not l.startswith("Please")): + # If we've hit the register file if l.startswith(' x0/zero'): parseState = "regFile" - instr = instrs[CSRs["pc"]] - printPC(instr) + if not inPageFault: + instr = instrs[CSRs["pc"]] + printPC(instr) parseRegs(l) + # If we've hit a CSR else: csr = l.split()[0] val = int(l.split()[1],16) @@ -64,11 +67,16 @@ def parseCSRs(l): # However SEPC and STVAL do get corrupted upon exiting if endPageFault and ((csr == 'sepc') or (csr == 'stval')): CSRs[csr] = returnAdr + pageFaultCSRs[csr] = val + elif pageFaultCSRs and (csr in pageFaultCSRs): + if (val != pageFaultCSRs[csr]): + del pageFaultCSRs[csr] + CSRs[csr] = val else: CSRs[csr] = val def parseRegs(l): - global parseState, inPageFault, CSRs, pageFaultCSRs, regs, pageFaultCSRs, instrs + global parseState, inPageFault, CSRs, pageFaultCSRs, regs, pageFaultCSRs, instrs, pageFaultRegs if "pc" in l: printCSRs() # New non-disassembled instruction @@ -86,6 +94,7 @@ def parseRegs(l): val = int(s[i+1], 16) if inPageFault: pageFaultRegs[reg] = val + sys.stderr.write(str(pageFaultRegs)) else: if pageFaultRegs and (reg in pageFaultRegs): if (val != pageFaultRegs[reg]): @@ -110,9 +119,10 @@ for l in fileinput.input(): elif (parseState == "instr") and l.startswith('0x'): if "out of bounds" in l: sys.stderr.write("Detected QEMU page fault error\n") - beginPageFault = ~(inPageFault) + beginPageFault = not inPageFault if beginPageFault: returnAdr = int(l.split()[0][2:-1], 16) + sys.stderr.write('Saving SEPC of '+hex(returnAdr)+'\n') inPageFault = 1 else: endPageFault = inPageFault diff --git a/wally-pipelined/src/fpu/fma.sv b/wally-pipelined/src/fpu/fma.sv index 9c53194d..a4105aa5 100644 --- a/wally-pipelined/src/fpu/fma.sv +++ b/wally-pipelined/src/fpu/fma.sv @@ -23,6 +23,7 @@ /////////////////////////////////////////// `include "wally-config.vh" +// `include "../../../config/rv64icfd/wally-config.vh" module fma( input logic clk, @@ -33,11 +34,11 @@ module fma( input logic [2:0] FOpCtrlM, FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic XSgnE, YSgnE, ZSgnE, - input logic [10:0] XExpE, YExpE, ZExpE, - input logic [51:0] XFracE, YFracE, ZFracE, + input logic [`NE-1:0] XExpE, YExpE, ZExpE, + input logic [`NF-1:0] XFracE, YFracE, ZFracE, input logic XSgnM, YSgnM, ZSgnM, - input logic [10:0] XExpM, YExpM, ZExpM, - input logic [51:0] XFracM, YFracM, ZFracM, + input logic [`NE-1:0] XExpM, YExpM, ZExpM, + input logic [`NF-1:0] XFracM, YFracM, ZFracM, input logic XAssumed1E, YAssumed1E, ZAssumed1E, input logic XDenormE, YDenormE, ZDenormE, input logic XZeroE, YZeroE, ZZeroE, @@ -46,13 +47,13 @@ module fma( input logic XZeroM, YZeroM, ZZeroM, input logic XInfM, YInfM, ZInfM, input logic [10:0] BiasE, - output logic [63:0] FMAResM, + output logic [`FLEN-1:0] FMAResM, output logic [4:0] FMAFlgM); - logic [105:0] ProdManE, ProdManM; - logic [161:0] AlignedAddendE, AlignedAddendM; - logic [12:0] ProdExpE, ProdExpM; + logic [2*`NF+1:0] ProdManE, ProdManM; + logic [3*`NF+5:0] AlignedAddendE, AlignedAddendM; + logic [`NE+1:0] ProdExpE, ProdExpM; logic AddendStickyE, AddendStickyM; logic KillProdE, KillProdM; @@ -128,7 +129,7 @@ module fma1( assign AlignCnt = ProdExpE - ZExpE - ZDenormE; // Defualt Addition without shifting - // | 55'b0 | 106'b(product) | 2'b0 | + // | 54'b0 | 106'b(product) | 2'b0 | // |1'b0| addnend | // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) @@ -140,7 +141,7 @@ module fma1( // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | - if ($signed(AlignCnt) <= /*$signed(-13'd56)*/-(`NF+4)) begin + if ($signed(AlignCnt) <= $signed(-(`NF+4))) begin KillProdE = 1; ZManShifted = ZManPreShifted;//{107'b0, {~ZAssumed1E, ZFrac}, 54'b0}; AddendStickyE = ~(XZeroE|YZeroE); @@ -149,19 +150,19 @@ module fma1( // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | - end else if($signed(AlignCnt) <= 0) begin + end else if($signed(AlignCnt) <= $signed(0)) begin KillProdE = 0; ZManShifted = ZManPreShifted << -AlignCnt; - AddendStickyE = |(ZManShifted[51:0]); + AddendStickyE = |(ZManShifted[`NF-1:0]); // If the Addend is shifted right (positive AlignCnt) // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | - end else if ($signed(AlignCnt)<=(2*`NF+2)) begin + end else if ($signed(AlignCnt)<=$signed(2*`NF+1)) begin KillProdE = 0; ZManShifted = ZManPreShifted >> AlignCnt; - AddendStickyE = |(ZManShifted[51:0]); + AddendStickyE = |(ZManShifted[`NF-1:0]); // If the addend is too small to effect the addition // - The addend has to shift two past the end of the addend to be considered too small @@ -176,47 +177,47 @@ module fma1( end end - assign AlignedAddendE = ZManShifted[(4*`NF+5):`NF]; + assign AlignedAddendE = ZManShifted[4*`NF+5:`NF]; endmodule module fma2( input logic XSgnM, YSgnM, ZSgnM, - input logic [10:0] XExpM, YExpM, ZExpM, - input logic [51:0] XFracM, YFracM, ZFracM, + input logic [`NE-1:0] XExpM, YExpM, ZExpM, + input logic [`NF-1:0] XFracM, YFracM, ZFracM, input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic FmtM, // precision 1 = double 0 = single - input logic [105:0] ProdManM, // 1.X frac * 1.Y frac - input logic [161:0] AlignedAddendM, // Z aligned for addition - input logic [12:0] ProdExpM, // X exponent + Y exponent - bias + input logic [2*`NF+1:0] ProdManM, // 1.X frac * 1.Y frac + input logic [3*`NF+5:0] AlignedAddendM, // Z aligned for addition + input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias input logic AddendStickyM, // sticky bit that is calculated during alignment input logic KillProdM, // set the product to zero before addition if the product is too small to matter input logic XZeroM, YZeroM, ZZeroM, // inputs are zero input logic XInfM, YInfM, ZInfM, // inputs are infinity input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs - output logic [63:0] FMAResM, // FMA final result + output logic [`FLEN-1:0] FMAResM, // FMA final result output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} - logic [51:0] ResultFrac; // Result fraction - logic [10:0] ResultExp; // Result exponent + logic [`NF-1:0] ResultFrac; // Result fraction + logic [`NE-1:0] ResultExp; // Result exponent logic ResultSgn; // Result sign logic PSgn; // product sign - logic [105:0] ProdMan2; // product being added - logic [162:0] AlignedAddend2; // possibly inverted aligned Z - logic [161:0] Sum; // positive sum - logic [162:0] PreSum; // possibly negitive sum - logic [12:0] SumExp; // exponent of the normalized sum - logic [12:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results - logic [12:0] SumExpTmpMinus1; // SumExpTmp-1 - logic [12:0] FullResultExp; // ResultExp with bits to determine sign and overflow - logic [54:0] NormSum; // normalized sum - logic [161:0] SumShifted; // sum shifted for normalization - logic [8:0] NormCnt; // output of the leading zero detector + logic [2*`NF+1:0] ProdMan2; // product being added + logic [3*`NF+6:0] AlignedAddend2; // possibly inverted aligned Z + logic [3*`NF+5:0] Sum; // positive sum + logic [3*`NF+6:0] PreSum; // possibly negitive sum + logic [`NE+1:0] SumExp; // exponent of the normalized sum + logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results + logic [`NE+1:0] SumExpTmpMinus1; // SumExpTmp-1 + logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow + logic [`NF+2:0] NormSum; // normalized sum + logic [3*`NF+5:0] SumShifted; // sum shifted for normalization + logic [8:0] NormCnt; // output of the leading zero detector //***change this later logic NormSumSticky; // sticky bit calulated from the normalized sum logic SumZero; // is the sum zero logic NegSum; // is the sum negitive @@ -226,18 +227,18 @@ module fma2( logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding logic UfPlus1, UfCalcPlus1; // do you add one (for determining underflow flag) logic Invalid,Underflow,Overflow,Inexact; // flags - logic [8:0] DenormShift; // right shift if the result is denormalized + logic [8:0] DenormShift; // right shift if the result is denormalized //***change this later logic SubBySmallNum; // was there supposed to be a subtraction by a small number - logic [63:0] Addend; // value to add (Z or zero) + logic [`FLEN-1:0] Addend; // value to add (Z or zero) logic ZeroSgn; // the result's sign if the sum is zero logic ResultSgnTmp; // the result's sign assuming the result is not zero logic Guard, Round, LSBNormSum; // bits needed to determine rounding logic UfGuard, UfRound, UfLSBNormSum; // bits needed to determine rounding for underflow flag - logic [12:0] MaxExp; // maximum value of the exponent - logic [12:0] FracLen; // length of the fraction + logic [`NE+1:0] MaxExp; // maximum value of the exponent + logic [`NE+1:0] FracLen; // length of the fraction logic SigNaN; // is an input a signaling NaN logic UnderflowFlag; // Underflow singal used in FMAFlgM (used to avoid a circular depencency) - logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results + logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results @@ -259,17 +260,17 @@ module fma2( // Choose an inverted or non-inverted addend - the one is added later assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM}; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) - assign ProdMan2 = KillProdM ? 106'b0 : ProdManM; + assign ProdMan2 = KillProdM ? 0 : ProdManM; // Do the addition // - add one to negate if the added was inverted // - the 2 extra bits at the begining and end are needed for rounding - assign PreSum = AlignedAddend2 + {55'b0, ProdMan2, 2'b0} + {162'b0, InvZ}; + assign PreSum = AlignedAddend2 + {ProdMan2, 2'b0} + InvZ; // Is the sum negitive - assign NegSum = PreSum[162]; + assign NegSum = PreSum[3*`NF+6]; // If the sum is negitive, negate the sum. - assign Sum = NegSum ? -PreSum[161:0] : PreSum[161:0]; + assign Sum = NegSum ? -PreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; @@ -284,7 +285,7 @@ module fma2( logic [8:0] i; always_comb begin i = 0; - while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one + while (~Sum[3*`NF+5-i] && $unsigned(i) <= $unsigned(3*`NF+5)) i = i+1; // search for leading one NormCnt = i+1; // compute shift count end @@ -306,26 +307,26 @@ module fma2( assign SumZero = ~(|Sum); // determine the length of the fraction based on precision - assign FracLen = FmtM ? 13'd52 : 13'd23; + assign FracLen = FmtM ? `NF : 13'd23; // Determine if the result is denormal - assign SumExpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - 13'd56); + assign SumExpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - (`NF+4)); assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero; // Determine the shift needed for denormal results assign SumExpTmpMinus1 = SumExpTmp-1; - assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0; + assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 0; //*** change this when changing the size of DenormShift also change to an and opperation // Normalize the sum - assign SumShifted = SumZero ? 162'b0 : Sum << NormCnt+DenormShift; - assign NormSum = SumShifted[161:107]; + assign SumShifted = SumZero ? 0 : Sum << NormCnt+DenormShift; //*** fix mux's with constants in them + assign NormSum = SumShifted[3*`NF+5:2*`NF+3]; // Calculate the sticky bit - assign NormSumSticky = FmtM ? (|SumShifted[107:0]) : (|SumShifted[136:0]); + assign NormSumSticky = FmtM ? (|SumShifted[2*`NF+3:0]) : (|SumShifted[136:0]); assign Sticky = AddendStickyM | NormSumSticky; // Determine sum's exponent - assign SumExp = SumZero ? 13'b0 : - ResultDenorm ? 13'b0 : + assign SumExp = SumZero ? 0 : //***again fix mux + ResultDenorm ? 0 : SumExpTmp; @@ -412,14 +413,14 @@ module fma2( assign Minus1 = CalcMinus1 & (Sticky | UfGuard | Guard | Round); // Compute rounded result - logic [64:0] RoundAdd; - logic [51:0] NormSumTruncated; - assign RoundAdd = FmtM ? Minus1 ? {65{1'b1}} : {64'b0, Plus1} : + logic [`FLEN:0] RoundAdd; //*** move this up + logic [`NF-1:0] NormSumTruncated; + assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} : Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0}; - assign NormSumTruncated = FmtM ? NormSum[54:3] : {NormSum[54:32], 29'b0}; + assign NormSumTruncated = FmtM ? NormSum[`NF+2:3] : {NormSum[54:32], 29'b0}; assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd; - assign ResultExp = FullResultExp[10:0]; + assign ResultExp = FullResultExp[`NE-1:0]; @@ -457,18 +458,18 @@ module fma2( // 1) any input is a signaling NaN // 2) Inf - Inf (unless x or y is NaN) // 3) 0 * Inf - assign MaxExp = FmtM ? 13'd2047 : 13'd255; + assign MaxExp = FmtM ? {`NE{1'b1}} : 13'd255; assign SigNaN = XSNaNM | YSNaNM | ZSNaNM; assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgnM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); // Set Overflow flag if the number is too big to be represented // - Don't set the overflow flag if an overflowed result isn't outputed - assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[12]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); // Set Underflow flag if the number is too small to be represented in normal numbers // - Don't set the underflow flag if the result is exact - assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky|UfGuard)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); - assign UnderflowFlag = (FullResultExp[12] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Guard|Sticky|UfGuard)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); // Set Inexact flag if the result is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed result isn't outputed assign Inexact = (Sticky|UfGuard|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); @@ -489,23 +490,23 @@ module fma2( /////////////////////////////////////////////////////////////////////////////// // Select the result /////////////////////////////////////////////////////////////////////////////// - assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XFracM[50:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XFracM[50:29]}; - assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YFracM[50:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YFracM[50:29]}; - assign ZNaNResult = FmtM ? {ZSgnM, ZExpM, 1'b1, ZFracM[50:0]} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], 1'b1, ZFracM[50:29]}; - assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} : - {ResultSgn, 11'h7ff, 52'b0} : + assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XFracM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XFracM[50:29]}; + assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YFracM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YFracM[50:29]}; + assign ZNaNResult = FmtM ? {ZSgnM, ZExpM, 1'b1, ZFracM[`NF-2:0]} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], 1'b1, ZFracM[50:29]}; + assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : + {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} : ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} : {{32{1'b1}}, ResultSgn, 8'hff, 23'b0}; - assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0}; - assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZFracM} - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[7:0], ZFracM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}}; - assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}}; + assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0}; + assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZFracM} - (Minus1&AddendStickyM) + (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[7:0], ZFracM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}}; + assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + (CalcPlus1&(AddendStickyM|FrmM[1])) : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}}; assign FMAResM = XNaNM ? XNaNResult : YNaNM ? YNaNResult : ZNaNM ? ZNaNResult : Invalid ? InvalidResult : // has to be before inf XInfM ? FmtM ? {PSgn, XExpM, XFracM} : {{32{1'b1}}, PSgn, XExpM[7:0], XFracM[51:29]} : - XInfM ? FmtM ? {PSgn, YExpM, YFracM} : {{32{1'b1}}, PSgn, YExpM[7:0], YFracM[51:29]} : - XInfM ? FmtM ? {ZSgnM, ZExpM, ZFracM} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], ZFracM[51:29]} : + YInfM ? FmtM ? {PSgn, YExpM, YFracM} : {{32{1'b1}}, PSgn, YExpM[7:0], YFracM[51:29]} : + ZInfM ? FmtM ? {ZSgnM, ZExpM, ZFracM} : {{32{1'b1}}, ZSgnM, ZExpM[7:0], ZFracM[51:29]} : Overflow ? OverflowResult : KillProdM ? KillProdResult : // has to be after Underflow Underflow & ~ResultDenorm ? UnderflowResult : diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index f283f5e4..344500d4 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -204,7 +204,7 @@ module fpu ( fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .done(FDivSqrtDoneE), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), .P(~FmtE), .FDivBusyE, .HoldInputs, .OvEn(1'b1), .UnEn(1'b1), - .start(FDivStartE), .reset, .clk(~clk), .AS_Result(FDivResultM), .Flags(FDivSqrtFlgM)); + .start(FDivStartE), .reset, .clk(fpdivClk), .AS_Result(FDivResultM), .Flags(FDivSqrtFlgM)); // .DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, // .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, diff --git a/wally-pipelined/src/fpu/fsm.sv b/wally-pipelined/src/fpu/fsm.sv index 8991fb71..434f56e3 100755 --- a/wally-pipelined/src/fpu/fsm.sv +++ b/wally-pipelined/src/fpu/fsm.sv @@ -476,7 +476,7 @@ module fsm (done, load_rega, load_regb, load_regc, sel_muxa = 3'b011; sel_muxb = 3'b110; sel_muxr = 1'b1; - NEXT_STATE = S27; + NEXT_STATE = S26; end S26: // done begin diff --git a/wally-pipelined/src/fpu/unpacking.sv b/wally-pipelined/src/fpu/unpacking.sv index 2280f380..1133a403 100644 --- a/wally-pipelined/src/fpu/unpacking.sv +++ b/wally-pipelined/src/fpu/unpacking.sv @@ -56,9 +56,9 @@ module unpacking ( assign YNaNE = YExpMaxE & ~YFracZero; assign ZNaNE = ZExpMaxE & ~ZFracZero; - assign XSNaNE = XNaNE&~XExpE[51]; - assign YSNaNE = YNaNE&~YExpE[51]; - assign ZSNaNE = ZNaNE&~ZExpE[51]; + assign XSNaNE = XNaNE&~XFracE[51]; + assign YSNaNE = YNaNE&~YFracE[51]; + assign ZSNaNE = ZNaNE&~ZFracE[51]; assign XDenormE = XExpZero & ~XFracZero; assign YDenormE = YExpZero & ~YFracZero; diff --git a/wally-pipelined/src/uncore/uncore.sv b/wally-pipelined/src/uncore/uncore.sv index e5f5fdd7..78a89b02 100644 --- a/wally-pipelined/src/uncore/uncore.sv +++ b/wally-pipelined/src/uncore/uncore.sv @@ -87,31 +87,32 @@ module uncore ( generate // tightly integrated memory dtim #(.BASE(`TIM_BASE), .RANGE(`TIM_RANGE)) dtim (.*); - //if (`BOOTTIM_SUPPORTED) *** restore when naming is figured out + if (`BOOTTIM_SUPPORTED) begin : bootdtim dtim #(.BASE(`BOOTTIM_BASE), .RANGE(`BOOTTIM_RANGE)) bootdtim(.HSELTim(HSELBootTim), .HREADTim(HREADBootTim), .HRESPTim(HRESPBootTim), .HREADYTim(HREADYBootTim), .*); + end // memory-mapped I/O peripherals - if (`CLINT_SUPPORTED == 1) + if (`CLINT_SUPPORTED == 1) begin : clint clint clint(.HADDR(HADDR[15:0]), .MTIME(MTIME_CLINT), .MTIMECMP(MTIMECMP_CLINT), .*); - else begin + end else begin : clint assign MTIME_CLINT = 0; assign MTIMECMP_CLINT = 0; assign TimerIntM = 0; assign SwIntM = 0; end - if (`PLIC_SUPPORTED == 1) + if (`PLIC_SUPPORTED == 1) begin : plic plic plic(.HADDR(HADDR[27:0]), .*); - else begin + end else begin : plic assign ExtIntM = 0; end - if (`GPIO_SUPPORTED == 1) + if (`GPIO_SUPPORTED == 1) begin : gpio gpio gpio(.HADDR(HADDR[7:0]), .*); - else begin + end else begin : gpio assign GPIOPinsOut = 0; assign GPIOPinsEn = 0; assign GPIOIntr = 0; end - if (`UART_SUPPORTED == 1) + if (`UART_SUPPORTED == 1) begin : uart uart uart(.HADDR(HADDR[2:0]), .TXRDYb(), .RXRDYb(), .INTR(UARTIntr), .SIN(UARTSin), .SOUT(UARTSout), .DSRb(1'b1), .DCDb(1'b1), .CTSb(1'b0), .RIb(1'b1), .RTSb(), .DTRb(), .OUT1b(), .OUT2b(), .*); - else begin + end else begin : uart assign UARTSout = 0; assign UARTIntr = 0; end endgenerate diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 079ac6b1..20df6e23 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -121,7 +121,7 @@ string tests32f[] = '{ "rv64d/I-FLD-01", "2420", "rv64d/I-FMV-X-D-01", "2000", "rv64d/I-FMV-D-X-01", "2000", - // "rv64d/I-FDIV-D-01", "2000", + "rv64d/I-FDIV-D-01", "2000", "rv64d/I-FCVT-D-L-01", "2000", "rv64d/I-FCVT-D-LU-01", "2000", "rv64d/I-FCVT-D-S-01", "2000", @@ -147,7 +147,7 @@ string tests32f[] = '{ "rv64d/I-FSGNJ-D-01", "2000", "rv64d/I-FSGNJN-D-01", "2000", "rv64d/I-FSGNJX-D-01", "2000", - // "rv64d/I-FSQRT-D-01", "2000", + "rv64d/I-FSQRT-D-01", "2000", "rv64d/I-FSUB-D-01", "2000" }; @@ -753,6 +753,7 @@ module riscvassertions(); assert (`ICACHE_NUMWAYS == 1 || `MEM_ICACHE == 0) else $error("Multiple Instruction Cache ways not yet implemented"); assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES) else $error("ITLB_ENTRIES must be a power of 2"); assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES) else $error("DTLB_ENTRIES must be a power of 2"); + assert (`TIM_RANGE >= 56'h07FFFFFF) else $error("Some regression tests will fail if TIM_RANGE is less than 56'h07FFFFFF"); end endmodule diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index 7e688413..6c44ac6f 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -27,7 +27,7 @@ module testbench(); - parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*0900000; // # of instructions at which to turn on waves in graphical sim + parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*3160000; // # of instructions at which to turn on waves in graphical sim parameter stopICount = `BUSYBEAR*143898 + `BUILDROOT*0000000; // # instructions at which to halt sim completely (set to 0 to let it run as far as it can) /////////////////////////////////////////////////////////////////////////////// @@ -140,7 +140,7 @@ module testbench(); end // initial loading of memories initial begin - $readmemh({`LINUX_TEST_VECTORS,"bootmem.txt"}, dut.uncore.bootdtim.RAM, 'h1000 >> 3); + $readmemh({`LINUX_TEST_VECTORS,"bootmem.txt"}, dut.uncore.bootdtim.bootdtim.RAM, 'h1000 >> 3); $readmemh({`LINUX_TEST_VECTORS,"ram.txt"}, dut.uncore.dtim.RAM); $readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.bpred.Predictor.DirPredictor.PHT.memory); $readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.bpred.TargetPredictor.memory.memory); @@ -222,24 +222,24 @@ module testbench(); `SCAN_PC(data_file_PCF, scan_file_PCF, PCtextF, PCtextF2, InstrFExpected, PCFexpected); `SCAN_PC(data_file_PCD, scan_file_PCD, PCtextD, PCtextD2, InstrDExpected, PCDexpected); - // NOP out certain instructions - if(dut.hart.ifu.PCD===PCDexpected) begin - if((dut.hart.ifu.PCD == 32'h80001dc6) || // for now, NOP out any stores to PLIC - (dut.hart.ifu.PCD == 32'h80001de0) || - (dut.hart.ifu.PCD == 32'h80001de2)) begin - $display("warning: NOPing out %s at PCD=%0x, instr %0d, time %0t", PCtextD, dut.hart.ifu.PCD, instrs, $time); - force InstrDExpected = 32'b0010011; - force dut.hart.ifu.InstrRawD = 32'b0010011; - while (clk != 0) #1; - while (clk != 1) #1; - release dut.hart.ifu.InstrRawD; - release InstrDExpected; - warningCount += 1; - forcedInstr = 1; - end else begin - forcedInstr = 0; - end - end + // NOP out certain instructions <-- commented out because no duh hardcoded addressses break easily + //if(dut.hart.ifu.PCD===PCDexpected) begin + // if((dut.hart.ifu.PCD == 32'h80001dc6) || // for now, NOP out any stores to PLIC + // (dut.hart.ifu.PCD == 32'h80001de0) || + // (dut.hart.ifu.PCD == 32'h80001de2)) begin + // $display("warning: NOPing out %s at PCD=%0x, instr %0d, time %0t", PCtextD, dut.hart.ifu.PCD, instrs, $time); + // force InstrDExpected = 32'b0010011; + // force dut.hart.ifu.InstrRawD = 32'b0010011; + // while (clk != 0) #1; + // while (clk != 1) #1; + // release dut.hart.ifu.InstrRawD; + // release InstrDExpected; + // warningCount += 1; + // forcedInstr = 1; + // end else begin + // forcedInstr = 0; + // end + //end // Increment instruction count if (instrs <= 10 || (instrs <= 100 && instrs % 10 == 0) || @@ -260,7 +260,7 @@ module testbench(); // Check if PCD is going to be flushed due to a branch or jump if (`BPRED_ENABLED) begin - PCDwrong = dut.hart.hzu.FlushD || (PCtextE.substr(0,3) == "mret"); //Old version: dut.hart.ifu.bpred.bpred.BPPredWrongE; <-- This old version failed to account for MRET. + PCDwrong = dut.hart.hzu.FlushD || (PCtextE.substr(0,3) == "mret") || dut.hart.priv.InstrPageFaultF || dut.hart.priv.InstrPageFaultD || dut.hart.priv.InstrPageFaultE || dut.hart.priv.InstrPageFaultM; end // Check PCD, InstrD @@ -283,10 +283,10 @@ module testbench(); scan_file_memR = $fscanf(data_file_memR, "%x\n", readAdrExpected); scan_file_memR = $fscanf(data_file_memR, "%x\n", readDataExpected); // Next force a timer interrupt (*** this may later need generalizing) - force dut.uncore.genblk1.clint.MTIME = dut.uncore.genblk1.clint.MTIMECMP + 1; + force dut.uncore.clint.clint.MTIME = dut.uncore.clint.clint.MTIMECMP + 1; while (clk != 0) #1; while (clk != 1) #1; - release dut.uncore.genblk1.clint.MTIME; + release dut.uncore.clint.clint.MTIME; end end end @@ -526,6 +526,7 @@ module testbench(); string MTVALstring = "MTVAL"; string SEPCstring = "SEPC"; string SCAUSEstring = "SCAUSE"; + string STVALstring = "STVAL"; string SSTATUSstring = "SSTATUS"; logic [63:0] expectedCSR; @@ -556,6 +557,7 @@ module testbench(); if (``CSR``name == MTVALstring) #3; \ if (``CSR``name == SEPCstring) #1; \ if (``CSR``name == SCAUSEstring) #2; \ + if (``CSR``name == STVALstring) #3; \ if (``CSR``name == SSTATUSstring) #3; \ scan_file_csr = $fscanf(data_file_csr, "%s\n", expectedCSRname); \ scan_file_csr = $fscanf(data_file_csr, "%x\n", expectedCSR); \