From b1c2a77fc2eefb25c2a9a19316a2bd8f73d8b598 Mon Sep 17 00:00:00 2001 From: bbracker Date: Wed, 22 Sep 2021 12:31:10 -0400 Subject: [PATCH 01/36] update setup scripts to new testvector files --- wally-pipelined/linux-testgen/linux-testvectors/tvLinker.sh | 6 +----- .../linux-testgen/linux-testvectors/tvUnlinker.sh | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/wally-pipelined/linux-testgen/linux-testvectors/tvLinker.sh b/wally-pipelined/linux-testgen/linux-testvectors/tvLinker.sh index 11c34645f..f694b112a 100755 --- a/wally-pipelined/linux-testgen/linux-testvectors/tvLinker.sh +++ b/wally-pipelined/linux-testgen/linux-testvectors/tvLinker.sh @@ -1,9 +1,5 @@ echo "Warning: this script will only work if your repo is on Tera" -ln -s /courses/e190ax/buildroot_boot/parsedCSRs.txt parsedCSRs.txt -ln -s /courses/e190ax/buildroot_boot/parsedMemRead.txt parsedMemRead.txt -ln -s /courses/e190ax/buildroot_boot/parsedMemWrite.txt parsedMemWrite.txt -ln -s /courses/e190ax/buildroot_boot/parsedPC.txt parsedPC.txt -ln -s /courses/e190ax/buildroot_boot/parsedRegs.txt parsedRegs.txt +ln -s /courses/e190ax/buildroot_boot/all.txt all.txt ln -s /courses/e190ax/buildroot_boot/bootmem.txt bootmem.txt ln -s /courses/e190ax/buildroot_boot/ram.txt ram.txt echo "Done!" diff --git a/wally-pipelined/linux-testgen/linux-testvectors/tvUnlinker.sh b/wally-pipelined/linux-testgen/linux-testvectors/tvUnlinker.sh index 183d6a6ec..bded8a16e 100755 --- a/wally-pipelined/linux-testgen/linux-testvectors/tvUnlinker.sh +++ b/wally-pipelined/linux-testgen/linux-testvectors/tvUnlinker.sh @@ -1,10 +1,6 @@ # This could be nice to use if you want to mess with the testvectors # without corrupting the stable copies on Tera. -unlink parsedCSRs.txt -unlink parsedMemRead.txt -unlink parsedMemWrite.txt -unlink parsedPC.txt -unlink parsedRegs.txt +unlink all.txt unlink bootmem.txt unlink ram.txt echo "Done!" 
From 441759b81c79f2494bef989f88b3114dbac54d12 Mon Sep 17 00:00:00 2001 From: bbracker Date: Wed, 22 Sep 2021 12:33:11 -0400 Subject: [PATCH 02/36] switch testbench-linux's interrupts from xcause to mip and improve warning messages --- wally-pipelined/testbench/testbench-linux.sv | 321 +++++-------------- 1 file changed, 80 insertions(+), 241 deletions(-) diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index 64bd24e70..a09c0838f 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -30,15 +30,13 @@ module testbench(); - parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*6779000; // # of instructions at which to turn on waves in graphical sim - + parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*8700000; // # of instructions at which to turn on waves in graphical sim string ProgramAddrMapFile, ProgramLabelMapFile; /////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////// DUT ///////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// logic clk, reset; - logic [`AHBW-1:0] readDataExpected; logic [31:0] HADDR; logic [`AHBW-1:0] HWDATA; @@ -51,7 +49,6 @@ module testbench(); logic HCLK, HRESETn; logic [`AHBW-1:0] HRDATAEXT; logic HREADYEXT, HRESPEXT; - logic [31:0] GPIOPinsIn; logic [31:0] GPIOPinsOut, GPIOPinsEn; logic UARTSin, UARTSout; @@ -73,6 +70,7 @@ module testbench(); // Testbench Core integer warningCount = 0; integer errorCount = 0; + integer MIPexpected; // P, Instr Checking logic [`XLEN-1:0] PCW; integer data_file_all; @@ -85,7 +83,6 @@ module testbench(); logic checkInstrW; //integer RegAdr; - integer fault; logic TrapW; @@ -129,18 +126,29 @@ module testbench(); logic forcedInterrupt; integer NumCSRMIndex; integer NumCSRWIndex; - integer NumCSRPostWIndex; -// logic CurrentInterruptForce; + integer NumCSRPostWIndex; logic 
[`XLEN-1:0] InstrCountW; - // ----------- - // Error Macro - // ----------- + // ------------ + // Error Macros + // ------------ `define ERROR \ errorCount +=1; \ $display("processed %0d instructions with %0d warnings", InstrCountW, warningCount); \ $stop; + `define CSRwarn(CSR) \ + begin \ + if(`DEBUG_TRACE > 0) begin \ + $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], CSR, ExpectedCSRArrayValueW[NumCSRPostWIndex]); \ + end \ + if (CSR != ExpectedCSRArrayValueW[NumCSRPostWIndex]) begin \ + $display("%tns, %d instrs: CSR %s = %016x, does not equal expected value %016x", $time, InstrCountW, ExpectedCSRArrayW[NumCSRPostWIndex], CSR, ExpectedCSRArrayValueW[NumCSRPostWIndex]); \ + if(!`DontHaltOnCSRMisMatch) fault = 1; \ + end \ + end + + initial begin data_file_all = $fopen({`LINUX_TEST_VECTORS,"all.txt"}, "r"); InstrCountW = '0; @@ -149,16 +157,12 @@ module testbench(); force dut.hart.priv.ExtIntM = 0; end -/* -----\/----- EXCLUDED -----\/----- - initial begin - CurrentInterruptForce = 1'b0; - end - -----/\----- EXCLUDED -----/\----- */ assign checkInstrM = dut.hart.ieu.InstrValidM & ~dut.hart.priv.trap.InstrPageFaultM & ~dut.hart.priv.trap.InterruptM & ~dut.hart.StallM; // trapW will already be invalid in there was an InstrPageFault in the previous instruction. 
assign checkInstrW = dut.hart.ieu.InstrValidW & ~dut.hart.StallW; + // Additonal W stage registers flopenrc #(`XLEN) MemAdrWReg(clk, reset, dut.hart.FlushW, ~dut.hart.StallW, dut.hart.ieu.dp.MemAdrM, MemAdrW); flopenrc #(`XLEN) WriteDataWReg(clk, reset, dut.hart.FlushW, ~dut.hart.StallW, dut.hart.WriteDataM, WriteDataW); flopenrc #(`XLEN) PCWReg(clk, reset, dut.hart.FlushW, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.PCM, PCW); @@ -176,8 +180,9 @@ module testbench(); // always check PC, instruction bits if (checkInstrM) begin // read 1 line of the trace file - matchCount = $fgets(line, data_file_all); + matchCount = $fgets(line, data_file_all); if(`DEBUG_TRACE > 1) $display("Time %t, line %x", $time, line); + // extract PC, Instr matchCount = $sscanf(line, "%x %x %s", ExpectedPCM, ExpectedInstrM, textM); //$display("matchCount %d, PCM %x ExpectedInstrM %x textM %x", matchCount, ExpectedPCM, ExpectedInstrM, textM); @@ -213,21 +218,17 @@ module testbench(); RegWriteM = ExpectedTokens[MarkerIndex]; matchCount = $sscanf(ExpectedTokens[MarkerIndex+1], "%d", ExpectedRegAdrM); matchCount = $sscanf(ExpectedTokens[MarkerIndex+2], "%x", ExpectedRegValueM); - MarkerIndex += 3; - - // parse memory address, read data, and/or write data + // parse memory address, read data, and/or write data end else if(ExpectedTokens[MarkerIndex].substr(0, 2) == "Mem") begin MemOpM = ExpectedTokens[MarkerIndex]; matchCount = $sscanf(ExpectedTokens[MarkerIndex+1], "%x", ExpectedMemAdrM); matchCount = $sscanf(ExpectedTokens[MarkerIndex+2], "%x", ExpectedMemWriteDataM); matchCount = $sscanf(ExpectedTokens[MarkerIndex+3], "%x", ExpectedMemReadDataM); - MarkerIndex += 4; - - // parse CSRs, because there are 1 or more CSRs after the CSR token - // we check if the CSR token or the number of CSRs is greater than 0. - // if so then we want to parse for a CSR. + // parse CSRs, because there are 1 or more CSRs after the CSR token + // we check if the CSR token or the number of CSRs is greater than 0. 
+ // if so then we want to parse for a CSR. end else if(ExpectedTokens[MarkerIndex] == "CSR" || NumCSRM > 0) begin if(ExpectedTokens[MarkerIndex] == "CSR") begin // all additional CSR's won't have this token. @@ -235,30 +236,13 @@ module testbench(); end matchCount = $sscanf(ExpectedTokens[MarkerIndex], "%s", ExpectedCSRArrayM[NumCSRM]); matchCount = $sscanf(ExpectedTokens[MarkerIndex+1], "%x", ExpectedCSRArrayValueM[NumCSRM]); - MarkerIndex += 2; - - // if we get an xcause with the interrupt bit set we must generate an interrupt as interrupts - // are imprecise. Forcing the trap at this time will allow wally to track what qemu does. - // the msb of xcause will be set. - // bits 1:0 select mode; 0 = user, 1 = superviser, 3 = machine - // bits 3:2 select the type of interrupt, 0 = software, 1 = timer, 2 = external - if(ExpectedCSRArrayM[NumCSRM].substr(1, 5) == "cause" && (ExpectedCSRArrayValueM[NumCSRM][`XLEN-1] == 1'b1)) begin - //what type? - ExpectedIntType = ExpectedCSRArrayValueM[NumCSRM] & 64'h0000_000C; - $display("%tns, %d instrs: CSR = %s. 
Forcing interrupt of cause = %x", $time, InstrCountW, ExpectedCSRArrayM[NumCSRM], ExpectedCSRArrayValueM[NumCSRM]); - forcedInterrupt = 1; - if(ExpectedIntType == 0) begin - force dut.hart.priv.SwIntM = 1'b1; - $display("Activate spoofed SwIntM"); - end else if(ExpectedIntType == 4) begin - force dut.hart.priv.TimerIntM = 1'b1; - $display("Activate spoofed TimeIntM"); - end else if(ExpectedIntType == 8) begin - force dut.hart.priv.ExtIntM = 1'b1; - $display("Activate spoofed ExtIntM"); - end else forcedInterrupt = 0; - end + // match MIP to QEMU's because interrupts are imprecise + if(ExpectedCSRArrayM[NumCSRM].substr(0, 2) == "mip") begin + $display("%tns: Updating MIP to %x",$time,ExpectedCSRArrayValueM[NumCSRM]); + MIPexpected = ExpectedCSRArrayValueM[NumCSRM]; + force dut.hart.priv.csr.genblk1.csri.MIP_REGW = MIPexpected; + end NumCSRM++; end end @@ -268,12 +252,10 @@ module testbench(); force dut.hart.ieu.dp.ReadDataM = ExpectedMemReadDataM; end if(textM.substr(0,5) == "rdtime") begin - $display("%tns, %d instrs: Overwrite MTIME_CLINT on read of MTIME in memory stage.", $time, InstrCountW); + //$display("%tns, %d instrs: Overwrite MTIME_CLINT on read of MTIME in memory stage.", $time, InstrCountW); force dut.uncore.clint.clint.MTIME = ExpectedRegValueM; - //dut.hart.ieu.dp.regf.wd3 end - - end // if (checkInstrM) + end end // step 1: register expected state into the write back stage. 
@@ -320,37 +302,16 @@ module testbench(); ExpectedCSRArrayValueW[NumCSRWIndex] = ExpectedCSRArrayValueM[NumCSRWIndex]; end end - // override on special conditions #1; - - + // override on special conditions if(~dut.hart.StallW) begin if(textW.substr(0,5) == "rdtime") begin - $display("%tns, %d instrs: Releasing force of MTIME_CLINT.", $time, InstrCountW); + //$display("%tns, %d instrs: Releasing force of MTIME_CLINT.", $time, InstrCountW); release dut.uncore.clint.clint.MTIME; - //release dut.hart.ieu.dp.regf.wd3; - end - + end if (ExpectedMemAdrM == 'h10000005) begin //$display("%tns, %d instrs: releasing force of ReadDataM.", $time, InstrCountW); - release dut.hart.ieu.dp.ReadDataM; - end - - // force interrupts to 0 - if (forcedInterrupt) begin - forcedInterrupt = 0; - if(ExpectedIntType == 0) begin - force dut.hart.priv.SwIntM = 1'b0; - $display("Deactivate spoofed SwIntM"); - end - else if(ExpectedIntType == 4) begin - force dut.hart.priv.TimerIntM = 1'b0; - $display("Deactivate spoofed TimeIntM"); - end - else if(ExpectedIntType == 8) begin - force dut.hart.priv.ExtIntM = 1'b0; - $display("Deactivate spoofed ExtIntM"); - end + release dut.hart.ieu.dp.ReadDataM; end end end @@ -368,197 +329,75 @@ module testbench(); // check PCW fault = 0; if(PCW != ExpectedPCW) begin - $display("PCW: %016x does not equal ExpectedPCW: %016x", PCW, ExpectedPCW); + $display("%tns, %d instrs: PCW %016x does not equal ExpectedPCW: %016x", $time, InstrCountW, PCW, ExpectedPCW); fault = 1; end - // check instruction value if(dut.hart.ifu.InstrW != ExpectedInstrW) begin - $display("InstrW: %x does not equal ExpectedInstrW: %x", dut.hart.ifu.InstrW, ExpectedInstrW); + $display("%tns, %d instrs: InstrW %x does not equal ExpectedInstrW: %x", $time, InstrCountW, dut.hart.ifu.InstrW, ExpectedInstrW); fault = 1; end - // check the number of instructions if(dut.hart.priv.csr.genblk1.counters.genblk1.INSTRET_REGW != InstrCountW) begin $display("%t, Number of instruction Retired = %d does not 
equal number of instructions in trace = %d", $time, dut.hart.priv.csr.genblk1.counters.genblk1.INSTRET_REGW, InstrCountW); if(!`DontHaltOnCSRMisMatch) fault = 1; end - #2; // delay 2 ns. - - if(`DEBUG_TRACE > 2) begin - $display("Reg Write Address: %02d ? expected value: %02d", dut.hart.ieu.dp.regf.a3, ExpectedRegAdrW); - $display("RF[%02d]: %016x ? expected value: %016x", ExpectedRegAdrW, dut.hart.ieu.dp.regf.rf[ExpectedRegAdrW], ExpectedRegValueW); + $display("%tns, %d instrs: Reg Write Address %02d ? expected value: %02d", $time, InstrCountW, dut.hart.ieu.dp.regf.a3, ExpectedRegAdrW); + $display("%tns, %d instrs: RF[%02d] %016x ? expected value: %016x", $time, InstrCountW, ExpectedRegAdrW, dut.hart.ieu.dp.regf.rf[ExpectedRegAdrW], ExpectedRegValueW); end - if (RegWriteW == "GPR") begin if (dut.hart.ieu.dp.regf.a3 != ExpectedRegAdrW) begin - $display("Reg Write Address: %02d does not equal expected value: %02d", dut.hart.ieu.dp.regf.a3, ExpectedRegAdrW); + $display("%tns, %d instrs: Reg Write Address %02d does not equal expected value: %02d", $time, InstrCountW, dut.hart.ieu.dp.regf.a3, ExpectedRegAdrW); + fault = 1; + end + if (dut.hart.ieu.dp.regf.rf[ExpectedRegAdrW] != ExpectedRegValueW) begin + $display("%tns, %d instrs: RF[%02d] %016x does not equal expected value: %016x", $time, InstrCountW, ExpectedRegAdrW, dut.hart.ieu.dp.regf.rf[ExpectedRegAdrW], ExpectedRegValueW); fault = 1; end - - if (dut.hart.ieu.dp.regf.rf[ExpectedRegAdrW] != ExpectedRegValueW) begin - $display("RF[%02d]: %016x does not equal expected value: %016x", ExpectedRegAdrW, dut.hart.ieu.dp.regf.rf[ExpectedRegAdrW], ExpectedRegValueW); - fault = 1; - end end if (MemOpW.substr(0,2) == "Mem") begin - if(`DEBUG_TRACE > 3) $display("\tMemAdrW: %016x ? 
expected: %016x", MemAdrW, ExpectedMemAdrW); - - // always check address - if (MemAdrW != ExpectedMemAdrW) begin - $display("MemAdrW: %016x does not equal expected value: %016x", MemAdrW, ExpectedMemAdrW); - fault = 1; - end - - // check read data - if(MemOpW == "MemR" || MemOpW == "MemRW") begin - if(`DEBUG_TRACE > 3) $display("\tReadDataW: %016x ? expected: %016x", dut.hart.ieu.dp.ReadDataW, ExpectedMemReadDataW); - if (dut.hart.ieu.dp.ReadDataW != ExpectedMemReadDataW) begin - $display("ReadDataW: %016x does not equal expected value: %016x", dut.hart.ieu.dp.ReadDataW, ExpectedMemReadDataW); - fault = 1; + if(`DEBUG_TRACE > 3) $display("\tMemAdrW: %016x ? expected: %016x", MemAdrW, ExpectedMemAdrW); + // always check address + if (MemAdrW != ExpectedMemAdrW) begin + $display("%tns, %d instrs: MemAdrW %016x does not equal expected value: %016x", $time, InstrCountW, MemAdrW, ExpectedMemAdrW); + fault = 1; + end + // check read data + if(MemOpW == "MemR" || MemOpW == "MemRW") begin + if(`DEBUG_TRACE > 3) $display("\tReadDataW: %016x ? expected: %016x", dut.hart.ieu.dp.ReadDataW, ExpectedMemReadDataW); + if (dut.hart.ieu.dp.ReadDataW != ExpectedMemReadDataW) begin + $display("%tns, %d instrs: ReadDataW %016x does not equal expected value: %016x", $time, InstrCountW, dut.hart.ieu.dp.ReadDataW, ExpectedMemReadDataW); + fault = 1; + end + // check write data + end else if(ExpectedTokens[MarkerIndex] == "MemW" || ExpectedTokens[MarkerIndex] == "MemRW") begin + if(`DEBUG_TRACE > 3) $display("\tWriteDataW: %016x ? expected: %016x", WriteDataW, ExpectedMemWriteDataW); + if (WriteDataW != ExpectedMemWriteDataW) begin + $display("%tns, %d instrs: WriteDataW %016x does not equal expected value: %016x", $time, InstrCountW, WriteDataW, ExpectedMemWriteDataW); + fault = 1; + end + end end - end - - // check write data - else if(ExpectedTokens[MarkerIndex] == "MemW" || ExpectedTokens[MarkerIndex] == "MemRW") begin - if(`DEBUG_TRACE > 3) $display("\tWriteDataW: %016x ? 
expected: %016x", WriteDataW, ExpectedMemWriteDataW); - if (WriteDataW != ExpectedMemWriteDataW) begin - $display("WriteDataW: %016x does not equal expected value: %016x", WriteDataW, ExpectedMemWriteDataW); - fault = 1; - end - end - end - // check csr - //$display("%t, about to check csr, NumCSRW = %d", $time, NumCSRW); for(NumCSRPostWIndex = 0; NumCSRPostWIndex < NumCSRW; NumCSRPostWIndex++) begin - /* -----\/----- EXCLUDED -----\/----- - if(`DEBUG_TRACE > 0) begin - $display("%t, NumCSRPostWIndex = %d, Expected CSR: %s = %016x", $time, NumCSRPostWIndex, ExpectedCSRArrayW[NumCSRPostWIndex], ExpectedCSRArrayValueW[NumCSRPostWIndex]); - end - -----/\----- EXCLUDED -----/\----- */ case(ExpectedCSRArrayW[NumCSRPostWIndex]) - "mhartid": begin - if(`DEBUG_TRACE > 0) begin - $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MHARTID_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - end - if (dut.hart.priv.csr.genblk1.csrm.MHARTID_REGW != ExpectedCSRArrayValueW[NumCSRPostWIndex]) begin - $display("%t, CSR: %s = %016x, does not equal expected value %016x", $time, ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MHARTID_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - if(!`DontHaltOnCSRMisMatch) fault = 1; - end - end - "mstatus": begin - if(`DEBUG_TRACE > 0) begin - $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MSTATUS_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - end - if ((dut.hart.priv.csr.genblk1.csrm.MSTATUS_REGW) != (ExpectedCSRArrayValueW[NumCSRPostWIndex])) begin - $display("%t, CSR: %s = %016x, does not equal expected value %016x", $time, ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MSTATUS_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - if(!`DontHaltOnCSRMisMatch) fault = 1; - end - end - "mtvec": begin - if(`DEBUG_TRACE > 0) begin - $display("CSR: %s = %016x, expected = %016x", 
ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MTVEC_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - end - if (dut.hart.priv.csr.genblk1.csrm.MTVEC_REGW != ExpectedCSRArrayValueW[NumCSRPostWIndex]) begin - $display("%t, CSR: %s = %016x, does not equal expected value %016x", $time, ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MTVEC_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - if(!`DontHaltOnCSRMisMatch) fault = 1; - end - end - "mip": begin - if(`DEBUG_TRACE > 0) begin - $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MIP_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - end - if (dut.hart.priv.csr.genblk1.csrm.MIP_REGW != ExpectedCSRArrayValueW[NumCSRPostWIndex]) begin - $display("%t, CSR: %s = %016x, does not equal expected value %016x", $time, ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MIP_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - if(!`DontHaltOnCSRMisMatch) fault = 1; - end - end - "mie": begin - if(`DEBUG_TRACE > 0) begin - $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MIE_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - end - if (dut.hart.priv.csr.genblk1.csrm.MIE_REGW != ExpectedCSRArrayValueW[NumCSRPostWIndex]) begin - $display("%t, CSR: %s = %016x, does not equal expected value %016x", $time, ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MIE_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - if(!`DontHaltOnCSRMisMatch) fault = 1; - end - end - "mideleg": begin - if(`DEBUG_TRACE > 0) begin - $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MIDELEG_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - end - if (dut.hart.priv.csr.genblk1.csrm.MIDELEG_REGW != ExpectedCSRArrayValueW[NumCSRPostWIndex]) begin - $display("%t, CSR: %s = %016x, does not equal 
expected value %016x", $time, ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MIDELEG_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - if(!`DontHaltOnCSRMisMatch) fault = 1; - end - end - "medeleg": begin - if(`DEBUG_TRACE > 0) begin - $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MEDELEG_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - end - if (dut.hart.priv.csr.genblk1.csrm.MEDELEG_REGW != ExpectedCSRArrayValueW[NumCSRPostWIndex]) begin - $display("%t, CSR: %s = %016x, does not equal expected value %016x", $time, ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MEDELEG_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - if(!`DontHaltOnCSRMisMatch) fault = 1; - end - end - "mepc": begin - if(`DEBUG_TRACE > 0) begin - $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MEPC_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - end - if (dut.hart.priv.csr.genblk1.csrm.MEPC_REGW != ExpectedCSRArrayValueW[NumCSRPostWIndex]) begin - $display("%t, CSR: %s = %016x, does not equal expected value %016x", $time, ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MEPC_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - if(!`DontHaltOnCSRMisMatch) fault = 1; - end - end - "mtval": begin - if(`DEBUG_TRACE > 0) begin - $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MTVAL_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - end - if (dut.hart.priv.csr.genblk1.csrm.MTVAL_REGW != ExpectedCSRArrayValueW[NumCSRPostWIndex]) begin - $display("%t, CSR: %s = %016x, does not equal expected value %016x", $time, ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MTVAL_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - if(!`DontHaltOnCSRMisMatch) fault = 1; - end - end - "sepc": begin - if(`DEBUG_TRACE > 0) begin - 
$display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrs.SEPC_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - end - if (dut.hart.priv.csr.genblk1.csrs.SEPC_REGW != ExpectedCSRArrayValueW[NumCSRPostWIndex]) begin - $display("%t, CSR: %s = %016x, does not equal expected value %016x", $time, ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrs.SEPC_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - if(!`DontHaltOnCSRMisMatch) fault = 1; - end - end - "scause": begin - if(`DEBUG_TRACE > 0) begin - $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrs.genblk1.SCAUSE_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - end - if (dut.hart.priv.csr.genblk1.csrs.genblk1.SCAUSE_REGW != ExpectedCSRArrayValueW[NumCSRPostWIndex]) begin - $display("%t, CSR: %s = %016x, does not equal expected value %016x", $time, ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrs.genblk1.SCAUSE_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - if(!`DontHaltOnCSRMisMatch) fault = 1; - end - end - "stvec": begin - if(`DEBUG_TRACE > 0) begin - $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrs.STVEC_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - end - if (dut.hart.priv.csr.genblk1.csrs.STVEC_REGW != ExpectedCSRArrayValueW[NumCSRPostWIndex]) begin - $display("%t, CSR: %s = %016x, does not equal expected value %016x", $time, ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrs.STVEC_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - if(!`DontHaltOnCSRMisMatch) fault = 1; - end - end - "stval": begin - if(`DEBUG_TRACE > 0) begin - $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrs.genblk1.STVAL_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - end - if (dut.hart.priv.csr.genblk1.csrs.genblk1.STVAL_REGW != 
ExpectedCSRArrayValueW[NumCSRPostWIndex]) begin - $display("%t, CSR: %s = %016x, does not equal expected value %016x", $time, ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrs.genblk1.STVAL_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]); - if(!`DontHaltOnCSRMisMatch) fault = 1; - end - end + "mhartid": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MHARTID_REGW) + "mstatus": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MSTATUS_REGW) + "mtvec": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MTVEC_REGW) + "mip": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MIP_REGW) + "mie": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MIE_REGW) + "mideleg":`CSRwarn(dut.hart.priv.csr.genblk1.csrm.MIDELEG_REGW) + "medeleg": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MEDELEG_REGW) + "mepc": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MEPC_REGW) + "mtval": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MTVAL_REGW) + "sepc": `CSRwarn(dut.hart.priv.csr.genblk1.csrs.SEPC_REGW) + "scause": `CSRwarn(dut.hart.priv.csr.genblk1.csrs.genblk1.SCAUSE_REGW) + "stvec": `CSRwarn(dut.hart.priv.csr.genblk1.csrs.STVEC_REGW) + "stval": `CSRwarn(dut.hart.priv.csr.genblk1.csrs.genblk1.STVAL_REGW) endcase // case (ExpectedCSRArrayW[NumCSRPostWIndex]) end // for (NumCSRPostWIndex = 0; NumCSRPostWIndex < NumCSRW; NumCSRPostWIndex++) if (fault == 1) begin `ERROR end From 2ffdbdf6d29fc91ccae8e8d3aedfc689cfde7ab4 Mon Sep 17 00:00:00 2001 From: bbracker Date: Mon, 27 Sep 2021 03:03:11 -0400 Subject: [PATCH 07/36] condense testbench code; debug_level of 0 means don't check at all --- wally-pipelined/testbench/testbench-linux.sv | 158 ++++++++----------- 1 file changed, 70 insertions(+), 88 deletions(-) diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index a09c0838f..76a1841b8 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -21,12 +21,20 @@ // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +// When letting Wally go for it, let wally generate own interrupts /////////////////////////////////////////// `include "wally-config.vh" `define DEBUG_TRACE 0 -`define DontHaltOnCSRMisMatch 1 +// Debug Levels +// 0: don't check against QEMU +// 1: print disagreements with QEMU, but only halt on PCW disagreements +// 2: halt on any disagreement with QEMU except CSRs +// 3: halt on all disagreements with QEMU +// 4: print memory accesses whenever they happen +// 5: print everything module testbench(); @@ -74,6 +82,7 @@ module testbench(); // P, Instr Checking logic [`XLEN-1:0] PCW; integer data_file_all; + string name; // Write Back stage signals needed for trace compare, but don't actually // exist in CPU. @@ -129,25 +138,24 @@ module testbench(); integer NumCSRPostWIndex; logic [`XLEN-1:0] InstrCountW; - // ------------ - // Error Macros - // ------------ - `define ERROR \ - errorCount +=1; \ - $display("processed %0d instructions with %0d warnings", InstrCountW, warningCount); \ - $stop; + // ------ + // Macros + // ------ `define CSRwarn(CSR) \ begin \ - if(`DEBUG_TRACE > 0) begin \ - $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], CSR, ExpectedCSRArrayValueW[NumCSRPostWIndex]); \ - end \ + $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], CSR, ExpectedCSRArrayValueW[NumCSRPostWIndex]); \ if (CSR != ExpectedCSRArrayValueW[NumCSRPostWIndex]) begin \ $display("%tns, %d instrs: CSR %s = %016x, does not equal expected value %016x", $time, InstrCountW, ExpectedCSRArrayW[NumCSRPostWIndex], CSR, ExpectedCSRArrayValueW[NumCSRPostWIndex]); \ - if(!`DontHaltOnCSRMisMatch) fault = 1; \ + if(`DEBUG_TRACE >= 3) fault = 1; \ end \ end + `define 
checkEQ(NAME, VAL, EXPECTED) \ + if(VAL != EXPECTED) begin \ + $display("%tns, %d instrs: %s %x differs from expected %x", $time, InstrCountW, NAME, VAL, EXPECTED); \ + if ((NAME == "PCW") || (`DEBUG_TRACE >= 2)) fault = 1; \ + end initial begin data_file_all = $fopen({`LINUX_TEST_VECTORS,"all.txt"}, "r"); @@ -159,8 +167,7 @@ module testbench(); assign checkInstrM = dut.hart.ieu.InstrValidM & ~dut.hart.priv.trap.InstrPageFaultM & ~dut.hart.priv.trap.InterruptM & ~dut.hart.StallM; - // trapW will already be invalid in there was an InstrPageFault in the previous instruction. - assign checkInstrW = dut.hart.ieu.InstrValidW & ~dut.hart.StallW; + assign checkInstrW = dut.hart.ieu.InstrValidW & ~dut.hart.StallW; // trapW will already be invalid in there was an InstrPageFault in the previous instruction. // Additonal W stage registers flopenrc #(`XLEN) MemAdrWReg(clk, reset, dut.hart.FlushW, ~dut.hart.StallW, dut.hart.ieu.dp.MemAdrM, MemAdrW); @@ -181,7 +188,7 @@ module testbench(); if (checkInstrM) begin // read 1 line of the trace file matchCount = $fgets(line, data_file_all); - if(`DEBUG_TRACE > 1) $display("Time %t, line %x", $time, line); + if(`DEBUG_TRACE >= 5) $display("Time %t, line %x", $time, line); // extract PC, Instr matchCount = $sscanf(line, "%x %x %s", ExpectedPCM, ExpectedInstrM, textM); //$display("matchCount %d, PCM %x ExpectedInstrM %x textM %x", matchCount, ExpectedPCM, ExpectedInstrM, textM); @@ -326,90 +333,65 @@ module testbench(); if (InstrCountW == waveOnICount) $stop; // print progress message if (InstrCountW % 'd100000 == 0) $display("Reached %d instructions", InstrCountW); - // check PCW fault = 0; - if(PCW != ExpectedPCW) begin - $display("%tns, %d instrs: PCW %016x does not equal ExpectedPCW: %016x", $time, InstrCountW, PCW, ExpectedPCW); - fault = 1; - end - // check instruction value - if(dut.hart.ifu.InstrW != ExpectedInstrW) begin - $display("%tns, %d instrs: InstrW %x does not equal ExpectedInstrW: %x", $time, InstrCountW, 
dut.hart.ifu.InstrW, ExpectedInstrW); - fault = 1; - end - // check the number of instructions - if(dut.hart.priv.csr.genblk1.counters.genblk1.INSTRET_REGW != InstrCountW) begin - $display("%t, Number of instruction Retired = %d does not equal number of instructions in trace = %d", $time, dut.hart.priv.csr.genblk1.counters.genblk1.INSTRET_REGW, InstrCountW); - if(!`DontHaltOnCSRMisMatch) fault = 1; - end - #2; // delay 2 ns. - if(`DEBUG_TRACE > 2) begin - $display("%tns, %d instrs: Reg Write Address %02d ? expected value: %02d", $time, InstrCountW, dut.hart.ieu.dp.regf.a3, ExpectedRegAdrW); - $display("%tns, %d instrs: RF[%02d] %016x ? expected value: %016x", $time, InstrCountW, ExpectedRegAdrW, dut.hart.ieu.dp.regf.rf[ExpectedRegAdrW], ExpectedRegValueW); - end - if (RegWriteW == "GPR") begin - if (dut.hart.ieu.dp.regf.a3 != ExpectedRegAdrW) begin - $display("%tns, %d instrs: Reg Write Address %02d does not equal expected value: %02d", $time, InstrCountW, dut.hart.ieu.dp.regf.a3, ExpectedRegAdrW); - fault = 1; + if (`DEBUG_TRACE >= 1) begin + `checkEQ("PCW",PCW,ExpectedPCW) + `checkEQ("InstrW",dut.hart.ifu.InstrW,ExpectedInstrW) + `checkEQ("Instr Count",dut.hart.priv.csr.genblk1.counters.genblk1.INSTRET_REGW,InstrCountW) + #2; // delay 2 ns. + if(`DEBUG_TRACE >= 5) begin + $display("%tns, %d instrs: Reg Write Address %02d ? expected value: %02d", $time, InstrCountW, dut.hart.ieu.dp.regf.a3, ExpectedRegAdrW); + $display("%tns, %d instrs: RF[%02d] %016x ? 
expected value: %016x", $time, InstrCountW, ExpectedRegAdrW, dut.hart.ieu.dp.regf.rf[ExpectedRegAdrW], ExpectedRegValueW); end - if (dut.hart.ieu.dp.regf.rf[ExpectedRegAdrW] != ExpectedRegValueW) begin - $display("%tns, %d instrs: RF[%02d] %016x does not equal expected value: %016x", $time, InstrCountW, ExpectedRegAdrW, dut.hart.ieu.dp.regf.rf[ExpectedRegAdrW], ExpectedRegValueW); - fault = 1; + if (RegWriteW == "GPR") begin + `checkEQ("Reg Write Address",dut.hart.ieu.dp.regf.a3,ExpectedRegAdrW) + $sformat(name,"RF[%02d]",ExpectedRegAdrW); + `checkEQ(name, dut.hart.ieu.dp.regf.rf[ExpectedRegAdrW], ExpectedRegValueW) end - end - - if (MemOpW.substr(0,2) == "Mem") begin - if(`DEBUG_TRACE > 3) $display("\tMemAdrW: %016x ? expected: %016x", MemAdrW, ExpectedMemAdrW); - // always check address - if (MemAdrW != ExpectedMemAdrW) begin - $display("%tns, %d instrs: MemAdrW %016x does not equal expected value: %016x", $time, InstrCountW, MemAdrW, ExpectedMemAdrW); - fault = 1; - end - // check read data - if(MemOpW == "MemR" || MemOpW == "MemRW") begin - if(`DEBUG_TRACE > 3) $display("\tReadDataW: %016x ? expected: %016x", dut.hart.ieu.dp.ReadDataW, ExpectedMemReadDataW); - if (dut.hart.ieu.dp.ReadDataW != ExpectedMemReadDataW) begin - $display("%tns, %d instrs: ReadDataW %016x does not equal expected value: %016x", $time, InstrCountW, dut.hart.ieu.dp.ReadDataW, ExpectedMemReadDataW); - fault = 1; - end - // check write data - end else if(ExpectedTokens[MarkerIndex] == "MemW" || ExpectedTokens[MarkerIndex] == "MemRW") begin - if(`DEBUG_TRACE > 3) $display("\tWriteDataW: %016x ? expected: %016x", WriteDataW, ExpectedMemWriteDataW); - if (WriteDataW != ExpectedMemWriteDataW) begin - $display("%tns, %d instrs: WriteDataW %016x does not equal expected value: %016x", $time, InstrCountW, WriteDataW, ExpectedMemWriteDataW); - fault = 1; + if (MemOpW.substr(0,2) == "Mem") begin + if(`DEBUG_TRACE >= 4) $display("\tMemAdrW: %016x ? 
expected: %016x", MemAdrW, ExpectedMemAdrW); + `checkEQ("MemAdrW",MemAdrW,ExpectedMemAdrW) + if(MemOpW == "MemR" || MemOpW == "MemRW") begin + if(`DEBUG_TRACE >= 4) $display("\tReadDataW: %016x ? expected: %016x", dut.hart.ieu.dp.ReadDataW, ExpectedMemReadDataW); + `checkEQ("ReadDataW",dut.hart.ieu.dp.ReadDataW,ExpectedMemReadDataW) + end else if(ExpectedTokens[MarkerIndex] == "MemW" || ExpectedTokens[MarkerIndex] == "MemRW") begin + if(`DEBUG_TRACE >= 4) $display("\tWriteDataW: %016x ? expected: %016x", WriteDataW, ExpectedMemWriteDataW); + `checkEQ("WriteDataW",ExpectedMemWriteDataW,ExpectedMemWriteDataW) end end - end - - // check csr - for(NumCSRPostWIndex = 0; NumCSRPostWIndex < NumCSRW; NumCSRPostWIndex++) begin - case(ExpectedCSRArrayW[NumCSRPostWIndex]) - "mhartid": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MHARTID_REGW) - "mstatus": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MSTATUS_REGW) - "mtvec": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MTVEC_REGW) - "mip": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MIP_REGW) - "mie": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MIE_REGW) - "mideleg":`CSRwarn(dut.hart.priv.csr.genblk1.csrm.MIDELEG_REGW) - "medeleg": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MEDELEG_REGW) - "mepc": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MEPC_REGW) - "mtval": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MTVAL_REGW) - "sepc": `CSRwarn(dut.hart.priv.csr.genblk1.csrs.SEPC_REGW) - "scause": `CSRwarn(dut.hart.priv.csr.genblk1.csrs.genblk1.SCAUSE_REGW) - "stvec": `CSRwarn(dut.hart.priv.csr.genblk1.csrs.STVEC_REGW) - "stval": `CSRwarn(dut.hart.priv.csr.genblk1.csrs.genblk1.STVAL_REGW) - endcase // case (ExpectedCSRArrayW[NumCSRPostWIndex]) - end // for (NumCSRPostWIndex = 0; NumCSRPostWIndex < NumCSRW; NumCSRPostWIndex++) - if (fault == 1) begin `ERROR end + // check csr + for(NumCSRPostWIndex = 0; NumCSRPostWIndex < NumCSRW; NumCSRPostWIndex++) begin + case(ExpectedCSRArrayW[NumCSRPostWIndex]) + "mhartid": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MHARTID_REGW) + 
"mstatus": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MSTATUS_REGW) + "mtvec": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MTVEC_REGW) + "mip": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MIP_REGW) + "mie": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MIE_REGW) + "mideleg":`CSRwarn(dut.hart.priv.csr.genblk1.csrm.MIDELEG_REGW) + "medeleg": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MEDELEG_REGW) + "mepc": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MEPC_REGW) + "mtval": `CSRwarn(dut.hart.priv.csr.genblk1.csrm.MTVAL_REGW) + "sepc": `CSRwarn(dut.hart.priv.csr.genblk1.csrs.SEPC_REGW) + "scause": `CSRwarn(dut.hart.priv.csr.genblk1.csrs.genblk1.SCAUSE_REGW) + "stvec": `CSRwarn(dut.hart.priv.csr.genblk1.csrs.STVEC_REGW) + "stval": `CSRwarn(dut.hart.priv.csr.genblk1.csrs.genblk1.STVAL_REGW) + endcase + end + if (fault == 1) begin + errorCount +=1; + $display("processed %0d instructions with %0d warnings", InstrCountW, warningCount); + $stop; + end + end // if (`DEBUG_TRACE >= 1) end // if (checkInstrW) end // always @ (negedge clk) // track the current function FunctionName FunctionName(.reset(reset), - .clk(clk), - .ProgramAddrMapFile(ProgramAddrMapFile), - .ProgramLabelMapFile(ProgramLabelMapFile)); + .clk(clk), + .ProgramAddrMapFile(ProgramAddrMapFile), + .ProgramLabelMapFile(ProgramLabelMapFile)); /////////////////////////////////////////////////////////////////////////////// From a47448c4d000dceb022902874998242b5e4b1411 Mon Sep 17 00:00:00 2001 From: bbracker Date: Tue, 28 Sep 2021 22:33:47 -0400 Subject: [PATCH 08/36] first attemtpt at checkpoint infrastructure --- .../GenerateCheckpoint.sh | 33 ++++++++ .../testvector-generation/checkpoint.gdb | 53 +++++++++++++ .../testvector-generation/fix_mem.py | 5 +- .../testvector-generation/parseNew.py | 5 ++ .../testvector-generation/parseState.py | 79 +++++++++++++++++++ 5 files changed, 173 insertions(+), 2 deletions(-) create mode 100755 wally-pipelined/linux-testgen/testvector-generation/GenerateCheckpoint.sh create mode 100755 
wally-pipelined/linux-testgen/testvector-generation/checkpoint.gdb create mode 100755 wally-pipelined/linux-testgen/testvector-generation/parseState.py diff --git a/wally-pipelined/linux-testgen/testvector-generation/GenerateCheckpoint.sh b/wally-pipelined/linux-testgen/testvector-generation/GenerateCheckpoint.sh new file mode 100755 index 000000000..e9bf5167d --- /dev/null +++ b/wally-pipelined/linux-testgen/testvector-generation/GenerateCheckpoint.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Oftentimes this script runs so long you'll go to sleep. +# But you don't want the script to die when your computer goes to sleep. +# So consider invoking this with nohup (i.e. "nohup ./logAllBuildroot.sh") +# You can run "tail -f nohup.out" to see what would've +# outputted to the terminal if you didn't use nohup + +# use on tera. +customQemu="/courses/e190ax/qemu_sim/rv64_initrd/qemu_experimental/qemu/build/qemu-system-riscv64" +# use on other systems +#customQemu="qemu-system-riscv64" + +instrs=8500000 + +imageDir="../buildroot-image-output" +outDir="../linux-testvectors/checkpoint$instrs" +intermedDir="$outDir/intermediate-outputs" + + +read -p "This scripts is going to create a checkpoint at $instrs instrs. +Is that what you wanted? (y/n) " -n 1 -r +echo +if [[ $REPLY =~ ^[Yy]$ ]] +then + mkdir -p $outDir + mkdir -p $intermedDir + ($customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -rtc clock=vm -icount shift=1 -d nochain,cpu,in_asm -serial /dev/null -singlestep -gdb tcp::1240 -S 2>&1 1>&2 | ./parse_qemu.py | ./parseNew.py | ./remove_dup.awk > $intermedDir/rawTrace.txt) & riscv64-unknown-elf-gdb -x ./checkpoint.gdb -ex "createCheckpoint $instrs \"$intermedDir\"" + ./fix_mem.py "$intermedDir/ramGDB.txt" "$outDir/ram.txt" + ./parseState.py "$outDir" +else + echo "You can change the number of instructions by editing the \"instrs\" variable in this script." + echo "Have a nice day!" 
+fi diff --git a/wally-pipelined/linux-testgen/testvector-generation/checkpoint.gdb b/wally-pipelined/linux-testgen/testvector-generation/checkpoint.gdb new file mode 100755 index 000000000..8ffd8e982 --- /dev/null +++ b/wally-pipelined/linux-testgen/testvector-generation/checkpoint.gdb @@ -0,0 +1,53 @@ +define createCheckpoint + # GDB config + set pagination off + set logging overwrite on + set logging redirect on + set confirm off + + # QEMU must also use TCP port 1240 + target extended-remote :1240 + + # Argument Parsing + set $statePath=$arg1 + set $ramPath=$arg1 + eval "set $statePath = \"%s/stateGDB.txt\"", $statePath + eval "set $ramPath = \"%s/ramGDB.txt\"", $ramPath + + # Symbol file + file ../buildroot-image-output/vmlinux + + # Step over reset vector into actual code + stepi 1000 + # Set breakpoint for where to stop + b do_idle + # Proceed to checkpoint + printf "GDB proceeding to checkpoint at %d instrs\n", $arg0 + stepi $arg0-1000 + + printf "Reached checkpoint at %d instrs\n", $arg0 + + # Log all registers to a file + printf "GDB storing state to %s\n", $statePath + set logging file $statePath + set logging on + info all-registers + set logging off + + # Log main memory to a file + printf "GDB storing RAM to %s\n", $ramPath + set logging file ../linux-testvectors/intermediate-outputs/ramGDB.txt + set logging on + x/134217728xb 0x80000000 + set logging off + + # Continue to checkpoint; stop on the 3rd time + # Should reach login prompt by then + printf "GDB continuing execution to login prompt\n" + ignore 1 2 + c + + printf "GDB reached login prompt!\n" + kill + q +end diff --git a/wally-pipelined/linux-testgen/testvector-generation/fix_mem.py b/wally-pipelined/linux-testgen/testvector-generation/fix_mem.py index 66ff9cf03..0e2fbf82c 100755 --- a/wally-pipelined/linux-testgen/testvector-generation/fix_mem.py +++ b/wally-pipelined/linux-testgen/testvector-generation/fix_mem.py @@ -6,9 +6,10 @@ if len(sys.argv) != 3: inputFile = sys.argv[1] outputFile = 
sys.argv[2] if not os.path.exists(inputFile): - sys.exit('Error input file '+inputFile+'not found') -print('Translating '+os.path.basename(inputFile)+' to '+os.path.basename(outputFile)) + sys.exit('Error input file '+inputFile+' not found') +print('Begin translating '+os.path.basename(inputFile)+' to '+os.path.basename(outputFile)) with open(inputFile, 'r') as f: with open(outputFile, 'w') as w: for l in f: w.write(f'{"".join([x[2:] for x in l.split()[:0:-1]])}\n') +print('Finished translating '+os.path.basename(inputFile)+' to '+os.path.basename(outputFile)+'!') diff --git a/wally-pipelined/linux-testgen/testvector-generation/parseNew.py b/wally-pipelined/linux-testgen/testvector-generation/parseNew.py index 719286a2f..7c2c00245 100755 --- a/wally-pipelined/linux-testgen/testvector-generation/parseNew.py +++ b/wally-pipelined/linux-testgen/testvector-generation/parseNew.py @@ -140,6 +140,7 @@ CurrentInstr = ['0', '0', None, 'other', {'zero': 0, 'ra': 0, 'sp': 0, 'gp': 0, lineNum = 0 StartLine = 0 EndLine = 0 +numInstrs = 0 #instructions = [] MemAdr = 0 lines = [] @@ -195,6 +196,10 @@ for line in fileinput.input('-'): lines.clear() #instructions.append(MoveInstrToRegWriteLst) PrintInstr(MoveInstrToRegWriteLst, sys.stdout) + numInstrs +=1 + if (numInstrs % 1e4 == 0): + sys.stderr.write('Trace parser reached '+str(numInstrs/1.0e6)+' million instrs.\n') + sys.stderr.flush() lineNum += 1 diff --git a/wally-pipelined/linux-testgen/testvector-generation/parseState.py b/wally-pipelined/linux-testgen/testvector-generation/parseState.py new file mode 100755 index 000000000..0a0c8c8b5 --- /dev/null +++ b/wally-pipelined/linux-testgen/testvector-generation/parseState.py @@ -0,0 +1,79 @@ +#! 
/usr/bin/python3 +import sys + +################ +# Helper Funcs # +################ + +def tokenize(string): + tokens = [] + token = '' + whitespace = 0 + prevWhitespace = 0 + for char in string: + prevWhitespace = whitespace + whitespace = char in ' \t\n' + if (whitespace): + if ((not prevWhitespace) and (token != '')): + tokens.append(token) + token = '' + else: + token = token + char + return tokens + +############# +# Main Code # +############# +print("Begin parsing state.") + +# Parse Args +if len(sys.argv) != 2: + sys.exit('Error parseState.py expects 1 arg:\n parseState.py ') +outDir = sys.argv[1] +stateGDBpath = outDir+'/intermediate-outputs/stateGDB1K.txt' +if not os.path.exists(stateGDBpath): + sys.exit('Error input file '+stateGDBpath+'not found') + +listCSRs = ['hpmcounter','pmpcfg','pmpaddr'] +singleCSRs = ['mip','mie','mscratch','mcause','mepc','mtvec','medeleg','mideleg','mcounteren','sscratch','scause','sepc','stvec','sedeleg','sideleg','scounteren','satp','mstatus'] + +# Initialize List CSR files to empty +# (because later we'll open them in append mode) +for csr in listCSRs: + outFileName = 'checkpoint-'+csr.upper() + outFile = open(outDir+outFileName, 'w') + outFile.close() + +# Initial State for Main Loop +currState = 'regFile' +regFileIndex = 0 +outFileName = 'checkpoint-regfile.txt' +outFile = open(outDir+outFileName, 'w') + +# Main Loop +with open(stateGDBpath, 'r') as stateGDB: + for line in stateGDB: + line = tokenize(line) + name = line[0] + val = line[1][2:] + if (currState == 'regFile'): + if (regFileIndex == 0 and name != 'zero'): + print('Whoops! 
Expected regFile registers to come first, starting with zero') + exit(1) + outFile.write(val+'\n') + regFileIndex += 1 + if (regFileIndex == 32): + outFile.close() + currState = 'CSRs' + elif (currState == 'CSRs'): + if name in singleCSRs: + outFileName = 'checkpoint-'+name.upper() + outFile = open(outDir+outFileName, 'w') + outFile.write(val+'\n') + outFile.close() + elif name.strip('0123456789') in listCSRs: + outFileName = 'checkpoint-'+name.upper().strip('0123456789') + outFile = open(outDir+outFileName, 'a') + outFile.write(val+'\n') + outFile.close() +print("Finished parsing state!") From f6ef8e56566f85b06b58f41a9d10db06c55d328c Mon Sep 17 00:00:00 2001 From: bbracker Date: Tue, 28 Sep 2021 23:17:58 -0400 Subject: [PATCH 09/36] first attempt at verilog side of checkpoint functionality --- wally-pipelined/src/generic/flop.sv | 22 +- wally-pipelined/src/ieu/regfile.sv | 8 +- wally-pipelined/src/privileged/csrc.sv | 251 ++++++++++--------- wally-pipelined/src/privileged/csri.sv | 14 +- wally-pipelined/src/privileged/csrm.sv | 60 +++-- wally-pipelined/src/privileged/csrs.sv | 40 ++- wally-pipelined/src/privileged/csrsr.sv | 49 ++-- wally-pipelined/testbench/testbench-linux.sv | 9 +- 8 files changed, 278 insertions(+), 175 deletions(-) diff --git a/wally-pipelined/src/generic/flop.sv b/wally-pipelined/src/generic/flop.sv index cb583de2e..82c64c567 100644 --- a/wally-pipelined/src/generic/flop.sv +++ b/wally-pipelined/src/generic/flop.sv @@ -25,6 +25,8 @@ `include "wally-config.vh" /* verilator lint_off DECLFILENAME */ +// Note that non-zero RESET_VAL's are only ever intended for simulation purposes (to start mid-execution from a checkpoint) + // ordinary flip-flop module flop #(parameter WIDTH = 8) ( @@ -40,10 +42,11 @@ endmodule module flopr #(parameter WIDTH = 8) ( input logic clk, reset, input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q); + output logic [WIDTH-1:0] q, + input var [WIDTH-1:0] RESET_VAL=0); always_ff @(posedge clk, posedge reset) - if 
(reset) q <= #1 0; + if (reset) q <= #1 RESET_VAL; else q <= #1 d; endmodule @@ -61,10 +64,11 @@ endmodule module flopenrc #(parameter WIDTH = 8) ( input logic clk, reset, clear, en, input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q); + output logic [WIDTH-1:0] q, + input var [WIDTH-1:0] RESET_VAL=0); always_ff @(posedge clk, posedge reset) - if (reset) q <= #1 0; + if (reset) q <= #1 RESET_VAL; else if (en) if (clear) q <= #1 0; else q <= #1 d; @@ -74,10 +78,11 @@ endmodule module flopenr #(parameter WIDTH = 8) ( input logic clk, reset, en, input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q); + output logic [WIDTH-1:0] q, + input var [WIDTH-1:0] RESET_VAL=0); always_ff @(posedge clk, posedge reset) - if (reset) q <= #1 0; + if (reset) q <= #1 RESET_VAL; else if (en) q <= #1 d; endmodule @@ -99,10 +104,11 @@ module floprc #(parameter WIDTH = 8) ( input logic reset, input logic clear, input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q); + output logic [WIDTH-1:0] q, + input var RESET_VAL=0); always_ff @(posedge clk, posedge reset) - if (reset) q <= #1 0; + if (reset) q <= #1 RESET_VAL; else if (clear) q <= #1 0; else q <= #1 d; diff --git a/wally-pipelined/src/ieu/regfile.sv b/wally-pipelined/src/ieu/regfile.sv index 73b62a579..8139e0b35 100644 --- a/wally-pipelined/src/ieu/regfile.sv +++ b/wally-pipelined/src/ieu/regfile.sv @@ -44,7 +44,13 @@ module regfile ( // reset is intended for simulation only, not synthesis always_ff @(negedge clk or posedge reset) - if (reset) for(i=1; i<32; i++) rf[i] <= 0; + if (reset) + `ifdef CHECKPOINT + $readmemh({`LINUX_CHECKPOINT,"checkpoint-regfile.txt"}, rf); + `else + for(i=1; i<32; i++) rf[i] <= 0; + `endif + else if (we3) rf[a3] <= wd3; assign #2 rd1 = (a1 != 0) ? 
rf[a1] : 0; diff --git a/wally-pipelined/src/privileged/csrc.sv b/wally-pipelined/src/privileged/csrc.sv index 3b1e544d7..da8aca05b 100644 --- a/wally-pipelined/src/privileged/csrc.sv +++ b/wally-pipelined/src/privileged/csrc.sv @@ -70,24 +70,24 @@ module csrc #(parameter // ... more counters //HPMCOUNTER31H = 12'hC9F ) ( - input logic clk, reset, - input logic StallD, StallE, StallM, StallW, + input logic clk, reset, + input logic StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, - input logic InstrValidM, LoadStallD, CSRMWriteM, - input logic BPPredDirWrongM, - input logic BTBPredPCWrongM, - input logic RASPredPCWrongM, - input logic BPPredClassNonCFIWrongM, - input logic [4:0] InstrClassM, - input logic DCacheMiss, - input logic DCacheAccess, - input logic [11:0] CSRAdrM, - input logic [1:0] PrivilegeModeW, + input logic InstrValidM, LoadStallD, CSRMWriteM, + input logic BPPredDirWrongM, + input logic BTBPredPCWrongM, + input logic RASPredPCWrongM, + input logic BPPredClassNonCFIWrongM, + input logic [4:0] InstrClassM, + input logic DCacheMiss, + input logic DCacheAccess, + input logic [11:0] CSRAdrM, + input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] CSRWriteValM, - input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, - input logic [63:0] MTIME_CLINT, MTIMECMP_CLINT, + input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, + input logic [63:0] MTIME_CLINT, MTIMECMP_CLINT, output logic [`XLEN-1:0] CSRCReadValM, - output logic IllegalCSRCAccessM + output logic IllegalCSRCAccessM ); generate @@ -97,14 +97,22 @@ module csrc #(parameter logic [63:0] HPMCOUNTER3_REGW, HPMCOUNTER4_REGW; // add more performance counters here if desired logic [63:0] CYCLEPlusM, INSTRETPlusM; logic [63:0] HPMCOUNTER3PlusM, HPMCOUNTER4PlusM; - // logic [`XLEN-1:0] NextTIMEM; + // logic [`XLEN-1:0] NextTIMEM; logic [`XLEN-1:0] NextCYCLEM, NextINSTRETM; logic [`XLEN-1:0] NextHPMCOUNTER3M, NextHPMCOUNTER4M; logic 
WriteCYCLEM, WriteINSTRETM; logic WriteHPMCOUNTER3M, WriteHPMCOUNTER4M; logic [4:0] CounterNumM; logic [`COUNTERS-1:3][`XLEN-1:0] HPMCOUNTER_REGW, HPMCOUNTERH_REGW; - logic InstrValidNotFlushedM; + var [`COUNTERS-1:3][`XLEN-1:0] initHPMCOUNTER; + logic InstrValidNotFlushedM; + + initial + `ifdef CHECKPOINT + $readmemh({`LINUX_CHECKPOINT,"checkpoint-HPMCOUNTER.txt"}, initHPMCOUNTER); + `else + initHPMCOUNTER = {(`COUNTERS-3){`XLEN'b0}}; + `endif assign InstrValidNotFlushedM = InstrValidM & ~StallW & ~FlushW; @@ -130,121 +138,116 @@ module csrc #(parameter //assign NextHPMCOUNTER3M = WriteHPMCOUNTER3M ? CSRWriteValM : HPMCOUNTER3PlusM[`XLEN-1:0]; //assign NextHPMCOUNTER4M = WriteHPMCOUNTER4M ? CSRWriteValM : HPMCOUNTER4PlusM[`XLEN-1:0]; - // parameterized number of additional counters - if (`COUNTERS > 3) begin + // parameterized number of additional counters + if (`COUNTERS > 3) begin logic [`COUNTERS-1:3] WriteHPMCOUNTERM; logic [`COUNTERS-1:0] CounterEvent; logic [63:0] /*HPMCOUNTER_REGW[`COUNTERS-1:3], */ HPMCOUNTERPlusM[`COUNTERS-1:3]; logic [`XLEN-1:0] NextHPMCOUNTERM[`COUNTERS-1:3]; genvar i; - // could replace special counters 0-2 with this loop for all counters assign CounterEvent[0] = 1'b1; assign CounterEvent[1] = 1'b0; - if(`QEMU) begin - assign CounterEvent[`COUNTERS-1:2] = 0; - end else begin - - logic LoadStallE, LoadStallM; - - flopenrc #(1) LoadStallEReg(.clk, .reset, .clear(FlushE), .en(~StallE), .d(LoadStallD), .q(LoadStallE)); - flopenrc #(1) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d(LoadStallE), .q(LoadStallM)); - - assign CounterEvent[2] = InstrValidNotFlushedM; - assign CounterEvent[3] = LoadStallM & InstrValidNotFlushedM; - assign CounterEvent[4] = BPPredDirWrongM & InstrValidNotFlushedM; - assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; - assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; - assign CounterEvent[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & InstrValidNotFlushedM; - 
assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; - assign CounterEvent[9] = InstrClassM[3] & InstrValidNotFlushedM; - assign CounterEvent[10] = BPPredClassNonCFIWrongM & InstrValidNotFlushedM; - assign CounterEvent[11] = DCacheAccess & InstrValidNotFlushedM; - assign CounterEvent[12] = DCacheMiss & InstrValidNotFlushedM; - assign CounterEvent[`COUNTERS-1:13] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions - end - - for (i = 3; i < `COUNTERS; i = i+1) begin - assign WriteHPMCOUNTERM[i] = CSRMWriteM && (CSRAdrM == MHPMCOUNTERBASE + i); - assign NextHPMCOUNTERM[i][`XLEN-1:0] = WriteHPMCOUNTERM[i] ? CSRWriteValM : HPMCOUNTERPlusM[i][`XLEN-1:0]; - always @(posedge clk, posedge reset) // ModelSim doesn't like syntax of passing array element to flop - if (reset) HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 0; - else if (~StallW) HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERM[i]; - //flopr #(`XLEN) HPMCOUNTERreg[i](clk, reset, NextHPMCOUNTERM[i], HPMCOUNTER_REGW[i]); - - if (`XLEN==32) begin - logic [`COUNTERS-1:3] WriteHPMCOUNTERHM; - logic [`XLEN-1:0] NextHPMCOUNTERHM[`COUNTERS-1:3]; - assign HPMCOUNTERPlusM[i] = {HPMCOUNTERH_REGW[i], HPMCOUNTER_REGW[i]} + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; - assign WriteHPMCOUNTERHM[i] = CSRMWriteM && (CSRAdrM == MHPMCOUNTERHBASE + i); - assign NextHPMCOUNTERHM[i] = WriteHPMCOUNTERHM[i] ? 
CSRWriteValM : HPMCOUNTERPlusM[i][63:32]; - always @(posedge clk, posedge reset) // ModelSim doesn't like syntax of passing array element to flop - if (reset) HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 0; - else if (~StallW) HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERHM[i]; - //flopr #(`XLEN) HPMCOUNTERHreg[i](clk, reset, NextHPMCOUNTERHM[i], HPMCOUNTER_REGW[i][63:32]); - end else begin - assign HPMCOUNTERPlusM[i] = HPMCOUNTER_REGW[i] + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; - end + if(`QEMU) assign CounterEvent[`COUNTERS-1:2] = 0; + else begin + logic LoadStallE, LoadStallM; + flopenrc #(1) LoadStallEReg(.clk, .reset, .clear(FlushE), .en(~StallE), .d(LoadStallD), .q(LoadStallE)); + flopenrc #(1) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d(LoadStallE), .q(LoadStallM)); + + assign CounterEvent[2] = InstrValidNotFlushedM; + assign CounterEvent[3] = LoadStallM & InstrValidNotFlushedM; + assign CounterEvent[4] = BPPredDirWrongM & InstrValidNotFlushedM; + assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; + assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; + assign CounterEvent[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & InstrValidNotFlushedM; + assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; + assign CounterEvent[9] = InstrClassM[3] & InstrValidNotFlushedM; + assign CounterEvent[10] = BPPredClassNonCFIWrongM & InstrValidNotFlushedM; + assign CounterEvent[11] = DCacheAccess & InstrValidNotFlushedM; + assign CounterEvent[12] = DCacheMiss & InstrValidNotFlushedM; + assign CounterEvent[`COUNTERS-1:13] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions end - end + + for (i = 3; i < `COUNTERS; i = i+1) begin + assign WriteHPMCOUNTERM[i] = CSRMWriteM && (CSRAdrM == MHPMCOUNTERBASE + i); + assign NextHPMCOUNTERM[i][`XLEN-1:0] = WriteHPMCOUNTERM[i] ? 
CSRWriteValM : HPMCOUNTERPlusM[i][`XLEN-1:0]; + always @(posedge clk, posedge reset) // ModelSim doesn't like syntax of passing array element to flop + if (reset) HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 initHPMCOUNTER[i]; + else if (~StallW) HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERM[i]; + //flopr #(`XLEN) HPMCOUNTERreg[i](clk, reset, NextHPMCOUNTERM[i], HPMCOUNTER_REGW[i]); + + if (`XLEN==32) begin + logic [`COUNTERS-1:3] WriteHPMCOUNTERHM; + logic [`XLEN-1:0] NextHPMCOUNTERHM[`COUNTERS-1:3]; + assign HPMCOUNTERPlusM[i] = {HPMCOUNTERH_REGW[i], HPMCOUNTER_REGW[i]} + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; + assign WriteHPMCOUNTERHM[i] = CSRMWriteM && (CSRAdrM == MHPMCOUNTERHBASE + i); + assign NextHPMCOUNTERHM[i] = WriteHPMCOUNTERHM[i] ? CSRWriteValM : HPMCOUNTERPlusM[i][63:32]; + always @(posedge clk, posedge reset) // ModelSim doesn't like syntax of passing array element to flop + if (reset) HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 0; + else if (~StallW) HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERHM[i]; + //flopr #(`XLEN) HPMCOUNTERHreg[i](clk, reset, NextHPMCOUNTERHM[i], HPMCOUNTER_REGW[i][63:32]); + end else begin + assign HPMCOUNTERPlusM[i] = HPMCOUNTER_REGW[i] + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; + end + end + end // Write / update counters // Only the Machine mode versions of the counter CSRs are writable - if (`XLEN==64) begin// 64-bit counters - // flopr #(64) TIMEreg(clk, reset, WriteTIMEM ? 
CSRWriteValM : TIME_REGW + 1, TIME_REGW); // may count off a different clock*** - // flopenr #(64) TIMECMPreg(clk, reset, WriteTIMECMPM, CSRWriteValM, TIMECMP_REGW); - flopr #(64) CYCLEreg(clk, reset, NextCYCLEM, CYCLE_REGW); - flopr #(64) INSTRETreg(clk, reset, NextINSTRETM, INSTRET_REGW); - //flopr #(64) HPMCOUNTER3reg(clk, reset, NextHPMCOUNTER3M, HPMCOUNTER3_REGW); - //flopr #(64) HPMCOUNTER4reg(clk, reset, NextHPMCOUNTER4M, HPMCOUNTER4_REGW); - end else begin // 32-bit low and high counters - logic WriteTIMEHM, WriteTIMECMPHM, WriteCYCLEHM, WriteINSTRETHM; - //logic WriteHPMCOUNTER3HM, WriteHPMCOUNTER4HM; - logic [`XLEN-1:0] NextCYCLEHM, NextTIMEHM, NextINSTRETHM; - //logic [`XLEN-1:0] NextHPMCOUNTER3HM, NextHPMCOUNTER4HM; + if (`XLEN==64) begin// 64-bit counters + // flopr #(64) TIMEreg(clk, reset, WriteTIMEM ? CSRWriteValM : TIME_REGW + 1, TIME_REGW); // may count off a different clock*** + // flopenr #(64) TIMECMPreg(clk, reset, WriteTIMECMPM, CSRWriteValM, TIMECMP_REGW); + flopr #(64) CYCLEreg(clk, reset, NextCYCLEM, CYCLE_REGW); + flopr #(64) INSTRETreg(clk, reset, NextINSTRETM, INSTRET_REGW); + //flopr #(64) HPMCOUNTER3reg(clk, reset, NextHPMCOUNTER3M, HPMCOUNTER3_REGW); + //flopr #(64) HPMCOUNTER4reg(clk, reset, NextHPMCOUNTER4M, HPMCOUNTER4_REGW); + end else begin // 32-bit low and high counters + logic WriteTIMEHM, WriteTIMECMPHM, WriteCYCLEHM, WriteINSTRETHM; + //logic WriteHPMCOUNTER3HM, WriteHPMCOUNTER4HM; + logic [`XLEN-1:0] NextCYCLEHM, NextTIMEHM, NextINSTRETHM; + //logic [`XLEN-1:0] NextHPMCOUNTER3HM, NextHPMCOUNTER4HM; - // Write Enables - // assign WriteTIMEHM = CSRMWriteM && (CSRAdrM == MTIMEH); - // assign WriteTIMECMPHM = CSRMWriteM && (CSRAdrM == MTIMECMPH); - assign WriteCYCLEHM = CSRMWriteM && (CSRAdrM == MCYCLEH); - assign WriteINSTRETHM = CSRMWriteM && (CSRAdrM == MINSTRETH); - //assign WriteHPMCOUNTER3HM = CSRMWriteM && (CSRAdrM == MHPMCOUNTER3H); - //assign WriteHPMCOUNTER4HM = CSRMWriteM && (CSRAdrM == MHPMCOUNTER4H); - assign 
NextCYCLEHM = WriteCYCLEM ? CSRWriteValM : CYCLEPlusM[63:32]; - // assign NextTIMEHM = WriteTIMEHM ? CSRWriteValM : TIMEPlusM[63:32]; - assign NextINSTRETHM = WriteINSTRETHM ? CSRWriteValM : INSTRETPlusM[63:32]; - //assign NextHPMCOUNTER3HM = WriteHPMCOUNTER3HM ? CSRWriteValM : HPMCOUNTER3PlusM[63:32]; - //assign NextHPMCOUNTER4HM = WriteHPMCOUNTER4HM ? CSRWriteValM : HPMCOUNTER4PlusM[63:32]; + // Write Enables + // assign WriteTIMEHM = CSRMWriteM && (CSRAdrM == MTIMEH); + // assign WriteTIMECMPHM = CSRMWriteM && (CSRAdrM == MTIMECMPH); + assign WriteCYCLEHM = CSRMWriteM && (CSRAdrM == MCYCLEH); + assign WriteINSTRETHM = CSRMWriteM && (CSRAdrM == MINSTRETH); + //assign WriteHPMCOUNTER3HM = CSRMWriteM && (CSRAdrM == MHPMCOUNTER3H); + //assign WriteHPMCOUNTER4HM = CSRMWriteM && (CSRAdrM == MHPMCOUNTER4H); + assign NextCYCLEHM = WriteCYCLEM ? CSRWriteValM : CYCLEPlusM[63:32]; + // assign NextTIMEHM = WriteTIMEHM ? CSRWriteValM : TIMEPlusM[63:32]; + assign NextINSTRETHM = WriteINSTRETHM ? CSRWriteValM : INSTRETPlusM[63:32]; + //assign NextHPMCOUNTER3HM = WriteHPMCOUNTER3HM ? CSRWriteValM : HPMCOUNTER3PlusM[63:32]; + //assign NextHPMCOUNTER4HM = WriteHPMCOUNTER4HM ? 
CSRWriteValM : HPMCOUNTER4PlusM[63:32]; - // Counter CSRs - // flopr #(32) TIMEreg(clk, reset, NextTIMEM, TIME_REGW); // may count off a different clock*** - // flopenr #(32) TIMECMPreg(clk, reset, WriteTIMECMPM, CSRWriteValM, TIMECMP_REGW[31:0]); - flopr #(32) CYCLEreg(clk, reset, NextCYCLEM, CYCLE_REGW[31:0]); - flopr #(32) INSTRETreg(clk, reset, NextINSTRETM, INSTRET_REGW[31:0]); - //flopr #(32) HPMCOUNTER3reg(clk, reset, NextHPMCOUNTER3M, HPMCOUNTER3_REGW[31:0]); - //flopr #(32) HPMCOUNTER4reg(clk, reset, NextHPMCOUNTER4M, HPMCOUNTER4_REGW[31:0]); - // flopr #(32) TIMEHreg(clk, reset, NextTIMEHM, TIME_REGW); // may count off a different clock*** - // flopenr #(32) TIMECMPHreg(clk, reset, WriteTIMECMPHM, CSRWriteValM, TIMECMP_REGW[63:32]); - flopr #(32) CYCLEHreg(clk, reset, NextCYCLEHM, CYCLE_REGW[63:32]); - flopr #(32) INSTRETHreg(clk, reset, NextINSTRETHM, INSTRET_REGW[63:32]); - //flopr #(32) HPMCOUNTER3Hreg(clk, reset, NextHPMCOUNTER3HM, HPMCOUNTER3_REGW[63:32]); - //flopr #(32) HPMCOUNTER4Hreg(clk, reset, NextHPMCOUNTER4HM, HPMCOUNTER4_REGW[63:32]); - end + // Counter CSRs + // flopr #(32) TIMEreg(clk, reset, NextTIMEM, TIME_REGW); // may count off a different clock*** + // flopenr #(32) TIMECMPreg(clk, reset, WriteTIMECMPM, CSRWriteValM, TIMECMP_REGW[31:0]); + flopr #(32) CYCLEreg(clk, reset, NextCYCLEM, CYCLE_REGW[31:0]); + flopr #(32) INSTRETreg(clk, reset, NextINSTRETM, INSTRET_REGW[31:0]); + // flopr #(32) HPMCOUNTER3reg(clk, reset, NextHPMCOUNTER3M, HPMCOUNTER3_REGW[31:0]); + // flopr #(32) HPMCOUNTER4reg(clk, reset, NextHPMCOUNTER4M, HPMCOUNTER4_REGW[31:0]); + // flopr #(32) TIMEHreg(clk, reset, NextTIMEHM, TIME_REGW); // may count off a different clock*** + // flopenr #(32) TIMECMPHreg(clk, reset, WriteTIMECMPHM, CSRWriteValM, TIMECMP_REGW[63:32]); + flopr #(32) CYCLEHreg(clk, reset, NextCYCLEHM, CYCLE_REGW[63:32]); + flopr #(32) INSTRETHreg(clk, reset, NextINSTRETHM, INSTRET_REGW[63:32]); + //flopr #(32) HPMCOUNTER3Hreg(clk, reset, 
NextHPMCOUNTER3HM, HPMCOUNTER3_REGW[63:32]); + //flopr #(32) HPMCOUNTER4Hreg(clk, reset, NextHPMCOUNTER4HM, HPMCOUNTER4_REGW[63:32]); + end - // eventually move TIME and TIMECMP to the CLINT -- Ben 06/17/21: sure let's give that a shot! - // run TIME off asynchronous reference clock - // synchronize write enable to TIME - // four phase handshake to synchronize reads from TIME + // eventually move TIME and TIMECMP to the CLINT -- Ben 06/17/21: sure let's give that a shot! + // run TIME off asynchronous reference clock + // synchronize write enable to TIME + // four phase handshake to synchronize reads from TIME - // interrupt on timer compare - // ability to disable optional CSRs + // interrupt on timer compare + // ability to disable optional CSRs // Read Counters, or cause excepiton if insufficient privilege in light of COUNTEREN flags assign CounterNumM = CSRAdrM[4:0]; // which counter to read? if (`XLEN==64) // 64-bit counter reads always_comb - if (PrivilegeModeW == `M_MODE || - MCOUNTEREN_REGW[CounterNumM] && (PrivilegeModeW == `S_MODE || SCOUNTEREN_REGW[CounterNumM])) begin + if (PrivilegeModeW == `M_MODE || MCOUNTEREN_REGW[CounterNumM] && (PrivilegeModeW == `S_MODE || SCOUNTEREN_REGW[CounterNumM])) begin IllegalCSRCAccessM = 0; if (CSRAdrM >= MHPMCOUNTERBASE+3 && CSRAdrM < MHPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CSRAdrM-MHPMCOUNTERBASE]; else if (CSRAdrM >= HPMCOUNTERBASE+3 && CSRAdrM < HPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CSRAdrM-HPMCOUNTERBASE]; @@ -309,7 +312,7 @@ module csrc #(parameter IllegalCSRCAccessM = 1; // no privileges for this csr CSRCReadValM = 0; end - end else begin + end else begin // not `ZICOUNTERS_SUPPORTED assign CSRCReadValM = 0; assign IllegalCSRCAccessM = 1; end @@ -356,20 +359,20 @@ module csrc #(parameter MPHMEVENTBASE = 12'h320, HPMCOUNTERBASE = 12'hC00, HPMCOUNTERHBASE = 12'hC80, - )(input logic clk, reset, - input logic StallD, StallE, StallM, StallW, - input logic InstrValidM, LoadStallD, 
CSRMWriteM, - input logic BPPredDirWrongM, - input logic BTBPredPCWrongM, - input logic RASPredPCWrongM, - input logic BPPredClassNonCFIWrongM, - input logic [4:0] InstrClassM, - input logic [11:0] CSRAdrM, - input logic [1:0] PrivilegeModeW, + )(input logic clk, reset, + input logic StallD, StallE, StallM, StallW, + input logic InstrValidM, LoadStallD, CSRMWriteM, + input logic BPPredDirWrongM, + input logic BTBPredPCWrongM, + input logic RASPredPCWrongM, + input logic BPPredClassNonCFIWrongM, + input logic [4:0] InstrClassM, + input logic [11:0] CSRAdrM, + input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] CSRWriteValM, - input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, + input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, output logic [`XLEN-1:0] CSRCReadValM, - output logic IllegalCSRCAccessM); + output logic IllegalCSRCAccessM); // counters diff --git a/wally-pipelined/src/privileged/csri.sv b/wally-pipelined/src/privileged/csri.sv index 3b54d871a..7ef9051f7 100644 --- a/wally-pipelined/src/privileged/csri.sv +++ b/wally-pipelined/src/privileged/csri.sv @@ -79,14 +79,24 @@ module csri #(parameter assign SIP_WRITE_MASK = 12'h000; end always @(posedge clk, posedge reset) begin // *** I strongly feel that IntInM should go directly to IP_REGW -- Ben 9/7/21 - if (reset) IP_REGW_writeable <= 10'b0; + if (reset) + `ifdef CHECKPOINT + $readmemh({`LINUX_CHECKPOINT,"checkpoint-MIP.txt"}, IP_REGW_writeable); + `else + IP_REGW_writeable <= 10'b0; + `endif else if (WriteMIPM) IP_REGW_writeable <= (CSRWriteValM[9:0] & MIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable else if (WriteSIPM) IP_REGW_writeable <= (CSRWriteValM[9:0] & SIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable // else if (WriteUIPM) IP_REGW = (CSRWriteValM & 12'hBBB) | (NextIPM & 12'h080); // MTIP unclearable else IP_REGW_writeable <= IP_REGW_writeable | IntInM[9:0]; // *** check this turns off interrupts properly even when MIDELEG changes end 
always @(posedge clk, posedge reset) begin - if (reset) IE_REGW <= 12'b0; + if (reset) + `ifdef CHECKPOINT + $readmemh({`LINUX_CHECKPOINT,"checkpoint-MIE.txt"}, IE_REGW); + `else + IE_REGW <= 12'b0; + `endif else if (WriteMIEM) IE_REGW <= (CSRWriteValM[11:0] & 12'hAAA); // MIE controls M and S fields else if (WriteSIEM) IE_REGW <= (CSRWriteValM[11:0] & 12'h222) | (IE_REGW & 12'h888); // only S fields // else if (WriteUIEM) IE_REGW = (CSRWriteValM & 12'h111) | (IE_REGW & 12'hAAA); // only U field diff --git a/wally-pipelined/src/privileged/csrm.sv b/wally-pipelined/src/privileged/csrm.sv index a3baaaec4..f3f5d631b 100644 --- a/wally-pipelined/src/privileged/csrm.sv +++ b/wally-pipelined/src/privileged/csrm.sv @@ -85,15 +85,45 @@ module csrm #(parameter logic [`XLEN-1:0] MISA_REGW, MHARTID_REGW; logic [`XLEN-1:0] MSCRATCH_REGW, MCAUSE_REGW, MTVAL_REGW; - logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM; - logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM; - logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM; + var [`XLEN-1:0] initMSCRATCH, initMCAUSE, initMEPC, initMTVEC, initMEDELEG, initMIDELEG; + var [31:0] initMCOUNTEREN, initMCOUNTINHIBIT; + var [`PMP_ENTRIES-1:0][7:0] initPMPCFG_ARRAY; + var [`PMP_ENTRIES-1:0][`XLEN-1:0] initPMPADDR_ARRAY; + + logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM; + logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM; + logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM; logic [`PMP_ENTRIES-1:0] WritePMPCFGM; logic [`PMP_ENTRIES-1:0] WritePMPADDRM ; logic [`PMP_ENTRIES-1:0] ADDRLocked, CFGLocked; localparam MISA_26 = (`MISA) & 32'h03ffffff; + initial begin + `ifdef CHECKPOINT + $readmemh({`LINUX_CHECKPOINT,"checkpoint-MSCRATCH.txt"}, initMSCRATCH); + $readmemh({`LINUX_CHECKPOINT,"checkpoint-MCAUSE.txt"}, initMCAUSE); + $readmemh({`LINUX_CHECKPOINT,"checkpoint-MEPC.txt"}, initMEPC); + $readmemh({`LINUX_CHECKPOINT,"checkpoint-MTVEC.txt"}, initMTVEC); + $readmemh({`LINUX_CHECKPOINT,"checkpoint-MEDELEG.txt"}, initMEDELEG); + 
$readmemh({`LINUX_CHECKPOINT,"checkpoint-MIDELEG.txt"}, initMIDELEG); + $readmemh({`LINUX_CHECKPOINT,"checkpoint-MCOUNTEREN.txt"}, initMCOUNTEREN); + $readmemh({`LINUX_CHECKPOINT,"checkpoint-PMPCFG.txt"}, initPMPCFG_ARRAY); + $readmemh({`LINUX_CHECKPOINT,"checkpoint-PMPADDR.txt"}, initPMPADDR_ARRAY); + `else + initMSCRATCH = `XLEN'b0; + initMCAUSE = `XLEN'b0; + initMEPC = `XLEN'b0; + initMTVEC = `XLEN'b0; + initMEDELEG = `XLEN'b0; + initMIDELEG = `XLEN'b0; + initMCOUNTEREN = 32'b0; + initMCOUNTINHIBIT = 32'b0; + initPMPCFG_ARRAY = {`PMP_ENTRIES{8'b0}}; + initPMPADDR_ARRAY = {`PMP_ENTRIES{`XLEN'b0}}; + `endif + end + // MISA is hardwired. Spec says it could be written to disable features, but this is not supported by Wally assign MISA_REGW = {(`XLEN == 32 ? 2'b01 : 2'b10), {(`XLEN-28){1'b0}}, MISA_26[25:0]}; @@ -115,33 +145,31 @@ module csrm #(parameter assign IllegalCSRMWriteReadonlyM = CSRMWriteM && (CSRAdrM == MVENDORID || CSRAdrM == MARCHID || CSRAdrM == MIMPID || CSRAdrM == MHARTID); // CSRs - flopenl #(`XLEN) MTVECreg(clk, reset, WriteMTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, `XLEN'b0, MTVEC_REGW); //busybear: changed reset value to 0 + flopenl #(`XLEN) MTVECreg(clk, reset, WriteMTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, initMTVEC, MTVEC_REGW); //busybear: changed reset value to 0 generate if (`S_SUPPORTED | (`U_SUPPORTED & `N_SUPPORTED)) begin // DELEG registers should exist - flopenl #(`XLEN) MEDELEGreg(clk, reset, WriteMEDELEGM, CSRWriteValM & MEDELEG_MASK /*12'h7FF*/, `XLEN'b0, MEDELEG_REGW); - flopenl #(`XLEN) MIDELEGreg(clk, reset, WriteMIDELEGM, CSRWriteValM & MIDELEG_MASK /*12'h222*/, `XLEN'b0, MIDELEG_REGW); + flopenl #(`XLEN) MEDELEGreg(clk, reset, WriteMEDELEGM, CSRWriteValM & MEDELEG_MASK /*12'h7FF*/, initMEDELEG, MEDELEG_REGW); + flopenl #(`XLEN) MIDELEGreg(clk, reset, WriteMIDELEGM, CSRWriteValM & MIDELEG_MASK /*12'h222*/, initMIDELEG, MIDELEG_REGW); end else begin assign MEDELEG_REGW = 0; assign MIDELEG_REGW = 0; 
end endgenerate -// flopenl #(`XLEN) MIPreg(clk, reset, WriteMIPM, CSRWriteValM, zero, MIP_REGW); -// flopenl #(`XLEN) MIEreg(clk, reset, WriteMIEM, CSRWriteValM, zero, MIE_REGW); - flopenr #(`XLEN) MSCRATCHreg(clk, reset, WriteMSCRATCHM, CSRWriteValM, MSCRATCH_REGW); - flopenr #(`XLEN) MEPCreg(clk, reset, WriteMEPCM, NextEPCM, MEPC_REGW); - flopenr #(`XLEN) MCAUSEreg(clk, reset, WriteMCAUSEM, NextCauseM, MCAUSE_REGW); + flopenr #(`XLEN) MSCRATCHreg(clk, reset, WriteMSCRATCHM, CSRWriteValM, MSCRATCH_REGW, initMSCRATCH); + flopenr #(`XLEN) MEPCreg(clk, reset, WriteMEPCM, NextEPCM, MEPC_REGW, initMEPC); + flopenr #(`XLEN) MCAUSEreg(clk, reset, WriteMCAUSEM, NextCauseM, MCAUSE_REGW, initMCAUSE); if(`QEMU) assign MTVAL_REGW = `XLEN'b0; else flopenr #(`XLEN) MTVALreg(clk, reset, WriteMTVALM, NextMtvalM, MTVAL_REGW); generate if (`BUSYBEAR == 1) flopenl #(32) MCOUNTERENreg(clk, reset, WriteMCOUNTERENM, {CSRWriteValM[31:2],1'b0,CSRWriteValM[0]}, 32'b0, MCOUNTEREN_REGW); else if (`BUILDROOT == 1) - flopenl #(32) MCOUNTERENreg(clk, reset, WriteMCOUNTERENM, CSRWriteValM[31:0], 32'h0, MCOUNTEREN_REGW); + flopenl #(32) MCOUNTERENreg(clk, reset, WriteMCOUNTERENM, CSRWriteValM[31:0], initMCOUNTEREN, MCOUNTEREN_REGW); else flopenl #(32) MCOUNTERENreg(clk, reset, WriteMCOUNTERENM, CSRWriteValM[31:0], 32'hFFFFFFFF, MCOUNTEREN_REGW); endgenerate - flopenl #(32) MCOUNTINHIBITreg(clk, reset, WriteMCOUNTINHIBITM, CSRWriteValM[31:0], 32'h0, MCOUNTINHIBIT_REGW); + flopenl #(32) MCOUNTINHIBITreg(clk, reset, WriteMCOUNTINHIBITM, CSRWriteValM[31:0], initMCOUNTINHIBIT, MCOUNTINHIBIT_REGW); // There are PMP_ENTRIES = 0, 16, or 64 PMPADDR registers, each of which has its own flop @@ -158,14 +186,14 @@ module csrm #(parameter assign ADDRLocked[i] = PMPCFG_ARRAY_REGW[i][7] | (PMPCFG_ARRAY_REGW[i+1][7] & PMPCFG_ARRAY_REGW[i+1][4:3] == 2'b01); assign WritePMPADDRM[i] = (CSRMWriteM & (CSRAdrM == (PMPADDR0+i))) & ~StallW & ~ADDRLocked[i]; - flopenr #(`XLEN) PMPADDRreg(clk, reset, WritePMPADDRM[i], 
CSRWriteValM, PMPADDR_ARRAY_REGW[i]); + flopenr #(`XLEN) PMPADDRreg(clk, reset, WritePMPADDRM[i], CSRWriteValM, PMPADDR_ARRAY_REGW[i], initPMPADDR_ARRAY[i]); if (`XLEN==64) begin assign WritePMPCFGM[i] = (CSRMWriteM & (CSRAdrM == (PMPCFG0+2*(i/8)))) & ~StallW & ~CFGLocked[i]; - flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%8)*8+7:(i%8)*8], PMPCFG_ARRAY_REGW[i]); + flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%8)*8+7:(i%8)*8], PMPCFG_ARRAY_REGW[i], initPMPCFG_ARRAY[i]); end else begin assign WritePMPCFGM[i] = (CSRMWriteM & (CSRAdrM == (PMPCFG0+i/4))) & ~StallW & ~CFGLocked[i]; // assign WritePMPCFGHM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i+1)) && ~StallW; - flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%4)*8+7:(i%4)*8], PMPCFG_ARRAY_REGW[i]); + flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%4)*8+7:(i%4)*8], PMPCFG_ARRAY_REGW[i], initPMPCFG_ARRAY[i]); // flopenr #(`XLEN) PMPCFGHreg(clk, reset, WritePMPCFGHM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][63:32]); end end diff --git a/wally-pipelined/src/privileged/csrs.sv b/wally-pipelined/src/privileged/csrs.sv index f3c9a4f94..2fffbced2 100644 --- a/wally-pipelined/src/privileged/csrs.sv +++ b/wally-pipelined/src/privileged/csrs.sv @@ -74,6 +74,30 @@ module csrs #(parameter logic WriteSSCRATCHM, WriteSEPCM; logic WriteSCAUSEM, WriteSTVALM, WriteSATPM, WriteSCOUNTERENM; logic [`XLEN-1:0] SSCRATCH_REGW, SCAUSE_REGW, STVAL_REGW; + var [`XLEN-1:0] initSSCRATCH, initSCAUSE, initSEPC, initSTVEC, initSEDELEG, initSIDELEG, initSATP; + var [31:0] initSCOUNTEREN; + + initial begin + `ifdef CHECKPOINT + $readmemh({`LINUX_CHECKPOINT,"checkpoint-SSCRATCH.txt"}, initSSCRATCH); + $readmemh({`LINUX_CHECKPOINT,"checkpoint-SCAUSE.txt"}, initSCAUSE); + $readmemh({`LINUX_CHECKPOINT,"checkpoint-SEPC.txt"}, initSEPC); + $readmemh({`LINUX_CHECKPOINT,"checkpoint-STVEC.txt"}, initSTVEC); + $readmemh({`LINUX_CHECKPOINT,"checkpoint-SEDELEG.txt"}, initSEDELEG); + 
$readmemh({`LINUX_CHECKPOINT,"checkpoint-SIDELEG.txt"}, initSIDELEG); + $readmemh({`LINUX_CHECKPOINT,"checkpoint-SCOUNTEREN.txt"}, initSCOUNTEREN); + $readmemh({`LINUX_CHECKPOINT,"checkpoint-SATP.txt"}, initSATP); + `else + initSSCRATCH = `XLEN'b0; + initSCAUSE = `XLEN'b0; + initSEPC = `XLEN'b0; + initSTVEC = `XLEN'b0; + initSEDELEG = `XLEN'b0; + initSIDELEG = `XLEN'b0; + initSCOUNTEREN = 32'b0; + initSATP = `XLEN'b0; + `endif + end assign WriteSSTATUSM = CSRSWriteM && (CSRAdrM == SSTATUS) && ~StallW; assign WriteSTVECM = CSRSWriteM && (CSRAdrM == STVEC) && ~StallW; @@ -85,28 +109,28 @@ module csrs #(parameter assign WriteSCOUNTERENM = CSRSWriteM && (CSRAdrM == SCOUNTEREN) && ~StallW; // CSRs - flopenl #(`XLEN) STVECreg(clk, reset, WriteSTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, `XLEN'b0, STVEC_REGW); //busybear: change reset to 0 - flopenr #(`XLEN) SSCRATCHreg(clk, reset, WriteSSCRATCHM, CSRWriteValM, SSCRATCH_REGW); - flopenr #(`XLEN) SEPCreg(clk, reset, WriteSEPCM, NextEPCM, SEPC_REGW); - flopenl #(`XLEN) SCAUSEreg(clk, reset, WriteSCAUSEM, NextCauseM, `XLEN'b0, SCAUSE_REGW); + flopenl #(`XLEN) STVECreg(clk, reset, WriteSTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, initSTVEC, STVEC_REGW); //busybear: change reset to 0 + flopenr #(`XLEN) SSCRATCHreg(clk, reset, WriteSSCRATCHM, CSRWriteValM, SSCRATCH_REGW, initSSCRATCH); + flopenr #(`XLEN) SEPCreg(clk, reset, WriteSEPCM, NextEPCM, SEPC_REGW, initSEPC); + flopenl #(`XLEN) SCAUSEreg(clk, reset, WriteSCAUSEM, NextCauseM, initSCAUSE, SCAUSE_REGW); if(`QEMU) assign STVAL_REGW = `XLEN'b0; else flopenr #(`XLEN) STVALreg(clk, reset, WriteSTVALM, NextMtvalM, STVAL_REGW); if (`MEM_VIRTMEM) - flopenr #(`XLEN) SATPreg(clk, reset, WriteSATPM, CSRWriteValM, SATP_REGW); + flopenr #(`XLEN) SATPreg(clk, reset, WriteSATPM, CSRWriteValM, SATP_REGW, initSATP); else assign SATP_REGW = 0; // hardwire to zero if virtual memory not supported if (`BUSYBEAR == 1) flopenl #(32) SCOUNTERENreg(clk, reset, 
WriteSCOUNTERENM, {CSRWriteValM[31:2],1'b0,CSRWriteValM[0]}, 32'b0, SCOUNTEREN_REGW); else if (`BUILDROOT == 1) - flopenl #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], 32'h0, SCOUNTEREN_REGW); + flopenl #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], initSCOUNTEREN, SCOUNTEREN_REGW); else flopenl #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], 32'hFFFFFFFF, SCOUNTEREN_REGW); if (`N_SUPPORTED) begin logic WriteSEDELEGM, WriteSIDELEGM; assign WriteSEDELEGM = CSRSWriteM && (CSRAdrM == SEDELEG); assign WriteSIDELEGM = CSRSWriteM && (CSRAdrM == SIDELEG); - flopenl #(`XLEN) SEDELEGreg(clk, reset, WriteSEDELEGM, CSRWriteValM & SEDELEG_MASK /* 12'h1FF */, `XLEN'b0, SEDELEG_REGW); - flopenl #(`XLEN) SIDELEGreg(clk, reset, WriteSIDELEGM, CSRWriteValM, `XLEN'b0, SIDELEG_REGW); + flopenl #(`XLEN) SEDELEGreg(clk, reset, WriteSEDELEGM, CSRWriteValM & SEDELEG_MASK /* 12'h1FF */, initSEDELEG, SEDELEG_REGW); + flopenl #(`XLEN) SIDELEGreg(clk, reset, WriteSIDELEGM, CSRWriteValM, initSIDELEG, SIDELEG_REGW); end else begin assign SEDELEG_REGW = 0; assign SIDELEG_REGW = 0; diff --git a/wally-pipelined/src/privileged/csrsr.sv b/wally-pipelined/src/privileged/csrsr.sv index dfa2132d9..113515b26 100644 --- a/wally-pipelined/src/privileged/csrsr.sv +++ b/wally-pipelined/src/privileged/csrsr.sv @@ -46,6 +46,15 @@ module csrsr ( logic [1:0] STATUS_SXL, STATUS_UXL, STATUS_XS, STATUS_FS, STATUS_FS_INT, STATUS_MPP_NEXT; logic STATUS_MPIE, STATUS_SPIE, STATUS_UPIE, STATUS_UIE; + var [`XLEN-1:0] initMSTATUS; + initial begin + `ifdef CHECKPOINT + $readmemh({`LINUX_CHECKPOINT,"checkpoint-MSTATUS.txt"}, initMSTATUS); + `else + initMSTATUS = `XLEN'b0; + `endif + end + // STATUS REGISTER FIELD // See Privileged Spec Section 3.1.6 // Lower privilege status registers are a subset of the full status register @@ -108,23 +117,33 @@ module csrsr ( // registers for STATUS bits // complex register with reset, write enable, and the ability to 
update other bits in certain cases + // these null things are needed to make the following LHS assignment legal; this is probably a crappy way of doing things always_ff @(posedge clk, posedge reset) if (reset) begin - STATUS_TSR_INT <= #1 0; - STATUS_TW_INT <= #1 0; - STATUS_TVM_INT <= #1 0; - STATUS_MXR_INT <= #1 0; - STATUS_SUM_INT <= #1 0; - STATUS_MPRV_INT <= #1 0; // Per Priv 3.3 - STATUS_FS_INT <= #1 0; //2'b01; // busybear: change all these reset values to 0 - STATUS_MPP <= #1 0; //`M_MODE; - STATUS_SPP <= #1 0; //1'b1; - STATUS_MPIE <= #1 0; //1; - STATUS_SPIE <= #1 0; //`S_SUPPORTED; - STATUS_UPIE <= #1 0; // `U_SUPPORTED; - STATUS_MIE <= #1 0; // Per Priv 3.3 - STATUS_SIE <= #1 0; //`S_SUPPORTED; - STATUS_UIE <= #1 0; //`U_SUPPORTED; + //STATUS_TSR_INT <= #1 0; + //STATUS_TW_INT <= #1 0; + //STATUS_TVM_INT <= #1 0; + //STATUS_MXR_INT <= #1 0; + //STATUS_SUM_INT <= #1 0; + //STATUS_MPRV_INT <= #1 0; // Per Priv 3.3 + //STATUS_FS_INT <= #1 0; //2'b01; // busybear: change all these reset values to 0 + //STATUS_MPP <= #1 0; //`M_MODE; + //STATUS_SPP <= #1 0; //1'b1; + //STATUS_MPIE <= #1 0; //1; + //STATUS_SPIE <= #1 0; //`S_SUPPORTED; + //STATUS_UPIE <= #1 0; // `U_SUPPORTED; + //STATUS_MIE <= #1 0; // Per Priv 3.3 + //STATUS_SIE <= #1 0; //`S_SUPPORTED; + //STATUS_UIE <= #1 0; //`U_SUPPORTED; + // + // *** this assumes XLEN == 64. + // I don't like using generates to respond to XLEN. 
+ // I'd rather have an XLEN64 so that we could use `ifdefs -- Ben 9/21 + {STATUS_TSR_INT,STATUS_TW_INT,STATUS_TVM_INT,STATUS_MXR_INT,STATUS_SUM_INT,STATUS_MPRV_INT} <= #1 initMSTATUS[22:17]; + {STATUS_FS_INT,STATUS_MPP} <= #1 initMSTATUS[14:11]; + {STATUS_SPP,STATUS_MPIE} <= #1 initMSTATUS[8:7]; + {STATUS_SPIE,STATUS_UPIE,STATUS_MIE} <= #1 initMSTATUS[5:3]; + {STATUS_SIE,STATUS_UIE} <= #1 initMSTATUS[1:0]; end else if (~StallW) begin if (FRegWriteM | WriteFRMM | WriteFFLAGSM) STATUS_FS_INT <= #12'b11; // mark Float State dirty *** this should happen in M stage, be part of if/else; diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index 76a1841b8..7dbed0e09 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -27,6 +27,9 @@ `include "wally-config.vh" +//`define CHECKPOINT +`define LINUX_CHECKPOINT "../linux-testgen/linux-testvectors/checkpoint1K" + `define DEBUG_TRACE 0 // Debug Levels // 0: don't check against QEMU @@ -408,7 +411,11 @@ module testbench(); // initial loading of memories initial begin $readmemh({`LINUX_TEST_VECTORS,"bootmem.txt"}, dut.uncore.bootdtim.bootdtim.RAM, 'h1000 >> 3); - $readmemh({`LINUX_TEST_VECTORS,"ram.txt"}, dut.uncore.dtim.RAM); + `ifdef CHECKPOINT + $readmemh({`LINUX_CHECKPOINT,"ram.txt"}, dut.uncore.dtim.RAM); + `else + $readmemh({`LINUX_TEST_VECTORS,"ram.txt"}, dut.uncore.dtim.RAM); + `endif $readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.bpred.Predictor.DirPredictor.PHT.memory); $readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.bpred.TargetPredictor.memory.memory); ProgramAddrMapFile = {`LINUX_TEST_VECTORS,"vmlinux.objdump.addr"}; From e1ad732178c1d055e37f42c1cc9c5e483766d15e Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 30 Sep 2021 12:17:24 -0400 Subject: [PATCH 10/36] SRT Division unsigned passing Imperas tests --- wally-pipelined/regression/wally-pipelined.do | 2 +- wally-pipelined/src/generic/abs.sv | 38 
++++++++++++ wally-pipelined/src/generic/neg.sv | 34 ++++++++++ wally-pipelined/src/ieu/forward.sv | 3 +- .../src/muldiv/intdiv_restoring.sv | 45 ++++++++++---- wally-pipelined/src/muldiv/muldiv.sv | 62 +++++-------------- .../testbench/common/instrTrackerTB.sv | 2 +- .../testbench/testbench-imperas.sv | 18 +++--- 8 files changed, 135 insertions(+), 69 deletions(-) create mode 100644 wally-pipelined/src/generic/abs.sv create mode 100644 wally-pipelined/src/generic/neg.sv diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do index 861657308..76e3d8668 100644 --- a/wally-pipelined/regression/wally-pipelined.do +++ b/wally-pipelined/regression/wally-pipelined.do @@ -43,7 +43,7 @@ view wave do ./wave-dos/peripheral-waves.do -- Run the Simulation -#run 5000 +#run 3600 run -all #quit noview ../testbench/testbench-imperas.sv diff --git a/wally-pipelined/src/generic/abs.sv b/wally-pipelined/src/generic/abs.sv new file mode 100644 index 000000000..7ddbd38b6 --- /dev/null +++ b/wally-pipelined/src/generic/abs.sv @@ -0,0 +1,38 @@ +/////////////////////////////////////////// +// abs.sv +// +// Written: David_Harris@hmc.edu 28 September 2021 +// Modified: +// +// Purpose: Absolute value (selects the 2's complement negation of the input when its sign bit is set) +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" + +module abs #(parameter WIDTH = 8) ( + input logic [WIDTH-1:0] a, // operand; bit WIDTH-1 is used as the sign bit + output logic [WIDTH-1:0] y); // a or minusa, chosen by a[WIDTH-1] through absmux + + logic [WIDTH-1:0] minusa; // negated operand produced by the neg instance + + // select -a if sign bit of a is 1 + neg #(WIDTH) neg(a, minusa); // minusa = ~a + 1 (see neg.sv); an input with only the MSB set negates to itself + mux2 #(WIDTH) absmux(a, minusa, a[WIDTH-1], y); // NOTE(review): mux2 is defined elsewhere; presumably passes minusa when the select is 1 -- confirm +endmodule + diff --git a/wally-pipelined/src/generic/neg.sv b/wally-pipelined/src/generic/neg.sv new file mode 100644 index 000000000..a162a5c92 --- /dev/null +++ b/wally-pipelined/src/generic/neg.sv @@ -0,0 +1,34 @@ +/////////////////////////////////////////// +// neg.sv +// +// Written: David_Harris@hmc.edu 28 September 2021 +// Modified: +// +// Purpose: 2's complement negator +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" + +module neg #(parameter WIDTH = 8) ( + input logic [WIDTH-1:0] a, // value to negate + output logic [WIDTH-1:0] y); // ~a + 1, truncated to WIDTH bits + + assign y = ~a + 1; // invert and add one; 0 and the value with only the MSB set both map to themselves +endmodule + diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv index e7b3ff247..47a649f85 100644 --- a/wally-pipelined/src/ieu/forward.sv +++ b/wally-pipelined/src/ieu/forward.sv @@ -33,6 +33,7 @@ module forward( input logic DivDoneE, DivBusyE, input logic FWriteIntE, FWriteIntM, FWriteIntW, input logic SCE, + input logic StallD, // Forwarding controls output logic [1:0] ForwardAE, ForwardBE, output logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD @@ -53,7 +54,7 @@ module forward( // Stall on dependent operations that finish in Mem Stage and can't bypass in time assign FPUStallD = FWriteIntE & ((Rs1D == RdE) | (Rs2D == RdE)); assign LoadStallD = (MemReadE|SCE) & ((Rs1D == RdE) | (Rs2D == RdE)); - assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) | MulDivE | DivBusyE; // *** extend with stalls for divide + assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) /*| DivBusyE */; // *** extend with stalls for divide assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE)); endmodule diff --git a/wally-pipelined/src/muldiv/intdiv_restoring.sv b/wally-pipelined/src/muldiv/intdiv_restoring.sv index 9571ba721..e6118cd40 100644 --- a/wally-pipelined/src/muldiv/intdiv_restoring.sv +++ b/wally-pipelined/src/muldiv/intdiv_restoring.sv @@
-35,32 +35,52 @@ module intdiv_restoring ( output logic [`XLEN-1:0] Q, REM ); - logic [`XLEN-1:0] W, Win, Wshift, Wprime, Wnext, XQ, XQin, XQshift; - logic qi; // curent quotient bit + logic [`XLEN-1:0] W, Win, Wshift, Wprime, Wnext, XQ, XQin, XQshift, Dsaved, Din, Dabs, D2, Xabs, Xinit; + logic qi, qib; // curent quotient bit localparam STEPBITS = $clog2(`XLEN); logic [STEPBITS:0] step; logic div0; + // Setup for signed division + abs #(`XLEN) absd(D, Dabs); + mux2 #(`XLEN) dabsmux(D, Dabs, signedDivide, D2); + flopen #(`XLEN) dsavereg(clk, start, D2, Dsaved); + mux2 #(`XLEN) dfirstmux(Dsaved, D, start, Din); // *** change start to init (could be delayed one from start) + + abs #(`XLEN) absx(X, Xabs); + mux2 #(`XLEN) xabsmux(X, Xabs, signedDivide, Xinit); + // restoring division mux2 #(`XLEN) wmux(W, 0, start, Win); - mux2 #(`XLEN) xmux(0, X, start, XQin); + mux2 #(`XLEN) xmux(XQ, Xinit, start, XQin); assign {Wshift, XQshift} = {Win[`XLEN-2:0], XQin, qi}; - assign {qi, Wprime} = Wshift - D; // subtractor, carry out determines quotient bit + assign {qib, Wprime} = {1'b0, Wshift} + ~{1'b0, Din} + 1; // subtractor, carry out determines quotient bit + assign qi = ~qib; mux2 #(`XLEN) wrestoremux(Wshift, Wprime, qi, Wnext); - flopen #(`XLEN) wreg(clk, busy, Wnext, W); - flopen #(`XLEN) xreg(clk, busy, XQshift, XQ); + flopen #(`XLEN) wreg(clk, start | busy, Wnext, W); + flopen #(`XLEN) xreg(clk, start | busy, XQshift, XQ); + + // save D, which comes from SrcAE forwarding mux and could change because register file read is stalled during divide + // flopen #(`XLEN) dreg(clk, start, D, Dsaved); + //mux2 #(`XLEN) dmux(Dsaved, D, start, Din); // outputs // *** sign extension, handling W instructions - assign div0 = (D == 0); + assign div0 = (Din == 0); mux2 #(`XLEN) qmux(XQ, {`XLEN{1'b1}}, div0, Q); // Q taken from XQ register, or all 1s when dividing by zero mux2 #(`XLEN) remmux(W, X, div0, REM); // REM taken from W register, or from X when dividing by zero + // busy logic - 
always_ff @(posedge clk) - if (start) begin - busy = 1; done = 0; step = 0; - end else if (busy) begin + always_ff @(posedge clk) + if (reset) begin + busy = 0; done = 0; step = 0; + end else if (start) begin + if (div0) done = 1; + else begin + busy = 1; done = 0; step = 1; + end + end else if (busy & ~done) begin step = step + 1; if (step[STEPBITS] | div0) begin // *** early terminate on division by 0 step = 0; @@ -69,7 +89,10 @@ module intdiv_restoring ( end end else if (done) begin done = 0; + busy = 0; end + + endmodule // muldiv diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 714f7ebe7..75ac11f3d 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -50,14 +50,13 @@ module muldiv ( logic [`XLEN*2-1:0] ProdE; logic enable_q; - logic [2:0] Funct3E_Q; + //logic [2:0] Funct3E_Q; logic div0error; // ***unused - logic [`XLEN-1:0] N, D; - logic [`XLEN-1:0] Num0, Den0; + logic [`XLEN-1:0] X, D; + //logic [`XLEN-1:0] Num0, Den0; logic gclk; - logic DivStartE; - logic startDivideE; + logic startDivideE, busy; logic signedDivide; // Multiplier @@ -72,37 +71,21 @@ module muldiv ( // Handle sign extension for W-type instructions if (`XLEN == 64) begin // RV64 has W-type instructions - assign Num0 = W64E ? {{32{SrcAE[31]&signedDivide}}, SrcAE[31:0]} : SrcAE; - assign Den0 = W64E ? {{32{SrcBE[31]&signedDivide}}, SrcBE[31:0]} : SrcBE; + assign X = W64E ? {{32{SrcAE[31]&signedDivide}}, SrcAE[31:0]} : SrcAE; + assign D = W64E ? 
{{32{SrcBE[31]&signedDivide}}, SrcBE[31:0]} : SrcBE; end else begin // RV32 has no W-type instructions - assign Num0 = SrcAE; - assign Den0 = SrcBE; + assign X = SrcAE; + assign D = SrcBE; end - // capture the Numerator/Denominator - flopenrc #(`XLEN) reg_num (.d(Num0), .q(N), - .en(startDivideE), .clear(DivDoneE), - .reset(reset), .clk(~gclk)); - flopenrc #(`XLEN) reg_den (.d(Den0), .q(D), - .en(startDivideE), .clear(DivDoneE), - .reset(reset), .clk(~gclk)); - - assign signedDivide = (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); - intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); - //intdiv_restoring div(.clk, .reset, .signedDivide, .start(startDivideE), .X(N), .D(D), .busy(DivBusyE), .done(DivDoneE), .Q(QuotE), .REM(RemE)); + assign signedDivide = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); + //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); + intdiv_restoring div(.clk, .reset, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); - // Added for debugging of start signal for divide - assign startDivideE = MulDivE&DivStartE&~DivBusyE; - - // capture the start control signals since they are not held constant. - // *** appears to be unused - flopenrc #(3) funct3ereg (.d(Funct3E), - .q(Funct3E_Q), - .en(DivStartE), - .clear(DivDoneE), - .reset(reset), - .clk(clk)); - + // Start a divide when a new division instruction is received and the divider isn't already busy or finishing + assign startDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; + assign DivBusyE = startDivideE | busy; + // Select result always_comb case (Funct3E) @@ -115,19 +98,6 @@ module muldiv ( 3'b110: PrelimResultE = RemE; 3'b111: PrelimResultE = RemE; endcase // case (Funct3E) - - // Start Divide process. 
This simplifies to DivStartE = Funct3E[2]; - always_comb - case (Funct3E) - 3'b000: DivStartE = 1'b0; - 3'b001: DivStartE = 1'b0; - 3'b010: DivStartE = 1'b0; - 3'b011: DivStartE = 1'b0; - 3'b100: DivStartE = 1'b1; - 3'b101: DivStartE = 1'b1; - 3'b110: DivStartE = 1'b1; - 3'b111: DivStartE = 1'b1; - endcase // Handle sign extension for W-type instructions if (`XLEN == 64) begin // RV64 has W-type instructions @@ -136,7 +106,7 @@ module muldiv ( assign MulDivResultE = PrelimResultE; end - flopenrc #(`XLEN) MulDivResultMReg(clk, reset, FlushM, ~StallM, MulDivResultE, MulDivResultM); + flopenrc #(`XLEN) MulDivResultMReg(clk, reset, FlushM, ~StallM, MulDivResultE, MulDivResultM); // could let part of multiplication spill into Memory stage flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW); end else begin // no M instructions supported diff --git a/wally-pipelined/testbench/common/instrTrackerTB.sv b/wally-pipelined/testbench/common/instrTrackerTB.sv index 0283f6502..2b0ca7c50 100644 --- a/wally-pipelined/testbench/common/instrTrackerTB.sv +++ b/wally-pipelined/testbench/common/instrTrackerTB.sv @@ -13,5 +13,5 @@ module instrTrackerTB( instrNameDecTB ddec(InstrD, InstrDName); instrNameDecTB edec(InstrE, InstrEName); instrNameDecTB mdec(InstrM, InstrMName); - instrNameDecTB wdec(InstrW, InstrWName); + instrNameDecTB wdec(InstrW, InstrWName); // *** delete this because InstrW is deleted from IFU endmodule diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 318140769..50b447039 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -318,14 +318,14 @@ string tests32f[] = '{ }; string tests32m[] = '{ + "rv32m/I-DIVU-01", "2000", + "rv32m/I-REMU-01", "2000", + "rv32m/I-DIV-01", "2000", + "rv32m/I-REM-01", "2000", "rv32m/I-MUL-01", "2000", "rv32m/I-MULH-01", "2000", "rv32m/I-MULHSU-01", "2000", - 
"rv32m/I-MULHU-01", "2000", - "rv32m/I-DIV-01", "2000", - "rv32m/I-DIVU-01", "2000", - "rv32m/I-REM-01", "2000", - "rv32m/I-REMU-01", "2000" + "rv32m/I-MULHU-01", "2000" }; string tests32ic[] = '{ @@ -551,12 +551,12 @@ string tests32f[] = '{ tests = tests32p; else begin tests = {tests32i, tests32p};//,tests32periph}; *** broken at the moment - if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic}; + if (`C_SUPPORTED) tests = {tests, tests32ic}; else tests = {tests, tests32iNOc}; - if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m}; if (`F_SUPPORTED) tests = {tests32f, tests}; if (`MEM_VIRTMEM) tests = {tests32mmu, tests}; if (`A_SUPPORTED) tests = {tests32a, tests}; + if (`M_SUPPORTED) tests = {tests32m, tests}; end end end @@ -607,9 +607,9 @@ string tests32f[] = '{ end // read test vectors into memory memfilename = {"../../imperas-riscv-tests/work/", tests[test], ".elf.memfile"}; - romfilename = {"../../imperas-riscv-tests/imperas-boottim.txt"}; +// romfilename = {"../../imperas-riscv-tests/imperas-boottim.txt"}; $readmemh(memfilename, dut.uncore.dtim.RAM); - $readmemh(romfilename, dut.uncore.bootdtim.bootdtim.RAM); +// $readmemh(romfilename, dut.uncore.bootdtim.bootdtim.RAM); ProgramAddrMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.addr"}; ProgramLabelMapFile = {"../../imperas-riscv-tests/work/", tests[test], ".elf.objdump.lab"}; $display("Read memfile %s", memfilename); From 953c8931edde69dff5ea2c9df8375736a3e361cd Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 30 Sep 2021 15:24:43 -0400 Subject: [PATCH 11/36] RV32 div/rem working signed and unsigned --- .../src/muldiv/intdiv_restoring.sv | 44 ++++++++++++++----- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdiv_restoring.sv b/wally-pipelined/src/muldiv/intdiv_restoring.sv index e6118cd40..65f843d83 100644 --- a/wally-pipelined/src/muldiv/intdiv_restoring.sv +++ b/wally-pipelined/src/muldiv/intdiv_restoring.sv @@ -35,11 +35,12 @@ 
module intdiv_restoring ( output logic [`XLEN-1:0] Q, REM ); - logic [`XLEN-1:0] W, Win, Wshift, Wprime, Wnext, XQ, XQin, XQshift, Dsaved, Din, Dabs, D2, Xabs, Xinit; + logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Xabs, Xinit; logic qi, qib; // curent quotient bit localparam STEPBITS = $clog2(`XLEN); logic [STEPBITS:0] step; logic div0; + logic negate, init, startd, SignX, SignD, NegW, NegQ; // Setup for signed division abs #(`XLEN) absd(D, Dabs); @@ -51,14 +52,22 @@ module intdiv_restoring ( mux2 #(`XLEN) xabsmux(X, Xabs, signedDivide, Xinit); // restoring division - mux2 #(`XLEN) wmux(W, 0, start, Win); - mux2 #(`XLEN) xmux(XQ, Xinit, start, XQin); + mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); + mux2 #(`XLEN) xmux(XQ, Xinit, init, XQin); assign {Wshift, XQshift} = {Win[`XLEN-2:0], XQin, qi}; assign {qib, Wprime} = {1'b0, Wshift} + ~{1'b0, Din} + 1; // subtractor, carry out determines quotient bit assign qi = ~qib; - mux2 #(`XLEN) wrestoremux(Wshift, Wprime, qi, Wnext); + mux2 #(`XLEN) wrestoremux(Wshift, Wprime, qi, W2); + + // conditionally negate outputs at end of signed operation + neg #(`XLEN) wneg(W, Wn); + mux2 #(`XLEN) wnegmux(W, Wn, NegW, Wnn); + mux2 #(`XLEN) wnextmux(W2, Wnn, negate, Wnext); + neg #(`XLEN) qneg(XQ, XQn); + mux2 #(`XLEN) qnegmux(XQ, XQn, NegQ, XQnn); + mux2 #(`XLEN) qnextmux(XQshift, XQnn, negate, XQnext); flopen #(`XLEN) wreg(clk, start | busy, Wnext, W); - flopen #(`XLEN) xreg(clk, start | busy, XQshift, XQ); + flopen #(`XLEN) xreg(clk, start | busy, XQnext, XQ); // save D, which comes from SrcAE forwarding mux and could change because register file read is stalled during divide // flopen #(`XLEN) dreg(clk, start, D, Dsaved); @@ -70,29 +79,42 @@ module intdiv_restoring ( mux2 #(`XLEN) qmux(XQ, {`XLEN{1'b1}}, div0, Q); // Q taken from XQ register, or all 1s when dividing by zero mux2 #(`XLEN) remmux(W, X, div0, REM); // REM taken from W register, or from 
X when dividing by zero - // busy logic always_ff @(posedge clk) if (reset) begin - busy = 0; done = 0; step = 0; + busy = 0; done = 0; step = 0; negate = 0; end else if (start) begin if (div0) done = 1; else begin - busy = 1; done = 0; step = 1; + busy = 1; step = 1; end - end else if (busy & ~done) begin + end else if (busy & ~done & ~(startd & signedDivide)) begin // pause one cycle at beginning of signed operations for absolute value step = step + 1; - if (step[STEPBITS] | div0) begin // *** early terminate on division by 0 + if (step[STEPBITS]) begin // *** early terminate on division by 0 + if (signedDivide & ~negate) begin + negate = 1; + end else begin step = 0; busy = 0; + negate = 0; done = 1; + end end end else if (done) begin done = 0; busy = 0; + negate = 0; end - + // initialize on the start cycle for unsigned operations, or one cycle later for signed operations (giving time for abs) + flop #(1) initflop(clk, start, startd); + mux2 #(1) initmux(start, startd, signedDivide, init); + + // save signs of original inputs + flopen #(2) signflops(clk, start, {D[`XLEN-1], X[`XLEN-1]}, {SignD, SignX}); + // On final setp of signed operations, negate outputs as needed + assign NegW = SignX & negate; + assign NegQ = (SignX ^ SignD) & negate; endmodule // muldiv From a8573a27d4013bed82f2ea603928220699ca42d1 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 30 Sep 2021 20:07:22 -0400 Subject: [PATCH 12/36] Integer Divide/Rem passing all regression. 
--- .../src/muldiv/intdiv_restoring.sv | 27 +++++++++++-------- wally-pipelined/src/muldiv/muldiv.sv | 2 +- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdiv_restoring.sv b/wally-pipelined/src/muldiv/intdiv_restoring.sv index 65f843d83..ea337c188 100644 --- a/wally-pipelined/src/muldiv/intdiv_restoring.sv +++ b/wally-pipelined/src/muldiv/intdiv_restoring.sv @@ -28,6 +28,7 @@ module intdiv_restoring ( input logic clk, input logic reset, + input logic StallM, input logic signedDivide, input logic start, input logic [`XLEN-1:0] X, D, @@ -35,7 +36,7 @@ module intdiv_restoring ( output logic [`XLEN-1:0] Q, REM ); - logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Xabs, Xinit; + logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Xabs, X2, Xsaved, Xinit; logic qi, qib; // curent quotient bit localparam STEPBITS = $clog2(`XLEN); logic [STEPBITS:0] step; @@ -46,10 +47,12 @@ module intdiv_restoring ( abs #(`XLEN) absd(D, Dabs); mux2 #(`XLEN) dabsmux(D, Dabs, signedDivide, D2); flopen #(`XLEN) dsavereg(clk, start, D2, Dsaved); - mux2 #(`XLEN) dfirstmux(Dsaved, D, start, Din); // *** change start to init (could be delayed one from start) + mux2 #(`XLEN) dfirstmux(Dsaved, D, start, Din); abs #(`XLEN) absx(X, Xabs); - mux2 #(`XLEN) xabsmux(X, Xabs, signedDivide, Xinit); + mux2 #(`XLEN) xabsmux(X, Xabs, signedDivide & ~div0, X2); // need original X as remainder if doing divide by 0 + flopen #(`XLEN) xsavereg(clk, start, X2, Xsaved); + mux2 #(`XLEN) xfirstmux(Xsaved, X, start, Xinit); // restoring division mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); @@ -61,13 +64,15 @@ module intdiv_restoring ( // conditionally negate outputs at end of signed operation neg #(`XLEN) wneg(W, Wn); - mux2 #(`XLEN) wnegmux(W, Wn, NegW, Wnn); - mux2 #(`XLEN) wnextmux(W2, Wnn, negate, Wnext); +// mux2 
#(`XLEN) wnegmux(W, Wn, NegW, Wnn); +// mux2 #(`XLEN) wnextmux(W2, Wnn, negate, Wnext); + mux2 #(`XLEN) wnextmux(W2, Wn, NegW, Wnext); neg #(`XLEN) qneg(XQ, XQn); - mux2 #(`XLEN) qnegmux(XQ, XQn, NegQ, XQnn); - mux2 #(`XLEN) qnextmux(XQshift, XQnn, negate, XQnext); - flopen #(`XLEN) wreg(clk, start | busy, Wnext, W); - flopen #(`XLEN) xreg(clk, start | busy, XQnext, XQ); +// mux2 #(`XLEN) qnegmux(XQ, XQn, NegQ, XQnn); +// mux2 #(`XLEN) qnextmux(XQshift, XQnn, negate, XQnext); + mux2 #(`XLEN) qnextmux(XQshift, XQn, NegQ, XQnext); + flopen #(`XLEN) wreg(clk, start | (busy & (~negate | NegW)), Wnext, W); + flopen #(`XLEN) xreg(clk, start | (busy & (~negate | NegQ)), XQnext, XQ); // save D, which comes from SrcAE forwarding mux and could change because register file read is stalled during divide // flopen #(`XLEN) dreg(clk, start, D, Dsaved); @@ -77,13 +82,13 @@ module intdiv_restoring ( // *** sign extension, handling W instructions assign div0 = (Din == 0); mux2 #(`XLEN) qmux(XQ, {`XLEN{1'b1}}, div0, Q); // Q taken from XQ register, or all 1s when dividing by zero - mux2 #(`XLEN) remmux(W, X, div0, REM); // REM taken from W register, or from X when dividing by zero + mux2 #(`XLEN) remmux(W, Xsaved, div0, REM); // REM taken from W register, or from X when dividing by zero // busy logic always_ff @(posedge clk) if (reset) begin busy = 0; done = 0; step = 0; negate = 0; - end else if (start) begin + end else if (start & ~StallM) begin if (div0) done = 1; else begin busy = 1; step = 1; diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 75ac11f3d..ca9b47b43 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -80,7 +80,7 @@ module muldiv ( assign signedDivide = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); - 
intdiv_restoring div(.clk, .reset, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); + intdiv_restoring div(.clk, .reset, .StallM, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing assign startDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; From 5022647041d493cbd36c3cc72e0ce98e34ad15b6 Mon Sep 17 00:00:00 2001 From: bbracker Date: Thu, 30 Sep 2021 20:45:26 -0400 Subject: [PATCH 13/36] Revert "first attempt at verilog side of checkpoint functionality" This reverts commit f6ef8e56566f85b06b58f41a9d10db06c55d328c. --- wally-pipelined/src/generic/flop.sv | 22 +- wally-pipelined/src/ieu/regfile.sv | 8 +- wally-pipelined/src/privileged/csrc.sv | 249 +++++++++---------- wally-pipelined/src/privileged/csri.sv | 14 +- wally-pipelined/src/privileged/csrm.sv | 60 ++--- wally-pipelined/src/privileged/csrs.sv | 40 +-- wally-pipelined/src/privileged/csrsr.sv | 49 ++-- wally-pipelined/testbench/testbench-linux.sv | 9 +- 8 files changed, 174 insertions(+), 277 deletions(-) diff --git a/wally-pipelined/src/generic/flop.sv b/wally-pipelined/src/generic/flop.sv index 82c64c567..cb583de2e 100644 --- a/wally-pipelined/src/generic/flop.sv +++ b/wally-pipelined/src/generic/flop.sv @@ -25,8 +25,6 @@ `include "wally-config.vh" /* verilator lint_off DECLFILENAME */ -// Note that non-zero RESET_VAL's are only ever intended for simulation purposes (to start mid-execution from a checkpoint) - // ordinary flip-flop module flop #(parameter WIDTH = 8) ( @@ -42,11 +40,10 @@ endmodule module flopr #(parameter WIDTH = 8) ( input logic clk, reset, input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q, - input var [WIDTH-1:0] RESET_VAL=0); + output logic [WIDTH-1:0] q); always_ff @(posedge clk, posedge reset) - if (reset) q <= #1 RESET_VAL; + if (reset) q <= #1 0; else q <= #1 d; 
endmodule @@ -64,11 +61,10 @@ endmodule module flopenrc #(parameter WIDTH = 8) ( input logic clk, reset, clear, en, input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q, - input var [WIDTH-1:0] RESET_VAL=0); + output logic [WIDTH-1:0] q); always_ff @(posedge clk, posedge reset) - if (reset) q <= #1 RESET_VAL; + if (reset) q <= #1 0; else if (en) if (clear) q <= #1 0; else q <= #1 d; @@ -78,11 +74,10 @@ endmodule module flopenr #(parameter WIDTH = 8) ( input logic clk, reset, en, input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q, - input var [WIDTH-1:0] RESET_VAL=0); + output logic [WIDTH-1:0] q); always_ff @(posedge clk, posedge reset) - if (reset) q <= #1 RESET_VAL; + if (reset) q <= #1 0; else if (en) q <= #1 d; endmodule @@ -104,11 +99,10 @@ module floprc #(parameter WIDTH = 8) ( input logic reset, input logic clear, input logic [WIDTH-1:0] d, - output logic [WIDTH-1:0] q, - input var RESET_VAL=0); + output logic [WIDTH-1:0] q); always_ff @(posedge clk, posedge reset) - if (reset) q <= #1 RESET_VAL; + if (reset) q <= #1 0; else if (clear) q <= #1 0; else q <= #1 d; diff --git a/wally-pipelined/src/ieu/regfile.sv b/wally-pipelined/src/ieu/regfile.sv index 8139e0b35..73b62a579 100644 --- a/wally-pipelined/src/ieu/regfile.sv +++ b/wally-pipelined/src/ieu/regfile.sv @@ -44,13 +44,7 @@ module regfile ( // reset is intended for simulation only, not synthesis always_ff @(negedge clk or posedge reset) - if (reset) - `ifdef CHECKPOINT - $readmemh({`LINUX_CHECKPOINT,"checkpoint-regfile.txt"}, rf); - `else - for(i=1; i<32; i++) rf[i] <= 0; - `endif - + if (reset) for(i=1; i<32; i++) rf[i] <= 0; else if (we3) rf[a3] <= wd3; assign #2 rd1 = (a1 != 0) ? rf[a1] : 0; diff --git a/wally-pipelined/src/privileged/csrc.sv b/wally-pipelined/src/privileged/csrc.sv index da8aca05b..3b1e544d7 100644 --- a/wally-pipelined/src/privileged/csrc.sv +++ b/wally-pipelined/src/privileged/csrc.sv @@ -70,24 +70,24 @@ module csrc #(parameter // ... 
more counters //HPMCOUNTER31H = 12'hC9F ) ( - input logic clk, reset, - input logic StallD, StallE, StallM, StallW, + input logic clk, reset, + input logic StallD, StallE, StallM, StallW, input logic FlushD, FlushE, FlushM, FlushW, - input logic InstrValidM, LoadStallD, CSRMWriteM, - input logic BPPredDirWrongM, - input logic BTBPredPCWrongM, - input logic RASPredPCWrongM, - input logic BPPredClassNonCFIWrongM, - input logic [4:0] InstrClassM, - input logic DCacheMiss, - input logic DCacheAccess, - input logic [11:0] CSRAdrM, - input logic [1:0] PrivilegeModeW, + input logic InstrValidM, LoadStallD, CSRMWriteM, + input logic BPPredDirWrongM, + input logic BTBPredPCWrongM, + input logic RASPredPCWrongM, + input logic BPPredClassNonCFIWrongM, + input logic [4:0] InstrClassM, + input logic DCacheMiss, + input logic DCacheAccess, + input logic [11:0] CSRAdrM, + input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] CSRWriteValM, - input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, - input logic [63:0] MTIME_CLINT, MTIMECMP_CLINT, + input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, + input logic [63:0] MTIME_CLINT, MTIMECMP_CLINT, output logic [`XLEN-1:0] CSRCReadValM, - output logic IllegalCSRCAccessM + output logic IllegalCSRCAccessM ); generate @@ -97,22 +97,14 @@ module csrc #(parameter logic [63:0] HPMCOUNTER3_REGW, HPMCOUNTER4_REGW; // add more performance counters here if desired logic [63:0] CYCLEPlusM, INSTRETPlusM; logic [63:0] HPMCOUNTER3PlusM, HPMCOUNTER4PlusM; - // logic [`XLEN-1:0] NextTIMEM; + // logic [`XLEN-1:0] NextTIMEM; logic [`XLEN-1:0] NextCYCLEM, NextINSTRETM; logic [`XLEN-1:0] NextHPMCOUNTER3M, NextHPMCOUNTER4M; logic WriteCYCLEM, WriteINSTRETM; logic WriteHPMCOUNTER3M, WriteHPMCOUNTER4M; logic [4:0] CounterNumM; logic [`COUNTERS-1:3][`XLEN-1:0] HPMCOUNTER_REGW, HPMCOUNTERH_REGW; - var [`COUNTERS-1:3][`XLEN-1:0] initHPMCOUNTER; - logic InstrValidNotFlushedM; - - initial - `ifdef CHECKPOINT - 
$readmemh({`LINUX_CHECKPOINT,"checkpoint-HPMCOUNTER.txt"}, initHPMCOUNTER); - `else - initHPMCOUNTER = {(`COUNTERS-3){`XLEN'b0}}; - `endif + logic InstrValidNotFlushedM; assign InstrValidNotFlushedM = InstrValidM & ~StallW & ~FlushW; @@ -138,116 +130,121 @@ module csrc #(parameter //assign NextHPMCOUNTER3M = WriteHPMCOUNTER3M ? CSRWriteValM : HPMCOUNTER3PlusM[`XLEN-1:0]; //assign NextHPMCOUNTER4M = WriteHPMCOUNTER4M ? CSRWriteValM : HPMCOUNTER4PlusM[`XLEN-1:0]; - // parameterized number of additional counters - if (`COUNTERS > 3) begin + // parameterized number of additional counters + if (`COUNTERS > 3) begin logic [`COUNTERS-1:3] WriteHPMCOUNTERM; logic [`COUNTERS-1:0] CounterEvent; logic [63:0] /*HPMCOUNTER_REGW[`COUNTERS-1:3], */ HPMCOUNTERPlusM[`COUNTERS-1:3]; logic [`XLEN-1:0] NextHPMCOUNTERM[`COUNTERS-1:3]; genvar i; + // could replace special counters 0-2 with this loop for all counters assign CounterEvent[0] = 1'b1; assign CounterEvent[1] = 1'b0; - if(`QEMU) assign CounterEvent[`COUNTERS-1:2] = 0; - else begin - logic LoadStallE, LoadStallM; - flopenrc #(1) LoadStallEReg(.clk, .reset, .clear(FlushE), .en(~StallE), .d(LoadStallD), .q(LoadStallE)); - flopenrc #(1) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d(LoadStallE), .q(LoadStallM)); - - assign CounterEvent[2] = InstrValidNotFlushedM; - assign CounterEvent[3] = LoadStallM & InstrValidNotFlushedM; - assign CounterEvent[4] = BPPredDirWrongM & InstrValidNotFlushedM; - assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; - assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; - assign CounterEvent[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & InstrValidNotFlushedM; - assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; - assign CounterEvent[9] = InstrClassM[3] & InstrValidNotFlushedM; - assign CounterEvent[10] = BPPredClassNonCFIWrongM & InstrValidNotFlushedM; - assign CounterEvent[11] = DCacheAccess & InstrValidNotFlushedM; - assign 
CounterEvent[12] = DCacheMiss & InstrValidNotFlushedM; - assign CounterEvent[`COUNTERS-1:13] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions - end - - for (i = 3; i < `COUNTERS; i = i+1) begin - assign WriteHPMCOUNTERM[i] = CSRMWriteM && (CSRAdrM == MHPMCOUNTERBASE + i); - assign NextHPMCOUNTERM[i][`XLEN-1:0] = WriteHPMCOUNTERM[i] ? CSRWriteValM : HPMCOUNTERPlusM[i][`XLEN-1:0]; - always @(posedge clk, posedge reset) // ModelSim doesn't like syntax of passing array element to flop - if (reset) HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 initHPMCOUNTER[i]; - else if (~StallW) HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERM[i]; - //flopr #(`XLEN) HPMCOUNTERreg[i](clk, reset, NextHPMCOUNTERM[i], HPMCOUNTER_REGW[i]); + if(`QEMU) begin + assign CounterEvent[`COUNTERS-1:2] = 0; + end else begin - if (`XLEN==32) begin - logic [`COUNTERS-1:3] WriteHPMCOUNTERHM; - logic [`XLEN-1:0] NextHPMCOUNTERHM[`COUNTERS-1:3]; - assign HPMCOUNTERPlusM[i] = {HPMCOUNTERH_REGW[i], HPMCOUNTER_REGW[i]} + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; - assign WriteHPMCOUNTERHM[i] = CSRMWriteM && (CSRAdrM == MHPMCOUNTERHBASE + i); - assign NextHPMCOUNTERHM[i] = WriteHPMCOUNTERHM[i] ? 
CSRWriteValM : HPMCOUNTERPlusM[i][63:32]; - always @(posedge clk, posedge reset) // ModelSim doesn't like syntax of passing array element to flop - if (reset) HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 0; - else if (~StallW) HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERHM[i]; - //flopr #(`XLEN) HPMCOUNTERHreg[i](clk, reset, NextHPMCOUNTERHM[i], HPMCOUNTER_REGW[i][63:32]); - end else begin - assign HPMCOUNTERPlusM[i] = HPMCOUNTER_REGW[i] + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; - end - end + logic LoadStallE, LoadStallM; + + flopenrc #(1) LoadStallEReg(.clk, .reset, .clear(FlushE), .en(~StallE), .d(LoadStallD), .q(LoadStallE)); + flopenrc #(1) LoadStallMReg(.clk, .reset, .clear(FlushM), .en(~StallM), .d(LoadStallE), .q(LoadStallM)); + + assign CounterEvent[2] = InstrValidNotFlushedM; + assign CounterEvent[3] = LoadStallM & InstrValidNotFlushedM; + assign CounterEvent[4] = BPPredDirWrongM & InstrValidNotFlushedM; + assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; + assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; + assign CounterEvent[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & InstrValidNotFlushedM; + assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; + assign CounterEvent[9] = InstrClassM[3] & InstrValidNotFlushedM; + assign CounterEvent[10] = BPPredClassNonCFIWrongM & InstrValidNotFlushedM; + assign CounterEvent[11] = DCacheAccess & InstrValidNotFlushedM; + assign CounterEvent[12] = DCacheMiss & InstrValidNotFlushedM; + assign CounterEvent[`COUNTERS-1:13] = 0; // eventually give these sources, including FP instructions, I$/D$ misses, branches and mispredictions end + + for (i = 3; i < `COUNTERS; i = i+1) begin + assign WriteHPMCOUNTERM[i] = CSRMWriteM && (CSRAdrM == MHPMCOUNTERBASE + i); + assign NextHPMCOUNTERM[i][`XLEN-1:0] = WriteHPMCOUNTERM[i] ? 
CSRWriteValM : HPMCOUNTERPlusM[i][`XLEN-1:0]; + always @(posedge clk, posedge reset) // ModelSim doesn't like syntax of passing array element to flop + if (reset) HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 0; + else if (~StallW) HPMCOUNTER_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERM[i]; + //flopr #(`XLEN) HPMCOUNTERreg[i](clk, reset, NextHPMCOUNTERM[i], HPMCOUNTER_REGW[i]); + + if (`XLEN==32) begin + logic [`COUNTERS-1:3] WriteHPMCOUNTERHM; + logic [`XLEN-1:0] NextHPMCOUNTERHM[`COUNTERS-1:3]; + assign HPMCOUNTERPlusM[i] = {HPMCOUNTERH_REGW[i], HPMCOUNTER_REGW[i]} + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; + assign WriteHPMCOUNTERHM[i] = CSRMWriteM && (CSRAdrM == MHPMCOUNTERHBASE + i); + assign NextHPMCOUNTERHM[i] = WriteHPMCOUNTERHM[i] ? CSRWriteValM : HPMCOUNTERPlusM[i][63:32]; + always @(posedge clk, posedge reset) // ModelSim doesn't like syntax of passing array element to flop + if (reset) HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 0; + else if (~StallW) HPMCOUNTERH_REGW[i][`XLEN-1:0] <= #1 NextHPMCOUNTERHM[i]; + //flopr #(`XLEN) HPMCOUNTERHreg[i](clk, reset, NextHPMCOUNTERHM[i], HPMCOUNTER_REGW[i][63:32]); + end else begin + assign HPMCOUNTERPlusM[i] = HPMCOUNTER_REGW[i] + {63'b0, CounterEvent[i] & ~MCOUNTINHIBIT_REGW[i]}; + end + end + end // Write / update counters // Only the Machine mode versions of the counter CSRs are writable - if (`XLEN==64) begin// 64-bit counters - // flopr #(64) TIMEreg(clk, reset, WriteTIMEM ? 
CSRWriteValM : TIME_REGW + 1, TIME_REGW); // may count off a different clock*** - // flopenr #(64) TIMECMPreg(clk, reset, WriteTIMECMPM, CSRWriteValM, TIMECMP_REGW); - flopr #(64) CYCLEreg(clk, reset, NextCYCLEM, CYCLE_REGW); - flopr #(64) INSTRETreg(clk, reset, NextINSTRETM, INSTRET_REGW); - //flopr #(64) HPMCOUNTER3reg(clk, reset, NextHPMCOUNTER3M, HPMCOUNTER3_REGW); - //flopr #(64) HPMCOUNTER4reg(clk, reset, NextHPMCOUNTER4M, HPMCOUNTER4_REGW); - end else begin // 32-bit low and high counters - logic WriteTIMEHM, WriteTIMECMPHM, WriteCYCLEHM, WriteINSTRETHM; - //logic WriteHPMCOUNTER3HM, WriteHPMCOUNTER4HM; - logic [`XLEN-1:0] NextCYCLEHM, NextTIMEHM, NextINSTRETHM; - //logic [`XLEN-1:0] NextHPMCOUNTER3HM, NextHPMCOUNTER4HM; + if (`XLEN==64) begin// 64-bit counters + // flopr #(64) TIMEreg(clk, reset, WriteTIMEM ? CSRWriteValM : TIME_REGW + 1, TIME_REGW); // may count off a different clock*** + // flopenr #(64) TIMECMPreg(clk, reset, WriteTIMECMPM, CSRWriteValM, TIMECMP_REGW); + flopr #(64) CYCLEreg(clk, reset, NextCYCLEM, CYCLE_REGW); + flopr #(64) INSTRETreg(clk, reset, NextINSTRETM, INSTRET_REGW); + //flopr #(64) HPMCOUNTER3reg(clk, reset, NextHPMCOUNTER3M, HPMCOUNTER3_REGW); + //flopr #(64) HPMCOUNTER4reg(clk, reset, NextHPMCOUNTER4M, HPMCOUNTER4_REGW); + end else begin // 32-bit low and high counters + logic WriteTIMEHM, WriteTIMECMPHM, WriteCYCLEHM, WriteINSTRETHM; + //logic WriteHPMCOUNTER3HM, WriteHPMCOUNTER4HM; + logic [`XLEN-1:0] NextCYCLEHM, NextTIMEHM, NextINSTRETHM; + //logic [`XLEN-1:0] NextHPMCOUNTER3HM, NextHPMCOUNTER4HM; - // Write Enables - // assign WriteTIMEHM = CSRMWriteM && (CSRAdrM == MTIMEH); - // assign WriteTIMECMPHM = CSRMWriteM && (CSRAdrM == MTIMECMPH); - assign WriteCYCLEHM = CSRMWriteM && (CSRAdrM == MCYCLEH); - assign WriteINSTRETHM = CSRMWriteM && (CSRAdrM == MINSTRETH); - //assign WriteHPMCOUNTER3HM = CSRMWriteM && (CSRAdrM == MHPMCOUNTER3H); - //assign WriteHPMCOUNTER4HM = CSRMWriteM && (CSRAdrM == MHPMCOUNTER4H); - assign 
NextCYCLEHM = WriteCYCLEM ? CSRWriteValM : CYCLEPlusM[63:32]; - // assign NextTIMEHM = WriteTIMEHM ? CSRWriteValM : TIMEPlusM[63:32]; - assign NextINSTRETHM = WriteINSTRETHM ? CSRWriteValM : INSTRETPlusM[63:32]; - //assign NextHPMCOUNTER3HM = WriteHPMCOUNTER3HM ? CSRWriteValM : HPMCOUNTER3PlusM[63:32]; - //assign NextHPMCOUNTER4HM = WriteHPMCOUNTER4HM ? CSRWriteValM : HPMCOUNTER4PlusM[63:32]; + // Write Enables + // assign WriteTIMEHM = CSRMWriteM && (CSRAdrM == MTIMEH); + // assign WriteTIMECMPHM = CSRMWriteM && (CSRAdrM == MTIMECMPH); + assign WriteCYCLEHM = CSRMWriteM && (CSRAdrM == MCYCLEH); + assign WriteINSTRETHM = CSRMWriteM && (CSRAdrM == MINSTRETH); + //assign WriteHPMCOUNTER3HM = CSRMWriteM && (CSRAdrM == MHPMCOUNTER3H); + //assign WriteHPMCOUNTER4HM = CSRMWriteM && (CSRAdrM == MHPMCOUNTER4H); + assign NextCYCLEHM = WriteCYCLEM ? CSRWriteValM : CYCLEPlusM[63:32]; + // assign NextTIMEHM = WriteTIMEHM ? CSRWriteValM : TIMEPlusM[63:32]; + assign NextINSTRETHM = WriteINSTRETHM ? CSRWriteValM : INSTRETPlusM[63:32]; + //assign NextHPMCOUNTER3HM = WriteHPMCOUNTER3HM ? CSRWriteValM : HPMCOUNTER3PlusM[63:32]; + //assign NextHPMCOUNTER4HM = WriteHPMCOUNTER4HM ? 
CSRWriteValM : HPMCOUNTER4PlusM[63:32]; - // Counter CSRs - // flopr #(32) TIMEreg(clk, reset, NextTIMEM, TIME_REGW); // may count off a different clock*** - // flopenr #(32) TIMECMPreg(clk, reset, WriteTIMECMPM, CSRWriteValM, TIMECMP_REGW[31:0]); - flopr #(32) CYCLEreg(clk, reset, NextCYCLEM, CYCLE_REGW[31:0]); - flopr #(32) INSTRETreg(clk, reset, NextINSTRETM, INSTRET_REGW[31:0]); - // flopr #(32) HPMCOUNTER3reg(clk, reset, NextHPMCOUNTER3M, HPMCOUNTER3_REGW[31:0]); - // flopr #(32) HPMCOUNTER4reg(clk, reset, NextHPMCOUNTER4M, HPMCOUNTER4_REGW[31:0]); - // flopr #(32) TIMEHreg(clk, reset, NextTIMEHM, TIME_REGW); // may count off a different clock*** - // flopenr #(32) TIMECMPHreg(clk, reset, WriteTIMECMPHM, CSRWriteValM, TIMECMP_REGW[63:32]); - flopr #(32) CYCLEHreg(clk, reset, NextCYCLEHM, CYCLE_REGW[63:32]); - flopr #(32) INSTRETHreg(clk, reset, NextINSTRETHM, INSTRET_REGW[63:32]); - //flopr #(32) HPMCOUNTER3Hreg(clk, reset, NextHPMCOUNTER3HM, HPMCOUNTER3_REGW[63:32]); - //flopr #(32) HPMCOUNTER4Hreg(clk, reset, NextHPMCOUNTER4HM, HPMCOUNTER4_REGW[63:32]); - end + // Counter CSRs + // flopr #(32) TIMEreg(clk, reset, NextTIMEM, TIME_REGW); // may count off a different clock*** + // flopenr #(32) TIMECMPreg(clk, reset, WriteTIMECMPM, CSRWriteValM, TIMECMP_REGW[31:0]); + flopr #(32) CYCLEreg(clk, reset, NextCYCLEM, CYCLE_REGW[31:0]); + flopr #(32) INSTRETreg(clk, reset, NextINSTRETM, INSTRET_REGW[31:0]); + //flopr #(32) HPMCOUNTER3reg(clk, reset, NextHPMCOUNTER3M, HPMCOUNTER3_REGW[31:0]); + //flopr #(32) HPMCOUNTER4reg(clk, reset, NextHPMCOUNTER4M, HPMCOUNTER4_REGW[31:0]); + // flopr #(32) TIMEHreg(clk, reset, NextTIMEHM, TIME_REGW); // may count off a different clock*** + // flopenr #(32) TIMECMPHreg(clk, reset, WriteTIMECMPHM, CSRWriteValM, TIMECMP_REGW[63:32]); + flopr #(32) CYCLEHreg(clk, reset, NextCYCLEHM, CYCLE_REGW[63:32]); + flopr #(32) INSTRETHreg(clk, reset, NextINSTRETHM, INSTRET_REGW[63:32]); + //flopr #(32) HPMCOUNTER3Hreg(clk, reset, 
NextHPMCOUNTER3HM, HPMCOUNTER3_REGW[63:32]); + //flopr #(32) HPMCOUNTER4Hreg(clk, reset, NextHPMCOUNTER4HM, HPMCOUNTER4_REGW[63:32]); + end - // eventually move TIME and TIMECMP to the CLINT -- Ben 06/17/21: sure let's give that a shot! - // run TIME off asynchronous reference clock - // synchronize write enable to TIME - // four phase handshake to synchronize reads from TIME + // eventually move TIME and TIMECMP to the CLINT -- Ben 06/17/21: sure let's give that a shot! + // run TIME off asynchronous reference clock + // synchronize write enable to TIME + // four phase handshake to synchronize reads from TIME - // interrupt on timer compare - // ability to disable optional CSRs + // interrupt on timer compare + // ability to disable optional CSRs // Read Counters, or cause excepiton if insufficient privilege in light of COUNTEREN flags assign CounterNumM = CSRAdrM[4:0]; // which counter to read? if (`XLEN==64) // 64-bit counter reads always_comb - if (PrivilegeModeW == `M_MODE || MCOUNTEREN_REGW[CounterNumM] && (PrivilegeModeW == `S_MODE || SCOUNTEREN_REGW[CounterNumM])) begin + if (PrivilegeModeW == `M_MODE || + MCOUNTEREN_REGW[CounterNumM] && (PrivilegeModeW == `S_MODE || SCOUNTEREN_REGW[CounterNumM])) begin IllegalCSRCAccessM = 0; if (CSRAdrM >= MHPMCOUNTERBASE+3 && CSRAdrM < MHPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CSRAdrM-MHPMCOUNTERBASE]; else if (CSRAdrM >= HPMCOUNTERBASE+3 && CSRAdrM < HPMCOUNTERBASE+`COUNTERS) CSRCReadValM = HPMCOUNTER_REGW[CSRAdrM-HPMCOUNTERBASE]; @@ -312,7 +309,7 @@ module csrc #(parameter IllegalCSRCAccessM = 1; // no privileges for this csr CSRCReadValM = 0; end - end else begin // not `ZICOUNTERS_SUPPORTED + end else begin assign CSRCReadValM = 0; assign IllegalCSRCAccessM = 1; end @@ -359,20 +356,20 @@ module csrc #(parameter MPHMEVENTBASE = 12'h320, HPMCOUNTERBASE = 12'hC00, HPMCOUNTERHBASE = 12'hC80, - )(input logic clk, reset, - input logic StallD, StallE, StallM, StallW, - input logic InstrValidM, LoadStallD, 
CSRMWriteM, - input logic BPPredDirWrongM, - input logic BTBPredPCWrongM, - input logic RASPredPCWrongM, - input logic BPPredClassNonCFIWrongM, - input logic [4:0] InstrClassM, - input logic [11:0] CSRAdrM, - input logic [1:0] PrivilegeModeW, + )(input logic clk, reset, + input logic StallD, StallE, StallM, StallW, + input logic InstrValidM, LoadStallD, CSRMWriteM, + input logic BPPredDirWrongM, + input logic BTBPredPCWrongM, + input logic RASPredPCWrongM, + input logic BPPredClassNonCFIWrongM, + input logic [4:0] InstrClassM, + input logic [11:0] CSRAdrM, + input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] CSRWriteValM, - input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, + input logic [31:0] MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW, output logic [`XLEN-1:0] CSRCReadValM, - output logic IllegalCSRCAccessM); + output logic IllegalCSRCAccessM); // counters diff --git a/wally-pipelined/src/privileged/csri.sv b/wally-pipelined/src/privileged/csri.sv index 7ef9051f7..3b54d871a 100644 --- a/wally-pipelined/src/privileged/csri.sv +++ b/wally-pipelined/src/privileged/csri.sv @@ -79,24 +79,14 @@ module csri #(parameter assign SIP_WRITE_MASK = 12'h000; end always @(posedge clk, posedge reset) begin // *** I strongly feel that IntInM should go directly to IP_REGW -- Ben 9/7/21 - if (reset) - `ifdef CHECKPOINT - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MIP.txt"}, IP_REGW_writeable); - `else - IP_REGW_writeable <= 10'b0; - `endif + if (reset) IP_REGW_writeable <= 10'b0; else if (WriteMIPM) IP_REGW_writeable <= (CSRWriteValM[9:0] & MIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable else if (WriteSIPM) IP_REGW_writeable <= (CSRWriteValM[9:0] & SIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable // else if (WriteUIPM) IP_REGW = (CSRWriteValM & 12'hBBB) | (NextIPM & 12'h080); // MTIP unclearable else IP_REGW_writeable <= IP_REGW_writeable | IntInM[9:0]; // *** check this turns off interrupts properly even when MIDELEG changes end 
always @(posedge clk, posedge reset) begin - if (reset) - `ifdef CHECKPOINT - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MIE.txt"}, IE_REGW); - `else - IE_REGW <= 12'b0; - `endif + if (reset) IE_REGW <= 12'b0; else if (WriteMIEM) IE_REGW <= (CSRWriteValM[11:0] & 12'hAAA); // MIE controls M and S fields else if (WriteSIEM) IE_REGW <= (CSRWriteValM[11:0] & 12'h222) | (IE_REGW & 12'h888); // only S fields // else if (WriteUIEM) IE_REGW = (CSRWriteValM & 12'h111) | (IE_REGW & 12'hAAA); // only U field diff --git a/wally-pipelined/src/privileged/csrm.sv b/wally-pipelined/src/privileged/csrm.sv index f3f5d631b..a3baaaec4 100644 --- a/wally-pipelined/src/privileged/csrm.sv +++ b/wally-pipelined/src/privileged/csrm.sv @@ -85,45 +85,15 @@ module csrm #(parameter logic [`XLEN-1:0] MISA_REGW, MHARTID_REGW; logic [`XLEN-1:0] MSCRATCH_REGW, MCAUSE_REGW, MTVAL_REGW; - var [`XLEN-1:0] initMSCRATCH, initMCAUSE, initMEPC, initMTVEC, initMEDELEG, initMIDELEG; - var [31:0] initMCOUNTEREN, initMCOUNTINHIBIT; - var [`PMP_ENTRIES-1:0][7:0] initPMPCFG_ARRAY; - var [`PMP_ENTRIES-1:0][`XLEN-1:0] initPMPADDR_ARRAY; - - logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM; - logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM; - logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM; + logic WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM; + logic WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM; + logic WriteMCOUNTERENM, WriteMCOUNTINHIBITM; logic [`PMP_ENTRIES-1:0] WritePMPCFGM; logic [`PMP_ENTRIES-1:0] WritePMPADDRM ; logic [`PMP_ENTRIES-1:0] ADDRLocked, CFGLocked; localparam MISA_26 = (`MISA) & 32'h03ffffff; - initial begin - `ifdef CHECKPOINT - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MSCRATCH.txt"}, initMSCRATCH); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MCAUSE.txt"}, initMCAUSE); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MEPC.txt"}, initMEPC); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MTVEC.txt"}, initMTVEC); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MEDELEG.txt"}, initMEDELEG); - 
$readmemh({`LINUX_CHECKPOINT,"checkpoint-MIDELEG.txt"}, initMIDELEG); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MCOUNTEREN.txt"}, initMCOUNTEREN); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-PMPCFG.txt"}, initPMPCFG_ARRAY); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-PMPADDR.txt"}, initPMPADDR_ARRAY); - `else - initMSCRATCH = `XLEN'b0; - initMCAUSE = `XLEN'b0; - initMEPC = `XLEN'b0; - initMTVEC = `XLEN'b0; - initMEDELEG = `XLEN'b0; - initMIDELEG = `XLEN'b0; - initMCOUNTEREN = 32'b0; - initMCOUNTINHIBIT = 32'b0; - initPMPCFG_ARRAY = {`PMP_ENTRIES{8'b0}}; - initPMPADDR_ARRAY = {`PMP_ENTRIES{`XLEN'b0}}; - `endif - end - // MISA is hardwired. Spec says it could be written to disable features, but this is not supported by Wally assign MISA_REGW = {(`XLEN == 32 ? 2'b01 : 2'b10), {(`XLEN-28){1'b0}}, MISA_26[25:0]}; @@ -145,31 +115,33 @@ module csrm #(parameter assign IllegalCSRMWriteReadonlyM = CSRMWriteM && (CSRAdrM == MVENDORID || CSRAdrM == MARCHID || CSRAdrM == MIMPID || CSRAdrM == MHARTID); // CSRs - flopenl #(`XLEN) MTVECreg(clk, reset, WriteMTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, initMTVEC, MTVEC_REGW); //busybear: changed reset value to 0 + flopenl #(`XLEN) MTVECreg(clk, reset, WriteMTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, `XLEN'b0, MTVEC_REGW); //busybear: changed reset value to 0 generate if (`S_SUPPORTED | (`U_SUPPORTED & `N_SUPPORTED)) begin // DELEG registers should exist - flopenl #(`XLEN) MEDELEGreg(clk, reset, WriteMEDELEGM, CSRWriteValM & MEDELEG_MASK /*12'h7FF*/, initMEDELEG, MEDELEG_REGW); - flopenl #(`XLEN) MIDELEGreg(clk, reset, WriteMIDELEGM, CSRWriteValM & MIDELEG_MASK /*12'h222*/, initMIDELEG, MIDELEG_REGW); + flopenl #(`XLEN) MEDELEGreg(clk, reset, WriteMEDELEGM, CSRWriteValM & MEDELEG_MASK /*12'h7FF*/, `XLEN'b0, MEDELEG_REGW); + flopenl #(`XLEN) MIDELEGreg(clk, reset, WriteMIDELEGM, CSRWriteValM & MIDELEG_MASK /*12'h222*/, `XLEN'b0, MIDELEG_REGW); end else begin assign MEDELEG_REGW = 0; assign MIDELEG_REGW = 0; 
end endgenerate - flopenr #(`XLEN) MSCRATCHreg(clk, reset, WriteMSCRATCHM, CSRWriteValM, MSCRATCH_REGW, initMSCRATCH); - flopenr #(`XLEN) MEPCreg(clk, reset, WriteMEPCM, NextEPCM, MEPC_REGW, initMEPC); - flopenr #(`XLEN) MCAUSEreg(clk, reset, WriteMCAUSEM, NextCauseM, MCAUSE_REGW, initMCAUSE); +// flopenl #(`XLEN) MIPreg(clk, reset, WriteMIPM, CSRWriteValM, zero, MIP_REGW); +// flopenl #(`XLEN) MIEreg(clk, reset, WriteMIEM, CSRWriteValM, zero, MIE_REGW); + flopenr #(`XLEN) MSCRATCHreg(clk, reset, WriteMSCRATCHM, CSRWriteValM, MSCRATCH_REGW); + flopenr #(`XLEN) MEPCreg(clk, reset, WriteMEPCM, NextEPCM, MEPC_REGW); + flopenr #(`XLEN) MCAUSEreg(clk, reset, WriteMCAUSEM, NextCauseM, MCAUSE_REGW); if(`QEMU) assign MTVAL_REGW = `XLEN'b0; else flopenr #(`XLEN) MTVALreg(clk, reset, WriteMTVALM, NextMtvalM, MTVAL_REGW); generate if (`BUSYBEAR == 1) flopenl #(32) MCOUNTERENreg(clk, reset, WriteMCOUNTERENM, {CSRWriteValM[31:2],1'b0,CSRWriteValM[0]}, 32'b0, MCOUNTEREN_REGW); else if (`BUILDROOT == 1) - flopenl #(32) MCOUNTERENreg(clk, reset, WriteMCOUNTERENM, CSRWriteValM[31:0], initMCOUNTEREN, MCOUNTEREN_REGW); + flopenl #(32) MCOUNTERENreg(clk, reset, WriteMCOUNTERENM, CSRWriteValM[31:0], 32'h0, MCOUNTEREN_REGW); else flopenl #(32) MCOUNTERENreg(clk, reset, WriteMCOUNTERENM, CSRWriteValM[31:0], 32'hFFFFFFFF, MCOUNTEREN_REGW); endgenerate - flopenl #(32) MCOUNTINHIBITreg(clk, reset, WriteMCOUNTINHIBITM, CSRWriteValM[31:0], initMCOUNTINHIBIT, MCOUNTINHIBIT_REGW); + flopenl #(32) MCOUNTINHIBITreg(clk, reset, WriteMCOUNTINHIBITM, CSRWriteValM[31:0], 32'h0, MCOUNTINHIBIT_REGW); // There are PMP_ENTRIES = 0, 16, or 64 PMPADDR registers, each of which has its own flop @@ -186,14 +158,14 @@ module csrm #(parameter assign ADDRLocked[i] = PMPCFG_ARRAY_REGW[i][7] | (PMPCFG_ARRAY_REGW[i+1][7] & PMPCFG_ARRAY_REGW[i+1][4:3] == 2'b01); assign WritePMPADDRM[i] = (CSRMWriteM & (CSRAdrM == (PMPADDR0+i))) & ~StallW & ~ADDRLocked[i]; - flopenr #(`XLEN) PMPADDRreg(clk, reset, WritePMPADDRM[i], 
CSRWriteValM, PMPADDR_ARRAY_REGW[i], initPMPADDR_ARRAY[i]); + flopenr #(`XLEN) PMPADDRreg(clk, reset, WritePMPADDRM[i], CSRWriteValM, PMPADDR_ARRAY_REGW[i]); if (`XLEN==64) begin assign WritePMPCFGM[i] = (CSRMWriteM & (CSRAdrM == (PMPCFG0+2*(i/8)))) & ~StallW & ~CFGLocked[i]; - flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%8)*8+7:(i%8)*8], PMPCFG_ARRAY_REGW[i], initPMPCFG_ARRAY[i]); + flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%8)*8+7:(i%8)*8], PMPCFG_ARRAY_REGW[i]); end else begin assign WritePMPCFGM[i] = (CSRMWriteM & (CSRAdrM == (PMPCFG0+i/4))) & ~StallW & ~CFGLocked[i]; // assign WritePMPCFGHM[i] = (CSRMWriteM && (CSRAdrM == PMPCFG0+2*i+1)) && ~StallW; - flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%4)*8+7:(i%4)*8], PMPCFG_ARRAY_REGW[i], initPMPCFG_ARRAY[i]); + flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%4)*8+7:(i%4)*8], PMPCFG_ARRAY_REGW[i]); // flopenr #(`XLEN) PMPCFGHreg(clk, reset, WritePMPCFGHM[i], CSRWriteValM, PMPCFG_ARRAY_REGW[i][63:32]); end end diff --git a/wally-pipelined/src/privileged/csrs.sv b/wally-pipelined/src/privileged/csrs.sv index 2fffbced2..f3c9a4f94 100644 --- a/wally-pipelined/src/privileged/csrs.sv +++ b/wally-pipelined/src/privileged/csrs.sv @@ -74,30 +74,6 @@ module csrs #(parameter logic WriteSSCRATCHM, WriteSEPCM; logic WriteSCAUSEM, WriteSTVALM, WriteSATPM, WriteSCOUNTERENM; logic [`XLEN-1:0] SSCRATCH_REGW, SCAUSE_REGW, STVAL_REGW; - var [`XLEN-1:0] initSSCRATCH, initSCAUSE, initSEPC, initSTVEC, initSEDELEG, initSIDELEG, initSATP; - var [31:0] initSCOUNTEREN; - - initial begin - `ifdef CHECKPOINT - $readmemh({`LINUX_CHECKPOINT,"checkpoint-SSCRATCH.txt"}, initSSCRATCH); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-SCAUSE.txt"}, initSCAUSE); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-SEPC.txt"}, initSEPC); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-STVEC.txt"}, initSTVEC); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-SEDELEG.txt"}, initSEDELEG); - 
$readmemh({`LINUX_CHECKPOINT,"checkpoint-SIDELEG.txt"}, initSIDELEG); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-SCOUNTEREN.txt"}, initSCOUNTEREN); - $readmemh({`LINUX_CHECKPOINT,"checkpoint-SATP.txt"}, initSATP); - `else - initSSCRATCH = `XLEN'b0; - initSCAUSE = `XLEN'b0; - initSEPC = `XLEN'b0; - initSTVEC = `XLEN'b0; - initSEDELEG = `XLEN'b0; - initSIDELEG = `XLEN'b0; - initSCOUNTEREN = 32'b0; - initSATP = `XLEN'b0; - `endif - end assign WriteSSTATUSM = CSRSWriteM && (CSRAdrM == SSTATUS) && ~StallW; assign WriteSTVECM = CSRSWriteM && (CSRAdrM == STVEC) && ~StallW; @@ -109,28 +85,28 @@ module csrs #(parameter assign WriteSCOUNTERENM = CSRSWriteM && (CSRAdrM == SCOUNTEREN) && ~StallW; // CSRs - flopenl #(`XLEN) STVECreg(clk, reset, WriteSTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, initSTVEC, STVEC_REGW); //busybear: change reset to 0 - flopenr #(`XLEN) SSCRATCHreg(clk, reset, WriteSSCRATCHM, CSRWriteValM, SSCRATCH_REGW, initSSCRATCH); - flopenr #(`XLEN) SEPCreg(clk, reset, WriteSEPCM, NextEPCM, SEPC_REGW, initSEPC); - flopenl #(`XLEN) SCAUSEreg(clk, reset, WriteSCAUSEM, NextCauseM, initSCAUSE, SCAUSE_REGW); + flopenl #(`XLEN) STVECreg(clk, reset, WriteSTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, `XLEN'b0, STVEC_REGW); //busybear: change reset to 0 + flopenr #(`XLEN) SSCRATCHreg(clk, reset, WriteSSCRATCHM, CSRWriteValM, SSCRATCH_REGW); + flopenr #(`XLEN) SEPCreg(clk, reset, WriteSEPCM, NextEPCM, SEPC_REGW); + flopenl #(`XLEN) SCAUSEreg(clk, reset, WriteSCAUSEM, NextCauseM, `XLEN'b0, SCAUSE_REGW); if(`QEMU) assign STVAL_REGW = `XLEN'b0; else flopenr #(`XLEN) STVALreg(clk, reset, WriteSTVALM, NextMtvalM, STVAL_REGW); if (`MEM_VIRTMEM) - flopenr #(`XLEN) SATPreg(clk, reset, WriteSATPM, CSRWriteValM, SATP_REGW, initSATP); + flopenr #(`XLEN) SATPreg(clk, reset, WriteSATPM, CSRWriteValM, SATP_REGW); else assign SATP_REGW = 0; // hardwire to zero if virtual memory not supported if (`BUSYBEAR == 1) flopenl #(32) SCOUNTERENreg(clk, reset, 
WriteSCOUNTERENM, {CSRWriteValM[31:2],1'b0,CSRWriteValM[0]}, 32'b0, SCOUNTEREN_REGW); else if (`BUILDROOT == 1) - flopenl #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], initSCOUNTEREN, SCOUNTEREN_REGW); + flopenl #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], 32'h0, SCOUNTEREN_REGW); else flopenl #(32) SCOUNTERENreg(clk, reset, WriteSCOUNTERENM, CSRWriteValM[31:0], 32'hFFFFFFFF, SCOUNTEREN_REGW); if (`N_SUPPORTED) begin logic WriteSEDELEGM, WriteSIDELEGM; assign WriteSEDELEGM = CSRSWriteM && (CSRAdrM == SEDELEG); assign WriteSIDELEGM = CSRSWriteM && (CSRAdrM == SIDELEG); - flopenl #(`XLEN) SEDELEGreg(clk, reset, WriteSEDELEGM, CSRWriteValM & SEDELEG_MASK /* 12'h1FF */, initSEDELEG, SEDELEG_REGW); - flopenl #(`XLEN) SIDELEGreg(clk, reset, WriteSIDELEGM, CSRWriteValM, initSIDELEG, SIDELEG_REGW); + flopenl #(`XLEN) SEDELEGreg(clk, reset, WriteSEDELEGM, CSRWriteValM & SEDELEG_MASK /* 12'h1FF */, `XLEN'b0, SEDELEG_REGW); + flopenl #(`XLEN) SIDELEGreg(clk, reset, WriteSIDELEGM, CSRWriteValM, `XLEN'b0, SIDELEG_REGW); end else begin assign SEDELEG_REGW = 0; assign SIDELEG_REGW = 0; diff --git a/wally-pipelined/src/privileged/csrsr.sv b/wally-pipelined/src/privileged/csrsr.sv index 113515b26..dfa2132d9 100644 --- a/wally-pipelined/src/privileged/csrsr.sv +++ b/wally-pipelined/src/privileged/csrsr.sv @@ -46,15 +46,6 @@ module csrsr ( logic [1:0] STATUS_SXL, STATUS_UXL, STATUS_XS, STATUS_FS, STATUS_FS_INT, STATUS_MPP_NEXT; logic STATUS_MPIE, STATUS_SPIE, STATUS_UPIE, STATUS_UIE; - var [`XLEN-1:0] initMSTATUS; - initial begin - `ifdef CHECKPOINT - $readmemh({`LINUX_CHECKPOINT,"checkpoint-MSTATUS.txt"}, initMSTATUS); - `else - initMSTATUS = `XLEN'b0; - `endif - end - // STATUS REGISTER FIELD // See Privileged Spec Section 3.1.6 // Lower privilege status registers are a subset of the full status register @@ -117,33 +108,23 @@ module csrsr ( // registers for STATUS bits // complex register with reset, write enable, and the ability to 
update other bits in certain cases - // these null things are needed to make the following LHS assignment legal; this is probably a crappy way of doing things always_ff @(posedge clk, posedge reset) if (reset) begin - //STATUS_TSR_INT <= #1 0; - //STATUS_TW_INT <= #1 0; - //STATUS_TVM_INT <= #1 0; - //STATUS_MXR_INT <= #1 0; - //STATUS_SUM_INT <= #1 0; - //STATUS_MPRV_INT <= #1 0; // Per Priv 3.3 - //STATUS_FS_INT <= #1 0; //2'b01; // busybear: change all these reset values to 0 - //STATUS_MPP <= #1 0; //`M_MODE; - //STATUS_SPP <= #1 0; //1'b1; - //STATUS_MPIE <= #1 0; //1; - //STATUS_SPIE <= #1 0; //`S_SUPPORTED; - //STATUS_UPIE <= #1 0; // `U_SUPPORTED; - //STATUS_MIE <= #1 0; // Per Priv 3.3 - //STATUS_SIE <= #1 0; //`S_SUPPORTED; - //STATUS_UIE <= #1 0; //`U_SUPPORTED; - // - // *** this assumes XLEN == 64. - // I don't like using generates to respond to XLEN. - // I'd rather have an XLEN64 so that we could use `ifdefs -- Ben 9/21 - {STATUS_TSR_INT,STATUS_TW_INT,STATUS_TVM_INT,STATUS_MXR_INT,STATUS_SUM_INT,STATUS_MPRV_INT} <= #1 initMSTATUS[22:17]; - {STATUS_FS_INT,STATUS_MPP} <= #1 initMSTATUS[14:11]; - {STATUS_SPP,STATUS_MPIE} <= #1 initMSTATUS[8:7]; - {STATUS_SPIE,STATUS_UPIE,STATUS_MIE} <= #1 initMSTATUS[5:3]; - {STATUS_SIE,STATUS_UIE} <= #1 initMSTATUS[1:0]; + STATUS_TSR_INT <= #1 0; + STATUS_TW_INT <= #1 0; + STATUS_TVM_INT <= #1 0; + STATUS_MXR_INT <= #1 0; + STATUS_SUM_INT <= #1 0; + STATUS_MPRV_INT <= #1 0; // Per Priv 3.3 + STATUS_FS_INT <= #1 0; //2'b01; // busybear: change all these reset values to 0 + STATUS_MPP <= #1 0; //`M_MODE; + STATUS_SPP <= #1 0; //1'b1; + STATUS_MPIE <= #1 0; //1; + STATUS_SPIE <= #1 0; //`S_SUPPORTED; + STATUS_UPIE <= #1 0; // `U_SUPPORTED; + STATUS_MIE <= #1 0; // Per Priv 3.3 + STATUS_SIE <= #1 0; //`S_SUPPORTED; + STATUS_UIE <= #1 0; //`U_SUPPORTED; end else if (~StallW) begin if (FRegWriteM | WriteFRMM | WriteFFLAGSM) STATUS_FS_INT <= #12'b11; // mark Float State dirty *** this should happen in M stage, be part of 
if/else; diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index 7dbed0e09..76a1841b8 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -27,9 +27,6 @@ `include "wally-config.vh" -//`define CHECKPOINT -`define LINUX_CHECKPOINT "../linux-testgen/linux-testvectors/checkpoint1K" - `define DEBUG_TRACE 0 // Debug Levels // 0: don't check against QEMU @@ -411,11 +408,7 @@ module testbench(); // initial loading of memories initial begin $readmemh({`LINUX_TEST_VECTORS,"bootmem.txt"}, dut.uncore.bootdtim.bootdtim.RAM, 'h1000 >> 3); - `ifdef CHECKPOINT - $readmemh({`LINUX_CHECKPOINT,"ram.txt"}, dut.uncore.dtim.RAM); - `else - $readmemh({`LINUX_TEST_VECTORS,"ram.txt"}, dut.uncore.dtim.RAM); - `endif + $readmemh({`LINUX_TEST_VECTORS,"ram.txt"}, dut.uncore.dtim.RAM); $readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.bpred.Predictor.DirPredictor.PHT.memory); $readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.bpred.TargetPredictor.memory.memory); ProgramAddrMapFile = {`LINUX_TEST_VECTORS,"vmlinux.objdump.addr"}; From 73d852b1efcb0f9bcd3dc18606f12c45bc46b4fb Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 09:19:25 -0400 Subject: [PATCH 14/36] Divide performs 2 steps per cycle --- ...intdiv_restoring.sv => intdivrestoring.sv} | 48 +++++++++++-------- wally-pipelined/src/muldiv/muldiv.sv | 13 +++-- wally-pipelined/testbench/testbench-arch.sv | 4 +- 3 files changed, 37 insertions(+), 28 deletions(-) rename wally-pipelined/src/muldiv/{intdiv_restoring.sv => intdivrestoring.sv} (82%) diff --git a/wally-pipelined/src/muldiv/intdiv_restoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv similarity index 82% rename from wally-pipelined/src/muldiv/intdiv_restoring.sv rename to wally-pipelined/src/muldiv/intdivrestoring.sv index ea337c188..21e96c6e9 100644 --- a/wally-pipelined/src/muldiv/intdiv_restoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ 
-1,10 +1,10 @@ /////////////////////////////////////////// -// intdiv_restoring.sv +// intdivrestoring.sv // // Written: David_Harris@hmc.edu 12 September 2021 // Modified: // -// Purpose: Restoring integer division using a shift register a subtractor +// Purpose: Restoring integer division using a shift register and subtractor // // A component of the Wally configurable RISC-V project. // @@ -25,7 +25,7 @@ `include "wally-config.vh" -module intdiv_restoring ( +module intdivrestoring ( input logic clk, input logic reset, input logic StallM, @@ -36,9 +36,9 @@ module intdiv_restoring ( output logic [`XLEN-1:0] Q, REM ); - logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Xabs, X2, Xsaved, Xinit; + logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Xabs, X2, Xsaved, Xinit, DAbsB, W1, XQ1; logic qi, qib; // curent quotient bit - localparam STEPBITS = $clog2(`XLEN); + localparam STEPBITS = $clog2(`XLEN)-1; logic [STEPBITS:0] step; logic div0; logic negate, init, startd, SignX, SignD, NegW, NegQ; @@ -53,33 +53,25 @@ module intdiv_restoring ( mux2 #(`XLEN) xabsmux(X, Xabs, signedDivide & ~div0, X2); // need original X as remainder if doing divide by 0 flopen #(`XLEN) xsavereg(clk, start, X2, Xsaved); mux2 #(`XLEN) xfirstmux(Xsaved, X, start, Xinit); - - // restoring division + mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); mux2 #(`XLEN) xmux(XQ, Xinit, init, XQin); - assign {Wshift, XQshift} = {Win[`XLEN-2:0], XQin, qi}; - assign {qib, Wprime} = {1'b0, Wshift} + ~{1'b0, Din} + 1; // subtractor, carry out determines quotient bit - assign qi = ~qib; - mux2 #(`XLEN) wrestoremux(Wshift, Wprime, qi, W2); + + assign DAbsB = ~Din; + + intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); + intdivrestoringstep step2(W1, XQ1, DAbsB, W2, XQshift); // conditionally negate outputs at end of signed operation + // *** move into M stage neg 
#(`XLEN) wneg(W, Wn); -// mux2 #(`XLEN) wnegmux(W, Wn, NegW, Wnn); -// mux2 #(`XLEN) wnextmux(W2, Wnn, negate, Wnext); - mux2 #(`XLEN) wnextmux(W2, Wn, NegW, Wnext); + mux2 #(`XLEN) wnextmux(W2, Wn, NegW, Wnext); //*** neg #(`XLEN) qneg(XQ, XQn); -// mux2 #(`XLEN) qnegmux(XQ, XQn, NegQ, XQnn); -// mux2 #(`XLEN) qnextmux(XQshift, XQnn, negate, XQnext); mux2 #(`XLEN) qnextmux(XQshift, XQn, NegQ, XQnext); flopen #(`XLEN) wreg(clk, start | (busy & (~negate | NegW)), Wnext, W); flopen #(`XLEN) xreg(clk, start | (busy & (~negate | NegQ)), XQnext, XQ); - // save D, which comes from SrcAE forwarding mux and could change because register file read is stalled during divide - // flopen #(`XLEN) dreg(clk, start, D, Dsaved); - //mux2 #(`XLEN) dmux(Dsaved, D, start, Din); - // outputs - // *** sign extension, handling W instructions assign div0 = (Din == 0); mux2 #(`XLEN) qmux(XQ, {`XLEN{1'b1}}, div0, Q); // Q taken from XQ register, or all 1s when dividing by zero mux2 #(`XLEN) remmux(W, Xsaved, div0, REM); // REM taken from W register, or from X when dividing by zero @@ -124,3 +116,17 @@ module intdiv_restoring ( endmodule // muldiv +module intdivrestoringstep( + input logic [`XLEN-1:0] W, XQ, DAbsB, + output logic [`XLEN-1:0] WOut, XQOut); + + logic [`XLEN-1:0] WShift, WPrime; + logic qi, qib; + + assign {WShift, XQOut} = {W[`XLEN-2:0], XQ, qi}; + assign {qib, WPrime} = {1'b0, WShift} + {1'b1, DAbsB} + 1; // subtractor, carry out determines quotient bit ***replace with add + assign qi = ~qib; + mux2 #(`XLEN) wrestoremux(WShift, WPrime, qi, WOut); +endmodule + +// *** clean up internal signals \ No newline at end of file diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index ca9b47b43..43cfba80d 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -47,7 +47,7 @@ module muldiv ( logic [`XLEN-1:0] MulDivResultE, MulDivResultM; logic [`XLEN-1:0] PrelimResultE; logic [`XLEN-1:0] QuotE, RemE; - logic 
[`XLEN*2-1:0] ProdE; + logic [`XLEN*2-1:0] ProdE, ProdM; logic enable_q; //logic [2:0] Funct3E_Q; @@ -55,19 +55,21 @@ module muldiv ( logic [`XLEN-1:0] X, D; //logic [`XLEN-1:0] Num0, Den0; - logic gclk; + // logic gclk; logic startDivideE, busy; logic signedDivide; // Multiplier mul mul(.*); + flopenrc #(`XLEN*2) ProdMReg(clk, reset, FlushM, ~StallM, ProdE, ProdM); + // Divide - // *** replace this clock gater + /*// *** replace this clock gater always @(negedge clk) begin enable_q <= ~StallM; end - assign gclk = enable_q & clk; + assign gclk = enable_q & clk; */ // Handle sign extension for W-type instructions if (`XLEN == 64) begin // RV64 has W-type instructions @@ -80,7 +82,8 @@ module muldiv ( assign signedDivide = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); - intdiv_restoring div(.clk, .reset, .StallM, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); +// intdivrestoring div(.clk, .reset, .StallM, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); + intdivrestoring div(.clk, .reset, .StallM, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing assign startDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; diff --git a/wally-pipelined/testbench/testbench-arch.sv b/wally-pipelined/testbench/testbench-arch.sv index 7f4233ff3..c1ef5a237 100644 --- a/wally-pipelined/testbench/testbench-arch.sv +++ b/wally-pipelined/testbench/testbench-arch.sv @@ -430,7 +430,7 @@ string tests32f[] = '{ // tests = {tests64p,tests64i, tests64periph}; if (`C_SUPPORTED) tests = {tests, tests64ic}; // else tests = {tests, tests64iNOc}; - if 
(`M_SUPPORTED) tests = {tests, tests64m}; + if (`M_SUPPORTED) tests = {tests64m, tests}; /* if (`F_SUPPORTED) tests = {tests64f, tests}; if (`D_SUPPORTED) tests = {tests64d, tests}; if (`MEM_VIRTMEM) tests = {tests64mmu, tests}; @@ -449,7 +449,7 @@ string tests32f[] = '{ tests = {tests32priv, tests32i}; //tests = {tests32i, tests32priv}; if (`C_SUPPORTED) tests = {tests, tests32ic}; - if (`M_SUPPORTED) tests = {tests, tests32m}; + if (`M_SUPPORTED) tests = {tests32m, tests}; //if (`C_SUPPORTED) tests = {tests32ic, tests}; //if (`M_SUPPORTED) tests = {tests32m, tests}; /* tests = {tests32i, tests32p};//,tests32periph}; *** broken at the moment From 735132191cf91fcf2e30034f6eb71f104c41d074 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 09:38:02 -0400 Subject: [PATCH 15/36] Moved muldiv result selection to M stage for performance --- wally-pipelined/src/muldiv/intdivrestoring.sv | 1 + wally-pipelined/src/muldiv/muldiv.sv | 55 +++++++++---------- 2 files changed, 26 insertions(+), 30 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 21e96c6e9..ed78718cf 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -59,6 +59,7 @@ module intdivrestoring ( assign DAbsB = ~Din; + // *** parameterize steps per cycle intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); intdivrestoringstep step2(W1, XQ1, DAbsB, W2, XQshift); diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 43cfba80d..03df97e7c 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -31,7 +31,7 @@ module muldiv ( input logic [31:0] InstrD, // Execute Stage interface input logic [`XLEN-1:0] SrcAE, SrcBE, - input logic [2:0] Funct3E, + input logic [2:0] Funct3E, Funct3M, input logic MulDivE, W64E, // Writeback stage output logic [`XLEN-1:0] MulDivResultW, @@ -45,8 +45,8 @@ module 
muldiv ( generate if (`M_SUPPORTED) begin logic [`XLEN-1:0] MulDivResultE, MulDivResultM; - logic [`XLEN-1:0] PrelimResultE; - logic [`XLEN-1:0] QuotE, RemE; + logic [`XLEN-1:0] PrelimResultM; + logic [`XLEN-1:0] QuotM, RemM; logic [`XLEN*2-1:0] ProdE, ProdM; logic enable_q; @@ -57,7 +57,9 @@ module muldiv ( // logic gclk; logic startDivideE, busy; - logic signedDivide; + logic SignedDivideE; + logic W64M; + // Multiplier mul mul(.*); @@ -65,51 +67,44 @@ module muldiv ( // Divide - /*// *** replace this clock gater - always @(negedge clk) begin - enable_q <= ~StallM; - end - assign gclk = enable_q & clk; */ - // Handle sign extension for W-type instructions if (`XLEN == 64) begin // RV64 has W-type instructions - assign X = W64E ? {{32{SrcAE[31]&signedDivide}}, SrcAE[31:0]} : SrcAE; - assign D = W64E ? {{32{SrcBE[31]&signedDivide}}, SrcBE[31:0]} : SrcBE; + assign X = W64E ? {{32{SrcAE[31]&SignedDivideE}}, SrcAE[31:0]} : SrcAE; + assign D = W64E ? {{32{SrcBE[31]&SignedDivideE}}, SrcBE[31:0]} : SrcBE; end else begin // RV32 has no W-type instructions assign X = SrcAE; assign D = SrcBE; end - assign signedDivide = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); - //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide); -// intdivrestoring div(.clk, .reset, .StallM, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); - intdivrestoring div(.clk, .reset, .StallM, .signedDivide, .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotE), .REM(RemE)); + assign SignedDivideE = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); + //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, SignedDivideE); + intdivrestoring div(.clk, .reset, .StallM, .signedDivide(SignedDivideE), 
.start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotM), .REM(RemM)); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing - assign startDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; + assign startDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; // *** mabye DivDone should be M stage assign DivBusyE = startDivideE | busy; // Select result always_comb - case (Funct3E) - 3'b000: PrelimResultE = ProdE[`XLEN-1:0]; - 3'b001: PrelimResultE = ProdE[`XLEN*2-1:`XLEN]; - 3'b010: PrelimResultE = ProdE[`XLEN*2-1:`XLEN]; - 3'b011: PrelimResultE = ProdE[`XLEN*2-1:`XLEN]; - 3'b100: PrelimResultE = QuotE; - 3'b101: PrelimResultE = QuotE; - 3'b110: PrelimResultE = RemE; - 3'b111: PrelimResultE = RemE; - endcase // case (Funct3E) + case (Funct3M) + 3'b000: PrelimResultM = ProdM[`XLEN-1:0]; + 3'b001: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; + 3'b010: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; + 3'b011: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; + 3'b100: PrelimResultM = QuotM; + 3'b101: PrelimResultM = QuotM; + 3'b110: PrelimResultM = RemM; + 3'b111: PrelimResultM = RemM; + endcase // Handle sign extension for W-type instructions + flopenrc #(1) W64MReg(clk, reset, FlushM, ~StallM, W64E, W64M); if (`XLEN == 64) begin // RV64 has W-type instructions - assign MulDivResultE = W64E ? {{32{PrelimResultE[31]}}, PrelimResultE[31:0]} : PrelimResultE; + assign MulDivResultM = W64M ? 
{{32{PrelimResultM[31]}}, PrelimResultM[31:0]} : PrelimResultM; end else begin // RV32 has no W-type instructions - assign MulDivResultE = PrelimResultE; + assign MulDivResultM = PrelimResultM; end - flopenrc #(`XLEN) MulDivResultMReg(clk, reset, FlushM, ~StallM, MulDivResultE, MulDivResultM); // could let part of multiplication spill into Memory stage flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW); end else begin // no M instructions supported From 0e0e204d3d3a6460aee63c683ea1ad15a9b473fb Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 10:03:02 -0400 Subject: [PATCH 16/36] Moved negating divider output to M stage --- .../regression/wave-dos/peripheral-waves.do | 21 +++++- wally-pipelined/src/muldiv/intdivrestoring.sv | 74 ++++++++++--------- wally-pipelined/src/muldiv/muldiv.sv | 11 +-- 3 files changed, 65 insertions(+), 41 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index 2da82b869..2362b0511 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -35,6 +35,23 @@ add wave -hex /testbench/dut/hart/ieu/dp/SrcAE add wave -hex /testbench/dut/hart/ieu/dp/SrcBE add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE #add wave /testbench/dut/hart/ieu/dp/PCSrcE +add wave /testbench/dut/hart/mdu/genblk1/div/start +add wave /testbench/dut/hart/mdu/DivBusyE +add wave /testbench/dut/hart/mdu/DivDoneE +add wave -hex /testbench/dut/hart/mdu/genblk1/div/D +add wave -hex /testbench/dut/hart/mdu/genblk1/div/Din +add wave -hex /testbench/dut/hart/mdu/genblk1/div/X +add wave -hex /testbench/dut/hart/mdu/genblk1/div/Win +add wave -hex /testbench/dut/hart/mdu/genblk1/div/XQin +add wave -hex /testbench/dut/hart/mdu/genblk1/div/Wshift +add wave -hex /testbench/dut/hart/mdu/genblk1/div/XQshift +add wave -hex /testbench/dut/hart/mdu/genblk1/div/Wnext +add 
wave -hex /testbench/dut/hart/mdu/genblk1/div/qi +add wave -hex /testbench/dut/hart/mdu/genblk1/div/Wprime +add wave -hex /testbench/dut/hart/mdu/genblk1/div/W +add wave -hex /testbench/dut/hart/mdu/genblk1/div/XQ +add wave -hex /testbench/dut/hart/mdu/genblk1/div/REM + add wave -divider add wave -hex /testbench/dut/hart/ifu/PCM add wave -hex /testbench/dut/hart/ifu/InstrM @@ -48,9 +65,9 @@ add wave -hex /testbench/dut/hart/lsu/dcache/ReadDataM add wave -hex /testbench/dut/hart/ebu/ReadDataM add wave -divider add wave -hex /testbench/PCW -add wave -hex /testbench/InstrW +#add wave -hex /testbench/InstrW add wave -hex /testbench/dut/hart/ieu/c/InstrValidW -add wave /testbench/InstrWName +#add wave /testbench/InstrWName add wave -hex /testbench/dut/hart/ReadDataW add wave -hex /testbench/dut/hart/ieu/dp/ResultW add wave -hex /testbench/dut/hart/ieu/dp/RegWriteW diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index ed78718cf..831fe9d02 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -28,9 +28,9 @@ module intdivrestoring ( input logic clk, input logic reset, - input logic StallM, - input logic signedDivide, - input logic start, + input logic StallM, FlushM, + input logic SignedDivideE, + input logic StartDivideE, input logic [`XLEN-1:0] X, D, output logic busy, done, output logic [`XLEN-1:0] Q, REM @@ -41,78 +41,84 @@ module intdivrestoring ( localparam STEPBITS = $clog2(`XLEN)-1; logic [STEPBITS:0] step; logic div0; - logic negate, init, startd, SignX, SignD, NegW, NegQ; + logic init, startd, SignX, SignD, NegW, NegQ; + logic SignedDivideM; + // *** add pipe stages to everything // Setup for signed division abs #(`XLEN) absd(D, Dabs); - mux2 #(`XLEN) dabsmux(D, Dabs, signedDivide, D2); - flopen #(`XLEN) dsavereg(clk, start, D2, Dsaved); - mux2 #(`XLEN) dfirstmux(Dsaved, D, start, Din); + mux2 #(`XLEN) dabsmux(D, Dabs, SignedDivideE, D2); + 
flopen #(`XLEN) dsavereg(clk, StartDivideE, D2, Dsaved); + mux2 #(`XLEN) dfirstmux(Dsaved, D, StartDivideE, Din); abs #(`XLEN) absx(X, Xabs); - mux2 #(`XLEN) xabsmux(X, Xabs, signedDivide & ~div0, X2); // need original X as remainder if doing divide by 0 - flopen #(`XLEN) xsavereg(clk, start, X2, Xsaved); - mux2 #(`XLEN) xfirstmux(Xsaved, X, start, Xinit); + mux2 #(`XLEN) xabsmux(X, Xabs, SignedDivideE & ~div0, X2); // need original X as remainder if doing divide by 0 + flopen #(`XLEN) xsavereg(clk, StartDivideE, X2, Xsaved); + mux2 #(`XLEN) xfirstmux(Xsaved, X, StartDivideE, Xinit); mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); mux2 #(`XLEN) xmux(XQ, Xinit, init, XQin); assign DAbsB = ~Din; + assign div0 = (Din == 0); // *** eventually replace with just the negedge saved D // *** parameterize steps per cycle intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); - intdivrestoringstep step2(W1, XQ1, DAbsB, W2, XQshift); +// intdivrestoringstep step2(W1, XQ1, DAbsB, W2, XQshift); + intdivrestoringstep step2(W1, XQ1, DAbsB, Wnext, XQnext); // conditionally negate outputs at end of signed operation - // *** move into M stage - neg #(`XLEN) wneg(W, Wn); - mux2 #(`XLEN) wnextmux(W2, Wn, NegW, Wnext); //*** - neg #(`XLEN) qneg(XQ, XQn); - mux2 #(`XLEN) qnextmux(XQshift, XQn, NegQ, XQnext); - flopen #(`XLEN) wreg(clk, start | (busy & (~negate | NegW)), Wnext, W); - flopen #(`XLEN) xreg(clk, start | (busy & (~negate | NegQ)), XQnext, XQ); + +// flopen #(`XLEN) wreg(clk, StartDivideE | (busy & (~negate | NegW)), Wnext, W); +// flopen #(`XLEN) xreg(clk, StartDivideE | (busy & (~negate | NegQ)), XQnext, XQ); + flopen #(`XLEN) wreg(clk, StartDivideE | busy, Wnext, W); // *** could become just busy once start moves to its own cycle + flopen #(`XLEN) xreg(clk, StartDivideE | busy, XQnext, XQ); // outputs - assign div0 = (Din == 0); - mux2 #(`XLEN) qmux(XQ, {`XLEN{1'b1}}, div0, Q); // Q taken from XQ register, or all 1s when dividing by zero - mux2 #(`XLEN) remmux(W, Xsaved, 
div0, REM); // REM taken from W register, or from X when dividing by zero + neg #(`XLEN) wneg(W, Wn); +// mux2 #(`XLEN) wnextmux(W2, Wn, NegW, Wnext); //*** + neg #(`XLEN) qneg(XQ, XQn); +// mux2 #(`XLEN) qnextmux(XQshift, XQn, NegQ, XQnext); + mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {div0, NegQ}, Q); // Q taken from XQ register, or all 1s when dividing by zero *** + mux3 #(`XLEN) remmux(W, Wn, Xsaved, {div0, NegW}, REM); // REM taken from W register, or from X when dividing by zero // busy logic always_ff @(posedge clk) if (reset) begin - busy = 0; done = 0; step = 0; negate = 0; - end else if (start & ~StallM) begin + busy = 0; done = 0; step = 0; //negate = 0; + end else if (StartDivideE & ~StallM) begin if (div0) done = 1; else begin busy = 1; step = 1; end - end else if (busy & ~done & ~(startd & signedDivide)) begin // pause one cycle at beginning of signed operations for absolute value + end else if (busy & ~done & ~(startd & SignedDivideE)) begin // pause one cycle at beginning of signed operations for absolute value step = step + 1; if (step[STEPBITS]) begin // *** early terminate on division by 0 - if (signedDivide & ~negate) begin +/* if (SignedDivideE & ~negate) begin negate = 1; - end else begin + end else begin*/ step = 0; busy = 0; - negate = 0; + //negate = 0; done = 1; - end + //end end end else if (done) begin done = 0; busy = 0; - negate = 0; + //negate = 0; end // initialize on the start cycle for unsigned operations, or one cycle later for signed operations (giving time for abs) - flop #(1) initflop(clk, start, startd); - mux2 #(1) initmux(start, startd, signedDivide, init); + flop #(1) initflop(clk, StartDivideE, startd); + mux2 #(1) initmux(StartDivideE, startd, SignedDivideE, init); // save signs of original inputs - flopen #(2) signflops(clk, start, {D[`XLEN-1], X[`XLEN-1]}, {SignD, SignX}); + flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); + flopen #(2) signflops(clk, StartDivideE, {D[`XLEN-1], 
X[`XLEN-1]}, {SignD, SignX}); // On final setp of signed operations, negate outputs as needed - assign NegW = SignX & negate; - assign NegQ = (SignX ^ SignD) & negate; + assign NegW = SignedDivideM & SignX; // & negate; + assign NegQ = SignedDivideM & (SignX ^ SignD); // & negate; endmodule // muldiv diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 03df97e7c..17e943490 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -56,7 +56,7 @@ module muldiv ( //logic [`XLEN-1:0] Num0, Den0; // logic gclk; - logic startDivideE, busy; + logic StartDivideE, busy; logic SignedDivideE; logic W64M; @@ -77,12 +77,13 @@ module muldiv ( end assign SignedDivideE = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); - //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, SignedDivideE); - intdivrestoring div(.clk, .reset, .StallM, .signedDivide(SignedDivideE), .start(startDivideE), .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotM), .REM(RemM)); + //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, StartDivideE, SignedDivideE); + intdivrestoring div(.clk, .reset, .StallM, .FlushM, + .SignedDivideE, .StartDivideE, .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotM), .REM(RemM)); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing - assign startDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; // *** mabye DivDone should be M stage - assign DivBusyE = startDivideE | busy; + assign StartDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; // *** mabye DivDone should be M stage + assign DivBusyE = StartDivideE | busy; // Select result always_comb From d4437b842a072e318598cbd8a50dd702a67a03f5 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 10:13:49 -0400 Subject: [PATCH 17/36] Divider 
code cleanup --- .../regression/wave-dos/peripheral-waves.do | 2 +- wally-pipelined/src/muldiv/intdivrestoring.sv | 43 +++++++------------ wally-pipelined/src/muldiv/muldiv.sv | 8 ++-- 3 files changed, 20 insertions(+), 33 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index 2362b0511..9ff6e4fe2 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -35,7 +35,7 @@ add wave -hex /testbench/dut/hart/ieu/dp/SrcAE add wave -hex /testbench/dut/hart/ieu/dp/SrcBE add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE #add wave /testbench/dut/hart/ieu/dp/PCSrcE -add wave /testbench/dut/hart/mdu/genblk1/div/start +add wave /testbench/dut/hart/mdu/genblk1/div/StartDivideE add wave /testbench/dut/hart/mdu/DivBusyE add wave /testbench/dut/hart/mdu/DivDoneE add wave -hex /testbench/dut/hart/mdu/genblk1/div/D diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 831fe9d02..79a978ef0 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -32,7 +32,7 @@ module intdivrestoring ( input logic SignedDivideE, input logic StartDivideE, input logic [`XLEN-1:0] X, D, - output logic busy, done, + output logic BusyE, done, output logic [`XLEN-1:0] Q, REM ); @@ -64,49 +64,39 @@ module intdivrestoring ( // *** parameterize steps per cycle intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); -// intdivrestoringstep step2(W1, XQ1, DAbsB, W2, XQshift); intdivrestoringstep step2(W1, XQ1, DAbsB, Wnext, XQnext); - // conditionally negate outputs at end of signed operation - -// flopen #(`XLEN) wreg(clk, StartDivideE | (busy & (~negate | NegW)), Wnext, W); -// flopen #(`XLEN) xreg(clk, StartDivideE | (busy & (~negate | NegQ)), XQnext, XQ); - flopen #(`XLEN) wreg(clk, StartDivideE | busy, Wnext, W); // *** could 
become just busy once start moves to its own cycle - flopen #(`XLEN) xreg(clk, StartDivideE | busy, XQnext, XQ); + flopen #(`XLEN) wreg(clk, StartDivideE | BusyE, Wnext, W); // *** could become just busy once start moves to its own cycle + flopen #(`XLEN) xreg(clk, StartDivideE | BusyE, XQnext, XQ); // outputs + // On final setp of signed operations, negate outputs as needed + assign NegW = SignedDivideM & SignX; + assign NegQ = SignedDivideM & (SignX ^ SignD); neg #(`XLEN) wneg(W, Wn); -// mux2 #(`XLEN) wnextmux(W2, Wn, NegW, Wnext); //*** neg #(`XLEN) qneg(XQ, XQn); -// mux2 #(`XLEN) qnextmux(XQshift, XQn, NegQ, XQnext); - mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {div0, NegQ}, Q); // Q taken from XQ register, or all 1s when dividing by zero *** - mux3 #(`XLEN) remmux(W, Wn, Xsaved, {div0, NegW}, REM); // REM taken from W register, or from X when dividing by zero + mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {div0, NegQ}, Q); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero + mux3 #(`XLEN) remmux(W, Wn, Xsaved, {div0, NegW}, REM); // REM taken from W register, negated if necessary, or from X when dividing by zero // busy logic always_ff @(posedge clk) if (reset) begin - busy = 0; done = 0; step = 0; //negate = 0; + BusyE = 0; done = 0; step = 0; end else if (StartDivideE & ~StallM) begin if (div0) done = 1; else begin - busy = 1; step = 1; + BusyE = 1; step = 1; end - end else if (busy & ~done & ~(startd & SignedDivideE)) begin // pause one cycle at beginning of signed operations for absolute value + end else if (BusyE & ~done & ~(startd & SignedDivideE)) begin // pause one cycle at beginning of signed operations for absolute value step = step + 1; - if (step[STEPBITS]) begin // *** early terminate on division by 0 -/* if (SignedDivideE & ~negate) begin - negate = 1; - end else begin*/ + if (step[STEPBITS]) begin step = 0; - busy = 0; - //negate = 0; + BusyE = 0; done = 1; - //end end end else if (done) begin done = 0; - busy = 
0; - //negate = 0; + BusyE = 0; end // initialize on the start cycle for unsigned operations, or one cycle later for signed operations (giving time for abs) @@ -115,10 +105,7 @@ module intdivrestoring ( // save signs of original inputs flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); - flopen #(2) signflops(clk, StartDivideE, {D[`XLEN-1], X[`XLEN-1]}, {SignD, SignX}); - // On final setp of signed operations, negate outputs as needed - assign NegW = SignedDivideM & SignX; // & negate; - assign NegQ = SignedDivideM & (SignX ^ SignD); // & negate; + flopen #(2) signflops(clk, StartDivideE, {D[`XLEN-1], X[`XLEN-1]}, {SignD, SignX}); // *** shouldn't be necessary when capturing inputs properly endmodule // muldiv diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 17e943490..be49bf057 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -56,7 +56,7 @@ module muldiv ( //logic [`XLEN-1:0] Num0, Den0; // logic gclk; - logic StartDivideE, busy; + logic StartDivideE, BusyE; logic SignedDivideE; logic W64M; @@ -79,11 +79,11 @@ module muldiv ( assign SignedDivideE = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, StartDivideE, SignedDivideE); intdivrestoring div(.clk, .reset, .StallM, .FlushM, - .SignedDivideE, .StartDivideE, .X(X), .D(D), .busy(busy), .done(DivDoneE), .Q(QuotM), .REM(RemM)); + .SignedDivideE, .StartDivideE, .X(X), .D(D), .BusyE, .done(DivDoneE), .Q(QuotM), .REM(RemM)); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing - assign StartDivideE = MulDivE & Funct3E[2] & ~busy & ~DivDoneE; // *** mabye DivDone should be M stage - assign DivBusyE = StartDivideE | busy; + assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneE; // 
*** mabye DivDone should be M stage + assign DivBusyE = StartDivideE | BusyE; // Select result always_comb From d532bde931009e1768a85068861e6192424e4713 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 10:36:51 -0400 Subject: [PATCH 18/36] Added negative edge triggered flop to save inputs; do absolute value in first cycle for signed division --- wally-pipelined/src/generic/abs.sv | 38 ----------------- wally-pipelined/src/muldiv/intdivrestoring.sv | 42 +++++++++++-------- 2 files changed, 25 insertions(+), 55 deletions(-) delete mode 100644 wally-pipelined/src/generic/abs.sv diff --git a/wally-pipelined/src/generic/abs.sv b/wally-pipelined/src/generic/abs.sv deleted file mode 100644 index 7ddbd38b6..000000000 --- a/wally-pipelined/src/generic/abs.sv +++ /dev/null @@ -1,38 +0,0 @@ -/////////////////////////////////////////// -// neg.sv -// -// Written: David_Harris@hmc.edu 28 September 2021 -// Modified: -// -// Purpose: 2's complement negator -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -`include "wally-config.vh" - -module abs #(parameter WIDTH = 8) ( - input logic [WIDTH-1:0] a, - output logic [WIDTH-1:0] y); - - logic [WIDTH-1:0] minusa; - - // select -a if sign bit of a is 1 - neg #(WIDTH) neg(a, minusa); - mux2 #(WIDTH) absmux(a, minusa, a[WIDTH-1], y); -endmodule - diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 79a978ef0..d6bdea523 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -36,7 +36,7 @@ module intdivrestoring ( output logic [`XLEN-1:0] Q, REM ); - logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Xabs, X2, Xsaved, Xinit, DAbsB, W1, XQ1; + logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Dn, Xn, Xabs, X2, Xsaved, Xinit, DAbsB, W1, XQ1; logic qi, qib; // curent quotient bit localparam STEPBITS = $clog2(`XLEN)-1; logic [STEPBITS:0] step; @@ -45,22 +45,28 @@ module intdivrestoring ( logic SignedDivideM; // *** add pipe stages to everything - // Setup for signed division - abs #(`XLEN) absd(D, Dabs); - mux2 #(`XLEN) dabsmux(D, Dabs, SignedDivideE, D2); - flopen #(`XLEN) dsavereg(clk, StartDivideE, D2, Dsaved); - mux2 #(`XLEN) dfirstmux(Dsaved, D, StartDivideE, Din); + // save inputs on the negative edge of the execute clock. + // This is unusual practice, but the inputs are not guaranteed to be stable due to some hazard and forwarding logic. + // Saving the inputs is the most hardware-efficient way to fix the issue. 
+ flopen #(`XLEN) dsavereg(~clk, StartDivideE, D, Dsaved); + flopen #(`XLEN) xsavereg(~clk, StartDivideE, X, Xsaved); + assign SignD = Dsaved[`XLEN-1]; // *** do some of these need pipelining for consecutive divides? + assign SignX = Xsaved[`XLEN-1]; + assign div0 = (Dsaved == 0); // *** eventually replace with just the negedge saved D - abs #(`XLEN) absx(X, Xabs); - mux2 #(`XLEN) xabsmux(X, Xabs, SignedDivideE & ~div0, X2); // need original X as remainder if doing divide by 0 - flopen #(`XLEN) xsavereg(clk, StartDivideE, X2, Xsaved); - mux2 #(`XLEN) xfirstmux(Xsaved, X, StartDivideE, Xinit); + // Setup for signed division + neg #(`XLEN) negd(Dsaved, Dn); + mux2 #(`XLEN) dabsmux(Dsaved, Dn, SignedDivideE & SignD, Din); // take absolute value for signed operations + assign DAbsB = ~Din; +// mux2 #(`XLEN) dfirstmux(Dsaved, D, StartDivideE, Din); + + neg #(`XLEN) negx(Xsaved, Xn); + mux2 #(`XLEN) xabsmux(Xsaved, Xn, SignedDivideE & SignX, Xinit); // need original X as remainder if doing divide by 0 +// mux2 #(`XLEN) xfirstmux(Xsaved, X, StartDivideE, Xinit); mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); mux2 #(`XLEN) xmux(XQ, Xinit, init, XQin); - assign DAbsB = ~Din; - assign div0 = (Din == 0); // *** eventually replace with just the negedge saved D // *** parameterize steps per cycle intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); @@ -71,6 +77,8 @@ module intdivrestoring ( // outputs // On final setp of signed operations, negate outputs as needed + //flopen #(2) signflops(clk, StartDivideE, {D[`XLEN-1], X[`XLEN-1]}, {SignD, SignX}); // *** shouldn't be necessary when capturing inputs properly + assign NegW = SignedDivideM & SignX; assign NegQ = SignedDivideM & (SignX ^ SignD); neg #(`XLEN) wneg(W, Wn); @@ -85,9 +93,9 @@ module intdivrestoring ( end else if (StartDivideE & ~StallM) begin if (div0) done = 1; else begin - BusyE = 1; step = 1; + BusyE = 1; step = 0; end - end else if (BusyE & ~done & ~(startd & SignedDivideE)) begin // pause one cycle at 
beginning of signed operations for absolute value + end else if (BusyE & ~done) begin // pause one cycle at beginning of signed operations for absolute value step = step + 1; if (step[STEPBITS]) begin step = 0; @@ -98,14 +106,14 @@ module intdivrestoring ( done = 0; BusyE = 0; end + assign init = (step == 0); // initialize on the start cycle for unsigned operations, or one cycle later for signed operations (giving time for abs) - flop #(1) initflop(clk, StartDivideE, startd); - mux2 #(1) initmux(StartDivideE, startd, SignedDivideE, init); +// flop #(1) initflop(clk, StartDivideE, startd); +// mux2 #(1) initmux(StartDivideE, startd, SignedDivideE, init); // save signs of original inputs flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); - flopen #(2) signflops(clk, StartDivideE, {D[`XLEN-1], X[`XLEN-1]}, {SignD, SignX}); // *** shouldn't be necessary when capturing inputs properly endmodule // muldiv From a86ce5cd37a842636dd69b934b9427f216d583c7 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 10:41:09 -0400 Subject: [PATCH 19/36] Divider code cleanup --- wally-pipelined/src/muldiv/intdivrestoring.sv | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index d6bdea523..f9c8a735f 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -54,35 +54,33 @@ module intdivrestoring ( assign SignX = Xsaved[`XLEN-1]; assign div0 = (Dsaved == 0); // *** eventually replace with just the negedge saved D - // Setup for signed division + // Take absolute value for signed operations neg #(`XLEN) negd(Dsaved, Dn); mux2 #(`XLEN) dabsmux(Dsaved, Dn, SignedDivideE & SignD, Din); // take absolute value for signed operations - assign DAbsB = ~Din; -// mux2 #(`XLEN) dfirstmux(Dsaved, D, StartDivideE, Din); - neg #(`XLEN) negx(Xsaved, Xn); mux2 
#(`XLEN) xabsmux(Xsaved, Xn, SignedDivideE & SignX, Xinit); // need original X as remainder if doing divide by 0 -// mux2 #(`XLEN) xfirstmux(Xsaved, X, StartDivideE, Xinit); + // Negate D for subtraction + assign DAbsB = ~Din; + + // initialization multiplexers on first cycle of operation (one cycle after start is asserted) mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); mux2 #(`XLEN) xmux(XQ, Xinit, init, XQin); - // *** parameterize steps per cycle intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); intdivrestoringstep step2(W1, XQ1, DAbsB, Wnext, XQnext); - flopen #(`XLEN) wreg(clk, StartDivideE | BusyE, Wnext, W); // *** could become just busy once start moves to its own cycle - flopen #(`XLEN) xreg(clk, StartDivideE | BusyE, XQnext, XQ); + flopen #(`XLEN) wreg(clk, /*StartDivideE | */BusyE, Wnext, W); // *** could become just busy once start moves to its own cycle + flopen #(`XLEN) xreg(clk, /*StartDivideE | */BusyE, XQnext, XQ); - // outputs + // Output selection logic in Memory Stage // On final setp of signed operations, negate outputs as needed - //flopen #(2) signflops(clk, StartDivideE, {D[`XLEN-1], X[`XLEN-1]}, {SignD, SignX}); // *** shouldn't be necessary when capturing inputs properly - assign NegW = SignedDivideM & SignX; assign NegQ = SignedDivideM & (SignX ^ SignD); neg #(`XLEN) wneg(W, Wn); neg #(`XLEN) qneg(XQ, XQn); + // Select appropriate output: normal, negated, or for divide by zero mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {div0, NegQ}, Q); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero mux3 #(`XLEN) remmux(W, Wn, Xsaved, {div0, NegW}, REM); // REM taken from W register, negated if necessary, or from X when dividing by zero @@ -108,10 +106,6 @@ module intdivrestoring ( end assign init = (step == 0); - // initialize on the start cycle for unsigned operations, or one cycle later for signed operations (giving time for abs) -// flop #(1) initflop(clk, StartDivideE, startd); -// mux2 #(1) 
initmux(StartDivideE, startd, SignedDivideE, init); - // save signs of original inputs flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); From fe69513bb77bd04014846385b82b9647cea4b14f Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 20:55:37 -0400 Subject: [PATCH 20/36] Partial divider cleanup --- .../regression/wave-dos/peripheral-waves.do | 7 ++-- wally-pipelined/src/muldiv/intdivrestoring.sv | 35 +++++++++---------- wally-pipelined/src/muldiv/muldiv.sv | 12 +++---- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index 9ff6e4fe2..57eb5babc 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -38,9 +38,9 @@ add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE add wave /testbench/dut/hart/mdu/genblk1/div/StartDivideE add wave /testbench/dut/hart/mdu/DivBusyE add wave /testbench/dut/hart/mdu/DivDoneE -add wave -hex /testbench/dut/hart/mdu/genblk1/div/D +add wave -hex /testbench/dut/hart/mdu/genblk1/div/DE add wave -hex /testbench/dut/hart/mdu/genblk1/div/Din -add wave -hex /testbench/dut/hart/mdu/genblk1/div/X +add wave -hex /testbench/dut/hart/mdu/genblk1/div/XE add wave -hex /testbench/dut/hart/mdu/genblk1/div/Win add wave -hex /testbench/dut/hart/mdu/genblk1/div/XQin add wave -hex /testbench/dut/hart/mdu/genblk1/div/Wshift @@ -50,7 +50,8 @@ add wave -hex /testbench/dut/hart/mdu/genblk1/div/qi add wave -hex /testbench/dut/hart/mdu/genblk1/div/Wprime add wave -hex /testbench/dut/hart/mdu/genblk1/div/W add wave -hex /testbench/dut/hart/mdu/genblk1/div/XQ -add wave -hex /testbench/dut/hart/mdu/genblk1/div/REM +add wave -hex /testbench/dut/hart/mdu/genblk1/div/RemM +add wave -hex /testbench/dut/hart/mdu/genblk1/div/QuotM add wave -divider add wave -hex /testbench/dut/hart/ifu/PCM diff --git 
a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index f9c8a735f..45ffbfb58 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -31,12 +31,12 @@ module intdivrestoring ( input logic StallM, FlushM, input logic SignedDivideE, input logic StartDivideE, - input logic [`XLEN-1:0] X, D, + input logic [`XLEN-1:0] XE, DE, output logic BusyE, done, - output logic [`XLEN-1:0] Q, REM + output logic [`XLEN-1:0] QuotM, RemM ); - logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, Dsaved, Din, Dabs, D2, Dn, Xn, Xabs, X2, Xsaved, Xinit, DAbsB, W1, XQ1; + logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, DSavedE, Din, Dabs, D2, Dn, Xn, Xabs, X2, XSavedE, Xinit, DAbsB, W1, XQ1; logic qi, qib; // curent quotient bit localparam STEPBITS = $clog2(`XLEN)-1; logic [STEPBITS:0] step; @@ -48,17 +48,18 @@ module intdivrestoring ( // save inputs on the negative edge of the execute clock. // This is unusual practice, but the inputs are not guaranteed to be stable due to some hazard and forwarding logic. // Saving the inputs is the most hardware-efficient way to fix the issue. - flopen #(`XLEN) dsavereg(~clk, StartDivideE, D, Dsaved); - flopen #(`XLEN) xsavereg(~clk, StartDivideE, X, Xsaved); - assign SignD = Dsaved[`XLEN-1]; // *** do some of these need pipelining for consecutive divides? - assign SignX = Xsaved[`XLEN-1]; - assign div0 = (Dsaved == 0); // *** eventually replace with just the negedge saved D + flopen #(`XLEN) dsavereg(~clk, StartDivideE, DE, DSavedE); + flopen #(`XLEN) xsavereg(~clk, StartDivideE, XE, XSavedE); + flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); + assign SignD = DSavedE[`XLEN-1]; // *** do some of these need pipelining for consecutive divides? 
+ assign SignX = XSavedE[`XLEN-1]; + assign div0 = (DSavedE == 0); // *** eventually replace with just the negedge saved D // Take absolute value for signed operations - neg #(`XLEN) negd(Dsaved, Dn); - mux2 #(`XLEN) dabsmux(Dsaved, Dn, SignedDivideE & SignD, Din); // take absolute value for signed operations - neg #(`XLEN) negx(Xsaved, Xn); - mux2 #(`XLEN) xabsmux(Xsaved, Xn, SignedDivideE & SignX, Xinit); // need original X as remainder if doing divide by 0 + neg #(`XLEN) negd(DSavedE, Dn); + mux2 #(`XLEN) dabsmux(DSavedE, Dn, SignedDivideE & SignD, Din); // take absolute value for signed operations + neg #(`XLEN) negx(XSavedE, Xn); + mux2 #(`XLEN) xabsmux(XSavedE, Xn, SignedDivideE & SignX, Xinit); // need original X as remainder if doing divide by 0 // Negate D for subtraction assign DAbsB = ~Din; @@ -71,8 +72,8 @@ module intdivrestoring ( intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); intdivrestoringstep step2(W1, XQ1, DAbsB, Wnext, XQnext); - flopen #(`XLEN) wreg(clk, /*StartDivideE | */BusyE, Wnext, W); // *** could become just busy once start moves to its own cycle - flopen #(`XLEN) xreg(clk, /*StartDivideE | */BusyE, XQnext, XQ); + flopen #(`XLEN) wreg(clk, BusyE, Wnext, W); // *** could become just busy once start moves to its own cycle + flopen #(`XLEN) xreg(clk, BusyE, XQnext, XQ); // Output selection logic in Memory Stage // On final setp of signed operations, negate outputs as needed @@ -81,8 +82,8 @@ module intdivrestoring ( neg #(`XLEN) wneg(W, Wn); neg #(`XLEN) qneg(XQ, XQn); // Select appropriate output: normal, negated, or for divide by zero - mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {div0, NegQ}, Q); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero - mux3 #(`XLEN) remmux(W, Wn, Xsaved, {div0, NegW}, REM); // REM taken from W register, negated if necessary, or from X when dividing by zero + mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {div0, NegQ}, QuotM); // Q taken from XQ register, negated if necessary, 
or all 1s when dividing by zero + mux3 #(`XLEN) remmux(W, Wn, XSavedE, {div0, NegW}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero // busy logic always_ff @(posedge clk) @@ -106,8 +107,6 @@ module intdivrestoring ( end assign init = (step == 0); - // save signs of original inputs - flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); endmodule // muldiv diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index be49bf057..734965195 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -52,7 +52,7 @@ module muldiv ( logic enable_q; //logic [2:0] Funct3E_Q; logic div0error; // ***unused - logic [`XLEN-1:0] X, D; + logic [`XLEN-1:0] XE, DE; //logic [`XLEN-1:0] Num0, Den0; // logic gclk; @@ -69,17 +69,17 @@ module muldiv ( // Handle sign extension for W-type instructions if (`XLEN == 64) begin // RV64 has W-type instructions - assign X = W64E ? {{32{SrcAE[31]&SignedDivideE}}, SrcAE[31:0]} : SrcAE; - assign D = W64E ? {{32{SrcBE[31]&SignedDivideE}}, SrcBE[31:0]} : SrcBE; + assign XE = W64E ? {{32{SrcAE[31]&SignedDivideE}}, SrcAE[31:0]} : SrcAE; + assign DE = W64E ? 
{{32{SrcBE[31]&SignedDivideE}}, SrcBE[31:0]} : SrcBE; end else begin // RV32 has no W-type instructions - assign X = SrcAE; - assign D = SrcBE; + assign XE = SrcAE; + assign DE = SrcBE; end assign SignedDivideE = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, StartDivideE, SignedDivideE); intdivrestoring div(.clk, .reset, .StallM, .FlushM, - .SignedDivideE, .StartDivideE, .X(X), .D(D), .BusyE, .done(DivDoneE), .Q(QuotM), .REM(RemM)); + .SignedDivideE, .StartDivideE, .XE, .DE, .BusyE, .done(DivDoneE), .QuotM, .RemM); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneE; // *** mabye DivDone should be M stage From 775520c05a5b388d7f1d17e7dd43957e0274d719 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 20:57:54 -0400 Subject: [PATCH 21/36] Partial divider cleanup 2 --- wally-pipelined/src/muldiv/intdivrestoring.sv | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 45ffbfb58..894de243d 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -88,13 +88,14 @@ module intdivrestoring ( // busy logic always_ff @(posedge clk) if (reset) begin - BusyE = 0; done = 0; step = 0; + BusyE = 0; done = 0; step = 0; init = 0; end else if (StartDivideE & ~StallM) begin if (div0) done = 1; else begin - BusyE = 1; step = 0; + BusyE = 1; step = 0; init = 1; end end else if (BusyE & ~done) begin // pause one cycle at beginning of signed operations for absolute value + init = 0; step = step + 1; if (step[STEPBITS]) begin step = 0; @@ -105,7 +106,7 @@ module intdivrestoring ( done = 0; BusyE = 0; end - assign init = (step == 0); + 
//assign init = (step == 0); endmodule // muldiv From 67690c2ed7e7f732a17ff1ddf05e78ee5556029c Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 21:00:13 -0400 Subject: [PATCH 22/36] Partial divider cleanup 3 --- wally-pipelined/src/muldiv/intdivrestoring.sv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 894de243d..7b3509ea0 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -36,7 +36,7 @@ module intdivrestoring ( output logic [`XLEN-1:0] QuotM, RemM ); - logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, DSavedE, Din, Dabs, D2, Dn, Xn, Xabs, X2, XSavedE, Xinit, DAbsB, W1, XQ1; + logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, DSavedE, Din, Dabs, D2, DnE, XnE, Xabs, X2, XSavedE, Xinit, DAbsB, W1, XQ1; logic qi, qib; // curent quotient bit localparam STEPBITS = $clog2(`XLEN)-1; logic [STEPBITS:0] step; @@ -56,10 +56,10 @@ module intdivrestoring ( assign div0 = (DSavedE == 0); // *** eventually replace with just the negedge saved D // Take absolute value for signed operations - neg #(`XLEN) negd(DSavedE, Dn); - mux2 #(`XLEN) dabsmux(DSavedE, Dn, SignedDivideE & SignD, Din); // take absolute value for signed operations - neg #(`XLEN) negx(XSavedE, Xn); - mux2 #(`XLEN) xabsmux(XSavedE, Xn, SignedDivideE & SignX, Xinit); // need original X as remainder if doing divide by 0 + neg #(`XLEN) negd(DSavedE, DnE); + mux2 #(`XLEN) dabsmux(DSavedE, DnE, SignedDivideE & SignD, Din); // take absolute value for signed operations + neg #(`XLEN) negx(XSavedE, XnE); + mux2 #(`XLEN) xabsmux(XSavedE, XnE, SignedDivideE & SignX, Xinit); // need original X as remainder if doing divide by 0 // Negate D for subtraction assign DAbsB = ~Din; From 3441991d93999fb5155ec294d0a5f2b7d20ded83 
Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 21:10:35 -0400 Subject: [PATCH 23/36] Divider mostly cleaned up --- wally-pipelined/src/muldiv/intdivrestoring.sv | 26 +++--------- .../src/muldiv/intdivrestoringstep.sv | 40 +++++++++++++++++++ 2 files changed, 45 insertions(+), 21 deletions(-) create mode 100644 wally-pipelined/src/muldiv/intdivrestoringstep.sv diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 7b3509ea0..e9221cc50 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -50,10 +50,10 @@ module intdivrestoring ( // Saving the inputs is the most hardware-efficient way to fix the issue. flopen #(`XLEN) dsavereg(~clk, StartDivideE, DE, DSavedE); flopen #(`XLEN) xsavereg(~clk, StartDivideE, XE, XSavedE); - flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); + flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); assign SignD = DSavedE[`XLEN-1]; // *** do some of these need pipelining for consecutive divides? 
assign SignX = XSavedE[`XLEN-1]; - assign div0 = (DSavedE == 0); // *** eventually replace with just the negedge saved D + assign div0 = (DSavedE == 0); // Take absolute value for signed operations neg #(`XLEN) negd(DSavedE, DnE); @@ -72,7 +72,7 @@ module intdivrestoring ( intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); intdivrestoringstep step2(W1, XQ1, DAbsB, Wnext, XQnext); - flopen #(`XLEN) wreg(clk, BusyE, Wnext, W); // *** could become just busy once start moves to its own cycle + flopen #(`XLEN) wreg(clk, BusyE, Wnext, W); flopen #(`XLEN) xreg(clk, BusyE, XQnext, XQ); // Output selection logic in Memory Stage @@ -105,24 +105,8 @@ module intdivrestoring ( end else if (done) begin done = 0; BusyE = 0; - end - //assign init = (step == 0); - + end -endmodule // muldiv - - -module intdivrestoringstep( - input logic [`XLEN-1:0] W, XQ, DAbsB, - output logic [`XLEN-1:0] WOut, XQOut); - - logic [`XLEN-1:0] WShift, WPrime; - logic qi, qib; - - assign {WShift, XQOut} = {W[`XLEN-2:0], XQ, qi}; - assign {qib, WPrime} = {1'b0, WShift} + {1'b1, DAbsB} + 1; // subtractor, carry out determines quotient bit ***replace with add - assign qi = ~qib; - mux2 #(`XLEN) wrestoremux(WShift, WPrime, qi, WOut); -endmodule +endmodule // *** clean up internal signals \ No newline at end of file diff --git a/wally-pipelined/src/muldiv/intdivrestoringstep.sv b/wally-pipelined/src/muldiv/intdivrestoringstep.sv new file mode 100644 index 000000000..3dcf7da50 --- /dev/null +++ b/wally-pipelined/src/muldiv/intdivrestoringstep.sv @@ -0,0 +1,40 @@ +/////////////////////////////////////////// +// intdivrestoringstep.sv +// +// Written: David_Harris@hmc.edu 2 October 2021 +// Modified: +// +// Purpose: Restoring integer division using a shift register and subtractor +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +`include "wally-config.vh" + +module intdivrestoringstep( + input logic [`XLEN-1:0] W, XQ, DAbsB, + output logic [`XLEN-1:0] WOut, XQOut); + + logic [`XLEN-1:0] WShift, WPrime; + logic qi, qib; + + assign {WShift, XQOut} = {W[`XLEN-2:0], XQ, qi}; + assign {qib, WPrime} = {1'b0, WShift} + {1'b1, DAbsB} + 1; // subtractor, carry out determines quotient bit ***replace with add + assign qi = ~qib; + mux2 #(`XLEN) wrestoremux(WShift, WPrime, qi, WOut); +endmodule + From 24bb3f4bafa275bfb2e6491b7fb87b7b13a73f2a Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 2 Oct 2021 22:54:01 -0400 Subject: [PATCH 24/36] Added more pipeline stage suffixes to divider --- wally-pipelined/regression/linux-wave.do | 2 +- .../regression/wave-dos/peripheral-waves.do | 1 - wally-pipelined/regression/wave.do | 2 +- wally-pipelined/src/ieu/forward.sv | 4 +- wally-pipelined/src/ieu/ieu.sv | 1 - wally-pipelined/src/muldiv/intdivrestoring.sv | 47 +++++++++++-------- wally-pipelined/src/muldiv/muldiv.sv | 8 ++-- .../src/wally/wallypipelinedhart.sv | 1 - 8 files changed, 35 insertions(+), 31 deletions(-) diff --git a/wally-pipelined/regression/linux-wave.do b/wally-pipelined/regression/linux-wave.do index 10d264d8d..7a0ee7bd9 100644 --- a/wally-pipelined/regression/linux-wave.do +++ b/wally-pipelined/regression/linux-wave.do @@ -176,7 +176,7 @@ add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushM add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/MulDivResultW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/genblk1/div/start -add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivDoneE +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivDoneM add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivBusyE add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/fsm1/CURRENT_STATE add wave -noupdate -group divider 
/testbench/dut/hart/mdu/genblk1/div/N diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index 57eb5babc..30f9718fb 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -37,7 +37,6 @@ add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE #add wave /testbench/dut/hart/ieu/dp/PCSrcE add wave /testbench/dut/hart/mdu/genblk1/div/StartDivideE add wave /testbench/dut/hart/mdu/DivBusyE -add wave /testbench/dut/hart/mdu/DivDoneE add wave -hex /testbench/dut/hart/mdu/genblk1/div/DE add wave -hex /testbench/dut/hart/mdu/genblk1/div/Din add wave -hex /testbench/dut/hart/mdu/genblk1/div/XE diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index d176c63f1..8bff207f1 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -179,7 +179,7 @@ add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushM add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/MulDivResultW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/genblk1/div/start -add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivDoneE +add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivDoneM add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivBusyE add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/fsm1/CURRENT_STATE add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/N diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv index 47a649f85..3e25ca7cc 100644 --- a/wally-pipelined/src/ieu/forward.sv +++ b/wally-pipelined/src/ieu/forward.sv @@ -30,7 +30,7 @@ module forward( input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, input logic MemReadE, MulDivE, CSRReadE, input logic RegWriteM, RegWriteW, - input logic DivDoneE, DivBusyE, + 
input logic DivBusyE, input logic FWriteIntE, FWriteIntM, FWriteIntW, input logic SCE, input logic StallD, @@ -54,7 +54,7 @@ module forward( // Stall on dependent operations that finish in Mem Stage and can't bypass in time assign FPUStallD = FWriteIntE & ((Rs1D == RdE) | (Rs2D == RdE)); assign LoadStallD = (MemReadE|SCE) & ((Rs1D == RdE) | (Rs2D == RdE)); - assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) /*| DivBusyE */; // *** extend with stalls for divide + assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)); assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE)); endmodule diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index f2984d7ff..234f767a3 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -73,7 +73,6 @@ module ieu ( input logic FlushD, FlushE, FlushM, FlushW, output logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD, output logic PCSrcE, - input logic DivDoneE, input logic DivBusyE, output logic CSRReadM, CSRWriteM, PrivilegedM, output logic CSRWritePendingDEM, diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index e9221cc50..8f4947da3 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -32,7 +32,7 @@ module intdivrestoring ( input logic SignedDivideE, input logic StartDivideE, input logic [`XLEN-1:0] XE, DE, - output logic BusyE, done, + output logic BusyE, DivDoneM, output logic [`XLEN-1:0] QuotM, RemM ); @@ -40,8 +40,8 @@ module intdivrestoring ( logic qi, qib; // curent quotient bit localparam STEPBITS = $clog2(`XLEN)-1; logic [STEPBITS:0] step; - logic div0; - logic init, startd, SignX, SignD, NegW, NegQ; + logic Div0E, Div0M; + logic init, startd, SignXE, SignXM, SignDE, SignDM, NegWM, NegQM; logic SignedDivideM; // *** add pipe stages to everything @@ -50,19 +50,27 @@ module intdivrestoring ( // Saving the inputs is the most 
hardware-efficient way to fix the issue. flopen #(`XLEN) dsavereg(~clk, StartDivideE, DE, DSavedE); flopen #(`XLEN) xsavereg(~clk, StartDivideE, XE, XSavedE); + assign SignDE = DSavedE[`XLEN-1]; // *** do some of these need pipelining for consecutive divides? + assign SignXE = XSavedE[`XLEN-1]; + assign Div0E = (DSavedE == 0); + + // pipeline registers flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); - assign SignD = DSavedE[`XLEN-1]; // *** do some of these need pipelining for consecutive divides? - assign SignX = XSavedE[`XLEN-1]; - assign div0 = (DSavedE == 0); + flopenrc #(1) Div0eMReg(clk, reset, FlushM, ~StallM, Div0E, Div0M); + flopenrc #(1) SignDMReg(clk, reset, FlushM, ~StallM, SignDE, SignDM); + flopenrc #(1) SignXMReg(clk, reset, FlushM, ~StallM, SignXE, SignXM); + flopenrc #(`XLEN) XSavedMReg(clk, reset, FlushM, ~StallM, XSavedE, XSavedM); // is this truly necessary? // Take absolute value for signed operations neg #(`XLEN) negd(DSavedE, DnE); - mux2 #(`XLEN) dabsmux(DSavedE, DnE, SignedDivideE & SignD, Din); // take absolute value for signed operations + mux2 #(`XLEN) dabsmux(DSavedE, DnE, SignedDivideE & SignDE, Din); // take absolute value for signed operations neg #(`XLEN) negx(XSavedE, XnE); - mux2 #(`XLEN) xabsmux(XSavedE, XnE, SignedDivideE & SignX, Xinit); // need original X as remainder if doing divide by 0 + mux2 #(`XLEN) xabsmux(XSavedE, XnE, SignedDivideE & SignXE, Xinit); // need original X as remainder if doing divide by 0 // Negate D for subtraction assign DAbsB = ~Din; + // *** merge this into dabsmux if possible + // Put suffixes on Xinit, init->DivInitE, Wn, XQn // initialization multiplexers on first cycle of operation (one cycle after start is asserted) mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); @@ -77,33 +85,34 @@ module intdivrestoring ( // Output selection logic in Memory Stage // On final setp of signed operations, negate outputs as needed - assign NegW = SignedDivideM & SignX; - 
assign NegQ = SignedDivideM & (SignX ^ SignD); + assign NegWM = SignedDivideM & SignXM; + assign NegQM = SignedDivideM & (SignXM ^ SignDM); neg #(`XLEN) wneg(W, Wn); neg #(`XLEN) qneg(XQ, XQn); // Select appropriate output: normal, negated, or for divide by zero - mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {div0, NegQ}, QuotM); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero - mux3 #(`XLEN) remmux(W, Wn, XSavedE, {div0, NegW}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero - + mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {Div0M, NegQM}, QuotM); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero + mux3 #(`XLEN) remmux(W, Wn, XSavedM, {Div0M, NegWM}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero + // verify it's really necessary to have XSavedM + // busy logic always_ff @(posedge clk) if (reset) begin - BusyE = 0; done = 0; step = 0; init = 0; + BusyE = 0; DivDoneM = 0; step = 0; init = 0; end else if (StartDivideE & ~StallM) begin - if (div0) done = 1; + if (Div0E) DivDoneM = 1; else begin BusyE = 1; step = 0; init = 1; end - end else if (BusyE & ~done) begin // pause one cycle at beginning of signed operations for absolute value + end else if (BusyE & ~DivDoneM) begin // pause one cycle at beginning of signed operations for absolute value init = 0; step = step + 1; if (step[STEPBITS]) begin step = 0; BusyE = 0; - done = 1; + DivDoneM = 1; end - end else if (done) begin - done = 0; + end else if (DivDoneM) begin + DivDoneM = 0; BusyE = 0; end diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 734965195..7cccf2d72 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -36,7 +36,6 @@ module muldiv ( // Writeback stage output logic [`XLEN-1:0] MulDivResultW, // Divide Done - output logic DivDoneE, output logic DivBusyE, // hazards 
input logic StallE, StallM, StallW, FlushM, FlushW @@ -56,7 +55,7 @@ module muldiv ( //logic [`XLEN-1:0] Num0, Den0; // logic gclk; - logic StartDivideE, BusyE; + logic StartDivideE, BusyE, DivDoneM; logic SignedDivideE; logic W64M; @@ -79,10 +78,10 @@ module muldiv ( assign SignedDivideE = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, StartDivideE, SignedDivideE); intdivrestoring div(.clk, .reset, .StallM, .FlushM, - .SignedDivideE, .StartDivideE, .XE, .DE, .BusyE, .done(DivDoneE), .QuotM, .RemM); + .SignedDivideE, .StartDivideE, .XE, .DE, .BusyE, .DivDoneM, .QuotM, .RemM); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing - assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneE; // *** mabye DivDone should be M stage + assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneM; assign DivBusyE = StartDivideE | BusyE; // Select result @@ -111,7 +110,6 @@ module muldiv ( end else begin // no M instructions supported assign MulDivResultW = 0; assign DivBusyE = 0; - assign DivDoneE = 0; end endgenerate diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index eb5169eb2..8a298594f 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -88,7 +88,6 @@ module wallypipelinedhart logic InvalidateICacheM, FlushDCacheM; logic PCSrcE; logic CSRWritePendingDEM; - logic DivDoneE; logic DivBusyE; logic RegWriteD; logic LoadStallD, StoreStallD, MulDivStallD, CSRRdStallD; From 371f9d9a4a8672a0f55f99d910512ba565e55953 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 3 Oct 2021 00:06:57 -0400 Subject: [PATCH 25/36] Added more pipeline stage suffixes to divider --- wally-pipelined/src/muldiv/intdivrestoring.sv | 4 ++-- 
wally-pipelined/src/muldiv/intdivrestoringstep.sv | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 8f4947da3..f0731ea05 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -63,12 +63,12 @@ module intdivrestoring ( // Take absolute value for signed operations neg #(`XLEN) negd(DSavedE, DnE); - mux2 #(`XLEN) dabsmux(DSavedE, DnE, SignedDivideE & SignDE, Din); // take absolute value for signed operations + mux2 #(`XLEN) dabsmux(DnE, DSavedE, SignedDivideE & SignDE, DAbsB); // take absolute value for signed operations, and negate for subtraction setp neg #(`XLEN) negx(XSavedE, XnE); mux2 #(`XLEN) xabsmux(XSavedE, XnE, SignedDivideE & SignXE, Xinit); // need original X as remainder if doing divide by 0 // Negate D for subtraction - assign DAbsB = ~Din; + //assign DAbsB = ~Din; // *** merge this into dabsmux if possible // Put suffixes on Xinit, init->DivInitE, Wn, XQn diff --git a/wally-pipelined/src/muldiv/intdivrestoringstep.sv b/wally-pipelined/src/muldiv/intdivrestoringstep.sv index 3dcf7da50..8fce91f68 100644 --- a/wally-pipelined/src/muldiv/intdivrestoringstep.sv +++ b/wally-pipelined/src/muldiv/intdivrestoringstep.sv @@ -33,7 +33,7 @@ module intdivrestoringstep( logic qi, qib; assign {WShift, XQOut} = {W[`XLEN-2:0], XQ, qi}; - assign {qib, WPrime} = {1'b0, WShift} + {1'b1, DAbsB} + 1; // subtractor, carry out determines quotient bit ***replace with add + assign {qib, WPrime} = {1'b0, WShift} + {1'b1, DAbsB} /*+ 1*/; // subtractor, carry out determines quotient bit ***replace with add assign qi = ~qib; mux2 #(`XLEN) wrestoremux(WShift, WPrime, qi, WOut); endmodule From 6aa2521959c549c6b1d2aeda9da0e494a6727835 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 3 Oct 2021 00:10:12 -0400 Subject: [PATCH 26/36] Eliminated extra inversion for subtraction in divider --- 
wally-pipelined/src/muldiv/intdivrestoring.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index f0731ea05..5cc19bf0d 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -36,7 +36,7 @@ module intdivrestoring ( output logic [`XLEN-1:0] QuotM, RemM ); - logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, DSavedE, Din, Dabs, D2, DnE, XnE, Xabs, X2, XSavedE, Xinit, DAbsB, W1, XQ1; + logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, DSavedE, Din, Dabs, D2, DnE, XnE, Xabs, X2, XSavedE, XSavedM, Xinit, DAbsB, W1, XQ1; logic qi, qib; // curent quotient bit localparam STEPBITS = $clog2(`XLEN)-1; logic [STEPBITS:0] step; From dcbbee66237f900eea6f931a96e4af71cff342de Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 3 Oct 2021 00:20:35 -0400 Subject: [PATCH 27/36] More divider cleanup --- wally-pipelined/src/generic/adder.sv | 35 +++++++++++++++++++ wally-pipelined/src/muldiv/div.sv | 3 +- wally-pipelined/src/muldiv/intdivrestoring.sv | 7 ++-- .../src/muldiv/intdivrestoringstep.sv | 3 +- 4 files changed, 41 insertions(+), 7 deletions(-) create mode 100644 wally-pipelined/src/generic/adder.sv diff --git a/wally-pipelined/src/generic/adder.sv b/wally-pipelined/src/generic/adder.sv new file mode 100644 index 000000000..77cdf1ba4 --- /dev/null +++ b/wally-pipelined/src/generic/adder.sv @@ -0,0 +1,35 @@ +/////////////////////////////////////////// +// adder.sv +// +// Written: David_Harris@hmc.edu 2 October 2021 +// Modified: +// +// Purpose: Adder +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +`include "wally-config.vh" + +module adder #(parameter WIDTH=8) ( + input logic [WIDTH-1:0] a, b, + output logic [WIDTH-1:0] y); + + assign y = a + b; +endmodule + + diff --git a/wally-pipelined/src/muldiv/div.sv b/wally-pipelined/src/muldiv/div.sv index 30ea394f3..b299af032 100755 --- a/wally-pipelined/src/muldiv/div.sv +++ b/wally-pipelined/src/muldiv/div.sv @@ -278,13 +278,14 @@ module otf #(parameter WIDTH=8) assign QMstar = R1Q; endmodule // otf8 - +/* module adder #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, output logic [WIDTH-1:0] y); assign y = a + b; endmodule // adder +*/ module fa (input logic a, b, c, output logic sum, carry); diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 5cc19bf0d..512c00ece 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -61,16 +61,13 @@ module intdivrestoring ( flopenrc #(1) SignXMReg(clk, reset, FlushM, ~StallM, SignXE, SignXM); flopenrc #(`XLEN) XSavedMReg(clk, reset, FlushM, ~StallM, XSavedE, XSavedM); // is this truly necessary? 
- // Take absolute value for signed operations + // Take absolute value for signed operations, and negate D to handle subtraction in divider stages neg #(`XLEN) negd(DSavedE, DnE); mux2 #(`XLEN) dabsmux(DnE, DSavedE, SignedDivideE & SignDE, DAbsB); // take absolute value for signed operations, and negate for subtraction setp neg #(`XLEN) negx(XSavedE, XnE); mux2 #(`XLEN) xabsmux(XSavedE, XnE, SignedDivideE & SignXE, Xinit); // need original X as remainder if doing divide by 0 - // Negate D for subtraction - //assign DAbsB = ~Din; - // *** merge this into dabsmux if possible - // Put suffixes on Xinit, init->DivInitE, Wn, XQn + // Put suffixes on Xinit, init->DivInitE, Wn, XQn // initialization multiplexers on first cycle of operation (one cycle after start is asserted) mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); diff --git a/wally-pipelined/src/muldiv/intdivrestoringstep.sv b/wally-pipelined/src/muldiv/intdivrestoringstep.sv index 8fce91f68..fe32da554 100644 --- a/wally-pipelined/src/muldiv/intdivrestoringstep.sv +++ b/wally-pipelined/src/muldiv/intdivrestoringstep.sv @@ -33,7 +33,8 @@ module intdivrestoringstep( logic qi, qib; assign {WShift, XQOut} = {W[`XLEN-2:0], XQ, qi}; - assign {qib, WPrime} = {1'b0, WShift} + {1'b1, DAbsB} /*+ 1*/; // subtractor, carry out determines quotient bit ***replace with add + adder #(`XLEN+1) wdsub({1'b0, WShift}, {1'b1, DAbsB}, {qib, WPrime}); + //assign {qib, WPrime} = {1'b0, WShift} + {1'b1, DAbsB}; // effective subtractor, carry out determines quotient bit assign qi = ~qib; mux2 #(`XLEN) wrestoremux(WShift, WPrime, qi, WOut); endmodule From a202c705cd446ce208beb23425dd8f0ca7c78912 Mon Sep 17 00:00:00 2001 From: bbracker Date: Sun, 3 Oct 2021 00:30:04 -0400 Subject: [PATCH 28/36] checkpoint generator bugfixes --- .../testvector-generation/GenerateCheckpoint.sh | 6 ++++-- .../linux-testgen/testvector-generation/checkpoint.gdb | 7 +++++-- .../linux-testgen/testvector-generation/parseState.py | 6 +++--- 3 files changed, 12 
insertions(+), 7 deletions(-) diff --git a/wally-pipelined/linux-testgen/testvector-generation/GenerateCheckpoint.sh b/wally-pipelined/linux-testgen/testvector-generation/GenerateCheckpoint.sh index e9bf5167d..bd54523e1 100755 --- a/wally-pipelined/linux-testgen/testvector-generation/GenerateCheckpoint.sh +++ b/wally-pipelined/linux-testgen/testvector-generation/GenerateCheckpoint.sh @@ -10,7 +10,7 @@ customQemu="/courses/e190ax/qemu_sim/rv64_initrd/qemu_experimental/qemu/build/qe # use on other systems #customQemu="qemu-system-riscv64" -instrs=8500000 +instrs=50000000 imageDir="../buildroot-image-output" outDir="../linux-testvectors/checkpoint$instrs" @@ -24,9 +24,11 @@ if [[ $REPLY =~ ^[Yy]$ ]] then mkdir -p $outDir mkdir -p $intermedDir + # Simulate QEMU, parse QEMU trace, run GDB script which logs a bunch of data at the checkpoint ($customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -rtc clock=vm -icount shift=1 -d nochain,cpu,in_asm -serial /dev/null -singlestep -gdb tcp::1240 -S 2>&1 1>&2 | ./parse_qemu.py | ./parseNew.py | ./remove_dup.awk > $intermedDir/rawTrace.txt) & riscv64-unknown-elf-gdb -x ./checkpoint.gdb -ex "createCheckpoint $instrs \"$intermedDir\"" - ./fix_mem.py "$intermedDir/ramGDB.txt" "$outDir/ram.txt" + # Post-Process GDB outputs ./parseState.py "$outDir" + ./fix_mem.py "$intermedDir/ramGDB.txt" "$outDir/ram.txt" else echo "You can change the number of instructions by editing the \"instrs\" variable in this script." echo "Have a nice day!" 
diff --git a/wally-pipelined/linux-testgen/testvector-generation/checkpoint.gdb b/wally-pipelined/linux-testgen/testvector-generation/checkpoint.gdb index 8ffd8e982..1b2c64f16 100755 --- a/wally-pipelined/linux-testgen/testvector-generation/checkpoint.gdb +++ b/wally-pipelined/linux-testgen/testvector-generation/checkpoint.gdb @@ -7,6 +7,9 @@ define createCheckpoint # QEMU must also use TCP port 1240 target extended-remote :1240 + + # QEMU Config + maintenance packet Qqemu.PhyMemMode:1 # Argument Parsing set $statePath=$arg1 @@ -29,14 +32,14 @@ define createCheckpoint # Log all registers to a file printf "GDB storing state to %s\n", $statePath - set logging file $statePath + eval "set logging file %s", $statePath set logging on info all-registers set logging off # Log main memory to a file printf "GDB storing RAM to %s\n", $ramPath - set logging file ../linux-testvectors/intermediate-outputs/ramGDB.txt + eval "set logging file %s", $ramPath set logging on x/134217728xb 0x80000000 set logging off diff --git a/wally-pipelined/linux-testgen/testvector-generation/parseState.py b/wally-pipelined/linux-testgen/testvector-generation/parseState.py index 0a0c8c8b5..cd49ccdf8 100755 --- a/wally-pipelined/linux-testgen/testvector-generation/parseState.py +++ b/wally-pipelined/linux-testgen/testvector-generation/parseState.py @@ -1,5 +1,5 @@ #! 
/usr/bin/python3 -import sys +import sys, os ################ # Helper Funcs # @@ -29,8 +29,8 @@ print("Begin parsing state.") # Parse Args if len(sys.argv) != 2: sys.exit('Error parseState.py expects 1 arg:\n parseState.py ') -outDir = sys.argv[1] -stateGDBpath = outDir+'/intermediate-outputs/stateGDB1K.txt' +outDir = sys.argv[1]+'/' +stateGDBpath = outDir+'intermediate-outputs/stateGDB.txt' if not os.path.exists(stateGDBpath): sys.exit('Error input file '+stateGDBpath+'not found') From 8f362975691c969b3df734880d7d0ae845ef8790 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 3 Oct 2021 00:32:58 -0400 Subject: [PATCH 29/36] Added suffixes to more divider signals --- .../regression/wave-dos/peripheral-waves.do | 10 ----- wally-pipelined/src/muldiv/intdivrestoring.sv | 41 +++++++++---------- 2 files changed, 20 insertions(+), 31 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index 30f9718fb..c90197eda 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -38,17 +38,7 @@ add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE add wave /testbench/dut/hart/mdu/genblk1/div/StartDivideE add wave /testbench/dut/hart/mdu/DivBusyE add wave -hex /testbench/dut/hart/mdu/genblk1/div/DE -add wave -hex /testbench/dut/hart/mdu/genblk1/div/Din add wave -hex /testbench/dut/hart/mdu/genblk1/div/XE -add wave -hex /testbench/dut/hart/mdu/genblk1/div/Win -add wave -hex /testbench/dut/hart/mdu/genblk1/div/XQin -add wave -hex /testbench/dut/hart/mdu/genblk1/div/Wshift -add wave -hex /testbench/dut/hart/mdu/genblk1/div/XQshift -add wave -hex /testbench/dut/hart/mdu/genblk1/div/Wnext -add wave -hex /testbench/dut/hart/mdu/genblk1/div/qi -add wave -hex /testbench/dut/hart/mdu/genblk1/div/Wprime -add wave -hex /testbench/dut/hart/mdu/genblk1/div/W -add wave -hex /testbench/dut/hart/mdu/genblk1/div/XQ add wave -hex 
/testbench/dut/hart/mdu/genblk1/div/RemM add wave -hex /testbench/dut/hart/mdu/genblk1/div/QuotM diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 512c00ece..e733887ca 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -36,12 +36,11 @@ module intdivrestoring ( output logic [`XLEN-1:0] QuotM, RemM ); - logic [`XLEN-1:0] W, W2, Win, Wshift, Wprime, Wn, Wnn, Wnext, XQ, XQin, XQshift, XQn, XQnn, XQnext, DSavedE, Din, Dabs, D2, DnE, XnE, Xabs, X2, XSavedE, XSavedM, Xinit, DAbsB, W1, XQ1; - logic qi, qib; // curent quotient bit + logic [`XLEN-1:0] DSavedE, XSavedE, XSavedM, DnE, DAbsBE, XnE, XInitE, WE, XQE, W1E, XQ1E, WNextE, XQNextE, WM, XQM, WnM, XQnM; localparam STEPBITS = $clog2(`XLEN)-1; logic [STEPBITS:0] step; logic Div0E, Div0M; - logic init, startd, SignXE, SignXM, SignDE, SignDM, NegWM, NegQM; + logic DivInitE, SignXE, SignXM, SignDE, SignDM, NegWM, NegQM; logic SignedDivideM; // *** add pipe stages to everything @@ -50,7 +49,7 @@ module intdivrestoring ( // Saving the inputs is the most hardware-efficient way to fix the issue. flopen #(`XLEN) dsavereg(~clk, StartDivideE, DE, DSavedE); flopen #(`XLEN) xsavereg(~clk, StartDivideE, XE, XSavedE); - assign SignDE = DSavedE[`XLEN-1]; // *** do some of these need pipelining for consecutive divides? 
+ assign SignDE = DSavedE[`XLEN-1]; assign SignXE = XSavedE[`XLEN-1]; assign Div0E = (DSavedE == 0); @@ -63,45 +62,45 @@ module intdivrestoring ( // Take absolute value for signed operations, and negate D to handle subtraction in divider stages neg #(`XLEN) negd(DSavedE, DnE); - mux2 #(`XLEN) dabsmux(DnE, DSavedE, SignedDivideE & SignDE, DAbsB); // take absolute value for signed operations, and negate for subtraction setp + mux2 #(`XLEN) dabsmux(DnE, DSavedE, SignedDivideE & SignDE, DAbsBE); // take absolute value for signed operations, and negate for subtraction setp neg #(`XLEN) negx(XSavedE, XnE); - mux2 #(`XLEN) xabsmux(XSavedE, XnE, SignedDivideE & SignXE, Xinit); // need original X as remainder if doing divide by 0 + mux2 #(`XLEN) xabsmux(XSavedE, XnE, SignedDivideE & SignXE, XInitE); // need original X as remainder if doing divide by 0 - // Put suffixes on Xinit, init->DivInitE, Wn, XQn + // Put suffixes on XInitE, init->DivInitE, Wn, XQn // initialization multiplexers on first cycle of operation (one cycle after start is asserted) - mux2 #(`XLEN) wmux(W, {`XLEN{1'b0}}, init, Win); - mux2 #(`XLEN) xmux(XQ, Xinit, init, XQin); + mux2 #(`XLEN) wmux(WM, {`XLEN{1'b0}}, DivInitE, WE); + mux2 #(`XLEN) xmux(XQM, XInitE, DivInitE, XQE); // *** parameterize steps per cycle - intdivrestoringstep step1(Win, XQin, DAbsB, W1, XQ1); - intdivrestoringstep step2(W1, XQ1, DAbsB, Wnext, XQnext); + intdivrestoringstep step1(WE, XQE, DAbsBE, W1E, XQ1E); + intdivrestoringstep step2(W1E, XQ1E, DAbsBE, WNextE, XQNextE); - flopen #(`XLEN) wreg(clk, BusyE, Wnext, W); - flopen #(`XLEN) xreg(clk, BusyE, XQnext, XQ); + flopen #(`XLEN) wreg(clk, BusyE, WNextE, WM); + flopen #(`XLEN) xreg(clk, BusyE, XQNextE, XQM); // Output selection logic in Memory Stage // On final setp of signed operations, negate outputs as needed assign NegWM = SignedDivideM & SignXM; assign NegQM = SignedDivideM & (SignXM ^ SignDM); - neg #(`XLEN) wneg(W, Wn); - neg #(`XLEN) qneg(XQ, XQn); + neg #(`XLEN) wneg(WM, 
WnM); + neg #(`XLEN) qneg(XQM, XQnM); // Select appropriate output: normal, negated, or for divide by zero - mux3 #(`XLEN) qmux(XQ, XQn, {`XLEN{1'b1}}, {Div0M, NegQM}, QuotM); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero - mux3 #(`XLEN) remmux(W, Wn, XSavedM, {Div0M, NegWM}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero + mux3 #(`XLEN) qmux(XQM, XQnM, {`XLEN{1'b1}}, {Div0M, NegQM}, QuotM); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero + mux3 #(`XLEN) remmux(WM, WnM, XSavedM, {Div0M, NegWM}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero // verify it's really necessary to have XSavedM - // busy logic + // Divider FSM to sequence Init, Busy, and Done always_ff @(posedge clk) if (reset) begin - BusyE = 0; DivDoneM = 0; step = 0; init = 0; + BusyE = 0; DivDoneM = 0; step = 0; DivInitE = 0; end else if (StartDivideE & ~StallM) begin if (Div0E) DivDoneM = 1; else begin - BusyE = 1; step = 0; init = 1; + BusyE = 1; step = 0; DivInitE = 1; end end else if (BusyE & ~DivDoneM) begin // pause one cycle at beginning of signed operations for absolute value - init = 0; + DivInitE = 0; step = step + 1; if (step[STEPBITS]) begin step = 0; From 078ddfd341180a077f85b50dad8add12a8d55c5d Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 3 Oct 2021 00:41:41 -0400 Subject: [PATCH 30/36] Divider cleanup --- wally-pipelined/src/muldiv/intdivrestoring.sv | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index e733887ca..15ec9e005 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -42,8 +42,7 @@ module intdivrestoring ( logic Div0E, Div0M; logic DivInitE, SignXE, SignXM, SignDE, SignDM, NegWM, NegQM; logic SignedDivideM; - // *** add pipe stages to 
everything - + // save inputs on the negative edge of the execute clock. // This is unusual practice, but the inputs are not guaranteed to be stable due to some hazard and forwarding logic. // Saving the inputs is the most hardware-efficient way to fix the issue. @@ -66,8 +65,6 @@ module intdivrestoring ( neg #(`XLEN) negx(XSavedE, XnE); mux2 #(`XLEN) xabsmux(XSavedE, XnE, SignedDivideE & SignXE, XInitE); // need original X as remainder if doing divide by 0 - // Put suffixes on XInitE, init->DivInitE, Wn, XQn - // initialization multiplexers on first cycle of operation (one cycle after start is asserted) mux2 #(`XLEN) wmux(WM, {`XLEN{1'b0}}, DivInitE, WE); mux2 #(`XLEN) xmux(XQM, XInitE, DivInitE, XQE); @@ -76,6 +73,7 @@ module intdivrestoring ( intdivrestoringstep step1(WE, XQE, DAbsBE, W1E, XQ1E); intdivrestoringstep step2(W1E, XQ1E, DAbsBE, WNextE, XQNextE); + // registers after division steps flopen #(`XLEN) wreg(clk, BusyE, WNextE, WM); flopen #(`XLEN) xreg(clk, BusyE, XQNextE, XQM); From 30ec68d5678d5fc505b2078bb9ccdacf466431d6 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 3 Oct 2021 01:10:15 -0400 Subject: [PATCH 31/36] Parameterized number of bits per cycle for integer division --- .../config/buildroot/wally-config.vh | 4 +++ .../config/busybear/wally-config.vh | 4 +++ .../config/coremark/wally-config.vh | 7 +++++ .../config/coremark_bare/wally-config.vh | 4 +++ wally-pipelined/config/rv32ic/wally-config.vh | 4 +++ .../config/rv32icfd/wally-config.vh | 4 +++ wally-pipelined/config/rv64BP/wally-config.vh | 7 +++++ wally-pipelined/config/rv64ic/wally-config.vh | 4 +++ .../config/rv64icfd/wally-config.vh | 4 +++ .../config/rv64imc/wally-config.vh | 7 +++++ wally-pipelined/src/muldiv/intdivrestoring.sv | 27 ++++++++++++------- .../src/muldiv/intdivrestoringstep.sv | 3 +++ .../testbench/testbench-imperas.sv | 1 + 13 files changed, 70 insertions(+), 10 deletions(-) diff --git a/wally-pipelined/config/buildroot/wally-config.vh 
b/wally-pipelined/config/buildroot/wally-config.vh index 1b6e030ff..0a59bc3ad 100644 --- a/wally-pipelined/config/buildroot/wally-config.vh +++ b/wally-pipelined/config/buildroot/wally-config.vh @@ -66,6 +66,10 @@ `define ICACHE_WAYSIZEINBYTES 4096 `define ICACHE_BLOCKLENINBITS 256 +// Integer Divider Configuration +// DIV_BITSPERCYCLE must be 1, 2, or 4 +`define DIV_BITSPERCYCLE 4 + // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 diff --git a/wally-pipelined/config/busybear/wally-config.vh b/wally-pipelined/config/busybear/wally-config.vh index 86385bd88..614cfb2e5 100644 --- a/wally-pipelined/config/busybear/wally-config.vh +++ b/wally-pipelined/config/busybear/wally-config.vh @@ -66,6 +66,10 @@ `define ICACHE_WAYSIZEINBYTES 4096 `define ICACHE_BLOCKLENINBITS 256 +// Integer Divider Configuration +// DIV_BITSPERCYCLE must be 1, 2, or 4 +`define DIV_BITSPERCYCLE 4 + // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 diff --git a/wally-pipelined/config/coremark/wally-config.vh b/wally-pipelined/config/coremark/wally-config.vh index 32006c690..8b1ae7dc7 100644 --- a/wally-pipelined/config/coremark/wally-config.vh +++ b/wally-pipelined/config/coremark/wally-config.vh @@ -65,6 +65,13 @@ `define ICACHE_WAYSIZEINBYTES 4096 `define ICACHE_BLOCKLENINBITS 256 +// Integer Divider Configuration +// DIV_BITSPERCYCLE must be 1, 2, or 4 +`define DIV_BITSPERCYCLE 4 + +// Legal number of PMP entries are 0, 16, or 64 +`define PMP_ENTRIES 16 + // Address space `define RESET_VECTOR 64'h00000000000100b0 diff --git a/wally-pipelined/config/coremark_bare/wally-config.vh b/wally-pipelined/config/coremark_bare/wally-config.vh index 8f79212b5..be4a83205 100644 --- a/wally-pipelined/config/coremark_bare/wally-config.vh +++ b/wally-pipelined/config/coremark_bare/wally-config.vh @@ -66,6 +66,10 @@ `define ICACHE_WAYSIZEINBYTES 4096 `define ICACHE_BLOCKLENINBITS 256 +// Integer Divider Configuration +// DIV_BITSPERCYCLE must be 1, 2, or 4 +`define 
DIV_BITSPERCYCLE 4 + // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 64 diff --git a/wally-pipelined/config/rv32ic/wally-config.vh b/wally-pipelined/config/rv32ic/wally-config.vh index dfe1c61e0..3280c3759 100644 --- a/wally-pipelined/config/rv32ic/wally-config.vh +++ b/wally-pipelined/config/rv32ic/wally-config.vh @@ -64,6 +64,10 @@ `define ICACHE_WAYSIZEINBYTES 4096 `define ICACHE_BLOCKLENINBITS 256 +// Integer Divider Configuration +// DIV_BITSPERCYCLE must be 1, 2, or 4 +`define DIV_BITSPERCYCLE 4 + // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 diff --git a/wally-pipelined/config/rv32icfd/wally-config.vh b/wally-pipelined/config/rv32icfd/wally-config.vh index 2f0bc378e..432906c85 100644 --- a/wally-pipelined/config/rv32icfd/wally-config.vh +++ b/wally-pipelined/config/rv32icfd/wally-config.vh @@ -64,6 +64,10 @@ `define ICACHE_WAYSIZEINBYTES 4096 `define ICACHE_BLOCKLENINBITS 256 +// Integer Divider Configuration +// DIV_BITSPERCYCLE must be 1, 2, or 4 +`define DIV_BITSPERCYCLE 4 + // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 diff --git a/wally-pipelined/config/rv64BP/wally-config.vh b/wally-pipelined/config/rv64BP/wally-config.vh index c189cb0fe..162192495 100644 --- a/wally-pipelined/config/rv64BP/wally-config.vh +++ b/wally-pipelined/config/rv64BP/wally-config.vh @@ -66,6 +66,13 @@ `define ICACHE_WAYSIZEINBYTES 4096 `define ICACHE_BLOCKLENINBITS 256 +// Integer Divider Configuration +// DIV_BITSPERCYCLE must be 1, 2, or 4 +`define DIV_BITSPERCYCLE 4 + +// Legal number of PMP entries are 0, 16, or 64 +`define PMP_ENTRIES 16 + // Address space `define RESET_VECTOR 64'h0000000000000000 diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index ef935ae2c..518da71c7 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -65,6 +65,10 @@ `define ICACHE_WAYSIZEINBYTES 4096 `define 
ICACHE_BLOCKLENINBITS 256 +// Integer Divider Configuration +// DIV_BITSPERCYCLE must be 1, 2, or 4 +`define DIV_BITSPERCYCLE 4 + // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 64 diff --git a/wally-pipelined/config/rv64icfd/wally-config.vh b/wally-pipelined/config/rv64icfd/wally-config.vh index a91531dd0..d3587ff4d 100644 --- a/wally-pipelined/config/rv64icfd/wally-config.vh +++ b/wally-pipelined/config/rv64icfd/wally-config.vh @@ -66,6 +66,10 @@ `define ICACHE_WAYSIZEINBYTES 4096 `define ICACHE_BLOCKLENINBITS 256 +// Integer Divider Configuration +// DIV_BITSPERCYCLE must be 1, 2, or 4 +`define DIV_BITSPERCYCLE 4 + // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 64 diff --git a/wally-pipelined/config/rv64imc/wally-config.vh b/wally-pipelined/config/rv64imc/wally-config.vh index 0a874a72d..437a0040f 100644 --- a/wally-pipelined/config/rv64imc/wally-config.vh +++ b/wally-pipelined/config/rv64imc/wally-config.vh @@ -64,6 +64,13 @@ `define ICACHE_WAYSIZEINBYTES 4096 `define ICACHE_BLOCKLENINBITS 256 +// Integer Divider Configuration +// DIV_BITSPERCYCLE must be 1, 2, or 4 +`define DIV_BITSPERCYCLE 4 + +// Legal number of PMP entries are 0, 16, or 64 +`define PMP_ENTRIES 64 + // Address space `define RESET_VECTOR 64'h0000000080000000 diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index 15ec9e005..c579f274e 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -25,6 +25,8 @@ `include "wally-config.vh" + /* verilator lint_off UNOPTFLAT */ + module intdivrestoring ( input logic clk, input logic reset, @@ -36,8 +38,10 @@ module intdivrestoring ( output logic [`XLEN-1:0] QuotM, RemM ); - logic [`XLEN-1:0] DSavedE, XSavedE, XSavedM, DnE, DAbsBE, XnE, XInitE, WE, XQE, W1E, XQ1E, WNextE, XQNextE, WM, XQM, WnM, XQnM; - localparam STEPBITS = $clog2(`XLEN)-1; + logic [`XLEN-1:0] WE[`DIV_BITSPERCYCLE:0]; + logic 
[`XLEN-1:0] XQE[`DIV_BITSPERCYCLE:0]; + logic [`XLEN-1:0] DSavedE, XSavedE, XSavedM, DnE, DAbsBE, XnE, XInitE, WM, XQM, WnM, XQnM; + localparam STEPBITS = $clog2(`XLEN/`DIV_BITSPERCYCLE); logic [STEPBITS:0] step; logic Div0E, Div0M; logic DivInitE, SignXE, SignXM, SignDE, SignDM, NegWM, NegQM; @@ -66,16 +70,19 @@ module intdivrestoring ( mux2 #(`XLEN) xabsmux(XSavedE, XnE, SignedDivideE & SignXE, XInitE); // need original X as remainder if doing divide by 0 // initialization multiplexers on first cycle of operation (one cycle after start is asserted) - mux2 #(`XLEN) wmux(WM, {`XLEN{1'b0}}, DivInitE, WE); - mux2 #(`XLEN) xmux(XQM, XInitE, DivInitE, XQE); + mux2 #(`XLEN) wmux(WM, {`XLEN{1'b0}}, DivInitE, WE[0]); + mux2 #(`XLEN) xmux(XQM, XInitE, DivInitE, XQE[0]); - // *** parameterize steps per cycle - intdivrestoringstep step1(WE, XQE, DAbsBE, W1E, XQ1E); - intdivrestoringstep step2(W1E, XQ1E, DAbsBE, WNextE, XQNextE); + // one copy of divstep for each bit produced per cycle + generate + genvar i; + for (i=0; i<`DIV_BITSPERCYCLE; i = i+1) + intdivrestoringstep divstep(WE[i], XQE[i], DAbsBE, WE[i+1], XQE[i+1]); + endgenerate // registers after division steps - flopen #(`XLEN) wreg(clk, BusyE, WNextE, WM); - flopen #(`XLEN) xreg(clk, BusyE, XQNextE, XQM); + flopen #(`XLEN) wreg(clk, BusyE, WE[`DIV_BITSPERCYCLE], WM); + flopen #(`XLEN) xreg(clk, BusyE, XQE[`DIV_BITSPERCYCLE], XQM); // Output selection logic in Memory Stage // On final setp of signed operations, negate outputs as needed @@ -112,4 +119,4 @@ module intdivrestoring ( endmodule -// *** clean up internal signals \ No newline at end of file +/* verilator lint_on UNOPTFLAT */ diff --git a/wally-pipelined/src/muldiv/intdivrestoringstep.sv b/wally-pipelined/src/muldiv/intdivrestoringstep.sv index fe32da554..339695fcf 100644 --- a/wally-pipelined/src/muldiv/intdivrestoringstep.sv +++ b/wally-pipelined/src/muldiv/intdivrestoringstep.sv @@ -25,6 +25,8 @@ `include "wally-config.vh" +/* verilator lint_off UNOPTFLAT 
*/ + module intdivrestoringstep( input logic [`XLEN-1:0] W, XQ, DAbsB, output logic [`XLEN-1:0] WOut, XQOut); @@ -39,3 +41,4 @@ module intdivrestoringstep( mux2 #(`XLEN) wrestoremux(WShift, WPrime, qi, WOut); endmodule +/* verilator lint_on UNOPTFLAT */ diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 50b447039..65fc56810 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -743,6 +743,7 @@ module riscvassertions(); // Legal number of PMP entries are 0, 16, or 64 initial begin assert (`PMP_ENTRIES == 0 || `PMP_ENTRIES==16 || `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64"); + assert (`DIV_BITSPERCYCLE == 1 || `DIV_BITSPERCYCLE==2 || `DIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: DIV_BITSPERCYCLE must be 1, 2, or 4"); assert (`F_SUPPORTED || ~`D_SUPPORTED) else $error("Can't support double without supporting float"); assert (`XLEN == 64 || ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32"); assert (`DCACHE_WAYSIZEINBYTES <= 4096 || `MEM_DCACHE == 0 || `MEM_VIRTMEM == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); From bd61ec544bcc9b1b833ac017c5f12d6a2d1704c0 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 3 Oct 2021 01:12:40 -0400 Subject: [PATCH 32/36] Divider comments cleanup --- wally-pipelined/src/muldiv/intdivrestoring.sv | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index c579f274e..b28f63e61 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -86,14 +86,13 @@ module intdivrestoring ( // Output selection logic in Memory Stage // On final setp of signed operations, negate 
outputs as needed - assign NegWM = SignedDivideM & SignXM; - assign NegQM = SignedDivideM & (SignXM ^ SignDM); + assign NegWM = SignedDivideM & SignXM; // Remainder should have same sign as X + assign NegQM = SignedDivideM & (SignXM ^ SignDM); // Quotient should be negative if one operand is positive and the other is negative neg #(`XLEN) wneg(WM, WnM); neg #(`XLEN) qneg(XQM, XQnM); // Select appropriate output: normal, negated, or for divide by zero mux3 #(`XLEN) qmux(XQM, XQnM, {`XLEN{1'b1}}, {Div0M, NegQM}, QuotM); // Q taken from XQ register, negated if necessary, or all 1s when dividing by zero mux3 #(`XLEN) remmux(WM, WnM, XSavedM, {Div0M, NegWM}, RemM); // REM taken from W register, negated if necessary, or from X when dividing by zero - // verify it's really necessary to have XSavedM // Divider FSM to sequence Init, Busy, and Done always_ff @(posedge clk) From bf0061be66877682554adc0746a40b0e73a039c4 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 3 Oct 2021 09:42:22 -0400 Subject: [PATCH 33/36] Reduced cycle count for DIVW/DIVUW by two --- wally-pipelined/config/rv64ic/wally-config.vh | 2 +- wally-pipelined/src/muldiv/intdivrestoring.sv | 33 +++++++++++++------ .../src/muldiv/intdivrestoringstep.sv | 7 ++-- wally-pipelined/src/muldiv/muldiv.sv | 14 ++------ .../testbench/testbench-imperas.sv | 12 +++---- 5 files changed, 36 insertions(+), 32 deletions(-) diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index 518da71c7..bedfc4f3f 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -67,7 +67,7 @@ // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 -`define DIV_BITSPERCYCLE 4 +`define DIV_BITSPERCYCLE 1 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 64 diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index b28f63e61..a0ddb309e 100644 --- 
a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -31,7 +31,7 @@ module intdivrestoring ( input logic clk, input logic reset, input logic StallM, FlushM, - input logic SignedDivideE, + input logic SignedDivideE, W64E, input logic StartDivideE, input logic [`XLEN-1:0] XE, DE, output logic BusyE, DivDoneM, @@ -40,7 +40,7 @@ module intdivrestoring ( logic [`XLEN-1:0] WE[`DIV_BITSPERCYCLE:0]; logic [`XLEN-1:0] XQE[`DIV_BITSPERCYCLE:0]; - logic [`XLEN-1:0] DSavedE, XSavedE, XSavedM, DnE, DAbsBE, XnE, XInitE, WM, XQM, WnM, XQnM; + logic [`XLEN-1:0] DSavedE, XSavedE, XSavedM, DinE, XinE, DnE, DAbsBE, XnE, XInitE, WM, XQM, WnM, XQnM; localparam STEPBITS = $clog2(`XLEN/`DIV_BITSPERCYCLE); logic [STEPBITS:0] step; logic Div0E, Div0M; @@ -52,9 +52,22 @@ module intdivrestoring ( // Saving the inputs is the most hardware-efficient way to fix the issue. flopen #(`XLEN) dsavereg(~clk, StartDivideE, DE, DSavedE); flopen #(`XLEN) xsavereg(~clk, StartDivideE, XE, XSavedE); - assign SignDE = DSavedE[`XLEN-1]; - assign SignXE = XSavedE[`XLEN-1]; - assign Div0E = (DSavedE == 0); + + // Handle sign extension for W-type instructions + generate + if (`XLEN == 64) begin // RV64 has W-type instructions + mux2 #(`XLEN) xinmux(XSavedE, {XSavedE[31:0], 32'b0}, W64E, XinE); + mux2 #(`XLEN) dinmux(DSavedE, {{32{DSavedE[31]&SignedDivideE}}, DSavedE[31:0]}, W64E, DinE); + end else begin // RV32 has no W-type instructions + assign XinE = XSavedE; + assign DinE = DSavedE; + end + endgenerate + + // Extract sign bits and check for division by zero + assign SignDE = DinE[`XLEN-1]; + assign SignXE = XinE[`XLEN-1]; + assign Div0E = (DinE == 0); // pipeline registers flopenrc #(1) SignedDivideMReg(clk, reset, FlushM, ~StallM, SignedDivideE, SignedDivideM); @@ -64,10 +77,10 @@ module intdivrestoring ( flopenrc #(`XLEN) XSavedMReg(clk, reset, FlushM, ~StallM, XSavedE, XSavedM); // is this truly necessary?
// Take absolute value for signed operations, and negate D to handle subtraction in divider stages - neg #(`XLEN) negd(DSavedE, DnE); - mux2 #(`XLEN) dabsmux(DnE, DSavedE, SignedDivideE & SignDE, DAbsBE); // take absolute value for signed operations, and negate for subtraction setp - neg #(`XLEN) negx(XSavedE, XnE); - mux2 #(`XLEN) xabsmux(XSavedE, XnE, SignedDivideE & SignXE, XInitE); // need original X as remainder if doing divide by 0 + neg #(`XLEN) negd(DinE, DnE); + mux2 #(`XLEN) dabsmux(DnE, DinE, SignedDivideE & SignDE, DAbsBE); // take absolute value for signed operations, and negate for subtraction setp + neg #(`XLEN) negx(XinE, XnE); + mux2 #(`XLEN) xabsmux(XinE, XnE, SignedDivideE & SignXE, XInitE); // need original X as remainder if doing divide by 0 // initialization multiplexers on first cycle of operation (one cycle after start is asserted) mux2 #(`XLEN) wmux(WM, {`XLEN{1'b0}}, DivInitE, WE[0]); @@ -106,7 +119,7 @@ module intdivrestoring ( end else if (BusyE & ~DivDoneM) begin // pause one cycle at beginning of signed operations for absolute value DivInitE = 0; step = step + 1; - if (step[STEPBITS]) begin + if (step[STEPBITS] | (`XLEN==64) & W64E & step[STEPBITS-1]) begin // complete in half the time for W-type instructions step = 0; BusyE = 0; DivDoneM = 1; diff --git a/wally-pipelined/src/muldiv/intdivrestoringstep.sv b/wally-pipelined/src/muldiv/intdivrestoringstep.sv index 339695fcf..73c4b5463 100644 --- a/wally-pipelined/src/muldiv/intdivrestoringstep.sv +++ b/wally-pipelined/src/muldiv/intdivrestoringstep.sv @@ -34,11 +34,10 @@ module intdivrestoringstep( logic [`XLEN-1:0] WShift, WPrime; logic qi, qib; - assign {WShift, XQOut} = {W[`XLEN-2:0], XQ, qi}; - adder #(`XLEN+1) wdsub({1'b0, WShift}, {1'b1, DAbsB}, {qib, WPrime}); - //assign {qib, WPrime} = {1'b0, WShift} + {1'b1, DAbsB}; // effective subtractor, carry out determines quotient bit + assign {WShift, XQOut} = {W[`XLEN-2:0], XQ, qi}; // shift W and X/Q left, insert quotient bit at bottom 
+ adder #(`XLEN+1) wdsub({1'b0, WShift}, {1'b1, DAbsB}, {qib, WPrime}); // effective subtractor, carry out determines quotient bit assign qi = ~qib; - mux2 #(`XLEN) wrestoremux(WShift, WPrime, qi, WOut); + mux2 #(`XLEN) wrestoremux(WShift, WPrime, qi, WOut); // if quotient is zero, restore W endmodule /* verilator lint_on UNOPTFLAT */ diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 7cccf2d72..b887fa9dd 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -65,20 +65,12 @@ module muldiv ( flopenrc #(`XLEN*2) ProdMReg(clk, reset, FlushM, ~StallM, ProdE, ProdM); // Divide - - // Handle sign extension for W-type instructions - if (`XLEN == 64) begin // RV64 has W-type instructions - assign XE = W64E ? {{32{SrcAE[31]&SignedDivideE}}, SrcAE[31:0]} : SrcAE; - assign DE = W64E ? {{32{SrcBE[31]&SignedDivideE}}, SrcBE[31:0]} : SrcBE; - end else begin // RV32 has no W-type instructions - assign XE = SrcAE; - assign DE = SrcBE; - end - + assign XE = SrcAE; + assign DE = SrcBE; assign SignedDivideE = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, StartDivideE, SignedDivideE); intdivrestoring div(.clk, .reset, .StallM, .FlushM, - .SignedDivideE, .StartDivideE, .XE, .DE, .BusyE, .DivDoneM, .QuotM, .RemM); + .SignedDivideE, .W64E, .StartDivideE, .XE, .DE, .BusyE, .DivDoneM, .QuotM, .RemM); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneM; diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 65fc56810..3f6d449e0 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -161,6 +161,10 @@ string tests32f[] = '{ 
}; string tests64m[] = '{ + "rv64m/I-REMUW-01", "3000", + "rv64m/I-REMW-01", "3000", + "rv64m/I-DIVUW-01", "3000", + "rv64m/I-DIVW-01", "3000", "rv64m/I-MUL-01", "3000", "rv64m/I-MULH-01", "3000", "rv64m/I-MULHSU-01", "3000", @@ -168,12 +172,8 @@ string tests32f[] = '{ "rv64m/I-MULW-01", "3000", "rv64m/I-DIV-01", "3000", "rv64m/I-DIVU-01", "3000", - "rv64m/I-DIVUW-01", "3000", - "rv64m/I-DIVW-01", "3000", "rv64m/I-REM-01", "3000", - "rv64m/I-REMU-01", "3000", - "rv64m/I-REMUW-01", "3000", - "rv64m/I-REMW-01", "3000" + "rv64m/I-REMU-01", "3000" }; string tests64ic[] = '{ @@ -536,11 +536,11 @@ string tests32f[] = '{ tests = {tests64p,tests64i, tests64periph}; if (`C_SUPPORTED) tests = {tests, tests64ic}; else tests = {tests, tests64iNOc}; - if (`M_SUPPORTED) tests = {tests, tests64m}; if (`F_SUPPORTED) tests = {tests64f, tests}; if (`D_SUPPORTED) tests = {tests64d, tests}; if (`MEM_VIRTMEM) tests = {tests64mmu, tests}; if (`A_SUPPORTED) tests = {tests64a, tests}; + if (`M_SUPPORTED) tests = {tests64m, tests}; end //tests = {tests64a, tests}; end else begin // RV32 From 9809e57d0c4ac4f0041c6fd7e78647292b6ecf83 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 3 Oct 2021 11:11:53 -0400 Subject: [PATCH 34/36] Replacing XE and DE with SrcAE and SrcBE in divider --- wally-pipelined/regression/wave-dos/peripheral-waves.do | 2 -- wally-pipelined/src/muldiv/muldiv.sv | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index c90197eda..0203836e3 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -37,8 +37,6 @@ add wave -hex /testbench/dut/hart/ieu/dp/ALUResultE #add wave /testbench/dut/hart/ieu/dp/PCSrcE add wave /testbench/dut/hart/mdu/genblk1/div/StartDivideE add wave /testbench/dut/hart/mdu/DivBusyE -add wave -hex /testbench/dut/hart/mdu/genblk1/div/DE 
-add wave -hex /testbench/dut/hart/mdu/genblk1/div/XE add wave -hex /testbench/dut/hart/mdu/genblk1/div/RemM add wave -hex /testbench/dut/hart/mdu/genblk1/div/QuotM diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index b887fa9dd..0667bf82a 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -67,10 +67,10 @@ module muldiv ( // Divide assign XE = SrcAE; assign DE = SrcBE; - assign SignedDivideE = ~Funct3E[0]; // simplified from (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]); + assign SignedDivideE = ~Funct3E[0]; //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, StartDivideE, SignedDivideE); intdivrestoring div(.clk, .reset, .StallM, .FlushM, - .SignedDivideE, .W64E, .StartDivideE, .XE, .DE, .BusyE, .DivDoneM, .QuotM, .RemM); + .SignedDivideE, .W64E, .StartDivideE, .XE(SrcAE), .DE(SrcBE), .BusyE, .DivDoneM, .QuotM, .RemM); // Start a divide when a new division instruction is received and the divider isn't already busy or finishing assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneM; From 3398328bf1fed1852c0a94b0f6aa6a7b3958b63d Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 3 Oct 2021 11:16:48 -0400 Subject: [PATCH 35/36] Divider cleanup --- wally-pipelined/src/muldiv/muldiv.sv | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index 0667bf82a..dac98f3ee 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -48,35 +48,23 @@ module muldiv ( logic [`XLEN-1:0] QuotM, RemM; logic [`XLEN*2-1:0] ProdE, ProdM; - logic enable_q; - //logic [2:0] Funct3E_Q; - logic div0error; // ***unused - logic [`XLEN-1:0] XE, DE; - //logic [`XLEN-1:0] Num0, Den0; - - // logic gclk; logic StartDivideE, BusyE, DivDoneM; logic SignedDivideE; logic W64M; - // Multiplier mul 
mul(.*); flopenrc #(`XLEN*2) ProdMReg(clk, reset, FlushM, ~StallM, ProdE, ProdM); // Divide - assign XE = SrcAE; - assign DE = SrcBE; - assign SignedDivideE = ~Funct3E[0]; - //intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, StartDivideE, SignedDivideE); - intdivrestoring div(.clk, .reset, .StallM, .FlushM, - .SignedDivideE, .W64E, .StartDivideE, .XE(SrcAE), .DE(SrcBE), .BusyE, .DivDoneM, .QuotM, .RemM); - // Start a divide when a new division instruction is received and the divider isn't already busy or finishing assign StartDivideE = MulDivE & Funct3E[2] & ~BusyE & ~DivDoneM; assign DivBusyE = StartDivideE | BusyE; + assign SignedDivideE = ~Funct3E[0]; + intdivrestoring div(.clk, .reset, .StallM, .FlushM, + .SignedDivideE, .W64E, .StartDivideE, .XE(SrcAE), .DE(SrcBE), .BusyE, .DivDoneM, .QuotM, .RemM); - // Select result + // Result multiplexer always_comb case (Funct3M) 3'b000: PrelimResultM = ProdM[`XLEN-1:0]; @@ -97,6 +85,8 @@ module muldiv ( assign MulDivResultM = PrelimResultM; end + // Writeback stage pipeline register + flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW); end else begin // no M instructions supported From cc41d40d612419e7d2ef3615c579c9ef8ec36a21 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 3 Oct 2021 11:22:34 -0400 Subject: [PATCH 36/36] Divider cleanup --- wally-pipelined/src/muldiv/intdivrestoring.sv | 6 +++--- wally-pipelined/src/muldiv/muldiv.sv | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/wally-pipelined/src/muldiv/intdivrestoring.sv b/wally-pipelined/src/muldiv/intdivrestoring.sv index a0ddb309e..5ab9b2b8c 100644 --- a/wally-pipelined/src/muldiv/intdivrestoring.sv +++ b/wally-pipelined/src/muldiv/intdivrestoring.sv @@ -33,7 +33,7 @@ module intdivrestoring ( input logic StallM, FlushM, input logic SignedDivideE, W64E, input logic StartDivideE, - input logic [`XLEN-1:0] XE, DE, + input logic [`XLEN-1:0] SrcAE, SrcBE, output
logic BusyE, DivDoneM, output logic [`XLEN-1:0] QuotM, RemM ); @@ -50,8 +50,8 @@ module intdivrestoring ( // save inputs on the negative edge of the execute clock. // This is unusual practice, but the inputs are not guaranteed to be stable due to some hazard and forwarding logic. // Saving the inputs is the most hardware-efficient way to fix the issue. - flopen #(`XLEN) dsavereg(~clk, StartDivideE, DE, DSavedE); - flopen #(`XLEN) xsavereg(~clk, StartDivideE, XE, XSavedE); + flopen #(`XLEN) xsavereg(~clk, StartDivideE, SrcAE, XSavedE); + flopen #(`XLEN) dsavereg(~clk, StartDivideE, SrcBE, DSavedE); // Handle sign extension for W-type instructions generate diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index dac98f3ee..8ffe91e9e 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -62,7 +62,7 @@ module muldiv ( assign DivBusyE = StartDivideE | BusyE; assign SignedDivideE = ~Funct3E[0]; intdivrestoring div(.clk, .reset, .StallM, .FlushM, - .SignedDivideE, .W64E, .StartDivideE, .XE(SrcAE), .DE(SrcBE), .BusyE, .DivDoneM, .QuotM, .RemM); + .SignedDivideE, .W64E, .StartDivideE, .SrcAE, .SrcBE, .BusyE, .DivDoneM, .QuotM, .RemM); // Result multiplexer always_comb @@ -86,7 +86,7 @@ module muldiv ( end // Writeback stage pipeline register - + flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW); end else begin // no M instructions supported