From 7249295f53e4021f3d155de17f7b56452f00b0d8 Mon Sep 17 00:00:00 2001 From: cturek Date: Mon, 27 Jun 2022 23:55:21 +0000 Subject: [PATCH 01/10] Updated radix 2 divider to work with integers and floats in new structure. Integers still might not work. --- addins/riscv-arch-test | 2 +- pipelined/config/shared/wally-shared.vh | 6 +++--- pipelined/srt/exptestgen.c | 2 +- pipelined/srt/srt.sv | 19 +++++++++---------- pipelined/srt/testbench.sv | 12 ++++++------ 5 files changed, 20 insertions(+), 21 deletions(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index be67c99bd..307c77b26 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 +Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 51c45ef00..5db8af1cf 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -94,9 +94,9 @@ `define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS) // largest length in IEU/FPU -`define CVTLEN ((`NF<`XLEN) ? `XLEN : `NF) -`define DIVLEN ((`NF < `XLEN) ? `XLEN : `NF) -`define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN) +`define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF)) +`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF)) +`define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN)) `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) `define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9)) `define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? 
(`DIVLEN+`NF+3) : (3*`NF+6)) diff --git a/pipelined/srt/exptestgen.c b/pipelined/srt/exptestgen.c index bd51126e7..61fe74aa4 100644 --- a/pipelined/srt/exptestgen.c +++ b/pipelined/srt/exptestgen.c @@ -46,7 +46,7 @@ void main(void) int i, j; int bias = 1023; - if ((fptr = fopen("testvectors","w")) == NULL) { + if ((fptr = fopen("testvectors","w")) == NULL) { fprintf(stderr, "Couldn't write testvectors file\n"); exit(1); } diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index e40f27589..5adeced47 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -2,7 +2,7 @@ // srt.sv // // Written: David_Harris@hmc.edu 13 January 2022 -// Modified: +// Modified: cturek@hmc.edu June 2022 // // Purpose: Combined Divide and Square Root Floating Point and Integer Unit // @@ -29,10 +29,8 @@ //////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" - -`define DIVLEN ((`NF<(`XLEN+1)) ? (`XLEN + 1) : `NF) -`define EXTRAFRACBITS ((`NF<(`XLEN+1)) ? (`XLEN - `NF + 1) : 0) -`define EXTRAINTBITS ((`NF<(`XLEN+1)) ? 0 : (`NF - `XLEN)) +`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0) +`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN)) module srt ( input logic clk, @@ -131,11 +129,11 @@ module srtpreproc ( lzc #(`XLEN) lzcA (PosA, zeroCntA); lzc #(`XLEN) lzcB (PosB, zeroCntB); - assign ExtraA = {1'b0, PosA, {`EXTRAINTBITS{1'b0}}}; - assign ExtraB = {1'b0, PosB, {`EXTRAINTBITS{1'b0}}}; + assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}}; + assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}}; assign PreprocA = ExtraA << zeroCntA; - assign PreprocB = ExtraB << (zeroCntB + 1); + assign PreprocB = ExtraB << zeroCntB; assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}}; assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}}; @@ -228,14 +226,15 @@ module otfc2 #(parameter N=65) ( // // QM is Q-1. It allows us to write negative bits // without using a costly CPA. 
- logic [N+2:0] Q, QM, QNext, QMNext; + logic [N+2:0] Q, QM, QNext, QMNext, QMMux; // QR and QMR are the shifted versions of Q and QM. // They are treated as [N-1:r] size signals, and // discard the r most significant bits of Q and QM. logic [N+1:0] QR, QMR; flopr #(N+3) Qreg(clk, Start, QNext, Q); - flopr #(N+3) QMreg(clk, Start, QMNext, QM); + mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux); + flop #(`DIVLEN+3) QMreg(clk, QMMux, QM); always_comb begin QR = Q[N+1:0]; diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 93da74752..9655d7f70 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -1,4 +1,4 @@ -`define DIVLEN 65 +`define DIVLEN 64 ///////////// // counter // @@ -17,7 +17,7 @@ module counter(input logic clk, always @(posedge clk) begin - if (count == `DIVLEN+1) done <= #1 1; + if (count == `DIVLEN + 2) done <= #1 1; else if (done | req) done <= #1 0; if (req) count <= #1 0; else count <= #1 count+1; @@ -101,8 +101,8 @@ module testbench; b = Vec[`memb]; {bsign, bExp, bfrac} = b; nextr = Vec[`memr]; - r = Quot[`DIVLEN:`DIVLEN - 52]; - rOTFC = QuotOTFC[`DIVLEN:`DIVLEN - 52]; + r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)]; + rOTFC = QuotOTFC[(`DIVLEN - 1):(`DIVLEN - 52)]; req <= #5 1; end @@ -110,8 +110,8 @@ module testbench; always @(posedge clk) begin - r = Quot[`DIVLEN:`DIVLEN - 52]; - rOTFC = QuotOTFC[`DIVLEN:`DIVLEN - 52]; + r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)]; + rOTFC = QuotOTFC[(`DIVLEN - 1):(`DIVLEN - 52)]; if (done) begin req <= #5 1; From 0417a6a45b660e7415bcef23365e9db59f28add6 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 28 Jun 2022 00:16:22 +0000 Subject: [PATCH 02/10] very basic early termination passes testfloat 64-bit tests --- pipelined/src/fpu/divshiftcalc.sv | 25 +++++++++++-------------- pipelined/src/fpu/lzacorrection.sv | 14 +++++++++++++- pipelined/src/fpu/postprocess.sv | 7 +++++-- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git 
a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv index d867efc44..51698590e 100644 --- a/pipelined/src/fpu/divshiftcalc.sv +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -7,16 +7,15 @@ module divshiftcalc( input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M, output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, output logic [`NORMSHIFTSZ-1:0] DivShiftIn, - output logic [`NE+1:0] CorrDivExp + output logic DivResDenorm, + output logic [`NE+1:0] DivDenormShift ); - logic ResDenorm; - logic [`NE+1:0] DenormShift; logic [`NE+1:0] NormShift; logic [`NE+1:0] Nf, NfPlus1; // is the result denromalized // if the exponent is 1 then the result needs to be normalized then the result is denormalizes - assign ResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:1]&~(DivCalcExpM[0]&Quot[`DIVLEN+2])); + assign DivResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:0]); // select the proper fraction lengnth if (`FPSIZES == 1) begin assign Nf = (`NE+2)'(`NF); @@ -70,24 +69,22 @@ module divshiftcalc( // if the result is denormalized // 00000000x.xxxxxx... Exp = DivCalcExp // .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1 - // .000xxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = 0 + // .00xxxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = +1 // .0000xxxxxxxxxxx... >> 1 Exp = 1 // Left shift amount = DivCalcExp+NF+1-1 - assign DenormShift = Nf+DivCalcExpM; + assign DivDenormShift = Nf+DivCalcExpM; // if the result is normalized // 00000000x.xxxxxx... Exp = DivCalcExp // .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1 - // 00000000x.xxxxxx... << NF+1 Exp = DivCalcExp - // 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 - // Left shift amount = NF+1 plus 1 if normalization required - assign NormShift = NfPlus1 + {(`NE+1)'(0), ~Quot[`DIVLEN+2]}; + // 00000000.xxxxxxx... << NF Exp = DivCalcExp+1 + // 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards) + // 00000000xx.xxxxx... << 1? 
Exp = DivCalcExp-1 (determined after) + // inital Left shift amount = NF + assign NormShift = Nf; // if the shift amount is negitive then dont shift (keep sticky bit) - assign DivShiftAmt = (ResDenorm ? DenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M, 1'b0}; + assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0}; // *** may be able to reduce shifter size assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}}; - // the quotent is in the range [.5,2) if there is no early termination - // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift - assign CorrDivExp = (ResDenorm&~DenormShift[`NE+1]) ? 
(`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~Quot[`DIVLEN+2]}; endmodule diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/lzacorrection.sv index f06dd84a9..e5a2d5c34 100644 --- a/pipelined/src/fpu/lzacorrection.sv +++ b/pipelined/src/fpu/lzacorrection.sv @@ -3,14 +3,20 @@ module lzacorrection( input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction input logic FmaOp, + input logic DivOp, + input logic DivResDenorm, + input logic [`NE+1:0] DivCalcExpM, + input logic [`NE+1:0] DivDenormShift, input logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results input logic PreResultDenorm, // is the result denormalized - calculated before LZA corection input logic KillProdM, // is the product set to zero input logic SumZero, output logic [`CORRSHIFTSZ-1:0] CorrShifted, // the shifted sum before LZA correction + output logic [`NE+1:0] CorrDivExp, output logic [`NE+1:0] SumExp // exponent of the normalized sum ); logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction + logic [`CORRSHIFTSZ:0] CorrQuotShifted; logic ResDenorm; // is the result denormalized logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction @@ -19,11 +25,17 @@ module lzacorrection( assign LZAPlus2 = Shifted[`NORMSHIFTSZ-1]; // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0]; - assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; + // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) + assign CorrQuotShifted = {LZAPlus2|(DivCalcExpM==1&~LZAPlus2) ? 
Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ] : {Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ], 1'b0}, 1'b0}; + // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits + assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 assign SumExp = (ConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &ConvNormSumExp&Shifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResDenorm)}}; // recalculate if the result is denormalized assign ResDenorm = PreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2]; + // the quotent is in the range [.5,2) if there is no early termination + // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift + assign CorrDivExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? 
(`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~LZAPlus2}; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 217e3f586..ab06a9406 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -112,6 +112,8 @@ module postprocess( logic UfLSBRes; logic Sqrt; logic [`FMTBITS-1:0] OutFmt; + logic DivResDenorm; + logic [`NE+1:0] DivDenormShift; // signals to help readability assign Signed = FOpCtrlM[0]; @@ -144,7 +146,7 @@ module postprocess( .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp, .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .CorrDivExp, .DivShiftAmt, .DivShiftIn); + divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); always_comb case(PostProcSelM) @@ -169,7 +171,8 @@ module postprocess( normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); lzacorrection lzacorrection(.FmaOp, .KillProdM, .PreResultDenorm, .ConvNormSumExp, - .SumZero, .Shifted, .SumExp, .CorrShifted); + .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExpM, + .CorrDivExp, .SumZero, .Shifted, .SumExp, .CorrShifted); /////////////////////////////////////////////////////////////////////////////// // Rounding From 033ec135f8e2695cbd2bcd32e3dcbb16b701062b Mon Sep 17 00:00:00 2001 From: slmnemo Date: Mon, 27 Jun 2022 18:56:35 -0700 Subject: [PATCH 03/10] Added reset read testcodes to GPIO --- .../references/WALLY-gpio-01.reference_output | 13 +++++++++++++ .../rv32i_m/privilege/src/WALLY-gpio-01.S | 18 +++++++++++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output 
b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output index 3cbf56ae5..3f6dcc8e1 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output @@ -1,5 +1,18 @@ 00000000 # test reset to zero 00000000 +00000000 # output_en +00000000 # output_val +00000000 # rise_ie +00000000 # rise_ip +00000000 # fall_ie +00000000 # fall_ip +00000000 # high_ie +00000000 # high_ip +00000000 # low_ie +ffffffff # low_ip +00000000 # iof_en +00000000 # iof_sel +00000000 # out_xor A5A5A5A5 # test output pins 5A5AFFFF 00000000 # test input enables diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S index be40c0e26..4b2496a77 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S @@ -70,9 +70,21 @@ test_cases: # =========== Verify all registers reset to zero =========== -.4byte input_val, 0x00000000, read32_test # input_val reset to zero -.4byte input_en, 0x00000000, read32_test # input_en reset to zero -# *** add more +.4byte input_val, 0x00000000, read32_test # input_val reset to zero +.4byte input_en, 0x00000000, read32_test # input_en reset to zero +.4byte output_en, 0x00000000, read32_test # output_en reset to zero +.4byte output_val, 0x00000000, read32_test # output_val reset to zero +.4byte rise_ie, 0x00000000, read32_test # rise_ie reset to zero +.4byte rise_ip, 0x00000000, read32_test # rise_ip reset to zero +.4byte fall_ie, 0x00000000, read32_test # fall_ie reset to zero +.4byte fall_ip, 0xffffffff, read32_test # fall_ip reset to ones (input_val is zero) +.4byte high_ie, 0x00000000, read32_test # high_ie reset 
to zero +.4byte high_ip, 0x00000000, read32_test # high_ip reset to zero +.4byte low_ie, 0x00000000, read32_test # low_ie reset to zero +.4byte low_ip, 0x00000000, read32_test # low_ip reset to zero +.4byte iof_en, 0x00000000, read32_test # iof_en reset to zero +.4byte iof_sel, 0x00000000, read32_test # iof_sel reset to zero +.4byte out_xor, 0x00000000, read32_test # out_xor reset to zero # =========== Test output and input pins =========== From 7a5dba4b30287d2d5845d48c72be0ae83ed60e83 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Mon, 27 Jun 2022 18:59:44 -0700 Subject: [PATCH 04/10] will this work in git --- .../rv64i_m/privilege/src/WALLY-TEST-LIB-64.h | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h index c24952b42..fdfc3e6d5 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h @@ -857,6 +857,27 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a addi a6, a6, 8 .endm +.macro SETUP_PLIC + # Setup PLIC with a series of register writes + + .equ PLIC_INTPRI_GPIO, 0x0C00000C # GPIO is interrupt 3 + .equ PLIC_INTPRI_UART, 0x0C000028 # UART is interrupt 10 + .equ PLIC_INTPENDING0, 0x0C001000 # intPending0 register + .equ PLIC_INTEN00, 0x0C002000 # interrupt enables for context 0 (machine mode) sources 31:1 + .equ PLIC_INTEN10, 0x0C002080 # interrupt enables for context 1 (supervisor mode) sources 31:1 + .equ PLIC_THRESH0, 0x0C200000 # Priority threshold for context 0 (machine mode) + .equ PLIC_CLAIM0, 0x0C200004 # Claim/Complete register for context 0 + .equ PLIC_THRESH1, 0x0C201000 # Priority threshold for context 1 (supervisor mode) + .equ PLIC_CLAIM1, 0x0C201004 # Claim/Complete register for context 1 + 
+ .4byte PLIC_THRESH0, 0, write32_test # Set PLIC machine mode interrupt threshold to 0 to accept all interrupts + .4byte PLIC_THRESH1, 7, write32_test # Set PLIC supervisor mode interrupt threshold to 7 to accept no interrupts + .4byte PLIC_INTPRI_GPIO, 7, write32_test # Set GPIO to high priority + .4byte PLIC_INTPRI_UART, 7, write32_test # Set UART to high priority + .4byte PLIC_INTEN00, 0xFFFFFFFF, write32_test # Enable all interrupt sources for machine mode + .4byte PLIC_INTEN10, 0x00000000, write32_test # Disable all interrupt sources for supervisor mode +.endm + .macro END_TESTS // invokes one final ecall to return to machine mode then terminates this program, so the output is // 0x8: termination called from U mode @@ -984,6 +1005,20 @@ read08_test: addi a6, a6, 8 j test_loop // go to next test case +readmip_test: // read the MIP into the signature + csrr t2, mip + sw t2, 0(t1) + addi t1, t1, 4 + addi a6, a6, 4 + j test_loop // go to next test case + +readsip_test: // read the SIP into the signature + csrr t2, sip + sw t2, 0(t1) + addi t1, t1, 4 + addi a6, a6, 4 + j test_loop // go to next test case + goto_s_mode: // return to address in t3, li a0, 3 // Trap handler behavior (go to supervisor mode) From bb62ebc84f63e49306749390c4912f63492688dd Mon Sep 17 00:00:00 2001 From: Madeleine Masser-Frye <51804758+mmasserfrye@users.noreply.github.com> Date: Tue, 28 Jun 2022 02:23:29 +0000 Subject: [PATCH 05/10] make clean rm extra files --- synthDC/Makefile | 6 ++++-- synthDC/runAllSynths.sh | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/synthDC/Makefile b/synthDC/Makefile index 53faa4522..611dcfef9 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -5,8 +5,8 @@ NAME := synth # defaults export DESIGN ?= wallypipelinedcore -export FREQ ?= 4000 -export CONFIG ?= rv64gc +export FREQ ?= 3402 +export CONFIG ?= rv32e # sky130 and sky90 presently supported export TECH ?= tsmc28 # MAXCORES allows parallel compilation, which is faster but less 
CPU-efficient @@ -126,6 +126,8 @@ clean: rm -f command.log rm -f filenames*.log rm -f power.saif + rm -f Synopsys_stack_trace_*.txt + rm -f crte_*.txt diff --git a/synthDC/runAllSynths.sh b/synthDC/runAllSynths.sh index 1b81a6cd0..6944552d4 100755 --- a/synthDC/runAllSynths.sh +++ b/synthDC/runAllSynths.sh @@ -1,5 +1,6 @@ #!/usr/bin/bash +make clean mv runs runArchive/$(date +"%Y_%m_%d_%I_%M_%p") mv newRuns runs mkdir newRuns From 726992540f5c6d7677d4719739804b4eacc3d688 Mon Sep 17 00:00:00 2001 From: Madeleine Masser-Frye <51804758+mmasserfrye@users.noreply.github.com> Date: Tue, 28 Jun 2022 02:28:13 +0000 Subject: [PATCH 06/10] update wally synth analysis --- synthDC/extractSummary.py | 49 ++++++++++++++++++++++----------------- synthDC/wallySynth.py | 12 ++++++---- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py index 4469d4bea..a2f6a9b50 100755 --- a/synthDC/extractSummary.py +++ b/synthDC/extractSummary.py @@ -7,6 +7,7 @@ import subprocess from matplotlib.cbook import flatten import matplotlib.pyplot as plt import matplotlib.lines as lines +from wallySynth import testFreq def synthsintocsv(): @@ -26,7 +27,7 @@ def synthsintocsv(): writer.writerow(['Width', 'Config', 'Special', 'Tech', 'Target Freq', 'Delay', 'Area']) for oneSynth in allSynths: - descrip = specReg.findall(oneSynth) + descrip = specReg.findall(oneSynth) #[30:] width = descrip[2][:4] config = descrip[2][4:] if descrip[3][-2:] == 'nm': @@ -46,7 +47,7 @@ def synthsintocsv(): nums = [float(m) for m in nums] metrics += nums except: - print(config + tech + freq + " doesn't have reports") + print(width + config + tech + '_' + freq + " doesn't have reports") if metrics == []: pass else: @@ -56,7 +57,7 @@ def synthsintocsv(): file.close() def synthsfromcsv(filename): - Synth = namedtuple("Synth", " width config special tech freq delay area") + Synth = namedtuple("Synth", "width config special tech freq delay area") with open(filename, 
newline='') as csvfile: csvreader = csv.reader(csvfile) global allSynths @@ -110,23 +111,26 @@ def freqPlot(tech, width, config): plt.savefig('./plots/wally/freqSweep_' + tech + '_' + width + config + '.png') # plt.show() -def areaDelay(width, tech, freq, config=None, special=None): +def areaDelay(tech, freq, width=None, config=None, special=None): delays, areas, labels = ([] for i in range(3)) for oneSynth in allSynths: - if (width == oneSynth.width) & (tech == oneSynth.tech) & (freq == oneSynth.freq): - if (special != None) & (oneSynth.special == special): - delays += [oneSynth.delay] - areas += [oneSynth.area] - labels += [oneSynth.config] - elif (config != None) & (oneSynth.config == config): - delays += [oneSynth.delay] - areas += [oneSynth.area] - labels += [oneSynth.special] - else: - delays += [oneSynth.delay] - areas += [oneSynth.area] - labels += [oneSynth.config + '_' + oneSynth.special] + if (width==None) or (width == oneSynth.width): + if (tech == oneSynth.tech) & (freq == oneSynth.freq): + if (special != None) & (oneSynth.special == special): + delays += [oneSynth.delay] + areas += [oneSynth.area] + labels += [oneSynth.width + oneSynth.config] + elif (config != None) & (oneSynth.config == config): + delays += [oneSynth.delay] + areas += [oneSynth.area] + labels += [oneSynth.special] + # else: + # delays += [oneSynth.delay] + # areas += [oneSynth.area] + # labels += [oneSynth.config + '_' + oneSynth.special] + if width == None: + width = '' f, (ax1) = plt.subplots(1, 1) plt.scatter(delays, areas) @@ -154,8 +158,11 @@ def areaDelay(width, tech, freq, config=None, special=None): # ending freq in 42 means fpu was turned off manually if __name__ == '__main__': - synthsintocsv() + # synthsintocsv() synthsfromcsv('Summary.csv') - freqPlot('tsmc28', 'rv64', 'gc') - areaDelay('rv32', 'tsmc28', 4200, config='gc') - areaDelay('rv32', 'tsmc28', 3042, special='') \ No newline at end of file + freqPlot('tsmc28', 'rv32', 'e') + freqPlot('sky90', 'rv32', 'e') + 
areaDelay('tsmc28', testFreq[1], width= 'rv64', config='gc') + areaDelay('tsmc28', testFreq[1], special='') + areaDelay('sky90', testFreq[0], width='rv64', config='gc') + areaDelay('sky90', testFreq[0], special='') \ No newline at end of file diff --git a/synthDC/wallySynth.py b/synthDC/wallySynth.py index bf32b6f9b..99d70e813 100755 --- a/synthDC/wallySynth.py +++ b/synthDC/wallySynth.py @@ -8,20 +8,22 @@ def runCommand(config, tech, freq): command = "make synth DESIGN=wallypipelinedcore CONFIG={} TECH={} DRIVE=FLOP FREQ={} MAXOPT=0 MAXCORES=1".format(config, tech, freq) subprocess.Popen(command, shell=True) +testFreq = [3000, 10000] + if __name__ == '__main__': techs = ['sky90', 'tsmc28'] - bestAchieved = [750, 3000] + sweepCenter = [870, 3000] synthsToRun = [] - arr = [-8, -6, -4, -2, 0, 2, 4, 6, 8] for i in [0, 1]: tech = techs[i] - f = bestAchieved[i] - for freq in [round(f+f*x/100) for x in arr]: # rv32e freq sweep + sc = sweepCenter[i] + f = testFreq[i] + for freq in [round(sc+sc*x/100) for x in arr]: # rv32e freq sweep synthsToRun += [['rv32e', tech, freq]] - for config in ['rv32gc', 'rv32ic', 'rv64gc', 'rv64i', 'rv64ic']: # configs + for config in ['rv32gc', 'rv32ic', 'rv64gc', 'rv64i', 'rv64ic', 'rv32e']: # configs synthsToRun += [[config, tech, f]] for mod in ['FPUoff', 'noMulDiv', 'noPriv', 'PMP0', 'PMP16']: # rv64gc path variations config = 'rv64gc_' + mod From 228028c8375aa52ebb7659a895ec5264c40e5233 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Mon, 27 Jun 2022 20:09:58 -0700 Subject: [PATCH 07/10] Add CLINT tests from book --- pipelined/testbench/tests.vh | 5 +- .../rv32i_m/privilege/Makefrag | 1 + .../WALLY-clint-01.reference_output | 9 ++ .../rv32i_m/privilege/src/WALLY-clint-01.S | 102 ++++++++++++++++++ 4 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-clint-01.reference_output create mode 100644 
tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index c17cef914..30b00cf48 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1601,6 +1601,9 @@ string wally32i[] = '{ string wally32periph[] = '{ `WALLYTEST, - "rv32i_m/privilege/WALLY-gpio-01" + "rv32i_m/privilege/WALLY-gpio-01", + "rv32i_m/privilege/WALLY-clint-01" + // "rv32i_m/privilege/WALLY-plic-01" + // "rv32i_m/privilege/WALLY-uart-01" }; diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag index 5d98f81cc..56b3bc01f 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag @@ -54,6 +54,7 @@ target_tests_nosim = \ WALLY-status-sie-01 \ WALLY-status-tw-01 \ WALLY-gpio-01 \ + WALLY-clint-01 \ rv32i_tests = $(addsuffix .elf, $(rv32i_sc_tests)) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-clint-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-clint-01.reference_output new file mode 100644 index 000000000..013ef4604 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-clint-01.reference_output @@ -0,0 +1,9 @@ +00000000 # msip zero on reset +00000000 # mip is zero +00000008 # mip msip bit is set +00000000 # mip msip bit is reset +00000000 # mip mtip bit is reset +FFFFFFFF # mtimecmp is same as written value +A5A5A5A5 # mtimecmph is same as written value +00000000 # mip mtip is zero +00000080 # mip mtip is set diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S new file mode 100644 index 
000000000..65f078b60 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S @@ -0,0 +1,102 @@ +/////////////////////////////////////////// +// +// WALLY-clint +// +// Author: David_Harris@hmc.edu and Nicholas Lucio +// +// Created 2022-06-16 +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +#include "WALLY-TEST-LIB-32.h" + +INIT_TESTS + +TRAP_HANDLER m + +j run_test_loop // begin test loop/table tests instead of executing inline code. + +INIT_TEST_TABLE + +END_TESTS + +TEST_STACK_AND_DATA + +.align 2 +test_cases: +# --------------------------------------------------------------------------------------------- +# Test Contents +# +# Here is where the actual tests are held, or rather, what the actual tests do. 
+# each entry consists of 3 values that will be read in as follows: +# +# '.4byte [x28 Value], [x29 Value], [x30 value]' +# or +# '.4byte [address], [value], [test type]' +# +# The encoding for x30 test type values can be found in the test handler in the framework file +# +# --------------------------------------------------------------------------------------------- + +# =========== Define CLINT registers =========== + +.equ CLINT, 0x02000000 +.equ msip, (CLINT+0x00) +.equ mtimecmp, (CLINT+0x4000) # doesn't necessarily reset to zero +.equ mtimecmph,(CLINT+0x4004) +.equ mtime, (CLINT+0xBFF8) # resets to zero but cannot be easily tested +.equ mtimeh, (CLINT+0xBFFC) + +# =========== Verify verifiable registers reset to zero =========== + +.4byte msip, 0x00000000, read32_test # msip reset to zero + +# =========== msip tests =========== + +.4byte msip, 0xFFFFFFFE, write32_test # write to invalid bits of msip +.4byte 0x0, 0x00000000, readmip_test # msip bit should be zero +.4byte msip, 0x00000001, write32_test # set msip to one +.4byte 0x0, 0x00000008, readmip_test # msip bit is set +.4byte msip, 0x00000000, write32_test # set msip to zero +.4byte 0x0, 0x00000000, readmip_test # msip bit is released + +# =========== mtime write tests =========== + +.4byte mtime, 0x00000000, write32_test # test we can write to mtime +.4byte mtimeh, 0x00000000, write32_test # test we can write to mtimeh +.4byte 0x0,0x00000000, readmip_test # mtip bit should be zero + +# =========== mtimecmp tests =========== + +.4byte mtimecmp, 0xFFFFFFFF, write32_test # verify mtimecmp is writable +.4byte mtimecmph, 0xA5A5A5A5, write32_test # verify mtimecmph is writable +.4byte mtimecmp, 0xFFFFFFFF, read32_test # read back value written to mtimecmp +.4byte mtimecmph, 0xA5A5A5A5, read32_test # read back value written to mtimecmph +.4byte mtime, 0xFFFFFFFF, write32_test # write to mtime +.4byte 0x0, 0x00000000, readmip_test # mtip should still be zero +.4byte mtimeh, 0xA5A5A5A6, write32_test # cause mtip 
to go high by making mtime > mtimecmp +.4byte 0x0, 0x00000080, readmip_test # mtip should be set + + +# =========== Experimental mtime counting test =========== + +# .4byte mtimecmph, 0xFFFFFFFF, write32_test # make sure mtip isn't set until ready +# .4byte mtimeh, 0x0FFFFFFF, write32_test # write near max value to mtimeh +# .4byte mtime, 0x00000000, write32_test # write small value to mtime +# .4byte 0x0, 0x000000000, readmip_test # mtip should be zero +# .4byte mtimecmp, 0x00000001, write32_test # write slightly larger value than mtime to test mtime counting +# .4byte mtimecmph, 0x0FFFFFFF, write32_test # write same value as mtimeh to test mtime counting +# .4byte 0x0, 0x00000080, readmip_test # mtip should be set since it has been at least two cycles From f21c3114fdcba8f0d6adb7b7742ed63d84297c8f Mon Sep 17 00:00:00 2001 From: slmnemo Date: Mon, 27 Jun 2022 20:16:29 -0700 Subject: [PATCH 08/10] Added termination line to CLINT test --- .../riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S index 65f078b60..7cfd83c1a 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S @@ -90,6 +90,7 @@ test_cases: .4byte mtimeh, 0xA5A5A5A6, write32_test # cause mtip to go high by making mtime > mtimecmp .4byte 0x0, 0x00000080, readmip_test # mtip should be set +.4byte 0x0, 0x0, terminate_test # terminate tests # =========== Experimental mtime counting test =========== From d13a4c337861c95bba53a39ae5a461354d63c6a9 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 28 Jun 2022 18:01:11 +0000 Subject: [PATCH 09/10] removed an adder out of early termination --- addins/riscv-arch-test | 2 +- pipelined/srt/srt-radix4.sv | 7 
++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 307c77b26..be67c99bd 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86 +Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv index 179fbf45a..39432c9e3 100644 --- a/pipelined/srt/srt-radix4.sv +++ b/pipelined/srt/srt-radix4.sv @@ -143,12 +143,13 @@ module earlytermination( logic [$clog2(`DIVLEN/2+3)-1:0] Count; logic WZero; + logic [`DIVLEN+3:0] W; - assign WZero = (WS+WC == 0)|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE; //*** temporary - // *** rather than Counting should just be able to check if one of the two msbs of the quotent is 1 then stop??? + assign WZero = ((WS^WC)=={WS[`DIVLEN+2:0]|WC[`DIVLEN+2:0], 1'b0})|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE; assign DivDone = (DivStickyE | WZero); assign DivStickyE = ~|Count; - assign DivNegStickyE = $signed(WS+WC) < 0; + assign W = WC+WS; + assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this??? 
assign EarlyTermShiftDiv2E = Count; // +1 for setup // `DIVLEN/2 to get required number of bits From 8f98f3bfabf03cea00fc10e176a7c6cd119192ef Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 28 Jun 2022 21:33:31 +0000 Subject: [PATCH 10/10] added rv32 double precision stores - untested --- pipelined/src/cache/cache.sv | 13 ++++++++++--- pipelined/src/cache/cacheway.sv | 11 +++++++++-- pipelined/src/fpu/fctrl.sv | 6 +++--- pipelined/src/fpu/fpu.sv | 23 ++++++++++++++++++----- pipelined/src/ieu/datapath.sv | 10 ++++++++-- pipelined/src/ifu/ifu.sv | 2 +- pipelined/src/lsu/lsu.sv | 8 +++++--- pipelined/src/lsu/subwordread.sv | 14 +++++++------- pipelined/src/wally/wallypipelinedcore.sv | 11 ++++++++--- 9 files changed, 69 insertions(+), 29 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 2374b4938..d380bfc83 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -43,6 +43,9 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER input logic [`PA_BITS-1:0] PAdr, // physical address input logic [(`XLEN-1)/8:0] ByteMask, input logic [`XLEN-1:0] FinalWriteData, + input logic [`FLEN-1:0] FWriteDataM, + input logic FLoad2, + input logic FpLoadStoreM, output logic CacheCommitted, output logic CacheStall, // to performance counters to cpu @@ -120,7 +123,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) - CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, + CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FLoad2, .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, .Invalidate(InvalidateCacheM)); @@ -159,8 +162,12 @@ module cache #(parameter 
LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path: Write data and address. Muxes between writes from bus and writes from CPU. ///////////////////////////////////////////////////////////////////////////////////////////// - mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), - .d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData)); + if (`LLEN>`XLEN) + mux3 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), + .d1({WORDSPERLINE/2{FWriteDataM}}), .d2(CacheBusWriteData), .s({SetValid,FpLoadStoreM&~SetValid}), .y(CacheWriteData)); + else + mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), + .d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData)); mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), .d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), .d2({VictimTag, FlushAdr, {{OFFSETLEN}{1'b0}}}), diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index d9a478612..ac1e26e8f 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -38,6 +38,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, input logic [$clog2(NUMLINES)-1:0] RAdr, input logic [`PA_BITS-1:0] PAdr, input logic [LINELEN-1:0] CacheWriteData, + input logic FLoad2, input logic SetValidWay, input logic ClearValidWay, input logic SetDirtyWay, @@ -74,8 +75,14 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, ///////////////////////////////////////////////////////////////////////////////////////////// // Write Enable demux ///////////////////////////////////////////////////////////////////////////////////////////// - onehotdecoder #(LOGWPL) adrdec( - .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); + if(`LLEN>`XLEN)begin + logic [2**LOGWPL-1:0] MemPAdrDecodedtmp; + 
onehotdecoder #(LOGWPL) adrdec( + .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp)); + assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FLoad2}}, 1'b0}; + end else + onehotdecoder #(LOGWPL) adrdec( + .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); // If writing the whole line set all write enables to 1, else only set the correct word. assign SelectedWriteWordEn = SetValidWay ? '1 : SetDirtyWay ? MemPAdrDecoded : '0; // OR-AND assign FinalByteMask = SetValidWay ? '1 : ByteMask; // OR diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 60d260027..f6ed650af 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -33,8 +33,8 @@ module fctrl ( default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction endcase 7'b0100111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsw - 3'b011: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsd + 3'b010: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsw + 3'b011: ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsd default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction endcase 7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0; // fmadd @@ -121,7 +121,7 @@ module fctrl ( assign FmtD = 0; else if (`FPSIZES == 2)begin logic [1:0] FmtTmp; - assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0]; + assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? 
{~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0]; assign FmtD = (`FMT == FmtTmp); end else if (`FPSIZES == 3|`FPSIZES == 4) diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index aba1a8f48..25b39d69b 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -41,10 +41,12 @@ module fpu ( input logic [4:0] RdM, RdW, // which FP register to write to (from IEU) input logic [1:0] STATUS_FS, // Is floating-point enabled? output logic FRegWriteM, // FP register write enable - output logic FpLoadM, // Fp load instruction? + output logic FpLoadStoreM, // Fp load instruction? + output logic FLoad2, output logic FStallD, // Stall the decode stage output logic FWriteIntE, // integer register write enables output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory + output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory output logic [`XLEN-1:0] FIntResM, // data to be written to integer register output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register output logic [1:0] FResSelW, @@ -292,8 +294,19 @@ module fpu ( // data to be stored in memory - to IEU // - FP uses NaN-blocking format // - if there are any unsused bits the most significant bits are filled with 1s - if (`FLEN>`XLEN) assign FWriteDataE = FSrcYE[`XLEN-1:0]; - else assign FWriteDataE = {{`XLEN-`FLEN{FSrcYE[`FLEN-1]}}, FSrcYE}; + if (`LLEN==`XLEN) begin + assign FWriteDataE = FSrcYE[`XLEN-1:0]; + end else begin + logic [`FLEN-1:0] FWriteDataE; + if(`FMTBITS == 2) assign FLoad2 = FmtM == `FMT; + else assign FLoad2 = FmtM; + + if (`FPSIZES==1) assign FWriteDataE = FSrcYE; + else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; + else assign FWriteDataE = FmtE == `FMT ? 
FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; + + flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM); + end // NaN Block SrcA generate @@ -311,7 +324,7 @@ module fpu ( assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE); // select the result that may be written to the integer register - to IEU - if (`FLEN>`XLEN) + if (`FLEN>`XLEN) assign IntSrcXE = FSrcXE[`XLEN-1:0]; else assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}; @@ -356,7 +369,7 @@ module fpu ( // ||| ||| ////////////////////////////////////////////////////////////////////////////////////////// - assign FpLoadM = FResSelM[1]; + assign FpLoadStoreM = FResSelM[1]; postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .EarlyTermShiftDiv2M, .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot, diff --git a/pipelined/src/ieu/datapath.sv b/pipelined/src/ieu/datapath.sv index b7a6a9644..df711695e 100644 --- a/pipelined/src/ieu/datapath.sv +++ b/pipelined/src/ieu/datapath.sv @@ -124,12 +124,18 @@ module datapath ( flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW); // floating point interactions: fcvt, fp stores - if (`F_SUPPORTED) begin:fpmux + if (`F_SUPPORTED&(`LLEN>`XLEN)) begin:fpmux + logic [`XLEN-1:0] IFCvtResultW; + mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM); + assign WriteDataE = ForwardedSrcBE; + mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW); + mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); + end else if (`F_SUPPORTED) begin:fpmux logic [`XLEN-1:0] IFCvtResultW; mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM); mux2 #(`XLEN) writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE); mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW); - mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, 
CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); + mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); end else begin:fpmux assign IFResultM = IEUResultM; assign WriteDataE = ForwardedSrcBE; mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 29d07cc2c..02e748f31 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -227,7 +227,7 @@ module ifu ( icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0), .CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), - .CacheFetchLine(ICacheFetchLine), + .CacheFetchLine(ICacheFetchLine), .FWriteDataM(), .FpLoadStoreM(), .FLoad2(), .CacheWriteLine(), .ReadDataWord(FinalInstrRawF), .Cacheable(CacheableF), .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 7234a7cac..5c56b1356 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -57,7 +57,9 @@ module lsu ( input logic BigEndianM, input logic sfencevmaM, // fpu - input logic FpLoadM, + input logic [`FLEN-1:0] FWriteDataM, + input logic FLoad2, + input logic FpLoadStoreM, // faults output logic LoadPageFaultM, StoreAmoPageFaultM, output logic LoadMisalignedFaultM, LoadAccessFaultM, @@ -235,7 +237,7 @@ module lsu ( .NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( .clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM), .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), - .ByteMask(ByteMaskM), .WordCount, + .ByteMask(ByteMaskM), .WordCount, .FpLoadStoreM, .FWriteDataM, .FLoad2, .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), 
.CacheAccess(DCacheAccess), .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM), @@ -269,7 +271,7 @@ module lsu ( subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]), .LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM, .ByteMaskM); subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]), - .FpLoadM, .Funct3M(LSUFunct3M), .ReadDataM); + .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); ///////////////////////////////////////////////////////////////////////////////////////////// // MW Pipeline Register diff --git a/pipelined/src/lsu/subwordread.sv b/pipelined/src/lsu/subwordread.sv index 4a6d99bfc..d38595d49 100644 --- a/pipelined/src/lsu/subwordread.sv +++ b/pipelined/src/lsu/subwordread.sv @@ -35,7 +35,7 @@ module subwordread input logic [`LLEN-1:0] ReadDataWordMuxM, input logic [2:0] LSUPAdrM, input logic [2:0] Funct3M, - input logic FpLoadM, + input logic FpLoadStoreM, output logic [`LLEN-1:0] ReadDataM ); @@ -83,16 +83,16 @@ module subwordread case(Funct3M) 3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb 3'b001: if(`ZFH_SUPPORTED) - ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh + ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh 3'b010: if(`F_SUPPORTED) - ReadDataM = {{`LLEN-32{WordM[31]|FpLoadM}}, WordM[31:0]}; // lw/flw + ReadDataM = {{`LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw else ReadDataM = {{`LLEN-32{WordM[31]}}, WordM[31:0]}; // lw 3'b011: if(`D_SUPPORTED) - ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadM}}, DblWordM[63:0]}; // ld/fld + ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld else ReadDataM = {{`LLEN-64{DblWordM[63]}}, DblWordM[63:0]}; // ld/fld 3'b100: if(`Q_SUPPORTED) - ReadDataM = FpLoadM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq + ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq else ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu 3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu @@ -122,10 +122,10 @@ module subwordread case(Funct3M) 3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb 3'b001: if(`ZFH_SUPPORTED) - ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh + ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh 3'b010: if(`F_SUPPORTED) - ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadM}}, ReadDataWordMuxM[31:0]}; // lw/flw + ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw else ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:0]}; // lw 3'b011: ReadDataM = ReadDataWordMuxM; // fld 3'b100: ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index b3f11680b..8ef8ec18b 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -92,13 +92,15 @@ module wallypipelinedcore ( logic FStallD; logic FWriteIntE; logic [`XLEN-1:0] FWriteDataE; + logic FLoad2; + logic [`FLEN-1:0] FWriteDataM; logic [`XLEN-1:0] FIntResM; logic [`XLEN-1:0] FCvtIntResW; logic FDivBusyE; logic IllegalFPUInstrD, IllegalFPUInstrE; logic FRegWriteM; logic FPUStallD; - logic FpLoadM; + logic FpLoadStoreM; logic [1:0] FResSelW; logic [4:0] SetFflagsM; @@ -253,7 +255,8 @@ module wallypipelinedcore ( .AtomicM, .TrapM, .CommittedM, .DCacheMiss, .DCacheAccess, .SquashSCW, - .FpLoadM, + .FpLoadStoreM, + .FWriteDataM, .FLoad2, //.DataMisalignedM(DataMisalignedM), .IEUAdrE, .IEUAdrM, .WriteDataE, .ReadDataW, .FlushDCacheM, @@ -391,10 +394,12 @@ module wallypipelinedcore ( .RdM, .RdW, // which FP register to write to (from IEU) .STATUS_FS, // is floating-point enabled? 
.FRegWriteM, // FP register write enable - .FpLoadM, + .FpLoadStoreM, + .FLoad2, .FStallD, // Stall the decode stage .FWriteIntE, // integer register write enable .FWriteDataE, // Data to be written to memory + .FWriteDataM, // Data to be written to memory .FIntResM, // data to be written to integer register .FCvtIntResW, // fp -> int conversion result to be stored in int register .FResSelW, // fpu result selection