From ceac0352f79581eb45315e45ec803f85d4e5d4ab Mon Sep 17 00:00:00 2001 From: bbracker Date: Sun, 4 Jul 2021 18:17:06 -0400 Subject: [PATCH 1/8] ICacheCntrl now reacts differently to InstrPageFaultF vs ITLBWriteF --- wally-pipelined/src/cache/ICacheCntrl.sv | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index 748b3f5e..ee58ed6f 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -115,8 +115,8 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) localparam STATE_INVALIDATE = 'h12; // *** not sure if invalidate or evict? invalidate by cache block or address? localparam STATE_TLB_MISS = 'h13; localparam STATE_TLB_MISS_DONE = 'h14; - - + localparam STATE_INSTR_PAGE_FAULT = 'h15; + localparam AHBByteLength = `XLEN / 8; localparam AHBOFFETWIDTH = $clog2(AHBByteLength); @@ -370,13 +370,20 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) NextState = STATE_READY; end STATE_TLB_MISS: begin - if (ITLBWriteF | WalkerInstrPageFaultF) begin + if (WalkerInstrPageFaultF) begin + NextState = STATE_INSTR_PAGE_FAULT; + ICacheStallF = 1'b0; + end else if (ITLBWriteF) begin NextState = STATE_TLB_MISS_DONE; end else begin NextState = STATE_TLB_MISS; end end - STATE_TLB_MISS_DONE : begin + STATE_TLB_MISS_DONE: begin + NextState = STATE_READY; + end + STATE_INSTR_PAGE_FAULT: begin + ICacheStallF = 1'b0; NextState = STATE_READY; end default: begin From 07f2064c19ff11180167d9ac5f295e4316bc127b Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 18:17:09 -0400 Subject: [PATCH 2/8] Touched up TLB D and A bit checks --- wally-pipelined/src/ebu/ahblite.sv | 2 -- wally-pipelined/src/mmu/tlb.sv | 9 +++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index b0c6f033..edbaad68 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -219,8 +219,6 @@ module ahblite ( generate if (`A_SUPPORTED) begin logic [`XLEN-1:0] AMOResult; -// amoalu amoalu(.a(HRDATA), .b(WriteDataM), .funct(Funct7M), .width(MemSizeM), -// .result(AMOResult)); amoalu amoalu(.srca(HRDATAW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM), .result(AMOResult)); mux2 #(`XLEN) wdmux(WriteDataM, AMOResult, AtomicMaskedM[1], WriteData); diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv index 75021265..34400647 100644 --- a/wally-pipelined/src/mmu/tlb.sv +++ b/wally-pipelined/src/mmu/tlb.sv @@ -111,6 +111,7 @@ module tlb #(parameter TLB_ENTRIES = 8, logic [1:0] HitPageType; logic CAMHit; logic [`ASID_BITS-1:0] ASID; + logic DAFault; // Grab the sv mode from SATP and determine whether translation should occur assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS]; @@ -165,7 +166,9 @@ module tlb #(parameter TLB_ENTRIES = 8, // only execute non-user mode pages. assign ImproperPrivilege = ((EffectivePrivilegeMode == `U_MODE) && ~PTE_U) || ((EffectivePrivilegeMode == `S_MODE) && PTE_U); - assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || ~PTE_X); + // fault for software handling if access bit is off + assign DAFault = ~PTE_A; + assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || ~PTE_X || DAFault); end else begin logic ImproperPrivilege, InvalidRead, InvalidWrite; @@ -180,7 +183,9 @@ module tlb #(parameter TLB_ENTRIES = 8, // Check for write error. Writes are invalid when the page's write bit is // low. assign InvalidWrite = WriteAccess && ~PTE_W; - assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || InvalidRead || InvalidWrite); + // Fault for software handling if access bit is off or writing a page with dirty bit off + assign DAFault = ~PTE_A | WriteAccess & ~PTE_D; + assign TLBPageFault = Translate && TLBHit && (ImproperPrivilege || InvalidRead || InvalidWrite || DAFault); end endgenerate From b23192cf1b0fc2c43ffa2a0e5fe503cf7fef3936 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 18:52:16 -0400 Subject: [PATCH 3/8] Gave names to for loops in generate blocks for ease of reference --- wally-pipelined/src/cache/ICacheCntrl.sv | 4 ++-- wally-pipelined/src/cache/dmapped.sv | 4 ++-- wally-pipelined/src/ebu/ahblite.sv | 2 +- wally-pipelined/src/generic/shift.sv | 14 ++++++-------- wally-pipelined/src/ieu/alu.sv | 2 +- wally-pipelined/src/ieu/datapath.sv | 2 +- wally-pipelined/src/ifu/SRAM2P1R1W.sv | 8 ++++---- wally-pipelined/src/ifu/ifu.sv | 2 +- wally-pipelined/src/ifu/localHistoryPredictor.sv | 2 +- wally-pipelined/src/lsu/dcache.sv | 2 +- wally-pipelined/src/lsu/lsuArb.sv | 6 ++++-- wally-pipelined/src/mmu/pmpadrdec.sv | 3 ++- wally-pipelined/src/mmu/pmpchecker.sv | 7 ------- wally-pipelined/src/mmu/tlbpriority.sv | 3 ++- wally-pipelined/src/muldiv/div.sv | 7 +++---- wally-pipelined/src/privileged/csrc.sv | 2 +- wally-pipelined/src/privileged/csri.sv | 2 +- wally-pipelined/src/privileged/csrn.sv | 2 +- wally-pipelined/src/privileged/csrs.sv | 2 +- wally-pipelined/src/privileged/csru.sv | 2 +- wally-pipelined/src/uncore/gpio.sv | 2 +- wally-pipelined/src/uncore/uartPC16550D.sv | 2 +- 22 files changed, 38 insertions(+), 44 deletions(-) diff --git a/wally-pipelined/src/cache/ICacheCntrl.sv b/wally-pipelined/src/cache/ICacheCntrl.sv index 748b3f5e..6c1981ee 100644 --- a/wally-pipelined/src/cache/ICacheCntrl.sv +++ b/wally-pipelined/src/cache/ICacheCntrl.sv @@ -425,8 +425,8 @@ module ICacheCntrl #(parameter BLOCKLEN = 256) // store read data from memory interface before writing into SRAM. genvar i; generate - for (i = 0; i < WORDSPERLINE; i++) begin - flopenr #(`XLEN) flop(.clk(clk), + for (i = 0; i < WORDSPERLINE; i++) begin:storebuffer + flopenr #(`XLEN) sb(.clk(clk), .reset(reset), .en(InstrAckF & (i == FetchCount)), .d(InstrInF), diff --git a/wally-pipelined/src/cache/dmapped.sv b/wally-pipelined/src/cache/dmapped.sv index f40da412..42669752 100644 --- a/wally-pipelined/src/cache/dmapped.sv +++ b/wally-pipelined/src/cache/dmapped.sv @@ -106,7 +106,7 @@ module rodirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par assign DataWord = ReadLineTransformed[ReadOffset]; genvar i; generate - for (i=0; i < LINESIZE/WORDSIZE; i++) begin + for (i=0; i < LINESIZE/WORDSIZE; i++) begin:readline assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE]; end endgenerate @@ -214,7 +214,7 @@ module wtdirectmappedmem #(parameter NUMLINES=512, parameter LINESIZE = 256, par assign DataWord = ReadLineTransformed[ReadOffset]; genvar i; generate - for (i=0; i < LINESIZE/WORDSIZE; i++) begin + for (i=0; i < LINESIZE/WORDSIZE; i++) begin:readline assign ReadLineTransformed[i] = ReadLine[(i+1)*WORDSIZE-1:i*WORDSIZE]; end endgenerate diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index edbaad68..4bd079e9 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -216,7 +216,7 @@ module ahblite ( subwordread swr(.*); // Handle AMO instructions if applicable - generate + generate if (`A_SUPPORTED) begin logic [`XLEN-1:0] AMOResult; amoalu amoalu(.srca(HRDATAW), .srcb(WriteDataM), .funct(Funct7M), .width(MemSizeM), diff --git a/wally-pipelined/src/generic/shift.sv b/wally-pipelined/src/generic/shift.sv index 88152588..70e1076d 100755 --- a/wally-pipelined/src/generic/shift.sv +++ b/wally-pipelined/src/generic/shift.sv @@ -38,13 +38,12 @@ module shift_right #(parameter WIDTH=8) assign stage[0] = A; generate - for (i=0;i<$clog2(WIDTH);i=i+1) - begin : genbit - mux2 #(WIDTH) mux_inst (stage[i], + for (i=0;i<$clog2(WIDTH);i=i+1) begin : genbit + mux2 #(WIDTH) mux_inst (stage[i], {{(WIDTH/(2**(i+1))){1'b0}}, stage[i][WIDTH-1:WIDTH/(2**(i+1))]}, Shift[$clog2(WIDTH)-i-1], stage[i+1]); - end + end endgenerate assign Z = stage[$clog2(WIDTH)]; @@ -60,13 +59,12 @@ module shift_left #(parameter WIDTH=8) assign stage[0] = A; generate - for (i=0;i<$clog2(WIDTH);i=i+1) - begin : genbit - mux2 #(WIDTH) mux_inst (stage[i], + for (i=0;i<$clog2(WIDTH);i=i+1) begin : genbit + mux2 #(WIDTH) mux_inst (stage[i], {stage[i][WIDTH-1-WIDTH/(2**(i+1)):0], {(WIDTH/(2**(i+1))){1'b0}}}, Shift[$clog2(WIDTH)-i-1], stage[i+1]); - end + end endgenerate assign Z = stage[$clog2(WIDTH)]; diff --git a/wally-pipelined/src/ieu/alu.sv b/wally-pipelined/src/ieu/alu.sv index 102fbbed..ac2c06dd 100644 --- a/wally-pipelined/src/ieu/alu.sv +++ b/wally-pipelined/src/ieu/alu.sv @@ -42,7 +42,7 @@ module alu #(parameter WIDTH=32) ( assign {carry, presum} = a + condinvb + {{(WIDTH-1){1'b0}},alucontrol[3]}; // support W-type RV64I ADDW/SUBW/ADDIW that sign-extend 32-bit result to 64 bits - generate + generate if (WIDTH==64) assign sum = w64 ? {{32{presum[31]}}, presum[31:0]} : presum; else diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index 44a40045..f041fce6 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -129,7 +129,7 @@ module datapath ( flopenrc #(5) RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW); // handle Store Conditional result if atomic extension supported - generate + generate if (`A_SUPPORTED) assign SCResultW = SquashSCW ? {{(`XLEN-1){1'b0}}, 1'b1} : {{(`XLEN-1){1'b0}}, 1'b0}; else diff --git a/wally-pipelined/src/ifu/SRAM2P1R1W.sv b/wally-pipelined/src/ifu/SRAM2P1R1W.sv index d71f8bc4..046aacc6 100644 --- a/wally-pipelined/src/ifu/SRAM2P1R1W.sv +++ b/wally-pipelined/src/ifu/SRAM2P1R1W.sv @@ -97,11 +97,11 @@ module SRAM2P1R1W // write port generate - for (index = 0; index < Width; index = index + 1) begin + for (index = 0; index < Width; index = index + 1) begin:mem always_ff @ (posedge clk) begin - if (WEN1Q & BitWEN1[index]) begin - memory[WA1Q][index] <= WD1Q[index]; - end + if (WEN1Q & BitWEN1[index]) begin + memory[WA1Q][index] <= WD1Q[index]; + end end end endgenerate diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 4fcefe85..24952edf 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -188,7 +188,7 @@ module ifu ( flopenl #(`XLEN) pcreg(clk, reset, ~StallF & ~ICacheStallF, PCNextF, `RESET_VECTOR, PCF); // branch and jump predictor - generate + generate if (`BPRED_ENABLED == 1) begin : bpred // I am making the port connection explicit for now as I want to see them and they will be changing. bpred bpred(.*, diff --git a/wally-pipelined/src/ifu/localHistoryPredictor.sv b/wally-pipelined/src/ifu/localHistoryPredictor.sv index 8aaa85c0..6c5c9478 100644 --- a/wally-pipelined/src/ifu/localHistoryPredictor.sv +++ b/wally-pipelined/src/ifu/localHistoryPredictor.sv @@ -67,7 +67,7 @@ module localHistoryPredictor genvar index; generate - for (index = 0; index < 2**m; index = index +1) begin + for (index = 0; index < 2**m; index = index +1) begin:localhist flopenr #(k) LocalHistoryRegister(.clk(clk), .reset(reset), diff --git a/wally-pipelined/src/lsu/dcache.sv b/wally-pipelined/src/lsu/dcache.sv index fec70ef4..e8dfeb5c 100644 --- a/wally-pipelined/src/lsu/dcache.sv +++ b/wally-pipelined/src/lsu/dcache.sv @@ -151,7 +151,7 @@ module dcachecontroller #(parameter LINESIZE = 256) ( genvar i; generate - for (i=0; i < WORDSPERLINE; i++) begin + for (i=0; i < WORDSPERLINE; i++) begin:sb flopenr #(`XLEN) flop(clk, reset, FetchState & (i == FetchWordNum), ReadDataW, DCacheMemWriteData[(i+1)*`XLEN-1:i*`XLEN]); end endgenerate diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index 3f57cabb..dc77ec9d 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -138,12 +138,14 @@ module lsuArb assign MemRWMtoLSU = SelPTW ? {HPTWRead, 1'b0} : MemRWM; generate - if (`XLEN == 32) begin + assign PTWSize = (`XLEN==32 ? 3'b010 : 3'b011); // 32 or 64-bit access from htpw + /* if (`XLEN == 32) begin assign Funct3MtoLSU = SelPTW ? 3'b010 : Funct3M; end else begin assign Funct3MtoLSU = SelPTW ? 3'b011 : Funct3M; - end + end*/ endgenerate + mux2 sizemux(Funct3M, PTWSize, SelPTW, Funct3MtoLSU); assign AtomicMtoLSU = SelPTW ? 2'b00 : AtomicM; assign MemAdrMtoLSU = SelPTW ? HPTWPAdr : MemAdrM; diff --git a/wally-pipelined/src/mmu/pmpadrdec.sv b/wally-pipelined/src/mmu/pmpadrdec.sv index 50d399ae..0a14d832 100644 --- a/wally-pipelined/src/mmu/pmpadrdec.sv +++ b/wally-pipelined/src/mmu/pmpadrdec.sv @@ -76,8 +76,9 @@ module pmpadrdec ( generate assign Mask[1:0] = 2'b11; assign Mask[2] = (AdrMode == NAPOT); // mask has 0s in upper bis for NA4 region - for (i=3; i < `PA_BITS; i=i+1) + for (i=3; i < `PA_BITS; i=i+1) begin:mask assign Mask[i] = Mask[i-1] & PMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore + end endgenerate // verilator lint_on UNOPTFLAT diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv index ee4b261d..9c7f11da 100644 --- a/wally-pipelined/src/mmu/pmpchecker.sv +++ b/wally-pipelined/src/mmu/pmpchecker.sv @@ -63,12 +63,6 @@ module pmpchecker ( // verilator lint_on UNOPTFLAT logic [`PMP_ENTRIES-1:0] PAgePMPAdr; // for TOR PMP matching, PhysicalAddress > PMPAdr[i] genvar i,j; - /* - generate // extract 8-bit chunks from PMPCFG array - for (j=0; j<`PMP_ENTRIES; j = j+8) - assign {PMPCfg[j+7], PMPCfg[j+6], PMPCfg[j+5], PMPCfg[j+4], - PMPCfg[j+3], PMPCfg[j+2], PMPCfg[j+1], PMPCfg[j]} = PMPCFG_ARRAY_REGW[j/8]; - endgenerate */ pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0]( .PhysicalAddress, @@ -80,7 +74,6 @@ module pmpchecker ( .NoLowerMatchOut(NoLowerMatch), .Match, .Active, .L, .X, .W, .R); - // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L : |Active; diff --git a/wally-pipelined/src/mmu/tlbpriority.sv b/wally-pipelined/src/mmu/tlbpriority.sv index a061f622..5096cae6 100644 --- a/wally-pipelined/src/mmu/tlbpriority.sv +++ b/wally-pipelined/src/mmu/tlbpriority.sv @@ -41,8 +41,9 @@ module tlbpriority #(parameter ENTRIES = 8) ( genvar i; generate assign nolower[0] = 1; - for (i=1; i 0) assign rxfullbit[i] = ((rxfifohead==i) | rxfullbit[i-1]) & (rxfifotail != i); From 600e7802dd14693dc49602ae10f511d1ff1cd9d0 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 18:56:30 -0400 Subject: [PATCH 4/8] Renamed Funct3ToLSU/fromLSU -> SizeToLSU/FromLSU and simplified size muxing in lsuArb --- wally-pipelined/src/lsu/lsu.sv | 12 ++++++------ wally-pipelined/src/lsu/lsuArb.sv | 10 +++------- wally-pipelined/src/wally/wallypipelinedhart.sv | 8 ++++---- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 8d4df6ec..a2bcf52b 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -64,7 +64,7 @@ module lsu ( output logic [1:0] AtomicMaskedM, input logic MemAckW, // from ahb input logic [`XLEN-1:0] HRDATAW, // from ahb - output logic [2:0] Funct3MfromLSU, + output logic [2:0] SizeFromLSU, output logic StallWfromLSU, @@ -132,7 +132,7 @@ module lsu ( logic MMUTranslate; logic HPTWRead; logic [1:0] MemRWMtoLSU; - logic [2:0] Funct3MtoLSU; + logic [2:0] SizeToLSU; logic [1:0] AtomicMtoLSU; logic [`XLEN-1:0] MemAdrMtoLSU; logic [`XLEN-1:0] WriteDataMtoLSU; @@ -204,7 +204,7 @@ module lsu ( // LSU .DisableTranslation(DisableTranslation), .MemRWMtoLSU(MemRWMtoLSU), - .Funct3MtoLSU(Funct3MtoLSU), + .SizeToLSU(SizeToLSU), .AtomicMtoLSU(AtomicMtoLSU), .MemAdrMtoLSU(MemAdrMtoLSU), .WriteDataMtoLSU(WriteDataMtoLSU), // *** ?????????????? @@ -220,7 +220,7 @@ module lsu ( mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) dmmu(.TLBAccessType(MemRWMtoLSU), .VirtualAddress(MemAdrMtoLSU), - .Size(Funct3MtoLSU[1:0]), + .Size(SizeToLSU[1:0]), .PTEWriteVal(PageTableEntryM), .PageTypeWriteVal(PageTypeM), .TLBWrite(DTLBWriteM), @@ -244,7 +244,7 @@ module lsu ( // Determine if an Unaligned access is taking place always_comb - case(Funct3MtoLSU[1:0]) + case(SizeToLSU[1:0]) 2'b00: DataMisalignedMfromLSU = 0; // lb, sb, lbu 2'b01: DataMisalignedMfromLSU = MemAdrMtoLSU[0]; // lh, sh, lhu 2'b10: DataMisalignedMfromLSU = MemAdrMtoLSU[1] | MemAdrMtoLSU[0]; // lw, sw, flw, fsw, lwu @@ -400,7 +400,7 @@ module lsu ( end // always_comb // *** for now just pass through size - assign Funct3MfromLSU = Funct3MtoLSU; + assign SizeFromLSU = SizeToLSU; assign StallWfromLSU = StallWtoLSU; diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index dc77ec9d..23e88970 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -54,7 +54,7 @@ module lsuArb // to LSU output logic DisableTranslation, output logic [1:0] MemRWMtoLSU, - output logic [2:0] Funct3MtoLSU, + output logic [2:0] SizeToLSU, output logic [1:0] AtomicMtoLSU, output logic [`XLEN-1:0] MemAdrMtoLSU, output logic [`XLEN-1:0] WriteDataMtoLSU, @@ -87,6 +87,7 @@ module lsuArb statetype CurrState, NextState; logic SelPTW; logic HPTWStallD; + logic [2:0] PTWSize; flopenl #(.TYPE(statetype)) StateReg(.clk(clk), @@ -139,13 +140,8 @@ module lsuArb generate assign PTWSize = (`XLEN==32 ? 3'b010 : 3'b011); // 32 or 64-bit access from htpw - /* if (`XLEN == 32) begin - assign Funct3MtoLSU = SelPTW ? 3'b010 : Funct3M; - end else begin - assign Funct3MtoLSU = SelPTW ? 3'b011 : Funct3M; - end*/ endgenerate - mux2 sizemux(Funct3M, PTWSize, SelPTW, Funct3MtoLSU); + mux2 #(3) sizemux(Funct3M, PTWSize, SelPTW, SizeToLSU); assign AtomicMtoLSU = SelPTW ? 2'b00 : AtomicM; assign MemAdrMtoLSU = SelPTW ? HPTWPAdr : MemAdrM; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 1c44565f..f18d5af4 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -159,7 +159,7 @@ module wallypipelinedhart // IEU vs HPTW arbitration signals to send to LSU logic [1:0] MemRWMtoLSU; - logic [2:0] Funct3MtoLSU; + logic [2:0] SizeToLSU; logic [1:0] AtomicMtoLSU; logic [`XLEN-1:0] MemAdrMtoLSU; logic [`XLEN-1:0] WriteDataMtoLSU; @@ -169,7 +169,7 @@ module wallypipelinedhart logic DataMisalignedMfromLSU; logic StallWtoLSU; logic StallWfromLSU; - logic [2:0] Funct3MfromLSU; + logic [2:0] SizeFromLSU; ifu ifu(.InstrInF(InstrRData), @@ -207,7 +207,7 @@ module wallypipelinedhart .AtomicMaskedM(AtomicMaskedM), .MemAckW(MemAckW), .HRDATAW(HRDATAW), - .Funct3MfromLSU(Funct3MfromLSU), // stays the same + .SizeFromLSU(SizeFromLSU), // stays the same .StallWfromLSU(StallWfromLSU), // stays the same .DSquashBusAccessM(DSquashBusAccessM), // probalby removed after dcache implemenation? // currently not connected (but will need to be used for lsu talking to ahb. @@ -261,7 +261,7 @@ module wallypipelinedhart //.InstrRData(InstrF), // hook up InstrF later .ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking .WriteDataM(WriteDataM), - .MemSizeM(Funct3MfromLSU[1:0]), .UnsignedLoadM(Funct3MfromLSU[2]), + .MemSizeM(SizeFromLSU[1:0]), .UnsignedLoadM(SizeFromLSU[2]), .Funct7M(InstrM[31:25]), .HRDATAW(HRDATAW), .StallW(StallWfromLSU), From 0aae58abedd0ee979c1573f675e315480a6fcb30 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 19:02:56 -0400 Subject: [PATCH 5/8] Renamed Funct3ToLSU/fromLSU -> SizeToLSU/FromLSU and simplified size muxing in lsuArb --- wally-pipelined/src/uncore/plic.sv | 42 +++++++++++++++++------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/wally-pipelined/src/uncore/plic.sv b/wally-pipelined/src/uncore/plic.sv index dc50eb4f..70c72189 100644 --- a/wally-pipelined/src/uncore/plic.sv +++ b/wally-pipelined/src/uncore/plic.sv @@ -164,27 +164,31 @@ module plic ( flopr #(N) intPendingFlop(HCLK,~HRESETn,nextIntPending,intPending); // pending array - indexed by priority_lvl x source_ID - genvar i; + genvar i, j; generate - for (i=1; i<=N; i=i+1) begin - // *** make sure that this synthesizes into N decoders, not 7*N 3-bit equality comparators (right?) - assign pendingArray[7][i] = (intPriority[i]==7) & intEn[i] & intPending[i]; - assign pendingArray[6][i] = (intPriority[i]==6) & intEn[i] & intPending[i]; - assign pendingArray[5][i] = (intPriority[i]==5) & intEn[i] & intPending[i]; - assign pendingArray[4][i] = (intPriority[i]==4) & intEn[i] & intPending[i]; - assign pendingArray[3][i] = (intPriority[i]==3) & intEn[i] & intPending[i]; - assign pendingArray[2][i] = (intPriority[i]==2) & intEn[i] & intPending[i]; - assign pendingArray[1][i] = (intPriority[i]==1) & intEn[i] & intPending[i]; + for (j=1; j<=7; j++) begin: pending + for (i=1; i<=N; i=i+1) begin: pendingbit + // *** make sure that this synthesizes into N decoders, not 7*N 3-bit equality comparators (right?) + assign pendingArray[j][i] = (intPriority[i]==j) & intEn[i] & intPending[i]; +/* assign pendingArray[6][i] = (intPriority[i]==6) & intEn[i] & intPending[i]; + assign pendingArray[5][i] = (intPriority[i]==5) & intEn[i] & intPending[i]; + assign pendingArray[4][i] = (intPriority[i]==4) & intEn[i] & intPending[i]; + assign pendingArray[3][i] = (intPriority[i]==3) & intEn[i] & intPending[i]; + assign pendingArray[2][i] = (intPriority[i]==2) & intEn[i] & intPending[i]; + assign pendingArray[1][i] = (intPriority[i]==1) & intEn[i] & intPending[i]; */ + end end endgenerate // pending array, except grouped by priority - assign pendingPGrouped[7:1] = {|pendingArray[7], +/* assign pendingPGrouped[7:1] = {|pendingArray[7], |pendingArray[6], |pendingArray[5], |pendingArray[4], |pendingArray[3], |pendingArray[2], - |pendingArray[1]}; + |pendingArray[1]}; */ + assign pendingPGrouped = pendingArray.or; + // pendingPGrouped, except only topmost priority is active assign pendingMaxP[7:1] = {pendingPGrouped[7], pendingPGrouped[6] & ~|pendingPGrouped[7], @@ -202,14 +206,16 @@ module plic ( | ({N{pendingMaxP[2]}} & pendingArray[2]) | ({N{pendingMaxP[1]}} & pendingArray[1]); // find the lowest ID amongst active interrupts at the highest priority - integer j; + genvar k; // *** verify that this synthesizes to a reasonable priority encoder and that j doesn't actually exist in hardware - always_comb begin - intClaim = 6'b0; - for(j=N; j>0; j=j-1) begin - if(pendingRequestsAtMaxP[j]) intClaim = j[5:0]; + generate + always_comb begin + intClaim = 6'b0; + for(k=N; k>0; k=k-1) begin:priorityenc + if(pendingRequestsAtMaxP[k]) intClaim = k; + end end - end + endgenerate // create threshold mask always_comb begin From 004cac91e1759a39f41df5e5cd6de52ee49e29c5 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 19:17:15 -0400 Subject: [PATCH 6/8] Simplified PLIC with generate --- wally-pipelined/src/uncore/plic.sv | 44 ++++++++++++------------------ 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/wally-pipelined/src/uncore/plic.sv b/wally-pipelined/src/uncore/plic.sv index 70c72189..ef7ecdd5 100644 --- a/wally-pipelined/src/uncore/plic.sv +++ b/wally-pipelined/src/uncore/plic.sv @@ -170,24 +170,18 @@ module plic ( for (i=1; i<=N; i=i+1) begin: pendingbit // *** make sure that this synthesizes into N decoders, not 7*N 3-bit equality comparators (right?) assign pendingArray[j][i] = (intPriority[i]==j) & intEn[i] & intPending[i]; -/* assign pendingArray[6][i] = (intPriority[i]==6) & intEn[i] & intPending[i]; - assign pendingArray[5][i] = (intPriority[i]==5) & intEn[i] & intPending[i]; - assign pendingArray[4][i] = (intPriority[i]==4) & intEn[i] & intPending[i]; - assign pendingArray[3][i] = (intPriority[i]==3) & intEn[i] & intPending[i]; - assign pendingArray[2][i] = (intPriority[i]==2) & intEn[i] & intPending[i]; - assign pendingArray[1][i] = (intPriority[i]==1) & intEn[i] & intPending[i]; */ end end endgenerate // pending array, except grouped by priority -/* assign pendingPGrouped[7:1] = {|pendingArray[7], + assign pendingPGrouped[7:1] = {|pendingArray[7], |pendingArray[6], |pendingArray[5], |pendingArray[4], |pendingArray[3], |pendingArray[2], - |pendingArray[1]}; */ - assign pendingPGrouped = pendingArray.or; + |pendingArray[1]}; + //assign pendingPGrouped = pendingArray.or; // pendingPGrouped, except only topmost priority is active assign pendingMaxP[7:1] = {pendingPGrouped[7], @@ -206,26 +200,24 @@ module plic ( | ({N{pendingMaxP[2]}} & pendingArray[2]) | ({N{pendingMaxP[1]}} & pendingArray[1]); // find the lowest ID amongst active interrupts at the highest priority - genvar k; - // *** verify that this synthesizes to a reasonable priority encoder and that j doesn't actually exist in hardware - generate - always_comb begin - intClaim = 6'b0; - for(k=N; k>0; k=k-1) begin:priorityenc - if(pendingRequestsAtMaxP[k]) intClaim = k; - end + int k; + // *** verify that this synthesizes to a reasonable priority encoder and that k doesn't actually exist in hardware + always_comb begin + intClaim = 6'b0; + for(k=N; k>0; k=k-1) begin + if(pendingRequestsAtMaxP[k]) intClaim = k[5:0]; end - endgenerate + end // create threshold mask - always_comb begin - threshMask[7] = ~(7==intThreshold); - threshMask[6] = ~(6==intThreshold) & threshMask[7]; - threshMask[5] = ~(5==intThreshold) & threshMask[6]; - threshMask[4] = ~(4==intThreshold) & threshMask[5]; - threshMask[3] = ~(3==intThreshold) & threshMask[4]; - threshMask[2] = ~(2==intThreshold) & threshMask[3]; - threshMask[1] = ~(1==intThreshold) & threshMask[2]; + always_comb begin + threshMask[7] = (intThreshold != 7); + threshMask[6] = (intThreshold != 6) & threshMask[7]; + threshMask[5] = (intThreshold != 5) & threshMask[6]; + threshMask[4] = (intThreshold != 4) & threshMask[5]; + threshMask[3] = (intThreshold != 3) & threshMask[4]; + threshMask[2] = (intThreshold != 2) & threshMask[3]; + threshMask[1] = (intThreshold != 1) & threshMask[2]; end // is the max priority > threshold? // *** would it be any better to first priority encode maxPriority into binary and then ">" with threshold? From ac163e091c8f61cdb1ebf37ba5050f6c92e8b5b4 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 4 Jul 2021 19:33:46 -0400 Subject: [PATCH 7/8] Fixed disabling MulDiv when not supported. Started adding generate for FPU unsupported --- wally-pipelined/src/fpu/fpu.sv | 678 ++++++++++++++------------- wally-pipelined/src/muldiv/muldiv.sv | 4 +- 2 files changed, 351 insertions(+), 331 deletions(-) diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index ff29dfd7..3b1a4ed1 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -43,90 +43,94 @@ module fpu ( output logic [4:0] SetFflagsM, // FPU flags output logic [`XLEN-1:0] FPUResultW); // FPU result // *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS - // control logic signal instantiation - logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable - logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode - logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double - logic FDivStartD, FDivStartE; // Start division - logic FWriteIntD; // Write to integer register - logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal - logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result - logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component - logic [1:0] FResSelD, FResSelE, FResSelM; - logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; - logic [4:0] Adr1E, Adr2E, Adr3E; - - // regfile signals - logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining - logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [`XLEN-1:0] SrcXMAligned; - logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding) - logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding) - logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding) - - // div/sqrt signals - logic [63:0] FDivResultM, FDivResultW; - logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; - logic FDivSqrtDoneE; - logic [63:0] DivInput1E, DivInput2E; - logic HoldInputs; // keep forwarded inputs arround durring division - - // FMA signals - logic [105:0] ProdManE, ProdManM; ///*** put pipline stages in units - logic [161:0] AlignedAddendE, AlignedAddendM; - logic [12:0] ProdExpE, ProdExpM; - logic AddendStickyE, AddendStickyM; - logic KillProdE, KillProdM; - logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM; - logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM; - logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM; - logic [63:0] FMAResM, FMAResW; - logic [4:0] FMAFlgM, FMAFlgW; - // add/cvt signals - logic [63:0] AddSumE, AddSumM; - logic [63:0] AddSumTcE, AddSumTcM; - logic [3:0] AddSelInvE, AddSelInvM; - logic [10:0] AddExpPostSumE,AddExpPostSumM; - logic AddCorrSignE, AddCorrSignM; - logic AddOp1NormE, AddOp1NormM; - logic AddOp2NormE, AddOp2NormM; - logic AddOpANormE, AddOpANormM; - logic AddOpBNormE, AddOpBNormM; - logic AddInvalidE, AddInvalidM; - logic AddDenormInE, AddDenormInM; - logic AddSwapE, AddSwapM; - logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2 - logic AddSignAE, AddSignAM; - logic AddConvertE, AddConvertM; - logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M; - logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM; - logic [10:0] AddExponentE, AddExponentM; - logic [63:0] FAddResM, FAddResW; - logic [4:0] FAddFlgM, FAddFlgW; - - // cmp signals - logic CmpNVE, CmpNVM, CmpNVW; - logic [63:0] CmpResE, CmpResM, CmpResW; - - // fsgn signals - logic [63:0] SgnResE, SgnResM; - logic SgnNVE, SgnNVM, SgnNVW; - logic [63:0] FResM, FResW; - logic FFlgM, FFlgW; - - // instantiation of W stage regfile signals - logic [63:0] AlignedSrcAM; - - // classify signals - logic [63:0] ClassResE, ClassResM; - - // 64-bit FPU result - logic [63:0] FPUResult64W; - logic [4:0] FPUFlagsW; - - + /*generate + if (`F_SUPPORTED) begin */ + + // control logic signal instantiation + logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable + logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode + logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double + logic FDivStartD, FDivStartE; // Start division + logic FWriteIntD; // Write to integer register + logic [1:0] ForwardXE, ForwardYE, ForwardZE; // Input3 forwarding mux control signal + logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result + logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component + logic [1:0] FResSelD, FResSelE, FResSelM; + logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; + logic [4:0] Adr1E, Adr2E, Adr3E; + + // regfile signals + logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining + logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [`XLEN-1:0] SrcXMAligned; + logic [63:0] SrcXE, SrcXM; // Input 1 to the various units (after forwarding) + logic [63:0] SrcYE, SrcYM; // Input 2 to the various units (after forwarding) + logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding) + + // div/sqrt signals + logic [63:0] FDivResultM, FDivResultW; + logic [4:0] FDivSqrtFlgM, FDivSqrtFlgW; + logic FDivSqrtDoneE; + logic [63:0] DivInput1E, DivInput2E; + logic HoldInputs; // keep forwarded inputs arround durring division + + // FMA signals + logic [105:0] ProdManE, ProdManM; ///*** put pipline stages in units + logic [161:0] AlignedAddendE, AlignedAddendM; + logic [12:0] ProdExpE, ProdExpM; + logic AddendStickyE, AddendStickyM; + logic KillProdE, KillProdM; + logic XZeroE, YZeroE, ZZeroE, XZeroM, YZeroM, ZZeroM; + logic XInfE, YInfE, ZInfE, XInfM, YInfM, ZInfM; + logic XNaNE, YNaNE, ZNaNE, XNaNM, YNaNM, ZNaNM; + logic [63:0] FMAResM, FMAResW; + logic [4:0] FMAFlgM, FMAFlgW; + + // add/cvt signals + logic [63:0] AddSumE, AddSumM; + logic [63:0] AddSumTcE, AddSumTcM; + logic [3:0] AddSelInvE, AddSelInvM; + logic [10:0] AddExpPostSumE,AddExpPostSumM; + logic AddCorrSignE, AddCorrSignM; + logic AddOp1NormE, AddOp1NormM; + logic AddOp2NormE, AddOp2NormM; + logic AddOpANormE, AddOpANormM; + logic AddOpBNormE, AddOpBNormM; + logic AddInvalidE, AddInvalidM; + logic AddDenormInE, AddDenormInM; + logic AddSwapE, AddSwapM; + logic AddNormOvflowE, AddNormOvflowM; //***this isn't used in addcvt2 + logic AddSignAE, AddSignAM; + logic AddConvertE, AddConvertM; + logic [63:0] AddFloat1E, AddFloat2E, AddFloat1M, AddFloat2M; + logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM; + logic [10:0] AddExponentE, AddExponentM; + logic [63:0] FAddResM, FAddResW; + logic [4:0] FAddFlgM, FAddFlgW; + + // cmp signals + logic CmpNVE, CmpNVM, CmpNVW; + logic [63:0] CmpResE, CmpResM, CmpResW; + + // fsgn signals + logic [63:0] SgnResE, SgnResM; + logic SgnNVE, SgnNVM, SgnNVW; + logic [63:0] FResM, FResW; + logic FFlgM, FFlgW; + + // instantiation of W stage regfile signals + logic [63:0] AlignedSrcAM; + + // classify signals + logic [63:0] ClassResE, ClassResM; + + // 64-bit FPU result + logic [63:0] FPUResult64W; + logic [4:0] FPUFlagsW; + + @@ -134,189 +138,19 @@ module fpu ( - //DECODE STAGE - - - // top-level controller for FPU - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), - .FRM_REGW, .IllegalFPUInstrD, .FWriteEnD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, - .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); - - // regfile instantiation - fregfile fregfile (clk, reset, FWriteEnW, - InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, - FPUResult64W, - FRD1D, FRD2D, FRD3D); - - - - - - - - - - //***************** - // D/E pipe registers - //***************** - flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); - flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); - flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); - flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, - {Adr1E, Adr2E, Adr3E}); - flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD}, - {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); - - - - - - - - - - - - - - - //EXECUTION STAGE - - // Hazard unit for FPU - fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FWriteEnM, .FWriteEnW, .RdM, .RdW, .FResultSelM, .FStallD, - .ForwardXE, .ForwardYE, .ForwardZE); - - // forwarding muxs - mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); - mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE); - mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE); - - - // first of two-stage instance of floating-point fused multiply-add unit - fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, .AlignedAddendE, - .ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, - .XNaNE, .YNaNE, .ZNaNE ); - - // first and only instance of floating-point divider - logic fpdivClk; - - clockgater fpdivclkg(.E(FDivStartE), - .SE(1'b0), - .CLK(clk), - .ECLK(fpdivClk)); - - // capture the inputs for div/sqrt - flopenrc #(64) reg_input1 (.d(SrcXE), .q(DivInput1E), - .en(~HoldInputs), .clear(FDivSqrtDoneE), - .reset(reset), .clk(clk)); - flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E), - .en(~HoldInputs), .clear(FDivSqrtDoneE), - .reset(reset), .clk(clk)); - - fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, - .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, - .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); - - - - // first of two-stage instance of floating-point add/cvt unit - fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE, - .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE, - .AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, - .AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE); - - // first and only instance of floating-point comparator - fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE); - - // first and only instance of floating-point sign converter - fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE); - - // first and only instance of floating-point classify unit - fclassify fclassify (.SrcXE, .FmtE, .ClassResE); - - // output for store instructions - assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; - //***swap to mux - - - - - - - - - - - //***************** - // E/M pipe registers - //***************** - flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM); - flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM); - flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM); - - flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); - flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); - flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); - flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE}, - {AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM}); - - flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); - flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); - flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); - flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); - flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); - flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); - flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); - flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); - flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM, - {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE}, - {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM}); - - flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); - flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); - - flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); - flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); - - flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM, - {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}, - {FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); - - flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); - - - - - - - - - //BEGIN MEMORY STAGE - - mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM); - mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM); - - //***change to mux - assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; - mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM); - - // second instance of two-stage FMA unit - fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM, - .ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM, - .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, - .FMAResM, .FMAFlgM); - - // second instance of two-stage floating-point add/cvt unit - fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M, - .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, - .AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, - .AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM); - - // Align SrcA to MSB when single precicion - mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); + //DECODE STAGE + + + // top-level controller for FPU + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), + .FRM_REGW, .IllegalFPUInstrD, .FWriteEnD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, + .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); + + // regfile instantiation + fregfile fregfile (clk, reset, FWriteEnW, + InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, + FPUResult64W, + FRD1D, FRD2D, FRD3D); @@ -326,77 +160,261 @@ module fpu ( + //***************** + // D/E pipe registers + //***************** + flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); + flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); + flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); + flopenrc #(1) DECtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); + flopenrc #(15) DECtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + flopenrc #(22) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD}, + {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); + + + + + + + + + + + + + + + //EXECUTION STAGE + + // Hazard unit for FPU + fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FWriteEnM, .FWriteEnW, .RdM, .RdW, .FResultSelM, .FStallD, + .ForwardXE, .ForwardYE, .ForwardZE); + + // forwarding muxs + mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); + mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE); + mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE); + + + // first of two-stage instance of floating-point fused multiply-add unit + fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]), .FmtE, .ProdManE, .AlignedAddendE, + .ProdExpE, .AddendStickyE, .KillProdE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, + .XNaNE, .YNaNE, .ZNaNE ); + + // first and only instance of floating-point divider + logic fpdivClk; + + clockgater fpdivclkg(.E(FDivStartE), + .SE(1'b0), + .CLK(clk), + .ECLK(fpdivClk)); + + // capture the inputs for div/sqrt + flopenrc #(64) reg_input1 (.d(SrcXE), .q(DivInput1E), + .en(~HoldInputs), .clear(FDivSqrtDoneE), + .reset(reset), .clk(clk)); + flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E), + .en(~HoldInputs), .clear(FDivSqrtDoneE), + .reset(reset), .clk(clk)); + + fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, + .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, + .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); + + + + // first of two-stage instance of floating-point add/cvt unit + fpuaddcvt1 fpadd1 (.SrcXE, .SrcYE, .FOpCtrlE, .FmtE, .AddFloat1E, .AddFloat2E, .AddExponentE, + .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE, + .AddCorrSignE, .AddSignAE, .AddOp1NormE, .AddOp2NormE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, + .AddDenormInE, .AddConvertE, .AddSwapE, .AddNormOvflowE); + + // first and only instance of floating-point comparator + fcmp fcmp (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpNVE, CmpResE); + + // first and only instance of floating-point sign converter + fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .SrcXE, .SrcYE, .SgnResE, .SgnNVE); + + // first and only instance of floating-point classify unit + fclassify fclassify (.SrcXE, .FmtE, .ClassResE); + + // output for store instructions + assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; + //***swap to mux + + + + + + + + + + + //***************** + // E/M pipe registers + //***************** + flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM); + flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM); + flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM); + + flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); + flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); + flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); + flopenrc #(11) EMRegFma4(clk, reset, FlushM, ~StallM, + {AddendStickyE, KillProdE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE}, + {AddendStickyM, KillProdM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM}); + + flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); + flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); + flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); + flopenrc #(64) EMRegAdd4(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); + flopenrc #(64) EMRegAdd5(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); + flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); + flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); + flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); + flopenrc #(15) EMRegAdd9(clk, reset, FlushM, ~StallM, + {AddSelInvE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE}, + {AddSelInvM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddNormOvflowM, AddSignAM}); + + flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); + flopenrc #(64) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); + + flopenrc #(64) EMRegSgn1(clk, reset, FlushM, ~StallM, SgnResE, SgnResM); + flopenrc #(1) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM); + + flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM, + {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}, + {FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); + + flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); + + + + + + + + + //BEGIN MEMORY STAGE + + mux3 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, FResSelM, FResM); + mux3 #(1) FFlgMux(1'b0, SgnNVM, CmpNVM, FResSelM, FFlgM); + + //***change to mux + assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; + mux3 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXMAligned, ClassResM[`XLEN-1:0], FIntResSelM, FIntResM); + + // second instance of two-stage FMA unit + fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .FrmM, .FmtM, + .ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM, + .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, + .FMAResM, .FMAFlgM); + + // second instance of two-stage floating-point add/cvt unit + fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .AddFloat1M, .AddFloat2M, + .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, + .AddOp1NormM, .AddOp2NormM, .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, + .AddSignAM, .AddCorrSignM, .AddConvertM, .AddSwapM, .FAddResM, .FAddFlgM); + + // Align SrcA to MSB when single precicion + mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); - //***************** - // M/W pipe registers - //***************** - flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); - flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW); - - flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); - flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW); - - flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); - flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW); - - flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW); - flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); - - flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); - flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); - - flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW, - {FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM}, - {FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW}); - - - - - - - //######################################### - // BEGIN WRITEBACK STAGE - //######################################### -//***turn into muxs - always_comb begin - case (FResultSelW) - 3'b000 : FPUFlagsW = 5'b0; - 3'b001 : FPUFlagsW = FMAFlgW; - 3'b010 : FPUFlagsW = FAddFlgW; - 3'b011 : FPUFlagsW = FDivSqrtFlgW; - 3'b100 : FPUFlagsW = {4'b0,FFlgW}; - default : FPUFlagsW = 5'bxxxxx; - endcase - end - - always_comb begin - case (FResultSelW) - 3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0}; - 3'b001 : FPUResult64W = FMAResW; - 3'b010 : FPUResult64W = FAddResW; - 3'b011 : FPUResult64W = FDivResultW; - 3'b100 : FPUResult64W = FResW; - default : FPUResult64W = 64'bxxxxx; - endcase - end - - - // interface between XLEN size datapath and double-precision sized - // floating-point results - // - // define offsets for LSB zero extension or truncation - always_comb begin - // zero extension -//***turn into mux - FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]}; - //*** put into mem stage - SetFflagsM = FPUFlagsW; + + + + + //***************** + // M/W pipe registers + //***************** + flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); + flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FMAFlgM, FMAFlgW); + + flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); + flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivSqrtFlgM, FDivSqrtFlgW); + + flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResM, FAddResW); + flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlgM, FAddFlgW); + + flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpNVM, CmpNVW); + flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, CmpResM, CmpResW); + + flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); + + flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM}, + {FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW}); + + + + + + + //######################################### + // BEGIN WRITEBACK STAGE + //######################################### + + + + + + //***turn into muxs + always_comb begin + case (FResultSelW) + 3'b000 : FPUFlagsW = 5'b0; + 3'b001 : FPUFlagsW = FMAFlgW; + 3'b010 : FPUFlagsW = FAddFlgW; + 3'b011 : FPUFlagsW = FDivSqrtFlgW; + 3'b100 : FPUFlagsW = {4'b0,FFlgW}; + default : FPUFlagsW = 5'bxxxxx; + endcase + end + + always_comb begin + case (FResultSelW) + 3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0}; + 3'b001 : FPUResult64W = FMAResW; + 3'b010 : FPUResult64W = FAddResW; + 3'b011 : FPUResult64W = FDivResultW; + 3'b100 : FPUResult64W = FResW; + default : FPUResult64W = 64'bxxxxx; + endcase + end + + + // interface between XLEN size datapath and double-precision sized + // floating-point results + // + // define offsets for LSB zero extension or truncation + always_comb begin + // zero extension + //***turn into mux + FPUResultW = FmtW ? FPUResult64W[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FPUResult64W[63:32]}; + //*** put into mem stage + SetFflagsM = FPUFlagsW; + end + + /* end else begin + assign FStallD = 0; + assign FWriteIntE = 0; + assign FWriteIntM = 0; + assign FWriteIntW = 0; + assign FWriteDataE = 0; + assign FIntResM = 0; + assign FDivBusyE = 0; + assign IllegalFPUInstrD = 0; + assign SetFflagsM = 0; + assign FPUResultW = 0; end + endgenerate*/ endmodule // fpu diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index e10b0c55..7288229c 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -138,7 +138,9 @@ module muldiv ( flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW); end else begin // no M instructions supported - assign MulDivResultW = 0; + assign MulDivResultW = 0; + assign DivBusyE = 0; + assign DivDoneE = 0; end endgenerate From 5f91b339aa07f818ebd96c7425bd2c004ae8f6db Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 5 Jul 2021 10:30:46 -0400 Subject: [PATCH 8/8] Added F_SUPPORTED flag to disable floating point unit when not in MISA --- .../regression/wave-dos/peripheral-waves.do | 3 ++- wally-pipelined/src/fpu/fpu.sv | 12 +++++------- wally-pipelined/testbench/testbench-imperas.sv | 14 ++++++++------ 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/wally-pipelined/regression/wave-dos/peripheral-waves.do b/wally-pipelined/regression/wave-dos/peripheral-waves.do index 1304b40c..a42bfbd4 100644 --- a/wally-pipelined/regression/wave-dos/peripheral-waves.do +++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do @@ -9,7 +9,8 @@ add wave /testbench/clk add wave /testbench/reset add wave -divider -add wave /testbench/dut/hart/DataStall +#add wave /testbench/dut/hart/DataStall +add wave /testbench/debug add wave /testbench/dut/hart/StallF add wave /testbench/dut/hart/StallD add wave /testbench/dut/hart/StallE diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 3b1a4ed1..59f5e439 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -44,9 +44,8 @@ module fpu ( output logic [`XLEN-1:0] FPUResultW); // FPU result // *** change FMA to do 16 - 32 - 64 - 128 FEXPBITS - /*generate - if (`F_SUPPORTED) begin */ - + generate + if (`F_SUPPORTED) begin // control logic signal instantiation logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode @@ -401,8 +400,7 @@ module fpu ( //*** put into mem stage SetFflagsM = FPUFlagsW; end - - /* end else begin + end else begin // no F_SUPPORTED; tie outputs low assign FStallD = 0; assign FWriteIntE = 0; assign FWriteIntM = 0; @@ -410,11 +408,11 @@ module fpu ( assign FWriteDataE = 0; assign FIntResM = 0; assign FDivBusyE = 0; - assign IllegalFPUInstrD = 0; + assign IllegalFPUInstrD = 1; assign SetFflagsM = 0; assign FPUResultW = 0; end - endgenerate*/ + endgenerate endmodule // fpu diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 8c3e28c3..2cf37c17 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -514,6 +514,9 @@ string tests32f[] = '{ logic HMASTLOCK; logic HCLK, HRESETn; logic [`XLEN-1:0] PCW; + + logic [`XLEN-1:0] debug; + assign debug = dut.uncore.dtim.RAM[536872960]; flopenr #(`XLEN) PCWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.PCM, PCW); flopenr #(32) InstrWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.InstrM, InstrW); @@ -656,10 +659,7 @@ string tests32f[] = '{ // Check errors errors = (i == SIGNATURESIZE+1); // error if file is empty i = 0; - if (`XLEN == 32) - testadr = (`TIM_BASE+tests[test+1].atohex())/4; - else - testadr = (`TIM_BASE+tests[test+1].atohex())/8; + testadr = (`TIM_BASE+tests[test+1].atohex())/(`XLEN/8); /* verilator lint_off INFINITELOOP */ while (signature[i] !== 'bx) begin //$display("signature[%h] = %h", i, signature[i]); @@ -669,14 +669,16 @@ string tests32f[] = '{ // kind of hacky test for garbage right now errors = errors+1; $display(" Error on test %s result %d: adr = %h sim = %h, signature = %h", - tests[test], i, (testadr+i)*`XLEN/8, dut.uncore.dtim.RAM[testadr+i], signature[i]); + tests[test], i, (testadr+i)*(`XLEN/8), dut.uncore.dtim.RAM[testadr+i], signature[i]); $stop;//***debug end end i = i + 1; end /* verilator lint_on INFINITELOOP */ - if (errors == 0) $display("%s succeeded. Brilliant!!!", tests[test]); + if (errors == 0) begin + $display("%s succeeded. Brilliant!!!", tests[test]); + end else begin $display("%s failed with %d errors. :(", tests[test], errors); totalerrors = totalerrors+1;