Second update to divide that didn't get in for some silly git reason

This commit is contained in:
James E. Stine 2021-03-30 14:21:45 -05:00
parent f4a533b6f6
commit b2039e5b9a
22 changed files with 5918 additions and 932 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,100 @@
//////////////////////////////////////////
// wally-config.vh
//
// Written: David_Harris@hmc.edu 4 January 2021
// Modified:
//
// Purpose: Specify which features are configured
// Macros to determine which modes are supported based on MISA
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// RV32 or RV64: XLEN = 32 or 64
`define XLEN 64
// Alternative MISA value, left disabled
//`define MISA (32'h00000105)
// MISA extension bits. The literal 32'h00001104 sets bits 2, 8 and 12; the OR
// terms add bits 5, 18, 20 and 0. The 1 << 12 term repeats a bit that the
// literal already sets, so it has no effect on the value.
`define MISA (32'h00001104 | 1<<5 | 1<<18 | 1 << 20 | 1 << 12 | 1 << 0)
// Each *_SUPPORTED macro below is true when the named MISA bit is 1.
// With the MISA value above, bit 3 is clear, so D_SUPPORTED is false while
// F_SUPPORTED (bit 5) is true.
`define A_SUPPORTED ((`MISA >> 0) % 2 == 1)
`define C_SUPPORTED ((`MISA >> 2) % 2 == 1)
`define D_SUPPORTED ((`MISA >> 3) % 2 == 1)
`define F_SUPPORTED ((`MISA >> 5) % 2 == 1)
`define M_SUPPORTED ((`MISA >> 12) % 2 == 1)
`define S_SUPPORTED ((`MISA >> 18) % 2 == 1)
`define U_SUPPORTED ((`MISA >> 20) % 2 == 1)
`define ZCSR_SUPPORTED 1
`define COUNTERS 31
`define ZCOUNTERS_SUPPORTED 1
// N-mode user-level interrupts are deprecated per Andrew Waterman 1/13/21
//`define N_SUPPORTED ((`MISA >> 13) % 2 == 1)
`define N_SUPPORTED 0
// Two-bit privilege mode encodings
`define M_MODE (2'b11)
`define S_MODE (2'b01)
`define U_MODE (2'b00)
// Microarchitectural Features
`define UARCH_PIPELINED 1
`define UARCH_SUPERSCALR 0
`define UARCH_SINGLECYCLE 0
`define MEM_DCACHE 0
`define MEM_DTIM 1
`define MEM_ICACHE 0
`define MEM_VIRTMEM 0
// Address space
`define RESET_VECTOR 64'h0000000080000000
// Bus Interface width
`define AHBW 64
// Peripheral Physical Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0s in the upper bits and 1s in the lower bits
`define BOOTTIMBASE 32'h00000000
`define BOOTTIMRANGE 32'h00003FFF
`define TIMBASE 32'h80000000
`define TIMRANGE 32'h0007FFFF
`define CLINTBASE 32'h02000000
`define CLINTRANGE 32'h0000FFFF
`define GPIOBASE 32'h10012000
`define GPIORANGE 32'h000000FF
`define UARTBASE 32'h10000000
`define UARTRANGE 32'h00000007
`define PLICBASE 32'h0C000000
`define PLICRANGE 32'h03FFFFFF
// Test modes
// Tie GPIO outputs back to inputs
`define GPIO_LOOPBACK_TEST 0
// Busybear special CSR config to match OVPSim
`define OVPSIM_CSR_CONFIG 0
// Hardware configuration
`define UART_PRESCALE 1
/* verilator lint_off STMTDLY */
/* verilator lint_off WIDTH */
/* verilator lint_off ASSIGNDLY */
/* verilator lint_off PINCONNECTEMPTY */
// Branch predictor preload files (paths point into the rv64ic config
// directory) and the predictor type selector
`define TWO_BIT_PRELOAD "../config/rv64ic/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv64ic/BTBPredictor.txt"
`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE

View File

@ -0,0 +1,31 @@
//////////////////////////////////////////
// wally-constants.vh
//
// Written: tfleming@hmc.edu 4 March 2021
// Modified:
//
// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
// These macros should not be changed, except in the event of an
// update to the architecture or particularly special circumstances.
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// Virtual Memory Constants (sv39)
// Bit widths of the virtual page number, physical page number and physical
// address. NOTE(review): presumably the Sv39 field sizes from the RISC-V
// privileged specification -- confirm before changing.
`define VPN_BITS 27
`define PPN_BITS 44
`define PA_BITS 56

View File

@ -27,12 +27,13 @@
module forward(
// Detect hazards
input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW,
input logic MemReadE, MulDivE, CSRReadE,
input logic RegWriteM, RegWriteW,
input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW,
input logic MemReadE, MulDivE, CSRReadE,
input logic RegWriteM, RegWriteW,
input logic DivDoneW,
// Forwarding controls
output logic [1:0] ForwardAE, ForwardBE,
output logic LoadStallD, MulDivStallD, CSRRdStallD
output logic LoadStallD, MulDivStallD, CSRRdStallD
);
always_comb begin
@ -48,8 +49,8 @@ module forward(
end
// Stall on dependent operations that finish in Mem Stage and can't bypass in time
assign LoadStallD = MemReadE & ((Rs1D == RdE) | (Rs2D == RdE));
assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)); // *** extend with stalls for divide
assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE));
assign LoadStallD = MemReadE & ((Rs1D == RdE) | (Rs2D == RdE));
assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) | MulDivE&~DivDoneW; // *** extend with stalls for divide
assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE));
endmodule

View File

@ -26,39 +26,40 @@
`include "wally-config.vh"
module ieu (
input logic clk, reset,
input logic clk, reset,
// Decode Stage interface
input logic [31:0] InstrD,
input logic IllegalIEUInstrFaultD,
output logic IllegalBaseInstrFaultD,
input logic [31:0] InstrD,
input logic IllegalIEUInstrFaultD,
output logic IllegalBaseInstrFaultD,
// Execute Stage interface
input logic [`XLEN-1:0] PCE,
input logic [`XLEN-1:0] PCLinkE,
input logic [`XLEN-1:0] PCE,
input logic [`XLEN-1:0] PCLinkE,
output logic [`XLEN-1:0] PCTargetE,
output logic MulDivE, W64E,
output logic [2:0] Funct3E,
output logic MulDivE, W64E,
output logic [2:0] Funct3E,
output logic [`XLEN-1:0] SrcAE, SrcBE,
// Memory stage interface
input logic DataMisalignedM,
input logic DataAccessFaultM,
input logic SquashSCW,
output logic [1:0] MemRWM,
output logic [1:0] AtomicM,
input logic DataMisalignedM,
input logic DataAccessFaultM,
input logic SquashSCW,
output logic [1:0] MemRWM,
output logic [1:0] AtomicM,
output logic [`XLEN-1:0] MemAdrM, WriteDataM,
output logic [`XLEN-1:0] SrcAM,
output logic [2:0] Funct3M,
output logic [2:0] Funct3M,
// Writeback stage
input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW,
input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW,
// input logic [`XLEN-1:0] PCLinkW,
output logic InstrValidW,
output logic InstrValidW,
// hazards
input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW,
output logic LoadStallD, MulDivStallD, CSRRdStallD,
output logic PCSrcE,
input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW,
output logic LoadStallD, MulDivStallD, CSRRdStallD,
output logic PCSrcE,
input logic DivDoneW,
output logic CSRReadM, CSRWriteM, PrivilegedM,
output logic CSRWritePendingDEM
output logic CSRReadM, CSRWriteM, PrivilegedM,
output logic CSRWritePendingDEM
);
logic [2:0] ImmSrcD;
@ -78,5 +79,6 @@ module ieu (
controller c(.*);
datapath dp(.*);
forward fw(.*);
endmodule

View File

@ -80,7 +80,7 @@ module ifu (
logic ITLBFlushF = '0;
// logic ITLBWriteF = '0;
tlb #(3) itlb(clk, reset, SATP_REGW, PrivilegeModeW, PCF, PageTableEntryF, ITLBWriteF, ITLBFlushF,
ITLBInstrPAdrF, ITLBMissF, ITLBHitF);
ITLBInstrPAdrF, ITLBMissF, ITLBHitF);
// branch predictor signals
logic SelBPPredF;

1535
wally-pipelined/src/muldiv/div.sv Executable file

File diff suppressed because it is too large Load Diff

1921
wally-pipelined/src/muldiv/div.sv~ Executable file

File diff suppressed because it is too large Load Diff

View File

@ -50,8 +50,9 @@ module int32div (Q, done, divdone, rem0, div0, N, D, clk, reset, start);
// #iter: N = m+v+s = m+(s+2) = m+2+s (mod k = 0)
// v = 2 since \rho < 1 (add 4 to make sure it's a ceiling)
bk6 cpa1 (co1, Num, {1'b0, P},
{3'h0, shiftResult, ~shiftResult,1'b0}, 1'b0);
adder #(6) cpa1 ({1'b0, P},
{3'h0, shiftResult, ~shiftResult,1'b0},
Num);
// Determine whether need to add just Q/Rem
assign shiftResult = P[0];
@ -160,10 +161,10 @@ module divide4x32 (Q, rem0, quotient, op1, op2, clk, reset, state0,
otfzero, enable, Qstar, QMstar);
// Correction and generation of Remainder
add36 cpa2 (cout1, rem1, SumN2[35:0], CarryN2[35:0], 1'b0);
adder #(36) cpa2 (SumN2[35:0], CarryN2[35:0], rem1);
// Add back +D as correction
csa #(36) csa2 (CarryN2[35:0], SumN2[35:0], divi1, SumR, CarryR);
add36 cpa3 (cout2, rem2, SumR, CarryR, 1'b0);
adder #(36) cpa3 (SumR, CarryR, rem2);
// Choose remainder (Rem or Rem+D)
mux2 #(36) mx6 (rem1, rem2, rem1[35], rem3);
// Choose correct Q or QM
@ -349,306 +350,7 @@ module floprc #(parameter WIDTH = 8)
endmodule // qst4
// Ladner-Fischer Prefix Adder
// 36-bit adder producing sum and carry-out from a, b and cin. This wrapper
// forms the per-bit propagate/generate vectors, hands them to the
// ladner_fischer36 carry tree, and then combines the returned carries with
// the propagate bits to obtain the sum and the carry-out.
module add36 (output logic        cout,
              output logic [35:0] sum,
              input  logic [35:0] a, b,
              input  logic        cin);

   logic [36:0] prop, gen;   // bit 0 is the carry-in slot, bits 36:1 are per-bit
   logic [35:0] carry;       // carry[i] is the carry into sum bit i

   // Pre-computation: propagate = a xor b, generate = a and b; the carry-in
   // is injected as generate bit 0 with a zero propagate bit.
   assign prop = {a ^ b, 1'b0};
   assign gen  = {a & b, cin};

   // Prefix tree computes every carry from the low 36 propagate/generate bits
   ladner_fischer36 prefix_tree (carry, prop[35:0], gen[35:0]);

   // Post-computation
   assign sum  = carry ^ prop[36:1];
   assign cout = (carry[35] & prop[36]) | gen[36];

endmodule // add36
// Ladner-Fischer carry tree used by add36.
//   p, g : per-bit propagate / generate inputs (add36 places its carry-in in g[0])
//   c    : c[k+1] is the group generate G_k_0, i.e. the carry out of position k
//
// Only cells that contribute to c[36:1] are instantiated. Earlier revisions
// also carried cells for positions 37..63; those read nets that nothing in a
// 36-bit tree drives and their outputs were never used, so they were dead
// logic hanging off undriven implicit wires.
module ladner_fischer36 (c, p, g);

   input  logic [35:0] p;
   input  logic [35:0] g;
   output logic [36:1] c;

   // Group generate (G) / propagate (P) nets, named <G|P>_<msb>_<lsb>.
   // Declared explicitly rather than relying on implicit net creation.
   logic G_1_0,  G_2_0,  G_3_0,  G_4_0,  G_5_0,  G_6_0,  G_7_0;
   logic G_8_0,  G_9_0,  G_10_0, G_11_0, G_12_0, G_13_0, G_14_0;
   logic G_15_0, G_16_0, G_17_0, G_18_0, G_19_0, G_20_0, G_21_0;
   logic G_22_0, G_23_0, G_24_0, G_25_0, G_26_0, G_27_0, G_28_0;
   logic G_29_0, G_30_0, G_31_0, G_32_0, G_33_0, G_34_0, G_35_0;

   logic G_3_2,   G_5_4,   G_7_6,   G_9_8,   G_11_10, G_13_12;
   logic G_15_14, G_17_16, G_19_18, G_21_20, G_23_22, G_25_24;
   logic G_27_26, G_29_28, G_31_30, G_33_32, G_35_34;
   logic P_3_2,   P_5_4,   P_7_6,   P_9_8,   P_11_10, P_13_12;
   logic P_15_14, P_17_16, P_19_18, P_21_20, P_23_22, P_25_24;
   logic P_27_26, P_29_28, P_31_30, P_33_32, P_35_34;

   logic G_7_4,   G_11_8,  G_15_12, G_19_16, G_23_20, G_27_24, G_31_28, G_35_32;
   logic P_7_4,   P_11_8,  P_15_12, P_19_16, P_23_20, P_27_24, P_31_28, P_35_32;

   logic G_13_8,  G_15_8,  G_21_16, G_23_16, G_29_24, G_31_24;
   logic P_13_8,  P_15_8,  P_21_16, P_23_16, P_29_24, P_31_24;

   logic G_25_16, G_27_16, G_29_16, G_31_16;
   logic P_25_16, P_27_16, P_29_16, P_31_16;

   // parallel-prefix, Ladner-Fischer

   // Stage 1: combine adjacent bit pairs
   grey  b_1_0   (G_1_0, {g[1],g[0]}, p[1]);
   black b_3_2   (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
   black b_5_4   (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
   black b_7_6   (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
   black b_9_8   (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
   black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
   black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
   black b_15_14 (G_15_14, P_15_14, {g[15],g[14]}, {p[15],p[14]});
   black b_17_16 (G_17_16, P_17_16, {g[17],g[16]}, {p[17],p[16]});
   black b_19_18 (G_19_18, P_19_18, {g[19],g[18]}, {p[19],p[18]});
   black b_21_20 (G_21_20, P_21_20, {g[21],g[20]}, {p[21],p[20]});
   black b_23_22 (G_23_22, P_23_22, {g[23],g[22]}, {p[23],p[22]});
   black b_25_24 (G_25_24, P_25_24, {g[25],g[24]}, {p[25],p[24]});
   black b_27_26 (G_27_26, P_27_26, {g[27],g[26]}, {p[27],p[26]});
   black b_29_28 (G_29_28, P_29_28, {g[29],g[28]}, {p[29],p[28]});
   black b_31_30 (G_31_30, P_31_30, {g[31],g[30]}, {p[31],p[30]});
   black b_33_32 (G_33_32, P_33_32, {g[33],g[32]}, {p[33],p[32]});
   black b_35_34 (G_35_34, P_35_34, {g[35],g[34]}, {p[35],p[34]});

   // Stage 2: combine pairs of stage-1 groups
   grey  g_3_0   (G_3_0, {G_3_2,G_1_0}, P_3_2);
   black b_7_4   (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
   black b_11_8  (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
   black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12});
   black b_19_16 (G_19_16, P_19_16, {G_19_18,G_17_16}, {P_19_18,P_17_16});
   black b_23_20 (G_23_20, P_23_20, {G_23_22,G_21_20}, {P_23_22,P_21_20});
   black b_27_24 (G_27_24, P_27_24, {G_27_26,G_25_24}, {P_27_26,P_25_24});
   black b_31_28 (G_31_28, P_31_28, {G_31_30,G_29_28}, {P_31_30,P_29_28});
   black b_35_32 (G_35_32, P_35_32, {G_35_34,G_33_32}, {P_35_34,P_33_32});

   // Stage 3: extend each stage-2 group across the following four bits
   grey  g_5_0   (G_5_0, {G_5_4,G_3_0}, P_5_4);
   grey  g_7_0   (G_7_0, {G_7_4,G_3_0}, P_7_4);
   black b_13_8  (G_13_8, P_13_8, {G_13_12,G_11_8}, {P_13_12,P_11_8});
   black b_15_8  (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8});
   black b_21_16 (G_21_16, P_21_16, {G_21_20,G_19_16}, {P_21_20,P_19_16});
   black b_23_16 (G_23_16, P_23_16, {G_23_20,G_19_16}, {P_23_20,P_19_16});
   black b_29_24 (G_29_24, P_29_24, {G_29_28,G_27_24}, {P_29_28,P_27_24});
   black b_31_24 (G_31_24, P_31_24, {G_31_28,G_27_24}, {P_31_28,P_27_24});

   // Stage 4: extend across the following eight bits
   grey  g_9_0   (G_9_0, {G_9_8,G_7_0}, P_9_8);
   grey  g_11_0  (G_11_0, {G_11_8,G_7_0}, P_11_8);
   grey  g_13_0  (G_13_0, {G_13_8,G_7_0}, P_13_8);
   grey  g_15_0  (G_15_0, {G_15_8,G_7_0}, P_15_8);
   black b_25_16 (G_25_16, P_25_16, {G_25_24,G_23_16}, {P_25_24,P_23_16});
   black b_27_16 (G_27_16, P_27_16, {G_27_24,G_23_16}, {P_27_24,P_23_16});
   black b_29_16 (G_29_16, P_29_16, {G_29_24,G_23_16}, {P_29_24,P_23_16});
   black b_31_16 (G_31_16, P_31_16, {G_31_24,G_23_16}, {P_31_24,P_23_16});

   // Stage 5: extend across the following sixteen bits
   grey  g_17_0  (G_17_0, {G_17_16,G_15_0}, P_17_16);
   grey  g_19_0  (G_19_0, {G_19_16,G_15_0}, P_19_16);
   grey  g_21_0  (G_21_0, {G_21_16,G_15_0}, P_21_16);
   grey  g_23_0  (G_23_0, {G_23_16,G_15_0}, P_23_16);
   grey  g_25_0  (G_25_0, {G_25_16,G_15_0}, P_25_16);
   grey  g_27_0  (G_27_0, {G_27_16,G_15_0}, P_27_16);
   grey  g_29_0  (G_29_0, {G_29_16,G_15_0}, P_29_16);
   grey  g_31_0  (G_31_0, {G_31_16,G_15_0}, P_31_16);

   // Stage 6: only positions 33 and 35 exist above bit 31 in a 36-bit tree
   grey  g_33_0  (G_33_0, {G_33_32,G_31_0}, P_33_32);
   grey  g_35_0  (G_35_0, {G_35_32,G_31_0}, P_35_32);

   // Extra grey cell stage: fill in the even positions
   grey  g_2_0   (G_2_0, {g[2],G_1_0}, p[2]);
   grey  g_4_0   (G_4_0, {g[4],G_3_0}, p[4]);
   grey  g_6_0   (G_6_0, {g[6],G_5_0}, p[6]);
   grey  g_8_0   (G_8_0, {g[8],G_7_0}, p[8]);
   grey  g_10_0  (G_10_0, {g[10],G_9_0}, p[10]);
   grey  g_12_0  (G_12_0, {g[12],G_11_0}, p[12]);
   grey  g_14_0  (G_14_0, {g[14],G_13_0}, p[14]);
   grey  g_16_0  (G_16_0, {g[16],G_15_0}, p[16]);
   grey  g_18_0  (G_18_0, {g[18],G_17_0}, p[18]);
   grey  g_20_0  (G_20_0, {g[20],G_19_0}, p[20]);
   grey  g_22_0  (G_22_0, {g[22],G_21_0}, p[22]);
   grey  g_24_0  (G_24_0, {g[24],G_23_0}, p[24]);
   grey  g_26_0  (G_26_0, {g[26],G_25_0}, p[26]);
   grey  g_28_0  (G_28_0, {g[28],G_27_0}, p[28]);
   grey  g_30_0  (G_30_0, {g[30],G_29_0}, p[30]);
   grey  g_32_0  (G_32_0, {g[32],G_31_0}, p[32]);
   grey  g_34_0  (G_34_0, {g[34],G_33_0}, p[34]);

   // Final Stage: Apply c_k+1=G_k_0
   assign c[1]=g[0];
   assign c[2]=G_1_0;
   assign c[3]=G_2_0;
   assign c[4]=G_3_0;
   assign c[5]=G_4_0;
   assign c[6]=G_5_0;
   assign c[7]=G_6_0;
   assign c[8]=G_7_0;
   assign c[9]=G_8_0;
   assign c[10]=G_9_0;
   assign c[11]=G_10_0;
   assign c[12]=G_11_0;
   assign c[13]=G_12_0;
   assign c[14]=G_13_0;
   assign c[15]=G_14_0;
   assign c[16]=G_15_0;
   assign c[17]=G_16_0;
   assign c[18]=G_17_0;
   assign c[19]=G_18_0;
   assign c[20]=G_19_0;
   assign c[21]=G_20_0;
   assign c[22]=G_21_0;
   assign c[23]=G_22_0;
   assign c[24]=G_23_0;
   assign c[25]=G_24_0;
   assign c[26]=G_25_0;
   assign c[27]=G_26_0;
   assign c[28]=G_27_0;
   assign c[29]=G_28_0;
   assign c[30]=G_29_0;
   assign c[31]=G_30_0;
   assign c[32]=G_31_0;
   assign c[33]=G_32_0;
   assign c[34]=G_33_0;
   assign c[35]=G_34_0;
   assign c[36]=G_35_0;

endmodule // ladner_fischer36
// Brent-Kung Prefix Adder
// 6-bit adder producing sum and carry-out from a, b and cin. The wrapper
// builds propagate/generate vectors, lets the brent_kung tree compute the
// carries, and then forms the sum bits and the carry-out.
module bk6 (output logic       cout,
            output logic [5:0] sum,
            input  logic [5:0] a, b,
            input  logic       cin);

   logic [6:0] prop, gen;   // bit 0 is the carry-in slot, bits 6:1 are per-bit
   logic [5:0] carry;       // carry[i] is the carry into sum bit i

   // Pre-computation: the carry-in enters as generate bit 0
   assign prop = {a ^ b, 1'b0};
   assign gen  = {a & b, cin};

   // Prefix tree
   brent_kung prefix_tree (carry, prop[5:0], gen[5:0]);

   // Post-computation
   assign sum  = carry ^ prop[6:1];
   assign cout = (carry[5] & prop[6]) | gen[6];

endmodule // bk6
// Brent-Kung carry tree used by bk6.
//   p, g : per-bit propagate / generate inputs (bk6 places its carry-in in g[0])
//   c    : c[k+1] is the group generate G_k_0, i.e. the carry out of position k
module brent_kung (output logic [6:1] c,
                   input  logic [5:0] p,
                   input  logic [5:0] g);

   // Group generate/propagate nets, named <G|P>_<msb>_<lsb>
   logic G_1_0, G_2_0, G_3_0, G_4_0, G_5_0;
   logic G_3_2, P_3_2;
   logic G_5_4, P_5_4;

   // Level 1: combine adjacent bit pairs
   grey  b_1_0 (.gout(G_1_0), .gin({g[1], g[0]}), .pin(p[1]));
   black b_3_2 (.gout(G_3_2), .pout(P_3_2), .gin({g[3], g[2]}), .pin({p[3], p[2]}));
   black b_5_4 (.gout(G_5_4), .pout(P_5_4), .gin({g[5], g[4]}), .pin({p[5], p[4]}));

   // Level 2: group covering bits 3:0
   grey  g_3_0 (.gout(G_3_0), .gin({G_3_2, G_1_0}), .pin(P_3_2));

   // Level 3: group covering bits 5:0
   grey  g_5_0 (.gout(G_5_0), .gin({G_5_4, G_3_0}), .pin(P_5_4));

   // Fan-out level: fill in the even positions
   grey  g_2_0 (.gout(G_2_0), .gin({g[2], G_1_0}), .pin(p[2]));
   grey  g_4_0 (.gout(G_4_0), .gin({g[4], G_3_0}), .pin(p[4]));

   // Carry outputs: c[k+1] = G_k_0 (c[1] is simply g[0])
   assign c = {G_5_0, G_4_0, G_3_0, G_2_0, G_1_0, g[0]};

endmodule // brent_kung
// Black cell
// Merges two (generate, propagate) pairs into one group pair. Index 1 of
// gin/pin is the more significant half, index 0 the less significant half.
module black (output logic       gout, pout,
              input  logic [1:0] gin, pin);

   // Group propagates only if both halves propagate
   assign pout = &pin;
   // Group generates if the upper half generates, or the upper half
   // propagates a carry generated by the lower half
   assign gout = (gin[0] & pin[1]) | gin[1];

endmodule // black
// Grey cell
// Produces only the group generate: the upper generate gin[1], or the lower
// generate gin[0] carried through the upper propagate pin.
module grey (output logic       gout,
             input  logic [1:0] gin,
             input  logic       pin);

   assign gout = (gin[0] & pin) | gin[1];

endmodule // grey
// reduced Black cell
// iout is the AND of the two pin bits; hout is the OR of the two gin bits.
module rblk (output logic       hout, iout,
             input  logic [1:0] gin, pin);

   assign hout = |gin;
   assign iout = &pin;

endmodule
// reduced Grey cell
// hout is the OR of the two gin bits.
module rgry (output logic       hout,
             input  logic [1:0] gin);

   assign hout = |gin;

endmodule // rgry
// LZD
module lz2 (P, V, B0, B1);
input logic B0;
@ -754,6 +456,8 @@ module lz32 (ZP, ZV, B);
endmodule // lz32
// FSM Control for Integer Divider
module fsm32 (en, state0, done, divdone, otfzero,
start, error, NumIter, clk, reset);

View File

@ -51,8 +51,9 @@ module int64div (Q, done, divdone, rem0, div0, N, D, clk, reset, start);
// #iter: N = m+v+s = m+(s+2) = m+2+s (mod k = 0)
// v = 2 since \rho < 1 (add 4 to make sure it's a ceiling)
bk8 cpa1 (co1, Num, {2'b0, P},
{5'h0, shiftResult, ~shiftResult, 1'b0}, 1'b0);
adder #(8) cpa1 ({2'b0, P},
{5'h0, shiftResult, ~shiftResult, 1'b0},
Num);
// Determine whether need to add just Q/Rem
assign shiftResult = P[0];
@ -71,7 +72,7 @@ module int64div (Q, done, divdone, rem0, div0, N, D, clk, reset, start);
flopr #(1) regc (~clk, reset, otfzerov, otfzero);
flopr #(1) regd (~clk, reset, enablev, enable);
flopr #(1) rege (~clk, reset, state0v, state0);
// To obtain a correct remainder the last bit of the
// quotient has to be aligned with a radix-r boundary.
// Since the quotient is in the range 1/2 < q < 2 (one
@ -161,10 +162,10 @@ module divide4x64 (Q, rem0, quotient, op1, op2, clk, reset, state0,
otfzero, enable, Qstar, QMstar);
// Correction and generation of Remainder
add68 cpa2 (cout1, rem1, SumN2[67:0], CarryN2[67:0], 1'b0);
adder #(68) cpa2 (SumN2[67:0], CarryN2[67:0], rem1);
// Add back +D as correction
csa #(68) csa2 (CarryN2[67:0], SumN2[67:0], divi1, SumR, CarryR);
add68 cpa3 (cout2, rem2, SumR, CarryR, 1'b0);
adder #(68) cpa3 (SumR, CarryR, rem2);
// Choose remainder (Rem or Rem+D)
mux2 #(68) mx6 (rem1, rem2, rem1[67], rem3);
// Choose correct Q or QM
@ -177,22 +178,25 @@ endmodule // divide4x64
module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM);
input logic [3:0] quot;
input logic [3:0] quot;
output logic [1:0] Qin;
output logic [1:0] QMin;
output logic CshiftQ;
output logic CshiftQM;
output logic [1:0] Qin;
output logic [1:0] QMin;
output logic CshiftQ;
output logic CshiftQM;
assign Qin[1] = (quot[1]) | (quot[3]) | (quot[0]);
assign Qin[0] = (quot[1]) | (quot[2]);
assign QMin[1] = (quot[1]) | (!quot[3]&!quot[2]&!quot[1]&!quot[0]);
assign QMin[0] = (quot[3]) | (quot[0]) |
(!quot[3]&!quot[2]&!quot[1]&!quot[0]);
assign CshiftQ = (quot[1]) | (quot[0]);
assign CshiftQM = (quot[3]) | (quot[2]);
// Load/Store Control for OTF
assign Qin[1] = (quot[1]) | (quot[3]) | (quot[0]);
assign Qin[0] = (quot[1]) | (quot[2]);
assign QMin[1] = (quot[1]) | (!quot[3]&!quot[2]&!quot[1]&!quot[0]);
assign QMin[0] = (quot[3]) | (quot[0]) |
(!quot[3]&!quot[2]&!quot[1]&!quot[0]);
assign CshiftQ = (quot[1]) | (quot[0]);
assign CshiftQM = (quot[3]) | (quot[2]);
endmodule
endmodule
// On-the-fly Conversion per Ercegovac/Lang
module otf #(parameter WIDTH=8)
(Qin, QMin, CshiftQ, CshiftQM, clk, reset, enable, R2Q, R1Q);
@ -219,44 +223,44 @@ module otf #(parameter WIDTH=8)
assign Qstar = R2Q;
assign QMstar = R1Q;
endmodule // otf8
endmodule // otf8
module adder #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b,
output logic [WIDTH-1:0] y);
module adder #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b,
output logic [WIDTH-1:0] y);
assign y = a + b;
assign y = a + b;
endmodule // adder
endmodule // adder
module fa (input logic a, b, c, output logic sum, carry);
module fa (input logic a, b, c, output logic sum, carry);
assign sum = a^b^c;
assign carry = a&b|a&c|b&c;
assign sum = a^b^c;
assign carry = a&b|a&c|b&c;
endmodule // fa
endmodule // fa
// Carry-save adder: one full adder (fa) per bit reduces the three operands
// a, b, c to a sum vector and a carry vector. Bit i's carry-out is stored in
// carry_temp[i+1], so carry_temp[0] is never driven.
// Note on the carry assignment: the concatenation is WIDTH+1 bits wide but
// carry is WIDTH bits, so the leading 1'b0 is truncated and carry ends up as
// {carry_temp[WIDTH-1:1], 1'b0}; carry_temp[WIDTH] is not used.
module csa #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, c,
output logic [WIDTH-1:0] sum, carry);
module csa #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, c,
output logic [WIDTH-1:0] sum, carry);
logic [WIDTH:0] carry_temp;
genvar i;
generate
for (i=0;i<WIDTH;i=i+1)
begin : genbit
fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
end
endgenerate
assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};
logic [WIDTH:0] carry_temp;
genvar i;
generate
for (i=0;i<WIDTH;i=i+1)
begin : genbit
fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
end
endgenerate
assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};
endmodule // csa
endmodule // csa
module flopenr #(parameter WIDTH = 8)
(input logic clk, reset, en,
input logic [WIDTH-1:0] d, output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else if (en) q <= d;
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else if (en) q <= d;
endmodule // flopenr
@ -264,9 +268,9 @@ module flopr #(parameter WIDTH = 8)
(input logic clk, reset, input
logic [WIDTH-1:0] d, output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else q <= d;
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else q <= d;
endmodule // flopr
@ -274,12 +278,12 @@ module flopenrc #(parameter WIDTH = 8)
(input logic clk, reset, en, clear,
input logic [WIDTH-1:0] d, output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else
if (en)
if (clear) q <= 0;
else q <= d;
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else
if (en)
if (clear) q <= 0;
else q <= d;
endmodule // flopenrc
@ -287,13 +291,13 @@ module floprc #(parameter WIDTH = 8)
(input logic clk, reset, clear,
input logic [WIDTH-1:0] d, output logic [WIDTH-1:0] q);
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else
if (clear) q <= 0;
else q <= d;
always_ff @(posedge clk, posedge reset)
if (reset) q <= 0;
else
if (clear) q <= 0;
else q <= d;
endmodule // floprc
endmodule // floprc
module eqcmp #(parameter WIDTH = 8)
(input logic [WIDTH-1:0] a, b,
@ -349,483 +353,7 @@ module qst4 (input logic [6:0] s, input logic [2:0] d,
endmodule // qst4
// Ladner-Fischer Prefix Adder
// 68-bit adder wrapper: forms the per-bit propagate/generate vectors, passes
// them to the ladner_fischer68 carry tree, and combines the returned carries
// with the propagate bits to produce sum and cout.
module add68 (output logic        cout,
              output logic [67:0] sum,
              input  logic [67:0] a, b,
              input  logic        cin);

   logic [68:0] prop, gen;   // bit 0 is the carry-in slot, bits 68:1 are per-bit
   logic [67:0] carry;       // carries returned by the prefix tree

   // Pre-computation: the carry-in enters as generate bit 0
   assign prop = {a ^ b, 1'b0};
   assign gen  = {a & b, cin};

   // Prefix tree
   ladner_fischer68 prefix_tree (carry, prop[67:0], gen[67:0]);

   // Post-computation
   assign sum  = carry ^ prop[68:1];
   assign cout = (carry[67] & prop[68]) | gen[68];

endmodule
module ladner_fischer68 (c, p, g);
input logic [67:0] p;
input logic [67:0] g;
output logic [68:1] c;
// parallel-prefix, Ladner-Fischer
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
black b_15_14 (G_15_14, P_15_14, {g[15],g[14]}, {p[15],p[14]});
black b_17_16 (G_17_16, P_17_16, {g[17],g[16]}, {p[17],p[16]});
black b_19_18 (G_19_18, P_19_18, {g[19],g[18]}, {p[19],p[18]});
black b_21_20 (G_21_20, P_21_20, {g[21],g[20]}, {p[21],p[20]});
black b_23_22 (G_23_22, P_23_22, {g[23],g[22]}, {p[23],p[22]});
black b_25_24 (G_25_24, P_25_24, {g[25],g[24]}, {p[25],p[24]});
black b_27_26 (G_27_26, P_27_26, {g[27],g[26]}, {p[27],p[26]});
black b_29_28 (G_29_28, P_29_28, {g[29],g[28]}, {p[29],p[28]});
black b_31_30 (G_31_30, P_31_30, {g[31],g[30]}, {p[31],p[30]});
black b_33_32 (G_33_32, P_33_32, {g[33],g[32]}, {p[33],p[32]});
black b_35_34 (G_35_34, P_35_34, {g[35],g[34]}, {p[35],p[34]});
black b_37_36 (G_37_36, P_37_36, {g[37],g[36]}, {p[37],p[36]});
black b_39_38 (G_39_38, P_39_38, {g[39],g[38]}, {p[39],p[38]});
black b_41_40 (G_41_40, P_41_40, {g[41],g[40]}, {p[41],p[40]});
black b_43_42 (G_43_42, P_43_42, {g[43],g[42]}, {p[43],p[42]});
black b_45_44 (G_45_44, P_45_44, {g[45],g[44]}, {p[45],p[44]});
black b_47_46 (G_47_46, P_47_46, {g[47],g[46]}, {p[47],p[46]});
black b_49_48 (G_49_48, P_49_48, {g[49],g[48]}, {p[49],p[48]});
black b_51_50 (G_51_50, P_51_50, {g[51],g[50]}, {p[51],p[50]});
black b_53_52 (G_53_52, P_53_52, {g[53],g[52]}, {p[53],p[52]});
black b_55_54 (G_55_54, P_55_54, {g[55],g[54]}, {p[55],p[54]});
black b_57_56 (G_57_56, P_57_56, {g[57],g[56]}, {p[57],p[56]});
black b_59_58 (G_59_58, P_59_58, {g[59],g[58]}, {p[59],p[58]});
black b_61_60 (G_61_60, P_61_60, {g[61],g[60]}, {p[61],p[60]});
black b_63_62 (G_63_62, P_63_62, {g[63],g[62]}, {p[63],p[62]});
black b_65_64 (G_65_64, P_65_64, {g[65],g[64]}, {p[65],p[64]});
black b_67_66 (G_67_66, P_67_66, {g[67],g[66]}, {p[67],p[66]});
// Stage 2: Generates G/P pairs that span 2 bits
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
black b_15_12 (G_15_12, P_15_12, {G_15_14,G_13_12}, {P_15_14,P_13_12});
black b_19_16 (G_19_16, P_19_16, {G_19_18,G_17_16}, {P_19_18,P_17_16});
black b_23_20 (G_23_20, P_23_20, {G_23_22,G_21_20}, {P_23_22,P_21_20});
black b_27_24 (G_27_24, P_27_24, {G_27_26,G_25_24}, {P_27_26,P_25_24});
black b_31_28 (G_31_28, P_31_28, {G_31_30,G_29_28}, {P_31_30,P_29_28});
black b_35_32 (G_35_32, P_35_32, {G_35_34,G_33_32}, {P_35_34,P_33_32});
black b_39_36 (G_39_36, P_39_36, {G_39_38,G_37_36}, {P_39_38,P_37_36});
black b_43_40 (G_43_40, P_43_40, {G_43_42,G_41_40}, {P_43_42,P_41_40});
black b_47_44 (G_47_44, P_47_44, {G_47_46,G_45_44}, {P_47_46,P_45_44});
black b_51_48 (G_51_48, P_51_48, {G_51_50,G_49_48}, {P_51_50,P_49_48});
black b_55_52 (G_55_52, P_55_52, {G_55_54,G_53_52}, {P_55_54,P_53_52});
black b_59_56 (G_59_56, P_59_56, {G_59_58,G_57_56}, {P_59_58,P_57_56});
black b_63_60 (G_63_60, P_63_60, {G_63_62,G_61_60}, {P_63_62,P_61_60});
black b_67_64 (G_67_64, P_67_64, {G_67_66,G_65_64}, {P_67_66,P_65_64});
// Stage 3: Generates G/P pairs that span 4 bits
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
black b_13_8 (G_13_8, P_13_8, {G_13_12,G_11_8}, {P_13_12,P_11_8});
black b_15_8 (G_15_8, P_15_8, {G_15_12,G_11_8}, {P_15_12,P_11_8});
black b_21_16 (G_21_16, P_21_16, {G_21_20,G_19_16}, {P_21_20,P_19_16});
black b_23_16 (G_23_16, P_23_16, {G_23_20,G_19_16}, {P_23_20,P_19_16});
black b_29_24 (G_29_24, P_29_24, {G_29_28,G_27_24}, {P_29_28,P_27_24});
black b_31_24 (G_31_24, P_31_24, {G_31_28,G_27_24}, {P_31_28,P_27_24});
black b_37_32 (G_37_32, P_37_32, {G_37_36,G_35_32}, {P_37_36,P_35_32});
black b_39_32 (G_39_32, P_39_32, {G_39_36,G_35_32}, {P_39_36,P_35_32});
black b_45_40 (G_45_40, P_45_40, {G_45_44,G_43_40}, {P_45_44,P_43_40});
black b_47_40 (G_47_40, P_47_40, {G_47_44,G_43_40}, {P_47_44,P_43_40});
black b_53_48 (G_53_48, P_53_48, {G_53_52,G_51_48}, {P_53_52,P_51_48});
black b_55_48 (G_55_48, P_55_48, {G_55_52,G_51_48}, {P_55_52,P_51_48});
black b_61_56 (G_61_56, P_61_56, {G_61_60,G_59_56}, {P_61_60,P_59_56});
black b_63_56 (G_63_56, P_63_56, {G_63_60,G_59_56}, {P_63_60,P_59_56});
black b_69_64 (G_69_64, P_69_64, {G_69_68,G_67_64}, {P_69_68,P_67_64});
black b_71_64 (G_71_64, P_71_64, {G_71_68,G_67_64}, {P_71_68,P_67_64});
// Stage 4: Generates G/P pairs that span 8 bits
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
grey g_13_0 (G_13_0, {G_13_8,G_7_0}, P_13_8);
grey g_15_0 (G_15_0, {G_15_8,G_7_0}, P_15_8);
black b_25_16 (G_25_16, P_25_16, {G_25_24,G_23_16}, {P_25_24,P_23_16});
black b_27_16 (G_27_16, P_27_16, {G_27_24,G_23_16}, {P_27_24,P_23_16});
black b_29_16 (G_29_16, P_29_16, {G_29_24,G_23_16}, {P_29_24,P_23_16});
black b_31_16 (G_31_16, P_31_16, {G_31_24,G_23_16}, {P_31_24,P_23_16});
black b_41_32 (G_41_32, P_41_32, {G_41_40,G_39_32}, {P_41_40,P_39_32});
black b_43_32 (G_43_32, P_43_32, {G_43_40,G_39_32}, {P_43_40,P_39_32});
black b_45_32 (G_45_32, P_45_32, {G_45_40,G_39_32}, {P_45_40,P_39_32});
black b_47_32 (G_47_32, P_47_32, {G_47_40,G_39_32}, {P_47_40,P_39_32});
black b_57_48 (G_57_48, P_57_48, {G_57_56,G_55_48}, {P_57_56,P_55_48});
black b_59_48 (G_59_48, P_59_48, {G_59_56,G_55_48}, {P_59_56,P_55_48});
black b_61_48 (G_61_48, P_61_48, {G_61_56,G_55_48}, {P_61_56,P_55_48});
black b_63_48 (G_63_48, P_63_48, {G_63_56,G_55_48}, {P_63_56,P_55_48});
black b_73_64 (G_73_64, P_73_64, {G_73_72,G_71_64}, {P_73_72,P_71_64});
black b_75_64 (G_75_64, P_75_64, {G_75_72,G_71_64}, {P_75_72,P_71_64});
black b_77_64 (G_77_64, P_77_64, {G_77_72,G_71_64}, {P_77_72,P_71_64});
black b_79_64 (G_79_64, P_79_64, {G_79_72,G_71_64}, {P_79_72,P_71_64});
// Stage 5: Generates G/P pairs that span 16 bits
grey g_17_0 (G_17_0, {G_17_16,G_15_0}, P_17_16);
grey g_19_0 (G_19_0, {G_19_16,G_15_0}, P_19_16);
grey g_21_0 (G_21_0, {G_21_16,G_15_0}, P_21_16);
grey g_23_0 (G_23_0, {G_23_16,G_15_0}, P_23_16);
grey g_25_0 (G_25_0, {G_25_16,G_15_0}, P_25_16);
grey g_27_0 (G_27_0, {G_27_16,G_15_0}, P_27_16);
grey g_29_0 (G_29_0, {G_29_16,G_15_0}, P_29_16);
grey g_31_0 (G_31_0, {G_31_16,G_15_0}, P_31_16);
black b_49_32 (G_49_32, P_49_32, {G_49_48,G_47_32}, {P_49_48,P_47_32});
black b_51_32 (G_51_32, P_51_32, {G_51_48,G_47_32}, {P_51_48,P_47_32});
black b_53_32 (G_53_32, P_53_32, {G_53_48,G_47_32}, {P_53_48,P_47_32});
black b_55_32 (G_55_32, P_55_32, {G_55_48,G_47_32}, {P_55_48,P_47_32});
black b_57_32 (G_57_32, P_57_32, {G_57_48,G_47_32}, {P_57_48,P_47_32});
black b_59_32 (G_59_32, P_59_32, {G_59_48,G_47_32}, {P_59_48,P_47_32});
black b_61_32 (G_61_32, P_61_32, {G_61_48,G_47_32}, {P_61_48,P_47_32});
black b_63_32 (G_63_32, P_63_32, {G_63_48,G_47_32}, {P_63_48,P_47_32});
black b_81_64 (G_81_64, P_81_64, {G_81_80,G_79_64}, {P_81_80,P_79_64});
black b_83_64 (G_83_64, P_83_64, {G_83_80,G_79_64}, {P_83_80,P_79_64});
black b_85_64 (G_85_64, P_85_64, {G_85_80,G_79_64}, {P_85_80,P_79_64});
black b_87_64 (G_87_64, P_87_64, {G_87_80,G_79_64}, {P_87_80,P_79_64});
black b_89_64 (G_89_64, P_89_64, {G_89_80,G_79_64}, {P_89_80,P_79_64});
black b_91_64 (G_91_64, P_91_64, {G_91_80,G_79_64}, {P_91_80,P_79_64});
black b_93_64 (G_93_64, P_93_64, {G_93_80,G_79_64}, {P_93_80,P_79_64});
black b_95_64 (G_95_64, P_95_64, {G_95_80,G_79_64}, {P_95_80,P_79_64});
// Stage 6: Generates G/P pairs that span 32 bits
grey g_33_0 (G_33_0, {G_33_32,G_31_0}, P_33_32);
grey g_35_0 (G_35_0, {G_35_32,G_31_0}, P_35_32);
grey g_37_0 (G_37_0, {G_37_32,G_31_0}, P_37_32);
grey g_39_0 (G_39_0, {G_39_32,G_31_0}, P_39_32);
grey g_41_0 (G_41_0, {G_41_32,G_31_0}, P_41_32);
grey g_43_0 (G_43_0, {G_43_32,G_31_0}, P_43_32);
grey g_45_0 (G_45_0, {G_45_32,G_31_0}, P_45_32);
grey g_47_0 (G_47_0, {G_47_32,G_31_0}, P_47_32);
grey g_49_0 (G_49_0, {G_49_32,G_31_0}, P_49_32);
grey g_51_0 (G_51_0, {G_51_32,G_31_0}, P_51_32);
grey g_53_0 (G_53_0, {G_53_32,G_31_0}, P_53_32);
grey g_55_0 (G_55_0, {G_55_32,G_31_0}, P_55_32);
grey g_57_0 (G_57_0, {G_57_32,G_31_0}, P_57_32);
grey g_59_0 (G_59_0, {G_59_32,G_31_0}, P_59_32);
grey g_61_0 (G_61_0, {G_61_32,G_31_0}, P_61_32);
grey g_63_0 (G_63_0, {G_63_32,G_31_0}, P_63_32);
black b_97_64 (G_97_64, P_97_64, {G_97_96,G_95_64}, {P_97_96,P_95_64});
black b_99_64 (G_99_64, P_99_64, {G_99_96,G_95_64}, {P_99_96,P_95_64});
black b_101_64 (G_101_64, P_101_64, {G_101_96,G_95_64}, {P_101_96,P_95_64});
black b_103_64 (G_103_64, P_103_64, {G_103_96,G_95_64}, {P_103_96,P_95_64});
black b_105_64 (G_105_64, P_105_64, {G_105_96,G_95_64}, {P_105_96,P_95_64});
black b_107_64 (G_107_64, P_107_64, {G_107_96,G_95_64}, {P_107_96,P_95_64});
black b_109_64 (G_109_64, P_109_64, {G_109_96,G_95_64}, {P_109_96,P_95_64});
black b_111_64 (G_111_64, P_111_64, {G_111_96,G_95_64}, {P_111_96,P_95_64});
black b_113_64 (G_113_64, P_113_64, {G_113_96,G_95_64}, {P_113_96,P_95_64});
black b_115_64 (G_115_64, P_115_64, {G_115_96,G_95_64}, {P_115_96,P_95_64});
black b_117_64 (G_117_64, P_117_64, {G_117_96,G_95_64}, {P_117_96,P_95_64});
black b_119_64 (G_119_64, P_119_64, {G_119_96,G_95_64}, {P_119_96,P_95_64});
black b_121_64 (G_121_64, P_121_64, {G_121_96,G_95_64}, {P_121_96,P_95_64});
black b_123_64 (G_123_64, P_123_64, {G_123_96,G_95_64}, {P_123_96,P_95_64});
black b_125_64 (G_125_64, P_125_64, {G_125_96,G_95_64}, {P_125_96,P_95_64});
black b_127_64 (G_127_64, P_127_64, {G_127_96,G_95_64}, {P_127_96,P_95_64});
// Stage 7: Generates G/P pairs that span 64 bits
grey g_65_0 (G_65_0, {G_65_64,G_63_0}, P_65_64);
grey g_67_0 (G_67_0, {G_67_64,G_63_0}, P_67_64);
grey g_69_0 (G_69_0, {G_69_64,G_63_0}, P_69_64);
grey g_71_0 (G_71_0, {G_71_64,G_63_0}, P_71_64);
grey g_73_0 (G_73_0, {G_73_64,G_63_0}, P_73_64);
grey g_75_0 (G_75_0, {G_75_64,G_63_0}, P_75_64);
grey g_77_0 (G_77_0, {G_77_64,G_63_0}, P_77_64);
grey g_79_0 (G_79_0, {G_79_64,G_63_0}, P_79_64);
grey g_81_0 (G_81_0, {G_81_64,G_63_0}, P_81_64);
grey g_83_0 (G_83_0, {G_83_64,G_63_0}, P_83_64);
grey g_85_0 (G_85_0, {G_85_64,G_63_0}, P_85_64);
grey g_87_0 (G_87_0, {G_87_64,G_63_0}, P_87_64);
grey g_89_0 (G_89_0, {G_89_64,G_63_0}, P_89_64);
grey g_91_0 (G_91_0, {G_91_64,G_63_0}, P_91_64);
grey g_93_0 (G_93_0, {G_93_64,G_63_0}, P_93_64);
grey g_95_0 (G_95_0, {G_95_64,G_63_0}, P_95_64);
grey g_97_0 (G_97_0, {G_97_64,G_63_0}, P_97_64);
grey g_99_0 (G_99_0, {G_99_64,G_63_0}, P_99_64);
grey g_101_0 (G_101_0, {G_101_64,G_63_0}, P_101_64);
grey g_103_0 (G_103_0, {G_103_64,G_63_0}, P_103_64);
grey g_105_0 (G_105_0, {G_105_64,G_63_0}, P_105_64);
grey g_107_0 (G_107_0, {G_107_64,G_63_0}, P_107_64);
grey g_109_0 (G_109_0, {G_109_64,G_63_0}, P_109_64);
grey g_111_0 (G_111_0, {G_111_64,G_63_0}, P_111_64);
grey g_113_0 (G_113_0, {G_113_64,G_63_0}, P_113_64);
grey g_115_0 (G_115_0, {G_115_64,G_63_0}, P_115_64);
grey g_117_0 (G_117_0, {G_117_64,G_63_0}, P_117_64);
grey g_119_0 (G_119_0, {G_119_64,G_63_0}, P_119_64);
grey g_121_0 (G_121_0, {G_121_64,G_63_0}, P_121_64);
grey g_123_0 (G_123_0, {G_123_64,G_63_0}, P_123_64);
grey g_125_0 (G_125_0, {G_125_64,G_63_0}, P_125_64);
grey g_127_0 (G_127_0, {G_127_64,G_63_0}, P_127_64);
// Extra grey cell stage
grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
grey g_14_0 (G_14_0, {g[14],G_13_0}, p[14]);
grey g_16_0 (G_16_0, {g[16],G_15_0}, p[16]);
grey g_18_0 (G_18_0, {g[18],G_17_0}, p[18]);
grey g_20_0 (G_20_0, {g[20],G_19_0}, p[20]);
grey g_22_0 (G_22_0, {g[22],G_21_0}, p[22]);
grey g_24_0 (G_24_0, {g[24],G_23_0}, p[24]);
grey g_26_0 (G_26_0, {g[26],G_25_0}, p[26]);
grey g_28_0 (G_28_0, {g[28],G_27_0}, p[28]);
grey g_30_0 (G_30_0, {g[30],G_29_0}, p[30]);
grey g_32_0 (G_32_0, {g[32],G_31_0}, p[32]);
grey g_34_0 (G_34_0, {g[34],G_33_0}, p[34]);
grey g_36_0 (G_36_0, {g[36],G_35_0}, p[36]);
grey g_38_0 (G_38_0, {g[38],G_37_0}, p[38]);
grey g_40_0 (G_40_0, {g[40],G_39_0}, p[40]);
grey g_42_0 (G_42_0, {g[42],G_41_0}, p[42]);
grey g_44_0 (G_44_0, {g[44],G_43_0}, p[44]);
grey g_46_0 (G_46_0, {g[46],G_45_0}, p[46]);
grey g_48_0 (G_48_0, {g[48],G_47_0}, p[48]);
grey g_50_0 (G_50_0, {g[50],G_49_0}, p[50]);
grey g_52_0 (G_52_0, {g[52],G_51_0}, p[52]);
grey g_54_0 (G_54_0, {g[54],G_53_0}, p[54]);
grey g_56_0 (G_56_0, {g[56],G_55_0}, p[56]);
grey g_58_0 (G_58_0, {g[58],G_57_0}, p[58]);
grey g_60_0 (G_60_0, {g[60],G_59_0}, p[60]);
grey g_62_0 (G_62_0, {g[62],G_61_0}, p[62]);
grey g_64_0 (G_64_0, {g[64],G_63_0}, p[64]);
grey g_66_0 (G_66_0, {g[66],G_65_0}, p[66]);
// Final Stage: Apply c_k+1=G_k_0
assign c[1]=g[0];
assign c[2]=G_1_0;
assign c[3]=G_2_0;
assign c[4]=G_3_0;
assign c[5]=G_4_0;
assign c[6]=G_5_0;
assign c[7]=G_6_0;
assign c[8]=G_7_0;
assign c[9]=G_8_0;
assign c[10]=G_9_0;
assign c[11]=G_10_0;
assign c[12]=G_11_0;
assign c[13]=G_12_0;
assign c[14]=G_13_0;
assign c[15]=G_14_0;
assign c[16]=G_15_0;
assign c[17]=G_16_0;
assign c[18]=G_17_0;
assign c[19]=G_18_0;
assign c[20]=G_19_0;
assign c[21]=G_20_0;
assign c[22]=G_21_0;
assign c[23]=G_22_0;
assign c[24]=G_23_0;
assign c[25]=G_24_0;
assign c[26]=G_25_0;
assign c[27]=G_26_0;
assign c[28]=G_27_0;
assign c[29]=G_28_0;
assign c[30]=G_29_0;
assign c[31]=G_30_0;
assign c[32]=G_31_0;
assign c[33]=G_32_0;
assign c[34]=G_33_0;
assign c[35]=G_34_0;
assign c[36]=G_35_0;
assign c[37]=G_36_0;
assign c[38]=G_37_0;
assign c[39]=G_38_0;
assign c[40]=G_39_0;
assign c[41]=G_40_0;
assign c[42]=G_41_0;
assign c[43]=G_42_0;
assign c[44]=G_43_0;
assign c[45]=G_44_0;
assign c[46]=G_45_0;
assign c[47]=G_46_0;
assign c[48]=G_47_0;
assign c[49]=G_48_0;
assign c[50]=G_49_0;
assign c[51]=G_50_0;
assign c[52]=G_51_0;
assign c[53]=G_52_0;
assign c[54]=G_53_0;
assign c[55]=G_54_0;
assign c[56]=G_55_0;
assign c[57]=G_56_0;
assign c[58]=G_57_0;
assign c[59]=G_58_0;
assign c[60]=G_59_0;
assign c[61]=G_60_0;
assign c[62]=G_61_0;
assign c[63]=G_62_0;
assign c[64]=G_63_0;
assign c[65]=G_64_0;
assign c[66]=G_65_0;
assign c[67]=G_66_0;
assign c[68]=G_67_0;
endmodule // ladner_fischer68
// Brent-Kung Parallel-Prefix Adder (8 bit)
//   a, b : operands
//   cin  : carry in
//   sum  : a + b + cin (low 8 bits)
//   cout : carry out of the most significant bit
module bk8 (cout, sum, a, b, cin);
   input  logic [7:0] a, b;
   input  logic       cin;
   output logic [7:0] sum;
   output logic       cout;

   logic [8:0]        p, g;
   // c[i] is wired positionally to brent_kung8's c[i+1], i.e. the carry
   // into bit position i+1 of p/g (operand bit i).
   logic [7:0]        c;

   // pre-computation: position 0 injects cin as a generate with no
   // propagate; positions 8:1 hold the per-bit propagate/generate of a, b.
   assign p = {a^b, 1'b0};
   assign g = {a&b, cin};

   // prefix tree
   brent_kung8 prefix_tree(c, p[7:0], g[7:0]);

   // post-computation
   assign sum  = p[8:1]^c;
   // the tree stops at position 7, so the final carry is resolved here
   assign cout = g[8]|(p[8]&c[7]);
endmodule // bk8
// Brent-Kung parallel-prefix carry tree over bit positions 7:0.
//   p, g : per-position propagate / generate inputs (p[0] is not used)
//   c    : c[k+1] = G_k_0, the group generate spanning positions k..0
module brent_kung8 (c, p, g);
   input  logic [7:0] p;
   input  logic [7:0] g;
   output logic [8:1] c;

   // Internal group generate/propagate signals, named <G|P>_<msb>_<lsb>.
   // Declared explicitly so the module does not rely on implicit nets
   // (which fail under `default_nettype none and mask misspelled names).
   logic G_1_0, G_2_0, G_3_0, G_4_0, G_5_0, G_6_0, G_7_0;
   logic G_3_2, P_3_2;
   logic G_5_4, P_5_4;
   logic G_7_6, P_7_6;
   logic G_7_4, P_7_4;

   // parallel-prefix, Brent-Kung
   // Stage 1: combine adjacent single positions into 2-position groups
   grey  b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
   black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
   black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
   black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});

   // Stage 2: combine 2-position groups into 4-position groups
   grey  g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
   black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});

   // Stage 3: combine 4-position groups into the full 8-position group
   grey  g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);

   // Stage 4: extend G_3_0 by the 2-position group 5:4
   grey  g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);

   // Last grey cell stage: fill in the even positions
   grey  g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
   grey  g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
   grey  g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);

   // Final Stage: Apply c_k+1=G_k_0
   assign c[1]=g[0];
   assign c[2]=G_1_0;
   assign c[3]=G_2_0;
   assign c[4]=G_3_0;
   assign c[5]=G_4_0;
   assign c[6]=G_5_0;
   assign c[7]=G_6_0;
   assign c[8]=G_7_0;
endmodule // brent_kung8
// Black cell: merges two (generate, propagate) pairs into one pair.
//   gout is set when gin[1] is set, or when pin[1] passes gin[0] through.
//   pout is set only when both propagate inputs are set.
module black (gout, pout, gin, pin);
   input  logic [1:0] gin, pin;
   output logic       gout, pout;

   always_comb begin
      pout = &pin;
      gout = (gin[0] & pin[1]) | gin[1];
   end
endmodule // black
// Grey cell: generate-only merge (no propagate output).
//   gout is set when gin[1] is set, or when pin passes gin[0] through.
module grey (gout, gin, pin);
   input  logic [1:0] gin;
   input  logic       pin;
   output logic       gout;

   always_comb
     gout = (gin[0] & pin) | gin[1];
endmodule // grey
// Reduced black cell.
//   hout : OR of the two gin bits (pin does not gate it)
//   iout : AND of the two pin bits
module rblk (hout, iout, gin, pin);
   input  logic [1:0] gin, pin;
   output logic       hout, iout;

   always_comb begin
      hout = |gin;
      iout = &pin;
   end
endmodule
// Reduced grey cell: hout is the OR of the two gin bits.
module rgry (hout, gin);
   input  logic [1:0] gin;
   output logic       hout;

   always_comb
     hout = |gin;
endmodule // rgry
// LZD
module lz2 (P, V, B0, B1);
@ -953,6 +481,8 @@ module lz64 (ZP, ZV, B);
endmodule // lz64
// FSM Control for Integer Divider
module fsm64 (en, state0, done, divdone, otfzero,
start, error, NumIter, clk, reset);

View File

@ -27,7 +27,7 @@ if [file exists work] {
vlib work
# compile source files
vlog muxs.sv shifters.sv divide4x32.sv test_int32div.sv
vlog mux_div.sv shifters_div.sv divide4x32.sv test_int32div.sv
# start and run simulation
vsim -voptargs=+acc work.tb
@ -109,6 +109,6 @@ configure wave -rowmargin 4
configure wave -childrowmargin 2
-- Run the Simulation
run 138ns
run 338ns

View File

@ -27,7 +27,7 @@ if [file exists work] {
vlib work
# compile source files
vlog muxs.sv shifters.sv divide4x64.sv test_int64div.sv
vlog mux_div.sv shifters_div.sv divide4x64.sv test_int64div.sv
# start and run simulation
vsim -voptargs=+acc work.tb
@ -109,6 +109,6 @@ configure wave -rowmargin 4
configure wave -childrowmargin 2
-- Run the Simulation
run 138ns
run 338ns

View File

@ -27,7 +27,7 @@ if [file exists work] {
vlib work
# compile source files
vlog muxs.sv shifters.sv divide4x32.sv test_iter32.sv
vlog mux_div.sv shifters_div.sv divide4x32.sv test_iter32.sv
# start and run simulation
vsim -voptargs=+acc work.tb

View File

@ -27,7 +27,7 @@ if [file exists work] {
vlib work
# compile source files
vlog muxs.sv shifters.sv divide4x64.sv test_iter64.sv
vlog mux_div.sv shifters_div.sv divide4x64.sv test_iter64.sv
# start and run simulation
vsim -voptargs=+acc work.tb

View File

@ -0,0 +1,106 @@
// 64-bit left barrel shifter with zero fill.
//   A     : value to shift
//   Shift : shift amount; bit k enables a stage that shifts by 2**k
//   Z     : shifted result
// NOTE(review): assumes mux2 ports are (d0, d1, select, y), with d1 chosen
// when select is 1 -- mux2 is defined outside this file; confirm there.
module shifter_l64 (Z, A, Shift);
   input  logic [63:0] A;
   input  logic [5:0]  Shift;
   output logic [63:0] Z;

   logic [63:0]        stage1;
   logic [63:0]        stage2;
   logic [63:0]        stage3;
   logic [63:0]        stage4;
   logic [63:0]        stage5;

   // Zero fill uses sized literals rather than variables initialised at
   // declaration, so the fill value does not depend on tool support for
   // declaration initialisers.
   mux2 #(64) mx01(A,      {A[31:0],      32'h0}, Shift[5], stage1);
   mux2 #(64) mx02(stage1, {stage1[47:0], 16'h0}, Shift[4], stage2);
   mux2 #(64) mx03(stage2, {stage2[55:0], 8'h0},  Shift[3], stage3);
   mux2 #(64) mx04(stage3, {stage3[59:0], 4'h0},  Shift[2], stage4);
   mux2 #(64) mx05(stage4, {stage4[61:0], 2'b00}, Shift[1], stage5);
   mux2 #(64) mx06(stage5, {stage5[62:0], 1'b0},  Shift[0], Z);
endmodule // shifter_l64
// 64-bit logical right barrel shifter with zero fill.
//   A     : value to shift
//   Shift : shift amount; bit k enables a stage that shifts by 2**k
//   Z     : shifted result
// NOTE(review): assumes mux2 ports are (d0, d1, select, y), with d1 chosen
// when select is 1 -- mux2 is defined outside this file; confirm there.
module shifter_r64 (Z, A, Shift);
   input  logic [63:0] A;
   input  logic [5:0]  Shift;
   output logic [63:0] Z;

   logic [63:0]        stage1;
   logic [63:0]        stage2;
   logic [63:0]        stage3;
   logic [63:0]        stage4;
   logic [63:0]        stage5;

   // Zero fill uses sized literals rather than variables initialised at
   // declaration, so the fill value does not depend on tool support for
   // declaration initialisers.
   mux2 #(64) mx01(A,      {32'h0, A[63:32]},      Shift[5], stage1);
   mux2 #(64) mx02(stage1, {16'h0, stage1[63:16]}, Shift[4], stage2);
   mux2 #(64) mx03(stage2, {8'h0,  stage2[63:8]},  Shift[3], stage3);
   mux2 #(64) mx04(stage3, {4'h0,  stage3[63:4]},  Shift[2], stage4);
   mux2 #(64) mx05(stage4, {2'b00, stage4[63:2]},  Shift[1], stage5);
   mux2 #(64) mx06(stage5, {1'b0,  stage5[63:1]},  Shift[0], Z);
endmodule // shifter_r64
// 32-bit left barrel shifter with zero fill.
//   A     : value to shift
//   Shift : shift amount; bit k enables a stage that shifts by 2**k
//   Z     : shifted result
// NOTE(review): assumes mux2 ports are (d0, d1, select, y), with d1 chosen
// when select is 1 -- mux2 is defined outside this file; confirm there.
module shifter_l32 (Z, A, Shift);
   input  logic [31:0] A;
   input  logic [4:0]  Shift;
   output logic [31:0] Z;

   logic [31:0]        stage1;
   logic [31:0]        stage2;
   logic [31:0]        stage3;
   logic [31:0]        stage4;

   // Zero fill uses sized literals rather than variables initialised at
   // declaration, so the fill value does not depend on tool support for
   // declaration initialisers.
   mux2 #(32) mx01(A,      {A[15:0],      16'h0}, Shift[4], stage1);
   mux2 #(32) mx02(stage1, {stage1[23:0], 8'h0},  Shift[3], stage2);
   mux2 #(32) mx03(stage2, {stage2[27:0], 4'h0},  Shift[2], stage3);
   mux2 #(32) mx04(stage3, {stage3[29:0], 2'b00}, Shift[1], stage4);
   mux2 #(32) mx05(stage4, {stage4[30:0], 1'b0},  Shift[0], Z);
endmodule // shifter_l32
// 32-bit logical right barrel shifter with zero fill.
//   A     : value to shift
//   Shift : shift amount; bit k enables a stage that shifts by 2**k
//   Z     : shifted result
// NOTE(review): assumes mux2 ports are (d0, d1, select, y), with d1 chosen
// when select is 1 -- mux2 is defined outside this file; confirm there.
module shifter_r32 (Z, A, Shift);
   input  logic [31:0] A;
   input  logic [4:0]  Shift;
   output logic [31:0] Z;

   logic [31:0]        stage1;
   logic [31:0]        stage2;
   logic [31:0]        stage3;
   logic [31:0]        stage4;

   // Zero fill uses sized literals rather than variables initialised at
   // declaration, so the fill value does not depend on tool support for
   // declaration initialisers.
   mux2 #(32) mx01(A,      {16'h0, A[31:16]},     Shift[4], stage1);
   mux2 #(32) mx02(stage1, {8'h0,  stage1[31:8]}, Shift[3], stage2);
   mux2 #(32) mx03(stage2, {4'h0,  stage2[31:4]}, Shift[2], stage3);
   mux2 #(32) mx04(stage3, {2'b00, stage3[31:2]}, Shift[1], stage4);
   mux2 #(32) mx05(stage4, {1'b0,  stage4[31:1]}, Shift[0], Z);
endmodule // shifter_r32

View File

@ -38,10 +38,10 @@ module tb;
#0 start = 1'b0;
#0 reset = 1'b1;
#22 reset = 1'b0;
//#25 N = 32'h9830_07C0;
//#0 D = 32'h0000_000C;
#25 N = 32'h06b9_7b0d;
#0 D = 32'h46df_998d;
#25 N = 32'h9830_07C0;
#0 D = 32'h0000_000C;
//#25 N = 32'h06b9_7b0d;
//#0 D = 32'h46df_998d;
#0 start = 1'b1;
#50 start = 1'b0;

View File

@ -26,54 +26,61 @@
`include "wally-config.vh"
module muldiv (
input logic clk, reset,
// Decode Stage interface
input logic [31:0] InstrD,
// Execute Stage interface
input logic [`XLEN-1:0] SrcAE, SrcBE,
input logic [2:0] Funct3E,
input logic MulDivE, W64E,
// Writeback stage
output logic [`XLEN-1:0] MulDivResultW,
// hazards
input logic StallM, StallW, FlushM, FlushW
);
input logic clk, reset,
// Decode Stage interface
input logic [31:0] InstrD,
// Execute Stage interface
input logic [`XLEN-1:0] SrcAE, SrcBE,
input logic [2:0] Funct3E,
input logic MulDivE, W64E,
// Writeback stage
output logic [`XLEN-1:0] MulDivResultW,
// Divide Done
output logic DivDoneW,
// hazards
input logic StallM, StallW, FlushM, FlushW
);
generate
if (`M_SUPPORTED) begin
logic [`XLEN-1:0] MulDivResultE, MulDivResultM;
logic [`XLEN-1:0] PrelimResultE;
logic [`XLEN-1:0] QuotE, RemE;
logic [`XLEN*2-1:0] ProdE;
generate
if (`M_SUPPORTED) begin
logic [`XLEN-1:0] MulDivResultE, MulDivResultM;
logic [`XLEN-1:0] PrelimResultE;
logic [`XLEN-1:0] QuotE, RemE;
logic [`XLEN*2-1:0] ProdE;
// Multiplier
mul mul(.*);
// Multiplier
mul mul(.*);
// Divide
div div (QuotE, RemE, DivDoneE, div0error, SrcAE, SrcBE, clk, reset, MulDivE);
// Select result
always_comb
case (Funct3E)
3'b000: PrelimResultE = ProdE[`XLEN-1:0];
3'b001: PrelimResultE = ProdE[`XLEN*2-1:`XLEN];
3'b010: PrelimResultE = ProdE[`XLEN*2-1:`XLEN];
3'b011: PrelimResultE = ProdE[`XLEN*2-1:`XLEN];
3'b100: PrelimResultE = QuotE;
3'b101: PrelimResultE = QuotE;
3'b110: PrelimResultE = RemE;
3'b111: PrelimResultE = RemE;
endcase
// Handle sign extension for W-type instructions
if (`XLEN == 64) begin // RV64 has W-type instructions
assign MulDivResultE = W64E ? {{32{PrelimResultE[31]}}, PrelimResultE[31:0]} : PrelimResultE;
end else begin // RV32 has no W-type instructions
assign MulDivResultE = PrelimResultE;
// Select result
always_comb
case (Funct3E)
3'b000: PrelimResultE = ProdE[`XLEN-1:0];
3'b001: PrelimResultE = ProdE[`XLEN*2-1:`XLEN];
3'b010: PrelimResultE = ProdE[`XLEN*2-1:`XLEN];
3'b011: PrelimResultE = ProdE[`XLEN*2-1:`XLEN];
3'b100: PrelimResultE = QuotE;
3'b101: PrelimResultE = QuotE;
3'b110: PrelimResultE = RemE;
3'b111: PrelimResultE = RemE;
endcase
// Handle sign extension for W-type instructions
if (`XLEN == 64) begin // RV64 has W-type instructions
assign MulDivResultE = W64E ? {{32{PrelimResultE[31]}}, PrelimResultE[31:0]} : PrelimResultE;
end else begin // RV32 has no W-type instructions
assign MulDivResultE = PrelimResultE;
end
flopenrc #(`XLEN) MulDivResultMReg(clk, reset, FlushM, ~StallM, MulDivResultE, MulDivResultM);
flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW);
end else begin // no M instructions supported
assign MulDivResultW = 0;
end
endgenerate
endmodule // muldiv
flopenrc #(`XLEN) MulDivResultMReg(clk, reset, FlushM, ~StallM, MulDivResultE, MulDivResultM);
flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW);
end else begin // no M instructions supported
assign MulDivResultW = 0;
end
endgenerate
endmodule

View File

@ -53,7 +53,7 @@ module wallypipelinedhart (
output logic HWRITED
);
// logic [1:0] ForwardAE, ForwardBE;
// logic [1:0] ForwardAE, ForwardBE;
logic StallF, StallD, StallE, StallM, StallW;
logic FlushF, FlushD, FlushE, FlushM, FlushW;
logic RetM, TrapM;
@ -65,7 +65,7 @@ module wallypipelinedhart (
logic [`XLEN-1:0] SrcAE, SrcBE;
logic [`XLEN-1:0] SrcAM;
logic [2:0] Funct3E;
// logic [31:0] InstrF;
// logic [31:0] InstrF;
logic [31:0] InstrD, InstrM;
logic [`XLEN-1:0] PCE, PCM, PCLinkE, PCLinkW;
logic [`XLEN-1:0] PCTargetE;
@ -84,6 +84,7 @@ module wallypipelinedhart (
logic PCSrcE;
logic CSRWritePendingDEM;
logic LoadStallD, MulDivStallD, CSRRdStallD;
logic DivDoneW;
logic [4:0] SetFflagsM;
logic [2:0] FRM_REGW;
logic FloatRegWriteW;
@ -130,12 +131,12 @@ module wallypipelinedhart (
.Funct7M(InstrM[31:25]),
.*);
pagetablewalker pagetablewalker(.*); // can send addresses to ahblite, send out pagetablestall
// *** can connect to hazard unit
// changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed.
// Would need to insertinstruction as InstrD, not InstrF
/*ahblite ebu(
.InstrReadF(1'b0),
pagetablewalker pagetablewalker(.*); // can send addresses to ahblite, send out pagetablestall
// *** can connect to hazard unit
// changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed.
// Would need to insertinstruction as InstrD, not InstrF
/*ahblite ebu(
.InstrReadF(1'b0),
.InstrRData(), // hook up InstrF later
.MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]),
.*); */

View File

@ -56,7 +56,7 @@ module testbench();
"rv64m/I-MULHU-01", "3000",
"rv64m/I-MULW-01", "3000"
// "rv64m/I-DIV-01", "3000",
// "rv64m/I-DIVU-01", "3000",
// "rv64m/I-DIVU-01", "3000"
// "rv64m/I-DIVUW-01", "3000",
// "rv64m/I-DIVW-01", "3000",
// "rv64m/I-REM-01", "3000",