This commit is contained in:
DTowersM 2022-07-07 23:11:35 +00:00
commit 4786fb9fd6
155 changed files with 1819 additions and 19909 deletions

6
.gitmodules vendored
View File

@ -20,9 +20,3 @@
[submodule "addins/coremark"]
path = addins/coremark
url = https://github.com/eembc/coremark
[submodule "addins/sky130_osu_sc_t18"]
path = addins/sky130_osu_sc_t18
url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t18
[submodule "addins/sky130_osu_sc_t12"]
path = addins/sky130_osu_sc_t12
url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12

@ -1 +0,0 @@
Subproject commit f1eef844734f73d3c79d83b82352118263eb7686

@ -1 +0,0 @@
Subproject commit 83f5245e1a599c628d6c73e76c1774b8ab5cab91

View File

@ -39,7 +39,7 @@
// MISA RISC-V configuration per specification
// ZYXWVUTSRQPONMLKJIHGFEDCBA
`define MISA 32'b0000000000101000001000100101101
`define MISA 32'b0000000000101000001000100100101
`define ZICSR_SUPPORTED 1
`define ZIFENCEI_SUPPORTED 1
`define COUNTERS 32

View File

@ -101,6 +101,8 @@
`define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9))
`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
`define USE_SRAM 0
// Disable spurious Verilator warnings
/* verilator lint_off STMTDLY */

View File

@ -32,5 +32,5 @@ echo "CHECKPOINT = ${CHECKPOINT}"
# *** change config from buildroot to rv64gc
vsim -c <<!
do wally-pipelined-batch.do buildroot buildroot $RISCV $INSTR_LIMIT $INSTR_WAVEON $CHECKPOINT
do wally-pipelined-batch.do buildroot buildroot-no-trace $RISCV $INSTR_LIMIT $INSTR_WAVEON $CHECKPOINT
!

View File

@ -32,7 +32,7 @@ vlib work
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
# $num = the added words after the call
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../srt/srt-radix4.sv ../src/generic/*.sv ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697
vsim -voptargs=+acc work.testbenchfp -G TEST=$2

View File

@ -41,6 +41,20 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
run -all
run -all
exec ./slack-notifier/slack-notifier.py
} elseif {$2 eq "buildroot-no-trace"} {
vlog -lint -work work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583
# start and run simulation
vopt +acc work_${1}_${2}.testbench -work work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=$4 -G INSTR_WAVEON=$5 -G CHECKPOINT=$6 -G NO_SPOOFING=1 -o testbenchopt
vsim -lib work_${1}_${2} testbenchopt -suppress 8852,12070,3084,3829 -fatal 7
#-- Run the Simulation
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
echo "Don't forget to change DEBUG_LEVEL = 0."
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
run -all
run -all
exec ./slack-notifier/slack-notifier.py
} else {
vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063
# start and run simulation

View File

@ -8,7 +8,8 @@ add wave -noupdate /testbenchfp/Z
add wave -noupdate /testbenchfp/Res
add wave -noupdate /testbenchfp/Ans
add wave -noupdate /testbenchfp/DivStart
add wave -noupdate /testbenchfp/DivDone
add wave -noupdate /testbenchfp/DivBusy
add wave -noupdate /testbenchfp/srtfsm/state
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
@ -17,12 +18,13 @@ add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/earlytermination/*
add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
add wave -group {Divide} -noupdate /testbenchfp/srtfsm/*
add wave -group {Testbench} -noupdate /testbenchfp/*
add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*

View File

@ -33,6 +33,8 @@
// WIDTH is number of bits in one "word" of the memory, DEPTH is number of such words
`include "wally-config.vh"
module sram1p1rw #(parameter DEPTH=128, WIDTH=256) (
input logic clk,
input logic [$clog2(DEPTH)-1:0] Adr,
@ -47,37 +49,33 @@ module sram1p1rw #(parameter DEPTH=128, WIDTH=256) (
always_ff @(posedge clk) AdrD <= Adr;
genvar index;
/* -----\/----- EXCLUDED -----\/-----
for(index = 0; index < WIDTH/8; index++) begin
always_ff @(posedge clk) begin
if (WriteEnable & ByteMask[index]) begin
StoredData[Adr][8*(index+1)-1:8*index] <= #1 CacheWriteData[8*(index+1)-1:8*index];
end
end
end
-----/\----- EXCLUDED -----/\----- */
if (WIDTH%8 != 0) // handle msbs if not a multiple of 8
always_ff @(posedge clk)
if (WriteEnable & ByteMask[WIDTH/8])
StoredData[Adr][WIDTH-1:WIDTH-WIDTH%8] <= #1
CacheWriteData[WIDTH-1:WIDTH-WIDTH%8];
for(index = 0; index < WIDTH/8; index++)
always_ff @(posedge clk)
if(WriteEnable & ByteMask[index])
StoredData[Adr][index*8 +: 8] <= #1 CacheWriteData[index*8 +: 8];
/*
// if not a multiple of 8, MSByte is not 8 bits long.
if(WIDTH%8 != 0) begin
always_ff @(posedge clk) begin
if (WriteEnable & ByteMask[WIDTH/8]) begin
StoredData[Adr][WIDTH-1:WIDTH-WIDTH%8] <= #1 CacheWriteData[WIDTH-1:WIDTH-WIDTH%8];
end
end
end
*/
assign ReadData = StoredData[AdrD];
if (`USE_SRAM == 1) begin
// 64 x 128-bit SRAM
logic [WIDTH-1:0] BitWriteMask;
for (index=0; index < WIDTH; index++)
assign BitWriteMask[index] = ByteMask[index/8];
TS1N28HPCPSVTB64X128M4SWBASO sram(
.SLP(1'b0), .SD(1'b0), .CLK(clk), .CEB(1'b0), .WEB(~WriteEnable),
.CEBM(1'b0), .WEBM(1'b0), .AWT(1'b0), .A(Adr), .D(CacheWriteData),
.BWEB(~BitWriteMask), .AM('b0), .DM('b0), .BWEBM('b0), .BIST(1'b0), .Q(ReadData)
);
end else begin
if (WIDTH%8 != 0) // handle msbs if not a multiple of 8
always_ff @(posedge clk)
if (WriteEnable & ByteMask[WIDTH/8])
StoredData[Adr][WIDTH-1:WIDTH-WIDTH%8] <= #1
CacheWriteData[WIDTH-1:WIDTH-WIDTH%8];
for(index = 0; index < WIDTH/8; index++)
always_ff @(posedge clk)
if(WriteEnable & ByteMask[index])
StoredData[Adr][index*8 +: 8] <= #1 CacheWriteData[index*8 +: 8];
assign ReadData = StoredData[AdrD];
end
endmodule

View File

@ -0,0 +1 @@
/home/jstine/memory/ts1n28hpcpsvtb64x128m4swbaso_180a/VERILOG/ts1n28hpcpsvtb64x128m4swbaso_180a_tt1v25c.v

View File

@ -1,74 +0,0 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Floating point divider/square root top unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
module convert_inputs(
input [63:0] op1, // 1st input operand (A)
input [63:0] op2, // 2nd input operand (B)
input [2:0] op_type, // Function opcode
input P, // Result Precision (0 for double, 1 for single)
output [63:0] Float1, // Converted 1st input operand
output [63:0] Float2 // Converted 2nd input operand
);
wire conv_SP; // Convert from SP to DP
wire Zexp1; // One if the exponent of op1 is zero
wire Zexp2; // One if the exponent of op2 is zero
wire Oexp1; // One if the exponent of op1 is all ones
wire Oexp2; // One if the exponent of op2 is all ones
// Convert from single precision to double precision if (op_type is 11X
// and P is 0) or (op_type is not 11X and P is one).
assign conv_SP = ~P;
// Test if the input exponent is zero, because if it is then the
// exponent of the converted number should be zero.
assign Zexp1 = ~(|op1[30:23]);
assign Zexp2 = ~(|op2[30:23]);
assign Oexp1 = (&op1[30:23]);
assign Oexp2 = (&op2[30:23]);
// Conditionally convert op1. Lower 29 bits are zero for single precision.
assign Float1[62:29] = conv_SP ? {op1[30], {3{(~op1[30]&~Zexp1)|Oexp1}}, op1[29:0]}
: op1[62:29];
assign Float1[28:0] = op1[28:0] & {29{~conv_SP}};
// Conditionally convert op2. Lower 29 bits are zero for single precision.
assign Float2[62:29] = conv_SP ? {op2[30], {3{(~op2[30]&~Zexp2)|Oexp2}}, op2[29:0]}
: op2[62:29];
assign Float2[28:0] = op2[28:0] & {29{~conv_SP}};
// Set the sign of Float1 based on its original sign and if the operation
// is negation (op_type = 101) or absolute value (op_type = 100)
assign Float1[63] = conv_SP ? op1[31] : op1[63];
assign Float2[63] = conv_SP ? op2[31] : op2[63];
endmodule // convert_inputs

View File

@ -1,47 +0,0 @@
// This module takes as inputs two operands (op1 and op2)
// and the result precision (P). Based on the operation and precision,
// it conditionally converts single precision values to double
// precision values and modifies the sign of op1.
// The converted operands are Float1 and Float2.
module convert_inputs_div (
input logic [63:0] op1, // 1st input operand (A)
input logic [63:0] op2, // 2nd input operand (B)
input logic P, // Result Precision (0 for double, 1 for single)
input logic op_type, // Operation
output logic [63:0] Float1, // Converted 1st input operand
output logic [63:0] Float2b // Converted 2nd input operand
);
logic [63:0] Float2;
logic Zexp1; // One if the exponent of op1 is zero
logic Zexp2; // One if the exponent of op2 is zero
logic Oexp1; // One if the exponent of op1 is all ones
logic Oexp2; // One if the exponent of op2 is all ones
// Test if the input exponent is zero, because if it is then the
// exponent of the converted number should be zero.
assign Zexp1 = ~(|op1[30:23]);
assign Zexp2 = ~(|op2[30:23]);
assign Oexp1 = (&op1[30:23]);
assign Oexp2 = (&op2[30:23]);
// Conditionally convert op1. Lower 29 bits are zero for single precision.
assign Float1[62:29] = P ? {op1[30], {3{(~op1[30]&~Zexp1)|Oexp1}}, op1[29:0]}
: op1[62:29];
assign Float1[28:0] = op1[28:0] & {29{~P}};
// Conditionally convert op2. Lower 29 bits are zero for single precision.
assign Float2[62:29] = P ? {op2[30], {3{(~op2[30]&~Zexp2)|Oexp2}}, op2[29:0]}
: op2[62:29];
assign Float2[28:0] = op2[28:0] & {29{~P}};
// Set the sign of Float1 based on its original sign
assign Float1[63] = P ? op1[31] : op1[63];
assign Float2[63] = P ? op2[31] : op2[63];
// For sqrt, assign Float2 same as Float1 for simplicity
assign Float2b = op_type ? Float1 : Float2;
endmodule // convert_inputs

View File

@ -1,11 +1,39 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: Conversion shift calculation
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module cvtshiftcalc(
input logic XZeroM,
input logic XZero,
input logic ToInt,
input logic IntToFp,
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic [`NF:0] XManM, // input mantissas
input logic [`NF:0] Xm, // input mantissas
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
input logic CvtResDenormUfM,
@ -32,8 +60,8 @@ module cvtshiftcalc(
// - otherwise:
// | LzcInM | 0's if nessisary |
// change to int shift to the left one
assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} :
CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`CVTLEN-`NF+1{1'b0}}} :
assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, Xm[`NF]&~CvtCalcExpM[`NE], Xm[`NF-1]|(CvtCalcExpM[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} :
CvtResDenormUfM ? {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}} :
{CvtLzcInM, {`NF+1{1'b0}}};
@ -65,6 +93,6 @@ module cvtshiftcalc(
// determine if the result underflows ??? -> fp
// - if the first 1 is shifted out of the result then the result underflows
// - can't underflow an integer to fp conversions
assign CvtResUf = ($signed(CvtCalcExpM) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroM&~IntToFp;
assign CvtResUf = ($signed(CvtCalcExpM) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp;
endmodule

View File

@ -1,126 +0,0 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 9/28/2021
//
// Purpose: Main convergence routine for floating point divider/square root unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
module divconv (
input logic [52:0] d, n,
input logic [2:0] sel_muxa, sel_muxb,
input logic sel_muxr,
input logic load_rega, load_regb, load_regc, load_regd,
input logic load_regr, load_regs,
input logic P,
input logic op_type,
input logic exp_odd,
input logic reset,
input logic clk,
output logic [59:0] q1, qp1, qm1,
output logic [59:0] q0, qp0, qm0,
output logic [59:0] rega_out, regb_out, regc_out, regd_out,
output logic [119:0] regr_out
);
logic [59:0] muxa_out, muxb_out;
logic [10:0] ia_div, ia_sqrt;
logic [59:0] ia_out;
logic [119:0] mul_out;
logic [59:0] q_out1, qm_out1, qp_out1;
logic [59:0] q_out0, qm_out0, qp_out0;
logic [59:0] mcand, mplier, mcand_q;
logic [59:0] twocmp_out;
logic [60:0] three;
logic [119:0] constant, constant2;
logic [59:0] q_const, qp_const, qm_const;
logic [59:0] d2, n2;
logic muxr_out;
logic cout1, cout2, cout3, cout4, cout5, cout6, cout7;
// Check if exponent is odd for sqrt
// If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA
assign d2 = (exp_odd&op_type) ? {1'b0, d, 6'h0} : {d, 7'h0};
assign n2 = op_type ? d2 : {n, 7'h0};
// IA div/sqrt
sbtm_div ia1 (d[52:41], ia_div);
sbtm_sqrt ia2 (d2[59:48], ia_sqrt);
assign ia_out = op_type ? {ia_sqrt, {49{1'b0}}} : {ia_div, {49{1'b0}}};
// Choose IA or iteration
mux6 #(60) mx1 (d2, ia_out, rega_out, regc_out, regd_out, regb_out, sel_muxb, muxb_out);
mux5 #(60) mx2 (regc_out, n2, ia_out, regb_out, regd_out, sel_muxa, muxa_out);
// Deal with remainder if [0.5, 1) instead of [1, 2)
mux2 #(120) mx3a ({~n, {67{1'b1}}}, {{1'b1}, ~n, {66{1'b1}}}, q1[59], constant2);
// Select Mcand, Remainder/Q''
mux2 #(120) mx3 (120'h0, constant2, sel_muxr, constant);
// Select mcand - remainder should always choose q1 [1,2) because
// adjustment of N in the from XX.FFFFFFF
mux2 #(60) mx4 (q0, q1, q1[59], mcand_q);
mux2 #(60) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier);
mux2 #(60) mx6 (muxa_out, mcand_q, sel_muxr, mcand);
// Q*D - N (reversed but changed in rounder.v to account for sign reversal)
// Add ulp for subtraction in remainder
mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out);
// Constant for Q''
mux2 #(60) mx8 ({60'h0000_0000_0000_020}, {60'h0000_0040_0000_000}, P, q_const);
mux2 #(60) mx9 ({60'h0000_0000_0000_0A0}, {60'h0000_0140_0000_000}, P, qp_const);
mux2 #(60) mxA ({60'hFFFF_FFFF_FFFF_F9F}, {60'hFFFF_FF3F_FFFF_FFF}, P, qm_const);
// CPA (from CSA)/Remainder addition/subtraction
assign {cout1, mul_out} = (mcand*mplier) + constant + {119'b0, muxr_out};
// Assuming [1,2) - q1
assign {cout2, q_out1} = regb_out + q_const;
assign {cout3, qp_out1} = regb_out + qp_const;
assign {cout4, qm_out1} = regb_out + qm_const + 1'b1;
// Assuming [0.5,1) - q0
assign {cout5, q_out0} = {regb_out[58:0], 1'b0} + q_const;
assign {cout6, qp_out0} = {regb_out[58:0], 1'b0} + qp_const;
assign {cout7, qm_out0} = {regb_out[58:0], 1'b0} + qm_const + 1'b1;
// One's complement instead of two's complement (for hw efficiency)
assign three = {~mul_out[118], mul_out[118], ~mul_out[117:59]};
mux2 #(60) mxTC (~mul_out[118:59], three[60:1], op_type, twocmp_out);
// regs
flopenr #(60) regc (clk, reset, load_regc, twocmp_out, regc_out);
flopenr #(60) regb (clk, reset, load_regb, mul_out[118:59], regb_out);
flopenr #(60) rega (clk, reset, load_rega, mul_out[118:59], rega_out);
flopenr #(60) regd (clk, reset, load_regd, mul_out[118:59], regd_out);
flopenr #(120) regr (clk, reset, load_regr, mul_out, regr_out);
// Assuming [1,2)
flopenr #(60) rege (clk, reset, load_regs, {q_out1[59:35], (q_out1[34:6] & {29{~P}}), 6'h0}, q1);
flopenr #(60) regf (clk, reset, load_regs, {qm_out1[59:35], (qm_out1[34:6] & {29{~P}}), 6'h0}, qm1);
flopenr #(60) regg (clk, reset, load_regs, {qp_out1[59:35], (qp_out1[34:6] & {29{~P}}), 6'h0}, qp1);
// Assuming [0,1)
flopenr #(60) regh (clk, reset, load_regs, {q_out0[59:35], (q_out0[34:6] & {29{~P}}), 6'h0}, q0);
flopenr #(60) regj (clk, reset, load_regs, {qm_out0[59:35], (qm_out0[34:6] & {29{~P}}), 6'h0}, qm0);
flopenr #(60) regk (clk, reset, load_regs, {qp_out0[59:35], (qp_out0[34:6] & {29{~P}}), 6'h0}, qp0);
endmodule // divconv

View File

@ -1,198 +0,0 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Convergence unit for pipelined floating point divider/square root top unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out,
regr_out, d, n, sel_muxa, sel_muxb, sel_muxr, reset, clk,
load_rega, load_regb, load_regc, load_regd, load_regr, load_regs, load_regp,
P, op_type, exp_odd);
input logic [52:0] d, n;
input logic [2:0] sel_muxa, sel_muxb;
input logic sel_muxr;
input logic load_rega, load_regb, load_regc, load_regd;
input logic load_regr, load_regs;
input logic load_regp;
input logic P;
input logic op_type;
input logic exp_odd;
input logic reset;
input logic clk;
output logic [59:0] q1, qp1, qm1;
output logic [59:0] q0, qp0, qm0;
output logic [59:0] rega_out, regb_out, regc_out, regd_out;
output logic [119:0] regr_out;
supply1 vdd;
supply0 vss;
logic [59:0] muxa_out, muxb_out;
logic muxr_out;
logic [10:0] ia_div, ia_sqrt;
logic [59:0] ia_out;
logic [119:0] mul_out;
logic [59:0] q_out1, qm_out1, qp_out1;
logic [59:0] q_out0, qm_out0, qp_out0;
logic [59:0] mcand, mplier, mcand_q;
logic [59:0] twocmp_out;
logic [60:0] three;
logic [119:0] Carry, Carry2;
logic [119:0] Sum, Sum2;
logic [119:0] constant, constant2;
logic [59:0] q_const, qp_const, qm_const;
logic [59:0] d2, n2;
logic [11:0] d3;
// Check if exponent is odd for sqrt
// If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA
assign d2 = (exp_odd&op_type) ? {vss, d, 6'h0} : {d, 7'h0};
assign n2 = op_type ? d2 : {n, 7'h0};
// IA div/sqrt
sbtm_div ia1 (d[52:41], ia_div);
sbtm_sqrt ia2 (d2[59:48], ia_sqrt);
assign ia_out = op_type ? {ia_sqrt, {49{1'b0}}} : {ia_div, {49{1'b0}}};
// Choose IA or iteration
mux6 #(60) mx1 (d2, ia_out, rega_out, regc_out, regd_out, regb_out, sel_muxb, muxb_out);
mux5 #(60) mx2 (regc_out, n2, ia_out, regb_out, regd_out, sel_muxa, muxa_out);
// Deal with remainder if [0.5, 1) instead of [1, 2)
mux2 #(120) mx3a ({~n, {67{1'b1}}}, {{1'b1}, ~n, {66{1'b1}}}, q1[59], constant2);
// Select Mcand, Remainder/Q''
mux2 #(120) mx3 (120'h0, constant2, sel_muxr, constant);
// Select mcand - remainder should always choose q1 [1,2) because
// adjustment of N in the from XX.FFFFFFF
mux2 #(60) mx4 (q0, q1, q1[59], mcand_q);
mux2 #(60) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier);
mux2 #(60) mx6 (muxa_out, mcand_q, sel_muxr, mcand);
// R4 Booth TDM multiplier (carry/save)
redundantmul #(60) bigmul(.a(mcand), .b(mplier), .out0(Sum), .out1(Carry));
// Q*D - N (reversed but changed in rounder.v to account for sign reversal)
csa #(120) csa1 (Sum, Carry, constant, Sum2, Carry2);
// Add ulp for subtraction in remainder
mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out);
// Constant for Q''
mux2 #(60) mx8 ({60'h0000_0000_0000_020}, {60'h0000_0040_0000_000}, P, q_const);
mux2 #(60) mx9 ({60'h0000_0000_0000_0A0}, {60'h0000_0140_0000_000}, P, qp_const);
mux2 #(60) mxA ({60'hFFFF_FFFF_FFFF_F9F}, {60'hFFFF_FF3F_FFFF_FFF}, P, qm_const);
logic [119:0] Sum_pipe;
logic [119:0] Carry_pipe;
logic muxr_pipe;
logic rega_pipe;
logic regb_pipe;
logic regc_pipe;
logic regd_pipe;
logic regs_pipe;
logic regs_pipe2;
logic regr_pipe;
logic P_pipe;
logic op_type_pipe;
logic [59:0] q_const_pipe;
logic [59:0] qm_const_pipe;
logic [59:0] qp_const_pipe;
logic [59:0] q_const_pipe2;
logic [59:0] qm_const_pipe2;
logic [59:0] qp_const_pipe2;
// Stage 1
flopenr #(120) regp1 (clk, reset, load_regp, Sum2, Sum_pipe);
flopenr #(120) regp2 (clk, reset, load_regp, Carry2, Carry_pipe);
flopenr #(1) regp3 (clk, reset, load_regp, muxr_out, muxr_pipe);
flopenr #(1) regp4 (clk, reset, load_regp, load_rega, rega_pipe);
flopenr #(1) regp5 (clk, reset, load_regp, load_regb, regb_pipe);
flopenr #(1) regp6 (clk, reset, load_regp, load_regc, regc_pipe);
flopenr #(1) regp7 (clk, reset, load_regp, load_regd, regd_pipe);
flopenr #(1) regp8 (clk, reset, load_regp, load_regs, regs_pipe);
flopenr #(1) regp9 (clk, reset, load_regp, load_regr, regr_pipe);
flopenr #(1) regpA (clk, reset, load_regp, P, P_pipe);
flopenr #(1) regpB (clk, reset, load_regp, op_type, op_type_pipe);
flopenr #(60) regpC (clk, reset, load_regp, q_const, q_const_pipe);
flopenr #(60) regpD (clk, reset, load_regp, qp_const, qp_const_pipe);
flopenr #(60) regpE (clk, reset, load_regp, qm_const, qm_const_pipe);
// CPA (from CSA)/Remainder addition/subtraction
assign mul_out = Sum_pipe + Carry_pipe + {119'h0, muxr_pipe};
// One's complement instead of two's complement (for hw efficiency)
assign three = {~mul_out[118] , mul_out[118], ~mul_out[117:59]};
mux2 #(60) mxTC (~mul_out[118:59], three[60:1], op_type_pipe, twocmp_out);
// Stage 2
flopenr #(60) regc (clk, reset, regc_pipe, twocmp_out, regc_out);
flopenr #(60) regb (clk, reset, regb_pipe, mul_out[118:59], regb_out);
flopenr #(60) rega (clk, reset, rega_pipe, mul_out[118:59], rega_out);
flopenr #(60) regd (clk, reset, regd_pipe, mul_out[118:59], regd_out);
flopenr #(120) regr (clk, reset, regr_pipe, mul_out, regr_out);
flopenr #(1) regl (clk, reset, regs_pipe, regs_pipe, regs_pipe2);
flopenr #(60) regm (clk, reset, regs_pipe, q_const_pipe, q_const_pipe2);
flopenr #(60) regn (clk, reset, regs_pipe, qp_const_pipe, qp_const_pipe2);
flopenr #(60) rego (clk, reset, regs_pipe, qm_const_pipe, qm_const_pipe2);
// Assuming [1,2) - q1
assign q_out1 = regb_out + q_const;
assign qp_out1 = regb_out + qp_const;
assign qm_out1 = regb_out + qm_const + 1'b1;
// Assuming [0.5,1) - q0
assign q_out0 = {regb_out[58:0], 1'b0} + q_const;
assign qp_out0 = {regb_out[58:0], 1'b0} + qp_const;
assign qm_out0 = {regb_out[58:0], 1'b0} + qm_const + 1'b1;
// Stage 3
// Assuming [1,2)
flopenr #(60) rege (clk, reset, regs_pipe2, {q_out1[59:35], (q_out1[34:6] & {29{~P_pipe}}), 6'h0}, q1);
flopenr #(60) regf (clk, reset, regs_pipe2, {qm_out1[59:35], (qm_out1[34:6] & {29{~P_pipe}}), 6'h0}, qm1);
flopenr #(60) regg (clk, reset, regs_pipe2, {qp_out1[59:35], (qp_out1[34:6] & {29{~P_pipe}}), 6'h0}, qp1);
// Assuming [0,1)
flopenr #(60) regh (clk, reset, regs_pipe2, {q_out0[59:35], (q_out0[34:6] & {29{~P_pipe}}), 6'h0}, q0);
flopenr #(60) regj (clk, reset, regs_pipe2, {qm_out0[59:35], (qm_out0[34:6] & {29{~P_pipe}}), 6'h0}, qm0);
flopenr #(60) regk (clk, reset, regs_pipe2, {qp_out0[59:35], (qp_out0[34:6] & {29{~P_pipe}}), 6'h0}, qp0);
endmodule // divconv
// *** rewrote behaviorally dh 5 Jan 2021 for speed
module csa #(parameter WIDTH=8) (
input logic [WIDTH-1:0] a, b, c,
output logic [WIDTH-1:0] sum, carry);
assign sum = a ^ b ^ c;
assign carry = (a & (b | c)) | (b & c);
/*
logic [WIDTH:0] carry_temp;
genvar i;
for (i=0;i<WIDTH;i=i+1) begin : genbit
fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
end
assign carry = {carry_temp[WIDTH-1:1], 1'b0};
*/
endmodule // csa

View File

@ -2,9 +2,9 @@
module divshiftcalc(
input logic [`DIVLEN+2:0] Quot,
input logic [`NE+1:0] DivCalcExpM,
input logic [`FMTBITS-1:0] FmtM,
input logic [`FMTBITS-1:0] Fmt,
input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
input logic [`NE+1:0] DivCalcExpM,
output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
output logic DivResDenorm,
@ -17,44 +17,44 @@ module divshiftcalc(
// if the exponent is 1 then the result needs to be normalized then the result is denormalizes
assign DivResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:0]);
// select the proper fraction lengnth
if (`FPSIZES == 1) begin
assign Nf = (`NE+2)'(`NF);
// if (`FPSIZES == 1) begin
// assign Nf = (`NE+2)'(`NF);
end else if (`FPSIZES == 2) begin
assign Nf = FmtM ? (`NE+2)'(`NF) : (`NE+2)'(`NF1);
// end else if (`FPSIZES == 2) begin
// assign Nf = Fmt ? (`NE+2)'(`NF) : (`NE+2)'(`NF1);
end else if (`FPSIZES == 3) begin
always_comb
case (FmtM)
`FMT: Nf = (`NE+2)'(`NF);
`FMT1: Nf = (`NE+2)'(`NF1);
`FMT2: Nf = (`NE+2)'(`NF2);
default: Nf = 1'bx;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (FmtM)
2'h3: Nf = (`NE+2)'(`Q_NF);
2'h1: Nf = (`NE+2)'(`D_NF);
2'h0: Nf = (`NE+2)'(`S_NF);
2'h2: Nf = (`NE+2)'(`H_NF);
endcase
end
// end else if (`FPSIZES == 3) begin
// always_comb
// case (Fmt)
// `FMT: Nf = (`NE+2)'(`NF);
// `FMT1: Nf = (`NE+2)'(`NF1);
// `FMT2: Nf = (`NE+2)'(`NF2);
// default: Nf = 1'bx;
// endcase
// end else if (`FPSIZES == 4) begin
// always_comb
// case (Fmt)
// 2'h3: Nf = (`NE+2)'(`Q_NF);
// 2'h1: Nf = (`NE+2)'(`D_NF);
// 2'h0: Nf = (`NE+2)'(`S_NF);
// 2'h2: Nf = (`NE+2)'(`H_NF);
// endcase
// end
// if the result is denormalized
// 00000000x.xxxxxx... Exp = DivCalcExp
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
// .00xxxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = +1
// 00000000x.xxxxxx... Exp = DivCalcExpM
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExpM+NF+1
// .00xxxxxxxxxxxxx... << DivCalcExpM+NF+1 Exp = +1
// .0000xxxxxxxxxxx... >> 1 Exp = 1
// Left shift amount = DivCalcExp+NF+1-1
assign DivDenormShift = Nf+DivCalcExpM;
// Left shift amount = DivCalcExpM+NF+1-1
assign DivDenormShift = (`NE+2)'(`NF)+DivCalcExpM;
// if the result is normalized
// 00000000x.xxxxxx... Exp = DivCalcExp
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1
// 00000000.xxxxxxx... << NF Exp = DivCalcExp+1
// 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after)
// 00000000x.xxxxxx... Exp = DivCalcExpM
// .00000000xxxxxxx... >> NF+1 Exp = DivCalcExpM+NF+1
// 00000000.xxxxxxx... << NF Exp = DivCalcExpM+1
// 00000000x.xxxxxx... << NF Exp = DivCalcExpM (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivCalcExpM-1 (determined after)
// inital Left shift amount = NF
assign NormShift = Nf;
assign NormShift = (`NE+2)'(`NF);
// if the shift amount is negitive then dont shift (keep sticky bit)
assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0};

View File

@ -0,0 +1,68 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek
// Modified:13 January 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module divsqrt(
input logic clk,
input logic reset,
input logic [`FMTBITS-1:0] FmtE,
input logic [`NF:0] XManE, YManE,
input logic [`NE-1:0] XExpE, YExpE,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic DivStartE,
input logic StallM,
input logic StallE,
output logic DivStickyM,
output logic DivNegStickyM,
output logic DivBusy,
output logic DivDone,
output logic [`NE+1:0] DivCalcExpM,
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
output logic [`DIVLEN+2:0] QuotM
// output logic [`XLEN-1:0] RemM,
);
logic [`DIVLEN+3:0] WSN, WCN;
logic [`DIVLEN+3:0] WS, WC;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
logic [`DIVLEN-1:0] X;
logic [`DIVLEN-1:0] Dpreproc;
logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt);
srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
.XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2M));
srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
.DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
endmodule

View File

@ -1,82 +0,0 @@
// Exception logic for the floating point adder. Note: We may
// actually want to move to where the result is computed.
module exception_div (
input logic [63:0] A, // 1st input operand (op1)
input logic [63:0] B, // 2nd input operand (op2)
input logic op_type, // Determine operation
output logic [2:0] Ztype, // Indicates type of result (Z)
output logic Invalid // Invalid operation exception
);
logic AzeroM; // '1' if the mantissa of A is zero
logic BzeroM; // '1' if the mantissa of B is zero
logic AzeroE; // '1' if the exponent of A is zero
logic BzeroE; // '1' if the exponent of B is zero
logic AonesE; // '1' if the exponent of A is all ones
logic BonesE; // '1' if the exponent of B is all ones
logic AInf; // '1' if A is infinite
logic BInf; // '1' if B is infinite
logic AZero; // '1' if A is 0
logic BZero; // '1' if B is 0
logic ANaN; // '1' if A is a not-a-number
logic BNaN; // '1' if B is a not-a-number
logic ASNaN; // '1' if A is a signalling not-a-number
logic BSNaN; // '1' if B is a signalling not-a-number
logic ZSNaN; // '1' if result Z is a quiet NaN
logic ZInf; // '1' if result Z is an infnity
logic Zero; // '1' if result is zero
logic NegSqrt; // '1' if sqrt and operand is negative
//***take this module out and add more registers or just recalculate it all
// Determine if mantissas are all zeros
assign AzeroM = (A[51:0] == 52'h0);
assign BzeroM = (B[51:0] == 52'h0);
// Determine if exponents are all ones or all zeros
assign AonesE = A[62]&A[61]&A[60]&A[59]&A[58]&A[57]&A[56]&A[55]&A[54]&A[53]&A[52];
assign BonesE = B[62]&B[61]&B[60]&B[59]&B[58]&B[57]&B[56]&B[55]&B[54]&B[53]&B[52];
assign AzeroE = ~(A[62]|A[61]|A[60]|A[59]|A[58]|A[57]|A[56]|A[55]|A[54]|A[53]|A[52]);
assign BzeroE = ~(B[62]|B[61]|B[60]|B[59]|B[58]|B[57]|B[56]|B[55]|B[54]|B[53]|B[52]);
// Determine special cases. Note: Zero is not really a special case.
assign AInf = AonesE & AzeroM;
assign BInf = BonesE & BzeroM;
assign ANaN = AonesE & ~AzeroM;
assign BNaN = BonesE & ~BzeroM;
assign ASNaN = ANaN & A[50];
assign BSNaN = ANaN & A[50];
assign AZero = AzeroE & AzeroM;
assign BZero = BzeroE & BzeroE;
// Is NaN if operand is negative and its a sqrt
assign NegSqrt = (A[63] & op_type & ~AZero);
// An "Invalid Operation" exception occurs if (A or B is a signalling NaN)
// or (A and B are both Infinite)
assign Invalid = ASNaN | BSNaN | (((AInf & BInf) | (AZero & BZero))&~op_type) |
NegSqrt;
// The result is a quiet NaN if (an "Invalid Operation" exception occurs)
// or (A is a NaN) or (B is a NaN).
assign ZSNaN = Invalid | ANaN | BNaN;
// The result is zero
assign Zero = (AZero | BInf)&~op_type | AZero&op_type;
// The result is +Inf if ((A is Inf) or (B is 0)) and (the
// result is not a quiet NaN).
assign ZInf = (AInf | BZero)&~ZSNaN&~op_type | AInf&op_type&~ZSNaN;
// Set the type of the result as follows:
// Ztype Result
// 000 Normal
// 010 Infinity
// 011 Zero
// 110 Div by 0
// 111 SNaN
assign Ztype[2] = (ZSNaN);
assign Ztype[1] = (ZSNaN) | (Zero) | (ZInf);
assign Ztype[0] = (ZSNaN) | (Zero);
endmodule // exception

View File

@ -1,4 +1,31 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: classify unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fclassify (

View File

@ -1,4 +1,32 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: Comparison unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// FOpCtrlE values

View File

@ -1,3 +1,31 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: control unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fctrl (

View File

@ -1,4 +1,33 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: Floating point conversions of configurable size
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fcvt (

View File

@ -1,12 +1,40 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: Post-Processing flag calculation
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module flags(
input logic XSgnM,
input logic Xs,
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic Plus1,
input logic InfIn, // is a Inf input being used
input logic XZeroM, YZeroM, // inputs are zero
input logic XZero, YZero, // inputs are zero
input logic XNaNM, YNaNM, // inputs are NaN
input logic NaNIn, // is a NaN input being used
input logic Sqrt, // Sqrt?
@ -108,7 +136,7 @@ module flags(
// if the res is too small to be represented and not 0
// | and if the res is not invalid (outside the integer bounds)
// | |
assign IntInexact = ((CvtCalcExpM[`NE]&~XZeroM)|Sticky|Round)&~IntInvalid;
assign IntInexact = ((CvtCalcExpM[`NE]&~XZero)|Sticky|Round)&~IntInvalid;
// select the inexact flag to output
assign Inexact = ToInt ? IntInexact : FpInexact;
@ -125,18 +153,18 @@ module flags(
// | | | | or the res rounds up out of bounds
// | | | | and the res didn't underflow
// | | | | |
assign IntInvalid = XNaNM|XInfM|(ShiftGtIntSz&~FullResExp[`NE+1])|((XSgnM&~Signed)&(~((CvtCalcExpM[`NE]|(~|CvtCalcExpM))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
assign IntInvalid = XNaNM|XInfM|(ShiftGtIntSz&~FullResExp[`NE+1])|((Xs&~Signed)&(~((CvtCalcExpM[`NE]|(~|CvtCalcExpM))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
// |
// or when the positive res rounds up out of range
assign SigNaN = (XSNaNM&~(IntToFp&CvtOp)) | (YSNaNM&~CvtOp) | (ZSNaNM&FmaOp);
assign FmaInvalid = ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
assign DivInvalid = ((XInfM & YInfM) | (XZeroM & YZeroM))&~Sqrt | (XSgnM&Sqrt);
assign FmaInvalid = ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZero & YInfM) | (YZero & XInfM);
assign DivInvalid = ((XInfM & YInfM) | (XZero & YZero))&~Sqrt | (Xs&Sqrt);
assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp);
// if dividing by zero and not 0/0
// - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator)
assign DivByZero = YZeroM&DivOp&~(XZeroM|NaNIn|InfIn);
assign DivByZero = YZero&DivOp&~(XZero|NaNIn|InfIn);
// Combine flags
// - to integer results do not set the underflow or overflow flags

View File

@ -1,6 +1,6 @@
///////////////////////////////////////////
//
// Written: Katherine Parry, David Harris
// Written: me@KatherineParry.com, David Harris
// Modified: 6/23/2021
//
// Purpose: Floating point multiply-accumulate of configurable size
@ -33,23 +33,23 @@ module fma(
input logic Xs, Ys, Zs, // input's signs
input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format
input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format
input logic XZeroE, YZeroE, ZZeroE, // is the input zero
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single
input logic XZero, YZero, ZZero, // is the input zero
input logic [2:0] FOpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [`FMTBITS-1:0] Fmt, // format of the result single double half or quad
output logic [`NE+1:0] Pe, // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
output logic AddendStickyE, // sticky bit that is calculated during alignment
output logic KillProdE, // set the product to zero before addition if the product is too small to matter
output logic [3*`NF+5:0] Sm, // the positive sum
output logic NegSumE, // was the sum negitive
output logic InvA, // intert Z
output logic ZSgnEffE, // the modified Z sign
output logic ZmSticky, // sticky bit that is calculated during alignment
output logic KillProd, // set the product to zero before addition if the product is too small to matter
output logic [3*`NF+5:0] Sm, // the positive sum's significand
output logic NegSum, // was the sum negitive
output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A)
output logic As, // the aligned addend's sign (modified Z sign for other opperations)
output logic Ps, // the product's sign
output logic [$clog2(3*`NF+7)-1:0] FmaNormCntE // normalization shift cnt
output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count
);
logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format
logic [3*`NF+5:0] Am; // Z aligned for addition in U(NF+5.2NF+1)
logic [3*`NF+6:0] AmInv; // aligned addend possibly inverted
logic [3*`NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+5.2NF+1)
logic [3*`NF+6:0] AmInv; // aligned addend's mantissa possibly inverted
logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed
logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum
///////////////////////////////////////////////////////////////////////////////
@ -62,7 +62,7 @@ module fma(
// calculate the product's exponent
expadd expadd(.FmtE, .Xe, .Ye, .XZeroE, .YZeroE, .Pe);
expadd expadd(.Fmt, .Xe, .Ye, .XZero, .YZero, .Pe);
// multiplication of the mantissa's
mult mult(.Xm, .Ym, .Pm);
@ -71,31 +71,31 @@ module fma(
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
align align(.Ze, .Zm, .XZeroE, .YZeroE, .ZZeroE, .Xe, .Ye,
.Am, .AddendStickyE, .KillProdE);
align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
.Am, .ZmSticky, .KillProd);
// calculate the signs and take the opperation into account
sign sign(.FOpCtrlE, .Xs, .Ys, .Zs, .Ps, .ZSgnEffE);
sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
// ///////////////////////////////////////////////////////////////////////////////
// // Addition/LZA
// ///////////////////////////////////////////////////////////////////////////////
add add(.Am, .Pm, .Ps, .ZSgnEffE, .KillProdE, .AmInv, .PmKilled, .NegSumE, .PreSum, .NegPreSum, .InvA, .XZeroE, .YZeroE, .Sm);
add add(.Am, .Pm, .Ps, .As, .KillProd, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm);
loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA}), .P(PmKilled), .FmaNormCntE);
loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA}), .P(PmKilled), .NCnt);
endmodule
module expadd(
input logic [`FMTBITS-1:0] FmtE, // precision
input logic [`NE-1:0] Xe, Ye, // input exponents
input logic XZeroE, YZeroE, // are the inputs zero
input logic [`FMTBITS-1:0] Fmt, // format of the output: single double half quad
input logic [`NE-1:0] Xe, Ye, // input's exponents
input logic XZero, YZero, // are the inputs zero
output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2
);
// kill the exponent if the product is zero - either X or Y is 0
assign Pe = ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}};
assign Pe = ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZero|YZero)}};
endmodule
@ -118,19 +118,19 @@ endmodule
module sign(
input logic [2:0] FOpCtrlE, // precision
input logic Xs, Ys, Zs, // are the inputs denormalized
input logic [2:0] FOpCtrl, // opperation contol
input logic Xs, Ys, Zs, // sign of the inputs
output logic Ps, // the product's sign - takes opperation into account
output logic ZSgnEffE // Z sign used in fma - takes opperation into account
output logic As // aligned addend sign used in fma - takes opperation into account
);
// Calculate the product's sign
// Negate product's sign if FNMADD or FNMSUB
// flip is negation opperation
assign Ps = Xs ^ Ys ^ (FOpCtrlE[1]&~FOpCtrlE[2]);
assign Ps = Xs ^ Ys ^ (FOpCtrl[1]&~FOpCtrl[2]);
// flip if subtraction
assign ZSgnEffE = Zs^FOpCtrlE[0];
assign As = Zs^FOpCtrl[0];
endmodule
@ -143,16 +143,16 @@ endmodule
module align(
input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format
input logic [`NF:0] Zm, // fractions in U(0.NF) format]
input logic XZeroE, YZeroE, ZZeroE, // is the input zero
output logic [3*`NF+5:0] Am, // Z aligned for addition in U(NF+5.2NF+1)
output logic AddendStickyE, // Sticky bit calculated from the aliged addend
output logic KillProdE // should the product be set to zero
input logic [`NF:0] Zm, // significand in U(0.NF) format]
input logic XZero, YZero, ZZero, // is the input zero
output logic [3*`NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1)
output logic ZmSticky, // Sticky bit calculated from the aliged addend
output logic KillProd // should the product be set to zero
);
logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter U(NF+5.3NF+1)
logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
logic [4*`NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [4*`NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1)
logic KillZ;
///////////////////////////////////////////////////////////////////////////////
@ -162,18 +162,18 @@ module align(
// determine the shift count for alignment
// - negitive means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right
// This could have been done using Pe, but AlignCnt is on the critical path so we replicate logic for speed
assign AlignCnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze};
// This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed
assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze};
// Defualt Addition without shifting
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
// the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
assign ZManPreShifted = {Zm,(3*`NF+5)'(0)};
assign ZmPreshifted = {Zm,(3*`NF+5)'(0)};
assign KillProdE = AlignCnt[`NE+1]|XZeroE|YZeroE;
assign KillZ = $signed(AlignCnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5));
assign KillProd = ACnt[`NE+1]|XZero|YZero;
assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5));
always_comb
begin
@ -182,9 +182,9 @@ module align(
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
if (KillProdE) begin
ZManShifted = ZManPreShifted;
AddendStickyE = ~(XZeroE|YZeroE);
if (KillProd) begin
ZmShifted = ZmPreshifted;
ZmSticky = ~(XZero|YZero);
// If the addend is too small to effect the addition
// - The addend has to shift two past the end of the addend to be considered too small
@ -193,20 +193,20 @@ module align(
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
end else if (KillZ) begin
ZManShifted = 0;
AddendStickyE = ~ZZeroE;
ZmShifted = 0;
ZmSticky = ~ZZero;
// If the Addend is shifted right
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
end else begin
ZManShifted = ZManPreShifted >> AlignCnt;
AddendStickyE = |(ZManShifted[`NF-1:0]);
ZmShifted = ZmPreshifted >> ACnt;
ZmSticky = |(ZmShifted[`NF-1:0]);
end
end
assign Am = ZManShifted[4*`NF+5:`NF];
assign Am = ZmShifted[4*`NF+5:`NF];
endmodule
@ -217,15 +217,15 @@ endmodule
module add(
input logic [3*`NF+5:0] Am, // Z aligned for addition in U(NF+5.2NF+1)
input logic [3*`NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1)
input logic [2*`NF+1:0] Pm, // the product's mantissa
input logic Ps, ZSgnEffE,// the product and modified Z signs
input logic KillProdE, // should the product be set to 0
input logic XZeroE, YZeroE, // is the input zero
input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
input logic KillProd, // should the product be set to 0
input logic XZero, YZero, // is the input zero
output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted
output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed
output logic NegSumE, // was the sum negitive
output logic InvA, // do you invert Z
output logic NegSum, // was the sum negitive
output logic InvA, // do you invert the aligned addend
output logic [3*`NF+5:0] Sm, // the positive sum
output logic [3*`NF+6:0] PreSum, NegPreSum// possibly negitive sum
);
@ -237,12 +237,12 @@ module add(
// Negate Z when doing one of the following opperations:
// -prod + Z
// prod - Z
assign InvA = ZSgnEffE ^ Ps;
assign InvA = As ^ Ps;
// Choose an inverted or non-inverted addend - the one has to be added now for the LZA
assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am};
// Kill the product if the product is too small to effect the addition (determined in fma1.sv)
assign PmKilled = Pm&{2*`NF+2{~KillProdE}};
assign PmKilled = Pm&{2*`NF+2{~KillProd}};
@ -252,17 +252,17 @@ module add(
assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+7)'(4)};
// Is the sum negitive
assign NegSumE = PreSum[3*`NF+6];
assign NegSum = PreSum[3*`NF+6];
// Choose the positive sum and accompanying LZA result.
assign Sm = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
endmodule
module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
input logic [3*`NF+6:0] A, // addend
input logic [2*`NF+1:0] P, // product
output logic [$clog2(3*`NF+7)-1:0] FmaNormCntE // normalization shift count for the positive result
output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result
);
logic [3*`NF+6:0] T;
@ -290,6 +290,6 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(FmaNormCntE));
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NCnt));
endmodule

View File

@ -1,11 +1,39 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: Fma shift calculation
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fmashiftcalc(
input logic [3*`NF+5:0] SumM, // the positive sum
input logic [`NE-1:0] ZExpM, // exponent of Z
input logic [`NE-1:0] Ze, // exponent of Z
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // normalization shift count
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic KillProdM, // is the product set to zero
input logic ZDenormM,
output logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
@ -25,18 +53,18 @@ module fmashiftcalc(
assign SumZero = ~(|SumM);
// calculate the sum's exponent
assign NormSumExp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNormCntM} - 1 + (`NE+2)'(`NF+4);
assign NormSumExp = KillProdM ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenormM} : ProdExpM + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNormCntM} - 1 + (`NE+2)'(`NF+4);
//convert the sum's exponent into the proper percision
if (`FPSIZES == 1) begin
assign ConvNormSumExp = NormSumExp;
end else if (`FPSIZES == 2) begin
assign ConvNormSumExp = FmtM ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
assign ConvNormSumExp = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
case (Fmt)
`FMT: ConvNormSumExp = NormSumExp;
`FMT1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
`FMT2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
@ -46,7 +74,7 @@ module fmashiftcalc(
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
case (Fmt)
2'h3: ConvNormSumExp = NormSumExp;
2'h1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
2'h0: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
@ -70,7 +98,7 @@ module fmashiftcalc(
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
assign PreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~SumZero;
end else if (`FPSIZES == 3) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
@ -81,7 +109,7 @@ module fmashiftcalc(
assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
always_comb begin
case (FmtM)
case (Fmt)
`FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
`FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
`FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
@ -100,7 +128,7 @@ module fmashiftcalc(
assign Sum3LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
always_comb begin
case (FmtM)
case (Fmt)
2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;

View File

@ -1,132 +0,0 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Floating point divider/square root top unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
// `timescale 1ps/1ps
module fpdiv (
input logic clk,
input logic reset,
input logic start,
input logic [63:0] op1,
input logic [63:0] op2,
input logic [1:0] rm,
input logic op_type,
input logic P,
input logic OvEn,
input logic UnEn,
input logic XNaNQ,
input logic YNaNQ,
input logic XZeroQ,
input logic YZeroQ,
input logic XInfQ,
input logic YInfQ,
output logic done,
output logic FDivBusyE,
output logic [63:0] AS_Result,
output logic [4:0] Flags);
logic [63:0] Float1;
logic [63:0] Float2;
logic [12:0] exp1, exp2, expF;
logic [12:0] exp_diff, bias;
logic [13:0] exp_sqrt;
logic [63:0] Result;
logic [52:0] mantissaA;
logic [52:0] mantissaB;
logic [2:0] sel_inv;
logic Invalid;
logic [4:0] FlagsIn;
logic signResult;
logic [59:0] q1, qm1, qp1, q0, qm0, qp0;
logic [59:0] rega_out, regb_out, regc_out, regd_out;
logic [119:0] regr_out;
logic [2:0] sel_muxa, sel_muxb;
logic sel_muxr;
logic load_rega, load_regb, load_regc, load_regd, load_regr;
logic load_regs;
logic exp_cout1, exp_cout2;
logic exp_odd, open;
// op_type : fdiv=0, fsqrt=1
assign Float1 = op1;
assign Float2 = op_type ? op1 : op2;
// Exception detection
exception_div exc1 (.A(Float1), .B(Float2), .op_type, .Ztype(sel_inv), .Invalid);
// Determine Sign/Mantissa
assign signResult = (Float1[63]^Float2[63]);
assign mantissaA = {1'b1, Float1[51:0]};
assign mantissaB = {1'b1, Float2[51:0]};
// Perform Exponent Subtraction - expA - expB + Bias
assign exp1 = {2'b0, Float1[62:52]};
assign exp2 = {2'b0, Float2[62:52]};
assign bias = {3'h0, 10'h3FF};
// Divide exponent
assign {exp_cout1, open, exp_diff} = {2'b0, exp1} - {2'b0, exp2} + {2'b0, bias};
// Sqrt exponent (check if exponent is odd)
assign exp_odd = Float1[52] ? 1'b0 : 1'b1;
assign {exp_cout2, exp_sqrt} = {1'b0, exp1} + {4'h0, 10'h3ff} + {13'b0, exp_odd};
// Choose correct exponent
assign expF = op_type ? exp_sqrt[13:1] : exp_diff;
// Main Goldschmidt/Division Routine
divconv goldy (.q1, .qm1, .qp1, .q0, .qm0, .qp0, .rega_out, .regb_out, .regc_out, .regd_out,
.regr_out, .d(mantissaB), .n(mantissaA), .sel_muxa, .sel_muxb, .sel_muxr,
.reset, .clk, .load_rega, .load_regb, .load_regc, .load_regd,
.load_regr, .load_regs, .P, .op_type, .exp_odd);
// FSM : control divider
fsm_fpdiv control (.clk, .reset, .start, .op_type,
.done, .load_rega, .load_regb, .load_regc, .load_regd,
.load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr,
.divBusy(FDivBusyE));
// Round the mantissa to a 52-bit value, with the leading one
// removed. The rounding units also handles special cases and
// set the exception flags.
rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF),
.sel_inv, .Invalid, .SignR(signResult),
.Float1(op1), .Float2(op2),
.XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ,
.XInfQ, .YInfQ, .op_type,
.q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out,
.Result, .Flags(FlagsIn));
// Store the final result and the exception flags in registers.
flopenr #(64) rega (clk, reset, done, Result, AS_Result);
flopenr #(5) regc (clk, reset, done, FlagsIn, Flags);
endmodule // fpadd

View File

@ -1,170 +0,0 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Floating point divider/square root top unit pipelined version (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
module fpdiv_pipe (
input logic clk,
input logic reset,
input logic start,
input logic [63:0] op1,
input logic [63:0] op2,
input logic [1:0] rm,
input logic op_type,
input logic P,
input logic OvEn,
input logic UnEn,
input logic XNaNQ,
input logic YNaNQ,
input logic XZeroQ,
input logic YZeroQ,
input logic XInfQ,
input logic YInfQ,
output logic done,
output logic FDivBusyE,
output logic load_preload,
output logic [63:0] AS_Result,
output logic [4:0] Flags);
supply1 vdd;
supply0 vss;
logic [63:0] Float1;
logic [63:0] Float2;
logic [63:0] IntValue;
logic [12:0] exp1, exp2, expF;
logic [14:0] exp_pre_diff;
logic [12:0] exp_diff, bias;
logic [13:0] exp_sqrt;
logic [63:0] Result;
logic [52:0] mantissaA;
logic [52:0] mantissaB;
logic [2:0] sel_inv;
logic Invalid;
logic [4:0] FlagsIn;
logic exp_gt63;
logic Sticky_out;
logic signResult, sign_corr;
logic corr_sign;
logic zeroB;
logic convert;
logic swap;
logic sub;
logic [59:0] q1, qm1, qp1, q0, qm0, qp0;
logic [59:0] rega_out, regb_out, regc_out, regd_out;
logic [119:0] regr_out;
logic [2:0] sel_muxa, sel_muxb;
logic sel_muxr;
logic load_rega, load_regb, load_regc, load_regd, load_regr;
logic load_regp, load_regs;
logic exp_odd, exp_odd1;
logic start1;
logic P1;
logic op_type1;
logic [12:0] expF1;
logic [52:0] mantissaA1;
logic [52:0] mantissaB1;
logic [2:0] sel_inv1;
logic signResult1;
logic Invalid1;
// op_type : fdiv=0, fsqrt=1
assign Float1 = op1;
assign Float2 = op_type ? op1 : op2;
// Exception detection
exception_div exc1 (.A(Float1), .B(Float2), .op_type, .Ztype(sel_inv), .Invalid);
// Determine Sign/Mantissa
assign signResult = ((Float1[63]^Float2[63])&~op_type);
assign mantissaA = {vdd, Float1[51:0]};
assign mantissaB = {vdd, Float2[51:0]};
// Perform Exponent Subtraction - expA - expB + Bias
assign exp1 = {2'b0, Float1[62:52]};
assign exp2 = {2'b0, Float2[62:52]};
// bias : DP = 2^{11-1}-1 = 1023
assign bias = {3'h0, 10'h3FF};
// Divide exponent
assign exp_pre_diff = {2'b0, exp1} - {2'b0, exp2} + {2'b0, bias};
assign exp_diff = exp_pre_diff[12:0];
// Sqrt exponent (check if exponent is odd)
assign exp_odd = Float1[52] ? 1'b0 : 1'b1;
assign exp_sqrt = {1'b0, exp1} + {4'h0, 10'h3ff} + {13'b0, exp_odd};
// Choose correct exponent
assign expF = op_type ? exp_sqrt[13:1] : exp_diff;
flopenr #(1) rega (clk, reset, 1'b1, exp_odd, exp_odd1);
flopenr #(1) regb (clk, reset, 1'b1, P, P1);
flopenr #(1) regc (clk, reset, 1'b1, op_type, op_type1);
flopenr #(13) regd (clk, reset, 1'b1, expF, expF1);
flopenr #(53) rege (clk, reset, 1'b1, mantissaA, mantissaA1);
flopenr #(53) regf (clk, reset, 1'b1, mantissaB, mantissaB1);
flopenr #(1) regg (clk, reset, 1'b1, start, start1);
flopenr #(3) regh (clk, reset, 1'b1, sel_inv, sel_inv1);
flopenr #(1) regj (clk, reset, 1'b1, signResult, signResult1);
flopenr #(1) regk (clk, reset, 1'b1, Invalid, Invalid1);
// Main Goldschmidt/Division Routine
divconv_pipe goldy (.q1, .qm1, .qp1, .q0, .qm0, .qp0,
.rega_out, .regb_out, .regc_out, .regd_out,
.regr_out, .d(mantissaB1), .n(mantissaA1),
.sel_muxa, .sel_muxb, .sel_muxr, .reset, .clk,
.load_rega, .load_regb, .load_regc, .load_regd,
.load_regr, .load_regs, .load_regp,
.P(P), .op_type(op_type1), .exp_odd(exp_odd1));
// FSM : control divider
fsm_fpdiv_pipe control (.clk, .reset, .start(start), .op_type(op_type1), .P(P),
.done, .load_rega, .load_regb, .load_regc, .load_regd,
.load_regr, .load_regs, .load_regp, .load_preload,
.sel_muxa, .sel_muxb, .sel_muxr, .divBusy(FDivBusyE));
// Round the mantissa to a 52-bit value, with the leading one
// removed. The rounding units also handles special cases and
// set the exception flags.
rounder_div round1 (.rm, .P(P), .OvEn(1'b0), .UnEn(1'b0), .exp_diff(expF1),
.sel_inv(sel_inv1), .Invalid(Invalid1), .SignR(signResult1),
.Float1(op1), .Float2(op2),
.XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ,
.XInfQ, .YInfQ, .op_type(op_type1),
.q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out,
.Result, .Flags(FlagsIn));
// Store the final result and the exception flags in registers.
flopenr #(64) regl (clk, reset, done, Result, AS_Result);
flopenr #(5) regn (clk, reset, done, FlagsIn, Flags);
endmodule // fpdiv_pipe

View File

@ -1,6 +1,6 @@
///////////////////////////////////////////
//
// Written: Katherine Parry, James Stine, Brett Mathis
// Written: me@KatherineParry.com, James Stine, Brett Mathis
// Modified: 6/23/2021
//
// Purpose: FPU
@ -125,11 +125,12 @@ module fpu (
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
//divide signals
logic [`DIVLEN+2:0] Quot;
logic [`NE+1:0] DivCalcExpM;
logic DivNegStickyM;
logic DivStickyM;
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M;
logic [`DIVLEN+2:0] QuotE, QuotM;
logic [`NE+1:0] DivCalcExpE, DivCalcExpM;
logic DivNegStickyE, DivNegStickyM;
logic DivStickyE, DivStickyM;
logic DivDoneM;
logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, EarlyTermShiftDiv2M;
// result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
@ -185,9 +186,10 @@ module fpu (
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(13+int'(`FMTBITS)) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE);
// EXECUTION STAGE
@ -249,37 +251,45 @@ module fpu (
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
// fma - does multiply, add, and multiply-add instructions
fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE), .Xe(XExpE), .Ye(YExpE), .Ze(ZExpE),
.Xm(XManE), .Ym(YManE), .Zm(ZManE), .XZeroE, .YZeroE, .ZZeroE,
.FOpCtrlE, .FmtE, .Sm(SumE), .NegSumE, .InvA(InvAE), .FmaNormCntE,
.ZSgnEffE, .Ps(PSgnE), .Pe(ProdExpE), .AddendStickyE, .KillProdE);
// fpdivsqrt using Goldschmidt's iteration
if(`FLEN == 64) begin
flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E),
.clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk));
flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E),
.clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk));
end
else if (`FLEN == 32) begin
flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E),
.clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk));
flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E),
.clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk));
end
flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}),
.q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}),
.clear(FDivSqrtDoneE), .en(load_preload),
.reset(reset), .clk(clk));
fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ),
.reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
.XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
.FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE),
.Xe(XExpE), .Ye(YExpE), .Ze(ZExpE),
.Xm(XManE), .Ym(YManE), .Zm(ZManE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE),
.FOpCtrl(FOpCtrlE), .Fmt(FmtE),
.As(ZSgnEffE), .Ps(PSgnE),
.Sm(SumE), .Pe(ProdExpE),
.NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE),
.ZmSticky(AddendStickyE), .KillProd(KillProdE));
// // fpdivsqrt using Goldschmidt's iteration
// if(`FLEN == 64) begin
// flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// end
// else if (`FLEN == 32) begin
// flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// end
// flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}),
// .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}),
// .clear(FDivSqrtDoneE), .en(load_preload),
// .reset(reset), .clk(clk));
// fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ),
// .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
// .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
// .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE,
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE),
.StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
.EarlyTermShiftDiv2M, .QuotM, .DivDone(DivDoneM));
// other FP execution units
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE,
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
@ -371,10 +381,10 @@ module fpu (
assign FpLoadStoreM = FResSelM[1];
postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .EarlyTermShiftDiv2M,
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot,
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM,
.NegSumM, .InvZM(InvAM), .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, .DivNegStickyM,
postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .ProdExpM, .EarlyTermShiftDiv2M,
.AddendStickyM, .KillProdM, .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInfM, .YInfM, .Quot(QuotM),
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM, .DivDone(DivDoneM),
.NegSumM, .InvZM(InvAM), .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrl(FOpCtrlM), .FmaNormCntM, .DivNegStickyM,
.CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM, .DivStickyM,
.CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM);

View File

@ -1,74 +0,0 @@
///////////////////////////////////////////
//
// Written: David Harris
// Modified: 11 September 2021
//
// Purpose: Recurrence-based SRT Division and Square Root
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fpudivsqrtrecur (
input logic clk,
input logic reset,
input logic FlushM, // flush the memory stage
input logic StallM, // stall memory stage
input logic FDivSqrtStart, // start a computation
input logic FmtE, // precision 1 = double 0 = single
input logic FDivE, FSqrtE,
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic XSgnE, YSgnE, // input signs - execute stage
input logic [`NE-1:0] XExpE, YExpE, // input exponents - execute stage
input logic [`NF:0] XManE, YManE, // input mantissa - execute stage
input logic XDenormE, YDenormE, // is denorm
input logic XZeroE, YZeroE, // is zero - execute stage
input logic XNaNE, YNaNE, // is NaN
input logic XSNaNE, YSNaNE, // is signaling NaN
input logic XInfE, YInfE, ZInfE, // is infinity
input logic [10:0] BiasE, // bias (max exponent/2) ***parameterize in unpacking unit
output logic FDviSqrtBusy, FDivSqrtDone, //currently occpied, or done with operation
output logic [`FLEN-1:0] FDivSqrtResM, // result
output logic [4:0] FDivSqrtFlgM // flags
);
logic FDivSqrtResSgn;
logic [`FLEN-1:0] FDivSqrtRecurRes;
// Radix-2 SRT Division and Square Root
// Special Cases
// *** shift to handle denorms in hardware
assign FDivSqrtResSgn = FDivE & (XSgnE ^ YSgnE); // Sign is negative for division if inputs have opposite signs
always_comb begin
if (FSqrtE & XSgnE | FDivE & XZeroE & YZeroE | XNaNE | FDivE & YNaNE) FDivSqrtResM = 0; // ***replace with NAN; // *** which one
else if (FDivE & YZeroE | XInfE) FDivSqrtResM = {FDivSqrtResSgn, (`NE)'(1), (`NF)'(0)}; // infinity
else if (FDivE & YInfE) FDivSqrtResM = {FDivSqrtResSgn, (`NE)'(0), (`NF)'(0)}; // zero
else FDivSqrtResM = FDivSqrtRecurRes;
end
// *** handle early termination in the special cases
// *** handle signaling NANs
endmodule

View File

@ -1,105 +0,0 @@
///////////////////////////////////////////
//
// Written: David Harris
// Modified: 11 September 2021
//
// Purpose: Recurrence-based SRT Division and Square Root
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// Bit counts:
// Inputs are originally normalized floating point numbers with NF fractional bits and a leading 1 integer bit
// x is right shifted by up to 2 to be in the range of 1/4 <= x < 1/2 for divide, 1/4 <= x < 1 for sqrt
// Hence, x now has NF+2 fractional bits and 0 integer bits
// d is right shifted by 1 to be in the range of 1/2 <= d < 1. It thus has NF+1 fractional bits and 0 integer bits
// q is eventually in the range of 1/4 < q < 1 and hence needs NF+2 bits to keep NF bits when normalized, plus some*** more bits for rounding
// The partial
/*
module fpudivsqrtrecurcore (
input logic clk,
input logic reset,
input logic start, // start a computation
input logic busy, // computation running
input logic fmt, // precision 1 = double 0 = single
input logic [`NF+1:0] x, // in range 1/4 <= x < 1/2 for divide, 1/4 <=x < 1 for sqrt
input logic [`NF+1:0] din, // in range 1/2 <= d < 1 for divide
input logic FDiv, FSqrt, // *** not yet used
output logic [`FLEN-1:0] FDivSqrtRecurRes // result
);
assign FDivSqrtRecurRes = 0;
logic [***] d, ws, wsout, wsnext, wc, wcout, wcnext;
logic [1:0] q; // 00 = 0, 01 = 1, 10 = -1
// Radix-2 SRT Division
// registers for divisor and partial remainder
flopen #(NF+1) dreg(clk, start, din, d);
mux2 #(NF+1) wsmux(wsout, x, start, wsnext);
flopen #(NF+1) wsreg(clk, busy, wsnext, ws);
mux2 #(NF+1) wcmux(wcout, 0, start, wcnext);
flopen #(NF+1) wcreg(clk, busy, wcnext, wc);
// quotient selection
qsel qsel(ws[***4bits], wc[***], q);
// partial remainder update
always_comb begin // select -d * q to add to partial remainder
if (q[1]) dq = d;
else if (q[0]) dq = ~d;
else dq = 0;
end
csa #(***) csa(ws, wc, dq, q[1], wsout, wcout);
endmodule
*/
/*
module csa #(parameter N=4) (
input logic [N-1:0] sin, cin, ain,
input logic carry,
output logic [N-1:0] sum, cout
);
logic [N-1:0] c;
assign c = {cin[N-2:0], carry}; // shift carries left and inject optional 1 into lsb
assign sum = sin ^ ain ^ c;
assign cout = sin & ain | sin & c | ain & c;
endmodule
*/
module qsel( // radix 2 SRT division quotient selection
input logic [3:0] wc, ws,
output logic [1:0] q
);
endmodule

View File

@ -1,6 +1,6 @@
///////////////////////////////////////////
//
// Written: Katherine Parry
// Written: me@KatherineParry.com
// Modified: 6/23/2021
//
// Purpose: FPU Sign Injection instructions

View File

@ -1,537 +0,0 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 9/28/2021
//
// Purpose: FSM for floating point divider/square root unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
module fsm_fpdiv (
input logic clk,
input logic reset,
input logic start,
input logic op_type,
output logic done,
output logic load_rega,
output logic load_regb,
output logic load_regc,
output logic load_regd,
output logic load_regr,
output logic load_regs,
output logic [2:0] sel_muxa,
output logic [2:0] sel_muxb,
output logic sel_muxr,
output logic divBusy
);
typedef enum logic [4:0] {S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
S10, S11, S12, S13, S14, S15, S16, S17, S18, S19,
S20, S21, S22, S23, S24, S25, S26, S27, S28, S29,
S30} statetype;
statetype current_state, next_state;
always @(posedge clk)
begin
if (reset == 1'b1)
current_state = S0;
else
current_state = next_state;
end
always_comb
begin
case(current_state)
S0: // iteration 0
begin
if (start==1'b0)
begin
done = 1'b0;
divBusy = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
next_state = S0;
end
else if (start==1'b1 & op_type==1'b0)
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b001;
sel_muxb = 3'b001;
sel_muxr = 1'b0;
next_state = S1;
end // if (start==1'b1 & op_type==1'b0)
else if (start==1'b1 & op_type==1'b1)
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b010;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
next_state = S13;
end
else
begin
done = 1'b0;
divBusy = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
next_state = S0;
end
end // case: S0
S1:
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b010;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
next_state = S2;
end
S2: // iteration 1
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b011;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
next_state = S3;
end
S3:
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b010;
sel_muxr = 1'b0;
next_state = S4;
end
S4: // iteration 2
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b011;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
next_state = S5;
end
S5:
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b010;
sel_muxr = 1'b0; // add
next_state = S6;
end
S6: // iteration 3
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b011;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
next_state = S8;
end
S7:
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b010;
sel_muxr = 1'b0;
next_state = S8;
end // case: S7
S8: // q,qm,qp
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b1;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
next_state = S9;
end
S9: // rem
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b1;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b1;
next_state = S10;
end
S10: // done
begin
done = 1'b1;
divBusy = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
next_state = S11;
end // case: S10
S11: // done
begin
done = 1'b0;
divBusy = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
next_state = S0;
end
S13: // start of sqrt path
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b1;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b010;
sel_muxb = 3'b001;
sel_muxr = 1'b0;
next_state = S14;
end
S14:
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b001;
sel_muxb = 3'b100;
sel_muxr = 1'b0;
next_state = S15;
end
S15: // iteration 1
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b011;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
next_state = S16;
end
S16:
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b1;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
next_state = S17;
end
S17:
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b100;
sel_muxb = 3'b010;
sel_muxr = 1'b0;
next_state = S18;
end
S18: // iteration 2
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b011;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
next_state = S19;
end
S19:
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b1;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
next_state = S20;
end
S20:
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b100;
sel_muxb = 3'b010;
sel_muxr = 1'b0;
next_state = S21;
end
S21: // iteration 3
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b1;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b011;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
next_state = S22;
end
S22:
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b1;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b011;
sel_muxr = 1'b0;
next_state = S23;
end
S23:
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b1;
load_regb = 1'b0;
load_regc = 1'b1;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b100;
sel_muxb = 3'b010;
sel_muxr = 1'b0;
next_state = S24;
end
S24: // q,qm,qp
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b1;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
next_state = S25;
end
S25: // rem
begin
done = 1'b0;
divBusy = 1'b1;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b1;
load_regs = 1'b0;
sel_muxa = 3'b011;
sel_muxb = 3'b110;
sel_muxr = 1'b1;
next_state = S26;
end
S26: // done
begin
done = 1'b1;
divBusy = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
next_state = S27;
end // case: S26
S27: // done
begin
done = 1'b0;
divBusy = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
next_state = S0;
end
default:
begin
done = 1'b0;
divBusy = 1'b0;
load_rega = 1'b0;
load_regb = 1'b0;
load_regc = 1'b0;
load_regd = 1'b0;
load_regr = 1'b0;
load_regs = 1'b0;
sel_muxa = 3'b000;
sel_muxb = 3'b000;
sel_muxr = 1'b0;
next_state = S0;
end
endcase // case(current_state)
end // always @ (current_state or X)
endmodule // fsm

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,31 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: shift correction
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module lzacorrection(

View File

@ -1,171 +0,0 @@
// module lz2 (P, V, B0, B1);
// input B0;
// input B1;
// output P;
// output V;
// assign V = B0 | B1;
// assign P = B0 & ~B1;
// endmodule // lz2
// Note: This module is not made out of two lz2's - why not? (MJS)
// module lz4 (ZP, ZV, B0, B1, V0, V1);
// input B0;
// input B1;
// input V0;
// input V1;
// output [1:0] ZP;
// output ZV;
// assign ZP[0] = V0 ? B0 : B1;
// assign ZP[1] = ~V0;
// assign ZV = V0 | V1;
// endmodule // lz4
// // Note: This module is not made out of two lz4's - why not? (MJS)
// module lz8 (ZP, ZV, B);
// input [7:0] B;
// wire s1p0;
// wire s1v0;
// wire s1p1;
// wire s1v1;
// wire s2p0;
// wire s2v0;
// wire s2p1;
// wire s2v1;
// wire [1:0] ZPa;
// wire [1:0] ZPb;
// wire ZVa;
// wire ZVb;
// output [2:0] ZP;
// output ZV;
// lz2 l1(s1p0, s1v0, B[2], B[3]);
// lz2 l2(s1p1, s1v1, B[0], B[1]);
// lz4 l3(ZPa, ZVa, s1p0, s1p1, s1v0, s1v1);
// lz2 l4(s2p0, s2v0, B[6], B[7]);
// lz2 l5(s2p1, s2v1, B[4], B[5]);
// lz4 l6(ZPb, ZVb, s2p0, s2p1, s2v0, s2v1);
// assign ZP[1:0] = ZVb ? ZPb : ZPa;
// assign ZP[2] = ~ZVb;
// assign ZV = ZVa | ZVb;
// endmodule // lz8
// module lz16 (ZP, ZV, B);
// input [15:0] B;
// wire [2:0] ZPa;
// wire [2:0] ZPb;
// wire ZVa;
// wire ZVb;
// output [3:0] ZP;
// output ZV;
// lz8 l1(ZPa, ZVa, B[7:0]);
// lz8 l2(ZPb, ZVb, B[15:8]);
// assign ZP[2:0] = ZVb ? ZPb : ZPa;
// assign ZP[3] = ~ZVb;
// assign ZV = ZVa | ZVb;
// endmodule // lz16
// module lz32 (ZP, ZV, B);
// input [31:0] B;
// wire [3:0] ZPa;
// wire [3:0] ZPb;
// wire ZVa;
// wire ZVb;
// output [4:0] ZP;
// output ZV;
// lz16 l1(ZPa, ZVa, B[15:0]);
// lz16 l2(ZPb, ZVb, B[31:16]);
// assign ZP[3:0] = ZVb ? ZPb : ZPa;
// assign ZP[4] = ~ZVb;
// assign ZV = ZVa | ZVb;
// endmodule // lz32
// // This module returns the number of leading zeros ZP in the 64-bit
// // number B. If there are no ones in B, then ZP and ZV are both 0.
// module lz64 (ZP, ZV, B);
// input [63:0] B;
// wire [4:0] ZPa;
// wire [4:0] ZPb;
// wire ZVa;
// wire ZVb;
// output [5:0] ZP;
// output ZV;
// lz32 l1(ZPa, ZVa, B[31:0]);
// lz32 l2(ZPb, ZVb, B[63:32]);
// assign ZV = ZVa | ZVb;
// assign ZP[4:0] = (ZVb ? ZPb : ZPa) & {5{ZV}};
// assign ZP[5] = ~ZVb & ZV;
// endmodule // lz64
// This module returns the number of leading zeros ZP in the 52-bit
// number B. If there are no ones in B, then ZP and ZV are both 0.
module lz52 (ZP, ZV, B);
input [51:0] B;
wire [4:0] ZP_32;
wire [3:0] ZP_16;
wire [1:0] ZP_4;
wire ZV_32;
wire ZV_16;
wire ZV_4;
wire ZP_2_1;
wire ZP_2_2;
wire ZV_2_1;
wire ZV_2_2;
output [5:0] ZP;
output ZV;
lz32 l1 (ZP_32, ZV_32, B[51:20]);
lz16 l2 (ZP_16, ZV_16, B[19:4]);
lz2 l3_1 (ZP_2_1, ZV_2_1, B[3], B[2]);
lz2 l3_2 (ZP_2_2, ZV_2_2, B[1], B[0]);
lz4 l3_final (ZP_4, ZV_4, ZP_2_1, ZP_2_2, ZV_2_1, ZV_2_2);
assign ZV = ZV_32 | ZV_16 | ZV_4;
assign ZP[5] = ~ZV_32;
assign ZP[4] = ZV_32 ? ZP_32[4] : ~ZV_16;
assign ZP[3:2] = ZV_32 ? ZP_32[3:2] : (ZV_16 ? ZP_16[3:2] : 2'b0);
assign ZP[1:0] = ZV_32 ? ZP_32[1:0] : (ZV_16 ? ZP_16[1:0] : ZP_4);
endmodule // lz52

View File

@ -1,7 +1,35 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: Negate integer result
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module negateintres(
input logic XSgnM,
input logic Xs,
input logic [`NORMSHIFTSZ-1:0] Shifted,
input logic Signed,
input logic Int64,
@ -12,7 +40,7 @@ module negateintres(
// round and negate the positive res if needed
assign NegRes = XSgnM ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
assign NegRes = Xs ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];

View File

@ -1,3 +1,31 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: normalization shifter
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"

View File

@ -1,9 +1,9 @@
///////////////////////////////////////////
//
// Written: Katherine Parry, David Harris
// Modified: 6/23/2021
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: Floating point multiply-accumulate of configurable size
// Purpose: Post-Processing
//
// A component of the Wally configurable RISC-V project.
//
@ -31,13 +31,13 @@
module postprocess(
// general signals
input logic XSgnM, YSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic [2:0] FOpCtrlM, // choose which opperation (look below for values)
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic Xs, Ys, // input signs
input logic [`NE-1:0] Ze, // input exponents
input logic [`NF:0] Xm, Ym, Zm, // input mantissas
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic [2:0] FOpCtrl, // choose which opperation (look below for values)
input logic XZero, YZero, ZZero, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
@ -55,9 +55,10 @@ module postprocess(
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count
//divide signals
input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
input logic [`NE+1:0] DivCalcExpM, // the calculated expoent
input logic DivStickyM,
input logic DivNegStickyM,
input logic DivDone,
input logic [`NE+1:0] DivCalcExpM,
input logic [`DIVLEN+2:0] Quot,
// conversion signals
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
@ -125,15 +126,15 @@ module postprocess(
logic Sqrt;
// signals to help readability
assign Signed = FOpCtrlM[0];
assign Int64 = FOpCtrlM[1];
assign IntToFp = FOpCtrlM[2];
assign Signed = FOpCtrl[0];
assign Int64 = FOpCtrl[1];
assign IntToFp = FOpCtrl[2];
assign ToInt = FWriteIntM;
assign Mult = FOpCtrlM[2]&~FOpCtrlM[1]&~FOpCtrlM[0];
assign Mult = FOpCtrl[2]&~FOpCtrl[1]&~FOpCtrl[0];
assign CvtOp = (PostProcSelM == 2'b00);
assign FmaOp = (PostProcSelM == 2'b10);
assign DivOp = (PostProcSelM == 2'b01);
assign Sqrt = FOpCtrlM[0];
assign DivOp = (PostProcSelM == 2'b01)&DivDone;
assign Sqrt = FOpCtrl[0];
// is there an input of infinity or NaN being used
assign InfIn = (XInfM&~(IntToFp&CvtOp))|(YInfM&~CvtOp)|(ZInfM&FmaOp);
@ -141,21 +142,21 @@ module postprocess(
// choose the ouptut format depending on the opperation
// - fp -> fp: OpCtrl contains the percision of the output
// - otherwise: FmtM contains the percision of the output
// - otherwise: Fmt contains the percision of the output
if (`FPSIZES == 2)
assign OutFmt = IntToFp|~CvtOp ? FmtM : (FOpCtrlM[1:0] == `FMT);
assign OutFmt = IntToFp|~CvtOp ? Fmt : (FOpCtrl[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp|~CvtOp ? FmtM : FOpCtrlM[1:0];
assign OutFmt = IntToFp|~CvtOp ? Fmt : FOpCtrl[1:0];
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCalcExpM, .CvtResDenormUfM, .XManM, .CvtLzcInM,
.XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCalcExpM, .CvtResDenormUfM, .Xm, .CvtLzcInM,
.XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.SumM, .Ze, .ProdExpM, .FmaNormCntM, .Fmt, .KillProdM, .ConvNormSumExp,
.ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
divshiftcalc divshiftcalc(.Fmt, .DivCalcExpM, .Quot, .EarlyTermShiftDiv2M, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
always_comb
case(PostProcSelM)
@ -168,8 +169,13 @@ module postprocess(
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
end
2'b01: begin //div
ShiftAmt = DivShiftAmt;
ShiftIn = DivShiftIn;
if(DivDone) begin
ShiftAmt = DivShiftAmt;
ShiftIn = DivShiftIn;
end else begin
ShiftAmt = '0;
ShiftIn = '0;
end
end
default: begin
ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}};
@ -193,27 +199,28 @@ module postprocess(
// round to infinity
// round to nearest max magnitude
round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp,
roundsign roundsign(.PSgnM, .ZSgnEffM, .InvZM, .FmaOp, .DivOp, .CvtOp, .NegSumM,
.Xs, .Ys, .CvtResSgnM, .RoundSgn);
round round(.OutFmt, .Frm, .Sticky, .AddendStickyM, .ZZero, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp,
.InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf,
.DivStickyM, .DivNegStickyM,
.DivStickyM, .DivNegStickyM, .DivDone,
.DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
///////////////////////////////////////////////////////////////////////////////
// Sign calculation
///////////////////////////////////////////////////////////////////////////////
resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .SumExp, .Round, .Sticky,
resultsign resultsign(.Frm, .PSgnM, .ZSgnEffM, .SumExp, .Round, .Sticky,
.FmaOp, .ZInfM, .InfIn, .SumZero, .Mult, .RoundSgn, .ResSgn);
roundsign roundsign(.PSgnM, .ZSgnEffM, .InvZM, .FmaOp, .DivOp, .CvtOp, .NegSumM,
.XSgnM, .YSgnM, .CvtResSgnM, .RoundSgn);
///////////////////////////////////////////////////////////////////////////////
// Flags
///////////////////////////////////////////////////////////////////////////////
flags flags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .InfIn, .XZeroM, .YZeroM,
.XSgnM, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM,
flags flags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .InfIn, .XZero, .YZero,
.Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM,
.XNaNM, .YNaNM, .NaNIn, .ZSgnEffM, .PSgnM, .Round, .IntInvalid, .DivByZero,
.UfLSBRes, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
.RoundExp, .NegResMSBS, .Invalid, .Overflow, .PostProcFlgM);
@ -222,9 +229,9 @@ module postprocess(
// Select the result
///////////////////////////////////////////////////////////////////////////////
negateintres negateintres(.XSgnM, .Shifted, .Signed, .Int64, .Plus1, .NegResMSBS, .NegRes);
resultselect resultselect(.XSgnM, .XManM, .YManM, .ZManM, .XZeroM, .IntInvalid,
.IntZeroM, .FrmM, .OutFmt, .XNaNM, .YNaNM, .ZNaNM, .CvtResUf,
negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .NegResMSBS, .NegRes);
resultselect resultselect(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
.IntZeroM, .Frm, .OutFmt, .XNaNM, .YNaNM, .ZNaNM, .CvtResUf,
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .NegRes,
.XInfM, .YInfM, .DivOp,
.DivByZero, .FullResExp, .CvtCalcExpM, .ResSgn, .ResExp, .ResFrac, .PostProcResM, .FCvtIntResM);

View File

@ -1,58 +0,0 @@
///////////////////////////////////////////
// redundantmul.sv
//
// Written: David_Harris@hmc.edu and ssanghai@hm.edu 10/11/2021
// Modified:
//
// Purpose: multiplier with output in redundant carry-sum form
// This can be faster than a mutiplier that requires a final adder to obtain the nonredundant answer.
// The module has several implementations controlled by the DESIGN_COMPILER flag.
// When DESIGN_COMPILER = 1, use the Synopsys DesignWare DW02_multp block. This will give highest quality results
// but doesn't work in simulation or when using different tools
// When DESIGN_COMPILER = 2, use the Wally mult_cs block with Radix 2 Booth encoding and a Wallace Tree
// This simulates and synthesizes, but quality of results ae lower than DesignWare
// Otherwise, just use a nonredundant multiplier and set one word to 0. This is best for FPGAs, which have
// block multipliers, and also simulates fastest.
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module redundantmul #(parameter WIDTH =8)(
input logic [WIDTH-1:0] a,b,
output logic [2*WIDTH-1:0] out0, out1);
if (`DESIGN_COMPILER == 1) begin:mul
logic [2*WIDTH-1+2:0] tmp_out0;
logic [2*WIDTH-1+2:0] tmp_out1;
DW02_multp #(WIDTH, WIDTH, 2*WIDTH+2) mul(.a, .b, .tc(1'b0), .out0(tmp_out0), .out1(tmp_out1));
assign out0 = tmp_out0[2*WIDTH-1:0];
assign out1 = tmp_out1[2*WIDTH-1:0];
end else begin:mul // force a nonredunant multipler. This will simulate properly and also is appropriate for FPGAs.
assign out0 = a * b;
assign out1 = 0;
end
endmodule

View File

@ -1,14 +1,43 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: special case selection
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module resultselect(
input logic XSgnM, // input signs
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic Xs, // input signs
input logic [`NF:0] Xm, Ym, Zm, // input mantissas
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic InfIn,
input logic XInfM, YInfM,
input logic XZeroM,
input logic XZero,
input logic IntZeroM,
input logic NaNIn,
input logic IntToFp,
@ -39,29 +68,29 @@ module resultselect(
// does the overflow result output the maximum normalized floating point number
// output infinity if the input is infinity
assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn));
assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~ResSgn) | (Frm[1:0]==2'b11&ResSgn));
if (`FPSIZES == 1) begin
//NaN res selection depending on standard
if(`IEEE754) begin
assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
assign OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
assign UfRes = {ResSgn, {`FLEN-1{1'b0}}, Plus1&FrmM[1]&~(DivOp&YInfM)};
assign UfRes = {ResSgn, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInfM)};
assign NormRes = {ResSgn, ResExp, ResFrac};
end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
if(`IEEE754) begin
assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
@ -69,7 +98,7 @@ module resultselect(
assign OfRes = OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end else if (`FPSIZES == 3) begin
@ -77,43 +106,43 @@ module resultselect(
case (OutFmt)
`FMT: begin
if(`IEEE754) begin
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
NormRes = {ResSgn, ResExp, ResFrac};
end
`FMT1: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end
`FMT2: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]};
ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]};
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
end
default: begin
@ -136,50 +165,50 @@ module resultselect(
case (OutFmt)
2'h3: begin
if(`IEEE754) begin
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
NormRes = {ResSgn, ResExp, ResFrac};
end
2'h1: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]};
ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]};
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
UfRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
UfRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]};
end
2'h0: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]};
ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]};
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
UfRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
UfRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]};
end
2'h2: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]};
ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]};
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
@ -187,7 +216,7 @@ module resultselect(
OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
// zero is exact fi dividing by infinity so don't add 1
UfRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
UfRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]};
end
endcase
@ -202,7 +231,7 @@ module resultselect(
// - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
// - dont set to zero if fp input is zero but not using the fp input
// - dont set to zero if int input is zero but not using the int input
assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (((YInfM&~XInfM)|XZeroM)&DivOp);//Underflow & ~ResDenorm & (ResExp!=1);
assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (((YInfM&~XInfM)|XZero)&DivOp);//Underflow & ~ResDenorm & (ResExp!=1);
assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInfM&DivOp));
// output infinity with result sign if divide by zero
if(`IEEE754) begin
@ -243,9 +272,9 @@ module resultselect(
// unsigned | 2^32-1 | 2^64-1 |
//
// other: 32 bit unsinged res should be sign extended as if it were a signed number
assign OfIntRes = Signed ? XSgnM&~XNaNM ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
assign OfIntRes = Signed ? Xs&~XNaNM ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
XSgnM&~XNaNM ? {`XLEN{1'b0}} : // unsigned negitive
Xs&~XNaNM ? {`XLEN{1'b0}} : // unsigned negitive
{`XLEN{1'b1}};// unsigned positive
@ -256,6 +285,6 @@ module resultselect(
// - otherwise output a rounded 0
// - otherwise output the normal res (trmined and sign extended if nessisary)
assign FCvtIntResM = IntInvalid ? OfIntRes :
CvtCalcExpM[`NE] ? XSgnM&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
CvtCalcExpM[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
endmodule

View File

@ -1,7 +1,35 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: calculating the result's sign
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module resultsign(
input logic [2:0] FrmM,
input logic [2:0] Frm,
input logic PSgnM, ZSgnEffM,
input logic ZInfM,
input logic InfIn,
@ -25,7 +53,7 @@ module resultsign(
// if multiply then Psgn
// otherwise psign
assign Underflow = SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky));
assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? FrmM[1:0] == 2'b10 : PSgnM;
assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? Frm[1:0] == 2'b10 : PSgnM;
// is the result negitive

View File

@ -1,3 +1,31 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: Rounder
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// what position is XLEN in?
// options:
@ -9,17 +37,18 @@
module round(
input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single
input logic [2:0] FrmM, // rounding mode
input logic [2:0] Frm, // rounding mode
input logic FmaOp,
input logic DivOp,
input logic CvtOp,
input logic ToInt,
input logic DivDone,
input logic [1:0] PostProcSelM,
input logic CvtResDenormUfM,
input logic CvtResUf,
input logic [`CORRSHIFTSZ-1:0] CorrShifted,
input logic AddendStickyM, // addend's sticky bit
input logic ZZeroM, // is Z zero
input logic ZZero, // is Z zero
input logic InvZM, // invert Z
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
input logic RoundSgn, // the result's sign
@ -227,13 +256,13 @@ module round(
// Deterimine if a small number was supposed to be subtrated
// - for FMA or if division has a negitive sticky bit
assign SubBySmallNum = ((AddendStickyM&FmaOp&~ZZeroM&InvZM) | (DivNegStickyM&DivOp)) & ~(NormSumSticky|UfRound);
assign UfSubBySmallNum = ((AddendStickyM&FmaOp&~ZZeroM&InvZM) | (DivNegStickyM&DivOp)) & ~NormSumSticky;
assign SubBySmallNum = ((AddendStickyM&FmaOp&~ZZero&InvZM) | (DivNegStickyM&DivOp)) & ~(NormSumSticky|UfRound);
assign UfSubBySmallNum = ((AddendStickyM&FmaOp&~ZZero&InvZM) | (DivNegStickyM&DivOp)) & ~NormSumSticky;
always_comb begin
// Determine if you add 1
case (FrmM)
case (Frm)
3'b000: CalcPlus1 = Round & ((Sticky| LSBRes)&~SubBySmallNum);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = RoundSgn & ~(SubBySmallNum & ~Round);//round down
@ -242,7 +271,7 @@ module round(
default: CalcPlus1 = 1'bx;
endcase
// Determine if you add 1 (for underflow flag)
case (FrmM)
case (Frm)
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = RoundSgn & ~(UfSubBySmallNum & ~UfRound);//round down
@ -251,7 +280,7 @@ module round(
default: UfCalcPlus1 = 1'bx;
endcase
// Determine if you subtract 1
case (FrmM)
case (Frm)
3'b000: CalcMinus1 = 0;//round to nearest even
3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
3'b010: CalcMinus1 = ~RoundSgn & ~Round & SubBySmallNum;//round down
@ -309,8 +338,8 @@ module round(
case(PostProcSelM)
2'b10: RoundExp = SumExp; // fma
2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
2'b01: RoundExp = CorrDivExp; // divide
default: RoundExp = 0;
2'b01: RoundExp = DivDone ? CorrDivExp : '0; // divide
default: RoundExp = '0;
endcase
// round the result

View File

@ -1,259 +0,0 @@
// The rounder takes as input logics a 64-bit value to be rounded, A, the
// exponent of the value to be rounded, the sign of the final result, Sign,
// the precision of the results, P, and the two-bit rounding mode, rm.
// It produces a rounded 52-bit result, Z, the exponent of the rounded
// result, Z_exp, and a flag that indicates if the result was rounded,
// Inexact. The rounding mode has the following values.
// rm Modee
// 00 round-to-nearest-even
// 01 round-toward-zero
// 10 round-toward-plus infinity
// 11 round-toward-minus infinity
// The rounding algorithm determines if '1' should be added to the
// truncated signficant result, based on three significant bits
// (least (L), round (R) and sticky (S)), the rounding mode (rm)
// and the sign of the final result (Sign). Visually, L and R appear as
// xxxxxL,Rxxxxxxx
// where , denotes the rounding boundary. S is the logical OR of all the
// bits to the right of R.
module rounder (
input logic [2:0] rm,
input logic P,
input logic OvEn,
input logic UnEn,
input logic exp_valid,
input logic [3:0] sel_inv,
input logic Invalid,
input logic DenormIn,
input logic Asign,
input logic [10:0] Aexp,
input logic [5:0] norm_shift,
input logic [63:0] A,
input logic [10:0] exponent_postsum,
input logic A_Norm,
input logic B_Norm,
input logic [11:0] exp_A_unmodified,
input logic [11:0] exp_B_unmodified,
input logic normal_overflow,
input logic normal_underflow,
input logic swap,
input logic [2:0] op_type,
input logic [63:0] sum,
output logic [63:0] Result,
output logic DenormIO,
output logic [4:0] Flags
);
wire Rsign;
wire Sticky_out;
wire [51:0] ShiftMant;
wire [63:0] ShiftMant_64;
wire [10:0] Rexp;
wire [10:0] Rexp_denorm;
wire [11:0] Texp; //Parallelized for denorm exponent
wire [11:0] Texp_addone; //results
wire [11:0] Texp_subone;
wire [51:0] Rmant;
wire [51:0] Tmant;
wire Rzero;
wire VSS = 1'b0;
wire VDD = 1'b1;
wire [51:0] B; // Value used to add the "ones"
wire [11:0] B_12_overflow; // Value used to add one to exponent
wire [11:0] B_12_underflow; // Value used to subtract one from exponent
wire S_SP; // Single precision sticky bit
wire S_DP; // Double precision sticky bit
wire S; // Actual sticky bit
wire R; // Round bit
wire L; // Least significant bit
wire add_one; // '1' if one should be added
wire UnFlow_SP, UnFlow_DP, UnderFlow;
wire OvFlow_SP, OvFlow_DP, OverFlow;
wire Inexact;
wire Round_zero;
wire Infinite;
wire VeryLarge;
wire Largest;
wire Adj_exp;
wire Valid;
wire NaN;
wire Cout;
wire Cout_overflow;
wire Texp_l7z;
wire Texp_l7o;
// Determine the sticky bits for double and single precision
assign S_DP= A[9]|A[8]|A[7]|A[6]|A[5]|A[4]|A[3]|A[2]|A[1]|A[0];
assign S_SP = S_DP |A[38]|A[37]|A[36]|A[35]|A[34]|A[33]|A[32]|A[31]|A[30]|
A[29]|A[28]|A[27]|A[26]|A[25]|A[24]|A[23]|A[22]|A[21]|A[20]|
A[19]|A[18]|A[17]|A[16]|A[15]|A[14]|A[13]|A[12]|A[11]|A[10];
// Set the least (L), round (R), and sticky (S) bits based on
// the precision.
assign {L, R, S} = P ? {A[40],A[39],S_SP} : {A[11],A[10],S_DP};
// Add one if ((the rounding mode is round-to-nearest) and (R is one) and
// (S or L is one)) or ((the rounding mode is towards plus or minus
// infinity (rm[1] = 1)) and (the sign and rm[0] are the same) and
// (R or S is one)).
assign add_one = ~rm[2] & ((~rm[1]&~rm[0]&R&(L|S)) | (rm[1]&(Asign^~rm[0])&(R|S))) | (rm[2] & R);
// Add one using a 52-bit adder. The one is added to the LSB B[0] for
// double precision or to B[29] for single precision.
// This could be simplified by using a specialized adder.
// The current adder is actually 64-bits. The leading one
// for normalized results in not included in the addition.
assign B = {{22{VSS}}, add_one&P, {28{VSS}}, add_one&~P};
assign B_12_overflow = {8'h0, 3'b0, normal_overflow};
assign B_12_underflow = {8'h0, 3'b0, normal_underflow};
cla52 add1(Tmant, Cout, A[62:11], B); //***adder
cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow); //***adder
cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow); //***adder
// Now that rounding is done, we compute the final exponent
// and test for special cases.
// Compute the value of the exponent by subtracting the shift
// value from the previous exponent and then adding 2 + cout.
// If needed this could be optimized to used a specialized
// adder.
assign Texp = DenormIn ? ({1'b0, exponent_postsum}) : ({VSS, Aexp} - {{6{VSS}}, norm_shift} +{{10{VSS}}, VDD, Cout});
// Overflow only occurs for double precision, if Texp[10] to Texp[0] are
// all ones. To encourage sharing with single precision overflow detection,
// the lower 7 bits are tested separately.
assign Texp_l7o = Texp[6]&Texp[5]&Texp[4]&Texp[3]&Texp[2]&Texp[1]&Texp[0];
assign OvFlow_DP = Texp[10]&Texp[9]&Texp[8]&Texp[7]&Texp_l7o;
// Overflow occurs for single precision if (Texp[10] is one) and
// ((Texp[9] or Texp[8] or Texp[7]) is one) or (Texp[6] to Texp[0]
// are all ones.
assign OvFlow_SP = Texp[10]&(Texp[9]|Texp[8]|Texp[7]|Texp_l7o);
// Underflow occurs for double precision if (Texp[11] is one) or Texp[10] to
// Texp[0] are all zeros.
assign Texp_l7z = ~Texp[6]&~Texp[5]&~Texp[4]&~Texp[3]&~Texp[2]&~Texp[1]&~Texp[0];
assign UnFlow_DP = Texp[11] | ~Texp[10]&~Texp[9]&~Texp[8]&~Texp[7]&Texp_l7z;
// Underflow occurs for single precision if (Texp[10] is zero) and
// (Texp[9] or Texp[8] or Texp[7]) is zero.
assign UnFlow_SP = (~Texp[10]&(~Texp[9]|~Texp[8]|~Texp[7]|Texp_l7z));
// Set the overflow and underflow flags. They should not be set if
// the input logic was infinite or NaN or the output logic of the adder is zero.
// 00 = Valid
// 10 = NaN
assign Valid = (~sel_inv[2]&~sel_inv[1]&~sel_inv[0]);
assign NaN = ~sel_inv[2]&~sel_inv[1]& sel_inv[0];
assign UnderFlow = ((P & UnFlow_SP | UnFlow_DP)&Valid&exp_valid) |
(~Aexp[10]&Aexp[9]&Aexp[8]&Aexp[7]&~Aexp[6]
&~Aexp[5]&~Aexp[4]&~Aexp[3]&~Aexp[2]
&~Aexp[1]&~Aexp[0]&sel_inv[3]);
assign OverFlow = (P & OvFlow_SP | OvFlow_DP)&Valid&~UnderFlow&exp_valid;
// The DenormIO is set if underflow has occurred or if their was a
// denormalized input logic.
assign DenormIO = DenormIn | UnderFlow;
// The final result is Inexact if any rounding occurred ((i.e., R or S
// is one), or (if the result overflows ) or (if the result underflows and the
// underflow trap is not enabled)) and (value of the result was not previous set
// by an exception case).
assign Inexact = (R|S|OverFlow|(UnderFlow&~UnEn))&Valid;
// Set the IEEE Exception Flags: Inexact, Underflow, Overflow, Div_By_0,
// Invlalid.
assign Flags = {UnderFlow, VSS, OverFlow, Invalid, Inexact};
// Determine the final result.
// The sign of the final result is one if the result is not zero and
// the sign of A is one, or if the result is zero and the the rounding
// mode is round-to-minus infinity. The final result is zero, if exp_valid
// is zero. If underflow occurs, then the result is set to zero.
//
// For Zero (goes equally for subtraction although
// signs may alter operands sign):
// -0 + -0 = -0 (always)
// +0 + +0 = +0 (always)
// -0 + +0 = +0 (for RN, RZ, RU)
// -0 + +0 = -0 (for RD)
assign Rzero = ~exp_valid | UnderFlow;
assign Rsign = DenormIn ?
( ~(op_type[1] | op_type[0]) ?
( (sum[63] & (A_Norm | B_Norm) & (exp_A_unmodified[11] ^ exp_B_unmodified[11])) ?
~Asign : Asign)
: ( ((A_Norm ^ B_Norm) & (exp_A_unmodified[11] ~^ exp_B_unmodified[11])) ?
(normal_underflow ? ~Asign : Asign) : Asign)
)
: ( ((Asign&exp_valid |
(sel_inv[2]&~sel_inv[1]&sel_inv[0]&rm[1]&rm[0] |
sel_inv[2]&sel_inv[1]&~sel_inv[0] |
~exp_valid&rm[1]&rm[0]&~sel_inv[2] |
UnderFlow&rm[1]&rm[0])) & ~sel_inv[3]) |
(Asign & sel_inv[3]) );
// The exponent of the final result is zero if the final result is
// zero or a denorm, all ones if the final result is NaN or Infinite
// or overflow occurred and the magnitude of the number is
// not rounded toward from zero, and all ones with an LSB of zero
// if overflow occurred and the magnitude of the number is
// rounded toward zero. If the result is single precision,
// Texp[7] shoud be inverted. When the Overflow trap is enabled (OvEn = 1)
// and overflow occurs and the operation is not conversion, bits 10 and 9 are
// inverted for double precision, and bits 7 and 6 are inverted for single precision.
assign Round_zero = ~rm[1]&rm[0] | ~Asign&rm[0] | Asign&rm[1]&~rm[0];
assign VeryLarge = OverFlow & ~OvEn;
assign Infinite = (VeryLarge & ~Round_zero) | (~sel_inv[2] & sel_inv[1]);
assign Largest = VeryLarge & Round_zero;
assign Adj_exp = OverFlow & OvEn;
assign Rexp[10:1] = ({10{~Valid}} |
{Texp[10]&~Adj_exp, Texp[9]&~Adj_exp, Texp[8],
(Texp[7]^P)&~(Adj_exp&P), Texp[6]&~(Adj_exp&P), Texp[5:1]} |
{10{VeryLarge}})&{10{~Rzero | NaN}};
assign Rexp[0] = ({~Valid} | Texp[0] | Infinite)&(~Rzero | NaN)&~Largest;
// The denormalized rounded exponent uses the overflow/underflow values
// computed in the fpadd component to round the exponent up or down
// Depending on the operation and the signs of the orignal operands,
// underflow may or may not be needed to round.
assign Rexp_denorm = DenormIn ?
((~op_type[1] & op_type[0]) ?
( ((A_Norm != B_Norm) & (exp_A_unmodified[11] == exp_B_unmodified[11])) ?
( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) )
: ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) )
: ( ((A_Norm != B_Norm) & (exp_A_unmodified[11] != exp_B_unmodified[11])) ?
( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) )
: ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) )
) :
Rexp; //KEP used to be all of exp_A_unmodified
// If the result is zero or infinity, the mantissa is all zeros.
// If the result is NaN, the mantissa is 10...0
// If the result the largest floating point number, the mantissa
// is all ones. Otherwise, the mantissa is not changed.
// If operation is denormalized, take the mantissa directly from
// its normalized value.
assign Rmant[51] = Largest | NaN | (Tmant[51]&~Infinite&~Rzero);
assign Rmant[50:0] = {51{Largest}} | (Tmant[50:0]&{51{~Infinite&Valid&~Rzero}});
assign ShiftMant = A[51:0];
// For single precision, the 8 least significant bits of the exponent
// and 23 most significant bits of the mantissa contain bits used
// for the final result. A double precision result is returned if
// overflow has occurred, the overflow trap is enabled, and a conversion
// is being performed.
assign Result = DenormIn ? {Rsign, Rexp_denorm, ShiftMant} : (P ? {{32{1'b1}}, Rsign, Rexp[7:0], Rmant[51:29]}
: {Rsign, Rexp, Rmant});
endmodule // rounder

View File

@ -1,212 +0,0 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Floating point divider/square root rounder unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
module rounder_div (
input logic [1:0] rm,
input logic P,
input logic OvEn,
input logic UnEn,
input logic [12:0] exp_diff,
input logic [2:0] sel_inv,
input logic Invalid,
input logic SignR,
input logic [63:0] Float1,
input logic [63:0] Float2,
input logic XNaNQ,
input logic YNaNQ,
input logic XZeroQ,
input logic YZeroQ,
input logic XInfQ,
input logic YInfQ,
input logic op_type,
input logic [59:0] q1,
input logic [59:0] qm1,
input logic [59:0] qp1,
input logic [59:0] q0,
input logic [59:0] qm0,
input logic [59:0] qp0,
input logic [119:0] regr_out,
output logic [63:0] Result,
output logic [4:0] Flags
);
logic Rsign;
logic [10:0] Rexp;
logic [12:0] Texp;
logic [51:0] Rmant;
logic [59:0] Tmant;
logic [51:0] Smant;
logic Rzero;
logic Gdp, Gsp, G;
logic UnFlow_SP, UnFlow_DP, UnderFlow;
logic OvFlow_SP, OvFlow_DP, OverFlow;
logic Inexact;
logic Round_zero;
logic Infinite;
logic VeryLarge;
logic Largest;
logic Div0;
logic Adj_exp;
logic Valid;
logic NaN;
logic Texp_l7z;
logic Texp_l7o;
logic OvCon;
logic zero_rem;
logic [1:0] mux_mant;
logic sign_rem;
logic [59:0] q, qm, qp;
logic exp_ovf;
logic [50:0] NaN_out;
logic NaN_Sign_out;
logic Sign_out;
// Remainder = 0?
assign zero_rem = ~(|regr_out);
// Remainder Sign
assign sign_rem = ~regr_out[119];
// choose correct Guard bit [1,2) or [0,1)
assign Gdp = q1[59] ? q1[6] : q0[6];
assign Gsp = q1[59] ? q1[35] : q0[35];
assign G = P ? Gsp : Gdp;
// Selection of Rounding (from logic/switching)
assign mux_mant[1] = (SignR&rm[1]&rm[0]&G) | (!SignR&rm[1]&!rm[0]&G) |
(!rm[1]&!rm[0]&G&!sign_rem) |
(SignR&rm[1]&rm[0]&!zero_rem&!sign_rem) |
(!SignR&rm[1]&!rm[0]&!zero_rem&!sign_rem);
assign mux_mant[0] = (!SignR&rm[0]&!G&!zero_rem&sign_rem) |
(!rm[1]&rm[0]&!G&!zero_rem&sign_rem) |
(SignR&rm[1]&!rm[0]&!G&!zero_rem&sign_rem);
// Which Q?
mux2 #(60) mx1 (q0, q1, q1[59], q);
mux2 #(60) mx2 (qm0, qm1, q1[59], qm);
mux2 #(60) mx3 (qp0, qp1, q1[59], qp);
// Choose Q, Q+1, Q-1
mux3 #(60) mx4 (q, qm, qp, mux_mant, Tmant);
assign Smant = Tmant[58:7];
// Compute the value of the exponent
// exponent is modified if we choose:
// 1.) we choose any qm0, qp0, q0 (since we shift mant)
// 2.) we choose qp and we overflow (for RU)
assign exp_ovf = |{qp[58:36], (qp[35:7] & {29{~P}})};
assign Texp = exp_diff - {{12{1'b0}}, ~q1[59]} + {{12{1'b0}}, mux_mant[1]&qp1[59]&~exp_ovf};
// Overflow only occurs for double precision, if Texp[10] to Texp[0] are
// all ones. To encourage sharing with single precision overflow detection,
// the lower 7 bits are tested separately.
assign Texp_l7o = Texp[6]&Texp[5]&Texp[4]&Texp[3]&Texp[2]&Texp[1]&Texp[0];
assign OvFlow_DP = (~Texp[12]&Texp[11]) | (Texp[10]&Texp[9]&Texp[8]&Texp[7]&Texp_l7o);
// Overflow occurs for single precision if (Texp[10] is one) and
// ((Texp[9] or Texp[8] or Texp[7]) is one) or (Texp[6] to Texp[0]
// are all ones.
assign OvFlow_SP = Texp[10]&(Texp[9]|Texp[8]|Texp[7]|Texp_l7o);
// Underflow occurs for double precision if (Texp[11]/Texp[10] is one) or
// Texp[10] to Texp[0] are all zeros.
assign Texp_l7z = ~Texp[6]&~Texp[5]&~Texp[4]&~Texp[3]&~Texp[2]&~Texp[1]&~Texp[0];
assign UnFlow_DP = (Texp[12]&Texp[11]) | ~Texp[11]&~Texp[10]&~Texp[9]&~Texp[8]&~Texp[7]&Texp_l7z;
// Underflow occurs for single precision if (Texp[10] is zero) and
// (Texp[9] or Texp[8] or Texp[7]) is zero.
assign UnFlow_SP = ~Texp[10]&(~Texp[9]|~Texp[8]|~Texp[7]|Texp_l7z);
// Set the overflow and underflow flags. They should not be set if
// the input was infinite or NaN or the output of the adder is zero.
// 00 = Valid
// 10 = NaN
assign Valid = ~sel_inv[2]&~sel_inv[1]&~sel_inv[0];
assign NaN = sel_inv[2]&sel_inv[1]&sel_inv[0];
assign UnderFlow = (P & UnFlow_SP | UnFlow_DP) & Valid;
assign OverFlow = (P & OvFlow_SP | OvFlow_DP) & Valid;
assign Div0 = YZeroQ&~XZeroQ&~op_type&~NaN;
// The final result is Inexact if any rounding occurred ((i.e., R or S
// is one), or (if the result overflows ) or (if the result underflows and the
// underflow trap is not enabled)) and (value of the result was not previous set
// by an exception case).
assign Inexact = (G|~zero_rem|OverFlow|(UnderFlow&~UnEn))&Valid;
// Set the IEEE Exception Flags: Inexact, Underflow, Overflow, Div_By_0,
// Invlalid.
assign Flags = {Inexact, UnderFlow, OverFlow, Div0, Invalid};
// Determine sign
assign Rzero = UnderFlow | (~sel_inv[2]&sel_inv[1]&sel_inv[0]);
assign Rsign = SignR;
// The exponent of the final result is zero if the final result is
// zero or a denorm, all ones if the final result is NaN or Infinite
// or overflow occurred and the magnitude of the number is
// not rounded toward from zero, and all ones with an LSB of zero
// if overflow occurred and the magnitude of the number is
// rounded toward zero. If the result is single precision,
// Texp[7] shoud be inverted. When the Overflow trap is enabled (OvEn = 1)
// and overflow occurs and the operation is not conversion, bits 10 and 9 are
// inverted for double precision, and bits 7 and 6 are inverted for single precision.
assign Round_zero = ~rm[1]&rm[0] | ~SignR&rm[0] | SignR&rm[1]&~rm[0];
assign VeryLarge = OverFlow & ~OvEn;
assign Infinite = (VeryLarge & ~Round_zero) | sel_inv[1];
assign Largest = VeryLarge & Round_zero;
assign Adj_exp = OverFlow & OvEn;
assign Rexp[10:1] = ({10{~Valid}} |
{Texp[10]&~Adj_exp, Texp[9]&~Adj_exp, Texp[8],
(Texp[7]^P)&~(Adj_exp&P), Texp[6]&~(Adj_exp&P), Texp[5:1]} |
{10{VeryLarge}})&{10{~Rzero | NaN}};
assign Rexp[0] = ({~Valid} | Texp[0] | Infinite)&(~Rzero | NaN)&~Largest;
// If the result is zero or infinity, the mantissa is all zeros.
// If the result is NaN, the mantissa is 10...0
// If the result the largest floating point number, the mantissa
// is all ones. Otherwise, the mantissa is not changed.
assign NaN_out = ~XNaNQ&YNaNQ ? Float2[50:0] : Float1[50:0];
assign NaN_Sign_out = ~XNaNQ&YNaNQ ? Float2[63] : Float1[63];
assign Sign_out = (XZeroQ&YZeroQ | XInfQ&YInfQ)&~op_type | Rsign&~XNaNQ&~YNaNQ |
NaN_Sign_out&(XNaNQ|YNaNQ);
// FIXME (jes) - Imperas gives sNaN a Sign=0 where x86 gives Sign=1
// | Float1[63]&op_type; (logic to fix this but removed for now)
assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero);
assign Rmant[50:0] = ({51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}) |
(NaN_out&{51{NaN}}))&({51{~(op_type&Float1[63]&~XZeroQ)}});
// For single precision, the 8 least significant bits of the exponent
// and 23 most significant bits of the mantissa contain bits used
// for the final result. A double precision result is returned if
// overflow has occurred, the overflow trap is enabled, and a conversion
// is being performed.
assign OvCon = OverFlow & OvEn;
assign Result = (P&~OvCon) ? { {32{1'b1}}, Sign_out, Rexp[7:0], Rmant[51:29]}
: {Sign_out, Rexp, Rmant};
endmodule // rounder

View File

@ -1,10 +1,38 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: Sign calculation ofr rounding
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module roundsign(
input logic PSgnM, ZSgnEffM,
input logic InvZM,
input logic XSgnM,
input logic YSgnM,
input logic Xs,
input logic Ys,
input logic NegSumM,
input logic FmaOp,
input logic DivOp,
@ -24,7 +52,7 @@ module roundsign(
// assign FmaResSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | (ZSgnEffM&PSgnM);
assign DivSgn = XSgnM^YSgnM;
assign DivSgn = Xs^Ys;
// Sign for rounding calulation
assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp);

View File

@ -1,170 +0,0 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
module sbtm_a0 (input logic [6:0] a,
output logic [12:0] y);
always_comb
case(a)
7'b0000000: y = 13'b1111111100010;
7'b0000001: y = 13'b1111110100011;
7'b0000010: y = 13'b1111101100101;
7'b0000011: y = 13'b1111100101000;
7'b0000100: y = 13'b1111011101100;
7'b0000101: y = 13'b1111010110000;
7'b0000110: y = 13'b1111001110110;
7'b0000111: y = 13'b1111000111100;
7'b0001000: y = 13'b1111000000100;
7'b0001001: y = 13'b1110111001100;
7'b0001010: y = 13'b1110110010101;
7'b0001011: y = 13'b1110101011110;
7'b0001100: y = 13'b1110100101001;
7'b0001101: y = 13'b1110011110100;
7'b0001110: y = 13'b1110011000000;
7'b0001111: y = 13'b1110010001101;
7'b0010000: y = 13'b1110001011010;
7'b0010001: y = 13'b1110000101000;
7'b0010010: y = 13'b1101111110111;
7'b0010011: y = 13'b1101111000110;
7'b0010100: y = 13'b1101110010111;
7'b0010101: y = 13'b1101101100111;
7'b0010110: y = 13'b1101100111001;
7'b0010111: y = 13'b1101100001011;
7'b0011000: y = 13'b1101011011101;
7'b0011001: y = 13'b1101010110001;
7'b0011010: y = 13'b1101010000100;
7'b0011011: y = 13'b1101001011001;
7'b0011100: y = 13'b1101000101110;
7'b0011101: y = 13'b1101000000011;
7'b0011110: y = 13'b1100111011001;
7'b0011111: y = 13'b1100110101111;
7'b0100000: y = 13'b1100110000110;
7'b0100001: y = 13'b1100101011110;
7'b0100010: y = 13'b1100100110110;
7'b0100011: y = 13'b1100100001111;
7'b0100100: y = 13'b1100011101000;
7'b0100101: y = 13'b1100011000001;
7'b0100110: y = 13'b1100010011011;
7'b0100111: y = 13'b1100001110101;
7'b0101000: y = 13'b1100001010000;
7'b0101001: y = 13'b1100000101011;
7'b0101010: y = 13'b1100000000111;
7'b0101011: y = 13'b1011111100011;
7'b0101100: y = 13'b1011111000000;
7'b0101101: y = 13'b1011110011101;
7'b0101110: y = 13'b1011101111010;
7'b0101111: y = 13'b1011101011000;
7'b0110000: y = 13'b1011100110110;
7'b0110001: y = 13'b1011100010101;
7'b0110010: y = 13'b1011011110011;
7'b0110011: y = 13'b1011011010011;
7'b0110100: y = 13'b1011010110010;
7'b0110101: y = 13'b1011010010010;
7'b0110110: y = 13'b1011001110011;
7'b0110111: y = 13'b1011001010011;
7'b0111000: y = 13'b1011000110100;
7'b0111001: y = 13'b1011000010110;
7'b0111010: y = 13'b1010111110111;
7'b0111011: y = 13'b1010111011001;
7'b0111100: y = 13'b1010110111100;
7'b0111101: y = 13'b1010110011110;
7'b0111110: y = 13'b1010110000001;
7'b0111111: y = 13'b1010101100100;
7'b1000000: y = 13'b1010101001000;
7'b1000001: y = 13'b1010100101100;
7'b1000010: y = 13'b1010100010000;
7'b1000011: y = 13'b1010011110100;
7'b1000100: y = 13'b1010011011001;
7'b1000101: y = 13'b1010010111110;
7'b1000110: y = 13'b1010010100011;
7'b1000111: y = 13'b1010010001001;
7'b1001000: y = 13'b1010001101111;
7'b1001001: y = 13'b1010001010101;
7'b1001010: y = 13'b1010000111011;
7'b1001011: y = 13'b1010000100001;
7'b1001100: y = 13'b1010000001000;
7'b1001101: y = 13'b1001111101111;
7'b1001110: y = 13'b1001111010111;
7'b1001111: y = 13'b1001110111110;
7'b1010000: y = 13'b1001110100110;
7'b1010001: y = 13'b1001110001110;
7'b1010010: y = 13'b1001101110110;
7'b1010011: y = 13'b1001101011111;
7'b1010100: y = 13'b1001101000111;
7'b1010101: y = 13'b1001100110000;
7'b1010110: y = 13'b1001100011001;
7'b1010111: y = 13'b1001100000010;
7'b1011000: y = 13'b1001011101100;
7'b1011001: y = 13'b1001011010110;
7'b1011010: y = 13'b1001011000000;
7'b1011011: y = 13'b1001010101010;
7'b1011100: y = 13'b1001010010100;
7'b1011101: y = 13'b1001001111111;
7'b1011110: y = 13'b1001001101001;
7'b1011111: y = 13'b1001001010100;
7'b1100000: y = 13'b1001000111111;
7'b1100001: y = 13'b1001000101011;
7'b1100010: y = 13'b1001000010110;
7'b1100011: y = 13'b1001000000010;
7'b1100100: y = 13'b1000111101110;
7'b1100101: y = 13'b1000111011010;
7'b1100110: y = 13'b1000111000110;
7'b1100111: y = 13'b1000110110010;
7'b1101000: y = 13'b1000110011111;
7'b1101001: y = 13'b1000110001011;
7'b1101010: y = 13'b1000101111000;
7'b1101011: y = 13'b1000101100101;
7'b1101100: y = 13'b1000101010010;
7'b1101101: y = 13'b1000101000000;
7'b1101110: y = 13'b1000100101101;
7'b1101111: y = 13'b1000100011011;
7'b1110000: y = 13'b1000100001001;
7'b1110001: y = 13'b1000011110110;
7'b1110010: y = 13'b1000011100101;
7'b1110011: y = 13'b1000011010011;
7'b1110100: y = 13'b1000011000001;
7'b1110101: y = 13'b1000010110000;
7'b1110110: y = 13'b1000010011110;
7'b1110111: y = 13'b1000010001101;
7'b1111000: y = 13'b1000001111100;
7'b1111001: y = 13'b1000001101011;
7'b1111010: y = 13'b1000001011010;
7'b1111011: y = 13'b1000001001010;
7'b1111100: y = 13'b1000000111001;
7'b1111101: y = 13'b1000000101001;
7'b1111110: y = 13'b1000000011001;
7'b1111111: y = 13'b1000000001001;
default: y = 13'bxxxxxxxxxxxxx;
endcase // case (a)
endmodule // sbtm_a0

View File

@ -1,170 +0,0 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
module sbtm_a1 (input logic [6:0] a,
output logic [4:0] y);
always_comb
case(a)
7'b0000000: y = 5'b11100;
7'b0000001: y = 5'b11000;
7'b0000010: y = 5'b10100;
7'b0000011: y = 5'b10000;
7'b0000100: y = 5'b01101;
7'b0000101: y = 5'b01001;
7'b0000110: y = 5'b00101;
7'b0000111: y = 5'b00001;
7'b0001000: y = 5'b11001;
7'b0001001: y = 5'b10101;
7'b0001010: y = 5'b10010;
7'b0001011: y = 5'b01111;
7'b0001100: y = 5'b01011;
7'b0001101: y = 5'b01000;
7'b0001110: y = 5'b00101;
7'b0001111: y = 5'b00001;
7'b0010000: y = 5'b10110;
7'b0010001: y = 5'b10011;
7'b0010010: y = 5'b10000;
7'b0010011: y = 5'b01101;
7'b0010100: y = 5'b01010;
7'b0010101: y = 5'b00111;
7'b0010110: y = 5'b00100;
7'b0010111: y = 5'b00001;
7'b0011000: y = 5'b10100;
7'b0011001: y = 5'b10001;
7'b0011010: y = 5'b01110;
7'b0011011: y = 5'b01100;
7'b0011100: y = 5'b01001;
7'b0011101: y = 5'b00110;
7'b0011110: y = 5'b00100;
7'b0011111: y = 5'b00001;
7'b0100000: y = 5'b10010;
7'b0100001: y = 5'b01111;
7'b0100010: y = 5'b01101;
7'b0100011: y = 5'b01010;
7'b0100100: y = 5'b01000;
7'b0100101: y = 5'b00110;
7'b0100110: y = 5'b00011;
7'b0100111: y = 5'b00001;
7'b0101000: y = 5'b10000;
7'b0101001: y = 5'b01110;
7'b0101010: y = 5'b01100;
7'b0101011: y = 5'b01001;
7'b0101100: y = 5'b00111;
7'b0101101: y = 5'b00101;
7'b0101110: y = 5'b00011;
7'b0101111: y = 5'b00001;
7'b0110000: y = 5'b01111;
7'b0110001: y = 5'b01101;
7'b0110010: y = 5'b01011;
7'b0110011: y = 5'b01001;
7'b0110100: y = 5'b00111;
7'b0110101: y = 5'b00101;
7'b0110110: y = 5'b00011;
7'b0110111: y = 5'b00001;
7'b0111000: y = 5'b01101;
7'b0111001: y = 5'b01100;
7'b0111010: y = 5'b01010;
7'b0111011: y = 5'b01000;
7'b0111100: y = 5'b00110;
7'b0111101: y = 5'b00100;
7'b0111110: y = 5'b00010;
7'b0111111: y = 5'b00000;
7'b1000000: y = 5'b01100;
7'b1000001: y = 5'b01011;
7'b1000010: y = 5'b01001;
7'b1000011: y = 5'b00111;
7'b1000100: y = 5'b00101;
7'b1000101: y = 5'b00100;
7'b1000110: y = 5'b00010;
7'b1000111: y = 5'b00000;
7'b1001000: y = 5'b01011;
7'b1001001: y = 5'b01010;
7'b1001010: y = 5'b01000;
7'b1001011: y = 5'b00111;
7'b1001100: y = 5'b00101;
7'b1001101: y = 5'b00011;
7'b1001110: y = 5'b00010;
7'b1001111: y = 5'b00000;
7'b1010000: y = 5'b01010;
7'b1010001: y = 5'b01001;
7'b1010010: y = 5'b01000;
7'b1010011: y = 5'b00110;
7'b1010100: y = 5'b00101;
7'b1010101: y = 5'b00011;
7'b1010110: y = 5'b00010;
7'b1010111: y = 5'b00000;
7'b1011000: y = 5'b01010;
7'b1011001: y = 5'b01000;
7'b1011010: y = 5'b00111;
7'b1011011: y = 5'b00110;
7'b1011100: y = 5'b00100;
7'b1011101: y = 5'b00011;
7'b1011110: y = 5'b00010;
7'b1011111: y = 5'b00000;
7'b1100000: y = 5'b01001;
7'b1100001: y = 5'b01000;
7'b1100010: y = 5'b00110;
7'b1100011: y = 5'b00101;
7'b1100100: y = 5'b00100;
7'b1100101: y = 5'b00011;
7'b1100110: y = 5'b00001;
7'b1100111: y = 5'b00000;
7'b1101000: y = 5'b01000;
7'b1101001: y = 5'b00111;
7'b1101010: y = 5'b00110;
7'b1101011: y = 5'b00101;
7'b1101100: y = 5'b00100;
7'b1101101: y = 5'b00010;
7'b1101110: y = 5'b00001;
7'b1101111: y = 5'b00000;
7'b1110000: y = 5'b01000;
7'b1110001: y = 5'b00111;
7'b1110010: y = 5'b00110;
7'b1110011: y = 5'b00100;
7'b1110100: y = 5'b00011;
7'b1110101: y = 5'b00010;
7'b1110110: y = 5'b00001;
7'b1110111: y = 5'b00000;
7'b1111000: y = 5'b00111;
7'b1111001: y = 5'b00110;
7'b1111010: y = 5'b00101;
7'b1111011: y = 5'b00100;
7'b1111100: y = 5'b00011;
7'b1111101: y = 5'b00010;
7'b1111110: y = 5'b00001;
7'b1111111: y = 5'b00000;
default: y = 5'bxxxxx;
endcase // case (a)
endmodule // sbtm_a0

View File

@ -1,234 +0,0 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
module sbtm_a2 (input logic [7:0] a,
output logic [13:0] y);
always_comb
case(a)
8'b01000000: y = 14'b10110100010111;
8'b01000001: y = 14'b10110010111111;
8'b01000010: y = 14'b10110001101000;
8'b01000011: y = 14'b10110000010011;
8'b01000100: y = 14'b10101111000001;
8'b01000101: y = 14'b10101101110000;
8'b01000110: y = 14'b10101100100001;
8'b01000111: y = 14'b10101011010011;
8'b01001000: y = 14'b10101010000111;
8'b01001001: y = 14'b10101000111101;
8'b01001010: y = 14'b10100111110100;
8'b01001011: y = 14'b10100110101101;
8'b01001100: y = 14'b10100101100111;
8'b01001101: y = 14'b10100100100010;
8'b01001110: y = 14'b10100011011111;
8'b01001111: y = 14'b10100010011101;
8'b01010000: y = 14'b10100001011100;
8'b01010001: y = 14'b10100000011100;
8'b01010010: y = 14'b10011111011110;
8'b01010011: y = 14'b10011110100001;
8'b01010100: y = 14'b10011101100100;
8'b01010101: y = 14'b10011100101001;
8'b01010110: y = 14'b10011011101111;
8'b01010111: y = 14'b10011010110110;
8'b01011000: y = 14'b10011001111110;
8'b01011001: y = 14'b10011001000110;
8'b01011010: y = 14'b10011000010000;
8'b01011011: y = 14'b10010111011011;
8'b01011100: y = 14'b10010110100110;
8'b01011101: y = 14'b10010101110011;
8'b01011110: y = 14'b10010101000000;
8'b01011111: y = 14'b10010100001110;
8'b01100000: y = 14'b10010011011100;
8'b01100001: y = 14'b10010010101100;
8'b01100010: y = 14'b10010001111100;
8'b01100011: y = 14'b10010001001101;
8'b01100100: y = 14'b10010000011111;
8'b01100101: y = 14'b10001111110001;
8'b01100110: y = 14'b10001111000100;
8'b01100111: y = 14'b10001110011000;
8'b01101000: y = 14'b10001101101100;
8'b01101001: y = 14'b10001101000001;
8'b01101010: y = 14'b10001100010110;
8'b01101011: y = 14'b10001011101100;
8'b01101100: y = 14'b10001011000011;
8'b01101101: y = 14'b10001010011010;
8'b01101110: y = 14'b10001001110010;
8'b01101111: y = 14'b10001001001010;
8'b01110000: y = 14'b10001000100011;
8'b01110001: y = 14'b10000111111101;
8'b01110010: y = 14'b10000111010111;
8'b01110011: y = 14'b10000110110001;
8'b01110100: y = 14'b10000110001100;
8'b01110101: y = 14'b10000101100111;
8'b01110110: y = 14'b10000101000011;
8'b01110111: y = 14'b10000100011111;
8'b01111000: y = 14'b10000011111100;
8'b01111001: y = 14'b10000011011001;
8'b01111010: y = 14'b10000010110111;
8'b01111011: y = 14'b10000010010101;
8'b01111100: y = 14'b10000001110011;
8'b01111101: y = 14'b10000001010010;
8'b01111110: y = 14'b10000000110001;
8'b01111111: y = 14'b10000000010001;
8'b10000000: y = 14'b01111111110001;
8'b10000001: y = 14'b01111111010001;
8'b10000010: y = 14'b01111110110010;
8'b10000011: y = 14'b01111110010011;
8'b10000100: y = 14'b01111101110101;
8'b10000101: y = 14'b01111101010110;
8'b10000110: y = 14'b01111100111001;
8'b10000111: y = 14'b01111100011011;
8'b10001000: y = 14'b01111011111110;
8'b10001001: y = 14'b01111011100001;
8'b10001010: y = 14'b01111011000100;
8'b10001011: y = 14'b01111010101000;
8'b10001100: y = 14'b01111010001100;
8'b10001101: y = 14'b01111001110000;
8'b10001110: y = 14'b01111001010101;
8'b10001111: y = 14'b01111000111010;
8'b10010000: y = 14'b01111000011111;
8'b10010001: y = 14'b01111000000100;
8'b10010010: y = 14'b01110111101010;
8'b10010011: y = 14'b01110111010000;
8'b10010100: y = 14'b01110110110110;
8'b10010101: y = 14'b01110110011101;
8'b10010110: y = 14'b01110110000100;
8'b10010111: y = 14'b01110101101011;
8'b10011000: y = 14'b01110101010010;
8'b10011001: y = 14'b01110100111001;
8'b10011010: y = 14'b01110100100001;
8'b10011011: y = 14'b01110100001001;
8'b10011100: y = 14'b01110011110001;
8'b10011101: y = 14'b01110011011010;
8'b10011110: y = 14'b01110011000010;
8'b10011111: y = 14'b01110010101011;
8'b10100000: y = 14'b01110010010100;
8'b10100001: y = 14'b01110001111110;
8'b10100010: y = 14'b01110001100111;
8'b10100011: y = 14'b01110001010001;
8'b10100100: y = 14'b01110000111011;
8'b10100101: y = 14'b01110000100101;
8'b10100110: y = 14'b01110000001111;
8'b10100111: y = 14'b01101111111010;
8'b10101000: y = 14'b01101111100101;
8'b10101001: y = 14'b01101111010000;
8'b10101010: y = 14'b01101110111011;
8'b10101011: y = 14'b01101110100110;
8'b10101100: y = 14'b01101110010001;
8'b10101101: y = 14'b01101101111101;
8'b10101110: y = 14'b01101101101001;
8'b10101111: y = 14'b01101101010101;
8'b10110000: y = 14'b01101101000001;
8'b10110001: y = 14'b01101100101101;
8'b10110010: y = 14'b01101100011010;
8'b10110011: y = 14'b01101100000110;
8'b10110100: y = 14'b01101011110011;
8'b10110101: y = 14'b01101011100000;
8'b10110110: y = 14'b01101011001101;
8'b10110111: y = 14'b01101010111010;
8'b10111000: y = 14'b01101010101000;
8'b10111001: y = 14'b01101010010101;
8'b10111010: y = 14'b01101010000011;
8'b10111011: y = 14'b01101001110001;
8'b10111100: y = 14'b01101001011111;
8'b10111101: y = 14'b01101001001101;
8'b10111110: y = 14'b01101000111100;
8'b10111111: y = 14'b01101000101010;
8'b11000000: y = 14'b01101000011001;
8'b11000001: y = 14'b01101000000111;
8'b11000010: y = 14'b01100111110110;
8'b11000011: y = 14'b01100111100101;
8'b11000100: y = 14'b01100111010100;
8'b11000101: y = 14'b01100111000011;
8'b11000110: y = 14'b01100110110011;
8'b11000111: y = 14'b01100110100010;
8'b11001000: y = 14'b01100110010010;
8'b11001001: y = 14'b01100110000010;
8'b11001010: y = 14'b01100101110010;
8'b11001011: y = 14'b01100101100001;
8'b11001100: y = 14'b01100101010010;
8'b11001101: y = 14'b01100101000010;
8'b11001110: y = 14'b01100100110010;
8'b11001111: y = 14'b01100100100011;
8'b11010000: y = 14'b01100100010011;
8'b11010001: y = 14'b01100100000100;
8'b11010010: y = 14'b01100011110101;
8'b11010011: y = 14'b01100011100101;
8'b11010100: y = 14'b01100011010110;
8'b11010101: y = 14'b01100011000111;
8'b11010110: y = 14'b01100010111001;
8'b11010111: y = 14'b01100010101010;
8'b11011000: y = 14'b01100010011011;
8'b11011001: y = 14'b01100010001101;
8'b11011010: y = 14'b01100001111110;
8'b11011011: y = 14'b01100001110000;
8'b11011100: y = 14'b01100001100010;
8'b11011101: y = 14'b01100001010100;
8'b11011110: y = 14'b01100001000110;
8'b11011111: y = 14'b01100000111000;
8'b11100000: y = 14'b01100000101010;
8'b11100001: y = 14'b01100000011100;
8'b11100010: y = 14'b01100000001111;
8'b11100011: y = 14'b01100000000001;
8'b11100100: y = 14'b01011111110100;
8'b11100101: y = 14'b01011111100110;
8'b11100110: y = 14'b01011111011001;
8'b11100111: y = 14'b01011111001100;
8'b11101000: y = 14'b01011110111111;
8'b11101001: y = 14'b01011110110010;
8'b11101010: y = 14'b01011110100101;
8'b11101011: y = 14'b01011110011000;
8'b11101100: y = 14'b01011110001011;
8'b11101101: y = 14'b01011101111110;
8'b11101110: y = 14'b01011101110010;
8'b11101111: y = 14'b01011101100101;
8'b11110000: y = 14'b01011101011001;
8'b11110001: y = 14'b01011101001100;
8'b11110010: y = 14'b01011101000000;
8'b11110011: y = 14'b01011100110100;
8'b11110100: y = 14'b01011100101000;
8'b11110101: y = 14'b01011100011100;
8'b11110110: y = 14'b01011100010000;
8'b11110111: y = 14'b01011100000100;
8'b11111000: y = 14'b01011011111000;
8'b11111001: y = 14'b01011011101100;
8'b11111010: y = 14'b01011011100000;
8'b11111011: y = 14'b01011011010101;
8'b11111100: y = 14'b01011011001001;
8'b11111101: y = 14'b01011010111101;
8'b11111110: y = 14'b01011010110010;
8'b11111111: y = 14'b01011010100111;
default: y = 14'bxxxxxxxxxxxxxx;
endcase // case (a)
endmodule // sbtm_a0

View File

@ -1,230 +0,0 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
module sbtm_a3 (input logic [7:0] a,
output logic [5:0] y);
always_comb
case(a)
8'b01000000: y = 6'b100110;
8'b01000001: y = 6'b100001;
8'b01000010: y = 6'b011100;
8'b01000011: y = 6'b010111;
8'b01000100: y = 6'b010010;
8'b01000101: y = 6'b001100;
8'b01000110: y = 6'b000111;
8'b01000111: y = 6'b000010;
8'b01001000: y = 6'b100000;
8'b01001001: y = 6'b011100;
8'b01001010: y = 6'b011000;
8'b01001011: y = 6'b010011;
8'b01001100: y = 6'b001111;
8'b01001101: y = 6'b001010;
8'b01001110: y = 6'b000110;
8'b01001111: y = 6'b000010;
8'b01010000: y = 6'b011100;
8'b01010001: y = 6'b011000;
8'b01010010: y = 6'b010100;
8'b01010011: y = 6'b010000;
8'b01010100: y = 6'b001101;
8'b01010101: y = 6'b001001;
8'b01010110: y = 6'b000101;
8'b01010111: y = 6'b000001;
8'b01011000: y = 6'b011000;
8'b01011001: y = 6'b010101;
8'b01011010: y = 6'b010010;
8'b01011011: y = 6'b001110;
8'b01011100: y = 6'b001011;
8'b01011101: y = 6'b001000;
8'b01011110: y = 6'b000100;
8'b01011111: y = 6'b000001;
8'b01100000: y = 6'b010101;
8'b01100001: y = 6'b010010;
8'b01100010: y = 6'b001111;
8'b01100011: y = 6'b001101;
8'b01100100: y = 6'b001010;
8'b01100101: y = 6'b000111;
8'b01100110: y = 6'b000100;
8'b01100111: y = 6'b000001;
8'b01101000: y = 6'b010011;
8'b01101001: y = 6'b010000;
8'b01101010: y = 6'b001110;
8'b01101011: y = 6'b001011;
8'b01101100: y = 6'b001001;
8'b01101101: y = 6'b000110;
8'b01101110: y = 6'b000011;
8'b01101111: y = 6'b000001;
8'b01110000: y = 6'b010001;
8'b01110001: y = 6'b001111;
8'b01110010: y = 6'b001100;
8'b01110011: y = 6'b001010;
8'b01110100: y = 6'b001000;
8'b01110101: y = 6'b000101;
8'b01110110: y = 6'b000011;
8'b01110111: y = 6'b000001;
8'b01111000: y = 6'b001111;
8'b01111001: y = 6'b001101;
8'b01111010: y = 6'b001011;
8'b01111011: y = 6'b001001;
8'b01111100: y = 6'b000111;
8'b01111101: y = 6'b000101;
8'b01111110: y = 6'b000011;
8'b01111111: y = 6'b000001;
8'b10000000: y = 6'b001110;
8'b10000001: y = 6'b001100;
8'b10000010: y = 6'b001010;
8'b10000011: y = 6'b001000;
8'b10000100: y = 6'b000110;
8'b10000101: y = 6'b000100;
8'b10000110: y = 6'b000010;
8'b10000111: y = 6'b000000;
8'b10001000: y = 6'b001101;
8'b10001001: y = 6'b001011;
8'b10001010: y = 6'b001001;
8'b10001011: y = 6'b000111;
8'b10001100: y = 6'b000110;
8'b10001101: y = 6'b000100;
8'b10001110: y = 6'b000010;
8'b10001111: y = 6'b000000;
8'b10010000: y = 6'b001100;
8'b10010001: y = 6'b001010;
8'b10010010: y = 6'b001000;
8'b10010011: y = 6'b000111;
8'b10010100: y = 6'b000101;
8'b10010101: y = 6'b000100;
8'b10010110: y = 6'b000010;
8'b10010111: y = 6'b000000;
8'b10011000: y = 6'b001011;
8'b10011001: y = 6'b001001;
8'b10011010: y = 6'b001000;
8'b10011011: y = 6'b000110;
8'b10011100: y = 6'b000101;
8'b10011101: y = 6'b000011;
8'b10011110: y = 6'b000010;
8'b10011111: y = 6'b000000;
8'b10100000: y = 6'b001010;
8'b10100001: y = 6'b001000;
8'b10100010: y = 6'b000111;
8'b10100011: y = 6'b000110;
8'b10100100: y = 6'b000100;
8'b10100101: y = 6'b000011;
8'b10100110: y = 6'b000010;
8'b10100111: y = 6'b000000;
8'b10101000: y = 6'b001001;
8'b10101001: y = 6'b001000;
8'b10101010: y = 6'b000111;
8'b10101011: y = 6'b000101;
8'b10101100: y = 6'b000100;
8'b10101101: y = 6'b000011;
8'b10101110: y = 6'b000001;
8'b10101111: y = 6'b000000;
8'b10110000: y = 6'b001000;
8'b10110001: y = 6'b000111;
8'b10110010: y = 6'b000110;
8'b10110011: y = 6'b000101;
8'b10110100: y = 6'b000100;
8'b10110101: y = 6'b000010;
8'b10110110: y = 6'b000001;
8'b10110111: y = 6'b000000;
8'b10111000: y = 6'b001000;
8'b10111001: y = 6'b000111;
8'b10111010: y = 6'b000110;
8'b10111011: y = 6'b000101;
8'b10111100: y = 6'b000011;
8'b10111101: y = 6'b000010;
8'b10111110: y = 6'b000001;
8'b10111111: y = 6'b000000;
8'b11000000: y = 6'b000111;
8'b11000001: y = 6'b000110;
8'b11000010: y = 6'b000101;
8'b11000011: y = 6'b000100;
8'b11000100: y = 6'b000011;
8'b11000101: y = 6'b000010;
8'b11000110: y = 6'b000001;
8'b11000111: y = 6'b000000;
8'b11001000: y = 6'b000111;
8'b11001001: y = 6'b000110;
8'b11001010: y = 6'b000101;
8'b11001011: y = 6'b000100;
8'b11001100: y = 6'b000011;
8'b11001101: y = 6'b000010;
8'b11001110: y = 6'b000001;
8'b11001111: y = 6'b000000;
8'b11010000: y = 6'b000111;
8'b11010001: y = 6'b000110;
8'b11010010: y = 6'b000101;
8'b11010011: y = 6'b000100;
8'b11010100: y = 6'b000011;
8'b11010101: y = 6'b000010;
8'b11010110: y = 6'b000001;
8'b11010111: y = 6'b000000;
8'b11011000: y = 6'b000110;
8'b11011001: y = 6'b000101;
8'b11011010: y = 6'b000100;
8'b11011011: y = 6'b000011;
8'b11011100: y = 6'b000011;
8'b11011101: y = 6'b000010;
8'b11011110: y = 6'b000001;
8'b11011111: y = 6'b000000;
8'b11100000: y = 6'b000110;
8'b11100001: y = 6'b000101;
8'b11100010: y = 6'b000100;
8'b11100011: y = 6'b000011;
8'b11100100: y = 6'b000010;
8'b11100101: y = 6'b000010;
8'b11100110: y = 6'b000001;
8'b11100111: y = 6'b000000;
8'b11101000: y = 6'b000101;
8'b11101001: y = 6'b000101;
8'b11101010: y = 6'b000100;
8'b11101011: y = 6'b000011;
8'b11101100: y = 6'b000010;
8'b11101101: y = 6'b000001;
8'b11101110: y = 6'b000001;
8'b11101111: y = 6'b000000;
8'b11110000: y = 6'b000101;
8'b11110001: y = 6'b000100;
8'b11110010: y = 6'b000100;
8'b11110011: y = 6'b000011;
8'b11110100: y = 6'b000010;
8'b11110101: y = 6'b000001;
8'b11110110: y = 6'b000001;
8'b11110111: y = 6'b000000;
8'b11111000: y = 6'b000101;
8'b11111001: y = 6'b000100;
8'b11111010: y = 6'b000011;
8'b11111011: y = 6'b000011;
8'b11111100: y = 6'b000010;
8'b11111101: y = 6'b000001;
8'b11111110: y = 6'b000001;
8'b11111111: y = 6'b000000;
default: y = 6'bxxxxxx;
endcase // case (a)
endmodule // sbtm_a0

View File

@ -1,62 +0,0 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup for divide portion of fpdivsqrt
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out);
// bit partitions
logic [3:0] x0;
logic [2:0] x1;
logic [3:0] x2;
logic [2:0] x2_1cmp;
// mem outputs
logic [12:0] y0;
logic [4:0] y1;
// input to CPA
logic [14:0] op1;
logic [14:0] op2;
logic [14:0] p;
logic cout;
assign x0 = a[10:7];
assign x1 = a[6:4];
assign x2 = a[3:0];
sbtm_a0 mem1 ({x0, x1}, y0);
// 1s cmp per sbtm/stam
assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0];
sbtm_a1 mem2 ({x0, x2_1cmp}, y1);
assign op1 = {1'b0, y0, 1'b0};
// 1s cmp per sbtm/stam
assign op2 = x2[3] ? {1'b1, {8{1'b1}}, ~y1, 1'b1} :
{1'b0, 8'b0, y1, 1'b1};
// CPA
assign {cout, p} = op1 + op2;
assign ia_out = p[14:4];
endmodule // sbtm

View File

@ -1,68 +0,0 @@
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Bipartite Lookup for sqrt part of fpdivsqrt
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y);
// bit partitions
logic [4:0] x0;
logic [2:0] x1;
logic [3:0] x2;
logic [2:0] x2_1cmp;
// mem outputs
logic [13:0] y0;
logic [5:0] y1;
// input to CPA
logic [14:0] op1;
logic [14:0] op2;
logic [14:0] p;
logic cout;
assign x0 = a[11:7];
assign x1 = a[6:4];
assign x2 = a[3:0];
sbtm_a2 mem1 ({x0, x1}, y0);
assign op1 = {y0, 1'b0};
// 1s cmp per sbtm/stam
assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0];
sbtm_a3 mem2 ({x0, x2_1cmp}, y1);
// 1s cmp per sbtm/stam
assign op2 = x2[3] ? {{8{1'b1}}, ~y1, 1'b1} :
{8'b0, y1, 1'b1};
// CPA
assign {cout, p} = op1 + op2;
assign y = p[14:4];
endmodule // sbtm2

View File

@ -1,164 +0,0 @@
// MJS - This module implements a 57-bit 2-to-1 multiplexor, which is
// used in the barrel shifter for significand alignment.
module mux21x57 (Z, A, B, Sel);
input [56:0] A;
input [56:0] B;
input Sel;
output [56:0] Z;
assign Z = Sel ? B : A;
endmodule // mux21x57
// MJS - This module implements a 64-bit 2-to-1 multiplexor, which is
// used in the barrel shifter for significand normalization.
module mux21x64 (Z, A, B, Sel);
input [63:0] A;
input [63:0] B;
input Sel;
output [63:0] Z;
assign Z = Sel ? B : A;
endmodule // mux21x64
// The implementation of the barrel shifter was modified to use
// fewer gates. It is now implemented using six 64-bit 2-to-1 muxes. The
// barrel shifter takes a 64-bit input A and shifts it left by up to
// 63-bits, as specified by Shift, to produce a 63-bit output Z.
// Bits to the right are filled with zeros.
// The 64 bit shift is implemented using 6 stages of shifts of 32
// 16, 8, 4, 2, and 1 bit shifts.
module barrel_shifter_l64 (Z, A, Shift);
input [63:0] A;
input [5:0] Shift;
wire [63:0] stage1;
wire [63:0] stage2;
wire [63:0] stage3;
wire [63:0] stage4;
wire [63:0] stage5;
wire [31:0] thirtytwozeros = 32'h0;
wire [15:0] sixteenzeros = 16'h0;
wire [ 7:0] eightzeros = 8'h0;
wire [ 3:0] fourzeros = 4'h0;
wire [ 1:0] twozeros = 2'b00;
wire onezero = 1'b0;
output [63:0] Z;
mux21x64 mx01(stage1, A, {A[31:0], thirtytwozeros}, Shift[5]);
mux21x64 mx02(stage2, stage1, {stage1[47:0], sixteenzeros}, Shift[4]);
mux21x64 mx03(stage3, stage2, {stage2[55:0], eightzeros}, Shift[3]);
mux21x64 mx04(stage4, stage3, {stage3[59:0], fourzeros}, Shift[2]);
mux21x64 mx05(stage5, stage4, {stage4[61:0], twozeros}, Shift[1]);
mux21x64 mx06(Z , stage5, {stage5[62:0], onezero}, Shift[0]);
endmodule // barrel_shifter_l63
// The implementation of the barrel shifter was modified to use
// fewer gates. It is now implemented using six 57-bit 2-to-1 muxes. The
// barrel shifter takes a 57-bit input A and right shifts it by up to
// 63-bits, as specified by Shift, to produce a 57-bit output Z.
// It also computes a Sticky bit, which is set to
// one if any of the bits that were shifted out was one.
// Bits shifted into the left are filled with zeros.
// The 63 bit shift is implemented using 6 stages of shifts of 32
// 16, 8, 4, 2, and 1 bits.
module barrel_shifter_r57 (Z, Sticky, A, Shift);
input [56:0] A;
input [5:0] Shift;
output Sticky;
output [56:0] Z;
wire [56:0] stage1;
wire [56:0] stage2;
wire [56:0] stage3;
wire [56:0] stage4;
wire [56:0] stage5;
wire [62:0] sixtythreezeros = 63'h0;
wire [31:0] thirtytwozeros = 32'h0;
wire [15:0] sixteenzeros = 16'h0;
wire [ 7:0] eightzeros = 8'h0;
wire [ 3:0] fourzeros = 4'h0;
wire [ 1:0] twozeros = 2'b00;
wire onezero = 1'b0;
wire [62:0] S;
// Shift operations
mux21x57 mx01(stage1, A, {thirtytwozeros, A[56:32]}, Shift[5]);
mux21x57 mx02(stage2, stage1, {sixteenzeros, stage1[56:16]}, Shift[4]);
mux21x57 mx03(stage3, stage2, {eightzeros, stage2[56:8]}, Shift[3]);
mux21x57 mx04(stage4, stage3, {fourzeros, stage3[56:4]}, Shift[2]);
mux21x57 mx05(stage5, stage4, {twozeros, stage4[56:2]}, Shift[1]);
mux21x57 mx06(Z , stage5, {onezero, stage5[56:1]}, Shift[0]);
// Sticky bit calculation. The Sticky bit is set to one if any of the
// bits that were shifter out were one
assign S[31:0] = {32{Shift[5]}} & A[31:0];
assign S[47:32] = {16{Shift[4]}} & stage1[15:0];
assign S[55:48] = { 8{Shift[3]}} & stage2[7:0];
assign S[59:56] = { 4{Shift[2]}} & stage3[3:0];
assign S[61:60] = { 2{Shift[1]}} & stage4[1:0];
assign S[62] = Shift[0] & stage5[0];
assign Sticky = (S != sixtythreezeros);
endmodule // barrel_shifter_r57
/*
module barrel_shifter_r64 (Z, Sticky, A, Shift);
input [63:0] A;
input [5:0] Shift;
output Sticky;
output [63:0] Z;
wire [63:0] stage1;
wire [63:0] stage2;
wire [63:0] stage3;
wire [63:0] stage4;
wire [63:0] stage5;
wire [62:0] sixtythreezeros = 63'h0;
wire [31:0] thirtytwozeros = 32'h0;
wire [15:0] sixteenzeros = 16'h0;
wire [ 7:0] eightzeros = 8'h0;
wire [ 3:0] fourzeros = 4'h0;
wire [ 1:0] twozeros = 2'b00;
wire onezero = 1'b0;
wire [62:0] S;
// Shift operations
mux21x64 mx01(stage1, A, {thirtytwozeros, A[63:32]}, Shift[5]);
mux21x64 mx02(stage2, stage1, {sixteenzeros, stage1[63:16]}, Shift[4]);
mux21x64 mx03(stage3, stage2, {eightzeros, stage2[63:8]}, Shift[3]);
mux21x64 mx04(stage4, stage3, {fourzeros, stage3[63:4]}, Shift[2]);
mux21x64 mx05(stage5, stage4, {twozeros, stage4[63:2]}, Shift[1]);
mux21x64 mx06(Z , stage5, {onezero, stage5[63:1]}, Shift[0]);
// Sticky bit calculation. The Sticky bit is set to one if any of the
// bits that were shifter out were one
assign S[31:0] = {32{Shift[5]}} & A[31:0];
assign S[47:32] = {16{Shift[4]}} & stage1[15:0];
assign S[55:48] = { 8{Shift[3]}} & stage2[7:0];
assign S[59:56] = { 4{Shift[2]}} & stage3[3:0];
assign S[61:60] = { 2{Shift[1]}} & stage4[1:0];
assign S[62] = Shift[0] & stage5[0];
assign Sticky = (S != sixtythreezeros);
endmodule // barrel_shifter_r64
*/

View File

@ -1,8 +1,8 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu 13 January 2022
// Modified:
// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek
// Modified:13 January 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
@ -33,38 +33,27 @@
module srtradix4 (
input logic clk,
input logic DivStart,
input logic DivBusy,
input logic [`FMTBITS-1:0] FmtE,
input logic [`NE-1:0] XExpE, YExpE,
input logic [`NF:0] XManE, YManE,
input logic [`XLEN-1:0] SrcA, SrcB,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
output logic DivDone,
output logic DivStickyE,
output logic DivNegStickyE,
input logic [`DIVLEN-1:0] X,
input logic [`DIVLEN-1:0] Dpreproc,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`DIVLEN+2:0] Quot,
output logic [`XLEN-1:0] Rem, // *** later handle integers
output logic [`NE+1:0] DivCalcExpE
output logic [`DIVLEN+3:0] WSN, WCN,
output logic [`DIVLEN+3:0] WS, WC,
output logic [`NE+1:0] DivCalcExpM,
output logic [`XLEN-1:0] Rem
);
logic [3:0] q;
logic [`NE+1:0] DivCalcExp;
logic [`DIVLEN-1:0] X;
logic [`DIVLEN-1:0] Dpreproc;
logic [`DIVLEN+3:0] WS, WSA, WSN;
logic [`DIVLEN+3:0] WC, WCA, WCN;
logic [`DIVLEN+3:0] WSA;
logic [`DIVLEN+3:0] WCA;
logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel;
logic [`NE+1:0] DivCalcExp;
logic [$clog2(`XLEN+1)-1:0] intExp;
logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
logic intSign;
srtpreproc preproc(.SrcA, .SrcB, .XManE, .YManE, .W64, .Signed, .Int, .Sqrt, .X,
.XZeroCnt, .YZeroCnt, .Dpreproc, .intExp, .intSign);
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
@ -79,6 +68,7 @@ module srtradix4 (
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
flop #(`DIVLEN+4) wcflop(clk, WCN, WC);
flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
// Quotient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
@ -91,9 +81,6 @@ module srtradix4 (
// 0001 = -2
qsel4 qsel4(.D, .WS, .WC, .q);
// Store the expoenent and sign until division is DivDone
flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpE);
// Divisor Selection logic
// *** radix 4 change to choose -2 to 2
// - choose the negitive version of what's being selected
@ -116,12 +103,9 @@ module srtradix4 (
csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
//*** change for radix 4
otfc4 otfc4(.clk, .DivStart, .q, .Quot);
otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Quot);
expcalc expcalc(.XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
earlytermination earlytermination(.clk, .WC, .WS, .XZeroE, .YZeroE, .XInfE, .EarlyTermShiftDiv2E,
.YInfE, .XNaNE, .YNaNE, .DivStickyE, .DivNegStickyE, .DivStart, .DivDone);
expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
endmodule
@ -129,38 +113,7 @@ endmodule
// Submodules //
////////////////
module earlytermination(
input logic clk,
input logic [`DIVLEN+3:0] WS, WC,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic DivStart,
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
output logic DivStickyE,
output logic DivNegStickyE,
output logic DivDone);
logic [$clog2(`DIVLEN/2+3)-1:0] Count;
logic WZero;
logic [`DIVLEN+3:0] W;
assign WZero = ((WS^WC)=={WS[`DIVLEN+2:0]|WC[`DIVLEN+2:0], 1'b0})|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE;
assign DivDone = (DivStickyE | WZero);
assign DivStickyE = ~|Count;
assign W = WC+WS;
assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
assign EarlyTermShiftDiv2E = Count;
// +1 for setup
// `DIVLEN/2 to get required number of bits
// +1 for possible .5 and round bit
// Count down Counter
always @(posedge clk)
begin
if (DivStart) Count <= #1 `DIVLEN/2+2;
else Count <= #1 Count-1;
end
endmodule
module qsel4 (
input logic [`DIVLEN+3:0] D,
@ -234,58 +187,13 @@ module qsel4 (
endmodule
///////////////////
// Preprocessing //
///////////////////
module srtpreproc (
input logic [`XLEN-1:0] SrcA, SrcB,
input logic [`NF:0] XManE, YManE,
input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic [`DIVLEN-1:0] X,
output logic [`DIVLEN-1:0] Dpreproc,
output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
output logic intSign // Quotient integer sign
);
// logic [`XLEN-1:0] PosA, PosB;
// logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
logic [`DIVLEN-1:0] PreprocA, PreprocX;
logic [`DIVLEN-1:0] PreprocB, PreprocY;
// assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
// assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
// lzc #(`XLEN) lzcA (PosA, zeroCntA);
// lzc #(`XLEN) lzcB (PosB, zeroCntB);
// ***can probably merge X LZC with conversion
// cout the number of leading zeros
lzc #(`NF+1) lzcA (XManE, XZeroCnt);
lzc #(`NF+1) lzcB (YManE, YZeroCnt);
// assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
// assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
// assign PreprocA = ExtraA << zeroCntA;
// assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign X = Int ? PreprocA : PreprocX;
assign Dpreproc = Int ? PreprocB : PreprocY;
// assign intExp = zeroCntB - zeroCntA + 1;
// assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
endmodule
///////////////////////////////////
// On-The-Fly Converter, Radix 2 //
///////////////////////////////////
module otfc4 (
input logic clk,
input logic DivStart,
input logic DivBusy,
input logic [3:0] q,
output logic [`DIVLEN+2:0] Quot
);
@ -307,7 +215,7 @@ module otfc4 (
// if starting a new divison set Q to 0 and QM to -1
mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
flop #(`DIVLEN+3) Qreg(clk, QMux, Quot); // *** have to connect Quot directly to M stage
flopen #(`DIVLEN+3) Qreg(clk, DivBusy|DivStart, QMux, Quot); // *** have to connect Quot directly to M stage
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
// shift Q (quotent) and QM (quotent-1)
@ -361,23 +269,44 @@ module csa #(parameter N=69) (
// bit, leaving room in the least significant bit to
// insert cin.
assign #1 out1 = in1 ^ in2 ^ in3;
assign #1 out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) |
assign out1 = in1 ^ in2 ^ in3;
assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) |
(in2[N-2:0] & in3[N-2:0]), cin};
endmodule
//////////////
// expcalc //
//////////////
module expcalc(
input logic [`NE-1:0] XExpE, YExpE,
input logic XZeroE,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`NE+1:0] DivCalcExp
);
input logic [`FMTBITS-1:0] FmtE,
input logic [`NE-1:0] XExpE, YExpE,
input logic XZeroE,
input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [`NE+1:0] DivCalcExp
);
logic [`NE-2:0] Bias;
if (`FPSIZES == 1) begin
assign Bias = (`NE-1)'(`BIAS);
// correct exponent for denormalized input's normalization shifts
assign DivCalcExp = (XExpE - XZeroCnt - YExpE + YZeroCnt + (`NE)'(`BIAS))&{`NE+2{~XZeroE}};
end else if (`FPSIZES == 2) begin
assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
endmodule
end else if (`FPSIZES == 3) begin
always_comb
case (FmtE)
`FMT: Bias = (`NE-1)'(`BIAS);
`FMT1: Bias = (`NE-1)'(`BIAS1);
`FMT2: Bias = (`NE-1)'(`BIAS2);
default: Bias = 'x;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (FmtE)
2'h3: Bias = (`NE-1)'(`Q_BIAS);
2'h1: Bias = (`NE-1)'(`D_BIAS);
2'h0: Bias = (`NE-1)'(`S_BIAS);
2'h2: Bias = (`NE-1)'(`H_BIAS);
endcase
end
// correct exponent for denormalized input's normalization shifts
assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$clog2(`NF+2){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$clog2(`NF+2){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
endmodule

View File

@ -0,0 +1,85 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek
// Modified:13 January 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module srtfsm(
input logic clk,
input logic reset,
input logic [`DIVLEN+3:0] WSN, WCN, WS, WC,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic DivStart,
input logic StallE,
input logic StallM,
input logic [$clog2(`DIVLEN/2+3)-1:0] Dur,
output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
output logic DivStickyE,
output logic DivDone,
output logic DivNegStickyE,
output logic DivBusy
);
typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype;
statetype state;
logic [$clog2(`DIVLEN/2+3)-1:0] step;
logic WZero;
//logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
logic [`DIVLEN+3:0] W;
//flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur);
assign DivBusy = (state == BUSY);
assign WZero = ((WSN^WCN)=={WSN[`DIVLEN+2:0]|WCN[`DIVLEN+2:0], 1'b0});
assign DivStickyE = ~WZero;
assign DivDone = (state == DONE);
assign W = WC+WS;
assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
assign EarlyTermShiftDiv2E = step;
always_ff @(posedge clk) begin
if (reset) begin
state <= #1 IDLE;
end else if (DivStart&~StallE) begin
step <= Dur;
if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE) state <= #1 DONE;
else state <= #1 BUSY;
end else if (state == BUSY) begin
if ((~|step[$clog2(`DIVLEN/2+3)-1:1]&step[0])|WZero) begin
state <= #1 DONE;
end
step <= step - 1;
end else if (state == DONE) begin
if (StallM) state <= #1 DONE;
else state <= #1 IDLE;
end
end
endmodule

View File

@ -0,0 +1,72 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek
// Modified:13 January 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module srtpreproc (
input logic [`NF:0] XManE, YManE,
output logic [`DIVLEN-1:0] X,
output logic [`DIVLEN-1:0] Dpreproc,
output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
output logic [$clog2(`DIVLEN/2+3)-1:0] Dur
);
// logic [`XLEN-1:0] PosA, PosB;
// logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
logic [`DIVLEN-1:0] PreprocA, PreprocX;
logic [`DIVLEN-1:0] PreprocB, PreprocY;
// assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
// assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
// lzc #(`XLEN) lzcA (PosA, zeroCntA);
// lzc #(`XLEN) lzcB (PosB, zeroCntB);
// ***can probably merge X LZC with conversion
// cout the number of leading zeros
lzc #(`NF+1) lzcA (XManE, XZeroCnt);
lzc #(`NF+1) lzcB (YManE, YZeroCnt);
// assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
// assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
// assign PreprocA = ExtraA << zeroCntA;
// assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
assign X = PreprocX;
assign Dpreproc = PreprocY;
assign Dur = ($clog2(`DIVLEN/2+3))'(`DIVLEN/2+2);
// assign intExp = zeroCntB - zeroCntA + 1;
// assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
endmodule

View File

@ -1,3 +1,31 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: unpack all inputs
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module unpack (

View File

@ -1,3 +1,31 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: unpack input
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module unpackinput (

View File

@ -1,3 +1,31 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: Leading Zero Counter
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
//leading zero counter i.e. priority encoder
module lzc #(parameter WIDTH = 1) (
input logic [WIDTH-1:0] num,

View File

@ -64,9 +64,9 @@ module hazard(
assign StallFCause = CSRWriteFencePendingDEM & ~(TrapM | RetM | BPPredWrongE);
// stall in decode if instruction is a load/mul/csr dependent on previous
assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE);
assign StallECause = (DivBusyE | FDivBusyE) & ~(TrapM); // *** can we move to decode stage (KP?)
assign StallECause = (DivBusyE) & ~(TrapM); // *** can we move to decode stage (KP?)
// WFI terminates if any enabled interrupt is pending, even if global interrupts are disabled. It could also terminate with TW trap
assign StallMCause = wfiM & (~TrapM & ~IntPendingM);
assign StallMCause = (wfiM & (~TrapM & ~IntPendingM)) | FDivBusyE;
assign StallWCause = LSUStallM | IFUStallF;
assign #1 StallF = StallFCause | StallD;

View File

@ -52,7 +52,7 @@ module clint_apb (
integer i, j;
assign memwrite = PWRITE & PENABLE & PSEL; // only write in access phase
assign PREADY = 1'b1; // GPIO never takes >1 cycle to respond
assign PREADY = 1'b1; // CLINT never takes >1 cycle to respond
// word aligned reads
if (`XLEN==64) assign #2 entry = {PADDR[15:3], 3'b000};

View File

@ -35,6 +35,7 @@
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
/*
`include "wally-config.vh"
`define N `PLIC_NUM_SRC
@ -257,3 +258,4 @@ module plic (
assign SExtInt = |(threshMask[1] & priorities_with_irqs[1]);
endmodule
*/

View File

@ -0,0 +1,273 @@
///////////////////////////////////////////
// plic_apb.sv
//
// Written: bbracker@hmc.edu 18 January 2021
// Modified:
//
// Purpose: Platform-Level Interrupt Controller
// Based on RISC-V spec (https://github.com/riscv/riscv-plic-spec/blob/master/riscv-plic.adoc)
// With clarifications from ROA's existing implementation (https://roalogic.github.io/plic/docs/AHB-Lite_PLIC_Datasheet.pdf)
// Supports only 1 target core and only a global threshold.
//
// *** Big questions:
// Do we detect requests as level-triggered or edge-trigged?
// If edge-triggered, do we want to allow 1 source to be able to make a number of repeated requests?
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
`define N `PLIC_NUM_SRC
// number of interrupt sources
// does not include source 0, which does not connect to anything according to spec
// up to 63 sources supported; *** in the future, allow up to 1023 sources
`define C 2
// number of conexts
// hardcoded to 2 contexts for now; *** later upgrade to arbitrary (up to 15872) contexts
module plic_apb (
input logic PCLK, PRESETn,
input logic PSEL,
input logic [27:0] PADDR,
input logic [`XLEN-1:0] PWDATA,
input logic [`XLEN/8-1:0] PSTRB,
input logic PWRITE,
input logic PENABLE,
output logic [`XLEN-1:0] PRDATA,
output logic PREADY,
/*
input logic PCLK, PRESETn,
input logic HSELPLIC,
input logic [27:0] HADDR, // *** could factor out entry into HADDRd at the level of uncore
input logic HWRITE,
input logic HREADY,
input logic [1:0] HTRANS,
input logic [`XLEN-1:0] HWDATA,
output logic [`XLEN-1:0] PRDATA,
output logic HRESPPLIC, HREADYPLIC, */
input logic UARTIntr,GPIOIntr,
(* mark_debug = "true" *) output logic MExtInt, SExtInt);
logic memwrite, memread, initTrans;
logic [23:0] entry;
logic [31:0] Din, Dout;
// context-independent signals
(* mark_debug = "true" *) logic [`N:1] requests;
(* mark_debug = "true" *) logic [`N:1][2:0] intPriority;
(* mark_debug = "true" *) logic [`N:1] intInProgress, intPending, nextIntPending;
// context-dependent signals
logic [`C-1:0][2:0] intThreshold;
(* mark_debug = "true" *) logic [`C-1:0][`N:1] intEn;
logic [`C-1:0][5:0] intClaim; // ID's are 6 bits if we stay within 63 sources
(* mark_debug = "true" *) logic [`C-1:0][7:1][`N:1] irqMatrix;
logic [`C-1:0][7:1] priorities_with_irqs;
logic [`C-1:0][7:1] max_priority_with_irqs;
logic [`C-1:0][`N:1] irqs_at_max_priority;
logic [`C-1:0][7:1] threshMask;
// =======
// AHB I/O
// =======
assign memwrite = PWRITE & PENABLE & PSEL; // only write in access phase
assign memread = ~PWRITE & PSEL; // read at start of access phase. PENABLE hasn't set up before this
assign PREADY = 1'b1; // PLIC never takes >1 cycle to respond
assign entry = {PADDR[23:2],2'b0};
/*
assign initTrans = HREADY & HSELPLIC & (HTRANS != 2'b00);
assign memread = initTrans & ~HWRITE;
// entryd and memwrite are delayed by a cycle because AHB controller waits a cycle before outputting write data
flopr #(1) memwriteflop(PCLK, ~HRESETn, initTrans & HWRITE, memwrite);
flopr #(24) entrydflop(PCLK, ~PRESETn, entry, entryd);
assign HRESPPLIC = 0; // OK
assign HREADYPLIC = 1'b1; // PLIC never takes >1 cycle to respond */
// account for subword read/write circuitry
// -- Note PLIC registers are 32 bits no matter what; access them with LW SW.
if (`XLEN == 64) begin
assign Din = entry[2] ? PWDATA[63:32] : PWDATA[31:0];
assign PRDATA = entry[2] ? {Dout,32'b0} : {32'b0,Dout};
end else begin // 32-bit
assign PRDATA = Dout;
assign Din = PWDATA[31:0];
end
// ==================
// Register Interface
// ==================
always @(posedge PCLK,negedge PRESETn) begin
// resetting
if (~PRESETn) begin
intPriority <= #1 {`N{3'b0}};
intEn <= #1 {2{`N'b0}};
intThreshold <= #1 {2{3'b0}};
intInProgress <= #1 `N'b0;
// writing
end else begin
if (memwrite)
casez(entry)
24'h0000??: intPriority[entry[7:2]] <= #1 Din[2:0];
`ifdef PLIC_NUM_SRC_LT_32 // *** switch to a generate for loop so as to deprecate PLIC_NUM_SRC_LT_32 and allow up to 1023 sources
24'h002000: intEn[0][`N:1] <= #1 Din[`N:1];
24'h002080: intEn[1][`N:1] <= #1 Din[`N:1];
`endif
`ifndef PLIC_NUM_SRC_LT_32
24'h002000: intEn[0][31:1] <= #1 Din[31:1];
24'h002004: intEn[0][`N:32] <= #1 Din[31:0];
24'h002080: intEn[1][31:1] <= #1 Din[31:1];
24'h002084: intEn[1][`N:32] <= #1 Din[31:0];
`endif
24'h200000: intThreshold[0] <= #1 Din[2:0];
24'h200004: intInProgress <= #1 intInProgress & ~(`N'b1 << (Din[5:0]-1)); // lower "InProgress" to signify completion
24'h201000: intThreshold[1] <= #1 Din[2:0];
24'h201004: intInProgress <= #1 intInProgress & ~(`N'b1 << (Din[5:0]-1)); // lower "InProgress" to signify completion
endcase
// Read synchronously because a read can have side effect of changing intInProgress
if (memread)
casez(entry)
24'h0000??: Dout <= #1 {29'b0,intPriority[entry[7:2]]};
`ifdef PLIC_NUM_SRC_LT_32
24'h001000: Dout <= #1 {{(31-`N){1'b0}},intPending,1'b0};
24'h002000: Dout <= #1 {{(31-`N){1'b0}},intEn[0],1'b0};
24'h002080: Dout <= #1 {{(31-`N){1'b0}},intEn[1],1'b0};
`endif
`ifndef PLIC_NUM_SRC_LT_32
24'h001000: Dout <= #1 {intPending[31:1],1'b0};
24'h001004: Dout <= #1 {{(63-`N){1'b0}},intPending[`N:32]};
24'h002000: Dout <= #1 {intEn[0][31:1],1'b0};
24'h002004: Dout <= #1 {{(63-`N){1'b0}},intEn[0][`N:32]};
24'h002080: Dout <= #1 {intEn[0][31:1],1'b0};
24'h002084: Dout <= #1 {{(63-`N){1'b0}},intEn[1][`N:32]};
`endif
24'h200000: Dout <= #1 {29'b0,intThreshold[0]};
24'h200004: begin
Dout <= #1 {26'b0,intClaim[0]};
intInProgress <= #1 intInProgress | (`N'b1 << (intClaim[0]-1)); // claimed requests are currently in progress of being serviced until they are completed
end
24'h201000: Dout <= #1 {29'b0,intThreshold[1]};
24'h201004: begin
Dout <= #1 {26'b0,intClaim[1]};
intInProgress <= #1 intInProgress | (`N'b1 << (intClaim[1]-1)); // claimed requests are currently in progress of being serviced until they are completed
end
default: Dout <= #1 32'h0; // invalid access
endcase
else Dout <= #1 32'h0;
end
end
// connect sources to requests
always_comb begin
requests = `N'b0;
`ifdef PLIC_GPIO_ID
requests[`PLIC_GPIO_ID] = GPIOIntr;
`endif
`ifdef PLIC_UART_ID
requests[`PLIC_UART_ID] = UARTIntr;
`endif
end
// pending interrupt requests
//assign nextIntPending = (intPending | requests) & ~intInProgress; //
assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion
flopr #(`N) intPendingFlop(PCLK,~PRESETn,nextIntPending,intPending);
// context-dependent signals
genvar ctx;
for (ctx=0; ctx<`C; ctx++) begin
// request matrix
// priority level (rows) X source ID (columns)
//
// irqMatrix[ctx][pri][src] is high if source <src>
// has priority level <pri> and has an "active" interrupt request
// ("active" meaning it is enabled in context <ctx> and is pending)
genvar src, pri;
for (pri=1; pri<=7; pri++) begin
for (src=1; src<=`N; src++) begin
assign irqMatrix[ctx][pri][src] = (intPriority[src]==pri) & intPending[src] & intEn[ctx][src];
end
end
// which prority levels have one or more active requests?
assign priorities_with_irqs[ctx][7:1] = {
|irqMatrix[ctx][7],
|irqMatrix[ctx][6],
|irqMatrix[ctx][5],
|irqMatrix[ctx][4],
|irqMatrix[ctx][3],
|irqMatrix[ctx][2],
|irqMatrix[ctx][1]
};
// get the highest priority level that has active requests
assign max_priority_with_irqs[ctx][7:1] = {
priorities_with_irqs[ctx][7],
priorities_with_irqs[ctx][6] & ~|priorities_with_irqs[ctx][7],
priorities_with_irqs[ctx][5] & ~|priorities_with_irqs[ctx][7:6],
priorities_with_irqs[ctx][4] & ~|priorities_with_irqs[ctx][7:5],
priorities_with_irqs[ctx][3] & ~|priorities_with_irqs[ctx][7:4],
priorities_with_irqs[ctx][2] & ~|priorities_with_irqs[ctx][7:3],
priorities_with_irqs[ctx][1] & ~|priorities_with_irqs[ctx][7:2]
};
// of the sources at the highest priority level that has active requests,
// which sources have active requests?
assign irqs_at_max_priority[ctx][`N:1] =
({`N{max_priority_with_irqs[ctx][7]}} & irqMatrix[ctx][7]) |
({`N{max_priority_with_irqs[ctx][6]}} & irqMatrix[ctx][6]) |
({`N{max_priority_with_irqs[ctx][5]}} & irqMatrix[ctx][5]) |
({`N{max_priority_with_irqs[ctx][4]}} & irqMatrix[ctx][4]) |
({`N{max_priority_with_irqs[ctx][3]}} & irqMatrix[ctx][3]) |
({`N{max_priority_with_irqs[ctx][2]}} & irqMatrix[ctx][2]) |
({`N{max_priority_with_irqs[ctx][1]}} & irqMatrix[ctx][1]);
// of the sources at the highest priority level that has active requests,
// choose the source with the lowest source ID to be the most urgent
// and set intClaim to the source ID of the most urgent active request
integer k;
always_comb begin
intClaim[ctx] = 6'b0;
for (k=`N; k>0; k--) begin
if (irqs_at_max_priority[ctx][k]) intClaim[ctx] = k[5:0];
end
end
// create threshold mask
always_comb begin
threshMask[ctx][7] = (intThreshold[ctx] != 7);
threshMask[ctx][6] = (intThreshold[ctx] != 6) & threshMask[ctx][7];
threshMask[ctx][5] = (intThreshold[ctx] != 5) & threshMask[ctx][6];
threshMask[ctx][4] = (intThreshold[ctx] != 4) & threshMask[ctx][5];
threshMask[ctx][3] = (intThreshold[ctx] != 3) & threshMask[ctx][4];
threshMask[ctx][2] = (intThreshold[ctx] != 2) & threshMask[ctx][3];
threshMask[ctx][1] = (intThreshold[ctx] != 1) & threshMask[ctx][2];
end
end
// is the max priority > threshold?
// *** would it be any better to first priority encode maxPriority into binary and then ">" with threshold?
assign MExtInt = |(threshMask[0] & priorities_with_irqs[0]);
assign SExtInt = |(threshMask[1] & priorities_with_irqs[1]);
endmodule

View File

@ -30,6 +30,7 @@
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
/*
`include "wally-config.vh"
module uart (
@ -103,3 +104,4 @@ module uart (
endmodule
*/

View File

@ -40,7 +40,7 @@
module uartPC16550D(
// Processor Interface
input logic HCLK, HRESETn,
input logic PCLK, PRESETn,
input logic [2:0] A,
input logic [7:0] Din,
output logic [7:0] Dout,
@ -132,7 +132,7 @@ module uartPC16550D(
///////////////////////////////////////////
// Input synchronization: 2-stage synchronizer
///////////////////////////////////////////
always_ff @(posedge HCLK) begin
always_ff @(posedge PCLK) begin
{SINd, DSRbd, DCDbd, CTSbd, RIbd} <= #1 {SIN, DSRb, DCDb, CTSb, RIb};
{SINsync, DSRbsync, DCDbsync, CTSbsync, RIbsync} <= #1 loop ? {SOUTbit, ~MCR[0], ~MCR[3], ~MCR[1], ~MCR[2]} :
{SINd, DSRbd, DCDbd, CTSbd, RIbd}; // syncrhonized signals, handle loopback testing
@ -142,8 +142,8 @@ module uartPC16550D(
///////////////////////////////////////////
// Register interface (Table 1, note some are read only and some write only)
///////////////////////////////////////////
always_ff @(posedge HCLK, negedge HRESETn)
if (~HRESETn) begin // Table 3 Reset Configuration
always_ff @(posedge PCLK, negedge PRESETn)
if (~PRESETn) begin // Table 3 Reset Configuration
IER <= #1 4'b0;
FCR <= #1 8'b0;
if (`QEMU) LCR <= #1 8'b0; else LCR <= #1 8'b11; // fpga only **** BUG
@ -229,8 +229,8 @@ module uartPC16550D(
///////////////////////////////////////////
// Ross Thompson: Found a bug. If the baud rate dividers DLM, and DLL are reloaded
// the baudcount is not reset to {DLM, DLL, UART_PRESCALE}
always_ff @(posedge HCLK, negedge HRESETn)
if (~HRESETn) begin
always_ff @(posedge PCLK, negedge PRESETn)
if (~PRESETn) begin
baudcount <= #1 1;
baudpulse <= #1 0;
end else if (~MEMWb & DLAB & (A == 3'b0 | A == 3'b1)) begin
@ -254,8 +254,8 @@ module uartPC16550D(
///////////////////////////////////////////
// receive timing and control
///////////////////////////////////////////
always_ff @(posedge HCLK, negedge HRESETn)
if (~HRESETn) begin
always_ff @(posedge PCLK, negedge PRESETn)
if (~PRESETn) begin
rxoversampledcnt <= #1 0;
rxstate <= #1 UART_IDLE;
rxbitsreceived <= #1 0;
@ -288,8 +288,8 @@ module uartPC16550D(
///////////////////////////////////////////
// receive shift register, buffer register, FIFO
///////////////////////////////////////////
always_ff @(posedge HCLK, negedge HRESETn)
if (~HRESETn) rxshiftreg <= #1 10'b0000000001; // initialize so that there is a valid stop bit
always_ff @(posedge PCLK, negedge PRESETn)
if (~PRESETn) rxshiftreg <= #1 10'b0000000001; // initialize so that there is a valid stop bit
else if (rxcentered) rxshiftreg <= #1 {rxshiftreg[8:0], SINsync}; // capture bit
assign rxparitybit = rxshiftreg[1]; // parity, if it exists, in bit 1 when all done
assign rxstopbit = rxshiftreg[0];
@ -310,8 +310,8 @@ module uartPC16550D(
assign rxbreak = rxframingerr & (rxdata9 == 9'b0); // break when 0 for start + data + parity + stop time
// receive FIFO and register
always_ff @(posedge HCLK, negedge HRESETn)
if (~HRESETn) begin
always_ff @(posedge PCLK, negedge PRESETn)
if (~PRESETn) begin
rxfifohead <= #1 0; rxfifotail <= #1 0; rxdataready <= #1 0; RXBR <= #1 0;
end else begin
if (rxstate == UART_DONE) begin
@ -367,8 +367,8 @@ module uartPC16550D(
assign rxfifohaserr = |(RXerrbit & rxfullbit);
// receive buffer register and ready bit
always_ff @(posedge HCLK, negedge HRESETn) // track rxrdy for DMA mode (FCR3 = FCR0 = 1)
if (~HRESETn) rxfifodmaready <= #1 0;
always_ff @(posedge PCLK, negedge PRESETn) // track rxrdy for DMA mode (FCR3 = FCR0 = 1)
if (~PRESETn) rxfifodmaready <= #1 0;
else if (rxfifotriggered | rxfifotimeout) rxfifodmaready <= #1 1;
else if (rxfifoempty) rxfifodmaready <= #1 0;
@ -386,8 +386,8 @@ module uartPC16550D(
///////////////////////////////////////////
// transmit timing and control
///////////////////////////////////////////
always_ff @(posedge HCLK, negedge HRESETn)
if (~HRESETn) begin
always_ff @(posedge PCLK, negedge PRESETn)
if (~PRESETn) begin
txoversampledcnt <= #1 0;
txstate <= #1 UART_IDLE;
txbitssent <= #1 0;
@ -435,8 +435,8 @@ module uartPC16550D(
end
// registers & FIFO
always_ff @(posedge HCLK, negedge HRESETn)
if (~HRESETn) begin
always_ff @(posedge PCLK, negedge PRESETn)
if (~PRESETn) begin
txfifohead <= #1 0; txfifotail <= #1 0; txhrfull <= #1 0; txsrfull <= #1 0; TXHR <= #1 0; txsr <= #1 12'hfff;
end else begin
if (~MEMWb & A == 3'b000 & ~DLAB) begin // writing transmit holding register or fifo
@ -477,8 +477,8 @@ module uartPC16550D(
assign txfifofull = (txfifoentries == 4'b1111);
// transmit buffer ready bit
always_ff @(posedge HCLK, negedge HRESETn) // track txrdy for DMA mode (FCR3 = FCR0 = 1)
if (~HRESETn) txfifodmaready <= #1 0;
always_ff @(posedge PCLK, negedge PRESETn) // track txrdy for DMA mode (FCR3 = FCR0 = 1)
if (~PRESETn) txfifodmaready <= #1 0;
else if (txfifoempty) txfifodmaready <= #1 1;
else if (txfifofull) txfifodmaready <= #1 0;
@ -514,18 +514,18 @@ module uartPC16550D(
intrpending = 0;
end
end
always @(posedge HCLK) INTR <= #1 intrpending; // prevent glitches on interrupt pin
always @(posedge PCLK) INTR <= #1 intrpending; // prevent glitches on interrupt pin
// Side effect of reading LSR is lowering overrun, parity, framing, break intr's
assign setSquashRXerrIP = ~MEMRb & (A==3'b101);
assign resetSquashRXerrIP = (rxstate == UART_DONE);
assign squashRXerrIP = (prevSquashRXerrIP | setSquashRXerrIP) & ~resetSquashRXerrIP;
flopr #(1) squashRXerrIPreg(HCLK, ~HRESETn, squashRXerrIP, prevSquashRXerrIP);
flopr #(1) squashRXerrIPreg(PCLK, ~PRESETn, squashRXerrIP, prevSquashRXerrIP);
// Side effect of reading IIR is lowering THRE_IP if most significant intr
assign setSquashTHRE_IP = ~MEMRb & (A==3'b010) & (intrID==3'h1); // there's a 1-cycle delay on set squash so that THRE_IP doesn't change during the process of reading IIR (otherwise combinational loop)
assign resetSquashTHRE_IP = ~THRE;
assign squashTHRE_IP = prevSquashTHRE_IP & ~resetSquashTHRE_IP;
flopr #(1) squashTHRE_IPreg(HCLK, ~HRESETn, squashTHRE_IP | setSquashTHRE_IP, prevSquashTHRE_IP);
flopr #(1) squashTHRE_IPreg(PCLK, ~PRESETn, squashTHRE_IP | setSquashTHRE_IP, prevSquashTHRE_IP);
///////////////////////////////////////////
// modem control logic

View File

@ -0,0 +1,123 @@
///////////////////////////////////////////
// uart_apb.sv
//
// Written: David_Harris@hmc.edu 21 January 2021
// Modified:
//
// Purpose: Interface to Universial Asynchronous Receiver/ Transmitter with FIFOs
// Emulates interface of Texas Instruments PC165550D
// Compatible with UART in Imperas Virtio model ***
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module uart_apb (
input logic PCLK, PRESETn,
input logic PSEL,
input logic [2:0] PADDR,
input logic [`XLEN-1:0] PWDATA,
input logic [`XLEN/8-1:0] PSTRB,
input logic PWRITE,
input logic PENABLE,
output logic [`XLEN-1:0] PRDATA,
output logic PREADY,
/*
input logic HCLK, HRESETn,
input logic HSELUART,
input logic [2:0] HADDR,
input logic HWRITE,
input logic [`XLEN-1:0] PWDATA,
output logic [`XLEN-1:0] HREADUART,
output logic HRESPUART, HREADYUART, */
(* mark_debug = "true" *) input logic SIN, DSRb, DCDb, CTSb, RIb, // from E1A driver from RS232 interface
(* mark_debug = "true" *) output logic SOUT, RTSb, DTRb, // to E1A driver to RS232 interface
(* mark_debug = "true" *) output logic OUT1b, OUT2b, INTR, TXRDYb, RXRDYb); // to CPU
// UART interface signals
logic [2:0] entry;
logic MEMRb, MEMWb, memread, memwrite;
logic [7:0] Din, Dout;
assign memwrite = PWRITE & PENABLE & PSEL; // only write in access phase
assign memread = ~PWRITE & PENABLE & PSEL;
assign PREADY = 1'b1; // CLINT never takes >1 cycle to respond
assign entry = PADDR[2:0];
assign MEMRb = ~memread;
assign MEMWb = ~memwrite;
/*
// rename processor interface signals to match PC16550D and provide one-byte interface
flopr #(1) memreadreg(HCLK, ~HRESETn, (HSELUART & ~HWRITE), memread);
flopr #(1) memwritereg(HCLK, ~HRESETn, (HSELUART & HWRITE), memwrite);
flopr #(3) haddrreg(HCLK, ~HRESETn, HADDR[2:0], A);
assign MEMRb = ~memread;
assign MEMWb = ~memwrite;
assign HRESPUART = 0; // OK
assign HREADYUART = 1; // should idle high during address phase and respond high when done; will need to be modified if UART ever needs more than 1 cycle to do something
*/
if (`XLEN == 64) begin:uart
always_comb begin
PRDATA = {Dout, Dout, Dout, Dout, Dout, Dout, Dout, Dout};
case (entry)
3'b000: Din = PWDATA[7:0];
3'b001: Din = PWDATA[15:8];
3'b010: Din = PWDATA[23:16];
3'b011: Din = PWDATA[31:24];
3'b100: Din = PWDATA[39:32];
3'b101: Din = PWDATA[47:40];
3'b110: Din = PWDATA[55:48];
3'b111: Din = PWDATA[63:56];
endcase
end
end else begin:uart // 32-bit
always_comb begin
PRDATA = {Dout, Dout, Dout, Dout};
case (entry[1:0])
2'b00: Din = PWDATA[7:0];
2'b01: Din = PWDATA[15:8];
2'b10: Din = PWDATA[23:16];
2'b11: Din = PWDATA[31:24];
endcase
end
end
logic BAUDOUTb; // loop tx clock BAUDOUTb back to rx clock RCLK
// *** make sure reads don't occur on UART unless fully selected because they could change state. This applies to all peripherals
uartPC16550D u(
// Processor Interface
.PCLK, .PRESETn,
.A(entry), .Din,
.Dout,
.MEMRb, .MEMWb,
.INTR, .TXRDYb, .RXRDYb,
// Clocks
.BAUDOUTb, .RCLK(BAUDOUTb),
// E1A Driver
.SIN, .DSRb, .DCDb, .CTSb, .RIb,
.SOUT, .RTSb, .DTRb, .OUT1b, .OUT2b
);
endmodule

View File

@ -84,11 +84,11 @@ module uncore (
logic PCLK, PRESETn, PWRITE, PENABLE;
// logic PSEL, PREADY;
logic [1:0] PSEL, PREADY;
logic [3:0] PSEL, PREADY;
logic [31:0] PADDR;
logic [`XLEN-1:0] PWDATA;
logic [`XLEN/8-1:0] PSTRB;
logic [1:0][`XLEN-1:0] PRDATA;
logic [3:0][`XLEN-1:0] PRDATA;
// logic [`XLEN-1:0][8:0] PRDATA;
logic [`XLEN-1:0] HREADBRIDGE;
logic HRESPBRIDGE, HREADYBRIDGE, HSELBRIDGE, HSELBRIDGED;
@ -107,11 +107,11 @@ module uncore (
assign {HSELEXT, HSELBootRom, HSELRam, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC, HSELSDC} = HSELRegions[7:0];
// AHB -> APB bridge
ahbapbbridge #(2) ahbapbbridge
(.HCLK, .HRESETn, .HSEL({HSELCLINT, HSELGPIO}), .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HTRANS, .HREADY,
ahbapbbridge #(4) ahbapbbridge
(.HCLK, .HRESETn, .HSEL({HSELUART, HSELPLIC, HSELCLINT, HSELGPIO}), .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HTRANS, .HREADY,
.HRDATA(HREADBRIDGE), .HRESP(HRESPBRIDGE), .HREADYOUT(HREADYBRIDGE),
.PCLK, .PRESETn, .PSEL, .PWRITE, .PENABLE, .PADDR, .PWDATA, .PSTRB, .PREADY, .PRDATA);
assign HSELBRIDGE = HSELGPIO | HSELCLINT; // if any of the bridge signals are selected
assign HSELBRIDGE = HSELGPIO | HSELCLINT | HSELPLIC | HSELUART; // if any of the bridge signals are selected
// on-chip RAM
if (`RAM_SUPPORTED) begin : ram
@ -155,12 +155,17 @@ module uncore (
assign MTimerInt = 0; assign MSwInt = 0;
end
if (`PLIC_SUPPORTED == 1) begin : plic
plic plic(
/* plic plic(
.HCLK, .HRESETn,
.HSELPLIC, .HADDR(HADDR[27:0]),
.HWRITE, .HREADY, .HTRANS, .HWDATA,
.UARTIntr, .GPIOIntr,
.HREADPLIC, .HRESPPLIC, .HREADYPLIC,
.MExtInt, .SExtInt); */
plic_apb plic(
.PCLK, .PRESETn, .PSEL(PSEL[2]), .PADDR(PADDR[27:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE,
.PRDATA(PRDATA[2]), .PREADY(PREADY[2]),
.UARTIntr, .GPIOIntr,
.MExtInt, .SExtInt);
end else begin : plic
assign MExtInt = 0;
@ -186,7 +191,7 @@ module uncore (
assign GPIOPinsOut = 0; assign GPIOPinsEn = 0; assign GPIOIntr = 0;
end
if (`UART_SUPPORTED == 1) begin : uart
uart uart(
/* uart uart(
.HCLK, .HRESETn,
.HSELUART,
.HADDR(HADDR[2:0]),
@ -194,6 +199,12 @@ module uncore (
.HREADUART, .HRESPUART, .HREADYUART,
.SIN(UARTSin), .DSRb(1'b1), .DCDb(1'b1), .CTSb(1'b0), .RIb(1'b1), // from E1A driver from RS232 interface
.SOUT(UARTSout), .RTSb(), .DTRb(), // to E1A driver to RS232 interface
.OUT1b(), .OUT2b(), .INTR(UARTIntr), .TXRDYb(), .RXRDYb()); // to CPU */
uart_apb uart(
.PCLK, .PRESETn, .PSEL(PSEL[3]), .PADDR(PADDR[2:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE,
.PRDATA(PRDATA[3]), .PREADY(PREADY[3]),
.SIN(UARTSin), .DSRb(1'b1), .DCDb(1'b1), .CTSb(1'b0), .RIb(1'b1), // from E1A driver from RS232 interface
.SOUT(UARTSout), .RTSb(), .DTRb(), // to E1A driver to RS232 interface
.OUT1b(), .OUT2b(), .INTR(UARTIntr), .TXRDYb(), .RXRDYb()); // to CPU
end else begin : uart
assign UARTSout = 0; assign UARTIntr = 0;
@ -217,35 +228,35 @@ module uncore (
assign HRDATA = ({`XLEN{HSELRamD}} & HREADRam) |
({`XLEN{HSELEXTD}} & HRDATAEXT) |
// ({`XLEN{HSELCLINTD}} & HREADCLINT) |
({`XLEN{HSELPLICD}} & HREADPLIC) |
// ({`XLEN{HSELPLICD}} & HREADPLIC) |
// ({`XLEN{HSELGPIOD}} & HREADGPIO) |
({`XLEN{HSELBRIDGED}} & HREADBRIDGE) |
({`XLEN{HSELBootRomD}} & HREADBootRom) |
({`XLEN{HSELUARTD}} & HREADUART) |
// ({`XLEN{HSELUARTD}} & HREADUART) |
({`XLEN{HSELSDCD}} & HREADSDC);
assign HRESP = HSELRamD & HRESPRam |
HSELEXTD & HRESPEXT |
// HSELCLINTD & HRESPCLINT |
HSELPLICD & HRESPPLIC |
// HSELPLICD & HRESPPLIC |
// HSELGPIOD & HRESPGPIO |
HSELBRIDGE & HRESPBRIDGE |
HSELBootRomD & HRESPBootRom |
HSELUARTD & HRESPUART |
// HSELUARTD & HRESPUART |
HSELSDC & HRESPSDC;
assign HREADY = HSELRamD & HREADYRam |
HSELEXTD & HREADYEXT |
// HSELCLINTD & HREADYCLINT |
HSELPLICD & HREADYPLIC |
// HSELPLICD & HREADYPLIC |
// HSELGPIOD & HREADYGPIO |
HSELBRIDGED & HREADYBRIDGE |
HSELBootRomD & HREADYBootRom |
HSELUARTD & HREADYUART |
// HSELUARTD & HREADYUART |
HSELSDCD & HREADYSDC |
HSELNoneD; // don't lock up the bus if no region is being accessed
// *** remove HREADYGPIO, others
// *** remove HREADYGPIO, others that are now unused
// Address Decoder Delay (figure 4-2 in spec)
flopr #(9) hseldelayreg(HCLK, ~HRESETn, HSELRegions, {HSELNoneD, HSELEXTD, HSELBootRomD, HSELRamD, HSELCLINTD, HSELGPIOD, HSELUARTD, HSELPLICD, HSELSDCD});

View File

@ -42,6 +42,7 @@ module wallypipelinedsocwrapper (
output HCLK, HRESETn,
output [31:0] HADDR,
output [`AHBW-1:0] HWDATA,
output logic [`XLEN/8-1:0] HWSTRB,
output HWRITE,
output [2:0] HSIZE,
output [2:0] HBURST,

View File

@ -1,32 +0,0 @@
all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2
sqrttestgen: sqrttestgen.c
gcc sqrttestgen.c -o sqrttestgen -lm
testgen: testgen.c
gcc testgen.c -o testgen -lm
./testgen
exptestgen: exptestgen.c
gcc -o exptestgen exptestgen.c -lm
./exptestgen
qslc_r4a2: qslc_r4a2.c
gcc qslc_r4a2.c -o qslc_r4a2 -lm
./qslc_r4a2 > qslc_r4a2.sv
qslc_r4a2b: qslc_r4a2b.c
gcc qslc_r4a2b.c -o qslc_r4a2b -lm
./qslc_r4a2b > qslc_r4a2b.tv
qslc_sqrt_r4a2: qslc_sqrt_r4a2.c
gcc qslc_sqrt_r4a2.c -o qslc_sqrt_r4a2 -lm
./qslc_sqrt_r4a2 > qslc_sqrt_r4a2.sv
inttestgen: inttestgen.c
gcc -lm -o inttestgen inttestgen.c
./inttestgen
clean:
rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2

View File

@ -1,127 +0,0 @@
/* testgen.c */
/* Written 2/19/2022 by David Harris
This program creates test vectors for mantissa and exponent components
of an IEEE floating point divider.
Builds upon program that creates test vectors for mantissa component only.
*/
/* #includes */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/* Constants */
#define ENTRIES 17
#define RANDOM_VECS 500
// #define BIAS 1023 // Bias is for double precision
/* Prototypes */
void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, double bFrac, int rSign, int rExp, double rFrac);
void printhex(FILE *fptr, double x);
double random_input(void);
double random_input_e(void);
/* Main */
void main(void)
{
FILE *fptr;
// aExp & bExp are exponents
// aFrac & bFrac are mantissas
// rFrac is result of fractional divsion
// rExp is result of exponent division
double aFrac, bFrac, rFrac;
int aExp, bExp, rExp;
int aSign, bSign, rSign;
double mantissa[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
1.75, 1.875, 1.99999,
1.1, 1.2, 1.01, 1.001, 1.0001,
1/1.1, 1/1.5, 1/1.25, 1/1.125};
int exponent[ENTRIES] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17};
int i, j;
int bias = 1023;
if ((fptr = fopen("testvectors","w")) == NULL) {
fprintf(stderr, "Couldn't write testvectors file\n");
exit(1);
}
for (i=0; i<ENTRIES; i++) {
bFrac = mantissa[i];
bExp = exponent[i] + bias;
bSign = i%2;
for (j=0; j<ENTRIES; j++) {
aFrac = mantissa[j];
aExp = exponent[j] + bias;
aSign = j%2;
rFrac = aFrac/bFrac;
rExp = aExp - bExp + bias;
rSign = (i+j)%2;
output(fptr, aSign, aExp, aFrac, bSign, bExp, bFrac, rSign, rExp, rFrac);
}
}
// for (i = 0; i< RANDOM_VECS; i++) {
// aFrac = random_input();
// bFrac = random_input();
// aExp = random_input_e() + BIAS; // make new random input function for exponents
// bExp = random_input_e() + BIAS;
// rFrac = a/b;
// rEx[] = e1 - e2 + BIAS;
// output(fptr, aExp, aFrac, bExp, bFrac, rExp, rFrac);
// }
fclose(fptr);
}
/* Functions */
void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, double bFrac, int rSign, int rExp, double rFrac)
{
// Print a in standard double format
fprintf(fptr, "%03x", aExp|(aSign<<11));
printhex(fptr, aFrac);
fprintf(fptr, "_");
// Print b in standard double format
fprintf(fptr, "%03x", bExp|(bSign<<11));
printhex(fptr, bFrac);
fprintf(fptr, "_");
// Print r in standard double format
fprintf(fptr, "%03x", rExp|(rSign<<11));
printhex(fptr, rFrac);
fprintf(fptr, "\n");
}
void printhex(FILE *fptr, double m)
{
int i, val, len;
len = 52;
while (m<1) m *= 2;
while (m>2) m /= 2;
for (i=0; i<len; i+=4) {
m = m - floor(m);
m = m * 16;
val = (int)(m)%16;
fprintf(fptr, "%x", val);
}
}
double random_input(void)
{
return 1.0 + rand()/32767.0;
}
double random_input_e(void)
{
return rand() % 300 + 1;
}

Binary file not shown.

View File

@ -1,83 +0,0 @@
/* testgen.c */
/* Written 10/31/96 by David Harris
This program creates test vectors for mantissa component
of an IEEE floating point divider.
*/
/* #includes */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/* Constants */
#define ENTRIES 10
#define RANDOM_VECS 500
/* Prototypes */
void output(FILE *fptr, long a, long b, long r, long rem);
void printhex(FILE *fptr, long x);
double random_input(void);
/* Main */
void main(void)
{
FILE *fptr;
long a, b, r, rem;
long list[ENTRIES] = {1, 3, 5, 18, 25, 33, 42, 65, 103, 255};
int i, j;
if ((fptr = fopen("inttestvectors","w")) == NULL) {
fprintf(stderr, "Couldn't write testvectors file\n");
exit(1);
}
for (i=0; i<ENTRIES; i++) {
b = list[i];
for (j=0; j<ENTRIES; j++) {
a = list[j];
r = a/b;
rem = a%b;
output(fptr, a, b, r, rem);
}
}
// for (i = 0; i< RANDOM_VECS; i++) {
// a = random_input();
// b = random_input();
// r = a/b;
// output(fptr, a, b, r);
// }
fclose(fptr);
}
/* Functions */
void output(FILE *fptr, long a, long b, long r, long rem)
{
printhex(fptr, a);
fprintf(fptr, "_");
printhex(fptr, b);
fprintf(fptr, "_");
printhex(fptr, r);
fprintf(fptr, "_");
printhex(fptr, rem);
fprintf(fptr, "\n");
}
void printhex(FILE *fptr, long m)
{
fprintf(fptr, "%016llx", m);
}
double random_input(void)
{
return 1.0 + rand()/32767.0;
}

View File

@ -1,2 +0,0 @@
verilator --lint-only --top-module srt srt.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
verilator --lint-only --top-module srtradix4 srt-radix4.sv qsel4.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,198 +0,0 @@
/*
Program: qslc_r4a2.c
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
User: James E. Stine
*/
#include <stdio.h>
#include <math.h>
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7
void disp_binary(double, int, int);
struct bits {
unsigned int divisor : DIVISOR_SIZE;
int tot : TOT_SIZE;
} pla;
/*
Function: disp_binary
Description: This function displays a Double-Precision number into
four 16 bit integers using the global union variable
dp_number
Argument List: double x The value to be converted
int bits_to_left Number of bits left of radix point
int bits_to_right Number of bits right of radix point
Return value: none
*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
int i;
double diff;
if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) {
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
printf("0");
}
if (i == bits_to_right+1)
;
return;
}
if (x < 0.0)
x = pow(2.0, ((double) bits_to_left)) + x;
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
diff = pow(2.0, ((double) -i) );
if (x < diff)
printf("0");
else {
printf("1");
x -= diff;
}
if (i == 0)
;
}
}
int main() {
int m;
int n;
int o;
pla.divisor = 0;
pla.tot = 0;
printf("\tcase({D[5:3],Wmsbs})\n");
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
printf("\t\t10'b");
disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
printf("_");
disp_binary((double) pla.tot, TOT_SIZE, 0);
printf(": q = 4'b");
/*
4 bits for Radix 4 (a=2)
1000 = +2
0100 = +1
0000 = 0
0010 = -1
0001 = -2
*/
switch (pla.divisor) {
case 0:
if ((pla.tot) >= 12)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -4)
printf("0000");
else if ((pla.tot) >= -13)
printf("0010");
else
printf("0001");
break;
case 1:
if ((pla.tot) >= 14)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -6)
printf("0000");
else if ((pla.tot) >= -15)
printf("0010");
else
printf("0001");
break;
case 2:
if ((pla.tot) >= 15)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -6)
printf("0000");
else if ((pla.tot) >= -16)
printf("0010");
else
printf("0001");
break;
case 3:
if ((pla.tot) >= 16)
printf("1000");
else if ((pla.tot) >= 4)
printf("0100");
else if ((pla.tot) >= -6)
printf("0000");
else if ((pla.tot) >= -18)
printf("0010");
else
printf("0001");
break;
case 4:
if ((pla.tot) >= 18)
printf("1000");
else if ((pla.tot) >= 6)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -20)
printf("0010");
else
printf("0001");
break;
case 5:
if ((pla.tot) >= 20)
printf("1000");
else if ((pla.tot) >= 6)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -20)
printf("0010");
else
printf("0001");
break;
case 6:
if ((pla.tot) >= 20)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -22)
printf("0010");
else
printf("0001");
break;
case 7:
if ((pla.tot) >= 24)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -24)
printf("0010");
else
printf("0001");
break;
default: printf ("XXX");
}
printf(";\n");
(pla.tot)++;
}
(pla.divisor)++;
}
printf("\tendcase\n");
}

Binary file not shown.

View File

@ -1,190 +0,0 @@
/*
Program: qslc_r4a2.c
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
User: James E. Stine
*/
#include <stdio.h>
#include <math.h>
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7
void disp_binary(double, int, int);
struct bits {
unsigned int divisor : DIVISOR_SIZE;
int tot : TOT_SIZE;
} pla;
/*
Function: disp_binary
Description: This function displays a Double-Precision number into
four 16 bit integers using the global union variable
dp_number
Argument List: double x The value to be converted
int bits_to_left Number of bits left of radix point
int bits_to_right Number of bits right of radix point
Return value: none
*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
int i;
double diff;
if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) {
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
printf("0");
}
if (i == bits_to_right+1)
;
return;
}
if (x < 0.0)
x = pow(2.0, ((double) bits_to_left)) + x;
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
diff = pow(2.0, ((double) -i) );
if (x < diff)
printf("0");
else {
printf("1");
x -= diff;
}
if (i == 0)
;
}
}
int main() {
int m;
int n;
int o;
pla.divisor = 0;
pla.tot = 0;
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
/*
4 bits for Radix 4 (a=2)
1000 = +2
0100 = +1
0000 = 0
0010 = -1
0001 = -2
*/
switch (pla.divisor) {
case 0:
if ((pla.tot) >= 12)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -4)
printf("0");
else if ((pla.tot) >= -13)
printf("2");
else
printf("1");
break;
case 1:
if ((pla.tot) >= 14)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -6)
printf("0");
else if ((pla.tot) >= -15)
printf("2");
else
printf("1");
break;
case 2:
if ((pla.tot) >= 15)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -6)
printf("0");
else if ((pla.tot) >= -16)
printf("2");
else
printf("1");
break;
case 3:
if ((pla.tot) >= 16)
printf("8");
else if ((pla.tot) >= 4)
printf("4");
else if ((pla.tot) >= -6)
printf("0");
else if ((pla.tot) >= -18)
printf("2");
else
printf("1");
break;
case 4:
if ((pla.tot) >= 18)
printf("8");
else if ((pla.tot) >= 6)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -20)
printf("2");
else
printf("1");
break;
case 5:
if ((pla.tot) >= 20)
printf("8");
else if ((pla.tot) >= 6)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -20)
printf("2");
else
printf("1");
break;
case 6:
if ((pla.tot) >= 20)
printf("8");
else if ((pla.tot) >= 8)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -22)
printf("2");
else
printf("1");
break;
case 7:
if ((pla.tot) >= 24)
printf("8");
else if ((pla.tot) >= 8)
printf("4");
else if ((pla.tot) >= -8)
printf("0");
else if ((pla.tot) >= -24)
printf("2");
else
printf("1");
break;
default: printf ("X");
}
printf("\n");
(pla.tot)++;
}
(pla.divisor)++;
}
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -1,198 +0,0 @@
/*
Program: qslc_r4a2.c
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
User: James E. Stine
*/
#include <stdio.h>
#include <math.h>
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7
void disp_binary(double, int, int);
struct bits {
unsigned int divisor : DIVISOR_SIZE;
int tot : TOT_SIZE;
} pla;
/*
Function: disp_binary
Description: This function displays a Double-Precision number into
four 16 bit integers using the global union variable
dp_number
Argument List: double x The value to be converted
int bits_to_left Number of bits left of radix point
int bits_to_right Number of bits right of radix point
Return value: none
*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
int i;
double diff;
if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) {
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
printf("0");
}
if (i == bits_to_right+1)
;
return;
}
if (x < 0.0)
x = pow(2.0, ((double) bits_to_left)) + x;
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
diff = pow(2.0, ((double) -i) );
if (x < diff)
printf("0");
else {
printf("1");
x -= diff;
}
if (i == 0)
;
}
}
int main() {
int m;
int n;
int o;
pla.divisor = 0;
pla.tot = 0;
printf("\tcase({D[5:3],Wmsbs})\n");
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
printf("\t\t11'b");
disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
printf("_");
disp_binary((double) pla.tot, TOT_SIZE, 0);
printf(": q = 4'b");
/*
4 bits for Radix 4 (a=2)
1000 = +2
0100 = +1
0000 = 0
0010 = -1
0001 = -2
*/
switch (pla.divisor) {
case 0:
if ((pla.tot) >= 24)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -26)
printf("0010");
else
printf("0001");
break;
case 1:
if ((pla.tot) >= 28)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -10)
printf("0000");
else if ((pla.tot) >= -28)
printf("0010");
else
printf("0001");
break;
case 2:
if ((pla.tot) >= 32)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -12)
printf("0000");
else if ((pla.tot) >= -32)
printf("0010");
else
printf("0001");
break;
case 3:
if ((pla.tot) >= 32)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -12)
printf("0000");
else if ((pla.tot) >= -34)
printf("0010");
else
printf("0001");
break;
case 4:
if ((pla.tot) >= 36)
printf("1000");
else if ((pla.tot) >= 12)
printf("0100");
else if ((pla.tot) >= -12)
printf("0000");
else if ((pla.tot) >= -36)
printf("0010");
else
printf("0001");
break;
case 5:
if ((pla.tot) >= 40)
printf("1000");
else if ((pla.tot) >= 12)
printf("0100");
else if ((pla.tot) >= -16)
printf("0000");
else if ((pla.tot) >= -40)
printf("0010");
else
printf("0001");
break;
case 6:
if ((pla.tot) >= 40)
printf("1000");
else if ((pla.tot) >= 16)
printf("0100");
else if ((pla.tot) >= -16)
printf("0000");
else if ((pla.tot) >= -44)
printf("0010");
else
printf("0001");
break;
case 7:
if ((pla.tot) >= 44)
printf("1000");
else if ((pla.tot) >= 16)
printf("0100");
else if ((pla.tot) >= -16)
printf("0000");
else if ((pla.tot) >= -46)
printf("0010");
else
printf("0001");
break;
default: printf ("XXX");
}
printf(";\n");
(pla.tot)++;
}
(pla.divisor)++;
}
printf("\tendcase\n");
}

File diff suppressed because it is too large Load Diff

View File

@ -1,2 +0,0 @@
vsim -do "do srt.do"

View File

@ -1 +0,0 @@
vsim -c -do "do srt.do"

View File

@ -1,2 +0,0 @@
vsim -do "do srt-radix4.do"

View File

@ -1 +0,0 @@
vsim -c -do "do srt-radix4.do"

Binary file not shown.

View File

@ -1,89 +0,0 @@
/* sqrttestgen.c */
/* Written 19 October 2021 David_Harris@hmc.edu
This program creates test vectors for mantissa component
of an IEEE floating point square root.
*/
/* #includes */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/* Constants */
#define ENTRIES 17
#define RANDOM_VECS 500
/* Prototypes */
void output(FILE *fptr, double a, double r);
void printhex(FILE *fptr, double x);
double random_input(void);
/* Main */
void main(void)
{
FILE *fptr;
double a, b, r;
double list[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
1.75, 1.875, 1.99999,
1.1, 1.2, 1.01, 1.001, 1.0001,
1/1.1, 1/1.5, 1/1.25, 1/1.125};
int i, j;
if ((fptr = fopen("sqrttestvectors","w")) == NULL) {
fprintf(stderr, "Couldn't write sqrttestvectors file\n");
exit(1);
}
for (i=0; i<ENTRIES; i++) {
a = list[i];
r = sqrt(a);
output(fptr, a, r);
}
for (i = 0; i< RANDOM_VECS; i++) {
a = random_input();
r = sqrt(a);
output(fptr, a, r);
}
fclose(fptr);
}
/* Functions */
void output(FILE *fptr, double a, double r)
{
printf("sqrt(%lf) = %lf\n", a, r);
printhex(fptr, a);
fprintf(fptr, "_");
printhex(fptr, r);
fprintf(fptr, "\n");
}
void printhex(FILE *fptr, double m)
{
int i, val;
while (m<1) m *= 2;
while (m>2) m /= 2;
for (i=0; i<52; i+=4) {
m = m - floor(m);
m = m * 16;
val = (int)(m)%16;
fprintf(fptr, "%x", val);
}
}
double random_input(void)
{
return 1.0 + rand()/32767.0;
}

View File

@ -1,517 +0,0 @@
0000000000000_0000000000000
8000000000000_3988e1409212e
4000000000000_1e3779b97f4a8
2000000000000_0f876ccdf6cd9
1000000000000_07e0f66afed07
c000000000000_52a7fa9d2f8ea
e000000000000_5e8add236a58f
ffff583a53b8e_6a09ab16ee3d0
199999999999a_0c7ebc96a56f6
3333333333333_186f174f88472
028f5c28f5c29_0146dd68287f3
004189374bc6a_0020c2830b9c7
00068db8bac71_000346d6ff116
d1745d1745d17_e82c3f9d89e1c
5555555555555_a20bd700c2c3e
999999999999a_c9f25c5bfedd9
c71c71c71c71c_e2b7dddfefa66
ae3271fce3f9c_d551d18e54277
93e045e88bd11_418bf3cc1e4c3
90f7838f071e1_c5184e372ee71
98d2536ca6d95_c982e901a1e14
d2c916d22da46_e8decc85822fb
94a0f921f243e_c728c4dbee1d3
574b50dea1bd4_2873820e10e0c
895a7660ecc1e_c0c5ced51afa7
1c77322e645cd_0ddb946295434
1ba62a7c54f8b_7d169e3a2659b
e8e2978d2f1a6_61c59e7574d95
41ffe2a7c54f9_9608c143bfd66
7590faa9f553f_353eee44a1afa
06e089a913522_6ede89bf49029
e79076a8ed51e_f3a1feab3b7d6
d51d2f4e5e9cc_ea168f50673ac
45808ced19da3_983c902a22c03
6f466990d321a_b1a42fd6b592a
220ac945928b2_815be8939b369
36c90d6e1adc3_8ee6afea03f82
0b53a3a7474e9_059a20c9f6405
f17a816502ca0_f8afe204e2600
1917108e211c4_7b5d8ccee92ea
9bc245b48b691_cb26e86f5735a
40eb7926f24de_955a5577ffe7a
9a985ff8bff18_4435dbe84773a
09d9a6834d06a_70f0257a8ab67
bdc1c7c38f872_51ceac06eed23
ff7907ba0f742_ffbc7f69e3efc
bf08f7f1efe3e_de6a83a26fd1d
06a3206640cc8_6eb3ad01c9815
ada579aef35de_d504e625a2d6d
39b060f4c1e98_90c29e1123eb5
2f2947da8fb52_16959cbd1d48c
4671cd139a273_98d3bd2eff117
0c7377beef7de_72bd0582548eb
f975c46b88d71_67b81123f8dce
f2157586eb0dd_f8fe755da5331
2b8fdc2fb85f7_14ecfca93ae4c
8af47b4cf699f_c1af76b04ddd5
0db59ffb3ff68_739b3726df36e
8a3739de73bce_c143ac24df9ed
99dcbbd977b2f_43ebbe8469bbd
c87c1d503aa07_e3723a3635fdf
222386ff0dfe2_10890df0885f0
68f1a9235246a_2ff9f6505d566
9d934c9a99353_cc29e5f0e6998
d690506ca0d94_ead84585b61ec
1dc9f0d3e1a7c_0e7c1165efbfe
e613feebfdd80_f2dee7435c007
0d535dd6bbad7_73578043fac8f
608ed9bdb37b6_2c6ca8cc4e6bb
89b27d04fa09f_c0f802ca71172
539721fa43f48_a0fa3fcb09adb
c7d220f041e08_e3182e88ae49f
654afb15f62bf_2e6f61bf98e0e
06ea8f751eea4_036f615bb315f
48515122a2454_21e9a04cd1f6b
d4b76d06da0db_5a65d547598d0
0282cd059a0b3_01409dbd6fa12
e80b2216442c9_f3e0d516d00bd
f0fcd6e9add36_f8701fac9a977
cdf3d353a6a75_e6553654da734
b3b1297a52f4a_d84eb4d0cdd1c
1116f5a5eb4bd_086824801e0f1
145fa05f40be8_782b4f7607e38
803d426284c51_39a1e28fda198
85445d08ba117_be6f8226d6c13
066f15de2bbc5_6e8f568cc6f8d
55e48f491e924_a26383073cc76
1032d851b0a36_7551305922e9c
41eb5d56baad7_95fbd0e6c36fd
c02b923f247e5_52b8721f6429d
dd1f363e6c7ce_ee40ded8c9bdb
dc98d325a64b5_edfb3ec1fe213
71258ca319463_b2beaf8de715d
232415902b205_11017231bc63a
7a261fb83f708_b803901d08750
47afdd07ba0f7_999ab6f13db2a
6a9291cd239a4_30a9519b120a8
60341ea03d408_a8a695fe1273c
53f4eb59d6b3b_a133d02e5f0b1
c2b2483c90792_53ac7d5cd5d67
c29ebde17bc2f_e054a2134123f
a8925cd0b9a17_d23db60b50520
524a8a0d141a3_264878d4d966a
d4f8ed91db23b_ea039e961a422
c96daec35d86c_e3f2144d791a1
66cb9f1f3e3e8_2f1200c8b758c
5f97f1e7e3cfc_a848653bfa858
e9bc086410c82_f4be56d9e1746
2d66981930326_88d50f4e55eaf
508ec8c991932_9f1c8c264d1f5
b7a9dbafb75f7_da74869225afe
c2238317062e1_5376ac34cb03c
d918439c87391_ec29c1ab0b399
9806f815f02be_4331e7926a75f
a12e62c0c5819_ce2a519890e29
5e56a0a141428_a786593154104
d27091e923d24_e8b06fcba35cc
ee19e403c8079_63a777df9bd21
ba0de3abc7579_dbbe4f307b7a9
e68d47be8f7d2_f31d205d919e7
d720bb25764af_eb2391d186941
2e72bc85790af_8983a68b1933f
c3201ae035c07_e0998f5edcc08
484047c08f812_99f4ef763a198
ba5973dae7b5d_5083801deb09a
3d403a907520f_930773446aea4
1fd4498093012_0f72d0c56b2e7
927402d405a81_c5ef16b504e39
3adcb25164a2d_1be8dfa703db9
c9cabf357e6b0_565651a123f9d
cf1c9ba937527_e6f156560fab1
801c5c08b8117_bb780dcd4a3ce
1ae378c6f18de_0d1bd1404d89c
1cea6bf4d7e9b_7df032936ca73
aa86a4cd499a9_4a70a8d0586dd
5b65ac7b58f6b_a5be43f803917
d04e3b847708f_e791e8d64ca05
a6aa223444689_48f0e09b7504f
023dcdfb9bf73_6b9ec1c492343
fb5da72b4e56a_68658275b9f12
85620d141a283_be8087eda1701
f8374f2e9e5d4_fc17d6b6aa491
debe95252a4a5_ef17d49382367
5e7450a8a1514_a7984aa86726d
886996632cc66_3cf350a8e3f14
7fe6f7ddefbbe_bb593a8c74da0
7989e283c5079_36e2b9b0780e8
2d3eba2d745af_15b3dccad59d9
dbb7a75f4ebea_ed867fc2e2d84
2b466a2cd459b_8771cd81d47f3
f49a9335266a5_fa45142e25067
382293d527aa5_8fc4312e812d9
76e1195232a46_b61b865625966
0102fe95fd2c0_6ac0db2f8bcba
9646ecb5d96bb_c815d9b329126
501f4cde99bd3_9ed7c5d5bc785
7d7efa39f473f_b9f4fb5c3d080
31588b9117223_17961d26f5102
e679d60bac176_f3132728a8d37
ae7535aa6b54d_d5763b26476cf
0b0e710ce219c_71c62b418032b
68ebf3b7e76fd_addfd161ac4b7
ebb1a9835306a_f5be89408b278
987e2d705ae0b_c953d0c9914d0
772368e2d1c5a_b64243fae3fb7
502dbcc7798ef_9ee0ae7d41d9b
ef55989331266_f799268f564e9
476ba46348c69_9970116fd2787
8501011202240_be48e041c087a
7ef86050c0a18_391d2f0629239
1451dfc3bf878_7821f369d1226
0a5d0e1e1c3c4_714b482f78206
4082d985b30b6_1e71f84b709d7
1e686870d0e1a_0ec7049bce04d
a32d7afaf5f5f_cf4515600a0db
d8864ccc99993_5bcd565a71793
a3b204ec09d81_cf8e4d0f9e74a
258e5004a0094_83afadcd1ef88
82e0837d06fa1_bd101d541955f
a99eebfdd7fbb_d2d1141d12617
433ae8a5d14ba_96cf2f2b9b8c3
00029425284a5_00014a11bf5c4
a2824ed49da94_cee674f907509
c7add37ba6f75_e304f163dffc6
00ff9faf3f5e8_6abe7a37761f6
fee5806f00de0_ff72acb649dc2
bafa6ab4d569b_dc3d85026c40a
bdbce225c44b9_ddb8afc7bfccb
28548ec91d924_136d8e62015c4
11be577caef96_08b91fd0554ab
d8a235806b00d_ebec55b8bf00e
301702e605cc1_8a94b08c2982f
7171e683cd07a_b2eba2d7a9729
4e227f64feca0_9d9d5317da9b6
911e609cc1398_407278920f577
d37de283c5079_e93d701b76ce6
a3b45ca4b9497_cf8f9841cb9d6
6fbd91b323664_32d331f930e2e
1ae386df0dbe2_7c93c78e97a2c
28af5daebb5d7_85bf3c0e14efe
cf77a9c7538ea_5873e435c4655
5ff5fed3fda80_a88120c300c87
98335e26bc4d8_43437c938880b
0fca30186030c_750964d64ec9c
83c10be217c43_bd91314e9c2f4
379f95072a0e5_1a721a6753344
fc0b567eacfd6_fe04afe9c2350
276d10ca21944_84eb4f9969281
c15c1a8835107_dfa88eb80f3a5
7ee5d9f3b3e76_bac4aa8497839
4204ff89ff140_960bfa7d01fdf
224d4ada95b53_818814678ee18
7ee50236046c1_bac42dc7ca58c
aaa3933f267e5_d35fef27b94ef
d9d0a8295052a_ec89a1e80c752
bf22ea21d443b_de7865a94a4bb
a98313a6274c5_d2c1ceb7337ca
f57e4ed89db14_fab82ed7ff119
7cd9cc3f987f3_b995432eff078
ce8ba57f4afe9_e6a51a7901e9d
f79fcb87970f3_fbcb7a062af68
557601bc03780_a21fd8a725b25
fd3950baa1754_690e4b24fda4d
4fd9148229045_9eac6e56877bb
a8fca195432a8_d2780bc6b98a9
c729d8afb15f6_e2bef95620f17
3b713d027a04f_1c2bd001532f4
4caafe31fc640_9cb4a80c2fa32
fc0a86050c0a2_fe04474537bc8
424fae7f5cfec_963b0dac95fa2
d5516f62dec5c_ea31d99df7240
66bc7668ecd1e_ac921f868adac
447f5a96b52d7_2038947b4b29b
50b18f3b1e764_25965fa129e26
da0299e533ca6_eca396b34a8cd
2fd5c94b92972_16e4d4254bac7
44269225244a5_976360c639740
ca17f3bfe77fd_e44c1dd968501
4d65c9f393e72_9d28763d5cfbd
057caca5594ab_6de5e38acee76
f169d6e3adc76_f8a76dc8df97f
1ec992eb25d65_0ef4ef3449518
179f815302a60_7a5fc96aa31be
7048d911b2236_b23ca645e8430
d012e565cacb9_58ad8ec8be73b
e2d452f0a5e15_5f92f5ffaefd3
2f65c32f865f1_8a21a078f2055
e3837056e0adc_f18d8a27380ab
cdc4f091e123c_57d232be8a40f
58731ece3d9c8_28f31f19298aa
b20915ce2b9c5_d768a11d16e12
bcf887910f222_518261ad16d10
a9911b2236447_d2c98074dcff2
613f7c4ef89df_2cb7e160d7c89
327088ed11da2_8c1a3372f503b
aaa432b46568d_d360467f228ce
df0e5b3cb6797_ef411299da1bf
59dcb08d611ac_a4cf75540e2c1
ee9261bcc3798_63d2d29caeecf
ed8870a0e141c_63731aff23c30
384cbe097c130_1ac088bb0dd3f
6c9b8cbb19763_b01053166e905
75eabac5758af_b58b788c3d84c
3e640c5418a83_93c0a50ff06df
4ee5450a8a151_9e15ce99d389c
a7f2aa5554aab_49709f0acf22a
645cc57d8afb1_ab2686a652109
5912e675cceba_a454a133a1a93
467d521aa4355_98daf3bf22c37
63791ed23da48_2da9f6a2bb9b8
7a264c5c98b93_b803aa160a737
37e128f651eca_8f9a4ab9a1e87
411f58deb1bd6_957b16ee69083
7627146a28d45_b5aec6723866d
5048040808101_25685807290de
7b1fb58b6b16d_b894ad98eaf3c
9e2cf769eed3e_cc7f5b1f41ba4
d787c5338a671_eb59441cd889b
5e9d25da4bb49_a7b0f75669cbe
4e6bb6236c46e_9dcaa120e794c
0776a925524aa_03b4778fccb7b
10a03ba47748f_759c2bedefe31
670fcf3f9e7f4_2f2ecbe910302
0ac5cb6396c73_0554b03e81ada
c13df9cbf397e_df987a0fb187c
5f5082d505aa1_a81d4926c24ed
8008c2f185e31_398c74e8df332
c7dc849909321_559d54cc2b0c4
724e66b4cd69a_33e4bbc30c9eb
2748493c92792_84d31934513a7
2fbdc8e391c72_8a5ac8a2e8886
554b925f24be5_a205dcaa3c364
27bbabdf57beb_1326782142cac
8d367798ef31e_c2f808c87d9f1
27533d567aacf_12f5deec64f17
51945488a9115_25f92d6ea26b2
84a732ee65dcd_be155990ed3be
f8046428c8519_fbfe2e707160d
d710b2216442d_eb1b360f3527a
a7a51eda3db48_d1bb607f46386
5375e713ce27a_26ca91c1f0ce8
817a8f251e4a4_bc42086f3f1e3
bf008f911f224_524760d1e709e
ead84c6898d13_f54f95bc682e5
d683b837706ee_ead1b3e4bde66
87a249cc93992_bfca701ca531f
65f201f003e00_ac191f863ce79
f047b04f609ec_646fffc092bd4
5f240ef81df04_a802734b8a7f3
89124f849f094_3d376cd3db8df
8c72ac315862b_c288d5b00e9a9
e2dd638ec71d9_f1381345d33f3
d5276366c6cd9_ea1be3b9eb936
b952a6194c32a_db598334d83a2
a0e924e249c49_ce03f563fdc69
965f5d2aba557_c823913ba08d9
189d28e651cca_7b0b409c0a17a
3072c32d865b1_172cd6665d696
cf58edbddb7bb_e7110ba7fe74a
d1c25c40b8817_e85524c53f258
bf7faad355a6b_deaa03698b005
fe216d3ada75b_69608b485dc01
2707ee97dd2fc_84a8b81452ea1
4d5e02fc05f81_9d23a492cab16
18825c5cb8b97_0bf97c45ddb75
baaf8d471a8e3_dc15450d64a61
f83f5606ac0d6_674989f2b429b
dad1f5d3eba7d_5ca55963e3317
b2adf167e2cfc_d7c21f6b1ca69
fbe9776aeed5e_6897272bfb5cc
2a10337866f0d_143b6e39b27f8
718b93b7276e5_b2fabfa759d04
5e515f82bf058_a7832c1f42b4f
74866188c3118_b4bac8738fa28
1cd4c5398a731_7de1af32f25fe
34cf17b22f646_8da17681a4b5c
f845569aad356_674bad5d4924d
ad25333e667cd_d4bedd0aecd3c
72d60dbc1b783_b3bd135e57267
6d02e905d20ba_b04d8c9bc773b
c6a16bf2d7e5b_e2769b569a79e
fe8573cae795d_6983f76829169
f0c0ee41dc83c_649b871bad319
1b26f96df2dbe_7cc1236af05ca
711ebfed7fdb0_3366654dceb41
4ca0ce019c034_23cf3daf6a36c
dd271b8a37147_ee44f5c421b0b
d12a88dd11ba2_e80586b7805d9
3e87c897912f2_93d74ce63be2d
ac7a094012802_d461590559b4f
a2e6e515ca2b9_cf1e126c2a7b4
fc02e6c5cd8ba_68a02e6a93e0d
57097faaff560_285715a40378d
93d1a7534ea6a_418621e995c81
5156f421e843d_9f97ebad5261b
37c3ee03dc07c_8f8790704d084
0924c1d183a30_04881c489753d
35c27c74f8e9f_8e3e0ae532ece
2937474e8e9d2_13d6d7821da0f
b9d29245248a5_50502e27b8267
4b635ad6b5ad7_9be93a4cb06c1
745a054c0a981_34be1df77fe08
27366e6cdcd9c_12e8756a6f38e
535d74a2e945d_26bff41decc57
756b2136426c8_352f434ee70b5
b59cd111a2234_4eb501f2ec4eb
de7bd203a4075_eef54dfc17c7e
2ace0f781ef04_87230fc21ac9f
0fdb377e6efce_07cf1e235f818
514bdfb7bf6f8_9f91186d6fd41
860cc9b993732_bee2615ddfe41
f327ed77daefb_657801de5581d
42226664ccc9a_961e8383f1d72
c422e781cf03a_e1235641b538d
405bb51b6a36d_94ff7fc057495
0915f98ff31fe_706845cad2ee6
a9b539ea73d4e_d2dd4f3f72272
1d7cd09da13b4_7e52454aa77ff
b47504d209a41_4e43d00a88125
a8f317122e246_d272cf495d24d
93e1b3ab6756d_418c8565c5fc9
f46d9ddb3bb67_fa2e57dc465a1
4ddd14da29b45_2459d84009dd8
757399af335e6_b545bfaaeaa5c
47023b207640f_992e23a866582
f8437e92fd260_fc1dfa5d8e2c1
5a630f561eac4_29c898a1bc51b
cb929c0938127_e5141d2922f10
16b876a0ed41e_79c357aa29d9a
a4ace129c2538_482a7a94cd950
637726ee4ddca_2da920d3c03e9
910f787ef0fde_c525d7242a063
8c81733ee67dd_c2913b3167ea7
2cc04b0896113_15798fdab384d
d5c196432c866_5ac821396e6b4
bb2cd765aecb6_dc589f396a3db
712c8e0d1c1a4_b2c2cf96b0ce5
92e04c4098813_c62c2457074fe
cbe9cdab9b573_5721221a0bf90
f91829d053a0a_fc8914b7cdd21
6a77e47fc8ff9_aecb5b85be439
afeec21d843b1_d643ef2b90f14
e54c410882110_6078bd1f8333d
bafe7e18fc320_dc3fb5df72e0c
80c4c00980130_39d929a387cc1
97deacbd597ab_4321f1d40e697
881cef41de83c_3cd45a5f9ee1b
3507ccf799ef3_1944a8caee28e
5076a6354c6aa_9f0da98ac6c59
9e46fa09f413f_cc8dd08fb8be8
0dc3f617ec2fe_73a5175d66c26
a618007000e00_d0e0e43954673
b5c617cc2f986_d96f38a1e24ed
d4bb0fee1fdc4_5a672d42d6981
8fd319b6336c6_c472f924b09f7
625c9c9d393a7_a9f3256ef10d4
c5360d041a083_549ebeb643612
e7f50aca15943_616fa109cde0f
2de938a27144e_892a268a2acc0
2847242a48549_857ac0a5f747c
0f5311f223e44_74b79cf898d09
30ae26f04de0a_8af6b35ff6236
72a1394a7294e_3407285970c2c
379d0a5214a43_1a70f382c5425
c70c4c7498e93_e2af4dc60c104
e80d9edb3db68_617887a7722a9
5bd5721ae435d_a602171c5580b
3832da7db4fb7_8fce9d43fd697
0d32d435a86b5_73411135aca74
3ce8436086c11_92cf8f6ebc849
31450446088c1_8b58701eb7b1e
de92e2b5c56b9_ef013bd7260e7
a8e1f3e7e7cfd_d26967647c8c8
1f2926124c24a_0f2211bad7fca
94e9b5f36be6d_41f58c8807554
d8f88bed17da3_ec1941ea35d9c
5b7be977d2efa_a5cbc36bac10c
ed535cbeb97d7_635ffd630a85b
cdd9c98393072_e6478154d8edf
a753a09341268_d18e92ec81d15
29d550caa1954_14202398e8fff
6dcadfd5bfab8_3202e02068f82
c1b3209a41348_dfd6fe91e15e3
608a13c427885_2c6aa026e29ec
a140f7e1efc3e_ce349c86121be
51803a487490f_9fb15781c905b
29ba6774cee9a_866e7b273cac2
07abfcb7f96ff_03cebea460554
8e7cdba5b74b7_c3b12ac430dbc
4082df19be338_9518407191a9a
ac1b2286450c9_d42d7783a38fe
3b93db1fb63f7_1c3b675d40b07
c4ac180830106_e16c4f5d7d3e5
9596fc8df91bf_c7b30e07afbd6
399672b8e571d_90b20d5068710
c18a9395272a5_533cfc9f3fe5c
596a8e951d2a4_a48a00d82c458
9089cf3f9e7f4_c4da4e411869b
70622cf059e0b_b24b94843b053
9a5d8b0b16163_ca5fd85efc4f9
9ad825304a609_444f08369b06e
4342625cc4b99_1fabad9842b85
bc7436e06dc0e_51502fc43175e
d92019f033e06_ec2dd53f69949
8b08abe157c2b_3e01e15ae4182
10d5522ea45d5_75c08a7bf9165
3cea498493092_92d0d8ba07bc4
1c61504ea09d4_7d94416e09d5b
de40218043008_eed66db4947dd
855e098413082_3bb7e1b37331c
875de1bbc3778_3c872161079b4
38c351d6a3ad4_1af6318279595
799274ace959d_b7ad9deab7b4b
0ab822e045c09_054dff9072a02
4ca4b2cd659ad_9cb0c07cc188c
1acd6c86d90db_7c84e91c740a7
d6da4bbc97793_eafed9ccde54b
5f3e5948b2916_a812527e18662
9ea16ae2d5c5b_ccc013621ca2b
6551275a4eb4a_abb8e42c4fd71
3f7670b4e169c_1df9ee59883b6
4ab68d651aca3_9b7dc70153f53
8a8a7114e229c_3dcf0e6129473
10e43e607cc10_75cac2f8b142b
b01a4c8899113_d65ba26339e53
3ca9879b0f362_92a7aeed082a3
491390d321a64_9a78ca81ce7d7
097edb15b62b7_70b11fde081ea
9a5aada55b4ab_441d7f4eb8f26
72df7b76f6edf_3421065197feb
70143dd87bb0f_b21da1b300cb4
9a97696ed2dda_ca8029002c6f3
5b0eeee9ddd3c_a58999cd748d9
39216502ca059_90673d90505b0
0d96fe2dfc5c0_73861c5399359
046a26944d28a_0232a8f504ee4
5f1bab6f56deb_a7fd62ab6c98d
8048091012202_bb9143863c355
3cec95252a4a5_92d24e2d35070
6692abf957f2b_ac792841ad4d7
22bf07060e0c2_81d392ae8a6a9
2b0bf617ec2fe_14affea7c72e7
772a6e54dca9c_35e839db8616e
3896f75deebbe_1ae22082f4d3d
0bdf2ed65dacc_05de5efbd3b35
4f9107ae0f5c2_9e7ff0fac94aa
b716e835d06ba_da2535bdda60b
730f8c7f18fe3_b3ded974083fa
5d92c1dd83bb0_2b266a35dbe6c
55b25314a6295_a244c51c2b5ba
c2130a7a14f43_e00a279d14229
4e82994932926_9dd8c9c26b402
40c5c0e381c70_1e8fdc3761c6f
c5261f5c3eb88_e1ad2eb83b960
7d835e86bd0d8_b9f786983a82c
41dcb3c96792d_95f291eddaca8
01c9a6ef4ddea_6b4cf2282cb85

View File

@ -1,31 +0,0 @@
# srt.do
#
# David_Harris@hmc.edu 19 October 2021
# Use this wally-pipelined.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined.do -c
# (omit the "-c" to see the GUI while running from the shell)
onbreak {resume}
# create library
if [file exists work] {
vdel -all
}
vlib work
vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv
vopt +acc work.testbenchradix4 -o workopt
vsim workopt
-- display input and output signals as hexidecimal values
add wave /testbenchradix4/*
add wave /testbenchradix4/srtradix4/*
add wave /testbenchradix4/srtradix4/qsel4/*
add wave /testbenchradix4/srtradix4/otfc4/*
-- Run the Simulation
run -all

View File

@ -1,5 +0,0 @@
add wave -noupdate /testbench/*
add wave -noupdate /testbench/srt/*
add wave -noupdate /testbench/srt/otfc2/*
add wave -noupdate /testbench/srt/preproc/*
add wave -noupdate /testbench/srt/divcounter/*

View File

@ -1,28 +0,0 @@
# srt.do
#
# David_Harris@hmc.edu 19 October 2021
# Use this wally-pipelined.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined.do -c
# (omit the "-c" to see the GUI while running from the shell)
onbreak {resume}
# create library
if [file exists work] {
vdel -all
}
vlib work
vlog +incdir+../config/rv64gc +incdir+../config/shared srt.sv testbench.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv
vopt +acc work.testbench -o workopt
vsim workopt
-- display input and output signals as hexidecimal values
do ./srt-waves.do
-- Run the Simulation
run -all

View File

@ -1,315 +0,0 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu 13 January 2022
// Modified: cturek@hmc.edu June 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0)
`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN))
module srt (
input logic clk,
input logic Start,
input logic Stall, // *** multiple pipe stages
input logic Flush, // *** multiple pipe stages
// Floating Point Inputs
// later add exponents, signs, special cases
input logic XSign, YSign,
input logic [`NE-1:0] XExp, YExp,
input logic [`NF-1:0] SrcXFrac, SrcYFrac,
input logic [`XLEN-1:0] SrcA, SrcB,
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic rsign, done,
output logic [`DIVLEN-1:0] Rem, Quot, // *** later handle integers
output logic [`NE-1:0] rExp,
output logic [3:0] Flags
);
logic qp, qz, qm; // quotient is +1, 0, or -1
logic [`NE-1:0] calcExp;
logic calcSign;
logic [`DIVLEN-1:0] X, Dpreproc;
logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur;
logic intSign;
srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign);
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
// Otherwise, the divisor is retained and the partial remainder
// is fed back for the next iteration.
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+2:0], 1'b0}, {4'b0001, X}, Start, WSN);
flop #(`DIVLEN+4) wsflop(clk, WSN, WS);
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+2:0], 1'b0}, {(`DIVLEN+4){1'b0}}, Start, WCN);
flop #(`DIVLEN+4) wcflop(clk, WCN, WC);
flopen #(`DIVLEN+4) dflop(clk, Start, {4'b0001, Dpreproc}, D);
// Quotient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qm);
flopen #(`NE) expflop(clk, Start, calcExp, rExp);
flopen #(1) signflop(clk, Start, calcSign, rsign);
flopen #(7) durflop(clk, Start, calcDur, dur);
counter divcounter(clk, Start, dur, done);
// Divisor Selection logic
assign Db = ~D;
mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel);
// Partial Product Generation
csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA);
otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);
expcalc expcalc(.XExp, .YExp, .calcExp);
signcalc signcalc(.XSign, .YSign, .calcSign);
endmodule
////////////////
// Submodules //
////////////////
///////////////////
// Preprocessing //
///////////////////
module srtpreproc (
input logic [`XLEN-1:0] SrcA, SrcB,
input logic [`NF-1:0] SrcXFrac, SrcYFrac,
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic [`DIVLEN-1:0] X, D,
output logic [$clog2(`XLEN+1)-1:0] intExp, dur, // Quotient integer exponent
output logic intSign // Quotient integer sign
);
logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
logic [`XLEN-1:0] PosA, PosB;
logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
lzc #(`XLEN) lzcA (PosA, zeroCntA);
lzc #(`XLEN) lzcB (PosB, zeroCntB);
assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}};
assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}};
assign PreprocA = ExtraA << (zeroCntA + 1);
assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}};
assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}};
assign X = Int ? PreprocA : PreprocX;
assign D = Int ? PreprocB : PreprocY;
assign intExp = zeroCntB - zeroCntA + 1;
assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
assign dur = Int ? (intExp & {7{~intExp[6]}}) : (`DIVLEN + 2);
endmodule
/////////////////////////////////
// Quotient Selection, Radix 2 //
/////////////////////////////////
module qsel2 ( // *** eventually just change to 4 bits
input logic [`DIVLEN+3:`DIVLEN] ps, pc,
output logic qp, qz, qm
);
logic [`DIVLEN+3:`DIVLEN] p, g;
logic magnitude, sign, cout;
// The quotient selection logic is presented for simplicity, not
// for efficiency. You can probably optimize your logic to
// select the proper divisor with less delay.
// Quotient equations from EE371 lecture notes 13-20
assign p = ps ^ pc;
assign g = ps & pc;
assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
assign #1 sign = p[`DIVLEN+3] ^ cout;
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
(ps[52]^pc[52]));
assign #1 sign = (ps[55]^pc[55])^
(ps[54] & pc[54] | ((ps[54]^pc[54]) &
(ps[53]&pc[53] | ((ps[53]^pc[53]) &
(ps[52]&pc[52]))))); */
// Produce quotient = +1, 0, or -1
assign #1 qp = magnitude & ~sign;
assign #1 qz = ~magnitude;
assign #1 qm = magnitude & sign;
endmodule
///////////////////////////////////
// On-The-Fly Converter, Radix 2 //
///////////////////////////////////
module otfc2 #(parameter N=64) (
input logic clk,
input logic Start,
input logic qp, qz, qm,
output logic [N-1:0] r
);
// The on-the-fly converter transfers the quotient
// bits to the quotient as they come.
//
// This code follows the psuedocode presented in the
// floating point chapter of the book. Right now,
// it is written for Radix-2 division.
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
logic [N+2:0] Q, QM, QNext, QMNext, QMMux;
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [N+1:0] QR, QMR;
flopr #(N+3) Qreg(clk, Start, QNext, Q);
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux);
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
always_comb begin
QR = Q[N+1:0];
QMR = QM[N+1:0]; // Shift Q and QM
if (qp) begin
QNext = {QR, 1'b1};
QMNext = {QR, 1'b0};
end else if (qz) begin
QNext = {QR, 1'b0};
QMNext = {QMR, 1'b1};
end else begin // If qp and qz are not true, then qm is
QNext = {QMR, 1'b1};
QMNext = {QMR, 1'b0};
end
end
assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
endmodule
/////////////
// counter //
/////////////
module counter(input logic clk,
input logic req,
input logic [$clog2(`XLEN+1)-1:0] dur,
output logic done);
logic [$clog2(`XLEN+1)-1:0] count;
// This block of control logic sequences the divider
// through its iterations. You may modify it if you
// build a divider which completes in fewer iterations.
// You are not responsible for the (trivial) circuit
// design of the block.
always @(posedge clk)
begin
if (count == dur) done <= #1 1;
else if (done | req) done <= #1 0;
if (req) count <= #1 0;
else count <= #1 count+1;
end
endmodule
//////////
// mux3 //
//////////
module mux3onehot #(parameter N=65) (
input logic [N+3:0] in0, in1, in2,
input logic sel0, sel1, sel2,
output logic [N+3:0] out
);
// lazy inspection of the selects
// really we should make sure selects are mutually exclusive
assign #1 out = sel0 ? in0 : (sel1 ? in1 : in2);
endmodule
/////////
// csa //
/////////
module csa #(parameter N=69) (
input logic [N-1:0] in1, in2, in3,
input logic cin,
output logic [N-1:0] out1, out2
);
// This block adds in1, in2, in3, and cin to produce
// a result out1 / out2 in carry-save redundant form.
// cin is just added to the least significant bit and
// is required to handle adding a negative divisor.
// Fortunately, the carry (out2) is shifted left by one
// bit, leaving room in the least significant bit to
// insert cin.
assign #1 out1 = in1 ^ in2 ^ in3;
assign #1 out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) |
(in2[N-2:0] & in3[N-2:0]), cin};
endmodule
//////////////
// expcalc //
//////////////
module expcalc(
input logic [`NE-1:0] XExp, YExp,
output logic [`NE-1:0] calcExp
);
assign calcExp = XExp - YExp + (`NE)'(`BIAS);
endmodule
//////////////
// signcalc //
//////////////
module signcalc(
input logic XSign, YSign,
output logic calcSign
);
assign calcSign = XSign ^ YSign;
endmodule

View File

@ -1,355 +0,0 @@
///////////////////////////////////////////////////////
// srt.sv //
// //
// Written 10/31/96 by David Harris harrisd@leland //
// Updated 10/19/21 David_Harris@hmc.edu //
// //
// This file models a simple Radix 2 SRT divider. //
// //
///////////////////////////////////////////////////////
// This Verilog file models a radix 2 SRT divider which
// produces one quotient digit per cycle. The divider
// keeps the partial remainder in carry-save form.
/////////
// srt //
/////////
module srt(input logic clk,
input logic req,
input logic sqrt, // 1 to compute sqrt(a), 0 to compute a/b
input logic [51:0] a, b,
output logic [54:0] rp, rm);
// A simple Radix 2 SRT divider/sqrt
// Internal signals
logic [55:0] ps, pc; // partial remainder in carry-save form
logic [55:0] d; // divisor
logic [55:0] psa, pca; // partial remainder result of csa
logic [55:0] psn, pcn; // partial remainder for next cycle
logic [55:0] dn; // divisor for next cycle
logic [55:0] dsel; // selected divisor multiple
logic qp, qz, qm; // quotient is +1, 0, or -1
logic [55:0] d_b; // inverse of divisor
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
// Otherwise, the divisor is retained and the partial remainder
// is fed back for the next iteration.
mux2 psmux({psa[54:0], 1'b0}, {4'b0001, a}, req, psn);
flop psflop(clk, psn, ps);
mux2 pcmux({pca[54:0], 1'b0}, 56'b0, req, pcn);
flop pcflop(clk, pcn, pc);
mux2 dmux(d, {4'b0001, b}, req, dn);
flop dflop(clk, dn, d);
// Quotient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
// Accumulate quotient digits in a shift register
qsel qsel(ps[55:52], pc[55:52], qp, qz, qm);
qacc qacc(clk, req, qp, qz, qm, rp, rm);
// Divisor Selection logic
inv dinv(d, d_b);
mux3 divisorsel(d_b, 56'b0, d, qp, qz, qm, dsel);
// Partial Product Generation
csa csa(ps, pc, dsel, qp, psa, pca);
endmodule
//////////
// mux2 //
//////////
module mux2(input logic [55:0] in0, in1,
input logic sel,
output logic [55:0] out);
assign #1 out = sel ? in1 : in0;
endmodule
//////////
// flop //
//////////
module flop(clk, in, out);
input clk;
input [55:0] in;
output [55:0] out;
logic [55:0] state;
always @(posedge clk)
state <= #1 in;
assign #1 out = state;
endmodule
//////////
// qsel //
//////////
module qsel(input logic [55:52] ps, pc,
output logic qp, qz, qm);
logic [55:52] p, g;
logic magnitude, sign, cout;
// The quotient selection logic is presented for simplicity, not
// for efficiency. You can probably optimize your logic to
// select the proper divisor with less delay.
// Quotient equations from EE371 lecture notes 13-20
assign p = ps ^ pc;
assign g = ps & pc;
assign #1 magnitude = ~(&p[54:52]);
assign #1 cout = g[54] | (p[54] & (g[53] | p[53] & g[52]));
assign #1 sign = p[55] ^ cout;
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
(ps[52]^pc[52]));
assign #1 sign = (ps[55]^pc[55])^
(ps[54] & pc[54] | ((ps[54]^pc[54]) &
(ps[53]&pc[53] | ((ps[53]^pc[53]) &
(ps[52]&pc[52]))))); */
// Produce quotient = +1, 0, or -1
assign #1 qp = magnitude & ~sign;
assign #1 qz = ~magnitude;
assign #1 qm = magnitude & sign;
endmodule
//////////
// qacc //
//////////
module qacc(clk, req, qp, qz, qm, rp, rm);
input clk;
input req;
input qp;
input qz;
input qm;
output [54:0] rp;
output [54:0] rm;
logic [54:0] rp, rm; // quotient bit is +/- 1;
logic [7:0] count;
always @(posedge clk)
begin
if (req)
begin
rp <= #1 0;
rm <= #1 0;
end
else
begin
rp <= #1 {rp[54:0], qp};
rm <= #1 {rm[54:0], qm};
end
end
endmodule
/////////
// inv //
/////////
module inv(input logic [55:0] in,
output logic [55:0] out);
assign #1 out = ~in;
endmodule
//////////
// mux3 //
//////////
module mux3(in0, in1, in2, sel0, sel1, sel2, out);
input [55:0] in0;
input [55:0] in1;
input [55:0] in2;
input sel0;
input sel1;
input sel2;
output [55:0] out;
// lazy inspection of the selects
// really we should make sure selects are mutually exclusive
assign #1 out = sel0 ? in0 : (sel1 ? in1 : in2);
endmodule
/////////
// csa //
/////////
module csa(in1, in2, in3, cin, out1, out2);
input [55:0] in1;
input [55:0] in2;
input [55:0] in3;
input cin;
output [55:0] out1;
output [55:0] out2;
// This block adds in1, in2, in3, and cin to produce
// a result out1 / out2 in carry-save redundant form.
// cin is just added to the least significant bit and
// is required to handle adding a negative divisor.
// Fortunately, the carry (out2) is shifted left by one
// bit, leaving room in the least significant bit to
// insert cin.
assign #1 out1 = in1 ^ in2 ^ in3;
assign #1 out2 = {in1[54:0] & (in2[54:0] | in3[54:0]) |
(in2[54:0] & in3[54:0]), cin};
endmodule
//////////////
// finaladd //
//////////////
module finaladd(rp, rm, r);
input [54:0] rp;
input [54:0] rm;
output [51:0] r;
logic [54:0] diff;
// this magic block performs the final addition for you
// to convert the positive and negative quotient digits
// into a normalized mantissa. It returns the 52 bit
// mantissa after shifting to guarantee a leading 1.
// You can assume this block operates in one cycle
// and do not need to budget it in your area and power
// calculations.
// Since no rounding is performed, the result may be too
// small by one unit in the least significant place (ulp).
// The checker ignores such an error.
assign #1 diff = rp - rm;
assign #1 r = diff[54] ? diff[53:2] : diff[52:1];
endmodule
/////////////
// counter //
/////////////
module counter(input logic clk,
input logic req,
output logic done);
logic [5:0] count;
// This block of control logic sequences the divider
// through its iterations. You may modify it if you
// build a divider which completes in fewer iterations.
// You are not responsible for the (trivial) circuit
// design of the block.
always @(posedge clk)
begin
if (count == 54) done <= #1 1;
else if (done | req) done <= #1 0;
if (req) count <= #1 0;
else count <= #1 count+1;
end
endmodule
///////////
// clock //
///////////
module clock(clk);
output clk;
// Internal clk signal
logic clk;
endmodule
//////////
// testbench //
//////////
module testbench;
logic clk;
logic req;
logic done;
logic [51:0] a;
logic [51:0] b;
logic [51:0] r;
logic [54:0] rp, rm; // positive quotient digits
// Test parameters
parameter MEM_SIZE = 40000;
parameter MEM_WIDTH = 52+52+52;
`define memr 51:0
`define memb 103:52
`define mema 155:104
// Test logicisters
logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file
logic [MEM_WIDTH-1:0] Vec; // Verilog doesn't allow direct access to a
// bit field of an array
logic [51:0] correctr, nextr;
integer testnum, errors;
// Divider
srt srt(clk, req, a, b, rp, rm);
// Final adder converts quotient digits to 2's complement & normalizes
finaladd finaladd(rp, rm, r);
// Counter
counter counter(clk, req, done);
initial
forever
begin
clk = 1; #17;
clk = 0; #16;
end
// Read test vectors from disk
initial
begin
testnum = 0;
errors = 0;
$readmemh ("testvectors", Tests);
Vec = Tests[testnum];
a = Vec[`mema];
b = Vec[`memb];
nextr = Vec[`memr];
req <= #5 1;
end
// Apply directed test vectors read from file.
always @(posedge clk)
begin
if (done)
begin
req <= #5 1;
$display("result was %h, should be %h\n", r, correctr);
if ((correctr - r) > 1) // check if accurate to 1 ulp
begin
errors = errors+1;
$display("failed\n");
$stop;
end
if (a === 52'hxxxxxxxxxxxxx)
begin
$display("Tests completed successfully");
$stop;
end
end
if (req)
begin
req <= #5 0;
correctr = nextr;
testnum = testnum+1;
Vec = Tests[testnum];
$display("a = %h b = %h",a,b);
a = Vec[`mema];
b = Vec[`memb];
nextr = Vec[`memr];
end
end
endmodule

View File

@ -1,27 +0,0 @@
CC = gcc
CFLAGS = -lm
LIBS =
OBJS4 = disp.o srt4div.o
OBJS2 = disp.o srt2div.o
all: srt4div srt2div
disp.o: disp.h disp.c
$(CC) -g -c -o disp.o disp.c
srt4div.o: srt4div.c
$(CC) -g -c -o srt4div.o srt4div.c
srt2div.o: srt2div.c
$(CC) -g -c -o srt2div.o srt2div.c
srt4div: $(OBJS4)
$(CC) -g -O3 -o srt4div $(OBJS4) $(CFLAGS)
srt2div: $(OBJS2)
$(CC) -g -O3 -o srt2div $(OBJS2) $(CFLAGS)
clean:
rm -f *.o *~
rm -f core

View File

@ -1 +0,0 @@
vsim -do iter64.do -c

View File

@ -1,22 +0,0 @@
This is a novel integer divider using r4 division by recurrence. The
reference is:
J. E. Stine and K. Hill, "An Efficient Implementation of Radix-4
Integer Division Using Scaling," 2020 IEEE 63rd International Midwest
Symposium on Circuits and Systems (MWSCAS), Springfield, MA, USA,
2020, pp. 1092-1095, doi: 10.1109/MWSCAS48704.2020.9184631.
Although this version does not contain scaling, it could do this, if
needed. Moreover, a higher radix or overlapped radix can be done
easily to expand the the size. Also, the implementations here are
initially unsigned but hope to expand for signed, which should be
easy.
There are two types of tests in this directory within each testbench.
One tests for 32-bits and the other 64-bits:
int32div.do and int64div.do = test individual vector for debugging
iter32.do and iter64.do = do not use any waveform generation and just
output lots of tests

View File

@ -1,19 +0,0 @@
#!/bin/sh
cat iter64_signed.out | grep "0 1$"
cat iter64_signed.out | grep "1 0$"
cat iter64_signed.out | grep "0 0$"
cat iter64_unsigned.out | grep "0 1$"
cat iter64_unsigned.out | grep "1 0$"
cat iter64_unsigned.out | grep "0 0$"
cat iter32_signed.out | grep "0 1$"
cat iter32_signed.out | grep "1 0$"
cat iter32_signed.out | grep "0 0$"
cat iter32_unsigned.out | grep "0 1$"
cat iter32_unsigned.out | grep "1 0$"
cat iter32_unsigned.out | grep "0 0$"
cat iter128_signed.out | grep "0 1$"
cat iter128_signed.out | grep "1 0$"
cat iter128_signed.out | grep "0 0$"
cat iter128_unsigned.out | grep "0 1$"
cat iter128_unsigned.out | grep "1 0$"
cat iter128_unsigned.out | grep "0 0$"

View File

@ -1,60 +0,0 @@
#include "disp.h"
double rnd_zero(double x, double bits) {
if (x < 0)
return ceiling(x, bits);
else
return flr(x, bits);
}
double rne(double x, double precision) {
double scale, x_round;
scale = pow(2.0, precision);
x_round = rint(x * scale) / scale;
return x_round;
}
double flr(double x, double precision) {
double scale, x_round;
scale = pow(2.0, precision);
x_round = floor(x * scale) / scale;
return x_round;
}
double ceiling(double x, double precision) {
double scale, x_round;
scale = pow(2.0, precision);
x_round = ceil(x * scale) / scale;
return x_round;
}
void disp_bin(double x, int bits_to_left, int bits_to_right, FILE *out_file) {
double diff;
int i;
if (fabs(x) < pow(2.0, -bits_to_right)) {
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
fprintf(out_file,"0");
}
return;
}
if (x < 0.0) {
// fprintf(out_file, "-");
// x = - x;
x = pow(2.0, ((double) bits_to_left)) + x;
}
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
diff = pow(2.0, -i);
if (x < diff) {
fprintf(out_file, "0");
}
else {
fprintf(out_file, "1");
x -= diff;
}
if (i == 0) {
fprintf(out_file, ".");
}
}
}

View File

@ -1,18 +0,0 @@
#include <stdlib.h>
#include <math.h>
#include <stdio.h>
#ifndef DISP
#define DISP
double rnd_zero(double x, double bits);
double rne(double x, double precision);
double flr(double x, double precision);
double ceiling(double x, double precision);
void disp_bin(double x, int bits_to_left, int bits_to_right, FILE *out_file);
#endif

Some files were not shown because too many files have changed in this diff Show More