10 Commits

5 changed files with 676 additions and 0 deletions

View File

@ -0,0 +1,322 @@
module out_axis_fifo #(
parameter ACCUM_WIDTH = 32,
parameter WINDOW_SIZE = 65,
parameter PACKET_SIZE = 1024
) (
input logic eth_clk_in,
input logic acc_clk_in,
input logic rst,
input logic [31:0] smp_num,
// AXI stream master for output, eth_clk_in domain
output logic [7:0] s_axis_tdata,
output logic s_axis_tvalid,
input logic s_axis_tready,
output logic s_axis_tlast,
// eth handshake
input logic req_ready,
output logic send_req,
output logic [15:0] udp_data_length,
// data from acc
input logic [ACCUM_WIDTH-1:0] acc_din,
input logic din_valid,
// input pulse
input logic readout_begin,
// output pulses
output logic batch_req,
output logic finish
);
// sync reset
reg [1:0] rst_sync_ff;
reg rst_eth;
always @(posedge acc_clk_in or posedge rst) begin
if (rst) begin
rst_sync_ff <= 2'b11;
end else begin
rst_sync_ff <= {rst_sync_ff[0], 1'b0};
end
end
assign rst_eth = rst_sync_ff[1];
logic [1:0] rst_acc_ff;
logic rst_acc;
always_ff @(posedge acc_clk_in or posedge rst) begin
if (rst)
rst_acc_ff <= 2'b11;
else
rst_acc_ff <= {rst_acc_ff[0], 1'b0};
end
assign rst_acc = rst_acc_ff[1];
// fifo params calc
// round up to be enough for 2xPACKET_SIZE storage
localparam int MIN_BYTES = 2 * PACKET_SIZE;
localparam int MIN_BITS = MIN_BYTES * 8;
localparam int MIN_WR_WORDS = (MIN_BITS + ACCUM_WIDTH - 1) / ACCUM_WIDTH; // ceil div
localparam int WDEPTH_BITS = $clog2(MIN_WR_WORDS);
localparam int FIFO_WDEPTH = 1 << WDEPTH_BITS;
localparam int FIFO_RDEPTH = FIFO_WDEPTH * ACCUM_WIDTH / 8;
localparam int RDEPTH_BITS = $clog2(FIFO_RDEPTH) + 1;
wire wr_unavail;
wire wr_rst_busy;
reg rd_en;
typedef enum logic [2:0] {
WR_IDLE = 3'd0,
WR_CHECK = 3'd1,
WR_RUN = 3'd2,
WR_END = 3'd3
} wr_state_t;
(* MARK_DEBUG="true" *) wr_state_t wr_state;
// Write FSM
reg [31:0] wr_cnt; // current BIT mem ptr
reg [31:0] wr_batch_tgt; // next 'target' that should be written from batch
reg [31:0] wr_total; // total BITS to be sent!
wire [WDEPTH_BITS:0] wr_data_count;
// NOTE:
// each written "acc_din" ACCUM_WIDTH word
// is counted as WINDOWS_SIZE samples actually
// because hw division for counters is painful
// so we just increased the counter sizes
always_ff @(posedge acc_clk_in) begin
if (rst_acc) begin
wr_state <= WR_IDLE;
wr_cnt <= 32'b0;
wr_batch_tgt <= 32'b0;
wr_total <= 32'b0;
batch_req <= 0;
finish <= 0;
end else begin
case (wr_state)
// wait until readout is requested
WR_IDLE: begin
if (readout_begin) begin
wr_cnt <= 32'b0;
wr_state <= WR_CHECK;
wr_total <= smp_num * ACCUM_WIDTH;
wr_batch_tgt <= 32'b0;
batch_req <= 0;
finish <= 0;
end
end
// wait until we can request a word
// depends on prog_full signal
WR_CHECK: begin
if (~wr_unavail && ~wr_rst_busy) begin
batch_req <= 1;
// should give us exactly PACKET_SIZE * 8 bits
// multiplied by WINDOW_SIZE, because we count
// each given ACCUM_WIDTH word as WINDOWS_SIZE samples !!!
wr_batch_tgt <= wr_batch_tgt + (8 * WINDOW_SIZE * PACKET_SIZE);
wr_state <= WR_RUN;
end else begin
batch_req <= 0;
end
end
// wait until all requested packet is written
WR_RUN: begin
batch_req <= 0;
if (wr_cnt == wr_batch_tgt) begin
// got enough words
wr_state <= WR_END;
end else if (wr_cnt > wr_batch_tgt) begin
// weird case when accum gave us too much words
// block resets
wr_cnt <= 32'hffffffff; // sort of signal for sim/ila
wr_state <= WR_END;
end
if (din_valid) begin
// data supplied
// count as we got WINDOW_SIZE samples
wr_cnt <= wr_cnt + ACCUM_WIDTH * WINDOW_SIZE;
end
end
// check if this was last data batch
WR_END: begin
// here we check that we sent enough data
// wr_cnt should be by design PACKET_SIZE-aligned
if (wr_cnt >= wr_total) begin
// wait until all data is sent
if (wr_data_count == 0) begin
finish <= 1;
wr_state <= WR_IDLE;
end
end else begin
// next word
wr_state <= WR_CHECK;
end
end
endcase
end
end
// Readout FSM with ethernet request
assign udp_data_length = PACKET_SIZE; // fixed packet size
reg [15:0] sent_cnt;
typedef enum logic [2:0] {
RD_IDLE = 3'd0,
RD_CHECK = 3'd1,
RD_SEND = 3'd2
} rd_state_t;
(* MARK_DEBUG="true" *) rd_state_t rd_state;
wire rd_valid;
wire [RDEPTH_BITS-1:0] rd_data_count;
always_ff @(posedge eth_clk_in) begin
if (rst_eth) begin
rd_state <= RD_IDLE;
send_req <= 1'b0;
sent_cnt <= 16'd0;
s_axis_tlast <= 1'b0;
s_axis_tvalid <= 1'b0;
rd_en <= 1'b0;
end else begin
case (rd_state)
// wait until fifo has enough data to send
RD_IDLE: begin
if (rd_data_count == PACKET_SIZE) begin
// enough data to send packet, begin
rd_state <= RD_CHECK;
end
send_req <= 1'b0;
sent_cnt <= 16'd0;
rd_en <= 1'b0;
s_axis_tlast <= 1'b0;
s_axis_tvalid <= 1'b0;
end
// await udp ready
RD_CHECK: begin
if (req_ready) begin
send_req <= 1'b1;
rd_state <= RD_SEND;
end
end
// send data
RD_SEND: begin
// udp is ready and fifo is ready = sent
send_req <= 1'b0;
if (s_axis_tready && rd_valid) begin
rd_en <= 1'b1;
s_axis_tvalid <= 1'b1;
sent_cnt <= sent_cnt + 1;
// final packet of the batch
if (sent_cnt == PACKET_SIZE - 1) begin
rd_state <= RD_IDLE;
s_axis_tlast <= 1'b1;
end
end else begin
rd_en <= 1'b0;
s_axis_tvalid <= 1'b0;
end
end
endcase
end
end
logic [ACCUM_WIDTH-1:0] fifo_din_r, acc_din_reg, din_valid_reg;
logic fifo_wr_en_r;
always_ff @(posedge acc_clk_in) begin
if (rst_acc) begin
fifo_din_r <= '0;
fifo_wr_en_r <= 1'b0;
din_valid_reg <= 1'b0;
end else begin
fifo_wr_en_r <= 1'b0;
acc_din_reg <= acc_din;
if (!wr_rst_busy && din_valid_reg) begin
fifo_din_r <= acc_din_reg;
fifo_wr_en_r <= 1'b1;
end
din_valid_reg <= din_valid;
end
end
// xpm_fifo_async: Asynchronous FIFO
// Xilinx Parameterized Macro, version 2025.1
xpm_fifo_async #(
.DOUT_RESET_VALUE("0"), // String
.FIFO_READ_LATENCY(1), // DECIMAL
.FIFO_WRITE_DEPTH(FIFO_WDEPTH),
.FULL_RESET_VALUE(0),
.PROG_EMPTY_THRESH(PACKET_SIZE),
.PROG_FULL_THRESH(PACKET_SIZE / (ACCUM_WIDTH / 8)),
.RD_DATA_COUNT_WIDTH(RDEPTH_BITS),
.READ_DATA_WIDTH(8), // always 8 bit for eth
.READ_MODE("fwft"),
.SIM_ASSERT_CHK(1), // DECIMAL; 0=disable simulation messages, 1=enable simulation messages
.USE_ADV_FEATURES("1616"), // String
.WRITE_DATA_WIDTH(ACCUM_WIDTH),
.WR_DATA_COUNT_WIDTH(WDEPTH_BITS+1)
)
xpm_fifo_async_inst (
.data_valid(rd_valid), // 1-bit output: Read Data Valid: When asserted, this signal indicates that valid data is available on the
// output bus (dout).
.dout(s_axis_tdata),
.empty( ),
.full( ),
.prog_full(wr_unavail), // 1-bit output: Programmable Full: This signal is asserted when the number of words in the FIFO is greater than
// or equal to the programmable full threshold value. It is de-asserted when the number of words in the FIFO is
// less than the programmable full threshold value.
.rd_data_count(rd_data_count), // RD_DATA_COUNT_WIDTH-bit output: Read Data Count: This bus indicates the number of words read from the FIFO.
.wr_data_count(wr_data_count), // WR_DATA_COUNT_WIDTH-bit output: Write Data Count: This bus indicates the number of words written into the
// FIFO.
.rd_clk(eth_clk_in), // 1-bit input: Read clock: Used for read operation. rd_clk must be a free running clock.
.rd_en(rd_en), // 1-bit input: Read Enable: If the FIFO is not empty, asserting this signal causes data (on dout) to be read
// from the FIFO. Must be held active-low when rd_rst_busy is active high.
.rst(rst),
.din(fifo_din_r), // WRITE_DATA_WIDTH-bit input: Write Data: The input data bus used when writing the FIFO.
.wr_clk(acc_clk_in), // 1-bit input: Write clock: Used for write operation. wr_clk must be a free running clock.
.wr_en(fifo_wr_en_r),
.wr_rst_busy(wr_rst_busy)
);
endmodule

52
rtl/accum/tests/Makefile Normal file
View File

@ -0,0 +1,52 @@
# SPDX-License-Identifier: MIT
#
# Copyright (c) 2025 FPGA Ninja, LLC
#
# Authors:
# - Alex Forencich
#
# FPGA settings
FPGA_PART = xc7a35tfgg484-1
FPGA_TOP = accum
FPGA_ARCH = artix7
RTL_DIR = ../src
include ../../../scripts/vivado.mk
SYN_FILES += $(sort $(shell find ../src -type f \( -name '*.v' -o -name '*.sv' \)))
XCI_FILES = $(sort $(shell find ../src -type f -name '*.xci'))
XDC_FILES += ../../../constraints/ax7a035b.xdc
XDC_FILES += test_timing.xdc
SYN_FILES += out_axis_fifo_tb.sv
SIM_TOP = control_tb
program: $(PROJECT).bit
echo "open_hw_manager" > program.tcl
echo "connect_hw_server" >> program.tcl
echo "open_hw_target" >> program.tcl
echo "current_hw_device [lindex [get_hw_devices] 0]" >> program.tcl
echo "refresh_hw_device -update_hw_probes false [current_hw_device]" >> program.tcl
echo "set_property PROGRAM.FILE {$(PROJECT).bit} [current_hw_device]" >> program.tcl
echo "program_hw_devices [current_hw_device]" >> program.tcl
echo "exit" >> program.tcl
vivado -nojournal -nolog -mode batch -source program.tcl
$(PROJECT).mcs $(PROJECT).prm: $(PROJECT).bit
echo "write_cfgmem -force -format mcs -size 16 -interface SPIx4 -loadbit {up 0x0000000 $*.bit} -checksum -file $*.mcs" > generate_mcs.tcl
echo "exit" >> generate_mcs.tcl
vivado -nojournal -nolog -mode batch -source generate_mcs.tcl
mkdir -p rev
COUNT=100; \
while [ -e rev/$*_rev$$COUNT.bit ]; \
do COUNT=$$((COUNT+1)); done; \
COUNT=$$((COUNT-1)); \
for x in .mcs .prm; \
do cp $*$$x rev/$*_rev$$COUNT$$x; \
echo "Output: rev/$*_rev$$COUNT$$x"; done;

View File

@ -0,0 +1,290 @@
`timescale 1ns/1ps
module tb_out_axis_fifo;
localparam int ACCUM_WIDTH = 32;
localparam int WINDOW_SIZE = 65;
localparam int PACKET_SIZE = 1024;
localparam int BYTES_PER_WORD = ACCUM_WIDTH / 8;
localparam int WORDS_PER_BATCH = PACKET_SIZE / BYTES_PER_WORD; // 1024 / 4 = 256 слов
logic eth_clk_in;
logic acc_clk_in;
logic rst;
logic [31:0] smp_num;
logic [7:0] s_axis_tdata;
logic s_axis_tvalid;
logic s_axis_tready;
logic s_axis_tlast;
logic [ACCUM_WIDTH-1:0] acc_din;
logic din_valid;
logic send_req;
logic req_ready;
logic readout_begin;
logic batch_req;
logic finish;
out_axis_fifo #(
.ACCUM_WIDTH(ACCUM_WIDTH),
.WINDOW_SIZE(WINDOW_SIZE),
.PACKET_SIZE(PACKET_SIZE)
) dut (
.eth_clk_in (eth_clk_in),
.acc_clk_in (acc_clk_in),
.rst (rst),
.smp_num (smp_num),
.s_axis_tdata (s_axis_tdata),
.s_axis_tvalid (s_axis_tvalid),
.s_axis_tready (s_axis_tready),
.s_axis_tlast (s_axis_tlast),
.acc_din (acc_din),
.din_valid (din_valid),
.readout_begin (readout_begin),
.req_ready (req_ready),
.send_req (send_req),
.batch_req (batch_req),
.finish (finish)
);
// clocks
initial begin
eth_clk_in = 0;
forever #6 eth_clk_in = ~eth_clk_in; // 125
end
initial begin
acc_clk_in = 0;
forever #7.692307692 acc_clk_in = ~acc_clk_in; // 65
end
// scoreboard
byte expected_bytes[$];
int unsigned compared_bytes;
int unsigned mismatch_count;
int unsigned total_pushed_words;
task automatic scoreboard_reset();
begin
expected_bytes.delete();
compared_bytes = 0;
mismatch_count = 0;
total_pushed_words = 0;
end
endtask
task automatic push_expected_word(input logic [ACCUM_WIDTH-1:0] word);
begin
// queue push
expected_bytes.push_back(word[7:0]);
expected_bytes.push_back(word[15:8]);
expected_bytes.push_back(word[23:16]);
expected_bytes.push_back(word[31:24]);
total_pushed_words++;
end
endtask
task automatic check_expected_empty(string case_name);
begin
if (expected_bytes.size() != 0) begin
$error("[%0t] %s: expected_bytes is not empty, remaining=%0d",
$time, case_name, expected_bytes.size());
end else begin
$display("[%0t] %s: scoreboard queue empty, all expected bytes were transmitted",
$time, case_name);
end
end
endtask
// axis check
always_ff @(posedge eth_clk_in or posedge rst) begin
byte exp_byte;
if (rst) begin
compared_bytes <= 0;
mismatch_count <= 0;
end else begin
if (s_axis_tvalid && s_axis_tready) begin
if (expected_bytes.size() == 0) begin
$error("[%0t] AXIS produced unexpected byte 0x%02x: expected queue is empty",
$time, s_axis_tdata);
mismatch_count <= mismatch_count + 1;
end else begin
exp_byte = expected_bytes.pop_front();
compared_bytes <= compared_bytes + 1;
if (s_axis_tdata !== exp_byte) begin
$error("[%0t] AXIS mismatch at byte #%0d: got=0x%02x expected=0x%02x",
$time, compared_bytes, s_axis_tdata, exp_byte);
mismatch_count <= mismatch_count + 1;
end
end
end
end
end
// helpers
task automatic do_reset();
begin
rst = 1'b1;
readout_begin = 1'b0;
din_valid = 1'b0;
acc_din = '0;
smp_num = '0;
scoreboard_reset();
repeat (10) @(posedge acc_clk_in);
rst = 1'b0;
repeat (10) @(posedge acc_clk_in);
end
endtask
task automatic pulse_readout_begin(input logic [31:0] smp_num_i);
begin
smp_num = smp_num_i;
@(posedge acc_clk_in);
readout_begin <= 1'b1;
@(posedge acc_clk_in);
readout_begin <= 1'b0;
end
endtask
task automatic send_random_words(input int unsigned n_words);
int unsigned i;
logic [ACCUM_WIDTH-1:0] rand_word;
begin
for (i = 0; i < n_words; i++) begin
rand_word = $urandom;
@(posedge acc_clk_in);
din_valid <= 1'b1;
acc_din <= rand_word;
// expected result
push_expected_word(rand_word);
end
@(posedge acc_clk_in);
din_valid <= 1'b0;
acc_din <= '0;
end
endtask
// 1. set smp_num
// 2. pulse readout_begon
// 3. send 1KB (PACKET_SIZE) after each batch_req pulse
// 4. wait for finish
// 5. compare axis result
task automatic run_case(input logic [31:0] smp_num_i);
int batch_count;
string case_name;
begin
batch_count = 0;
case_name = $sformatf("run_case(smp_num=%0d)", smp_num_i);
$display("[%0t] %s start", $time, case_name);
pulse_readout_begin(smp_num_i);
while (finish !== 1'b1) begin
@(posedge acc_clk_in);
if (batch_req) begin
batch_count++;
$display("[%0t] %s: batch_req #%0d -> send %0d words",
$time, case_name, batch_count, WORDS_PER_BATCH);
send_random_words(WORDS_PER_BATCH);
end
end
repeat (200) @(posedge eth_clk_in);
$display("[%0t] %s done: batches=%0d, pushed_words=%0d, compared_bytes=%0d, mismatches=%0d, wr_cnt=%0d, wr_total=%0d",
$time, case_name, batch_count, total_pushed_words, compared_bytes, mismatch_count,
dut.wr_cnt, dut.wr_total);
check_expected_empty(case_name);
if (mismatch_count != 0) begin
$fatal(1, "[%0t] %s FAILED: mismatches=%0d", $time, case_name, mismatch_count);
end else begin
$display("[%0t] %s PASSED", $time, case_name);
end
@(posedge acc_clk_in);
end
endtask
// eth beh simulator
int axis_byte_count;
always_ff @(posedge eth_clk_in or posedge rst) begin
if (rst) begin
axis_byte_count <= 0;
req_ready <= 0;
s_axis_tready <= 1'b0;
end else begin
req_ready <= 1;
// request send
if (send_req) begin
s_axis_tready <= 1'b1;
req_ready <= 0;
end
if (s_axis_tvalid && s_axis_tready) begin
axis_byte_count <= axis_byte_count + 1;
end
end
end
// main
initial begin
// init
rst = 1'b0;
readout_begin = 1'b0;
din_valid = 1'b0;
acc_din = '0;
smp_num = '0;
repeat (500) @(posedge acc_clk_in);
// 1
do_reset();
repeat (500) @(posedge acc_clk_in);
run_case(32'd17);
repeat (20) @(posedge acc_clk_in);
// 2
do_reset();
run_case(32'd1024);
repeat (20) @(posedge acc_clk_in);
// 3
do_reset();
run_case(32'd77777);
repeat (20) @(posedge acc_clk_in);
do_reset();
repeat (20) @(posedge acc_clk_in);
$display("[%0t] ALL TESTS DONE", $time);
$finish;
end
endmodule

View File

@ -0,0 +1,10 @@
# Primary clocks
create_clock -name eth_clk -period 8.000 [get_ports eth_clk_in]
create_clock -name acc_clk -period 15.385 [get_ports acc_clk_in]
# Asynchronous clock groups
set_clock_groups -name ASYNC_ETH_ACC -asynchronous \
-group [get_clocks eth_clk] \
-group [get_clocks acc_clk]

View File

@ -180,6 +180,8 @@ sim: $(PROJECT).xpr gen_ip
echo "update_compile_order -fileset sources_1" >> run_sim.tcl
echo "update_compile_order -fileset sim_1" >> run_sim.tcl
echo "launch_simulation" >> run_sim.tcl
echo "run 1000 us" >> run_sim.tcl
echo "quit" >> run_sim.tcl
vivado -mode batch -source run_sim.tcl
simclean: