rtl: first impl of adder+accum

This commit is contained in:
Phil
2026-04-22 16:38:22 +03:00
parent b54e69dec0
commit a8a3aff498
2 changed files with 324 additions and 0 deletions

272
rtl/accum/src/accum.sv Normal file
View File

@ -0,0 +1,272 @@
`timescale 1ns / 1ps
// -----------------------------------------------------------------------------
// accumulator
//
// Sums repeated passes ("sequences") of windowed input samples into a
// simple-dual-port memory and then reads a batch of the accumulated words out.
//
// Data path:
//   s_axis_tdata/tvalid -> adder (sums WINDOW_SIZE samples) -> data/valid_data
//   Pass 1 (INIT_MEM):  mem[i]  = data
//   Pass 2..seq_num:    mem[i] += data            (REQ_WORD_B / ACCUM)
//   Readout:            mem[0..] -> out_data, qualified by out_valid
//
// Parameters:
//   DATA_WIDTH       width of one input sample (forwarded to the adder)
//   ACCUM_WIDTH      width of one accumulated word / memory word
//   N_MAX            memory depth in words (MEMORY_SIZE = N_MAX*ACCUM_WIDTH)
//   WINDOW_SIZE      samples summed per memory word (forwarded to the adder);
//                    also the step by which the sample counter advances
//   PACKET_SIZE      only used to derive READ_BATCH_SIZE
//                    (presumably a packet size in bytes - TODO confirm)
//   READ_BATCH_SIZE  value of addrb at which READOUT_PUT stops streaming
//
// Ports:
//   clk_in, rst          clock and synchronous, active-high reset
//   s_axis_tdata/tvalid  input sample stream
//   start                sampled in IDLE; latches smp_num / seq_num
//   smp_num              samples per pass (compared against a counter that
//                        advances by WINDOW_SIZE per stored word)
//   seq_num              number of passes to accumulate; 0 is treated as 1
//   out_data/out_valid   readout word and its qualifier
//   readout_begin        high from READOUT_START until the FSM is back in IDLE
//   batch_req            sampled in READOUT_AWAIT; starts streaming a batch
//   finish               registered once; in READOUT_AWAIT aborts to FINISH
// -----------------------------------------------------------------------------
module accumulator
#(
    parameter DATA_WIDTH      = 12,
    parameter ACCUM_WIDTH     = 32,
    parameter N_MAX           = 4096,
    parameter WINDOW_SIZE     = 4,
    parameter PACKET_SIZE     = 8,
    parameter READ_BATCH_SIZE = (PACKET_SIZE*8)/(ACCUM_WIDTH)
)
(
    input                     clk_in,
    input                     rst,
    input  [DATA_WIDTH-1:0]   s_axis_tdata,
    input                     s_axis_tvalid,
    input                     start,
    input  [31:0]             smp_num,
    input  [15:0]             seq_num,
    output [ACCUM_WIDTH-1:0]  out_data,
    output                    out_valid,
    output                    readout_begin,
    input                     batch_req,
    input                     finish
);

    // Address width shared by the FSM counters and both memory ports.
    localparam int ADDR_WIDTH = 16;

    // Run configuration latched on start, and the matching progress counters.
    logic [31:0]            smp_num_reg, cnt_smp_num;
    logic [15:0]            seq_num_reg, cnt_seq_num;

    // Word index inside the current pass and the memory port addresses.
    logic [ADDR_WIDTH-1:0]  cnt_addr, addra, addrb;

    // Windowed sum coming from the adder.
    logic [ACCUM_WIDTH-1:0] data;
    logic                   valid_data;

    // Memory write data (port A) and read data (port B).
    logic [ACCUM_WIDTH-1:0] data_bram_in, data_bram_out;
    logic                   wea, enb;

    // Registered outputs.
    logic                   readout_begin_reg;
    logic [ACCUM_WIDTH-1:0] out_data_reg;
    logic                   out_valid_reg;

    // One-cycle delayed copy of the finish input.
    logic                   finish_buf;

    typedef enum logic [3:0] {
        IDLE,           // wait for start
        INIT_MEM,       // first pass: store sums directly
        BEGIN_SEQ,      // pass boundary: next pass or readout
        REQ_WORD_B,     // request mem[cnt_addr] on port B
        ACCUM,          // write back mem[cnt_addr] + new sum
        READOUT_START,  // raise readout_begin
        READOUT_AWAIT,  // wait for batch_req or finish
        READOUT_DELAY,  // absorb the memory read latency
        READOUT_PUT,    // stream words to out_data
        READOUT_LAST,   // drop out_valid after the batch
        FINISH          // return to IDLE
    } wr_state_t;

    (* MARK_DEBUG="true" *) wr_state_t wr_state;

    always_ff @(posedge clk_in) begin
        if (rst) begin
            smp_num_reg       <= '0;
            cnt_smp_num       <= '0;
            seq_num_reg       <= '0;
            cnt_seq_num       <= '0;
            cnt_addr          <= '0;
            addra             <= '0;
            addrb             <= '0;
            wea               <= 1'b0;
            enb               <= 1'b0;
            wr_state          <= IDLE;
            finish_buf        <= 1'b0;
            // Outputs get a defined value during reset so downstream logic
            // never observes X on readout_begin / out_data / out_valid.
            readout_begin_reg <= 1'b0;
            out_valid_reg     <= 1'b0;
            out_data_reg      <= '0;
        end else begin
            finish_buf <= finish;

            case (wr_state)
                IDLE: begin
                    // Clear all strobes and wait for the start signal.
                    wea               <= 1'b0;
                    enb               <= 1'b0;
                    readout_begin_reg <= 1'b0;
                    out_valid_reg     <= 1'b0;
                    if (start) begin
                        smp_num_reg <= smp_num;
                        seq_num_reg <= seq_num;
                        wr_state    <= INIT_MEM;
                    end
                end

                INIT_MEM: begin
                    // First pass: every windowed sum overwrites the next
                    // memory word, so the memory needs no explicit clearing.
                    wea <= 1'b0;
                    if (valid_data) begin
                        data_bram_in <= data;
                        addra        <= cnt_addr;
                        wea          <= 1'b1;
                        cnt_addr     <= cnt_addr + 1;
                        cnt_smp_num  <= cnt_smp_num + WINDOW_SIZE;
                    end
                    // Checked against the counter value from before this
                    // cycle's update, i.e. one clock after the last store.
                    if (cnt_smp_num >= smp_num_reg) begin
                        wr_state <= BEGIN_SEQ;
                    end
                end

                BEGIN_SEQ: begin
                    // Pass boundary: rewind the per-pass counters.
                    wea         <= 1'b0;
                    enb         <= 1'b0;
                    cnt_smp_num <= '0;
                    cnt_addr    <= '0;
                    addrb       <= '0;
                    // "+ 1 >=" instead of "== seq_num_reg - 1": the
                    // subtraction underflows for seq_num == 0 and the FSM
                    // would then never leave the accumulation loop. For
                    // seq_num >= 1 both forms trigger on the same pass.
                    if (cnt_seq_num + 1 >= seq_num_reg) begin
                        cnt_seq_num <= '0;
                        wr_state    <= READOUT_START;
                    end else begin
                        cnt_seq_num <= cnt_seq_num + 1;
                        wr_state    <= REQ_WORD_B;
                    end
                end

                REQ_WORD_B: begin
                    // Pre-request the word that the next sum is added to.
                    wea      <= 1'b0;
                    enb      <= 1'b1;
                    addrb    <= cnt_addr;
                    wr_state <= ACCUM;
                end

                ACCUM: begin
                    // Add the new windowed sum to the stored word.
                    // NOTE(review): enb/addrb are registered and port B has a
                    // read latency of 1, so data_bram_out still holds the
                    // previous read during the first ACCUM cycle. This relies
                    // on valid_data not arriving in that cycle - confirm the
                    // input cadence guarantees it.
                    enb <= 1'b0;
                    if (valid_data) begin
                        addra        <= cnt_addr;
                        wea          <= 1'b1;
                        data_bram_in <= data + data_bram_out;
                        cnt_smp_num  <= cnt_smp_num + WINDOW_SIZE;
                        if (cnt_smp_num + WINDOW_SIZE >= smp_num_reg) begin
                            wr_state <= BEGIN_SEQ;
                        end else begin
                            cnt_addr <= cnt_addr + 1;
                            wr_state <= REQ_WORD_B;
                        end
                    end
                end

                READOUT_START: begin
                    // Announce that accumulated data is ready to be read.
                    readout_begin_reg <= 1'b1;
                    addrb             <= '0;
                    enb               <= 1'b0;
                    wr_state          <= READOUT_AWAIT;
                end

                READOUT_AWAIT: begin
                    // batch_req has priority over finish.
                    if (batch_req) begin
                        enb      <= 1'b1;
                        wr_state <= READOUT_DELAY;
                    end else if (finish_buf) begin
                        wr_state <= FINISH;
                    end else begin
                        enb           <= 1'b0;
                        out_valid_reg <= 1'b0;
                    end
                end

                READOUT_DELAY: begin
                    // One cycle for the memory read latency; the address
                    // already advances so a word is delivered every clock.
                    addrb    <= addrb + 1;
                    wr_state <= READOUT_PUT;
                end

                READOUT_PUT: begin
                    // Stream one word per clock until addrb reaches
                    // READ_BATCH_SIZE.
                    if (addrb == READ_BATCH_SIZE) begin
                        wr_state <= READOUT_LAST;
                        enb      <= 1'b0;
                    end
                    addrb         <= addrb + 1;
                    out_valid_reg <= 1'b1;
                    out_data_reg  <= data_bram_out;
                end

                READOUT_LAST: begin
                    // out_valid drops here, so the word captured in this
                    // state is not flagged as valid.
                    // NOTE(review): this goes straight to FINISH, so only one
                    // batch is read per run - confirm that is intended.
                    out_valid_reg <= 1'b0;
                    out_data_reg  <= data_bram_out;
                    wr_state      <= FINISH;
                end

                FINISH: begin
                    out_valid_reg <= 1'b0;
                    enb           <= 1'b0;
                    wr_state      <= IDLE;
                end

                default: wr_state <= IDLE;
            endcase
        end
    end

    // Sums WINDOW_SIZE consecutive input samples into one word.
    adder
    #(
        .DATA_WIDTH  (DATA_WIDTH),
        .WINDOW_SIZE (WINDOW_SIZE),
        .ACCUM_WIDTH (ACCUM_WIDTH)
    ) adder_dut
    (
        .clk_in        (clk_in),
        .rst           (rst),
        .s_axis_tdata  (s_axis_tdata),
        .s_axis_tvalid (s_axis_tvalid),
        .sum_data      (data),
        .sum_valid     (valid_data)
    );

    // Accumulation memory: port A writes, port B reads, common clock.
    xpm_memory_sdpram #(
        .ADDR_WIDTH_A            (ADDR_WIDTH),        // DECIMAL
        .ADDR_WIDTH_B            (ADDR_WIDTH),        // DECIMAL
        .AUTO_SLEEP_TIME         (0),                 // DECIMAL
        .BYTE_WRITE_WIDTH_A      (ACCUM_WIDTH),       // DECIMAL; whole-word write enable
        .CASCADE_HEIGHT          (0),                 // DECIMAL
        .CLOCKING_MODE           ("common_clock"),    // String
        .ECC_MODE                ("no_ecc"),          // String
        .MEMORY_INIT_FILE        ("none"),            // String
        .MEMORY_INIT_PARAM       ("0"),               // String
        .MEMORY_OPTIMIZATION     ("true"),            // String
        .MEMORY_PRIMITIVE        ("auto"),            // String
        .MEMORY_SIZE             (N_MAX*ACCUM_WIDTH), // DECIMAL; size in bits
        .MESSAGE_CONTROL         (0),                 // DECIMAL
        .READ_DATA_WIDTH_B       (ACCUM_WIDTH),       // DECIMAL
        .READ_LATENCY_B          (1),                 // DECIMAL
        .READ_RESET_VALUE_B      ("0"),               // String
        .RST_MODE_A              ("SYNC"),            // String
        .RST_MODE_B              ("SYNC"),            // String
        .SIM_ASSERT_CHK          (0),                 // DECIMAL; 0=disable simulation messages, 1=enable simulation messages
        .USE_EMBEDDED_CONSTRAINT (0),                 // DECIMAL
        .USE_MEM_INIT            (1),                 // DECIMAL
        .USE_MEM_INIT_MMI        (0),                 // DECIMAL
        .WAKEUP_TIME             ("disable_sleep"),   // String
        .WRITE_DATA_WIDTH_A      (ACCUM_WIDTH),       // DECIMAL
        .WRITE_MODE_B            ("no_change"),       // String
        .WRITE_PROTECT           (1)                  // DECIMAL
    )
    xpm_memory_sdpram_inst (
        .doutb          (data_bram_out),
        .addra          (addra),
        .addrb          (addrb),
        .clka           (clk_in),
        .clkb           (clk_in),
        .dina           (data_bram_in),
        .ena            (1'b1),
        .enb            (enb),
        .wea            (wea),
        // Unused control inputs are tied off explicitly instead of being
        // left floating; ECC status outputs are intentionally left open.
        .rstb           (1'b0),
        .regceb         (1'b1),
        .sleep          (1'b0),
        .injectsbiterra (1'b0),
        .injectdbiterra (1'b0),
        .sbiterrb       (),
        .dbiterrb       ()
    );

    assign readout_begin = readout_begin_reg;
    assign out_data      = out_data_reg;
    assign out_valid     = out_valid_reg;

endmodule

52
rtl/accum/src/adder.sv Normal file
View File

@ -0,0 +1,52 @@
`timescale 1ns / 1ps
// -----------------------------------------------------------------------------
// adder
//
// Sums WINDOW_SIZE consecutive valid input samples and emits the total.
//
// The input is registered once (axis_data / axis_valid) before it is
// accumulated, and the result is registered again, so sum_valid pulses two
// clock edges after the last sample of a window was presented on the inputs.
// sum_valid is high for exactly one clock per completed window; sum_data
// holds the last result until the next window completes.
//
// Parameters:
//   DATA_WIDTH   width of one input sample (zero-extended into the sum)
//   WINDOW_SIZE  number of valid samples per output sum; the window counter
//                is 16 bits wide, so values above 65536 never complete
//   ACCUM_WIDTH  width of the running sum and of sum_data
//
// Ports:
//   clk_in, rst          clock and synchronous, active-high reset
//   s_axis_tdata/tvalid  input sample and its qualifier
//   sum_data/sum_valid   windowed sum and its one-cycle qualifier
// -----------------------------------------------------------------------------
module adder
#(
    parameter DATA_WIDTH  = 12,
    parameter WINDOW_SIZE = 4,
    parameter ACCUM_WIDTH = 32
)
(
    input                    clk_in,
    input                    rst,
    input  [DATA_WIDTH-1:0]  s_axis_tdata,
    input                    s_axis_tvalid,
    output [ACCUM_WIDTH-1:0] sum_data,
    output                   sum_valid
);

    // Running sum of the current window and the last completed result.
    logic [ACCUM_WIDTH-1:0] accum, res;

    // Registered copy of the input stream.
    logic [DATA_WIDTH-1:0]  axis_data;
    logic                   res_valid, axis_valid;

    // Number of samples already accumulated in the current window.
    (* MARK_DEBUG = "TRUE" *) logic [15:0] cnt;

    always_ff @(posedge clk_in) begin
        if (rst) begin
            accum      <= '0;
            cnt        <= '0;
            res        <= '0;
            res_valid  <= 1'b0;
            // The input stage is flushed as well: otherwise a sample latched
            // right before reset would be accumulated on the first clock
            // after reset and corrupt the first window.
            axis_data  <= '0;
            axis_valid <= 1'b0;
        end else begin
            // Default: no result this cycle (makes sum_valid a 1-clock pulse).
            res_valid  <= 1'b0;

            axis_data  <= s_axis_tdata;
            axis_valid <= s_axis_tvalid;

            if (axis_valid) begin
                if (cnt == WINDOW_SIZE-1) begin
                    // Last sample of the window: publish and restart.
                    res       <= accum + axis_data;
                    res_valid <= 1'b1;
                    accum     <= '0;
                    cnt       <= '0;
                end else begin
                    accum <= accum + axis_data;
                    cnt   <= cnt + 1;
                end
            end
        end
    end

    assign sum_valid = res_valid;
    assign sum_data  = res;

endmodule