diff --git a/rtl/accum/src/accum.sv b/rtl/accum/src/accum.sv
new file mode 100644
index 0000000..51e2285
--- /dev/null
+++ b/rtl/accum/src/accum.sv
@@ -0,0 +1,300 @@
+`timescale 1ns / 1ps
+
+// Windowed accumulator: sums the same sample window across repeated sequences.
+//
+// The `adder` instance folds every WINDOW_SIZE valid input samples into one
+// word. The first sequence (INIT_MEM) stores those words at consecutive memory
+// addresses; each following sequence reads the stored word back, adds the new
+// window sum and writes the result to the same address. After seq_num
+// sequences readout_begin is raised and, on batch_req, READ_BATCH_SIZE words
+// starting at address 0 are streamed on out_data/out_valid before the FSM
+// returns to IDLE through FINISH.
+//
+// Parameters:
+//   DATA_WIDTH      - width of an input sample
+//   ACCUM_WIDTH     - width of a stored/accumulated word
+//   N_MAX           - number of ACCUM_WIDTH-bit words in the memory
+//   WINDOW_SIZE     - input samples summed into one stored word
+//   PACKET_SIZE     - readout packet size (presumably in bytes -- confirm)
+//   READ_BATCH_SIZE - words streamed per readout batch
+module accumulator
+#(
+    parameter DATA_WIDTH = 12,
+    parameter ACCUM_WIDTH = 32,
+    parameter N_MAX = 4096,
+    parameter WINDOW_SIZE = 4,
+    parameter PACKET_SIZE = 8,
+    parameter READ_BATCH_SIZE =(PACKET_SIZE*8)/(ACCUM_WIDTH)
+    )
+(
+    input clk_in,
+    input rst,                            // synchronous, active high
+    input [DATA_WIDTH-1:0] s_axis_tdata,  // input sample
+    input s_axis_tvalid,                  // qualifies s_axis_tdata
+    input start,                          // latches smp_num/seq_num in IDLE
+    input [31:0] smp_num,                 // input samples per sequence
+    input [15:0] seq_num,                 // sequences to sum (0 acts as 1)
+
+    output [ACCUM_WIDTH-1:0] out_data,
+    output out_valid,
+    output readout_begin,                 // raised once accumulation is done
+    input batch_req,                      // starts streaming one batch
+    input finish                          // ends the wait for batch_req
+    );
+
+    logic [31:0] smp_num_reg, cnt_smp_num;
+    logic [15:0] seq_num_reg, cnt_seq_num;
+    logic [15:0] cnt_addr, addra, addrb;
+
+    logic [ACCUM_WIDTH-1:0] data;
+    logic valid_data;
+    logic [ACCUM_WIDTH-1:0] data_bram_in, data_bram_out;
+    logic wea, enb;
+
+    logic readout_begin_reg;
+    logic [ACCUM_WIDTH-1:0] out_data_reg;
+    logic out_valid_reg;
+    logic finish_buf;
+
+    typedef enum logic [3:0] {
+        IDLE,
+        INIT_MEM,
+        BEGIN_SEQ,
+        REQ_WORD_B,
+        ACCUM,
+        READOUT_START,
+        READOUT_AWAIT,
+        READOUT_DELAY,
+        READOUT_PUT,
+        READOUT_LAST,
+        FINISH
+    } wr_state_t;
+    (* MARK_DEBUG="true" *) wr_state_t wr_state;
+
+    always_ff @(posedge clk_in) begin
+        if (rst) begin
+            smp_num_reg <= '0;
+            cnt_smp_num <= '0;
+            seq_num_reg <= '0;
+            cnt_seq_num <= '0;
+            cnt_addr <= '0;
+            wea <= 0;
+            enb <= 0;
+            wr_state <= IDLE;
+            out_valid_reg <= 0;
+            // Clear the control flags as well, so readout_begin drops while
+            // rst is held and no stale finish request survives the reset.
+            readout_begin_reg <= 0;
+            finish_buf <= 0;
+        end else begin
+            finish_buf <= finish;
+
+            // FSM
+            case(wr_state)
+
+                IDLE: begin
+                    // wait for start signal
+                    wea <= 0;
+                    enb <= 0;
+                    readout_begin_reg <= 0;
+                    out_valid_reg <= 0;
+                    if (start) begin
+                        smp_num_reg <= smp_num;
+                        seq_num_reg <= seq_num;
+                        wr_state <= INIT_MEM;
+                    end
+                end
+
+                INIT_MEM: begin
+                    // first sequence: store every window sum unchanged
+                    wea <= 0;
+                    if (valid_data) begin
+                        data_bram_in <= data;
+                        addra <= cnt_addr;
+                        wea <= 1;
+                        cnt_addr <= cnt_addr + 1;
+                        cnt_smp_num <= cnt_smp_num + WINDOW_SIZE;
+                    end
+                    if (cnt_smp_num >= smp_num_reg) begin
+                        wr_state <= BEGIN_SEQ;
+                    end
+                end
+
+                BEGIN_SEQ: begin
+                    // rewind the counters, then start the next sequence or
+                    // move on to readout
+                    wea <= 0;
+                    enb <= 0;
+                    cnt_smp_num <= '0;
+                    cnt_addr <= '0;
+                    addrb <= '0;
+                    // Test the incremented counter with ">=" rather than
+                    // comparing against seq_num_reg - 1: that subtraction is
+                    // evaluated at 32 bits, so seq_num == 0 would yield
+                    // 32'hFFFF_FFFF, which the 16-bit counter never reaches.
+                    if (cnt_seq_num + 1 >= seq_num_reg) begin
+                        cnt_seq_num <= '0;
+                        wr_state <= READOUT_START;
+                    end else begin
+                        cnt_seq_num <= cnt_seq_num + 1;
+                        wr_state <= REQ_WORD_B;
+                    end
+                end
+
+                REQ_WORD_B: begin
+                    // pre-request data for port b
+                    wea <= 0;
+                    enb <= 1;
+                    addrb <= cnt_addr;
+                    wr_state <= ACCUM;
+                end
+
+                ACCUM: begin
+                    // sum mem+input. NOTE(review): enb/addrb are registered and
+                    // READ_LATENCY_B is 1, so data_bram_out should only hold the
+                    // requested word from the second ACCUM cycle on; this relies
+                    // on valid_data not arriving earlier -- confirm.
+                    enb <= 0;
+                    if (valid_data) begin
+                        addra <= cnt_addr;
+                        wea <= 1;
+                        data_bram_in <= data + data_bram_out;
+                        cnt_smp_num <= cnt_smp_num + WINDOW_SIZE;
+                        if (cnt_smp_num + WINDOW_SIZE >= smp_num_reg) begin
+                            wr_state <= BEGIN_SEQ;
+                        end else begin
+                            cnt_addr <= cnt_addr + 1;
+                            wr_state <= REQ_WORD_B;
+                        end
+                    end
+                end
+
+                READOUT_START: begin
+                    // announce that the accumulated data can be read
+                    readout_begin_reg <= 1'b1;
+                    wr_state <= READOUT_AWAIT;
+                    addrb <= 0;
+                    enb <= 0;
+                end
+
+                READOUT_AWAIT: begin
+                    // req await + delay for every-clock readout.
+                    if (batch_req) begin
+                        enb <= 1;
+                        wr_state <= READOUT_DELAY;
+                    end else if (finish_buf) begin
+                        wr_state <= FINISH;
+                    end else begin
+                        enb <= 0;
+                        out_valid_reg <= 0;
+                    end
+                end
+
+                READOUT_DELAY: begin
+                    // wait for mem latency
+                    addrb <= addrb + 1;
+                    wr_state <= READOUT_PUT;
+                end
+
+                READOUT_PUT: begin
+                    // main data output
+                    if (addrb == READ_BATCH_SIZE) begin
+                        wr_state <= READOUT_LAST;
+                        enb <= 0;
+                    end
+                    addrb <= addrb + 1;
+                    out_valid_reg <= 1;
+                    out_data_reg <= data_bram_out;
+                end
+
+                READOUT_LAST: begin
+                    // end of packet: out_valid drops, so the word latched
+                    // here is not flagged valid
+                    out_valid_reg <= 0;
+                    out_data_reg <= data_bram_out;
+                    wr_state <= FINISH;
+                end
+
+                FINISH: begin
+                    out_valid_reg <= 0;
+                    enb <= 0;
+                    wr_state <= IDLE;
+                end
+
+                default: wr_state <= IDLE;
+            endcase
+        end
+    end
+
+    // Sums WINDOW_SIZE input samples into one word (data/valid_data).
+    adder
+    #(
+        .DATA_WIDTH(DATA_WIDTH),
+        .WINDOW_SIZE(WINDOW_SIZE),
+        .ACCUM_WIDTH(ACCUM_WIDTH)
+    ) adder_dut
+    (
+        .clk_in(clk_in),
+        .rst(rst),
+        .s_axis_tdata(s_axis_tdata),
+        .s_axis_tvalid(s_axis_tvalid),
+        .sum_data(data),
+        .sum_valid(valid_data)
+    );
+
+    // Accumulation memory: port A writes, port B reads, one common clock.
+    xpm_memory_sdpram #(
+        .ADDR_WIDTH_A(16),               // DECIMAL
+        .ADDR_WIDTH_B(16),               // DECIMAL
+        .AUTO_SLEEP_TIME(0),             // DECIMAL
+        .BYTE_WRITE_WIDTH_A(ACCUM_WIDTH),// DECIMAL
+        .CASCADE_HEIGHT(0),              // DECIMAL
+        .CLOCKING_MODE("common_clock"),  // String
+        .ECC_MODE("no_ecc"),             // String
+        .MEMORY_INIT_FILE("none"),       // String
+        .MEMORY_INIT_PARAM("0"),         // String
+        .MEMORY_OPTIMIZATION("true"),    // String
+        .MEMORY_PRIMITIVE("auto"),       // String
+        .MEMORY_SIZE(N_MAX*ACCUM_WIDTH), // DECIMAL
+        .MESSAGE_CONTROL(0),             // DECIMAL
+        .READ_DATA_WIDTH_B(ACCUM_WIDTH), // DECIMAL
+        .READ_LATENCY_B(1),              // DECIMAL
+        .READ_RESET_VALUE_B("0"),        // String
+        .RST_MODE_A("SYNC"),             // String
+        .RST_MODE_B("SYNC"),             // String
+        .SIM_ASSERT_CHK(0),              // DECIMAL; 0=disable simulation messages, 1=enable simulation messages
+        .USE_EMBEDDED_CONSTRAINT(0),     // DECIMAL
+        .USE_MEM_INIT(1),                // DECIMAL
+        .USE_MEM_INIT_MMI(0),            // DECIMAL
+        .WAKEUP_TIME("disable_sleep"),   // String
+        .WRITE_DATA_WIDTH_A(ACCUM_WIDTH),// DECIMAL
+        .WRITE_MODE_B("no_change"),      // String
+        .WRITE_PROTECT(1)                // DECIMAL
+    )
+    xpm_memory_sdpram_inst (
+        .doutb(data_bram_out),
+        .sbiterrb(),                     // ECC status unused (ECC_MODE is "no_ecc")
+        .dbiterrb(),
+
+        .addra(addra),
+        .addrb(addrb),
+        .clka(clk_in),
+        .clkb(clk_in),
+        .dina(data_bram_in),
+        .ena(1'b1),
+        .enb(enb),
+        .wea(wea),
+
+        // Remaining inputs are tied off explicitly rather than left floating.
+        .regceb(1'b1),
+        .rstb(1'b0),
+        .sleep(1'b0),
+        .injectsbiterra(1'b0),
+        .injectdbiterra(1'b0)
+    );
+
+    assign readout_begin = readout_begin_reg;
+    assign out_data = out_data_reg;
+    assign out_valid = out_valid_reg;
+
+endmodule
diff --git a/rtl/accum/src/adder.sv b/rtl/accum/src/adder.sv
new file mode 100644
index 0000000..dd1c3c9
--- /dev/null
+++ b/rtl/accum/src/adder.sv
@@ -0,0 +1,60 @@
+`timescale 1ns / 1ps
+
+
+// Sums WINDOW_SIZE consecutive valid input samples and publishes the total on
+// sum_data, with sum_valid asserted for one cycle per completed window.
+module adder
+#(
+    parameter DATA_WIDTH = 12,
+    parameter WINDOW_SIZE = 4,
+    parameter ACCUM_WIDTH = 32
+    )
+(
+    input clk_in,
+    input rst,                            // synchronous, active high
+    input [DATA_WIDTH-1:0] s_axis_tdata,
+    input s_axis_tvalid,
+
+    output [ACCUM_WIDTH-1:0] sum_data,
+    output sum_valid
+    );
+
+    logic [ACCUM_WIDTH-1:0] accum, res;
+    logic [DATA_WIDTH-1:0] axis_data;
+    logic res_valid, axis_valid;
+    (* MARK_DEBUG = "TRUE" *) logic [15:0] cnt;
+
+    always_ff @(posedge clk_in) begin
+        if (rst) begin
+            accum <= '0;
+            cnt <= '0;
+            res <= '0;
+            res_valid <= 0;
+            // Drop the registered input too; otherwise a sample captured
+            // just before reset is added to the first window after it.
+            axis_data <= '0;
+            axis_valid <= 0;
+        end else begin
+            res_valid <= 0;
+            // input register stage
+            axis_data <= s_axis_tdata;
+            axis_valid <= s_axis_tvalid;
+            if (axis_valid) begin
+                if (cnt == WINDOW_SIZE-1) begin
+                    // last sample of the window: publish and restart
+                    res <= accum + axis_data;
+                    res_valid <= 1;
+                    accum <= '0;
+                    cnt <= '0;
+                end else begin
+                    accum <= accum + axis_data;
+                    cnt <= cnt + 1;
+                end
+            end
+        end
+    end
+
+    assign sum_valid = res_valid;
+    assign sum_data = res;
+
+endmodule