`timescale 1ns / 1ps

//-----------------------------------------------------------------------------
// accumulator
//
// Coherent accumulator built around a simple dual-port block RAM.
//
// Operation (one run, started by `start` while the FSM is in IDLE):
//   1. `smp_num` and `seq_num` are latched.
//   2. INIT_MEM : the first pass stores every word produced by the `adder`
//                 instance at consecutive addresses, starting from 0. Each
//                 stored word advances the sample counter by WINDOW_SIZE; the
//                 pass ends once the counter reaches `smp_num`.
//   3. ACCUM    : every further pass does a read-modify-write per word
//                 (mem[addr] <= mem[addr] + adder word). The INIT_MEM pass
//                 counts as pass #1, so `seq_num` is the total number of
//                 passes summed together.
//   4. READOUT  : `readout_begin` is raised and stays high until the FSM
//                 returns to IDLE. Each `batch_req` seen in READOUT_AWAIT
//                 streams READ_BATCH_SIZE consecutive words on `out_data`,
//                 qualified by `out_valid`. The read address keeps advancing
//                 from batch to batch. A `finish` pulse (registered once, so
//                 it is seen one clock later) ends the readout while the FSM
//                 is waiting for a request.
//
// Parameters
//   DATA_WIDTH      width of the incoming sample bus
//   ACCUM_WIDTH     width of an accumulated word / memory word
//   N_MAX           memory depth in words (MEMORY_SIZE = N_MAX*ACCUM_WIDTH)
//   WINDOW_SIZE     samples accounted for by each word coming from `adder`
//   PACKET_SIZE     used only to derive READ_BATCH_SIZE
//   READ_BATCH_SIZE words streamed per `batch_req`; must be >= 1, and a
//                   power of two keeps the `%` below free of divider logic
//
// NOTE(review): addresses are 16 bit wide while the memory holds N_MAX words;
//   nothing here limits smp_num/WINDOW_SIZE (or the readout address) to
//   N_MAX - confirm the caller guarantees it.
// NOTE(review): words presented by `adder` while the FSM is in BEGIN_SEQ or
//   REQ_WORD_B are not consumed, and with READ_LATENCY_B = 1 plus the
//   registered `enb` the read data is not yet updated in the first ACCUM
//   cycle after REQ_WORD_B. The design therefore appears to rely on `adder`
//   producing at most one word every 4 clocks - confirm against `adder`.
//-----------------------------------------------------------------------------
module accumulator #(
    parameter DATA_WIDTH      = 12,
    parameter ACCUM_WIDTH     = 32,
    parameter N_MAX           = 4096,
    parameter WINDOW_SIZE     = 4,
    parameter PACKET_SIZE     = 8,
    parameter READ_BATCH_SIZE = (PACKET_SIZE*8)/(ACCUM_WIDTH)
) (
    input                    clk_in,
    input                    rst,            // synchronous, active high
    input  [DATA_WIDTH-1:0]  s_axis_tdata,   // forwarded to the adder instance
    input                    s_axis_tvalid,  // forwarded to the adder instance
    input                    start,          // sampled in IDLE only
    input  [31:0]            smp_num,        // samples per pass
    input  [15:0]            seq_num,        // number of passes to accumulate
    output [ACCUM_WIDTH-1:0] out_data,
    output                   out_valid,
    output                   readout_begin,
    input                    batch_req,      // sampled in READOUT_AWAIT only
    input                    finish          // sampled (1 clk late) in READOUT_AWAIT
);

    // Latched run configuration and progress counters.
    logic [31:0] smp_num_reg, cnt_smp_num;
    logic [15:0] seq_num_reg, cnt_seq_num;
    logic [15:0] cnt_addr, addra, addrb;

    // Adder output: one word per WINDOW_SIZE accounted samples.
    logic [ACCUM_WIDTH-1:0] data;
    logic                   valid_data;

    // Block RAM interface (port A = write, port B = read).
    logic [ACCUM_WIDTH-1:0] data_bram_in, data_bram_out;
    logic                   wea, enb;

    // Output registers.
    logic                   readout_begin_reg;
    logic [ACCUM_WIDTH-1:0] out_data_reg;
    logic                   out_valid_reg;

    // `finish` delayed by one clock.
    logic                   finish_buf;

    typedef enum logic [3:0] {
        IDLE,
        INIT_MEM,
        BEGIN_SEQ,
        REQ_WORD_B,
        ACCUM,
        READOUT_START,
        READOUT_AWAIT,
        READOUT_DELAY,
        READOUT_PUT,
        READOUT_LAST,
        FINISH
    } wr_state_t;

    (* MARK_DEBUG="true" *) wr_state_t wr_state;

    always_ff @(posedge clk_in) begin
        if (rst) begin
            smp_num_reg       <= '0;
            cnt_smp_num       <= '0;
            seq_num_reg       <= '0;
            cnt_seq_num       <= '0;
            cnt_addr          <= '0;
            addra             <= '0;
            addrb             <= '0;
            wea               <= 1'b0;
            enb               <= 1'b0;
            wr_state          <= IDLE;
            finish_buf        <= 1'b0;
            // Must be cleared here: otherwise a reset issued during readout
            // leaves `readout_begin` high for as long as `rst` is asserted.
            readout_begin_reg <= 1'b0;
            out_valid_reg     <= 1'b0;
            // data_bram_in / out_data_reg are qualified by wea / out_valid
            // and are intentionally left without a reset.
        end else begin
            finish_buf <= finish;

            case (wr_state)
                // Wait for `start`, then latch the run configuration.
                IDLE: begin
                    wea               <= 1'b0;
                    enb               <= 1'b0;
                    readout_begin_reg <= 1'b0;
                    out_valid_reg     <= 1'b0;
                    if (start) begin
                        smp_num_reg <= smp_num;
                        seq_num_reg <= seq_num;
                        wr_state    <= INIT_MEM;
                    end
                end

                // First pass: plain store, which also overwrites whatever a
                // previous run left in the memory.
                INIT_MEM: begin
                    wea <= 1'b0;
                    if (valid_data) begin
                        data_bram_in <= data;
                        addra        <= cnt_addr;
                        wea          <= 1'b1;
                        cnt_addr     <= cnt_addr + 1;
                        cnt_smp_num  <= cnt_smp_num + WINDOW_SIZE;
                        // Leave in the same cycle as the last store (same
                        // test as ACCUM). Comparing the registered counter
                        // one clock later allowed an extra stray store and
                        // delayed the hand-over to the next pass by a cycle.
                        if (cnt_smp_num + WINDOW_SIZE >= smp_num_reg) begin
                            wr_state <= BEGIN_SEQ;
                        end
                    end
                end

                // Pass boundary: rewind the per-pass counters and decide
                // between another accumulation pass and readout.
                BEGIN_SEQ: begin
                    wea         <= 1'b0;
                    enb         <= 1'b0;
                    cnt_smp_num <= '0;
                    cnt_addr    <= '0;
                    addrb       <= '0;
                    // 17-bit compare: identical to "cnt == seq_num - 1" for
                    // seq_num >= 1, but seq_num == 0 no longer underflows
                    // into a run that never reaches readout.
                    if ((cnt_seq_num + 17'd1) >= seq_num_reg) begin
                        cnt_seq_num <= '0;
                        wr_state    <= READOUT_START;
                    end else begin
                        cnt_seq_num <= cnt_seq_num + 1;
                        wr_state    <= REQ_WORD_B;
                    end
                end

                // Issue the port B read for the word about to be updated.
                REQ_WORD_B: begin
                    wea      <= 1'b0;
                    enb      <= 1'b1;
                    addrb    <= cnt_addr;
                    wr_state <= ACCUM;
                end

                // Read-modify-write: stored word + new adder word.
                ACCUM: begin
                    enb <= 1'b0;
                    if (valid_data) begin
                        addra        <= cnt_addr;
                        wea          <= 1'b1;
                        data_bram_in <= data + data_bram_out;
                        cnt_smp_num  <= cnt_smp_num + WINDOW_SIZE;
                        if (cnt_smp_num + WINDOW_SIZE >= smp_num_reg) begin
                            wr_state <= BEGIN_SEQ;
                        end else begin
                            cnt_addr <= cnt_addr + 1;
                            wr_state <= REQ_WORD_B;
                        end
                    end
                end

                // Announce that accumulated data can be requested.
                READOUT_START: begin
                    readout_begin_reg <= 1'b1;
                    enb               <= 1'b0;
                    wr_state          <= READOUT_AWAIT;
                end

                // Wait for a batch request; `batch_req` has priority over
                // the delayed `finish`.
                READOUT_AWAIT: begin
                    if (batch_req) begin
                        enb      <= 1'b1;
                        wr_state <= READOUT_DELAY;
                    end else if (finish_buf) begin
                        wr_state <= FINISH;
                    end else begin
                        enb           <= 1'b0;
                        out_valid_reg <= 1'b0;
                    end
                end

                // First read of the batch is in flight; pre-increment the
                // address so one word can be delivered per clock.
                READOUT_DELAY: begin
                    addrb    <= addrb + 1;
                    wr_state <= READOUT_PUT;
                end

                // Stream words; the batch ends once addrb has advanced to a
                // multiple of READ_BATCH_SIZE.
                READOUT_PUT: begin
                    if ((addrb % READ_BATCH_SIZE) == 0) begin
                        wr_state <= READOUT_LAST;
                        enb      <= 1'b0;
                    end else begin
                        addrb <= addrb + 1;
                    end
                    out_valid_reg <= 1'b1;
                    out_data_reg  <= data_bram_out;
                end

                // Close the batch: drop out_valid. The word captured here is
                // the first word of the next batch and is not flagged valid.
                READOUT_LAST: begin
                    out_valid_reg <= 1'b0;
                    out_data_reg  <= data_bram_out;
                    wr_state      <= READOUT_START;
                end

                FINISH: begin
                    out_valid_reg <= 1'b0;
                    enb           <= 1'b0;
                    wr_state      <= IDLE;
                end

                default: wr_state <= IDLE;
            endcase
        end
    end

    // Input stage: turns the sample stream into ACCUM_WIDTH words
    // (defined elsewhere in the project).
    adder #(
        .DATA_WIDTH (DATA_WIDTH),
        .WINDOW_SIZE(WINDOW_SIZE),
        .ACCUM_WIDTH(ACCUM_WIDTH)
    ) adder_dut (
        .clk_in       (clk_in),
        .rst          (rst),
        .s_axis_tdata (s_axis_tdata),
        .s_axis_tvalid(s_axis_tvalid),
        .sum_data     (data),
        .sum_valid    (valid_data)
    );

    // Accumulation memory: N_MAX words of ACCUM_WIDTH bits, common clock,
    // one clock of read latency on port B.
    xpm_memory_sdpram #(
        .ADDR_WIDTH_A           (16),                 // DECIMAL
        .ADDR_WIDTH_B           (16),                 // DECIMAL
        .AUTO_SLEEP_TIME        (0),                  // DECIMAL
        .BYTE_WRITE_WIDTH_A     (ACCUM_WIDTH),        // DECIMAL
        .CASCADE_HEIGHT         (0),                  // DECIMAL
        .CLOCKING_MODE          ("common_clock"),     // String
        .ECC_MODE               ("no_ecc"),           // String
        .MEMORY_INIT_FILE       ("none"),             // String
        .MEMORY_INIT_PARAM      ("0"),                // String
        .MEMORY_OPTIMIZATION    ("true"),             // String
        .MEMORY_PRIMITIVE       ("auto"),             // String
        .MEMORY_SIZE            (N_MAX*ACCUM_WIDTH),  // DECIMAL
        .MESSAGE_CONTROL        (0),                  // DECIMAL
        .READ_DATA_WIDTH_B      (ACCUM_WIDTH),        // DECIMAL
        .READ_LATENCY_B         (1),                  // DECIMAL
        .READ_RESET_VALUE_B     ("0"),                // String
        .RST_MODE_A             ("SYNC"),             // String
        .RST_MODE_B             ("SYNC"),             // String
        .SIM_ASSERT_CHK         (0),                  // DECIMAL; 0=disable simulation messages, 1=enable simulation messages
        .USE_EMBEDDED_CONSTRAINT(0),                  // DECIMAL
        .USE_MEM_INIT           (1),                  // DECIMAL
        .USE_MEM_INIT_MMI       (0),                  // DECIMAL
        .WAKEUP_TIME            ("disable_sleep"),    // String
        .WRITE_DATA_WIDTH_A     (ACCUM_WIDTH),        // DECIMAL
        .WRITE_MODE_B           ("no_change"),        // String
        .WRITE_PROTECT          (1)                   // DECIMAL
    ) xpm_memory_sdpram_inst (
        .doutb         (data_bram_out),
        .addra         (addra),
        .addrb         (addrb),
        .clka          (clk_in),
        .clkb          (clk_in),
        .dina          (data_bram_in),
        .ena           (1'b1),
        .enb           (enb),
        .wea           (wea),
        // Control inputs that were left unconnected; tied to their inactive
        // values so no macro input is undriven.
        .rstb          (1'b0),
        .regceb        (1'b1),
        .sleep         (1'b0),
        .injectsbiterra(1'b0),
        .injectdbiterra(1'b0)
    );

    assign readout_begin = readout_begin_reg;
    assign out_data      = out_data_reg;
    assign out_valid     = out_valid_reg;

endmodule