From dd652f41f900152aa864dde411e678604ed266a3 Mon Sep 17 00:00:00 2001 From: awe Date: Mon, 27 Apr 2026 17:22:03 +0300 Subject: [PATCH] try to speed up --- main.cpp | 448 +++++++++++++++++++++++----------------- tty_protocol_writer.cpp | 23 ++- 2 files changed, 280 insertions(+), 191 deletions(-) diff --git a/main.cpp b/main.cpp index 1b727b3..c276285 100644 --- a/main.cpp +++ b/main.cpp @@ -86,6 +86,7 @@ struct Config { uint32_t recv_block_words = 32768; uint32_t recv_timeout_ms = 50; + double tty_latency_ms = 2.0; uint32_t stats_period_ms = 1000; uint32_t start_wait_ms = 10000; uint32_t input_buffer_words = 8 * 1024 * 1024; @@ -94,6 +95,7 @@ struct Config { uint32_t svg_history_packets = 50; bool recv_block_specified = false; + bool recv_timeout_specified = false; bool input_buffer_specified = false; bool input_step_specified = false; bool live_update_specified = false; @@ -474,6 +476,7 @@ void print_help(const char* exe_name) { << " [duration_ms:100] [packet_limit:0] [csv:capture.csv] [svg:capture.svg]\n" << " [live_html:live_plot.html] [live_json:live_plot.json] [tty:/tmp/ttyADC_data] [di1_group_avg]\n" << " [do1_toggle_per_frame] [do1_noise_subtract] [noise_avg_steps:N]\n" + << " [tty_latency_ms:2.0]\n" << " [recv_block:32768] [stats_period_ms:1000] [live_update_period_ms:1000] [svg_history_packets:50] [start_wait_ms:10000]\n" << " [buffer_words:8388608] [step_words:32768]\n" << " [pullup_syn1] [pullup_syn2] [pulldown_conv_in] [pulldown_start_in]\n" @@ -517,6 +520,8 @@ void print_help(const char* exe_name) { << " (only corrected DO1=LOW steps are sent to tty)\n" << " noise_avg_steps:N -> number of recent DO1=HIGH noise steps per channel used as subtraction baseline\n" << " (required when do1_noise_subtract is enabled)\n" + << " tty_latency_ms:2.0 -> target chunk latency for tty fast stream-only modes; used only when\n" + << " recv_block/step_words are not set explicitly\n" << " tty fast stream-only modes -> di1_group_avg or do1_noise_subtract; skip CSV/SVG/live outputs\n" << " If sample_clock_hz is omitted together with clock:internal, the maximum ADC speed is used\n" << "\n" @@ -694,6 +699,11 @@ Config parse_args(int argc, char** argv) { } if (starts_with(arg, "recv_timeout_ms:")) { cfg.recv_timeout_ms = parse_u32(arg.substr(16), "recv_timeout_ms"); + cfg.recv_timeout_specified = true; + continue; + } + if (starts_with(arg, "tty_latency_ms:")) { + cfg.tty_latency_ms = parse_double(arg.substr(15), "tty_latency_ms"); continue; } if (starts_with(arg, "stats_period_ms:")) { @@ -754,6 +764,9 @@ Config parse_args(int argc, char** argv) { if (!cfg.max_internal_clock && (cfg.sample_clock_hz <= 0.0)) { fail("sample_clock_hz must be > 0"); } + if (cfg.tty_latency_ms <= 0.0) { + fail("tty_latency_ms must be > 0"); + } if (cfg.max_internal_clock && (cfg.sync_mode != X502_SYNC_INTERNAL)) { fail("sample_clock_hz:max is only valid together with clock:internal"); } @@ -776,14 +789,26 @@ Config parse_args(int argc, char** argv) { } } if (cfg.tty_path && (cfg.di1_group_average || cfg.do1_noise_subtract)) { + const double estimated_adc_hz = + use_internal_max_clock(cfg) ? static_cast(cfg.internal_ref_freq) : cfg.sample_clock_hz; + const double estimated_combined_words_hz = std::max(1.0, estimated_adc_hz * 2.0); + const long long raw_chunk_words = std::llround(estimated_combined_words_hz * cfg.tty_latency_ms / 1000.0); + uint32_t chunk_words = + static_cast(std::clamp(raw_chunk_words, 1024LL, 16384LL)); + chunk_words = static_cast((static_cast(chunk_words) + 255ULL) & ~255ULL); + if (!cfg.recv_block_specified) { - cfg.recv_block_words = std::max(cfg.recv_block_words, 65536U); + cfg.recv_block_words = chunk_words; } if (!cfg.input_step_specified) { - cfg.input_step_words = std::max(cfg.input_step_words, 65536U); + cfg.input_step_words = chunk_words; } - if (!cfg.input_buffer_specified) { - cfg.input_buffer_words = std::max(cfg.input_buffer_words, 16U * 1024U * 1024U); + if (!cfg.recv_timeout_specified) { + const uint32_t tuned_timeout_ms = + static_cast(std::clamp(static_cast(std::ceil(cfg.tty_latency_ms)), + 1LL, + 5LL)); + cfg.recv_timeout_ms = tuned_timeout_ms; } } if (cfg.recv_block_words == 0) { @@ -1642,6 +1667,8 @@ int run(const Config& cfg) { << " recv block words: " << cfg.recv_block_words << "\n" << " input step words: " << cfg.input_step_words << "\n" << " input buffer words: " << cfg.input_buffer_words << "\n" + << " recv timeout: " << cfg.recv_timeout_ms << " ms\n" + << " tty latency target: " << cfg.tty_latency_ms << " ms\n" << " tty di1_group_avg: " << (tty_di1_group_average ? "enabled" : "disabled") << "\n" << " tty do1_noise_subtract: " << (tty_do1_noise_subtract ? "enabled" : "disabled") << "\n" << " do1_noise_subtract marker: " @@ -1658,6 +1685,7 @@ int run(const Config& cfg) { << " recv block words: " << cfg.recv_block_words << "\n" << " input step words: " << cfg.input_step_words << "\n" << " input buffer words: " << cfg.input_buffer_words << "\n" + << " recv timeout: " << cfg.recv_timeout_ms << " ms\n" << " tty di1_group_avg: " << (tty_di1_group_average ? std::string("enabled") : (cfg.di1_group_average ? std::string("requested, tty disabled") @@ -1728,8 +1756,9 @@ int run(const Config& cfg) { std::vector adc_raw_buffer(read_capacity_words); std::vector din_buffer(read_capacity_words); std::deque pending_adc; - std::deque pending_adc_raw; std::deque pending_din; + std::vector fast_pending_adc_raw; + std::vector fast_pending_din; std::deque packets; PacketAccumulator current_packet; TtyContinuousState tty_state; @@ -1737,6 +1766,8 @@ int run(const Config& cfg) { Do1NoiseSubtractState tty_do1_noise_state; std::vector tty_frame_words; tty_frame_words.reserve(static_cast(read_capacity_words) * 2U + 16U); + fast_pending_adc_raw.reserve(read_capacity_words); + fast_pending_din.reserve(read_capacity_words); if (tty_do1_noise_subtract) { tty_do1_noise_state.configure(*cfg.noise_avg_steps); } @@ -1764,6 +1795,8 @@ int run(const Config& cfg) { TickMs stats_window_start = capture_loop_start; TickMs last_stats_print = capture_loop_start; TickMs last_live_update = 0; + TickMs packet_start_ms = capture_loop_start; + bool first_corrected_step_reported = false; uint64_t total_raw_words = 0; uint64_t total_adc_samples = 0; @@ -1835,11 +1868,32 @@ int run(const Config& cfg) { } }; + auto flush_tty_frames = [&]() { + if (!tty_writer || tty_frame_words.empty()) { + return; + } + + tty_writer->enqueue_encoded_frames(tty_frame_words.data(), tty_frame_words.size() / 4U); + tty_frame_words.clear(); + + const auto tty_stats = tty_writer->stats(); + if (!tty_overflow_warning_printed && (tty_stats.ring_overflows != 0U)) { + std::cerr << "Warning: TTY ring buffer overflowed; dropping oldest frames to keep the stream continuous\n"; + tty_overflow_warning_printed = true; + } + }; + + constexpr std::size_t kTtyFlushFrameThreshold = 16U; + auto append_tty_frame = [&](uint16_t word0, uint16_t word1, uint16_t word2, uint16_t word3) { tty_frame_words.push_back(word0); tty_frame_words.push_back(word1); tty_frame_words.push_back(word2); tty_frame_words.push_back(word3); + + if (fast_tty_avg_stream_mode && ((tty_frame_words.size() / 4U) >= kTtyFlushFrameThreshold)) { + flush_tty_frames(); + } }; auto append_tty_packet_start = [&]() { @@ -1859,7 +1913,7 @@ int run(const Config& cfg) { (cfg.channel_count <= 1U) ? 0.0 : (tty_group_state.sum_ch2 / static_cast(tty_group_state.count_ch2)); - append_tty_frame( (cfg.profile == CaptureProfile::Amplitude) ? 0x001AU : 0x000AU, + append_tty_frame((cfg.profile == CaptureProfile::Amplitude) ? 0x001AU : 0x000AU, static_cast(tty_group_state.next_index), static_cast(pack_raw_code_to_int16(ch1_avg)), static_cast(pack_raw_code_to_int16(ch2_avg))); @@ -1867,18 +1921,18 @@ int run(const Config& cfg) { ++packet_avg_steps; }; - auto append_tty_do1_subtracted_step = [&]() { + auto append_tty_do1_subtracted_step = [&]() -> bool { if (!tty_do1_noise_subtract) { - return; + return false; } if (!tty_do1_noise_state.step_level_initialized) { - return; + return false; } if (!tty_do1_noise_state.has_complete_step(cfg.channel_count)) { tty_do1_noise_state.finish_step(); - return; + return false; } const double ch1_avg = tty_do1_noise_state.step_average(0U); @@ -1890,7 +1944,7 @@ int run(const Config& cfg) { tty_do1_noise_state.push_noise_average(1U, ch2_avg); } tty_do1_noise_state.finish_step(); - return; + return false; } if (tty_do1_noise_state.next_index >= 0xFFFFU) { @@ -1908,21 +1962,15 @@ int run(const Config& cfg) { ++tty_do1_noise_state.next_index; ++packet_avg_steps; tty_do1_noise_state.finish_step(); - }; - auto flush_tty_frames = [&]() { - if (!tty_writer || tty_frame_words.empty()) { - return; + if (!first_corrected_step_reported) { + const TickMs now = tick_count_ms(); + std::cout << " first_corrected_tty_step_ms=" << (now - packet_start_ms) << "\n"; + first_corrected_step_reported = true; } - tty_writer->enqueue_encoded_frames(tty_frame_words.data(), tty_frame_words.size() / 4U); - tty_frame_words.clear(); - - const auto tty_stats = tty_writer->stats(); - if (!tty_overflow_warning_printed && (tty_stats.ring_overflows != 0U)) { - std::cerr << "Warning: TTY ring buffer overflowed; dropping oldest frames to keep the stream continuous\n"; - tty_overflow_warning_printed = true; - } + flush_tty_frames(); + return true; }; auto start_packet = [&]() { @@ -1934,6 +1982,8 @@ int run(const Config& cfg) { packet_clock_count = 0; packet_di2_high_clocks = 0; packet_di2_low_clocks = 0; + packet_start_ms = tick_count_ms(); + first_corrected_step_reported = false; if (!fast_tty_avg_stream_mode) { current_packet.reset(target_frames, cfg.channel_count); } @@ -2086,6 +2136,147 @@ int run(const Config& cfg) { } }; + auto process_aligned_sample = [&](double adc_value, double adc_raw_value, uint32_t din_value) { + // Keep ADC logical-channel phase aligned even when samples are skipped outside packet windows. + const uint32_t lch = next_lch; + next_lch = (next_lch + 1U) % cfg.channel_count; + + const bool di1_level = (din_value & kE502Digital1Mask) != 0U; + bool di1_changed = false; + if (!di1_initialized) { + di1_prev_level = di1_level; + di1_initialized = true; + } else if (di1_level != di1_prev_level) { + di1_changed = true; + di1_prev_level = di1_level; + } + + const bool di_syn2_level = (din_value & kE502DiSyn2Mask) != 0U; + bool start_edge = false; + bool stop_edge = false; + if (!trigger_level_initialized) { + trigger_prev_level = di_syn2_level; + trigger_level_initialized = true; + if (!packet_active) { + if (cfg.sync_start_mode == X502_SYNC_INTERNAL) { + start_packet(); + } else if (sync_uses_di_syn2(cfg.sync_start_mode) && di_syn2_level) { + start_packet(); + } else if (!sync_uses_di_syn2(cfg.sync_start_mode)) { + start_packet(); + } + } + } else { + start_edge = matches_sync_edge(cfg.sync_start_mode, trigger_prev_level, di_syn2_level); + if (cfg.stop_mode != StopMode::TargetFrames) { + stop_edge = matches_stop_edge(cfg.stop_mode, trigger_prev_level, di_syn2_level); + } + trigger_prev_level = di_syn2_level; + } + if (!packet_active && (cfg.sync_start_mode == X502_SYNC_INTERNAL)) { + start_packet(); + } + + if (packet_active && start_edge) { + finish_packet(PacketCloseReason::ExternalStopEdge); + if ((cfg.packet_limit != 0U) && (total_completed_packets >= cfg.packet_limit)) { + stop_loop_requested = true; + return; + } + start_packet(); + } else if (!packet_active && start_edge) { + start_packet(); + } + + if (packet_active && stop_edge) { + finish_packet(PacketCloseReason::ExternalStopEdge); + if ((cfg.packet_limit != 0U) && (total_completed_packets >= cfg.packet_limit)) { + stop_loop_requested = true; + } + return; + } + + if (!packet_active) { + return; + } + + if ((din_value & kE502Digital2Mask) != 0U) { + ++packet_di2_high_clocks; + } else { + ++packet_di2_low_clocks; + } + ++packet_clock_count; + + if (tty_di1_group_average && di1_changed) { + append_tty_group_step(); + tty_group_state.clear_step(); + } else if (tty_do1_noise_subtract) { + if (!tty_do1_noise_state.step_level_initialized) { + tty_do1_noise_state.start_new_step(di1_level); + } else if (di1_changed) { + append_tty_do1_subtracted_step(); + tty_do1_noise_state.start_new_step(di1_level); + } + } + + if (!fast_tty_avg_stream_mode && + (cfg.di1_mode == Di1Mode::Trace) && + ((cfg.channel_count <= 1U) || (lch == 0U))) { + current_packet.pending_frame_di1 = static_cast(di1_level ? 1U : 0U); + current_packet.pending_frame_di1_valid = true; + } + + if (fast_tty_avg_stream_mode) { + if (fast_packet_frames < target_frames) { + if (tty_do1_noise_subtract) { + tty_do1_noise_state.add_sample(lch, adc_raw_value); + } else { + tty_group_state.add_sample(lch, adc_raw_value); + } + if (lch == (cfg.channel_count - 1U)) { + ++fast_packet_frames; + ++total_completed_frames; + ++stats_completed_frames; + } + } + } else { + double stored_value = adc_value; + if ((cfg.di1_mode == Di1Mode::ZeroOnChange) && di1_changed) { + stored_value = 0.0; + ++total_zeroed_samples; + ++stats_zeroed_samples; + ++current_packet.zeroed_samples; + } + + if (current_packet.channels[lch].size() < target_frames) { + current_packet.channels[lch].push_back(stored_value); + ++current_packet.stored_samples; + ++total_stored_adc_samples; + ++stats_stored_adc_samples; + if (lch == (cfg.channel_count - 1U)) { + if ((cfg.di1_mode == Di1Mode::Trace) && + current_packet.pending_frame_di1_valid && + (current_packet.di1.size() < target_frames)) { + current_packet.di1.push_back(current_packet.pending_frame_di1); + current_packet.pending_frame_di1_valid = false; + } + ++total_completed_frames; + ++stats_completed_frames; + } + } + } + + const std::size_t completed_frames = fast_tty_avg_stream_mode + ? fast_packet_frames + : current_packet.frame_count(cfg.channel_count); + if (completed_frames >= target_frames) { + finish_packet(PacketCloseReason::DurationLimit); + if ((cfg.packet_limit != 0U) && (total_completed_packets >= cfg.packet_limit)) { + stop_loop_requested = true; + } + } + }; + while (!stop_loop_requested) { if (tty_writer) { tty_writer->throw_if_failed(); @@ -2239,175 +2430,64 @@ int run(const Config& cfg) { stats_adc_samples += adc_count; stats_din_samples += din_count; if (fast_tty_avg_stream_mode) { - for (uint32_t i = 0; i < raw_adc_count; ++i) { - pending_adc_raw.push_back(adc_raw_buffer[i]); + std::size_t carry_adc_idx = 0; + std::size_t carry_din_idx = 0; + while ((carry_adc_idx < fast_pending_adc_raw.size()) && + (carry_din_idx < fast_pending_din.size()) && + !stop_loop_requested) { + process_aligned_sample(0.0, fast_pending_adc_raw[carry_adc_idx], fast_pending_din[carry_din_idx]); + ++carry_adc_idx; + ++carry_din_idx; + } + if (carry_adc_idx != 0U) { + fast_pending_adc_raw.erase(fast_pending_adc_raw.begin(), + fast_pending_adc_raw.begin() + static_cast(carry_adc_idx)); + } + if (carry_din_idx != 0U) { + fast_pending_din.erase(fast_pending_din.begin(), + fast_pending_din.begin() + static_cast(carry_din_idx)); + } + + std::size_t adc_idx = 0; + std::size_t din_idx = 0; + while ((adc_idx < raw_adc_count) && (din_idx < din_count) && !stop_loop_requested) { + process_aligned_sample(0.0, adc_raw_buffer[adc_idx], din_buffer[din_idx]); + ++adc_idx; + ++din_idx; + } + + if (adc_idx < raw_adc_count) { + fast_pending_adc_raw.insert(fast_pending_adc_raw.end(), + adc_raw_buffer.begin() + static_cast(adc_idx), + adc_raw_buffer.begin() + static_cast(raw_adc_count)); + } + if (din_idx < din_count) { + fast_pending_din.insert(fast_pending_din.end(), + din_buffer.begin() + static_cast(din_idx), + din_buffer.begin() + static_cast(din_count)); + } + + if ((fast_pending_adc_raw.size() > 1000000U) || (fast_pending_din.size() > 1000000U)) { + fail("Internal backlog grew too large while aligning ADC and DIN samples"); } } else { for (uint32_t i = 0; i < adc_count; ++i) { pending_adc.push_back(adc_buffer[i]); } - } - for (uint32_t i = 0; i < din_count; ++i) { - pending_din.push_back(din_buffer[i]); - } + for (uint32_t i = 0; i < din_count; ++i) { + pending_din.push_back(din_buffer[i]); + } - if (((!fast_tty_avg_stream_mode) && (pending_adc.size() > 1000000U)) || - (pending_din.size() > 1000000U) || - (fast_tty_avg_stream_mode && (pending_adc_raw.size() > 1000000U))) { - fail("Internal backlog grew too large while aligning ADC and DIN samples"); - } + if ((pending_adc.size() > 1000000U) || (pending_din.size() > 1000000U)) { + fail("Internal backlog grew too large while aligning ADC and DIN samples"); + } - while ((fast_tty_avg_stream_mode ? !pending_adc_raw.empty() : !pending_adc.empty()) && - !pending_din.empty() && - !stop_loop_requested) { - const double adc_value = fast_tty_avg_stream_mode ? 0.0 : pending_adc.front(); - if (!fast_tty_avg_stream_mode) { + while (!pending_adc.empty() && !pending_din.empty() && !stop_loop_requested) { + const double adc_value = pending_adc.front(); pending_adc.pop_front(); - } - const double adc_raw_value = fast_tty_avg_stream_mode ? pending_adc_raw.front() : 0.0; - if (fast_tty_avg_stream_mode) { - pending_adc_raw.pop_front(); - } - - const uint32_t din_value = pending_din.front(); - pending_din.pop_front(); - // Keep ADC logical-channel phase aligned even when samples are skipped outside packet windows. - const uint32_t lch = next_lch; - next_lch = (next_lch + 1U) % cfg.channel_count; - - const bool di1_level = (din_value & kE502Digital1Mask) != 0U; - bool di1_changed = false; - if (!di1_initialized) { - di1_prev_level = di1_level; - di1_initialized = true; - } else if (di1_level != di1_prev_level) { - di1_changed = true; - di1_prev_level = di1_level; - } - - const bool di_syn2_level = (din_value & kE502DiSyn2Mask) != 0U; - bool start_edge = false; - bool stop_edge = false; - if (!trigger_level_initialized) { - trigger_prev_level = di_syn2_level; - trigger_level_initialized = true; - if (!packet_active) { - if (cfg.sync_start_mode == X502_SYNC_INTERNAL) { - start_packet(); - } else if (sync_uses_di_syn2(cfg.sync_start_mode) && di_syn2_level) { - start_packet(); - } else if (!sync_uses_di_syn2(cfg.sync_start_mode)) { - start_packet(); - } - } - } else { - start_edge = matches_sync_edge(cfg.sync_start_mode, trigger_prev_level, di_syn2_level); - if (cfg.stop_mode != StopMode::TargetFrames) { - stop_edge = matches_stop_edge(cfg.stop_mode, trigger_prev_level, di_syn2_level); - } - trigger_prev_level = di_syn2_level; - } - if (!packet_active && (cfg.sync_start_mode == X502_SYNC_INTERNAL)) { - start_packet(); - } - - if (packet_active && start_edge) { - finish_packet(PacketCloseReason::ExternalStopEdge); - if ((cfg.packet_limit != 0U) && (total_completed_packets >= cfg.packet_limit)) { - stop_loop_requested = true; - continue; - } - start_packet(); - } else if (!packet_active && start_edge) { - start_packet(); - } - - if (packet_active && stop_edge) { - finish_packet(PacketCloseReason::ExternalStopEdge); - if ((cfg.packet_limit != 0U) && (total_completed_packets >= cfg.packet_limit)) { - stop_loop_requested = true; - } - continue; - } - - if (!packet_active) { - continue; - } - - if ((din_value & kE502Digital2Mask) != 0U) { - ++packet_di2_high_clocks; - } else { - ++packet_di2_low_clocks; - } - ++packet_clock_count; - - if (tty_di1_group_average && di1_changed) { - append_tty_group_step(); - tty_group_state.clear_step(); - } else if (tty_do1_noise_subtract) { - if (!tty_do1_noise_state.step_level_initialized) { - tty_do1_noise_state.start_new_step(di1_level); - } else if (di1_changed) { - append_tty_do1_subtracted_step(); - tty_do1_noise_state.start_new_step(di1_level); - } - } - - if (!fast_tty_avg_stream_mode && - (cfg.di1_mode == Di1Mode::Trace) && - ((cfg.channel_count <= 1U) || (lch == 0U))) { - current_packet.pending_frame_di1 = static_cast(di1_level ? 1U : 0U); - current_packet.pending_frame_di1_valid = true; - } - - if (fast_tty_avg_stream_mode) { - if (fast_packet_frames < target_frames) { - if (tty_do1_noise_subtract) { - tty_do1_noise_state.add_sample(lch, adc_raw_value); - } else { - tty_group_state.add_sample(lch, adc_raw_value); - } - if (lch == (cfg.channel_count - 1U)) { - ++fast_packet_frames; - ++total_completed_frames; - ++stats_completed_frames; - } - } - } else { - double stored_value = adc_value; - if ((cfg.di1_mode == Di1Mode::ZeroOnChange) && di1_changed) { - stored_value = 0.0; - ++total_zeroed_samples; - ++stats_zeroed_samples; - ++current_packet.zeroed_samples; - } - - if (current_packet.channels[lch].size() < target_frames) { - current_packet.channels[lch].push_back(stored_value); - ++current_packet.stored_samples; - ++total_stored_adc_samples; - ++stats_stored_adc_samples; - if (lch == (cfg.channel_count - 1U)) { - if ((cfg.di1_mode == Di1Mode::Trace) && - current_packet.pending_frame_di1_valid && - (current_packet.di1.size() < target_frames)) { - current_packet.di1.push_back(current_packet.pending_frame_di1); - current_packet.pending_frame_di1_valid = false; - } - ++total_completed_frames; - ++stats_completed_frames; - } - } - } - - const std::size_t completed_frames = fast_tty_avg_stream_mode - ? fast_packet_frames - : current_packet.frame_count(cfg.channel_count); - if (completed_frames >= target_frames) { - finish_packet(PacketCloseReason::DurationLimit); - if ((cfg.packet_limit != 0U) && (total_completed_packets >= cfg.packet_limit)) { - stop_loop_requested = true; - } + const uint32_t din_value = pending_din.front(); + pending_din.pop_front(); + process_aligned_sample(adc_value, 0.0, din_value); } } diff --git a/tty_protocol_writer.cpp b/tty_protocol_writer.cpp index f7e5c43..40d5bdf 100644 --- a/tty_protocol_writer.cpp +++ b/tty_protocol_writer.cpp @@ -77,6 +77,8 @@ namespace { constexpr std::size_t kFrameWordCount = 4U; constexpr std::size_t kFrameByteCount = kFrameWordCount * sizeof(uint16_t); +constexpr std::size_t kWriteBatchFrames = 256U; +constexpr std::size_t kWriteBatchBytes = kWriteBatchFrames * kFrameByteCount; using EncodedFrame = std::array; @@ -376,8 +378,10 @@ void TtyProtocolWriter::enqueue_frame(uint16_t word0, uint16_t word1, uint16_t w } void TtyProtocolWriter::worker_loop() { + std::array batch_bytes {}; + for (;;) { - EncodedFrame frame {}; + std::size_t batch_frames = 0; { std::unique_lock lock(impl_->mutex); impl_->data_ready_cv.wait(lock, [this]() { @@ -388,13 +392,18 @@ void TtyProtocolWriter::worker_loop() { return; } - frame = impl_->ring[impl_->head]; - impl_->head = (impl_->head + 1U) % impl_->capacity_frames; - --impl_->size; + batch_frames = std::min(kWriteBatchFrames, impl_->size); + for (std::size_t i = 0; i < batch_frames; ++i) { + std::memcpy(batch_bytes.data() + (i * kFrameByteCount), + impl_->ring[impl_->head].data(), + kFrameByteCount); + impl_->head = (impl_->head + 1U) % impl_->capacity_frames; + } + impl_->size -= batch_frames; } - const std::uint8_t* bytes = frame.data(); - std::size_t remaining = frame.size(); + const std::uint8_t* bytes = batch_bytes.data(); + std::size_t remaining = batch_frames * kFrameByteCount; while (remaining != 0U) { const ssize_t written = ::write(impl_->fd, bytes, remaining); if (written < 0) { @@ -436,7 +445,7 @@ void TtyProtocolWriter::worker_loop() { { std::lock_guard lock(impl_->mutex); - ++impl_->stats.frames_written; + impl_->stats.frames_written += static_cast(batch_frames); } } }