From b7c16cd6a07b4db459f021fd29891d7b4c2c68e0 Mon Sep 17 00:00:00 2001 From: Kshitij Rana Date: Tue, 26 Dec 2023 01:12:29 -0500 Subject: [PATCH 01/11] Updated RX, TX and stats collection; Extensive testing required --- software/examples/ensogen_new.cpp | 915 ++++++++++++++++++++++++++++++ software/examples/meson.build | 2 + 2 files changed, 917 insertions(+) create mode 100644 software/examples/ensogen_new.cpp diff --git a/software/examples/ensogen_new.cpp b/software/examples/ensogen_new.cpp new file mode 100644 index 00000000..e98ec789 --- /dev/null +++ b/software/examples/ensogen_new.cpp @@ -0,0 +1,915 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Number of loop iterations to wait before probing the TX notification buffer +// again when reclaiming buffer space. +#define TX_RECLAIM_DELAY 1024 + +// Scientific notation for 10^6, treated as double. Used for stats calculations. +#define ONE_MILLION 1e6 + +// Scientific notation for 10^3, treated as double. Used for stats calculations. +#define ONE_THOUSAND 1e3 + +// Packet overhead added by the FPGA in bytes +#define FPGA_PACKET_OVERHEAD 24 + +// Minimum size of a packet aligned to cache +#define MIN_PACKET_ALIGNED_SIZE 64 + +// Minimum size of a raw packet +#define MIN_PACKET_RAW_SIZE 60 + +// If defined, ignore received packets. +// #define IGNORE_RX + +// When we are done transmitting. The RX thread still tries to receive all +// packets. The following defines the maximum number of times that we can try to +// receive packets in a row while getting no packet back. Once this happens we +// assume that we are no longer receiving packets and can stop trying. +#define ITER_NO_PKT_THRESH (1 << 28) + +// Default core ID to run. 
+#define DEFAULT_CORE_ID 0 + +// Default number of queues to use. +#define DEFAULT_NB_QUEUES 4 + +// Default histogram array offset. +#define DEFAULT_HIST_OFFSET 400 + +// Default histogram array length. +#define DEFAULT_HIST_LEN 1000000 + +// Default delay between displayed stats (in milliseconds). +#define DEFAULT_STATS_DELAY 1000 + +// Number of CLI arguments. +#define NB_CLI_ARGS 3 + +// Maximum number of bytes that we can receive at once. +#define RECV_BUF_LEN 10000000 + +// Huge page size that we are using (in bytes). +#define HUGEPAGE_SIZE (2UL << 20) + +// Size of the buffer that we keep packets in. +#define BUFFER_SIZE enso::kMaxTransferLen + +// Num of min sized packets that would fit in a BUFFER_SIZE bytes buffer +#define MAX_PKTS_IN_BUFFER 2048 + +// Number of transfers required to send a buffer full of packets. +#define TRANSFERS_PER_BUFFER (((BUFFER_SIZE - 1) / enso::kMaxTransferLen) + 1) + +static volatile int keep_running = 1; +static volatile int force_stop = 0; +static volatile int rx_ready = 0; +static volatile int rx_done = 0; +static volatile int tx_done = 0; + +using enso::Device; +using enso::RxPipe; +using enso::TxPipe; + +void int_handler(int signal __attribute__((unused))) { + if (!keep_running) { + force_stop = 1; + } + keep_running = 0; +} + +static void print_usage(const char* program_name) { + printf( + "%s PCAP_FILE RATE_NUM RATE_DEN\n" + " [--help]\n" + " [--count NB_PKTS]\n" + " [--core CORE_ID]\n" + " [--queues NB_QUEUES]\n" + " [--save SAVE_FILE]\n" + " [--single-core]\n" + " [--rtt]\n" + " [--rtt-hist HIST_FILE]\n" + " [--rtt-hist-offset HIST_OFFSET]\n" + " [--rtt-hist-len HIST_LEN]\n" + " [--stats-delay STATS_DELAY]\n" + " [--pcie-addr PCIE_ADDR]\n\n" + + " PCAP_FILE: Pcap file with packets to transmit.\n" + " RATE_NUM: Numerator of the rate used to transmit packets.\n" + " RATE_DEN: Denominator of the rate used to transmit packets.\n\n" + + " --help: Show this help and exit.\n" + " --count: Specify number of packets to 
transmit.\n" + " --core: Specify CORE_ID to run on (default: %d).\n" + " --queues: Specify number of RX queues (default: %d).\n" + " --save: Save RX and TX stats to SAVE_FILE.\n" + " --single-core: Use the same core for receiving and transmitting.\n" + " --rtt: Enable packet timestamping and report average RTT.\n" + " --rtt-hist: Save RTT histogram to HIST_FILE (implies --rtt).\n" + " --rtt-hist-offset: Offset to be used when saving the histogram\n" + " (default: %d).\n" + " --rtt-hist-len: Size of the histogram array (default: %d).\n" + " If an RTT is outside the RTT hist array range, it\n" + " will still be saved, but there will be a\n" + " performance penalty.\n" + " --stats-delay: Delay between displayed stats in milliseconds\n" + " (default: %d).\n" + " --pcie-addr: Specify the PCIe address of the NIC to use.\n", + program_name, DEFAULT_CORE_ID, DEFAULT_NB_QUEUES, DEFAULT_HIST_OFFSET, + DEFAULT_HIST_LEN, DEFAULT_STATS_DELAY); +} + +#define CMD_OPT_HELP "help" +#define CMD_OPT_COUNT "count" +#define CMD_OPT_CORE "core" +#define CMD_OPT_QUEUES "queues" +#define CMD_OPT_SAVE "save" +#define CMD_OPT_SINGLE_CORE "single-core" +#define CMD_OPT_RTT "rtt" +#define CMD_OPT_RTT_HIST "rtt-hist" +#define CMD_OPT_RTT_HIST_OFF "rtt-hist-offset" +#define CMD_OPT_RTT_HIST_LEN "rtt-hist-len" +#define CMD_OPT_STATS_DELAY "stats-delay" +#define CMD_OPT_PCIE_ADDR "pcie-addr" + +// Map long options to short options. 
+enum { + CMD_OPT_HELP_NUM = 256, + CMD_OPT_COUNT_NUM, + CMD_OPT_CORE_NUM, + CMD_OPT_QUEUES_NUM, + CMD_OPT_SAVE_NUM, + CMD_OPT_SINGLE_CORE_NUM, + CMD_OPT_RTT_NUM, + CMD_OPT_RTT_HIST_NUM, + CMD_OPT_RTT_HIST_OFF_NUM, + CMD_OPT_RTT_HIST_LEN_NUM, + CMD_OPT_STATS_DELAY_NUM, + CMD_OPT_PCIE_ADDR_NUM, +}; + +static const char short_options[] = ""; + +static const struct option long_options[] = { + {CMD_OPT_HELP, no_argument, NULL, CMD_OPT_HELP_NUM}, + {CMD_OPT_COUNT, required_argument, NULL, CMD_OPT_COUNT_NUM}, + {CMD_OPT_CORE, required_argument, NULL, CMD_OPT_CORE_NUM}, + {CMD_OPT_QUEUES, required_argument, NULL, CMD_OPT_QUEUES_NUM}, + {CMD_OPT_SAVE, required_argument, NULL, CMD_OPT_SAVE_NUM}, + {CMD_OPT_SINGLE_CORE, no_argument, NULL, CMD_OPT_SINGLE_CORE_NUM}, + {CMD_OPT_RTT, no_argument, NULL, CMD_OPT_RTT_NUM}, + {CMD_OPT_RTT_HIST, required_argument, NULL, CMD_OPT_RTT_HIST_NUM}, + {CMD_OPT_RTT_HIST_OFF, required_argument, NULL, CMD_OPT_RTT_HIST_OFF_NUM}, + {CMD_OPT_RTT_HIST_LEN, required_argument, NULL, CMD_OPT_RTT_HIST_LEN_NUM}, + {CMD_OPT_STATS_DELAY, required_argument, NULL, CMD_OPT_STATS_DELAY_NUM}, + {CMD_OPT_PCIE_ADDR, required_argument, NULL, CMD_OPT_PCIE_ADDR_NUM}, + {0, 0, 0, 0}}; + +struct parsed_args_t { + int core_id; + uint32_t nb_queues; + bool save; + bool single_core; + bool enable_rtt; + bool enable_rtt_history; + std::string hist_file; + std::string pcap_file; + std::string save_file; + uint16_t rate_num; + uint16_t rate_den; + uint64_t nb_pkts; + uint32_t rtt_hist_offset; + uint32_t rtt_hist_len; + uint32_t stats_delay; + std::string pcie_addr; +}; + +static int parse_args(int argc, char** argv, + struct parsed_args_t& parsed_args) { + int opt; + int long_index; + + parsed_args.nb_pkts = 0; + parsed_args.core_id = DEFAULT_CORE_ID; + parsed_args.nb_queues = DEFAULT_NB_QUEUES; + parsed_args.save = false; + parsed_args.single_core = false; + parsed_args.enable_rtt = false; + parsed_args.enable_rtt_history = false; + parsed_args.rtt_hist_offset = 
DEFAULT_HIST_OFFSET; + parsed_args.rtt_hist_len = DEFAULT_HIST_LEN; + parsed_args.stats_delay = DEFAULT_STATS_DELAY; + + while ((opt = getopt_long(argc, argv, short_options, long_options, + &long_index)) != EOF) { + switch (opt) { + case CMD_OPT_HELP_NUM: + return 1; + case CMD_OPT_COUNT_NUM: + parsed_args.nb_pkts = atoi(optarg); + break; + case CMD_OPT_CORE_NUM: + parsed_args.core_id = atoi(optarg); + break; + case CMD_OPT_QUEUES_NUM: + parsed_args.nb_queues = atoi(optarg); + break; + case CMD_OPT_SAVE_NUM: + parsed_args.save = true; + parsed_args.save_file = optarg; + break; + case CMD_OPT_SINGLE_CORE_NUM: + parsed_args.single_core = true; + break; + case CMD_OPT_RTT_HIST_NUM: + parsed_args.enable_rtt_history = true; + parsed_args.hist_file = optarg; + // fall through + case CMD_OPT_RTT_NUM: + parsed_args.enable_rtt = true; + break; + case CMD_OPT_RTT_HIST_OFF_NUM: + parsed_args.rtt_hist_offset = atoi(optarg); + break; + case CMD_OPT_RTT_HIST_LEN_NUM: + parsed_args.rtt_hist_len = atoi(optarg); + break; + case CMD_OPT_STATS_DELAY_NUM: + parsed_args.stats_delay = atoi(optarg); + break; + case CMD_OPT_PCIE_ADDR_NUM: + parsed_args.pcie_addr = optarg; + break; + default: + return -1; + } + } + + if ((argc - optind) != NB_CLI_ARGS) { + return -1; + } + + parsed_args.pcap_file = argv[optind++]; + parsed_args.rate_num = atoi(argv[optind++]); + parsed_args.rate_den = atoi(argv[optind++]); + + if (parsed_args.rate_num == 0) { + std::cerr << "Rate must be greater than 0" << std::endl; + return -1; + } + + if (parsed_args.rate_den == 0) { + std::cerr << "Rate denominator must be greater than 0" << std::endl; + return -1; + } + + return 0; +} + +struct PcapHandlerContext { + uint8_t *buf; + uint32_t nb_bytes; + uint32_t nb_good_bytes; + uint32_t nb_pkts; + pcap_t* pcap; +}; + +struct RxStats { + explicit RxStats(uint32_t rtt_hist_len = 0, uint32_t rtt_hist_offset = 0) + : pkts(0), + bytes(0), + rtt_sum(0), + nb_batches(0), + rtt_hist_len(rtt_hist_len), + 
rtt_hist_offset(rtt_hist_offset) { + if (rtt_hist_len > 0) { + rtt_hist = new uint64_t[rtt_hist_len](); + } + } + ~RxStats() { + if (rtt_hist_len > 0) { + delete[] rtt_hist; + } + } + + RxStats(const RxStats& other) = delete; + RxStats(RxStats&& other) = default; + RxStats& operator=(const RxStats& other) = delete; + RxStats& operator=(RxStats&& other) = delete; + + inline void add_rtt_to_hist(const uint32_t rtt) { + // Insert RTTs into the rtt_hist array if they are in its range, + // otherwise use the backup_rtt_hist. + if (unlikely((rtt >= (rtt_hist_len - rtt_hist_offset)) || + (rtt < rtt_hist_offset))) { + backup_rtt_hist[rtt]++; + } else { + rtt_hist[rtt - rtt_hist_offset]++; + } + } + + uint64_t pkts; + uint64_t bytes; + uint64_t rtt_sum; + uint64_t nb_batches; + const uint32_t rtt_hist_len; + const uint32_t rtt_hist_offset; + uint64_t* rtt_hist; + std::unordered_map backup_rtt_hist; +}; + +struct RxArgs { + bool enable_rtt; + bool enable_rtt_history; +}; + +struct TxStats { + TxStats() : pkts(0), bytes(0) {} + uint64_t pkts; + uint64_t bytes; +}; + +struct TxArgs { + TxArgs(TxPipe *pipe, uint8_t *buf, uint64_t pkts_in_buf, + uint64_t total_pkts_to_send) + : tx_pipe(pipe), + main_buf(buf), + pkts_in_main_buf(pkts_in_buf), + total_remaining_pkts(total_pkts_to_send) {} + TxPipe *tx_pipe; + uint8_t *main_buf; + uint64_t pkts_in_main_buf; + uint64_t total_remaining_pkts; + uint32_t transmissions_pending; +}; + +void pcap_pkt_handler(u_char* user, const struct pcap_pkthdr* pkt_hdr, + const u_char* pkt_bytes) { + (void)pkt_hdr; + struct PcapHandlerContext* context = (struct PcapHandlerContext*)user; + + const struct ether_header* l2_hdr = (struct ether_header*)pkt_bytes; + if (l2_hdr->ether_type != htons(ETHERTYPE_IP)) { + std::cerr << "Non-IPv4 packets are not supported" << std::endl; + exit(8); + } + context->nb_pkts++; + if(context->nb_pkts > MAX_PKTS_IN_BUFFER) { + std::cerr << "Only " << MAX_PKTS_IN_BUFFER << " can be in the PCAP file" + << std::endl; + 
free(context->buf); + exit(9); + } + + uint32_t len = enso::get_pkt_len(pkt_bytes); + uint32_t nb_flits = (len - 1) / MIN_PACKET_ALIGNED_SIZE + 1; + memcpy(context->buf + context->nb_bytes, pkt_bytes, len); + context->nb_bytes += nb_flits * MIN_PACKET_ALIGNED_SIZE; + context->nb_good_bytes += len; +} + +inline uint64_t receive_pkts(const struct RxArgs& rx_args, + struct RxStats& rx_stats, + std::unique_ptr &dev) { + uint64_t nb_pkts = 0; +#ifdef IGNORE_RX + (void)rx_args; + (void)rx_stats; +#else // IGNORE_RX + RxPipe* rx_pipe = dev->NextRxPipeToRecv(); + if (unlikely(rx_pipe == nullptr)) { + return 0; + } + auto batch = rx_pipe->PeekPkts(); + uint64_t recv_bytes = 0; + for (auto pkt : batch) { + uint16_t pkt_len = enso::get_pkt_len(pkt); + + if (rx_args.enable_rtt) { + uint32_t rtt = enso::get_pkt_rtt(pkt); + rx_stats.rtt_sum += rtt; + + if (rx_args.enable_rtt_history) { + rx_stats.add_rtt_to_hist(rtt); + } + } + + recv_bytes += pkt_len; + ++nb_pkts; + } + + uint32_t batch_length = batch.processed_bytes(); + rx_pipe->ConfirmBytes(batch_length); + + rx_stats.pkts += nb_pkts; + ++(rx_stats.nb_batches); + rx_stats.bytes += recv_bytes; + + rx_pipe->Clear(); + +#endif // IGNORE_RX + return nb_pkts; +} + +inline void transmit_pkts(struct TxArgs& tx_args, struct TxStats& tx_stats) { + // decide whether we need to send an entire buffer worth of packets + // or less than that based on user request + uint32_t nb_pkts_to_send = std::min(tx_args.pkts_in_main_buf, + tx_args.total_remaining_pkts); + // the packets are copied in the main buffer based on the minimum packet size + uint32_t transmission_length = nb_pkts_to_send * MIN_PACKET_ALIGNED_SIZE; + + // allocate the bytes in the TX pipe and copy the required + // number of bytes from the main buffer + uint8_t* pipe_buf = tx_args.tx_pipe->AllocateBuf(transmission_length); + if(pipe_buf == NULL) { + std::cout << "Buffer allocation for TX pipe failed" << std::endl; + return; + } + // memcpy(pipe_buf, tx_args.main_buf, 
transmission_length); + enso::memcpy_64_align(pipe_buf, tx_args.main_buf, transmission_length); + // send the packets + tx_args.tx_pipe->SendAndFree(transmission_length); + + // update the stats + // the stats need be calculated based on good bytes + // rather than the transmission length + tx_stats.pkts += nb_pkts_to_send; + tx_stats.bytes += nb_pkts_to_send * MIN_PACKET_RAW_SIZE; + tx_args.total_remaining_pkts -= nb_pkts_to_send; + if(tx_args.total_remaining_pkts == 0) { + keep_running = 0; + return; + } +} + +int main(int argc, char** argv) { + struct parsed_args_t parsed_args; + int ret = parse_args(argc, argv, parsed_args); + if (ret) { + print_usage(argv[0]); + if (ret == 1) { + return 0; + } + return 1; + } + + // Parse the PCI address in format 0000:00:00.0 or 00:00.0. + if (parsed_args.pcie_addr != "") { + uint32_t domain, bus, dev, func; + if (sscanf(parsed_args.pcie_addr.c_str(), "%x:%x:%x.%x", &domain, &bus, + &dev, &func) != 4) { + if (sscanf(parsed_args.pcie_addr.c_str(), "%x:%x.%x", &bus, &dev, + &func) != 3) { + std::cerr << "Invalid PCI address" << std::endl; + return 1; + } + } + uint16_t bdf = (bus << 8) | (dev << 3) | (func & 0x7); + enso::set_bdf(bdf); + } + + char errbuf[PCAP_ERRBUF_SIZE]; + + pcap_t* pcap = pcap_open_offline(parsed_args.pcap_file.c_str(), errbuf); + if (pcap == NULL) { + std::cerr << "Error loading pcap file (" << errbuf << ")" << std::endl; + return 2; + } + + // we copy the packets in this buffer using libpcap + uint8_t *pkt_buf = (uint8_t *) malloc(BUFFER_SIZE); + if(pkt_buf == NULL) { + std::cerr << "Could not allocate packet buffer" << std::endl; + exit(1); + } + + struct PcapHandlerContext context; + context.pcap = pcap; + context.buf = pkt_buf; + context.nb_bytes = 0; + context.nb_good_bytes = 0; + context.nb_pkts = 0; + + // Initialize packet buffers with packets read from pcap file. 
+ if (pcap_loop(pcap, 0, pcap_pkt_handler, (u_char*)&context) < 0) { + std::cerr << "Error while reading pcap (" << pcap_geterr(pcap) << ")" + << std::endl; + return 3; + } + + // For small pcaps we copy the same packets over the remainder of the + // buffer. This reduces the number of transfers that we need to issue. + if (context.nb_bytes < BUFFER_SIZE) { + uint32_t original_buf_length = context.nb_bytes; + uint32_t original_nb_pkts = context.nb_pkts; + uint32_t original_good_bytes = context.nb_good_bytes; + while ((context.nb_bytes + original_buf_length) <= BUFFER_SIZE) { + memcpy(pkt_buf + context.nb_bytes, pkt_buf, original_buf_length); + context.nb_bytes += original_buf_length; + context.nb_pkts += original_nb_pkts; + context.nb_good_bytes += original_good_bytes; + } + } + + uint64_t total_pkts_in_buffer = context.nb_pkts; + uint64_t total_pkts_to_send; + if (parsed_args.nb_pkts > 0) { + total_pkts_to_send = parsed_args.nb_pkts; + } else { + // Treat nb_pkts == 0 as unbounded. The following value should be enough + // to send 64-byte packets for around 400 years using Tb Ethernet. + total_pkts_to_send = 0xffffffffffffffff; + } + + uint32_t rtt_hist_len = 0; + uint32_t rtt_hist_offset = 0; + + if (parsed_args.enable_rtt_history) { + rtt_hist_len = parsed_args.rtt_hist_len; + rtt_hist_offset = parsed_args.rtt_hist_offset; + } + + RxStats rx_stats(rtt_hist_len, rtt_hist_offset); + TxStats tx_stats; + + signal(SIGINT, int_handler); + + std::vector threads; + + std::unique_ptr dev = Device::Create(); + if (!dev) { + std::cerr << "Problem creating device" << std::endl; + free(pkt_buf); + exit(2); + } + + // When using single_core we use the same thread for RX and TX, otherwise we + // launch separate threads for RX and TX. 
+ if (!parsed_args.single_core) { + std::thread rx_thread = std::thread([&parsed_args, &rx_stats, &dev] { + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + + std::vector rx_pipes; + + for (uint32_t i = 0; i < parsed_args.nb_queues; ++i) { + RxPipe* rx_pipe = dev->AllocateRxPipe(true); + if (!rx_pipe) { + std::cerr << "Problem creating RX pipe" << std::endl; + exit(3); + } + rx_pipes.push_back(rx_pipe); + } + + dev->EnableRateLimiting(parsed_args.rate_num, parsed_args.rate_den); + dev->EnableRoundRobin(); + + if (parsed_args.enable_rtt) { + dev->EnableTimeStamping(); + } + else { + dev->DisableTimeStamping(); + } + + RxArgs rx_args; + rx_args.enable_rtt = parsed_args.enable_rtt; + rx_args.enable_rtt_history = parsed_args.enable_rtt_history; + + std::cout << "Running RX on core " << sched_getcpu() << std::endl; + + rx_ready = 1; + + while (keep_running) { + receive_pkts(rx_args, rx_stats, dev); + } + + uint64_t nb_iters_no_pkt = 0; + + // Receive packets until packets stop arriving or user force stops. 
+ while (!force_stop && (nb_iters_no_pkt < ITER_NO_PKT_THRESH)) { + uint64_t nb_pkts = receive_pkts(rx_args, rx_stats, dev); + if (unlikely(nb_pkts == 0)) { + ++nb_iters_no_pkt; + } else { + nb_iters_no_pkt = 0; + } + } + + rx_done = true; + + dev->DisableRateLimiting(); + dev->DisableRoundRobin(); + + if (parsed_args.enable_rtt) { + dev->DisableTimeStamping(); + } + + }); + + std::thread tx_thread = std::thread( + [pkt_buf, total_pkts_in_buffer, total_pkts_to_send, + &parsed_args, &tx_stats, &dev] { + std::this_thread::sleep_for(std::chrono::seconds(1)); + + TxPipe* tx_pipe = dev->AllocateTxPipe(); + if (!tx_pipe) { + std::cerr << "Problem creating TX pipe" << std::endl; + exit(3); + } + + while (!rx_ready) continue; + + std::cout << "Running TX on core " << sched_getcpu() << std::endl; + + TxArgs tx_args(tx_pipe, pkt_buf, total_pkts_in_buffer, + total_pkts_to_send); + + while (keep_running) { + transmit_pkts(tx_args, tx_stats); + } + + tx_done = 1; + + while (!rx_done) continue; + + }); + + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(parsed_args.core_id, &cpuset); + int result = pthread_setaffinity_np(rx_thread.native_handle(), + sizeof(cpuset), &cpuset); + if (result < 0) { + std::cerr << "Error setting CPU affinity for RX thread." << std::endl; + return 6; + } + + CPU_ZERO(&cpuset); + CPU_SET(parsed_args.core_id + 1, &cpuset); + result = pthread_setaffinity_np(tx_thread.native_handle(), sizeof(cpuset), + &cpuset); + if (result < 0) { + std::cerr << "Error setting CPU affinity for TX thread." << std::endl; + return 7; + } + + threads.push_back(std::move(rx_thread)); + threads.push_back(std::move(tx_thread)); + + } else { + // Send and receive packets within the same thread. 
+ std::thread rx_tx_thread = std::thread( + [pkt_buf, total_pkts_in_buffer, total_pkts_to_send, + &parsed_args, &tx_stats, &rx_stats, &dev] { + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + + std::vector rx_pipes; + + for (uint32_t i = 0; i < parsed_args.nb_queues; ++i) { + RxPipe* rx_pipe = dev->AllocateRxPipe(true); + if (!rx_pipe) { + std::cerr << "Problem creating RX pipe" << std::endl; + exit(3); + } + rx_pipes.push_back(rx_pipe); + } + + dev->EnableRateLimiting(parsed_args.rate_num, parsed_args.rate_den); + dev->EnableRoundRobin(); + + if (parsed_args.enable_rtt) { + dev->EnableTimeStamping(); + } + else { + dev->DisableTimeStamping(); + } + + std::cout << "Running RX and TX on core " << sched_getcpu() + << std::endl; + + RxArgs rx_args; + rx_args.enable_rtt = parsed_args.enable_rtt; + rx_args.enable_rtt_history = parsed_args.enable_rtt_history; + + TxPipe* tx_pipe = dev->AllocateTxPipe(); + if (!tx_pipe) { + std::cerr << "Problem creating TX pipe" << std::endl; + exit(3); + } + + TxArgs tx_args(tx_pipe, pkt_buf, total_pkts_in_buffer, + total_pkts_to_send); + + rx_ready = 1; + + while (keep_running) { + receive_pkts(rx_args, rx_stats, dev); + transmit_pkts(tx_args, tx_stats); + } + + tx_done = 1; + + uint64_t nb_iters_no_pkt = 0; + + // Receive packets until packets stop arriving or user force stops. + while (!force_stop && (nb_iters_no_pkt < ITER_NO_PKT_THRESH)) { + uint64_t nb_pkts = receive_pkts(rx_args, rx_stats, dev); + if (unlikely(nb_pkts == 0)) { + ++nb_iters_no_pkt; + } else { + nb_iters_no_pkt = 0; + } + } + + rx_done = true; + + dev->DisableRateLimiting(); + dev->DisableRoundRobin(); + + if (parsed_args.enable_rtt) { + dev->DisableTimeStamping(); + } + + }); + + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(parsed_args.core_id, &cpuset); + int result = pthread_setaffinity_np(rx_tx_thread.native_handle(), + sizeof(cpuset), &cpuset); + if (result < 0) { + std::cerr << "Error setting CPU affinity for RX thread." 
<< std::endl; + return 6; + } + + threads.push_back(std::move(rx_tx_thread)); + } + + // Write header to save file. + if (parsed_args.save) { + std::ofstream save_file; + save_file.open(parsed_args.save_file); + save_file + << "rx_goodput_mbps,rx_tput_mbps,rx_pkt_rate_kpps,rx_bytes,rx_packets," + "tx_goodput_mbps,tx_tput_mbps,tx_pkt_rate_kpps,tx_bytes,tx_packets"; + if (parsed_args.enable_rtt) { + save_file << ",mean_rtt_ns"; + } + save_file << std::endl; + save_file.close(); + } + + while (!rx_ready) continue; + + std::cout << "Starting..." << std::endl; + + // Continuously print statistics. + while (!rx_done) { + _enso_compiler_memory_barrier(); + uint64_t last_rx_bytes = rx_stats.bytes; + uint64_t last_rx_pkts = rx_stats.pkts; + uint64_t last_tx_bytes = tx_stats.bytes; + uint64_t last_tx_pkts = tx_stats.pkts; + uint64_t last_aggregated_rtt_ns = + rx_stats.rtt_sum * enso::kNsPerTimestampCycle; + + std::this_thread::sleep_for( + std::chrono::milliseconds(parsed_args.stats_delay)); + + uint64_t rx_bytes = rx_stats.bytes; + uint64_t rx_pkts = rx_stats.pkts; + uint64_t tx_bytes = tx_stats.bytes; + uint64_t tx_pkts = tx_stats.pkts; + + double interval_s = (double) parsed_args.stats_delay / ONE_THOUSAND; + + uint64_t rx_pkt_diff = rx_pkts - last_rx_pkts; + uint64_t rx_goodput_mbps = + (rx_bytes - last_rx_bytes) * 8. / (ONE_MILLION * interval_s); + uint64_t rx_pkt_rate = (rx_pkt_diff / interval_s); + uint64_t rx_pkt_rate_kpps = rx_pkt_rate / ONE_THOUSAND; + uint64_t rx_tput_mbps = rx_goodput_mbps + FPGA_PACKET_OVERHEAD + * 8 * rx_pkt_rate / ONE_MILLION; + + uint64_t tx_pkt_diff = tx_pkts - last_tx_pkts; + uint64_t tx_goodput_mbps = + (tx_bytes - last_tx_bytes) * 8. / (ONE_MILLION * interval_s); + uint64_t tx_tput_mbps = + (tx_bytes - last_tx_bytes + tx_pkt_diff * FPGA_PACKET_OVERHEAD) * 8. 
+ / (ONE_MILLION * interval_s); + uint64_t tx_pkt_rate = (tx_pkt_diff / interval_s); + uint64_t tx_pkt_rate_kpps = tx_pkt_rate / ONE_THOUSAND; + + uint64_t rtt_sum_ns = rx_stats.rtt_sum * enso::kNsPerTimestampCycle; + uint64_t rtt_ns; + if (rx_pkt_diff != 0) { + rtt_ns = (rtt_sum_ns - last_aggregated_rtt_ns) / rx_pkt_diff; + } else { + rtt_ns = 0; + } + + std::cout << std::dec << " RX: Throughput: " << rx_tput_mbps << " Mbps" + << " Rate: " << rx_pkt_rate_kpps << " kpps" << std::endl + + << " #bytes: " << rx_bytes << " #packets: " << rx_pkts + << std::endl; + + std::cout << " TX: Throughput: " << tx_tput_mbps << " Mbps" + << " Rate: " << tx_pkt_rate_kpps << " kpps" << std::endl + + << " #bytes: " << tx_bytes << " #packets: " << tx_pkts + << std::endl; + + if (parsed_args.enable_rtt) { + std::cout << "Mean RTT: " << rtt_ns << " ns " << std::endl; + } + + if (parsed_args.save) { + std::ofstream save_file; + save_file.open(parsed_args.save_file, std::ios_base::app); + save_file << rx_goodput_mbps << "," << rx_tput_mbps << "," + << rx_pkt_rate_kpps << "," << rx_bytes << "," << rx_pkts << "," + << tx_goodput_mbps << "," << tx_pkt_rate_kpps << "," + << tx_tput_mbps << "," << tx_bytes << "," << tx_pkts; + if (parsed_args.enable_rtt) { + save_file << "," << rtt_ns; + } + save_file << std::endl; + save_file.close(); + } + + std::cout << std::endl; + } + + if (parsed_args.save) { + std::cout << "Saved statistics to \"" << parsed_args.save_file << "\"" + << std::endl; + } + + ret = 0; + if (parsed_args.enable_rtt_history) { + std::ofstream hist_file; + hist_file.open(parsed_args.hist_file); + + for (uint32_t rtt = 0; rtt < parsed_args.rtt_hist_len; ++rtt) { + if (rx_stats.rtt_hist[rtt] != 0) { + uint32_t corrected_rtt = + (rtt + parsed_args.rtt_hist_offset) * enso::kNsPerTimestampCycle; + hist_file << corrected_rtt << "," << rx_stats.rtt_hist[rtt] + << std::endl; + } + } + + if (rx_stats.backup_rtt_hist.size() != 0) { + std::cout << "Warning: " << 
rx_stats.backup_rtt_hist.size() + << " rtt hist entries in backup" << std::endl; + for (auto const& i : rx_stats.backup_rtt_hist) { + hist_file << i.first * enso::kNsPerTimestampCycle << "," << i.second + << std::endl; + } + } + + hist_file.close(); + std::cout << "Saved RTT histogram to \"" << parsed_args.hist_file << "\"" + << std::endl; + + if (rx_stats.pkts != tx_stats.pkts) { + std::cout << "Warning: did not get all packets back." << std::endl; + ret = 1; + } + } + + for (auto& thread : threads) { + thread.join(); + } + + free(pkt_buf); + return ret; +} diff --git a/software/examples/meson.build b/software/examples/meson.build index a6ac0063..983b04f0 100644 --- a/software/examples/meson.build +++ b/software/examples/meson.build @@ -16,3 +16,5 @@ executable('capture', 'capture.cpp', dependencies: [thread_dep, pcap_dep], link_with: enso_lib, include_directories: inc) executable('l2_forward', 'l2_forward.cpp', dependencies: thread_dep, link_with: enso_lib, include_directories: inc) +executable('ensogen_new', 'ensogen_new.cpp', dependencies: [thread_dep, pcap_dep], + link_with: enso_lib, include_directories: inc) From f444a8a20014ede9684e9b58d90332ebf311cacd Mon Sep 17 00:00:00 2001 From: Kshitij Rana Date: Sun, 7 Jan 2024 17:01:57 -0500 Subject: [PATCH 02/11] Completed testing and added documentation --- scripts/ensogen.sh | 2 +- software/examples/ensogen.cpp | 917 ++++++++++++++---------------- software/examples/ensogen_new.cpp | 915 ----------------------------- software/examples/meson.build | 2 - software/include/enso/pipe.h | 25 + software/src/enso/pipe.cpp | 8 + 6 files changed, 473 insertions(+), 1396 deletions(-) delete mode 100644 software/examples/ensogen_new.cpp diff --git a/scripts/ensogen.sh b/scripts/ensogen.sh index f59274ae..c5994abe 100755 --- a/scripts/ensogen.sh +++ b/scripts/ensogen.sh @@ -14,7 +14,7 @@ GET_PCAP_SIZE_CMD_PATH=$(realpath $GET_PCAP_SIZE_CMD_PATH) if [ $# -lt 2 ]; then echo "Usage: ./ensogen.sh PCAP_FILE RATE_GBPS [OPTIONS]" - 
echo "Example: ./ensogen.sh /tmp/pcap_file.pcap 100 --pcie-addr 65:00.0" + echo "Example: ./ensogen.sh /tmp/pcap_file.pcap 100" exit 1 fi diff --git a/software/examples/ensogen.cpp b/software/examples/ensogen.cpp index a3570baf..57c5fe18 100644 --- a/software/examples/ensogen.cpp +++ b/software/examples/ensogen.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, Carnegie Mellon University + * Copyright (c) 2023, Carnegie Mellon University * * Redistribution and use in source and binary forms, with or without * modification, are permitted (subject to the limitations in the disclaimer @@ -29,10 +29,23 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * @file: ensogen.cpp + * + * @brief: Packet generator program that uses the Enso library to send and + * receive packets. It uses libpcap to read packets from a pcap file. The program + * assumes that the file contains only minimum sized packets. + * + * Example: + * + * sudo ./scripts/ensogen.sh ./scripts/sample_pcaps/2_64_1_2.pcap 100 + * + * */ #include #include #include +#include #include #include #include @@ -58,9 +71,27 @@ #include #include +/****************************************************************************** + * Macros and Globals + *****************************************************************************/ // Number of loop iterations to wait before probing the TX notification buffer // again when reclaiming buffer space. -#define TX_RECLAIM_DELAY 1024 +#define TX_RECLAIM_DELAY 1024 + +// Scientific notation for 10^6, treated as double. Used for stats calculations. +#define ONE_MILLION 1e6 + +// Scientific notation for 10^3, treated as double. Used for stats calculations. +#define ONE_THOUSAND 1e3 + +// Ethernet's per packet overhead added by the FPGA (in bytes). +#define FPGA_PACKET_OVERHEAD 24 + +// Minimum size of a packet aligned to cache (in bytes). 
+#define MIN_PACKET_ALIGNED_SIZE 64 + +// Minimum size of a raw packet read from the PCAP file (in bytes). +#define MIN_PACKET_RAW_SIZE 60 // If defined, ignore received packets. // #define IGNORE_RX @@ -98,22 +129,200 @@ // Size of the buffer that we keep packets in. #define BUFFER_SIZE enso::kMaxTransferLen +// Num of min sized packets that would fit in a BUFFER_SIZE bytes buffer +#define MAX_PKTS_IN_BUFFER 2048 + // Number of transfers required to send a buffer full of packets. #define TRANSFERS_PER_BUFFER (((BUFFER_SIZE - 1) / enso::kMaxTransferLen) + 1) +// Macros for cmd line option names +#define CMD_OPT_HELP "help" +#define CMD_OPT_COUNT "count" +#define CMD_OPT_CORE "core" +#define CMD_OPT_QUEUES "queues" +#define CMD_OPT_SAVE "save" +#define CMD_OPT_SINGLE_CORE "single-core" +#define CMD_OPT_RTT "rtt" +#define CMD_OPT_RTT_HIST "rtt-hist" +#define CMD_OPT_RTT_HIST_OFF "rtt-hist-offset" +#define CMD_OPT_RTT_HIST_LEN "rtt-hist-len" +#define CMD_OPT_STATS_DELAY "stats-delay" + static volatile int keep_running = 1; static volatile int force_stop = 0; static volatile int rx_ready = 0; static volatile int rx_done = 0; static volatile int tx_done = 0; +using enso::Device; +using enso::RxPipe; +using enso::TxPipe; + +/****************************************************************************** + * Structure Definitions + *****************************************************************************/ +/* + * @brief: Structure to store the command line arguments. + * + * */ +struct parsed_args_t { + int core_id; + uint32_t nb_queues; + bool save; + bool single_core; + bool enable_rtt; + bool enable_rtt_history; + std::string hist_file; + std::string pcap_file; + std::string save_file; + uint16_t rate_num; + uint16_t rate_den; + uint64_t nb_pkts; + uint32_t rtt_hist_offset; + uint32_t rtt_hist_len; + uint32_t stats_delay; +}; + +/* + * @brief: Structure to store the PCAP related variables that need + * to be passed to the callback function. 
+ * + * */ +struct PcapHandlerContext { + // Buffer to store the packet data + uint8_t *buf; + // Total number of packet bytes aligned to the cache + uint32_t nb_bytes; + // Total number of raw packet bytes + uint32_t nb_good_bytes; + // Total number of packets + uint32_t nb_pkts; + // libpcap object associated with the opened PCAP file + pcap_t* pcap; +}; + +/* + * @brief: Structure to store the Rx related stats. + * + * */ +struct RxStats { + explicit RxStats(uint32_t rtt_hist_len = 0, uint32_t rtt_hist_offset = 0) + : pkts(0), + bytes(0), + rtt_sum(0), + nb_batches(0), + rtt_hist_len(rtt_hist_len), + rtt_hist_offset(rtt_hist_offset) { + if (rtt_hist_len > 0) { + rtt_hist = new uint64_t[rtt_hist_len](); + } + } + ~RxStats() { + if (rtt_hist_len > 0) { + delete[] rtt_hist; + } + } + + RxStats(const RxStats& other) = delete; + RxStats(RxStats&& other) = default; + RxStats& operator=(const RxStats& other) = delete; + RxStats& operator=(RxStats&& other) = delete; + + inline void add_rtt_to_hist(const uint32_t rtt) { + // Insert RTTs into the rtt_hist array if they are in its range, + // otherwise use the backup_rtt_hist. + if (unlikely((rtt >= (rtt_hist_len - rtt_hist_offset)) || + (rtt < rtt_hist_offset))) { + backup_rtt_hist[rtt]++; + } else { + rtt_hist[rtt - rtt_hist_offset]++; + } + } + + uint64_t pkts; + uint64_t bytes; + uint64_t rtt_sum; + uint64_t nb_batches; + const uint32_t rtt_hist_len; + const uint32_t rtt_hist_offset; + uint64_t* rtt_hist; + std::unordered_map backup_rtt_hist; +}; + +/* + * @brief: Structure to store the variables needed by the receive_pkts + * function. + * + * */ +struct RxArgs { + RxArgs(bool enbl_rtt, bool enbl_rtt_hist, std::unique_ptr &dev_) : + enable_rtt(enbl_rtt), + enable_rtt_history(enbl_rtt_hist), + dev(dev_) {} + bool enable_rtt; + bool enable_rtt_history; + std::unique_ptr &dev; +}; + +/* + * @brief: Structure to store the Tx related stats. 
+ * + * */ +struct TxStats { + TxStats() : pkts(0), bytes(0) {} + uint64_t pkts; + uint64_t bytes; +}; + +/* + * @brief: Structure to store the arguments needed by the transmit_pkts + * function. + * + * */ +struct TxArgs { + TxArgs(TxPipe *pipe, uint64_t pkts_in_buf, uint64_t total_pkts_to_send, + std::unique_ptr &dev_) + : tx_pipe(pipe), + pkts_in_pipe(pkts_in_buf), + total_remaining_pkts(total_pkts_to_send), + transmissions_pending(0), + ignored_reclaims(0), + dev(dev_) {} + // TxPipe associated with the thread + TxPipe *tx_pipe; + // Total number of packets in the pipe + uint64_t pkts_in_pipe; + // Total number of pakcets that need to be sent + uint64_t total_remaining_pkts; + // Total number of notifications created and sent by the application + uint32_t transmissions_pending; + // Used to track the number of times the thread did not check for notification + // consumption by the NIC + uint32_t ignored_reclaims; + // Pointer to the Enso device object + std::unique_ptr &dev; +}; + +/****************************************************************************** + * Function Definitions + *****************************************************************************/ +/* + * @brief: Signal handler for SIGINT (Ctrl+C). + * + * */ void int_handler(int signal __attribute__((unused))) { if (!keep_running) { + // user interrupted the second time, we force stop force_stop = 1; } + // user interrupted the first time, we signal the thread(s) to stop keep_running = 0; } +/* + * @brief: Prints the help message on stdout. 
+ * + * */ static void print_usage(const char* program_name) { printf( "%s PCAP_FILE RATE_NUM RATE_DEN\n" @@ -128,7 +337,6 @@ static void print_usage(const char* program_name) { " [--rtt-hist-offset HIST_OFFSET]\n" " [--rtt-hist-len HIST_LEN]\n" " [--stats-delay STATS_DELAY]\n" - " [--pcie-addr PCIE_ADDR]\n\n" " PCAP_FILE: Pcap file with packets to transmit.\n" " RATE_NUM: Numerator of the rate used to transmit packets.\n" @@ -149,26 +357,15 @@ static void print_usage(const char* program_name) { " will still be saved, but there will be a\n" " performance penalty.\n" " --stats-delay: Delay between displayed stats in milliseconds\n" - " (default: %d).\n" - " --pcie-addr: Specify the PCIe address of the NIC to use.\n", + " (default: %d).\n", program_name, DEFAULT_CORE_ID, DEFAULT_NB_QUEUES, DEFAULT_HIST_OFFSET, DEFAULT_HIST_LEN, DEFAULT_STATS_DELAY); } -#define CMD_OPT_HELP "help" -#define CMD_OPT_COUNT "count" -#define CMD_OPT_CORE "core" -#define CMD_OPT_QUEUES "queues" -#define CMD_OPT_SAVE "save" -#define CMD_OPT_SINGLE_CORE "single-core" -#define CMD_OPT_RTT "rtt" -#define CMD_OPT_RTT_HIST "rtt-hist" -#define CMD_OPT_RTT_HIST_OFF "rtt-hist-offset" -#define CMD_OPT_RTT_HIST_LEN "rtt-hist-len" -#define CMD_OPT_STATS_DELAY "stats-delay" -#define CMD_OPT_PCIE_ADDR "pcie-addr" - -// Map long options to short options. +/* + * Command line options related. Used in parse_args function. 
+ * + * */ enum { CMD_OPT_HELP_NUM = 256, CMD_OPT_COUNT_NUM, @@ -181,7 +378,6 @@ enum { CMD_OPT_RTT_HIST_OFF_NUM, CMD_OPT_RTT_HIST_LEN_NUM, CMD_OPT_STATS_DELAY_NUM, - CMD_OPT_PCIE_ADDR_NUM, }; static const char short_options[] = ""; @@ -198,28 +394,18 @@ static const struct option long_options[] = { {CMD_OPT_RTT_HIST_OFF, required_argument, NULL, CMD_OPT_RTT_HIST_OFF_NUM}, {CMD_OPT_RTT_HIST_LEN, required_argument, NULL, CMD_OPT_RTT_HIST_LEN_NUM}, {CMD_OPT_STATS_DELAY, required_argument, NULL, CMD_OPT_STATS_DELAY_NUM}, - {CMD_OPT_PCIE_ADDR, required_argument, NULL, CMD_OPT_PCIE_ADDR_NUM}, - {0, 0, 0, 0}}; - -struct parsed_args_t { - int core_id; - uint32_t nb_queues; - bool save; - bool single_core; - bool enable_rtt; - bool enable_rtt_history; - std::string hist_file; - std::string pcap_file; - std::string save_file; - uint16_t rate_num; - uint16_t rate_den; - uint64_t nb_pkts; - uint32_t rtt_hist_offset; - uint32_t rtt_hist_len; - uint32_t stats_delay; - std::string pcie_addr; + {0, 0, 0, 0} }; +/* + * @brief: Parses the command line arguments. Called from the main function. + * + * @param argc: Number of arguments entered by the user. + * @param argv: Value of the arguments entered by the user. + * @param parsed_args: Structure filled by this function after parsing the + * arguments and used in main(). + * + * */ static int parse_args(int argc, char** argv, struct parsed_args_t& parsed_args) { int opt; @@ -273,9 +459,6 @@ static int parse_args(int argc, char** argv, case CMD_OPT_STATS_DELAY_NUM: parsed_args.stats_delay = atoi(optarg); break; - case CMD_OPT_PCIE_ADDR_NUM: - parsed_args.pcie_addr = optarg; - break; default: return -1; } @@ -302,182 +485,16 @@ static int parse_args(int argc, char** argv, return 0; } -// Adapted from ixy. 
-static void* get_huge_page(size_t size) { - static int id = 0; - int fd; - char huge_pages_path[128]; - - snprintf(huge_pages_path, sizeof(huge_pages_path), "/mnt/huge/ensogen:%i", - id); - ++id; - - fd = open(huge_pages_path, O_CREAT | O_RDWR, S_IRWXU); - if (fd == -1) { - std::cerr << "(" << errno << ") Problem opening huge page file descriptor" - << std::endl; - return NULL; - } - - if (ftruncate(fd, (off_t)size)) { - std::cerr << "(" << errno - << ") Could not truncate huge page to size: " << size - << std::endl; - close(fd); - unlink(huge_pages_path); - return NULL; - } - - void* virt_addr = (void*)mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_HUGETLB, fd, 0); - - if (virt_addr == (void*)-1) { - std::cerr << "(" << errno << ") Could not mmap huge page" << std::endl; - close(fd); - unlink(huge_pages_path); - return NULL; - } - - if (mlock(virt_addr, size)) { - std::cerr << "(" << errno << ") Could not lock huge page" << std::endl; - munmap(virt_addr, size); - close(fd); - unlink(huge_pages_path); - return NULL; - } - - // Don't keep it around in the hugetlbfs. - close(fd); - unlink(huge_pages_path); - - return virt_addr; -} - -// Adapted from ixy. 
-static uint64_t virt_to_phys(void* virt) { - long pagesize = sysconf(_SC_PAGESIZE); - int fd = open("/proc/self/pagemap", O_RDONLY); - if (fd < 0) { - return 0; - } - // pagemap is an array of pointers for each normal-sized page - if (lseek(fd, (uintptr_t)virt / pagesize * sizeof(uintptr_t), SEEK_SET) < 0) { - close(fd); - return 0; - } - - uintptr_t phy = 0; - if (read(fd, &phy, sizeof(phy)) < 0) { - close(fd); - return 0; - } - close(fd); - - if (!phy) { - return 0; - } - // bits 0-54 are the page number - return (uint64_t)((phy & 0x7fffffffffffffULL) * pagesize + - ((uintptr_t)virt) % pagesize); -} - -struct EnsoPipe { - EnsoPipe(uint8_t* buf, uint32_t length, uint32_t good_bytes, uint32_t nb_pkts) - : buf(buf), length(length), good_bytes(good_bytes), nb_pkts(nb_pkts) { - phys_addr = virt_to_phys(buf); - } - uint8_t* buf; - uint32_t length; - uint32_t good_bytes; - uint32_t nb_pkts; - uint64_t phys_addr; -}; - -struct PcapHandlerContext { - std::vector enso_pipes; - uint32_t free_flits; - uint32_t hugepage_offset; - pcap_t* pcap; -}; - -struct RxStats { - explicit RxStats(uint32_t rtt_hist_len = 0, uint32_t rtt_hist_offset = 0) - : pkts(0), - bytes(0), - rtt_sum(0), - nb_batches(0), - rtt_hist_len(rtt_hist_len), - rtt_hist_offset(rtt_hist_offset) { - if (rtt_hist_len > 0) { - rtt_hist = new uint64_t[rtt_hist_len](); - } - } - ~RxStats() { - if (rtt_hist_len > 0) { - delete[] rtt_hist; - } - } - - RxStats(const RxStats& other) = delete; - RxStats(RxStats&& other) = default; - RxStats& operator=(const RxStats& other) = delete; - RxStats& operator=(RxStats&& other) = delete; - - inline void add_rtt_to_hist(const uint32_t rtt) { - // Insert RTTs into the rtt_hist array if they are in its range, - // otherwise use the backup_rtt_hist. 
- if (unlikely((rtt >= (rtt_hist_len - rtt_hist_offset)) || - (rtt < rtt_hist_offset))) { - backup_rtt_hist[rtt]++; - } else { - rtt_hist[rtt - rtt_hist_offset]++; - } - } - - uint64_t pkts; - uint64_t bytes; - uint64_t rtt_sum; - uint64_t nb_batches; - const uint32_t rtt_hist_len; - const uint32_t rtt_hist_offset; - uint64_t* rtt_hist; - std::unordered_map backup_rtt_hist; -}; - -struct RxArgs { - bool enable_rtt; - bool enable_rtt_history; - int socket_fd; -}; - -struct TxStats { - TxStats() : pkts(0), bytes(0) {} - uint64_t pkts; - uint64_t bytes; -}; - -struct TxArgs { - TxArgs(std::vector& enso_pipes, uint64_t total_bytes_to_send, - uint64_t total_good_bytes_to_send, uint64_t pkts_in_last_buffer, - int socket_fd) - : ignored_reclaims(0), - total_remaining_bytes(total_bytes_to_send), - total_remaining_good_bytes(total_good_bytes_to_send), - transmissions_pending(0), - pkts_in_last_buffer(pkts_in_last_buffer), - enso_pipes(enso_pipes), - current_enso_pipe(enso_pipes.begin()), - socket_fd(socket_fd) {} - uint64_t ignored_reclaims; - uint64_t total_remaining_bytes; - uint64_t total_remaining_good_bytes; - uint32_t transmissions_pending; - uint64_t pkts_in_last_buffer; - std::vector& enso_pipes; - std::vector::iterator current_enso_pipe; - int socket_fd; -}; - +/* + * @brief: libpcap callback registered by the main function. Called for each + * packet present in the PCAP file by libpcap. . We assume that the PCAP file + * provided by the user has `MAX_PKTS_IN_BUFFER` number of packets at max. + * + * @param user: Structure allocated in main to read and store relevant information. + * @param pkt_hdr: Contains packet metadata like timestamp, length, etc. (UNUSED) + * @param pkt_bytes: Packet data to be copied into a buffer. 
+ * + * */ void pcap_pkt_handler(u_char* user, const struct pcap_pkthdr* pkt_hdr, const u_char* pkt_bytes) { (void)pkt_hdr; @@ -488,40 +505,32 @@ void pcap_pkt_handler(u_char* user, const struct pcap_pkthdr* pkt_hdr, std::cerr << "Non-IPv4 packets are not supported" << std::endl; exit(8); } - - uint32_t len = enso::get_pkt_len(pkt_bytes); - uint32_t nb_flits = (len - 1) / 64 + 1; - - if (nb_flits > context->free_flits) { - uint8_t* buf; - if ((context->hugepage_offset + BUFFER_SIZE) > HUGEPAGE_SIZE) { - // Need to allocate another huge page. - buf = (uint8_t*)get_huge_page(HUGEPAGE_SIZE); - if (buf == NULL) { - pcap_breakloop(context->pcap); - return; - } - context->hugepage_offset = BUFFER_SIZE; - } else { - struct EnsoPipe& enso_pipe = context->enso_pipes.back(); - buf = enso_pipe.buf + BUFFER_SIZE; - context->hugepage_offset += BUFFER_SIZE; - } - context->enso_pipes.emplace_back(buf, 0, 0, 0); - context->free_flits = BUFFER_SIZE / 64; + context->nb_pkts++; + if(context->nb_pkts > MAX_PKTS_IN_BUFFER) { + std::cerr << "Only " << MAX_PKTS_IN_BUFFER << " can be in the PCAP file" + << std::endl; + free(context->buf); + exit(9); } - struct EnsoPipe& enso_pipe = context->enso_pipes.back(); - uint8_t* dest = enso_pipe.buf + enso_pipe.length; - - memcpy(dest, pkt_bytes, len); - - enso_pipe.length += nb_flits * 64; // Packets must be cache aligned. - enso_pipe.good_bytes += len; - ++(enso_pipe.nb_pkts); - context->free_flits -= nb_flits; + uint32_t len = enso::get_pkt_len(pkt_bytes); + uint32_t nb_flits = (len - 1) / MIN_PACKET_ALIGNED_SIZE + 1; + memcpy(context->buf + context->nb_bytes, pkt_bytes, len); + context->nb_bytes += nb_flits * MIN_PACKET_ALIGNED_SIZE; + context->nb_good_bytes += len; } +/* + * @brief: This function is used to receive packets. The approach used in this + * function is slightly different from the one described in Enso's library for + * the RxPipe abstraction (Allocate->Bind->Recv->Clear). 
We use the NextRxPipeToRecv + * abstraction to take advantage of notification prefetching and use fallback + * queues. + * + * @param rx_args: Arguments needed by this function. See RxArgs definition. + * @param rx_stats: Rx stats that need to be updated in every iteration. + * + * */ inline uint64_t receive_pkts(const struct RxArgs& rx_args, struct RxStats& rx_stats) { uint64_t nb_pkts = 0; @@ -529,102 +538,97 @@ inline uint64_t receive_pkts(const struct RxArgs& rx_args, (void)rx_args; (void)rx_stats; #else // IGNORE_RX - uint8_t* recv_buf; - int socket_fd; - int recv_len = enso::recv_select(rx_args.socket_fd, &socket_fd, - (void**)&recv_buf, RECV_BUF_LEN, 0); - - if (unlikely(recv_len < 0)) { - std::cerr << "Error receiving" << std::endl; - exit(7); + RxPipe* rx_pipe = rx_args.dev->NextRxPipeToRecv(); + if (unlikely(rx_pipe == nullptr)) { + return 0; } + auto batch = rx_pipe->PeekPkts(); + uint64_t recv_bytes = 0; + for (auto pkt : batch) { + uint16_t pkt_len = enso::get_pkt_len(pkt); - if (likely(recv_len > 0)) { - int processed_bytes = 0; - uint64_t recv_bytes = 0; - uint8_t* pkt = recv_buf; + if (rx_args.enable_rtt) { + uint32_t rtt = enso::get_pkt_rtt(pkt); + rx_stats.rtt_sum += rtt; - while (processed_bytes < recv_len) { - uint16_t pkt_len = enso::get_pkt_len(pkt); - uint16_t nb_flits = (pkt_len - 1) / 64 + 1; - uint16_t pkt_aligned_len = nb_flits * 64; - - if (rx_args.enable_rtt) { - uint32_t rtt = enso::get_pkt_rtt(pkt); - rx_stats.rtt_sum += rtt; - - if (rx_args.enable_rtt_history) { - rx_stats.add_rtt_to_hist(rtt); - } + if (rx_args.enable_rtt_history) { + rx_stats.add_rtt_to_hist(rtt); } - - pkt += pkt_aligned_len; - processed_bytes += pkt_aligned_len; - recv_bytes += pkt_len; - ++nb_pkts; } - rx_stats.pkts += nb_pkts; - ++(rx_stats.nb_batches); - rx_stats.bytes += recv_bytes; - enso::free_enso_pipe(socket_fd, recv_len); + recv_bytes += pkt_len; + ++nb_pkts; } -#endif // IGNORE_RX - return nb_pkts; -} - -inline void transmit_pkts(struct TxArgs& tx_args, 
struct TxStats& tx_stats) { - // Avoid transmitting new data when the TX buffer is full. - const uint32_t buf_fill_thresh = - enso::kNotificationBufSize - TRANSFERS_PER_BUFFER - 1; - - if (likely(tx_args.transmissions_pending < buf_fill_thresh)) { - uint32_t transmission_length = (uint32_t)std::min( - (uint64_t)(BUFFER_SIZE), tx_args.total_remaining_bytes); - transmission_length = - std::min(transmission_length, tx_args.current_enso_pipe->length); - - uint32_t good_transmission_length = - (uint32_t)std::min(tx_args.total_remaining_good_bytes, - (uint64_t)tx_args.current_enso_pipe->good_bytes); - uint64_t phys_addr = tx_args.current_enso_pipe->phys_addr; + uint32_t batch_length = batch.processed_bytes(); + rx_pipe->ConfirmBytes(batch_length); - enso::send(tx_args.socket_fd, phys_addr, transmission_length, 0); - tx_stats.bytes += good_transmission_length; - ++tx_args.transmissions_pending; + rx_stats.pkts += nb_pkts; + ++(rx_stats.nb_batches); + rx_stats.bytes += recv_bytes; - tx_args.total_remaining_bytes -= transmission_length; - tx_args.total_remaining_good_bytes -= good_transmission_length; + rx_pipe->Clear(); - if (unlikely(tx_args.total_remaining_bytes == 0)) { - tx_stats.pkts += tx_args.pkts_in_last_buffer; - keep_running = 0; - return; - } +#endif // IGNORE_RX + return nb_pkts; +} - // Move to next packet buffer. - tx_stats.pkts += tx_args.current_enso_pipe->nb_pkts; - tx_args.current_enso_pipe = std::next(tx_args.current_enso_pipe); - if (tx_args.current_enso_pipe == tx_args.enso_pipes.end()) { - tx_args.current_enso_pipe = tx_args.enso_pipes.begin(); - } +/* + * @brief: This function is called to send packets. Note that the approach we + * use here to send packets is different from the one defined in Enso's library + * using the TxPipe abstraction. This approach dissociates the sending part + * (creating TX notifications) from processing the completions (which TX notif- + * ications have been consumed by the NIC). 
It needed to be done this way to meet + * the performance requirements (full 100 G) for single core. + * + * @param tx_args: Arguments needed by this function. See TxArgs definition. + * @param tx_stats: Tx stats that need to be updated in every iteration. + * + * */ +inline void transmit_pkts(struct TxArgs& tx_args, + struct TxStats& tx_stats) { + // decide whether we need to send an entire buffer worth of packets + // or less than that based on user request + uint32_t nb_pkts_to_send = std::min(tx_args.pkts_in_pipe, + tx_args.total_remaining_pkts); + // the packets are copied in the main buffer based on the minimum packet size + uint32_t transmission_length = nb_pkts_to_send * MIN_PACKET_ALIGNED_SIZE; + + // send the packets + uint64_t buf_phys_addr = tx_args.tx_pipe->GetBufPhysAddr(); + tx_args.dev->SendOnly(buf_phys_addr, transmission_length); + + // update the stats + // the stats need be calculated based on good bytes + // rather than the transmission length + tx_stats.pkts += nb_pkts_to_send; + tx_stats.bytes += nb_pkts_to_send * MIN_PACKET_RAW_SIZE; + tx_args.total_remaining_pkts -= nb_pkts_to_send; + if(tx_args.total_remaining_pkts == 0) { + keep_running = 0; + return; } // Reclaim TX notification buffer space. if ((tx_args.transmissions_pending > (enso::kNotificationBufSize / 4))) { if (tx_args.ignored_reclaims > TX_RECLAIM_DELAY) { tx_args.ignored_reclaims = 0; - tx_args.transmissions_pending -= enso::get_completions(tx_args.socket_fd); + tx_args.transmissions_pending -= tx_args.dev->ProcessCompletionsOnly(); } else { ++tx_args.ignored_reclaims; } } } +/* + * @brief: Waits until the NIC has consumed all the Tx notifications. + * + * @param tx_args: Arguments needed by this function. See TxArgs definition. 
+ * + * */ inline void reclaim_all_buffers(struct TxArgs& tx_args) { while (tx_args.transmissions_pending) { - tx_args.transmissions_pending -= enso::get_completions(tx_args.socket_fd); + tx_args.transmissions_pending -= tx_args.dev->ProcessCompletionsOnly(); } } @@ -639,21 +643,6 @@ int main(int argc, char** argv) { return 1; } - // Parse the PCI address in format 0000:00:00.0 or 00:00.0. - if (parsed_args.pcie_addr != "") { - uint32_t domain, bus, dev, func; - if (sscanf(parsed_args.pcie_addr.c_str(), "%x:%x:%x.%x", &domain, &bus, - &dev, &func) != 4) { - if (sscanf(parsed_args.pcie_addr.c_str(), "%x:%x.%x", &bus, &dev, - &func) != 3) { - std::cerr << "Invalid PCI address" << std::endl; - return 1; - } - } - uint16_t bdf = (bus << 8) | (dev << 3) | (func & 0x7); - enso::set_bdf(bdf); - } - char errbuf[PCAP_ERRBUF_SIZE]; pcap_t* pcap = pcap_open_offline(parsed_args.pcap_file.c_str(), errbuf); @@ -662,11 +651,19 @@ int main(int argc, char** argv) { return 2; } + // we copy the packets in this buffer using libpcap + uint8_t *pkt_buf = (uint8_t *) malloc(BUFFER_SIZE); + if(pkt_buf == NULL) { + std::cerr << "Could not allocate packet buffer" << std::endl; + exit(1); + } + struct PcapHandlerContext context; - context.free_flits = 0; - context.hugepage_offset = HUGEPAGE_SIZE; context.pcap = pcap; - std::vector& enso_pipes = context.enso_pipes; + context.buf = pkt_buf; + context.nb_bytes = 0; + context.nb_good_bytes = 0; + context.nb_pkts = 0; // Initialize packet buffers with packets read from pcap file. if (pcap_loop(pcap, 0, pcap_pkt_handler, (u_char*)&context) < 0) { @@ -677,70 +674,26 @@ int main(int argc, char** argv) { // For small pcaps we copy the same packets over the remaining of the // buffer. This reduces the number of transfers that we need to issue. 
- if ((enso_pipes.size() == 1) && - (enso_pipes.front().length < BUFFER_SIZE / 2)) { - EnsoPipe& buffer = enso_pipes.front(); - uint32_t original_buf_length = buffer.length; - uint32_t original_good_bytes = buffer.good_bytes; - uint32_t original_nb_pkts = buffer.nb_pkts; - while ((buffer.length + original_buf_length) <= BUFFER_SIZE) { - memcpy(buffer.buf + buffer.length, buffer.buf, original_buf_length); - buffer.length += original_buf_length; - buffer.good_bytes += original_good_bytes; - buffer.nb_pkts += original_nb_pkts; + if (context.nb_bytes < BUFFER_SIZE) { + uint32_t original_buf_length = context.nb_bytes; + uint32_t original_nb_pkts = context.nb_pkts; + uint32_t original_good_bytes = context.nb_good_bytes; + while ((context.nb_bytes + original_buf_length) <= BUFFER_SIZE) { + memcpy(pkt_buf + context.nb_bytes, pkt_buf, original_buf_length); + context.nb_bytes += original_buf_length; + context.nb_pkts += original_nb_pkts; + context.nb_good_bytes += original_good_bytes; } } - uint64_t total_pkts_in_buffers = 0; - uint64_t total_bytes_in_buffers = 0; - uint64_t total_good_bytes_in_buffers = 0; - for (auto& buffer : enso_pipes) { - total_pkts_in_buffers += buffer.nb_pkts; - total_bytes_in_buffers += buffer.length; - total_good_bytes_in_buffers += buffer.good_bytes; - } - - // To restrict the number of packets, we track the total number of bytes. - // This avoids the need to look at every sent packet only to figure out the - // number bytes to send in the very last buffer. But to be able to do this, - // we need to compute the total number of bytes that we have to send. 
- uint64_t total_bytes_to_send; - uint64_t total_good_bytes_to_send; - uint64_t pkts_in_last_buffer = 0; + uint64_t total_pkts_in_buffer = context.nb_pkts; + uint64_t total_pkts_to_send; if (parsed_args.nb_pkts > 0) { - uint64_t nb_pkts_remaining = parsed_args.nb_pkts % total_pkts_in_buffers; - uint64_t nb_full_iters = parsed_args.nb_pkts / total_pkts_in_buffers; - - total_bytes_to_send = nb_full_iters * total_bytes_in_buffers; - total_good_bytes_to_send = nb_full_iters * total_good_bytes_in_buffers; - - if (nb_pkts_remaining == 0) { - pkts_in_last_buffer = enso_pipes.back().nb_pkts; - } - - for (auto& buffer : enso_pipes) { - if (nb_pkts_remaining < buffer.nb_pkts) { - uint8_t* pkt = buffer.buf; - while (nb_pkts_remaining > 0) { - uint16_t pkt_len = enso::get_pkt_len(pkt); - uint16_t nb_flits = (pkt_len - 1) / 64 + 1; - - total_bytes_to_send += nb_flits * 64; - --nb_pkts_remaining; - ++pkts_in_last_buffer; - - pkt = enso::get_next_pkt(pkt); - } - break; - } - total_bytes_to_send += buffer.length; - nb_pkts_remaining -= buffer.nb_pkts; - } + total_pkts_to_send = parsed_args.nb_pkts; } else { // Treat nb_pkts == 0 as unbounded. The following value should be enough // to send 64-byte packets for around 400 years using Tb Ethernet. - total_bytes_to_send = 0xffffffffffffffff; - total_good_bytes_to_send = 0xffffffffffffffff; + total_pkts_to_send = 0xffffffffffffffff; } uint32_t rtt_hist_len = 0; @@ -758,41 +711,44 @@ int main(int argc, char** argv) { std::vector threads; + std::unique_ptr dev = Device::Create(); + if (!dev) { + std::cerr << "Problem creating device" << std::endl; + free(pkt_buf); + exit(2); + } + // When using single_core we use the same thread for RX and TX, otherwise we // launch separate threads for RX and TX. 
if (!parsed_args.single_core) { - std::thread rx_thread = std::thread([&parsed_args, &rx_stats] { + std::thread rx_thread = std::thread([&parsed_args, &rx_stats, &dev] { std::this_thread::sleep_for(std::chrono::milliseconds(500)); - std::vector socket_fds; + std::vector rx_pipes; - int socket_fd = 0; for (uint32_t i = 0; i < parsed_args.nb_queues; ++i) { - socket_fd = enso::socket(AF_INET, SOCK_DGRAM, 0, true); - - if (socket_fd == -1) { - std::cerr << "Problem creating socket (" << errno - << "): " << strerror(errno) << std::endl; - exit(2); + // we create fallback queues by passing true in AllocateRxPipe + RxPipe* rx_pipe = dev->AllocateRxPipe(true); + if (!rx_pipe) { + std::cerr << "Problem creating RX pipe" << std::endl; + exit(3); } - - socket_fds.push_back(socket_fd); + rx_pipes.push_back(rx_pipe); } - enso::enable_device_rate_limit(socket_fd, parsed_args.rate_num, - parsed_args.rate_den); - enso::enable_device_round_robin(socket_fd); + dev->EnableRateLimiting(parsed_args.rate_num, parsed_args.rate_den); + dev->EnableRoundRobin(); if (parsed_args.enable_rtt) { - enso::enable_device_timestamp(socket_fd); - } else { - enso::disable_device_timestamp(socket_fd); + dev->EnableTimeStamping(); + } + else { + dev->DisableTimeStamping(); } - RxArgs rx_args; - rx_args.enable_rtt = parsed_args.enable_rtt; - rx_args.enable_rtt_history = parsed_args.enable_rtt_history; - rx_args.socket_fd = socket_fd; + RxArgs rx_args(parsed_args.enable_rtt, + parsed_args.enable_rtt_history, + dev); std::cout << "Running RX on core " << sched_getcpu() << std::endl; @@ -816,38 +772,40 @@ int main(int argc, char** argv) { rx_done = true; - enso::disable_device_rate_limit(socket_fd); - enso::disable_device_round_robin(socket_fd); + dev->DisableRateLimiting(); + dev->DisableRoundRobin(); if (parsed_args.enable_rtt) { - enso::disable_device_timestamp(socket_fd); + dev->DisableTimeStamping(); } - for (auto& s : socket_fds) { - enso::shutdown(s, SHUT_RDWR); - } }); std::thread tx_thread = 
std::thread( - [total_bytes_to_send, total_good_bytes_to_send, pkts_in_last_buffer, - &parsed_args, &enso_pipes, &tx_stats] { + [pkt_buf, total_pkts_in_buffer, total_pkts_to_send, + &parsed_args, &tx_stats, &dev] { std::this_thread::sleep_for(std::chrono::seconds(1)); - int socket_fd = enso::socket(AF_INET, SOCK_DGRAM, 0, false); - - if (socket_fd == -1) { - std::cerr << "Problem creating socket (" << errno - << "): " << strerror(errno) << std::endl; - exit(2); + TxPipe* tx_pipe = dev->AllocateTxPipe(); + if (!tx_pipe) { + std::cerr << "Problem creating TX pipe" << std::endl; + exit(3); + } + // allocate the bytes in the TX pipe and copy the required + // number of bytes from the main buffer + uint32_t pipe_alloc_len = total_pkts_in_buffer * MIN_PACKET_ALIGNED_SIZE; + uint8_t* pipe_buf = tx_pipe->AllocateBuf(pipe_alloc_len); + if(pipe_buf == NULL) { + std::cout << "Buffer allocation for TX pipe failed" << std::endl; + return; } + memcpy(pipe_buf, pkt_buf, pipe_alloc_len); while (!rx_ready) continue; std::cout << "Running TX on core " << sched_getcpu() << std::endl; - TxArgs tx_args(enso_pipes, total_bytes_to_send, - total_good_bytes_to_send, pkts_in_last_buffer, - socket_fd); + TxArgs tx_args(tx_pipe, total_pkts_in_buffer, total_pkts_to_send, dev); while (keep_running) { transmit_pkts(tx_args, tx_stats); @@ -858,6 +816,7 @@ int main(int argc, char** argv) { while (!rx_done) continue; reclaim_all_buffers(tx_args); + }); cpu_set_t cpuset; @@ -885,44 +844,56 @@ int main(int argc, char** argv) { } else { // Send and receive packets within the same thread. 
std::thread rx_tx_thread = std::thread( - [&parsed_args, &rx_stats, total_bytes_to_send, total_good_bytes_to_send, - pkts_in_last_buffer, &enso_pipes, &tx_stats] { + [pkt_buf, total_pkts_in_buffer, total_pkts_to_send, + &parsed_args, &tx_stats, &rx_stats, &dev] { std::this_thread::sleep_for(std::chrono::milliseconds(500)); - std::vector socket_fds; + std::vector rx_pipes; - int socket_fd = 0; for (uint32_t i = 0; i < parsed_args.nb_queues; ++i) { - socket_fd = enso::socket(AF_INET, SOCK_DGRAM, 0, true); - - if (socket_fd == -1) { - std::cerr << "Problem creating socket (" << errno - << "): " << strerror(errno) << std::endl; - exit(2); + // we create fallback queues by passing true in AllocateRxPipe + RxPipe* rx_pipe = dev->AllocateRxPipe(true); + if (!rx_pipe) { + std::cerr << "Problem creating RX pipe" << std::endl; + exit(3); } - - socket_fds.push_back(socket_fd); + rx_pipes.push_back(rx_pipe); } - enso::enable_device_rate_limit(socket_fd, parsed_args.rate_num, - parsed_args.rate_den); - enso::enable_device_round_robin(socket_fd); + dev->EnableRateLimiting(parsed_args.rate_num, parsed_args.rate_den); + dev->EnableRoundRobin(); if (parsed_args.enable_rtt) { - enso::enable_device_timestamp(socket_fd); + dev->EnableTimeStamping(); + } + else { + dev->DisableTimeStamping(); } std::cout << "Running RX and TX on core " << sched_getcpu() << std::endl; - RxArgs rx_args; - rx_args.enable_rtt = parsed_args.enable_rtt; - rx_args.enable_rtt_history = parsed_args.enable_rtt_history; - rx_args.socket_fd = socket_fd; + RxArgs rx_args(parsed_args.enable_rtt, + parsed_args.enable_rtt_history, + dev); + + TxPipe* tx_pipe = dev->AllocateTxPipe(); + if (!tx_pipe) { + std::cerr << "Problem creating TX pipe" << std::endl; + exit(3); + } + + // allocate the bytes in the TX pipe and copy the required + // number of bytes from the main buffer + uint32_t pipe_alloc_len = total_pkts_in_buffer * MIN_PACKET_ALIGNED_SIZE; + uint8_t* pipe_buf = tx_pipe->AllocateBuf(pipe_alloc_len); + 
if(pipe_buf == NULL) { + std::cout << "Buffer allocation for TX pipe failed" << std::endl; + return; + } + memcpy(pipe_buf, pkt_buf, pipe_alloc_len); - TxArgs tx_args(enso_pipes, total_bytes_to_send, - total_good_bytes_to_send, pkts_in_last_buffer, - socket_fd); + TxArgs tx_args(tx_pipe, total_pkts_in_buffer, total_pkts_to_send, dev); rx_ready = 1; @@ -949,16 +920,13 @@ int main(int argc, char** argv) { reclaim_all_buffers(tx_args); - enso::disable_device_rate_limit(socket_fd); - enso::disable_device_round_robin(socket_fd); + dev->DisableRateLimiting(); + dev->DisableRoundRobin(); if (parsed_args.enable_rtt) { - enso::disable_device_timestamp(socket_fd); + dev->DisableTimeStamping(); } - for (auto& s : socket_fds) { - enso::shutdown(s, SHUT_RDWR); - } }); cpu_set_t cpuset; @@ -1010,22 +978,24 @@ int main(int argc, char** argv) { uint64_t tx_bytes = tx_stats.bytes; uint64_t tx_pkts = tx_stats.pkts; - double interval_s = parsed_args.stats_delay / 1000.; + double interval_s = (double) parsed_args.stats_delay / ONE_THOUSAND; uint64_t rx_pkt_diff = rx_pkts - last_rx_pkts; uint64_t rx_goodput_mbps = - (rx_bytes - last_rx_bytes) * 8. / (1e6 * interval_s); + (rx_bytes - last_rx_bytes) * 8. / (ONE_MILLION * interval_s); uint64_t rx_pkt_rate = (rx_pkt_diff / interval_s); - uint64_t rx_pkt_rate_kpps = rx_pkt_rate / 1e3; - uint64_t rx_tput_mbps = rx_goodput_mbps + 24 * 8 * rx_pkt_rate / 1e6; + uint64_t rx_pkt_rate_kpps = rx_pkt_rate / ONE_THOUSAND; + uint64_t rx_tput_mbps = rx_goodput_mbps + FPGA_PACKET_OVERHEAD + * 8 * rx_pkt_rate / ONE_MILLION; uint64_t tx_pkt_diff = tx_pkts - last_tx_pkts; uint64_t tx_goodput_mbps = - (tx_bytes - last_tx_bytes) * 8. / (1e6 * interval_s); + (tx_bytes - last_tx_bytes) * 8. / (ONE_MILLION * interval_s); uint64_t tx_tput_mbps = - (tx_bytes - last_tx_bytes + tx_pkt_diff * 24) * 8. / (1e6 * interval_s); + (tx_bytes - last_tx_bytes + tx_pkt_diff * FPGA_PACKET_OVERHEAD) * 8. 
+ / (ONE_MILLION * interval_s); uint64_t tx_pkt_rate = (tx_pkt_diff / interval_s); - uint64_t tx_pkt_rate_kpps = tx_pkt_rate / 1e3; + uint64_t tx_pkt_rate_kpps = tx_pkt_rate / ONE_THOUSAND; uint64_t rtt_sum_ns = rx_stats.rtt_sum * enso::kNsPerTimestampCycle; uint64_t rtt_ns; @@ -1035,9 +1005,6 @@ int main(int argc, char** argv) { rtt_ns = 0; } - // TODO(sadok): don't print metrics that are unreliable before the first - // two samples. - std::cout << std::dec << " RX: Throughput: " << rx_tput_mbps << " Mbps" << " Rate: " << rx_pkt_rate_kpps << " kpps" << std::endl @@ -1113,12 +1080,6 @@ int main(int argc, char** argv) { thread.join(); } - for (auto& buffer : enso_pipes) { - // Only free hugepage-aligned buffers. - if ((buffer.phys_addr & (HUGEPAGE_SIZE - 1)) == 0) { - munmap(buffer.buf, HUGEPAGE_SIZE); - } - } - + free(pkt_buf); return ret; } diff --git a/software/examples/ensogen_new.cpp b/software/examples/ensogen_new.cpp deleted file mode 100644 index e98ec789..00000000 --- a/software/examples/ensogen_new.cpp +++ /dev/null @@ -1,915 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// Number of loop iterations to wait before probing the TX notification buffer -// again when reclaiming buffer space. -#define TX_RECLAIM_DELAY 1024 - -// Scientific notation for 10^6, treated as double. Used for stats calculations. -#define ONE_MILLION 1e6 - -// Scientific notation for 10^3, treated as double. Used for stats calculations. -#define ONE_THOUSAND 1e3 - -// Packet overhead added by the FPGA in bytes -#define FPGA_PACKET_OVERHEAD 24 - -// Minimum size of a packet aligned to cache -#define MIN_PACKET_ALIGNED_SIZE 64 - -// Minimum size of a raw packet -#define MIN_PACKET_RAW_SIZE 60 - -// If defined, ignore received packets. 
-// #define IGNORE_RX - -// When we are done transmitting. The RX thread still tries to receive all -// packets. The following defines the maximum number of times that we can try to -// receive packets in a row while getting no packet back. Once this happens we -// assume that we are no longer receiving packets and can stop trying. -#define ITER_NO_PKT_THRESH (1 << 28) - -// Default core ID to run. -#define DEFAULT_CORE_ID 0 - -// Default number of queues to use. -#define DEFAULT_NB_QUEUES 4 - -// Default histogram array offset. -#define DEFAULT_HIST_OFFSET 400 - -// Default histogram array length. -#define DEFAULT_HIST_LEN 1000000 - -// Default delay between displayed stats (in milliseconds). -#define DEFAULT_STATS_DELAY 1000 - -// Number of CLI arguments. -#define NB_CLI_ARGS 3 - -// Maximum number of bytes that we can receive at once. -#define RECV_BUF_LEN 10000000 - -// Huge page size that we are using (in bytes). -#define HUGEPAGE_SIZE (2UL << 20) - -// Size of the buffer that we keep packets in. -#define BUFFER_SIZE enso::kMaxTransferLen - -// Num of min sized packets that would fit in a BUFFER_SIZE bytes buffer -#define MAX_PKTS_IN_BUFFER 2048 - -// Number of transfers required to send a buffer full of packets. 
-#define TRANSFERS_PER_BUFFER (((BUFFER_SIZE - 1) / enso::kMaxTransferLen) + 1) - -static volatile int keep_running = 1; -static volatile int force_stop = 0; -static volatile int rx_ready = 0; -static volatile int rx_done = 0; -static volatile int tx_done = 0; - -using enso::Device; -using enso::RxPipe; -using enso::TxPipe; - -void int_handler(int signal __attribute__((unused))) { - if (!keep_running) { - force_stop = 1; - } - keep_running = 0; -} - -static void print_usage(const char* program_name) { - printf( - "%s PCAP_FILE RATE_NUM RATE_DEN\n" - " [--help]\n" - " [--count NB_PKTS]\n" - " [--core CORE_ID]\n" - " [--queues NB_QUEUES]\n" - " [--save SAVE_FILE]\n" - " [--single-core]\n" - " [--rtt]\n" - " [--rtt-hist HIST_FILE]\n" - " [--rtt-hist-offset HIST_OFFSET]\n" - " [--rtt-hist-len HIST_LEN]\n" - " [--stats-delay STATS_DELAY]\n" - " [--pcie-addr PCIE_ADDR]\n\n" - - " PCAP_FILE: Pcap file with packets to transmit.\n" - " RATE_NUM: Numerator of the rate used to transmit packets.\n" - " RATE_DEN: Denominator of the rate used to transmit packets.\n\n" - - " --help: Show this help and exit.\n" - " --count: Specify number of packets to transmit.\n" - " --core: Specify CORE_ID to run on (default: %d).\n" - " --queues: Specify number of RX queues (default: %d).\n" - " --save: Save RX and TX stats to SAVE_FILE.\n" - " --single-core: Use the same core for receiving and transmitting.\n" - " --rtt: Enable packet timestamping and report average RTT.\n" - " --rtt-hist: Save RTT histogram to HIST_FILE (implies --rtt).\n" - " --rtt-hist-offset: Offset to be used when saving the histogram\n" - " (default: %d).\n" - " --rtt-hist-len: Size of the histogram array (default: %d).\n" - " If an RTT is outside the RTT hist array range, it\n" - " will still be saved, but there will be a\n" - " performance penalty.\n" - " --stats-delay: Delay between displayed stats in milliseconds\n" - " (default: %d).\n" - " --pcie-addr: Specify the PCIe address of the NIC to use.\n", - 
program_name, DEFAULT_CORE_ID, DEFAULT_NB_QUEUES, DEFAULT_HIST_OFFSET, - DEFAULT_HIST_LEN, DEFAULT_STATS_DELAY); -} - -#define CMD_OPT_HELP "help" -#define CMD_OPT_COUNT "count" -#define CMD_OPT_CORE "core" -#define CMD_OPT_QUEUES "queues" -#define CMD_OPT_SAVE "save" -#define CMD_OPT_SINGLE_CORE "single-core" -#define CMD_OPT_RTT "rtt" -#define CMD_OPT_RTT_HIST "rtt-hist" -#define CMD_OPT_RTT_HIST_OFF "rtt-hist-offset" -#define CMD_OPT_RTT_HIST_LEN "rtt-hist-len" -#define CMD_OPT_STATS_DELAY "stats-delay" -#define CMD_OPT_PCIE_ADDR "pcie-addr" - -// Map long options to short options. -enum { - CMD_OPT_HELP_NUM = 256, - CMD_OPT_COUNT_NUM, - CMD_OPT_CORE_NUM, - CMD_OPT_QUEUES_NUM, - CMD_OPT_SAVE_NUM, - CMD_OPT_SINGLE_CORE_NUM, - CMD_OPT_RTT_NUM, - CMD_OPT_RTT_HIST_NUM, - CMD_OPT_RTT_HIST_OFF_NUM, - CMD_OPT_RTT_HIST_LEN_NUM, - CMD_OPT_STATS_DELAY_NUM, - CMD_OPT_PCIE_ADDR_NUM, -}; - -static const char short_options[] = ""; - -static const struct option long_options[] = { - {CMD_OPT_HELP, no_argument, NULL, CMD_OPT_HELP_NUM}, - {CMD_OPT_COUNT, required_argument, NULL, CMD_OPT_COUNT_NUM}, - {CMD_OPT_CORE, required_argument, NULL, CMD_OPT_CORE_NUM}, - {CMD_OPT_QUEUES, required_argument, NULL, CMD_OPT_QUEUES_NUM}, - {CMD_OPT_SAVE, required_argument, NULL, CMD_OPT_SAVE_NUM}, - {CMD_OPT_SINGLE_CORE, no_argument, NULL, CMD_OPT_SINGLE_CORE_NUM}, - {CMD_OPT_RTT, no_argument, NULL, CMD_OPT_RTT_NUM}, - {CMD_OPT_RTT_HIST, required_argument, NULL, CMD_OPT_RTT_HIST_NUM}, - {CMD_OPT_RTT_HIST_OFF, required_argument, NULL, CMD_OPT_RTT_HIST_OFF_NUM}, - {CMD_OPT_RTT_HIST_LEN, required_argument, NULL, CMD_OPT_RTT_HIST_LEN_NUM}, - {CMD_OPT_STATS_DELAY, required_argument, NULL, CMD_OPT_STATS_DELAY_NUM}, - {CMD_OPT_PCIE_ADDR, required_argument, NULL, CMD_OPT_PCIE_ADDR_NUM}, - {0, 0, 0, 0}}; - -struct parsed_args_t { - int core_id; - uint32_t nb_queues; - bool save; - bool single_core; - bool enable_rtt; - bool enable_rtt_history; - std::string hist_file; - std::string pcap_file; - 
std::string save_file; - uint16_t rate_num; - uint16_t rate_den; - uint64_t nb_pkts; - uint32_t rtt_hist_offset; - uint32_t rtt_hist_len; - uint32_t stats_delay; - std::string pcie_addr; -}; - -static int parse_args(int argc, char** argv, - struct parsed_args_t& parsed_args) { - int opt; - int long_index; - - parsed_args.nb_pkts = 0; - parsed_args.core_id = DEFAULT_CORE_ID; - parsed_args.nb_queues = DEFAULT_NB_QUEUES; - parsed_args.save = false; - parsed_args.single_core = false; - parsed_args.enable_rtt = false; - parsed_args.enable_rtt_history = false; - parsed_args.rtt_hist_offset = DEFAULT_HIST_OFFSET; - parsed_args.rtt_hist_len = DEFAULT_HIST_LEN; - parsed_args.stats_delay = DEFAULT_STATS_DELAY; - - while ((opt = getopt_long(argc, argv, short_options, long_options, - &long_index)) != EOF) { - switch (opt) { - case CMD_OPT_HELP_NUM: - return 1; - case CMD_OPT_COUNT_NUM: - parsed_args.nb_pkts = atoi(optarg); - break; - case CMD_OPT_CORE_NUM: - parsed_args.core_id = atoi(optarg); - break; - case CMD_OPT_QUEUES_NUM: - parsed_args.nb_queues = atoi(optarg); - break; - case CMD_OPT_SAVE_NUM: - parsed_args.save = true; - parsed_args.save_file = optarg; - break; - case CMD_OPT_SINGLE_CORE_NUM: - parsed_args.single_core = true; - break; - case CMD_OPT_RTT_HIST_NUM: - parsed_args.enable_rtt_history = true; - parsed_args.hist_file = optarg; - // fall through - case CMD_OPT_RTT_NUM: - parsed_args.enable_rtt = true; - break; - case CMD_OPT_RTT_HIST_OFF_NUM: - parsed_args.rtt_hist_offset = atoi(optarg); - break; - case CMD_OPT_RTT_HIST_LEN_NUM: - parsed_args.rtt_hist_len = atoi(optarg); - break; - case CMD_OPT_STATS_DELAY_NUM: - parsed_args.stats_delay = atoi(optarg); - break; - case CMD_OPT_PCIE_ADDR_NUM: - parsed_args.pcie_addr = optarg; - break; - default: - return -1; - } - } - - if ((argc - optind) != NB_CLI_ARGS) { - return -1; - } - - parsed_args.pcap_file = argv[optind++]; - parsed_args.rate_num = atoi(argv[optind++]); - parsed_args.rate_den = atoi(argv[optind++]); - 
- if (parsed_args.rate_num == 0) { - std::cerr << "Rate must be greater than 0" << std::endl; - return -1; - } - - if (parsed_args.rate_den == 0) { - std::cerr << "Rate denominator must be greater than 0" << std::endl; - return -1; - } - - return 0; -} - -struct PcapHandlerContext { - uint8_t *buf; - uint32_t nb_bytes; - uint32_t nb_good_bytes; - uint32_t nb_pkts; - pcap_t* pcap; -}; - -struct RxStats { - explicit RxStats(uint32_t rtt_hist_len = 0, uint32_t rtt_hist_offset = 0) - : pkts(0), - bytes(0), - rtt_sum(0), - nb_batches(0), - rtt_hist_len(rtt_hist_len), - rtt_hist_offset(rtt_hist_offset) { - if (rtt_hist_len > 0) { - rtt_hist = new uint64_t[rtt_hist_len](); - } - } - ~RxStats() { - if (rtt_hist_len > 0) { - delete[] rtt_hist; - } - } - - RxStats(const RxStats& other) = delete; - RxStats(RxStats&& other) = default; - RxStats& operator=(const RxStats& other) = delete; - RxStats& operator=(RxStats&& other) = delete; - - inline void add_rtt_to_hist(const uint32_t rtt) { - // Insert RTTs into the rtt_hist array if they are in its range, - // otherwise use the backup_rtt_hist. 
- if (unlikely((rtt >= (rtt_hist_len - rtt_hist_offset)) || - (rtt < rtt_hist_offset))) { - backup_rtt_hist[rtt]++; - } else { - rtt_hist[rtt - rtt_hist_offset]++; - } - } - - uint64_t pkts; - uint64_t bytes; - uint64_t rtt_sum; - uint64_t nb_batches; - const uint32_t rtt_hist_len; - const uint32_t rtt_hist_offset; - uint64_t* rtt_hist; - std::unordered_map backup_rtt_hist; -}; - -struct RxArgs { - bool enable_rtt; - bool enable_rtt_history; -}; - -struct TxStats { - TxStats() : pkts(0), bytes(0) {} - uint64_t pkts; - uint64_t bytes; -}; - -struct TxArgs { - TxArgs(TxPipe *pipe, uint8_t *buf, uint64_t pkts_in_buf, - uint64_t total_pkts_to_send) - : tx_pipe(pipe), - main_buf(buf), - pkts_in_main_buf(pkts_in_buf), - total_remaining_pkts(total_pkts_to_send) {} - TxPipe *tx_pipe; - uint8_t *main_buf; - uint64_t pkts_in_main_buf; - uint64_t total_remaining_pkts; - uint32_t transmissions_pending; -}; - -void pcap_pkt_handler(u_char* user, const struct pcap_pkthdr* pkt_hdr, - const u_char* pkt_bytes) { - (void)pkt_hdr; - struct PcapHandlerContext* context = (struct PcapHandlerContext*)user; - - const struct ether_header* l2_hdr = (struct ether_header*)pkt_bytes; - if (l2_hdr->ether_type != htons(ETHERTYPE_IP)) { - std::cerr << "Non-IPv4 packets are not supported" << std::endl; - exit(8); - } - context->nb_pkts++; - if(context->nb_pkts > MAX_PKTS_IN_BUFFER) { - std::cerr << "Only " << MAX_PKTS_IN_BUFFER << " can be in the PCAP file" - << std::endl; - free(context->buf); - exit(9); - } - - uint32_t len = enso::get_pkt_len(pkt_bytes); - uint32_t nb_flits = (len - 1) / MIN_PACKET_ALIGNED_SIZE + 1; - memcpy(context->buf + context->nb_bytes, pkt_bytes, len); - context->nb_bytes += nb_flits * MIN_PACKET_ALIGNED_SIZE; - context->nb_good_bytes += len; -} - -inline uint64_t receive_pkts(const struct RxArgs& rx_args, - struct RxStats& rx_stats, - std::unique_ptr &dev) { - uint64_t nb_pkts = 0; -#ifdef IGNORE_RX - (void)rx_args; - (void)rx_stats; -#else // IGNORE_RX - RxPipe* rx_pipe 
= dev->NextRxPipeToRecv(); - if (unlikely(rx_pipe == nullptr)) { - return 0; - } - auto batch = rx_pipe->PeekPkts(); - uint64_t recv_bytes = 0; - for (auto pkt : batch) { - uint16_t pkt_len = enso::get_pkt_len(pkt); - - if (rx_args.enable_rtt) { - uint32_t rtt = enso::get_pkt_rtt(pkt); - rx_stats.rtt_sum += rtt; - - if (rx_args.enable_rtt_history) { - rx_stats.add_rtt_to_hist(rtt); - } - } - - recv_bytes += pkt_len; - ++nb_pkts; - } - - uint32_t batch_length = batch.processed_bytes(); - rx_pipe->ConfirmBytes(batch_length); - - rx_stats.pkts += nb_pkts; - ++(rx_stats.nb_batches); - rx_stats.bytes += recv_bytes; - - rx_pipe->Clear(); - -#endif // IGNORE_RX - return nb_pkts; -} - -inline void transmit_pkts(struct TxArgs& tx_args, struct TxStats& tx_stats) { - // decide whether we need to send an entire buffer worth of packets - // or less than that based on user request - uint32_t nb_pkts_to_send = std::min(tx_args.pkts_in_main_buf, - tx_args.total_remaining_pkts); - // the packets are copied in the main buffer based on the minimum packet size - uint32_t transmission_length = nb_pkts_to_send * MIN_PACKET_ALIGNED_SIZE; - - // allocate the bytes in the TX pipe and copy the required - // number of bytes from the main buffer - uint8_t* pipe_buf = tx_args.tx_pipe->AllocateBuf(transmission_length); - if(pipe_buf == NULL) { - std::cout << "Buffer allocation for TX pipe failed" << std::endl; - return; - } - // memcpy(pipe_buf, tx_args.main_buf, transmission_length); - enso::memcpy_64_align(pipe_buf, tx_args.main_buf, transmission_length); - // send the packets - tx_args.tx_pipe->SendAndFree(transmission_length); - - // update the stats - // the stats need be calculated based on good bytes - // rather than the transmission length - tx_stats.pkts += nb_pkts_to_send; - tx_stats.bytes += nb_pkts_to_send * MIN_PACKET_RAW_SIZE; - tx_args.total_remaining_pkts -= nb_pkts_to_send; - if(tx_args.total_remaining_pkts == 0) { - keep_running = 0; - return; - } -} - -int main(int argc, 
char** argv) { - struct parsed_args_t parsed_args; - int ret = parse_args(argc, argv, parsed_args); - if (ret) { - print_usage(argv[0]); - if (ret == 1) { - return 0; - } - return 1; - } - - // Parse the PCI address in format 0000:00:00.0 or 00:00.0. - if (parsed_args.pcie_addr != "") { - uint32_t domain, bus, dev, func; - if (sscanf(parsed_args.pcie_addr.c_str(), "%x:%x:%x.%x", &domain, &bus, - &dev, &func) != 4) { - if (sscanf(parsed_args.pcie_addr.c_str(), "%x:%x.%x", &bus, &dev, - &func) != 3) { - std::cerr << "Invalid PCI address" << std::endl; - return 1; - } - } - uint16_t bdf = (bus << 8) | (dev << 3) | (func & 0x7); - enso::set_bdf(bdf); - } - - char errbuf[PCAP_ERRBUF_SIZE]; - - pcap_t* pcap = pcap_open_offline(parsed_args.pcap_file.c_str(), errbuf); - if (pcap == NULL) { - std::cerr << "Error loading pcap file (" << errbuf << ")" << std::endl; - return 2; - } - - // we copy the packets in this buffer using libpcap - uint8_t *pkt_buf = (uint8_t *) malloc(BUFFER_SIZE); - if(pkt_buf == NULL) { - std::cerr << "Could not allocate packet buffer" << std::endl; - exit(1); - } - - struct PcapHandlerContext context; - context.pcap = pcap; - context.buf = pkt_buf; - context.nb_bytes = 0; - context.nb_good_bytes = 0; - context.nb_pkts = 0; - - // Initialize packet buffers with packets read from pcap file. - if (pcap_loop(pcap, 0, pcap_pkt_handler, (u_char*)&context) < 0) { - std::cerr << "Error while reading pcap (" << pcap_geterr(pcap) << ")" - << std::endl; - return 3; - } - - // For small pcaps we copy the same packets over the remaining of the - // buffer. This reduces the number of transfers that we need to issue. 
- if (context.nb_bytes < BUFFER_SIZE) { - uint32_t original_buf_length = context.nb_bytes; - uint32_t original_nb_pkts = context.nb_pkts; - uint32_t original_good_bytes = context.nb_good_bytes; - while ((context.nb_bytes + original_buf_length) <= BUFFER_SIZE) { - memcpy(pkt_buf + context.nb_bytes, pkt_buf, original_buf_length); - context.nb_bytes += original_buf_length; - context.nb_pkts += original_nb_pkts; - context.nb_good_bytes += original_good_bytes; - } - } - - uint64_t total_pkts_in_buffer = context.nb_pkts; - uint64_t total_pkts_to_send; - if (parsed_args.nb_pkts > 0) { - total_pkts_to_send = parsed_args.nb_pkts; - } else { - // Treat nb_pkts == 0 as unbounded. The following value should be enough - // to send 64-byte packets for around 400 years using Tb Ethernet. - total_pkts_to_send = 0xffffffffffffffff; - } - - uint32_t rtt_hist_len = 0; - uint32_t rtt_hist_offset = 0; - - if (parsed_args.enable_rtt_history) { - rtt_hist_len = parsed_args.rtt_hist_len; - rtt_hist_offset = parsed_args.rtt_hist_offset; - } - - RxStats rx_stats(rtt_hist_len, rtt_hist_offset); - TxStats tx_stats; - - signal(SIGINT, int_handler); - - std::vector threads; - - std::unique_ptr dev = Device::Create(); - if (!dev) { - std::cerr << "Problem creating device" << std::endl; - free(pkt_buf); - exit(2); - } - - // When using single_core we use the same thread for RX and TX, otherwise we - // launch separate threads for RX and TX. 
- if (!parsed_args.single_core) { - std::thread rx_thread = std::thread([&parsed_args, &rx_stats, &dev] { - std::this_thread::sleep_for(std::chrono::milliseconds(500)); - - std::vector rx_pipes; - - for (uint32_t i = 0; i < parsed_args.nb_queues; ++i) { - RxPipe* rx_pipe = dev->AllocateRxPipe(true); - if (!rx_pipe) { - std::cerr << "Problem creating RX pipe" << std::endl; - exit(3); - } - rx_pipes.push_back(rx_pipe); - } - - dev->EnableRateLimiting(parsed_args.rate_num, parsed_args.rate_den); - dev->EnableRoundRobin(); - - if (parsed_args.enable_rtt) { - dev->EnableTimeStamping(); - } - else { - dev->DisableTimeStamping(); - } - - RxArgs rx_args; - rx_args.enable_rtt = parsed_args.enable_rtt; - rx_args.enable_rtt_history = parsed_args.enable_rtt_history; - - std::cout << "Running RX on core " << sched_getcpu() << std::endl; - - rx_ready = 1; - - while (keep_running) { - receive_pkts(rx_args, rx_stats, dev); - } - - uint64_t nb_iters_no_pkt = 0; - - // Receive packets until packets stop arriving or user force stops. 
- while (!force_stop && (nb_iters_no_pkt < ITER_NO_PKT_THRESH)) { - uint64_t nb_pkts = receive_pkts(rx_args, rx_stats, dev); - if (unlikely(nb_pkts == 0)) { - ++nb_iters_no_pkt; - } else { - nb_iters_no_pkt = 0; - } - } - - rx_done = true; - - dev->DisableRateLimiting(); - dev->DisableRoundRobin(); - - if (parsed_args.enable_rtt) { - dev->DisableTimeStamping(); - } - - }); - - std::thread tx_thread = std::thread( - [pkt_buf, total_pkts_in_buffer, total_pkts_to_send, - &parsed_args, &tx_stats, &dev] { - std::this_thread::sleep_for(std::chrono::seconds(1)); - - TxPipe* tx_pipe = dev->AllocateTxPipe(); - if (!tx_pipe) { - std::cerr << "Problem creating TX pipe" << std::endl; - exit(3); - } - - while (!rx_ready) continue; - - std::cout << "Running TX on core " << sched_getcpu() << std::endl; - - TxArgs tx_args(tx_pipe, pkt_buf, total_pkts_in_buffer, - total_pkts_to_send); - - while (keep_running) { - transmit_pkts(tx_args, tx_stats); - } - - tx_done = 1; - - while (!rx_done) continue; - - }); - - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - CPU_SET(parsed_args.core_id, &cpuset); - int result = pthread_setaffinity_np(rx_thread.native_handle(), - sizeof(cpuset), &cpuset); - if (result < 0) { - std::cerr << "Error setting CPU affinity for RX thread." << std::endl; - return 6; - } - - CPU_ZERO(&cpuset); - CPU_SET(parsed_args.core_id + 1, &cpuset); - result = pthread_setaffinity_np(tx_thread.native_handle(), sizeof(cpuset), - &cpuset); - if (result < 0) { - std::cerr << "Error setting CPU affinity for TX thread." << std::endl; - return 7; - } - - threads.push_back(std::move(rx_thread)); - threads.push_back(std::move(tx_thread)); - - } else { - // Send and receive packets within the same thread. 
- std::thread rx_tx_thread = std::thread( - [pkt_buf, total_pkts_in_buffer, total_pkts_to_send, - &parsed_args, &tx_stats, &rx_stats, &dev] { - std::this_thread::sleep_for(std::chrono::milliseconds(500)); - - std::vector rx_pipes; - - for (uint32_t i = 0; i < parsed_args.nb_queues; ++i) { - RxPipe* rx_pipe = dev->AllocateRxPipe(true); - if (!rx_pipe) { - std::cerr << "Problem creating RX pipe" << std::endl; - exit(3); - } - rx_pipes.push_back(rx_pipe); - } - - dev->EnableRateLimiting(parsed_args.rate_num, parsed_args.rate_den); - dev->EnableRoundRobin(); - - if (parsed_args.enable_rtt) { - dev->EnableTimeStamping(); - } - else { - dev->DisableTimeStamping(); - } - - std::cout << "Running RX and TX on core " << sched_getcpu() - << std::endl; - - RxArgs rx_args; - rx_args.enable_rtt = parsed_args.enable_rtt; - rx_args.enable_rtt_history = parsed_args.enable_rtt_history; - - TxPipe* tx_pipe = dev->AllocateTxPipe(); - if (!tx_pipe) { - std::cerr << "Problem creating TX pipe" << std::endl; - exit(3); - } - - TxArgs tx_args(tx_pipe, pkt_buf, total_pkts_in_buffer, - total_pkts_to_send); - - rx_ready = 1; - - while (keep_running) { - receive_pkts(rx_args, rx_stats, dev); - transmit_pkts(tx_args, tx_stats); - } - - tx_done = 1; - - uint64_t nb_iters_no_pkt = 0; - - // Receive packets until packets stop arriving or user force stops. - while (!force_stop && (nb_iters_no_pkt < ITER_NO_PKT_THRESH)) { - uint64_t nb_pkts = receive_pkts(rx_args, rx_stats, dev); - if (unlikely(nb_pkts == 0)) { - ++nb_iters_no_pkt; - } else { - nb_iters_no_pkt = 0; - } - } - - rx_done = true; - - dev->DisableRateLimiting(); - dev->DisableRoundRobin(); - - if (parsed_args.enable_rtt) { - dev->DisableTimeStamping(); - } - - }); - - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - CPU_SET(parsed_args.core_id, &cpuset); - int result = pthread_setaffinity_np(rx_tx_thread.native_handle(), - sizeof(cpuset), &cpuset); - if (result < 0) { - std::cerr << "Error setting CPU affinity for RX thread." 
<< std::endl; - return 6; - } - - threads.push_back(std::move(rx_tx_thread)); - } - - // Write header to save file. - if (parsed_args.save) { - std::ofstream save_file; - save_file.open(parsed_args.save_file); - save_file - << "rx_goodput_mbps,rx_tput_mbps,rx_pkt_rate_kpps,rx_bytes,rx_packets," - "tx_goodput_mbps,tx_tput_mbps,tx_pkt_rate_kpps,tx_bytes,tx_packets"; - if (parsed_args.enable_rtt) { - save_file << ",mean_rtt_ns"; - } - save_file << std::endl; - save_file.close(); - } - - while (!rx_ready) continue; - - std::cout << "Starting..." << std::endl; - - // Continuously print statistics. - while (!rx_done) { - _enso_compiler_memory_barrier(); - uint64_t last_rx_bytes = rx_stats.bytes; - uint64_t last_rx_pkts = rx_stats.pkts; - uint64_t last_tx_bytes = tx_stats.bytes; - uint64_t last_tx_pkts = tx_stats.pkts; - uint64_t last_aggregated_rtt_ns = - rx_stats.rtt_sum * enso::kNsPerTimestampCycle; - - std::this_thread::sleep_for( - std::chrono::milliseconds(parsed_args.stats_delay)); - - uint64_t rx_bytes = rx_stats.bytes; - uint64_t rx_pkts = rx_stats.pkts; - uint64_t tx_bytes = tx_stats.bytes; - uint64_t tx_pkts = tx_stats.pkts; - - double interval_s = (double) parsed_args.stats_delay / ONE_THOUSAND; - - uint64_t rx_pkt_diff = rx_pkts - last_rx_pkts; - uint64_t rx_goodput_mbps = - (rx_bytes - last_rx_bytes) * 8. / (ONE_MILLION * interval_s); - uint64_t rx_pkt_rate = (rx_pkt_diff / interval_s); - uint64_t rx_pkt_rate_kpps = rx_pkt_rate / ONE_THOUSAND; - uint64_t rx_tput_mbps = rx_goodput_mbps + FPGA_PACKET_OVERHEAD - * 8 * rx_pkt_rate / ONE_MILLION; - - uint64_t tx_pkt_diff = tx_pkts - last_tx_pkts; - uint64_t tx_goodput_mbps = - (tx_bytes - last_tx_bytes) * 8. / (ONE_MILLION * interval_s); - uint64_t tx_tput_mbps = - (tx_bytes - last_tx_bytes + tx_pkt_diff * FPGA_PACKET_OVERHEAD) * 8. 
- / (ONE_MILLION * interval_s); - uint64_t tx_pkt_rate = (tx_pkt_diff / interval_s); - uint64_t tx_pkt_rate_kpps = tx_pkt_rate / ONE_THOUSAND; - - uint64_t rtt_sum_ns = rx_stats.rtt_sum * enso::kNsPerTimestampCycle; - uint64_t rtt_ns; - if (rx_pkt_diff != 0) { - rtt_ns = (rtt_sum_ns - last_aggregated_rtt_ns) / rx_pkt_diff; - } else { - rtt_ns = 0; - } - - std::cout << std::dec << " RX: Throughput: " << rx_tput_mbps << " Mbps" - << " Rate: " << rx_pkt_rate_kpps << " kpps" << std::endl - - << " #bytes: " << rx_bytes << " #packets: " << rx_pkts - << std::endl; - - std::cout << " TX: Throughput: " << tx_tput_mbps << " Mbps" - << " Rate: " << tx_pkt_rate_kpps << " kpps" << std::endl - - << " #bytes: " << tx_bytes << " #packets: " << tx_pkts - << std::endl; - - if (parsed_args.enable_rtt) { - std::cout << "Mean RTT: " << rtt_ns << " ns " << std::endl; - } - - if (parsed_args.save) { - std::ofstream save_file; - save_file.open(parsed_args.save_file, std::ios_base::app); - save_file << rx_goodput_mbps << "," << rx_tput_mbps << "," - << rx_pkt_rate_kpps << "," << rx_bytes << "," << rx_pkts << "," - << tx_goodput_mbps << "," << tx_pkt_rate_kpps << "," - << tx_tput_mbps << "," << tx_bytes << "," << tx_pkts; - if (parsed_args.enable_rtt) { - save_file << "," << rtt_ns; - } - save_file << std::endl; - save_file.close(); - } - - std::cout << std::endl; - } - - if (parsed_args.save) { - std::cout << "Saved statistics to \"" << parsed_args.save_file << "\"" - << std::endl; - } - - ret = 0; - if (parsed_args.enable_rtt_history) { - std::ofstream hist_file; - hist_file.open(parsed_args.hist_file); - - for (uint32_t rtt = 0; rtt < parsed_args.rtt_hist_len; ++rtt) { - if (rx_stats.rtt_hist[rtt] != 0) { - uint32_t corrected_rtt = - (rtt + parsed_args.rtt_hist_offset) * enso::kNsPerTimestampCycle; - hist_file << corrected_rtt << "," << rx_stats.rtt_hist[rtt] - << std::endl; - } - } - - if (rx_stats.backup_rtt_hist.size() != 0) { - std::cout << "Warning: " << 
rx_stats.backup_rtt_hist.size() - << " rtt hist entries in backup" << std::endl; - for (auto const& i : rx_stats.backup_rtt_hist) { - hist_file << i.first * enso::kNsPerTimestampCycle << "," << i.second - << std::endl; - } - } - - hist_file.close(); - std::cout << "Saved RTT histogram to \"" << parsed_args.hist_file << "\"" - << std::endl; - - if (rx_stats.pkts != tx_stats.pkts) { - std::cout << "Warning: did not get all packets back." << std::endl; - ret = 1; - } - } - - for (auto& thread : threads) { - thread.join(); - } - - free(pkt_buf); - return ret; -} diff --git a/software/examples/meson.build b/software/examples/meson.build index 983b04f0..a6ac0063 100644 --- a/software/examples/meson.build +++ b/software/examples/meson.build @@ -16,5 +16,3 @@ executable('capture', 'capture.cpp', dependencies: [thread_dep, pcap_dep], link_with: enso_lib, include_directories: inc) executable('l2_forward', 'l2_forward.cpp', dependencies: thread_dep, link_with: enso_lib, include_directories: inc) -executable('ensogen_new', 'ensogen_new.cpp', dependencies: [thread_dep, pcap_dep], - link_with: enso_lib, include_directories: inc) diff --git a/software/include/enso/pipe.h b/software/include/enso/pipe.h index e9e2cfe6..79850b88 100644 --- a/software/include/enso/pipe.h +++ b/software/include/enso/pipe.h @@ -277,6 +277,22 @@ class Device { */ int DisableRoundRobin(); + /** + * @brief Vanilla function that sends a given number of bytes from a physical + * address. Used only by Ensogen. + * + */ + void SendOnly(uint64_t phys_addr, uint32_t nb_bytes); + + /** + * @brief Vanilla function that checks for the number of Tx notifications + * consumed by the NIC. Used only by Ensogen. + * + * @return number of Tx notifications successfully processed by the NIC. 
+ * + */ + uint32_t ProcessCompletionsOnly(); + private: struct TxPendingRequest { uint32_t pipe_id; @@ -819,6 +835,15 @@ class TxPipe { device_->Send(kId, phys_addr, nb_bytes); } + /* + * @brief: Used to get the physical address of the pipe's buffer. + * Used only by EnsoGen as of now. + * + * */ + inline uint64_t GetBufPhysAddr() { + return buf_phys_addr_ + app_begin_; + } + /** * @brief Explicitly requests a best-effort buffer extension. * diff --git a/software/src/enso/pipe.cpp b/software/src/enso/pipe.cpp index 226aeba8..a72d4998 100644 --- a/software/src/enso/pipe.cpp +++ b/software/src/enso/pipe.cpp @@ -390,6 +390,14 @@ void Device::ProcessCompletions() { } } +void Device::SendOnly(uint64_t phys_addr, uint32_t nb_bytes) { + send_to_queue(&notification_buf_pair_, phys_addr, nb_bytes); +} + +uint32_t Device::ProcessCompletionsOnly() { + return get_unreported_completions(&notification_buf_pair_); +} + int Device::EnableTimeStamping() { return enable_timestamp(&notification_buf_pair_); } From c563643a1202dc4ac0023caf594bd2f68977265a Mon Sep 17 00:00:00 2001 From: Kshitij Rana Date: Sun, 7 Jan 2024 17:31:07 -0500 Subject: [PATCH 03/11] Added missing free call --- software/examples/ensogen.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/software/examples/ensogen.cpp b/software/examples/ensogen.cpp index 57c5fe18..92896899 100644 --- a/software/examples/ensogen.cpp +++ b/software/examples/ensogen.cpp @@ -503,6 +503,7 @@ void pcap_pkt_handler(u_char* user, const struct pcap_pkthdr* pkt_hdr, const struct ether_header* l2_hdr = (struct ether_header*)pkt_bytes; if (l2_hdr->ether_type != htons(ETHERTYPE_IP)) { std::cerr << "Non-IPv4 packets are not supported" << std::endl; + free(context->buf); exit(8); } context->nb_pkts++; From ce0aa4b4ebff156b302ad3bdd2b76ae38946de69 Mon Sep 17 00:00:00 2001 From: Kshitij Rana Date: Mon, 8 Jan 2024 12:25:56 -0500 Subject: [PATCH 04/11] Reverted PCIe addr cmd line support removal --- scripts/ensogen.sh | 2 +-
software/examples/ensogen.cpp | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/scripts/ensogen.sh b/scripts/ensogen.sh index c5994abe..f59274ae 100755 --- a/scripts/ensogen.sh +++ b/scripts/ensogen.sh @@ -14,7 +14,7 @@ GET_PCAP_SIZE_CMD_PATH=$(realpath $GET_PCAP_SIZE_CMD_PATH) if [ $# -lt 2 ]; then echo "Usage: ./ensogen.sh PCAP_FILE RATE_GBPS [OPTIONS]" - echo "Example: ./ensogen.sh /tmp/pcap_file.pcap 100" + echo "Example: ./ensogen.sh /tmp/pcap_file.pcap 100 --pcie-addr 65:00.0" exit 1 fi diff --git a/software/examples/ensogen.cpp b/software/examples/ensogen.cpp index 92896899..58aacc34 100644 --- a/software/examples/ensogen.cpp +++ b/software/examples/ensogen.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, Carnegie Mellon University + * Copyright (c) 2024, Carnegie Mellon University * * Redistribution and use in source and binary forms, with or without * modification, are permitted (subject to the limitations in the disclaimer @@ -147,6 +147,7 @@ #define CMD_OPT_RTT_HIST_OFF "rtt-hist-offset" #define CMD_OPT_RTT_HIST_LEN "rtt-hist-len" #define CMD_OPT_STATS_DELAY "stats-delay" +#define CMD_OPT_PCIE_ADDR "pcie-addr" static volatile int keep_running = 1; static volatile int force_stop = 0; @@ -181,6 +182,7 @@ struct parsed_args_t { uint32_t rtt_hist_offset; uint32_t rtt_hist_len; uint32_t stats_delay; + std::string pcie_addr; }; /* @@ -337,6 +339,7 @@ static void print_usage(const char* program_name) { " [--rtt-hist-offset HIST_OFFSET]\n" " [--rtt-hist-len HIST_LEN]\n" " [--stats-delay STATS_DELAY]\n" + " [--pcie-addr PCIE_ADDR]\n\n" " PCAP_FILE: Pcap file with packets to transmit.\n" " RATE_NUM: Numerator of the rate used to transmit packets.\n" @@ -357,7 +360,8 @@ static void print_usage(const char* program_name) { " will still be saved, but there will be a\n" " performance penalty.\n" " --stats-delay: Delay between displayed stats in milliseconds\n" - " (default: %d).\n", + " (default: %d).\n" + " --pcie-addr: Specify the 
PCIe address of the NIC to use.\n", program_name, DEFAULT_CORE_ID, DEFAULT_NB_QUEUES, DEFAULT_HIST_OFFSET, DEFAULT_HIST_LEN, DEFAULT_STATS_DELAY); } @@ -378,6 +382,7 @@ enum { CMD_OPT_RTT_HIST_OFF_NUM, CMD_OPT_RTT_HIST_LEN_NUM, CMD_OPT_STATS_DELAY_NUM, + CMD_OPT_PCIE_ADDR_NUM, }; static const char short_options[] = ""; @@ -394,6 +399,7 @@ static const struct option long_options[] = { {CMD_OPT_RTT_HIST_OFF, required_argument, NULL, CMD_OPT_RTT_HIST_OFF_NUM}, {CMD_OPT_RTT_HIST_LEN, required_argument, NULL, CMD_OPT_RTT_HIST_LEN_NUM}, {CMD_OPT_STATS_DELAY, required_argument, NULL, CMD_OPT_STATS_DELAY_NUM}, + {CMD_OPT_PCIE_ADDR, required_argument, NULL, CMD_OPT_PCIE_ADDR_NUM}, {0, 0, 0, 0} }; @@ -459,6 +465,9 @@ static int parse_args(int argc, char** argv, case CMD_OPT_STATS_DELAY_NUM: parsed_args.stats_delay = atoi(optarg); break; + case CMD_OPT_PCIE_ADDR_NUM: + parsed_args.pcie_addr = optarg; + break; default: return -1; } @@ -712,7 +721,7 @@ int main(int argc, char** argv) { std::vector threads; - std::unique_ptr dev = Device::Create(); + std::unique_ptr dev = Device::Create(parsed_args.pcie_addr); if (!dev) { std::cerr << "Problem creating device" << std::endl; free(pkt_buf); @@ -1082,5 +1091,6 @@ int main(int argc, char** argv) { } free(pkt_buf); + dev.reset(); return ret; } From dc64a9f8dd4a6b706160e598aaed52cfba466660 Mon Sep 17 00:00:00 2001 From: Kshitij Rana Date: Fri, 12 Jan 2024 16:20:37 -0500 Subject: [PATCH 05/11] Added support for larger PCAP files and variable packet sizes --- software/examples/ensogen.cpp | 413 +++++++++++++++++++++------------- software/src/enso/helpers.cpp | 28 ++- 2 files changed, 277 insertions(+), 164 deletions(-) diff --git a/software/examples/ensogen.cpp b/software/examples/ensogen.cpp index 58aacc34..63d4ec12 100644 --- a/software/examples/ensogen.cpp +++ b/software/examples/ensogen.cpp @@ -33,12 +33,12 @@ * @file: ensogen.cpp * * @brief: Packet generator program that uses the Enso library to send and - * receive packets. 
It uses libpcap to read packets from a pcap file. The program - * assumes that the file contains only minimum sized packets. + * receive packets. It uses libpcap to read and process packets from a pcap file. * * Example: * - * sudo ./scripts/ensogen.sh ./scripts/sample_pcaps/2_64_1_2.pcap 100 + * sudo ./scripts/ensogen.sh ./scripts/sample_pcaps/2_64_1_2.pcap 100 \ + * --pcie-addr 65:00.0 * * */ @@ -90,9 +90,6 @@ // Minimum size of a packet aligned to cache (in bytes). #define MIN_PACKET_ALIGNED_SIZE 64 -// Minimum size of a raw packet read from the PCAP file (in bytes). -#define MIN_PACKET_RAW_SIZE 60 - // If defined, ignore received packets. // #define IGNORE_RX @@ -129,9 +126,6 @@ // Size of the buffer that we keep packets in. #define BUFFER_SIZE enso::kMaxTransferLen -// Num of min sized packets that would fit in a BUFFER_SIZE bytes buffer -#define MAX_PKTS_IN_BUFFER 2048 - // Number of transfers required to send a buffer full of packets. #define TRANSFERS_PER_BUFFER (((BUFFER_SIZE - 1) / enso::kMaxTransferLen) + 1) @@ -163,7 +157,7 @@ using enso::TxPipe; * Structure Definitions *****************************************************************************/ /* - * @brief: Structure to store the command linde arguments. + * @brief: Structure to store the command line arguments. * * */ struct parsed_args_t { @@ -186,19 +180,42 @@ struct parsed_args_t { }; /* - * @brief: Structure to store the PCAP related variables that need - * to be passed to the callback function. + * @brief: Structure to store an Enso TxPipe object and attributes related + * to it. 
+ * + * */ +struct EnsoTxPipe { + EnsoTxPipe(TxPipe *pipe) + : tx_pipe(pipe), nb_aligned_bytes(0), nb_raw_bytes(0), + nb_pkts(0) {} + // Enso TxPipe + TxPipe *tx_pipe; + // Number of cache aligned bytes in the pipe + uint32_t nb_aligned_bytes; + // Number of raw bytes in the pipe + uint32_t nb_raw_bytes; + // Number of packets in the pipe + uint32_t nb_pkts; +}; + + +/* + * @brief: Structure to store variables needed for processing the PCAP + * file and are passed to the callback function. * * */ struct PcapHandlerContext { - // Buffer to store the packet data + PcapHandlerContext(std::unique_ptr &dev_, pcap_t* pcap_) : + dev(dev_), buf(NULL), free_flits_cur_pipe(0), + pcap(pcap_) {} + // Pointer to Enso device + std::unique_ptr &dev; + // Pipes to store the packets from the PCAP file + std::vector tx_pipes; + // Pointer to the buffer of the current pipe uint8_t *buf; - // Total number of packet bytes aligned to the cache - uint32_t nb_bytes; - // Total number of raw packet bytes - uint32_t nb_good_bytes; - // Total number of packets - uint32_t nb_pkts; + // Total number of free flits in the current pipe + uint32_t free_flits_cur_pipe; // libpcap object associated with the opened PCAP file pcap_t* pcap; }; @@ -211,8 +228,8 @@ struct RxStats { explicit RxStats(uint32_t rtt_hist_len = 0, uint32_t rtt_hist_offset = 0) : pkts(0), bytes(0), - rtt_sum(0), nb_batches(0), + rtt_sum(0), rtt_hist_len(rtt_hist_len), rtt_hist_offset(rtt_hist_offset) { if (rtt_hist_len > 0) { @@ -236,15 +253,20 @@ struct RxStats { if (unlikely((rtt >= (rtt_hist_len - rtt_hist_offset)) || (rtt < rtt_hist_offset))) { backup_rtt_hist[rtt]++; - } else { + } + else { rtt_hist[rtt - rtt_hist_offset]++; } } + // Number of packets received uint64_t pkts; + // Number of bytes received uint64_t bytes; - uint64_t rtt_sum; + // Number of RxNotifications or batches uint64_t nb_batches; + // RTT calculation related + uint64_t rtt_sum; const uint32_t rtt_hist_len; const uint32_t rtt_hist_offset; uint64_t* 
rtt_hist; @@ -261,8 +283,11 @@ struct RxArgs { enable_rtt(enbl_rtt), enable_rtt_history(enbl_rtt_hist), dev(dev_) {} + // Check for whether RTT needs to be calculated bool enable_rtt; + // Check for whether RTT history needs to be calculated bool enable_rtt_history; + // Pointer to the Enso device std::unique_ptr &dev; }; @@ -272,7 +297,9 @@ struct RxArgs { * */ struct TxStats { TxStats() : pkts(0), bytes(0) {} + // Number of packets received uint64_t pkts; + // Number of bytes received uint64_t bytes; }; @@ -282,20 +309,30 @@ struct TxStats { * * */ struct TxArgs { - TxArgs(TxPipe *pipe, uint64_t pkts_in_buf, uint64_t total_pkts_to_send, - std::unique_ptr &dev_) - : tx_pipe(pipe), - pkts_in_pipe(pkts_in_buf), - total_remaining_pkts(total_pkts_to_send), + TxArgs(std::vector &pipes, uint64_t total_aligned_bytes, + uint64_t total_raw_bytes, uint64_t pkts_in_last_pipe, + uint32_t pipes_size, std::unique_ptr &dev_) + : tx_pipes(pipes), + total_remaining_aligned_bytes(total_aligned_bytes), + total_remaining_raw_bytes(total_raw_bytes), + nb_pkts_in_last_pipe(pkts_in_last_pipe), + cur_ind(0), + total_pipes(pipes_size), transmissions_pending(0), ignored_reclaims(0), dev(dev_) {} - // TxPipe associated with the thread - TxPipe *tx_pipe; - // Total number of packets in the pipe - uint64_t pkts_in_pipe; - // Total number of pakcets that need to be sent - uint64_t total_remaining_pkts; + // TxPipes handled by the thread + std::vector &tx_pipes; + // Number of aligned bytes that need to be sent + uint64_t total_remaining_aligned_bytes; + // Number of raw bytes that need to be sent + uint64_t total_remaining_raw_bytes; + // Number of packets in the last pipe - needed for stats calculation + uint64_t nb_pkts_in_last_pipe; + // Index in tx_pipes vector. 
Points to the current pipe being sent + uint32_t cur_ind; + // Total number of pipes in tx_pipes vector + uint32_t total_pipes; // Total number of notifications created and sent by the application uint32_t transmissions_pending; // Used to track the number of times the thread did not check for notification @@ -496,8 +533,7 @@ static int parse_args(int argc, char** argv, /* * @brief: libpcap callback registered by the main function. Called for each - * packet present in the PCAP file by libpcap. . We assume that the PCAP file - * provided by the user has `MAX_PKTS_IN_BUFFER` number of packets at max. + * packet present in the PCAP file by libpcap. * * @param user: Structure allocated in main to read and store relevant information. * @param pkt_hdr: Contains packet metadata like timestamp, length, etc. (UNUSED) @@ -506,7 +542,7 @@ static int parse_args(int argc, char** argv, * */ void pcap_pkt_handler(u_char* user, const struct pcap_pkthdr* pkt_hdr, const u_char* pkt_bytes) { - (void)pkt_hdr; + (void) pkt_hdr; struct PcapHandlerContext* context = (struct PcapHandlerContext*)user; const struct ether_header* l2_hdr = (struct ether_header*)pkt_bytes; @@ -515,19 +551,35 @@ void pcap_pkt_handler(u_char* user, const struct pcap_pkthdr* pkt_hdr, free(context->buf); exit(8); } - context->nb_pkts++; - if(context->nb_pkts > MAX_PKTS_IN_BUFFER) { - std::cerr << "Only " << MAX_PKTS_IN_BUFFER << " can be in the PCAP file" - << std::endl; - free(context->buf); - exit(9); - } uint32_t len = enso::get_pkt_len(pkt_bytes); uint32_t nb_flits = (len - 1) / MIN_PACKET_ALIGNED_SIZE + 1; - memcpy(context->buf + context->nb_bytes, pkt_bytes, len); - context->nb_bytes += nb_flits * MIN_PACKET_ALIGNED_SIZE; - context->nb_good_bytes += len; + + if (nb_flits > context->free_flits_cur_pipe) { + // initialize a new pipe + TxPipe* tx_pipe = context->dev->AllocateTxPipe(); + if (!tx_pipe) { + std::cerr << "Problem creating TX pipe" << std::endl; + pcap_breakloop(context->pcap); + return; + } + 
struct EnsoTxPipe enso_tx_pipe(tx_pipe); + context->tx_pipes.push_back(enso_tx_pipe); + context->free_flits_cur_pipe = BUFFER_SIZE / MIN_PACKET_ALIGNED_SIZE; + context->buf = tx_pipe->buf(); + } + + // We copy the packets in the pipe's buffer in multiples of 64 bytes + // or MIN_PACKET_ALIGNED SIZE. However, we also keep track of the number + // of raw bytes on a per pipe basis since we need it for stats calculation. + struct EnsoTxPipe& tx_pipe = context->tx_pipes.back(); + uint8_t* dest = context->buf + tx_pipe.nb_aligned_bytes; + memcpy(dest, pkt_bytes, len); + + tx_pipe.nb_aligned_bytes += nb_flits * MIN_PACKET_ALIGNED_SIZE; + tx_pipe.nb_raw_bytes += len; + tx_pipe.nb_pkts++; + context->free_flits_cur_pipe -= nb_flits; } /* @@ -567,14 +619,14 @@ inline uint64_t receive_pkts(const struct RxArgs& rx_args, } recv_bytes += pkt_len; - ++nb_pkts; + nb_pkts++; } uint32_t batch_length = batch.processed_bytes(); rx_pipe->ConfirmBytes(batch_length); rx_stats.pkts += nb_pkts; - ++(rx_stats.nb_batches); + rx_stats.nb_batches++; rx_stats.bytes += recv_bytes; rx_pipe->Clear(); @@ -597,35 +649,50 @@ inline uint64_t receive_pkts(const struct RxArgs& rx_args, * */ inline void transmit_pkts(struct TxArgs& tx_args, struct TxStats& tx_stats) { - // decide whether we need to send an entire buffer worth of packets - // or less than that based on user request - uint32_t nb_pkts_to_send = std::min(tx_args.pkts_in_pipe, - tx_args.total_remaining_pkts); - // the packets are copied in the main buffer based on the minimum packet size - uint32_t transmission_length = nb_pkts_to_send * MIN_PACKET_ALIGNED_SIZE; - - // send the packets - uint64_t buf_phys_addr = tx_args.tx_pipe->GetBufPhysAddr(); - tx_args.dev->SendOnly(buf_phys_addr, transmission_length); - - // update the stats - // the stats need be calculated based on good bytes - // rather than the transmission length - tx_stats.pkts += nb_pkts_to_send; - tx_stats.bytes += nb_pkts_to_send * MIN_PACKET_RAW_SIZE; - 
tx_args.total_remaining_pkts -= nb_pkts_to_send; - if(tx_args.total_remaining_pkts == 0) { - keep_running = 0; - return; + // Avoid transmitting new data when too many TX notifications are pending + const uint32_t buf_fill_thresh = enso::kNotificationBufSize + - TRANSFERS_PER_BUFFER - 1; + if (likely(tx_args.transmissions_pending < buf_fill_thresh)) { + struct EnsoTxPipe &cur_pipe = tx_args.tx_pipes[tx_args.cur_ind]; + uint32_t transmission_length = std::min(tx_args.total_remaining_aligned_bytes, + (uint64_t) cur_pipe.nb_aligned_bytes); + uint32_t transmission_raw_length = std::min(tx_args.total_remaining_raw_bytes, + (uint64_t) cur_pipe.nb_raw_bytes); + + uint64_t buf_phys_addr = cur_pipe.tx_pipe->GetBufPhysAddr(); + tx_args.dev->SendOnly(buf_phys_addr, transmission_length); + tx_args.transmissions_pending++; + tx_args.total_remaining_aligned_bytes -= transmission_length; + tx_args.total_remaining_raw_bytes -= transmission_raw_length; + + // update the stats + // the stats need be calculated based on raw bytes + tx_stats.bytes += transmission_raw_length; + if(tx_args.total_remaining_aligned_bytes == 0) { + keep_running = 0; + tx_stats.pkts += tx_args.nb_pkts_in_last_pipe; + return; + } + tx_stats.pkts += cur_pipe.nb_pkts; + + // move to the next pipe + tx_args.cur_ind = (tx_args.cur_ind + 1) % tx_args.total_pipes; } // Reclaim TX notification buffer space. 
if ((tx_args.transmissions_pending > (enso::kNotificationBufSize / 4))) { if (tx_args.ignored_reclaims > TX_RECLAIM_DELAY) { tx_args.ignored_reclaims = 0; - tx_args.transmissions_pending -= tx_args.dev->ProcessCompletionsOnly(); - } else { - ++tx_args.ignored_reclaims; + uint32_t num_processed = tx_args.dev->ProcessCompletionsOnly(); + if(num_processed > tx_args.transmissions_pending) { + tx_args.transmissions_pending = 0; + } + else { + tx_args.transmissions_pending -= num_processed; + } + } + else { + tx_args.ignored_reclaims++; } } } @@ -637,8 +704,13 @@ inline void transmit_pkts(struct TxArgs& tx_args, * * */ inline void reclaim_all_buffers(struct TxArgs& tx_args) { - while (tx_args.transmissions_pending) { - tx_args.transmissions_pending -= tx_args.dev->ProcessCompletionsOnly(); + while (tx_args.transmissions_pending > 0) { + uint32_t num_processed = tx_args.dev->ProcessCompletionsOnly(); + if(num_processed > tx_args.transmissions_pending) { + tx_args.transmissions_pending = 0; + break; + } + tx_args.transmissions_pending -= num_processed; } } @@ -653,6 +725,12 @@ int main(int argc, char** argv) { return 1; } + std::unique_ptr dev = Device::Create(parsed_args.pcie_addr); + if (!dev) { + std::cerr << "Problem creating device" << std::endl; + exit(2); + } + char errbuf[PCAP_ERRBUF_SIZE]; pcap_t* pcap = pcap_open_offline(parsed_args.pcap_file.c_str(), errbuf); @@ -661,21 +739,11 @@ int main(int argc, char** argv) { return 2; } - // we copy the packets in this buffer using libpcap - uint8_t *pkt_buf = (uint8_t *) malloc(BUFFER_SIZE); - if(pkt_buf == NULL) { - std::cerr << "Could not allocate packet buffer" << std::endl; - exit(1); - } + struct PcapHandlerContext context(dev, pcap); - struct PcapHandlerContext context; - context.pcap = pcap; - context.buf = pkt_buf; - context.nb_bytes = 0; - context.nb_good_bytes = 0; - context.nb_pkts = 0; + std::vector &tx_pipes = context.tx_pipes; - // Initialize packet buffers with packets read from pcap file. 
+ // Initialize pipes with packets read from pcap file. if (pcap_loop(pcap, 0, pcap_pkt_handler, (u_char*)&context) < 0) { std::cerr << "Error while reading pcap (" << pcap_geterr(pcap) << ")" << std::endl; @@ -684,26 +752,78 @@ int main(int argc, char** argv) { // For small pcaps we copy the same packets over the remaining of the // buffer. This reduces the number of transfers that we need to issue. - if (context.nb_bytes < BUFFER_SIZE) { - uint32_t original_buf_length = context.nb_bytes; - uint32_t original_nb_pkts = context.nb_pkts; - uint32_t original_good_bytes = context.nb_good_bytes; - while ((context.nb_bytes + original_buf_length) <= BUFFER_SIZE) { - memcpy(pkt_buf + context.nb_bytes, pkt_buf, original_buf_length); - context.nb_bytes += original_buf_length; - context.nb_pkts += original_nb_pkts; - context.nb_good_bytes += original_good_bytes; + // If there is only one pipe, nb_bytes contains the number of bytes + // in that pipe only. + if ((tx_pipes.size() == 1) && + (tx_pipes.front().nb_aligned_bytes < BUFFER_SIZE / 2)) { + struct EnsoTxPipe& tx_pipe = tx_pipes.front(); + uint8_t *pipe_buf = tx_pipe.tx_pipe->buf(); + uint32_t cur_buf_length = tx_pipe.nb_aligned_bytes; + uint32_t original_buf_length = cur_buf_length; + uint32_t original_nb_pkts = tx_pipe.nb_pkts; + uint32_t original_raw_bytes = tx_pipe.nb_raw_bytes; + while ((cur_buf_length + original_buf_length) <= BUFFER_SIZE) { + memcpy(pipe_buf + cur_buf_length, pipe_buf, original_buf_length); + cur_buf_length += original_buf_length; + tx_pipe.nb_pkts += original_nb_pkts; + tx_pipe.nb_raw_bytes += original_raw_bytes; } + tx_pipe.nb_aligned_bytes = cur_buf_length; } - uint64_t total_pkts_in_buffer = context.nb_pkts; - uint64_t total_pkts_to_send; + // calculate total aligned bytes, raw bytes and packets in all the pipes + uint64_t total_pkts_in_pipes = 0; + uint64_t total_aligned_bytes_in_pipes = 0; + uint64_t total_raw_bytes_in_pipes = 0; + for (auto& pipe : tx_pipes) { + total_pkts_in_pipes += 
pipe.nb_pkts; + total_aligned_bytes_in_pipes += pipe.nb_aligned_bytes; + total_raw_bytes_in_pipes += pipe.nb_raw_bytes; + } + + // Handling the --count option. calculate the number of bytes that + // need to be sent. if the user requests 'x' packets, we start sending + // from the start of the first pipe (order is the same as the PCAP file) + // and wrap around if x is greater than total_pkts_in_pipes. + uint64_t total_aligned_bytes_to_send; + uint64_t total_raw_bytes_to_send; + uint64_t pkts_in_last_pipe = 0; if (parsed_args.nb_pkts > 0) { - total_pkts_to_send = parsed_args.nb_pkts; - } else { + uint64_t nb_full_iters = parsed_args.nb_pkts / total_pkts_in_pipes; + uint64_t nb_pkts_remaining = parsed_args.nb_pkts % total_pkts_in_pipes; + + total_aligned_bytes_to_send = nb_full_iters * total_aligned_bytes_in_pipes; + total_raw_bytes_to_send = nb_full_iters * total_raw_bytes_in_pipes; + + if (nb_pkts_remaining == 0) { + pkts_in_last_pipe = tx_pipes.back().nb_pkts; + } + + // calculate the length of the first 'x % total_pkts_in_pipes' packets + for (auto& pipe : tx_pipes) { + if (nb_pkts_remaining < pipe.nb_pkts) { + uint8_t* pkt = pipe.tx_pipe->buf(); + while (nb_pkts_remaining > 0) { + uint16_t pkt_len = enso::get_pkt_len(pkt); + uint16_t nb_flits = (pkt_len - 1) / 64 + 1; + + total_aligned_bytes_to_send += nb_flits * 64; + nb_pkts_remaining--; + pkts_in_last_pipe++; + + pkt = enso::get_next_pkt(pkt); + } + break; + } + total_aligned_bytes_to_send += pipe.nb_aligned_bytes; + nb_pkts_remaining -= pipe.nb_pkts; + } + } + else { // Treat nb_pkts == 0 as unbounded. The following value should be enough // to send 64-byte packets for around 400 years using Tb Ethernet. 
- total_pkts_to_send = 0xffffffffffffffff; + total_aligned_bytes_to_send = 0xffffffffffffffff; + total_raw_bytes_to_send = 0xffffffffffffffff; } uint32_t rtt_hist_len = 0; @@ -721,13 +841,6 @@ int main(int argc, char** argv) { std::vector threads; - std::unique_ptr dev = Device::Create(parsed_args.pcie_addr); - if (!dev) { - std::cerr << "Problem creating device" << std::endl; - free(pkt_buf); - exit(2); - } - // When using single_core we use the same thread for RX and TX, otherwise we // launch separate threads for RX and TX. if (!parsed_args.single_core) { @@ -736,7 +849,7 @@ int main(int argc, char** argv) { std::vector rx_pipes; - for (uint32_t i = 0; i < parsed_args.nb_queues; ++i) { + for (uint32_t i = 0; i < parsed_args.nb_queues; i++) { // we create fallback queues by passing true in AllocateRxPipe RxPipe* rx_pipe = dev->AllocateRxPipe(true); if (!rx_pipe) { @@ -774,8 +887,9 @@ int main(int argc, char** argv) { while (!force_stop && (nb_iters_no_pkt < ITER_NO_PKT_THRESH)) { uint64_t nb_pkts = receive_pkts(rx_args, rx_stats); if (unlikely(nb_pkts == 0)) { - ++nb_iters_no_pkt; - } else { + nb_iters_no_pkt++; + } + else { nb_iters_no_pkt = 0; } } @@ -792,30 +906,17 @@ int main(int argc, char** argv) { }); std::thread tx_thread = std::thread( - [pkt_buf, total_pkts_in_buffer, total_pkts_to_send, - &parsed_args, &tx_stats, &dev] { + [total_aligned_bytes_to_send, total_raw_bytes_to_send, + pkts_in_last_pipe, &parsed_args, &tx_stats, &dev, &tx_pipes] { std::this_thread::sleep_for(std::chrono::seconds(1)); - TxPipe* tx_pipe = dev->AllocateTxPipe(); - if (!tx_pipe) { - std::cerr << "Problem creating TX pipe" << std::endl; - exit(3); - } - // allocate the bytes in the TX pipe and copy the required - // number of bytes from the main buffer - uint32_t pipe_alloc_len = total_pkts_in_buffer * MIN_PACKET_ALIGNED_SIZE; - uint8_t* pipe_buf = tx_pipe->AllocateBuf(pipe_alloc_len); - if(pipe_buf == NULL) { - std::cout << "Buffer allocation for TX pipe failed" << std::endl; - 
return; - } - memcpy(pipe_buf, pkt_buf, pipe_alloc_len); - while (!rx_ready) continue; std::cout << "Running TX on core " << sched_getcpu() << std::endl; - TxArgs tx_args(tx_pipe, total_pkts_in_buffer, total_pkts_to_send, dev); + TxArgs tx_args(tx_pipes, total_aligned_bytes_to_send, + total_raw_bytes_to_send, pkts_in_last_pipe, + tx_pipes.size(), dev); while (keep_running) { transmit_pkts(tx_args, tx_stats); @@ -850,17 +951,18 @@ int main(int argc, char** argv) { threads.push_back(std::move(rx_thread)); threads.push_back(std::move(tx_thread)); - - } else { + } + else { // Send and receive packets within the same thread. std::thread rx_tx_thread = std::thread( - [pkt_buf, total_pkts_in_buffer, total_pkts_to_send, - &parsed_args, &tx_stats, &rx_stats, &dev] { + [total_aligned_bytes_to_send, total_raw_bytes_to_send, + pkts_in_last_pipe, &parsed_args, &tx_stats, &rx_stats, + &dev, &tx_pipes] { std::this_thread::sleep_for(std::chrono::milliseconds(500)); std::vector rx_pipes; - for (uint32_t i = 0; i < parsed_args.nb_queues; ++i) { + for (uint32_t i = 0; i < parsed_args.nb_queues; i++) { // we create fallback queues by passing true in AllocateRxPipe RxPipe* rx_pipe = dev->AllocateRxPipe(true); if (!rx_pipe) { @@ -887,23 +989,9 @@ int main(int argc, char** argv) { parsed_args.enable_rtt_history, dev); - TxPipe* tx_pipe = dev->AllocateTxPipe(); - if (!tx_pipe) { - std::cerr << "Problem creating TX pipe" << std::endl; - exit(3); - } - - // allocate the bytes in the TX pipe and copy the required - // number of bytes from the main buffer - uint32_t pipe_alloc_len = total_pkts_in_buffer * MIN_PACKET_ALIGNED_SIZE; - uint8_t* pipe_buf = tx_pipe->AllocateBuf(pipe_alloc_len); - if(pipe_buf == NULL) { - std::cout << "Buffer allocation for TX pipe failed" << std::endl; - return; - } - memcpy(pipe_buf, pkt_buf, pipe_alloc_len); - - TxArgs tx_args(tx_pipe, total_pkts_in_buffer, total_pkts_to_send, dev); + TxArgs tx_args(tx_pipes, total_aligned_bytes_to_send, + 
total_raw_bytes_to_send, pkts_in_last_pipe, + tx_pipes.size(), dev); rx_ready = 1; @@ -920,8 +1008,9 @@ int main(int argc, char** argv) { while (!force_stop && (nb_iters_no_pkt < ITER_NO_PKT_THRESH)) { uint64_t nb_pkts = receive_pkts(rx_args, rx_stats); if (unlikely(nb_pkts == 0)) { - ++nb_iters_no_pkt; - } else { + nb_iters_no_pkt++; + } + else { nb_iters_no_pkt = 0; } } @@ -957,8 +1046,8 @@ int main(int argc, char** argv) { std::ofstream save_file; save_file.open(parsed_args.save_file); save_file - << "rx_goodput_mbps,rx_tput_mbps,rx_pkt_rate_kpps,rx_bytes,rx_packets," - "tx_goodput_mbps,tx_tput_mbps,tx_pkt_rate_kpps,tx_bytes,tx_packets"; + << "rx_rawput_mbps,rx_tput_mbps,rx_pkt_rate_kpps,rx_bytes,rx_packets," + "tx_rawput_mbps,tx_tput_mbps,tx_pkt_rate_kpps,tx_bytes,tx_packets"; if (parsed_args.enable_rtt) { save_file << ",mean_rtt_ns"; } @@ -991,15 +1080,15 @@ int main(int argc, char** argv) { double interval_s = (double) parsed_args.stats_delay / ONE_THOUSAND; uint64_t rx_pkt_diff = rx_pkts - last_rx_pkts; - uint64_t rx_goodput_mbps = + uint64_t rx_rawput_mbps = (rx_bytes - last_rx_bytes) * 8. / (ONE_MILLION * interval_s); uint64_t rx_pkt_rate = (rx_pkt_diff / interval_s); uint64_t rx_pkt_rate_kpps = rx_pkt_rate / ONE_THOUSAND; - uint64_t rx_tput_mbps = rx_goodput_mbps + FPGA_PACKET_OVERHEAD + uint64_t rx_tput_mbps = rx_rawput_mbps + FPGA_PACKET_OVERHEAD * 8 * rx_pkt_rate / ONE_MILLION; uint64_t tx_pkt_diff = tx_pkts - last_tx_pkts; - uint64_t tx_goodput_mbps = + uint64_t tx_rawput_mbps = (tx_bytes - last_tx_bytes) * 8. / (ONE_MILLION * interval_s); uint64_t tx_tput_mbps = (tx_bytes - last_tx_bytes + tx_pkt_diff * FPGA_PACKET_OVERHEAD) * 8. 
@@ -1011,7 +1100,8 @@ int main(int argc, char** argv) { uint64_t rtt_ns; if (rx_pkt_diff != 0) { rtt_ns = (rtt_sum_ns - last_aggregated_rtt_ns) / rx_pkt_diff; - } else { + } + else { rtt_ns = 0; } @@ -1034,9 +1124,9 @@ int main(int argc, char** argv) { if (parsed_args.save) { std::ofstream save_file; save_file.open(parsed_args.save_file, std::ios_base::app); - save_file << rx_goodput_mbps << "," << rx_tput_mbps << "," + save_file << rx_rawput_mbps << "," << rx_tput_mbps << "," << rx_pkt_rate_kpps << "," << rx_bytes << "," << rx_pkts << "," - << tx_goodput_mbps << "," << tx_pkt_rate_kpps << "," + << tx_rawput_mbps << "," << tx_pkt_rate_kpps << "," << tx_tput_mbps << "," << tx_bytes << "," << tx_pkts; if (parsed_args.enable_rtt) { save_file << "," << rtt_ns; @@ -1058,7 +1148,7 @@ int main(int argc, char** argv) { std::ofstream hist_file; hist_file.open(parsed_args.hist_file); - for (uint32_t rtt = 0; rtt < parsed_args.rtt_hist_len; ++rtt) { + for (uint32_t rtt = 0; rtt < parsed_args.rtt_hist_len; rtt++) { if (rx_stats.rtt_hist[rtt] != 0) { uint32_t corrected_rtt = (rtt + parsed_args.rtt_hist_offset) * enso::kNsPerTimestampCycle; @@ -1090,7 +1180,6 @@ int main(int argc, char** argv) { thread.join(); } - free(pkt_buf); dev.reset(); return ret; } diff --git a/software/src/enso/helpers.cpp b/software/src/enso/helpers.cpp index ea57d954..9a90492e 100644 --- a/software/src/enso/helpers.cpp +++ b/software/src/enso/helpers.cpp @@ -43,6 +43,18 @@ #include #include #include +#include + +/****************************************************************************** + * Macros + *****************************************************************************/ +// Scientific notation for 10^6, treated as double. Used for stats calculations. 
+#define ONE_MILLION 1e6 +// FPGA packet overhead for 64 byte packets +#define FPGA_PACKET_OVERHEAD_64 20 +// FPGA packet overhead for 1536 byte packets +#define FPGA_PACKET_OVERHEAD_1536 2 + namespace enso { uint16_t get_bdf_from_pcie_addr(const std::string& pcie_addr) { @@ -143,8 +155,20 @@ int set_core_id(std::thread& thread, int core_id) { static void print_stats_line(uint64_t recv_bytes, uint64_t nb_batches, uint64_t nb_pkts, uint64_t delta_bytes, uint64_t delta_pkts, uint64_t delta_batches) { - std::cout << std::dec << (delta_bytes + delta_pkts * 20) * 8. / 1e6 - << " Mbps " << delta_pkts / 1e6 << " Mpps " << recv_bytes + uint64_t rx_tput_mbps = (delta_bytes * 8.) / ONE_MILLION; + if(rx_tput_mbps > 0) { + uint32_t packet_size = round((long double) delta_bytes / delta_pkts); + if(packet_size == 64) { + rx_tput_mbps = rx_tput_mbps + + (FPGA_PACKET_OVERHEAD_64 * delta_pkts * 8) / ONE_MILLION; + } + else if(packet_size == 1536) { + rx_tput_mbps = rx_tput_mbps + + (FPGA_PACKET_OVERHEAD_1536 * delta_pkts * 8) / ONE_MILLION; + } + } + std::cout << std::dec << rx_tput_mbps + << " Mbps " << delta_pkts / ONE_MILLION << " Mpps " << recv_bytes << " B " << nb_batches << " batches " << nb_pkts << " pkts"; if (delta_batches > 0) { From db28d7d61564dc17fc4cb868304fd5c73ae53faa Mon Sep 17 00:00:00 2001 From: Kshitij Rana Date: Fri, 12 Jan 2024 16:45:54 -0500 Subject: [PATCH 06/11] Load bitstream enhancement --- scripts/load_bitstream.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/scripts/load_bitstream.sh b/scripts/load_bitstream.sh index 353abe12..781238b2 100755 --- a/scripts/load_bitstream.sh +++ b/scripts/load_bitstream.sh @@ -8,8 +8,22 @@ DEVICE_ID="0000" FPGA_NB=${1:-"1-13"} +BITSTREAM_NAME="enso.sof" + cd $SCRIPT_DIR +# Check and download the bitstream +if ! [ -f $PWD/$BITSTREAM_NAME ]; then + $PWD/update_bitstream.sh --download + if [ $? -eq 0 ]; then + echo "Programming bitstream now..." + else + echo "Failed: Could not download bitstream!" 
+ exit 1 + fi +fi + + # We use taskset and chrt to benefit from multiple cores even when they are # isolated from the linux scheduler. This significantly speeds up loading the # bitstream. Note that we use all but the last core. From d3e7c88a7a046a48652c4c6ea758f1aeb32e7ada Mon Sep 17 00:00:00 2001 From: Kshitij Rana Date: Tue, 16 Jan 2024 15:24:06 -0500 Subject: [PATCH 07/11] Cleaned up socket.cpp --- software/src/enso/meson.build | 1 - software/src/enso/socket.cpp | 254 ---------------------------------- 2 files changed, 255 deletions(-) delete mode 100644 software/src/enso/socket.cpp diff --git a/software/src/enso/meson.build b/software/src/enso/meson.build index 0f86c06d..c644f842 100644 --- a/software/src/enso/meson.build +++ b/software/src/enso/meson.build @@ -4,7 +4,6 @@ enso_sources = files( 'helpers.cpp', 'ixy_helpers.cpp', 'pipe.cpp', - 'socket.cpp', ) project_sources += enso_sources diff --git a/software/src/enso/socket.cpp b/software/src/enso/socket.cpp deleted file mode 100644 index 7d34f09e..00000000 --- a/software/src/enso/socket.cpp +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Copyright (c) 2022, Carnegie Mellon University - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted (subject to the limitations in the disclaimer - * below) provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * - * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY - * THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND - * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT - * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * @file - * @brief Socket-like API. - * @deprecated Use the API defined in `pipe.h` instead. - * - * @author Hugo Sadok - */ - -#include "enso/socket.h" - -#include -#include - -#include -#include - -#include "../pcie.h" - -namespace enso { - -static struct NotificationBufPair notification_buf_pair[MAX_NB_CORES]; - -// TODO(sadok) replace with hash table? -static struct SocketInternal open_sockets[MAX_NB_SOCKETS]; -static unsigned int nb_open_sockets = 0; -static uint16_t bdf = 0; - -// HACK(sadok): We need a better way to specify the BDF. 
-void set_bdf(uint16_t bdf_) { bdf = bdf_; } - -int socket([[maybe_unused]] int domain, [[maybe_unused]] int type, - [[maybe_unused]] int protocol, bool fallback) noexcept { - if (unlikely(nb_open_sockets >= MAX_NB_SOCKETS)) { - std::cerr << "Maximum number of sockets reached" << std::endl; - return -1; - } - - struct SocketInternal socket_entry; - - struct NotificationBufPair* nb_pair = ¬ification_buf_pair[sched_getcpu()]; - socket_entry.notification_buf_pair = nb_pair; - - struct RxEnsoPipeInternal* enso_pipe = &socket_entry.enso_pipe; - - int bar = -1; - int socket_id = dma_init(nb_pair, enso_pipe, bdf, bar, - std::string(kHugePageDefaultPrefix), fallback); - if (unlikely(socket_id < 0)) { - std::cerr << "Problem initializing DMA" << std::endl; - return -1; - } - - open_sockets[socket_id] = socket_entry; - - // FIXME(sadok): Use __sync_fetch_and_add to update atomically. - ++nb_open_sockets; - - return socket_id; -} - -int bind(int sockfd, const struct sockaddr* addr, socklen_t addrlen) noexcept { - (void)addrlen; // Avoid unused warnings. - struct SocketInternal* socket = &open_sockets[sockfd]; - sockaddr_in* addr_in = (sockaddr_in*)addr; - - uint32_t enso_pipe_id = get_enso_pipe_id_from_socket(socket); - - // TODO(sadok): insert flow entry from kernel. - insert_flow_entry(socket->notification_buf_pair, ntohs(addr_in->sin_port), 0, - ntohl(addr_in->sin_addr.s_addr), 0, - 0x11, // TODO(sadok): support protocols other than UDP. - enso_pipe_id); - - return 0; -} - -/* - * Return physical address of the buffer associated with the socket. - */ -uint64_t get_socket_phys_addr(int sockfd) { - return open_sockets[sockfd].enso_pipe.buf_phys_addr; -} - -/* - * Return virtual address of the buffer associated with the socket. - */ -void* get_socket_virt_addr(int sockfd) { - return (void*)open_sockets[sockfd].enso_pipe.buf; -} - -/* - * Convert a socket buffer virtual address to physical address. 
- */ -uint64_t convert_buf_addr_to_phys(int sockfd, void* addr) { - return (uint64_t)addr + open_sockets[sockfd].enso_pipe.phys_buf_offset; -} - -ssize_t recv(int sockfd, void* buf, size_t len, int flags) { - (void)len; - (void)flags; - - void* ring_buf; - struct SocketInternal* socket = &open_sockets[sockfd]; - struct RxEnsoPipeInternal* enso_pipe = &socket->enso_pipe; - struct NotificationBufPair* notification_buf_pair = - socket->notification_buf_pair; - - get_new_tails(notification_buf_pair); - - ssize_t bytes_received = - get_next_batch_from_queue(enso_pipe, notification_buf_pair, &ring_buf); - - if (unlikely(bytes_received <= 0)) { - return bytes_received; - } - - memcpy(buf, ring_buf, bytes_received); - - advance_pipe(enso_pipe, bytes_received); - - return bytes_received; -} - -ssize_t recv_zc(int sockfd, void** buf, size_t len, int flags) { - (void)len; - (void)flags; - - struct SocketInternal* socket = &open_sockets[sockfd]; - struct RxEnsoPipeInternal* enso_pipe = &socket->enso_pipe; - struct NotificationBufPair* notification_buf_pair = - socket->notification_buf_pair; - - get_new_tails(notification_buf_pair); - - return get_next_batch_from_queue(enso_pipe, notification_buf_pair, buf); -} - -ssize_t recv_select(int ref_sockfd, int* sockfd, void** buf, size_t len, - int flags) { - (void)len; - (void)flags; - - struct NotificationBufPair* notification_buf_pair = - open_sockets[ref_sockfd].notification_buf_pair; - return get_next_batch(notification_buf_pair, open_sockets, sockfd, buf); -} - -ssize_t send(int sockfd, uint64_t phys_addr, size_t len, int flags) { - (void)flags; - return send_to_queue(open_sockets[sockfd].notification_buf_pair, phys_addr, - len); -} - -uint32_t get_completions(int ref_sockfd) { - struct NotificationBufPair* notification_buf_pair = - open_sockets[ref_sockfd].notification_buf_pair; - return get_unreported_completions(notification_buf_pair); -} - -void free_enso_pipe(int sockfd, size_t len) { - 
advance_pipe(&(open_sockets[sockfd].enso_pipe), len); -} - -int enable_device_timestamp(int ref_sockfd) { - if (nb_open_sockets == 0) { - return -2; - } - return enable_timestamp(open_sockets[ref_sockfd].notification_buf_pair); -} - -int disable_device_timestamp(int ref_sockfd) { - if (nb_open_sockets == 0) { - return -2; - } - return disable_timestamp(open_sockets[ref_sockfd].notification_buf_pair); -} - -int enable_device_rate_limit(int ref_sockfd, uint16_t num, uint16_t den) { - if (nb_open_sockets == 0) { - return -2; - } - return enable_rate_limit(open_sockets[ref_sockfd].notification_buf_pair, num, - den); -} - -int disable_device_rate_limit(int ref_sockfd) { - if (nb_open_sockets == 0) { - return -2; - } - return disable_rate_limit(open_sockets[ref_sockfd].notification_buf_pair); -} - -int enable_device_round_robin(int ref_sockfd) { - if (nb_open_sockets == 0) { - return -2; - } - return enable_round_robin(open_sockets[ref_sockfd].notification_buf_pair); -} - -int disable_device_round_robin(int ref_sockfd) { - if (nb_open_sockets == 0) { - return -2; - } - return disable_round_robin(open_sockets[ref_sockfd].notification_buf_pair); -} - -int shutdown(int sockfd, int how __attribute__((unused))) noexcept { - dma_finish(&open_sockets[sockfd]); - - // TODO(sadok): Remove entry from the NIC flow table. 
- - --nb_open_sockets; - - return 0; -} - -void print_sock_stats(int sockfd) { - struct SocketInternal* socket = &open_sockets[sockfd]; - print_stats(socket, socket->enso_pipe.id == 0); -} - -} // namespace enso From 42b9588da2b4a65088e745e867882f44ce1da55b Mon Sep 17 00:00:00 2001 From: Kshitij Rana Date: Mon, 29 Jan 2024 09:01:30 -0500 Subject: [PATCH 08/11] Updated documentation --- software/examples/ensogen.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/software/examples/ensogen.cpp b/software/examples/ensogen.cpp index 63d4ec12..a1cd7290 100644 --- a/software/examples/ensogen.cpp +++ b/software/examples/ensogen.cpp @@ -533,7 +533,7 @@ static int parse_args(int argc, char** argv, /* * @brief: libpcap callback registered by the main function. Called for each - * packet present in the PCAP file by libpcap. + * packet present in the PCAP file. * * @param user: Structure allocated in main to read and store relevant information. * @param pkt_hdr: Contains packet metadata like timestamp, length, etc. (UNUSED) @@ -636,12 +636,9 @@ inline uint64_t receive_pkts(const struct RxArgs& rx_args, } /* - * @brief: This function is called to send packets. Note that the approach we - * use here to send packets is different from the one defined in Enso's library - * using the TxPipe abstraction. This approach dissociates the sending part - * (creating TX notifications) from processing the completions (which TX notif- - * ications have been consumed by the NIC). It needed to be done this way to meet - * the performance requirements (full 100 G) for single core. + * @brief: This function is called periodically to send packets and update + * the TX stats. In case too many transmissions are already pending it will + * wait for the NIC to process them before sending another batch. * * @param tx_args: Arguments needed by this function. See TxArgs definition. * @param tx_stats: Tx stats that need to be updated in every iteration. 
From 17821198f6cc74714cb366de227c1a11e21c7c13 Mon Sep 17 00:00:00 2001 From: Kshitij Rana <125673622+glass-hash@users.noreply.github.com> Date: Mon, 29 Jan 2024 14:44:57 -0500 Subject: [PATCH 09/11] Update latency opt for eval tests --- frontend/enso/enso_nic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/enso/enso_nic.py b/frontend/enso/enso_nic.py index 7d892533..c0865fc6 100644 --- a/frontend/enso/enso_nic.py +++ b/frontend/enso/enso_nic.py @@ -52,7 +52,7 @@ def __init__( tx_credits: int = DEFAULT_NB_TX_CREDITS, ethernet_port: int = DEFAULT_ETH_PORT, desc_per_pkt: bool = False, - latency_opt: bool = False, + latency_opt: bool = True, skip_config: bool = False, verbose: bool = False, log_file: Union[bool, TextIO] = False, From 199540694e58ce02a8eb833cd388229b9f641c00 Mon Sep 17 00:00:00 2001 From: Kshitij Rana <125673622+glass-hash@users.noreply.github.com> Date: Thu, 8 Feb 2024 08:52:12 -0500 Subject: [PATCH 10/11] Updated latency_opt in setup.sh --- setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.sh b/setup.sh index 6623ced4..42f45a60 100755 --- a/setup.sh +++ b/setup.sh @@ -63,7 +63,7 @@ else fi # Setup the software. -./scripts/sw_setup.sh 16384 32768 false +./scripts/sw_setup.sh 16384 32768 true return_code=$? 
if [ $return_code -ne 0 ]; then From 1cd2dd6bf3790a1396cf0060444417dfecd558a2 Mon Sep 17 00:00:00 2001 From: Kshitij Rana Date: Mon, 29 Apr 2024 18:54:46 -0400 Subject: [PATCH 11/11] Incorporate review comments --- .pre-commit-config.yaml | 1 + software/examples/capture.cpp | 1 + software/examples/echo.cpp | 1 + software/examples/echo_copy.cpp | 1 + software/examples/echo_event.cpp | 1 + software/examples/echo_prefetch.cpp | 1 + software/examples/ensogen.cpp | 317 +++++++++++++--------------- software/examples/l2_forward.cpp | 1 + software/include/enso/meson.build | 3 +- software/include/enso/pipe.h | 28 +-- software/include/enso/socket.h | 143 ------------- software/src/enso/helpers.cpp | 28 +-- software/src/enso/pipe.cpp | 4 +- 13 files changed, 171 insertions(+), 359 deletions(-) delete mode 100644 software/include/enso/socket.h diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 55d07fe3..b4b604c5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,6 +18,7 @@ repos: args: ["-style=Google"] - id: cpplint # linter (or style-error checker) for Google C++ Style Guide - id: cppcheck # static analyzer of C/C++ code + args: ["--check-level=exhaustive"] - repo: https://github.com/charliermarsh/ruff-pre-commit rev: 'v0.0.255' # Ruff version. hooks: diff --git a/software/examples/capture.cpp b/software/examples/capture.cpp index cd4c1ddf..8208159f 100644 --- a/software/examples/capture.cpp +++ b/software/examples/capture.cpp @@ -159,6 +159,7 @@ int main(int argc, const char* argv[]) { while (!setup_done) continue; // Wait for setup to be done. + std::cout << "The bandwidth statistics are approximated." 
<< std::endl; show_stats(thread_stats, &keep_running); socket_thread.join(); diff --git a/software/examples/echo.cpp b/software/examples/echo.cpp index 7d4cd291..fa2b8517 100644 --- a/software/examples/echo.cpp +++ b/software/examples/echo.cpp @@ -140,6 +140,7 @@ int main(int argc, const char* argv[]) { while (!setup_done) continue; // Wait for setup to be done. + std::cout << "The bandwidth statistics are approximated." << std::endl; show_stats(thread_stats, &keep_running); for (auto& thread : threads) { diff --git a/software/examples/echo_copy.cpp b/software/examples/echo_copy.cpp index 1ef2d610..d9e28714 100644 --- a/software/examples/echo_copy.cpp +++ b/software/examples/echo_copy.cpp @@ -157,6 +157,7 @@ int main(int argc, const char* argv[]) { while (!setup_done) continue; // Wait for setup to be done. + std::cout << "The bandwidth statistics are approximated." << std::endl; show_stats(thread_stats, &keep_running); for (auto& thread : threads) { diff --git a/software/examples/echo_event.cpp b/software/examples/echo_event.cpp index 859df939..b9ff50cf 100644 --- a/software/examples/echo_event.cpp +++ b/software/examples/echo_event.cpp @@ -140,6 +140,7 @@ int main(int argc, const char* argv[]) { while (!setup_done) continue; // Wait for setup to be done. + std::cout << "The bandwidth statistics are approximated." << std::endl; show_stats(thread_stats, &keep_running); for (auto& thread : threads) { diff --git a/software/examples/echo_prefetch.cpp b/software/examples/echo_prefetch.cpp index 4eabc442..3e7dfa4e 100644 --- a/software/examples/echo_prefetch.cpp +++ b/software/examples/echo_prefetch.cpp @@ -151,6 +151,7 @@ int main(int argc, const char* argv[]) { while (!setup_done) continue; // Wait for setup to be done. + std::cout << "The bandwidth statistics are approximated." 
<< std::endl; show_stats(thread_stats, &keep_running); for (auto& thread : threads) { diff --git a/software/examples/ensogen.cpp b/software/examples/ensogen.cpp index a1cd7290..971f8952 100644 --- a/software/examples/ensogen.cpp +++ b/software/examples/ensogen.cpp @@ -29,22 +29,22 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* + +/** * @file: ensogen.cpp * - * @brief: Packet generator program that uses the Enso library to send and - * receive packets. It uses libpcap to read and process packets from a pcap file. + * @brief Packet generator program that uses the Enso library to send and + * receive packets. It uses libpcap to read and process packets from a pcap + * file. * * Example: * * sudo ./scripts/ensogen.sh ./scripts/sample_pcaps/2_64_1_2.pcap 100 \ * --pcie-addr 65:00.0 - * - * */ + */ #include #include -#include #include #include #include @@ -76,19 +76,19 @@ *****************************************************************************/ // Number of loop iterations to wait before probing the TX notification buffer // again when reclaiming buffer space. -#define TX_RECLAIM_DELAY 1024 +#define TX_RECLAIM_DELAY 1024 // Scientific notation for 10^6, treated as double. Used for stats calculations. -#define ONE_MILLION 1e6 +#define ONE_MILLION 1e6 // Scientific notation for 10^3, treated as double. Used for stats calculations. -#define ONE_THOUSAND 1e3 +#define ONE_THOUSAND 1e3 // Ethernet's per packet overhead added by the FPGA (in bytes). -#define FPGA_PACKET_OVERHEAD 24 +#define FPGA_PACKET_OVERHEAD 24 // Minimum size of a packet aligned to cache (in bytes). -#define MIN_PACKET_ALIGNED_SIZE 64 +#define MIN_PACKET_ALIGNED_SIZE 64 // If defined, ignore received packets. 
// #define IGNORE_RX @@ -156,10 +156,9 @@ using enso::TxPipe; /****************************************************************************** * Structure Definitions *****************************************************************************/ -/* - * @brief: Structure to store the command line arguments. - * - * */ +/** + * @brief Structure to store the command line arguments. + */ struct parsed_args_t { int core_id; uint32_t nb_queues; @@ -179,17 +178,15 @@ struct parsed_args_t { std::string pcie_addr; }; -/* - * @brief: Structure to store an Enso TxPipe object and attributes related +/** + * @brief Structure to store an Enso TxPipe object and attributes related * to it. - * - * */ + */ struct EnsoTxPipe { - EnsoTxPipe(TxPipe *pipe) - : tx_pipe(pipe), nb_aligned_bytes(0), nb_raw_bytes(0), - nb_pkts(0) {} + explicit EnsoTxPipe(TxPipe* pipe) + : tx_pipe(pipe), nb_aligned_bytes(0), nb_raw_bytes(0), nb_pkts(0) {} // Enso TxPipe - TxPipe *tx_pipe; + TxPipe* tx_pipe; // Number of cache aligned bytes in the pipe uint32_t nb_aligned_bytes; // Number of raw bytes in the pipe @@ -198,32 +195,28 @@ struct EnsoTxPipe { uint32_t nb_pkts; }; - -/* - * @brief: Structure to store variables needed for processing the PCAP +/** + * @brief Structure to store variables needed for processing the PCAP * file and are passed to the callback function. 
- * - * */ + */ struct PcapHandlerContext { - PcapHandlerContext(std::unique_ptr &dev_, pcap_t* pcap_) : - dev(dev_), buf(NULL), free_flits_cur_pipe(0), - pcap(pcap_) {} + PcapHandlerContext(std::unique_ptr& dev_, pcap_t* pcap_) + : dev(dev_), buf(NULL), free_flits_cur_pipe(0), pcap(pcap_) {} // Pointer to Enso device - std::unique_ptr &dev; + std::unique_ptr& dev; // Pipes to store the packets from the PCAP file std::vector tx_pipes; // Pointer to the buffer of the current pipe - uint8_t *buf; + uint8_t* buf; // Total number of free flits in the current pipe uint32_t free_flits_cur_pipe; // libpcap object associated with the opened PCAP file pcap_t* pcap; }; -/* - * @brief: Structure to store the Rx related stats. - * - * */ +/** + * @brief Structure to store the Rx related stats. + */ struct RxStats { explicit RxStats(uint32_t rtt_hist_len = 0, uint32_t rtt_hist_offset = 0) : pkts(0), @@ -253,8 +246,7 @@ struct RxStats { if (unlikely((rtt >= (rtt_hist_len - rtt_hist_offset)) || (rtt < rtt_hist_offset))) { backup_rtt_hist[rtt]++; - } - else { + } else { rtt_hist[rtt - rtt_hist_offset]++; } } @@ -273,28 +265,24 @@ struct RxStats { std::unordered_map backup_rtt_hist; }; -/* - * @brief: Structure to store the variables needed by the receive_pkts +/** + * @brief Structure to store the variables needed by the receive_pkts * function. - * - * */ + */ struct RxArgs { - RxArgs(bool enbl_rtt, bool enbl_rtt_hist, std::unique_ptr &dev_) : - enable_rtt(enbl_rtt), - enable_rtt_history(enbl_rtt_hist), - dev(dev_) {} + RxArgs(bool enbl_rtt, bool enbl_rtt_hist, std::unique_ptr& dev_) + : enable_rtt(enbl_rtt), enable_rtt_history(enbl_rtt_hist), dev(dev_) {} // Check for whether RTT needs to be calculated bool enable_rtt; // Check for whether RTT history needs to be calculated bool enable_rtt_history; // Pointer to the Enso device - std::unique_ptr &dev; + std::unique_ptr& dev; }; -/* - * @brief: Structure to store the Tx related stats. 
- * - * */ +/** + * @brief Structure to store the Tx related stats. + */ struct TxStats { TxStats() : pkts(0), bytes(0) {} // Number of packets received @@ -303,26 +291,25 @@ struct TxStats { uint64_t bytes; }; -/* - * @brief: Structure to store the arguments needed by the transmit_pkts +/** + * @brief Structure to store the arguments needed by the transmit_pkts * function. - * - * */ + */ struct TxArgs { - TxArgs(std::vector &pipes, uint64_t total_aligned_bytes, + TxArgs(std::vector& pipes, uint64_t total_aligned_bytes, uint64_t total_raw_bytes, uint64_t pkts_in_last_pipe, - uint32_t pipes_size, std::unique_ptr &dev_) - : tx_pipes(pipes), - total_remaining_aligned_bytes(total_aligned_bytes), - total_remaining_raw_bytes(total_raw_bytes), - nb_pkts_in_last_pipe(pkts_in_last_pipe), - cur_ind(0), - total_pipes(pipes_size), - transmissions_pending(0), - ignored_reclaims(0), - dev(dev_) {} + uint32_t pipes_size, std::unique_ptr& dev_) + : tx_pipes(pipes), + total_remaining_aligned_bytes(total_aligned_bytes), + total_remaining_raw_bytes(total_raw_bytes), + nb_pkts_in_last_pipe(pkts_in_last_pipe), + cur_ind(0), + total_pipes(pipes_size), + transmissions_pending(0), + ignored_reclaims(0), + dev(dev_) {} // TxPipes handled by the thread - std::vector &tx_pipes; + std::vector& tx_pipes; // Number of aligned bytes that need to be sent uint64_t total_remaining_aligned_bytes; // Number of raw bytes that need to be sent @@ -339,16 +326,15 @@ struct TxArgs { // consumption by the NIC uint32_t ignored_reclaims; // Pointer to the Enso device object - std::unique_ptr &dev; + std::unique_ptr& dev; }; /****************************************************************************** * Function Definitions *****************************************************************************/ -/* - * @brief: Signal handler for SIGINT (Ctrl+C). - * - * */ +/** + * @brief Signal handler for SIGINT (Ctrl+C). 
+ */ void int_handler(int signal __attribute__((unused))) { if (!keep_running) { // user interrupted the second time, we force stop @@ -358,10 +344,9 @@ void int_handler(int signal __attribute__((unused))) { keep_running = 0; } -/* - * @brief: Prints the help message on stdout. - * - * */ +/** + * @brief Prints the help message on stdout. + */ static void print_usage(const char* program_name) { printf( "%s PCAP_FILE RATE_NUM RATE_DEN\n" @@ -403,10 +388,9 @@ static void print_usage(const char* program_name) { DEFAULT_HIST_LEN, DEFAULT_STATS_DELAY); } -/* +/** * Command line options related. Used in parse_args function. - * - * */ + */ enum { CMD_OPT_HELP_NUM = 256, CMD_OPT_COUNT_NUM, @@ -437,18 +421,18 @@ static const struct option long_options[] = { {CMD_OPT_RTT_HIST_LEN, required_argument, NULL, CMD_OPT_RTT_HIST_LEN_NUM}, {CMD_OPT_STATS_DELAY, required_argument, NULL, CMD_OPT_STATS_DELAY_NUM}, {CMD_OPT_PCIE_ADDR, required_argument, NULL, CMD_OPT_PCIE_ADDR_NUM}, - {0, 0, 0, 0} -}; + {0, 0, 0, 0}}; -/* - * @brief: Parses the command line arguments. Called from the main function. +/** + * @brief Parses the command line arguments. Called from the main function. * - * @param argc: Number of arguments entered by the user. - * @param argv: Value of the arguments entered by the user. - * @param parsed_args: Structure filled by this function after parsing the + * @param argc Number of arguments entered by the user. + * @param argv Value of the arguments entered by the user. + * @param parsed_args Structure filled by this function after parsing the * arguments and used in main(). * - * */ + * @return 0 on success. -1 on failure. 1 for help message. + */ static int parse_args(int argc, char** argv, struct parsed_args_t& parsed_args) { int opt; @@ -531,24 +515,24 @@ static int parse_args(int argc, char** argv, return 0; } -/* - * @brief: libpcap callback registered by the main function. Called for each +/** + * @brief libpcap callback registered by the main function. 
Called for each * packet present in the PCAP file. * - * @param user: Structure allocated in main to read and store relevant information. - * @param pkt_hdr: Contains packet metadata like timestamp, length, etc. (UNUSED) - * @param pkt_bytes: Packet data to be copied into a buffer. - * - * */ + * @param user Structure allocated in main to read and store relevant + * information. + * @param pkt_hdr Contains packet metadata like timestamp, length, etc. + * (UNUSED) + * @param pkt_bytes Packet data to be copied into a buffer. + */ void pcap_pkt_handler(u_char* user, const struct pcap_pkthdr* pkt_hdr, const u_char* pkt_bytes) { - (void) pkt_hdr; + (void)pkt_hdr; struct PcapHandlerContext* context = (struct PcapHandlerContext*)user; const struct ether_header* l2_hdr = (struct ether_header*)pkt_bytes; if (l2_hdr->ether_type != htons(ETHERTYPE_IP)) { std::cerr << "Non-IPv4 packets are not supported" << std::endl; - free(context->buf); exit(8); } @@ -582,24 +566,25 @@ void pcap_pkt_handler(u_char* user, const struct pcap_pkthdr* pkt_hdr, context->free_flits_cur_pipe -= nb_flits; } -/* - * @brief: This function is used to receive packets. The approach used in this +/** + * @brief This function is used to receive packets. The approach used in this * function is slightly different from the one described in Enso's library for - * the RxPipe abstraction (Allocate->Bind->Recv->Clear). We use the NextRxPipeToRecv - * abstraction to take advantage of notification prefetching and use fallback - * queues. + * the RxPipe abstraction (Allocate->Bind->Recv->Clear). We use the + * NextRxPipeToRecv abstraction to take advantage of notification prefetching + * and use fallback queues. * - * @param rx_args: Arguments needed by this function. See RxArgs definition. - * @param rx_stats: Rx stats that need to be updated in every iteration. + * @param rx_args Arguments needed by this function. See RxArgs definition. + * @param rx_stats Rx stats that need to be updated in every iteration. 
* - * */ + * @return Number of packets received. + */ inline uint64_t receive_pkts(const struct RxArgs& rx_args, struct RxStats& rx_stats) { uint64_t nb_pkts = 0; #ifdef IGNORE_RX (void)rx_args; (void)rx_stats; -#else // IGNORE_RX +#else // IGNORE_RX RxPipe* rx_pipe = rx_args.dev->NextRxPipeToRecv(); if (unlikely(rx_pipe == nullptr)) { return 0; @@ -635,29 +620,28 @@ inline uint64_t receive_pkts(const struct RxArgs& rx_args, return nb_pkts; } -/* - * @brief: This function is called periodically to send packets and update +/** + * @brief This function is called periodically to send packets and update * the TX stats. In case too many transmissions are already pending it will * wait for the NIC to process them before sending another batch. * - * @param tx_args: Arguments needed by this function. See TxArgs definition. - * @param tx_stats: Tx stats that need to be updated in every iteration. - * - * */ -inline void transmit_pkts(struct TxArgs& tx_args, - struct TxStats& tx_stats) { + * @param tx_args Arguments needed by this function. See TxArgs definition. + * @param tx_stats Tx stats that need to be updated in every iteration. 
+ */ +inline void transmit_pkts(struct TxArgs& tx_args, struct TxStats& tx_stats) { // Avoid transmitting new data when too many TX notifications are pending - const uint32_t buf_fill_thresh = enso::kNotificationBufSize - - TRANSFERS_PER_BUFFER - 1; + const uint32_t buf_fill_thresh = + enso::kNotificationBufSize - TRANSFERS_PER_BUFFER - 1; if (likely(tx_args.transmissions_pending < buf_fill_thresh)) { - struct EnsoTxPipe &cur_pipe = tx_args.tx_pipes[tx_args.cur_ind]; - uint32_t transmission_length = std::min(tx_args.total_remaining_aligned_bytes, - (uint64_t) cur_pipe.nb_aligned_bytes); - uint32_t transmission_raw_length = std::min(tx_args.total_remaining_raw_bytes, - (uint64_t) cur_pipe.nb_raw_bytes); + struct EnsoTxPipe& cur_pipe = tx_args.tx_pipes[tx_args.cur_ind]; + uint32_t transmission_length = + std::min(tx_args.total_remaining_aligned_bytes, + (uint64_t)cur_pipe.nb_aligned_bytes); + uint32_t transmission_raw_length = std::min( + tx_args.total_remaining_raw_bytes, (uint64_t)cur_pipe.nb_raw_bytes); uint64_t buf_phys_addr = cur_pipe.tx_pipe->GetBufPhysAddr(); - tx_args.dev->SendOnly(buf_phys_addr, transmission_length); + tx_args.dev->SendBatch(buf_phys_addr, transmission_length); tx_args.transmissions_pending++; tx_args.total_remaining_aligned_bytes -= transmission_length; tx_args.total_remaining_raw_bytes -= transmission_raw_length; @@ -665,7 +649,7 @@ inline void transmit_pkts(struct TxArgs& tx_args, // update the stats // the stats need be calculated based on raw bytes tx_stats.bytes += transmission_raw_length; - if(tx_args.total_remaining_aligned_bytes == 0) { + if (tx_args.total_remaining_aligned_bytes == 0) { keep_running = 0; tx_stats.pkts += tx_args.nb_pkts_in_last_pipe; return; @@ -680,30 +664,27 @@ inline void transmit_pkts(struct TxArgs& tx_args, if ((tx_args.transmissions_pending > (enso::kNotificationBufSize / 4))) { if (tx_args.ignored_reclaims > TX_RECLAIM_DELAY) { tx_args.ignored_reclaims = 0; - uint32_t num_processed = 
tx_args.dev->ProcessCompletionsOnly(); - if(num_processed > tx_args.transmissions_pending) { + uint32_t num_processed = tx_args.dev->ConsumeBatches(); + if (num_processed > tx_args.transmissions_pending) { tx_args.transmissions_pending = 0; - } - else { + } else { tx_args.transmissions_pending -= num_processed; } - } - else { + } else { tx_args.ignored_reclaims++; } } } -/* - * @brief: Waits until the NIC has consumed all the Tx notifications. - * - * @param tx_args: Arguments needed by this function. See TxArgs definition. +/** + * @brief Waits until the NIC has consumed all the Tx notifications. * - * */ + * @param tx_args Arguments needed by this function. See TxArgs definition. + */ inline void reclaim_all_buffers(struct TxArgs& tx_args) { while (tx_args.transmissions_pending > 0) { - uint32_t num_processed = tx_args.dev->ProcessCompletionsOnly(); - if(num_processed > tx_args.transmissions_pending) { + uint32_t num_processed = tx_args.dev->ConsumeBatches(); + if (num_processed > tx_args.transmissions_pending) { tx_args.transmissions_pending = 0; break; } @@ -738,7 +719,7 @@ int main(int argc, char** argv) { struct PcapHandlerContext context(dev, pcap); - std::vector &tx_pipes = context.tx_pipes; + std::vector& tx_pipes = context.tx_pipes; // Initialize pipes with packets read from pcap file. 
if (pcap_loop(pcap, 0, pcap_pkt_handler, (u_char*)&context) < 0) { @@ -754,7 +735,7 @@ int main(int argc, char** argv) { if ((tx_pipes.size() == 1) && (tx_pipes.front().nb_aligned_bytes < BUFFER_SIZE / 2)) { struct EnsoTxPipe& tx_pipe = tx_pipes.front(); - uint8_t *pipe_buf = tx_pipe.tx_pipe->buf(); + uint8_t* pipe_buf = tx_pipe.tx_pipe->buf(); uint32_t cur_buf_length = tx_pipe.nb_aligned_bytes; uint32_t original_buf_length = cur_buf_length; uint32_t original_nb_pkts = tx_pipe.nb_pkts; @@ -815,8 +796,7 @@ int main(int argc, char** argv) { total_aligned_bytes_to_send += pipe.nb_aligned_bytes; nb_pkts_remaining -= pipe.nb_pkts; } - } - else { + } else { // Treat nb_pkts == 0 as unbounded. The following value should be enough // to send 64-byte packets for around 400 years using Tb Ethernet. total_aligned_bytes_to_send = 0xffffffffffffffff; @@ -861,13 +841,11 @@ int main(int argc, char** argv) { if (parsed_args.enable_rtt) { dev->EnableTimeStamping(); - } - else { + } else { dev->DisableTimeStamping(); } - RxArgs rx_args(parsed_args.enable_rtt, - parsed_args.enable_rtt_history, + RxArgs rx_args(parsed_args.enable_rtt, parsed_args.enable_rtt_history, dev); std::cout << "Running RX on core " << sched_getcpu() << std::endl; @@ -885,8 +863,7 @@ int main(int argc, char** argv) { uint64_t nb_pkts = receive_pkts(rx_args, rx_stats); if (unlikely(nb_pkts == 0)) { nb_iters_no_pkt++; - } - else { + } else { nb_iters_no_pkt = 0; } } @@ -899,7 +876,6 @@ int main(int argc, char** argv) { if (parsed_args.enable_rtt) { dev->DisableTimeStamping(); } - }); std::thread tx_thread = std::thread( @@ -924,7 +900,6 @@ int main(int argc, char** argv) { while (!rx_done) continue; reclaim_all_buffers(tx_args); - }); cpu_set_t cpuset; @@ -948,13 +923,12 @@ int main(int argc, char** argv) { threads.push_back(std::move(rx_thread)); threads.push_back(std::move(tx_thread)); - } - else { + } else { // Send and receive packets within the same thread. 
- std::thread rx_tx_thread = std::thread( - [total_aligned_bytes_to_send, total_raw_bytes_to_send, - pkts_in_last_pipe, &parsed_args, &tx_stats, &rx_stats, - &dev, &tx_pipes] { + std::thread rx_tx_thread = + std::thread([total_aligned_bytes_to_send, total_raw_bytes_to_send, + pkts_in_last_pipe, &parsed_args, &tx_stats, &rx_stats, + &dev, &tx_pipes] { std::this_thread::sleep_for(std::chrono::milliseconds(500)); std::vector rx_pipes; @@ -974,16 +948,14 @@ int main(int argc, char** argv) { if (parsed_args.enable_rtt) { dev->EnableTimeStamping(); - } - else { + } else { dev->DisableTimeStamping(); } std::cout << "Running RX and TX on core " << sched_getcpu() << std::endl; - RxArgs rx_args(parsed_args.enable_rtt, - parsed_args.enable_rtt_history, + RxArgs rx_args(parsed_args.enable_rtt, parsed_args.enable_rtt_history, dev); TxArgs tx_args(tx_pipes, total_aligned_bytes_to_send, @@ -1006,8 +978,7 @@ int main(int argc, char** argv) { uint64_t nb_pkts = receive_pkts(rx_args, rx_stats); if (unlikely(nb_pkts == 0)) { nb_iters_no_pkt++; - } - else { + } else { nb_iters_no_pkt = 0; } } @@ -1022,7 +993,6 @@ int main(int argc, char** argv) { if (parsed_args.enable_rtt) { dev->DisableTimeStamping(); } - }); cpu_set_t cpuset; @@ -1043,8 +1013,8 @@ int main(int argc, char** argv) { std::ofstream save_file; save_file.open(parsed_args.save_file); save_file - << "rx_rawput_mbps,rx_tput_mbps,rx_pkt_rate_kpps,rx_bytes,rx_packets," - "tx_rawput_mbps,tx_tput_mbps,tx_pkt_rate_kpps,tx_bytes,tx_packets"; + << "rx_goodput_mbps,rx_tput_mbps,rx_pkt_rate_kpps,rx_bytes,rx_packets," + "tx_goodput_mbps,tx_tput_mbps,tx_pkt_rate_kpps,tx_bytes,tx_packets"; if (parsed_args.enable_rtt) { save_file << ",mean_rtt_ns"; } @@ -1074,22 +1044,22 @@ int main(int argc, char** argv) { uint64_t tx_bytes = tx_stats.bytes; uint64_t tx_pkts = tx_stats.pkts; - double interval_s = (double) parsed_args.stats_delay / ONE_THOUSAND; + double interval_s = (double)parsed_args.stats_delay / ONE_THOUSAND; uint64_t rx_pkt_diff = 
rx_pkts - last_rx_pkts; - uint64_t rx_rawput_mbps = + uint64_t rx_goodput_mbps = (rx_bytes - last_rx_bytes) * 8. / (ONE_MILLION * interval_s); uint64_t rx_pkt_rate = (rx_pkt_diff / interval_s); uint64_t rx_pkt_rate_kpps = rx_pkt_rate / ONE_THOUSAND; - uint64_t rx_tput_mbps = rx_rawput_mbps + FPGA_PACKET_OVERHEAD - * 8 * rx_pkt_rate / ONE_MILLION; + uint64_t rx_tput_mbps = + rx_goodput_mbps + FPGA_PACKET_OVERHEAD * 8 * rx_pkt_rate / ONE_MILLION; uint64_t tx_pkt_diff = tx_pkts - last_tx_pkts; - uint64_t tx_rawput_mbps = + uint64_t tx_goodput_mbps = (tx_bytes - last_tx_bytes) * 8. / (ONE_MILLION * interval_s); uint64_t tx_tput_mbps = - (tx_bytes - last_tx_bytes + tx_pkt_diff * FPGA_PACKET_OVERHEAD) * 8. - / (ONE_MILLION * interval_s); + (tx_bytes - last_tx_bytes + tx_pkt_diff * FPGA_PACKET_OVERHEAD) * 8. / + (ONE_MILLION * interval_s); uint64_t tx_pkt_rate = (tx_pkt_diff / interval_s); uint64_t tx_pkt_rate_kpps = tx_pkt_rate / ONE_THOUSAND; @@ -1097,8 +1067,7 @@ int main(int argc, char** argv) { uint64_t rtt_ns; if (rx_pkt_diff != 0) { rtt_ns = (rtt_sum_ns - last_aggregated_rtt_ns) / rx_pkt_diff; - } - else { + } else { rtt_ns = 0; } @@ -1121,9 +1090,9 @@ int main(int argc, char** argv) { if (parsed_args.save) { std::ofstream save_file; save_file.open(parsed_args.save_file, std::ios_base::app); - save_file << rx_rawput_mbps << "," << rx_tput_mbps << "," + save_file << rx_goodput_mbps << "," << rx_tput_mbps << "," << rx_pkt_rate_kpps << "," << rx_bytes << "," << rx_pkts << "," - << tx_rawput_mbps << "," << tx_pkt_rate_kpps << "," + << tx_goodput_mbps << "," << tx_pkt_rate_kpps << "," << tx_tput_mbps << "," << tx_bytes << "," << tx_pkts; if (parsed_args.enable_rtt) { save_file << "," << rtt_ns; diff --git a/software/examples/l2_forward.cpp b/software/examples/l2_forward.cpp index 8b12fe24..924ffa61 100644 --- a/software/examples/l2_forward.cpp +++ b/software/examples/l2_forward.cpp @@ -157,6 +157,7 @@ int main(int argc, const char* argv[]) { while (!setup_done) 
continue; // Wait for setup to be done. + std::cout << "The bandwidth statistics are approximated." << std::endl; show_stats(thread_stats, &keep_running); for (auto& thread : threads) { diff --git a/software/include/enso/meson.build b/software/include/enso/meson.build index 0371d3fd..b1beb264 100644 --- a/software/include/enso/meson.build +++ b/software/include/enso/meson.build @@ -5,8 +5,7 @@ public_enso_headers = files( 'ixy_helpers.h', 'internals.h', 'queue.h', - 'pipe.h', - 'socket.h' + 'pipe.h' ) install_headers(public_enso_headers, subdir: 'enso') diff --git a/software/include/enso/pipe.h b/software/include/enso/pipe.h index ca194fce..c90fa346 100644 --- a/software/include/enso/pipe.h +++ b/software/include/enso/pipe.h @@ -286,20 +286,25 @@ class Device { int DisableRoundRobin(); /** - * @brief Vanilla function that sends a given number of bytes from a physical - * address. Used only by Ensogen. + * @brief Sends a batch of packets to the NIC by creating and appending a Tx + * Notification. Use this function if you need to only send a batch + * and not process completions as done by `SendAndFree()`. * + * @param phys_addr Physical address of the buffer that contains the packets. + * @param nb_bytes The number of bytes that need to be sent starting from the + * physical address. */ - void SendOnly(uint64_t phys_addr, uint32_t nb_bytes); + void SendBatch(uint64_t phys_addr, uint32_t nb_bytes); /** - * @brief Vanilla function that checks for the number of Tx notifications - * consumed by the NIC. Used only by Ensogen. + * @brief Checks and returns the number of Tx Notifications consumed by the + * NIC. Use this function if you only need to check the number of + * notifications consumed and not process them as done by + * `ProcessCompletions()`. * * @return number of Tx notifications successfully processed by the NIC. - * */ - uint32_t ProcessCompletionsOnly(); + uint32_t ConsumeBatches(); /** * @brief Gets the round robin status for the device.
@@ -860,13 +865,12 @@ class TxPipe { } /* - * @brief: Used to get the physical address of the pipe's buffer. - * Used only by EnsoGen as of now. + * @brief Used to get the physical address of the pipe's buffer starting + * at offset of the current application data. * + * @return Physical address of the buffer. * */ - inline uint64_t GetBufPhysAddr() { - return buf_phys_addr_ + app_begin_; - } + inline uint64_t GetBufPhysAddr() { return buf_phys_addr_ + app_begin_; } /** * @brief Explicitly requests a best-effort buffer extension. diff --git a/software/include/enso/socket.h b/software/include/enso/socket.h deleted file mode 100644 index 1655c38c..00000000 --- a/software/include/enso/socket.h +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2022, Carnegie Mellon University - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted (subject to the limitations in the disclaimer - * below) provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY - * THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND - * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT - * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * @file - * @brief Socket-like API. - * @deprecated Use the API defined in `pipe.h` instead. - * - * @author Hugo Sadok - */ - -#ifndef SOFTWARE_INCLUDE_ENSO_SOCKET_H_ -#define SOFTWARE_INCLUDE_ENSO_SOCKET_H_ - -#include -#include - -namespace enso { - -typedef unsigned short sa_family_t; -typedef unsigned int socklen_t; - -#define MAX_NB_CORES 128 -#define MAX_NB_SOCKETS MAX_NB_FLOWS - -void set_bdf(uint16_t bdf_); - -int socket(int domain, int type, int protocol, bool fallback) noexcept; - -int bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen) noexcept; - -uint64_t get_socket_phys_addr(int sockfd); - -void *get_socket_virt_addr(int sockfd); - -uint64_t convert_buf_addr_to_phys(int sockfd, void *addr); - -int shutdown(int sockfd, int how) noexcept; - -/* - * Receives packets using a POSIX-like interface. Here *buf is the address to a - * buffer allocated by the user. The function will copy the received data to - * this buffer. - */ -ssize_t recv(int sockfd, void *buf, size_t len, int flags); - -ssize_t recv_zc(int sockfd, void **buf, size_t len, int flags); - -ssize_t recv_select(int ref_sockfd, int *sockfd, void **buf, size_t len, - int flags); - -/* - * Send the bytes pointed by address `phys_addr` through the `sockfd` socket. 
- * There are two important differences to a traditional POSIX `send`: - * - Memory must be pinned (phys_addr needs to be a physical address); - * - It is not safe to change the buffer content until the transmission is done. - * - * This function blocks until it can send but returns before the transmission is - * over. To figure out when the transmission is over, use the `get_completions` - * function. - */ -ssize_t send(int sockfd, uint64_t phys_addr, size_t len, int flags); - -/* - * Return the number of transmission requests that were completed since the last - * call to this function. Since transmissions are always completed in order, one - * can figure out which transmissions were completed by keeping track of all the - * calls to `send`. There can be only up to `kMaxPendingTxRequests` requests - * completed between two calls to `send`. However, if `send` is called multiple - * times, without calling `get_completions` the number of completed requests can - * surpass `kMaxPendingTxRequests`. - */ -uint32_t get_completions(int ref_sockfd); - -/* - * Enable hardware timestamping for the device. This applies to all sockets. - */ -int enable_device_timestamp(int ref_sockfd); - -/* - * Disable hardware timestamping for the device. This applies to all sockets. - */ -int disable_device_timestamp(int ref_sockfd); - -/* - * Enable hardware rate limit for the device. This applies to all sockets. - */ -int enable_device_rate_limit(int ref_sockfd, uint16_t num, uint16_t den); - -/* - * Disable hardware rate limit for the device. This applies to all sockets. - */ -int disable_device_rate_limit(int ref_sockfd); - -/* - * Enable round robin for the device. This applies to all sockets. - */ -int enable_device_round_robin(int ref_sockfd); - -/* - * Disable round robin for the device. This applies to all sockets. - */ -int disable_device_round_robin(int ref_sockfd); - -/* - * Free packet buffer. Use this to free received packets. 
- */ -void free_enso_pipe(int sockfd, size_t len); - -void print_sock_stats(int sockfd); - -} // namespace enso - -#endif // SOFTWARE_INCLUDE_ENSO_SOCKET_H_ diff --git a/software/src/enso/helpers.cpp b/software/src/enso/helpers.cpp index 9a90492e..ea57d954 100644 --- a/software/src/enso/helpers.cpp +++ b/software/src/enso/helpers.cpp @@ -43,18 +43,6 @@ #include #include #include -#include - -/****************************************************************************** - * Macros - *****************************************************************************/ -// Scientific notation for 10^6, treated as double. Used for stats calculations. -#define ONE_MILLION 1e6 -// FPGA packet overhead for 64 byte packets -#define FPGA_PACKET_OVERHEAD_64 20 -// FPGA packet overhead for 1536 byte packets -#define FPGA_PACKET_OVERHEAD_1536 2 - namespace enso { uint16_t get_bdf_from_pcie_addr(const std::string& pcie_addr) { @@ -155,20 +143,8 @@ int set_core_id(std::thread& thread, int core_id) { static void print_stats_line(uint64_t recv_bytes, uint64_t nb_batches, uint64_t nb_pkts, uint64_t delta_bytes, uint64_t delta_pkts, uint64_t delta_batches) { - uint64_t rx_tput_mbps = (delta_bytes * 8.) / ONE_MILLION; - if(rx_tput_mbps > 0) { - uint32_t packet_size = round((long double) delta_bytes / delta_pkts); - if(packet_size == 64) { - rx_tput_mbps = rx_tput_mbps + - (FPGA_PACKET_OVERHEAD_64 * delta_pkts * 8) / ONE_MILLION; - } - else if(packet_size == 1536) { - rx_tput_mbps = rx_tput_mbps + - (FPGA_PACKET_OVERHEAD_1536 * delta_pkts * 8) / ONE_MILLION; - } - } - std::cout << std::dec << rx_tput_mbps - << " Mbps " << delta_pkts / ONE_MILLION << " Mpps " << recv_bytes + std::cout << std::dec << (delta_bytes + delta_pkts * 20) * 8. 
/ 1e6 + << " Mbps " << delta_pkts / 1e6 << " Mpps " << recv_bytes << " B " << nb_batches << " batches " << nb_pkts << " pkts"; if (delta_batches > 0) { diff --git a/software/src/enso/pipe.cpp b/software/src/enso/pipe.cpp index bb083703..a114d0ad 100644 --- a/software/src/enso/pipe.cpp +++ b/software/src/enso/pipe.cpp @@ -397,11 +397,11 @@ void Device::ProcessCompletions() { } } -void Device::SendOnly(uint64_t phys_addr, uint32_t nb_bytes) { +void Device::SendBatch(uint64_t phys_addr, uint32_t nb_bytes) { send_to_queue(&notification_buf_pair_, phys_addr, nb_bytes); } -uint32_t Device::ProcessCompletionsOnly() { +uint32_t Device::ConsumeBatches() { return get_unreported_completions(&notification_buf_pair_); }