From bbaa3abf6d083580ae0513edc36f47e0ccd671b3 Mon Sep 17 00:00:00 2001 From: ziyang Date: Fri, 17 Aug 2018 16:24:00 +0800 Subject: [PATCH] Fix rdmap message being split into two segments on receiver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 【BUG DESCRIPTION】 Testcase `ib_write_lat -d urdma_0 -i 1 -x 0 -R -n 5 -s 976` hangs when run with an 82599 NIC. On the receiver, `ddp_place_tagged_data` can only handle one mbuf segment, but the rdmap message is split into two segments by the 82599 port. Dump tx mbuf on sender: ```txt USER1: [TX] dump mbuf at 0x7002e7ecd800, iova=1d9d8cd8c0, buf_len=1654 pkt_len=1042, ol_flags=f0000000000000, nb_segs=2, in_port=65535 segment at 0x7002e7ecd800, data=0x7002e7ecd916, data_len=52 Dump data at [0x7002e7ecd916], len=52 00000000: 70 10 6F AE CD B5 70 10 6F AE CD 9D 08 00 45 00 | p.o...p.o.....E. 00000010: 04 04 00 00 00 00 40 11 00 00 C0 A7 01 03 C0 A7 | ......@......... 00000020: 01 04 F8 44 AF DD 03 F0 87 57 00 00 00 01 00 00 | ...D.....W...... 00000030: 00 00 00 00 | | | | | | | | | | | | | .... segment at 0x7002e7ecdfc0, data=0x7002e7ecd172, data_len=990 Dump data at [0x7002e7ecd172], len=990 00000000: C1 40 00 65 49 DD 00 00 00 00 00 69 44 00 00 00 | .@.eI......iD... 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 | ................ 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 | ................ . . . 000003B0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 | ................ 000003C0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 | ................ 000003D0: 00 00 00 00 00 00 00 00 00 00 00 00 00 01 | | | .............. ``` Dump rx mbuf on receiver: ```txt USER1: [RX] dump mbuf at 0x7002e9004500, iova=1f2d204580, buf_len=1654 pkt_len=1042, ol_flags=180, nb_segs=2, in_port=0 segment at 0x7002e9004500, data=0x7002e9004600, data_len=1024 Dump data at [0x7002e9004600], len=1024 00000000: 70 10 6F AE CD B5 70 10 6F AE CD 9D 08 00 45 00 | p.o...p.o.....E. 
00000010: 04 04 00 00 00 00 40 11 F3 93 C0 A7 01 03 C0 A7 | ......@......... 00000020: 01 04 F8 44 AF DD 03 F0 7C A7 00 00 00 01 00 00 | ...D....|....... 00000030: 00 00 00 00 C1 40 00 65 49 DD 00 00 00 00 00 69 | .....@.eI......i 00000040: 44 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 | D............... 00000050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 | ................ . . . 000003D0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 | ................ 000003E0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 | ................ 000003F0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 | ................ segment at 0x7002e9004c40, data=0x7002e9004d40, data_len=18 Dump data at [0x7002e9004d40], len=18 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 | ................ 00000010: 00 01 | | | | | | | | | | | | | | | .. ``` 【REASON】 The `mbuf_size` is less than 2KB when the MTU is 1500, so the ixgbe PMD sets the rx buffer size to 1KB: the size is programmed in 1KB units and the right shift truncates anything below 2KB down to 1KB. Any frame longer than 1KB is therefore split across two segments. Refer to the code below: ```c ixgbe_dev_rx_init(struct rte_eth_dev *dev) { ... /* * Configure the RX buffer size in the BSIZEPACKET field of * the SRRCTL register of the queue. * The value is in 1 KB resolution. Valid values can be from * 1 KB to 16 KB. */ buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) - RTE_PKTMBUF_HEADROOM); srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) & IXGBE_SRRCTL_BSIZEPKT_MASK); IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl); ... } ``` 【FIX】 Raise `mbuf_size` to at least `RTE_MBUF_DEFAULT_BUF_SIZE`. According to its comment, quoted below, this is the recommended minimal buffer length. ```c /** * Some NICs need at least 2KB buffer to RX standard Ethernet frame without * splitting it into multiple segments. * So, for mbufs that planned to be involved into RX/TX, the recommended * minimal buffer length is 2KB + RTE_PKTMBUF_HEADROOM. 
*/ #define RTE_MBUF_DEFAULT_DATAROOM 2048 #define RTE_MBUF_DEFAULT_BUF_SIZE \ (RTE_MBUF_DEFAULT_DATAROOM + RTE_PKTMBUF_HEADROOM) ``` Signed-off-by: ziyang --- src/urdmad/main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/urdmad/main.c b/src/urdmad/main.c index bf338f7..5d61fcd 100644 --- a/src/urdmad/main.c +++ b/src/urdmad/main.c @@ -999,6 +999,11 @@ usiw_port_init(struct usiw_port *iface, struct usiw_port_config *port_config) mbuf_size = RTE_PKTMBUF_HEADROOM + port_config->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + urdma_vlan_space; + /* Some NICs (e.g. ixgbe) need at least 2KB buffer to RX standard + * Ethernet frame without splitting it into multiple segments. */ + mbuf_size = mbuf_size > RTE_MBUF_DEFAULT_BUF_SIZE ? + mbuf_size : RTE_MBUF_DEFAULT_BUF_SIZE; + snprintf(name, RTE_MEMPOOL_NAMESIZE, "port_%u_rx_mempool", iface->portid); RTE_LOG(DEBUG, USER1, "create rx mempool for port %" PRIu16 " with %u mbufs of size %zu\n",