NVIDIA DOCA SDK Data Center on a Chip Framework Documentation
psp_gw_pkt_rss.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2024-2025 NVIDIA CORPORATION AND AFFILIATES. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  */
25 
26 #include <rte_ethdev.h>
27 #include <rte_ether.h>
28 #include <rte_arp.h>
29 #include <rte_icmp.h>
30 #include <netinet/icmp6.h>
31 
32 #include <doca_log.h>
33 
34 #include <psp_gw_config.h>
35 #include <psp_gw_flows.h>
36 #include <psp_gw_pkt_rss.h>
37 #include <psp_gw_svc_impl.h>
38 
40 
41 #define MAX_RX_BURST_SIZE 256
42 
43 static uint16_t max_tx_retries = 10;
44 
// Decide whether a received frame is an ICMPv6 Neighbor Solicitation.
//
// eth_hdr:    start of the Ethernet header of the received frame.
// ether_type: ether type of the frame as computed by the caller.
// Returns true only when the IPv6 next-header is ICMPv6 and the ICMPv6 type
// is ND_NEIGHBOR_SOLICIT; every other path visible here returns false.
//
// NOTE(review): no length check is visible in this listing before the IPv6 and
// ICMPv6 headers are dereferenced - confirm the caller guarantees enough data.
52 static bool is_ns_request(struct rte_ether_hdr *eth_hdr, uint16_t ether_type)
53 {
54  struct rte_ipv6_hdr *ipv6_hdr;
55  struct rte_flow_item_icmp6_nd_ns *icmp6_ns_hdr;
56 
// NOTE(review): original line 57 is absent from this listing; it presumably
// opens the `if` that rejects frames whose ether_type is not IPv6 - confirm
// against the real source file.
58  return false;
59  }
// The IPv6 header is taken to start immediately after the Ethernet header
// (no VLAN tag is accounted for).
60  ipv6_hdr = (struct rte_ipv6_hdr *)((char *)eth_hdr + sizeof(rte_ether_hdr));
61  if (ipv6_hdr->proto != IPPROTO_ICMPV6) {
62  return false;
63  }
// The ICMPv6 header is taken to follow the fixed IPv6 header directly; since
// proto was just checked to be ICMPv6, packets with extension headers never
// reach this point.
64  icmp6_ns_hdr = (struct rte_flow_item_icmp6_nd_ns *)(ipv6_hdr + 1);
65  uint16_t ns_op = icmp6_ns_hdr->type;
66  if (ns_op != ND_NEIGHBOR_SOLICIT) {
67  return false;
68  }
69  return true;
70 }
71 
// Handle one packet received on an RSS queue.
//
// params:       per-lcore parameters (config flags, psp_svc, ...).
// port_id:      port the packet was received on; used for logging and passed
//               on to the reply handlers.
// queue_id:     queue the packet was received on; used the same way.
// port_src_mac: MAC address passed on to the reply handlers.
// packet:       the received mbuf; this function does not free it.
//
// A packet whose flow metadata equals one of the configured sample indicators
// is only (optionally) logged and dumped. Any other packet is optionally
// logged, then answered by one of two handlers or passed to
// psp_svc->handle_miss_packet().
84 static void handle_packet(struct lcore_params *params,
85  uint16_t port_id,
86  uint16_t queue_id,
87  rte_ether_addr *port_src_mac,
88  struct rte_mbuf *packet)
89 {
90  struct rte_ether_hdr *eth_hdr = rte_pktmbuf_mtod(packet, struct rte_ether_hdr *);
// The on-wire ether type is byte-swapped with htons() before being compared.
91  uint16_t ether_type = htons(eth_hdr->ether_type);
92  uint32_t pkt_meta = rte_flow_dynf_metadata_get(packet);
93  bool is_ingress_sampled = pkt_meta == params->config->ingress_sample_meta_indicator;
94  bool is_egress_sampled = pkt_meta == params->config->egress_sample_meta_indicator;
// The Neighbor Solicitation check runs for every packet, including sampled
// ones, although its result is only used in the non-sampled branch.
95  bool ns_packet = is_ns_request(eth_hdr, ether_type);
96  if (is_ingress_sampled || is_egress_sampled) {
97  if (params->config->show_sampled_packets) {
98  DOCA_LOG_INFO("SAMPLED PACKET: port %d, queue_id %d, pkt_meta 0x%x, %s",
99  port_id,
100  queue_id,
101  pkt_meta,
102  is_ingress_sampled ? "INGRESS" : "EGRESS");
103  rte_pktmbuf_dump(stdout, packet, packet->data_len);
104  }
105  // sampled packets are NOT sent to the rpc service
106  } else {
107  if (params->config->show_rss_rx_packets) {
108  DOCA_LOG_INFO("RSS: Received port %d, queue_id %d, pkt_meta 0x%x", port_id, queue_id, pkt_meta);
109  rte_pktmbuf_dump(stdout, packet, packet->data_len);
110  }
111 
// NOTE(review): original lines 112-113 are absent from this listing. They
// presumably test ether_type for ARP and begin a handle_arp(...) call whose
// remaining arguments follow - confirm against the real source file.
114  port_id,
115  queue_id,
116  port_src_mac,
117  packet,
118  params->config->return_to_vf_indicator);
119  return;
120  }
121  if (ns_packet) {
// NOTE(review): original line 122 is absent from this listing. It presumably
// begins a handle_neighbor_solicitation(...) call - confirm against the real
// source file.
123  port_id,
124  queue_id,
125  port_src_mac,
126  packet,
127  params->config->return_to_vf_indicator);
128  return;
129  }
// Anything not answered above is passed to the PSP service as a miss packet.
130  params->psp_svc->handle_miss_packet(packet);
131  }
132 }
133 
134 int lcore_pkt_proc_func(void *lcore_args)
135 {
136  auto *params = (struct lcore_params *)lcore_args;
137 
138  uint32_t lcore_id = rte_lcore_id();
139 
140  // Note lcore_id==0 is reserved for main()
141  if (lcore_id == 0) {
142  rte_exit(EXIT_FAILURE, "Unexpectedly entered RSS handler from main thread\n");
143  }
144 
145  uint16_t queue_id = lcore_id - 1;
146 
147  struct rte_mbuf *rx_packets[MAX_RX_BURST_SIZE];
148 
149  double tsc_to_seconds = 1.0 / (double)rte_get_timer_hz();
150 
151  DOCA_LOG_INFO("L-Core %d polling queue %d (all ports)", lcore_id, queue_id);
152 
153  while (!*params->force_quit) {
154  uint16_t port_id = params->pf_dev->port_id;
155  uint64_t t_start = rte_rdtsc();
156 
157  uint16_t nb_rx_packets = rte_eth_rx_burst(port_id, queue_id, rx_packets, MAX_RX_BURST_SIZE);
158 
159  if (!nb_rx_packets)
160  continue;
161 
162  for (int i = 0; i < nb_rx_packets && !*params->force_quit; i++) {
163  handle_packet(params, port_id, queue_id, &params->pf_dev->src_mac, rx_packets[i]);
164  }
165 
166  rte_pktmbuf_free_bulk(rx_packets, nb_rx_packets);
167 
168  if (params->config->show_rss_durations) {
169  double sec = (double)(rte_rdtsc() - t_start) * tsc_to_seconds;
170  DOCA_LOG_INFO("L-Core %d port %d: processed %d packets in %f seconds",
171  lcore_id,
172  port_id,
173  nb_rx_packets,
174  sec);
175  }
176  }
177  DOCA_LOG_INFO("L-Core %d exiting", lcore_id);
178 
179  return 0;
180 }
181 
182 bool reinject_packet(struct rte_mbuf *packet, uint16_t port_id)
183 {
184  uint32_t lcore_id = rte_lcore_id();
185  if (lcore_id == 0) {
186  DOCA_LOG_ERR("Cannot reinject packet from core 0");
187  return false;
188  }
189  uint16_t queue_id = lcore_id - 1;
190 
191  uint16_t nsent = 0;
192  for (uint16_t i = 0; i < max_tx_retries && nsent < 1; i++) {
193  nsent = rte_eth_tx_burst(port_id, queue_id, &packet, 1);
194  }
195  DOCA_LOG_DBG("Reinjected packet on port %d", port_id);
196  return nsent == 1;
197 }
198 
199 uint16_t handle_arp(struct rte_mempool *mpool,
200  uint16_t port_id,
201  uint16_t queue_id,
202  rte_ether_addr *port_src_mac,
203  const struct rte_mbuf *request_pkt,
204  uint32_t arp_response_meta_flag)
205 {
206  const struct rte_ether_hdr *request_eth_hdr = rte_pktmbuf_mtod(request_pkt, struct rte_ether_hdr *);
207  const struct rte_arp_hdr *request_arp_hdr = (rte_arp_hdr *)&request_eth_hdr[1];
208 
209  uint16_t arp_op = RTE_BE16(request_arp_hdr->arp_opcode);
210  if (arp_op != RTE_ARP_OP_REQUEST) {
211  DOCA_LOG_ERR("RSS ARP Handler: expected op %d, got %d", RTE_ARP_OP_REQUEST, arp_op);
212  return 0;
213  }
214 
215  struct rte_mbuf *response_pkt = rte_pktmbuf_alloc(mpool);
216  if (!response_pkt) {
217  DOCA_LOG_ERR("Out of memory for ARP response packets; exiting");
218  return ENOMEM;
219  }
220 
221  *RTE_MBUF_DYNFIELD(response_pkt, rte_flow_dynf_metadata_offs, uint32_t *) = arp_response_meta_flag;
222  response_pkt->ol_flags |= rte_flow_dynf_metadata_mask;
223 
224  uint32_t pkt_size = sizeof(struct rte_ether_hdr) + sizeof(struct rte_arp_hdr);
225  response_pkt->data_len = pkt_size;
226  response_pkt->pkt_len = pkt_size;
227 
228  struct rte_ether_hdr *response_eth_hdr = rte_pktmbuf_mtod(response_pkt, struct rte_ether_hdr *);
229  struct rte_arp_hdr *response_arp_hdr = (rte_arp_hdr *)&response_eth_hdr[1];
230 
231  memcpy(&response_eth_hdr->src_addr, port_src_mac, RTE_ETHER_ADDR_LEN);
232  response_eth_hdr->dst_addr = request_eth_hdr->src_addr;
233  response_eth_hdr->ether_type = RTE_BE16(DOCA_FLOW_ETHER_TYPE_ARP);
234 
235  response_arp_hdr->arp_hardware = RTE_BE16(RTE_ARP_HRD_ETHER);
236  response_arp_hdr->arp_protocol = RTE_BE16(RTE_ETHER_TYPE_IPV4);
237  response_arp_hdr->arp_hlen = RTE_ETHER_ADDR_LEN;
238  response_arp_hdr->arp_plen = sizeof(uint32_t);
239  response_arp_hdr->arp_opcode = RTE_BE16(RTE_ARP_OP_REPLY);
240  memcpy(&response_arp_hdr->arp_data.arp_sha, port_src_mac, RTE_ETHER_ADDR_LEN);
241  response_arp_hdr->arp_data.arp_tha = request_arp_hdr->arp_data.arp_sha;
242  response_arp_hdr->arp_data.arp_sip = request_arp_hdr->arp_data.arp_tip;
243  response_arp_hdr->arp_data.arp_tip = request_arp_hdr->arp_data.arp_sip;
244 
245  uint16_t nb_tx_packets = 0;
246  while (nb_tx_packets < 1) {
247  // This ARP reply will go to the empty pipe.
248  nb_tx_packets = rte_eth_tx_burst(port_id, queue_id, &response_pkt, 1);
249  if (nb_tx_packets != 1) {
250  DOCA_LOG_WARN("ARP reinject: rte_eth_tx_burst returned %d", nb_tx_packets);
251  }
252  }
253 
254  char ip_addr_str[INET_ADDRSTRLEN];
255  inet_ntop(AF_INET, &request_arp_hdr->arp_data.arp_tip, ip_addr_str, INET_ADDRSTRLEN);
256  DOCA_LOG_DBG("Port %d replied to ARP request for IP %s", port_id, ip_addr_str);
257 
258  return 1;
259 }
260 
261 uint16_t handle_neighbor_solicitation(struct rte_mempool *mpool,
262  uint16_t port_id,
263  uint16_t queue_id,
264  rte_ether_addr *port_src_mac,
265  const struct rte_mbuf *request_pkt,
266  uint32_t na_response_meta_flag)
267 {
268  uint8_t option_header_size = RTE_ETHER_ADDR_LEN + 2;
269  const struct rte_ether_hdr *request_eth_hdr = rte_pktmbuf_mtod(request_pkt, struct rte_ether_hdr *); // extract
270  // the eth
271  // header
272  struct rte_ipv6_hdr *request_ipv6_hdr =
273  (struct rte_ipv6_hdr *)((char *)request_eth_hdr + sizeof(rte_ether_hdr));
274  struct rte_flow_item_icmp6_nd_ns *request_icmp6_ns_hdr =
275  (struct rte_flow_item_icmp6_nd_ns *)(request_ipv6_hdr + 1);
276 
277  struct rte_mbuf *response_pkt = rte_pktmbuf_alloc(mpool);
278  if (!response_pkt) {
279  DOCA_LOG_ERR("Out of memory for NS response packets; exiting");
280  return ENOMEM;
281  }
282 
283  *RTE_MBUF_DYNFIELD(response_pkt, rte_flow_dynf_metadata_offs, uint32_t *) = na_response_meta_flag;
284  response_pkt->ol_flags |= rte_flow_dynf_metadata_mask;
285 
286  uint32_t pkt_size = sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv6_hdr) +
287  sizeof(struct rte_flow_item_icmp6_nd_na) + option_header_size;
288  response_pkt->data_len = pkt_size;
289  response_pkt->pkt_len = pkt_size;
290 
291  struct rte_ether_hdr *response_eth_hdr = rte_pktmbuf_mtod(response_pkt, struct rte_ether_hdr *);
292  struct rte_ipv6_hdr *response_ipv6_hdr = (struct rte_ipv6_hdr *)(response_eth_hdr + 1);
293  struct rte_flow_item_icmp6_nd_na *response_na_hdr = (struct rte_flow_item_icmp6_nd_na *)(response_ipv6_hdr + 1);
294 
295  memcpy(&response_eth_hdr->src_addr, port_src_mac, RTE_ETHER_ADDR_LEN);
296  response_eth_hdr->dst_addr = request_eth_hdr->src_addr;
297  response_eth_hdr->ether_type = RTE_BE16(DOCA_FLOW_ETHER_TYPE_IPV6);
298 
299  response_ipv6_hdr->vtc_flow = htonl((6 << 28));
300  response_ipv6_hdr->payload_len = RTE_BE16(sizeof(struct rte_flow_item_icmp6_nd_ns) + option_header_size);
301  response_ipv6_hdr->proto = IPPROTO_ICMPV6;
302  response_ipv6_hdr->hop_limits = 255;
303 
304  memcpy(response_ipv6_hdr->src_addr, request_icmp6_ns_hdr->target_addr, IPV6_ADDR_LEN); // icmpv6 contains full
305  // dst ipv6 addr
306  memcpy(response_ipv6_hdr->dst_addr, request_ipv6_hdr->src_addr, IPV6_ADDR_LEN);
307 
308  response_na_hdr->type = ND_NEIGHBOR_ADVERT;
309  response_na_hdr->code = 0;
310  response_na_hdr->checksum = 0;
311  memcpy(response_na_hdr->target_addr, request_icmp6_ns_hdr->target_addr, IPV6_ADDR_LEN);
312 
313  uint8_t *options = (uint8_t *)(response_na_hdr + 1);
314  options[0] = 2;
315  options[1] = 1;
316  memcpy(&options[2], port_src_mac, RTE_ETHER_ADDR_LEN);
317  response_na_hdr->checksum = rte_ipv6_udptcp_cksum(response_ipv6_hdr, response_na_hdr);
318 
319  uint16_t nb_tx_packets = 0;
320  while (nb_tx_packets < 1) {
321  // This NS reply will go to the empty pipe.
322  nb_tx_packets = rte_eth_tx_burst(port_id, queue_id, &response_pkt, 1);
323  if (nb_tx_packets != 1) {
324  DOCA_LOG_WARN("Neighbor Solicitation reinject: rte_eth_tx_burst returned %d", nb_tx_packets);
325  }
326  }
327 
328  char ip_addr_str[INET6_ADDRSTRLEN];
329  inet_ntop(AF_INET6, &request_icmp6_ns_hdr->target_addr, ip_addr_str, INET6_ADDRSTRLEN);
330  DOCA_LOG_DBG("Port %d replied to Neighbor Solicitation request for IP %s", port_id, ip_addr_str);
331 
332  return 1;
333 }
doca_error_t handle_miss_packet(struct rte_mbuf *packet)
Handles any "miss" packets received by RSS which indicate a new tunnel connection is needed.
#define DOCA_FLOW_ETHER_TYPE_ARP
Definition: doca_flow_net.h:60
#define DOCA_FLOW_ETHER_TYPE_IPV6
Definition: doca_flow_net.h:58
#define DOCA_LOG_ERR(format,...)
Generates an ERROR application log message.
Definition: doca_log.h:466
#define DOCA_LOG_WARN(format,...)
Generates a WARNING application log message.
Definition: doca_log.h:476
#define DOCA_LOG_INFO(format,...)
Generates an INFO application log message.
Definition: doca_log.h:486
#define DOCA_LOG_DBG(format,...)
Generates a DEBUG application log message.
Definition: doca_log.h:496
uint16_t queue_id
Definition: ip_frag_dp.c:1
uint16_t ether_type
Definition: packets.h:2
static constexpr uint32_t IPV6_ADDR_LEN
Definition: psp_gw_config.h:77
int lcore_pkt_proc_func(void *lcore_args)
The entry point for each L-Core's main processing loop. Each L-Core polls a different Rx queue on the...
static uint16_t max_tx_retries
uint16_t handle_neighbor_solicitation(struct rte_mempool *mpool, uint16_t port_id, uint16_t queue_id, rte_ether_addr *port_src_mac, const struct rte_mbuf *request_pkt, uint32_t na_response_meta_flag)
Used to reply to a Neighbor Solicitation packet.
static void handle_packet(struct lcore_params *params, uint16_t port_id, uint16_t queue_id, rte_ether_addr *port_src_mac, struct rte_mbuf *packet)
High-level Rx Queue packet handler routine Optionally logs the packet to the console....
uint16_t handle_arp(struct rte_mempool *mpool, uint16_t port_id, uint16_t queue_id, rte_ether_addr *port_src_mac, const struct rte_mbuf *request_pkt, uint32_t arp_response_meta_flag)
Used to reply to an ARP request.
static bool is_ns_request(struct rte_ether_hdr *eth_hdr, uint16_t ether_type)
determine whether a given packet or request corresponds to a "Neighbor Solicitation"
#define MAX_RX_BURST_SIZE
DOCA_LOG_REGISTER(PSP_RSS)
bool reinject_packet(struct rte_mbuf *packet, uint16_t port_id)
Used by the psp_svc to re-inject a packet via the Host PF Tx queue after a new tunnel has been establ...
struct rte_mempool * mbuf_pool
Definition: dpdk_utils.h:72
The parameters needed by each L-Core's main loop.
PSP_GatewayImpl * psp_svc
psp_gw_app_config * config
uint32_t egress_sample_meta_indicator
uint32_t ingress_sample_meta_indicator
struct application_dpdk_config dpdk_config
uint32_t return_to_vf_indicator