NVIDIA DOCA SDK Data Center on a Chip Framework Documentation
np_switch_telemetry_dev_main.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2024 NVIDIA CORPORATION AND AFFILIATES. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  */
25 
26 #include <doca_pcc_np_dev.h>
27 #include "pcc_common_dev.h"
28 
/* log2 of the number of device-ID values distinguished by the telemetry DB index */
#define TELEMETRY_DB_LOG_MAX_DEVICES (10)
/* log2 of the number of port values distinguished per device by the telemetry DB index */
#define TELEMETRY_DB_LOG_MAX_PORTS_PER_DEVICE (5)
/* log2 of the total number of telemetry DB entries (device bits + port bits) */
#define TELEMETRY_DB_LOG_SIZE (TELEMETRY_DB_LOG_MAX_DEVICES + TELEMETRY_DB_LOG_MAX_PORTS_PER_DEVICE)
/* Mask that keeps an index inside the telemetry DB */
#define TELEMETRY_DB_MASK ((1UL << TELEMETRY_DB_LOG_SIZE) - 1UL)
/* Number of entries in the telemetry DB */
#define TELEMETRY_DB_SIZE (1UL << TELEMETRY_DB_LOG_SIZE)
/* Aging time of a DB entry; presumably in core-timer microseconds (i.e. 5 seconds) - TODO confirm */
#define DB_ENTRY_AGING_TIME (5000000)
/* A TX timestamp delta (nanoseconds) above this value marks the metadata as invalid */
#define INVALID_MD_TIME_INTERVAL (2000000000)

/* Hop limit configured by the host through the mailbox; used to count metadata entries */
uint8_t max_hop_limit;
/* Set to 1 once the mailbox handler has stored the hop limit; packets are ignored before that */
uint32_t mailbox_done = 0;
49 
/*
 * Telemetry database entry.
 * Keeps the two most recent TX byte/timestamp samples stored for one hashed
 * (device, port) slot, plus bookkeeping used for first-use and aging detection.
 * The entry is padded and aligned to 32 bytes.
 */
struct telemetry_db_entry_t {
	uint32_t tx_bytes0;	 /* last received TX bytes */
	uint32_t tx_ts_nano0;	 /* last received TX timestamp in nanoseconds */
	uint32_t tx_bytes1;	 /* last to last received TX bytes */
	uint32_t tx_ts_nano1;	 /* last to last received TX timestamp in nanoseconds */
	uint32_t valid;		 /* valid flag */
	uint32_t last_access_ts; /* timestamp of last access to entry in microseconds */
	uint32_t rsvd0;		 /* reserved */
	uint32_t rsvd1;		 /* reserved */
} __attribute__((aligned(32)));
64 
67 
/*
 * IFA2 metadata header (4 bytes) that precedes the per-switch metadata entries.
 */
struct ifa2_md_hdr {
	uint8_t request_vec; /* Request Vector (8 bits) */
	uint8_t action_vec;  /* Action Vector (8 bits) */
	uint8_t hop_limit;   /* Hop Limit (8 bits) */
	uint8_t cur_length;  /* Current Length (8 bits) */
};
75 
/*
 * Per-switch metadata entry as laid over the packet bytes.
 * Multi-byte fields are stored in packet byte order; the accessors in this
 * file byte-swap them before use. Each group below is one 32-bit word.
 */
struct switch_md {
	uint32_t dev_id_high : 24; /* device ID - high bits */
	uint32_t pt : 2;	   /* port type */
	uint32_t dev_id_low : 6;   /* device ID - low bits */
	/* ----------------------------------- */
	uint8_t congestion : 5; /* congestion */
	uint8_t tid : 3;	/* TID */
	uint8_t tx_bytes_hi;	/* TX bytes - high bits */
	uint8_t ttl;		/* TTL */
	uint8_t queue_id;	/* queue ID */
	/* ----------------------------------- */
	uint32_t rx_ts_sec_hi; /* RX timestamp (sec) - high bits */
	/* ----------------------------------- */
	uint16_t rx_ts_sec_lo;	/* RX timestamp (sec) - low bits */
	uint16_t rx_ts_nano_hi; /* RX timestamp (nano) - high bits */
	/* ----------------------------------- */
	uint16_t rx_ts_nano_lo; /* RX timestamp (nano) - low bits */
	uint16_t tx_ts_nano_hi; /* TX timestamp (nano) - high bits */
	/* ----------------------------------- */
	uint16_t tx_ts_nano_lo;	    /* TX timestamp (nano) - low bits */
	uint16_t eg_queue_cell_cnt; /* queue cell count */
	/* ----------------------------------- */
	uint16_t src_port; /* source port */
	uint16_t dst_port; /* destination port */
	/* ----------------------------------- */
	uint32_t tx_bytes_lo; /* TX bytes - low bits */
};
104 
/*
 * Response payload written by the packet handler.
 * qlen, tx_bytes and tx_ts are stored byte-swapped by telemetry_process_md().
 */
struct int_response {
	uint16_t qlen;		/* queue length */
	uint8_t index : 3;	/* index (3 bits) */
	uint8_t reserved_1 : 5; /* reserved (5 bits) */
	uint8_t pt : 2;		/* port type (2 bits) */
	uint8_t valid : 1;	/* valid flag */
	uint8_t reserved_2 : 5; /* reserved (5 bits) */
	uint32_t tx_bytes;	/* TX bytes */
	uint32_t tx_ts;		/* TX timestamp */
};
116 
117 /*
118  * Get number of metadatas added by switches metadata header
119  *
120  * @md_hdr [in]: switch metadata header
121  * @return: number of metadatas
122  */
123 static inline uint8_t get_md_num_from_md_hdr(struct ifa2_md_hdr *md_hdr)
124 {
125  uint8_t num_mds = max_hop_limit - md_hdr->hop_limit;
126  return num_mds;
127 }
128 
129 /*
130  * Hash function to index telemetry database
131  *
132  * @device_id [in]: switch device ID
133  * @port [in]: port number
134  * @return: index in telemetry DB
135  */
136 static inline uint16_t telemetry_db_hash(uint32_t device_id, uint16_t port)
137 {
138  uint16_t idx = (((uint16_t)device_id) & 0x3FFu) | ((((uint16_t)port) & 0x1Fu) << 10);
139  return (idx & TELEMETRY_DB_MASK);
140 }
141 
142 /*
143  * Get TX timestamp (nanoseconds) from switch metadata
144  *
145  * @md [in]: switch metadata
146  * @return: TX timestamp
147  */
148 static inline uint32_t telemetry_get_tx_ts_nano(struct switch_md *md)
149 {
150  return ((uint32_t)(__builtin_bswap16(md->tx_ts_nano_hi))) << 16 |
151  ((uint32_t)(__builtin_bswap16(md->tx_ts_nano_lo)));
152 }
153 
154 /*
155  * Get TX bytes from switch metadata
156  *
157  * @md [in]: switch metadata
158  * @return: TX bytes
159  */
160 static inline uint32_t telemetry_get_tx_bytes(struct switch_md *md)
161 {
162  /* tx_bytes_hi aren't in use */
163  return __builtin_bswap32(((uint32_t)md->tx_bytes_lo));
164 }
165 
166 /*
167  * Get queue length from switch metadata
168  *
169  * @md [in]: switch metadata
170  * @return: switch queue length
171  */
172 static inline uint32_t telemetry_get_q_len(struct switch_md *md)
173 {
174  /* check switch format type */
175  if (((__builtin_bswap32((md->rx_ts_sec_hi))) >> 16) == 0x8000u) {
176  /* Mellanox format */
177  return ((__builtin_bswap32(md->rx_ts_sec_hi)) & 0xFFFFu) << 16 |
178  (uint32_t)(__builtin_bswap16(md->eg_queue_cell_cnt));
179  } else {
180  /* Broadcom format */
181  return ((uint32_t)(__builtin_bswap16(md->eg_queue_cell_cnt))) << 8;
182  }
183 }
184 
185 /*
186  * process switch metadata and prepare response packet
187  *
188  * @md [in]: switch metadata
189  * @response_pkt [in]: response packet
190  * @return: 0 on success, negative value otherwise
191  */
192 static inline int telemetry_process_md(struct switch_md *md, struct int_response *response_pkt)
193 {
196 
197  uint16_t db_idx;
198  struct telemetry_db_entry_t *db_entry;
199  uint8_t first_packet = 0;
200  struct bytes_ts_t new_bytes_ts, old_bytes_ts;
201  uint32_t last_access_ts, curr_ts;
202 
203  db_idx = telemetry_db_hash(__builtin_bswap32(md->dev_id_high) << 6 | md->dev_id_low,
204  __builtin_bswap16(md->dst_port));
205  new_bytes_ts.ts = telemetry_get_tx_ts_nano(md);
206  new_bytes_ts.bytes = telemetry_get_tx_bytes(md);
207  db_entry = &telemetry_db[db_idx];
208  /* check for first use of entry */
209  if (db_entry->valid == 0) {
210  db_entry->tx_bytes0 = 0;
211  db_entry->tx_ts_nano0 = 0;
212  db_entry->tx_bytes1 = 0;
213  db_entry->tx_ts_nano1 = 0;
215  db_entry->valid = 1;
216  first_packet = 1;
217  } else { /* consider case of aging for database entry */
218  last_access_ts = db_entry->last_access_ts;
219  curr_ts = doca_pcc_dev_get_timer_lo();
220  /* consider packet as first if arrival of new packet passed pre-defined time period */
222  first_packet = 1;
223  db_entry->last_access_ts = curr_ts;
224  }
225 
226  /* get last metadata from entry */
227  old_bytes_ts.bytes = db_entry->tx_bytes0;
228  old_bytes_ts.ts = db_entry->tx_ts_nano0;
229 
230  /* consider case of invalid metadata timestamp */
231  if (!first_packet && diff_with_wrap32(new_bytes_ts.ts, old_bytes_ts.ts) > INVALID_MD_TIME_INTERVAL) {
232  response_pkt->valid = 0;
233  return -1;
234  }
235 
236  /* check to handle duplicate metadata by switch */
237  if (new_bytes_ts.bytes != old_bytes_ts.bytes) {
238  /* store last metadata */
239  db_entry->tx_bytes1 = db_entry->tx_bytes0;
240  db_entry->tx_ts_nano1 = db_entry->tx_ts_nano0;
241  }
242 
243  /* store new metadata in database entry */
244  db_entry->tx_bytes0 = new_bytes_ts.bytes;
245  db_entry->tx_ts_nano0 = new_bytes_ts.ts;
246  old_bytes_ts.bytes = db_entry->tx_bytes1;
247  old_bytes_ts.ts = db_entry->tx_ts_nano1;
248 
249  response_pkt->tx_bytes = diff_with_wrap32(new_bytes_ts.bytes, old_bytes_ts.bytes);
250  response_pkt->tx_ts = diff_with_wrap32(new_bytes_ts.ts, old_bytes_ts.ts);
251  response_pkt->tx_bytes = __builtin_bswap32(response_pkt->tx_bytes);
252  response_pkt->tx_ts = __builtin_bswap32(response_pkt->tx_ts);
253  response_pkt->qlen = __builtin_bswap16(telemetry_get_q_len(md) >> 8);
254  response_pkt->pt = md->pt & 3;
255  response_pkt->valid = !first_packet;
256  return 0;
257 }
258 
259 /*
260  * User callback - packet handler
261  */
262 doca_pcc_dev_error_t doca_pcc_dev_np_user_packet_handler(struct doca_pcc_np_dev_request_packet *in,
264 {
265  struct switch_md *md;
266  struct int_response *response_pkt = (struct int_response *)(out->data);
268  uint32_t num_mds = get_md_num_from_md_hdr((struct ifa2_md_hdr *)ifa2_md_hdr);
269 
270  if (num_mds > 0 && mailbox_done) {
271  md = (struct switch_md *)(ifa2_md_hdr + sizeof(struct ifa2_md_hdr));
272  telemetry_process_md(md, response_pkt);
273  response_pkt->index = (num_mds - 1);
274  }
275 
276  return DOCA_PCC_DEV_STATUS_OK;
277 }
278 
279 /*
280  * Called when host sends a mailbox send request.
281  * Used to save the hop limit that was set by user in host.
282  */
284  uint32_t request_size,
285  uint32_t max_response_size,
286  void *response,
287  uint32_t *response_size)
288 {
289  if (request_size != sizeof(uint32_t))
291 
292  max_hop_limit = *(uint8_t *)(request);
293  doca_pcc_dev_printf("Mailbox initiated hop limit = %d\n", max_hop_limit);
294 
295  mailbox_done = 1;
297 
298  (void)(max_response_size);
299  (void)(response);
300  (void)(response_size);
301 
302  return DOCA_PCC_DEV_STATUS_OK;
303 }
#define __DPA_MEMORY
DPA 'Memory' memory space.
Definition: dpaintrin.h:32
#define __dpa_thread_fence(MEMORY_SPACE, PRED_OP, SUCC_OP)
Definition: dpaintrin.h:54
#define __DPA_W
Write memory operation.
Definition: dpaintrin.h:41
doca_pcc_dev_error_t
API functions return status.
@ DOCA_PCC_DEV_STATUS_OK
@ DOCA_PCC_DEV_STATUS_FAIL
DOCA_EXPERIMENTAL ALWAYS_INLINE uint8_t * doca_pcc_np_dev_get_l4_header(const struct doca_pcc_np_dev_request_packet *input)
Returns a pointer to the L4/udp header of the packet.
doca_pcc_dev_error_t doca_pcc_dev_np_user_packet_handler(struct doca_pcc_np_dev_request_packet *in, struct doca_pcc_np_dev_response_packet *out)
Main user function (implemented by the user) Called by the lib upon receiving a packet....
doca_pcc_dev_error_t doca_pcc_dev_user_mailbox_handle(void *request, uint32_t request_size, uint32_t max_response_size, void *response, uint32_t *response_size)
User callback to process a request from host to device.
DOCA_STABLE void doca_pcc_dev_printf(const char *format,...) __attribute__((format(printf
Print to Host.
DOCA_STABLE FORCE_INLINE uint32_t doca_pcc_dev_get_timer_lo(void)
Core timer access (elapsed time in uSec) function 32 bits.
#define DB_ENTRY_AGING_TIME
#define INVALID_MD_TIME_INTERVAL
static uint32_t telemetry_get_tx_bytes(struct switch_md *md)
uint32_t mailbox_done
#define TELEMETRY_DB_MASK
uint8_t max_hop_limit
static uint16_t telemetry_db_hash(uint32_t device_id, uint16_t port)
static int telemetry_process_md(struct switch_md *md, struct int_response *response_pkt)
struct ifa2_md_hdr __attribute__
uint32_t last_access_ts
static uint32_t telemetry_get_q_len(struct switch_md *md)
static uint32_t telemetry_get_tx_ts_nano(struct switch_md *md)
struct telemetry_db_entry_t telemetry_db[TELEMETRY_DB_SIZE]
static uint8_t get_md_num_from_md_hdr(struct ifa2_md_hdr *md_hdr)
#define TELEMETRY_DB_SIZE
ALWAYS_INLINE uint32_t diff_with_wrap32(uint32_t greater_num, uint32_t smaller_num)
#define UDP_HDR_SIZE
uint32_t ts
uint32_t bytes
structure for response packet
uint32_t valid
uint32_t last_access_ts
uint32_t rsvd1
uint32_t tx_ts_nano0
uint32_t tx_bytes1
uint32_t rsvd0
uint32_t tx_ts_nano1
uint32_t tx_bytes0