NVIDIA DOCA SDK Data Center on a Chip Framework Documentation
dpa_all_to_all.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2022-2023 NVIDIA CORPORATION AND AFFILIATES. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  */
25 
26 #include <stdlib.h>
27 
28 #include <doca_log.h>
29 #include <doca_argp.h>
30 
31 #include "dpa_all_to_all_core.h"
32 
34 
35 /*
36  * ARGP Callback - Handle message size parameter
    * Reads the value as an int, logs an error when it is not a multiple of sizeof(int), and writes
    * it to the msgsize field of the program configuration.
    * NOTE(review): the embedded listing numbers skip 49, so a statement inside the error branch
    * (presumably its return) is not visible in this extract - confirm against the original file.
37  *
38  * @param [in]: Input parameter, dereferenced as an int holding the message size
39  * @config [in/out]: Program configuration context, cast to struct a2a_config
40  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
41  */
42 static doca_error_t msgsize_callback(void *param, void *config)
43 {
44  struct a2a_config *a2a_cgf = (struct a2a_config *)config;
45  int msgsize = *((int *)param);
46 
47  if (msgsize % sizeof(int) != 0) {
48  DOCA_LOG_ERR("Entered message size is not in multiplies of integer size (%lu)", sizeof(int));
50  }
51  a2a_cgf->msgsize = msgsize;
52 
53  return DOCA_SUCCESS;
54 }
55 
56 /*
57  * ARGP Callback - Handle PF device names parameter
    * Splits the input on ',' and copies the first name into pf_device1_name and, when present, the
    * second name into pf_device2_name; a third name is logged as an error.
    * The length check is applied to the whole input string, before it is split.
    * NOTE(review): the embedded listing numbers skip 73-74, 81, 84, 91, 94 and 100, so the checks
    * guarding the "doesn't exist or doesn't support DPA" logs and the statements that follow each
    * error log are not visible in this extract - confirm against the original file.
58  *
59  * @param [in]: Input parameter, used as a modifiable string of comma separated device names
60  * @config [in/out]: Program configuration context, cast to struct a2a_config
61  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
62  */
63 static doca_error_t pf_devices_name_callback(void *param, void *config)
64 {
65  struct a2a_config *a2a_cfg = (struct a2a_config *)config;
66  char *devices_names = (char *)param;
67  char *devices_names_list;
68  int len;
69 
70  len = strnlen(devices_names, MAX_IB_DEVICE_NAME_LEN);
71  if (len == MAX_IB_DEVICE_NAME_LEN) { /* strnlen() reached its cap, so the input is at least that long */
72  DOCA_LOG_ERR("Entered IB device name exceeding the maximum size of %d",
75  }
76 
77  /* Split the devices names by comma (strtok() tokenizes the input string in place) */
78  devices_names_list = strtok(devices_names, ",");
79  strncpy(a2a_cfg->pf_device1_name, devices_names_list, MAX_USER_IB_DEVICE_NAME_LEN);
80 
82  DOCA_LOG_ERR("Entered IB device name: %s doesn't exist or doesn't support DPA",
83  a2a_cfg->pf_device1_name);
85  }
86 
87  /* If another name was provided then get it as well */
88  devices_names_list = strtok(NULL, ",");
89  if (devices_names_list != NULL) {
90  strncpy(a2a_cfg->pf_device2_name, devices_names_list, MAX_USER_IB_DEVICE_NAME_LEN);
92  DOCA_LOG_ERR("Entered IB device name: %s doesn't exist or doesn't support DPA",
93  a2a_cfg->pf_device2_name);
95  }
96  /* Max two devices, so check if a third device was added */
97  devices_names_list = strtok(NULL, ",");
98  if (devices_names_list != NULL) {
99  DOCA_LOG_ERR("Entered more than two IB devices");
101  }
102  }
103 
104  return DOCA_SUCCESS;
105 }
106 
107 #ifdef DOCA_ARCH_DPU
108 /*
109  * ARGP Callback - Handle RDMA device names parameter
     * Splits the input on ',' and copies the first name into rdma_device1_name and, when present,
     * the second name into rdma_device2_name; a third name is logged as an error.
     * The length check is applied to the whole input string, before it is split.
     * NOTE(review): the embedded listing numbers skip 125-126, 133, 136, 143, 146 and 152, so the
     * checks guarding the "doesn't exist or doesn't support RDMA" logs and the statements that
     * follow each error log are not visible in this extract - confirm against the original file.
110  *
111  * @param [in]: Input parameter, used as a modifiable string of comma separated device names
112  * @config [in/out]: Program configuration context, cast to struct a2a_config
113  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
114  */
115 static doca_error_t rdma_devices_name_callback(void *param, void *config)
116 {
117  struct a2a_config *a2a_cfg = (struct a2a_config *)config;
118  char *devices_names = (char *)param;
119  char *devices_names_list;
120  int len;
121 
122  len = strnlen(devices_names, MAX_IB_DEVICE_NAME_LEN);
123  if (len == MAX_IB_DEVICE_NAME_LEN) { /* strnlen() reached its cap, so the input is at least that long */
124  DOCA_LOG_ERR("Entered IB device name exceeding the maximum size of %d",
127  }
128 
129  /* Split the devices names by comma (strtok() tokenizes the input string in place) */
130  devices_names_list = strtok(devices_names, ",");
131  strncpy(a2a_cfg->rdma_device1_name, devices_names_list, MAX_USER_IB_DEVICE_NAME_LEN);
132 
134  DOCA_LOG_ERR("Entered IB device name: %s doesn't exist or doesn't support RDMA",
135  a2a_cfg->rdma_device1_name);
137  }
138 
139  /* If another name was provided then get it as well */
140  devices_names_list = strtok(NULL, ",");
141  if (devices_names_list != NULL) {
142  strncpy(a2a_cfg->rdma_device2_name, devices_names_list, MAX_USER_IB_DEVICE_NAME_LEN);
144  DOCA_LOG_ERR("Entered IB device name: %s doesn't exist or doesn't support RDMA",
145  a2a_cfg->rdma_device2_name);
147  }
148  /* Max two devices, so check if a third device was added */
149  devices_names_list = strtok(NULL, ",");
150  if (devices_names_list != NULL) {
151  DOCA_LOG_ERR("Entered more than two IB devices");
153  }
154  }
155 
156  return DOCA_SUCCESS;
157 }
158 #endif
159 
160 /*
161  * Register the command line parameters for the All to All application.
162  *
     * Parameters created and registered, in order (short name / long name):
     *   "m" / "msgsize"              - message size
     *   "pf_devs" / "pf-devices"     - PF device names
     *   "rdma_devs" / "rdma-devices" - RDMA device names, only when DOCA_ARCH_DPU is defined
     * If creating or registering a parameter fails, the error is logged and its code is returned
     * immediately.
     * NOTE(review): the embedded listing numbers skip 165, 167, 179, 182-183, 198, 201-202 and 219.
     * The function signature (the symbol index at the end of this page lists it as
     * register_all_to_all_params(void)), the declaration of result, and the calls that open the
     * description arguments and presumably set the callback and type of the first two parameters
     * are not visible in this extract - confirm against the original file.
     *
163  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
164  */
166 {
168  struct doca_argp_param *msgsize_param;
169  struct doca_argp_param *pf_devices_param;
170 
171  result = doca_argp_param_create(&msgsize_param);
172  if (result != DOCA_SUCCESS) {
173  DOCA_LOG_ERR("Failed to create ARGP param: %s", doca_error_get_descr(result));
174  return result;
175  }
176  doca_argp_param_set_short_name(msgsize_param, "m");
177  doca_argp_param_set_long_name(msgsize_param, "msgsize");
178  doca_argp_param_set_arguments(msgsize_param, "<Message size>");
180  msgsize_param,
181  "The message size - the size of the sendbuf and recvbuf (in bytes). Must be in multiplies of integer size. Default is size of one integer times the number of processes.");
184  result = doca_argp_register_param(msgsize_param);
185  if (result != DOCA_SUCCESS) {
186  DOCA_LOG_ERR("Failed to register program param: %s", doca_error_get_descr(result));
187  return result;
188  }
189 
190  result = doca_argp_param_create(&pf_devices_param);
191  if (result != DOCA_SUCCESS) {
192  DOCA_LOG_ERR("Failed to create ARGP param: %s", doca_error_get_descr(result));
193  return result;
194  }
195  doca_argp_param_set_short_name(pf_devices_param, "pf_devs");
196  doca_argp_param_set_long_name(pf_devices_param, "pf-devices");
197  doca_argp_param_set_arguments(pf_devices_param, "<PF device name>");
199  pf_devices_param,
200  "PF devices names that supports DPA, separated by comma without spaces (max of two devices). If not provided then a random device will be chosen.");
203  result = doca_argp_register_param(pf_devices_param);
204  if (result != DOCA_SUCCESS) {
205  DOCA_LOG_ERR("Failed to register program param: %s", doca_error_get_descr(result));
206  return result;
207  }
208 
209 #ifdef DOCA_ARCH_DPU
210  struct doca_argp_param *rdma_devices_param;
211  result = doca_argp_param_create(&rdma_devices_param);
212  if (result != DOCA_SUCCESS) {
213  DOCA_LOG_ERR("Failed to create ARGP param: %s", doca_error_get_descr(result));
214  return result;
215  }
216  doca_argp_param_set_short_name(rdma_devices_param, "rdma_devs");
217  doca_argp_param_set_long_name(rdma_devices_param, "rdma-devices");
218  doca_argp_param_set_arguments(rdma_devices_param, "<RDMA device names>");
220  rdma_devices_param,
221  "devices names that supports RDMA, separated by comma without spaces (max of two devices). If not provided then a random device will be chosen.");
222  doca_argp_param_set_callback(rdma_devices_param, rdma_devices_name_callback);
223  doca_argp_param_set_type(rdma_devices_param, DOCA_ARGP_TYPE_STRING);
224  result = doca_argp_register_param(rdma_devices_param);
225  if (result != DOCA_SUCCESS) {
226  DOCA_LOG_ERR("Failed to register program param: %s", doca_error_get_descr(result));
227  return result;
228  }
229 #endif
230 
231  return DOCA_SUCCESS;
232 }
233 
234 /*
235  * Prepare the arg parser user parameters
     * Runs three steps in order: initialize the arg parser, register the application parameters,
     * and parse the command line with doca_argp_start(). A failing step is logged; the value of
     * result after the last executed step is returned.
     * NOTE(review): the embedded listing numbers skip 244, 247, 254, 257 and 265, so the declaration
     * of result, the calls that assign it before the first two checks, and a statement in each of
     * the last two error branches are not visible in this extract - confirm against the original
     * file.
236  *
237  * @argc [in]: command line arguments size
238  * @argv [in]: array of command line arguments
239  * @cfg [out]: User configuration parameters
240  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
241  */
242 static doca_error_t prepare_argp_parameters(int argc, char **argv, struct a2a_config *cfg)
243 {
245 
246  /* Initialize arg parser for the All to All application */
248  if (result != DOCA_SUCCESS) {
249  DOCA_LOG_ERR("Failed to init ARGP resources: %s", doca_error_get_descr(result));
250  return result;
251  }
252 
253  /* Register all_to_all params */
255  if (result != DOCA_SUCCESS) {
256  DOCA_LOG_ERR("Failed to register application parameters: %s", doca_error_get_descr(result));
258  return result;
259  }
260 
261  /* Start arg parser - parse the command line arguments */
262  result = doca_argp_start(argc, argv);
263  if (result != DOCA_SUCCESS) {
264  DOCA_LOG_ERR("Failed to parse application input: %s", doca_error_get_descr(result));
266  }
267 
268  return result;
269 }
270 
271 /*
272  * Application main function
     * Sets up the log backends, initializes MPI, fills the configuration with default values, lets
     * the process of rank 0 parse the command line, broadcasts the parse result and the
     * configuration to all processes, and then runs dpa_a2a().
     * NOTE(review): the embedded listing numbers skip 282, 287, 295 and 337, so the declaration of
     * result, the calls that assign it before the first and third checks, and the statement
     * controlled by "if (rank == 0)" under destroy_resources are not visible in this extract;
     * MPI_Finalize() therefore only appears to be conditional - confirm against the original file.
273  *
274  * @argc [in]: command line arguments size
275  * @argv [in]: array of command line arguments
276  * @return: EXIT_SUCCESS on success and EXIT_FAILURE otherwise
277  */
278 int main(int argc, char **argv)
279 {
280  int rank, size;
281  struct a2a_config cfg = {0};
283  struct doca_log_backend *sdk_log;
284  int exit_status = EXIT_SUCCESS;
285 
286  /* Register a logger backend */
288  if (result != DOCA_SUCCESS)
289  return EXIT_FAILURE;
290 
291  /* Register a logger backend for internal SDK errors and warnings */
292  result = doca_log_backend_create_with_file_sdk(stderr, &sdk_log);
293  if (result != DOCA_SUCCESS)
294  return EXIT_FAILURE;
296  if (result != DOCA_SUCCESS)
297  return EXIT_FAILURE;
298 
299  /* Initialize MPI variables */
300  MPI_Init(&argc, &argv);
301  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
302  MPI_Comm_size(MPI_COMM_WORLD, &size);
303 
304  /* Set default value for devices names */
305  strcpy(cfg.pf_device1_name, IB_DEVICE_DEFAULT_NAME);
306  strcpy(cfg.rdma_device1_name, IB_DEVICE_DEFAULT_NAME);
307  strcpy(cfg.pf_device2_name, IB_DEVICE_DEFAULT_NAME);
308  strcpy(cfg.rdma_device2_name, IB_DEVICE_DEFAULT_NAME);
309 
310  /* Set default value of message size */
311  cfg.msgsize = MESSAGE_SIZE_DEFAULT_LEN;
312 
313  /* Process of rank 0 will prepare the parameters and send them to the rest of the processes */
314  if (rank == 0)
315  result = prepare_argp_parameters(argc, argv, &cfg);
316 
317  /* Using MPI Bcast, send the result from process rank 0 to the rest of the processes, so that every process takes the same branch below */
318  MPI_Bcast(&result, sizeof(result), MPI_BYTE, 0, MPI_COMM_WORLD);
319  if (result != DOCA_SUCCESS) {
320  exit_status = EXIT_FAILURE;
321  goto destroy_resources;
322  }
323 
324  /* Using MPI Bcast, send the parameter configuration struct (as raw bytes) from process rank 0 to the rest of the processes */
325  MPI_Bcast(&cfg, sizeof(cfg), MPI_BYTE, 0, MPI_COMM_WORLD);
326 
327  /* All to all logic - runs on every process; only rank 0 logs a failure */
328  result = dpa_a2a(argc, argv, &cfg);
329  if (result != DOCA_SUCCESS) {
330  if (rank == 0)
331  DOCA_LOG_ERR("dpa_a2a() encountered errors: %s", doca_error_get_descr(result));
332  exit_status = EXIT_FAILURE;
333  }
334 
335 destroy_resources:
336  if (rank == 0)
338  MPI_Finalize();
339 
340  return exit_status;
341 }
#define NULL
Definition: __stddef_null.h:26
int32_t result
uint64_t len
static doca_error_t register_all_to_all_params(void)
static doca_error_t msgsize_callback(void *param, void *config)
DOCA_LOG_REGISTER(A2A)
int main(int argc, char **argv)
static doca_error_t pf_devices_name_callback(void *param, void *config)
static doca_error_t prepare_argp_parameters(int argc, char **argv, struct a2a_config *cfg)
bool rdma_device_exists_check(const char *device_name)
doca_error_t dpa_a2a(int argc, char **argv, struct a2a_config *cfg)
bool dpa_device_exists_check(const char *device_name)
#define MAX_DEVICES
#define MESSAGE_SIZE_DEFAULT_LEN
#define MAX_USER_IB_DEVICE_NAME_LEN
#define IB_DEVICE_DEFAULT_NAME
#define MAX_IB_DEVICE_NAME_LEN
DOCA_EXPERIMENTAL void doca_argp_param_set_description(struct doca_argp_param *param, const char *description)
Set the description of the program param, used during program usage.
DOCA_EXPERIMENTAL void doca_argp_param_set_long_name(struct doca_argp_param *param, const char *name)
Set the long name of the program param.
DOCA_EXPERIMENTAL doca_error_t doca_argp_start(int argc, char **argv)
Parse incoming arguments (cmd line/json).
DOCA_EXPERIMENTAL void doca_argp_param_set_arguments(struct doca_argp_param *param, const char *arguments)
Set the description of the expected arguments of the program param, used during program usage.
DOCA_EXPERIMENTAL doca_error_t doca_argp_init(const char *program_name, void *program_config)
Initialize the parser interface.
DOCA_EXPERIMENTAL void doca_argp_param_set_callback(struct doca_argp_param *param, doca_argp_param_cb_t callback)
Set the callback function of the program param.
DOCA_EXPERIMENTAL doca_error_t doca_argp_param_create(struct doca_argp_param **param)
Create new program param.
DOCA_EXPERIMENTAL void doca_argp_param_set_type(struct doca_argp_param *param, enum doca_argp_type type)
Set the type of the param arguments.
DOCA_EXPERIMENTAL void doca_argp_param_set_short_name(struct doca_argp_param *param, const char *name)
Set the short name of the program param.
DOCA_EXPERIMENTAL doca_error_t doca_argp_destroy(void)
ARG Parser destroy.
DOCA_EXPERIMENTAL doca_error_t doca_argp_register_param(struct doca_argp_param *input_param)
Register a program flag.
@ DOCA_ARGP_TYPE_STRING
Definition: doca_argp.h:56
@ DOCA_ARGP_TYPE_INT
Definition: doca_argp.h:57
enum doca_error doca_error_t
DOCA API return codes.
DOCA_STABLE const char * doca_error_get_descr(doca_error_t error)
Returns the description string of an error code.
@ DOCA_ERROR_INVALID_VALUE
Definition: doca_error.h:44
@ DOCA_SUCCESS
Definition: doca_error.h:38
DOCA_EXPERIMENTAL doca_error_t doca_log_backend_create_standard(void)
Create default, non configurable backend for application messages.
#define DOCA_LOG_ERR(format,...)
Generates an ERROR application log message.
Definition: doca_log.h:466
DOCA_EXPERIMENTAL doca_error_t doca_log_backend_create_with_file_sdk(FILE *fptr, struct doca_log_backend **backend)
Create a logging backend with a FILE* stream for SDK messages.
DOCA_EXPERIMENTAL doca_error_t doca_log_backend_set_sdk_level(struct doca_log_backend *backend, uint32_t level)
Set the log level limit for SDK logging backends.
@ DOCA_LOG_LEVEL_WARNING
Definition: doca_log.h:47
const struct ip_frag_config * cfg
Definition: ip_frag_dp.c:0
char pf_device2_name[MAX_IB_DEVICE_NAME_LEN]
char pf_device1_name[MAX_IB_DEVICE_NAME_LEN]
char rdma_device1_name[MAX_IB_DEVICE_NAME_LEN]
char rdma_device2_name[MAX_IB_DEVICE_NAME_LEN]