NVIDIA DOCA SDK Data Center on a Chip Framework Documentation
dpa_all_to_all_core.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2022-2023 NVIDIA CORPORATION AND AFFILIATES. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  */
25 
26 #ifndef DPA_ALL_TO_ALL_CORE_H_
27 #define DPA_ALL_TO_ALL_CORE_H_
28 
29 #include <mpi.h>
30 
31 #include <doca_dpa.h>
32 #include <doca_buf_array.h>
33 #include <doca_mmap.h>
34 #include <doca_dev.h>
35 #include <doca_error.h>
36 #include <doca_sync_event.h>
37 #include <doca_rdma.h>
38 
39 #define MAX_DEVICES (2) /* Max number of IB devices to use */
40 #define MAX_USER_IB_DEVICE_NAME_LEN (256) /* Maximum user IB device name string length */
41 #define MAX_IB_DEVICE_NAME_LEN (MAX_USER_IB_DEVICE_NAME_LEN + 1) /* Maximum IB device name string length */
42 #define IB_DEVICE_DEFAULT_NAME "NOT_SET" /* IB device default name */
43 #define MAX_NUM_THREADS (8) /* Maximum number of threads to run the kernel */
44 #define MESSAGE_SIZE_DEFAULT_LEN (-1) /* Message size default length */
45 #define MAX_NUM_PROC (16) /* Maximum number of processes */
46 #define SYNC_EVENT_MASK_FFS (0xFFFFFFFFFFFFFFFF) /* Mask for doca_sync_event_wait_gt() wait value */
47 
48 /* Configuration struct */
49 struct a2a_config {
50  int msgsize; /* Message size of sendbuf (in bytes) */
51  char pf_device1_name[MAX_IB_DEVICE_NAME_LEN]; /* PF DOCA device name used to create DOCA DPA context */
52  char rdma_device1_name[MAX_IB_DEVICE_NAME_LEN]; /* When running from DPU: SF DOCA device name used to create
53  RDMA context. When running from Host: will be equal to pf_device1_name.
54  */
55  char pf_device2_name[MAX_IB_DEVICE_NAME_LEN]; /* PF DOCA device name used to create second DOCA DPA context */
56  char rdma_device2_name[MAX_IB_DEVICE_NAME_LEN]; /* When running from DPU: second SF DOCA device name used to
57  create RDMA context. When running from Host: will be equal to
58  pf_device2_name. */
59 };
60 
61 /* A struct that includes all the resources needed for DPA */
62 struct a2a_resources {
63  char pf_device_name[MAX_IB_DEVICE_NAME_LEN]; /* Buffer that holds the PF device name */
64  struct doca_dev *pf_doca_device; /* PF DOCA device used to create the DOCA DPA context */
65  struct doca_dpa *pf_doca_dpa; /* DOCA DPA context created on PF device */
66  char rdma_device_name[MAX_IB_DEVICE_NAME_LEN]; /* Buffer that holds the RDMA device name */
67  struct doca_dev *rdma_doca_device; /* When running from DPU: SF DOCA device used to create RDMA context.
68  When running from Host: will be equal to pf_doca_device. */
69  struct doca_dpa *rdma_doca_dpa; /* When running from DPU: extended DOCA DPA context created on RDMA DOCA device.
70  When running from Host: will be equal to pf_doca_dpa. */
71  doca_dpa_dev_t rdma_doca_dpa_handle; /* Extended DOCA DPA context handle */
72  void *sendbuf; /* The send buffer we get from the alltoall call */
73  void *recvbuf; /* The receive buffer we get from the alltoall call */
74  struct doca_sync_event *comp_event; /* DOCA sync event for DPA completion event */
75  uint64_t a2a_seq_num; /* Sequence number for the completion event */
76  struct doca_sync_event **kernel_events; /* DOCA sync events for kernel */
77  doca_dpa_dev_sync_event_t *kernel_events_handle; /* DOCA sync events handles for DPA kernel */
78  doca_dpa_dev_uintptr_t devptr_kernel_events_handle; /* DOCA DPA local processes remote events for kernel device
79  pointers */
80  uint8_t *lp_remote_kernel_events_export_data; /* DOCA DPA local process device remote events export data */
81  size_t *lp_remote_kernel_events_export_sizes; /* DOCA DPA local process device remote events export sizes */
82  const uint8_t **rp_remote_kernel_events_export_data; /* DOCA DPA remote process device remote events export data
83  */
84  size_t *rp_remote_kernel_events_export_sizes; /* DOCA DPA remote process device remote events export sizes */
85  struct doca_sync_event_remote_net **rp_kernel_events; /* DOCA DPA remote process device remote events */
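86  doca_dpa_dev_sync_event_remote_net_t *rp_kernel_events_dpa_handles; /* DPA handles of the remote process device
87  remote events */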
88  doca_dpa_dev_uintptr_t devptr_rp_remote_kernel_events; /* DOCA DPA remote processes remote events DPA handles */
89  struct doca_dpa_completion **dpa_completions; /* DOCA DPA completion contexts */
90  struct doca_rdma **rdmas; /* DOCA RDMA contexts */
91  doca_dpa_dev_uintptr_t devptr_rdmas; /* DOCA DPA RDMA handlers device pointers */
92  struct doca_mmap *sendbuf_mmap; /* DOCA mmap for sendbuf host memory */
93  doca_dpa_dev_mmap_t sendbuf_dpa_mmap_handle; /* DPA handle for sendbuf DOCA mmap */
94  struct doca_mmap *recvbuf_mmap; /* DOCA mmap for recvbuf host memory */
95  struct doca_mmap **export_mmaps; /* DOCA mmap export from remote processes */
96  doca_dpa_dev_mmap_t *export_mmaps_dpa_handle; /* DPA handle for DOCA mmap export from remote processes */
97  doca_dpa_dev_uintptr_t devptr_recvbufs_mmap_handles; /* DOCA DPA recvbuf mmap handles device pointers */
98  doca_dpa_dev_uintptr_t devptr_recvbufs; /* DOCA DPA recvbuf device pointers */
99  int num_ranks; /* Number of running processes */
100  int my_rank; /* Rank of the current process */
101  int mesg_count; /* Message count */
102  MPI_Datatype msg_type; /* MPI Datatype of the message */
103  MPI_Aint extent; /* The extent of the message type */
104  MPI_Comm comm; /* MPI communication group */
105 };
106 
107 /* DPA Alltoall request that is used to check the completion of the non-blocking alltoall call */
108 struct dpa_a2a_request {
109  struct a2a_resources *resources; /* Alltoall resources */
110 };
111 
112 /*
113  * Check if the provided device name is a name of a valid IB device with DPA capabilities
114  *
115  * @device_name [in]: The wanted IB device name
116  * @return: True if device_name is IB_DEVICE_DEFAULT_NAME or if an IB device with DPA capabilities with name
117  * same as device_name is found, false otherwise.
118  */
119 bool dpa_device_exists_check(const char *device_name);
120 
121 /*
122  * Check if the provided device name is a name of a valid IB device with RDMA capabilities
123  *
124  * @device_name [in]: The wanted IB device name
125  * @return: True if device_name is IB_DEVICE_DEFAULT_NAME or if an IB device with RDMA capabilities with name
126  * same as device_name is found, false otherwise.
127  */
128 bool rdma_device_exists_check(const char *device_name);
129 
130 /*
131  * Finalize the request and destroy the associated resources
132  *
133  * @req [in]: DOCA DPA alltoall request that was sent to a previous ialltoall call
134  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
135  */
136 doca_error_t dpa_a2a_req_finalize(struct dpa_a2a_request *req);
137 
138 /*
139  * Wait for the requested non-blocking alltoall call to finish
140  *
141  * @req [in]: DPA alltoall request
142  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
143  */
144 doca_error_t dpa_a2a_req_wait(struct dpa_a2a_request *req);
145 
146 /*
147  * Initialize the DOCA DPA all to all resources, which includes creating DOCA DPA context, allocating and connecting
148  * DOCA DPA endpoints and creating DOCA DPA host and device memories.
149  *
150  * @resources [in/out]: All to all resources
151  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
152  */
153 doca_error_t dpa_a2a_init(struct a2a_resources *resources);
154 
155 /*
156  * Destroy the all to all resources
157  *
158  * @resources [in]: All to all resources
159  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
160  */
161 doca_error_t dpa_a2a_destroy(struct a2a_resources *resources);
162 
163 /*
164  * MPI non-blocking all to all using DOCA DPA. This function sends data to all processes from all processes.
165  *
166  * @sendbuf [in]: The starting address of send buffer
167  * @sendcount [in]: The number of elements to be sent to each process
168  * @sendtype [in]: The datatype of the send buff elements
169  * @recvbuf [in]: The starting address of the receive buffer
170  * @recvcount [in]: The number of elements to be received from each process
171  * @recvtype [in]: The datatype of the receive buff elements
172  * @comm [in]: The communicator over which the data is to be exchanged
173  * @req [out]: DPA request that is used to check if the alltoall is finished using dpa_a2a_req_wait().
174  * Note that after finishing we must finalize the request using dpa_a2a_req_finalize().
175  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
176  */
177 doca_error_t dpa_ialltoall(void *sendbuf,
178  int sendcount,
179  MPI_Datatype sendtype,
180  void *recvbuf,
181  int recvcount,
182  MPI_Datatype recvtype,
183  MPI_Comm comm,
184  struct dpa_a2a_request *req);
185 
186 /*
187  * MPI blocking all to all using DOCA DPA. This function sends data to all processes from all processes.
188  *
189  * @sendbuf [in]: The starting address of send buffer
190  * @sendcount [in]: The number of elements to be sent to each process
191  * @sendtype [in]: The datatype of the send buff elements
192  * @recvbuf [in]: The starting address of the receive buffer
193  * @recvcount [in]: The number of elements to be received from each process
194  * @recvtype [in]: The datatype of the receive buff elements
195  * @comm [in]: The communicator over which the data is to be exchanged
196  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
197  */
198 doca_error_t dpa_alltoall(void *sendbuf,
199  int sendcount,
200  MPI_Datatype sendtype,
201  void *recvbuf,
202  int recvcount,
203  MPI_Datatype recvtype,
204  MPI_Comm comm);
205 
206 /*
207  * Perform all to all example using DOCA DPA
208  *
209  * @argc [in]: command line arguments size
210  * @argv [in]: array of command line arguments
211  * @cfg [in]: All to all user configurations
212  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
213  */
214 doca_error_t dpa_a2a(int argc, char **argv, struct a2a_config *cfg);
215 
216 #endif /* DPA_ALL_TO_ALL_CORE_H_ */
bool rdma_device_exists_check(const char *device_name)
doca_error_t dpa_a2a_req_finalize(struct dpa_a2a_request *req)
doca_error_t dpa_ialltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, struct dpa_a2a_request *req)
doca_error_t dpa_a2a(int argc, char **argv, struct a2a_config *cfg)
doca_error_t dpa_a2a_req_wait(struct dpa_a2a_request *req)
doca_error_t dpa_a2a_init(struct a2a_resources *resources)
doca_error_t dpa_alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
bool dpa_device_exists_check(const char *device_name)
doca_error_t dpa_a2a_destroy(struct a2a_resources *resources)
#define MAX_IB_DEVICE_NAME_LEN
struct rdma_resources resources
uint64_t doca_dpa_dev_t
DPA context handle type definition.
Definition: doca_dpa.h:73
uint64_t doca_dpa_dev_uintptr_t
DPA pointer type definition.
Definition: doca_dpa.h:78
enum doca_error doca_error_t
DOCA API return codes.
uint32_t doca_dpa_dev_mmap_t
Handle on the DPA for a doca_mmap instance.
Definition: doca_mmap.h:74
uint64_t doca_dpa_dev_sync_event_t
DOCA Sync Event DPA handle.
uint64_t doca_dpa_dev_sync_event_remote_net_t
DOCA Sync Event remote DPA handle.
const struct ip_frag_config * cfg
Definition: ip_frag_dp.c:0
char pf_device2_name[MAX_IB_DEVICE_NAME_LEN]
char pf_device1_name[MAX_IB_DEVICE_NAME_LEN]
char rdma_device1_name[MAX_IB_DEVICE_NAME_LEN]
char rdma_device2_name[MAX_IB_DEVICE_NAME_LEN]
struct doca_sync_event_remote_net ** rp_kernel_events
struct doca_rdma ** rdmas
const uint8_t ** rp_remote_kernel_events_export_data
struct doca_mmap ** export_mmaps
struct doca_dpa * pf_doca_dpa
char rdma_device_name[MAX_IB_DEVICE_NAME_LEN]
struct doca_dpa * rdma_doca_dpa
doca_dpa_dev_sync_event_remote_net_t * rp_kernel_events_dpa_handles
struct doca_mmap * sendbuf_mmap
MPI_Datatype msg_type
doca_dpa_dev_t rdma_doca_dpa_handle
doca_dpa_dev_uintptr_t devptr_recvbufs_mmap_handles
size_t * lp_remote_kernel_events_export_sizes
doca_dpa_dev_uintptr_t devptr_kernel_events_handle
doca_dpa_dev_sync_event_t * kernel_events_handle
char pf_device_name[MAX_IB_DEVICE_NAME_LEN]
struct doca_dev * pf_doca_device
size_t * rp_remote_kernel_events_export_sizes
struct doca_sync_event ** kernel_events
doca_dpa_dev_mmap_t * export_mmaps_dpa_handle
doca_dpa_dev_mmap_t sendbuf_dpa_mmap_handle
doca_dpa_dev_uintptr_t devptr_rp_remote_kernel_events
struct doca_dev * rdma_doca_device
struct doca_mmap * recvbuf_mmap
uint8_t * lp_remote_kernel_events_export_data
struct doca_dpa_completion ** dpa_completions
doca_dpa_dev_uintptr_t devptr_recvbufs
struct doca_sync_event * comp_event
doca_dpa_dev_uintptr_t devptr_rdmas
struct a2a_resources * resources