NVIDIA DOCA SDK Data Center on a Chip Framework Documentation
dpa_all_to_all_dev.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2022-2023 NVIDIA CORPORATION AND AFFILIATES. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  */
25 
26 #include <doca_dpa_dev.h>
27 #include <doca_dpa_dev_rdma.h>
28 #include <doca_dpa_dev_buf.h>
30 
31 #define SYNC_EVENT_MASK_FFS (0xFFFFFFFFFFFFFFFF) /* Mask for doca_dpa_dev_sync_event_wait_gt() wait value */
32 
33 /*
34  * Alltoall kernel function.
35  * Performs RDMA write operations using doca_dpa_dev_rdma_post_write() from the local buffer to the remote buffers.
36  *
37  * @rdma_dpa_ctx_handle [in]: Extended DPA context handle for RDMA DOCA device. Needed when running from DPU
38  * @rdmas_dev_ptr [in]: An array of DOCA DPA RDMA handles
39  * @local_buf_addr [in]: local buffer address for alltoall
40  * @local_buf_mmap_handle [in]: local buffer mmap handle for alltoall
41  * @count [in]: Number of elements to write
42  * @type_length [in]: Length of each element
43  * @num_ranks [in]: Number of MPI ranks
44  * @my_rank [in]: The rank of the current process
45  * @remote_recvbufs_dev_ptr [in]: Device pointer of array holding remote buffers addresses for alltoall
46  * @remote_recvbufs_mmap_handles_dev_ptr [in]: Device pointer of array holding remote buffers mmap handle for alltoall
47  * @local_events_dev_ptr [in]: Device pointer of DPA handles to communication events that will be updated by remote MPI
48  * ranks
49  * @remote_events_dev_ptr [in]: Device pointer of DPA handles to communication events on other nodes that will be
50  * updated by this rank
51  * @a2a_seq_num [in]: Sequence number of the current alltoall iteration (the number of times alltoall_kernel has been called)
52  */
54  doca_dpa_dev_uintptr_t rdmas_dev_ptr,
55  uint64_t local_buf_addr,
56  doca_dpa_dev_mmap_t local_buf_mmap_handle,
57  uint64_t count,
58  uint64_t type_length,
59  uint64_t num_ranks,
60  uint64_t my_rank,
61  doca_dpa_dev_uintptr_t remote_recvbufs_dev_ptr,
62  doca_dpa_dev_uintptr_t remote_recvbufs_mmap_handles_dev_ptr,
63  doca_dpa_dev_uintptr_t local_events_dev_ptr,
64  doca_dpa_dev_uintptr_t remote_events_dev_ptr,
65  uint64_t a2a_seq_num)
66 {
67  /* Convert the RDMA DPA device pointer to rdma handle type */
68  doca_dpa_dev_rdma_t *rdma_handles = (doca_dpa_dev_rdma_t *)rdmas_dev_ptr;
69  /* Convert the remote receive buffer addresses DPA device pointer to array of pointers */
70  uintptr_t *remote_recvbufs = (uintptr_t *)remote_recvbufs_dev_ptr;
71  /* Convert the remote receive buffer mmap handles DPA device pointer to array of mmap handle type */
72  doca_dpa_dev_mmap_t *remote_recvbufs_mmap_handles = (doca_dpa_dev_mmap_t *)remote_recvbufs_mmap_handles_dev_ptr;
73  /* Convert the local events DPA device pointer to local events handle type */
74  doca_dpa_dev_sync_event_t *local_events = (doca_dpa_dev_sync_event_t *)local_events_dev_ptr;
75  /* Convert the remote events DPA device pointer to remote events handle type */
77  (doca_dpa_dev_sync_event_remote_net_t *)remote_events_dev_ptr;
78  /* Get the rank of current thread that is running */
79  unsigned int thread_rank = doca_dpa_dev_thread_rank();
80  /* Get the number of all threads that are running this kernel */
81  unsigned int num_threads = doca_dpa_dev_num_threads();
82  unsigned int i;
83 
84  if (rdma_dpa_ctx_handle) {
85  doca_dpa_dev_device_set(rdma_dpa_ctx_handle);
86  }
87 
88  /*
89  * Each process performs num_ranks RDMA write operations, one per remote process. The offset into the local
90  * buffer is determined by the rank of the remote process (the loop index), and the offset into the remote
91  * buffer is determined by the rank of the local process.
92  * Each process runs num_threads threads on this kernel, so the RDMA write operations are divided among the
93  * threads: each thread starts at its own thread rank and advances by num_threads.
94  */
95  for (i = thread_rank; i < num_ranks; i += num_threads) {
96  doca_dpa_dev_rdma_post_write(rdma_handles[i],
97  0,
98  remote_recvbufs_mmap_handles[i],
99  remote_recvbufs[i] + (my_rank * count * type_length),
100  local_buf_mmap_handle,
101  local_buf_addr + (i * count * type_length),
102  (type_length * count),
105 
106  doca_dpa_dev_rdma_signal_set(rdma_handles[i], 0, remote_events[i], a2a_seq_num);
107  }
108 
109  /*
110  * Each thread waits on its local events to make sure that the
111  * remote processes have finished their RDMA write operations.
112  */
113  for (i = thread_rank; i < num_ranks; i += num_threads) {
114  doca_dpa_dev_sync_event_wait_gt(local_events[i], a2a_seq_num - 1, SYNC_EVENT_MASK_FFS);
115  }
116 }
#define SYNC_EVENT_MASK_FFS
__dpa_global__ void alltoall_kernel(doca_dpa_dev_t rdma_dpa_ctx_handle, doca_dpa_dev_uintptr_t rdmas_dev_ptr, uint64_t local_buf_addr, doca_dpa_dev_mmap_t local_buf_mmap_handle, uint64_t count, uint64_t type_length, uint64_t num_ranks, uint64_t my_rank, doca_dpa_dev_uintptr_t remote_recvbufs_dev_ptr, doca_dpa_dev_uintptr_t remote_recvbufs_mmap_handles_dev_ptr, doca_dpa_dev_uintptr_t local_events_dev_ptr, doca_dpa_dev_uintptr_t remote_events_dev_ptr, uint64_t a2a_seq_num)
struct dpa_thread_arg __dpa_global__
DOCA_EXPERIMENTAL void doca_dpa_dev_rdma_signal_set(doca_dpa_dev_rdma_t rdma, uint32_t connection_id, doca_dpa_dev_sync_event_remote_net_t remote_sync_event, uint64_t count)
Signal to set a remote sync event count.
DOCA_EXPERIMENTAL void doca_dpa_dev_rdma_post_write(doca_dpa_dev_rdma_t rdma, uint32_t connection_id, doca_dpa_dev_mmap_t dst_mmap_handle, uint64_t dst_addr, doca_dpa_dev_mmap_t src_mmap_handle, uint64_t src_addr, size_t length, uint32_t flags)
Post an RDMA write operation.
DOCA_EXPERIMENTAL void doca_dpa_dev_sync_event_wait_gt(doca_dpa_dev_sync_event_t dpa_dev_se_handle, uint64_t value, uint64_t mask)
Wait for the value of a DOCA Sync Event to be greater than a given value.
DOCA_EXPERIMENTAL unsigned int doca_dpa_dev_num_threads(void)
Obtains the number of threads running the kernel.
DOCA_EXPERIMENTAL void doca_dpa_dev_device_set(doca_dpa_dev_t dpa_handle)
Set a DPA device.
DOCA_EXPERIMENTAL unsigned int doca_dpa_dev_thread_rank(void)
Obtains the thread rank.
@ DOCA_DPA_DEV_SUBMIT_FLAG_OPTIMIZE_REPORTS
Definition: doca_dpa_dev.h:113
@ DOCA_DPA_DEV_SUBMIT_FLAG_FLUSH
Definition: doca_dpa_dev.h:106
uint64_t doca_dpa_dev_t
DPA context handle type definition.
Definition: doca_dpa.h:73
uint64_t doca_dpa_dev_uintptr_t
DPA pointer type definition.
Definition: doca_dpa.h:78
uint32_t doca_dpa_dev_mmap_t
Handle on the DPA for a doca_mmap instance.
Definition: doca_mmap.h:74
uint64_t doca_dpa_dev_rdma_t
Definition: doca_rdma.h:79
uint64_t doca_dpa_dev_sync_event_t
DOCA Sync Event DPA handle.
uint64_t doca_dpa_dev_sync_event_remote_net_t
DOCA Sync Event remote DPA handle.
__UINTPTR_TYPE__ uintptr_t
Definition: stdint.h:298