NVIDIA DOCA SDK Data Center on a Chip Framework Documentation
rdma_common.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2024 NVIDIA CORPORATION AND AFFILIATES. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  */
25 
26 #ifndef GPURDMA_COMMON_H_
27 #define GPURDMA_COMMON_H_
28 
29 #include <stdbool.h>
30 #include <stddef.h>
31 #include <stdint.h>
32 #include <time.h>
33 #include <sys/socket.h>
34 #include <arpa/inet.h>
35 
36 #include <cuda.h>
37 #include <cuda_runtime.h>
38 
39 #include <doca_log.h>
40 #include <doca_dev.h>
41 #include <doca_rdma.h>
42 #include <doca_gpunetio.h>
43 #include <doca_mmap.h>
44 #include <doca_pe.h>
45 #include <doca_error.h>
46 #include <doca_buf_array.h>
47 
48 #define MAX_PCI_ADDRESS_LEN 32U /* Size of the gpu_pcie_addr buffer in struct rdma_config */
49 #define MAX_IP_ADDRESS_LEN 128 /* Size of the server_ip_addr buffer in struct rdma_config */
50 #define GPU_BUF_SIZE_A 256
51 #define GPU_BUF_SIZE_B 128
52 #define GPU_BUF_SIZE_C 128
53 #define GPU_BUF_SIZE_F sizeof(uint8_t)
54 #define GPU_BUF_NUM 4
55 #define GPU_NUM_OP_X_BUF 2
56 #define RDMA_SEND_QUEUE_SIZE 8192
57 #define RDMA_RECV_QUEUE_SIZE 8192
58 #define ROUND_UP(unaligned_mapping_size, align_val) (((unaligned_mapping_size) + (align_val)-1) & (~((align_val)-1))) /* Round up to a multiple of align_val; the mask is only correct when align_val is a power of two. Whole expansion is parenthesized so it composes with surrounding operators. */
59 #define MAX(a, b) (((a) > (b)) ? (a) : (b)) /* Evaluates one of its arguments twice: do not pass expressions with side effects */
60 #define SERVER_ADDR_LEN (MAX(MAX(DOCA_DEVINFO_IPV4_ADDR_SIZE, DOCA_DEVINFO_IPV6_ADDR_SIZE), DOCA_GID_BYTE_LENGTH)) /* Largest of the IPv4, IPv6 and GID address sizes */
61 #define SERVER_ADDR_TYPE_LEN 6
62 #define DEFAULT_CM_PORT 13579
63 
64 struct rdma_config { /* RDMA sample configuration parameters */
65  char device_name[DOCA_DEVINFO_IBDEV_NAME_SIZE]; /* DOCA device name */
66  char gpu_pcie_addr[MAX_PCI_ADDRESS_LEN]; /* GPU PCIe address */
67  char server_ip_addr[MAX_IP_ADDRESS_LEN]; /* Server IP address */
68  bool is_server; /* True if the sample acts as the server, false if it acts as the client */
69  bool is_gid_index_set; /* Whether the GID index parameter was passed */
70  uint32_t gid_index; /* GID index for DOCA RDMA */
71  bool use_rdma_cm; /* Use RDMA CM */
72  uint32_t cm_port; /* CM port */
73  char cm_addr[SERVER_ADDR_LEN + 1]; /* RDMA_CM server IPv4/IPv6/GID address */
74  enum doca_rdma_addr_type cm_addr_type; /* RDMA_CM server address type: IPv4, IPv6 or GID.
75  * Only useful for the client.
76  */
77 };
78 
79 struct rdma_resources { /* DOCA RDMA resources used by the sample */
80  struct rdma_config *cfg; /* RDMA samples configuration parameters */
81  struct doca_dev *doca_device; /* DOCA device */
82  struct doca_gpu *gpudev; /* DOCA GPU device */
83  struct doca_rdma *rdma; /* DOCA RDMA instance */
84  struct doca_gpu_dev_rdma *gpu_rdma; /* DOCA RDMA instance GPU handler */
85  struct doca_ctx *rdma_ctx; /* DOCA context to be used with DOCA RDMA */
86  struct doca_pe *pe; /* DOCA progress engine -- needed by server only */
87  const void *connection_details; /* Remote peer connection details */
88  size_t conn_det_len; /* Remote peer connection details data length */
89 
90  /* rdma_cm resources */
91  struct doca_rdma_addr *cm_addr; /* CM server address to connect by a client */
92  struct doca_rdma_connection *connection; /* The RDMA_CM connection instance */
93  bool connection_established; /* Indicate whether connection is established */
94  bool connection_error; /* Indicate connection error */
95  bool server_listen_active; /* Indicate if server listen_to_port is active */
96 
97  struct doca_rdma_connection *connection2; /* The second RDMA_CM connection instance */
98  bool connection2_established; /* Indicate whether connection2 is established */
99  bool connection2_error; /* Indicate connection2 error */
100 };
101 
102 /* Sample rdma mmap object */
103 struct rdma_mmap_obj {
104  struct doca_dev *doca_device; /* DOCA network device */
105  uint32_t permissions; /* RDMA permission flags */
106  void *memrange_addr; /* Memory mapped area address */
107  size_t memrange_len; /* Memory mapped area size */
108  struct doca_mmap *mmap; /* DOCA mmap obj */
109  const void *rdma_export; /* RDMA export object to share with remote peer */
110  size_t export_len; /* RDMA export object size */
111 };
112 
113 /* Sample buffer array object: a DOCA buffer array, its GPU-side object, and the parameters describing it */
114 struct buf_arr_obj {
115  struct doca_gpu *gpudev; /* DOCA GPU device */
116  struct doca_mmap *mmap; /* DOCA mmap obj */
117  uint32_t num_elem; /* Number of elements in buffer array */
118  size_t elem_size; /* Size of each element in buffer array */
119  struct doca_buf_arr *buf_arr; /* DOCA buffer array */
120  struct doca_gpu_buf_arr *gpu_buf_arr; /* DOCA buffer array GPU obj */
121 };
122 
123 /*
124  * Out-of-band (OOB) connection to exchange RDMA info - server side
125  *
126  * @oob_sock_fd [out]: Socket FD
127  * @oob_client_sock [out]: Client socket FD
128  * @return: positive integer on success and -1 otherwise
129  */
130 int oob_connection_server_setup(int *oob_sock_fd, int *oob_client_sock);
131 
132 /*
133  * Out-of-band (OOB) connection to exchange RDMA info - server side closure
134  *
135  * @oob_sock_fd [in]: Socket FD
136  * @oob_client_sock [in]: Client socket FD
137  */
138 void oob_connection_server_close(int oob_sock_fd, int oob_client_sock);
139 
140 /*
141  * Out-of-band (OOB) connection to exchange RDMA info - client side
142  *
143  * @server_ip [in]: Server IP address to connect
144  * @oob_sock_fd [out]: Socket FD
145  * @return: positive integer on success and -1 otherwise
146  */
147 int oob_connection_client_setup(const char *server_ip, int *oob_sock_fd);
148 
149 /*
150  * Out-of-band (OOB) connection to exchange RDMA info - client side closure
151  *
152  * @oob_sock_fd [in]: Socket FD
153  */
154 void oob_connection_client_close(int oob_sock_fd);
155 
156 /*
157  * Wrapper to fix const type of doca_rdma_cap_task_write_is_supported:
158  * this wrapper takes a non-const devinfo pointer.
159  * @devinfo [in]: RDMA device info
160  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
161  */
162 doca_error_t wrapper_doca_rdma_cap_task_write_is_supported(struct doca_devinfo *devinfo);
163 
164 /*
165  * Create and initialize DOCA RDMA resources
166  *
167  * @cfg [in]: Configuration parameters
168  * @rdma_permissions [in]: Access permission flags for DOCA RDMA
169  * @resources [in/out]: DOCA RDMA resources to create
170  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
171  */
172 doca_error_t create_rdma_resources(struct rdma_config *cfg,
173  const uint32_t rdma_permissions,
174  struct rdma_resources *resources);
175 
176 /*
177  * Destroy DOCA RDMA resources
178  *
179  * @resources [in]: DOCA RDMA resources to destroy
180  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
181  */
182 doca_error_t destroy_rdma_resources(struct rdma_resources *resources);
183 
184 /*
185  * Create a DOCA mmap object
186  *
187  * @mmap_obj [in]: mmap object to populate (NOTE(review): "populate" suggests this is really [in/out] - confirm against rdma_common.c)
188  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
189  */
190 doca_error_t create_mmap(struct rdma_mmap_obj *mmap_obj);
191 
192 /*
193  * Create a buffer array on GPU
194  *
195  * @buf_arr_obj [in]: buffer array object to populate
196  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
197  */
198 doca_error_t create_buf_arr_on_gpu(struct buf_arr_obj *buf_arr_obj);
199 
200 /*
201  * Server side of the RDMA write
202  *
203  * @cfg [in]: Configuration parameters
204  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
205  */
206 doca_error_t rdma_write_server(struct rdma_config *cfg);
207 
208 /*
209  * Client side of the RDMA write
210  *
211  * @cfg [in]: Configuration parameters
212  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
213  */
214 doca_error_t rdma_write_client(struct rdma_config *cfg);
215 
216 #if __cplusplus
217 extern "C" {
218 #endif
219 
220 /*
221  * Launch a CUDA kernel doing RDMA Write client
222  *
223  * @stream [in]: CUDA Stream to launch the kernel
224  * @rdma_gpu [in]: RDMA GPU object
225  * @client_local_buf_arr_B [in]: GPU buffer with local data B
226  * @client_local_buf_arr_C [in]: GPU buffer with local data C
227  * @client_local_buf_arr_F [in]: GPU buffer with local data F
228  * @client_remote_buf_arr_A [in]: GPU buffer on remote server with data A
229  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
230  */
231 doca_error_t kernel_write_client(cudaStream_t stream,
232  struct doca_gpu_dev_rdma *rdma_gpu,
233  struct doca_gpu_buf_arr *client_local_buf_arr_B,
234  struct doca_gpu_buf_arr *client_local_buf_arr_C,
235  struct doca_gpu_buf_arr *client_local_buf_arr_F,
236  struct doca_gpu_buf_arr *client_remote_buf_arr_A,
237  uint32_t connection_index, /* NOTE(review): not documented above; presumably selects which RDMA connection the kernel uses - confirm */
238  uint32_t *exit_flag); /* NOTE(review): not documented above; presumably a flag used to tell the kernel to stop - confirm semantics and memory location */
239 
240 /*
241  * Launch a CUDA kernel doing RDMA Write server
242  *
243  * @stream [in]: CUDA Stream to launch the kernel
244  * @rdma_gpu [in]: RDMA GPU object
245  * @server_local_buf_arr_A [in]: GPU buffer with local data A
246  * @server_remote_buf_arr_F [in]: GPU buffer on remote server with data F (NOTE(review): from the server's side the remote peer is presumably the client - confirm wording)
247  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
248  */
249 doca_error_t kernel_write_server(cudaStream_t stream,
250  struct doca_gpu_dev_rdma *rdma_gpu,
251  struct doca_gpu_buf_arr *server_local_buf_arr_A,
252  struct doca_gpu_buf_arr *server_remote_buf_arr_F,
253  uint32_t connection_index); /* NOTE(review): not documented above; presumably selects which RDMA connection the kernel uses - confirm */
254 
255 /*
256  * Launch a CUDA kernel for RDMA Write Bandwidth benchmark
257  *
258  * @stream [in]: CUDA Stream to launch the kernel
259  * @rdma_gpu [in]: RDMA GPU object
260  * @num_iter [in]: Number of write iterations in the CUDA kernel
261  * @num_cta [in]: Number of CUDA kernel blocks
262  * @num_threads_per_cta [in]: Number of CUDA threads per CUDA block
263  * @msg_size [in]: Message size
264  * @server_local_buf_arr [in]: GPU buffer with local data
265  * @server_remote_buf_arr [in]: GPU buffer on remote server
266  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
267  */
268 doca_error_t gpunetio_rdma_write_bw(cudaStream_t stream,
269  struct doca_gpu_dev_rdma *rdma_gpu,
270  int num_iter,
271  int num_cta,
272  int num_threads_per_cta,
273  size_t msg_size,
274  struct doca_gpu_buf_arr *server_local_buf_arr,
275  struct doca_gpu_buf_arr *server_remote_buf_arr);
276 
277 #if __cplusplus
278 }
279 #endif
280 
281 #endif /* GPURDMA_COMMON_H_ */
doca_error_t kernel_write_server(cudaStream_t stream, struct doca_gpu_dev_rdma *rdma_gpu, struct doca_gpu_buf_arr *server_local_buf_arr_A, struct doca_gpu_buf_arr *server_remote_buf_arr_F, uint32_t connection_index)
doca_error_t wrapper_doca_rdma_cap_task_write_is_supported(struct doca_devinfo *devinfo)
Definition: rdma_common.c:286
doca_error_t create_buf_arr_on_gpu(struct buf_arr_obj *buf_arr_obj)
Definition: rdma_common.c:618
doca_error_t destroy_rdma_resources(struct rdma_resources *resources)
Definition: rdma_common.c:470
doca_error_t kernel_write_client(cudaStream_t stream, struct doca_gpu_dev_rdma *rdma_gpu, struct doca_gpu_buf_arr *client_local_buf_arr_B, struct doca_gpu_buf_arr *client_local_buf_arr_C, struct doca_gpu_buf_arr *client_local_buf_arr_F, struct doca_gpu_buf_arr *client_remote_buf_arr_A, uint32_t connection_index, uint32_t *exit_flag)
int oob_connection_client_setup(const char *server_ip, int *oob_sock_fd)
Definition: rdma_common.c:238
doca_error_t rdma_write_client(struct rdma_config *cfg)
doca_error_t create_mmap(struct rdma_mmap_obj *mmap_obj)
Definition: rdma_common.c:569
#define MAX_IP_ADDRESS_LEN
Definition: rdma_common.h:49
void oob_connection_server_close(int oob_sock_fd, int oob_client_sock)
Definition: rdma_common.c:222
int oob_connection_server_setup(int *oob_sock_fd, int *oob_client_sock)
Definition: rdma_common.c:153
doca_error_t create_rdma_resources(struct rdma_config *cfg, const uint32_t rdma_permissions, struct rdma_resources *resources)
Definition: rdma_common.c:299
doca_error_t gpunetio_rdma_write_bw(cudaStream_t stream, struct doca_gpu_dev_rdma *rdma_gpu, int num_iter, int num_cta, int num_threads_per_cta, size_t msg_size, struct doca_gpu_buf_arr *server_local_buf_arr, struct doca_gpu_buf_arr *server_remote_buf_arr)
doca_error_t rdma_write_server(struct rdma_config *cfg)
#define MAX_PCI_ADDRESS_LEN
Definition: rdma_common.h:48
void oob_connection_client_close(int oob_sock_fd)
Definition: rdma_common.c:274
#define SERVER_ADDR_LEN
Definition: rdma_common.h:60
struct buf_arr_obj client_local_buf_arr_B[NUM_CONN]
struct buf_arr_obj server_local_buf_arr_A[NUM_CONN]
struct buf_arr_obj client_remote_buf_arr_A[NUM_CONN]
struct buf_arr_obj server_remote_buf_arr_F[NUM_CONN]
struct rdma_resources resources
struct buf_arr_obj client_local_buf_arr_C[NUM_CONN]
struct buf_arr_obj client_local_buf_arr_F[NUM_CONN]
#define DOCA_DEVINFO_IBDEV_NAME_SIZE
Buffer size to hold Infiniband/RoCE device name. Including a null terminator.
Definition: doca_dev.h:309
enum doca_error doca_error_t
DOCA API return codes.
doca_rdma_addr_type
Definition: doca_rdma.h:56
const struct ip_frag_config * cfg
Definition: ip_frag_dp.c:0
uint32_t num_elem
Definition: rdma_common.h:117
struct doca_gpu_buf_arr * gpu_buf_arr
Definition: rdma_common.h:120
size_t elem_size
Definition: rdma_common.h:118
struct doca_gpu * gpudev
Definition: rdma_common.h:115
struct doca_buf_arr * buf_arr
Definition: rdma_common.h:119
struct doca_mmap * mmap
Definition: rdma_common.h:116
enum doca_rdma_addr_type cm_addr_type
Definition: rdma_common.h:74
char device_name[DOCA_DEVINFO_IBDEV_NAME_SIZE]
Definition: rdma_common.h:65
char cm_addr[SERVER_ADDR_LEN+1]
Definition: rdma_common.h:73
uint32_t gid_index
Definition: rdma_common.h:70
bool use_rdma_cm
Definition: rdma_common.h:71
uint32_t cm_port
Definition: rdma_common.h:72
char gpu_pcie_addr[MAX_PCI_ADDRESS_LEN]
Definition: rdma_common.h:66
char server_ip_addr[MAX_IP_ADDRESS_LEN]
Definition: rdma_common.h:67
bool is_gid_index_set
Definition: rdma_common.h:69
bool is_server
Definition: rdma_common.h:68
struct doca_dev * doca_device
Definition: rdma_common.h:104
size_t memrange_len
Definition: rdma_common.h:107
void * memrange_addr
Definition: rdma_common.h:106
struct doca_mmap * mmap
Definition: rdma_common.h:108
uint32_t permissions
Definition: rdma_common.h:105
size_t export_len
Definition: rdma_common.h:110
const void * rdma_export
Definition: rdma_common.h:109
bool server_listen_active
Definition: rdma_common.h:95
struct doca_rdma_connection * connection2
Definition: rdma_common.h:97
struct doca_rdma_connection * connection
Definition: rdma_common.h:92
struct doca_ctx * rdma_ctx
Definition: rdma_common.h:85
const void * connection_details
Definition: rdma_common.h:87
bool connection2_established
Definition: rdma_common.h:98
struct rdma_config * cfg
Definition: rdma_common.h:80
struct doca_rdma_addr * cm_addr
Definition: rdma_common.h:91
struct doca_rdma * rdma
Definition: rdma_common.h:83
struct doca_pe * pe
Definition: rdma_common.h:86
struct doca_gpu_dev_rdma * gpu_rdma
Definition: rdma_common.h:84
size_t conn_det_len
Definition: rdma_common.h:88
struct doca_dev * doca_device
Definition: rdma_common.h:81
bool connection2_error
Definition: rdma_common.h:99
bool connection_error
Definition: rdma_common.h:94
bool connection_established
Definition: rdma_common.h:93
struct doca_gpu * gpudev
Definition: rdma_common.h:82