NVIDIA DOCA SDK Data Center on a Chip Framework Documentation
gpunetio_rdma_client_server_write_sample.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2023-2024 NVIDIA CORPORATION AND AFFILIATES. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  */
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <unistd.h>
29 #include <string.h>
30 
31 #include <doca_log.h>
32 #include <doca_error.h>
33 #include <doca_argp.h>
34 
35 #include "rdma_common.h"
36 #include "common.h"
37 
38 DOCA_LOG_REGISTER(GPURDMA::SAMPLE);
39 
/*
 * Nanoseconds stored in the tv_nsec field of the timespec handed to nanosleep()
 * whenever doca_pe_progress() returns 0 in the connection wait loops
 * (10 * 1000 ns = 10 microseconds).
 */
40 #define SLEEP_IN_NANOS (10 * 1000)
/*
 * Number of connection slots: size of the per-connection arrays in this file and
 * upper bound of the conn_idx loops in the destroy functions.
 */
41 #define NUM_CONN 2
42 
/* RDMA resources shared by the server and client flows; zero-initialized, then handed to create_rdma_resources() and destroy_rdma_resources() */
43 struct rdma_resources resources = {0};
/* Per-connection mmap describing the client's buffer F as seen by the server; checked for non-NULL during server cleanup */
48 struct doca_mmap *server_remote_mmap_F[NUM_CONN];
/* Per-connection mmap describing the server's buffer A as seen by the client; checked for non-NULL during client cleanup */
49 struct doca_mmap *client_remote_mmap_A[NUM_CONN];
/* CUDA stream created with cudaStreamCreateWithFlags(..., cudaStreamNonBlocking) by both the server and the client flow */
64 cudaStream_t cstream;
/* Out-of-band socket descriptor; -1 until a setup helper fills it in (the client passes its address to oob_connection_client_setup()) */
65 int oob_sock_fd = -1;
/* Socket the server uses to send/recv the connection details; -1 until set. NOTE(review): presumably filled by oob_connection_server_setup() - that call is not visible in this listing */
66 int oob_client_sock = -1;
67 
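68 /*
69  * Create local and remote mmap and buffer array for server
70  *
71  * @oob_sock_fd [in]: socket fd used to exchange the exported mmaps with the client
72  * @resources [in]: rdma resources
 * @conn_idx [in]: connection index selecting the per-connection entries (arrays hold NUM_CONN entries)
 * @stream [in]: CUDA stream on which buffer A is asynchronously memset
73  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
74  */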
76  struct rdma_resources *resources,
77  int conn_idx,
78  cudaStream_t stream)
79 {
80  void *server_remote_export_F = NULL;
81  size_t server_remote_export_F_len;
83  cudaError_t cuda_err;
84 
85  /* Buffer A */
86  /* Register the local source buffer and obtain an object representing the memory */
87  result = doca_gpu_mem_alloc(resources->gpudev,
88  (size_t)GPU_BUF_NUM * GPU_BUF_SIZE_A,
89  4096,
91  (void **)&server_local_buf_A_gpu[conn_idx],
92  (void **)&server_local_buf_A_cpu[conn_idx]);
93  if (result != DOCA_SUCCESS) {
94  DOCA_LOG_ERR("Function doca_gpu_mem_alloc failed: %s", doca_error_get_descr(result));
95  goto error;
96  }
97 
98  cuda_err = cudaMemsetAsync(server_local_buf_A_gpu[conn_idx], 0x1, GPU_BUF_NUM * GPU_BUF_SIZE_A, stream);
99  if (cuda_err != cudaSuccess) {
100  DOCA_LOG_ERR("Can't CUDA memset buffer A: %d", cuda_err);
101  goto error;
102  }
103 
108 
109  DOCA_LOG_INFO("Create local server mmap A context");
111  if (result != DOCA_SUCCESS) {
112  DOCA_LOG_ERR("Function create_mmap failed: %s", doca_error_get_descr(result));
113  goto error;
114  }
115 
116  /* The application passes its exported mmap to the remote side out-of-band and receives the remote side's exported mmap the same way */
117  DOCA_LOG_INFO("Send exported mmap A to remote client");
118  if (send(oob_sock_fd, &server_local_mmap_obj_A[conn_idx].export_len, sizeof(size_t), 0) < 0) {
119  DOCA_LOG_ERR("Failed to send exported mmap");
121  goto error;
122  }
123 
124  if (send(oob_sock_fd,
125  server_local_mmap_obj_A[conn_idx].rdma_export,
126  server_local_mmap_obj_A[conn_idx].export_len,
127  0) < 0) {
128  DOCA_LOG_ERR("Failed to send exported mmap");
130  goto error;
131  }
132 
133  DOCA_LOG_INFO("Receive client mmap F export");
134  if (recv(oob_sock_fd, &server_remote_export_F_len, sizeof(size_t), 0) < 0) {
135  DOCA_LOG_ERR("Failed to receive remote connection details");
137  goto error;
138  }
139 
140  server_remote_export_F = calloc(1, server_remote_export_F_len);
141  if (server_remote_export_F == NULL) {
142  DOCA_LOG_ERR("Failed to allocate memory for remote mmap export");
144  goto error;
145  }
146 
147  if (recv(oob_sock_fd, server_remote_export_F, server_remote_export_F_len, 0) < 0) {
148  DOCA_LOG_ERR("Failed to receive remote connection details");
150  goto error;
151  }
152 
154  server_remote_export_F,
155  server_remote_export_F_len,
157  &server_remote_mmap_F[conn_idx]);
158  if (result != DOCA_SUCCESS) {
159  DOCA_LOG_ERR("Function doca_mmap_create_from_export failed: %s", doca_error_get_descr(result));
160  goto error;
161  }
162 
163  /* create local and remote buf arrays */
168 
169  DOCA_LOG_INFO("Create local DOCA buf array context A");
171  if (result != DOCA_SUCCESS) {
172  DOCA_LOG_ERR("Function create_buf_arr_on_gpu failed: %s", doca_error_get_descr(result));
173  goto error;
174  }
175 
177  server_remote_buf_arr_F[conn_idx].mmap = server_remote_mmap_F[conn_idx];
178  server_remote_buf_arr_F[conn_idx].num_elem = 1;
180 
181  DOCA_LOG_INFO("Create remote DOCA buf array context F");
183  if (result != DOCA_SUCCESS) {
184  DOCA_LOG_ERR("Function create_buf_arr_on_gpu failed: %s", doca_error_get_descr(result));
186  goto error;
187  }
188 
189  free(server_remote_export_F);
190 
191  return DOCA_SUCCESS;
192 
193 error:
194  if (server_remote_export_F)
195  free(server_remote_export_F);
196 
197  return result;
198 }
199 
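200 /*
201  * Create local and remote mmap and buffer array for client
202  *
203  * @oob_sock_fd [in]: socket fd used to exchange the exported mmaps with the server
204  * @resources [in]: rdma resources
 * @conn_idx [in]: connection index selecting the per-connection entries (arrays hold NUM_CONN entries)
 * @stream [in]: CUDA stream on which buffers B and C are asynchronously memset
205  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
206  */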
208  struct rdma_resources *resources,
209  int conn_idx,
210  cudaStream_t stream)
211 {
212  void *client_remote_export_A = NULL;
213  size_t client_remote_export_A_len;
215  cudaError_t cuda_err;
216 
217  DOCA_LOG_INFO("Alloc local client mmap B context");
218  /* Buffer B - 512B */
219  /* Register the local source buffer and obtain an object representing the memory */
220  result = doca_gpu_mem_alloc(resources->gpudev,
221  (size_t)GPU_BUF_NUM * GPU_BUF_SIZE_B,
222  4096,
224  (void **)&client_local_buf_B_gpu[conn_idx],
225  (void **)&client_local_buf_B_cpu[conn_idx]);
226  if (result != DOCA_SUCCESS) {
227  DOCA_LOG_ERR("Function doca_gpu_mem_alloc failed: %s", doca_error_get_descr(result));
228  goto error;
229  }
230 
231  DOCA_LOG_INFO("Memset local client mmap B context");
232 
233  cuda_err = cudaMemsetAsync(client_local_buf_B_gpu[conn_idx], 0x2, GPU_BUF_NUM * GPU_BUF_SIZE_B, stream);
234  if (cuda_err != cudaSuccess) {
235  DOCA_LOG_ERR("Can't CUDA memset buffer B: %d", cuda_err);
236  goto error;
237  }
238 
243 
244  /* create local mmap object */
245  DOCA_LOG_INFO("Create local client mmap B context");
247  if (result != DOCA_SUCCESS) {
248  DOCA_LOG_ERR("Function create_mmap failed: %s", doca_error_get_descr(result));
249  goto error;
250  }
251 
252  /* Buffer C - 512B */
253  /* Register the local source buffer and obtain an object representing the memory */
254  result = doca_gpu_mem_alloc(resources->gpudev,
255  (size_t)GPU_BUF_NUM * GPU_BUF_SIZE_C,
256  4096,
258  (void **)&client_local_buf_C_gpu[conn_idx],
259  (void **)&client_local_buf_C_cpu[conn_idx]);
260  if (result != DOCA_SUCCESS) {
261  DOCA_LOG_ERR("Function doca_gpu_mem_alloc failed: %s", doca_error_get_descr(result));
262  goto error;
263  }
264 
265  cuda_err = cudaMemsetAsync(client_local_buf_C_gpu[conn_idx], 0x3, GPU_BUF_NUM * GPU_BUF_SIZE_C, stream);
266  if (cuda_err != cudaSuccess) {
267  DOCA_LOG_ERR("Can't CUDA memset buffer C: %d", cuda_err);
268  goto error;
269  }
270 
275 
276  /* create local mmap object */
277  DOCA_LOG_INFO("Create local client mmap C context");
279  if (result != DOCA_SUCCESS) {
280  DOCA_LOG_ERR("Function create_mmap failed: %s", doca_error_get_descr(result));
281  goto error;
282  }
283 
284  /* Buffer F - 4B */
285  /* Register the local source buffer and obtain an object representing the memory */
286  result = doca_gpu_mem_alloc(resources->gpudev,
287  (size_t)GPU_BUF_NUM * GPU_BUF_SIZE_F,
288  4096,
290  (void **)&client_local_buf_F[conn_idx],
291  NULL);
292  if (result != DOCA_SUCCESS) {
293  DOCA_LOG_ERR("Function doca_gpu_mem_alloc failed: %s", doca_error_get_descr(result));
294  goto error;
295  }
296 
301 
302  /* create local mmap object */
303  DOCA_LOG_INFO("Create local client mmap F context");
305  if (result != DOCA_SUCCESS) {
306  DOCA_LOG_ERR("Function create_mmap failed: %s", doca_error_get_descr(result));
307  goto error;
308  }
309 
310  /* Application does out-of-band passing of exported mmap to remote side and receiving exported mmap */
311 
312  /* Receive server remote A */
313  DOCA_LOG_INFO("Receive remote mmap A export from server");
314  if (recv(oob_sock_fd, &client_remote_export_A_len, sizeof(size_t), 0) < 0) {
315  DOCA_LOG_ERR("Failed to receive remote connection details");
317  goto error;
318  }
319 
320  client_remote_export_A = calloc(1, client_remote_export_A_len);
321  if (client_remote_export_A == NULL) {
322  DOCA_LOG_ERR("Failed to allocate memory for remote mmap export");
324  goto error;
325  }
326 
327  if (recv(oob_sock_fd, client_remote_export_A, client_remote_export_A_len, 0) < 0) {
328  DOCA_LOG_ERR("Failed to receive remote connection details");
330  goto error;
331  }
332 
334  client_remote_export_A,
335  client_remote_export_A_len,
337  &client_remote_mmap_A[conn_idx]);
338  if (result != DOCA_SUCCESS) {
339  DOCA_LOG_ERR("Function doca_mmap_create_from_export failed: %s", doca_error_get_descr(result));
340  goto error;
341  }
342 
343  /* Send client local F */
344  DOCA_LOG_INFO("Send exported mmap F to remote server");
345  if (send(oob_sock_fd, &client_local_mmap_obj_F[conn_idx].export_len, sizeof(size_t), 0) < 0) {
346  DOCA_LOG_ERR("Failed to send exported mmap");
348  goto error;
349  }
350 
351  if (send(oob_sock_fd,
352  client_local_mmap_obj_F[conn_idx].rdma_export,
353  client_local_mmap_obj_F[conn_idx].export_len,
354  0) < 0) {
355  DOCA_LOG_ERR("Failed to send exported mmap");
357  goto error;
358  }
359 
360  /* create local and remote buf arrays */
365 
366  /* create local buf array object */
367  DOCA_LOG_INFO("Create local DOCA buf array context B");
369  if (result != DOCA_SUCCESS) {
370  DOCA_LOG_ERR("Function create_buf_arr_on_gpu failed: %s", doca_error_get_descr(result));
371  goto error;
372  }
373 
378 
379  /* create local buf array object */
380  DOCA_LOG_INFO("Create local DOCA buf array context C");
382  if (result != DOCA_SUCCESS) {
384  DOCA_LOG_ERR("Function create_buf_arr_on_gpu failed: %s", doca_error_get_descr(result));
385  goto error;
386  }
387 
390  client_local_buf_arr_F[conn_idx].num_elem = 1;
392 
393  /* create local buf array object */
394  DOCA_LOG_INFO("Create local DOCA buf array context F");
396  if (result != DOCA_SUCCESS) {
398  DOCA_LOG_ERR("Function create_buf_arr_on_gpu failed: %s", doca_error_get_descr(result));
399  goto error;
400  }
401 
403  client_remote_buf_arr_A[conn_idx].mmap = client_remote_mmap_A[conn_idx];
406 
407  /* create remote buf array object */
408  DOCA_LOG_INFO("Create remote DOCA buf array context");
410  if (result != DOCA_SUCCESS) {
411  DOCA_LOG_ERR("Function create_buf_arr_on_gpu failed: %s", doca_error_get_descr(result));
414  goto error;
415  }
416 
417  free(client_remote_export_A);
418 
419  return DOCA_SUCCESS;
420 
421 error:
422  if (client_remote_export_A)
423  free(client_remote_export_A);
424 
425  return result;
426 }
427 
428 /*
429  * Destroy local and remote mmap and buffer array, server side
430  *
431  * @resources [in]: rdma resources
432  */
434 {
 /*
  * Walks every connection slot and releases the server-side objects that exist
  * (each one is guarded by a non-NULL check). In the visible code a failed release
  * is only logged; there is no early exit, so the remaining objects are still handled.
  * NOTE(review): the declaration of `result` and the calls that assign it in the
  * mmap and buf_arr branches are not visible in this listing; the log messages name
  * doca_mmap_destroy and doca_buf_arr_destroy - confirm against the full source.
  */
436 
437  for (int conn_idx = 0; conn_idx < NUM_CONN; conn_idx++) {
 /* Local mmap of buffer A */
438  if (server_local_mmap_obj_A[conn_idx].mmap) {
440  if (result != DOCA_SUCCESS)
441  DOCA_LOG_ERR("Function doca_mmap_destroy failed: %s", doca_error_get_descr(result));
442  }
443 
 /* Remote mmap of the client's buffer F */
444  if (server_remote_mmap_F[conn_idx]) {
446  if (result != DOCA_SUCCESS)
447  DOCA_LOG_ERR("Function doca_mmap_destroy failed: %s", doca_error_get_descr(result));
448  }
449 
 /* GPU allocation backing buffer A, returned to the GPU device held in resources */
450  if (server_local_buf_A_gpu[conn_idx]) {
451  result = doca_gpu_mem_free(resources->gpudev, server_local_buf_A_gpu[conn_idx]);
452  if (result != DOCA_SUCCESS)
453  DOCA_LOG_ERR("Function doca_gpu_mem_free failed: %s", doca_error_get_descr(result));
454  }
455 
 /* Buffer array built on the local buffer A */
456  if (server_local_buf_arr_A[conn_idx].buf_arr) {
458  if (result != DOCA_SUCCESS)
459  DOCA_LOG_ERR("Function doca_buf_arr_destroy failed: %s", doca_error_get_descr(result));
460  }
461 
 /* Buffer array built on the remote buffer F */
462  if (server_remote_buf_arr_F[conn_idx].buf_arr) {
464  if (result != DOCA_SUCCESS)
465  DOCA_LOG_ERR("Function doca_buf_arr_destroy failed: %s", doca_error_get_descr(result));
466  }
467  }
468 }
469 
470 /*
471  * Destroy local and remote mmap and buffer array, client side
472  *
473  * @resources [in]: rdma resources
474  */
476 {
 /*
  * Walks every connection slot and releases the client-side objects that exist
  * (each one is guarded by a non-NULL check). In the visible code a failed release
  * is only logged; there is no early exit, so the remaining objects are still handled.
  * NOTE(review): the declaration of `result` and the calls that assign it in the
  * mmap and buf_arr branches are not visible in this listing; the log messages name
  * doca_mmap_destroy and doca_buf_arr_destroy - confirm against the full source.
  */
478 
479  for (int conn_idx = 0; conn_idx < NUM_CONN; conn_idx++) {
 /* Local mmaps of buffers B, C and F */
480  if (client_local_mmap_obj_B[conn_idx].mmap) {
482  if (result != DOCA_SUCCESS)
483  DOCA_LOG_ERR("Function doca_mmap_destroy failed: %s", doca_error_get_descr(result));
484  }
485 
486  if (client_local_mmap_obj_C[conn_idx].mmap) {
488  if (result != DOCA_SUCCESS)
489  DOCA_LOG_ERR("Function doca_mmap_destroy failed: %s", doca_error_get_descr(result));
490  }
491 
492  if (client_local_mmap_obj_F[conn_idx].mmap) {
494  if (result != DOCA_SUCCESS)
495  DOCA_LOG_ERR("Function doca_mmap_destroy failed: %s", doca_error_get_descr(result));
496  }
497 
 /* Remote mmap of the server's buffer A */
498  if (client_remote_mmap_A[conn_idx]) {
500  if (result != DOCA_SUCCESS)
501  DOCA_LOG_ERR("Function doca_mmap_destroy failed: %s", doca_error_get_descr(result));
502  }
503 
 /* GPU allocations backing buffers B, C and F, returned to the GPU device held in resources */
504  if (client_local_buf_B_gpu[conn_idx]) {
505  result = doca_gpu_mem_free(resources->gpudev, client_local_buf_B_gpu[conn_idx]);
506  if (result != DOCA_SUCCESS)
507  DOCA_LOG_ERR("Function doca_gpu_mem_free failed: %s", doca_error_get_descr(result));
508  }
509 
510  if (client_local_buf_C_gpu[conn_idx]) {
511  result = doca_gpu_mem_free(resources->gpudev, client_local_buf_C_gpu[conn_idx]);
512  if (result != DOCA_SUCCESS)
513  DOCA_LOG_ERR("Function doca_gpu_mem_free failed: %s", doca_error_get_descr(result));
514  }
515 
516  if (client_local_buf_F[conn_idx]) {
517  result = doca_gpu_mem_free(resources->gpudev, client_local_buf_F[conn_idx]);
518  if (result != DOCA_SUCCESS)
519  DOCA_LOG_ERR("Function doca_gpu_mem_free failed: %s", doca_error_get_descr(result));
520  }
521 
 /* Buffer arrays built on the local buffers B, C and F */
522  if (client_local_buf_arr_B[conn_idx].buf_arr) {
524  if (result != DOCA_SUCCESS)
525  DOCA_LOG_ERR("Function doca_buf_arr_destroy failed: %s", doca_error_get_descr(result));
526  }
527 
528  if (client_local_buf_arr_C[conn_idx].buf_arr) {
530  if (result != DOCA_SUCCESS)
531  DOCA_LOG_ERR("Function doca_buf_arr_destroy failed: %s", doca_error_get_descr(result));
532  }
533 
534  if (client_local_buf_arr_F[conn_idx].buf_arr) {
536  if (result != DOCA_SUCCESS)
537  DOCA_LOG_ERR("Function doca_buf_arr_destroy failed: %s", doca_error_get_descr(result));
538  }
539 
 /* Buffer array built on the remote buffer A */
540  if (client_remote_buf_arr_A[conn_idx].buf_arr) {
542  if (result != DOCA_SUCCESS)
543  DOCA_LOG_ERR("Function doca_buf_arr_destroy failed: %s", doca_error_get_descr(result));
544  }
545  }
546 }
547 
548 /*
549  * Server side of the RDMA write
550  *
551  * @cfg [in]: Configuration parameters
552  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
553  */
555 {
556  struct doca_rdma_connection *connection = NULL;
557  const uint32_t rdma_permissions = access_params;
558  doca_error_t result, tmp_result;
559  void *remote_conn_details = NULL;
560  size_t remote_conn_details_len = 0;
561  cudaError_t cuda_ret;
562  int ret = 0;
563  struct timespec ts = {
564  .tv_sec = 0,
565  .tv_nsec = SLEEP_IN_NANOS,
566  };
567 
568  /* Allocate resources */
569  result = create_rdma_resources(cfg, rdma_permissions, &resources);
570  if (result != DOCA_SUCCESS) {
571  DOCA_LOG_ERR("Failed to allocate RDMA resources: %s", doca_error_get_descr(result));
572  return result;
573  }
574 
575  /* Get GPU RDMA handle */
577  if (result != DOCA_SUCCESS) {
578  DOCA_LOG_ERR("Failed to get RDMA GPU handler: %s", doca_error_get_descr(result));
579  goto destroy_resources;
580  }
581 
582  /* Setup OOB connection */
584  if (ret < 0) {
585  DOCA_LOG_ERR("Failed to setup OOB connection with remote peer");
587  goto destroy_resources;
588  }
589 
590  if (!cfg->use_rdma_cm) {
591  /* Export connection details */
595  &connection);
596  if (result != DOCA_SUCCESS) {
597  DOCA_LOG_ERR("Failed to export RDMA with connection details");
598  goto close_connection;
599  }
600 
601  /* Application does out-of-band passing of rdma address to remote side and receiving remote address */
602  DOCA_LOG_INFO("Send connection details to remote peer size %zd str %s",
604  (char *)resources.connection_details);
605  if (send(oob_client_sock, &resources.conn_det_len, sizeof(size_t), 0) < 0) {
606  DOCA_LOG_ERR("Failed to send connection details");
608  goto close_connection;
609  }
610 
612  DOCA_LOG_ERR("Failed to send connection details");
614  goto close_connection;
615  }
616 
617  DOCA_LOG_INFO("Receive remote connection details");
618  if (recv(oob_client_sock, &remote_conn_details_len, sizeof(size_t), 0) < 0) {
619  DOCA_LOG_ERR("Failed to receive remote connection details");
621  goto close_connection;
622  }
623 
624  if (remote_conn_details_len <= 0 || remote_conn_details_len >= (size_t)-1) {
625  DOCA_LOG_ERR("Received wrong remote connection details");
627  goto close_connection;
628  }
629 
630  remote_conn_details = calloc(1, remote_conn_details_len);
631  if (remote_conn_details == NULL) {
632  DOCA_LOG_ERR("Failed to allocate memory for remote connection details");
634  goto close_connection;
635  }
636 
637  if (recv(oob_client_sock, remote_conn_details, remote_conn_details_len, 0) < 0) {
638  DOCA_LOG_ERR("Failed to receive remote connection details");
640  goto close_connection;
641  }
642 
643  /* Connect local rdma to the remote rdma */
644  DOCA_LOG_INFO("Connect DOCA RDMA to remote RDMA");
645  result = doca_rdma_connect(resources.rdma, remote_conn_details, remote_conn_details_len, connection);
646  if (result != DOCA_SUCCESS) {
647  DOCA_LOG_ERR("Function doca_rdma_connect failed: %s", doca_error_get_descr(result));
648  goto close_connection;
649  }
650 
651  free(remote_conn_details);
652  remote_conn_details = NULL;
653  } else { /* Case of RDMA CM */
655  if (result != DOCA_SUCCESS) {
656  DOCA_LOG_ERR("Server failed to call doca_rdma_start_listen_to_port: %s",
658  goto close_connection;
659  }
660 
662 
663  DOCA_LOG_INFO("Server is waiting for new connections using RDMA CM");
664  /* Wait for a new connection */
666  if (doca_pe_progress(resources.pe) == 0)
667  nanosleep(&ts, &ts);
668  }
669 
671  DOCA_LOG_ERR("Failed to connect to remote peer, connection error");
673  goto close_connection;
674  }
675 
676  DOCA_LOG_INFO("Server - Connection 1 is established");
677  }
678 
679  cuda_ret = cudaStreamCreateWithFlags(&cstream, cudaStreamNonBlocking);
680  if (cuda_ret != cudaSuccess) {
681  DOCA_LOG_ERR("Function cudaStreamCreateWithFlags error %d", cuda_ret);
683  goto close_connection;
684  }
685 
687  if (result != DOCA_SUCCESS) {
688  DOCA_LOG_ERR("Function create_memory_local_remote_server failed: %s", doca_error_get_descr(result));
689  goto close_connection;
690  }
691 
692  DOCA_LOG_INFO("Before launching CUDA kernel, buffer array A is:");
693  for (int idx = 0; idx < 4; idx++) {
694  DOCA_LOG_INFO("Buffer %d -> offset 0: %x%x%x%x | offset %d: %x%x%x%x",
695  idx,
696  server_local_buf_A_cpu[0][(GPU_BUF_SIZE_A * idx) + 0],
697  server_local_buf_A_cpu[0][(GPU_BUF_SIZE_A * idx) + 1],
698  server_local_buf_A_cpu[0][(GPU_BUF_SIZE_A * idx) + 2],
699  server_local_buf_A_cpu[0][(GPU_BUF_SIZE_A * idx) + 3],
705  }
706 
711  0);
712  if (result != DOCA_SUCCESS) {
713  DOCA_LOG_ERR("Function kernel_write_server failed: %s", doca_error_get_descr(result));
714  goto close_connection;
715  }
716 
717  if (cfg->use_rdma_cm) {
718  /* Wait for a new connection */
720  if (doca_pe_progress(resources.pe) == 0)
721  nanosleep(&ts, &ts);
722  }
723 
725  DOCA_LOG_ERR("Failed to connect to remote peer, connection error");
727  goto close_connection;
728  }
729 
730  DOCA_LOG_INFO("Server - Connection 2 is established");
731 
733  if (result != DOCA_SUCCESS) {
734  DOCA_LOG_ERR("Function create_memory_local_remote_server failed: %s",
736  goto close_connection;
737  }
738 
739  DOCA_LOG_INFO("Server - Connection 2 memory info exchanged");
740 
741  /* Unlike the client, the server uses the same stream for both CUDA kernels */
746  1);
747  if (result != DOCA_SUCCESS) {
748  DOCA_LOG_ERR("Function kernel_write_server failed: %s", doca_error_get_descr(result));
749  goto close_connection;
750  }
751  }
752 
753  cudaStreamSynchronize(cstream);
754 
755  DOCA_LOG_INFO("After launching CUDA kernel, buffer array A is:");
756  for (int idx = 0; idx < 4; idx++) {
757  DOCA_LOG_INFO("Buffer %d -> offset 0: %x%x%x%x | offset %d: %x%x%x%x",
758  idx,
759  server_local_buf_A_cpu[0][(GPU_BUF_SIZE_A * idx) + 0],
760  server_local_buf_A_cpu[0][(GPU_BUF_SIZE_A * idx) + 1],
761  server_local_buf_A_cpu[0][(GPU_BUF_SIZE_A * idx) + 2],
762  server_local_buf_A_cpu[0][(GPU_BUF_SIZE_A * idx) + 3],
768  }
769 
770  if (cfg->use_rdma_cm) {
771  DOCA_LOG_INFO("After launching CUDA kernel for connection 2, buffer array A is:");
772  for (int idx = 0; idx < 4; idx++) {
773  DOCA_LOG_INFO("Buffer %d -> offset 0: %x%x%x%x | offset %d: %x%x%x%x",
774  idx,
775  server_local_buf_A_cpu[1][(GPU_BUF_SIZE_A * idx) + 0],
776  server_local_buf_A_cpu[1][(GPU_BUF_SIZE_A * idx) + 1],
777  server_local_buf_A_cpu[1][(GPU_BUF_SIZE_A * idx) + 2],
778  server_local_buf_A_cpu[1][(GPU_BUF_SIZE_A * idx) + 3],
784  }
785  }
786 
788 
790 
792  if (result != DOCA_SUCCESS) {
793  DOCA_LOG_ERR("Failed to destroy DOCA RDMA resources: %s", doca_error_get_descr(result));
794  return result;
795  }
796 
797  return DOCA_SUCCESS;
798 
799 close_connection:
801 
802 destroy_resources:
803 
805 
806  tmp_result = destroy_rdma_resources(&resources);
807  if (tmp_result != DOCA_SUCCESS)
808  DOCA_LOG_ERR("Failed to destroy DOCA RDMA resources: %s", doca_error_get_descr(tmp_result));
809 
810  if (remote_conn_details)
811  free(remote_conn_details);
812 
813  return result;
814 }
815 
816 /*
817  * Client side of the RDMA write
818  *
819  * @cfg [in]: Configuration parameters
820  * @return: DOCA_SUCCESS on success and DOCA_ERROR otherwise
821  */
823 {
824  struct doca_rdma_connection *connection = NULL;
825  const uint32_t rdma_permissions = access_params;
826  doca_error_t result, temp_result;
827  cudaError_t cuda_ret;
828  void *remote_conn_details = NULL;
829  size_t remote_conn_details_len = 0;
830  int ret = 0;
831  union doca_data connection_data;
832  uint32_t *cpu_exit_flag;
833  uint32_t *gpu_exit_flag;
834  struct timespec ts = {
835  .tv_sec = 0,
836  .tv_nsec = SLEEP_IN_NANOS,
837  };
838 
839  /* Allocate resources */
840  result = create_rdma_resources(cfg, rdma_permissions, &resources);
841  if (result != DOCA_SUCCESS) {
842  DOCA_LOG_ERR("Failed to allocate RDMA resources: %s", doca_error_get_descr(result));
843  return result;
844  }
845 
846  /* Get GPU RDMA handle */
848  if (result != DOCA_SUCCESS) {
849  DOCA_LOG_ERR("Failed to get RDMA GPU handler: %s", doca_error_get_descr(result));
850  goto destroy_resources;
851  }
852 
853  /* Setup OOB connection */
854  ret = oob_connection_client_setup(cfg->server_ip_addr, &oob_sock_fd);
855  if (ret < 0) {
856  DOCA_LOG_ERR("Failed to setup OOB connection with remote peer");
858  goto destroy_resources;
859  }
860 
861  if (!cfg->use_rdma_cm) {
862  /* Export connection details */
866  &connection);
867  if (result != DOCA_SUCCESS) {
868  DOCA_LOG_ERR("Failed to export RDMA with connection details");
869  goto close_connection;
870  }
871 
872  /* Application does out-of-band passing of rdma address to remote side and receiving remote address */
873  DOCA_LOG_INFO("Receive remote connection details");
874  if (recv(oob_sock_fd, &remote_conn_details_len, sizeof(size_t), 0) < 0) {
875  DOCA_LOG_ERR("Failed to receive remote connection details");
877  goto close_connection;
878  }
879 
880  if (remote_conn_details_len <= 0 || remote_conn_details_len >= (size_t)-1) {
881  DOCA_LOG_ERR("Received wrong remote connection details");
883  goto close_connection;
884  }
885 
886  remote_conn_details = calloc(1, remote_conn_details_len);
887  if (remote_conn_details == NULL) {
888  DOCA_LOG_ERR("Failed to allocate memory for remote connection details");
890  goto close_connection;
891  }
892 
893  if (recv(oob_sock_fd, remote_conn_details, remote_conn_details_len, 0) < 0) {
894  DOCA_LOG_ERR("Failed to receive remote connection details");
896  goto close_connection;
897  }
898 
899  DOCA_LOG_INFO("Send connection details to remote peer size %zd str %s",
901  (char *)resources.connection_details);
902  if (send(oob_sock_fd, &resources.conn_det_len, sizeof(size_t), 0) < 0) {
903  DOCA_LOG_ERR("Failed to send connection details");
905  goto close_connection;
906  }
907 
909  DOCA_LOG_ERR("Failed to send connection details");
911  goto close_connection;
912  }
913 
914  /* Connect local rdma to the remote rdma */
915  DOCA_LOG_INFO("Connect DOCA RDMA to remote RDMA");
916  result = doca_rdma_connect(resources.rdma, remote_conn_details, remote_conn_details_len, connection);
917  if (result != DOCA_SUCCESS) {
918  DOCA_LOG_ERR("Function doca_rdma_connect failed: %s", doca_error_get_descr(result));
919  goto close_connection;
920  }
921 
922  free(remote_conn_details);
923  remote_conn_details = NULL;
924  } else { /* Case of RDMA CM */
925  result = doca_rdma_addr_create(cfg->cm_addr_type, cfg->cm_addr, cfg->cm_port, &resources.cm_addr);
926  if (result != DOCA_SUCCESS) {
927  DOCA_LOG_ERR("Failed to create rdma cm connection address %s", doca_error_get_descr(result));
928  goto close_connection;
929  }
930 
931  connection_data.ptr = (void *)&resources;
933  if (result != DOCA_SUCCESS) {
934  DOCA_LOG_ERR("Client failed to call doca_rdma_connect_to_addr %s",
936  goto close_connection;
937  }
938 
939  DOCA_LOG_INFO("Client is waiting for a connection establishment");
940  /* Wait for a new connection */
942  if (doca_pe_progress(resources.pe) == 0)
943  nanosleep(&ts, &ts);
944  }
945 
947  DOCA_LOG_ERR("Failed to connect to remote peer, connection error");
949  goto close_connection;
950  }
951 
952  DOCA_LOG_INFO("Client - Connection 1 is established");
953  }
954 
956  if (result != DOCA_SUCCESS) {
957  DOCA_LOG_ERR("Function create_memory_local_remote_client failed: %s", doca_error_get_descr(result));
958  goto close_connection;
959  }
960 
961  result = doca_gpu_mem_alloc(resources.gpudev,
962  sizeof(uint32_t),
963  4096,
965  (void **)&gpu_exit_flag,
966  (void **)&cpu_exit_flag);
967  if (result != DOCA_SUCCESS || gpu_exit_flag == NULL || cpu_exit_flag == NULL) {
968  DOCA_LOG_ERR("Function doca_gpu_mem_alloc returned %s", doca_error_get_descr(result));
969  goto close_connection;
970  }
971  cpu_exit_flag[0] = 0;
972 
973  cuda_ret = cudaStreamCreateWithFlags(&cstream, cudaStreamNonBlocking);
974  if (cuda_ret != cudaSuccess) {
975  DOCA_LOG_ERR("Function cudaStreamCreateWithFlags error %d", cuda_ret);
977  goto close_connection;
978  }
979 
980  /* First client kernel on default CUDA stream */
987  0,
988  gpu_exit_flag);
989  if (result != DOCA_SUCCESS) {
990  DOCA_LOG_ERR("Function kernel_write_client failed: %s", doca_error_get_descr(result));
991  goto close_connection;
992  }
993 
994  if (cfg->use_rdma_cm) {
995  DOCA_LOG_INFO("Establishing connection 2..");
996 
997  /* Establish a new connection while the CUDA kernel working on the first connection is still running */
999  if (result != DOCA_SUCCESS) {
1000  DOCA_LOG_ERR("Client failed to call doca_rdma_connect_to_addr %s",
1002  goto close_connection;
1003  }
1004 
1005  DOCA_LOG_INFO("Client is waiting for a connection establishment");
1006  /* Wait for a new connection */
1008  if (doca_pe_progress(resources.pe) == 0)
1009  nanosleep(&ts, &ts);
1010  }
1011 
1013  DOCA_LOG_ERR("Failed to connect to remote peer, connection error");
1015  goto close_connection;
1016  }
1017 
1018  DOCA_LOG_INFO("Client - Connection 2 is established");
1019 
1021  if (result != DOCA_SUCCESS) {
1022  DOCA_LOG_ERR("Function create_memory_local_remote_client failed: %s",
1024  goto close_connection;
1025  }
1026 
1027  DOCA_LOG_INFO("Client - Connection 2 memory info exchanged");
1028 
1029  /* Second client kernel on non-default CUDA stream */
1036  1,
1037  gpu_exit_flag);
1038  if (result != DOCA_SUCCESS) {
1039  DOCA_LOG_ERR("Function kernel_write_client failed: %s", doca_error_get_descr(result));
1040  goto close_connection;
1041  }
1042  }
1043 
1044  DOCA_LOG_INFO("Client, terminate kernels");
1045  DOCA_GPUNETIO_VOLATILE(*cpu_exit_flag) = 1;
1046  cudaStreamSynchronize(0);
1047 
1048  if (cfg->use_rdma_cm) {
1049  cudaStreamSynchronize(cstream);
1050  cudaStreamDestroy(cstream);
1051  }
1052 
1054 
1056 
1058  if (result != DOCA_SUCCESS) {
1059  DOCA_LOG_ERR("Failed to destroy DOCA RDMA resources: %s", doca_error_get_descr(result));
1060  return result;
1061  }
1062 
1063  return DOCA_SUCCESS;
1064 
1065 close_connection:
1067 
1068 destroy_resources:
1069 
1071 
1072  temp_result = destroy_rdma_resources(&resources);
1073  if (temp_result != DOCA_SUCCESS)
1074  DOCA_LOG_ERR("Failed to destroy DOCA RDMA resources: %s", doca_error_get_descr(temp_result));
1075 
1076  if (remote_conn_details)
1077  free(remote_conn_details);
1078 
1079  return result;
1080 }
#define NULL
Definition: __stddef_null.h:26
__SIZE_TYPE__ size_t
int32_t result
doca_dpa_dev_mmap_t mmap
doca_error_t create_buf_arr_on_gpu(struct buf_arr_obj *buf_arr_obj)
Definition: rdma_common.c:618
doca_error_t destroy_rdma_resources(struct rdma_resources *resources)
Definition: rdma_common.c:470
int oob_connection_client_setup(const char *server_ip, int *oob_sock_fd)
Definition: rdma_common.c:238
void oob_connection_server_close(int oob_sock_fd, int oob_client_sock)
Definition: rdma_common.c:222
int oob_connection_server_setup(int *oob_sock_fd, int *oob_client_sock)
Definition: rdma_common.c:153
doca_error_t create_rdma_resources(struct rdma_config *cfg, const uint32_t rdma_permissions, struct rdma_resources *resources)
Definition: rdma_common.c:299
void oob_connection_client_close(int oob_sock_fd)
Definition: rdma_common.c:274
doca_error_t kernel_write_server(cudaStream_t stream, struct doca_gpu_dev_rdma *rdma_gpu, struct doca_gpu_buf_arr *server_local_buf_arr_A, struct doca_gpu_buf_arr *server_remote_buf_arr_F, uint32_t connection_index)
#define GPU_BUF_SIZE_A
Definition: rdma_common.h:50
doca_error_t kernel_write_client(cudaStream_t stream, struct doca_gpu_dev_rdma *rdma_gpu, struct doca_gpu_buf_arr *client_local_buf_arr_B, struct doca_gpu_buf_arr *client_local_buf_arr_C, struct doca_gpu_buf_arr *client_local_buf_arr_F, struct doca_gpu_buf_arr *client_remote_buf_arr_A, uint32_t connection_index, uint32_t *exit_flag)
#define GPU_BUF_SIZE_B
Definition: rdma_common.h:51
#define GPU_BUF_NUM
Definition: rdma_common.h:54
#define GPU_BUF_SIZE_C
Definition: rdma_common.h:52
#define GPU_BUF_SIZE_F
Definition: rdma_common.h:53
static doca_error_t create_mmap(struct doca_dev *doca_device, unsigned int mmap_permissions, void *memrange_addr, size_t memrange_len, struct doca_mmap **mmap, doca_dpa_dev_mmap_t *dpa_mmap_handle)
struct buf_arr_obj client_local_buf_arr_B[NUM_CONN]
static doca_error_t create_memory_local_remote_client(int oob_sock_fd, struct rdma_resources *resources, int conn_idx, cudaStream_t stream)
uint8_t * client_local_buf_F[NUM_CONN]
uint8_t * client_local_buf_C_cpu[NUM_CONN]
DOCA_LOG_REGISTER(GPURDMA::SAMPLE)
static void destroy_memory_local_remote_client(struct rdma_resources *resources)
struct rdma_mmap_obj client_local_mmap_obj_C[NUM_CONN]
uint8_t * server_local_buf_A_gpu[NUM_CONN]
doca_error_t rdma_write_client(struct rdma_config *cfg)
struct buf_arr_obj server_local_buf_arr_A[NUM_CONN]
struct doca_mmap * client_remote_mmap_A[NUM_CONN]
struct rdma_mmap_obj client_local_mmap_obj_B[NUM_CONN]
struct buf_arr_obj client_remote_buf_arr_A[NUM_CONN]
uint8_t * client_local_buf_B_cpu[NUM_CONN]
struct buf_arr_obj server_remote_buf_arr_F[NUM_CONN]
struct doca_mmap * server_remote_mmap_F[NUM_CONN]
static void destroy_memory_local_remote_server(struct rdma_resources *resources)
static doca_error_t create_memory_local_remote_server(int oob_sock_fd, struct rdma_resources *resources, int conn_idx, cudaStream_t stream)
uint8_t * client_local_buf_B_gpu[NUM_CONN]
doca_error_t rdma_write_server(struct rdma_config *cfg)
struct rdma_resources resources
uint8_t * server_local_buf_A_cpu[NUM_CONN]
struct buf_arr_obj client_local_buf_arr_C[NUM_CONN]
struct buf_arr_obj client_local_buf_arr_F[NUM_CONN]
const uint32_t access_params
struct rdma_mmap_obj client_local_mmap_obj_F[NUM_CONN]
struct rdma_mmap_obj server_local_mmap_obj_A[NUM_CONN]
uint8_t * client_local_buf_C_gpu[NUM_CONN]
DOCA_EXPERIMENTAL doca_error_t doca_buf_arr_destroy(struct doca_buf_arr *buf_arr)
Destroys a doca buf array instance.
enum doca_error doca_error_t
DOCA API return codes.
DOCA_STABLE const char * doca_error_get_descr(doca_error_t error)
Returns the description string of an error code.
@ DOCA_ERROR_CONNECTION_ABORTED
Definition: doca_error.h:50
@ DOCA_SUCCESS
Definition: doca_error.h:38
@ DOCA_ERROR_NO_MEMORY
Definition: doca_error.h:45
@ DOCA_ERROR_DRIVER
Definition: doca_error.h:59
#define DOCA_LOG_ERR(format,...)
Generates an ERROR application log message.
Definition: doca_log.h:466
#define DOCA_LOG_INFO(format,...)
Generates an INFO application log message.
Definition: doca_log.h:486
DOCA_STABLE doca_error_t doca_mmap_destroy(struct doca_mmap *mmap)
Destroy DOCA Memory Map structure.
DOCA_STABLE doca_error_t doca_mmap_create_from_export(const union doca_data *user_data, const void *export_desc, size_t export_desc_len, struct doca_dev *dev, struct doca_mmap **mmap)
Creates a memory map object representing memory ranges in remote system memory space.
DOCA_STABLE uint8_t doca_pe_progress(struct doca_pe *pe)
Run the progress engine.
DOCA_EXPERIMENTAL doca_error_t doca_rdma_get_gpu_handle(struct doca_rdma *rdma, struct doca_gpu_dev_rdma **gpu_rdma)
Retrieve the handle in the gpu memory space of a doca_rdma.
DOCA_EXPERIMENTAL doca_error_t doca_rdma_connect_to_addr(struct doca_rdma *rdma, struct doca_rdma_addr *addr, union doca_data connection_user_data)
Connect to a remote doca_rdma peer listening for a connection. Can be called when the ctx is in DOCA_...
DOCA_EXPERIMENTAL doca_error_t doca_rdma_addr_create(enum doca_rdma_addr_type addr_type, const char *address, uint16_t port, struct doca_rdma_addr **addr)
Set connection address object for doca_rdma. The object can be queried using doca_rdma_connection_get...
DOCA_EXPERIMENTAL doca_error_t doca_rdma_export(struct doca_rdma *rdma, const void **local_rdma_conn_details, size_t *local_rdma_conn_details_size, struct doca_rdma_connection **rdma_connection)
Export doca_rdma connection details object. The doca_rdma_conn_details are used in doca_rdma_connect()...
DOCA_EXPERIMENTAL doca_error_t doca_rdma_start_listen_to_port(struct doca_rdma *rdma, uint16_t port)
Start listening for a connection from a remote doca_rdma peer. Can be called when the ctx is in DOCA_...
DOCA_EXPERIMENTAL doca_error_t doca_rdma_connect(struct doca_rdma *rdma, const void *remote_rdma_conn_details, size_t remote_rdma_conn_details_size, struct doca_rdma_connection *rdma_connection)
Connect to remote doca_rdma peer. Can only be called when the ctx is in DOCA_CTX_STATE_STARTING state...
@ DOCA_GPU_MEM_TYPE_GPU_CPU
Definition: doca_types.h:133
@ DOCA_GPU_MEM_TYPE_GPU
Definition: doca_types.h:131
@ DOCA_ACCESS_FLAG_LOCAL_READ_WRITE
Definition: doca_types.h:83
@ DOCA_ACCESS_FLAG_RDMA_WRITE
Definition: doca_types.h:85
const struct ip_frag_config * cfg
Definition: ip_frag_dp.c:0
uint32_t num_elem
Definition: rdma_common.h:117
struct doca_gpu_buf_arr * gpu_buf_arr
Definition: rdma_common.h:120
size_t elem_size
Definition: rdma_common.h:118
struct doca_gpu * gpudev
Definition: rdma_common.h:115
struct doca_buf_arr * buf_arr
Definition: rdma_common.h:119
struct doca_mmap * mmap
Definition: rdma_common.h:116
struct doca_dev * doca_device
Definition: rdma_common.h:104
size_t memrange_len
Definition: rdma_common.h:107
void * memrange_addr
Definition: rdma_common.h:106
struct doca_mmap * mmap
Definition: rdma_common.h:108
uint32_t permissions
Definition: rdma_common.h:105
bool server_listen_active
Definition: rdma_common.h:95
const void * connection_details
Definition: rdma_common.h:87
bool connection2_established
Definition: rdma_common.h:98
struct doca_rdma_addr * cm_addr
Definition: rdma_common.h:91
struct doca_rdma * rdma
Definition: rdma_common.h:83
struct doca_pe * pe
Definition: rdma_common.h:86
struct doca_gpu_dev_rdma * gpu_rdma
Definition: rdma_common.h:84
size_t conn_det_len
Definition: rdma_common.h:88
struct doca_dev * doca_device
Definition: rdma_common.h:81
bool connection2_error
Definition: rdma_common.h:99
bool connection_error
Definition: rdma_common.h:94
bool connection_established
Definition: rdma_common.h:93
struct doca_gpu * gpudev
Definition: rdma_common.h:82
Convenience type for representing opaque data.
Definition: doca_types.h:56
void * ptr
Definition: doca_types.h:57