NVIDIA DOCA SDK Data Center on a Chip Framework Documentation
dpaintrin.h File Reference
#include <stdint.h>
Include dependency graph for dpaintrin.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define DPA_INTRIN_VERSION_USED   (DPA_INTRIN_VERSION(1, 3))
 
#define __DPA_HEAP   __MSPACE_HEAP
 DPA 'Heap' memory space. More...
 
#define __DPA_MEMORY   __MSPACE_MEMORY
 DPA 'Memory' memory space. More...
 
#define __DPA_MMIO   __MSPACE_MMIO
 DPA 'MMIO' memory space. More...
 
#define __DPA_SYSTEM   __MSPACE_SYSTEM
 DPA 'System' memory space. More...
 
#define __DPA_R   __MOP_R
 Read memory operation. More...
 
#define __DPA_W   __MOP_W
 Write memory operation. More...
 
#define __DPA_RW   __MOP_RW
 Read and Write memory operation. More...
 
#define __dpa_thread_fence(MEMORY_SPACE, PRED_OP, SUCC_OP)    __dpa_thread_fence_internal_1_3(MEMORY_SPACE, PRED_OP, SUCC_OP);
 
#define __dpa_thread_memory_fence(OP1, OP2)    __dpa_thread_fence(__DPA_MEMORY, OP1, OP2)
 Equivalent to calling __dpa_thread_fence(__DPA_MEMORY, OP1, OP2) More...
 
#define __dpa_thread_outbox_fence(OP1, OP2)    __dpa_thread_fence(__DPA_MMIO, OP1, OP2)
 Equivalent to calling __dpa_thread_fence(__DPA_MMIO, OP1, OP2) More...
 
#define __dpa_thread_window_fence(OP1, OP2)    __dpa_thread_fence(__DPA_MMIO, OP1, OP2)
 Equivalent to calling __dpa_thread_fence(__DPA_MMIO, OP1, OP2) More...
 
#define __dpa_thread_system_fence()    __dpa_thread_fence(__DPA_SYSTEM, __DPA_RW, __DPA_RW)
 Equivalent to calling __dpa_thread_fence(__DPA_SYSTEM, __DPA_RW, __DPA_RW) More...
 
#define __dpa_thread_window_read_inv()    __dpa_thread_fence(__DPA_MMIO, __DPA_R, __DPA_R)
 
#define __dpa_thread_window_writeback()    __dpa_thread_fence(__DPA_MMIO, __DPA_W, __DPA_W)
 
#define __dpa_thread_l1_flush()    __dpa_thread_l1_flush_internal_1_3()
 Flush L1 Cache. More...
 
#define __dpa_thread_memory_writeback()    __dpa_thread_fence(__DPA_MEMORY, __DPA_W, __DPA_W)
 
#define __dpa_thread_cycles()   __dpa_thread_cycles_internal_1_3()
 
#define __dpa_thread_inst_ret()   __dpa_thread_inst_ret_internal_1_3()
 
#define __dpa_thread_time()   __dpa_thread_time_internal_1_3()
 
#define __dpa_remote_atomic_load(PTR, MEMORDER)    __dpa_remote_atomic_load_internal_1_3(PTR, MEMORDER)
 
#define __dpa_remote_atomic_exchange(PTR, VAL, MEMORDER)    __dpa_remote_atomic_exchange_internal_1_3(PTR, VAL, MEMORDER)
 
#define __dpa_remote_atomic_add_fetch(PTR, VAL, MEMORDER)    __dpa_remote_atomic_add_fetch_internal_1_3(PTR, VAL, MEMORDER)
 
#define __dpa_remote_atomic_sub_fetch(PTR, VAL, MEMORDER)    __dpa_remote_atomic_sub_fetch_internal_1_3(PTR, VAL, MEMORDER)
 
#define __dpa_remote_atomic_fetch_add(PTR, VAL, MEMORDER)    __dpa_remote_atomic_fetch_add_internal_1_3(PTR, VAL, MEMORDER)
 
#define __dpa_remote_atomic_fetch_sub(PTR, VAL, MEMORDER)    __dpa_remote_atomic_fetch_sub_internal_1_3(PTR, VAL, MEMORDER)
 
#define __extract_fields(dst, src, ...)
 

Functions

static __attribute__ ((always_inline)) void __extract_fields_internal(uint64_t *restrict dst
 ARG varg integer array of triplets (dst offset, src offset, length in bytes) More...
 
 if (bitoffset % 64+bitlength > 64) result|
 

Variables

static int bitoffset
 
static int int bitlength
 
return result
 
static uint64_t *restrict src
 
static uint64_t *restrict int extract_count
 

Macro Definition Documentation

◆ __DPA_HEAP

#define __DPA_HEAP   __MSPACE_HEAP

DPA 'Heap' memory space.

Definition at line 30 of file dpaintrin.h.

◆ __DPA_MEMORY

#define __DPA_MEMORY   __MSPACE_MEMORY

DPA 'Memory' memory space.

Definition at line 32 of file dpaintrin.h.

◆ __DPA_MMIO

#define __DPA_MMIO   __MSPACE_MMIO

DPA 'MMIO' memory space.

Definition at line 34 of file dpaintrin.h.

◆ __DPA_R

#define __DPA_R   __MOP_R

Read memory operation.

Definition at line 39 of file dpaintrin.h.

◆ __dpa_remote_atomic_add_fetch

#define __dpa_remote_atomic_add_fetch (   PTR,
  VAL,
  MEMORDER 
)     __dpa_remote_atomic_add_fetch_internal_1_3(PTR, VAL, MEMORDER)

Atomically performs an add/sub operation on the value at PTR with VAL, stores the result back into PTR, and returns the result.

Parameters
PTR: Pointer to load operand from and store result to.
VAL: Value to add/sub to *PTR.
MEMORDER: Must be one of __ATOMIC_RELAXED, __ATOMIC_CONSUME, __ATOMIC_ACQUIRE, __ATOMIC_RELEASE, __ATOMIC_ACQ_REL, or __ATOMIC_SEQ_CST.

Definition at line 164 of file dpaintrin.h.

◆ __dpa_remote_atomic_exchange

#define __dpa_remote_atomic_exchange (   PTR,
  VAL,
  MEMORDER 
)     __dpa_remote_atomic_exchange_internal_1_3(PTR, VAL, MEMORDER)

Atomically writes VAL into *PTR and returns the old value in *PTR.

Parameters
PTR: Pointer to the memory location to write to and load from.
VAL: Value to write to the location PTR.
MEMORDER: Must be one of __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME, __ATOMIC_RELEASE, or __ATOMIC_ACQ_REL.

Definition at line 154 of file dpaintrin.h.

◆ __dpa_remote_atomic_fetch_add

#define __dpa_remote_atomic_fetch_add (   PTR,
  VAL,
  MEMORDER 
)     __dpa_remote_atomic_fetch_add_internal_1_3(PTR, VAL, MEMORDER)

Atomically performs an add/sub operation on the value at PTR with VAL, stores the result back into PTR, and returns the original value at PTR.

Parameters
PTR: Pointer to load operand from and store result to.
VAL: Value to add/sub to *PTR.
MEMORDER: Must be one of __ATOMIC_RELAXED, __ATOMIC_CONSUME, __ATOMIC_ACQUIRE, __ATOMIC_RELEASE, __ATOMIC_ACQ_REL, or __ATOMIC_SEQ_CST.

Definition at line 176 of file dpaintrin.h.

◆ __dpa_remote_atomic_fetch_sub

#define __dpa_remote_atomic_fetch_sub (   PTR,
  VAL,
  MEMORDER 
)     __dpa_remote_atomic_fetch_sub_internal_1_3(PTR, VAL, MEMORDER)

Definition at line 178 of file dpaintrin.h.

◆ __dpa_remote_atomic_load

#define __dpa_remote_atomic_load (   PTR,
  MEMORDER 
)     __dpa_remote_atomic_load_internal_1_3(PTR, MEMORDER)

Atomically loads the value at PTR and returns it.

Parameters
PTR: Pointer to the memory location to load from.
MEMORDER: Must be one of __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE, or __ATOMIC_CONSUME.

Definition at line 145 of file dpaintrin.h.

◆ __dpa_remote_atomic_sub_fetch

#define __dpa_remote_atomic_sub_fetch (   PTR,
  VAL,
  MEMORDER 
)     __dpa_remote_atomic_sub_fetch_internal_1_3(PTR, VAL, MEMORDER)

Definition at line 166 of file dpaintrin.h.

◆ __DPA_RW

#define __DPA_RW   __MOP_RW

Read and Write memory operation.

Definition at line 43 of file dpaintrin.h.

◆ __DPA_SYSTEM

#define __DPA_SYSTEM   __MSPACE_SYSTEM

DPA 'System' memory space.

Definition at line 36 of file dpaintrin.h.

◆ __dpa_thread_cycles

#define __dpa_thread_cycles ( )    __dpa_thread_cycles_internal_1_3()

Returns a counter containing the number of cycles elapsed since an arbitrary start point in the past on the execution unit the thread is currently scheduled on. Note that the value returned by this function is meaningful only while the thread remains associated with this execution unit.

Definition at line 127 of file dpaintrin.h.

◆ __dpa_thread_fence

#define __dpa_thread_fence (   MEMORY_SPACE,
  PRED_OP,
  SUCC_OP 
)     __dpa_thread_fence_internal_1_3(MEMORY_SPACE, PRED_OP, SUCC_OP);

Ensures that all operations (PRED_OP) performed by the calling thread before the call to __dpa_thread_fence() are performed and made visible to all threads in the DPA, host, NIC engines, and peer devices as occurring before all operations (SUCC_OP) to the memory space after the call to __dpa_thread_fence().

Parameters
MEMORY_SPACE: The DPA memory space to which the fence operation applies.
PRED_OP: Predecessor operation.
SUCC_OP: Successor operation. Example: __dpa_thread_fence(__DPA_SYSTEM, __DPA_RW, __DPA_RW);

Definition at line 54 of file dpaintrin.h.

◆ __dpa_thread_inst_ret

#define __dpa_thread_inst_ret ( )    __dpa_thread_inst_ret_internal_1_3()

Returns a counter containing the number of instructions retired since an arbitrary start point in the past by the execution unit the thread is currently scheduled on. Note that the value returned by this function is meaningful only while the thread remains associated with this execution unit.

Definition at line 133 of file dpaintrin.h.

◆ __dpa_thread_l1_flush

#define __dpa_thread_l1_flush ( )     __dpa_thread_l1_flush_internal_1_3()

Flush L1 Cache.

Definition at line 90 of file dpaintrin.h.

◆ __dpa_thread_memory_fence

#define __dpa_thread_memory_fence (   OP1,
  OP2 
)     __dpa_thread_fence(__DPA_MEMORY, OP1, OP2)

Equivalent to calling __dpa_thread_fence(__DPA_MEMORY, OP1, OP2)

Definition at line 58 of file dpaintrin.h.

◆ __dpa_thread_memory_writeback

#define __dpa_thread_memory_writeback ( )     __dpa_thread_fence(__DPA_MEMORY, __DPA_W, __DPA_W)

Ensures that the contents in the Memory address space of the thread before the call to __dpa_thread_memory_writeback() are performed and made visible to all threads in the DPA, host, NIC engines, and peer devices as occurring before any write operations after the call to __dpa_thread_memory_writeback().

Definition at line 100 of file dpaintrin.h.

◆ __dpa_thread_outbox_fence

#define __dpa_thread_outbox_fence (   OP1,
  OP2 
)     __dpa_thread_fence(__DPA_MMIO, OP1, OP2)

Equivalent to calling __dpa_thread_fence(__DPA_MMIO, OP1, OP2)

Definition at line 62 of file dpaintrin.h.

◆ __dpa_thread_system_fence

#define __dpa_thread_system_fence ( )     __dpa_thread_fence(__DPA_SYSTEM, __DPA_RW, __DPA_RW)

Equivalent to calling __dpa_thread_fence(__DPA_SYSTEM, __DPA_RW, __DPA_RW)

Definition at line 70 of file dpaintrin.h.

◆ __dpa_thread_time

#define __dpa_thread_time ( )    __dpa_thread_time_internal_1_3()

Returns the number of timer ticks elapsed since an arbitrary start point in the past on the execution unit the thread is currently scheduled on. Note that the value returned by this function is meaningful only while the thread remains associated with this execution unit.

Definition at line 138 of file dpaintrin.h.

◆ __dpa_thread_window_fence

#define __dpa_thread_window_fence (   OP1,
  OP2 
)     __dpa_thread_fence(__DPA_MMIO, OP1, OP2)

Equivalent to calling __dpa_thread_fence(__DPA_MMIO, OP1, OP2)

Definition at line 66 of file dpaintrin.h.

◆ __dpa_thread_window_read_inv

#define __dpa_thread_window_read_inv ( )     __dpa_thread_fence(__DPA_MMIO, __DPA_R, __DPA_R)

Ensures that contents in the window memory space of the thread before the call to __dpa_thread_window_read_inv() are invalidated before read operations made by the calling thread after the call to __dpa_thread_window_read_inv().

Definition at line 77 of file dpaintrin.h.

◆ __dpa_thread_window_writeback

#define __dpa_thread_window_writeback ( )     __dpa_thread_fence(__DPA_MMIO, __DPA_W, __DPA_W)

Ensures that contents in the window memory space of the thread before the call to __dpa_thread_window_writeback() are performed and made visible to all threads in the DPA, host, NIC engines, and peer devices as occurring before any write operations after the call to __dpa_thread_window_writeback().

Definition at line 85 of file dpaintrin.h.

◆ __DPA_W

#define __DPA_W   __MOP_W

Write memory operation.

Definition at line 41 of file dpaintrin.h.

◆ __extract_fields

#define __extract_fields (   dst,
  src,
  ... 
)
Value:
__extract_fields_internal(dst, src, \
sizeof((int []) {__VA_ARGS__}) / sizeof(int) / 3, \
(int []) {__VA_ARGS__})
static uint64_t *restrict src
Definition: dpaintrin.h:230

Extracts multiple fields from a source buffer to a destination buffer in a minimal number of load/store operations. Typically used to extract fields from a packet header to a struct. src and dst must be aligned to 8 bytes. If length is negative, |length| bytes are extracted in reverse byte order, and |length| must be 2, 4, or 8. Minimal loads/stores are generated only if all lengths and offsets are compile-time constants.

Parameters
dst: Destination buffer where extracted fields will be stored.
src: Source buffer from which fields will be extracted.
...: Variable arguments specifying triplets of (dst offset, src offset, length). Each triplet defines a field extraction operation. Offsets and lengths are in bytes. If length is negative, the field is byte-reversed.

Definition at line 211 of file dpaintrin.h.

◆ DPA_INTRIN_VERSION_USED

#define DPA_INTRIN_VERSION_USED   (DPA_INTRIN_VERSION(1, 3))

Definition at line 25 of file dpaintrin.h.

Function Documentation

◆ __attribute__()

static __attribute__ ( (always_inline)  )
inlinestatic

ARG varg: integer array of triplets (dst offset, src offset, length in bytes).

REQUIRES: length <= 64. Extracts AT LEAST bitlength bits.

◆ if()

if ( bitoffset % 64+  bitlength,
64   
)

Variable Documentation

◆ bitlength

int int bitlength
Initial value:
{
uint64_t result = arr[bitoffset/64] >> (bitoffset % 64)
static int bitoffset
Definition: dpaintrin.h:219
return result
Definition: dpaintrin.h:225

Definition at line 219 of file dpaintrin.h.

◆ bitoffset

int bitoffset

Definition at line 219 of file dpaintrin.h.

◆ extract_count

uint64_t* restrict int extract_count

Definition at line 231 of file dpaintrin.h.

◆ result

return result

Definition at line 225 of file dpaintrin.h.

◆ src

uint64_t* restrict src

Definition at line 230 of file dpaintrin.h.