Skip to main content

core\stdarch\crates\core_arch\src\nvptx/
mod.rs

//! NVPTX intrinsics (experimental)
//!
//! These intrinsics form the foundation of the CUDA
//! programming model.
//!
//! The reference is the [CUDA C Programming Guide][cuda_c]. The
//! [LLVM NVPTX Backend documentation][llvm_docs] is also relevant.
//!
//! [cuda_c]:
//! https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html
//! [llvm_docs]:
//! https://llvm.org/docs/NVPTXUsage.html
13
14use crate::ffi::c_void;
15
16mod packed;
17
18#[unstable(feature = "stdarch_nvptx", issue = "111199")]
19pub use packed::*;
20
21#[allow(improper_ctypes)]
22unsafe extern "C" {
23    #[link_name = "llvm.nvvm.barrier0"]
24    fn syncthreads() -> ();
25    #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
26    fn block_dim_x() -> u32;
27    #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
28    fn block_dim_y() -> u32;
29    #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"]
30    fn block_dim_z() -> u32;
31    #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"]
32    fn block_idx_x() -> u32;
33    #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"]
34    fn block_idx_y() -> u32;
35    #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"]
36    fn block_idx_z() -> u32;
37    #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"]
38    fn grid_dim_x() -> u32;
39    #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"]
40    fn grid_dim_y() -> u32;
41    #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"]
42    fn grid_dim_z() -> u32;
43    #[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"]
44    fn thread_idx_x() -> u32;
45    #[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"]
46    fn thread_idx_y() -> u32;
47    #[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"]
48    fn thread_idx_z() -> u32;
49}
50
51/// Synchronizes all threads in the block.
52///
53#[doc = include_str!("../amdgpu/intrinsic_is_convergent.md")]
54#[inline]
55#[unstable(feature = "stdarch_nvptx", issue = "111199")]
56pub unsafe fn _syncthreads() -> () {
57    syncthreads()
58}
59
60/// x-th thread-block dimension.
61#[inline]
62#[unstable(feature = "stdarch_nvptx", issue = "111199")]
63pub unsafe fn _block_dim_x() -> u32 {
64    block_dim_x()
65}
66
67/// y-th thread-block dimension.
68#[inline]
69#[unstable(feature = "stdarch_nvptx", issue = "111199")]
70pub unsafe fn _block_dim_y() -> u32 {
71    block_dim_y()
72}
73
74/// z-th thread-block dimension.
75#[inline]
76#[unstable(feature = "stdarch_nvptx", issue = "111199")]
77pub unsafe fn _block_dim_z() -> u32 {
78    block_dim_z()
79}
80
81/// x-th thread-block index.
82#[inline]
83#[unstable(feature = "stdarch_nvptx", issue = "111199")]
84pub unsafe fn _block_idx_x() -> u32 {
85    block_idx_x()
86}
87
88/// y-th thread-block index.
89#[inline]
90#[unstable(feature = "stdarch_nvptx", issue = "111199")]
91pub unsafe fn _block_idx_y() -> u32 {
92    block_idx_y()
93}
94
95/// z-th thread-block index.
96#[inline]
97#[unstable(feature = "stdarch_nvptx", issue = "111199")]
98pub unsafe fn _block_idx_z() -> u32 {
99    block_idx_z()
100}
101
102/// x-th block-grid dimension.
103#[inline]
104#[unstable(feature = "stdarch_nvptx", issue = "111199")]
105pub unsafe fn _grid_dim_x() -> u32 {
106    grid_dim_x()
107}
108
109/// y-th block-grid dimension.
110#[inline]
111#[unstable(feature = "stdarch_nvptx", issue = "111199")]
112pub unsafe fn _grid_dim_y() -> u32 {
113    grid_dim_y()
114}
115
116/// z-th block-grid dimension.
117#[inline]
118#[unstable(feature = "stdarch_nvptx", issue = "111199")]
119pub unsafe fn _grid_dim_z() -> u32 {
120    grid_dim_z()
121}
122
123/// x-th thread index.
124#[inline]
125#[unstable(feature = "stdarch_nvptx", issue = "111199")]
126pub unsafe fn _thread_idx_x() -> u32 {
127    thread_idx_x()
128}
129
130/// y-th thread index.
131#[inline]
132#[unstable(feature = "stdarch_nvptx", issue = "111199")]
133pub unsafe fn _thread_idx_y() -> u32 {
134    thread_idx_y()
135}
136
137/// z-th thread index.
138#[inline]
139#[unstable(feature = "stdarch_nvptx", issue = "111199")]
140pub unsafe fn _thread_idx_z() -> u32 {
141    thread_idx_z()
142}
143
144/// Generates the trap instruction `TRAP`
145#[inline]
146#[unstable(feature = "stdarch_nvptx", issue = "111199")]
147pub unsafe fn trap() -> ! {
148    crate::intrinsics::abort()
149}
150
151// Basic CUDA syscall declarations.
152unsafe extern "C" {
153    /// Print formatted output from a kernel to a host-side output stream.
154    ///
155    /// Syscall arguments:
156    /// * `status`: The status value that is returned by `vprintf`.
157    /// * `format`: A pointer to the format specifier input (uses common `printf` format).
158    /// * `valist`: A pointer to the valist input.
159    ///
160    /// ```
161    /// #[repr(C)]
162    /// struct PrintArgs(f32, f32, f32, i32);
163    ///
164    /// vprintf(
165    ///     "int(%f + %f) = int(%f) = %d\n".as_ptr(),
166    ///     transmute(&PrintArgs(a, b, a + b, (a + b) as i32)),
167    /// );
168    /// ```
169    ///
170    /// Sources:
171    /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#formatted-output),
172    /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
173    #[unstable(feature = "stdarch_nvptx", issue = "111199")]
174    pub fn vprintf(format: *const u8, valist: *const c_void) -> i32;
175
176    /// Allocate memory dynamically from a fixed-size heap in global memory.
177    ///
178    /// The CUDA in-kernel `malloc()` function allocates at least `size` bytes
179    /// from the device heap and returns a pointer to the allocated memory
180    /// or `NULL` if insufficient memory exists to fulfill the request.
181    ///
182    /// The returned pointer is guaranteed to be aligned to a 16-byte boundary.
183    ///
184    /// The memory allocated by a given CUDA thread via `malloc()` remains allocated
185    /// for the lifetime of the CUDA context, or until it is explicitly released
186    /// by a call to `free()`. It can be used by any other CUDA threads
187    /// even from subsequent kernel launches.
188    ///
189    /// Sources:
190    /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
191    /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
192    // FIXME(denzp): assign `malloc` and `nothrow` attributes.
193    #[unstable(feature = "stdarch_nvptx", issue = "111199")]
194    pub fn malloc(size: usize) -> *mut c_void;
195
196    /// Free previously dynamically allocated memory.
197    ///
198    /// The CUDA in-kernel `free()` function deallocates the memory pointed to by `ptr`,
199    /// which must have been returned by a previous call to `malloc()`. If `ptr` is NULL,
200    /// the call to `free()` is ignored.
201    ///
202    /// Any CUDA thread may free memory allocated by another thread, but care should be taken
203    /// to ensure that the same pointer is not freed more than once. Repeated calls to `free()`
204    /// with the same `ptr` has undefined behavior.
205    ///
206    /// Sources:
207    /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
208    /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
209    // FIXME(denzp): assign `nothrow` attribute.
210    #[unstable(feature = "stdarch_nvptx", issue = "111199")]
211    pub fn free(ptr: *mut c_void);
212
213    // Internal declaration of the syscall. Exported variant has
214    // the `char_size` parameter set to `1` (single char size in bytes).
215    fn __assertfail(
216        message: *const u8,
217        file: *const u8,
218        line: u32,
219        function: *const u8,
220        char_size: usize,
221    );
222}
223
224/// Syscall to be used whenever the *assert expression produces a `false` value*.
225///
226/// Syscall arguments:
227/// * `message`: The pointer to the string that should be output.
228/// * `file`: The pointer to the file name string associated with the assert.
229/// * `line`: The line number associated with the assert.
230/// * `function`: The pointer to the function name string associated with the assert.
231///
232/// Source:
233/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
234#[inline]
235#[unstable(feature = "stdarch_nvptx", issue = "111199")]
236pub unsafe fn __assert_fail(message: *const u8, file: *const u8, line: u32, function: *const u8) {
237    __assertfail(message, file, line, function, 1)
238}