Skip to main content

std\io\error/
repr_bitpacked.rs

1//! This is a densely packed error representation which is used on targets with
2//! 64-bit pointers.
3//!
4//! (Note that `bitpacked` vs `unpacked` here has no relationship to
5//! `#[repr(packed)]`, it just refers to attempting to use any available bits in
6//! a more clever manner than `rustc`'s default layout algorithm would).
7//!
8//! Conceptually, it stores the same data as the "unpacked" equivalent we use on
9//! other targets. Specifically, you can imagine it as an optimized version of
10//! the following enum (which is roughly equivalent to what's stored by
11//! `repr_unpacked::Repr`, e.g. `super::ErrorData<Box<Custom>>`):
12//!
13//! ```ignore (exposition-only)
14//! enum ErrorData {
15//!    Os(i32),
16//!    Simple(ErrorKind),
17//!    SimpleMessage(&'static SimpleMessage),
18//!    Custom(Box<Custom>),
19//! }
20//! ```
21//!
22//! However, it packs this data into a 64-bit non-zero value.
23//!
24//! This optimization not only allows `io::Error` to occupy a single pointer,
25//! but improves `io::Result` as well, especially for situations like
26//! `io::Result<()>` (which is now 64 bits) or `io::Result<u64>` (which is now
27//! 128 bits), which are quite common.
28//!
29//! # Layout
30//! Tagged values are 64 bits, with the 2 least significant bits used for the
31//! tag. This means there are 4 "variants":
32//!
33//! - **Tag 0b00**: The first variant is equivalent to
34//!   `ErrorData::SimpleMessage`, and holds a `&'static SimpleMessage` directly.
35//!
36//!   `SimpleMessage` has an alignment >= 4 (which is requested with
37//!   `#[repr(align)]` and checked statically at the bottom of this file), which
38//!   means every `&'static SimpleMessage` should have both tag bits as 0,
39//!   meaning its tagged and untagged representations are equivalent.
40//!
41//!   This means we can skip tagging it, which is necessary as this variant can
42//!   be constructed from a `const fn`, which probably cannot tag pointers (or
43//!   at least it would be difficult).
44//!
45//! - **Tag 0b01**: The other pointer variant holds the data for
46//!   `ErrorData::Custom` and the remaining 62 bits are used to store a
47//!   `Box<Custom>`. `Custom` also has alignment >= 4, so the bottom two bits
48//!   are free to use for the tag.
49//!
50//!   The only important thing to note is that `ptr::wrapping_add` and
51//!   `ptr::wrapping_byte_sub` are used to tag and untag the pointer, rather
52//!   than bitwise operations. This should preserve the pointer's provenance,
53//!   which would otherwise be lost.
54//!
55//! - **Tag 0b10**: Holds the data for `ErrorData::Os(i32)`. We store the `i32`
56//!   in the pointer's most significant 32 bits, and don't use the bits `2..32`
57//!   for anything. Using the top 32 bits is just to let us easily recover the
58//!   `i32` code with the correct sign.
59//!
60//! - **Tag 0b11**: Holds the data for `ErrorData::Simple(ErrorKind)`. This
61//!   stores the `ErrorKind` in the top 32 bits as well, although it doesn't
62//!   occupy nearly that many. Most of the bits are unused here, but it's not
63//!   like we need them for anything else yet.
64//!
65//! # Use of `NonNull<()>`
66//!
67//! Everything is stored in a `NonNull<()>`, which is odd, but actually serves a
68//! purpose.
69//!
70//! Conceptually you might think of this more like:
71//!
72//! ```ignore (exposition-only)
73//! union Repr {
74//!     // holds integer (Simple/Os) variants, and
75//!     // provides access to the tag bits.
76//!     bits: NonZero<u64>,
77//!     // Tag is 0, so this is stored untagged.
78//!     msg: &'static SimpleMessage,
79//!     // Tagged (offset) `Box<Custom>` pointer.
80//!     tagged_custom: NonNull<()>,
81//! }
82//! ```
83//!
84//! But there are a few problems with this:
85//!
86//! 1. Union access is equivalent to a transmute, so this representation would
87//!    require we transmute between integers and pointers in at least one
88//!    direction, which may be UB (and even if not, it is likely harder for a
89//!    compiler to reason about than explicit ptr->int operations).
90//!
91//! 2. Even if all fields of a union have a niche, the union itself doesn't,
92//!    although this may change in the future. This would make things like
93//!    `io::Result<()>` and `io::Result<usize>` larger, which defeats part of
94//!    the motivation of this bitpacking.
95//!
96//! Storing everything in a `NonZero<usize>` (or some other integer) would be a
97//! bit more traditional for pointer tagging, but it would lose provenance
98//! information, couldn't be constructed from a `const fn`, and would probably
99//! run into other issues as well.
100//!
101//! The `NonNull<()>` seems like the only alternative, even if it's fairly odd
102//! to use a pointer type to store something that may hold an integer, some of
103//! the time.
104
105use core::marker::PhantomData;
106use core::num::NonZeroUsize;
107use core::ptr::NonNull;
108
109use super::{Custom, ErrorData, ErrorKind, RawOsError, SimpleMessage};
110
111// The 2 least-significant bits are used as tag: `TAG_MASK` selects them, and
// the other four constants are the tag values, one per `ErrorData` variant.
112const TAG_MASK: usize = 0b11;
113const TAG_SIMPLE_MESSAGE: usize = 0b00; // untagged `&'static SimpleMessage`
114const TAG_CUSTOM: usize = 0b01; // offset `Box<Custom>` pointer
115const TAG_OS: usize = 0b10; // OS error code kept in the upper 32 bits
116const TAG_SIMPLE: usize = 0b11; // `ErrorKind` kept in the upper 32 bits
117
118/// The internal representation.
119///
120/// See the module docs for how the value is encoded. The doctest below is
121/// just a way to hack in a check that `io::Error` is indeed not unwind-safe.
122///
123/// ```compile_fail,E0277
124/// fn is_unwind_safe<T: core::panic::UnwindSafe>() {}
125/// is_unwind_safe::<std::io::Error>();
126/// ```
// The tagged value lives in the `NonNull<()>`; the `PhantomData` names the
// unpacked type that this value stands in for (see the module docs).
127#[repr(transparent)]
128#[rustc_insignificant_dtor]
129pub(super) struct Repr(NonNull<()>, PhantomData<ErrorData<Box<Custom>>>);
130
131// Every type `Repr` stores internally is `Send + Sync`, and so is `Repr`.
132unsafe impl Send for Repr {}
133unsafe impl Sync for Repr {}
134
135impl Repr {
136    pub(super) fn new_custom(b: Box<Custom>) -> Self {
137        let p = Box::into_raw(b).cast::<u8>();
138        // Should only be possible if an allocator handed out a pointer with
139        // wrong alignment.
140        debug_assert_eq!(p.addr() & TAG_MASK, 0);
141        // Note: We know `TAG_CUSTOM <= size_of::<Custom>()` (static_assert at
142        // end of file), and both the start and end of the expression must be
143        // valid without address space wraparound due to `Box`'s semantics.
144        //
145        // This means it would be correct to implement this using `ptr::add`
146        // (rather than `ptr::wrapping_add`), but it's unclear this would give
147        // any benefit, so we just use `wrapping_add` instead.
148        let tagged = p.wrapping_add(TAG_CUSTOM).cast::<()>();
149        // Safety: `TAG_CUSTOM + p` is the same as `TAG_CUSTOM | p`,
150        // because `p`'s alignment means it isn't allowed to have any of the
151        // `TAG_BITS` set (you can verify that addition and bitwise-or are the
152        // same when the operands have no bits in common using a truth table).
153        //
154        // Then, `TAG_CUSTOM | p` is not zero, as that would require
155        // `TAG_CUSTOM` and `p` both be zero, and neither is (as `p` came from a
156        // box, and `TAG_CUSTOM` just... isn't zero -- it's `0b01`). Therefore,
157        // `TAG_CUSTOM + p` isn't zero and so `tagged` can't be, and the
158        // `new_unchecked` is safe.
159        let res = Self(unsafe { NonNull::new_unchecked(tagged) }, PhantomData);
160        // quickly smoke-check we encoded the right thing (This generally will
161        // only run in std's tests, unless the user uses -Zbuild-std)
162        debug_assert!(matches!(res.data(), ErrorData::Custom(_)), "repr(custom) encoding failed");
163        res
164    }
165
166    #[inline]
167    pub(super) fn new_os(code: RawOsError) -> Self {
168        let utagged = ((code as usize) << 32) | TAG_OS;
169        // Safety: `TAG_OS` is not zero, so the result of the `|` is not 0.
170        let res = Self(
171            NonNull::without_provenance(unsafe { NonZeroUsize::new_unchecked(utagged) }),
172            PhantomData,
173        );
174        // quickly smoke-check we encoded the right thing (This generally will
175        // only run in std's tests, unless the user uses -Zbuild-std)
176        debug_assert!(
177            matches!(res.data(), ErrorData::Os(c) if c == code),
178            "repr(os) encoding failed for {code}"
179        );
180        res
181    }
182
183    #[inline]
184    pub(super) fn new_simple(kind: ErrorKind) -> Self {
185        let utagged = ((kind as usize) << 32) | TAG_SIMPLE;
186        // Safety: `TAG_SIMPLE` is not zero, so the result of the `|` is not 0.
187        let res = Self(
188            NonNull::without_provenance(unsafe { NonZeroUsize::new_unchecked(utagged) }),
189            PhantomData,
190        );
191        // quickly smoke-check we encoded the right thing (This generally will
192        // only run in std's tests, unless the user uses -Zbuild-std)
193        debug_assert!(
194            matches!(res.data(), ErrorData::Simple(k) if k == kind),
195            "repr(simple) encoding failed {:?}",
196            kind,
197        );
198        res
199    }
200
201    #[inline]
202    pub(super) const fn new_simple_message(m: &'static SimpleMessage) -> Self {
203        // Safety: References are never null.
204        Self(unsafe { NonNull::new_unchecked(m as *const _ as *mut ()) }, PhantomData)
205    }
206
207    #[inline]
208    pub(super) fn data(&self) -> ErrorData<&Custom> {
209        // Safety: We're a Repr, decode_repr is fine.
210        unsafe { decode_repr(self.0, |c| &*c) }
211    }
212
213    #[inline]
214    pub(super) fn data_mut(&mut self) -> ErrorData<&mut Custom> {
215        // Safety: We're a Repr, decode_repr is fine.
216        unsafe { decode_repr(self.0, |c| &mut *c) }
217    }
218
219    #[inline]
220    pub(super) fn into_data(self) -> ErrorData<Box<Custom>> {
221        let this = core::mem::ManuallyDrop::new(self);
222        // Safety: We're a Repr, decode_repr is fine. The `Box::from_raw` is
223        // safe because we prevent double-drop using `ManuallyDrop`.
224        unsafe { decode_repr(this.0, |p| Box::from_raw(p)) }
225    }
226}
227
228impl Drop for Repr {
229    #[inline]
230    fn drop(&mut self) {
231        // Safety: We're a Repr, decode_repr is fine. The `Box::from_raw` is
232        // safe because we're being dropped.
233        unsafe {
234            let _ = decode_repr(self.0, |p| Box::<Custom>::from_raw(p));
235        }
236    }
237}
238
239// Shared helper to decode a `Repr`'s internal pointer into an ErrorData.
240//
241// Safety: `ptr`'s bits should be encoded as described in the document at the
242// top (it should `some_repr.0`)
243#[inline]
244unsafe fn decode_repr<C, F>(ptr: NonNull<()>, make_custom: F) -> ErrorData<C>
245where
246    F: FnOnce(*mut Custom) -> C,
247{
248    let bits = ptr.as_ptr().addr();
249    match bits & TAG_MASK {
250        TAG_OS => {
251            let code = ((bits as i64) >> 32) as RawOsError;
252            ErrorData::Os(code)
253        }
254        TAG_SIMPLE => {
255            let kind_bits = (bits >> 32) as u32;
256            let kind = ErrorKind::from_prim(kind_bits).unwrap_or_else(|| {
257                debug_assert!(false, "Invalid io::error::Repr bits: `Repr({:#018x})`", bits);
258                // This means the `ptr` passed in was not valid, which violates
259                // the unsafe contract of `decode_repr`.
260                //
261                // Using this rather than unwrap meaningfully improves the code
262                // for callers which only care about one variant (usually
263                // `Custom`)
264                unsafe { core::hint::unreachable_unchecked() };
265            });
266            ErrorData::Simple(kind)
267        }
268        TAG_SIMPLE_MESSAGE => {
269            // SAFETY: per tag
270            unsafe { ErrorData::SimpleMessage(&*ptr.cast::<SimpleMessage>().as_ptr()) }
271        }
272        TAG_CUSTOM => {
273            // It would be correct for us to use `ptr::byte_sub` here (see the
274            // comment above the `wrapping_add` call in `new_custom` for why),
275            // but it isn't clear that it makes a difference, so we don't.
276            let custom = ptr.as_ptr().wrapping_byte_sub(TAG_CUSTOM).cast::<Custom>();
277            ErrorData::Custom(make_custom(custom))
278        }
279        _ => {
280            // Can't happen, and compiler can tell
281            unreachable!();
282        }
283    }
284}
285
// Compile-time checks that alert us if a change breaks any of the assumptions
// our encoding relies on for correctness and soundness. (Some of these are a
// bit overly thorough/cautious, admittedly.)
//
// If any of these are hit on a platform that std supports, we should likely
// just use `repr_unpacked.rs` there instead (unless the fix is easy).
macro_rules! static_assert {
    // Boolean form: compilation fails unless the condition is `true`.
    ($cond:expr) => {
        const _: () = assert!($cond);
    };
    // Equality form: the two `usize` expressions are used as array lengths,
    // so compilation fails with a type mismatch unless they are equal.
    (@usize_eq: $left:expr, $right:expr) => {
        const _: [(); $left] = [(); $right];
    };
}
300
301// The bitpacking we use requires pointers be exactly 64 bits.
302static_assert!(@usize_eq: size_of::<NonNull<()>>(), 8);
303
304// We also require pointers and usize be the same size.
305static_assert!(@usize_eq: size_of::<NonNull<()>>(), size_of::<usize>());
306
307// `&'static SimpleMessage` and `Box<Custom>` need to be thin pointers.
308static_assert!(@usize_eq: size_of::<&'static SimpleMessage>(), 8);
309static_assert!(@usize_eq: size_of::<Box<Custom>>(), 8);
310
// `TAG_MASK` must be a contiguous run of low bits (one less than a power of
// two).
311static_assert!((TAG_MASK + 1).is_power_of_two());
312// And `SimpleMessage` and `Custom` must have sufficient alignment.
313static_assert!(align_of::<SimpleMessage>() >= TAG_MASK + 1);
314static_assert!(align_of::<Custom>() >= TAG_MASK + 1);
315
// Every tag value must fit entirely within `TAG_MASK`.
316static_assert!(@usize_eq: TAG_MASK & TAG_SIMPLE_MESSAGE, TAG_SIMPLE_MESSAGE);
317static_assert!(@usize_eq: TAG_MASK & TAG_CUSTOM, TAG_CUSTOM);
318static_assert!(@usize_eq: TAG_MASK & TAG_OS, TAG_OS);
319static_assert!(@usize_eq: TAG_MASK & TAG_SIMPLE, TAG_SIMPLE);
320
321// This is obviously true (`TAG_CUSTOM` is `0b01`), but in `Repr::new_custom` we
322// offset a pointer by this value, and expect it to both be within the same
323// object, and to not wrap around the address space. See the comment in that
324// function for further details.
325//
326// Actually, at the moment we use `ptr::wrapping_add`, not `ptr::add`, so this
327// check isn't needed for that one, although the assertion that we don't
328// actually wrap around in that wrapping_add does simplify the safety reasoning
329// elsewhere considerably.
330static_assert!(size_of::<Custom>() >= TAG_CUSTOM);
331
332// These two store a payload which is allowed to be zero, so they must be
333// non-zero to preserve the `NonNull`'s range invariant.
334static_assert!(TAG_OS != 0);
335static_assert!(TAG_SIMPLE != 0);
336// We can't tag `SimpleMessage`s, the tag must be 0.
337static_assert!(@usize_eq: TAG_SIMPLE_MESSAGE, 0);
338
339// Check that the point of all of this still holds.
340//
341// We'd check against `io::Error`, but *technically* it's allowed to vary,
342// as it's not `#[repr(transparent)]`/`#[repr(C)]`. We could add that, but
343// the `#[repr()]` would show up in rustdoc, which might be seen as a stable
344// commitment.
345static_assert!(@usize_eq: size_of::<Repr>(), 8);
346static_assert!(@usize_eq: size_of::<Option<Repr>>(), 8);
347static_assert!(@usize_eq: size_of::<Result<(), Repr>>(), 8);
348static_assert!(@usize_eq: size_of::<Result<usize, Repr>>(), 16);