// Source file: library/std/src/io/buffered/bufreader/buffer.rs
//! An encapsulation of `BufReader`'s buffer management logic.
//!
//! This module factors out the basic functionality of `BufReader` in order to protect two core
//! invariants:
//! * `filled` bytes of `buf` are always initialized
//! * `pos` is always <= `filled`
//!
//! Since this module encapsulates the buffer management logic, we can ensure that the range
//! `pos..filled` is always a valid index into the initialized region of the buffer. This means
//! that user code which wants to do reads from a `BufReader` via `buffer` + `consume` can do so
//! without encountering any runtime bounds checks.

use crate::cmp;
use crate::io::{self, BorrowedBuf, ErrorKind, Read};
use crate::mem::MaybeUninit;

pub struct Buffer {
    // The buffer.
    buf: Box<[MaybeUninit<u8>]>,
    // The current seek offset into `buf`, must always be <= `filled`.
    pos: usize,
    // Each call to `fill_buf` sets `filled` to indicate how many bytes at the start of `buf` are
    // initialized with bytes from a read.
    filled: usize,
    // Whether `buf` has been fully initialized. We track this so that we can accurately tell
    // `read_buf` how many bytes of buf are initialized, to bypass as much of its defensive
    // initialization as possible. Note that while this is often the same as `filled`, it doesn't
    // need to be. Calls to `fill_buf` are not required to actually fill the buffer, and omitting
    // this is a huge perf regression for `Read` impls that do not.
    initialized: bool,
}
31
impl Buffer {
    /// Creates a buffer backed by `capacity` bytes of uninitialized storage.
    /// Aborts the process if the allocation fails.
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        let buf = Box::new_uninit_slice(capacity);
        Self { buf, pos: 0, filled: 0, initialized: false }
    }

    /// Fallible version of [`Buffer::with_capacity`]: allocation failure is reported as an
    /// [`ErrorKind::OutOfMemory`] error instead of aborting.
    #[inline]
    pub fn try_with_capacity(capacity: usize) -> io::Result<Self> {
        match Box::try_new_uninit_slice(capacity) {
            Ok(buf) => Ok(Self { buf, pos: 0, filled: 0, initialized: false }),
            Err(_) => {
                Err(io::const_error!(ErrorKind::OutOfMemory, "failed to allocate read buffer"))
            }
        }
    }

    /// Returns the bytes that have been read into the buffer but not yet consumed,
    /// i.e. the `pos..filled` region, without any runtime bounds checks.
    #[inline]
    pub fn buffer(&self) -> &[u8] {
        // SAFETY: self.pos and self.filled are valid, and self.filled >= self.pos, and
        // that region is initialized because those are all invariants of this type.
        unsafe { self.buf.get_unchecked(self.pos..self.filled).assume_init_ref() }
    }

    /// Total size of the backing allocation in bytes (not the number of bytes available).
    #[inline]
    pub fn capacity(&self) -> usize {
        self.buf.len()
    }

    /// Number of bytes at the start of `buf` that hold data from previous reads.
    #[inline]
    pub fn filled(&self) -> usize {
        self.filled
    }

    /// Current consumption offset into `buf`; always <= `filled()`.
    #[inline]
    pub fn pos(&self) -> usize {
        self.pos
    }

    // This is only used by a test which asserts that the initialization-tracking is correct.
    #[cfg(test)]
    pub fn initialized(&self) -> bool {
        self.initialized
    }

    /// Drops all buffered data by resetting `pos` and `filled` to 0.
    /// Note that `initialized` is deliberately left alone: the bytes stay initialized
    /// even though they are no longer considered filled.
    #[inline]
    pub fn discard_buffer(&mut self) {
        self.pos = 0;
        self.filled = 0;
    }

    /// Marks `amt` bytes as consumed. Clamped to `filled` so the `pos <= filled`
    /// invariant holds even if the caller over-reports.
    #[inline]
    pub fn consume(&mut self, amt: usize) {
        self.pos = cmp::min(self.pos + amt, self.filled);
    }

    /// If there are `amt` bytes available in the buffer, pass a slice containing those bytes to
    /// `visitor` and return true. If there are not enough bytes available, return false.
    #[inline]
    pub fn consume_with<V>(&mut self, amt: usize, mut visitor: V) -> bool
    where
        V: FnMut(&[u8]),
    {
        if let Some(claimed) = self.buffer().get(..amt) {
            visitor(claimed);
            // If the indexing into self.buffer() succeeds, amt must be a valid increment.
            self.pos += amt;
            true
        } else {
            false
        }
    }

    /// Rewinds `pos` by `amt` bytes (saturating at 0), making previously consumed
    /// bytes readable again.
    #[inline]
    pub fn unconsume(&mut self, amt: usize) {
        self.pos = self.pos.saturating_sub(amt);
    }

    /// Read more bytes into the buffer without discarding any of its contents
    pub fn read_more(&mut self, mut reader: impl Read) -> io::Result<usize> {
        // Only the unfilled tail of the buffer is handed to the reader, so existing
        // data in `..self.filled` is never overwritten.
        let mut buf = BorrowedBuf::from(&mut self.buf[self.filled..]);

        if self.initialized {
            // SAFETY: `self.initialized` is only set after `self.buf` was
            // fully initialized, and once `self.buf` is fully initialized
            // no part will become uninitialized.
            unsafe { buf.set_init() };
        }

        reader.read_buf(buf.unfilled())?;
        // `buf` wraps only the tail, so its filled length is exactly the number of
        // newly read bytes.
        self.filled += buf.len();
        // If the tail is now fully initialized then the whole buffer is, because the
        // bytes before `self.filled` were already initialized by the type invariant.
        self.initialized = buf.is_init();
        Ok(buf.len())
    }

    /// Remove bytes that have already been read from the buffer.
    pub fn backshift(&mut self) {
        self.buf.copy_within(self.pos..self.filled, 0);
        self.filled -= self.pos;
        self.pos = 0;
    }

    /// If the buffer is empty, refills it with a single `read_buf` call on `reader`;
    /// otherwise leaves it untouched. Returns the unconsumed bytes on success.
    #[inline]
    pub fn fill_buf(&mut self, mut reader: impl Read) -> io::Result<&[u8]> {
        // If we've reached the end of our internal buffer then we need to fetch
        // some more data from the reader.
        // Branch using `>=` instead of the more correct `==`
        // to tell the compiler that the pos..cap slice is always valid.
        if self.pos >= self.filled {
            debug_assert!(self.pos == self.filled);

            let mut buf = BorrowedBuf::from(&mut *self.buf);

            if self.initialized {
                // SAFETY: `self.initialized` is only set after `self.buf` was
                // fully initialized, and once `self.buf` is fully initialized
                // no part will become uninitialized.
                unsafe { buf.set_init() };
            }

            let result = reader.read_buf(buf.unfilled());

            // Update the tracked state before propagating any read error, so the
            // buffer's invariants hold no matter what the reader did.
            self.pos = 0;
            self.filled = buf.len();
            self.initialized = buf.is_init();

            result?;
        }
        Ok(self.buffer())
    }
}
162}