Skip to main content

miri/shims/
files.rs

1use std::any::Any;
2use std::collections::BTreeMap;
3use std::fs::File;
4use std::io::{ErrorKind, IsTerminal, Read, Seek, SeekFrom, Write};
5use std::marker::CoercePointee;
6use std::ops::Deref;
7use std::rc::{Rc, Weak};
8use std::{fs, io};
9
10use rustc_abi::Size;
11
12use crate::shims::unix::UnixFileDescription;
13use crate::*;
14
/// Globally unique identifier of a file description.
///
/// The address of the description would be unique as well, but using it (for example as a
/// sort key) would expose interpreter-internal state. We therefore hand out a dedicated id
/// per file description: it is shared by all `dup`licates and is never reused.
#[derive(Debug, Copy, Clone, Default, Eq, PartialEq, Ord, PartialOrd)]
pub struct FdId(usize);

impl FdId {
    /// Returns the raw numeric value of this id.
    pub fn to_usize(self) -> usize {
        let FdId(raw) = self;
        raw
    }

    /// Builds an id from a raw `usize`; no check is made that such a file description exists.
    pub fn new_unchecked(id: usize) -> Self {
        FdId(id)
    }
}
32
/// Pairs a value with the [`FdId`] of the file description it belongs to.
#[derive(Debug, Clone)]
struct FdIdWith<T: ?Sized> {
    /// The id stored alongside `inner`.
    id: FdId,
    /// The wrapped value. `T` may be unsized, so this has to be the last field.
    inner: T,
}
38
39/// A refcounted pointer to a file description, also tracking the
40/// globally unique ID of this file description.
41#[repr(transparent)]
42#[derive(CoercePointee, Debug)]
43pub struct FileDescriptionRef<T: ?Sized>(Rc<FdIdWith<T>>);
44
45impl<T: ?Sized> Clone for FileDescriptionRef<T> {
46    fn clone(&self) -> Self {
47        FileDescriptionRef(self.0.clone())
48    }
49}
50
51impl<T: ?Sized> Deref for FileDescriptionRef<T> {
52    type Target = T;
53    fn deref(&self) -> &T {
54        &self.0.inner
55    }
56}
57
58impl<T: ?Sized> FileDescriptionRef<T> {
59    pub fn id(&self) -> FdId {
60        self.0.id
61    }
62}
63
64/// Holds a weak reference to the actual file description.
65#[derive(Debug)]
66pub struct WeakFileDescriptionRef<T: ?Sized>(Weak<FdIdWith<T>>);
67
68impl<T: ?Sized> Clone for WeakFileDescriptionRef<T> {
69    fn clone(&self) -> Self {
70        WeakFileDescriptionRef(self.0.clone())
71    }
72}
73
74impl<T: ?Sized> FileDescriptionRef<T> {
75    pub fn downgrade(this: &Self) -> WeakFileDescriptionRef<T> {
76        WeakFileDescriptionRef(Rc::downgrade(&this.0))
77    }
78}
79
80impl<T: ?Sized> WeakFileDescriptionRef<T> {
81    pub fn upgrade(&self) -> Option<FileDescriptionRef<T>> {
82        self.0.upgrade().map(FileDescriptionRef)
83    }
84}
85
86impl<T> VisitProvenance for WeakFileDescriptionRef<T> {
87    fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
88        // A weak reference can never be the only reference to some pointer or place.
89        // Since the actual file description is tracked by strong ref somewhere,
90        // it is ok to make this a NOP operation.
91    }
92}
93
/// A helper trait to indirectly allow downcasting on `Rc<FdIdWith<dyn _>>`.
/// Ideally we'd just add a `FdIdWith<Self>: Any` bound to the `FileDescription` trait,
/// but that does not allow upcasting.
pub trait FileDescriptionExt: 'static {
    /// Converts this reference into an `Rc<dyn Any>`, which can then be downcast to a
    /// concrete type.
    fn into_rc_any(self: FileDescriptionRef<Self>) -> Rc<dyn Any>;

    /// Generic wrapper around `FileDescription::destroy`, so that we can handle both
    /// `Rc::into_inner` and epoll interest management in one place.
    fn close_ref<'tcx>(
        self: FileDescriptionRef<Self>,
        communicate_allowed: bool,
        ecx: &mut MiriInterpCx<'tcx>,
    ) -> InterpResult<'tcx, io::Result<()>>;
}
108
109impl<T: FileDescription + 'static> FileDescriptionExt for T {
110    fn into_rc_any(self: FileDescriptionRef<Self>) -> Rc<dyn Any> {
111        self.0
112    }
113
114    fn close_ref<'tcx>(
115        self: FileDescriptionRef<Self>,
116        communicate_allowed: bool,
117        ecx: &mut MiriInterpCx<'tcx>,
118    ) -> InterpResult<'tcx, io::Result<()>> {
119        match Rc::into_inner(self.0) {
120            Some(fd) => {
121                // There might have been epolls interested in this FD. Remove that.
122                ecx.machine.epoll_interests.remove_epolls(fd.id);
123
124                fd.inner.destroy(fd.id, communicate_allowed, ecx)
125            }
126            None => {
127                // Not the last reference.
128                interp_ok(Ok(()))
129            }
130        }
131    }
132}
133
134pub type DynFileDescriptionRef = FileDescriptionRef<dyn FileDescription>;
135
136impl FileDescriptionRef<dyn FileDescription> {
137    pub fn downcast<T: FileDescription + 'static>(self) -> Option<FileDescriptionRef<T>> {
138        let inner = self.into_rc_any().downcast::<FdIdWith<T>>().ok()?;
139        Some(FileDescriptionRef(inner))
140    }
141}
142
/// Represents an open file description.
///
/// Only `name` is mandatory. Most other operations have a default implementation that
/// reports the operation as unsupported, using `name` in the error message.
pub trait FileDescription: std::fmt::Debug + FileDescriptionExt {
    /// A short, static name for this kind of file description.
    /// The default implementations below use it in their error messages.
    fn name(&self) -> &'static str;

    /// Reads as much as possible into the given buffer `ptr`.
    /// `len` indicates how many bytes we should try to read.
    ///
    /// When the read is done, `finish` will be called. Note that `read` itself may return before
    /// that happens! Everything that should happen "after" the `read` needs to happen inside
    /// `finish`.
    fn read<'tcx>(
        self: FileDescriptionRef<Self>,
        _communicate_allowed: bool,
        _ptr: Pointer,
        _len: usize,
        _ecx: &mut MiriInterpCx<'tcx>,
        _finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
    ) -> InterpResult<'tcx> {
        throw_unsup_format!("cannot read from {}", self.name());
    }

    /// Writes as much as possible from the given buffer `ptr`.
    /// `len` indicates how many bytes we should try to write.
    ///
    /// When the write is done, `finish` will be called. Note that `write` itself may return before
    /// that happens! Everything that should happen "after" the `write` needs to happen inside
    /// `finish`.
    fn write<'tcx>(
        self: FileDescriptionRef<Self>,
        _communicate_allowed: bool,
        _ptr: Pointer,
        _len: usize,
        _ecx: &mut MiriInterpCx<'tcx>,
        _finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
    ) -> InterpResult<'tcx> {
        throw_unsup_format!("cannot write to {}", self.name());
    }

    /// Determines whether this FD non-deterministically has its reads and writes shortened.
    fn short_fd_operations(&self) -> bool {
        // We only enable this for FD kinds where we think short accesses gain useful test coverage.
        false
    }

    /// Seeks to the given offset (which can be relative to the beginning, end, or current position).
    /// Returns the new position from the start of the stream.
    fn seek<'tcx>(
        &self,
        _communicate_allowed: bool,
        _offset: SeekFrom,
    ) -> InterpResult<'tcx, io::Result<u64>> {
        throw_unsup_format!("cannot seek on {}", self.name());
    }

    /// Destroys the file description. Only called when the last duplicate file descriptor is closed.
    ///
    /// `_self_id` is the unique id that this file description had.
    fn destroy<'tcx>(
        self,
        _self_id: FdId,
        _communicate_allowed: bool,
        _ecx: &mut MiriInterpCx<'tcx>,
    ) -> InterpResult<'tcx, io::Result<()>>
    where
        Self: Sized,
    {
        throw_unsup_format!("cannot close {}", self.name());
    }

    /// Returns the metadata for this FD, if available.
    /// This is either host metadata, or a non-file-backed-FD type.
    /// The latter is for now represented as a string storing a `libc` name so we only
    /// support that kind of metadata on Unix targets.
    fn metadata<'tcx>(&self) -> InterpResult<'tcx, Either<io::Result<fs::Metadata>, &'static str>> {
        throw_unsup_format!("obtaining metadata is only supported on file-backed file descriptors");
    }

    /// Whether this FD refers to a terminal.
    fn is_tty(&self, _communicate_allowed: bool) -> bool {
        // Most FDs are not tty's and the consequence of a wrong `false` are minor,
        // so we use a default impl here.
        false
    }

    /// Returns this file description as a `UnixFileDescription`.
    /// The default implementation panics, so this must only be called on file descriptions
    /// that override it.
    fn as_unix<'tcx>(&self, _ecx: &MiriInterpCx<'tcx>) -> &dyn UnixFileDescription {
        panic!("Not a unix file descriptor: {}", self.name());
    }

    /// Implementation of fcntl(F_GETFL) for this FD.
    fn get_flags<'tcx>(&self, _ecx: &mut MiriInterpCx<'tcx>) -> InterpResult<'tcx, Scalar> {
        throw_unsup_format!("fcntl: {} is not supported for F_GETFL", self.name());
    }

    /// Implementation of fcntl(F_SETFL) for this FD.
    fn set_flags<'tcx>(
        &self,
        _flag: i32,
        _ecx: &mut MiriInterpCx<'tcx>,
    ) -> InterpResult<'tcx, Scalar> {
        throw_unsup_format!("fcntl: {} is not supported for F_SETFL", self.name());
    }
}
244
245impl FileDescription for io::Stdin {
246    fn name(&self) -> &'static str {
247        "stdin"
248    }
249
250    fn read<'tcx>(
251        self: FileDescriptionRef<Self>,
252        communicate_allowed: bool,
253        ptr: Pointer,
254        len: usize,
255        ecx: &mut MiriInterpCx<'tcx>,
256        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
257    ) -> InterpResult<'tcx> {
258        if !communicate_allowed {
259            // We want isolation mode to be deterministic, so we have to disallow all reads, even stdin.
260            helpers::isolation_abort_error("`read` from stdin")?;
261        }
262
263        let mut stdin = &*self;
264        let result = ecx.read_from_host(|buf| stdin.read(buf), len, ptr)?;
265        finish.call(ecx, result)
266    }
267
268    fn destroy<'tcx>(
269        self,
270        _self_id: FdId,
271        _communicate_allowed: bool,
272        _ecx: &mut MiriInterpCx<'tcx>,
273    ) -> InterpResult<'tcx, io::Result<()>> {
274        interp_ok(Ok(()))
275    }
276
277    fn is_tty(&self, communicate_allowed: bool) -> bool {
278        communicate_allowed && self.is_terminal()
279    }
280}
281
282impl FileDescription for io::Stdout {
283    fn name(&self) -> &'static str {
284        "stdout"
285    }
286
287    fn write<'tcx>(
288        self: FileDescriptionRef<Self>,
289        _communicate_allowed: bool,
290        ptr: Pointer,
291        len: usize,
292        ecx: &mut MiriInterpCx<'tcx>,
293        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
294    ) -> InterpResult<'tcx> {
295        // We allow writing to stdout even with isolation enabled.
296        let result = ecx.write_to_host(&*self, len, ptr)?;
297        // Stdout is buffered, flush to make sure it appears on the
298        // screen.  This is the write() syscall of the interpreted
299        // program, we want it to correspond to a write() syscall on
300        // the host -- there is no good in adding extra buffering
301        // here.
302        io::stdout().flush().unwrap();
303
304        finish.call(ecx, result)
305    }
306
307    fn destroy<'tcx>(
308        self,
309        _self_id: FdId,
310        _communicate_allowed: bool,
311        _ecx: &mut MiriInterpCx<'tcx>,
312    ) -> InterpResult<'tcx, io::Result<()>> {
313        interp_ok(Ok(()))
314    }
315
316    fn is_tty(&self, communicate_allowed: bool) -> bool {
317        communicate_allowed && self.is_terminal()
318    }
319}
320
321impl FileDescription for io::Stderr {
322    fn name(&self) -> &'static str {
323        "stderr"
324    }
325
326    fn destroy<'tcx>(
327        self,
328        _self_id: FdId,
329        _communicate_allowed: bool,
330        _ecx: &mut MiriInterpCx<'tcx>,
331    ) -> InterpResult<'tcx, io::Result<()>> {
332        interp_ok(Ok(()))
333    }
334
335    fn write<'tcx>(
336        self: FileDescriptionRef<Self>,
337        _communicate_allowed: bool,
338        ptr: Pointer,
339        len: usize,
340        ecx: &mut MiriInterpCx<'tcx>,
341        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
342    ) -> InterpResult<'tcx> {
343        // We allow writing to stderr even with isolation enabled.
344        let result = ecx.write_to_host(&*self, len, ptr)?;
345        // No need to flush, stderr is not buffered.
346        finish.call(ecx, result)
347    }
348
349    fn is_tty(&self, communicate_allowed: bool) -> bool {
350        communicate_allowed && self.is_terminal()
351    }
352}
353
/// A file description backed by a `std::fs::File`.
#[derive(Debug)]
pub struct FileHandle {
    /// The underlying file that all operations are forwarded to.
    pub(crate) file: File,
    /// Whether writing is permitted. When this is `false`, `write` reports
    /// `PermissionDenied` and `destroy` does not sync the file before closing it.
    pub(crate) writable: bool,
}
359
360impl FileDescription for FileHandle {
361    fn name(&self) -> &'static str {
362        "file"
363    }
364
365    fn read<'tcx>(
366        self: FileDescriptionRef<Self>,
367        communicate_allowed: bool,
368        ptr: Pointer,
369        len: usize,
370        ecx: &mut MiriInterpCx<'tcx>,
371        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
372    ) -> InterpResult<'tcx> {
373        assert!(communicate_allowed, "isolation should have prevented even opening a file");
374
375        let mut file = &self.file;
376        let result = ecx.read_from_host(|buf| file.read(buf), len, ptr)?;
377        finish.call(ecx, result)
378    }
379
380    fn write<'tcx>(
381        self: FileDescriptionRef<Self>,
382        communicate_allowed: bool,
383        ptr: Pointer,
384        len: usize,
385        ecx: &mut MiriInterpCx<'tcx>,
386        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
387    ) -> InterpResult<'tcx> {
388        assert!(communicate_allowed, "isolation should have prevented even opening a file");
389
390        if !self.writable {
391            // Linux hosts return EBADF here which we can't translate via the platform-independent
392            // code since it does not map to any `io::ErrorKind` -- so if we don't do anything
393            // special, we'd throw an "unsupported error code" here. Windows returns something that
394            // gets translated to `PermissionDenied`. That seems like a good value so let's just use
395            // this everywhere, even if it means behavior on Unix targets does not match the real
396            // thing.
397            return finish.call(ecx, Err(ErrorKind::PermissionDenied.into()));
398        }
399        let result = ecx.write_to_host(&self.file, len, ptr)?;
400        finish.call(ecx, result)
401    }
402
403    fn seek<'tcx>(
404        &self,
405        communicate_allowed: bool,
406        offset: SeekFrom,
407    ) -> InterpResult<'tcx, io::Result<u64>> {
408        assert!(communicate_allowed, "isolation should have prevented even opening a file");
409        interp_ok((&mut &self.file).seek(offset))
410    }
411
412    fn destroy<'tcx>(
413        self,
414        _self_id: FdId,
415        communicate_allowed: bool,
416        _ecx: &mut MiriInterpCx<'tcx>,
417    ) -> InterpResult<'tcx, io::Result<()>> {
418        assert!(communicate_allowed, "isolation should have prevented even opening a file");
419        // We sync the file if it was opened in a mode different than read-only.
420        if self.writable {
421            // `File::sync_all` does the checks that are done when closing a file. We do this to
422            // to handle possible errors correctly.
423            let result = self.file.sync_all();
424            // Now we actually close the file and return the result.
425            drop(self.file);
426            interp_ok(result)
427        } else {
428            // We drop the file, this closes it but ignores any errors
429            // produced when closing it. This is done because
430            // `File::sync_all` cannot be done over files like
431            // `/dev/urandom` which are read-only. Check
432            // https://github.com/rust-lang/miri/issues/999#issuecomment-568920439
433            // for a deeper discussion.
434            drop(self.file);
435            interp_ok(Ok(()))
436        }
437    }
438
439    fn metadata<'tcx>(&self) -> InterpResult<'tcx, Either<io::Result<fs::Metadata>, &'static str>> {
440        interp_ok(Either::Left(self.file.metadata()))
441    }
442
443    fn is_tty(&self, communicate_allowed: bool) -> bool {
444        communicate_allowed && self.file.is_terminal()
445    }
446
447    fn short_fd_operations(&self) -> bool {
448        // While short accesses on file-backed FDs are very rare (at least for sufficiently small
449        // accesses), they can realistically happen when a signal interrupts the syscall.
450        // FIXME: we should return `false` if this is a named pipe...
451        true
452    }
453
454    fn as_unix<'tcx>(&self, ecx: &MiriInterpCx<'tcx>) -> &dyn UnixFileDescription {
455        assert!(
456            ecx.target_os_is_unix(),
457            "unix file operations are only available for unix targets"
458        );
459        self
460    }
461}
462
463/// Like /dev/null
464#[derive(Debug)]
465pub struct NullOutput;
466
467impl FileDescription for NullOutput {
468    fn name(&self) -> &'static str {
469        "stderr and stdout"
470    }
471
472    fn write<'tcx>(
473        self: FileDescriptionRef<Self>,
474        _communicate_allowed: bool,
475        _ptr: Pointer,
476        len: usize,
477        ecx: &mut MiriInterpCx<'tcx>,
478        finish: DynMachineCallback<'tcx, Result<usize, IoError>>,
479    ) -> InterpResult<'tcx> {
480        // We just don't write anything, but report to the user that we did.
481        finish.call(ecx, Ok(len))
482    }
483
484    fn destroy<'tcx>(
485        self,
486        _self_id: FdId,
487        _communicate_allowed: bool,
488        _ecx: &mut MiriInterpCx<'tcx>,
489    ) -> InterpResult<'tcx, io::Result<()>> {
490        interp_ok(Ok(()))
491    }
492}
493
/// The integer type of a file descriptor number. This is the key type of [`FdTable`].
pub type FdNum = i32;
496
/// The file descriptor table: maps file descriptor numbers to the file descriptions
/// they refer to.
#[derive(Debug)]
pub struct FdTable {
    /// The open file descriptors, keyed by their number. The map is ordered by key.
    pub fds: BTreeMap<FdNum, DynFileDescriptionRef>,
    /// Unique identifier for file description, used to differentiate between various file description.
    /// This is the id that the next newly created file description will receive.
    next_file_description_id: FdId,
}

impl VisitProvenance for FdTable {
    fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
        // All our FileDescription instances do not have any tags.
    }
}
510
511impl FdTable {
512    fn new() -> Self {
513        FdTable { fds: BTreeMap::new(), next_file_description_id: FdId(0) }
514    }
515    pub(crate) fn init(mute_stdout_stderr: bool) -> FdTable {
516        let mut fds = FdTable::new();
517        fds.insert_new(io::stdin());
518        if mute_stdout_stderr {
519            assert_eq!(fds.insert_new(NullOutput), 1);
520            assert_eq!(fds.insert_new(NullOutput), 2);
521        } else {
522            assert_eq!(fds.insert_new(io::stdout()), 1);
523            assert_eq!(fds.insert_new(io::stderr()), 2);
524        }
525        fds
526    }
527
528    pub fn new_ref<T: FileDescription>(&mut self, fd: T) -> FileDescriptionRef<T> {
529        let file_handle =
530            FileDescriptionRef(Rc::new(FdIdWith { id: self.next_file_description_id, inner: fd }));
531        self.next_file_description_id = FdId(self.next_file_description_id.0.strict_add(1));
532        file_handle
533    }
534
535    /// Insert a new file description to the FdTable.
536    pub fn insert_new(&mut self, fd: impl FileDescription) -> FdNum {
537        let fd_ref = self.new_ref(fd);
538        self.insert(fd_ref)
539    }
540
541    pub fn insert(&mut self, fd_ref: DynFileDescriptionRef) -> FdNum {
542        self.insert_with_min_num(fd_ref, 0)
543    }
544
545    /// Insert a file description, giving it a file descriptor that is at least `min_fd_num`.
546    pub fn insert_with_min_num(
547        &mut self,
548        file_handle: DynFileDescriptionRef,
549        min_fd_num: FdNum,
550    ) -> FdNum {
551        // Find the lowest unused FD, starting from min_fd. If the first such unused FD is in
552        // between used FDs, the find_map combinator will return it. If the first such unused FD
553        // is after all other used FDs, the find_map combinator will return None, and we will use
554        // the FD following the greatest FD thus far.
555        let candidate_new_fd =
556            self.fds.range(min_fd_num..).zip(min_fd_num..).find_map(|((fd_num, _fd), counter)| {
557                if *fd_num != counter {
558                    // There was a gap in the fds stored, return the first unused one
559                    // (note that this relies on BTreeMap iterating in key order)
560                    Some(counter)
561                } else {
562                    // This fd is used, keep going
563                    None
564                }
565            });
566        let new_fd_num = candidate_new_fd.unwrap_or_else(|| {
567            // find_map ran out of BTreeMap entries before finding a free fd, use one plus the
568            // maximum fd in the map
569            self.fds.last_key_value().map(|(fd_num, _)| fd_num.strict_add(1)).unwrap_or(min_fd_num)
570        });
571
572        self.fds.try_insert(new_fd_num, file_handle).unwrap();
573        new_fd_num
574    }
575
576    pub fn get(&self, fd_num: FdNum) -> Option<DynFileDescriptionRef> {
577        let fd = self.fds.get(&fd_num)?;
578        Some(fd.clone())
579    }
580
581    pub fn remove(&mut self, fd_num: FdNum) -> Option<DynFileDescriptionRef> {
582        self.fds.remove(&fd_num)
583    }
584
585    pub fn is_fd_num(&self, fd_num: FdNum) -> bool {
586        self.fds.contains_key(&fd_num)
587    }
588}
589
590impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
591pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
592    /// Read data from a host `Read` type, store the result into machine memory,
593    /// and return whether that worked.
594    fn read_from_host(
595        &mut self,
596        mut read_cb: impl FnMut(&mut [u8]) -> io::Result<usize>,
597        len: usize,
598        ptr: Pointer,
599    ) -> InterpResult<'tcx, Result<usize, IoError>> {
600        let this = self.eval_context_mut();
601
602        let mut bytes = vec![0; len];
603        let result = read_cb(&mut bytes);
604        match result {
605            Ok(read_size) => {
606                // If reading to `bytes` did not fail, we write those bytes to the buffer.
607                // Crucially, if fewer than `bytes.len()` bytes were read, only write
608                // that much into the output buffer!
609                this.write_bytes_ptr(ptr, bytes[..read_size].iter().copied())?;
610                interp_ok(Ok(read_size))
611            }
612            Err(e) => interp_ok(Err(IoError::HostError(e))),
613        }
614    }
615
616    /// Write data to a host `Write` type, with the bytes taken from machine memory.
617    fn write_to_host(
618        &mut self,
619        mut file: impl io::Write,
620        len: usize,
621        ptr: Pointer,
622    ) -> InterpResult<'tcx, Result<usize, IoError>> {
623        let this = self.eval_context_mut();
624
625        let bytes = this.read_bytes_ptr_strip_provenance(ptr, Size::from_bytes(len))?;
626        let result = file.write(bytes);
627        interp_ok(result.map_err(IoError::HostError))
628    }
629}