Skip to main content

cargo/lints/rules/
text_direction_codepoint_in_comment.rs

1use std::path::Path;
2
3use cargo_util_schemas::manifest::TomlToolLints;
4use cargo_util_terminal::report::AnnotationKind;
5use cargo_util_terminal::report::Group;
6use cargo_util_terminal::report::Level;
7use cargo_util_terminal::report::Snippet;
8use toml_parser::Source;
9use toml_parser::Span;
10use toml_parser::decoder::Encoding;
11use toml_parser::parser::Event;
12use toml_parser::parser::EventKind;
13use toml_parser::parser::EventReceiver;
14
15use crate::CargoResult;
16use crate::GlobalContext;
17use crate::core::MaybePackage;
18use crate::lints::CORRECTNESS;
19use crate::lints::Lint;
20use crate::lints::LintLevel;
21use crate::lints::ManifestFor;
22use crate::lints::rel_cwd_manifest_path;
23
/// Lint descriptor for `text_direction_codepoint_in_comment`.
///
/// The lint's primary group is `CORRECTNESS`, it has no feature gate, and its
/// MSRV is the shared `CARGO_LINTS_MSRV`. `docs` holds the Markdown description
/// of what the lint detects and why it matters.
pub static LINT: &Lint = &Lint {
    name: "text_direction_codepoint_in_comment",
    desc: "unicode codepoint changing visible direction of text present in comment",
    primary_group: &CORRECTNESS,
    msrv: Some(super::CARGO_LINTS_MSRV),
    feature_gate: None,
    docs: Some(
        r#"
### What it does
Detects Unicode codepoints in manifest comments that change the visual representation of text on screen
in a way that does not correspond to their on memory representation.

### Why it is bad
Unicode allows changing the visual flow of text on screen
in order to support scripts that are written right-to-left,
but a specially crafted comment can make code that will be compiled appear to be part of a comment,
depending on the software used to read the code.
To avoid potential problems or confusion,
such as in CVE-2021-42574,
by default we deny their use.
"#,
    ),
};
47
48pub fn text_direction_codepoint_in_comment(
49    manifest: ManifestFor<'_>,
50    manifest_path: &Path,
51    cargo_lints: &TomlToolLints,
52    error_count: &mut usize,
53    gctx: &GlobalContext,
54) -> CargoResult<()> {
55    let (lint_level, source) = manifest.lint_level(cargo_lints, LINT);
56    if lint_level == LintLevel::Allow {
57        return Ok(());
58    }
59
60    if matches!(
61        &manifest,
62        ManifestFor::Workspace {
63            maybe_pkg: MaybePackage::Package { .. },
64            ..
65        }
66    ) {
67        // For real manifests, lint as a package, rather than a workspace
68        return Ok(());
69    }
70
71    let Some(contents) = manifest.contents() else {
72        return Ok(());
73    };
74
75    let bidi_spans = contents
76        .char_indices()
77        .filter(|(_i, c)| {
78            UNICODE_BIDI_CODEPOINTS
79                .iter()
80                .any(|(bidi, _name)| c == bidi)
81        })
82        .map(|(i, c)| (i, i + c.len_utf8()))
83        .collect::<Vec<_>>();
84    if bidi_spans.is_empty() {
85        return Ok(());
86    }
87
88    let events = bidi_events(contents, &bidi_spans);
89    let manifest_path = rel_cwd_manifest_path(manifest_path, gctx);
90    let mut emitted_source = None;
91    for event in events {
92        if lint_level.is_error() {
93            *error_count += 1;
94        }
95
96        let token_span = event.token.span();
97        let token_span = token_span.start()..token_span.end();
98        let mut snippet = Snippet::source(contents).path(&manifest_path).annotation(
99            AnnotationKind::Context
100                .span(token_span)
101                .label("this comment contains an invisible unicode text flow control codepoint"),
102        );
103        for bidi_span in event.bidi_spans {
104            let bidi_span = bidi_span.0..bidi_span.1;
105            let escaped = format!("{:?}", &contents[bidi_span.clone()]);
106            snippet = snippet.annotation(AnnotationKind::Primary.span(bidi_span).label(escaped));
107        }
108
109        let level = lint_level.to_diagnostic_level();
110        let mut primary = Group::with_title(level.primary_title(LINT.desc)).element(snippet);
111        if emitted_source.is_none() {
112            emitted_source = Some(LINT.emitted_source(lint_level, source));
113            primary = primary.element(Level::NOTE.message(emitted_source.as_ref().unwrap()));
114        }
115
116        let report = [primary];
117        gctx.shell().print_report(&report, lint_level.force())?;
118    }
119
120    Ok(())
121}
122
/// Unicode bidirectional formatting codepoints this lint searches for, each
/// paired with its Unicode character name.
///
/// NOTE(review): the name half of each pair does not appear to be read by the
/// code in this file; only the codepoint is compared.
const UNICODE_BIDI_CODEPOINTS: &[(char, &str)] = &[
    ('\u{202A}', "LEFT-TO-RIGHT EMBEDDING"),
    ('\u{202B}', "RIGHT-TO-LEFT EMBEDDING"),
    ('\u{202C}', "POP DIRECTIONAL FORMATTING"),
    ('\u{202D}', "LEFT-TO-RIGHT OVERRIDE"),
    ('\u{202E}', "RIGHT-TO-LEFT OVERRIDE"),
    ('\u{2066}', "LEFT-TO-RIGHT ISOLATE"),
    ('\u{2067}', "RIGHT-TO-LEFT ISOLATE"),
    ('\u{2068}', "FIRST STRONG ISOLATE"),
    ('\u{2069}', "POP DIRECTIONAL ISOLATE"),
];
134
/// A parser event together with the bidi codepoints that start inside its span.
struct BiDiEvent {
    /// The parser event whose span contains the codepoints.
    token: Event,
    /// `(start, end)` byte ranges into the manifest contents, one per codepoint.
    bidi_spans: Vec<(usize, usize)>,
}
139
140fn bidi_events(contents: &str, bidi_spans: &[(usize, usize)]) -> Vec<BiDiEvent> {
141    let mut bidi_spans = bidi_spans.iter();
142    let bidi_span = bidi_spans.next().copied();
143
144    let source = Source::new(contents);
145    let tokens = source.lex().into_vec();
146    let mut collector = BiDiCollector {
147        bidi_span,
148        bidi_spans,
149        events: Vec::new(),
150    };
151    let mut errors = ();
152    toml_parser::parser::parse_document(&tokens, &mut collector, &mut errors);
153
154    collector.events
155}
156
/// Parser event receiver that matches a queue of bidi codepoint spans against
/// the spans of the events it is shown.
struct BiDiCollector<'b> {
    /// The next bidi span not yet matched or skipped; `None` once all are consumed.
    bidi_span: Option<(usize, usize)>,
    /// The bidi spans that follow `bidi_span`.
    bidi_spans: std::slice::Iter<'b, (usize, usize)>,
    /// Events seen so far that contained at least one bidi span.
    events: Vec<BiDiEvent>,
}
162
163impl BiDiCollector<'_> {
164    fn process(&mut self, kind: EventKind, encoding: Option<Encoding>, span: Span) {
165        let mut event_bidi_spans = Vec::new();
166        while let Some(bidi_span) = self.bidi_span {
167            if bidi_span.0 < span.start() {
168                self.bidi_span = self.bidi_spans.next().copied();
169                continue;
170            } else if span.end() <= bidi_span.0 {
171                break;
172            }
173
174            event_bidi_spans.push(bidi_span);
175            self.bidi_span = self.bidi_spans.next().copied();
176        }
177
178        if !event_bidi_spans.is_empty() {
179            let token = Event::new_unchecked(kind, encoding, span);
180            self.events.push(BiDiEvent {
181                token,
182                bidi_spans: event_bidi_spans,
183            });
184        }
185    }
186}
187
/// Only comment events are inspected; no other `EventReceiver` callback is
/// overridden in this impl.
impl EventReceiver for BiDiCollector<'_> {
    /// Checks the comment at `span` against the queued bidi codepoints.
    /// The error sink is not used.
    fn comment(&mut self, span: Span, _error: &mut dyn toml_parser::ErrorSink) {
        self.process(EventKind::Comment, None, span)
    }
}