1use std::path::Path;
2
3use cargo_util_schemas::manifest::TomlToolLints;
4use cargo_util_terminal::report::AnnotationKind;
5use cargo_util_terminal::report::Group;
6use cargo_util_terminal::report::Level;
7use cargo_util_terminal::report::Patch;
8use cargo_util_terminal::report::Snippet;
9use toml_parser::Source;
10use toml_parser::Span;
11use toml_parser::decoder::Encoding;
12use toml_parser::parser::Event;
13use toml_parser::parser::EventKind;
14use toml_parser::parser::EventReceiver;
15
16use crate::CargoResult;
17use crate::GlobalContext;
18use crate::core::MaybePackage;
19use crate::lints::CORRECTNESS;
20use crate::lints::Lint;
21use crate::lints::LintLevel;
22use crate::lints::ManifestFor;
23use crate::lints::rel_cwd_manifest_path;
24
/// Definition of the `text_direction_codepoint_in_literal` lint.
///
/// Registered in the `CORRECTNESS` group with no feature gate; `docs` holds the
/// user-facing explanation of what the lint detects and why.
pub static LINT: &Lint = &Lint {
    name: "text_direction_codepoint_in_literal",
    desc: "unicode codepoint changing visible direction of text present in literal",
    primary_group: &CORRECTNESS,
    msrv: Some(super::CARGO_LINTS_MSRV),
    feature_gate: None,
    docs: Some(
        r#"
### What it does
Detects Unicode codepoints in literals in manifests that change the visual representation of text on screen
in a way that does not correspond to their on memory representation.

### Why it is bad
Unicode allows changing the visual flow of text on screen
in order to support scripts that are written right-to-left,
but a specially crafted literal can make code that will be compiled appear to be part of a literal,
depending on the software used to read the code.
To avoid potential problems or confusion,
such as in CVE-2021-42574,
by default we deny their use.
"#,
    ),
};
48
49pub fn text_direction_codepoint_in_literal(
50 manifest: ManifestFor<'_>,
51 manifest_path: &Path,
52 cargo_lints: &TomlToolLints,
53 error_count: &mut usize,
54 gctx: &GlobalContext,
55) -> CargoResult<()> {
56 let (lint_level, source) = manifest.lint_level(cargo_lints, LINT);
57 if lint_level == LintLevel::Allow {
58 return Ok(());
59 }
60
61 if matches!(
62 &manifest,
63 ManifestFor::Workspace {
64 maybe_pkg: MaybePackage::Package { .. },
65 ..
66 }
67 ) {
68 return Ok(());
70 }
71
72 let Some(contents) = manifest.contents() else {
73 return Ok(());
74 };
75
76 let bidi_spans = contents
77 .char_indices()
78 .filter(|(_i, c)| {
79 UNICODE_BIDI_CODEPOINTS
80 .iter()
81 .any(|(bidi, _, _name)| c == bidi)
82 })
83 .map(|(i, c)| (i, i + c.len_utf8()))
84 .collect::<Vec<_>>();
85 if bidi_spans.is_empty() {
86 return Ok(());
87 }
88
89 let toml_source = Source::new(contents);
90 let events = bidi_events(&toml_source, &bidi_spans);
91 let manifest_path = rel_cwd_manifest_path(manifest_path, gctx);
92 let mut emitted_source = None;
93 for event in events {
94 if lint_level.is_error() {
95 *error_count += 1;
96 }
97
98 let token_span = event.token.span();
99 let token_span = token_span.start()..token_span.end();
100 let mut snippet = Snippet::source(contents).path(&manifest_path).annotation(
101 AnnotationKind::Context
102 .span(token_span.clone())
103 .label("this literal contains an invisible unicode text flow control codepoint"),
104 );
105 for bidi_span in event.bidi_spans {
106 let bidi_span = bidi_span.0..bidi_span.1;
107 let escaped = format!("{:?}", &contents[bidi_span.clone()]);
108 snippet = snippet.annotation(AnnotationKind::Primary.span(bidi_span).label(escaped));
109 }
110 let mut help_snippet = Snippet::source(contents).path(&manifest_path);
111 if let Some(original_raw) = toml_source.get(&event.token) {
112 let mut decoded = String::new();
113 let replacement = match event.token.kind() {
114 toml_parser::parser::EventKind::SimpleKey => {
115 use toml_writer::ToTomlKey as _;
116 original_raw.decode_key(&mut decoded, &mut ());
117 let builder = toml_writer::TomlKeyBuilder::new(&decoded);
118 let replacement = builder.as_basic();
119 Some(replacement.to_toml_key())
120 }
121 toml_parser::parser::EventKind::Scalar => {
122 use toml_writer::ToTomlValue as _;
123 let kind = original_raw.decode_scalar(&mut decoded, &mut ());
124 if matches!(kind, toml_parser::decoder::ScalarKind::String) {
125 let builder = toml_writer::TomlStringBuilder::new(&decoded);
126 let replacement = match event.token.encoding() {
127 Some(toml_parser::decoder::Encoding::BasicString)
128 | Some(toml_parser::decoder::Encoding::LiteralString)
129 | None => builder.as_basic(),
130 Some(toml_parser::decoder::Encoding::MlBasicString)
131 | Some(toml_parser::decoder::Encoding::MlLiteralString) => {
132 builder.as_ml_basic()
133 }
134 };
135 Some(replacement.to_toml_value())
136 } else {
137 None
138 }
139 }
140 _ => None,
141 };
142 if let Some(mut replacement) = replacement {
143 for (bidi, escaped, _) in UNICODE_BIDI_CODEPOINTS {
144 replacement = replacement.replace(*bidi, escaped);
145 }
146 help_snippet = help_snippet.patch(Patch::new(token_span.clone(), replacement));
147 }
148 }
149
150 let level = lint_level.to_diagnostic_level();
151 let mut primary = Group::with_title(level.primary_title(LINT.desc)).element(snippet);
152 if emitted_source.is_none() {
153 emitted_source = Some(LINT.emitted_source(lint_level, source));
154 primary = primary.element(Level::NOTE.message(emitted_source.as_ref().unwrap()));
155 }
156
157 let help = Group::with_title(Level::HELP.secondary_title("if you want to keep them but make them visible in your source code, you can escape them")).element(help_snippet);
158
159 let report = [primary, help];
160 gctx.shell().print_report(&report, lint_level.force())?;
161 }
162
163 Ok(())
164}
165
/// Bidirectional text-control codepoints that this lint looks for.
///
/// Each entry is `(codepoint, TOML escape sequence, Unicode name)`.
///
/// The escape sequence is substituted into a re-encoded TOML basic string that is
/// offered to the user as a fix, so it must be a valid TOML escape: `\uXXXX` with
/// exactly four hex digits. Rust's braced `\u{XXXX}` form is not accepted by TOML.
const UNICODE_BIDI_CODEPOINTS: &[(char, &str, &str)] = &[
    ('\u{202A}', r"\u202A", "LEFT-TO-RIGHT EMBEDDING"),
    ('\u{202B}', r"\u202B", "RIGHT-TO-LEFT EMBEDDING"),
    ('\u{202C}', r"\u202C", "POP DIRECTIONAL FORMATTING"),
    ('\u{202D}', r"\u202D", "LEFT-TO-RIGHT OVERRIDE"),
    ('\u{202E}', r"\u202E", "RIGHT-TO-LEFT OVERRIDE"),
    ('\u{2066}', r"\u2066", "LEFT-TO-RIGHT ISOLATE"),
    ('\u{2067}', r"\u2067", "RIGHT-TO-LEFT ISOLATE"),
    ('\u{2068}', r"\u2068", "FIRST STRONG ISOLATE"),
    ('\u{2069}', r"\u2069", "POP DIRECTIONAL ISOLATE"),
];
177
/// A key or scalar token paired with the bidi codepoints found inside it.
struct BiDiEvent {
    /// The token (kind, encoding, and span) that contains the codepoints.
    token: Event,
    /// `(start, end)` byte offsets of each bidi codepoint whose start falls inside the token's span.
    bidi_spans: Vec<(usize, usize)>,
}
182
183fn bidi_events(source: &Source<'_>, bidi_spans: &[(usize, usize)]) -> Vec<BiDiEvent> {
184 let mut bidi_spans = bidi_spans.iter();
185 let bidi_span = bidi_spans.next().copied();
186
187 let tokens = source.lex().into_vec();
188 let mut collector = BiDiCollector {
189 bidi_span,
190 bidi_spans,
191 events: Vec::new(),
192 };
193 let mut errors = ();
194 toml_parser::parser::parse_document(&tokens, &mut collector, &mut errors);
195
196 collector.events
197}
198
/// Parser event receiver that matches key/scalar tokens against a list of bidi
/// codepoint spans and records the tokens that contain any.
struct BiDiCollector<'b> {
    /// The next bidi span still to be matched or skipped; `None` once all are consumed.
    bidi_span: Option<(usize, usize)>,
    /// The bidi spans that come after `bidi_span`.
    bidi_spans: std::slice::Iter<'b, (usize, usize)>,
    /// Tokens found so far that contain at least one bidi codepoint.
    events: Vec<BiDiEvent>,
}
204
205impl BiDiCollector<'_> {
206 fn process(&mut self, kind: EventKind, encoding: Option<Encoding>, span: Span) {
207 let mut event_bidi_spans = Vec::new();
208 while let Some(bidi_span) = self.bidi_span {
209 if bidi_span.0 < span.start() {
210 self.bidi_span = self.bidi_spans.next().copied();
211 continue;
212 } else if span.end() <= bidi_span.0 {
213 break;
214 }
215
216 event_bidi_spans.push(bidi_span);
217 self.bidi_span = self.bidi_spans.next().copied();
218 }
219
220 if !event_bidi_spans.is_empty() {
221 let token = Event::new_unchecked(kind, encoding, span);
222 self.events.push(BiDiEvent {
223 token,
224 bidi_spans: event_bidi_spans,
225 });
226 }
227 }
228}
229
/// Only keys and scalars are handled here; both are forwarded to
/// `BiDiCollector::process`, and the error sink is ignored.
impl EventReceiver for BiDiCollector<'_> {
    /// Checks a simple key for bidi codepoints.
    fn simple_key(
        &mut self,
        span: Span,
        encoding: Option<Encoding>,
        _error: &mut dyn toml_parser::ErrorSink,
    ) {
        self.process(EventKind::SimpleKey, encoding, span)
    }
    /// Checks a scalar value for bidi codepoints.
    fn scalar(
        &mut self,
        span: Span,
        encoding: Option<Encoding>,
        _error: &mut dyn toml_parser::ErrorSink,
    ) {
        self.process(EventKind::Scalar, encoding, span)
    }
}