Skip to main content

core/char/
methods.rs

1//! impl char {}
2
3use super::*;
4use crate::panic::const_panic;
5use crate::slice;
6use crate::str::from_utf8_unchecked_mut;
7use crate::ub_checks::assert_unsafe_precondition;
8use crate::unicode::{self, conversions};
9
10impl char {
11    /// The lowest valid code point a `char` can have, `'\0'`.
12    ///
13    /// Unlike integer types, `char` actually has a gap in the middle,
14    /// meaning that the range of possible `char`s is smaller than you
15    /// might expect. Ranges of `char` will automatically hop this gap
16    /// for you:
17    ///
18    /// ```
19    /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
20    /// let size = (char::MIN..=char::MAX).count() as u32;
21    /// assert!(size < dist);
22    /// ```
23    ///
24    /// Despite this gap, the `MIN` and [`MAX`] values can be used as bounds for
25    /// all `char` values.
26    ///
27    /// [`MAX`]: char::MAX
28    ///
29    /// # Examples
30    ///
31    /// ```
32    /// # fn something_which_returns_char() -> char { 'a' }
33    /// let c: char = something_which_returns_char();
34    /// assert!(char::MIN <= c);
35    ///
36    /// let value_at_min = u32::from(char::MIN);
37    /// assert_eq!(char::from_u32(value_at_min), Some('\0'));
38    /// ```
    #[stable(feature = "char_min", since = "1.83.0")]
    // `'\0'` is the NUL character, U+0000: the `char` whose `u32` value is zero.
    pub const MIN: char = '\0';
41
42    /// The highest valid code point a `char` can have, `'\u{10FFFF}'`.
43    ///
44    /// Unlike integer types, `char` actually has a gap in the middle,
45    /// meaning that the range of possible `char`s is smaller than you
46    /// might expect. Ranges of `char` will automatically hop this gap
47    /// for you:
48    ///
49    /// ```
50    /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
51    /// let size = (char::MIN..=char::MAX).count() as u32;
52    /// assert!(size < dist);
53    /// ```
54    ///
55    /// Despite this gap, the [`MIN`] and `MAX` values can be used as bounds for
56    /// all `char` values.
57    ///
58    /// [`MIN`]: char::MIN
59    ///
60    /// # Examples
61    ///
62    /// ```
63    /// # fn something_which_returns_char() -> char { 'a' }
64    /// let c: char = something_which_returns_char();
65    /// assert!(c <= char::MAX);
66    ///
67    /// let value_at_max = u32::from(char::MAX);
68    /// assert_eq!(char::from_u32(value_at_max), Some('\u{10FFFF}'));
69    /// assert_eq!(char::from_u32(value_at_max + 1), None);
70    /// ```
    #[stable(feature = "assoc_char_consts", since = "1.52.0")]
    // U+10FFFF; the next `u32` value, 0x110000, is rejected by `char::from_u32`.
    pub const MAX: char = '\u{10FFFF}';
73
    /// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
    /// UTF-8 encoding.
    ///
    /// A byte buffer of this length is always large enough for
    /// [`char::encode_utf8`], whichever `char` is being encoded.
    #[stable(feature = "char_max_len_assoc", since = "1.93.0")]
    pub const MAX_LEN_UTF8: usize = 4;
78
    /// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
    /// to UTF-16 encoding.
    ///
    /// A `u16` buffer of this length is always large enough for
    /// [`char::encode_utf16`], whichever `char` is being encoded.
    #[stable(feature = "char_max_len_assoc", since = "1.93.0")]
    pub const MAX_LEN_UTF16: usize = 2;
83
    /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
    /// decoding error.
    ///
    /// It can occur, for example, when giving ill-formed UTF-8 bytes to
    /// [`String::from_utf8_lossy`](../std/string/struct.String.html#method.from_utf8_lossy).
    ///
    /// It is also the value to substitute when decoding UTF-16 lossily: replace
    /// each `Err` yielded by [`char::decode_utf16`] with this constant.
    #[stable(feature = "assoc_char_consts", since = "1.52.0")]
    pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
91
92    /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
93    /// `char` and `str` methods are based on.
94    ///
95    /// New versions of Unicode are released regularly, and subsequently all methods
96    /// in the standard library depending on Unicode are updated. Therefore, the
97    /// behavior of some `char` and `str` methods, and the value of this constant,
98    /// change over time (within the boundaries of Unicode's [stability policies]).
99    /// This is *not* considered to be a breaking change.
100    ///
101    /// [stability policies]: https://www.unicode.org/policies/stability_policy.html
102    ///
103    /// The version numbering scheme is explained in
104    /// [Section 3.1 (Version Numbering)] of the Unicode Standard.
105    ///
106    /// [Section 3.1 (Version Numbering)]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G49512
107    #[stable(feature = "assoc_char_consts", since = "1.52.0")]
108    pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION;
109
110    /// Creates an iterator over the native endian UTF-16 encoded code points in `iter`,
111    /// returning unpaired surrogates as `Err`s.
112    ///
113    /// # Examples
114    ///
115    /// Basic usage:
116    ///
117    /// ```
118    /// // 𝄞mus<invalid>ic<invalid>
119    /// let v = [
120    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
121    /// ];
122    ///
123    /// assert_eq!(
124    ///     char::decode_utf16(v)
125    ///         .map(|r| r.map_err(|e| e.unpaired_surrogate()))
126    ///         .collect::<Vec<_>>(),
127    ///     vec![
128    ///         Ok('𝄞'),
129    ///         Ok('m'), Ok('u'), Ok('s'),
130    ///         Err(0xDD1E),
131    ///         Ok('i'), Ok('c'),
132    ///         Err(0xD834)
133    ///     ]
134    /// );
135    /// ```
136    ///
137    /// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
138    ///
139    /// ```
140    /// // 𝄞mus<invalid>ic<invalid>
141    /// let v = [
142    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
143    /// ];
144    ///
145    /// assert_eq!(
146    ///     char::decode_utf16(v)
147    ///        .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER))
148    ///        .collect::<String>(),
149    ///     "𝄞mus�ic�"
150    /// );
151    /// ```
    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
    #[inline]
    pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
        // Associated-function front end only: the call is forwarded unchanged to
        // the free function of the same name in the sibling `decode` module.
        super::decode::decode_utf16(iter)
    }
157
158    /// Converts a `u32` to a `char`.
159    ///
160    /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
161    /// [`as`](../std/keyword.as.html):
162    ///
163    /// ```
164    /// let c = '💯';
165    /// let i = c as u32;
166    ///
167    /// assert_eq!(128175, i);
168    /// ```
169    ///
170    /// However, the reverse is not true: not all valid [`u32`]s are valid
171    /// `char`s. `from_u32()` will return `None` if the input is not a valid value
172    /// for a `char`.
173    ///
174    /// For an unsafe version of this function which ignores these checks, see
175    /// [`from_u32_unchecked`].
176    ///
177    /// [`from_u32_unchecked`]: #method.from_u32_unchecked
178    ///
179    /// # Examples
180    ///
181    /// Basic usage:
182    ///
183    /// ```
184    /// let c = char::from_u32(0x2764);
185    ///
186    /// assert_eq!(Some('❤'), c);
187    /// ```
188    ///
189    /// Returning `None` when the input is not a valid `char`:
190    ///
191    /// ```
192    /// let c = char::from_u32(0x110000);
193    ///
194    /// assert_eq!(None, c);
195    /// ```
    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
    #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
    #[must_use]
    #[inline]
    pub const fn from_u32(i: u32) -> Option<char> {
        // Associated-function front end only: the validity check that decides
        // between `Some` and `None` is performed by `convert::from_u32`.
        super::convert::from_u32(i)
    }
203
204    /// Converts a `u32` to a `char`, ignoring validity.
205    ///
206    /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
207    /// `as`:
208    ///
209    /// ```
210    /// let c = '💯';
211    /// let i = c as u32;
212    ///
213    /// assert_eq!(128175, i);
214    /// ```
215    ///
216    /// However, the reverse is not true: not all valid [`u32`]s are valid
217    /// `char`s. `from_u32_unchecked()` will ignore this, and blindly cast to
218    /// `char`, possibly creating an invalid one.
219    ///
220    /// # Safety
221    ///
222    /// This function is unsafe, as it may construct invalid `char` values.
223    ///
224    /// For a safe version of this function, see the [`from_u32`] function.
225    ///
226    /// [`from_u32`]: #method.from_u32
227    ///
228    /// # Examples
229    ///
230    /// Basic usage:
231    ///
232    /// ```
233    /// let c = unsafe { char::from_u32_unchecked(0x2764) };
234    ///
235    /// assert_eq!('❤', c);
236    /// ```
    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
    #[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")]
    #[must_use]
    #[inline]
    pub const unsafe fn from_u32_unchecked(i: u32) -> char {
        // Nothing is validated in this wrapper: `i` is handed untouched to the
        // function of the same name in the sibling `convert` module.
        // SAFETY: the safety contract must be upheld by the caller.
        unsafe { super::convert::from_u32_unchecked(i) }
    }
245
246    /// Converts a digit in the given radix to a `char`.
247    ///
248    /// A 'radix' here is sometimes also called a 'base'. A radix of two
249    /// indicates a binary number, a radix of ten, decimal, and a radix of
250    /// sixteen, hexadecimal, to give some common values. Arbitrary
251    /// radices are supported.
252    ///
253    /// `from_digit()` will return `None` if the input is not a digit in
254    /// the given radix.
255    ///
256    /// # Panics
257    ///
258    /// Panics if given a radix larger than 36.
259    ///
260    /// # Examples
261    ///
262    /// Basic usage:
263    ///
264    /// ```
265    /// let c = char::from_digit(4, 10);
266    ///
267    /// assert_eq!(Some('4'), c);
268    ///
269    /// // Decimal 11 is a single digit in base 16
270    /// let c = char::from_digit(11, 16);
271    ///
272    /// assert_eq!(Some('b'), c);
273    /// ```
274    ///
275    /// Returning `None` when the input is not a digit:
276    ///
277    /// ```
278    /// let c = char::from_digit(20, 10);
279    ///
280    /// assert_eq!(None, c);
281    /// ```
282    ///
283    /// Passing a large radix, causing a panic:
284    ///
285    /// ```should_panic
286    /// // this panics
287    /// let _c = char::from_digit(1, 37);
288    /// ```
    #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
    #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
    #[must_use]
    #[inline]
    pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
        // Both arguments are forwarded as-is. The radix check behind the
        // documented panic is not done in this wrapper; presumably it lives in
        // `convert::from_digit` (not visible here) -- confirm there.
        super::convert::from_digit(num, radix)
    }
296
297    /// Checks if a `char` is a digit in the given radix.
298    ///
299    /// A 'radix' here is sometimes also called a 'base'. A radix of two
300    /// indicates a binary number, a radix of ten, decimal, and a radix of
301    /// sixteen, hexadecimal, to give some common values. Arbitrary
302    /// radices are supported.
303    ///
304    /// Compared to [`is_numeric()`], this function only recognizes the characters
305    /// `0-9`, `a-z` and `A-Z`.
306    ///
307    /// 'Digit' is defined to be only the following characters:
308    ///
309    /// * `0-9`
310    /// * `a-z`
311    /// * `A-Z`
312    ///
313    /// For a more comprehensive understanding of 'digit', see [`is_numeric()`].
314    ///
315    /// [`is_numeric()`]: #method.is_numeric
316    ///
317    /// # Panics
318    ///
319    /// Panics if given a radix smaller than 2 or larger than 36.
320    ///
321    /// # Examples
322    ///
323    /// Basic usage:
324    ///
325    /// ```
326    /// assert!('1'.is_digit(10));
327    /// assert!('f'.is_digit(16));
328    /// assert!(!'f'.is_digit(10));
329    /// ```
330    ///
331    /// Passing a large radix, causing a panic:
332    ///
333    /// ```should_panic
334    /// // this panics
335    /// '1'.is_digit(37);
336    /// ```
337    ///
338    /// Passing a small radix, causing a panic:
339    ///
340    /// ```should_panic
341    /// // this panics
342    /// '1'.is_digit(1);
343    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    #[rustc_const_stable(feature = "const_char_classify", since = "1.87.0")]
    #[inline]
    pub const fn is_digit(self, radix: u32) -> bool {
        // Delegates to `to_digit`, so the radix assertion made there is what
        // produces the documented panics; the digit value itself is discarded.
        self.to_digit(radix).is_some()
    }
350
351    /// Converts a `char` to a digit in the given radix.
352    ///
353    /// A 'radix' here is sometimes also called a 'base'. A radix of two
354    /// indicates a binary number, a radix of ten, decimal, and a radix of
355    /// sixteen, hexadecimal, to give some common values. Arbitrary
356    /// radices are supported.
357    ///
358    /// 'Digit' is defined to be only the following characters:
359    ///
360    /// * `0-9`
361    /// * `a-z`
362    /// * `A-Z`
363    ///
364    /// # Errors
365    ///
366    /// Returns `None` if the `char` does not refer to a digit in the given radix.
367    ///
368    /// # Panics
369    ///
370    /// Panics if given a radix smaller than 2 or larger than 36.
371    ///
372    /// # Examples
373    ///
374    /// Basic usage:
375    ///
376    /// ```
377    /// assert_eq!('1'.to_digit(10), Some(1));
378    /// assert_eq!('f'.to_digit(16), Some(15));
379    /// ```
380    ///
381    /// Passing a non-digit results in failure:
382    ///
383    /// ```
384    /// assert_eq!('f'.to_digit(10), None);
385    /// assert_eq!('z'.to_digit(16), None);
386    /// ```
387    ///
388    /// Passing a large radix, causing a panic:
389    ///
390    /// ```should_panic
391    /// // this panics
392    /// let _ = '1'.to_digit(37);
393    /// ```
    ///
    /// Passing a small radix, causing a panic:
395    ///
396    /// ```should_panic
397    /// // this panics
398    /// let _ = '1'.to_digit(1);
399    /// ```
400    #[stable(feature = "rust1", since = "1.0.0")]
401    #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
402    #[rustc_diagnostic_item = "char_to_digit"]
403    #[must_use = "this returns the result of the operation, \
404                  without modifying the original"]
405    #[inline]
406    pub const fn to_digit(self, radix: u32) -> Option<u32> {
407        assert!(
408            radix >= 2 && radix <= 36,
409            "to_digit: invalid radix -- radix must be in the range 2 to 36 inclusive"
410        );
411        // check radix to remove letter handling code when radix is a known constant
412        let value = if self > '9' && radix > 10 {
413            // mask to convert ASCII letters to uppercase
414            const TO_UPPERCASE_MASK: u32 = !0b0010_0000;
415            // Converts an ASCII letter to its corresponding integer value:
416            // A-Z => 10-35, a-z => 10-35. Other characters produce values >= 36.
417            //
418            // Add Overflow Safety:
419            // By applying the mask after the subtraction, the first addendum is
420            // constrained such that it never exceeds u32::MAX - 0x20.
421            ((self as u32).wrapping_sub('A' as u32) & TO_UPPERCASE_MASK) + 10
422        } else {
423            // convert digit to value, non-digits wrap to values > 36
424            (self as u32).wrapping_sub('0' as u32)
425        };
426        // FIXME(const-hack): once then_some is const fn, use it here
427        if value < radix { Some(value) } else { None }
428    }
429
430    /// Returns an iterator that yields the hexadecimal Unicode escape of a
431    /// character as `char`s.
432    ///
433    /// This will escape characters with the Rust syntax of the form
434    /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation.
435    ///
436    /// # Examples
437    ///
438    /// As an iterator:
439    ///
440    /// ```
441    /// for c in '❤'.escape_unicode() {
442    ///     print!("{c}");
443    /// }
444    /// println!();
445    /// ```
446    ///
447    /// Using `println!` directly:
448    ///
449    /// ```
450    /// println!("{}", '❤'.escape_unicode());
451    /// ```
452    ///
453    /// Both are equivalent to:
454    ///
455    /// ```
456    /// println!("\\u{{2764}}");
457    /// ```
458    ///
459    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
460    ///
461    /// ```
462    /// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
463    /// ```
    #[must_use = "this returns the escaped char as an iterator, \
                  without modifying the original"]
    #[stable(feature = "rust1", since = "1.0.0")]
    #[inline]
    pub fn escape_unicode(self) -> EscapeUnicode {
        // All of the work is delegated to the `EscapeUnicode` constructor.
        EscapeUnicode::new(self)
    }
471
472    /// An extended version of `escape_debug` that optionally permits escaping
473    /// Extended Grapheme codepoints, single quotes, and double quotes. This
474    /// allows us to format characters like nonspacing marks better when they're
475    /// at the start of a string, and allows escaping single quotes in
476    /// characters, and double quotes in strings.
477    #[inline]
478    pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
479        match self {
480            // Special escapes
481            '\"' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark),
482            '\'' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe),
483            '\\' => EscapeDebug::backslash(ascii::Char::ReverseSolidus),
484            '\n' => EscapeDebug::backslash(ascii::Char::SmallN),
485            '\t' => EscapeDebug::backslash(ascii::Char::SmallT),
486            '\r' => EscapeDebug::backslash(ascii::Char::SmallR),
487            '\0' => EscapeDebug::backslash(ascii::Char::Digit0),
488
489            // ASCII fast path
490            '\x20'..='\x7E' => EscapeDebug::printable(self),
491
492            _ if self.is_control()
493                || self.is_private_use()
494                || self.is_whitespace()
495                || args.escape_grapheme_extender && self.is_grapheme_extender()
496                || self.is_default_ignorable()
497                || self.is_format_control()
498                || self.is_unassigned() =>
499            {
500                EscapeDebug::unicode(self)
501            }
502
503            _ => EscapeDebug::printable(self),
504        }
505    }
506
507    /// Returns an iterator that yields the literal escape code of a character
508    /// as `char`s.
509    ///
510    /// This will escape the characters similar to the [`Debug`](core::fmt::Debug) implementations
511    /// of `str` or `char`.
512    ///
513    /// # Examples
514    ///
515    /// As an iterator:
516    ///
517    /// ```
518    /// for c in '\n'.escape_debug() {
519    ///     print!("{c}");
520    /// }
521    /// println!();
522    /// ```
523    ///
524    /// Using `println!` directly:
525    ///
526    /// ```
527    /// println!("{}", '\n'.escape_debug());
528    /// ```
529    ///
530    /// Both are equivalent to:
531    ///
532    /// ```
533    /// println!("\\n");
534    /// ```
535    ///
536    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
537    ///
538    /// ```
539    /// assert_eq!('\n'.escape_debug().to_string(), "\\n");
540    /// ```
    #[must_use = "this returns the escaped char as an iterator, \
                  without modifying the original"]
    #[stable(feature = "char_escape_debug", since = "1.20.0")]
    #[inline]
    pub fn escape_debug(self) -> EscapeDebug {
        // NOTE(review): `ESCAPE_ALL` presumably switches on every optional
        // escape of `escape_debug_ext` (grapheme extenders, single and double
        // quotes) -- confirm against the definition of `EscapeDebugExtArgs`.
        self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
    }
548
549    /// Returns an iterator that yields the literal escape code of a character
550    /// as `char`s.
551    ///
552    /// The default is chosen with a bias toward producing literals that are
553    /// legal in a variety of languages, including C++11 and similar C-family
554    /// languages. The exact rules are:
555    ///
556    /// * Tab is escaped as `\t`.
557    /// * Carriage return is escaped as `\r`.
558    /// * Line feed is escaped as `\n`.
559    /// * Single quote is escaped as `\'`.
560    /// * Double quote is escaped as `\"`.
561    /// * Backslash is escaped as `\\`.
562    /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e`
563    ///   inclusive is not escaped.
564    /// * All other characters are given hexadecimal Unicode escapes; see
565    ///   [`escape_unicode`].
566    ///
567    /// [`escape_unicode`]: #method.escape_unicode
568    ///
569    /// # Examples
570    ///
571    /// As an iterator:
572    ///
573    /// ```
574    /// for c in '"'.escape_default() {
575    ///     print!("{c}");
576    /// }
577    /// println!();
578    /// ```
579    ///
580    /// Using `println!` directly:
581    ///
582    /// ```
583    /// println!("{}", '"'.escape_default());
584    /// ```
585    ///
586    /// Both are equivalent to:
587    ///
588    /// ```
589    /// println!("\\\"");
590    /// ```
591    ///
592    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
593    ///
594    /// ```
595    /// assert_eq!('"'.escape_default().to_string(), "\\\"");
596    /// ```
597    #[must_use = "this returns the escaped char as an iterator, \
598                  without modifying the original"]
599    #[stable(feature = "rust1", since = "1.0.0")]
600    #[inline]
601    pub fn escape_default(self) -> EscapeDefault {
602        match self {
603            '\t' => EscapeDefault::backslash(ascii::Char::SmallT),
604            '\r' => EscapeDefault::backslash(ascii::Char::SmallR),
605            '\n' => EscapeDefault::backslash(ascii::Char::SmallN),
606            '\\' | '\'' | '\"' => EscapeDefault::backslash(self.as_ascii().unwrap()),
607            '\x20'..='\x7e' => EscapeDefault::printable(self.as_ascii().unwrap()),
608            _ => EscapeDefault::unicode(self),
609        }
610    }
611
612    /// Returns the number of bytes this `char` would need if encoded in UTF-8.
613    ///
614    /// That number of bytes is always between 1 and 4, inclusive.
615    ///
616    /// # Examples
617    ///
618    /// Basic usage:
619    ///
620    /// ```
621    /// let len = 'A'.len_utf8();
622    /// assert_eq!(len, 1);
623    ///
624    /// let len = 'ß'.len_utf8();
625    /// assert_eq!(len, 2);
626    ///
627    /// let len = 'ℝ'.len_utf8();
628    /// assert_eq!(len, 3);
629    ///
630    /// let len = '💣'.len_utf8();
631    /// assert_eq!(len, 4);
632    /// ```
633    ///
634    /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it
635    /// would take if each code point was represented as a `char` vs in the `&str` itself:
636    ///
637    /// ```
638    /// // as chars
639    /// let eastern = '東';
640    /// let capital = '京';
641    ///
642    /// // both can be represented as three bytes
643    /// assert_eq!(3, eastern.len_utf8());
644    /// assert_eq!(3, capital.len_utf8());
645    ///
646    /// // as a &str, these two are encoded in UTF-8
647    /// let tokyo = "東京";
648    ///
649    /// let len = eastern.len_utf8() + capital.len_utf8();
650    ///
651    /// // we can see that they take six bytes total...
652    /// assert_eq!(6, tokyo.len());
653    ///
654    /// // ... just like the &str
655    /// assert_eq!(len, tokyo.len());
656    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
    #[inline]
    #[must_use]
    pub const fn len_utf8(self) -> usize {
        // A plain path call: this resolves to the free function `len_utf8` that
        // is in scope (defined outside this view), not to this method. It is
        // given the raw `u32` value of the character.
        len_utf8(self as u32)
    }
664
665    /// Returns the number of 16-bit code units this `char` would need if
666    /// encoded in UTF-16.
667    ///
668    /// That number of code units is always either 1 or 2, for unicode scalar values in
669    /// the [basic multilingual plane] or [supplementary planes] respectively.
670    ///
671    /// See the documentation for [`len_utf8()`] for more explanation of this
672    /// concept. This function is a mirror, but for UTF-16 instead of UTF-8.
673    ///
674    /// [basic multilingual plane]: http://www.unicode.org/glossary/#basic_multilingual_plane
675    /// [supplementary planes]: http://www.unicode.org/glossary/#supplementary_planes
676    /// [`len_utf8()`]: #method.len_utf8
677    ///
678    /// # Examples
679    ///
680    /// Basic usage:
681    ///
682    /// ```
683    /// let n = 'ß'.len_utf16();
684    /// assert_eq!(n, 1);
685    ///
686    /// let len = '💣'.len_utf16();
687    /// assert_eq!(len, 2);
688    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
    #[inline]
    #[must_use]
    pub const fn len_utf16(self) -> usize {
        // A plain path call: this resolves to the free function `len_utf16` that
        // is in scope (defined outside this view), not to this method. It is
        // given the raw `u32` value of the character.
        len_utf16(self as u32)
    }
696
697    /// Encodes this character as UTF-8 into the provided byte buffer,
698    /// and then returns the subslice of the buffer that contains the encoded character.
699    ///
700    /// # Panics
701    ///
702    /// Panics if the buffer is not large enough.
703    /// A buffer of length four is large enough to encode any `char`.
704    ///
705    /// # Examples
706    ///
707    /// In both of these examples, 'ß' takes two bytes to encode.
708    ///
709    /// ```
710    /// let mut b = [0; 2];
711    ///
712    /// let result = 'ß'.encode_utf8(&mut b);
713    ///
714    /// assert_eq!(result, "ß");
715    ///
716    /// assert_eq!(result.len(), 2);
717    /// ```
718    ///
719    /// A buffer that's too small:
720    ///
721    /// ```should_panic
722    /// let mut b = [0; 1];
723    ///
724    /// // this panics
725    /// 'ß'.encode_utf8(&mut b);
726    /// ```
727    #[stable(feature = "unicode_encode_char", since = "1.15.0")]
728    #[rustc_const_stable(feature = "const_char_encode_utf8", since = "1.83.0")]
729    #[inline]
730    pub const fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
731        // SAFETY: `char` is not a surrogate, so this is valid UTF-8.
732        unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
733    }
734
735    /// Encodes this character as native endian UTF-16 into the provided `u16` buffer,
736    /// and then returns the subslice of the buffer that contains the encoded character.
737    ///
738    /// # Panics
739    ///
740    /// Panics if the buffer is not large enough.
741    /// A buffer of length 2 is large enough to encode any `char`.
742    ///
743    /// # Examples
744    ///
745    /// In both of these examples, '𝕊' takes two `u16`s to encode.
746    ///
747    /// ```
748    /// let mut b = [0; 2];
749    ///
750    /// let result = '𝕊'.encode_utf16(&mut b);
751    ///
752    /// assert_eq!(result.len(), 2);
753    /// ```
754    ///
755    /// A buffer that's too small:
756    ///
757    /// ```should_panic
758    /// let mut b = [0; 1];
759    ///
760    /// // this panics
761    /// '𝕊'.encode_utf16(&mut b);
762    /// ```
    #[stable(feature = "unicode_encode_char", since = "1.15.0")]
    #[rustc_const_stable(feature = "const_char_encode_utf16", since = "1.84.0")]
    #[inline]
    pub const fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
        // Forwards the raw `u32` value and the caller's buffer to the free
        // function `encode_utf16_raw`; the slice it returns is handed back
        // unchanged.
        encode_utf16_raw(self as u32, dst)
    }
769
770    /// Returns `true` if this `char` has the `Alphabetic` property.
771    ///
772    /// `Alphabetic` is [described] in Chapter 4 (Character Properties) of the Unicode Standard, and
773    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
774    ///
775    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G32524
776    /// [specified]: https://www.unicode.org/reports/tr44/#Alphabetic
777    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
778    ///
779    /// # Examples
780    ///
781    /// Basic usage:
782    ///
783    /// ```
784    /// assert!('a'.is_alphabetic());
785    /// assert!('京'.is_alphabetic());
786    ///
787    /// let c = '💝';
788    /// // love is many things, but it is not alphabetic
789    /// assert!(!c.is_alphabetic());
790    /// ```
791    #[must_use]
792    #[stable(feature = "rust1", since = "1.0.0")]
793    #[inline]
794    pub fn is_alphabetic(self) -> bool {
795        match self {
796            'a'..='z' | 'A'..='Z' => true,
797            '\0'..='\u{A9}' => false,
798            _ => unicode::Alphabetic(self),
799        }
800    }
801
802    /// Returns `true` if this `char` has the `Cased` property.
803    /// A character is cased if and only if it is uppercase, lowercase, or titlecase.
804    ///
805    /// `Cased` is [described] in Chapter 3 (Character Properties) of the Unicode Standard and
806    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
807    ///
808    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G44595
809    /// [specified]: https://www.unicode.org/reports/tr44/#Cased
810    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
811    ///
812    /// # Examples
813    ///
814    /// Basic usage:
815    ///
816    /// ```
817    /// #![feature(titlecase)]
818    /// assert!('A'.is_cased());
819    /// assert!('a'.is_cased());
820    /// assert!(!'京'.is_cased());
821    /// ```
822    #[must_use]
823    #[unstable(feature = "titlecase", issue = "153892")]
824    #[inline]
825    pub fn is_cased(self) -> bool {
826        match self {
827            'a'..='z' | 'A'..='Z' => true,
828            '\0'..='\u{A9}' => false,
829            _ => unicode::Lowercase(self) || unicode::Uppercase(self) || unicode::Lt(self),
830        }
831    }
832
833    /// Returns the case of this character:
834    /// [`Some(CharCase::Upper)`][`CharCase::Upper`] if [`self.is_uppercase()`][`char::is_uppercase`],
835    /// [`Some(CharCase::Lower)`][`CharCase::Lower`] if [`self.is_lowercase()`][`char::is_lowercase`],
836    /// [`Some(CharCase::Title)`][`CharCase::Title`] if [`self.is_titlecase()`][`char::is_titlecase`], and
837    /// `None` if [`!self.is_cased()`][`char::is_cased`].
838    ///
839    /// # Examples
840    ///
841    /// ```
842    /// #![feature(titlecase)]
843    /// use core::char::CharCase;
844    /// assert_eq!('a'.case(), Some(CharCase::Lower));
845    /// assert_eq!('δ'.case(), Some(CharCase::Lower));
846    /// assert_eq!('A'.case(), Some(CharCase::Upper));
847    /// assert_eq!('Δ'.case(), Some(CharCase::Upper));
848    /// assert_eq!('Dž'.case(), Some(CharCase::Title));
849    /// assert_eq!('中'.case(), None);
850    /// ```
851    #[must_use]
852    #[unstable(feature = "titlecase", issue = "153892")]
853    #[inline]
854    pub fn case(self) -> Option<CharCase> {
855        match self {
856            'a'..='z' => Some(CharCase::Lower),
857            'A'..='Z' => Some(CharCase::Upper),
858            '\0'..='\u{A9}' => None,
859            _ if unicode::Lowercase(self) => Some(CharCase::Lower),
860            _ if unicode::Uppercase(self) => Some(CharCase::Upper),
861            _ if unicode::Lt(self) => Some(CharCase::Title),
862            _ => None,
863        }
864    }
865
866    /// Returns `true` if this `char` has the `Lowercase` property.
867    ///
868    /// `Lowercase` is [described] in Chapter 4 (Character Properties) of the Unicode Standard, and
869    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
870    ///
871    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G136255
872    /// [specified]: https://www.unicode.org/reports/tr44/#Lowercase
873    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
874    ///
875    /// # Examples
876    ///
877    /// Basic usage:
878    ///
879    /// ```
880    /// assert!('a'.is_lowercase());
881    /// assert!('δ'.is_lowercase());
882    /// assert!(!'A'.is_lowercase());
883    /// assert!(!'Δ'.is_lowercase());
884    ///
885    /// // The various Chinese scripts and punctuation do not have case, and so:
886    /// assert!(!'中'.is_lowercase());
887    /// assert!(!' '.is_lowercase());
888    /// ```
889    ///
890    /// In a const context:
891    ///
892    /// ```
893    /// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ'.is_lowercase();
894    /// assert!(!CAPITAL_DELTA_IS_LOWERCASE);
895    /// ```
896    #[must_use]
897    #[stable(feature = "rust1", since = "1.0.0")]
898    #[rustc_const_stable(feature = "const_unicode_case_lookup", since = "1.84.0")]
899    #[inline]
900    pub const fn is_lowercase(self) -> bool {
901        match self {
902            'a'..='z' => true,
903            '\0'..='\u{A9}' => false,
904            _ => unicode::Lowercase(self),
905        }
906    }
907
908    /// Returns `true` if this `char` is in the general category for titlecase letters.
909    /// Conceptually, these characters consist of an uppercase portion followed by a lowercase portion.
910    ///
911    /// Titlecase letters (code points with the general category of `Lt`) are [described] in Chapter 4
912    /// (Character Properties) of the Unicode Standard, and [specified] in the Unicode Character
913    /// Database [`UnicodeData.txt`].
914    ///
915    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G124722
916    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
917    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
918    ///
919    /// # Examples
920    ///
921    /// Basic usage:
922    ///
923    /// ```
924    /// #![feature(titlecase)]
925    /// assert!('Dž'.is_titlecase());
926    /// assert!('ῼ'.is_titlecase());
927    /// assert!(!'D'.is_titlecase());
928    /// assert!(!'z'.is_titlecase());
929    /// assert!(!'中'.is_titlecase());
930    /// assert!(!' '.is_titlecase());
931    /// ```
932    #[must_use]
933    #[unstable(feature = "titlecase", issue = "153892")]
934    #[inline]
935    pub fn is_titlecase(self) -> bool {
936        match self {
937            '\0'..='\u{01C4}' => false,
938            _ => unicode::Lt(self),
939        }
940    }
941
942    /// Returns `true` if this `char` has the `Uppercase` property.
943    ///
944    /// `Uppercase` is [described] in Chapter 4 (Character Properties) of the Unicode Standard, and
945    /// [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
946    ///
947    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G136255
948    /// [specified]: https://www.unicode.org/reports/tr44/#Uppercase
949    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
950    ///
951    /// # Examples
952    ///
953    /// Basic usage:
954    ///
955    /// ```
956    /// assert!(!'a'.is_uppercase());
957    /// assert!(!'δ'.is_uppercase());
958    /// assert!('A'.is_uppercase());
959    /// assert!('Δ'.is_uppercase());
960    ///
961    /// // The various Chinese scripts and punctuation do not have case, and so:
962    /// assert!(!'中'.is_uppercase());
963    /// assert!(!' '.is_uppercase());
964    /// ```
965    ///
966    /// In a const context:
967    ///
968    /// ```
969    /// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ'.is_uppercase();
970    /// assert!(CAPITAL_DELTA_IS_UPPERCASE);
971    /// ```
972    #[must_use]
973    #[stable(feature = "rust1", since = "1.0.0")]
974    #[rustc_const_stable(feature = "const_unicode_case_lookup", since = "1.84.0")]
975    #[inline]
976    pub const fn is_uppercase(self) -> bool {
977        match self {
978            'A'..='Z' => true,
979            '\0'..='\u{BF}' => false,
980            _ => unicode::Uppercase(self),
981        }
982    }
983
984    /// Returns `true` if this `char` has one of the general categories for numbers.
985    ///
986    /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
987    /// characters, and `No` for other numeric characters) are [specified] in the Unicode Character
988    /// Database [`UnicodeData.txt`].
989    ///
990    /// This method doesn't cover everything that could be considered a number, e.g. ideographic numbers like '三'.
991    /// If you want everything including characters with overlapping purposes, then you might want to use
992    /// a Unicode or language-processing library that exposes the appropriate character properties
993    /// (e.g. [`Numeric_Type`]) instead of looking at the Unicode categories.
994    ///
995    /// If you want to parse ASCII decimal digits (0-9) or ASCII base-N, use
996    /// `is_ascii_digit` or `is_digit` instead.
997    ///
998    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
999    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1000    /// [`Numeric_Type`]: https://www.unicode.org/reports/tr44/#Numeric_Type
1001    ///
1002    /// # Examples
1003    ///
1004    /// Basic usage:
1005    ///
1006    /// ```
1007    /// assert!('٣'.is_numeric());
1008    /// assert!('7'.is_numeric());
1009    /// assert!('৬'.is_numeric());
1010    /// assert!('¾'.is_numeric());
1011    /// assert!('①'.is_numeric());
1012    /// assert!(!'K'.is_numeric());
1013    /// assert!(!'و'.is_numeric());
1014    /// assert!(!'藏'.is_numeric());
1015    /// assert!(!'三'.is_numeric());
1016    /// ```
1017    #[must_use]
1018    #[stable(feature = "rust1", since = "1.0.0")]
1019    #[inline]
1020    pub fn is_numeric(self) -> bool {
1021        match self {
1022            '0'..='9' => true,
1023            '\0'..='\u{B1}' => false,
1024            _ => unicode::N(self),
1025        }
1026    }
1027
1028    /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
1029    ///
1030    /// [`is_alphabetic()`]: Self::is_alphabetic
1031    /// [`is_numeric()`]: Self::is_numeric
1032    ///
1033    /// # Examples
1034    ///
1035    /// Basic usage:
1036    ///
1037    /// ```
1038    /// assert!('٣'.is_alphanumeric());
1039    /// assert!('7'.is_alphanumeric());
1040    /// assert!('৬'.is_alphanumeric());
1041    /// assert!('¾'.is_alphanumeric());
1042    /// assert!('①'.is_alphanumeric());
1043    /// assert!('K'.is_alphanumeric());
1044    /// assert!('و'.is_alphanumeric());
1045    /// assert!('藏'.is_alphanumeric());
1046    /// ```
1047    #[must_use]
1048    #[stable(feature = "rust1", since = "1.0.0")]
1049    #[inline]
1050    pub fn is_alphanumeric(self) -> bool {
1051        match self {
1052            'a'..='z' | 'A'..='Z' | '0'..='9' => true,
1053            '\0'..='\u{A9}' => false,
1054            _ => unicode::Alphabetic(self) || unicode::N(self),
1055        }
1056    }
1057
1058    /// Returns `true` if this `char` has the `White_Space` property.
1059    ///
1060    /// `White_Space` is [specified] in the Unicode Character Database [`PropList.txt`].
1061    ///
1062    /// [specified]: https://www.unicode.org/reports/tr44/#White_Space
1063    /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
1064    ///
1065    /// # Examples
1066    ///
1067    /// Basic usage:
1068    ///
1069    /// ```
1070    /// assert!(' '.is_whitespace());
1071    ///
1072    /// // line break
1073    /// assert!('\n'.is_whitespace());
1074    ///
1075    /// // a non-breaking space
1076    /// assert!('\u{A0}'.is_whitespace());
1077    ///
1078    /// assert!(!'越'.is_whitespace());
1079    /// ```
1080    #[must_use]
1081    #[stable(feature = "rust1", since = "1.0.0")]
1082    #[rustc_const_stable(feature = "const_char_classify", since = "1.87.0")]
1083    #[inline]
1084    pub const fn is_whitespace(self) -> bool {
1085        match self {
1086            ' ' | '\x09'..='\x0d' => true,
1087            '\0'..='\u{84}' => false,
1088            _ => unicode::White_Space(self),
1089        }
1090    }
1091
1092    /// Returns `true` if this `char` has the general category for control codes.
1093    ///
1094    /// Control codes (code points with the general category of `Cc`) are [described] in Chapter 23
1095    /// (Special Areas and Format Characters) of the Unicode Standard, and [specified] in the Unicode Character
1096    /// Database [`UnicodeData.txt`]. The full set of Unicode control codes is
1097    /// `'\0'..='\x1f' | '\x7f'..='\u{9f}'`, and will never change.
1098    ///
1099    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-23/#G20365
1100    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1101    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1102    ///
1103    /// # Examples
1104    ///
1105    /// Basic usage:
1106    ///
1107    /// ```
1108    /// assert!('\t'.is_control());
1109    /// assert!('\n'.is_control());
1110    /// assert!('\u{9C}'.is_control()); // STRING TERMINATOR
1111    /// assert!(!'q'.is_control());
1112    /// ```
1113    #[must_use]
1114    #[stable(feature = "rust1", since = "1.0.0")]
1115    #[rustc_const_stable(feature = "const_is_control", since = "1.97.0")]
1116    #[inline]
1117    pub const fn is_control(self) -> bool {
1118        // According to
1119        // https://www.unicode.org/policies/stability_policy.html#Property_Value,
1120        // the set of codepoints in `Cc` will never change.
1121        // So we can just hard-code the patterns to match against instead of using a table.
1122        matches!(self, '\0'..='\x1f' | '\x7f'..='\u{9f}')
1123    }
1124
1125    /// Returns `true` if this `char` has the general category for [private-use characters].
1126    /// These characters do not have an interpretation specified by Unicode; individual programs
1127    /// and users are free to assign them whatever meaning they like.
1128    ///
1129    /// [private-use characters]: https://www.unicode.org/faq/private_use#private_use
1130    ///
1131    /// Private-use characters (code points with the general category of `Co`) are [described] in Chapter 23
1132    /// (Special Areas and Format Characters) of the Unicode Standard, and [specified] in the
1133    /// Unicode Character Database [`UnicodeData.txt`]. The full set of private-use characters is
1134    /// `'\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}'`,
1135    /// and will never change.
1136    ///
1137    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-23/#G19184
1138    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1139    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1140    ///
1141    #[must_use]
1142    #[inline]
1143    const fn is_private_use(self) -> bool {
1144        // According to
1145        // https://www.unicode.org/policies/stability_policy.html#Property_Value,
1146        // the set of codepoints in `Co` will never change.
1147        // So we can just hard-code the patterns to match against instead of using a table.
1148        matches!(self, '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}')
1149    }
1150
1151    /// Returns `true` if this `char` has the general category for format control characters.
1152    ///
1153    /// Format controls (code points with the general category of `Cf`) are [described] in Chapter 4
1154    /// (Character Properties) of the Unicode Standard, and [specified] in the Unicode Character
1155    /// Database [`UnicodeData.txt`].
1156    ///
1157    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G134153
1158    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1159    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1160    ///
1161    /// # Examples
1162    ///
1163    /// Basic usage:
1164    ///
1165    /// ```ignore(private)
1166    /// assert!('\u{AD}'.is_format_control()); // SOFT HYPHEN
1167    /// assert!('\u{200B}'.is_format_control()); // ZERO WIDTH SPACE
1168    /// assert!('\u{E0041}'.is_format_control()); // TAG LATIN CAPITAL LETTER A
1169    /// assert!('۝'.is_format_control()); // ARABIC END OF AYAH
1170    /// assert!('𓐲'.is_format_control()); // EGYPTIAN HIEROGLYPH INSERT AT TOP START
1171    /// assert!(!'q'.is_format_control());
1172    /// ```
1173    #[must_use]
1174    #[inline]
1175    fn is_format_control(self) -> bool {
1176        self > '\u{AC}' && unicode::Cf(self)
1177    }
1178
1179    /// Returns `true` if this `char` has not yet been assigned a meaning by Unicode, as of
1180    /// [`UNICODE_VERSION`].
1181    ///
1182    /// [`UNICODE_VERSION`]: Self::UNICODE_VERSION
1183    ///
1184    /// These characters may have a meaning assigned in the future,
1185    /// except for the 66 [noncharacters] which will never be assigned a meaning.
1186    ///
1187    /// [noncharacters]: https://www.unicode.org/faq/private_use#noncharacters
1188    ///
1189    /// Many of Unicode's [stability policies] apply only to assigned characters.
1190    ///
1191    /// [stability policies]: https://www.unicode.org/policies/stability_policy.html
1192    ///
1193    /// Unassigned characters (code points with the general category of `Cn`) are [described] in Chapter 4
1194    /// (Character Properties) of the Unicode Standard, and [specified] in the Unicode Character Database
1195    /// by their exclusion from [`UnicodeData.txt`].
1196    ///
1197    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-4/#G134153
1198    /// [specified]: https://www.unicode.org/reports/tr44/#GC_Values_Table
1199    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1200    ///
1201    /// # Examples
1202    ///
1203    /// Basic usage:
1204    ///
1205    /// ```ignore(private)
1206    /// assert!('\u{FFFE}'.is_unassigned()); // noncharacter, will never be assigned
1207    ///
1208    /// //assert!('\u{7AAAA}'.is_unassigned()); // not currently assigned, but may be in the future,
1209    ///                                         // so we shouldn't rely on the current status
1210    ///
1211    /// assert!(!'γ'.is_unassigned()); // once a character is assigned, it stays assigned forever
1212    /// ```
1213    #[must_use]
1214    #[inline]
1215    fn is_unassigned(self) -> bool {
1216        match self {
1217            '\0'..='\u{377}' => false,
1218            '\u{378}'..='\u{3FFFD}' => unicode::Cn_planes_0_3(self),
1219            // Assigned character ranges in planes 4 and above.
1220            // `src/tools/unicode-table-generator/src/main.rs` asserts that this is correct
1221            '\u{E0001}'
1222            | '\u{E0020}'..='\u{E007F}'
1223            | '\u{E0100}'..='\u{E01EF}'
1224            | '\u{F0000}'..='\u{FFFFD}'
1225            | '\u{100000}'..='\u{10FFFD}' => false,
1226            _ => true,
1227        }
1228    }
1229
1230    /// Returns `true` if this `char` has the `Default_Ignorable_Code_Point` property.
1231    /// These characters [should be displayed as invisible in fallback rendering](https://www.unicode.org/faq/unsup_char#3).
1232    ///
1233    /// `Default_Ignorable_Code_Point` is [described] in Chapter 5 (Implementation Guidelines) of the Unicode Standard,
1234    /// and [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
1235    ///
1236    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-5/#G40120
1237    /// [specified]: https://www.unicode.org/reports/tr44/#Default_Ignorable_Code_Point
1238    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1239    ///
1240    /// # Examples
1241    ///
1242    /// Basic usage:
1243    ///
1244    /// ```ignore(private)
1245    /// assert!('\u{AD}'.is_default_ignorable()); // SOFT HYPHEN
1246    /// assert!('\u{115F}'.is_default_ignorable()); // HANGUL CHOSEONG FILLER
1247    /// assert!('\u{200B}'.is_default_ignorable()); // ZERO WIDTH SPACE
1248    /// assert!('\u{E0041}'.is_default_ignorable()); // TAG LATIN CAPITAL LETTER A
1249    /// assert!(!'۝'.is_default_ignorable()); // ARABIC END OF AYAH
1250    /// assert!(!'𓐲'.is_default_ignorable()); // EGYPTIAN HIEROGLYPH INSERT AT TOP START
1251    /// assert!(!' '.is_default_ignorable());
1252    /// assert!(!'\n'.is_default_ignorable());
1253    /// assert!(!'\0'.is_default_ignorable());
1254    /// assert!(!'q'.is_default_ignorable());
    /// ```
1255    #[must_use]
1256    #[inline]
1257    fn is_default_ignorable(self) -> bool {
1258        self > '\u{AC}' && unicode::Default_Ignorable_Code_Point(self)
1259    }
1260
1261    /// Returns `true` if this `char` has the `Grapheme_Extend` property.
1262    ///
1263    /// `Grapheme_Extend` is [described] in Chapter 3 (Conformance) of the Unicode Standard,
1264    /// and [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
1265    ///
1266    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G41165
1267    /// [specified]: https://www.unicode.org/reports/tr44/#Grapheme_Extend
1268    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1269    #[must_use]
1270    #[inline]
1271    fn is_grapheme_extender(self) -> bool {
1272        self > '\u{02FF}' && unicode::Grapheme_Extend(self)
1273    }
1274
1275    /// Returns `true` if this `char` has the `Case_Ignorable` property. This narrow-use property
1276    /// is used to implement context-dependent casing for the Greek letter sigma (uppercase 'Σ'),
1277    /// which has two lowercase forms.
1278    ///
1279    /// `Case_Ignorable` is [described] in Chapter 3 (Conformance) of the Unicode Core Specification,
1280    /// and [specified] in the Unicode Character Database [`DerivedCoreProperties.txt`].
1281    /// See those resources, as well as [`to_lowercase()`]'s documentation, for more information.
1282    ///
1283    /// [described]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G63116
1284    /// [specified]: https://www.unicode.org/reports/tr44/#Case_Ignorable
1285    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
1286    /// [`to_lowercase()`]: Self::to_lowercase()
1287    #[must_use]
1288    #[inline]
1289    #[unstable(feature = "case_ignorable", issue = "154848")]
1290    pub fn is_case_ignorable(self) -> bool {
1291        if self.is_ascii() {
1292            matches!(self, '\'' | '.' | ':' | '^' | '`')
1293        } else {
1294            unicode::Case_Ignorable(self)
1295        }
1296    }
1297
1298    /// Returns an iterator that yields the lowercase mapping of this `char` as one or more
1299    /// `char`s.
1300    ///
1301    /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
1302    ///
1303    /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
1304    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1305    ///
1306    /// [ucd]: https://www.unicode.org/reports/tr44/
1307    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1308    ///
1309    /// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
1310    /// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
1311    ///
1312    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1313    /// is independent of context and language. See [below](#notes-on-context-and-locale)
1314    /// for more information.
1315    ///
1316    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1317    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1318    ///
1319    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1320    ///
1321    /// # Examples
1322    ///
1323    /// As an iterator:
1324    ///
1325    /// ```
1326    /// for c in 'İ'.to_lowercase() {
1327    ///     print!("{c}");
1328    /// }
1329    /// println!();
1330    /// ```
1331    ///
1332    /// Using `println!` directly:
1333    ///
1334    /// ```
1335    /// println!("{}", 'İ'.to_lowercase());
1336    /// ```
1337    ///
1338    /// Both are equivalent to:
1339    ///
1340    /// ```
1341    /// println!("i\u{307}");
1342    /// ```
1343    ///
1344    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1345    ///
1346    /// ```
1347    /// assert_eq!('C'.to_lowercase().to_string(), "c");
1348    ///
1349    /// // Sometimes the result is more than one character:
1350    /// assert_eq!('İ'.to_lowercase().to_string(), "i\u{307}");
1351    ///
1352    /// // Characters that do not have both uppercase and lowercase
1353    /// // convert into themselves.
1354    /// assert_eq!('山'.to_lowercase().to_string(), "山");
1355    /// ```
1356    /// # Notes on context and locale
1357    ///
1358    /// As stated earlier, this method does not take into account language or context.
1359    /// Below is a non-exhaustive list of situations where this can be relevant.
1360    /// If you need to handle locale-dependent casing in your code, consider using
1361    /// an external crate, like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1362    /// which is developed by Unicode.
1363    ///
1364    /// ## Greek sigma
1365    ///
1366    /// In Greek, the letter sigma (uppercase 'Σ') has two lowercase forms:
1367    /// 'σ' which is used in most situations, and 'ς' which appears only
1368    /// at the end of a word. [`char::to_lowercase()`] always uses the first form:
1369    ///
1370    /// ```
1371    /// assert_eq!('Σ'.to_lowercase().to_string(), "σ");
1372    /// ```
1373    ///
1374    /// `str::to_lowercase()` (only available with the `alloc` crate)
1375    /// *does* properly handle this contextual mapping,
1376    /// so prefer using that method if you can. Alternatively, you can use
1377    /// [`is_cased()`] and [`is_case_ignorable()`] to implement it yourself.
1378    /// See `Final_Sigma` in [Table 3.17] of the Unicode Standard,
1379    /// along with [`SpecialCasing.txt`], for more details.
1380    ///
1381    /// [`is_cased()`]: Self::is_cased()
1382    /// [`is_case_ignorable()`]: Self::is_case_ignorable()
1383    /// [Table 3.17]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G54277
1384    ///
1385    /// ## Turkish and Azeri I/ı/İ/i
1386    ///
1387    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1388    ///
1389    /// * 'Dotless': I / ı, sometimes written ï
1390    /// * 'Dotted': İ / i
1391    ///
1392    /// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
1393    ///
1394    /// ```
1395    /// let lower_i = 'I'.to_lowercase().to_string();
1396    /// ```
1397    ///
1398    /// `'I'`'s correct lowercase relies on the language of the text: if we're
1399    /// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
1400    /// be `"ı"`. `to_lowercase()` does not take this into account, and so:
1401    ///
1402    /// ```
1403    /// let lower_i = 'I'.to_lowercase().to_string();
1404    ///
1405    /// assert_eq!(lower_i, "i");
1406    /// ```
1407    ///
1408    /// holds across languages.
1409    ///
1410    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1411    #[must_use = "this returns the lowercased character as a new iterator, \
1412                  without modifying the original"]
1413    #[stable(feature = "rust1", since = "1.0.0")]
1414    #[inline]
1415    pub fn to_lowercase(self) -> ToLowercase {
1416        ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
1417    }
1418
1419    /// Returns an iterator that yields the titlecase mapping of this `char` as one or more
1420    /// `char`s.
1421    ///
1422    /// This is usually, but not always, equivalent to the uppercase mapping
1423    /// returned by [`to_uppercase()`]. Prefer this method when seeking to capitalize
1424    /// Only The First Letter of a word, but use [`to_uppercase()`] for ALL CAPS.
1425    /// See [below](#difference-from-uppercase) for a thorough explanation
1426    /// of the difference between the two methods.
1427    ///
1428    /// If this `char` does not have a titlecase mapping, the iterator yields the same `char`.
1429    ///
1430    /// If this `char` has a one-to-one titlecase mapping given by the [Unicode Character
1431    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1432    ///
1433    /// [ucd]: https://www.unicode.org/reports/tr44/
1434    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1435    ///
1436    /// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
1437    /// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
1438    ///
1439    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1440    ///
1441    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1442    /// is independent of context and language. See [below](#note-on-locale)
1443    /// for more information.
1444    ///
1445    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1446    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1447    ///
1448    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1449    ///
1450    /// # Examples
1451    ///
1452    /// As an iterator:
1453    ///
1454    /// ```
1455    /// #![feature(titlecase)]
1456    /// for c in 'ß'.to_titlecase() {
1457    ///     print!("{c}");
1458    /// }
1459    /// println!();
1460    /// ```
1461    ///
1462    /// Using `println!` directly:
1463    ///
1464    /// ```
1465    /// #![feature(titlecase)]
1466    /// println!("{}", 'ß'.to_titlecase());
1467    /// ```
1468    ///
1469    /// Both are equivalent to:
1470    ///
1471    /// ```
1472    /// println!("Ss");
1473    /// ```
1474    ///
1475    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1476    ///
1477    /// ```
1478    /// #![feature(titlecase)]
1479    /// assert_eq!('c'.to_titlecase().to_string(), "C");
1480    /// assert_eq!('ა'.to_titlecase().to_string(), "ა");
1481    /// assert_eq!('dž'.to_titlecase().to_string(), "Dž");
1482    /// assert_eq!('ᾨ'.to_titlecase().to_string(), "ᾨ");
1483    ///
1484    /// // Sometimes the result is more than one character:
1485    /// assert_eq!('ß'.to_titlecase().to_string(), "Ss");
1486    ///
1487    /// // Characters that do not have separate cased forms
1488    /// // convert into themselves.
1489    /// assert_eq!('山'.to_titlecase().to_string(), "山");
1490    /// ```
1491    ///
1492    /// # Difference from uppercase
1493    ///
1494    /// Currently, there are three classes of characters where [`to_uppercase()`]
1495    /// and `to_titlecase()` give different results:
1496    ///
1497    /// ## Georgian script
1498    ///
1499    /// Each letter in the modern Georgian alphabet can be written in one of two forms:
1500    /// the typical lowercase-like "mkhedruli" form, and a variant uppercase-like "mtavruli"
1501    /// form. However, unlike uppercase in most cased scripts, mtavruli is not typically used
1502    /// to start sentences, denote proper nouns, or for any other purpose
1503    /// in running text. It is instead confined to titles and headings, which are written entirely
1504    /// in mtavruli. For this reason, [`to_uppercase()`] applied to a Georgian letter
1505    /// will return the mtavruli form, but `to_titlecase()` will return the mkhedruli form.
1506    ///
1507    /// ```
1508    /// #![feature(titlecase)]
1509    /// let ani = 'ა'; // First letter of the Georgian alphabet, in mkhedruli form
1510    ///
1511    /// // Titlecasing mkhedruli maps it to itself...
1512    /// assert_eq!(ani.to_titlecase().to_string(), ani.to_string());
1513    ///
1514    /// // but uppercasing it maps it to mtavruli
1515    /// assert_eq!(ani.to_uppercase().to_string(), "Ა");
1516    /// ```
1517    ///
1518    /// ## Compatibility digraphs for Latin-alphabet Serbo-Croatian
1519    ///
1520    /// The standard Latin alphabet for the Serbo-Croatian language
1521    /// (Bosnian, Croatian, Montenegrin, and Serbian) contains
1522    /// three digraphs: Dž, Lj, and Nj. These are usually represented as
1523    /// two characters. However, for compatibility with older character sets,
1524    /// Unicode includes single-character versions of these digraphs.
1525    /// Each has an uppercase, titlecase, and lowercase version:
1526    ///
1527    /// - `'DŽ'`, `'Dž'`, `'dž'`
1528    /// - `'LJ'`, `'Lj'`, `'lj'`
1529    /// - `'NJ'`, `'Nj'`, `'nj'`
1530    ///
1531    /// Unicode additionally encodes a casing triad for the Dz digraph
1532    /// without the caron: `'DZ'`, `'Dz'`, `'dz'`.
1533    ///
1534    /// ## Iota-subscripted Greek vowels
1535    ///
1536    /// In ancient Greek, the long vowels alpha (α), eta (η), and omega (ω)
1537    /// were sometimes followed by an iota (ι), forming a diphthong. Over time,
1538    /// the diphthong pronunciation was slowly lost, with the iota becoming mute.
1539    /// Eventually, the ι disappeared from the spelling as well.
1540    /// However, there remains a need to represent ancient texts faithfully.
1541    ///
1542    /// Modern editions of ancient Greek texts commonly use a reduced-sized
1543    /// ι symbol to denote mute iotas, while distinguishing them from ιs
1544    /// which continued to affect pronunciation. The exact standard differs
1545    /// between different publications. Some render the mute ι below its associated
1546    /// vowel (subscript), while others place it to the right of said vowel (adscript).
1547    /// The interaction of mute ι symbols with casing also varies.
1548    ///
1549    /// The Unicode Standard, for its default casing rules, chose to make lowercase
1550    /// Greek vowels with iota subscript (e.g. `'ᾠ'`) titlecase to the uppercase vowel
1551    /// with iota subscript (`'ᾨ'`) but uppercase to the uppercase vowel followed by
1552    /// full-size uppercase iota (`"ὨΙ"`). This is just one convention among many
1553    /// in common use, but it is the one Unicode settled on,
1554    /// so it is what this method does also.
1555    ///
1556    /// # Note on locale
1557    ///
1558    /// As stated above, this method is locale-insensitive.
1559    /// If you need locale support, consider using an external crate,
1560    /// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1561    /// which is developed by Unicode. A description of one common
1562    /// locale-dependent casing issue follows (there are others):
1563    ///
1564    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1565    ///
1566    /// * 'Dotless': I / ı, sometimes written ï
1567    /// * 'Dotted': İ / i
1568    ///
1569    /// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
1570    ///
1571    /// ```
1572    /// #![feature(titlecase)]
1573    /// let upper_i = 'i'.to_titlecase().to_string();
1574    /// ```
1575    ///
1576    /// `'i'`'s correct titlecase relies on the language of the text: if we're
1577    /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
1578    /// be `"İ"`. `to_titlecase()` does not take this into account, and so:
1579    ///
1580    /// ```
1581    /// #![feature(titlecase)]
1582    /// let upper_i = 'i'.to_titlecase().to_string();
1583    ///
1584    /// assert_eq!(upper_i, "I");
1585    /// ```
1586    ///
1587    /// holds across languages.
1588    ///
1589    /// [`to_uppercase()`]: Self::to_uppercase()
1590    #[must_use = "this returns the titlecased character as a new iterator, \
1591                  without modifying the original"]
1592    #[unstable(feature = "titlecase", issue = "153892")]
1593    #[inline]
1594    pub fn to_titlecase(self) -> ToTitlecase {
1595        ToTitlecase(CaseMappingIter::new(conversions::to_title(self)))
1596    }
1597
1598    /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
1599    /// `char`s.
1600    ///
1601    /// Prefer this method when converting a word into ALL CAPS, but consider [`to_titlecase()`]
1602    /// instead if you seek to capitalize Only The First Letter. See that method's documentation
1603    /// for more information on the difference between the two.
1604    ///
1605    /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
1606    ///
1607    /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
1608    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1609    ///
1610    /// [ucd]: https://www.unicode.org/reports/tr44/
1611    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1612    ///
1613    /// If this `char` expands to multiple `char`s, the iterator yields the `char`s given by
1614    /// [`SpecialCasing.txt`]. The maximum number of `char`s in a case mapping is 3.
1615    ///
1616    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1617    ///
1618    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1619    /// is independent of context and language. See [below](#note-on-locale)
1620    /// for more information.
1621    ///
1622    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1623    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1624    ///
1625    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1626    ///
1627    /// # Examples
1628    ///
1629    /// `'ſt'` (U+FB05) is a single Unicode code point (a ligature) that maps to "ST" in uppercase.
1630    ///
1631    /// As an iterator:
1632    ///
1633    /// ```
1634    /// for c in 'ſt'.to_uppercase() {
1635    ///     print!("{c}");
1636    /// }
1637    /// println!();
1638    /// ```
1639    ///
1640    /// Using `println!` directly:
1641    ///
1642    /// ```
1643    /// println!("{}", 'ſt'.to_uppercase());
1644    /// ```
1645    ///
1646    /// Both are equivalent to:
1647    ///
1648    /// ```
1649    /// println!("ST");
1650    /// ```
1651    ///
1652    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1653    ///
1654    /// ```
1655    /// assert_eq!('c'.to_uppercase().to_string(), "C");
1656    /// assert_eq!('ა'.to_uppercase().to_string(), "Ა");
1657    /// assert_eq!('dž'.to_uppercase().to_string(), "DŽ");
1658    ///
1659    /// // Sometimes the result is more than one character:
1660    /// assert_eq!('ſt'.to_uppercase().to_string(), "ST");
1661    /// assert_eq!('ᾨ'.to_uppercase().to_string(), "ὨΙ");
1662    ///
1663    /// // Characters that do not have both uppercase and lowercase
1664    /// // convert into themselves.
1665    /// assert_eq!('山'.to_uppercase().to_string(), "山");
1666    /// ```
1667    ///
1668    /// # Note on locale
1669    ///
1670    /// As stated above, this method is locale-insensitive.
1671    /// If you need locale support, consider using an external crate,
1672    /// like [`icu_casemap`](https://crates.io/crates/icu_casemap)
1673    /// which is developed by Unicode. A description of one common
1674    /// locale-dependent casing issue follows (there are others):
1675    ///
1676    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1677    ///
1678    /// * 'Dotless': I / ı, sometimes written ï
1679    /// * 'Dotted': İ / i
1680    ///
1681    /// Note that the lowercase dotted 'i' is the same codepoint as the Latin. Therefore:
1682    ///
1683    /// ```
1684    /// let upper_i = 'i'.to_uppercase().to_string();
1685    /// ```
1686    ///
1687    /// `'i'`'s correct uppercase relies on the language of the text: if we're
1688    /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
1689    /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
1690    ///
1691    /// ```
1692    /// let upper_i = 'i'.to_uppercase().to_string();
1693    ///
1694    /// assert_eq!(upper_i, "I");
1695    /// ```
1696    ///
1697    /// holds across languages.
1698    ///
1699    /// [`to_titlecase()`]: Self::to_titlecase()
1700    #[must_use = "this returns the uppercased character as a new iterator, \
1701                  without modifying the original"]
1702    #[stable(feature = "rust1", since = "1.0.0")]
1703    #[inline]
1704    pub fn to_uppercase(self) -> ToUppercase {
1705        ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
1706    }
1707
1708    /// Returns an iterator that yields the case folding of this `char` as one or more
1709    /// `char`s.
1710    ///
1711    /// Case folding is meant to be used when performing case-insensitive string comparisons.
1712    /// Case-folded strings should not usually be exposed directly to users. For most,
1713    /// but not all, characters, the casefold mapping is identical to the lowercase one.
1714    ///
1715    /// This iterator yields the `char`(s) in the common or full case folding for this `char`,
1716    /// as given by the [Unicode Character Database][ucd] [`CaseFolding.txt`].
1717    /// The maximum number of `char`s in a case folding is 3.
1718    ///
1719    /// [ucd]: https://www.unicode.org/reports/tr44/
1720    /// [`CaseFolding.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt
1721    ///
1722    ///
1723    /// No [normalization] (e.g. NFC) is performed, so visually and semantically identical characters
1724    /// might still casefold differently. For example, `'ά'` (U+03AC GREEK SMALL LETTER ALPHA WITH TONOS)
1725    /// is considered distinct from `'ά'` (U+1F71 GREEK SMALL LETTER ALPHA WITH OXIA),
1726    /// even though Unicode considers them canonically equivalent.
1727    ///
1728    /// In addition, this method is independent of language/locale,
1729    /// so the special behavior of I/ı/İ/i in Turkish and Azeri is not handled.
1730    ///
1731    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case folding in
1732    /// general and Chapter 3 (Conformance) discusses the default algorithm for case folding.
1733    ///
1734    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1735    ///
1736    /// # Examples
1737    ///
1738    /// The German sharp S `'ß'` (U+DF) is a single Unicode code point
    /// that casefolds to `"ss"`. Its uppercase variant `'ẞ'` (U+1E9E)
1740    /// has the same case-folding.
1741    ///
1742    /// As an iterator:
1743    ///
1744    /// ```
1745    /// #![feature(casefold)]
1746    /// assert!('ß'.to_casefold_unnormalized().eq(['s', 's']));
1747    /// assert!('ẞ'.to_casefold_unnormalized().eq(['s', 's']));
1748    /// ```
1749    ///
1750    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1751    ///
1752    /// ```
1753    /// #![feature(casefold)]
1754    /// assert_eq!('ß'.to_casefold_unnormalized().to_string(), "ss");
1755    /// assert_eq!('ẞ'.to_casefold_unnormalized().to_string(), "ss");
1756    /// ```
1757    ///
1758    /// No [normalization] is performed:
1759    ///
1760    /// ```rust
1761    /// #![feature(casefold)]
1762    /// // These two characters are visually and semantically identical;
1763    /// // Unicode considers them to be canonically equivalent.
1764    /// let alpha_tonos = 'ά';
1765    /// let alpha_oxia = 'ά';
1766    ///
1767    /// // However, they are different codepoints:
1768    /// assert_eq!(alpha_tonos, '\u{03AC}');
1769    /// assert_eq!(alpha_oxia, '\u{1F71}');
1770    ///
1771    /// // Their case-foldings are likewise unequal:
1772    /// assert!(alpha_tonos.to_casefold_unnormalized().eq(['\u{03AC}']));
1773    /// assert!(alpha_oxia.to_casefold_unnormalized().eq(['\u{1F71}']));
1774    /// ```
1775    ///
1776    /// # Note on locale
1777    ///
1778    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
1779    ///
1780    /// * 'Dotless': I / ı, sometimes written ï
1781    /// * 'Dotted': İ / i
1782    ///
1783    /// Note that the uppercase undotted 'I' is the same codepoint as the Latin. Therefore:
1784    ///
1785    /// ```
1786    /// #![feature(casefold)]
1787    /// let casefold_i = 'I'.to_casefold_unnormalized().to_string();
1788    /// ```
1789    ///
1790    /// `'I'`'s correct case folding relies on the language of the text: if we're
1791    /// in `en-US`, it should be `"i"`, but if we're in `tr-TR` or `az-AZ`, it should
1792    /// be `"ı"`. `to_casefold_unnormalized()` does not take this into account, and so:
1793    ///
1794    /// ```
1795    /// #![feature(casefold)]
1796    /// let casefold_i = 'I'.to_casefold_unnormalized().to_string();
1797    ///
1798    /// assert_eq!(casefold_i, "i");
1799    /// ```
1800    ///
1801    /// holds across languages.
1802    ///
1803    /// [normalization]: https://www.unicode.org/faq/normalization.html
1804    #[must_use = "this returns the case-folded character as a new iterator, \
1805                  without modifying the original"]
1806    #[unstable(feature = "casefold", issue = "154742")]
1807    #[inline]
1808    pub fn to_casefold_unnormalized(self) -> ToCasefold {
1809        ToCasefold(CaseMappingIter::new(conversions::to_casefold(self)))
1810    }
1811
1812    /// Checks if the value is within the ASCII range.
1813    ///
1814    /// # Examples
1815    ///
1816    /// ```
1817    /// let ascii = 'a';
1818    /// let non_ascii = '❤';
1819    ///
1820    /// assert!(ascii.is_ascii());
1821    /// assert!(!non_ascii.is_ascii());
1822    /// ```
1823    #[must_use]
1824    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1825    #[rustc_const_stable(feature = "const_char_is_ascii", since = "1.32.0")]
1826    #[rustc_diagnostic_item = "char_is_ascii"]
1827    #[inline]
1828    pub const fn is_ascii(&self) -> bool {
1829        *self as u32 <= 0x7F
1830    }
1831
1832    /// Returns `Some` if the value is within the ASCII range,
1833    /// or `None` if it's not.
1834    ///
1835    /// This is preferred to [`Self::is_ascii`] when you're passing the value
1836    /// along to something else that can take [`ascii::Char`] rather than
1837    /// needing to check again for itself whether the value is in ASCII.
1838    #[must_use]
1839    #[unstable(feature = "ascii_char", issue = "110998")]
1840    #[inline]
1841    pub const fn as_ascii(&self) -> Option<ascii::Char> {
1842        if self.is_ascii() {
1843            // SAFETY: Just checked that this is ASCII.
1844            Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) })
1845        } else {
1846            None
1847        }
1848    }
1849
1850    /// Converts this char into an [ASCII character](`ascii::Char`), without
1851    /// checking whether it is valid.
1852    ///
1853    /// # Safety
1854    ///
1855    /// This char must be within the ASCII range, or else this is UB.
1856    #[must_use]
1857    #[unstable(feature = "ascii_char", issue = "110998")]
1858    #[inline]
1859    pub const unsafe fn as_ascii_unchecked(&self) -> ascii::Char {
1860        assert_unsafe_precondition!(
1861            check_library_ub,
1862            "as_ascii_unchecked requires that the char is valid ASCII",
1863            (it: &char = self) => it.is_ascii()
1864        );
1865
1866        // SAFETY: the caller promised that this char is ASCII.
1867        unsafe { ascii::Char::from_u8_unchecked(*self as u8) }
1868    }
1869
1870    /// Makes a copy of the value in its ASCII upper case equivalent.
1871    ///
1872    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1873    /// but non-ASCII letters are unchanged.
1874    ///
1875    /// To uppercase the value in-place, use [`make_ascii_uppercase()`].
1876    ///
1877    /// To uppercase ASCII characters in addition to non-ASCII characters, use
1878    /// [`to_uppercase()`].
1879    ///
1880    /// # Examples
1881    ///
1882    /// ```
1883    /// let ascii = 'a';
1884    /// let non_ascii = '❤';
1885    ///
1886    /// assert_eq!('A', ascii.to_ascii_uppercase());
1887    /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
1888    /// ```
1889    ///
1890    /// [`make_ascii_uppercase()`]: #method.make_ascii_uppercase
1891    /// [`to_uppercase()`]: #method.to_uppercase
1892    #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`"]
1893    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1894    #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1895    #[inline]
1896    pub const fn to_ascii_uppercase(&self) -> char {
1897        if self.is_ascii_lowercase() {
1898            (*self as u8).ascii_change_case_unchecked() as char
1899        } else {
1900            *self
1901        }
1902    }
1903
1904    /// Makes a copy of the value in its ASCII lower case equivalent.
1905    ///
1906    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1907    /// but non-ASCII letters are unchanged.
1908    ///
1909    /// To lowercase the value in-place, use [`make_ascii_lowercase()`].
1910    ///
1911    /// To lowercase ASCII characters in addition to non-ASCII characters, use
1912    /// [`to_lowercase()`].
1913    ///
1914    /// # Examples
1915    ///
1916    /// ```
1917    /// let ascii = 'A';
1918    /// let non_ascii = '❤';
1919    ///
1920    /// assert_eq!('a', ascii.to_ascii_lowercase());
1921    /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
1922    /// ```
1923    ///
1924    /// [`make_ascii_lowercase()`]: #method.make_ascii_lowercase
1925    /// [`to_lowercase()`]: #method.to_lowercase
1926    #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`"]
1927    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1928    #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1929    #[inline]
1930    pub const fn to_ascii_lowercase(&self) -> char {
1931        if self.is_ascii_uppercase() {
1932            (*self as u8).ascii_change_case_unchecked() as char
1933        } else {
1934            *self
1935        }
1936    }
1937
1938    /// Checks that two values are an ASCII case-insensitive match.
1939    ///
1940    /// Equivalent to <code>[to_ascii_lowercase]\(a) == [to_ascii_lowercase]\(b)</code>.
1941    ///
1942    /// # Examples
1943    ///
1944    /// ```
1945    /// let upper_a = 'A';
1946    /// let lower_a = 'a';
1947    /// let lower_z = 'z';
1948    ///
1949    /// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
1950    /// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
1951    /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
1952    /// ```
1953    ///
1954    /// [to_ascii_lowercase]: #method.to_ascii_lowercase
1955    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1956    #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1957    #[inline]
1958    pub const fn eq_ignore_ascii_case(&self, other: &char) -> bool {
1959        self.to_ascii_lowercase() == other.to_ascii_lowercase()
1960    }
1961
1962    /// Converts this type to its ASCII upper case equivalent in-place.
1963    ///
1964    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1965    /// but non-ASCII letters are unchanged.
1966    ///
1967    /// To return a new uppercased value without modifying the existing one, use
1968    /// [`to_ascii_uppercase()`].
1969    ///
1970    /// # Examples
1971    ///
1972    /// ```
1973    /// let mut ascii = 'a';
1974    ///
1975    /// ascii.make_ascii_uppercase();
1976    ///
1977    /// assert_eq!('A', ascii);
1978    /// ```
1979    ///
1980    /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
1981    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1982    #[rustc_const_stable(feature = "const_make_ascii", since = "1.84.0")]
1983    #[inline]
1984    pub const fn make_ascii_uppercase(&mut self) {
1985        *self = self.to_ascii_uppercase();
1986    }
1987
1988    /// Converts this type to its ASCII lower case equivalent in-place.
1989    ///
1990    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1991    /// but non-ASCII letters are unchanged.
1992    ///
1993    /// To return a new lowercased value without modifying the existing one, use
1994    /// [`to_ascii_lowercase()`].
1995    ///
1996    /// # Examples
1997    ///
1998    /// ```
1999    /// let mut ascii = 'A';
2000    ///
2001    /// ascii.make_ascii_lowercase();
2002    ///
2003    /// assert_eq!('a', ascii);
2004    /// ```
2005    ///
2006    /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
2007    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2008    #[rustc_const_stable(feature = "const_make_ascii", since = "1.84.0")]
2009    #[inline]
2010    pub const fn make_ascii_lowercase(&mut self) {
2011        *self = self.to_ascii_lowercase();
2012    }
2013
2014    /// Checks if the value is an ASCII alphabetic character:
2015    ///
2016    /// - U+0041 'A' ..= U+005A 'Z', or
2017    /// - U+0061 'a' ..= U+007A 'z'.
2018    ///
2019    /// # Examples
2020    ///
2021    /// ```
2022    /// let uppercase_a = 'A';
2023    /// let uppercase_g = 'G';
2024    /// let a = 'a';
2025    /// let g = 'g';
2026    /// let zero = '0';
2027    /// let percent = '%';
2028    /// let space = ' ';
2029    /// let lf = '\n';
2030    /// let esc = '\x1b';
2031    ///
2032    /// assert!(uppercase_a.is_ascii_alphabetic());
2033    /// assert!(uppercase_g.is_ascii_alphabetic());
2034    /// assert!(a.is_ascii_alphabetic());
2035    /// assert!(g.is_ascii_alphabetic());
2036    /// assert!(!zero.is_ascii_alphabetic());
2037    /// assert!(!percent.is_ascii_alphabetic());
2038    /// assert!(!space.is_ascii_alphabetic());
2039    /// assert!(!lf.is_ascii_alphabetic());
2040    /// assert!(!esc.is_ascii_alphabetic());
2041    /// ```
2042    #[must_use]
2043    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2044    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2045    #[inline]
2046    pub const fn is_ascii_alphabetic(&self) -> bool {
2047        matches!(*self, 'a'..='z' | 'A'..='Z')
2048    }
2049
2050    /// Checks if the value is an ASCII uppercase character:
2051    /// U+0041 'A' ..= U+005A 'Z'.
2052    ///
2053    /// # Examples
2054    ///
2055    /// ```
2056    /// let uppercase_a = 'A';
2057    /// let uppercase_g = 'G';
2058    /// let a = 'a';
2059    /// let g = 'g';
2060    /// let zero = '0';
2061    /// let percent = '%';
2062    /// let space = ' ';
2063    /// let lf = '\n';
2064    /// let esc = '\x1b';
2065    ///
2066    /// assert!(uppercase_a.is_ascii_uppercase());
2067    /// assert!(uppercase_g.is_ascii_uppercase());
2068    /// assert!(!a.is_ascii_uppercase());
2069    /// assert!(!g.is_ascii_uppercase());
2070    /// assert!(!zero.is_ascii_uppercase());
2071    /// assert!(!percent.is_ascii_uppercase());
2072    /// assert!(!space.is_ascii_uppercase());
2073    /// assert!(!lf.is_ascii_uppercase());
2074    /// assert!(!esc.is_ascii_uppercase());
2075    /// ```
2076    #[must_use]
2077    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2078    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2079    #[inline]
2080    pub const fn is_ascii_uppercase(&self) -> bool {
2081        matches!(*self, 'A'..='Z')
2082    }
2083
2084    /// Checks if the value is an ASCII lowercase character:
2085    /// U+0061 'a' ..= U+007A 'z'.
2086    ///
2087    /// # Examples
2088    ///
2089    /// ```
2090    /// let uppercase_a = 'A';
2091    /// let uppercase_g = 'G';
2092    /// let a = 'a';
2093    /// let g = 'g';
2094    /// let zero = '0';
2095    /// let percent = '%';
2096    /// let space = ' ';
2097    /// let lf = '\n';
2098    /// let esc = '\x1b';
2099    ///
2100    /// assert!(!uppercase_a.is_ascii_lowercase());
2101    /// assert!(!uppercase_g.is_ascii_lowercase());
2102    /// assert!(a.is_ascii_lowercase());
2103    /// assert!(g.is_ascii_lowercase());
2104    /// assert!(!zero.is_ascii_lowercase());
2105    /// assert!(!percent.is_ascii_lowercase());
2106    /// assert!(!space.is_ascii_lowercase());
2107    /// assert!(!lf.is_ascii_lowercase());
2108    /// assert!(!esc.is_ascii_lowercase());
2109    /// ```
2110    #[must_use]
2111    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2112    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2113    #[inline]
2114    pub const fn is_ascii_lowercase(&self) -> bool {
2115        matches!(*self, 'a'..='z')
2116    }
2117
2118    /// Checks if the value is an ASCII alphanumeric character:
2119    ///
2120    /// - U+0041 'A' ..= U+005A 'Z', or
2121    /// - U+0061 'a' ..= U+007A 'z', or
2122    /// - U+0030 '0' ..= U+0039 '9'.
2123    ///
2124    /// # Examples
2125    ///
2126    /// ```
2127    /// let uppercase_a = 'A';
2128    /// let uppercase_g = 'G';
2129    /// let a = 'a';
2130    /// let g = 'g';
2131    /// let zero = '0';
2132    /// let percent = '%';
2133    /// let space = ' ';
2134    /// let lf = '\n';
2135    /// let esc = '\x1b';
2136    ///
2137    /// assert!(uppercase_a.is_ascii_alphanumeric());
2138    /// assert!(uppercase_g.is_ascii_alphanumeric());
2139    /// assert!(a.is_ascii_alphanumeric());
2140    /// assert!(g.is_ascii_alphanumeric());
2141    /// assert!(zero.is_ascii_alphanumeric());
2142    /// assert!(!percent.is_ascii_alphanumeric());
2143    /// assert!(!space.is_ascii_alphanumeric());
2144    /// assert!(!lf.is_ascii_alphanumeric());
2145    /// assert!(!esc.is_ascii_alphanumeric());
2146    /// ```
2147    #[must_use]
2148    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2149    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2150    #[inline]
2151    pub const fn is_ascii_alphanumeric(&self) -> bool {
2152        matches!(*self, '0'..='9') | matches!(*self, 'A'..='Z') | matches!(*self, 'a'..='z')
2153    }
2154
2155    /// Checks if the value is an ASCII decimal digit:
2156    /// U+0030 '0' ..= U+0039 '9'.
2157    ///
2158    /// # Examples
2159    ///
2160    /// ```
2161    /// let uppercase_a = 'A';
2162    /// let uppercase_g = 'G';
2163    /// let a = 'a';
2164    /// let g = 'g';
2165    /// let zero = '0';
2166    /// let percent = '%';
2167    /// let space = ' ';
2168    /// let lf = '\n';
2169    /// let esc = '\x1b';
2170    ///
2171    /// assert!(!uppercase_a.is_ascii_digit());
2172    /// assert!(!uppercase_g.is_ascii_digit());
2173    /// assert!(!a.is_ascii_digit());
2174    /// assert!(!g.is_ascii_digit());
2175    /// assert!(zero.is_ascii_digit());
2176    /// assert!(!percent.is_ascii_digit());
2177    /// assert!(!space.is_ascii_digit());
2178    /// assert!(!lf.is_ascii_digit());
2179    /// assert!(!esc.is_ascii_digit());
2180    /// ```
2181    #[must_use]
2182    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2183    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2184    #[inline]
2185    pub const fn is_ascii_digit(&self) -> bool {
2186        matches!(*self, '0'..='9')
2187    }
2188
2189    /// Checks if the value is an ASCII octal digit:
2190    /// U+0030 '0' ..= U+0037 '7'.
2191    ///
2192    /// # Examples
2193    ///
2194    /// ```
2195    /// #![feature(is_ascii_octdigit)]
2196    ///
2197    /// let uppercase_a = 'A';
2198    /// let a = 'a';
2199    /// let zero = '0';
2200    /// let seven = '7';
2201    /// let nine = '9';
2202    /// let percent = '%';
2203    /// let lf = '\n';
2204    ///
2205    /// assert!(!uppercase_a.is_ascii_octdigit());
2206    /// assert!(!a.is_ascii_octdigit());
2207    /// assert!(zero.is_ascii_octdigit());
2208    /// assert!(seven.is_ascii_octdigit());
2209    /// assert!(!nine.is_ascii_octdigit());
2210    /// assert!(!percent.is_ascii_octdigit());
2211    /// assert!(!lf.is_ascii_octdigit());
2212    /// ```
2213    #[must_use]
2214    #[unstable(feature = "is_ascii_octdigit", issue = "101288")]
2215    #[inline]
2216    pub const fn is_ascii_octdigit(&self) -> bool {
2217        matches!(*self, '0'..='7')
2218    }
2219
2220    /// Checks if the value is an ASCII hexadecimal digit:
2221    ///
2222    /// - U+0030 '0' ..= U+0039 '9', or
2223    /// - U+0041 'A' ..= U+0046 'F', or
2224    /// - U+0061 'a' ..= U+0066 'f'.
2225    ///
2226    /// # Examples
2227    ///
2228    /// ```
2229    /// let uppercase_a = 'A';
2230    /// let uppercase_g = 'G';
2231    /// let a = 'a';
2232    /// let g = 'g';
2233    /// let zero = '0';
2234    /// let percent = '%';
2235    /// let space = ' ';
2236    /// let lf = '\n';
2237    /// let esc = '\x1b';
2238    ///
2239    /// assert!(uppercase_a.is_ascii_hexdigit());
2240    /// assert!(!uppercase_g.is_ascii_hexdigit());
2241    /// assert!(a.is_ascii_hexdigit());
2242    /// assert!(!g.is_ascii_hexdigit());
2243    /// assert!(zero.is_ascii_hexdigit());
2244    /// assert!(!percent.is_ascii_hexdigit());
2245    /// assert!(!space.is_ascii_hexdigit());
2246    /// assert!(!lf.is_ascii_hexdigit());
2247    /// assert!(!esc.is_ascii_hexdigit());
2248    /// ```
2249    #[must_use]
2250    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2251    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2252    #[inline]
2253    pub const fn is_ascii_hexdigit(&self) -> bool {
2254        matches!(*self, '0'..='9') | matches!(*self, 'A'..='F') | matches!(*self, 'a'..='f')
2255    }
2256
2257    /// Checks if the value is an ASCII punctuation or symbol character
2258    /// (i.e. not alphanumeric, whitespace, or control):
2259    ///
2260    /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /`, or
2261    /// - U+003A ..= U+0040 `: ; < = > ? @`, or
2262    /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``, or
2263    /// - U+007B ..= U+007E `{ | } ~`
2264    ///
2265    /// # Examples
2266    ///
2267    /// ```
2268    /// let uppercase_a = 'A';
2269    /// let uppercase_g = 'G';
2270    /// let a = 'a';
2271    /// let g = 'g';
2272    /// let zero = '0';
2273    /// let percent = '%';
2274    /// let space = ' ';
2275    /// let lf = '\n';
2276    /// let esc = '\x1b';
2277    ///
2278    /// assert!(!uppercase_a.is_ascii_punctuation());
2279    /// assert!(!uppercase_g.is_ascii_punctuation());
2280    /// assert!(!a.is_ascii_punctuation());
2281    /// assert!(!g.is_ascii_punctuation());
2282    /// assert!(!zero.is_ascii_punctuation());
2283    /// assert!(percent.is_ascii_punctuation());
2284    /// assert!(!space.is_ascii_punctuation());
2285    /// assert!(!lf.is_ascii_punctuation());
2286    /// assert!(!esc.is_ascii_punctuation());
2287    /// ```
2288    #[must_use]
2289    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2290    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2291    #[inline]
2292    pub const fn is_ascii_punctuation(&self) -> bool {
2293        matches!(*self, '!'..='/')
2294            | matches!(*self, ':'..='@')
2295            | matches!(*self, '['..='`')
2296            | matches!(*self, '{'..='~')
2297    }
2298
2299    /// Checks if the value is an ASCII graphic character
2300    /// (i.e. not whitespace or control):
2301    /// U+0021 '!' ..= U+007E '~'.
2302    ///
2303    /// # Examples
2304    ///
2305    /// ```
2306    /// let uppercase_a = 'A';
2307    /// let uppercase_g = 'G';
2308    /// let a = 'a';
2309    /// let g = 'g';
2310    /// let zero = '0';
2311    /// let percent = '%';
2312    /// let space = ' ';
2313    /// let lf = '\n';
2314    /// let esc = '\x1b';
2315    ///
2316    /// assert!(uppercase_a.is_ascii_graphic());
2317    /// assert!(uppercase_g.is_ascii_graphic());
2318    /// assert!(a.is_ascii_graphic());
2319    /// assert!(g.is_ascii_graphic());
2320    /// assert!(zero.is_ascii_graphic());
2321    /// assert!(percent.is_ascii_graphic());
2322    /// assert!(!space.is_ascii_graphic());
2323    /// assert!(!lf.is_ascii_graphic());
2324    /// assert!(!esc.is_ascii_graphic());
2325    /// ```
2326    #[must_use]
2327    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2328    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2329    #[inline]
2330    pub const fn is_ascii_graphic(&self) -> bool {
2331        matches!(*self, '!'..='~')
2332    }
2333
2334    /// Checks if the value is an ASCII whitespace character:
2335    /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
2336    /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
2337    ///
2338    /// **Warning:** Because the list above excludes U+000B VERTICAL TAB,
2339    /// `c.is_ascii_whitespace()` is **not** equivalent to `c.is_ascii() && c.is_whitespace()`.
2340    ///
2341    /// Rust uses the WhatWG Infra Standard's [definition of ASCII
2342    /// whitespace][infra-aw]. There are several other definitions in
2343    /// wide use. For instance, [the POSIX locale][pct] includes
2344    /// U+000B VERTICAL TAB as well as all the above characters,
2345    /// but—from the very same specification—[the default rule for
2346    /// "field splitting" in the Bourne shell][bfs] considers *only*
2347    /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
2348    ///
2349    /// If you are writing a program that will process an existing
2350    /// file format, check what that format's definition of whitespace is
2351    /// before using this function.
2352    ///
2353    /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
2354    /// [pct]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
2355    /// [bfs]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
2356    ///
2357    /// # Examples
2358    ///
2359    /// ```
2360    /// let uppercase_a = 'A';
2361    /// let uppercase_g = 'G';
2362    /// let a = 'a';
2363    /// let g = 'g';
2364    /// let zero = '0';
2365    /// let percent = '%';
2366    /// let space = ' ';
2367    /// let lf = '\n';
2368    /// let esc = '\x1b';
2369    ///
2370    /// assert!(!uppercase_a.is_ascii_whitespace());
2371    /// assert!(!uppercase_g.is_ascii_whitespace());
2372    /// assert!(!a.is_ascii_whitespace());
2373    /// assert!(!g.is_ascii_whitespace());
2374    /// assert!(!zero.is_ascii_whitespace());
2375    /// assert!(!percent.is_ascii_whitespace());
2376    /// assert!(space.is_ascii_whitespace());
2377    /// assert!(lf.is_ascii_whitespace());
2378    /// assert!(!esc.is_ascii_whitespace());
2379    /// ```
2380    #[must_use]
2381    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2382    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2383    #[inline]
2384    pub const fn is_ascii_whitespace(&self) -> bool {
2385        matches!(*self, '\t' | '\n' | '\x0C' | '\r' | ' ')
2386    }
2387
2388    /// Checks if the value is an ASCII control character:
2389    /// U+0000 NUL ..= U+001F UNIT SEPARATOR, or U+007F DELETE.
2390    /// Note that most ASCII whitespace characters are control
2391    /// characters, but SPACE is not.
2392    ///
2393    /// # Examples
2394    ///
2395    /// ```
2396    /// let uppercase_a = 'A';
2397    /// let uppercase_g = 'G';
2398    /// let a = 'a';
2399    /// let g = 'g';
2400    /// let zero = '0';
2401    /// let percent = '%';
2402    /// let space = ' ';
2403    /// let lf = '\n';
2404    /// let esc = '\x1b';
2405    ///
2406    /// assert!(!uppercase_a.is_ascii_control());
2407    /// assert!(!uppercase_g.is_ascii_control());
2408    /// assert!(!a.is_ascii_control());
2409    /// assert!(!g.is_ascii_control());
2410    /// assert!(!zero.is_ascii_control());
2411    /// assert!(!percent.is_ascii_control());
2412    /// assert!(!space.is_ascii_control());
2413    /// assert!(lf.is_ascii_control());
2414    /// assert!(esc.is_ascii_control());
2415    /// ```
2416    #[must_use]
2417    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
2418    #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
2419    #[inline]
2420    pub const fn is_ascii_control(&self) -> bool {
2421        matches!(*self, '\0'..='\x1F' | '\x7F')
2422    }
2423}
2424
/// Independent on/off switches, one per optional escape category.
pub(crate) struct EscapeDebugExtArgs {
    /// When `true`, Grapheme Extender code points are escaped.
    pub(crate) escape_grapheme_extender: bool,

    /// When `true`, single quotes are escaped.
    pub(crate) escape_single_quote: bool,

    /// When `true`, double quotes are escaped.
    pub(crate) escape_double_quote: bool,
}

impl EscapeDebugExtArgs {
    /// The configuration in which every switch is turned on.
    pub(crate) const ESCAPE_ALL: EscapeDebugExtArgs = EscapeDebugExtArgs {
        escape_grapheme_extender: true,
        escape_single_quote: true,
        escape_double_quote: true,
    };
}
2443
2444#[inline]
2445#[must_use]
2446const fn len_utf8(code: u32) -> usize {
2447    match code {
2448        ..MAX_ONE_B => 1,
2449        ..MAX_TWO_B => 2,
2450        ..MAX_THREE_B => 3,
2451        _ => 4,
2452    }
2453}
2454
/// Returns how many `u16` units the UTF-16 encoding of the raw code point `code` occupies.
///
/// A value that fits in 16 bits takes one unit; every larger value takes two.
/// No validity check is performed on `code`.
#[inline]
#[must_use]
const fn len_utf16(code: u32) -> usize {
    match code {
        0..=0xFFFF => 1,
        _ => 2,
    }
}
2460
2461/// Encodes a raw `u32` value as UTF-8 into the provided byte buffer,
2462/// and then returns the subslice of the buffer that contains the encoded character.
2463///
2464/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
2465/// (Creating a `char` in the surrogate range is UB.)
2466/// The result is valid [generalized UTF-8] but not valid UTF-8.
2467///
2468/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
2469///
2470/// # Panics
2471///
2472/// Panics if the buffer is not large enough.
2473/// A buffer of length four is large enough to encode any `char`.
2474#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
2475#[doc(hidden)]
2476#[inline]
2477pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
2478    let len = len_utf8(code);
2479    if dst.len() < len {
2480        const_panic!(
2481            "encode_utf8: buffer does not have enough bytes to encode code point",
2482            "encode_utf8: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
2483            code: u32 = code,
2484            len: usize = len,
2485            dst_len: usize = dst.len(),
2486        );
2487    }
2488
2489    // SAFETY: `dst` is checked to be at least the length needed to encode the codepoint.
2490    unsafe { encode_utf8_raw_unchecked(code, dst.as_mut_ptr()) };
2491
2492    // SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
2493    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
2494}
2495
2496/// Encodes a raw `u32` value as UTF-8 into the byte buffer pointed to by `dst`.
2497///
2498/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
2499/// (Creating a `char` in the surrogate range is UB.)
2500/// The result is valid [generalized UTF-8] but not valid UTF-8.
2501///
2502/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
2503///
2504/// # Safety
2505///
2506/// The behavior is undefined if the buffer pointed to by `dst` is not
2507/// large enough to hold the encoded codepoint. A buffer of length four
2508/// is large enough to encode any `char`.
2509///
2510/// For a safe version of this function, see the [`encode_utf8_raw`] function.
2511#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
2512#[doc(hidden)]
2513#[inline]
2514pub const unsafe fn encode_utf8_raw_unchecked(code: u32, dst: *mut u8) {
2515    let len = len_utf8(code);
2516    // SAFETY: The caller must guarantee that the buffer pointed to by `dst`
2517    // is at least `len` bytes long.
2518    unsafe {
2519        if len == 1 {
2520            *dst = code as u8;
2521            return;
2522        }
2523
2524        let last1 = (code >> 0 & 0x3F) as u8 | TAG_CONT;
2525        let last2 = (code >> 6 & 0x3F) as u8 | TAG_CONT;
2526        let last3 = (code >> 12 & 0x3F) as u8 | TAG_CONT;
2527        let last4 = (code >> 18 & 0x3F) as u8 | TAG_FOUR_B;
2528
2529        if len == 2 {
2530            *dst = last2 | TAG_TWO_B;
2531            *dst.add(1) = last1;
2532            return;
2533        }
2534
2535        if len == 3 {
2536            *dst = last3 | TAG_THREE_B;
2537            *dst.add(1) = last2;
2538            *dst.add(2) = last1;
2539            return;
2540        }
2541
2542        *dst = last4;
2543        *dst.add(1) = last3;
2544        *dst.add(2) = last2;
2545        *dst.add(3) = last1;
2546    }
2547}
2548
2549/// Encodes a raw `u32` value as native endian UTF-16 into the provided `u16` buffer,
2550/// and then returns the subslice of the buffer that contains the encoded character.
2551///
2552/// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
2553/// (Creating a `char` in the surrogate range is UB.)
2554///
2555/// # Panics
2556///
2557/// Panics if the buffer is not large enough.
2558/// A buffer of length 2 is large enough to encode any `char`.
2559#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
2560#[doc(hidden)]
2561#[inline]
2562pub const fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
2563    let len = len_utf16(code);
2564    match (len, &mut *dst) {
2565        (1, [a, ..]) => {
2566            *a = code as u16;
2567        }
2568        (2, [a, b, ..]) => {
2569            code -= 0x1_0000;
2570            *a = (code >> 10) as u16 | 0xD800;
2571            *b = (code & 0x3FF) as u16 | 0xDC00;
2572        }
2573        _ => {
2574            const_panic!(
2575                "encode_utf16: buffer does not have enough bytes to encode code point",
2576                "encode_utf16: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}",
2577                code: u32 = code,
2578                len: usize = len,
2579                dst_len: usize = dst.len(),
2580            )
2581        }
2582    };
2583    // SAFETY: `<&mut [u16]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
2584    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
2585}