File: __nsstring.rs

package info (click to toggle)
rust-coreutils 0.7.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 505,620 kB
  • sloc: ansic: 103,594; asm: 28,570; sh: 8,910; python: 5,581; makefile: 472; cpp: 97; javascript: 72
file content (108 lines) | stat: -rw-r--r-- 4,111 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
use core::ffi::c_char;
use core::slice;
use core::str;

use crate::ffi::NSUInteger;
use crate::msg_send;
use crate::rc::AutoreleasePool;
use crate::runtime::NSObject;

// Note: While this is not public, it is still a breaking change to modify,
// since `objc2-foundation` relies on it.
//
// Encoding identifier that `nsstring_len` passes as the argument of
// `lengthOfBytesUsingEncoding:`. The numeric value is the same in both
// configurations; only the integer type differs, selected by the
// `gnustep-1-7` feature.
//
// NOTE(review): presumably mirrors Foundation's `NSUTF8StringEncoding` and
// the differing types track how each runtime declares its encoding type —
// confirm against the respective headers.
#[cfg(not(feature = "gnustep-1-7"))]
pub const UTF8_ENCODING: usize = 4;
#[cfg(feature = "gnustep-1-7")]
pub const UTF8_ENCODING: i32 = 4;

/// The number of UTF-8 code units in the given string.
///
/// Obtained by sending `lengthOfBytesUsingEncoding:` to the object with
/// [`UTF8_ENCODING`] as the argument; the result is returned unchanged.
///
/// # Safety
///
/// The object must be an instance of `NSString`.
//
// Note: While this is not public, it is still a breaking change to modify,
// since `objc2-foundation` relies on it.
#[inline]
pub unsafe fn nsstring_len(obj: &NSObject) -> NSUInteger {
    // SAFETY: Upheld by the caller: `obj` is an `NSString`, so it is expected
    // to respond to `lengthOfBytesUsingEncoding:`. The return type of the
    // message send is inferred from this function's return type.
    unsafe { msg_send![obj, lengthOfBytesUsingEncoding: UTF8_ENCODING] }
}

/// Extract a [`str`](`prim@str`) representation out of the given NSString.
///
/// Uses [`UTF8String`] under the hood. The length of the returned string is
/// determined with [`nsstring_len`].
///
/// If `UTF8String` yields a NULL pointer, an empty string is returned
/// instead of constructing a slice from the NULL pointer.
///
/// [`UTF8String`]: https://developer.apple.com/documentation/foundation/nsstring/1411189-utf8string?language=objc
///
///
/// # Panics
///
/// When debug assertions are enabled, panics if the bytes returned by
/// `UTF8String` are not valid UTF-8. Without debug assertions the bytes are
/// not validated.
///
/// # Safety
///
/// - The object must be an instance of `NSString`.
/// - The returned string must not be moved outside the autorelease pool into
///   which it (may) have been released.
///
/// Furthermore, the object must not, as is always the case for strings, be
/// mutated in parallel.
//
// Note: While this is not public, it is still a breaking change to modify,
// since `objc2-foundation` relies on it.
pub unsafe fn nsstring_to_str<'r, 's: 'r, 'p: 'r>(
    obj: &'s NSObject,
    pool: AutoreleasePool<'p>,
) -> &'r str {
    // This is necessary until `auto` types stabilize.
    pool.__verify_is_inner();

    // The documentation on `UTF8String` is quite sparse, but with educated
    // guesses, testing, reading the code for `CFString` and a bit of
    // reverse-engineering, we've determined that `NSString` stores a pointer
    // to the string data, sometimes with a UTF-8 encoding (usual for ascii
    // data), sometimes in other encodings (often UTF-16).
    //
    // `UTF8String` then checks the internal encoding:
    // - If the data is UTF-8 encoded, and (since macOS 10.6) if the string is
    //   immutable, it returns the internal pointer using
    //   `CFStringGetCStringPtr`.
    // - Otherwise, if the data is in another encoding or is mutable, it
    //   creates a new allocation, writes the UTF-8 representation of the
    //   string into it, autoreleases the allocation, and returns a pointer to
    //   it (similar to `CFStringGetCString`).
    //
    // If the string is a tagged pointer, or a custom subclass, then another
    // code-path is taken that always creates a new allocation and copies the
    // string into that using (effectively) `length` and `characterAtIndex:`.
    //
    // As a result, the lifetime of the returned pointer is either the same as
    // the passed-in `NSString` OR the lifetime of the current / innermost
    // `@autoreleasepool`.
    //
    // Furthermore, we can allow creating a `&str` from `&obj`, even if the
    // string is originally a `NSMutableString` which may be mutated later on,
    // since in that case the lifetime will be tied to the pool and not the
    // string.
    //
    // SAFETY: Caller ensures that the object is an instance of `NSString`,
    // which responds to `UTF8String`.
    let bytes: *const c_char = unsafe { msg_send![obj, UTF8String] };
    let bytes: *const u8 = bytes.cast();

    // `UTF8String` is declared as nullable in Foundation's headers, and
    // `slice::from_raw_parts` requires a non-NULL pointer even when the
    // length is zero. Bail out with an empty string rather than invoking
    // undefined behaviour should a NULL pointer ever be returned.
    if bytes.is_null() {
        return "";
    }

    // SAFETY: Caller ensures that the object is an instance of `NSString`.
    let len = unsafe { nsstring_len(obj) };

    // SAFETY:
    // The held AutoreleasePool is the innermost, and the reference is
    // constrained both by the pool and the NSString.
    //
    // `len` is the length of the string in the UTF-8 encoding.
    //
    // `bytes` was checked to be non-NULL above, and points to a
    // null-terminated C string (with length = len + 1), so the first `len`
    // bytes are readable.
    let bytes: &'r [u8] = unsafe { slice::from_raw_parts(bytes, len) };

    // Exactly one of the two blocks below is compiled in and acts as the
    // function's tail expression.

    // SAFETY: The bytes are valid UTF-8.
    #[cfg(not(debug_assertions))]
    unsafe {
        str::from_utf8_unchecked(bytes)
    }

    // In debug builds, validate instead of trusting the encoding, so that a
    // violated assumption surfaces as a panic.
    #[cfg(debug_assertions)]
    {
        str::from_utf8(bytes).expect("invalid UTF-8 in NSString")
    }
}