File: FileTextEncoding.swift

package info (click to toggle)
swiftlang 6.2.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,856,264 kB
  • sloc: cpp: 9,995,718; ansic: 2,234,019; asm: 1,092,167; python: 313,940; objc: 82,726; f90: 80,126; lisp: 38,373; pascal: 25,580; sh: 20,378; ml: 5,058; perl: 4,751; makefile: 4,725; awk: 3,535; javascript: 3,018; xml: 918; fortran: 664; cs: 573; ruby: 396
file content (114 lines) | stat: -rw-r--r-- 4,944 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift open source project
//
// Copyright (c) 2025 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

import SWBUtil
public import SWBProtocol

public typealias FileTextEncoding = SWBProtocol.FileTextEncoding

public import struct Foundation.Data
public import class Foundation.NSNumber
public import class Foundation.NSString

#if canImport(Darwin)
import class CoreFoundation.CFString
import var CoreFoundation.kCFStringEncodingInvalidId
import func CoreFoundation.CFStringConvertEncodingToNSStringEncoding
import func CoreFoundation.CFStringConvertEncodingToIANACharSetName
import func CoreFoundation.CFStringConvertIANACharSetNameToEncoding
import func CoreFoundation.CFStringConvertNSStringEncodingToEncoding
#endif

#if canImport(Darwin)
import struct CoreFoundation.ObjCBool
public import struct Foundation.StringEncodingDetectionOptionsKey
#endif

// The naming convention in Foundation is rather unfortunate.
// The explicit-endian encodings do NOT add any BOM, and the non-explicit endian
// encodings (except UTF-8) DO add a BOM based on the host byte order. Be aware.
public extension FileTextEncoding {
    /// Creates a file text encoding from a Foundation string encoding.
    ///
    /// The string encoding is translated to a CoreFoundation encoding and then
    /// to its IANA charset name, which is used to initialize the receiver.
    ///
    /// - Parameter stringEncoding: The Foundation encoding to convert.
    /// - Returns: `nil` if CoreFoundation reports an invalid encoding or has no
    ///   IANA charset name for it. Always `nil` where Darwin cannot be imported.
    init?(stringEncoding: String.Encoding) {
        #if canImport(Darwin)
        let cfEncoding = CFStringConvertNSStringEncodingToEncoding(stringEncoding.rawValue)
        guard cfEncoding != kCFStringEncodingInvalidId,
              let charSetName = CFStringConvertEncodingToIANACharSetName(cfEncoding).map(String.init)
        else {
            return nil
        }
        self.init(charSetName)
        #else
        return nil
        #endif
    }

    /// The Foundation string encoding (`NSStringEncoding`) corresponding to this encoding.
    ///
    /// The receiver's `rawValue` is looked up as an IANA charset name. The value
    /// is `nil` if CoreFoundation does not recognize the name, and always `nil`
    /// where Darwin cannot be imported.
    var stringEncoding: String.Encoding? {
        #if canImport(Darwin)
        let cfEncoding = CFStringConvertIANACharSetNameToEncoding(rawValue.asCFString)
        guard cfEncoding != kCFStringEncodingInvalidId else {
            return nil
        }
        return String.Encoding(rawValue: CFStringConvertEncodingToNSStringEncoding(cfEncoding))
        #else
        return nil
        #endif
    }

    /// A localized, human-readable name for the encoding, or `nil` when the
    /// encoding has no `stringEncoding` equivalent.
    var localizedName: String? {
        guard let stringEncoding else {
            return nil
        }
        return String.localizedName(of: stringEncoding)
    }

    /// Decodes a byte sequence into a string, optionally detecting its encoding.
    ///
    /// - Parameters:
    ///   - bytes: The raw bytes to decode.
    ///   - encoding: The encoding to decode with. Pass `nil` to have the encoding
    ///     detected from the bytes (only attempted where Darwin can be imported).
    /// - Returns: The decoded string together with the encoding that was used or
    ///   detected, or `nil` if the bytes could not be decoded with the requested
    ///   encoding, detection failed, or detection would have required a lossy
    ///   conversion.
    static func string(from bytes: [UInt8], encoding: FileTextEncoding?) -> (string: String, originalEncoding: FileTextEncoding)? {
        // An explicit encoding never falls back to detection: it either decodes or fails.
        if let encoding {
            return String(bytes, encoding: encoding).map { (string: $0, originalEncoding: encoding) }
        }

        #if canImport(Darwin)
        var detectedString: NSString?
        var wasLossy: ObjCBool = true
        let rawEncoding = NSString.stringEncoding(
            for: Data(bytes),
            encodingOptions: [.allowLossyKey: NSNumber(value: false)],
            convertedString: &detectedString,
            usedLossyConversion: &wasLossy
        )
        let detectedEncoding = String.Encoding(rawValue: rawEncoding)

        guard let text = detectedString as String?,
              let discoveredEncoding = FileTextEncoding(stringEncoding: detectedEncoding),
              !wasLossy.boolValue
        else {
            return nil
        }

        // Unicode encodings are preferred, so content detected as plain ASCII is reported as UTF-8.
        let reportedEncoding = detectedEncoding == .ascii ? FileTextEncoding.utf8 : discoveredEncoding
        return (text, reportedEncoding)
        #else
        return nil
        #endif
    }
}

fileprivate extension String {
    /// Creates a string by decoding `bytes` using one of the Unicode file text encodings.
    ///
    /// Only the UTF-8, UTF-16, and UTF-32 families are handled; any other
    /// encoding yields `nil`. The endian-neutral `.utf16` and `.utf32` encodings
    /// require the bytes to begin with a byte order mark: the mark selects the
    /// concrete big- or little-endian variant and is stripped before decoding.
    /// If no matching mark is present the result is `nil`.
    ///
    /// The actual decoding is delegated to the `init(decodingBytes:as:)` /
    /// `init(decodingBytes:as:byteSwap:)` initializers, which are defined outside
    /// this file.
    /// NOTE(review): the little-endian variants pass `byteSwap: true`, which
    /// presumably means the helper treats its input as big-endian by default;
    /// confirm against the helper's implementation.
    ///
    /// - Parameters:
    ///   - bytes: The raw bytes to decode.
    ///   - encoding: The encoding that `bytes` is expected to be in.
    init?<C: RandomAccessCollection>(_ bytes: C, encoding: FileTextEncoding) where C.Index : SignedInteger, C.Element == UInt8 {
        // Picks the first candidate encoding whose byte order mark prefixes `bytes`.
        func encodingMatchingByteOrderMark(among candidates: [FileTextEncoding]) -> FileTextEncoding? {
            return candidates.first(where: { bytes.starts(with: $0.byteOrderMark) })
        }

        switch encoding {
        case .utf8:
            self.init(decodingBytes: bytes, as: Unicode.UTF8.self)

        case .utf16:
            guard let concrete = encodingMatchingByteOrderMark(among: [.utf16be, .utf16le]) else {
                return nil
            }
            let payload = bytes.dropFirst(concrete.byteOrderMark.count)
            self.init(payload, encoding: concrete)

        case .utf16be:
            self.init(decodingBytes: bytes, as: Unicode.UTF16.self)

        case .utf16le:
            self.init(decodingBytes: bytes, as: Unicode.UTF16.self, byteSwap: true)

        case .utf32:
            guard let concrete = encodingMatchingByteOrderMark(among: [.utf32be, .utf32le]) else {
                return nil
            }
            let payload = bytes.dropFirst(concrete.byteOrderMark.count)
            self.init(payload, encoding: concrete)

        case .utf32be:
            self.init(decodingBytes: bytes, as: Unicode.UTF32.self)

        case .utf32le:
            self.init(decodingBytes: bytes, as: Unicode.UTF32.self, byteSwap: true)

        default:
            // Non-Unicode encodings are not supported by this initializer.
            return nil
        }
    }
}