1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395
|
/*
* utf.h
*
* This file is a part of NSIS.
*
* Copyright (C) 2011-2020 Anders Kjersem
*
* Licensed under the zlib/libpng license (the "License");
* you may not use this file except in compliance with the License.
*
* Licence details can be found in the file COPYING.
*
* This software is provided 'as-is', without any express or implied
* warranty.
*
*/
#ifndef NSIS_UTF_H
#define NSIS_UTF_H
#include "Platform.h"
const WORD UNICODE_REPLACEMENT_CHARACTER = 0xfffd;
#define TSTR_INPUTCHARSET _T("ACP|OEM|CP#|UTF8|UTF16<LE|BE>")
#define TSTR_OUTPUTCHARSET _T("ACP|OEM|CP#|UTF8[SIG]|UTF16<LE|BE>[BOM]")
template<typename T> T S7ChLwr(T c) { return c>='A' && c<='Z' ? (T)(c|32) : c; }
template<typename T> T S7ChUpr(T c) { return c>='a' && c<='z' ? (T)(c-'a'+'A') : c; }
template<typename T> bool S7IsChEqualI(char ch,T cmp)
{
return S7ChLwr((T)ch) == S7ChLwr(cmp);
}
inline bool IsValidUnicodeCodePoint(UINT32 c,bool StrictUTF32=false)
{
// Unicode 6.1: 16.7 Noncharacters
if ((c&0xfffe) == 0xfffe) return false; // ..FFFE & ..FFFF is reserved in each plane
if (c >= 0xfdd0 && c <= 0xfdef) return false; // Reserved in BMP
if (StrictUTF32 && c > 0x10ffff) return false;
return true;
}
inline bool IsLeadSurrogateUTF16(unsigned short c) { return c >= 0xd800 && c <= 0xdbff; }
inline bool IsTrailSurrogateUTF16(unsigned short c) { return c >= 0xdc00 && c <= 0xdfff; }
inline UINT32 CodePointFromUTF16SurrogatePair(unsigned short lea,unsigned short tra)
{
const UINT32 surrogate_offset = 0x10000 - (0xD800 << 10) - 0xDC00;
return ((UINT32)lea << 10) + tra + surrogate_offset;
}
inline bool UTF8_GetTrailCount(unsigned char chFirst, unsigned char &cb)
{
// This function should only be used to get a rough idea of how large the encoded
// codepoint is, just because it returns true does not mean that it is valid UTF-8!
cb = 0;
if (0xC0 == (0xC0 & chFirst))
{
++cb;
if (0xE0 == (0xE0 & chFirst))
{
++cb;
if (0xF0 == (0xF0 & chFirst))
{
++cb;
if (0xF8 == (0xF8 & chFirst))
{
++cb;
if (0xFC == (0xFE & chFirst)) ++cb; else return 0xFC >= (0xFE & chFirst);
}
}
}
return true;
}
return (signed char)chFirst >= 0;
}
inline UINT InlineStrLenUTF16(const void*str)
{
unsigned short *p = (unsigned short *) str;
UINT cch = 0;
for(;p[cch];) ++cch;
return cch;
}
#ifdef MAKENSIS
#include <stdlib.h>
#include <stdio.h>
#include "tstring.h"
#include "util.h" // iconvdescriptor & my_fopen
#ifdef _WIN32
#include <io.h> // For _setmode
#include <fcntl.h> // For _O_BINARY
#endif
FILE* my_fopen(const TCHAR *path, const char *mode); // from util.h
void UTF16InplaceEndianSwap(void*Buffer, UINT cch);
UINT StrLenUTF16(const void*str);
bool StrSetUTF16LE(tstring&dest, const void*src);
UINT WCFromCodePoint(wchar_t*Dest,UINT cchDest,UINT32 CodPt);
#define DWCFBF_ALLOWOPTIMIZEDRETURN 0x80000000 // DupWCFromBytes can return input Buffer
wchar_t* DupWCFromBytes(void*Buffer,UINT cbBuffer,UINT32 SrcCP);
UINT DetectUTFBOM(void*Buffer,UINT cb);
UINT DetectUTFBOM(FILE*strm);
WORD GetEncodingFromString(const TCHAR*s, bool&BOM);
WORD GetEncodingFromString(const TCHAR*s);
class CharEncConv {
char *m_Result;
WORD m_TE, m_FE;
#ifdef _WIN32
bool m_AllowOptimizedReturn; // Can Convert() return Src buffer?
bool m_OptimizedReturn;
#else
iconvdescriptor m_iconvd;
#endif
protected:
size_t GuessOutputSize(size_t cbConverted);
static bool IsWE(WORD Encoding) { return (WORD)-1 == Encoding; }
static bool IsWE(UINT32 Encoding) { return (UINT32)-1 == Encoding; }
public:
CharEncConv() : m_Result(0) { SetAllowOptimizedReturn(false); }
~CharEncConv() { Close(); }
void Close()
{
#ifdef _WIN32
if (!m_OptimizedReturn)
#endif
free(m_Result);
m_Result = 0;
#ifndef _WIN32
m_iconvd.Close();
#endif
}
void* Detach() { void *p = m_Result; m_Result = 0; return p; }
bool Initialize(UINT32 ToEnc, UINT32 FromEnc);
void* Convert(const void*Src, size_t cbSrc = -1, size_t*cbOut = 0);
#ifdef _WIN32
void SetAllowOptimizedReturn(bool val = true) { m_AllowOptimizedReturn = val; }
#else
void SetAllowOptimizedReturn(bool val = false) {}
#endif
static BOOL IsValidCodePage(UINT cp);
};
class WCToUTF16LEHlpr {
unsigned short* m_s;
public:
WCToUTF16LEHlpr() : m_s(0) {}
bool Create(const TCHAR*in, unsigned int codepage = CP_ACP)
#if !defined(_WIN32) || !defined(_UNICODE)
;
#else
{
m_s = (unsigned short*) in;
return true;
}
#endif
void Destroy()
{
#if !defined(_WIN32) || !defined(_UNICODE)
free(m_s);
#endif
}
const unsigned short* Get() const { return m_s; }
UINT GetLen() const { return StrLenUTF16(m_s); }
UINT GetSize() const { return (GetLen()+1) * 2; }
unsigned short* Detach() { unsigned short *r = m_s; m_s = 0; return r; }
void CopyTo(unsigned short*Dest) const { memcpy(Dest, Get(), GetSize()); }
};
class NStreamEncoding {
protected:
WORD m_cp;
public:
enum {
ACP = CP_ACP,
OEMCP = 1,
UTF16LE = 1200,
UTF16BE = 1201,
UTF32LE = 12000,
UTF32BE = 12001,
UTF8 = CP_UTF8,
UNKNOWN = (0xffff-0),
AUTO = (0xffff-1),
CPCOUNT = (0xffff-2) // Must be less than our other magic numbers
};
NStreamEncoding() { Reset(); }
NStreamEncoding(WORD cp) { Reset(), SetCodepage(cp); }
WORD GetCodepage() const { return m_cp; }
void SetCodepage(WORD cp) { m_cp = cp; }
void SafeSetCodepage(WORD cp)
{
if (NStreamEncoding::AUTO==cp) cp = GetPlatformDefaultCodepage();
SetCodepage(cp);
}
void Reset() { SetCodepage(GetPlatformDefaultCodepage()); }
WORD GetPlatformDefaultCodepage() const
{
#ifdef _WIN32
return ACP;
#else
return UTF8;
#endif
}
bool IsUTF8() const { return UTF8==GetCodepage(); }
bool IsUTF16() const { return (UTF16LE|1)==(GetCodepage()|1); }
bool IsUTF16LE() const { return UTF16LE==GetCodepage(); }
bool IsUTF16BE() const { return UTF16BE==GetCodepage(); }
bool IsUnicode() const { return IsUnicodeCodepage(GetCodepage()); }
void GetCPDisplayName(TCHAR*Buf) { GetCPDisplayName(m_cp, Buf); }
static UINT GetCodeUnitSize(WORD cp)
{
if ((UTF16LE|1)==(cp|1)) return 2;
if ((UTF32LE|1)==(cp|1)) return 4;
return 1;
}
static bool IsUnicodeCodepage(WORD cp)
{
return UTF8==cp || (UTF16LE|1)==(cp|1) || (UTF32LE|1)==(cp|1);
}
static void GetCPDisplayName(WORD CP, TCHAR*Buf);
};
class NStream {
public:
enum {
OK = 0,
ERR_BUFFEROVERFLOW,
ERR_IOERROR,
ERR_INVALIDENCODING,
ERR_UNSUPPORTEDENCODING,
};
static bool IsNewline(wchar_t chW, bool HandleUnicodeNL)
{
if (L'\n'==chW || L'\r'==chW) return true;
if (HandleUnicodeNL)
{
// www.unicode.org/standard/reports/tr13/tr13-5.html#UNICODE NEWLINE GUIDELINES
if (L'\f'==chW) return true; // FF/Form Feed
if (L'\v'==chW) return true; // VT/Vertical Tab
// NOTIMPLEMENTED: NEL/Next Line/U+0085
// NOTIMPLEMENTED: LS/Line Separator/U+2028
// NOTIMPLEMENTED: PS/Paragraph Separator/U+2029
}
return false;
}
#ifdef _WIN32
static bool SetBinaryMode(int fd) { return -1 != _setmode(fd, _O_BINARY); }
static bool SetBinaryMode(FILE*f) { return SetBinaryMode(_fileno(f)); }
#else
static bool SetBinaryMode(int fd) { return true; }
static bool SetBinaryMode(FILE*f) { return true; }
#endif
};
class NBaseStream {
protected:
FILE* m_hFile;
NStreamEncoding m_Enc;
public:
NBaseStream(FILE *hFile = 0) : m_hFile(hFile) {}
~NBaseStream() { Close(); }
FILE* GetHandle() const { return m_hFile; }
NStreamEncoding& StreamEncoding() { return m_Enc; }
bool IsEOF() const { return feof(m_hFile) != 0; }
bool IsError() const { return ferror(m_hFile) != 0; }
bool IsUnicode() const { return m_Enc.IsUnicode(); }
void Close()
{
FILE*hF = Detach();
if (hF) fclose(hF);
}
bool OpenFileForReading(const TCHAR* Path, WORD enc = NStreamEncoding::AUTO)
{
return Attach(my_fopen(Path, "rb"), enc);
}
bool OpenFileForReading(const TCHAR* Path, NStreamEncoding&Enc)
{
return OpenFileForReading(Path, Enc.GetCodepage());
}
FILE* Detach()
{
FILE *hFile = m_hFile;
m_hFile = 0;
return hFile;
}
bool Attach(FILE*hFile, WORD enc, bool Seek = true);
UINT ReadOctets(void*Buffer, UINT cbBuf)
{
size_t cb = fread(Buffer, 1, cbBuf, m_hFile);
return (UINT) cb;
}
bool ReadOctets(void*Buffer, UINT*pcbBuf)
{
UINT cbReq = *pcbBuf, cb = ReadOctets(Buffer, cbReq);
*pcbBuf = cb;
return cbReq == cb;
}
bool ReadOctet(void*Buffer) { return 1 == ReadOctets(Buffer, 1); }
bool ReadInt16(void*Buffer) { return 2 == ReadOctets(Buffer, 2); }
};
class NIStream : public NBaseStream {
public:
bool OpenStdIn(WORD enc = NStreamEncoding::AUTO)
{
return Attach(stdin, enc, false);
}
bool OpenStdIn(NStreamEncoding&Enc)
{
return OpenStdIn(Enc.GetCodepage());
}
};
class NOStream : public NBaseStream {
public:
NOStream(FILE *hFile = 0) : NBaseStream(hFile) {}
bool CreateFileForWriting(const TCHAR* Path, WORD enc = NStreamEncoding::AUTO)
{
return Attach(my_fopen(Path, "w+b"), enc);
}
bool CreateFileForWriting(const TCHAR* Path, NStreamEncoding&Enc)
{
return CreateFileForWriting(Path, Enc.GetCodepage());
}
bool CreateFileForAppending(const TCHAR* Path, WORD enc = NStreamEncoding::AUTO)
{
return Attach(my_fopen(Path, "a+b"), enc);
}
bool CreateFileForAppending(const TCHAR* Path, NStreamEncoding&Enc)
{
return CreateFileForAppending(Path, Enc.GetCodepage());
}
bool WriteOctets(void*Buffer, size_t cbBuf)
{
return cbBuf == fwrite(Buffer, 1, cbBuf, m_hFile);
}
bool WriteBOM(NStreamEncoding&Enc)
{
static const unsigned char u8b[] = {0xEF,0xBB,0xBF};
static const unsigned char u16lb[] = {0xFF,0xFE}, u16bb[] = {0xFE,0xFF};
switch(Enc.GetCodepage())
{
case NStreamEncoding::UTF8: return WriteOctets((void*) u8b, sizeof(u8b));
case NStreamEncoding::UTF16LE: return WriteOctets((void*) u16lb, sizeof(u16lb));
case NStreamEncoding::UTF16BE: return WriteOctets((void*) u16bb, sizeof(u16bb));
}
return false;
}
bool WriteString(const wchar_t*Str, size_t cch = -1);
bool WritePlatformNLString(const wchar_t*Str, size_t cch = -1);
};
class NStreamLineReader {
protected:
NIStream &m_Strm;
wchar_t m_PrevNL;
public:
NStreamLineReader(NIStream &Strm) : m_Strm(Strm), m_PrevNL(0) {}
NIStream& GetStream() { return m_Strm; }
NStreamEncoding& StreamEncoding() { return m_Strm.StreamEncoding(); }
bool IsEOF() const { return m_Strm.IsEOF(); }
bool IsUnicode() const { return m_Strm.IsUnicode(); }
UINT ReadLine(wchar_t*Buffer, UINT cchBuf);
tstring GetErrorMessage(UINT Error, const TCHAR*Filename=0, UINT Line=0);
protected:
bool CompleteLine(wchar_t*&BufWC, UINT cchWC, UINT&cchRemain, bool HandleUnicodeNL)
{
const wchar_t chW = *BufWC;
BufWC += cchWC, cchRemain -= cchWC;
if (0 == --cchWC) // We only care about code points that fit in a single wchar_t
return NStream::IsNewline(chW, HandleUnicodeNL);
return false;
}
};
#endif // MAKENSIS
#endif // NSIS_UTF_H
|