1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
|
/*!
* UTF-8 Validation Code originally from:
* ws: a node.js websocket client
* Copyright(c) 2015 Einar Otto Stangvik <einaros@gmail.com>
* MIT Licensed
*/
#include <v8.h>
#include <node.h>
#include <node_version.h>
#include <node_buffer.h>
#include <node_object_wrap.h>
#include <stdlib.h>
#include <wchar.h>
#include <stdio.h>
#include "nan.h"
using namespace v8;
using namespace node;
/*
 * Unicode code point limits used by the UTF-8 validator.
 *
 * Every replacement list is wrapped in parentheses so that each macro expands
 * to a single primary expression no matter where it is used (for example as
 * the operand of sizeof or next to a postfix operator).
 */
#define UNI_SUR_HIGH_START ((uint32_t) 0xD800)        /* first code point of the surrogate range */
#define UNI_SUR_LOW_END ((uint32_t) 0xDFFF)           /* last code point of the surrogate range */
#define UNI_REPLACEMENT_CHAR ((uint32_t) 0x0000FFFD)  /* U+FFFD */
#define UNI_MAX_LEGAL_UTF32 ((uint32_t) 0x0010FFFF)   /* highest value accepted as a code point */
/*
 * Lookup table indexed by the first byte of a sequence; the value is the
 * number of bytes expected to follow that first byte.
 *
 * The table classifies purely by byte value: 0x00-0xBF -> 0, 0xC0-0xDF -> 1,
 * 0xE0-0xEF -> 2, 0xF0-0xF7 -> 3, 0xF8-0xFB -> 4, 0xFC-0xFF -> 5.  It does
 * not by itself reject anything; isLegalUTF8() is what refuses sequence
 * lengths above 4 and illegal lead bytes.
 */
static const uint8_t trailingBytesForUTF8[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x00-0x1F */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x20-0x3F */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x40-0x5F */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x60-0x7F */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x9F */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xA0-0xBF */
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xC0-0xDF */
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5  /* 0xE0-0xFF */
};
/*
 * Correction constants indexed by the number of trailing bytes (0-5).
 *
 * is_valid_utf8() accumulates the raw bytes of a sequence, shifting left by
 * six bits between bytes, and then subtracts the entry for that sequence
 * length.  Each entry is the sum of the marker bits carried by the raw bytes
 * at their shifted positions, e.g. for one trailing byte
 * (0xC0 << 6) + 0x80 == 0x00003080, and for two trailing bytes
 * (0xE0 << 12) + (0x80 << 6) + 0x80 == 0x000E2080.
 */
static const uint32_t offsetsFromUTF8[6] = {
0x00000000, 0x00003080, 0x000E2080,
0x03C82080, 0xFA082080, 0x82082080
};
/*
 * Check that the `length` bytes starting at `source` form one legal UTF-8
 * sequence.
 *
 * source - first (lead) byte of the candidate sequence; `length` bytes are
 *          read starting here
 * length - total sequence length including the lead byte
 *
 * Returns 1 when the sequence is legal and 0 otherwise.  Lengths outside
 * 1..4 are always rejected (RFC 3629 makes 5- and 6-byte forms illegal).
 *
 * The bytes are examined from the last one back to the first; the outer
 * switch deliberately falls through from longer to shorter lengths.
 */
static int isLegalUTF8(const uint8_t *source, const int length)
{
  uint8_t a;
  const uint8_t *srcptr = source + length;

  switch (length) {
    default:
      return 0;
    case 4: /* fourth byte must be a continuation byte */
      if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
      /* fall through */
    case 3: /* third byte must be a continuation byte */
      if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
      /* fall through */
    case 2:
      /* The second byte must be a continuation byte as well.  Checking the
       * lower bound here matters for lead bytes 0xED and 0xF4, whose extra
       * restrictions below only tighten the upper bound. */
      if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;

      /* Lead-byte specific narrowing of the second byte (no fall-through
       * inside this inner switch). */
      switch (*source) {
        case 0xE0: if (a < 0xA0) return 0; break; /* overlong 3-byte form */
        case 0xED: if (a > 0x9F) return 0; break; /* surrogate range */
        case 0xF0: if (a < 0x90) return 0; break; /* overlong 4-byte form */
        case 0xF4: if (a > 0x8F) return 0; break; /* above 0x10FFFF */
        default: break;
      }
      /* fall through */
    case 1:
      /* 0x80-0xBF are bare continuation bytes, 0xC0-0xC1 only start
       * overlong 2-byte forms. */
      if (*source >= 0x80 && *source < 0xC2) return 0;
  }

  /* Lead bytes above 0xF4 would encode values beyond 0x10FFFF. */
  if (*source > 0xF4) return 0;
  return 1;
}
/*
 * Check whether a byte buffer consists entirely of legal UTF-8 sequences.
 *
 * len   - number of bytes available at `value`
 * value - bytes to inspect; the buffer is only read, never written
 *
 * Returns 1 when every sequence in the buffer is accepted (an empty buffer
 * is accepted), and 0 at the first sequence that is truncated, rejected by
 * isLegalUTF8(), decodes into the surrogate range, or decodes above
 * UNI_MAX_LEGAL_UTF32.
 */
int is_valid_utf8 (size_t len, char *value)
{
  const uint8_t *bytes = (const uint8_t *) value;
  /* The cursor has the same type as len: a narrower counter would wrap
   * around before reaching len on buffers larger than UINT_MAX bytes. */
  size_t pos = 0;

  while (pos < len) {
    const uint8_t extrabytes = trailingBytesForUTF8[bytes[pos]];

    /* The lead byte announces more trailing bytes than the buffer still
     * holds.  Comparing against the remaining count (len - pos is at least
     * 1 here) keeps the arithmetic free of overflow. */
    if (extrabytes >= len - pos) {
      return 0;
    }
    if (isLegalUTF8(bytes + pos, extrabytes + 1) == 0) {
      return 0;
    }

    /* Accumulate the raw bytes six bits at a time, then strip the marker
     * bits of all bytes at once via the per-length correction constant. */
    uint32_t ch = 0;
    for (uint8_t k = 0; k < extrabytes; k++) {
      ch += bytes[pos++];
      ch <<= 6;
    }
    ch += bytes[pos++];
    ch -= offsetsFromUTF8[extrabytes];

    if (ch > UNI_MAX_LEGAL_UTF32) {
      return 0;
    }
    if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
      return 0;
    }
  }
  return 1;
}
class Validation : public ObjectWrap
{
public:
static void Initialize(v8::Handle<v8::Object> target)
{
Nan::HandleScope scope;
Local<FunctionTemplate> t = Nan::New<FunctionTemplate>(New);
t->InstanceTemplate()->SetInternalFieldCount(1);
Nan::SetMethod(t, "isValidUTF8", Validation::IsValidUTF8);
Nan::Set(target, Nan::New<String>("Validation").ToLocalChecked(), t->GetFunction());
}
protected:
static NAN_METHOD(New)
{
Nan::HandleScope scope;
Validation* validation = new Validation();
validation->Wrap(info.This());
info.GetReturnValue().Set(info.This());
}
static NAN_METHOD(IsValidUTF8)
{
Nan::HandleScope scope;
if (!Buffer::HasInstance(info[0])) {
return Nan::ThrowTypeError("First argument needs to be a buffer");
}
Local<Object> buffer_obj = info[0]->ToObject();
char *buffer_data = Buffer::Data(buffer_obj);
size_t buffer_length = Buffer::Length(buffer_obj);
info.GetReturnValue().Set(is_valid_utf8(buffer_length, buffer_data) == 1 ? Nan::True() : Nan::False());
}
};
/*
 * Module entry point: registers the Validation constructor on the addon's
 * target object.
 *
 * NAN_MODULE_INIT declares `void init(<target type> target)` with the handle
 * type appropriate for the Node.js version being built against, so the
 * signature does not hard-code a V8 handle class.
 */
#if !NODE_VERSION_AT_LEAST(0,10,0)
extern "C"
#endif
NAN_MODULE_INIT(init)
{
  Nan::HandleScope scope;
  Validation::Initialize(target);
}
NODE_MODULE(validation, init)
|