File: validation.cc

package info (click to toggle)
node-websocket 1.0.28-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 564 kB
  • sloc: cpp: 220; makefile: 31; sh: 1
file content (148 lines) | stat: -rw-r--r-- 4,436 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
/*!
 * UTF-8 Validation Code originally from:
 * ws: a node.js websocket client
 * Copyright(c) 2015 Einar Otto Stangvik <einaros@gmail.com>
 * MIT Licensed
 */

#include <v8.h>
#include <node.h>
#include <node_version.h>
#include <node_buffer.h>
#include <node_object_wrap.h>
#include <stdlib.h>
#include <wchar.h>
#include <stdio.h>
#include "nan.h"

using namespace v8;
using namespace node;

/* Code point limits consulted by is_valid_utf8() after decoding a sequence. */
/* First and last values of the surrogate range (0xD800..0xDFFF); a decoded
 * value inside this range makes is_valid_utf8() report the input as invalid. */
#define UNI_SUR_HIGH_START   (uint32_t) 0xD800
#define UNI_SUR_LOW_END    (uint32_t) 0xDFFF
/* 0xFFFD; not referenced by any code visible in this file. */
#define UNI_REPLACEMENT_CHAR (uint32_t) 0x0000FFFD
/* Largest decoded value is_valid_utf8() accepts (0x10FFFF). */
#define UNI_MAX_LEGAL_UTF32  (uint32_t) 0x0010FFFF

/*
 * Indexed by the first byte of a sequence; yields how many further bytes
 * is_valid_utf8() expects to follow it:
 *   0x00-0xBF -> 0,  0xC0-0xDF -> 1,  0xE0-0xEF -> 2,
 *   0xF0-0xF7 -> 3,  0xF8-0xFB -> 4,  0xFC-0xFF -> 5.
 * The entries 4 and 5 imply 5- and 6-byte sequences, which isLegalUTF8()
 * refuses because it only accepts lengths 1 to 4.
 */
static const uint8_t trailingBytesForUTF8[256] = {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};

/*
 * Indexed by the number of trailing bytes. is_valid_utf8() sums the raw bytes
 * of a sequence (shifting left by 6 between bytes) and then subtracts the
 * matching entry, which strips the lead-byte marker and the 0x80 markers of
 * the continuation bytes in a single step. For example
 * 0x00003080 == (0xC0 << 6) + 0x80 and
 * 0x000E2080 == (0xE0 << 12) + (0x80 << 6) + 0x80.
 */
static const uint32_t offsetsFromUTF8[6] = {
  0x00000000, 0x00003080, 0x000E2080,
  0x03C82080, 0xFA082080, 0x82082080
};

/*
 * Checks whether the `length` bytes starting at `source` form one
 * well-formed UTF-8 sequence.
 *
 * source: pointer to the first (lead) byte of the candidate sequence; bytes
 *         source[0] .. source[length - 1] are read.
 * length: number of bytes in the candidate sequence. Only 1 to 4 are
 *         accepted; anything else (including 5 and 6, which RFC 3629 made
 *         illegal) yields 0 without reading any byte.
 *
 * Returns 1 if the sequence is acceptable, 0 otherwise.
 *
 * The function does not verify that `length` matches what the lead byte
 * announces; the caller is expected to derive `length` from the lead byte.
 *
 * The second byte is checked against both a lower and an upper bound for
 * every lead byte. Lead bytes 0xED and 0xF4 in particular must not be followed
 * by a byte below 0x80, otherwise inputs such as ED 41 80 would be accepted.
 */
static int isLegalUTF8(const uint8_t *source, const int length)
{
  if (length < 1 || length > 4) return 0;

  const uint8_t lead = source[0];

  if (length >= 2) {
    /* Third and fourth bytes, when present, are plain continuation bytes. */
    for (int idx = length - 1; idx >= 2; --idx) {
      if (source[idx] < 0x80 || source[idx] > 0xBF) return 0;
    }

    /* The permitted range of the second byte depends on the lead byte. */
    uint8_t lowest = 0x80;
    uint8_t highest = 0xBF;
    switch (lead) {
      case 0xE0: lowest = 0xA0; break;   /* excludes overlong 3-byte forms */
      case 0xED: highest = 0x9F; break;  /* excludes 0xD800..0xDFFF */
      case 0xF0: lowest = 0x90; break;   /* excludes overlong 4-byte forms */
      case 0xF4: highest = 0x8F; break;  /* excludes values above 0x10FFFF */
      default: break;
    }

    const uint8_t second = source[1];
    if (second < lowest || second > highest) return 0;
  }

  /* 0x80..0xBF are continuation bytes and 0xC0/0xC1 only start overlong
   * forms, so none of them may begin a sequence. */
  if (lead >= 0x80 && lead < 0xC2) return 0;

  /* Lead bytes above 0xF4 would encode values beyond 0x10FFFF. */
  if (lead > 0xF4) return 0;

  return 1;
}

/*
 * Reports whether the `len` bytes starting at `value` are valid UTF-8.
 *
 * len:   number of bytes to examine.
 * value: pointer to the first byte; not read when len is 0.
 *
 * Returns 1 when every sequence in the buffer is accepted and 0 as soon as
 * one is rejected. An empty buffer (len == 0) is reported as valid.
 *
 * For each sequence the expected length is looked up from the lead byte in
 * trailingBytesForUTF8, the bytes are vetted by isLegalUTF8(), and the value
 * is then decoded and checked against the surrogate range and
 * UNI_MAX_LEGAL_UTF32.
 */
int is_valid_utf8 (size_t len, char *value)
{
  /* The index has the same type as len so it cannot wrap around before
   * reaching the end of a buffer larger than UINT_MAX bytes. */
  for (size_t i = 0; i < len; i++) {
    const uint8_t extrabytes = trailingBytesForUTF8[static_cast<uint8_t>(value[i])];

    /* The sequence would run past the end of the buffer. i < len holds
     * here, so the subtraction cannot underflow. */
    if (extrabytes >= len - i)
      return 0;

    if (isLegalUTF8(reinterpret_cast<const uint8_t *>(value + i), extrabytes + 1) == 0)
      return 0;

    /* Accumulate the raw bytes, six bits apart; i ends up on the last byte
     * of the sequence so the loop increment moves to the next lead byte. */
    uint32_t ch = 0;
    for (uint8_t remaining = extrabytes; remaining > 0; --remaining) {
      ch += static_cast<uint8_t>(value[i++]);
      ch <<= 6;
    }
    ch += static_cast<uint8_t>(value[i]);

    /* Remove the lead/continuation marker bits that were summed in above. */
    ch -= offsetsFromUTF8[extrabytes];

    if (ch > UNI_MAX_LEGAL_UTF32)
      return 0;
    if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
      return 0;
  }

  return 1;
}

class Validation : public ObjectWrap
{
public:

  static void Initialize(v8::Handle<v8::Object> target)
  {
    Nan::HandleScope scope;
    Local<FunctionTemplate> t = Nan::New<FunctionTemplate>(New);
    t->InstanceTemplate()->SetInternalFieldCount(1);
    Nan::SetMethod(t, "isValidUTF8", Validation::IsValidUTF8);
    Nan::Set(target, Nan::New<String>("Validation").ToLocalChecked(), t->GetFunction());
  }

protected:

  static NAN_METHOD(New)
  {
    Nan::HandleScope scope;
    Validation* validation = new Validation();
    validation->Wrap(info.This());
    info.GetReturnValue().Set(info.This());
  }

  static NAN_METHOD(IsValidUTF8)
  {
    Nan::HandleScope scope;
    if (!Buffer::HasInstance(info[0])) {
      return Nan::ThrowTypeError("First argument needs to be a buffer");
    }
    Local<Object> buffer_obj = info[0]->ToObject();
    char *buffer_data = Buffer::Data(buffer_obj);
    size_t buffer_length = Buffer::Length(buffer_obj);
    info.GetReturnValue().Set(is_valid_utf8(buffer_length, buffer_data) == 1 ? Nan::True() : Nan::False());
  }
};
/*
 * Module initializer: opens a handle scope and lets Validation::Initialize()
 * attach the Validation class to `target`. When building against a Node
 * version older than 0.10.0 the function is declared with C linkage.
 */
#if !NODE_VERSION_AT_LEAST(0,10,0)
extern "C"
#endif
void init (Handle<Object> target)
{
  Nan::HandleScope scope;
  Validation::Initialize(target);
}

/* Hands init() to Node's NODE_MODULE macro under the module name `validation`. */
NODE_MODULE(validation, init)