File: benchmark_utf16.cpp

package info (click to toggle)
emscripten 3.1.69%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 121,872 kB
  • sloc: ansic: 636,110; cpp: 425,974; javascript: 78,401; python: 58,404; sh: 49,154; pascal: 5,237; makefile: 3,365; asm: 2,415; lisp: 1,869
file content (69 lines) | stat: -rw-r--r-- 2,150 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// Copyright 2016 The Emscripten Authors.  All rights reserved.
// Emscripten is available under two separate licenses, the MIT license and the
// University of Illinois/NCSA Open Source License.  Both these licenses can be
// found in the LICENSE file.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <wchar.h>
#include <iostream>
#include <cassert>
#include <emscripten.h>

EM_JS_DEPS(deps, "$UTF16ToString");

double test(const unsigned short *str) {
  double res = EM_ASM_DOUBLE({
    var t0 = _emscripten_get_now();
    var str = UTF16ToString($0);
    var t1 = _emscripten_get_now();
    out('t: ' + (t1 - t0) + ', len(result): ' + str.length + ', result: ' + str.slice(0, 100));
    return (t1-t0);
  }, str);
  return res;
}

// Lazily loaded corpus of UTF-16 code units and its length in code units
// (the extra terminating 0 stored after the data is not counted).  Both are
// filled in by the first call to randomString().
unsigned short *utf16_corpus = 0;
long utf16_corpus_length = 0;

// Reports an unrecoverable corpus/argument problem and aborts.  Used instead
// of assert() so the checks survive builds with NDEBUG defined.
static void corpusFatal(const char *what) {
  fprintf(stderr, "benchmark_utf16: %s\n", what);
  abort();
}

// True for a UTF-16 lead (high) surrogate, i.e. any unit in 0xD800..0xDBFF.
static bool isLeadSurrogate(unsigned short unit) {
  return (unit & 0xFC00) == 0xD800;
}

// True for a UTF-16 trail (low) surrogate, i.e. any unit in 0xDC00..0xDFFF.
static bool isTrailSurrogate(unsigned short unit) {
  return (unit & 0xFC00) == 0xDC00;
}

// Reads "utf16_corpus.txt" into utf16_corpus / utf16_corpus_length, appending
// a terminating 0 code unit.  Aborts with a diagnostic on any I/O failure.
// (The original source also carried a commented-out alternative file name,
// "ascii_corpus.txt".)
static void loadUtf16Corpus() {
  FILE *handle = fopen("utf16_corpus.txt", "rb");
  if (!handle) corpusFatal("cannot open utf16_corpus.txt");

  if (fseek(handle, 0, SEEK_END) != 0) {
    fclose(handle);
    corpusFatal("cannot seek in utf16_corpus.txt");
  }
  // ftell() returns -1 on failure, which the size check below also rejects.
  const long bytes = ftell(handle);
  if (bytes < (long)sizeof(unsigned short) || fseek(handle, 0, SEEK_SET) != 0) {
    fclose(handle);
    corpusFatal("utf16_corpus.txt is empty or unreadable");
  }

  const long units = bytes / (long)sizeof(unsigned short);
  unsigned short *buffer = new unsigned short[units + 1];
  const size_t got = fread(buffer, sizeof(unsigned short), (size_t)units, handle);
  fclose(handle);
  if (got != (size_t)units) {
    delete [] buffer;
    corpusFatal("short read from utf16_corpus.txt");
  }
  buffer[units] = 0;

  utf16_corpus = buffer;
  utf16_corpus_length = units;
}

// Returns a freshly allocated (new[]), 0-terminated copy of a randomly chosen
// window of the corpus, at most `len` code units long.  The caller owns the
// result and must release it with delete [].
//
// The window never starts on a trail surrogate and never ends on a lead
// surrogate, so the result may be shorter than `len` (possibly empty).
// A `len` larger than the corpus is clamped to the corpus length.
// Aborts if `len` is not positive or no usable window exists.
unsigned short *randomString(int len) {
  if (!utf16_corpus) loadUtf16Corpus();
  if (len <= 0) corpusFatal("randomString() requires a positive length");
  if (utf16_corpus_length <= 0) corpusFatal("UTF-16 corpus is empty");

  long count = len;
  if (count > utf16_corpus_length) count = utf16_corpus_length;

  // "+ 1" keeps the modulus >= 1 even when the window spans the whole corpus.
  long start = rand() % (utf16_corpus_length - count + 1);

  // Do not begin in the middle of a surrogate pair.
  while (start < utf16_corpus_length && isTrailSurrogate(utf16_corpus[start])) {
    ++start;
  }
  if (start + count > utf16_corpus_length) count = utf16_corpus_length - start;
  if (count <= 0) corpusFatal("no usable start position found in corpus");

  unsigned short *s = new unsigned short[count + 1];
  memcpy(s, utf16_corpus + start, (size_t)count * sizeof(unsigned short));
  s[count] = 0;

  // Do not end in the middle of a surrogate pair; stop at the buffer start.
  while (count > 0 && isLeadSurrogate(s[count - 1])) {
    s[--count] = 0;
  }
  return s;
}

// Benchmark driver: decodes ten random 100-unit corpus excerpts and prints
// the summed decode time together with the total time spent in the loop.
int main() {
  srand(time(NULL));

  double decodeTotal = 0;
  const double loopStart = emscripten_get_now();

  int remaining = 10;
  while (remaining-- > 0) {
    // Original author's note: in FF Nightly, TextDecoder already wins on
    // strings as small as 64 bytes.
    unsigned short *sample = randomString(100);
    decodeTotal += test(sample);
    delete [] sample;
  }

  const double loopEnd = emscripten_get_now();
  printf("OK. Time: %f (%f).\n", decodeTotal, loopEnd - loopStart);
  return 0;
}