File: benchmark_utf8.cpp

package info (click to toggle)
emscripten 2.0.12~dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 108,440 kB
  • sloc: ansic: 510,324; cpp: 384,763; javascript: 84,341; python: 51,362; sh: 50,019; pascal: 4,159; makefile: 3,409; asm: 2,150; lisp: 1,869; ruby: 488; cs: 142
file content (68 lines) | stat: -rw-r--r-- 2,118 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
// Copyright 2016 The Emscripten Authors.  All rights reserved.
// Emscripten is available under two separate licenses, the MIT license and the
// University of Illinois/NCSA Open Source License.  Both these licenses can be
// found in the LICENSE file.

#include <stdio.h>
#include <string.h>
#include <wchar.h>
#include <iostream>
#include <cassert>
#include <emscripten.h>

double test(const char *str) {
  double res = EM_ASM_DOUBLE({
    var t0 = _emscripten_get_now();
    var str = Module.UTF8ToString($0);
    var t1 = _emscripten_get_now();
//    out('t: ' + (t1 - t0) + ', len(result): ' + str.length + ', result: ' + str.slice(0, 100));
    return (t1-t0);
  }, str);
  return res;
}

char *utf8_corpus = 0;
long utf8_corpus_length = 0;

char *randomString(int len) {
  if (!utf8_corpus) {
//    FILE *handle = fopen("ascii_corpus.txt", "rb");
    FILE *handle = fopen("utf8_corpus.txt", "rb");
    fseek(handle, 0, SEEK_END);
    utf8_corpus_length = ftell(handle);
    assert(utf8_corpus_length > 0);
    utf8_corpus = new char[utf8_corpus_length+1];
    fseek(handle, 0, SEEK_SET);
    fread(utf8_corpus, 1, utf8_corpus_length, handle);
    fclose(handle);
    utf8_corpus[utf8_corpus_length] = '\0';
  }
  int startIdx = rand() % (utf8_corpus_length - len);
  while(((unsigned char)utf8_corpus[startIdx] & 0xC0) == 0x80) {
    ++startIdx;
    if (startIdx + len > utf8_corpus_length) len = utf8_corpus_length - startIdx;
  }
  assert(len > 0);
  char *s = new char[len+1];
  memcpy(s, utf8_corpus + startIdx, len);
  s[len] = '\0';
  while(((unsigned char)s[len-1] & 0xC0) == 0x80) { s[--len] = '\0'; }
  while(((unsigned char)s[len-1] & 0xC0) == 0xC0) { s[--len] = '\0'; }
  assert(len >= 0);
  return s;
}

int main() {
  srand(time(NULL));
  double t = 0;
  double t2 = emscripten_get_now();
  for(int i = 0; i < 100000; ++i) {
    // FF Nightly: Already on small strings of 64 bytes in length, TextDecoder trumps in performance.
    char *str = randomString(8);
    t += test(str);
    delete [] str;
  }
  double t3 = emscripten_get_now();
  printf("OK. Time: %f (%f).\n", t, t3-t2);
  return 0;
}