File: tokenizer_test.js

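// Tests for lunr.tokenizer. This file assumes the test harness provides
// `lunr` and chai's `assert` as globals, as the lunr test suite does; to
// run it standalone you would need something like (paths are assumptions):
//
//   var lunr = require('../lunr.js')
//   var assert = require('chai').assert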
suite('lunr.tokenizer', function () {
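  // Maps lunr.Token instances to plain strings so assertions can
  // compare against string literals.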
  var toString = function (o) { return o.toString() }

  test('splitting into tokens', function () {
    var tokens = lunr.tokenizer('foo bar baz')
      .map(toString)

    assert.sameMembers(['foo', 'bar', 'baz'], tokens)
  })

  test('downcases tokens', function () {
    var tokens = lunr.tokenizer('Foo BAR BAZ')
      .map(toString)

    assert.sameMembers(['foo', 'bar', 'baz'], tokens)
  })

  test('array of strings', function () {
    var tokens = lunr.tokenizer(['foo', 'bar', 'baz'])
      .map(toString)

    assert.sameMembers(['foo', 'bar', 'baz'], tokens)
  })

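  // Non-string elements of an input array are stringified; null and
  // undefined become empty-string tokens rather than being dropped.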
  test('undefined is converted to empty string', function () {
    var tokens = lunr.tokenizer(['foo', undefined, 'baz'])
      .map(toString)

    assert.sameMembers(['foo', '', 'baz'], tokens)
  })

  test('null is converted to empty string', function () {
    var tokens = lunr.tokenizer(['foo', null, 'baz'])
      .map(toString)

    assert.sameMembers(['foo', '', 'baz'], tokens)
  })

  test('multiple white space is stripped', function () {
    var tokens = lunr.tokenizer('   foo    bar   baz  ')
      .map(toString)

    assert.sameMembers(['foo', 'bar', 'baz'], tokens)
  })

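  // A null-like top-level argument yields an empty token list, in
  // contrast to null elements inside an array (see above), which
  // become empty-string tokens.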
  test('handling null-like arguments', function () {
    assert.lengthOf(lunr.tokenizer(), 0)
    assert.lengthOf(lunr.tokenizer(undefined), 0)
    assert.lengthOf(lunr.tokenizer(null), 0)
  })

  test('converting a date to tokens', function () {
    var date = new Date(Date.UTC(2013, 0, 1, 12))

    // NOTE: slicing here to prevent asserting on parts
    // of the date that might be affected by the timezone
    // the test is running in.
    assert.sameMembers(['tue', 'jan', '01', '2013'], lunr.tokenizer(date).slice(0, 4).map(toString))
  })

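  // Other non-string primitives are converted via toString before
  // being split into tokens.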
  test('converting a number to tokens', function () {
    assert.sameMembers(lunr.tokenizer(41).map(toString), ['41'])
  })

  test('converting a boolean to tokens', function () {
    assert.sameMembers(lunr.tokenizer(false).map(toString), ['false'])
  })

  test('converting an object to tokens', function () {
    var obj = {
      toString: function () { return 'custom object' }
    }

    assert.sameMembers(lunr.tokenizer(obj).map(toString), ['custom', 'object'])
  })

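  // lunr's default separator (lunr.tokenizer.separator, /[\s\-]+/)
  // splits on runs of whitespace and hyphens alike.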
  test('splits strings with hyphens', function () {
    assert.sameMembers(lunr.tokenizer('foo-bar').map(toString), ['foo', 'bar'])
  })

  test('splits strings with hyphens and spaces', function () {
    assert.sameMembers(lunr.tokenizer('foo - bar').map(toString), ['foo', 'bar'])
  })

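  // Each token carries metadata: its index within the token list and
  // its position as a [start, length] pair into the original string.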
  test('tracking the token index', function () {
    var tokens = lunr.tokenizer('foo bar')
    assert.equal(tokens[0].metadata.index, 0)
    assert.equal(tokens[1].metadata.index, 1)
  })

  test('tracking the token position', function () {
    var tokens = lunr.tokenizer('foo bar')
    assert.deepEqual(tokens[0].metadata.position, [0, 3])
    assert.deepEqual(tokens[1].metadata.position, [4, 3])
  })

  test('providing additional metadata', function () {
    var tokens = lunr.tokenizer('foo bar', { 'hurp': 'durp' })
    assert.deepEqual(tokens[0].metadata.hurp, 'durp')
    assert.deepEqual(tokens[1].metadata.hurp, 'durp')
  })
})