File: SuffixTree.h

package info (click to toggle)
chromium-browser 57.0.2987.98-1~deb8u1
  • links: PTS, VCS
  • area: main
  • in suites: jessie
  • size: 2,637,852 kB
  • ctags: 2,544,394
  • sloc: cpp: 12,815,961; ansic: 3,676,222; python: 1,147,112; asm: 526,608; java: 523,212; xml: 286,794; perl: 92,654; sh: 86,408; objc: 73,271; makefile: 27,698; cs: 18,487; yacc: 13,031; tcl: 12,957; pascal: 4,875; ml: 4,716; lex: 3,904; sql: 3,862; ruby: 1,982; lisp: 1,508; php: 1,368; exp: 404; awk: 325; csh: 117; jsp: 39; sed: 37
file content (128 lines) | stat: -rw-r--r-- 3,778 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/*
 * Copyright (C) 2010 Adam Barth. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef SuffixTree_h
#define SuffixTree_h

#include "wtf/Allocator.h"
#include "wtf/Noncopyable.h"
#include "wtf/Vector.h"
#include "wtf/text/WTFString.h"

namespace blink {

// Codebook that gives every possible UChar value its own code word.
// Intended for use as the Codebook parameter of SuffixTree.
class UnicodeCodebook {
  STATIC_ONLY(UnicodeCodebook);

 public:
  // Number of distinct code words: one per value representable in a UChar.
  enum { codeSize = 1 << (8 * sizeof(UChar)) };

  // Identity mapping: the code word is the character value itself.
  static int codeWord(UChar c) { return static_cast<int>(c); }
};

// Codebook with 2^(bits in a char - 1) code words. Characters are folded
// into that range by masking off their high bits, so distinct characters
// can share a code word.
class ASCIICodebook {
  STATIC_ONLY(ASCIICodebook);

 public:
  // Number of distinct code words.
  enum { codeSize = 1 << (8 * sizeof(char) - 1) };

  // Keeps only the low bits of |c| so the result is always in
  // [0, codeSize).
  static int codeWord(UChar c) {
    const int mask = codeSize - 1;
    return c & mask;
  }
};

// A depth-limited suffix tree over the code words of a string.
//
// For every position in the source text, the first |depth| characters that
// start there (fewer near the end of the text) are inserted as a path from
// the root. Characters are mapped to child slots via Codebook::codeWord(),
// and every node has Codebook::codeSize child slots.
//
// Codebook must provide:
//   static int codeWord(UChar);  // value in [0, codeSize)
//   codeSize                     // number of distinct code words
template <typename Codebook>
class SuffixTree {
  USING_FAST_MALLOC(SuffixTree);
  WTF_MAKE_NONCOPYABLE(SuffixTree);

 public:
  // Builds the tree for |text|, indexing at most |depth| characters of each
  // suffix.
  SuffixTree(const String& text, unsigned depth)
      : m_depth(depth), m_leaf(true) {
    build(text);
  }

  // Returns false if |query| is definitely not a substring of the indexed
  // text. Returns true if the first min(depth, query.length()) characters of
  // |query| follow a path in the tree; because only that prefix is checked
  // (and code words may be shared between characters), true is not a
  // guarantee that the text contains |query|.
  bool mightContain(const String& query) {
    Node* current = &m_root;
    // Keep the bound and the index unsigned, matching the operands of
    // std::min, so large values are never reinterpreted as negative.
    const unsigned limit = std::min(m_depth, query.length());
    for (unsigned i = 0; i < limit; ++i) {
      current = current->at(Codebook::codeWord(query[i]));
      if (!current)
        return false;
    }
    return true;
  }

 private:
  // One tree node: a fixed-size table of child pointers indexed by code word.
  class Node {
    USING_FAST_MALLOC(Node);
    WTF_MAKE_NONCOPYABLE(Node);

   public:
    // Creates a node whose child slots are all null. |isLeaf| marks the
    // shared leaf node, which parents must not delete.
    explicit Node(bool isLeaf = false) : m_isLeaf(isLeaf) {
      m_children.resize(Codebook::codeSize);
      m_children.fill(0);
    }

    // Deletes the children this node owns. Children flagged as leaves are
    // skipped: they alias the tree's single shared leaf, which is a member
    // of SuffixTree and not heap-allocated by build().
    ~Node() {
      for (unsigned i = 0; i < m_children.size(); ++i) {
        Node* child = m_children.at(i);
        if (child && !child->m_isLeaf)
          delete child;
      }
    }

    // Returns a reference to the child slot for |codeWord| so callers can
    // both read and install a child.
    Node*& at(int codeWord) { return m_children.at(codeWord); }

   private:
    typedef Vector<Node*, Codebook::codeSize> ChildrenVector;

    ChildrenVector m_children;
    bool m_isLeaf;
  };

  // Inserts, for every start position in |text|, the path spelled by the
  // next min(m_depth, remaining length) characters.
  void build(const String& text) {
    const unsigned length = text.length();
    for (unsigned base = 0; base < length; ++base) {
      Node* current = &m_root;
      // Number of characters to index from |base|. Computed from the
      // remaining length rather than as min(base + m_depth, length) so that
      // a very large m_depth cannot wrap around in unsigned arithmetic.
      const unsigned span = std::min(m_depth, length - base);
      for (unsigned offset = 0; offset < span; ++offset) {
        // The shared leaf has no children of its own and must never be
        // extended.
        ASSERT(current != &m_leaf);
        Node*& child = current->at(Codebook::codeWord(text[base + offset]));
        if (!child) {
          // The last character of a path points at the shared leaf; interior
          // positions get a freshly allocated node owned by their parent.
          child = offset + 1 == span ? &m_leaf : new Node();
        }
        current = child;
      }
    }
  }

  Node m_root;
  unsigned m_depth;

  // Instead of allocating a fresh empty leaf node for every leaf in the tree
  // (there can be a lot of these), we alias all the leaves to this "static"
  // leaf node.
  Node m_leaf;
};

}  // namespace blink

#endif  // SuffixTree_h