File: stringtable.cxx

package info (click to toggle)
systemtap 5.1-5
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 47,964 kB
  • sloc: cpp: 80,838; ansic: 54,757; xml: 49,725; exp: 43,665; sh: 11,527; python: 5,003; perl: 2,252; tcl: 1,312; makefile: 1,006; javascript: 149; lisp: 105; awk: 101; asm: 91; java: 70; sed: 16
file content (176 lines) | stat: -rw-r--r-- 4,679 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
// interned string table
// Copyright (C) 2015 Red Hat Inc.
//
// This file is part of systemtap, and is free software.  You can
// redistribute it and/or modify it under the terms of the GNU General
// Public License (GPL); either version 2, or (at your option) any
// later version.

#include "config.h"
#include "stringtable.h"

#include "stap-probe.h"

#include <string>
#include <cstring>
#include <fstream>
#include <unordered_set>


using namespace std;
#if defined(HAVE_BOOST_UTILITY_STRING_REF_HPP)
using namespace boost;


#if INTERNED_STRING_INSTRUMENT
static bool whitespace_p (char c)
{
  return isspace(c);
}
#endif


#if INTERNED_STRING_CUSTOM_HASH
// A custom hash 
struct stringtable_hash
{
  size_t operator()(const string& c) const {
    const char* b = c.data();
    size_t real_length = c.size();
    const size_t blocksize = 32; // a cache line or two

    // hash the length
    size_t hash = real_length;

    // hash the beginning
    size_t length = real_length;
    if (length > blocksize)
      length = blocksize;
    while (length-- > 0)
      hash = (hash * 131) + *b++;

    // hash the middle
    if (real_length > blocksize * 3)
      {
        length = blocksize; // more likely not to span a cache line
        b = (const char*)c.data() + (real_length/2);
        while (length-- > 0)
          hash = (hash * 131) + *b++;
      }

    // the ends, especially of generated bits, are likely to be } } }
    // \n kinds of similar things

#if INTERNED_STRING_INSTRUMENT
    ofstream f ("/tmp/hash.log", ios::app);
    string s = c.substr(0,32);
    s.erase (remove_if(s.begin(), s.end(), whitespace_p), s.end());
    f << hash << " " << c.length() << " " << s << endl;
    f.close();
#endif
      
    return hash;
  }
};
typedef unordered_set<std::string, stringtable_hash> stringtable_t;
#else
typedef unordered_set<std::string> stringtable_t;
#endif


static char chartable[256];
static stringtable_t stringtable;
// XXX: set a larger initial size?  For reference, a
//
//    probe kernel.function("*") {}
//
// can intern some 450,000 entries.


// Generate a long-lived string_ref for the given input string.  In
// the absence of proper refcounting, memory is kept for the whole
// duration of the systemtap run.  Try to reuse the same string
// object for multiple invocations.  Old string_refs remain valid 
// because std::set<> guarantees iterator validity across inserts,
// which means that our value strings stay put.

// static
interned_string interned_string::intern(const string& value)
{
  if (value.empty())
    return interned_string ();

  if (value.size() == 1)
    return intern(value[0]);

  pair<stringtable_t::iterator,bool> result = stringtable.insert(value);
  PROBE2(stap, intern_string, value.c_str(), result.second);
  stringtable_t::iterator it = result.first; // persistent iterator!
  return string_ref (it->data(), it->length()); // hope for RVO/elision

  // XXX: for future consideration, consider searching the stringtable
  // for instances where 'value' is a substring.  We could string_ref
  // to substrings just fine.  The trouble is that searching the
  // stringtable naively is very timetaking; it saves memory but costs
  // mucho CPU.
}

// static
interned_string interned_string::intern(const char* value)
{
  if (!value || !value[0])
    return interned_string ();

  if (!value[1])
    return intern(value[0]);

  return intern(string(value));
}

// static
interned_string interned_string::intern(char value)
{
  if (!value)
    return interned_string ();

  size_t i = (unsigned char) value;
  if (!chartable[i]) // lazy init
    chartable[i] = value;

  return string_ref (&chartable[i], 1);
}

#if INTERNED_STRING_FIND_MEMMEM
size_t interned_string::find (const boost::string_ref& f) const
{
  const char *ptr = (const char*) memmem (this->data(), this->size(),
                                          f.data(), f.size());
  if (ptr)
    return (ptr - this->data());
  else
    return boost::string_ref::npos;
}

size_t interned_string::find (const std::string& f) const
{
  const char *ptr = (const char*) memmem (this->data(), this->size(),
                                          f.data(), f.size());
  if (ptr)
    return (ptr - this->data());
  else
    return boost::string_ref::npos;
}

size_t interned_string::find (const char *f) const
{
  const char *ptr = (const char*) memmem (this->data(), this->size(),
                                          f, strlen(f));
  if (ptr)
    return (ptr - this->data());
  else
    return boost::string_ref::npos;
}
#endif
#endif /* defined(HAVE_BOOST_UTILITY_STRING_REF_HPP) */

/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */