File: stringtable.cxx

package info (click to toggle)
systemtap 5.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 47,556 kB
  • sloc: cpp: 81,117; ansic: 54,933; xml: 49,795; exp: 43,595; sh: 11,526; python: 5,003; perl: 2,252; tcl: 1,312; makefile: 1,006; javascript: 149; lisp: 105; awk: 101; asm: 91; java: 70; sed: 16
file content (195 lines) | stat: -rw-r--r-- 5,153 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
// interned string table
// Copyright (C) 2015 Red Hat Inc.
//
// This file is part of systemtap, and is free software.  You can
// redistribute it and/or modify it under the terms of the GNU General
// Public License (GPL); either version 2, or (at your option) any
// later version.

#include "config.h"
#include "stringtable.h"

#include "stap-probe.h"

#include <string>
#include <cstring>
#include <fstream>
#include <unordered_set>
#include <mutex>


using namespace std;

// PR32788 - parallelization - conflicts with string interning and its
// frequently-used lookup table.  So we disable it entirely, and may
// well remove the code later.

#if 0 && defined(HAVE_BOOST_UTILITY_STRING_REF_HPP)
using namespace boost;

#if INTERNED_STRING_INSTRUMENT
static bool whitespace_p (char c)
{
  return isspace(c);
}
#endif


#if INTERNED_STRING_CUSTOM_HASH
// A custom hash 
struct stringtable_hash
{
  size_t operator()(const string& c) const {
    const char* b = c.data();
    size_t real_length = c.size();
    const size_t blocksize = 32; // a cache line or two

    // hash the length
    size_t hash = real_length;

    // hash the beginning
    size_t length = real_length;
    if (length > blocksize)
      length = blocksize;
    while (length-- > 0)
      hash = (hash * 131) + *b++;

    // hash the middle
    if (real_length > blocksize * 3)
      {
        length = blocksize; // more likely not to span a cache line
        b = (const char*)c.data() + (real_length/2);
        while (length-- > 0)
          hash = (hash * 131) + *b++;
      }

    // the ends, especially of generated bits, are likely to be } } }
    // \n kinds of similar things

#if INTERNED_STRING_INSTRUMENT
    ofstream f ("/tmp/hash.log", ios::app);
    string s = c.substr(0,32);
    s.erase (remove_if(s.begin(), s.end(), whitespace_p), s.end());
    f << hash << " " << c.length() << " " << s << endl;
    f.close();
#endif
      
    return hash;
  }
};
typedef unordered_set<std::string, stringtable_hash> stringtable_t;
#else
typedef unordered_set<std::string> stringtable_t;
#endif


#if 0 /* PR32788 experiment */
static mutex st_lock; // protects the following two globals:
#endif

static char chartable[256];
static stringtable_t stringtable;


// XXX: set a larger initial size?  For reference, a
//
//    probe kernel.function("*") {}
//
// can intern some 450,000 entries.


// Generate a long-lived string_ref for the given input string.  In
// the absence of proper refcounting, memory is kept for the whole
// duration of the systemtap run.  Try to reuse the same string
// object for multiple invocations.  Old string_refs remain valid 
// because std::set<> guarantees iterator validity across inserts,
// which means that our value strings stay put.

// static
interned_string interned_string::intern(const string& value)
{
  if (value.empty())
    return interned_string ();

  if (value.size() == 1)
    return intern(value[0]);

#if 0 /* PR32788 experiment */
  lock_guard<mutex> stringtable_lock (st_lock);
#endif
  
  pair<stringtable_t::iterator,bool> result = stringtable.insert(value);
  PROBE2(stap, intern_string, value.c_str(), result.second);
  stringtable_t::iterator it = result.first; // persistent iterator!
  return string_ref (it->data(), it->length()); // hope for RVO/elision

  // XXX: for future consideration, consider searching the stringtable
  // for instances where 'value' is a substring.  We could string_ref
  // to substrings just fine.  The trouble is that searching the
  // stringtable naively is very timetaking; it saves memory but costs
  // mucho CPU.
}

// static
interned_string interned_string::intern(const char* value)
{
  if (!value || !value[0])
    return interned_string ();

  if (!value[1])
    return intern(value[0]);

  return intern(string(value));
}

// static
interned_string interned_string::intern(char value)
{
#if 0 /* PR32788 experiment */  
  lock_guard<mutex> chartable_lock (st_lock);
#endif
  
  if (!value)
    return interned_string ();

  size_t i = (unsigned char) value;
  if (!chartable[i]) // lazy init
    chartable[i] = value;

  return string_ref (&chartable[i], 1);
}

#if INTERNED_STRING_FIND_MEMMEM
size_t interned_string::find (const boost::string_ref& f) const
{
  const char *ptr = (const char*) memmem (this->data(), this->size(),
                                          f.data(), f.size());
  if (ptr)
    return (ptr - this->data());
  else
    return boost::string_ref::npos;
}

size_t interned_string::find (const std::string& f) const
{
  const char *ptr = (const char*) memmem (this->data(), this->size(),
                                          f.data(), f.size());
  if (ptr)
    return (ptr - this->data());
  else
    return boost::string_ref::npos;
}

size_t interned_string::find (const char *f) const
{
  const char *ptr = (const char*) memmem (this->data(), this->size(),
                                          f, strlen(f));
  if (ptr)
    return (ptr - this->data());
  else
    return boost::string_ref::npos;
}
#endif
#endif /* defined(HAVE_BOOST_UTILITY_STRING_REF_HPP) */

/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */