1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
|
// interned string table
// Copyright (C) 2015 Red Hat Inc.
//
// This file is part of systemtap, and is free software. You can
// redistribute it and/or modify it under the terms of the GNU General
// Public License (GPL); either version 2, or (at your option) any
// later version.
#include "config.h"
#include "stringtable.h"
#include "stap-probe.h"
#include <string>
#include <cstring>
#include <fstream>
#include <unordered_set>
using namespace std;
#if defined(HAVE_BOOST_UTILITY_STRING_REF_HPP)
using namespace boost;
#if INTERNED_STRING_INSTRUMENT
static bool whitespace_p (char c)
{
return isspace(c);
}
#endif
#if INTERNED_STRING_CUSTOM_HASH
// A custom hash
struct stringtable_hash
{
size_t operator()(const string& c) const {
const char* b = c.data();
size_t real_length = c.size();
const size_t blocksize = 32; // a cache line or two
// hash the length
size_t hash = real_length;
// hash the beginning
size_t length = real_length;
if (length > blocksize)
length = blocksize;
while (length-- > 0)
hash = (hash * 131) + *b++;
// hash the middle
if (real_length > blocksize * 3)
{
length = blocksize; // more likely not to span a cache line
b = (const char*)c.data() + (real_length/2);
while (length-- > 0)
hash = (hash * 131) + *b++;
}
// the ends, especially of generated bits, are likely to be } } }
// \n kinds of similar things
#if INTERNED_STRING_INSTRUMENT
ofstream f ("/tmp/hash.log", ios::app);
string s = c.substr(0,32);
s.erase (remove_if(s.begin(), s.end(), whitespace_p), s.end());
f << hash << " " << c.length() << " " << s << endl;
f.close();
#endif
return hash;
}
};
typedef unordered_set<std::string, stringtable_hash> stringtable_t;
#else
typedef unordered_set<std::string> stringtable_t;
#endif
static char chartable[256];
static stringtable_t stringtable;
// XXX: set a larger initial size? For reference, a
//
// probe kernel.function("*") {}
//
// can intern some 450,000 entries.
// Generate a long-lived string_ref for the given input string. In
// the absence of proper refcounting, memory is kept for the whole
// duration of the systemtap run. Try to reuse the same string
// object for multiple invocations. Old string_refs remain valid
// because std::set<> guarantees iterator validity across inserts,
// which means that our value strings stay put.
// static
interned_string interned_string::intern(const string& value)
{
if (value.empty())
return interned_string ();
if (value.size() == 1)
return intern(value[0]);
pair<stringtable_t::iterator,bool> result = stringtable.insert(value);
PROBE2(stap, intern_string, value.c_str(), result.second);
stringtable_t::iterator it = result.first; // persistent iterator!
return string_ref (it->data(), it->length()); // hope for RVO/elision
// XXX: for future consideration, consider searching the stringtable
// for instances where 'value' is a substring. We could string_ref
// to substrings just fine. The trouble is that searching the
// stringtable naively is very timetaking; it saves memory but costs
// mucho CPU.
}
// static
interned_string interned_string::intern(const char* value)
{
if (!value || !value[0])
return interned_string ();
if (!value[1])
return intern(value[0]);
return intern(string(value));
}
// static
interned_string interned_string::intern(char value)
{
if (!value)
return interned_string ();
size_t i = (unsigned char) value;
if (!chartable[i]) // lazy init
chartable[i] = value;
return string_ref (&chartable[i], 1);
}
#if INTERNED_STRING_FIND_MEMMEM
size_t interned_string::find (const boost::string_ref& f) const
{
const char *ptr = (const char*) memmem (this->data(), this->size(),
f.data(), f.size());
if (ptr)
return (ptr - this->data());
else
return boost::string_ref::npos;
}
size_t interned_string::find (const std::string& f) const
{
const char *ptr = (const char*) memmem (this->data(), this->size(),
f.data(), f.size());
if (ptr)
return (ptr - this->data());
else
return boost::string_ref::npos;
}
size_t interned_string::find (const char *f) const
{
const char *ptr = (const char*) memmem (this->data(), this->size(),
f, strlen(f));
if (ptr)
return (ptr - this->data());
else
return boost::string_ref::npos;
}
#endif
#endif /* defined(HAVE_BOOST_UTILITY_STRING_REF_HPP) */
/* vim: set sw=2 ts=8 cino=>4,n-2,{2,^-2,t0,(0,u0,w1,M1 : */
|