1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
|
#ifdef RCSID
static char RCSid[] =
"$Header: d:/cvsroot/tads/html/htmlhash.cpp,v 1.2 1999/05/17 02:52:21 MJRoberts Exp $";
#endif
/*
* Copyright (c) 1997 by Michael J. Roberts. All Rights Reserved.
*
* Please see the accompanying license file, LICENSE.TXT, for information
* on using and copying this software.
*/
/*
Name
htmlhash.cpp - hash table implementation
Function
Notes
Modified
10/25/97 MJRoberts - Creation
*/
#include <assert.h>
#include <memory.h>
#include <string.h>
#ifndef TADSHTML_H
#include "tadshtml.h"
#endif
#ifndef HTMLHASH_H
#include "htmlhash.h"
#endif
/* ------------------------------------------------------------------------ */
/*
 *   Case-insensitive hash function.  The hash value is simply the sum of
 *   the character values of the string, where every lower-case character
 *   is folded to its upper-case equivalent before being added.  Two
 *   strings that differ only in case therefore hash to the same value.
 *
 *   s - pointer to the first character of the string
 *   l - number of characters to hash (no terminator is required)
 */
unsigned int CHtmlHashFuncCI::compute_hash(const textchar_t *s, size_t l)
{
    unsigned int sum = 0;
    size_t idx;

    /* fold each character to upper-case and add it to the running sum */
    for (idx = 0 ; idx < l ; ++idx)
        sum += is_lower(s[idx]) ? to_upper(s[idx]) : s[idx];

    /* the sum is the hash value */
    return sum;
}
/* ------------------------------------------------------------------------ */
/*
 *   Case-sensitive hash function.  The hash value is the sum of the
 *   character values of the string, taken exactly as they appear, so
 *   strings differing in case generally produce different sums.
 *
 *   s - pointer to the first character of the string
 *   l - number of characters to hash (no terminator is required)
 */
unsigned int CHtmlHashFuncCS::compute_hash(const textchar_t *s, size_t l)
{
    unsigned int sum = 0;
    size_t idx;

    /* add each character, unmodified, to the running sum */
    for (idx = 0 ; idx < l ; ++idx)
        sum += s[idx];

    /* the sum is the hash value */
    return sum;
}
/* ------------------------------------------------------------------------ */
/*
 *   Hash table symbol entry.  This is an abstract class; subclasses must
 *   provide a symbol-matching method.
 *
 *   Construct an entry for the name given by 'str' and 'len'.  If 'copy'
 *   is non-zero, the entry allocates its own buffer of 'len' characters
 *   and copies the name into it (no terminator is added); the destructor
 *   releases that buffer.  If 'copy' is zero, the entry merely keeps the
 *   caller's pointer, so the caller's string must remain valid for as
 *   long as the entry uses it.
 */
CHtmlHashEntry::CHtmlHashEntry(const textchar_t *str, size_t len, int copy)
{
    /* a new entry isn't part of any hash chain */
    nxt_ = 0;

    /* note the name length, and whether the name storage belongs to us */
    len_ = len;
    copy_ = copy;

    if (!copy)
    {
        /* no private copy requested - refer directly to the caller's text */
        str_ = str;
    }
    else
    {
        /* make a private duplicate of exactly 'len' characters */
        textchar_t *dup = new textchar_t[len];
        memcpy(dup, str, len * sizeof(textchar_t));
        str_ = dup;
    }
}
/*
 *   Destroy the entry.  The name buffer is released only when the
 *   constructor was asked to make a private copy; a borrowed string is
 *   left alone.
 */
CHtmlHashEntry::~CHtmlHashEntry()
{
    /* a borrowed string isn't ours to free */
    if (!copy_)
        return;

    /* release the private copy allocated by the constructor */
    delete [] str_;
}
/* ------------------------------------------------------------------------ */
/*
 *   Concrete subclass of CHtmlHashEntry providing a case-insensitive
 *   symbol match implementation.
 *
 *   Returns non-zero if the given string has the same length as this
 *   entry's name and memicmp() reports the two buffers as equal;
 *   returns zero otherwise.
 */
int CHtmlHashEntryCI::matches(const textchar_t *str, size_t len)
{
    /* names of different lengths can never match */
    if (len != len_)
        return 0;

    /* same length - compare the contents without regard to case */
    return memicmp(str, str_, len * sizeof(*str)) == 0;
}
/* ------------------------------------------------------------------------ */
/*
 *   Concrete subclass of CHtmlHashEntry providing a case-sensitive symbol
 *   match implementation.
 *
 *   Returns non-zero if the given string has the same length as this
 *   entry's name and the two buffers are byte-for-byte identical;
 *   returns zero otherwise.
 */
int CHtmlHashEntryCS::matches(const textchar_t *str, size_t len)
{
    /* names of different lengths can never match */
    if (len != len_)
        return 0;

    /* same length - the contents must be exactly the same */
    return memcmp(str, str_, len * sizeof(*str)) == 0;
}
/* ------------------------------------------------------------------------ */
/*
 *   Hash table.
 *
 *   Create a table with 'hash_table_size' slots, all initially empty.
 *   The size must be a power of two, because the slot index is computed
 *   by masking the hash value with (size - 1).  'hash_function' must be
 *   non-null; the table keeps the pointer and deletes the object in its
 *   destructor.  Both requirements are checked with assert() only.
 */
CHtmlHashTable::CHtmlHashTable(int hash_table_size,
                               CHtmlHashFunc *hash_function)
{
    /* validate the arguments (debug builds only) */
    assert(is_power_of_two(hash_table_size));
    assert(hash_function != 0);

    /* remember the hash function */
    hash_function_ = hash_function;

    /* allocate the slot array and note its size */
    table_ = new CHtmlHashEntry *[hash_table_size];
    table_size_ = hash_table_size;

    /* every slot starts out as an empty chain */
    CHtmlHashEntry **const slots = table_;
    for (size_t idx = 0 ; idx < table_size_ ; ++idx)
        slots[idx] = 0;
}
/*
 *   Destroy the table.  This deletes the hash function object that was
 *   passed to the constructor, every entry still linked into the table,
 *   and finally the slot array itself.
 */
CHtmlHashTable::~CHtmlHashTable()
{
    /* the hash function object was handed to us at construction */
    delete hash_function_;

    /* dispose of every entry still in the table */
    delete_all_entries();

    /* finally, release the slot array */
    delete [] table_;
}
/*
* delete all entries in the hash table, but keep the table itself
*/
void CHtmlHashTable::delete_all_entries()
{
CHtmlHashEntry **tableptr;
size_t i;
for (tableptr = table_, i = 0 ; i < table_size_ ; ++i, ++tableptr)
{
CHtmlHashEntry *entry;
CHtmlHashEntry *nxt;
/* delete each entry in the list at this element */
for (entry = *tableptr ; entry ; entry = nxt)
{
/* remember the next entry */
nxt = entry->nxt_;
/* delete this entry */
delete entry;
}
/* there's nothing at this table entry now */
*tableptr = 0;
}
}
/*
 *   Verify that a value is a power of two.  Hash table sizes must be
 *   powers of two, because the slot index is obtained by masking the
 *   hash value with (size - 1).
 *
 *   n - the value to test
 *
 *   Returns 1 if n is a positive power of two (1, 2, 4, 8, ...), and 0
 *   for every other value, including zero and negative numbers.
 */
int CHtmlHashTable::is_power_of_two(int n)
{
    /*
     *   Zero and negative values are never powers of two.  Rejecting
     *   them up front matters: repeatedly halving zero never yields an
     *   odd number, so a shift-until-odd loop would never terminate on
     *   n == 0, and right-shifting a negative value is
     *   implementation-defined.
     */
    if (n <= 0)
        return 0;

    /*
     *   A positive power of two has exactly one bit set.  Subtracting 1
     *   clears that bit and sets all lower bits, so the AND of the two
     *   values is zero only when n has a single bit set.
     */
    return (n & (n - 1)) == 0;
}
/*
 *   Compute the hash value for an entry.  The value is derived from the
 *   entry's name, as reported by getstr() and getlen(), using the string
 *   overload of compute_hash().
 */
unsigned int CHtmlHashTable::compute_hash(CHtmlHashEntry *entry)
{
    /* pull out the entry's name */
    const textchar_t *name = entry->getstr();
    size_t name_len = entry->getlen();

    /* hash the name */
    return compute_hash(name, name_len);
}
/*
 *   Compute the hash value for a string.  The table's hash function
 *   object produces the raw value, which is then masked with
 *   (table_size_ - 1) to yield the slot index.
 */
unsigned int CHtmlHashTable::compute_hash(const textchar_t *str, size_t len)
{
    /* get the raw hash value from the hash function object */
    unsigned int raw = hash_function_->compute_hash(str, len);

    /* reduce it to a slot index by masking with the table size less one */
    return (raw & (table_size_ - 1));
}
/*
 *   Add an object to the table.  The entry is linked in at the head of
 *   the chain for its hash value.  No check is made for an existing
 *   entry with the same name.
 */
void CHtmlHashTable::add(CHtmlHashEntry *entry)
{
    /* locate the chain head for this entry's hash value */
    CHtmlHashEntry **head = &table_[compute_hash(entry)];

    /* push the entry onto the front of the chain */
    entry->nxt_ = *head;
    *head = entry;
}
/*
 *   Remove an object.  The entry is unlinked from the chain for its hash
 *   value; the entry itself is not deleted, and its own 'nxt_' link is
 *   left as it was.  If the entry isn't in that chain, nothing happens.
 */
void CHtmlHashTable::remove(CHtmlHashEntry *entry)
{
    CHtmlHashEntry **link;

    /*
     *   Walk the chain by way of the links themselves: start at the slot
     *   in the table, then move on to each entry's 'nxt_' field.  The
     *   link that refers to our entry - whether it's the slot or a
     *   predecessor's 'nxt_' - is the one to redirect.
     */
    for (link = &table_[compute_hash(entry)] ; *link != 0 ;
         link = &(*link)->nxt_)
    {
        if (*link == entry)
        {
            /* found it - make this link skip over the entry */
            *link = entry->nxt_;
            return;
        }
    }
}
/*
 *   Find an object in the table matching a given string.  Only the chain
 *   for the string's hash value is searched.  Returns the first entry in
 *   that chain whose matches() method accepts the string, or null if
 *   there is no such entry.
 */
CHtmlHashEntry *CHtmlHashTable::find(const textchar_t *str, size_t len)
{
    /* start at the head of the chain for this string's hash value */
    CHtmlHashEntry *cur = table_[compute_hash(str, len)];

    /* step along the chain until we find a match or run off the end */
    while (cur != 0 && !cur->matches(str, len))
        cur = cur->nxt_;

    /* this is either the matching entry or null */
    return cur;
}
/*
 *   Find an object in the table matching a given leading substring.
 *   We return the longest-named entry that matches a leading substring
 *   of the given string.  For example, if there are entries A, AB, ABC,
 *   and ABCE, and this routine is called to find something matching
 *   ABCDEFGH, we return ABC as the match: not ABCE, since it doesn't
 *   match any leading substring of the given string, and not A or AB,
 *   even though they match, since ABC also matches and is longer.
 *
 *   The empty prefix is never tried, so a null return means that no
 *   prefix of one or more characters is in the table.
 */
CHtmlHashEntry *CHtmlHashTable::find_leading_substr(
    const textchar_t *str, size_t len)
{
    /*
     *   look up successively shorter prefixes, beginning with the entire
     *   string and dropping one trailing character each time
     */
    size_t prefix_len = len;
    while (prefix_len != 0)
    {
        /* the first hit is necessarily the longest match */
        CHtmlHashEntry *hit = find(str, prefix_len);
        if (hit != 0)
            return hit;

        /* no luck at this length - try a shorter prefix */
        --prefix_len;
    }

    /* no prefix of any length is in the table */
    return 0;
}
/*
* Enumerate all entries
*/
void CHtmlHashTable::enum_entries(void (*func)(void *, CHtmlHashEntry *),
void *ctx)
{
CHtmlHashEntry **tableptr;
size_t i;
/* go through each hash value */
for (tableptr = table_, i = 0 ; i < table_size_ ; ++i, ++tableptr)
{
/* go through each entry at this hash value */
CHtmlHashEntry *entry, *nxt;
for (entry = *tableptr ; entry ; entry = nxt)
{
/* remember the next entry in case the callback deletes this one */
nxt = entry->nxt_;
/* invoke the callback on this entry */
(*func)(ctx, entry);
}
}
}
|