1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
|
//
// HtURLSeedScore.cc
//
// URLSeedScore:
// Holds a list of configured adjustments to be applied on a given
// score and given URL.
//
// Part of the ht://Dig package <http://www.htdig.org/>
// Copyright (c) 2000-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: HtURLSeedScore.cc,v 1.6 2004/05/28 13:15:24 lha Exp $
#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */
#include "StringList.h"
#include "HtRegex.h"
#include "HtURLSeedScore.h"
#include <stdio.h>
#include <ctype.h>
// This class is only used in private members of URLSeedScore.
// The OO-right thing would be to nest this inside the private
// declaration of HtURLSeedScore, but that would cause portability
// problems according to
// <URL:http://www.mozilla.org/hacking/portable-cpp.html#inner_classes>.
class ScoreAdjustItem : public Object
{
public:
    // Build an item from two strings: the first supplies the pattern(s)
    // this item matches against, the second is parsed as the adjustment
    // formula.
    ScoreAdjustItem(String &, String &);
    ~ScoreAdjustItem();

    // Tell whether the given string is matched by this item's pattern.
    bool Match(const String &candidate)
    {
        if (match.match(candidate, 1, 0) != 0)
            return true;
        return false;
    }

    // Apply this item's adjustment: scale the incoming score by the
    // multiplicative factor, then add the additive constant.
    double adjust_score(double orig)
    {
        return (orig * my_mul_factor) + my_add_constant;
    }

    // Parse error text; an empty string means no error was recorded.
    String& ErrMsg()
    {
        return myErrMsg;
    }

private:
    // Additive part of the adjustment.
    double my_add_constant;

    // Multiplicative part of the adjustment.
    double my_mul_factor;

    // Matcher used by Match().
    HtRegex match;

    // Error text.  Declared static, so there is a single message shared
    // by all ScoreAdjustItem objects rather than one per item.
    static String myErrMsg;

    // Declared private and intentionally never implemented: this keeps
    // the compiler from generating them, so accidental default
    // construction, copying or assignment fails to build.
    ScoreAdjustItem();
    ScoreAdjustItem(const ScoreAdjustItem &);
    void operator= (const ScoreAdjustItem &);
};
// Definition of myErrMsg.  The member is declared static in
// ScoreAdjustItem, so this one String is shared by every item; it is
// constructed from an empty C string.
String ScoreAdjustItem::myErrMsg("");
// Build an adjustment item.
//
// url_regex: '|'-separated list of patterns; it is split on '|' and
//            handed to the matcher with setEscaped().
// formula:   adjustment in the ([*]N[ ]*)?[+]?M format, i.e. an optional
//            "*N" multiplicative factor followed by an optional additive
//            constant M (which may carry a sign).  Examples: "*2", "+5",
//            "-3", "*1.5+10".  At least one of the two parts must be
//            present.
//
// On a parse failure the error message is set (see ErrMsg()) and the
// factor/constant keep their identity values (multiply by 1, add 0)
// instead of being left uninitialised.
ScoreAdjustItem::ScoreAdjustItem(String &url_regex, String &formula)
    : my_add_constant(0), my_mul_factor(1)
{
    double mul_factor = 1;
    double add_constant = 0;
    bool factor_found = false;
    bool constant_found = false;
    int chars_so_far;

    // myErrMsg is static, i.e. shared by all items.  Clear it so that a
    // failure recorded while building an earlier item is not reported
    // for this one.
    myErrMsg = "";

    StringList l(url_regex.get(), '|');
    match.setEscaped(l);
    // FIXME: Missing method to check if the regex was in error.
    // myErrMsg = form("%s is not a valid regex", url_regex.get());

    char *s = formula.get();

    // Parse the ([*]N[ ]*)?[+]?M format.
    if (s[0] == '*')
    {
        // Skip past the '*'.
        s++;

        // There is a mul_factor.  Let's parse it.
        chars_so_far = 0;
        sscanf(s, "%lf%n", &mul_factor, &chars_so_far);

        // If '%lf' failed to match, then it will show up as either no
        // assignment to chars_so_far, or as writing 0 there.
        if (chars_so_far == 0)
        {
            // Report the complete formula, as the other messages do,
            // rather than only the part after the '*'.
            myErrMsg = form("%s is not a valid adjustment formula",
                            formula.get());
            return;
        }

        // Skip past the number.
        s += chars_so_far;

        // Skip any whitespaces.  The cast keeps isspace() well-defined
        // for characters with a negative plain-char value.
        while (isspace(static_cast<unsigned char>(*s)))
            s++;

        // Eat any plus-sign; it's redundant if alone, and may come
        // before a minus.
        if (*s == '+')
            s++;

        factor_found = true;
    }

    // If there's anything here, it must be the additive constant.
    if (*s)
    {
        chars_so_far = 0;
        sscanf(s, "%lf%n", &add_constant, &chars_so_far);

        // If '%lf' failed to match, then it will show up as either no
        // assignment to chars_so_far, or as writing 0 there.
        // We also need to check that it was the end of the input.
        if (chars_so_far == 0 || s[chars_so_far] != 0)
        {
            myErrMsg = form("%s is not a valid adjustment formula",
                            formula.get());
            return;
        }

        constant_found = true;
    }

    // Either part must be there.
    if (!factor_found && !constant_found)
    {
        myErrMsg = form("%s is not a valid formula", formula.get());
        return;
    }

    // Only a completely parsed formula is committed to the members.
    my_add_constant = add_constant;
    my_mul_factor = mul_factor;
}
// Nothing to release explicitly; the members clean up after themselves.
ScoreAdjustItem::~ScoreAdjustItem() {}
// Build the adjustment list from the "url_seed_score" configuration
// item.  The value is split on whitespace and read as consecutive
// (pattern, formula) pairs; each pair becomes one ScoreAdjustItem,
// stored in configuration order.
//
// On failure (odd number of words, or a formula that does not parse)
// myErrMsg is set and construction stops; pairs accepted before the
// failing one stay in the list.
URLSeedScore::URLSeedScore(Configuration &config)
{
    // String literals are const; do not bind one to a plain char *.
    const char *config_item = "url_seed_score";
    StringList sl(config[config_item], "\t \r\n");

    myAdjustmentList = new List();

    if (sl.Count() % 2)
    {
        myErrMsg = form("%s is not a list of pairs (odd number of items)",
                        config_item);

        // We *could* continue, but that just means the error will be
        // harder to find, unless someone actually sees the error message.
        return;
    }

    // Parse each as in TemplateList::createFromString.
    for (int i = 0; i < sl.Count(); i += 2)
    {
        String url_regex = sl[i];
        String adjust_formula = sl[i+1];

        ScoreAdjustItem *adjust_item
            = new ScoreAdjustItem(url_regex, adjust_formula);

        if (adjust_item->ErrMsg().length() != 0)
        {
            // No point in continuing beyond the error; we might just
            // overwrite the first error.
            myErrMsg = form("While parsing %s: %s",
                            config_item,
                            adjust_item->ErrMsg().get());

            // The rejected item never makes it into the list, so nothing
            // else will free it.  The message has already been copied
            // into myErrMsg above.
            delete adjust_item;
            return;
        }

        myAdjustmentList->Add(adjust_item);
    }
}
// Dispose of the adjustment list that the constructor allocated.
URLSeedScore::~URLSeedScore()
{
    delete myAdjustmentList;
}
double
URLSeedScore::noninline_adjust_score(double orig_score, const String &url)
{
List *adjlist = myAdjustmentList;
ScoreAdjustItem *adjust_item;
adjlist->Start_Get();
while ((adjust_item = (ScoreAdjustItem *) adjlist->Get_Next()))
{
// Use the first match only.
if (adjust_item->Match(url))
return adjust_item->adjust_score(orig_score);
}
// We'll get here if no match was found.
return orig_score;
}
|