File: ControlMatcher.h

package info (click to toggle)
trafficserver 9.2.5%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 53,008 kB
  • sloc: cpp: 345,484; ansic: 31,134; python: 24,200; sh: 7,271; makefile: 3,045; perl: 2,261; java: 277; pascal: 119; sql: 94; xml: 2
file content (357 lines) | stat: -rw-r--r-- 10,923 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
/** @file

  A brief file description

  @section license License

  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements.  See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership.  The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
 */

/*****************************************************************************
 *
 *  ControlMatcher.h - Interface to general purpose matcher
 *
 *
 *
 *
 *  Description:
 *
 *     The control matcher module provides the ability to lookup arbitrary
 *  information specific to a URL and IP address.  The outside
 *  world only sees the ControlMatcher class which parses the relevant
 *  configuration file and builds the lookup table
 *
 *     Four types of matches are supported: hostname, domain name, ip address
 *  and URL regex.  For these four types, three lookup tables are used.  Regex and
 *  ip lookups have their own tables and host and domain lookups share a single
 *  table
 *
 *  Module Purpose & Specifications
 *  -------------------------------
 *   -  to provide a generic mechanism for matching configuration data
 *       against hostname, domain name, ip address and URL regex
 *   -  the generic mechanism should require minimum effort to apply it
 *       to new features that require per request matching
 *   -  for the mechanism to be efficient such that lookups against
 *       the tables are not a performance problem when they are both done
 *       for every request through the proxy and the set of matching
 *       entries is very large
 *
 *  Lookup Table Descriptions
 *  -------------------------
 *
 *   regex table - implemented as a linear list of regular expressions to
 *       match against
 *
 *   host/domain table - The host domain table is logically implemented as
 *       a tree, broken up at each partition in a hostname.  Three mechanisms
 *       are used to move from one level to the next: a hash table, a fixed
 *       sized array and a constant time index (class charIndex).  The constant
 *       time index is only used to go from the root domain to the first
 *       level partition (ie: .com). The fixed array is used for subsequent
 *       partitions until the fan out exceeds the array's fixed size, at which
 *       time the fixed array is converted to a hash table
 *
 *   ip table - supports ip ranges.  A single ip address is treated as
 *       a range with the same beginning and end address.  The table
 *       is divided up into a fixed number of levels, indexed on 8 bit
 *       boundaries, starting at the high bit of the address.  Subsequent
 *       levels are allocated only when needed.
 *
 ****************************************************************************/

//
// IMPORTANT: Instantiating these templates
//
//    The implementation of these templates appears in
//     ControlMatcher.cc.  To get the templates instantiated
//     correctly on all compilers, new uses MUST explicitly
//     instantiate the new instance at the bottom of
//     ControlMatcher.cc
//

#pragma once

#include "tscore/IpMap.h"
#include "tscore/Result.h"
#include "tscore/MatcherUtils.h"

#include "tscore/ink_apidefs.h"
#include "tscore/ink_defs.h"
#include "HTTP.h"
#include "tscore/Regex.h"
#include "URL.h"

#include <unordered_map>

#ifdef HAVE_CTYPE_H
#include <cctype>
#endif

// Report the configuration error message _buf.
//
// While _already is false the manager is signalled with
// MGMT_SIGNAL_CONFIG_ERROR; _already is then set to true, so later expansions
// using the same flag skip the signal. The message is passed to Error() on
// every expansion.
//
//   _buf     - message text. It is expanded twice, so it should be free of
//              side effects.
//   _already - modifiable bool lvalue used as the "already signalled" latch.
//
// Both parameters are parenthesized so that an argument containing a
// low-precedence operator (for example a conditional expression) is treated
// as one operand in the comparison and in the assignment.
//
// The body is deliberately left as a plain compound statement so call sites
// keep compiling with or without a trailing semicolon. Because of that, do not
// use it as the unbraced body of an `if` that has an `else`.
#define SignalError(_buf, _already)                           \
  {                                                           \
    if ((_already) == false)                                  \
      pmgmt->signalManager(MGMT_SIGNAL_CONFIG_ERROR, (_buf)); \
    (_already) = true;                                        \
    Error("%s", (_buf));                                      \
  }

// Forward declarations: within this header these types are only referred to
// through pointers, so their full definitions are not required here.
class HostLookup;    // pointed to by HostMatcher::host_lookup
struct HttpApiInfo;  // pointed to by HttpRequestData::api_info
struct matcher_line; // argument type of the NewEntry() methods
struct matcher_tags; // passed by pointer to the ControlMatcher constructor

// Abstract source of the lookup keys used by the matcher tables.
//
// get_string(), get_host() and get_ip() provide the keys for the tables.
// get_ip() may report either the client or the server address; which one is
// up to the module user and how it wants its table keyed.
struct RequestData {
  virtual ~RequestData() = default;

  // Table lookup keys.
  virtual char *get_string()       = 0;
  virtual const char *get_host()   = 0;
  virtual sockaddr const *get_ip() = 0;

  // Client address accessor, available alongside get_ip().
  virtual sockaddr const *get_client_ip() = 0;
};

// RequestData implementation for HTTP transactions. The accessors are only
// declared here; the data members are public and filled in by the user of the
// class.
class HttpRequestData : public RequestData
{
public:
  // src_ip and dest_ip have no default member initializer, so they are
  // cleared explicitly; every other member is initialized in its declaration.
  HttpRequestData()
  {
    ink_zero(src_ip);
    ink_zero(dest_ip);
  }

  // RequestData interface.
  char *get_string() override;
  const char *get_host() override;
  sockaddr const *get_ip() override;
  sockaddr const *get_client_ip() override;

  // Request state (declaration order is significant for object layout).
  HTTPHdr *hdr          = nullptr;
  char *hostname_str    = nullptr;
  HttpApiInfo *api_info = nullptr;
  time_t xact_start     = 0;
  IpEndpoint src_ip;
  IpEndpoint dest_ip;
  uint16_t incoming_port                = 0;
  char *tag                             = nullptr;
  bool internal_txn                     = false;
  URL **cache_info_lookup_url           = nullptr;
  URL **cache_info_parent_selection_url = nullptr;
};

// Mixin class for the state shared by all matcher templates. It only wraps
// the common members so that the initializers etc. do not have to be
// duplicated in every matcher. If someone wants to rewrite all this code to
// use setters and getters, by all means, please do so. The plumbing is in
// place :).
//
// The destructor releases data_array with delete[]. Copying is therefore
// disabled: a memberwise copy would leave two objects releasing the same
// array.
template <class Data> class BaseMatcher
{
public:
  // name and filename are stored as pointers, not copied.
  BaseMatcher(const char *name, const char *filename) : matcher_name(name), file_name(filename) {}

  ~BaseMatcher() { delete[] data_array; }

  BaseMatcher(const BaseMatcher &)            = delete;
  BaseMatcher &operator=(const BaseMatcher &) = delete;

protected:
  int num_el               = -1;        // number of elements in the table
  const char *matcher_name = "unknown"; // Used for Debug/Warning/Error messages
  const char *file_name    = nullptr;   // Used for Debug/Warning/Error messages
  Data *data_array         = nullptr;   // Array with the Data elements, released with delete[]
  int array_len            = -1;        // length of the arrays (all three are the same length)
};

// Matcher table for URL entries. The member functions are only declared here;
// their definitions live outside this header.
template <class Data, class MatchResult> class UrlMatcher : protected BaseMatcher<Data>
{
  using super = BaseMatcher<Data>;

public:
  // The base class is inherited as protected; these using-declarations make
  // the shared members accessible as public members of this class.
  using super::num_el;
  using super::matcher_name;
  using super::file_name;
  using super::data_array;
  using super::array_len;

  UrlMatcher(const char *name, const char *filename);
  ~UrlMatcher();

  // Table construction.
  void AllocateSpace(int num_entries);
  Result NewEntry(matcher_line *line_info);

  // Lookup and diagnostics; neither modifies the matcher.
  void Match(RequestData *rdata, MatchResult *result) const;
  void Print() const;

private:
  std::unordered_map<std::string, int> url_ht;
  char **url_str = nullptr; // array of url strings
  int *url_value = nullptr; // array of positions of url strings
};

// Matcher table for regular expression entries. The member functions are only
// declared here; their definitions live outside this header.
template <class Data, class MatchResult> class RegexMatcher : protected BaseMatcher<Data>
{
  using super = BaseMatcher<Data>;

public:
  // The base class is inherited as protected; these using-declarations make
  // the shared members accessible as public members of this class.
  using super::num_el;
  using super::matcher_name;
  using super::file_name;
  using super::data_array;
  using super::array_len;

  RegexMatcher(const char *name, const char *filename);
  ~RegexMatcher();

  // Table construction.
  void AllocateSpace(int num_entries);
  Result NewEntry(matcher_line *line_info);

  // Lookup and diagnostics; neither modifies the matcher.
  void Match(RequestData *rdata, MatchResult *result) const;
  void Print() const;

protected:
  // Protected rather than private, so derived matchers can reach them.
  pcre **re_array = nullptr; // array of compiled regexs
  char **re_str   = nullptr; // array of uncompiled regex strings
};

// RegexMatcher variant that supplies its own constructor and Match().
// RegexMatcher::Match is not virtual, so the Match() declared here hides the
// inherited one rather than overriding it.
template <class Data, class MatchResult> class HostRegexMatcher : public RegexMatcher<Data, MatchResult>
{
  // Note: `super` names BaseMatcher<Data> (reached through RegexMatcher), not
  // the direct base class.
  using super = BaseMatcher<Data>;

public:
  using super::num_el;
  using super::matcher_name;
  using super::file_name;
  using super::data_array;
  using super::array_len;

  HostRegexMatcher(const char *name, const char *filename);

  void Match(RequestData *rdata, MatchResult *result) const;
};

// Matcher table that performs its lookups through a HostLookup object. The
// member functions other than getHLookup() are only declared here.
template <class Data, class MatchResult> class HostMatcher : protected BaseMatcher<Data>
{
  using super = BaseMatcher<Data>;

public:
  // The base class is inherited as protected; these using-declarations make
  // the shared members accessible as public members of this class.
  using super::num_el;
  using super::matcher_name;
  using super::file_name;
  using super::data_array;
  using super::array_len;

  HostMatcher(const char *name, const char *filename);
  ~HostMatcher();

  // Table construction.
  void AllocateSpace(int num_entries);
  Result NewEntry(matcher_line *line_info);

  // Lookup and diagnostics; neither modifies the matcher.
  void Match(RequestData *rdata, MatchResult *result) const;
  void Print() const;

  // Returns the underlying lookup structure (nullptr until one has been set).
  HostLookup *
  getHLookup()
  {
    return host_lookup;
  }

private:
  static void PrintFunc(void *opaque_data);

  HostLookup *host_lookup = nullptr; // Data structure to do the lookups
};

// Matcher table that performs its lookups through an IpMap. Unlike the other
// matchers, Match() takes the address to look up as a separate argument, and
// no destructor is declared.
template <class Data, class MatchResult> class IpMatcher : protected BaseMatcher<Data>
{
  using super = BaseMatcher<Data>;

public:
  // The base class is inherited as protected; these using-declarations make
  // the shared members accessible as public members of this class.
  using super::num_el;
  using super::matcher_name;
  using super::file_name;
  using super::data_array;
  using super::array_len;

  IpMatcher(const char *name, const char *filename);

  // Table construction.
  void AllocateSpace(int num_entries);
  Result NewEntry(matcher_line *line_info);

  // Lookup and diagnostics; neither modifies the matcher.
  void Match(sockaddr const *ip_addr, RequestData *rdata, MatchResult *result) const;
  void Print() const;

private:
  static void PrintFunc(void *opaque_data);

  IpMap ip_map; // Data structure to do lookups
};

// Bit flags for the ControlMatcher constructor's flags_in argument. Each value
// is a distinct bit so they can be combined with |.
//
// The replacement lists are parenthesized so each flag behaves as a single
// value in any expression; unparenthesized, `ALLOW_IP_TABLE + 1` would expand
// to `1 << 1 + 1`.
#define ALLOW_HOST_TABLE (1 << 0)
#define ALLOW_IP_TABLE (1 << 1)
#define ALLOW_REGEX_TABLE (1 << 2)
#define ALLOW_HOST_REGEX_TABLE (1 << 3)
#define ALLOW_URL_TABLE (1 << 4)
#define DONT_BUILD_TABLE (1 << 5) // for testing

// Front end over the individual matcher tables (host, ip, regex, host regex
// and url). It holds one pointer per table type plus the configuration it was
// constructed with. The constructor, destructor, BuildTable*(), Match() and
// Print() are only declared here.
template <class Data, class MatchResult> class ControlMatcher
{
public:
  // Parameter name must not be deallocated before this object is.
  // flags_in is a combination of the ALLOW_* / DONT_BUILD_TABLE bits; by
  // default every ALLOW_* table type is enabled.
  ControlMatcher(const char *file_var, const char *name, const matcher_tags *tags,
                 int flags_in = (ALLOW_HOST_TABLE | ALLOW_IP_TABLE | ALLOW_REGEX_TABLE | ALLOW_HOST_REGEX_TABLE | ALLOW_URL_TABLE));
  ~ControlMatcher();

  int BuildTable();
  int BuildTableFromString(char *str);

  void Match(RequestData *rdata, MatchResult *result) const;
  void Print() const;

  // Returns the stored entry count (m_numEntries).
  int
  getEntryCount() const
  {
    return m_numEntries;
  }

  // Accessors for the individual tables; each returns the stored pointer,
  // which is nullptr when that table has not been set.
  HostMatcher<Data, MatchResult> *
  getHostMatcher()
  {
    return hostMatch;
  }

  RegexMatcher<Data, MatchResult> *
  getReMatcher()
  {
    return reMatch;
  }

  IpMatcher<Data, MatchResult> *
  getIPMatcher()
  {
    return ipMatch;
  }

  // private (by convention only: these members are publicly accessible)
  //
  // Every member carries a default initializer so that no pointer or buffer
  // is ever observed in an indeterminate state, whatever the constructor does.
  RegexMatcher<Data, MatchResult> *reMatch     = nullptr;
  UrlMatcher<Data, MatchResult> *urlMatch      = nullptr;
  HostMatcher<Data, MatchResult> *hostMatch    = nullptr;
  IpMatcher<Data, MatchResult> *ipMatch        = nullptr;
  HostRegexMatcher<Data, MatchResult> *hrMatch = nullptr;

  const matcher_tags *config_tags         = nullptr;
  char config_file_path[PATH_NAME_MAX]    = {'\0'};
  int flags                               = 0;
  int m_numEntries                        = 0;
  const char *matcher_name                = "unknown"; // Used for Debug/Warning/Error messages
};