File: patternize.h

package info (click to toggle)
syslog-ng 3.3.5-4
  • links: PTS
  • area: main
  • in suites: wheezy
  • size: 14,120 kB
  • sloc: ansic: 60,880; sh: 12,423; yacc: 7,308; xml: 1,554; makefile: 1,242; python: 801; lex: 262; perl: 216; awk: 184
file content (78 lines) | stat: -rw-r--r-- 2,434 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
/*
 * Copyright (c) 2002-2009 Peter Gyongyosi, Budapest, Hungary
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 *
 * Note that this permission is granted for only version 2 of the GPL.
 *
 * As an additional exemption you are allowed to compile & link against the
 * OpenSSL libraries as published by the OpenSSL project. See the file
 * COPYING for details.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#ifndef PATTERNIZE_H_INCLUDED
#define PATTERNIZE_H_INCLUDED

/*
 * Clustering algorithm identifiers.
 * NOTE(review): presumably the values accepted by the `algo` argument of
 * ptz_new() and stored in Patternizer.algo -- confirm in patternize.c.
 */
#define PTZ_ALGO_SLCT 1
#define PTZ_ALGO_LOGHOUND 2

/*
 * Iteration mode identifiers.
 * NOTE(review): presumably the values accepted by the `iterate` argument of
 * ptz_new() and stored in Patternizer.iterate -- confirm in patternize.c.
 * "HIEARARCH" is a misspelling of "HIERARCH"; the name is kept as-is
 * because it is part of the public interface of this header.
 */
#define PTZ_ITERATE_NONE 0
#define PTZ_ITERATE_OUTLIERS 1
#define PTZ_ITERATE_HIEARARCH 2

/*
 * Special character codes: 0x1E is the ASCII record separator (RS) and
 * 0x1A is the ASCII substitute (SUB) control character.
 * NOTE(review): presumably used as in-band separator/marker bytes that are
 * not expected to occur in log text -- confirm usage in patternize.c.
 */
#define PTZ_SEPARATOR_CHAR 0x1E
#define PTZ_PARSER_MARKER_CHAR 0x1A

/*
 * Parser identifiers. Only one parser id (ESTRING, 0) is defined here.
 * NOTE(review): PTZ_NUM_OF_PARSERS looks like the count of PTZ_PARSER_*
 * ids and would need to be bumped when a new id is added -- confirm.
 */
#define PTZ_NUM_OF_PARSERS 1
#define PTZ_PARSER_ESTRING 0

#include "syslog-ng.h"

/*
 * Patternizer: configuration and input data of one patternizing run.
 *
 * The configuration field names mirror the parameters of ptz_new().
 * NOTE(review): the semantics described below are inferred from the names
 * only; confirm against patternize.c.
 */
typedef struct _Patternizer
{
  /* presumably one of the PTZ_ALGO_* values */
  guint algo;
  /* presumably one of the PTZ_ITERATE_* values */
  guint iterate;
  /* NOTE(review): looks like an absolute support count, possibly derived
   * from support_treshold -- confirm. */
  guint support;
  guint num_of_samples;
  /* "treshold" is a misspelling of "threshold"; kept because the field
   * name is part of the public struct layout. */
  gdouble support_treshold;
  /* NOTE(review): ownership of this string is not visible from this
   * header -- check ptz_new()/ptz_free(). */
  gchar *delimiters;

  /* NOTE: for now, all the logs that are read in are stored in memory.
   * This brings in some obvious constraints and should be solved
   * in a more optimized way later. */
  GPtrArray *logs;

} Patternizer;

/*
 * Cluster: one group of log lines found by the clustering step.
 *
 * NOTE(review): element types and ownership of the members are not
 * visible from this header; the notes below are assumptions to confirm
 * against patternize.c.
 */
typedef struct _Cluster
{
  /* presumably the log lines that belong to this cluster */
  GPtrArray *loglines;
  /* array of strings; presumably the words that make up the cluster's
   * pattern -- whether it is NULL-terminated is not visible here */
  char **words;
  /* presumably sample log lines kept for this cluster (see the
   * num_of_samples setting) */
  GPtrArray *samples;
} Cluster;

/*
 * The two functions below are only declared here for the test program;
 * they are not meant to be part of the regular API.
 *
 * Both take the array of logs, a support value and the delimiter string,
 * and return a GHashTable.
 * NOTE(review): the key/value types of the returned tables and who is
 * responsible for destroying them are not visible from this header --
 * confirm in patternize.c.
 */
GHashTable *ptz_find_frequent_words(GPtrArray *logs, guint support, gchar *delimiters, gboolean two_pass);
GHashTable *ptz_find_clusters_slct(GPtrArray *logs, guint support, gchar *delimiters, guint num_of_samples);


/*
 * ptz_find_clusters: runs cluster discovery for the given Patternizer and
 * returns the result as a GHashTable.
 * NOTE(review): presumably dispatches on self->algo / self->iterate and the
 * table values are Cluster instances -- confirm in patternize.c.
 */
GHashTable *ptz_find_clusters(Patternizer *self);
/*
 * ptz_print_patterndb: prints a cluster table, given the delimiter string
 * and a flag selecting named parsers.
 * NOTE(review): the output destination and format are not visible from
 * this header; `clusters` is presumably a table as returned by
 * ptz_find_clusters() -- confirm.
 */
void ptz_print_patterndb(GHashTable *clusters, gchar *delimiters, gboolean named_parsers);

/*
 * ptz_load_file: loads log input from `input_file` into the Patternizer.
 *
 * NOTE(review): presumably appends the lines to self->logs, returns TRUE on
 * success, and on failure returns FALSE with `*error` set following the
 * usual GLib GError convention; the exact meaning of `no_parse` is not
 * visible from this header -- confirm in patternize.c.
 */
gboolean ptz_load_file(Patternizer *self, gchar *input_file, gboolean no_parse, GError **error);

/*
 * ptz_new: creates a Patternizer. The parameter names mirror the
 * configuration fields of the Patternizer struct.
 * NOTE(review): presumably `algo` takes a PTZ_ALGO_* value and `iterate` a
 * PTZ_ITERATE_* value; whether `delimiters` is copied or borrowed is not
 * visible from this header -- confirm in patternize.c.
 */
Patternizer *ptz_new(gdouble support_treshold, guint algo, guint iterate, guint num_of_samples, gchar *delimiters);
/*
 * ptz_free: releases a Patternizer.
 * NOTE(review): presumably the counterpart of ptz_new(); which members it
 * frees is not visible from this header -- confirm.
 */
void ptz_free(Patternizer *self);

#endif