File: regexfilter.py

package info (click to toggle)
python-stetl 1.2%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 89,988 kB
  • sloc: python: 5,007; xml: 707; sql: 430; makefile: 155; sh: 50
file content (63 lines) | stat: -rw-r--r-- 1,796 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Extracts data from a string using a regular expression and generates a record.
#
# Author: Frank Steggink

from stetl.component import Config
from stetl.filter import Filter
from stetl.packet import FORMAT
from stetl.util import Util
import re

# Module-level logger used by the RegexFilter methods below; obtained
# through Util.get_log under the name "regexfilter".
log = Util.get_log("regexfilter")


class RegexFilter(Filter):
    """
    Filter that applies a regular expression to string data and turns the named groups into a record.

    The pattern is applied with ``match``, so it is anchored at the start of the incoming
    string, and it is compiled with ``re.DOTALL`` so that ``.`` also matches newlines.

    consumes=FORMAT.string, produces=FORMAT.record
    """

    # Start attribute config meta
    # The Config decorator exposes configured properties as read-only
    # attributes of the component (Decorator pattern).

    @Config(ptype=str, default=None, required=True)
    def pattern_string(self):
        """
        Regex pattern string. Named groups in the pattern become the keys of the produced record.
        """
        pass

    # End attribute config meta

    # Constructor
    def __init__(self, configdict, section, consumes=FORMAT.string, produces=FORMAT.record):
        """
        Set up the filter and compile the configured pattern once.

        :param configdict: passed through unchanged to ``Filter.__init__``
        :param section: passed through unchanged to ``Filter.__init__``
        :param consumes: input format, defaults to ``FORMAT.string``
        :param produces: output format, defaults to ``FORMAT.record``
        """
        Filter.__init__(self, configdict, section, consumes, produces)

        # Compile up front so invoke() can reuse the same pattern object for every packet.
        # DOTALL (same flag as re.S) lets '.' span line breaks in multi-line input.
        self.regex_object = re.compile(self.pattern_string, flags=re.DOTALL)

    def init(self):
        """
        Log the start of the filter and verify that a pattern has been configured.

        :raises ValueError: when ``pattern_string`` is None
        """
        log.info('Init: regex filter')
        if self.pattern_string is not None:
            return

        # No pattern_string available: report it and refuse to continue.
        message = 'The pattern_string needs to be configured'
        log.error(message)
        raise ValueError(message)

    def exit(self):
        """
        Log that the filter is finishing.
        """
        log.info('Exit: regex filter')

    def invoke(self, packet):
        """
        Replace the string data of a packet by a dict with the named groups of the match.

        :param packet: object whose ``data`` attribute holds the string to match, or None
        :return: the same packet; ``data`` is left as None when it was None, becomes the
                 ``groupdict()`` of the match when the pattern matches, and becomes an
                 empty dict when it does not match
        """
        text = packet.data
        if text is None:
            # Nothing to match against: hand the packet back untouched.
            return packet

        found = self.regex_object.match(text)
        packet.data = {} if found is None else found.groupdict()
        return packet