File: gmlfeatureextractor.py

package info (click to toggle)
python-stetl 1.0.9%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 89,428 kB
  • ctags: 720
  • sloc: python: 3,527; xml: 699; sql: 428; makefile: 153; sh: 45
file content (54 lines) | stat: -rw-r--r-- 1,820 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Extracts arrays of etree GML features from a GML etree document.
#
# Author: Just van den Broecke
#
from stetl.util import Util
from stetl.filter import Filter
from stetl.packet import FORMAT

# Module-level log object used by the filter below, obtained from the project's Util helper.
log = Util.get_log('gmlfeatureextractor')


class GmlFeatureExtractor(Filter):
    """
    Extract arrays of GML features etree elements from etree docs.

    The element names to extract are read from the comma-separated
    ``feature_tags`` config option. Elements are selected with an XPath
    query on ``local-name()``, i.e. regardless of their namespace.

    consumes=FORMAT.etree_doc, produces=FORMAT.etree_feature_array
    """

    # XPATH Query base for extracting features by (non-namespaced thus local-name) tagname
    xpath_base = "//*[local-name() = '%s']"

    # Constructor
    def __init__(self, configdict, section='gml_feature_extractor'):
        """
        Set up the filter and build the XPath expression from the config.

        :param configdict: configuration object, passed through to Filter.__init__
        :param section: name of the config section for this filter
        """
        Filter.__init__(self, configdict, section, consumes=FORMAT.etree_doc, produces=FORMAT.etree_feature_array)

        log.info("cfg = %s" % self.cfg.to_string())

        # Build the XPath expression from the configured tag names.
        # Whitespace around each name is stripped: a value like "a, b" would
        # otherwise yield local-name() = ' b', which can never match an element.
        self.feature_tags = [tag.strip() for tag in self.cfg.get('feature_tags').split(',')]

        # Running count of features extracted over all invoke() calls
        self.total_features = 0

        # One sub-query per tag name, combined with the XPath union operator '|'
        self.xpath_expression = '|'.join(
            [GmlFeatureExtractor.xpath_base % feature_tag for feature_tag in self.feature_tags])

        log.info("xpath expression = %s" % self.xpath_expression)

    def invoke(self, packet):
        """
        Replace the packet's etree doc by the features found in it.

        :param packet: packet whose ``data`` is queried via its ``xpath()`` method
        :return: the same packet; ``packet.data`` now holds the XPath query result.
                 The packet is returned untouched when it has no data or
                 signals end of stream.
        """
        if packet.data is None or packet.is_end_of_stream():
            return packet

        # Input is etree_docs so extract all features into an array using XPATH
        packet.data = packet.data.xpath(self.xpath_expression)
        self.total_features += len(packet.data)
        log.info('extracted %d features from GML etree doc (total = %d)' % (len(packet.data), self.total_features))
        return packet