File: line_reader.py

package info (click to toggle)
python-efilter 1.5-2.1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye
  • size: 596 kB
  • sloc: python: 4,342; makefile: 51
file content (104 lines) | stat: -rw-r--r-- 2,826 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# EFILTER Forensic Query Language
#
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Implements IRepeated for text files and some common formats.
"""

__author__ = "Adam Sindelar <adamsh@google.com>"

import six
import threading

from efilter.protocols import counted
from efilter.protocols import repeated


class LazyLineReader(object):
    """Reads in a line at a time and supports restarting.

    Wraps a seekable file-like object and exposes its lines as a repeated
    value. Every iteration starts again from offset 0, and each iterator
    remembers its own offset, so several iterations over the same reader can
    be in flight at once without disturbing one another.
    """

    # Class-level defaults so the attributes exist even if __init__ never ran.
    fd = None
    _seek_lock = None

    def __init__(self, fd):
        """Wrap 'fd', which must support seek, tell, readline and close."""
        self.fd = fd
        # Serializes the seek/readline/tell sequence on the shared 'fd'.
        self._seek_lock = threading.Lock()

    def __iter__(self):
        return self.getvalues()

    def __del__(self):
        """Close 'fd' if it hasn't been closed already.

        If LazyLineReader was instantiated using EFILTER's stdlib.io functions
        then it won't be inside of a with block and we need to close fd when
        the repeated is deallocated.
        """
        fd = self.fd
        # 'fd' is still the class default (None) when __init__ did not run or
        # failed before assigning it; there is nothing to close in that case.
        if fd is not None and not fd.closed:
            fd.close()

    # IRepeated implementation.

    def readline_at_offset(self, offset):
        """Read the line that starts at 'offset'.

        Returns:
            A (line, new_offset) tuple, where 'new_offset' is the position
            reported by fd.tell() right after the line was read. 'line' is
            whatever fd.readline() returned (falsy at end of file).
        """
        # The context manager guarantees the lock is released even if seek,
        # readline or tell raises; otherwise every later read would deadlock.
        with self._seek_lock:
            self.fd.seek(offset)
            line = self.fd.readline()
            new_offset = self.fd.tell()

        return line, new_offset

    def getvalues(self):
        """Yield every line of 'fd', always starting from offset 0."""
        line, offset = self.readline_at_offset(0)
        while line:
            yield line
            line, offset = self.readline_at_offset(offset)

    def value_type(self):
        """Return the type of the values produced (the base string type)."""
        return six.string_types[0]

    def value_eq(self, other):
        """Compare with 'other'.

        Two readers are equal when their underlying 'fd' objects compare
        equal; anything else is compared line by line, which reads both
        sides in full.
        """
        if isinstance(other, type(self)):
            return self.fd == other.fd

        return list(self) == list(other)

    def value_apply(self, f):
        """Lazily yield f(line) for every line."""
        for value in self:
            yield f(value)

    # Counted implementation.

    def count(self):
        """Return the number of lines; this reads through the whole 'fd'."""
        return sum(1 for _ in self)

# Declare LazyLineReader as an implementation of the ICounted and IRepeated
# protocols.
# NOTE(review): implicit_static presumably maps the protocol functions onto
# the class's same-named methods (count, getvalues, value_type, ...) --
# confirm against efilter's protocol module.
counted.ICounted.implicit_static(for_type=LazyLineReader)
repeated.IRepeated.implicit_static(LazyLineReader)


# Register LazyLineReader as the implementation of repeated.lines for the
# file-like types available on the running interpreter.
if six.PY2:
    # The builtin 'file' class only exists on Python 2.
    repeated.lines.implement(for_type=file, implementation=LazyLineReader)

if six.PY3:
    # Python 3 has no 'file' class; the objects returned by open() derive
    # from io.IOBase instead, so register against that base class.
    import io
    repeated.lines.implement(for_type=io.IOBase, implementation=LazyLineReader)

# In-memory text buffers are registered explicitly as well.
# NOTE(review): on Python 3 six.StringIO should already be covered by the
# io.IOBase registration above -- confirm whether this call is only needed
# for Python 2.
repeated.lines.implement(for_type=six.StringIO, implementation=LazyLineReader)