File: test_grep.py

package info (click to toggle)
csvkit 2.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 40,664 kB
  • sloc: python: 4,924; perl: 1,000; makefile: 131; sql: 4
file content (143 lines) | stat: -rw-r--r-- 4,966 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import re
import unittest

from csvkit.exceptions import ColumnIdentifierError
from csvkit.grep import FilteringCSVReader


class TestGrep(unittest.TestCase):
    """Exercise FilteringCSVReader against two small in-memory tables.

    Each test builds a reader over ``iter(table)`` with a particular
    combination of ``patterns``, ``header``, ``inverse`` and ``any_match``
    and checks exactly which rows come back, and in which order.
    """

    def setUp(self):
        # Row 0 of each table is the header row; rows are lists of strings.
        self.tab1 = [
            ['id', 'name', 'i_work_here'],
            ['1', 'Chicago Reader', 'first'],
            ['2', 'Chicago Sun-Times', 'only'],
            ['3', 'Chicago Tribune', 'only'],
            ['1', 'Chicago Reader', 'second']]

        self.tab2 = [
            ['id', 'age', 'i_work_here'],
            ['1', 'first', '0'],
            ['4', 'only', '0'],
            ['1', 'second', '0'],
            ['2', 'only', '0', '0']]  # Note extra value in this column

    def _assert_rows(self, reader, expected_rows):
        """Assert that *reader* yields exactly *expected_rows*, then stops.

        Rows are pulled one at a time with ``next()`` and compared in order.
        After the last expected row, one more ``next()`` call must raise
        ``StopIteration``; otherwise the test fails with the message
        "Should be no more rows left.".
        """
        for expected in expected_rows:
            self.assertEqual(expected, next(reader))
        with self.assertRaises(StopIteration, msg="Should be no more rows left."):
            next(reader)

    def test_pattern(self):
        # A list of patterns: the header plus the two rows whose first
        # value is '1' are expected.
        fcr = FilteringCSVReader(iter(self.tab1), patterns=['1'])
        self._assert_rows(fcr, [self.tab1[0], self.tab1[1], self.tab1[4]])

    def test_no_header(self):
        # With header=False the first row is not passed through: only the
        # two rows whose column 2 is 'only' are expected.
        fcr = FilteringCSVReader(iter(self.tab1), patterns={2: 'only'}, header=False)
        self._assert_rows(fcr, [self.tab1[2], self.tab1[3]])

    def test_regex(self):
        # A compiled regular expression may be used as a column pattern.
        pattern = re.compile(".*(Reader|Tribune).*")
        fcr = FilteringCSVReader(iter(self.tab1), patterns={1: pattern})
        self._assert_rows(
            fcr,
            [self.tab1[0], self.tab1[1], self.tab1[3], self.tab1[4]],
        )

    def test_inverse(self):
        # inverse=True: the header plus the rows whose first value is NOT
        # '1' are expected.
        fcr = FilteringCSVReader(iter(self.tab2), patterns=['1'], inverse=True)
        self._assert_rows(fcr, [self.tab2[0], self.tab2[2], self.tab2[4]])

    def test_column_names_in_patterns(self):
        # Pattern keys may be column names taken from the header row.
        fcr = FilteringCSVReader(iter(self.tab2), patterns={'age': 'only'})
        self._assert_rows(fcr, [self.tab2[0], self.tab2[2], self.tab2[4]])

    def test_mixed_indices_and_column_names_in_patterns(self):
        # A name key and an index key together: only the row satisfying
        # both patterns is expected after the header.
        fcr = FilteringCSVReader(iter(self.tab2), patterns={'age': 'only', 0: '2'})
        self._assert_rows(fcr, [self.tab2[0], self.tab2[4]])

    def test_duplicate_column_ids_in_patterns(self):
        # 'age' and index 1 name the same column of tab2; constructing the
        # reader must raise ColumnIdentifierError.
        with self.assertRaises(ColumnIdentifierError, msg="Should be an exception."):
            FilteringCSVReader(iter(self.tab2), patterns={'age': 'only', 1: 'second'})

    def test_index_out_of_range(self):
        # Only the last row of tab2 has a value at index 3; the shorter
        # rows are expected to be left out rather than cause an error.
        fcr = FilteringCSVReader(iter(self.tab2), patterns={3: '0'})
        self._assert_rows(fcr, [self.tab2[0], self.tab2[4]])

    def test_any_match(self):
        # any_match=True: a row is expected if either pattern matches.
        fcr = FilteringCSVReader(
            iter(self.tab2),
            patterns={'age': 'only', 0: '2'},
            any_match=True,
        )
        self._assert_rows(fcr, [self.tab2[0], self.tab2[2], self.tab2[4]])

    def test_any_match_and_inverse(self):
        # any_match=True with inverse=True: the expected rows are the header
        # plus exactly the rows that test_any_match does not expect.
        fcr = FilteringCSVReader(
            iter(self.tab2),
            patterns={'age': 'only', 0: '2'},
            any_match=True,
            inverse=True,
        )
        self._assert_rows(fcr, [self.tab2[0], self.tab2[1], self.tab2[3]])

    def test_multiline(self):
        # The text 'bar' sits after a newline inside the cell; the row is
        # still expected to be yielded.
        table = [
            ['a', 'b'],
            ['1', 'foo\nbar'],
        ]
        fcr = FilteringCSVReader(iter(table), patterns={'b': re.compile('bar')})
        self._assert_rows(fcr, [table[0], table[1]])