File: test_s3.py

package info (click to toggle)
python-papermill 2.6.0-3.1
  • links: PTS, VCS
  • area: main
  • in suites: forky, trixie
  • size: 2,216 kB
  • sloc: python: 4,977; makefile: 17; sh: 5
file content (218 lines) | stat: -rw-r--r-- 5,649 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# The following tests are purposely limited to the interface exposed by iorw.py

import os.path

import boto3
import moto
import pytest
from moto import mock_aws

from papermill.s3 import S3, Bucket, Key, Prefix


@pytest.fixture
def bucket_no_service():
    """Provide a ``Bucket`` named 'my_test_bucket' built from the name alone."""
    bucket = Bucket('my_test_bucket')
    return bucket


@pytest.fixture
def bucket_with_service():
    """Provide a ``Bucket`` named 'my_sqs_bucket' with ``['sqs']`` as its second argument."""
    name, services = 'my_sqs_bucket', ['sqs']
    return Bucket(name, services)


@pytest.fixture
def bucket_sqs():
    """Provide the sqs-flavoured ``Bucket``: 'my_sqs_bucket' created with ``['sqs']``."""
    sqs_bucket = Bucket('my_sqs_bucket', ['sqs'])
    return sqs_bucket


@pytest.fixture
def bucket_ec2():
    """Provide a ``Bucket`` created with ``['ec2']`` as its second argument.

    The bucket name is 'my_sqs_bucket', the same name the sqs fixtures use.
    """
    services = ['ec2']
    return Bucket('my_sqs_bucket', services)


@pytest.fixture
def bucket_multiservice():
    """Provide a ``Bucket`` created with two services, ``['ec2', 'sqs']``."""
    services = ['ec2', 'sqs']
    multi_bucket = Bucket('my_sqs_bucket', services)
    return multi_bucket


def test_bucket_init():
    """Buckets built with and without a service argument must evaluate as truthy."""
    name_only = Bucket('my_test_bucket')
    assert name_only

    with_service = Bucket('my_sqs_bucket', 'sqs')
    assert with_service


def test_bucket_defaults():
    """Omitting Bucket's second argument must match passing ``None`` explicitly."""
    bucket_name = 'a bucket'

    implicit = Bucket(bucket_name)
    explicit = Bucket(bucket_name, None)

    # Compare the same attributes, in the same order, on both instances.
    for attr in ('name', 'service'):
        assert getattr(implicit, attr) == getattr(explicit, attr)


def test_bucket_missing_params():
    """Constructing a Bucket without its name must raise ``TypeError``."""
    # First with only the ``service`` keyword, then with no arguments at all.
    for kwargs in ({'service': None}, {}):
        with pytest.raises(TypeError):
            Bucket(**kwargs)


def test_bucket_list(bucket_sqs):
    """Placeholder: ``Bucket.list`` is not exercised yet.

    The assertions sketched below are disabled, so this test currently passes
    without touching ``bucket_sqs``.
    """
    # Disabled checks, kept as a sketch of the intended coverage:
    #   assert bucket_sqs.list('')
    #   assert bucket_sqs.list('abc') is None
    #   assert bucket_sqs.list('ec2') is None
    #   assert bucket_sqs.list('sqs')


def test_prefix_init():
    """Prefix needs a bucket and a name; the service argument is optional."""
    # Each of these calls lacks a required positional argument.
    incomplete_calls = (
        ((), {}),
        ((), {'service': None}),
        (('my_test_prefix',), {}),
    )
    for args, kwargs in incomplete_calls:
        with pytest.raises(TypeError):
            Prefix(*args, **kwargs)

    bucket = Bucket('my_test_bucket')
    sqs_prefix = Prefix(bucket, 'sqs_test', service='sqs')

    # Valid constructions: service omitted, by keyword, and positionally.
    assert Prefix(bucket, 'test_bucket')
    assert Prefix(bucket, 'test_bucket', service=None)
    assert Prefix(bucket, 'test_bucket', None)

    # The prefix's bucket must carry the same service as the prefix itself.
    assert sqs_prefix.bucket.service == sqs_prefix.service


def test_prefix_defaults():
    """Omitting Prefix's third argument must match passing ``None`` explicitly."""
    pool = Bucket('my data pool')
    prefix_name = 'bigdata bucket'

    implicit = Prefix(pool, prefix_name)
    explicit = Prefix(pool, prefix_name, None)

    for attr in ('name', 'service'):
        assert getattr(implicit, attr) == getattr(explicit, attr)


def test_prefix_str(bucket_sqs):
    """``str(Prefix)`` must be the s3:// URL made of the bucket's str and the prefix name."""
    prefix = Prefix(bucket_sqs, 'sqs_prefix_test', 'sqs')
    actual = str(prefix)
    expected = "s3://{}/sqs_prefix_test".format(str(bucket_sqs))
    assert actual == expected


def test_prefix_repr(bucket_sqs):
    """``repr(Prefix)`` must be the s3:// URL made of the bucket's str and the prefix name."""
    prefix = Prefix(bucket_sqs, 'sqs_prefix_test', 'sqs')
    actual = repr(prefix)
    expected = "s3://{}/sqs_prefix_test".format(str(bucket_sqs))
    assert actual == expected


def test_key_init():
    """Placeholder: Key construction has no dedicated assertions yet."""


def test_key_repr():
    """``repr(Key)`` must render as an s3:// URL of bucket and key name."""
    rendered = repr(Key("foo", "bar"))
    assert rendered == "s3://foo/bar"


def test_key_defaults():
    """Omitting Key's optional arguments must match passing ``None`` for each."""
    pool = Bucket('my data pool')
    key_name = 'bigdata bucket'

    implicit = Key(pool, key_name)
    # Five explicit ``None`` values fill every optional positional slot.
    explicit = Key(pool, key_name, *([None] * 5))

    for attr in ('size', 'etag', 'storage_class', 'service'):
        assert getattr(implicit, attr) == getattr(explicit, attr)
    assert implicit.is_prefix is False


@mock_aws
def test_s3_defaults():
    """Two ``S3`` instances must compare equal on session, client and s3 attributes."""
    first, second = S3(), S3()
    for attr in ('session', 'client', 's3'):
        assert getattr(first, attr) == getattr(second, attr)


# Locations and payloads shared by the S3 tests below.
local_dir = os.path.dirname(os.path.abspath(__file__))
test_bucket_name = 'test-pm-bucket'
test_string = 'Hello'
test_file_path = 'notebooks/s3/s3_in/s3-simple_notebook.ipynb'
test_empty_file_path = 'notebooks/s3/s3_in/s3-empty.ipynb'

# Notebook files are UTF-8 JSON; pin the encoding so the text read here does
# not depend on the platform's locale default.
with open(os.path.join(local_dir, test_file_path), encoding='utf-8') as f:
    test_nb_content = f.read()

def no_empty_lines(s):
    """Return *s* with every empty line removed.

    The text is split on the newline character, empty segments are dropped and
    the remaining lines are joined with newlines again. Lines that contain
    only whitespace are not empty and are kept.
    """
    return "\n".join(ln for ln in s.split('\n') if ln)
# Notebook text with empty lines removed; test_s3_read compares the S3 read result against this.
test_clean_nb_content = no_empty_lines(test_nb_content)

def read_from_gen(g):
    """Join the strings produced by iterable *g* into one newline-separated string."""
    return "\n".join(g)


@pytest.fixture(scope="function")
def s3_client():
    """Yield an ``S3`` instance backed by a moto-mocked S3 with seeded objects.

    Setup creates ``test_bucket_name`` in us-west-2 and uploads two objects:
    the sample notebook at ``test_file_path`` and an empty body at
    ``test_empty_file_path``. After the test, the seeded objects and the
    ``.txt`` object written by ``test_s3_write`` are deleted on a best-effort
    basis.
    """
    # The context manager stops the mock on every exit path, including a
    # failure during setup, so an active mock never leaks into other tests.
    with moto.mock_aws():
        client = boto3.client('s3')
        client.create_bucket(Bucket=test_bucket_name, CreateBucketConfiguration={'LocationConstraint': 'us-west-2'})
        client.put_object(Bucket=test_bucket_name, Key=test_file_path, Body=test_nb_content)
        client.put_object(Bucket=test_bucket_name, Key=test_empty_file_path, Body='')

        yield S3()

        # Cleanup is deliberately best-effort: the mock is torn down right
        # after, so a failed delete must not turn a passing test into an error.
        try:
            for key in (test_file_path, f"{test_file_path}.txt", test_empty_file_path):
                client.delete_object(Bucket=test_bucket_name, Key=key)
        except Exception:
            pass


def test_s3_read(s3_client):
    """Reading the seeded notebook must yield its text with empty lines removed."""
    location = "s3://{}/{}".format(test_bucket_name, test_file_path)
    chunks = s3_client.read(location)
    assert read_from_gen(chunks) == test_clean_nb_content


def test_s3_read_empty(s3_client):
    """Reading the object seeded with an empty body must produce an empty string."""
    location = "s3://{}/{}".format(test_bucket_name, test_empty_file_path)
    chunks = s3_client.read(location)
    assert read_from_gen(chunks) == ''


def test_s3_write(s3_client):
    """A string copied to a new key must read back unchanged."""
    target = "s3://{}/{}.txt".format(test_bucket_name, test_file_path)
    s3_client.cp_string(test_string, target)

    round_tripped = read_from_gen(s3_client.read(target))
    assert round_tripped == test_string


def test_s3_overwrite(s3_client):
    """Copying a string onto the seeded notebook key must replace its content."""
    target = "s3://{}/{}".format(test_bucket_name, test_file_path)
    s3_client.cp_string(test_string, target)

    round_tripped = read_from_gen(s3_client.read(target))
    assert round_tripped == test_string


def test_s3_listdir(s3_client):
    """Listing the notebook's directory must return two entries, one being the notebook URL."""
    bucket_root = f"s3://{test_bucket_name}"
    expected_entry = f"{bucket_root}/{test_file_path}"
    directory_url = f"{bucket_root}/{os.path.dirname(test_file_path)}"

    entries = s3_client.listdir(directory_url)

    assert len(entries) == 2
    assert expected_entry in entries