1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
|
import os
import sys
from pathlib import Path
import re
from subprocess import run, PIPE
# Note: typing.Pattern is deprecated, for removal in 3.13 in favour of re.Pattern introduced in 3.8
from typing import List, Union, Pattern
import pytest
# Directory holding the input notebooks and their expected outputs,
# relative to the directory the tests are run from.
NOTEBOOKS_FOLDER = Path('tests') / 'e2e_notebooks'

# End-to-end cases. Each entry is a tuple of
#   (input file name, expected-output file name, extra command line arguments)
# and both file names are resolved against NOTEBOOKS_FOLDER.
TEST_CASES = [
    # Dropping empty / tagged cells (with and without the option).
    ('test_drop_empty_cells.ipynb', 'test_drop_empty_cells_dontdrop.ipynb.expected', []),
    ('test_drop_empty_cells.ipynb', 'test_drop_empty_cells.ipynb.expected', ['--drop-empty-cells']),
    ('test_drop_tagged_cells.ipynb', 'test_drop_tagged_cells_dontdrop.ipynb.expected', []),
    ('test_drop_tagged_cells.ipynb', 'test_drop_tagged_cells.ipynb.expected', ['--drop-tagged-cells=test']),
    ('test_execution_timing.ipynb', 'test_execution_timing.ipynb.expected', []),
    # Output size limit, with and without --keep-id.
    ('test_max_size.ipynb', 'test_max_size.ipynb.expected', ['--max-size', '50', '--keep-id']),
    ('test_max_size.ipynb', 'test_max_size.ipynb.expected_sequential_id', ['--max-size', '50']),
    # Metadata handling.
    ('test_empty_metadata.ipynb', 'test_empty_metadata.ipynb.expected', []),
    ('test_metadata.ipynb', 'test_metadata.ipynb.expected', []),
    (
        'test_metadata.ipynb',
        'test_metadata_extra_keys.ipynb.expected',
        ['--extra-keys', 'metadata.kernelspec metadata.language_info'],
    ),
    ('test_metadata.ipynb', 'test_metadata_keep_count.ipynb.expected', ['--keep-count']),
    ('test_metadata.ipynb', 'test_metadata_keep_output.ipynb.expected', ['--keep-output']),
    (
        'test_metadata.ipynb',
        'test_metadata_keep_output_keep_count.ipynb.expected',
        ['--keep-output', '--keep-count'],
    ),
    ('test_metadata_notebook.ipynb', 'test_metadata_notebook.ipynb.expected', []),
    (
        'test_keep_metadata_keys.ipynb',
        'test_keep_metadata_keys.ipynb.expected',
        ['--keep-metadata-keys', 'cell.metadata.scrolled cell.metadata.collapsed metadata.a'],
    ),
    (
        'test_metadata_period.ipynb',
        'test_metadata_period.ipynb.expected',
        [
            '--extra-keys',
            'cell.metadata.application/vnd.databricks.v1+cell metadata.application/vnd.databricks.v1+notebook',
        ],
    ),
    ('test_strip_init_cells.ipynb', 'test_strip_init_cells.ipynb.expected', ['--strip-init-cells']),
    # Notebook format variants.
    ('test_nbformat2.ipynb', 'test_nbformat2.ipynb.expected', []),
    ('test_nbformat45.ipynb', 'test_nbformat45.ipynb.expected', ['--keep-id']),
    ('test_nbformat45.ipynb', 'test_nbformat45.ipynb.expected_sequential_id', []),
    ('test_missing_nbformat.ipynb', 'test_missing_nbformat.ipynb.expected', []),
    # Miscellaneous content.
    ('test_unicode.ipynb', 'test_unicode.ipynb.expected', []),
    ('test_widgets.ipynb', 'test_widgets.ipynb.expected', []),
    ('test_zeppelin.zpln', 'test_zeppelin.zpln.expected', ['--mode', 'zeppelin']),
]
# Dry-run cases. Each entry is a tuple of
#   (input file name, extra command line arguments, any_change)
# where any_change says whether a dry run is expected to report that the
# input would be stripped.
DRY_RUN_CASES = [
    (
        'test_metadata.ipynb',
        [],
        True,
    ),
    (
        'test_zeppelin.zpln',
        ['--mode', 'zeppelin'],
        True,
    ),
    (
        'test_nochange.ipynb',
        [],
        False,
    ),
]
# Error-output cases. Each entry is a tuple of
#   (input file name, expected stderr fragments, extra command line arguments)
# A fragment is either a plain string (checked by substring containment) or a
# compiled regular expression (checked with ``search``).
ERR_OUTPUT_CASES = [
    # Invalid --extra-keys values are reported one by one.
    (
        'test_metadata.ipynb',
        [
            'Ignoring invalid extra key `invalid`',
            'Ignoring invalid extra key `foo.invalid`',
        ],
        ['--extra-keys', 'invalid foo.invalid'],
    ),
    # Contradicting cell metadata and tags.
    (
        'test_metadata_exception.ipynb',
        [
            re.compile(
                '.*MetadataError: cell metadata contradicts tags: `keep_output` is false, but `keep_output` in tags'
            ),
        ],
        [],
    ),
    # Input that is not a notebook at all.
    (
        'test_invalid_json.ipynb',
        ['No valid notebook detected on stdin'],
        [],
    ),
]
def nbstripout_exe():
    """Return the command used to invoke nbstripout.

    The ``NBSTRIPOUT_EXE`` environment variable takes precedence; when it is
    not set, the plain command name ``'nbstripout'`` is returned.
    """
    return os.getenv('NBSTRIPOUT_EXE', 'nbstripout')
@pytest.mark.parametrize('input_file, expected_file, args', TEST_CASES)
@pytest.mark.parametrize('verify', (True, False))
def test_end_to_end_stdin(input_file: str, expected_file: str, args: List[str], verify: bool):
    """Feed a notebook to nbstripout on stdin and check its stdout and exit code.

    Without ``--verify`` the stripped notebook written to stdout must equal the
    expected file and the exit code must be 0. With ``--verify`` only the exit
    code is checked: 1 when input and expected output differ, 0 otherwise.
    """
    source_path = NOTEBOOKS_FOLDER / input_file
    expected = (NOTEBOOKS_FOLDER / expected_file).read_text()
    input_ = source_path.read_text()

    command = [nbstripout_exe(), *args]
    if verify:
        command.append('--verify')

    with open(source_path, mode='r') as notebook:
        pc = run(command, stdin=notebook, stdout=PIPE, universal_newlines=True)

    if not verify:
        assert pc.stdout == expected
        assert pc.returncode == 0
        return

    # When using stdin, the dry run flag is disregarded.
    expected_code = 1 if input_ != expected else 0
    assert pc.returncode == expected_code
@pytest.mark.parametrize('input_file, expected_file, args', TEST_CASES)
@pytest.mark.parametrize('verify', (True, False))
def test_end_to_end_file(input_file: str, expected_file: str, args: List[str], tmp_path, verify: bool):
    """Run nbstripout on a temporary copy of a notebook and check the result.

    Args:
        input_file: Notebook file name inside ``NOTEBOOKS_FOLDER``.
        expected_file: File name of the expected stripped notebook.
        args: Extra command line arguments for nbstripout.
        tmp_path: pytest fixture providing a per-test temporary directory.
        verify: Whether to append ``--verify`` to the command line.

    Without ``--verify`` the copy must be rewritten in place to the expected
    content, with nothing printed to stdout and exit code 0. With ``--verify``
    the copy must stay untouched, and when stripping would change it the
    dry-run message and exit code 1 are expected.
    """
    with open(NOTEBOOKS_FOLDER / expected_file, mode='r') as f:
        expected = f.read()
    with open(NOTEBOOKS_FOLDER / input_file, mode='r') as f:
        input_ = f.read()

    # Operate on a copy so the fixture notebook itself is never rewritten.
    p = tmp_path / input_file
    p.write_text(input_)

    args = [nbstripout_exe(), str(p)] + args
    if verify:
        args.append('--verify')
    pc = run(args, stdout=PIPE, universal_newlines=True)
    output = pc.stdout.strip()

    if verify:
        if expected != input_:
            assert 'Dry run: would have stripped' in output
            assert pc.returncode == 1

        # Since verify implies --dry-run, make sure the file handed to
        # nbstripout (the temporary copy, not the fixture) is not modified.
        assert p.read_text() == input_
    else:
        assert pc.returncode == 0
        assert not pc.stdout and p.read_text() == expected
@pytest.mark.parametrize('input_file, extra_args, any_change', DRY_RUN_CASES)
@pytest.mark.parametrize('verify', (True, False))
def test_dry_run_stdin(input_file: str, extra_args: List[str], any_change: bool, verify: bool):
    """Check the ``--dry-run`` message and exit code when the notebook comes from stdin.

    A message is expected on stdout only when stripping would change the
    input; the exit code is 1 only when ``--verify`` is given as well.
    """
    command = [nbstripout_exe(), '--dry-run', *extra_args]
    if verify:
        command.append('--verify')

    with open(NOTEBOOKS_FOLDER / input_file, mode='r') as notebook:
        pc = run(command, stdin=notebook, stdout=PIPE, universal_newlines=True)

    if any_change:
        expected_output = 'Dry run: would have stripped input from stdin\n'
        expected_code = 1 if verify else 0
    else:
        expected_output = ''
        expected_code = 0

    assert pc.stdout == expected_output
    assert pc.returncode == expected_code
@pytest.mark.parametrize('input_file, extra_args, any_change', DRY_RUN_CASES)
@pytest.mark.parametrize('verify', (True, False))
def test_dry_run_args(input_file: str, extra_args: List[str], any_change: bool, verify: bool):
    """Check the ``--dry-run`` message and exit code when the notebook is a path argument.

    Args:
        input_file: Notebook file name inside ``NOTEBOOKS_FOLDER``.
        extra_args: Extra command line arguments for nbstripout.
        any_change: Whether stripping is expected to change the notebook.
        verify: Whether to append ``--verify`` to the command line.
    """
    # Escape the file name so regex metacharacters in it (the ``.`` before the
    # extension) are matched literally rather than as wildcards.
    escaped_name = re.escape(input_file)
    # The character class accepts both ``/`` and ``\`` as path separator.
    expected_regex = re.compile(f'Dry run: would have stripped .*[/\\\\]{escaped_name}\n')

    args = [
        nbstripout_exe(),
        str(NOTEBOOKS_FOLDER / input_file),
        '--dry-run',
    ] + extra_args
    if verify:
        args.append('--verify')
    pc = run(args, stdout=PIPE, universal_newlines=True)
    output = pc.stdout

    if any_change:
        assert expected_regex.match(output)
    else:
        assert output == ''
    assert pc.returncode == (1 if verify and any_change else 0)
@pytest.mark.parametrize('input_file, expected_errs, extra_args', ERR_OUTPUT_CASES)
def test_make_errors(input_file: str, expected_errs: List[Union[str, Pattern]], extra_args: List[str]):
    """Check that nbstripout reports the expected messages on stderr.

    The notebook is fed on stdin in ``--dry-run`` mode. Plain-string
    expectations must occur in stderr as substrings; compiled patterns must
    match somewhere in it.
    """
    command = [nbstripout_exe(), '--dry-run', *extra_args]
    with open(NOTEBOOKS_FOLDER / input_file, mode='r') as notebook:
        pc = run(command, stdin=notebook, stderr=PIPE, universal_newlines=True)

    err_output = pc.stderr
    for expectation in expected_errs:
        if not isinstance(expectation, Pattern):
            assert expectation in err_output
            continue
        assert expectation.search(err_output)
def test_nochange_notebook_unchanged():
    """Check that already-clean files keep their modification time after a run.

    Covers a Jupyter notebook with default options and a Zeppelin note run
    with ``--force --mode zeppelin``.
    """
    cases = (
        (NOTEBOOKS_FOLDER / 'test_nochange.ipynb', []),
        (NOTEBOOKS_FOLDER / 'test_zeppelin.zpln.expected', ['--force', '--mode', 'zeppelin']),
    )
    for notebook, options in cases:
        mtime_before = notebook.stat().st_mtime_ns
        run([nbstripout_exe(), *options, notebook])
        mtime_after = notebook.stat().st_mtime_ns
        assert mtime_after == mtime_before
def test_newline_behavior(tmp_path: Path):
    """Check which line endings nbstripout emits for a CRLF input notebook.

    By default the in-place result and the ``--textconv`` output are expected
    to contain CRLF on Windows only; with ``--unix-newlines`` neither may
    contain CRLF on any platform.
    """
    crlf = b'\r\n'
    expect_crlf = sys.platform == 'win32'
    source = NOTEBOOKS_FOLDER / 'test_drop_empty_cells.ipynb'
    input_content = source.read_bytes().replace(b'\n', crlf)

    # Default behaviour: in-place rewrite, then --textconv output on stdout.
    to_os_eol = tmp_path / 'should-have-os-eol.ipynb'
    to_os_eol.write_bytes(input_content)
    run([nbstripout_exe(), to_os_eol])
    assert (crlf in to_os_eol.read_bytes()) == expect_crlf

    pc = run([nbstripout_exe(), '--textconv', to_os_eol], stdout=PIPE)
    assert (crlf in pc.stdout) == expect_crlf

    # --unix-newlines: no CRLF regardless of platform.
    to_lf_eol = tmp_path / 'should-have-lf-eol.ipynb'
    to_lf_eol.write_bytes(input_content)
    run([nbstripout_exe(), '--unix-newlines', to_lf_eol])
    assert crlf not in to_lf_eol.read_bytes()

    pc = run([nbstripout_exe(), '--unix-newlines', '--textconv', to_lf_eol], stdout=PIPE)
    assert crlf not in pc.stdout
|