File: test_srt.py

package info (click to toggle)
python-srt 3.5.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 236 kB
  • sloc: python: 1,599; makefile: 13
file content (767 lines) | stat: -rw-r--r-- 25,422 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
#!/usr/bin/env python
# coding=utf8

from __future__ import unicode_literals
from datetime import timedelta
import collections
import functools
import os
import re
import string
from io import StringIO

import pytest
from hypothesis import given, settings, HealthCheck, assume, example
import hypothesis.strategies as st

import srt

def REGISTER_SETTINGS(name, **kwargs):
    """
    Register a Hypothesis settings profile called `name`.

    Every profile suppresses the too_slow health check and disables the
    deadline; any extra keyword arguments are passed through to
    settings.register_profile().
    """
    return settings.register_profile(
        name,
        suppress_health_check=[HealthCheck.too_slow],
        deadline=None,
        **kwargs
    )


REGISTER_SETTINGS("base")
REGISTER_SETTINGS("release", max_examples=1000)

# The active profile is taken from the HYPOTHESIS_PROFILE environment
# variable, falling back to "base" when it is unset.
settings.load_profile(os.environ.get("HYPOTHESIS_PROFILE", "base"))

HOURS_IN_DAY = 24
TIMEDELTA_MAX_DAYS = 999999999

# Builds a Subtitle with a fixed index and fixed start/end timestamps; the
# caller only has to supply the remaining arguments (e.g. content).
CONTENTLESS_SUB = functools.partial(
    srt.Subtitle,
    index=1,
    start=timedelta(seconds=1),
    end=timedelta(seconds=2),
)


def is_strictly_legal_content(content):
    """
    Report whether content is acceptable as-is in strict mode.

    Content is rejected (False is returned) when it:

    - starts or ends with a carriage return or newline
    - is empty or consists only of whitespace
    - contains a blank line, i.e. two consecutive newlines
    """
    has_edge_eol = content.strip("\r\n") != content
    is_blank = not content.strip()
    has_blank_line = "\n\n" in content
    return not (has_edge_eol or is_blank or has_blank_line)


def subs_eq(got, expected, any_order=False):
    """
    Assert that two iterables of Subtitle objects contain equal subtitles.

    Every object is reduced to a frozenset of its vars() items before
    comparing, so that a failure shows which attribute values differ. When
    any_order is true, the two sides are compared as multisets (order is
    ignored, duplicates still count); otherwise they must match in order.
    """

    def snapshot(subs):
        return [frozenset(vars(sub).items()) for sub in subs]

    got_vars = snapshot(got)
    expected_vars = snapshot(expected)

    if not any_order:
        assert got_vars == expected_vars
        return

    assert collections.Counter(got_vars) == collections.Counter(expected_vars)


def timedeltas(min_value=0, max_value=TIMEDELTA_MAX_DAYS):
    """
    A Hypothesis strategy to generate timedeltas.

    The same integer strategy, bounded by min_value and max_value, feeds the
    hours, minutes and seconds arguments of timedelta(). That is a coarse
    knob, but it is sufficient for the tests in this file; add further
    parameters here if finer control over individual fields is ever needed.
    """
    unit = st.integers(min_value=min_value, max_value=max_value)
    return st.builds(timedelta, hours=unit, minutes=unit, seconds=unit)


def equivalent_timestamps(min_value=0, max_value=TIMEDELTA_MAX_DAYS):
    """
    A Hypothesis strategy to generate pairs of SRT timestamp strings that are
    constructed to represent the same duration.

    In each pair, one timestamp may have its hours, minutes and/or seconds
    field one higher than the other; the other timestamp compensates with a
    correspondingly larger milliseconds field. Each field of each timestamp is
    also prefixed with between zero and two extra "0" characters.
    """

    def render(hours, minutes, seconds, msecs, paddings):
        # Prefix each field with the requested number of extra zeroes.
        fields = [
            "0" * padding + str(value)
            for value, padding in zip((hours, minutes, seconds, msecs), paddings)
        ]
        return "{}:{}:{},{}".format(*fields)

    def field_values():
        return st.integers(min_value=min_value, max_value=max_value)

    def padding_tuples():
        return st.tuples(*[st.integers(min_value=0, max_value=2) for _ in range(4)])

    @st.composite
    def field_and_maybe_successor(draw):
        base = draw(field_values())
        maybe_bumped = draw(st.integers(min_value=base, max_value=base + 1))
        return maybe_bumped, base

    def build_pair(h, m, s, ms2, ts1paddings, ts2paddings):
        (h2, h1), (m2, m1), (s2, s1) = h, m, s
        # Whatever the second timestamp has extra in hours/minutes/seconds is
        # handed to the first timestamp as additional milliseconds.
        ms1 = (
            (h2 - h1) * 60 * 60 * 1000 + (m2 - m1) * 60 * 1000 + (s2 - s1) * 1000 + ms2
        )
        second = render(h2, m2, s2, ms2, ts2paddings)
        first = render(h1, m1, s1, ms1, ts1paddings)
        return second, first

    return st.builds(
        build_pair,
        field_and_maybe_successor(),
        field_and_maybe_successor(),
        field_and_maybe_successor(),
        field_values(),
        padding_tuples(),
        padding_tuples(),
    )


def subtitles(strict=True):
    """
    A Hypothesis strategy to generate Subtitle objects.

    When strict is true, generated content is additionally restricted to
    content accepted by is_strictly_legal_content().
    """

    def has_no_eol(text):
        return "\r" not in text and "\n" not in text

    # Subtitle.content must never contain \r: it is expected to have been
    # normalised to \n already.
    content = st.text(min_size=1).filter(lambda text: "\r" not in text)
    if strict:
        content = content.filter(is_strictly_legal_content)

    # Start and end values come from two disjoint ranges so that a generated
    # sub never has start >= end. The upper bounds are arbitrary numbers
    # chosen low enough not to overflow TIMEDELTA_MAX_DAYS.
    return st.builds(
        srt.Subtitle,
        index=st.integers(min_value=0),
        start=timedeltas(min_value=0, max_value=500000),
        end=timedeltas(min_value=500001, max_value=999999),
        proprietary=st.text().filter(has_no_eol),
        content=content,
    )


@given(st.lists(subtitles()))
def test_compose_and_parse_from_file(input_subs):
    """Subtitles composed and then parsed from a file-like object round-trip."""
    composed = srt.compose(input_subs, reindex=False)
    subs_eq(srt.parse(StringIO(composed)), input_subs)


@given(st.lists(subtitles()))
def test_compose_and_parse_from_file_bom(input_subs):
    """A byte order mark at the start of a file must not affect parsing."""
    composed = srt.compose(input_subs, reindex=False)
    file_with_bom = StringIO("\ufeff" + composed)
    subs_eq(srt.parse(file_with_bom), input_subs)


@given(st.lists(subtitles()))
def test_compose_and_parse_strict(input_subs):
    """Subtitles composed to a string and parsed back round-trip."""
    subs_eq(srt.parse(srt.compose(input_subs, reindex=False)), input_subs)


@given(st.lists(subtitles()))
def test_can_compose_without_ending_blank_line(input_subs):
    """
    Plenty of subtitle editors omit the final blank line, and plenty of
    players tolerate that, so input lacking it should be accepted too.
    """
    # Dropping the very last character removes the trailing blank line.
    without_final_blank = srt.compose(input_subs, reindex=False)[:-1]
    subs_eq(srt.parse(without_final_blank), input_subs)


@given(st.lists(subtitles()))
def test_can_compose_without_eol_at_all(input_subs):
    """Input with every trailing CR/LF stripped should still parse."""
    without_trailing_eol = srt.compose(input_subs, reindex=False).rstrip("\r\n")
    subs_eq(srt.parse(without_trailing_eol), input_subs)


@given(st.text().filter(is_strictly_legal_content))
def test_compose_and_parse_strict_mode(content):
    """Strict composition tidies newlines in content; non-strict does not."""
    # Subtitle content only ever uses \n, never OS-specific line separators
    assume("\r" not in content)

    # Wrap the content in leading/trailing newlines and put a blank line in
    # the middle, i.e. everything that strict mode is meant to clean up.
    messy_content = "".join(["\n", content, "\n\n", content, "\n"])
    sub = CONTENTLESS_SUB(content=messy_content)

    strict_content = list(srt.parse(sub.to_srt()))[0].content
    unstrict_content = list(srt.parse(sub.to_srt(strict=False)))[0].content

    # In strict mode there must be no leading newline, no trailing newline,
    # and no blank line left in the content.
    assert not strict_content.startswith("\n")
    assert not strict_content.endswith("\n")
    assert "\n\n" not in strict_content

    # Without strict mode the content is passed through untouched, apart from
    # \r\n being turned into \n.
    assert unstrict_content == sub.content.replace("\r\n", "\n")


@given(st.integers(min_value=1, max_value=TIMEDELTA_MAX_DAYS))
def test_timedelta_to_srt_timestamp_can_go_over_24_hours(days):
    """Whole days must be folded into the hours field of the timestamp."""
    timestamp = srt.timedelta_to_srt_timestamp(timedelta(days=days))
    hours_field, _, _ = timestamp.partition(":")
    assert int(hours_field) == days * HOURS_IN_DAY


@given(subtitles())
def test_subtitle_equality(sub_1):
    """A Subtitle rebuilt from another's attributes compares equal to it."""
    clone = srt.Subtitle(**vars(sub_1))
    assert sub_1 == clone


@given(subtitles())
def test_subtitle_inequality(sub_1):
    """Subtitles that differ only in index must compare unequal."""
    altered = srt.Subtitle(**vars(sub_1))
    altered.index = altered.index + 1
    assert sub_1 != altered


@given(subtitles())
def test_subtitle_from_scratch_equality(subtitle):
    """Two independent parses of one SRT block are equal and hash equally."""
    srt_block = subtitle.to_srt()

    # Parse twice so that the two objects share nothing with each other, which
    # keeps the hash comparison below meaningful.
    first_parse = list(srt.parse(srt_block))[0]
    second_parse = list(srt.parse(srt_block))[0]

    subs_eq([first_parse], [second_parse])
    # Guard against subs_eq and __eq__ disagreeing with each other
    assert first_parse == second_parse
    assert hash(first_parse) == hash(second_parse)


@given(st.lists(subtitles()))
def test_parsing_spaced_arrow(subs):
    """An arrow written as "- >" is still accepted by the parser."""
    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed.replace("-->", "- >")), subs)


@given(st.lists(subtitles()))
def test_parsing_spaced_ender_arrow(subs):
    """An arrow written as "-- >" is still accepted by the parser."""
    composed = srt.compose(subs, reindex=False, strict=False)
    # This form of the arrow has been seen in BSG subtitles
    subs_eq(srt.parse(composed.replace("-->", "-- >")), subs)


@given(st.lists(subtitles()))
def test_parsing_no_ws_arrow(subs):
    """An arrow with no whitespace around it is still accepted."""
    composed = srt.compose(subs, reindex=False, strict=False)
    unspaced_block = composed.replace(" --> ", "-->")
    subs_eq(srt.parse(unspaced_block), subs)


@given(st.text(string.whitespace), st.lists(subtitles()))
def test_parsing_leading_whitespace(ws, subs):
    """Whitespace before the first subtitle block is ignored."""
    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(ws + composed), subs)


@given(st.lists(subtitles()))
def test_parsing_negative_index(subs):
    """Subtitles with a negated (non-positive) index round-trip."""
    for sub in subs:
        sub.index = -sub.index

    negative_index_block = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(negative_index_block), subs)


@given(st.lists(subtitles()))
def test_parsing_content_with_blank_lines(subs):
    """Content containing a blank line round-trips in non-strict mode."""
    for subtitle in subs:
        # Duplicate the content around a blank line, which exercises the
        # "special" content parsing for erroneous SRT files that contain
        # blank lines.
        subtitle.content = "\n\n".join([subtitle.content, subtitle.content])

    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed), subs)


@given(st.lists(subtitles()))
def test_parsing_no_content(subs):
    """Subtitles with empty content round-trip in non-strict mode."""
    for subtitle in subs:
        subtitle.content = ""

    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed), subs)


@given(st.lists(subtitles()), st.lists(subtitles()), st.text(alphabet="\n\r\t "))
def test_subs_missing_content_removed(content_subs, contentless_subs, contentless_text):
    """sort_and_reindex drops subs whose content is empty or whitespace-only."""
    for blank_sub in contentless_subs:
        blank_sub.content = contentless_text

    kept_subs = list(
        srt.sort_and_reindex(contentless_subs + content_subs, in_place=True)
    )

    # Only the subs from content_subs should survive; every sub in
    # contentless_subs should have been stripped.
    subs_eq(kept_subs, content_subs, any_order=True)

    # Indexes run consecutively from the default start index, and the
    # stripped subs do not take up any index.
    default_start_index = 1
    expected_indexes = list(
        range(default_start_index, default_start_index + len(kept_subs))
    )
    assert [sub.index for sub in kept_subs] == expected_indexes


@given(
    st.lists(subtitles()),
    st.lists(subtitles()),
    timedeltas(min_value=-999, max_value=-1),
)
def test_subs_starts_before_zero_removed(positive_subs, negative_subs, negative_td):
    """sort_and_reindex drops subs that start before zero."""
    for sub in negative_subs:
        # The end is moved along with the start, just to avoid tripping any
        # start >= end errors.
        sub.start = sub.end = negative_td

    remaining_subs = list(
        srt.sort_and_reindex(positive_subs + negative_subs, in_place=True)
    )

    # None of the negative subs should be left
    subs_eq(remaining_subs, positive_subs, any_order=True)


@given(st.lists(subtitles(), min_size=1), st.integers(min_value=0))
def test_sort_and_reindex(input_subs, start_index):
    """sort_and_reindex orders by start time and renumbers from start_index."""
    # Give every sub the same end time and index so that only the start time
    # can influence the ordering. The shared end (500001 days) is later than
    # any start that start_timestamp_strategy in subtitles() can produce, so
    # start < end still holds for every sub.
    shared_end = timedelta(days=500001)
    for sub in input_subs:
        sub.end = shared_end
        sub.index = 1

    reindexed_subs = list(
        srt.sort_and_reindex(input_subs, start_index=start_index, in_place=True)
    )

    # Indexes must run consecutively from start_index
    expected_indexes = list(range(start_index, start_index + len(input_subs)))
    assert [sub.index for sub in reindexed_subs] == expected_indexes

    # The output must be ordered by start time
    assert reindexed_subs == sorted(input_subs, key=lambda sub: sub.start)


@given(st.lists(subtitles()))
def test_sort_and_reindex_no_skip(input_subs):
    """With skip=False, subs whose start is after their end are kept."""
    # Swap start and end so that every sub has start time > end time; with
    # skip=False this must not cause any sub to be skipped.
    for sub in input_subs:
        sub.start, sub.end = sub.end, sub.start

    reindexed_subs = list(srt.sort_and_reindex(input_subs, skip=False))

    # Every input sub should still be present
    assert len(reindexed_subs) == len(input_subs)


@given(st.lists(subtitles()))
def test_sort_and_reindex_handles_no_index(input_subs):
    """Subs with index None and start after end are skipped without error."""
    # Swap start and end so that every sub has start time > end time, and
    # remove the index. Skipping is left at its default here, so all of these
    # subs are expected to be dropped.
    for sub in input_subs:
        sub.start, sub.end = sub.end, sub.start
        sub.index = None

    reindexed_subs = list(srt.sort_and_reindex(input_subs))

    # Nothing should be left
    assert not reindexed_subs


@given(st.lists(subtitles(), min_size=1))
def test_sort_and_reindex_same_start_time_uses_end(input_subs):
    """When start times tie, sort_and_reindex orders by end time."""
    # Give every sub the same start time and index so that only the end time
    # can influence the ordering.
    shared_start = timedelta(days=1)
    for sub in input_subs:
        sub.start = shared_start
        sub.index = 1

    reindexed_subs = list(srt.sort_and_reindex(input_subs, in_place=True))

    # With identical start times, the output must be ordered by end time
    assert reindexed_subs == sorted(input_subs, key=lambda sub: sub.end)


@given(st.lists(subtitles(), min_size=1))
def test_sort_and_reindex_same_start_and_end_time_uses_index(input_subs):
    """When start and end times tie, sort_and_reindex orders by index."""
    # Give every sub the same start and end time so that only the index can
    # influence the ordering.
    shared_start = timedelta(days=1)
    shared_end = timedelta(days=2)
    for sub in input_subs:
        sub.start = shared_start
        sub.end = shared_end

    reindexed_subs = list(srt.sort_and_reindex(input_subs, in_place=True))

    # With identical start and end times, the output must be ordered by index
    assert reindexed_subs == sorted(input_subs, key=lambda sub: sub.index)


@given(st.lists(subtitles(), min_size=1), st.integers(min_value=0))
def test_sort_and_reindex_not_in_place_matches(input_subs, start_index):
    """In-place and not-in-place sort_and_reindex give equal results."""

    def clone_all(subs):
        return [srt.Subtitle(**vars(sub)) for sub in subs]

    # Each sort_and_reindex call gets its own copies, so that the two calls
    # cannot influence each other.
    not_in_place_subs = clone_all(input_subs)
    in_place_subs = clone_all(input_subs)

    nip_ids = [id(sub) for sub in not_in_place_subs]
    ip_ids = [id(sub) for sub in in_place_subs]

    not_in_place_output = list(
        srt.sort_and_reindex(not_in_place_subs, start_index=start_index)
    )
    in_place_output = list(
        srt.sort_and_reindex(in_place_subs, start_index=start_index, in_place=True)
    )

    # Both modes must produce the same subtitles
    subs_eq(not_in_place_output, in_place_output)

    # Without in_place, none of the returned objects may be an input object
    assert all(id(sub) not in nip_ids for sub in not_in_place_output)

    # With in_place, every returned object must be one of the input objects
    assert all(id(sub) in ip_ids for sub in in_place_output)


@given(
    st.lists(subtitles(), min_size=1),
    st.integers(min_value=0),
    st.text(min_size=1),
    timedeltas(),
)
def test_parser_noncontiguous(subs, fake_idx, garbage, fake_timedelta):
    """Garbage between subtitle blocks makes the parser raise SRTParseError."""
    # The garbage is dressed up with an index line and a timestamp: the parser
    # does some magic to detect blank lines that do not really delimit
    # subtitles, so the junk has to look at least a little like an SRT block
    # in order to trigger the failed parsing detection.
    fake_block = "\n\n%d\n%s %s" % (
        fake_idx,
        srt.timedelta_to_srt_timestamp(fake_timedelta),
        garbage,
    )
    corrupted = srt.compose(subs).replace("\n\n", fake_block)

    with pytest.raises(srt.SRTParseError):
        list(srt.parse(corrupted))


@given(
    st.lists(subtitles(), min_size=1),
    st.integers(min_value=0),
    st.text(min_size=1),
    timedeltas(),
)
def test_parser_noncontiguous_ignore_errors(subs, fake_idx, garbage, fake_timedelta):
    """With ignore_errors=True, garbage between blocks does not raise."""
    fake_block = "\n\n%d\n%s %s" % (
        fake_idx,
        srt.timedelta_to_srt_timestamp(fake_timedelta),
        garbage,
    )
    corrupted = srt.compose(subs).replace("\n\n", fake_block)

    # ignore_errors is set, so consuming the parser must not raise
    list(srt.parse(corrupted, ignore_errors=True))


def _parseable_as_int(text):
    try:
        int(text)
    except ValueError:
        return False
    return True


def _parseable_as_float(text):
    try:
        float(text)
    except ValueError:
        return False
    return True


@given(st.lists(subtitles()), st.text(min_size=1))
def test_parser_noncontiguous_leading(subs, garbage):
    """Garbage before the first subtitle makes the parser raise."""
    # Leading whitespace is permitted (issue #50), see
    # test_parsing_leading_whitespace
    assume(not garbage.isspace())

    stripped = garbage.strip()

    # Negative indexes are permitted (issue #56), see
    # test_parsing_negative_index. The garbage also must not simply be a
    # number, since it would then be confused with our index...
    assume(stripped[0] not in (".", "-"))
    assume(not _parseable_as_int(stripped))
    assume(not _parseable_as_float(stripped))

    # Garbage in front of everything should trigger the noncontiguity checks
    with pytest.raises(srt.SRTParseError):
        list(srt.parse(garbage + srt.compose(subs)))


@given(
    st.lists(subtitles(), min_size=1),
    st.integers(min_value=0),
    st.text(min_size=1),
    timedeltas(),
)
def test_parser_didnt_match_to_end_raises(subs, fake_idx, garbage, fake_timedelta):
    """Unparseable trailing content is reported on the raised SRTParseError."""
    trailing_block = "\n\n%d\n%s %s" % (
        fake_idx,
        srt.timedelta_to_srt_timestamp(fake_timedelta),
        garbage,
    )
    composed = "".join([sub.to_srt() for sub in subs] + [trailing_block])

    with pytest.raises(srt.SRTParseError) as thrown_exc:
        list(srt.parse(composed))

    # The parser consumes as many \n as needed until it meets the lookahead
    # assertion, so the leading newlines of the trailing block are not part
    # of what is reported as unmatched.
    unmatched = trailing_block.lstrip("\n")
    error = thrown_exc.value

    assert unmatched == error.unmatched_content
    assert len(composed) - len(unmatched) == error.expected_start
    assert len(composed) == error.actual_start


@given(st.lists(subtitles()))
def test_parser_can_parse_with_dot_msec_delimiter(subs):
    """Timestamps using "." before the milliseconds are accepted."""

    def with_dot_delimiters(srt_block):
        lines = srt_block.split("\n")
        # Only the first two commas are replaced: further commas may belong
        # to the proprietary metadata, and altering those would make this
        # test fail.
        lines[1] = lines[1].replace(",", ".", 2)
        return "\n".join(lines)

    composed_with_dots = "".join(with_dot_delimiters(sub.to_srt()) for sub in subs)
    subs_eq(srt.parse(composed_with_dots), subs)


@given(st.lists(subtitles()))
def test_parser_can_parse_with_fullwidth_delimiter(subs):
    """Timestamps using fullwidth delimiters are accepted by the parser."""
    # Written as escapes so that the fullwidth characters cannot be silently
    # normalised into their ASCII lookalikes, which would turn the
    # replacements below into no-ops and make this test vacuous.
    fullwidth_comma = "\uff0c"
    fullwidth_colon = "\uff1a"

    fullwidth_srt_blocks = []

    for sub in subs:
        srt_lines = sub.to_srt().split("\n")
        # Only the first comma and the first colon of the timestamp line are
        # replaced.
        srt_lines[1] = (
            srt_lines[1]
            .replace(",", fullwidth_comma, 1)
            .replace(":", fullwidth_colon, 1)
        )
        fullwidth_srt_blocks.append("\n".join(srt_lines))

    composed_with_fullwidth = "".join(fullwidth_srt_blocks)
    reparsed_subs = srt.parse(composed_with_fullwidth)
    subs_eq(reparsed_subs, subs)


@given(st.lists(subtitles()))
def test_parser_can_parse_with_no_msec(subs):
    """Timestamps with the milliseconds part removed are accepted."""
    srt_blocks = []

    for sub in subs:
        srt_lines = sub.to_srt().split("\n")
        # Only the first two matches are removed: further matches may belong
        # to the proprietary metadata, and altering those would make this
        # test fail. count is passed by keyword because passing it
        # positionally to re.sub() is deprecated.
        srt_lines[1] = re.sub(r",[0-9]+", "", srt_lines[1], count=2)
        srt_blocks.append("\n".join(srt_lines))

    composed = "".join(srt_blocks)
    reparsed_subs = srt.parse(composed)
    subs_eq(reparsed_subs, subs)


@given(subtitles())
def test_repr_doesnt_crash(sub):
    """repr() of a Subtitle works and mentions the class name and index."""
    # There is little to verify here beyond __repr__ not blowing up and the
    # result looking at least vaguely like what we want.
    rendered = repr(sub)
    assert "Subtitle" in rendered
    assert str(sub.index) in rendered


@given(subtitles(), subtitles())
def test_parser_accepts_final_no_newline_no_content(sub1, sub2):
    """A final contentless sub without trailing newlines still parses."""
    # Empty content fixes the size of the tail, so we know how much to remove
    sub2.content = ""
    subs = [sub1, sub2]

    # Cut off the last two characters so that no trailing newlines remain.
    # rstrip cannot be used here, since it might also match other stuff in
    # proprietary.
    composed = srt.compose(subs, reindex=False)
    subs_eq(srt.parse(composed[:-2]), subs)


@given(st.lists(subtitles()))
def test_parser_accepts_newline_no_content(subs):
    """Contentless subs separated by only a single newline still parse."""
    # Empty content fixes the size of each block's tail, so we know how many
    # lines to remove
    for sub in subs:
        sub.content = ""

    # Drop the last \n of every block so that only one is left
    single_newline_blocks = [sub.to_srt()[:-1] for sub in subs]
    subs_eq(srt.parse("".join(single_newline_blocks)), subs)


@given(st.lists(subtitles()))
def test_compose_and_parse_strict_crlf(input_subs):
    """Composed output converted to CRLF line endings still parses."""
    crlf_composed = srt.compose(input_subs, reindex=False).replace("\n", "\r\n")
    reparsed_subs = list(srt.parse(crlf_composed))

    # Convert any \r\n remaining in the parsed content back to \n before
    # comparing against the input.
    for sub in reparsed_subs:
        sub.content = sub.content.replace("\r\n", "\n")

    subs_eq(reparsed_subs, input_subs)


@given(st.lists(subtitles()), st.one_of(st.just("\n"), st.just("\r\n")))
def test_compose_and_parse_strict_custom_eol(input_subs, eol):
    """Output composed with a custom eol parses back to the input."""
    subs_eq(srt.parse(srt.compose(input_subs, reindex=False, eol=eol)), input_subs)


@given(equivalent_timestamps())
def test_equal_timestamps_despite_different_fields_parsed_as_equal(timestamps):
    """Differently written but equivalent timestamps parse to equal values."""
    first, second = [srt.srt_timestamp_to_timedelta(ts) for ts in timestamps]
    assert first == second


@given(timedeltas())
def test_bad_timestamp_format_raises(ts):
    """A timestamp with a corrupted delimiter raises TimestampParseError."""
    # Break an otherwise valid timestamp by replacing its first colon
    bad_timestamp = srt.timedelta_to_srt_timestamp(ts).replace(":", "t", 1)
    with pytest.raises(srt.TimestampParseError):
        srt.srt_timestamp_to_timedelta(bad_timestamp)


@given(st.lists(subtitles()), st.lists(st.sampled_from(string.whitespace)))
def test_can_parse_index_trailing_ws(input_subs, whitespace):
    """Whitespace trailing the index line is tolerated by the parser."""
    trailing = "".join(whitespace)
    blocks = []

    for sub in input_subs:
        # Append the whitespace to the first (index) line only
        index_line, newline, rest = sub.to_srt().partition("\n")
        blocks.append(index_line + trailing + newline + rest)

    subs_eq(srt.parse("".join(blocks)), input_subs)


@given(st.lists(subtitles()))
def test_can_parse_index_with_dot(input_subs):
    """An index line of the form "N.N" is parsed as index N."""
    # This form has been seen in Battlestar Galactica subs
    blocks = []

    for sub in input_subs:
        # Rewrite the first (index) line as "<index>.<index>"
        index_line, newline, rest = sub.to_srt().partition("\n")
        blocks.append(index_line + "." + index_line + newline + rest)

    subs_eq(srt.parse("".join(blocks)), input_subs)


@given(st.lists(subtitles()), st.lists(st.just("0")))
def test_can_parse_index_leading_zeroes(input_subs, zeroes):
    """Zeroes in front of the index do not change the parsed index."""
    zero_prefix = "".join(zeroes)
    blocks = []

    for sub in input_subs:
        # Put the zeroes in front of the first (index) line only
        index_line, newline, rest = sub.to_srt().partition("\n")
        blocks.append(zero_prefix + index_line + newline + rest)

    subs_eq(srt.parse("".join(blocks)), input_subs)


@given(st.lists(subtitles(), min_size=1))
def test_parse_file_with_missing_index(input_subs):  # cf. issue #51
    """
    Blocks lacking an index line parse with index None, yet compose and sort
    the same as blocks whose index is 0.
    """
    # If the final line with actual content is a number, we'll parse it as
    # the index, so ignore such inputs
    assume(
        not any(
            _parseable_as_int(sub.content.strip().split("\n")[-1])
            for sub in input_subs
        )
    )

    # Drop the first (index) line of every block
    no_index_blocks = []
    for sub in input_subs:
        block = sub.to_srt()
        no_index_blocks.append(block[block.index("\n") + 1 :])
    out_no_index = "".join(no_index_blocks)

    # Work on copies so that input_subs keeps its generated indexes
    input_subs_copy = [srt.Subtitle(**vars(sub)) for sub in input_subs]
    for sub in input_subs_copy:
        # sub.index == None will get rendered in to_srt as 0
        sub.index = 0
    out_zero_index = "".join(sub.to_srt() for sub in input_subs_copy)

    subs_no_index = list(srt.parse(out_no_index))
    subs_zero_index = list(srt.parse(out_zero_index))

    # One should have index None, one should have index 0...
    assert subs_no_index
    assert subs_zero_index
    assert all(sub.index is None for sub in subs_no_index)
    assert all(sub.index == 0 for sub in subs_zero_index)
    assert subs_no_index != subs_zero_index

    # ...but they should render the same...
    assert srt.compose(subs_no_index, reindex=False) == srt.compose(
        subs_zero_index, reindex=False
    )

    # ...and sort the same.
    assert srt.compose(subs_no_index) == srt.compose(subs_zero_index)