File: sbv.py

package info (click to toggle)
python-webvtt 0.5.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 640 kB
  • sloc: python: 3,777; makefile: 29; sh: 6
file content (117 lines) | stat: -rw-r--r-- 2,917 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""SBV format module."""

import typing
import re

from . import utils
from .models import Caption
from .errors import MalformedFileError


class SBVCueBlock:
    """Representation of a cue timing block."""

    CUE_TIMINGS_PATTERN = re.compile(
        r'\s*(\d{1,2}:\d{1,2}:\d{1,2}.\d{3}),(\d{1,2}:\d{1,2}:\d{1,2}.\d{3})'
        )

    def __init__(
            self,
            start: str,
            end: str,
            payload: typing.Sequence[str]
            ):
        """
        Initialize.

        :param start: start time
        :param end: end time
        :param payload: caption text
        """
        self.start = start
        self.end = end
        self.payload = payload

    @classmethod
    def is_valid(
            cls,
            lines: typing.Sequence[str]
            ) -> bool:
        """
        Validate the lines for a match of a cue time block.

        :param lines: the lines to be validated
        :returns: true for a matching cue time block
        """
        return bool(
          len(lines) >= 2 and
          re.match(cls.CUE_TIMINGS_PATTERN, lines[0]) and
          lines[1].strip()
          )

    @classmethod
    def from_lines(
            cls,
            lines: typing.Sequence[str]
            ) -> 'SBVCueBlock':
        """
        Create a `SBVCueBlock` from lines of text.

        :param lines: the lines of text
        :returns: `SBVCueBlock` instance
        """
        match = re.match(cls.CUE_TIMINGS_PATTERN, lines[0])
        assert match is not None

        payload = lines[1:]

        return cls(match.group(1), match.group(2), payload)


def parse(lines: typing.Sequence[str]) -> typing.List[Caption]:
    """
    Parse SBV captions from lines of text.

    :param lines: lines of text
    :returns: list of `Caption` objects
    """
    if not _is_valid_content(lines):
        raise MalformedFileError('Invalid format')

    return _parse_captions(lines)


def _is_valid_content(lines: typing.Sequence[str]) -> bool:
    """
    Validate lines of text for valid SBV content.

    :param lines: lines of text
    :returns: true for a valid SBV content
    """
    if len(lines) < 2:
        return False

    first_block = next(utils.iter_blocks_of_lines(lines))
    return bool(first_block and SBVCueBlock.is_valid(first_block))


def _parse_captions(lines: typing.Sequence[str]) -> typing.List[Caption]:
    """
    Parse captions from the text.

    :param lines: lines of text
    :returns: list of `Caption` objects
    """
    captions = []

    for block_lines in utils.iter_blocks_of_lines(lines):
        if not SBVCueBlock.is_valid(block_lines):
            continue

        cue_block = SBVCueBlock.from_lines(block_lines)
        captions.append(Caption(cue_block.start,
                                cue_block.end,
                                cue_block.payload
                                ))

    return captions