File: subrip.lua

package info (click to toggle)
libquvi-scripts 0.9.20131130-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 2,828 kB
  • sloc: sh: 11,744; ansic: 2,587; makefile: 226
file content (97 lines) | stat: -rw-r--r-- 2,872 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
-- libquvi-scripts v0.9.20131130
-- Copyright (C) 2013  Toni Gundogdu <legatvs@gmail.com>
--
-- This file is part of libquvi-scripts <http://quvi.sourceforge.net/>.
--
-- This program is free software: you can redistribute it and/or
-- modify it under the terms of the GNU Affero General Public
-- License as published by the Free Software Foundation, either
-- version 3 of the License, or (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU Affero General Public License for more details.
--
-- You should have received a copy of the GNU Affero General
-- Public License along with this program.  If not, see
-- <http://www.gnu.org/licenses/>.
--

--[[
Notes
  * http://en.wikipedia.org/wiki/SubRip#SubRip_text_file_format
  * Uses comma (,) for a decimal separator
  * Uses CRLF (\r\n) as the line terminator
]]--

-- Script-local table. 'format' names the export format handled by this
-- script; the utility functions unique to this script are attached to it.
local SubRip = {format='srt'}

-- Identify the script: report the export format of this script and
-- whether the requested target format (qargs.to_format) matches it.
function ident(qargs)
  local wanted = qargs.to_format
  local provided = SubRip.format

  local r = {}
  r.export_format = provided
  r.can_export_data = (wanted == provided)
  return r
end

-- Export data: hand the request over to the converter matching the
-- source format (qargs.from_format). Only timed-text is handled here;
-- any other source format raises an error.
function export(qargs)
  local const = require 'quvi/const'
  local from = qargs.from_format

  if from ~= const.sif_tt then
    error(string.format('unsupported subtitle format: 0x%x', from))
  end

  return SubRip.from_tt(qargs)
end

--
-- Utility functions
--

-- timed-text (tt) - YouTube uses this for both CCs and TTSes.
--
-- Fetches the document at qargs.input_url, parses it as XML and turns
-- each top-level 'text' element into one SubRip entry. The joined
-- entries are stored in qargs.data and qargs is returned.
--
-- Raises an error if the fetched data cannot be parsed.
function SubRip.from_tt(qargs)

  -- The seconds fields are passed in as pre-formatted strings (%s) so
  -- that the decimal separator is replaced there only, never in the
  -- subtitle text itself.
  local f = '%d\r\n%02d:%02d:%s --> %02d:%02d:%s\r\n%s\r\n\r\n'
  local E = require 'quvi/entity'
  local T = require 'quvi/time'
  local U = require 'quvi/util'
  local L = require 'lxp.lom'

  -- Format the seconds of a timecode. Use comma for a decimal separator.
  local function format_ss(ss)
    return (string.format('%06.3f', ss):gsub('%.', ',', 1))
  end

  local x = quvi.http.fetch(qargs.input_url).data
  local t, parse_err = L.parse(x)
  if not t then
    error(string.format('failed to parse timed-text data: %s',
            tostring(parse_err)))
  end
  local r = {}

  local last_start = 0

  --
  -- NOTE: Building up a large string by concatenation will create a lot
  --       of temporary strings burdening the Lua garbage collector. The
  --       Lua way is to put the strings into a table.
  --

  for i=1, #t do
    local e = t[i]
    -- Child nodes may also be plain strings (character data); only
    -- element tables tagged 'text' are converted.
    if type(e) == 'table' and e.tag == 'text' then
      local attr = e.attr or {}
      local start = tonumber(attr['start'] or 0)
      -- Without a 'dur' attribute, fall back to the distance from the
      -- previous entry's start.
      local dur = tonumber(attr['dur'] or (start-last_start))
      local end_sec = start + dur

      -- An empty element has no first child; pass an empty string
      -- rather than nil to the converter.
      local text = U.trim( E.convert_html(e[1] or '') )

      local start_tc = T.to_timecode(start)
      local end_tc = T.to_timecode(end_sec)

      -- Number the entries consecutively from 1. The loop index cannot
      -- be used for this: it also counts nodes that are skipped above.
      local n = #r + 1
      r[n] = string.format(f, n,
                           start_tc.hh, start_tc.mm, format_ss(start_tc.ss),
                           end_tc.hh, end_tc.mm, format_ss(end_tc.ss), text)
      last_start = start
    end
  end
  qargs.data = table.concat(r)
  return qargs
end

-- vim: set ts=2 sw=2 tw=72 expandtab: