File: histogram.py

package info (click to toggle)
ccextractor 0.87%2Bds1-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 10,064 kB
  • sloc: ansic: 172,772; makefile: 777; sh: 622; python: 319
file content (68 lines) | stat: -rw-r--r-- 2,179 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/python
# Author   : Harry Yu
# Email    : harryyunull@gmail.com
# Link     : https://github.com/harrynull

from __future__ import print_function
from builtins import range
from builtins import object
# Version string of this script; main() prints it in the startup banner.
Version = "1.0.0"

import sys
import time
import datetime
from io import open

class Line(object):
    """One subtitle entry built from the raw text lines of an SRT block.

    Attributes set by the constructor:
      id         -- the subtitle number (int), taken from the first line
      start_time -- text to the left of " --> " on the second line
      end_time   -- text to the right of " --> " on the second line
      content    -- list holding every remaining line (the subtitle text)
    """

    def __init__(self, lines):
        # Remove newline characters and surrounding whitespace from each line.
        cleaned = []
        for raw in lines:
            cleaned.append(raw.replace("\n", "").strip())

        # The first line of a block is the subtitle number.
        self.id = int(cleaned[0])

        # The second line must split into exactly two parts around " --> ".
        timing = cleaned[1].split(" --> ")
        self.start_time, self.end_time = timing

        self.content = cleaned[2:]
        
def time_to_minute(time):
    """Return the whole-minute offset (hours * 60 + minutes) of a timestamp.

    The last four characters of *time* are cut off before parsing
    (presumably the ",mmm" millisecond part of an SRT timestamp); what is
    left must match HH:MM:SS. The seconds do not contribute to the result.
    """
    clock = time[:-4]
    parsed = datetime.datetime.strptime(clock, '%H:%M:%S')
    return parsed.hour * 60 + parsed.minute

def read_srt_file(filename):
    """Parse the SRT file at *filename* and return a list of Line objects.

    The file is read as UTF-8 text. Subtitle blocks are separated by blank
    lines ("\\n" or "\\r\\n"); each block's raw lines are handed to Line().

    Compared with a plain "flush on blank line" loop this also:
      * keeps the last block when the file does not end with a blank line,
      * ignores runs of blank lines instead of building a Line from nothing,
      * tolerates a leading UTF-8 byte order mark.

    Errors raised by open() or by Line() for a malformed block propagate to
    the caller.
    """
    subtitles = []
    pending = []  # raw lines of the block currently being collected

    def flush():
        # Only build a Line when the block holds some non-blank text, so
        # stray blank or whitespace-only runs do not reach Line().
        if any(entry.strip() for entry in pending):
            subtitles.append(Line(pending))

    # "utf-8-sig" decodes ordinary UTF-8 as well, but drops a leading BOM
    # that would otherwise make int() fail on the first subtitle number.
    with open(filename, encoding="utf-8-sig") as srt_file:
        for line in srt_file:
            if line in ("\n", "\r\n"):
                flush()
                pending = []
            else:
                pending.append(line)

    # A file without a trailing blank line still has its last block pending.
    flush()
    return subtitles

def main():
    """Print a per-minute histogram of the subtitles in an SRT file.

    Expects exactly one command-line argument: the path of the SRT file.
    With a wrong argument count a usage message is printed and the process
    exits with status 1.

    Output: a banner, then one row per minute from minute 0 up to and
    including the last minute in which any subtitle is shown (minute number,
    count of subtitles visible in that minute, and a bar of "+"), then the
    total number of subtitles in the file.
    """
    if len(sys.argv) < 2:
        print("{0}: missing file name.\nUse {0} /path/to/srt/file".format(sys.argv[0]))
        # sys.exit() instead of the site-provided exit(); non-zero status
        # because this is a usage error.
        sys.exit(1)
    elif len(sys.argv) > 2:
        print("{0}: too many arguments:\nUse {0} /path/to/srt/file".format(sys.argv[0]))
        sys.exit(1)

    print("histogram.py %s Made by Harry Yu" % Version)
    print("Histogram for %s\n" % sys.argv[1])

    # Read subtitles from file
    lines = read_srt_file(sys.argv[1])

    # Count how many subtitles are visible in each minute. A subtitle that
    # spans a minute boundary is counted once in every minute it touches.
    subtitle_per_minute = {}
    for line in lines:
        first_minute = time_to_minute(line.start_time)
        last_minute = time_to_minute(line.end_time)
        for minute in range(first_minute, last_minute + 1):
            subtitle_per_minute[minute] = subtitle_per_minute.get(minute, 0) + 1

    # Print the result. The guard avoids max() on an empty dict (file with no
    # subtitles); the "+ 1" makes the last populated minute part of the output.
    if subtitle_per_minute:
        for minute in range(max(subtitle_per_minute) + 1):
            count = subtitle_per_minute.get(minute, 0)
            print("Minute %d\t%d\t%s" % (minute, count, "+" * count))

    # Count the subtitles themselves: summing the per-minute counts would
    # count a subtitle once for every minute it spans.
    print("\nTotal subtitles: %d" % len(lines))
    
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()