File: asr.py

package info (click to toggle)
pytorch-audio 0.7.2-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 5,512 kB
  • sloc: python: 15,606; cpp: 1,352; sh: 257; makefile: 21
file content (47 lines) | stat: -rw-r--r-- 1,462 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env python3
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
"""
Run interactive speech recognition with a trained model: transcribe an input
file when one is given, otherwise transcribe audio from the microphone.
"""

import datetime as dt
import logging

from fairseq import options

from interactive_asr.utils import add_asr_eval_argument, setup_asr, get_microphone_transcription, transcribe_file


def main(args):
    """Set up the ASR components and print transcriptions to stdout.

    When ``args.input_file`` is truthy, the file is transcribed once and both
    the transcription and the time it took are printed. Otherwise every item
    yielded by ``get_microphone_transcription`` is printed, prefixed with the
    current wall-clock time (``HH:MM:SS``).

    Args:
        args: Parsed command-line arguments; forwarded unchanged to the
            ``interactive_asr.utils`` helpers. Only ``args.input_file`` is
            read directly here.
    """
    asr_logger = logging.getLogger(__name__)
    asr_logger.setLevel(logging.INFO)

    task, generator, models, sp, tgt_dict = setup_asr(args, asr_logger)
    # Every transcription helper takes the same trailing arguments, in this order.
    components = (task, generator, models, sp, tgt_dict)

    print("READY!")

    if args.input_file:
        elapsed, text = transcribe_file(args, *components)
        print("transcription:", text)
        print("transcription_time:", elapsed)
        return

    # No input file given: keep printing results as the microphone helper yields them.
    for result in get_microphone_transcription(args, *components):
        timestamp = dt.datetime.now().strftime("%H:%M:%S")
        # NOTE(review): result[0][0] is presumably the best hypothesis text of
        # the first utterance -- confirm against interactive_asr.utils.
        best = result[0][0]
        print("{}: {}".format(timestamp, best))


def cli_main():
    """Command-line entry point: build the parser, parse arguments, run ``main``.

    Starts from fairseq's generation parser, passes it through
    ``add_asr_eval_argument`` and hands the parsed arguments to ``main``.
    """
    asr_parser = add_asr_eval_argument(options.get_generation_parser())
    main(options.parse_args_and_arch(asr_parser))


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    cli_main()