File: system_live_caption_service.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (181 lines) | stat: -rw-r--r-- 6,778 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_ASH_ACCESSIBILITY_LIVE_CAPTION_SYSTEM_LIVE_CAPTION_SERVICE_H_
#define CHROME_BROWSER_ASH_ACCESSIBILITY_LIVE_CAPTION_SYSTEM_LIVE_CAPTION_SERVICE_H_

#include <memory>

#include "ash/accessibility/caption_bubble_context_ash.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/weak_ptr.h"
#include "base/scoped_observation.h"
#include "base/timer/timer.h"
#include "chrome/browser/profiles/profile_keyed_service_factory.h"
#include "chrome/browser/speech/speech_recognition_recognizer_client_impl.h"
#include "chrome/browser/speech/speech_recognizer_delegate.h"
#include "chromeos/ash/components/audio/cras_audio_handler.h"
#include "components/keyed_service/core/keyed_service.h"
#include "components/live_caption/live_translate_controller.h"
#include "components/live_caption/translation_util.h"
#include "components/soda/constants.h"
#include "components/soda/soda_installer.h"
#include "media/mojo/mojom/speech_recognition.mojom-shared.h"
#include "media/mojo/mojom/speech_recognition.mojom.h"
#include "mojo/public/cpp/bindings/receiver.h"

// Forward declarations. Within this header these types are referenced only
// through pointers, raw_ptr<> and std::unique_ptr<>.
// NOTE(review): speech_recognition_recognizer_client_impl.h is already
// included above, so the SpeechRecognitionRecognizerClientImpl forward
// declaration looks redundant -- confirm before removing.
class Profile;
class SpeechRecognitionRecognizerClientImpl;

// Global `captions` namespace. The class below spells it `::captions::`
// because it also uses a nested `ash::captions` namespace.
namespace captions {
class LiveCaptionController;
}  // namespace captions

namespace media {
class AudioSystem;
}  // namespace media

namespace ash {

// Responsible for running the live captioning model on audio from non-web (e.g.
// Android, linux) sources. Internally uses the "audio stream" speech
// recognition API on (what will eventually be) a "loopback" audio stream.
//
// This class doesn't track preferences, package installation or audio status at
// all; it is told to start/stop by the classes that actually do so.
// NOTE(review): the class also implements CrasAudioHandler::AudioObserver and
// media::mojom::SpeechRecognitionBrowserObserver, so the statement above may
// be out of date -- confirm against the implementation file.
//
// For the moment, this is prototype logic only: it processes the input device
// stream (c.f. a not-yet-existing "non-web only" loopback) and processes the
// stream even when no audio is being produced.
// NOTE(review): `AudioSource` below offers both kLoopback (the constructor
// default) and kUserMicrophone, which suggests this paragraph is stale --
// confirm.
//
// TODO(b/253114860): Until these issues are addressed, this class can't be used
//                    in production.
class SystemLiveCaptionService
    : public KeyedService,
      public SpeechRecognizerDelegate,
      public media::mojom::SpeechRecognitionBrowserObserver,
      public CrasAudioHandler::AudioObserver {
 public:
  // Identifies which audio source a service instance listens to (see
  // `source_`).
  enum class AudioSource {
    kLoopback,
    kUserMicrophone,
  };

  // `source` defaults to AudioSource::kLoopback when omitted.
  // NOTE(review): `profile` is presumably retained in `profile_`; the
  // constructor body is not visible from this header.
  explicit SystemLiveCaptionService(
      Profile* profile,
      AudioSource source = AudioSource::kLoopback);
  ~SystemLiveCaptionService() override;

  // Not copyable or copy-assignable.
  SystemLiveCaptionService(const SystemLiveCaptionService&) = delete;
  SystemLiveCaptionService& operator=(const SystemLiveCaptionService&) = delete;

  // KeyedService overrides:
  void Shutdown() override;

  // SpeechRecognizerDelegate overrides:
  void OnSpeechResult(const std::u16string& text,
                      bool is_final,
                      const std::optional<media::SpeechRecognitionResult>&
                          full_result) override;
  void OnSpeechSoundLevelChanged(int16_t level) override;
  void OnSpeechRecognitionStateChanged(
      SpeechRecognizerStatus new_state) override;
  void OnSpeechRecognitionStopped() override;
  void OnLanguageIdentificationEvent(
      media::mojom::LanguageIdentificationEventPtr event) override;

  // media::mojom::SpeechRecognitionBrowserObserver overrides:
  void SpeechRecognitionAvailabilityChanged(
      bool is_speech_recognition_available) override;
  void SpeechRecognitionLanguageChanged(const std::string& language) override;
  void SpeechRecognitionMaskOffensiveWordsChanged(
      bool mask_offensive_words) override;

  // Test-only: stores the factory callback in
  // `create_audio_system_for_testing_`, replacing any previously stored one.
  void set_audio_system_factory_for_testing(
      base::RepeatingCallback<std::unique_ptr<media::AudioSystem>()>
          create_audio_system_for_testing) {
    create_audio_system_for_testing_ =
        std::move(create_audio_system_for_testing);
  }

  // Test-only: stores `num_output_streams` in
  // `num_output_streams_for_testing_`, the injected value described on
  // GetNumberOfNonChromeOutputStreams().
  void set_num_non_chrome_output_streams_for_testing(
      uint32_t num_output_streams) {
    num_output_streams_for_testing_ = num_output_streams;
  }

  // CrasAudioHandler::AudioObserver overrides:
  void OnNonChromeOutputStarted() override;

  void OnNonChromeOutputStopped() override;

 protected:
  // Returns the recognizer client type. Virtual, so a subclass can supply a
  // different value.
  virtual media::mojom::RecognizerClientType GetRecognizerClientType();

 private:
  // NOTE(review): presumably invoked with the outcome (`result`) of a
  // translation request for `original_transcription`; the exact role of
  // `cached_translation` is not visible from this header -- confirm in the
  // implementation file.
  void OnTranslationCallback(const std::string& cached_translation,
                             const std::string& original_transcription,
                             const std::string& source_language,
                             const std::string& target_language,
                             bool is_final,
                             const ::captions::TranslateEvent& result);

  // NOTE(review): presumably forwards `text` (flagged final or not via
  // `is_final`) towards the caption UI -- confirm in the implementation file.
  void AttemptDispatch(const std::string& text, bool is_final);

  // Binds to the correct observer list based on `source_`
  void BindToBrowserInterface();
  // Gets language code based on the preference this keyed_service
  // is listening to.
  virtual std::string GetPrimaryLanguageCode() const;
  // The source language code of the audio stream.
  std::string source_language_;
  // Recognizer status tracked by this service; starts out as
  // SPEECH_RECOGNIZER_OFF.
  SpeechRecognizerStatus current_recognizer_status_ =
      SpeechRecognizerStatus::SPEECH_RECOGNIZER_OFF;
  // Starts out false.
  // NOTE(review): presumably mirrors OnNonChromeOutputStarted() /
  // OnNonChromeOutputStopped() -- confirm.
  bool output_running_ = false;

  // One-shot timer owned by this service.
  // NOTE(review): presumably fires StopTimeoutFinished() -- confirm.
  std::unique_ptr<base::OneShotTimer> stop_countdown_timer_;

  // Stops and destructs audio stream recognizing client.
  void StopRecognizing();

  // NOTE(review): presumably (re)creates `client_` -- confirm.
  void CreateClient();
  // NOTE(review): presumably the callback run when `stop_countdown_timer_`
  // expires -- confirm.
  void StopTimeoutFinished();

  void OpenCaptionSettings();

  // Wrapper around CrasAudioHandler's NumberOfNonChromeOutputStreams. If a
  // value for the number of non-Chrome output streams has been injected (see
  // `num_output_streams_for_testing_`), this method returns that value
  // instead.
  uint32_t GetNumberOfNonChromeOutputStreams();

  ::captions::TranslationCache translation_cache_;

  // Non-owning; the pointer itself is const and cannot be reseated.
  const raw_ptr<Profile> profile_;
  // Non-owning.
  raw_ptr<::captions::LiveCaptionController> controller_;
  ash::captions::CaptionBubbleContextAsh context_;

  // Owned recognizer client; see StopRecognizing().
  std::unique_ptr<SpeechRecognitionRecognizerClientImpl> client_;

  // Which audio source this service is listening to.
  const AudioSource source_;

  // The number of characters sent to the translation service.
  int characters_translated_ = 0;

  // If set during a test this number will be used to determine the
  // number of non chrome output streams.
  std::optional<uint32_t> num_output_streams_for_testing_;

  // Mojo receiver constructed with `this` as the
  // SpeechRecognitionBrowserObserver implementation.
  mojo::Receiver<media::mojom::SpeechRecognitionBrowserObserver>
      browser_observer_receiver_{this};

  // Used to inject a fake audio system into our client in tests.
  base::RepeatingCallback<std::unique_ptr<media::AudioSystem>()>
      create_audio_system_for_testing_;

  // Declared last: members are destroyed in reverse declaration order, so the
  // factory is destroyed before every other member.
  base::WeakPtrFactory<SystemLiveCaptionService> weak_ptr_factory_{this};
};

}  // namespace ash

#endif  // CHROME_BROWSER_ASH_ACCESSIBILITY_LIVE_CAPTION_SYSTEM_LIVE_CAPTION_SERVICE_H_