File: chrome_speech_recognition_client.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (146 lines) | stat: -rw-r--r-- 5,750 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_RENDERER_MEDIA_CHROME_SPEECH_RECOGNITION_CLIENT_H_
#define CHROME_RENDERER_MEDIA_CHROME_SPEECH_RECOGNITION_CLIENT_H_

#include <memory>
#include <string>

#include "base/memory/weak_ptr.h"
#include "base/sequence_checker.h"
#include "base/synchronization/lock.h"
#include "content/public/renderer/render_frame_observer.h"
#include "media/base/audio_buffer.h"
#include "media/base/speech_recognition_client.h"
#include "media/mojo/common/audio_data_s16_converter.h"
#include "media/mojo/mojom/speech_recognition.mojom.h"
#include "mojo/public/cpp/bindings/receiver.h"
#include "mojo/public/cpp/bindings/remote.h"

// Forward declaration: this header only refers to content::RenderFrame by
// pointer (the constructor argument), so the full definition is not needed.
namespace content {
class RenderFrame;
}  // namespace content

// Forward declaration: this header only holds
// media::ReconfigurableAudioBusPoolImpl through a std::unique_ptr member.
namespace media {
class ReconfigurableAudioBusPoolImpl;
}  // namespace media

// Renderer-side speech recognition client tied to a content::RenderFrame.
//
// Accepts audio through the media::SpeechRecognitionClient interface (either
// as media::AudioBuffer or media::AudioBus), observes browser-side speech
// recognition state via media::mojom::SpeechRecognitionBrowserObserver, and
// owns the mojo endpoints declared below (browser interface, speech
// recognition context and recognizer).
//
// Threading: Reconfigure() must run on the main owning sequence, and
// `is_recognizer_bound_` is protected by `is_recognizer_bound_lock_` because
// it is accessed from both the main and rendering threads.
class ChromeSpeechRecognitionClient
    : public content::RenderFrameObserver,
      public media::SpeechRecognitionClient,
      public media::mojom::SpeechRecognitionBrowserObserver,
      public media::AudioDataS16Converter {
 public:
  // Signature of the callback stored in `send_audio_callback_`: S16 audio
  // data plus an optional media start timestamp. It matches
  // SendAudioToSpeechRecognitionService() below.
  // NOTE(review): std::optional is used here but <optional> is not among this
  // header's direct includes — consider including it explicitly.
  using SendAudioToSpeechRecognitionServiceCallback =
      base::RepeatingCallback<void(
          media::mojom::AudioDataS16Ptr audio_data,
          std::optional<base::TimeDelta> media_start_pts)>;
  // Signature of the argument-less callback stored in `initialize_callback_`.
  using InitializeCallback = base::RepeatingCallback<void()>;

  explicit ChromeSpeechRecognitionClient(content::RenderFrame* render_frame);
  // Not copyable or copy-assignable.
  ChromeSpeechRecognitionClient(const ChromeSpeechRecognitionClient&) = delete;
  ChromeSpeechRecognitionClient& operator=(
      const ChromeSpeechRecognitionClient&) = delete;
  ~ChromeSpeechRecognitionClient() override;

  // content::RenderFrameObserver
  void OnDestruct() override;

  // media::SpeechRecognitionClient
  void AddAudio(scoped_refptr<media::AudioBuffer> buffer,
                std::optional<base::TimeDelta> media_start_pts) override;

  // Must call Reconfigure() first and can't be called concurrently with
  // Reconfigure().
  void AddAudio(const media::AudioBus& audio_bus) override;

  bool IsSpeechRecognitionAvailable() override;
  void SetOnReadyCallback(
      SpeechRecognitionClient::OnReadyCallback callback) override;

  // Must be called on the main owning sequence. Must be called before the first
  // call to AddAudio(const media::AudioBus&), cannot be called concurrently
  // with AddAudio().
  void Reconfigure(const media::AudioParameters& audio_parameters) override;

  // Callback executed when the recognizer is bound. Sets the flag indicating
  // whether the speech recognition service supports multichannel audio.
  void OnRecognizerBound(bool is_multichannel_supported);

  // media::mojom::SpeechRecognitionBrowserObserver
  void SpeechRecognitionAvailabilityChanged(
      bool is_speech_recognition_available) override;
  void SpeechRecognitionLanguageChanged(const std::string& language) override;
  void SpeechRecognitionMaskOffensiveWordsChanged(
      bool mask_offensive_words) override;

 private:
  // Signature of the callback stored in `add_audio_on_main_sequence_callback_`:
  // an owned audio bus, an int and a channel layout. It matches
  // AddAudioBusOnMainSequence() below, where the int is the sample rate.
  using AddAudioCallback = base::RepeatingCallback<
      void(std::unique_ptr<media::AudioBus>, int, media::ChannelLayout)>;

  // Initialize the speech recognition client and construct all of the mojo
  // pipes.
  void Initialize();

  // Resets the mojo pipe to the speech recognition recognizer and speech
  // recognition service. Maintains the pipe to the browser so that it may be
  // notified when to reinitialize the pipes.
  void Reset();

  // Processes an audio bus on the main sequence.
  void AddAudioBusOnMainSequence(std::unique_ptr<media::AudioBus> audio_bus,
                                 int sample_rate,
                                 media::ChannelLayout channel_layout);

  // Forwards S16 audio data, with its optional media start timestamp, to the
  // speech recognition service.
  void SendAudioToSpeechRecognitionService(
      media::mojom::AudioDataS16Ptr audio_data,
      std::optional<base::TimeDelta> media_start_pts);

  // Called when the speech recognition context or the speech recognition
  // recognizer is disconnected. Sends an error message to the UI and halts
  // future transcriptions.
  void OnRecognizerDisconnected();

  // NOTE(review): presumably bound to Initialize(); the binding is not visible
  // in this header — confirm in the .cc file.
  ChromeSpeechRecognitionClient::InitializeCallback initialize_callback_;

  // Callback supplied through SetOnReadyCallback().
  media::SpeechRecognitionClient::OnReadyCallback on_ready_callback_;

  // NOTE(review): presumably bound to Reset(); the binding is not visible in
  // this header — confirm in the .cc file.
  base::RepeatingClosure reset_callback_;

  // Sends audio to the speech recognition thread on the renderer thread.
  SendAudioToSpeechRecognitionServiceCallback send_audio_callback_;

  // Receiving end for media::mojom::SpeechRecognitionBrowserObserver
  // notifications; dispatches to the overrides declared above on `this`.
  mojo::Receiver<media::mojom::SpeechRecognitionBrowserObserver>
      speech_recognition_availability_observer_{this};
  // Connection to the browser. Per the comment on Reset(), this pipe is kept
  // alive across resets.
  mojo::Remote<media::mojom::SpeechRecognitionClientBrowserInterface>
      speech_recognition_client_browser_interface_;

  // Connections to the speech recognition context and recognizer. A disconnect
  // of either is handled by OnRecognizerDisconnected() (see its comment).
  mojo::Remote<media::mojom::SpeechRecognitionContext>
      speech_recognition_context_;
  mojo::Remote<media::mojom::SpeechRecognitionRecognizer>
      speech_recognition_recognizer_;

  // NOTE(review): presumably bound to AddAudioBusOnMainSequence() (the
  // signatures match); confirm in the .cc file.
  AddAudioCallback add_audio_on_main_sequence_callback_;
  // NOTE(review): presumably the source of the media::AudioBus instances
  // passed to the callback above — confirm in the .cc file.
  std::unique_ptr<media::ReconfigurableAudioBusPoolImpl> audio_bus_pool_;
  // Sequence checker for the main owning sequence.
  SEQUENCE_CHECKER(main_sequence_checker_);

  // Cached audio parameters used with media::AudioBus.
  media::AudioParameters audio_parameters_;

  // Whether all mojo pipes are bound to the speech recognition service.
  bool GUARDED_BY(is_recognizer_bound_lock_) is_recognizer_bound_ = false;

  // Protects `is_recognizer_bound_` when it's accessed from the main and
  // rendering threads concurrently.
  mutable base::Lock is_recognizer_bound_lock_;

  // A flag indicating whether the speech recognition service supports
  // multichannel audio.
  bool is_multichannel_supported_ = false;

  // Declared last so that it is destroyed first (members are destroyed in
  // reverse declaration order), before any other member goes away.
  base::WeakPtrFactory<ChromeSpeechRecognitionClient> weak_factory_{this};
};

#endif  // CHROME_RENDERER_MEDIA_CHROME_SPEECH_RECOGNITION_CLIENT_H_