File: soda_speech_recognition_engine_impl.h

// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_BROWSER_SPEECH_SODA_SPEECH_RECOGNITION_ENGINE_IMPL_H_
#define CONTENT_BROWSER_SPEECH_SODA_SPEECH_RECOGNITION_ENGINE_IMPL_H_

#include "base/memory/weak_ptr.h"
#include "base/sequence_checker.h"
#include "content/browser/speech/speech_recognition_engine.h"
#include "content/common/content_export.h"
#include "content/public/browser/speech_recognition_session_config.h"
#include "media/mojo/mojom/audio_data.mojom.h"
#include "media/mojo/mojom/speech_recognition.mojom.h"
#include "mojo/public/cpp/bindings/receiver.h"
#include "mojo/public/cpp/bindings/remote.h"

namespace content {

class SpeechRecognitionManagerDelegate;

// This is the on-device implementation of `SpeechRecognitionEngine`.
//
// This class establishes a connection to the on-device speech recognition
// service via the content::SpeechRecognitionManagerDelegate. On ChromeOS-Ash
// it binds to speech::CrosSpeechRecognitionService; on other platforms it
// binds to speech::ChromeSpeechRecognitionService, provided the on-device
// speech recognition service is available. Once the bind succeeds, the class
// is in the speech-recognition-available state.

class CONTENT_EXPORT SodaSpeechRecognitionEngineImpl
    : public SpeechRecognitionEngine,
      public media::mojom::SpeechRecognitionRecognizerClient {
 public:
  using SendAudioToSpeechRecognitionServiceCallback =
      base::RepeatingCallback<void(media::mojom::AudioDataS16Ptr audio_data)>;

  explicit SodaSpeechRecognitionEngineImpl(
      const SpeechRecognitionSessionConfig& config);
  ~SodaSpeechRecognitionEngineImpl() override;
  SodaSpeechRecognitionEngineImpl(const SodaSpeechRecognitionEngineImpl&) =
      delete;
  SodaSpeechRecognitionEngineImpl& operator=(
      const SodaSpeechRecognitionEngineImpl&) = delete;

  // Sets the delegate for tests.
  static void SetSpeechRecognitionManagerDelegateForTesting(
      SpeechRecognitionManagerDelegate*);

  bool Initialize();
  void SetOnReadyCallback(base::OnceCallback<void()> callback);

  // content::SpeechRecognitionEngine:
  void StartRecognition() override;
  void UpdateRecognitionContext(
      const media::SpeechRecognitionRecognitionContext& recognition_context)
      override;
  void EndRecognition() override;
  void TakeAudioChunk(const AudioChunk& data) override;
  void AudioChunksEnded() override;
  int GetDesiredAudioChunkDurationMs() const override;

  // media::mojom::SpeechRecognitionRecognizerClient:
  void OnSpeechRecognitionRecognitionEvent(
      const media::SpeechRecognitionResult& result,
      OnSpeechRecognitionRecognitionEventCallback reply) override;
  void OnSpeechRecognitionError() override;
  void OnLanguageIdentificationEvent(
      media::mojom::LanguageIdentificationEventPtr event) override;
  void OnSpeechRecognitionStopped() override;

 private:
  // Callback executed when the recognizer is bound. Sets the flag indicating
  // whether the speech recognition service supports multichannel audio.
  void OnRecognizerBound(bool is_multichannel_supported);

  // Called when the speech recognition context or the speech recognition
  // recognizer is disconnected. Sends an error message to the UI and halts
  // future transcriptions.
  void OnRecognizerDisconnected();

  void SendAudioToSpeechRecognitionService(
      media::mojom::AudioDataS16Ptr audio_data);

  void MarkDone();

  void Abort(media::mojom::SpeechRecognitionErrorCode error);

  media::mojom::AudioDataS16Ptr ConvertToAudioDataS16(const AudioChunk& data);

  base::OnceCallback<void()> on_ready_callback_;

  // Callback that sends audio to the speech recognition service; invoked on
  // the main thread.
  SendAudioToSpeechRecognitionServiceCallback send_audio_callback_;

  base::RepeatingCallback<void()> mark_done_callback_;

  mojo::Remote<media::mojom::SpeechRecognitionContext>
      speech_recognition_context_;
  mojo::Remote<media::mojom::SpeechRecognitionRecognizer>
      speech_recognition_recognizer_;
  mojo::Receiver<media::mojom::SpeechRecognitionRecognizerClient>
      speech_recognition_recognizer_client_{this};

  SpeechRecognitionSessionConfig config_;

  SEQUENCE_CHECKER(main_sequence_checker_);

  // Whether StartRecognition() has been called for the current session.
  bool is_start_recognition_ = false;

  base::WeakPtrFactory<SodaSpeechRecognitionEngineImpl> weak_factory_{this};
};

}  // namespace content

#endif  // CONTENT_BROWSER_SPEECH_SODA_SPEECH_RECOGNITION_ENGINE_IMPL_H_