File: soda_speech_recognition_engine_impl.h

Package: chromium 138.0.7204.183-1~deb12u1

// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_BROWSER_SPEECH_SODA_SPEECH_RECOGNITION_ENGINE_IMPL_H_
#define CONTENT_BROWSER_SPEECH_SODA_SPEECH_RECOGNITION_ENGINE_IMPL_H_

#include "base/memory/weak_ptr.h"
#include "base/sequence_checker.h"
#include "content/browser/speech/speech_recognition_engine.h"
#include "content/common/content_export.h"
#include "content/public/browser/speech_recognition_session_config.h"
#include "media/mojo/mojom/audio_data.mojom.h"
#include "media/mojo/mojom/speech_recognition.mojom.h"
#include "mojo/public/cpp/bindings/receiver.h"
#include "mojo/public/cpp/bindings/remote.h"

namespace content {

class SpeechRecognitionManagerDelegate;

// This is the on-device implementation for `SpeechRecognitionEngine`.
//
// This class establishes a connection to the on-device speech recognition
// service using the content::SpeechRecognitionManagerDelegate. It will bind to
// the speech::CrosSpeechRecognitionService in ChromeOS-Ash. On other platforms,
// it will bind to the speech::ChromeSpeechRecognitionService if the on-device
// speech recognition service is available. Once the service has been bound
// successfully, this class reports speech recognition as available.
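//
// Example usage (an illustrative sketch only, not taken from the
// implementation; the call sequence is inferred from the declarations below,
// and |config|, |on_ready|, and |chunk| are placeholders):
//
//   auto engine = std::make_unique<SodaSpeechRecognitionEngineImpl>(config);
//   if (engine->Initialize()) {
//     engine->SetOnReadyCallback(std::move(on_ready));
//     engine->StartRecognition();
//     engine->TakeAudioChunk(chunk);  // Repeated for each captured chunk.
//     engine->AudioChunksEnded();     // No further audio will be delivered.
//     engine->EndRecognition();
//   }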

class CONTENT_EXPORT SodaSpeechRecognitionEngineImpl
    : public SpeechRecognitionEngine,
      public media::mojom::SpeechRecognitionRecognizerClient {
 public:
  using SendAudioToSpeechRecognitionServiceCallback =
      base::RepeatingCallback<void(media::mojom::AudioDataS16Ptr audio_data)>;

  explicit SodaSpeechRecognitionEngineImpl(
      const SpeechRecognitionSessionConfig& config);
  ~SodaSpeechRecognitionEngineImpl() override;
  SodaSpeechRecognitionEngineImpl(const SodaSpeechRecognitionEngineImpl&) =
      delete;
  SodaSpeechRecognitionEngineImpl& operator=(
      const SodaSpeechRecognitionEngineImpl&) = delete;

  // Sets the delegate for tests.
  static void SetSpeechRecognitionManagerDelegateForTesting(
      SpeechRecognitionManagerDelegate*);

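  // Establishes the connection to the on-device speech recognition service
  // through the SpeechRecognitionManagerDelegate. Returns whether the
  // connection was set up successfully.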
  bool Initialize();
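  // Registers a callback that is run once the engine is ready to start
  // recognition.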
  void SetOnReadyCallback(base::OnceCallback<void()> callback);

  // content::SpeechRecognitionEngine:
  void StartRecognition() override;
  void UpdateRecognitionContext(
      const media::SpeechRecognitionRecognitionContext& recognition_context)
      override;
  void EndRecognition() override;
  void TakeAudioChunk(const AudioChunk& data) override;
  void AudioChunksEnded() override;
  int GetDesiredAudioChunkDurationMs() const override;

  // media::mojom::SpeechRecognitionRecognizerClient:
  void OnSpeechRecognitionRecognitionEvent(
      const media::SpeechRecognitionResult& result,
      OnSpeechRecognitionRecognitionEventCallback reply) override;
  void OnSpeechRecognitionError() override;
  void OnLanguageIdentificationEvent(
      media::mojom::LanguageIdentificationEventPtr event) override;
  void OnSpeechRecognitionStopped() override;

 private:
  // Callback executed when the recognizer is bound. Sets the flag indicating
  // whether the speech recognition service supports multichannel audio.
  void OnRecognizerBound(bool is_multichannel_supported);

  // Called when the speech recognition context or the speech recognition
  // recognizer is disconnected. Sends an error message to the UI and halts
  // future transcriptions.
  void OnRecognizerDisconnected();

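  // Forwards a chunk of converted audio to the speech recognition service.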
  void SendAudioToSpeechRecognitionService(
      media::mojom::AudioDataS16Ptr audio_data);

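  // Signals to the recognizer that the audio stream has ended.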
  void MarkDone();

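  // Aborts the current recognition session with the given error.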
  void Abort(media::mojom::SpeechRecognitionErrorCode error);

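  // Converts |data| into the signed 16-bit audio format expected by the
  // speech recognition service.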
  media::mojom::AudioDataS16Ptr ConvertToAudioDataS16(const AudioChunk& data);

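  // Run once the engine is ready; see SetOnReadyCallback().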
  base::OnceCallback<void()> on_ready_callback_;

  // Callback that forwards audio to the speech recognition service; invoked
  // on the main thread.
  SendAudioToSpeechRecognitionServiceCallback send_audio_callback_;

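  // Signals the end of the audio stream; see MarkDone().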
  base::RepeatingCallback<void()> mark_done_callback_;

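  // Connections to the speech recognition service: remotes for the context
  // and the recognizer, and the receiver that delivers recognizer client
  // events back to this class.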
  mojo::Remote<media::mojom::SpeechRecognitionContext>
      speech_recognition_context_;
  mojo::Remote<media::mojom::SpeechRecognitionRecognizer>
      speech_recognition_recognizer_;
  mojo::Receiver<media::mojom::SpeechRecognitionRecognizerClient>
      speech_recognition_recognizer_client_{this};

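  // The recognition session configuration supplied at construction.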
  SpeechRecognitionSessionConfig config_;

  SEQUENCE_CHECKER(main_sequence_checker_);

  // Whether recognition has been started for the current session.
  bool is_start_recognition_ = false;

  base::WeakPtrFactory<SodaSpeechRecognitionEngineImpl> weak_factory_{this};
};

}  // namespace content

#endif  // CONTENT_BROWSER_SPEECH_SODA_SPEECH_RECOGNITION_ENGINE_IMPL_H_