File: speech_synthesis_impl.cc

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
links: PTS, VCS
area: main
in suites: bookworm-proposed-updates
size: 6,080,960 kB
sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (195 lines) | stat: -rw-r--r-- 6,925 bytes
parent folder | download | duplicates (4)
// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/browser/speech/speech_synthesis_impl.h"

#include "content/browser/media/audio_stream_monitor.h"
#include "content/browser/renderer_host/render_frame_host_impl.h"
#include "content/browser/speech/tts_utterance_impl.h"
#include "content/browser/web_contents/web_contents_impl.h"
#include "content/public/browser/web_contents.h"

namespace content {
namespace {

// Callback type that, when run, returns a new
// AudioStreamMonitor::AudibleClientRegistration. EventThunk runs it when speech
// starts or resumes and holds the returned registration until speech pauses,
// errors, or finishes.
using AudibleCB = base::RepeatingCallback<
    std::unique_ptr<AudioStreamMonitor::AudibleClientRegistration>()>;

// The lifetime of instances of this class is manually bound to the lifetime of
// the associated TtsUtterance. See OnTtsEvent.
class EventThunk : public UtteranceEventDelegate {
 public:
  EventThunk(mojo::PendingRemote<blink::mojom::SpeechSynthesisClient> client,
             AudibleCB audible_cb)
      : client_(std::move(client)), audible_cb_(std::move(audible_cb)) {}
  ~EventThunk() override = default;

  // UtteranceEventDelegate methods:
  void OnTtsEvent(TtsUtterance* utterance,
                  TtsEventType event_type,
                  int char_index,
                  int char_length,
                  const std::string& error_message) override {
    // These values are unsigned in the web speech API, so -1 cannot be used as
    // a sentinel value. Use 0 instead to match web standards.
    char_index = std::max(char_index, 0);
    char_length = std::max(char_length, 0);

    switch (event_type) {
      case TTS_EVENT_START:
        audible_client_ = audible_cb_.Run();
        client_->OnStartedSpeaking();
        break;
      case TTS_EVENT_END:
        audible_client_.reset();
        client_->OnFinishedSpeaking(
            blink::mojom::SpeechSynthesisErrorCode::kNoError);
        break;
      case TTS_EVENT_INTERRUPTED:
        audible_client_.reset();
        client_->OnFinishedSpeaking(
            blink::mojom::SpeechSynthesisErrorCode::kInterrupted);
        break;
      case TTS_EVENT_CANCELLED:
        audible_client_.reset();
        client_->OnFinishedSpeaking(
            blink::mojom::SpeechSynthesisErrorCode::kCancelled);
        break;
      case TTS_EVENT_WORD:
        client_->OnEncounteredWordBoundary(char_index, char_length);
        break;
      case TTS_EVENT_SENTENCE:
        client_->OnEncounteredSentenceBoundary(char_index, 0);
        break;
      case TTS_EVENT_MARKER:
        // The web platform API does not support this event.
        break;
      case TTS_EVENT_ERROR:
        audible_client_.reset();
        // The web platform API does not support error text.
        client_->OnEncounteredSpeakingError();
        break;
      case TTS_EVENT_PAUSE:
        audible_client_.reset();
        client_->OnPausedSpeaking();
        break;
      case TTS_EVENT_RESUME:
        audible_client_ = audible_cb_.Run();
        client_->OnResumedSpeaking();
        break;
    }

    if (utterance->IsFinished())
      delete this;
  }

 private:
  mojo::Remote<blink::mojom::SpeechSynthesisClient> client_;
  AudibleCB audible_cb_;
  std::unique_ptr<AudioStreamMonitor::AudibleClientRegistration>
      audible_client_;
};

// Converts |voices| into mojo SpeechSynthesisVoice structs, preserving order,
// and delivers the resulting list to |observer| via OnSetVoiceList().
//
// The voice name is used for both `voice_uri` and `name`, and the first entry
// of |voices| is the one flagged as the default voice.
void SendVoiceListToObserver(
    blink::mojom::SpeechSynthesisVoiceListObserver* observer,
    const std::vector<VoiceData>& voices) {
  std::vector<blink::mojom::SpeechSynthesisVoicePtr> mojo_voices;
  mojo_voices.reserve(voices.size());

  for (const VoiceData& voice : voices) {
    blink::mojom::SpeechSynthesisVoicePtr mojo_voice =
        blink::mojom::SpeechSynthesisVoice::New();
    mojo_voice->voice_uri = voice.name;
    mojo_voice->name = voice.name;
    mojo_voice->lang = voice.lang;
    mojo_voice->is_local_service = !voice.remote;
    // Nothing has been appended yet only for the first voice.
    mojo_voice->is_default = mojo_voices.empty();
    mojo_voices.push_back(std::move(mojo_voice));
  }

  observer->OnSetVoiceList(std::move(mojo_voices));
}

}  // namespace

// Stores |browser_context|, the WebContents looked up from |rfh|, and the
// global id of |rfh|, then registers this object with the TtsController as a
// voices-changed delegate (unregistered again in the destructor).
SpeechSynthesisImpl::SpeechSynthesisImpl(BrowserContext* browser_context,
                                         RenderFrameHostImpl* rfh)
    : browser_context_(browser_context),
      web_contents_(WebContents::FromRenderFrameHost((rfh))),
      frame_id_(rfh->GetGlobalId()) {
  // Both pointers are used later in this file without further null checks.
  DCHECK(browser_context_);
  DCHECK(web_contents_);
  TtsController::GetInstance()->AddVoicesChangedDelegate(this);
}

// Unregisters this object from the TtsController's voices-changed delegates,
// undoing the registration made in the constructor.
SpeechSynthesisImpl::~SpeechSynthesisImpl() {
  TtsController::GetInstance()->RemoveVoicesChangedDelegate(this);

  // NOTE: Some EventThunk instances may outlive this class, and that's okay.
  // They have their lifetime bound to their associated TtsUtterance instance,
  // and the TtsController manages the lifetime of those.
}

// Adds |receiver| to the receiver set with this object as the implementation,
// so one SpeechSynthesisImpl can serve several SpeechSynthesis receivers.
void SpeechSynthesisImpl::AddReceiver(
    mojo::PendingReceiver<blink::mojom::SpeechSynthesis> receiver) {
  receiver_set_.Add(this, std::move(receiver));
}

// Binds |pending_observer|, immediately pushes the current voice list to it,
// and then keeps it in |observer_set_| so that OnVoicesChanged() can notify it
// of later updates.
void SpeechSynthesisImpl::AddVoiceListObserver(
    mojo::PendingRemote<blink::mojom::SpeechSynthesisVoiceListObserver>
        pending_observer) {
  mojo::Remote<blink::mojom::SpeechSynthesisVoiceListObserver> bound_observer(
      std::move(pending_observer));

  // Send an initial snapshot so the new observer does not have to wait for the
  // next voices-changed notification. Voices are queried with an empty GURL.
  std::vector<VoiceData> current_voices;
  TtsController::GetInstance()->GetVoices(browser_context_, GURL(),
                                          &current_voices);
  SendVoiceListToObserver(bound_observer.get(), current_voices);

  // Ownership of the remote moves to the set only after the snapshot is sent.
  observer_set_.Add(std::move(bound_observer));
}

// Builds a TtsUtterance from the mojo |utterance| and hands it to the
// TtsController to be spoken or queued. Events for the utterance are relayed
// to |client| through a self-deleting EventThunk.
//
// If the WebContents is audio-muted the request is dropped: no utterance is
// created and |client| is never bound, so it receives no events.
void SpeechSynthesisImpl::Speak(
    blink::mojom::SpeechSynthesisUtterancePtr utterance,
    mojo::PendingRemote<blink::mojom::SpeechSynthesisClient> client) {
  if (web_contents_->IsAudioMuted()) {
    return;
  }

  std::unique_ptr<TtsUtterance> new_utterance =
      std::make_unique<TtsUtteranceImpl>(browser_context_, web_contents_);
  new_utterance->SetText(utterance->text);
  new_utterance->SetLang(utterance->lang);
  new_utterance->SetVoiceName(utterance->voice);
  // Do not clear utterances that are already queued.
  new_utterance->SetShouldClearQueue(false);
  new_utterance->SetContinuousParameters(utterance->rate, utterance->pitch,
                                         utterance->volume);

  // Callback the thunk runs to register this frame as an audible client with
  // the WebContents' AudioStreamMonitor. The monitor pointer is bound
  // unretained, i.e. the callback does not keep the monitor alive.
  auto* monitor =
      static_cast<WebContentsImpl*>(web_contents_)->audio_stream_monitor();
  AudibleCB register_audible_client =
      base::BindRepeating(&AudioStreamMonitor::RegisterAudibleClient,
                          base::Unretained(monitor), frame_id_);

  // The thunk is intentionally allocated with a bare `new`: it deletes itself
  // once the utterance it is attached to has finished (see EventThunk).
  new_utterance->SetEventDelegate(
      new EventThunk(std::move(client), std::move(register_audible_client)));

  TtsController::GetInstance()->SpeakOrEnqueue(std::move(new_utterance));
}

// Forwards the pause request to the TtsController instance.
// NOTE(review): no frame or WebContents is passed along, so this presumably
// applies to whatever the controller is currently speaking — confirm.
void SpeechSynthesisImpl::Pause() {
  TtsController::GetInstance()->Pause();
}

// Forwards the resume request to the TtsController instance.
// NOTE(review): no frame or WebContents is passed along, so this presumably
// applies to whatever the controller currently has paused — confirm.
void SpeechSynthesisImpl::Resume() {
  TtsController::GetInstance()->Resume();
}

// Implements cancel by calling Stop() on the TtsController instance.
// NOTE(review): Stop() is called without arguments, so it is presumably not
// scoped to this frame's utterances — confirm against TtsController.
void SpeechSynthesisImpl::Cancel() {
  TtsController::GetInstance()->Stop();
}

void SpeechSynthesisImpl::OnVoicesChanged() {
  std::vector<VoiceData> voices;
  TtsController::GetInstance()->GetVoices(browser_context_, GURL(), &voices);
  for (auto& observer : observer_set_)
    SendVoiceListToObserver(observer.get(), voices);
}

}  // namespace content