File: speech_recognizer_fsm.cc

package info (click to toggle)
chromium 139.0.7258.127-1
links: PTS, VCS
area: main
in suites:
size: 6,122,068 kB
sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (189 lines) | stat: -rw-r--r-- 6,654 bytes
parent folder | download | duplicates (6)
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/speech/speech_recognizer_fsm.h"

#include "base/notreached.h"
#include "components/speech/audio_buffer.h"
#include "media/mojo/mojom/audio_data.mojom.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"

namespace speech {

SpeechRecognizerFsm::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
    : event(event_value),
      engine_error(media::mojom::SpeechRecognitionErrorCode::kNone,
                   media::mojom::SpeechAudioErrorDetails::kNone) {}

SpeechRecognizerFsm::FSMEventArgs::FSMEventArgs(const FSMEventArgs& other)
    : event(other.event),
      audio_data(other.audio_data ? other.audio_data->Clone() : nullptr),
      audio_chunk(other.audio_chunk),
      recognition_context(other.recognition_context),
      engine_error(other.engine_error) {
  engine_results = mojo::Clone(other.engine_results);
}

SpeechRecognizerFsm::FSMEventArgs::~FSMEventArgs() = default;

SpeechRecognizerFsm::FSMState
SpeechRecognizerFsm::ExecuteTransitionAndGetNextState(
    const FSMEventArgs& event_args) {
  const FSMEvent event = event_args.event;
  switch (state_) {
    case STATE_IDLE:
      switch (event) {
        case EVENT_ABORT:
          return AbortSilently(event_args);
        case EVENT_PREPARE:
          return PrepareRecognition(event_args);
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_STOP_CAPTURE:
          return AbortSilently(event_args);
        case EVENT_UPDATE_RECOGNITION_CONTEXT:
          return UpdateRecognitionContext(event_args);
        case EVENT_AUDIO_DATA:     // Corner cases related to queued messages
        case EVENT_ENGINE_RESULT:  // being lately dispatched.
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return DoNothing(event_args);
      }
      break;
    case STATE_PREPARING:
      switch (event) {
        case EVENT_ABORT:
          return AbortSilently(event_args);
        case EVENT_PREPARE:
          return NotFeasible(event_args);
        case EVENT_START:
          return StartRecording(event_args);
        case EVENT_STOP_CAPTURE:
          return AbortSilently(event_args);
        case EVENT_UPDATE_RECOGNITION_CONTEXT:
          return UpdateRecognitionContext(event_args);
        case EVENT_AUDIO_DATA:     // Corner cases related to queued messages
        case EVENT_ENGINE_RESULT:  // being lately dispatched.
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return DoNothing(event_args);
      }
      break;
    case STATE_STARTING:
      switch (event) {
        case EVENT_ABORT:
          return AbortWithError(event_args);
        case EVENT_PREPARE:
          return NotFeasible(event_args);
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_STOP_CAPTURE:
          return AbortSilently(event_args);
        case EVENT_AUDIO_DATA:
          return StartRecognitionEngine(event_args);
        case EVENT_ENGINE_RESULT:
          if (event_args.audio_data) {
            return ProcessIntermediateResult(event_args);
          }
          return NotFeasible(event_args);
        case EVENT_UPDATE_RECOGNITION_CONTEXT:
          return UpdateRecognitionContext(event_args);
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return AbortWithError(event_args);
      }
      break;
    case STATE_ESTIMATING_ENVIRONMENT:
      switch (event) {
        case EVENT_ABORT:
          return AbortWithError(event_args);
        case EVENT_PREPARE:
          return NotFeasible(event_args);
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_STOP_CAPTURE:
          return StopCaptureAndWaitForResult(event_args);
        case EVENT_AUDIO_DATA:
          return WaitEnvironmentEstimationCompletion(event_args);
        case EVENT_ENGINE_RESULT:
          return ProcessIntermediateResult(event_args);
        case EVENT_UPDATE_RECOGNITION_CONTEXT:
          return UpdateRecognitionContext(event_args);
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return AbortWithError(event_args);
      }
      break;
    case STATE_WAITING_FOR_SPEECH:
      switch (event) {
        case EVENT_ABORT:
          return AbortWithError(event_args);
        case EVENT_PREPARE:
          return NotFeasible(event_args);
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_STOP_CAPTURE:
          return StopCaptureAndWaitForResult(event_args);
        case EVENT_AUDIO_DATA:
          return DetectUserSpeechOrTimeout(event_args);
        case EVENT_ENGINE_RESULT:
          return ProcessIntermediateResult(event_args);
        case EVENT_UPDATE_RECOGNITION_CONTEXT:
          return UpdateRecognitionContext(event_args);
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return AbortWithError(event_args);
      }
      break;
    case STATE_RECOGNIZING:
      switch (event) {
        case EVENT_ABORT:
          return AbortWithError(event_args);
        case EVENT_PREPARE:
          return NotFeasible(event_args);
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_STOP_CAPTURE:
          return StopCaptureAndWaitForResult(event_args);
        case EVENT_AUDIO_DATA:
          return DetectEndOfSpeech(event_args);
        case EVENT_ENGINE_RESULT:
          return ProcessIntermediateResult(event_args);
        case EVENT_UPDATE_RECOGNITION_CONTEXT:
          return UpdateRecognitionContext(event_args);
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return AbortWithError(event_args);
      }
      break;
    case STATE_WAITING_FINAL_RESULT:
      switch (event) {
        case EVENT_ABORT:
          return AbortWithError(event_args);
        case EVENT_PREPARE:
          return NotFeasible(event_args);
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_STOP_CAPTURE:
        case EVENT_AUDIO_DATA:
          return DoNothing(event_args);
        case EVENT_ENGINE_RESULT:
          return ProcessFinalResult(event_args);
        case EVENT_UPDATE_RECOGNITION_CONTEXT:
          return UpdateRecognitionContext(event_args);
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return AbortWithError(event_args);
      }
      break;

    case STATE_ENDED:
      return DoNothing(event_args);
  }

  NOTREACHED();
}

}  // namespace speech