1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
|
/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package android.speech.tts;
import android.annotation.IntDef;
import android.annotation.IntRange;
import android.media.AudioFormat;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
/**
 * A callback to return speech data synthesized by a text to speech engine.
 *
 * <p>The engine can provide streaming audio by calling {@link #start}, then {@link
 * #audioAvailable} until all audio has been provided, then finally {@link #done}.
 *
 * <p>{@link #error} can be called at any stage in the synthesis process to indicate that an error
 * has occurred, but if the call is made after a call to {@link #done}, it might be discarded.
 *
 * <p>{@link #done} must be called at the end of synthesis, regardless of errors.
 *
 * <p>All methods can only be called on the synthesis thread.
 */
public interface SynthesisCallback {
    /**
     * The audio encodings that may be passed as the {@code audioFormat} argument of
     * {@link #start}.
     *
     * @hide
     */
    @Retention(RetentionPolicy.SOURCE)
    @IntDef({
        AudioFormat.ENCODING_PCM_8BIT,
        AudioFormat.ENCODING_PCM_16BIT,
        AudioFormat.ENCODING_PCM_FLOAT
    })
    @interface SupportedAudioFormat {}

    /**
     * Returns the upper bound on the amount of audio data accepted by a single call of
     * {@link #audioAvailable}.
     *
     * @return the maximum number of bytes that the TTS engine can pass in a single call of {@link
     *     #audioAvailable}. Calls to {@link #audioAvailable} with data lengths larger than this
     *     value will not succeed.
     */
    int getMaxBufferSize();

    /**
     * The service should call this when it starts to synthesize audio for this request.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * @param sampleRateInHz Sample rate in Hz of the generated audio.
     * @param audioFormat Audio format of the generated audio. Must be one of {@link
     *     AudioFormat#ENCODING_PCM_8BIT} or {@link AudioFormat#ENCODING_PCM_16BIT}. Can also be
     *     {@link AudioFormat#ENCODING_PCM_FLOAT} when targeting Android N and above.
     * @param channelCount The number of channels. Must be {@code 1} or {@code 2}.
     * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
     *     android.speech.tts.TextToSpeech#ERROR} or {@link
     *     android.speech.tts.TextToSpeech#STOPPED}.
     */
    int start(
            int sampleRateInHz,
            @SupportedAudioFormat int audioFormat,
            @IntRange(from = 1, to = 2) int channelCount);

    /**
     * The service should call this method when synthesized audio is ready for consumption.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * @param buffer The generated audio data. This method will not hold on to {@code buffer}, so
     *     the caller is free to modify it after this method returns.
     * @param offset The offset into {@code buffer} where the audio data starts.
     * @param length The number of bytes of audio data in {@code buffer}. This must be less than or
     *     equal to the return value of {@link #getMaxBufferSize}.
     * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
     *     android.speech.tts.TextToSpeech#ERROR} or {@link
     *     android.speech.tts.TextToSpeech#STOPPED}.
     */
    int audioAvailable(byte[] buffer, int offset, int length);

    /**
     * The service should call this method when all the synthesized audio for a request has been
     * passed to {@link #audioAvailable}.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * <p>This method has to be called if {@link #start} and/or {@link #error} was called.
     *
     * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
     *     android.speech.tts.TextToSpeech#ERROR} or {@link
     *     android.speech.tts.TextToSpeech#STOPPED}.
     */
    int done();

    /**
     * The service should call this method if the speech synthesis fails and it has no specific
     * error code to report; use {@link #error(int)} to pass an error code to the client.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     */
    void error();

    /**
     * The service should call this method if the speech synthesis fails, supplying the error code
     * that describes the failure.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * @param errorCode Error code to pass to the client. One of the ERROR_ values from {@link
     *     android.speech.tts.TextToSpeech}.
     */
    void error(@TextToSpeech.Error int errorCode);

    /**
     * Check if {@link #start} was called or not.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * <p>Useful for checking if a fallback from network request is possible.
     *
     * @return whether {@link #start} was called.
     */
    boolean hasStarted();

    /**
     * Check if {@link #done} was called or not.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * <p>Useful for checking if a fallback from network request is possible.
     *
     * @return whether {@link #done} was called.
     */
    boolean hasFinished();

    /**
     * The service may call this method to provide timing information about the spoken text.
     *
     * <p>Calling this method means that at the given audio frame, the given range of the input is
     * about to be spoken. If this method is called the client will receive a callback on the
     * listener ({@link UtteranceProgressListener#onRangeStart}) at the moment that frame has been
     * reached by the playback head.
     *
     * <p>This information can be used by the client, for example, to highlight ranges of the text
     * while it is spoken.
     *
     * <p>The {@code markerInFrames} is a frame index into the audio for this synthesis request,
     * i.e. into the concatenation of the audio bytes sent to {@link #audioAvailable} for this
     * synthesis request. The definition of a frame depends on the format given by {@link #start}.
     * See {@link AudioFormat} for more information.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * <p>The default implementation in this interface does nothing.
     *
     * @param markerInFrames The position in frames in the audio where this range is spoken.
     * @param start The start index of the range in the input text.
     * @param end The end index (exclusive) of the range in the input text.
     */
    default void rangeStart(int markerInFrames, int start, int end) {}
}
|