1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589
|
/***************************************************************************
* Copyright (C) 2005 to 2014 by Jonathan Duddington *
* email: jonsd@users.sourceforge.net *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write see: *
* <http://www.gnu.org/licenses/>. *
***************************************************************************/
#define espeakINITIALIZE_PHONEME_IPA 0x0002 // move this to speak_lib.h, after eSpeak version 1.46.02
#define N_PHONEME_LIST 1000 // enough for source[N_TR_SOURCE] full of text, else it will truncate
#define MAX_HARMONIC 400 // 400 * 50Hz = 20 kHz, more than enough
#define N_SEQ_FRAMES 25 // max frames in a spectrum sequence (real max is ablut 8)
#define STEPSIZE 64 // 2.9mS at 22 kHz sample rate
// flags set for frames within a spectrum sequence
#define FRFLAG_KLATT 0x01 // this frame includes extra data for Klatt synthesizer
#define FRFLAG_VOWEL_CENTRE 0x02 // centre point of vowel
#define FRFLAG_LEN_MOD 0x04 // reduce effect of length adjustment
#define FRFLAG_BREAK_LF 0x08 // but keep f3 upwards
#define FRFLAG_BREAK 0x10 // don't merge with next frame
#define FRFLAG_BREAK_2 0x18 // FRFLAG_BREAK_LF or FRFLAG_BREAK
#define FRFLAG_FORMANT_RATE 0x20 // Flag5 allow increased rate of change of formant freq
#define FRFLAG_MODULATE 0x40 // Flag6 modulate amplitude of some cycles to give trill
#define FRFLAG_DEFER_WAV 0x80 // Flag7 defer mixing WAV until the next frame
#define FRFLAG_LEN_MOD2 0x4000 // reduce effect of length adjustment, used for the start of a vowel
#define FRFLAG_COPIED 0x8000 // This frame has been copied into temporary rw memory
#define SFLAG_SEQCONTINUE 0x01 // a liquid or nasal after a vowel, but not followed by a vowel
#define SFLAG_EMBEDDED 0x02 // there are embedded commands before this phoneme
#define SFLAG_SYLLABLE 0x04 // vowel or syllabic consonant
#define SFLAG_LENGTHEN 0x08 // lengthen symbol : included after this phoneme
#define SFLAG_DICTIONARY 0x10 // the pronunciation of this word was listed in the xx_list dictionary
#define SFLAG_SWITCHED_LANG 0x20 // this word uses phonemes from a different language
#define SFLAG_PROMOTE_STRESS 0x40 // this unstressed word can be promoted to stressed
#define SFLAG_PREV_PAUSE 0x1000 // consider previous phoneme as pause
#define SFLAG_NEXT_PAUSE 0x2000 // consider next phoneme as pause
// embedded command numbers
#define EMBED_P 1 // pitch
#define EMBED_S 2 // speed (used in setlengths)
#define EMBED_A 3 // amplitude/volume
#define EMBED_R 4 // pitch range/expression
#define EMBED_H 5 // echo/reverberation
#define EMBED_T 6 // different tone for announcing punctuation (not used)
#define EMBED_I 7 // sound icon
#define EMBED_S2 8 // speed (used in synthesize)
#define EMBED_Y 9 // say-as commands
#define EMBED_M 10 // mark name
#define EMBED_U 11 // audio uri
#define EMBED_B 12 // break
#define EMBED_F 13 // emphasis
#define EMBED_C 14 // capital letter indication
#define N_EMBEDDED_VALUES 15
extern int embedded_value[N_EMBEDDED_VALUES];
extern int embedded_default[N_EMBEDDED_VALUES];
#define N_PEAKS 9
#define N_PEAKS2 9 // plus Notch and Fill (not yet implemented)
#define N_MARKERS 8
#define N_KLATTP 10 // this affects the phoneme data file format
#define N_KLATTP2 14 // used in vowel files, with extra parameters for future extensions
#define KLATT_AV 0
#define KLATT_FNZ 1 // nasal zero freq
#define KLATT_Tilt 2
#define KLATT_Aspr 3
#define KLATT_Skew 4
#define KLATT_Kopen 5
#define KLATT_AVp 6
#define KLATT_Fric 7
#define KLATT_FricBP 8
#define KLATT_Turb 9
typedef struct { // 64 bytes
short frflags;
short ffreq[7];
unsigned char length;
unsigned char rms;
unsigned char fheight[8];
unsigned char fwidth[6]; // width/4 f0-5
unsigned char fright[3]; // width/4 f0-2
unsigned char bw[4]; // Klatt bandwidth BNZ /2, f1,f2,f3
unsigned char klattp[5]; // AV, FNZ, Tilt, Aspr, Skew
unsigned char klattp2[5]; // continuation of klattp[], Avp, Fric, FricBP, Turb
unsigned char klatt_ap[7]; // Klatt parallel amplitude
unsigned char klatt_bp[7]; // Klatt parallel bandwidth /2
unsigned char spare; // pad to multiple of 4 bytes
} frame_t; // with extra Klatt parameters for parallel resonators
typedef struct { // 44 bytes
short frflags;
short ffreq[7];
unsigned char length;
unsigned char rms;
unsigned char fheight[8];
unsigned char fwidth[6]; // width/4 f0-5
unsigned char fright[3]; // width/4 f0-2
unsigned char bw[4]; // Klatt bandwidth BNZ /2, f1,f2,f3
unsigned char klattp[5]; // AV, FNZ, Tilt, Aspr, Skew
} frame_t2; // without the extra Klatt parameters
// formant data used by wavegen
typedef struct {
int freq; // Hz<<16
int height; // height<<15
int left; // Hz<<16
int right; // Hz<<16
DOUBLEX freq1; // floating point versions of the above
DOUBLEX height1;
DOUBLEX left1;
DOUBLEX right1;
DOUBLEX freq_inc; // increment by this every 64 samples
DOUBLEX height_inc;
DOUBLEX left_inc;
DOUBLEX right_inc;
} wavegen_peaks_t;
typedef struct {
unsigned char *pitch_env;
int pitch; // pitch Hz*256
int pitch_ix; // index into pitch envelope (*256)
int pitch_inc; // increment to pitch_ix
int pitch_base; // Hz*256 low, before modified by envelope
int pitch_range; // Hz*256 range of envelope
unsigned char *mix_wavefile; // wave file to be added to synthesis
int n_mix_wavefile; // length in bytes
int mix_wave_scale; // 0=2 byte samples
int mix_wave_amp;
int mix_wavefile_ix;
int mix_wavefile_max; // length of available WAV data (in bytes)
int mix_wavefile_offset;
int amplitude;
int amplitude_v;
int amplitude_fmt; // percentage amplitude adjustment for formant synthesis
} WGEN_DATA;
typedef struct {
double a;
double b;
double c;
double x1;
double x2;
} RESONATOR;
typedef struct {
short length_total; // not used
unsigned char n_frames;
unsigned char sqflags;
frame_t2 frame[N_SEQ_FRAMES]; // max. frames in a spectrum sequence
} SPECT_SEQ; // sequence of espeak formant frames
typedef struct {
short length_total; // not used
unsigned char n_frames;
unsigned char sqflags;
frame_t frame[N_SEQ_FRAMES]; // max. frames in a spectrum sequence
} SPECT_SEQK; // sequence of klatt formants frames
typedef struct {
short length;
short frflags;
frame_t *frame;
} frameref_t;
// a clause translated into phoneme codes (first stage)
typedef struct {
unsigned short synthflags; // NOTE Put shorts on 32bit boundaries, because of RISC OS compiler bug?
unsigned char phcode;
unsigned char stresslevel;
unsigned short sourceix; // ix into the original source text string, only set at the start of a word
unsigned char wordstress; // the highest level stress in this word
unsigned char tone_ph; // tone phoneme to use with this vowel
} PHONEME_LIST2;
typedef struct {
// The first section is a copy of PHONEME_LIST2
unsigned short synthflags;
unsigned char phcode;
unsigned char stresslevel;
unsigned short sourceix; // ix into the original source text string, only set at the start of a word
unsigned char wordstress; // the highest level stress in this word
unsigned char tone_ph; // tone phoneme to use with this vowel
PHONEME_TAB *ph;
unsigned int length; // length_mod
unsigned char env; // pitch envelope number
unsigned char type;
unsigned char prepause;
unsigned char postpause;
unsigned char amp;
unsigned char newword; // bit 0=start of word, bit 1=end of clause, bit 2=start of sentence
unsigned char pitch1;
unsigned char pitch2;
#ifdef _ESPEAKEDIT
unsigned char std_length;
unsigned int phontab_addr;
int sound_param;
#endif
} PHONEME_LIST;
#define pd_FMT 0
#define pd_WAV 1
#define pd_VWLSTART 2
#define pd_VWLEND 3
#define pd_ADDWAV 4
#define N_PHONEME_DATA_PARAM 16
#define pd_INSERTPHONEME i_INSERT_PHONEME
#define pd_APPENDPHONEME i_APPEND_PHONEME
#define pd_CHANGEPHONEME i_CHANGE_PHONEME
#define pd_CHANGE_NEXTPHONEME i_REPLACE_NEXT_PHONEME
#define pd_LENGTHMOD i_SET_LENGTH
#define pd_FORNEXTPH 0x2
#define pd_DONTLENGTHEN 0x4
#define pd_REDUCELENGTHCHANGE 0x8
typedef struct {
int pd_control;
int pd_param[N_PHONEME_DATA_PARAM]; // set from group 0 instructions
int sound_addr[5];
int sound_param[5];
int vowel_transition[4];
int pitch_env;
int amp_env;
char ipa_string[18];
} PHONEME_DATA;
typedef struct {
int fmt_control;
int use_vowelin;
int fmt_addr;
int fmt_length;
int fmt_amp;
int fmt2_addr;
int fmt2_lenadj;
int wav_addr;
int wav_amp;
int transition0;
int transition1;
int std_length;
} FMT_PARAMS;
typedef struct {
PHONEME_LIST prev_vowel;
} WORD_PH_DATA;
// instructions
#define i_RETURN 0x0001
#define i_CONTINUE 0x0002
#define i_NOT 0x0003
// Group 0 instrcutions with 8 bit operand. These values go into bits 8-15 of the instruction
#define i_CHANGE_PHONEME 0x01
#define i_REPLACE_NEXT_PHONEME 0x02
#define i_INSERT_PHONEME 0x03
#define i_APPEND_PHONEME 0x04
#define i_APPEND_IFNEXTVOWEL 0x05
#define i_VOICING_SWITCH 0x06
#define i_PAUSE_BEFORE 0x07
#define i_PAUSE_AFTER 0x08
#define i_LENGTH_MOD 0x09
#define i_SET_LENGTH 0x0a
#define i_LONG_LENGTH 0x0b
#define i_CHANGE_PHONEME2 0x0c // not yet used
#define i_IPA_NAME 0x0d
#define i_CHANGE_IF 0x10 // 0x10 to 0x14
#define i_ADD_LENGTH 0x0c
// conditions and jumps
#define i_CONDITION 0x2000
#define i_OR 0x1000 // added to i_CONDITION
#define i_JUMP 0x6000
#define i_JUMP_FALSE 0x6800
#define i_SWITCH_NEXTVOWEL 0x6a00
#define i_SWITCH_PREVVOWEL 0x6c00
#define MAX_JUMP 255 // max jump distance
// multi-word instructions
#define i_CALLPH 0x9100
#define i_PITCHENV 0x9200
#define i_AMPENV 0x9300
#define i_VOWELIN 0xa100
#define i_VOWELOUT 0xa200
#define i_FMT 0xb000
#define i_WAV 0xc000
#define i_VWLSTART 0xd000
#define i_VWLENDING 0xe000
#define i_WAVADD 0xf000
// conditions
#define i_isDiminished 0x80
#define i_isUnstressed 0x81
#define i_isNotStressed 0x82
#define i_isStressed 0x83
#define i_isMaxStress 0x84
#define i_isBreak 0x85
#define i_isWordStart 0x86
#define i_notWordStart 0x87
#define i_isWordEnd 0x88
#define i_isAfterStress 0x89
#define i_isNotVowel 0x8a
#define i_isFinalVowel 0x8b
#define i_isVoiced 0x8c
#define i_isFirstVowel 0x8d
#define i_isSecondVowel 0x8e
#define i_isSeqFlag1 0x8f
#define i_IsTranslationGiven 0x90
// place of articulation
#define i_isVel 0x28
// phflags
#define i_isSibilant 0x45 // bit 5 in phflags
#define i_isPalatal 0x49 // bit 9 in phflags
#define i_isLong 0x55 // bit 21 in phflags
#define i_isRhotic 0x57 // bit 23 in phflags
#define i_isFlag1 0x5c
#define i_isFlag2 0x5d
#define i_isFlag3 0x5e
#define i_StressLevel 0x800
typedef struct {
int name;
int length;
char *data;
char *filename;
} SOUND_ICON;
typedef struct {
int name;
unsigned int next_phoneme;
int mbr_name;
int mbr_name2;
int percent; // percentage length of first component
int control;
} MBROLA_TAB;
typedef struct {
int pause_factor;
int clause_pause_factor;
unsigned int min_pause;
int wav_factor;
int lenmod_factor;
int lenmod2_factor;
int min_sample_len;
int loud_consonants;
int fast_settings[8];
} SPEED_FACTORS;
typedef struct {
char name[12];
unsigned char flags[4];
signed char head_extend[8];
unsigned char prehead_start;
unsigned char prehead_end;
unsigned char stressed_env;
unsigned char stressed_drop;
unsigned char secondary_drop;
unsigned char unstressed_shape;
unsigned char onset;
unsigned char head_start;
unsigned char head_end;
unsigned char head_last;
unsigned char head_max_steps;
unsigned char n_head_extend;
signed char unstr_start[3]; // for: onset, head, last
signed char unstr_end[3];
unsigned char nucleus0_env; // pitch envelope, tonic syllable is at end, no tail
unsigned char nucleus0_max;
unsigned char nucleus0_min;
unsigned char nucleus1_env; // when followed by a tail
unsigned char nucleus1_max;
unsigned char nucleus1_min;
unsigned char tail_start;
unsigned char tail_end;
unsigned char split_nucleus_env;
unsigned char split_nucleus_max;
unsigned char split_nucleus_min;
unsigned char split_tail_start;
unsigned char split_tail_end;
unsigned char split_tune;
unsigned char spare[8];
int spare2; // the struct length should be a multiple of 4 bytes
} TUNE;
extern int n_tunes;
extern TUNE *tunes;
// phoneme table
extern PHONEME_TAB *phoneme_tab[N_PHONEME_TAB];
// list of phonemes in a clause
extern int n_phoneme_list;
extern PHONEME_LIST phoneme_list[N_PHONEME_LIST+1];
extern unsigned int embedded_list[];
extern unsigned char env_fall[128];
extern unsigned char env_rise[128];
extern unsigned char env_frise[128];
#define MAX_PITCH_VALUE 101
extern unsigned char pitch_adjust_tab[MAX_PITCH_VALUE+1];
// queue of commands for wavegen
#define WCMD_KLATT 1
#define WCMD_KLATT2 2
#define WCMD_SPECT 3
#define WCMD_SPECT2 4
#define WCMD_PAUSE 5
#define WCMD_WAVE 6
#define WCMD_WAVE2 7
#define WCMD_AMPLITUDE 8
#define WCMD_PITCH 9
#define WCMD_MARKER 10
#define WCMD_VOICE 11
#define WCMD_EMBEDDED 12
#define WCMD_MBROLA_DATA 13
#define WCMD_FMT_AMPLITUDE 14
#define WCMD_SONIC_SPEED 15
#define N_WCMDQ 170
#define MIN_WCMDQ 25 // need this many free entries before adding new phoneme
extern long64 wcmdq[N_WCMDQ][4];
extern int wcmdq_head;
extern int wcmdq_tail;
// from Wavegen file
int WcmdqFree();
void WcmdqStop();
int WcmdqUsed();
void WcmdqInc();
int WavegenOpenSound();
int WavegenCloseSound();
int WavegenInitSound();
void WavegenInit(int rate, int wavemult_fact);
float polint(float xa[],float ya[],int n,float x);
int WavegenFill(int fill_zeros);
void MarkerEvent(int type, unsigned int char_position, int value, int value2, unsigned char *out_ptr);
extern unsigned char *wavefile_data;
extern int samplerate;
extern int samplerate_native;
extern int wavefile_ix;
extern int wavefile_amp;
extern int wavefile_ix2;
extern int wavefile_amp2;
extern int vowel_transition[4];
extern int vowel_transition0, vowel_transition1;
#define N_ECHO_BUF 5500 // max of 250mS at 22050 Hz
extern int echo_head;
extern int echo_tail;
extern int echo_amp;
extern short echo_buf[N_ECHO_BUF];
extern int mbrola_delay;
extern char mbrola_name[20];
// from synthdata file
unsigned int LookupSound(PHONEME_TAB *ph1, PHONEME_TAB *ph2, int which, int *match_level, int control);
frameref_t *LookupSpect(PHONEME_TAB *this_ph, int which, FMT_PARAMS *fmt_params, int *n_frames, PHONEME_LIST *plist);
unsigned char *LookupEnvelope(int ix);
int LoadPhData(int *srate);
void SynthesizeInit(void);
int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume);
void MakeWave2(PHONEME_LIST *p, int n_ph);
int SynthOnTimer(void);
int SpeakNextClause(FILE *f_text, const void *text_in, int control);
int SynthStatus(void);
void SetSpeed(int control);
void SetEmbedded(int control, int value);
void SelectPhonemeTable(int number);
int SelectPhonemeTableName(const char *name);
void Write4Bytes(FILE *f, int value);
int Read4Bytes(FILE *f);
int Reverse4Bytes(int word);
int CompileDictionary(const char *dsource, const char *dict_name, FILE *log, char *err_name,int flags);
#define ENV_LEN 128 // length of pitch envelopes
#define PITCHfall 0 // standard pitch envelopes
#define PITCHrise 2
#define N_ENVELOPE_DATA 20
extern unsigned char *envelope_data[N_ENVELOPE_DATA];
extern int formant_rate[]; // max rate of change of each formant
extern SPEED_FACTORS speed;
extern long count_samples;
extern int outbuf_size;
extern unsigned char *out_ptr;
extern unsigned char *out_start;
extern unsigned char *out_end;
extern int event_list_ix;
extern espeak_EVENT *event_list;
extern t_espeak_callback* synth_callback;
extern int option_log_frames;
extern const char *version_string;
extern const int version_phdata;
extern double sonicSpeed;
#define N_SOUNDICON_TAB 80 // total entries in soundicon_tab
#define N_SOUNDICON_SLOTS 4 // number of slots reserved for dynamic loading of audio files
extern int n_soundicon_tab;
extern SOUND_ICON soundicon_tab[N_SOUNDICON_TAB];
espeak_ERROR SetVoiceByName(const char *name);
espeak_ERROR SetVoiceByProperties(espeak_VOICE *voice_selector);
espeak_ERROR LoadMbrolaTable(const char *mbrola_voice, const char *phtrans, int srate);
void SetParameter(int parameter, int value, int relative);
int MbrolaTranslate(PHONEME_LIST *plist, int n_phonemes, int resume, FILE *f_mbrola);
int MbrolaGenerate(PHONEME_LIST *phoneme_list, int *n_ph, int resume);
int MbrolaFill(int length, int resume, int amplitude);
void MbrolaReset(void);
void DoEmbedded(int *embix, int sourceix);
void DoMarker(int type, int char_posn, int length, int value);
void DoPhonemeMarker(int type, int char_posn, int length, char *name);
int DoSample3(PHONEME_DATA *phdata, int length_mod, int amp);
int DoSpect2(PHONEME_TAB *this_ph, int which, FMT_PARAMS *fmt_params, PHONEME_LIST *plist, int modulation);
int PauseLength(int pause, int control);
int LookupPhonemeTable(const char *name);
unsigned char *GetEnvelope(int index);
int NumInstnWords(USHORT *prog);
void InitBreath(void);
void KlattInit();
void KlattReset(int control);
int Wavegen_Klatt2(int length, int modulation, int resume, frame_t *fr1, frame_t *fr2);
|