1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350
|
#include "dict.h"
#include "parts.h"
#include "longline.h"
#include "alloc.h"
/*Routines for parsing the CMU pronouncing dictionary file.
Copyright (C) 2000,2001,2002 Brian Langenberger
Released under the terms of the GNU Public License.
See the file "COPYING" for full details.*/
/* Builds a newly allocated WordEntry from one dictionary line.
 *
 * The line is broken into parts with getStringParts().  The first part
 * becomes the word and the parts after it become the phoneme list.  Each
 * phoneme for which isSyllable() is true is counted, and the last such
 * phoneme for which isPrimaryStress() is also true becomes the rhyme key.
 *
 * Returns NULL when the line yields fewer than two parts.  A word with no
 * primary stress is still returned, with rhymekey left NULL, so callers
 * must check rhymekey before using it.
 *
 * The entry stores the `string` pointer itself (no copy is made);
 * freeWordEntry() later passes that pointer to free().
 */
struct WordEntry *stringToWordEntry(char *string) {
  struct StringPart *parts = getStringParts(string, 0, strlen(string));
  struct StringPart *phoneme;
  struct WordEntry *entry;

  /* NOTE(review): `parts` is not released on this path; no release routine
     for StringPart lists is visible from here -- confirm whether one exists. */
  if (countStringParts(parts) < 2)
    return NULL;

  entry = (struct WordEntry *)testalloc(sizeof(struct WordEntry));
  entry->string = string;
  entry->word = parts;
  entry->phonemes = parts->next;
  entry->alternate = NULL;
  entry->rhymekey = NULL;
  entry->syllables = 0;

  phoneme = entry->phonemes;
  while (phoneme != NULL) {
    if (isSyllable(phoneme)) {
      entry->syllables++;
      /* keep overwriting so the final primary stress wins */
      if (isPrimaryStress(phoneme))
        entry->rhymekey = phoneme;
    }
    phoneme = phoneme->next;
  }

  return entry;
}
int isMultiplePronunciation(struct WordEntry *entry) {
if (entry->word->string[entry->word->length - 1] == ')')
return 1;
else
return 0;
}
/* Writes one entry to `stream` as:  WORD KEY-PART-PART SYLLABLES\n
 *
 * The key is the chain of parts starting at entry->rhymekey and running to
 * the end of the list, joined with '-'.  A NULL entry is written as
 * "<BLANK>\n".
 *
 * stringToWordEntry() leaves rhymekey NULL for a word with no primary
 * stress; such an entry is written with an empty key instead of
 * dereferencing the NULL pointer.
 */
void printWordEntry(FILE *stream, struct WordEntry *entry) {
  int i;
  struct StringPart *current;

  if (entry == NULL) {
    fprintf(stream, "<BLANK>\n");
    return;
  }

  for (i = 0; i < entry->word->length; i++)
    fputc(entry->word->string[i], stream);
  fputc(' ', stream);

  for (current = entry->rhymekey; current != NULL; current = current->next) {
    for (i = 0; i < current->length; i++)
      fputc(current->string[i], stream);
    /* '-' goes between parts only, never after the last one */
    if (current->next != NULL)
      fputc('-', stream);
  }

  fprintf(stream, " %d\n", entry->syllables);
}
/* Releases an entry together with the line buffer held in entry->string.
 *
 * Only those two allocations are freed: the `alternate` link is not
 * followed, and the StringPart list reachable through entry->word is not
 * released here.
 * NOTE(review): confirm whether the StringPart list is owned elsewhere.
 *
 * Passing NULL is a no-op, mirroring free(); printWordEntry() already
 * treats NULL entries as valid input.
 */
void freeWordEntry(struct WordEntry *entry) {
  if (entry == NULL)
    return;

  free(entry->string);
  free(entry);
}
/* Pushes `entry` onto the front of the list headed by *stack.
 *
 * A new node is obtained from testalloc() and linked in front of the
 * current head; *stack is updated to point at the new node.  The entry
 * pointer is stored as given, not copied.
 */
void pushWordEntry(struct WordEntryStack **stack, struct WordEntry *entry) {
  struct WordEntryStack *node =
      (struct WordEntryStack *)testalloc(sizeof(struct WordEntryStack));

  node->next = *stack;
  node->entry = entry;
  *stack = node;
}
/* Removes the front node of the list headed by *stack.
 *
 * Returns the entry that node carried and frees the node itself (the
 * entry is not freed).  Returns NULL, leaving *stack untouched, when the
 * list is empty.
 */
struct WordEntry *popWordEntry(struct WordEntryStack **stack) {
  struct WordEntryStack *top = *stack;
  struct WordEntry *entry = NULL;

  if (top != NULL) {
    entry = top->entry;
    *stack = top->next;
    free(top);
  }

  return entry;
}
/* Returns a list holding the same entries as `stack` in reverse order.
 *
 * Each entry is popped from the input and pushed onto the result, so every
 * node of the input list is freed and a fresh node is allocated for it.
 * The caller must use the returned head from then on.
 */
struct WordEntryStack *reverseWordEntryStack(struct WordEntryStack *stack) {
  struct WordEntryStack *reversed = NULL;
  struct WordEntry *entry;

  for (; stack != NULL; ) {
    entry = popWordEntry(&stack);
    pushWordEntry(&reversed, entry);
  }

  return reversed;
}
/* Frees every node of the list and, through freeWordEntry(), the entry
 * each node carries.  A NULL list is a no-op.
 *
 * The list is walked iteratively: it can hold one node per dictionary
 * line, and recursing once per node could exhaust the call stack on a
 * long list.
 */
void freeWordEntryStack(struct WordEntryStack *stack) {
  struct WordEntryStack *next;

  while (stack != NULL) {
    /* read the link before the node that holds it is released */
    next = stack->next;
    freeWordEntry(stack->entry);
    free(stack);
    stack = next;
  }
}
/* Files `entry` in the binary tree keyed by rhyme key and returns the
 * root of the tree.
 *
 * Keys are ordered with cmpStringParts(node key, entry key):
 *   == 0  the entry is pushed onto that node's `rhymes` stack;
 *   >  0  the search continues in the node's `lesser` branch;
 *   <  0  the search continues in the node's `greater` branch.
 * When the search runs off the tree a new node is allocated for the entry
 * and attached at that spot.  With a NULL `tree` the new node is the root
 * and is what gets returned.
 */
struct WordEntryTree *addToEntryTree(struct WordEntryTree *tree,
                                     struct WordEntry *entry) {
  struct WordEntryTree **slot = &tree;
  struct WordEntryTree *node;
  int order;

  /* descend until a node with the same key or an empty slot is reached */
  while (*slot != NULL) {
    node = *slot;
    order = cmpStringParts(node->rhymekey, entry->rhymekey);
    if (order == 0) {
      pushWordEntry(&node->rhymes, entry);
      return tree;
    }
    if (order > 0)
      slot = &node->lesser;
    else
      slot = &node->greater;
  }

  /* empty slot: start a fresh node holding just this entry */
  node = (struct WordEntryTree *)testalloc(sizeof(struct WordEntryTree));
  node->rhymes = NULL;
  pushWordEntry(&node->rhymes, entry);
  node->rhymekey = entry->rhymekey;
  node->greater = NULL;
  node->lesser = NULL;
  *slot = node;

  return tree;
}
/* Frees the nodes of the tree and nothing else: the `rhymes` stacks and
 * the entries they reference are left alone.  A NULL tree is a no-op.
 */
void freeOnlyEntryTree(struct WordEntryTree *tree) {
  struct WordEntryTree *greater;

  while (tree != NULL) {
    /* remember the right branch, since the node is about to be released */
    greater = tree->greater;
    freeOnlyEntryTree(tree->lesser);
    free(tree);
    tree = greater;
  }
}
int isBaseWord(struct WordEntry *entry, struct WordEntry *multiple) {
int i;
if (isMultiplePronunciation(entry)) return 0; /*multiples can't be the base*/
for (i = 0 ; multiple->word->string[i] != '(' ; i++) {
if (entry->word->string[i] != multiple->word->string[i])
return 0;
}
/*if there's still more of the entry, it's not the base either*/
if (i == entry->word->length)
return 1;
else
return 0;
}
/* Links the alternate pronunciation `entry` to its base word in `stack`.
 *
 * The stack is searched for the first entry accepted by isBaseWord().
 * `entry` is then appended to the end of that base word's `alternate`
 * chain.
 *
 * When `skipduplicates` is non-zero, the rhyme key of `entry` is compared
 * (cmpStringParts) against the base word and every alternate already in
 * the chain; on the first equal key nothing is linked and 0 is returned.
 *
 * Returns 1 when the entry was linked, and also when no base word exists:
 * a word without a base should still be added to the rhymes, and only a
 * clash with a rhyme key already on the chain needs to be reported.
 * Returns 0 only for a skipped duplicate.
 */
int addMultiplePronunciation(struct WordEntryStack *stack,
                             struct WordEntry *entry,
                             int skipduplicates) {
  struct WordEntryStack *node = stack;
  struct WordEntry *link;

  while (node != NULL && !isBaseWord(node->entry, entry))
    node = node->next;

  if (node == NULL)
    return 1;

  /* walk to the tail of the chain, checking each link on the way */
  for (link = node->entry; ; link = link->alternate) {
    if (skipduplicates &&
        cmpStringParts(entry->rhymekey, link->rhymekey) == 0)
      return 0;
    if (link->alternate == NULL)
      break;
  }

  link->alternate = entry;
  return 1;
}
/* Writes the tree to `stream` in order: lesser branch, node, greater
 * branch.
 *
 * Each node produces one line: the parts of its rhyme key (from
 * tree->rhymekey to the end of that list) joined with '-', followed by
 * the words of its `rhymes` stack as written by printEntryRow(), then a
 * newline.  A NULL tree writes nothing.
 */
void printEntryTree(FILE *stream, struct WordEntryTree *tree) {
  struct StringPart *part;
  int i;

  for (; tree != NULL; tree = tree->greater) {
    printEntryTree(stream, tree->lesser);

    part = tree->rhymekey;
    for (;;) {
      for (i = 0; i < part->length; i++)
        fputc(part->string[i], stream);
      if (part->next == NULL)
        break;
      fputc('-', stream);
      part = part->next;
    }

    printEntryRow(stream, tree->rhymes);
    fputc('\n', stream);
  }
}
void printEntryRow(FILE *stream, struct WordEntryStack *stack) {
int i;
if (stack == NULL) return;
else {
printEntryRow(stream, stack->next);
fputc(' ', stream);
for (i = 0 ; i < stack->entry->word->length ; i++) {
fputc(stack->entry->word->string[i], stream);
}
}
}
/* Writes every entry on the stack with printWordEntry(), from the head of
 * the list to its end.
 */
void printWordEntries(FILE *stream, struct WordEntryStack *stack) {
  struct WordEntryStack *node = stack;

  while (node != NULL) {
    printWordEntry(stream, node->entry);
    node = node->next;
  }
}
/* Writes one line for each base word that has alternate pronunciations.
 *
 * An entry qualifies when its `alternate` link is set and it is not itself
 * a multiple pronunciation.  The line holds the base word, then the base
 * word again, then the word of each entry on the `alternate` chain, all
 * separated by single spaces and ended with a newline.
 */
void printMultiples(FILE *stream, struct WordEntryStack *stack) {
  struct WordEntryStack *node;
  struct WordEntry *base;
  struct WordEntry *link;
  int i;

  for (node = stack; node != NULL; node = node->next) {
    base = node->entry;
    if (base->alternate == NULL || isMultiplePronunciation(base))
      continue;

    for (i = 0; i < base->word->length; i++)
      fputc(base->word->string[i], stream);

    /* starting the walk at the base itself yields its second copy,
       followed by every alternate on the chain */
    for (link = base; link != NULL; link = link->alternate) {
      fputc(' ', stream);
      for (i = 0; i < link->word->length; i++)
        fputc(link->word->string[i], stream);
    }

    fputc('\n', stream);
  }
}
/*
int main(int argc, char *argv[]) {
char *string;
struct WordEntry *entry;
struct WordEntryStack *stack = NULL;
struct WordEntryTree *tree = NULL;
for (string = readLongLine(stdin); string != NULL ;
string = readLongLine(stdin)) {
entry = stringToWordEntry(string);
if (entry->rhymekey != NULL) {
if (isMultiplePronunciation(entry)) {
if (addMultiplePronunciation(stack, entry, 1)) {
pushWordEntry(&stack, entry);
tree = addToEntryTree(tree, entry);
}
} else {
pushWordEntry(&stack, entry);
tree = addToEntryTree(tree, entry);
}
}
}
stack = reverseWordEntryStack(stack);
printWordEntries(stdout, stack);
printMultiples(stdout, stack);
printEntryTree(stdout, tree);*/
/*
for (entry = popWordEntry(&stack); entry != NULL ;
entry = popWordEntry(&stack)) {
if (isMultiplePronunciation(entry))
printWordEntry(stdout, entry);
freeWordEntry(entry);
}
return 0;
}
*/
|