File: slib_str.cc

package info (click to toggle)
speech-tools 1:2.5.0-5
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 9,848 kB
  • sloc: cpp: 67,350; ansic: 12,175; sh: 4,047; java: 3,748; makefile: 1,109; lisp: 711; perl: 501; awk: 85; xml: 9
file content (422 lines) | stat: -rw-r--r-- 10,530 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
/*  
 *                   COPYRIGHT (c) 1988-1994 BY                             *
 *        PARADIGM ASSOCIATES INCORPORATED, CAMBRIDGE, MASSACHUSETTS.       *
 *        See the source file SLIB.C for more information.                  *

 * Reorganization of files (Mar 1999) by Alan W Black <awb@cstr.ed.ac.uk>

 * String functions

*/
#include <cstdio>
#include <cstring>
#include <setjmp.h>
#include <cstdlib>
#include <cctype>

#include "EST_Pathname.h"
#include "EST_cutils.h"
#include "siod.h"
#include "siodp.h"

LISP strintern(const char *data)
{
    return strcons(strlen(data),data);
}

LISP strcons(long length,const char *data)
{long flag;
 LISP s;
 flag = no_interrupt(1);
 s = cons(NIL,NIL);
 s->type = tc_string;
 s->storage_as.string.data = must_malloc(length+1);
 s->storage_as.string.dim = length;
 if (data)
   memmove(s->storage_as.string.data,data,length+1);
 no_interrupt(flag);
 return(s);}

LISP cstrcons(const char *data)
{long flag;
 LISP s;
 flag = no_interrupt(1);
 s = cons(NIL,NIL);
 s->type = tc_string;
 s->storage_as.string.data = (char *)(void *)data;
 s->storage_as.string.dim = strlen(data);
 no_interrupt(flag);
 return(s);}

static int rfs_getc(unsigned char **p)
{int i;
 i = **p;
 if (!i) return(EOF);
 *p = *p + 1;
 return(i);}

static void rfs_ungetc(unsigned char c,unsigned char **p)
{(void)c;
 *p = *p - 1;}

LISP read_from_lstring(LISP x)
{return read_from_string(get_c_string(x));}

LISP read_from_string(const char *string)
{char *p,*q;
 LISP r; 
 struct gen_readio s;
 q = wstrdup(string);
 p = q;
 s.getc_fcn = (int (*)(char *))rfs_getc;
 s.ungetc_fcn = (void (*)(int, char *))rfs_ungetc;
 s.cb_argument = (char *) &p;
 r = readtl(&s);
 wfree(q);
 return r;
}

LISP string_append(LISP args)
{long size;
 LISP l,s;
 char *data;
 size = 0;
 for(l=args;NNULLP(l);l=cdr(l))
   size += strlen(get_c_string(car(l)));
 s = strcons(size,NULL);
 data = s->storage_as.string.data;
 data[0] = 0;
 for(l=args;NNULLP(l);l=cdr(l))
   strcat(data,get_c_string(car(l)));
 return(s);}

LISP string_length(LISP string)
{if NTYPEP(string,tc_string) err("not a string",string);
 return(flocons((double)string->storage_as.string.dim));}

LISP parse_number(LISP x)
{const char *c;
 c = get_c_string(x);
 return(flocons(atof(c)));}

LISP string_downcase(LISP symbol)
{
    const char *symname = get_c_string(symbol);
    char *downsym = wstrdup(symname);
    LISP newsym;
    int i;

    for (i=0; symname[i] != '\0'; i++)
	if (isupper(symname[i]))
	    downsym[i] = tolower(symname[i]);
	else
	    downsym[i] = symname[i];
    downsym[i] = '\0';
    newsym = strintern(downsym);
    wfree(downsym);
    
    return newsym;
}

LISP string_upcase(LISP symbol)
{
    const char *symname = get_c_string(symbol);
    char *upsym = wstrdup(symname);
    LISP newsym;
    int i;

    for (i=0; symname[i] != '\0'; i++)
	if (islower(symname[i]))
	    upsym[i] = toupper(symname[i]);
	else
	    upsym[i] = symname[i];
    upsym[i] = '\0';
    newsym = strintern(upsym);
    wfree(upsym);
    
    return newsym;
}

LISP path_is_dirname(LISP lpath)
{
  EST_Pathname path(get_c_string(lpath));

  return path.is_dirname()?lpath:NIL;
}

LISP path_is_filename(LISP lpath)
{
  EST_Pathname path(get_c_string(lpath));

  return path.is_filename()?lpath:NIL;
}

LISP path_as_directory(LISP lpath)
{
  EST_Pathname path(get_c_string(lpath));
  EST_Pathname res(path.as_directory());
  return strintern(res);
}

LISP path_as_file(LISP lpath)
{
  EST_Pathname path(get_c_string(lpath));
  EST_Pathname res(path.as_file());

  return strintern(res);
}

LISP path_append(LISP lpaths)
{
  if (CONSP(lpaths))
    {
      EST_Pathname res(get_c_string(car(lpaths)));
      lpaths = cdr(lpaths);
      while(lpaths != NIL)
	{
	  res = res +get_c_string(car(lpaths));
	  lpaths = cdr(lpaths);
	}
      return strintern(res);
    }
  return NIL;
}

LISP path_basename(LISP lpath)
{
  EST_Pathname path(get_c_string(lpath));
  EST_Pathname res(path.basename(1));

  return strintern(res);
}

LISP symbol_basename(LISP path, LISP suffix)
{
    //  Like UNIX basename
    const char *pathstr = get_c_string(path);
    const char *suff;
    char *bname;
    int i, j, k, start, end;
    LISP newsym;

    if (suffix == NIL)
	suff = "";
    else
	suff = get_c_string(suffix);

    for (i=strlen(pathstr); i >= 0; i--)
	if (pathstr[i] == '/')
	    break;
    start = i+1;
    for (j=strlen(pathstr),k=strlen(suff); k >= 0; k--,j--)
	if (pathstr[j] != suff[k])
	    break;
    if (k != -1)
	end = strlen(pathstr);
    else
	end = j+1;

    bname = walloc(char,end-start+1);
    memcpy(bname,&pathstr[start],end-start);
    bname[end-start] = '\0';
    newsym = strcons(strlen(bname),bname);
    wfree(bname);
    
    return newsym;
}


static LISP lisp_to_string(LISP l)
{
    EST_String s;

    s = siod_sprint(l);
    printf("%s\n",(const char *)s);
    return strintern(s);
}

static LISP symbolconc(LISP args)
{long size;
 LISP l,s;
 size = 0;
 tkbuffer[0] = 0;
 for(l=args;NNULLP(l);l=cdr(l))
   {s = car(l);
    if NSYMBOLP(s) err("wrong type of argument(non-symbol) to symbolconc",s);
    size = size + strlen(PNAME(s));
    if (size >  TKBUFFERN) err("symbolconc buffer overflow",NIL);
    strcat(tkbuffer,PNAME(s));}
 return(rintern(tkbuffer));}

LISP symbolexplode(LISP name)
{
    LISP e=NIL;
    const char *pname = get_c_string(name);
    char tt[2];
    int i;

    tt[1]='\0';

    for (i=0; pname[i] != '\0'; i++)
    {
	tt[0] = pname[i];
	e = cons(rintern(tt),e);
    }
    return reverse(e);
}

LISP l_matches(LISP atom, LISP regex)
{
    // t if printname of atom matches regex, nil otherwise
    const EST_String pname = get_c_string(atom);

    if (pname.matches(make_regex(get_c_string(regex))) == TRUE)
	return truth;
    else
	return NIL;
}

LISP l_strequal(LISP atom1, LISP atom2)
{
    
    if (streq(get_c_string(atom1),get_c_string(atom2)))
	return truth;
    else
	return NIL;
}

LISP l_substring(LISP string, LISP l_start, LISP l_length)
{
    // As string might actually be a buffer containing nulls we
    // do this a little carefully.
    if (NTYPEP(string,tc_string))
	err("not a string",string);
	
    const char *data = string->storage_as.string.data;
    int dim = string->storage_as.string.dim;

    int start = ( get_c_int(l_start) < dim ? get_c_int(l_start) : dim );
    int length = ( (get_c_int(l_length) + start) < dim ? 
		   get_c_int(l_length) 
		   : dim-start
		   );

    char *nbuffer = walloc(char, length+1);
    memmove(nbuffer,data+start,length);
    nbuffer[length] = '\0';

    LISP ncell = strcons(length, nbuffer);

    wfree(nbuffer);

    return ncell;
}

static LISP l_sbefore(LISP atom, LISP before)
{
    // Wraparound for EST_String.before function 
    EST_String pname = get_c_string(atom);
    EST_String b = get_c_string(before);
    EST_String n = pname.before(b);
    
    return strintern(n);
}

static LISP l_safter(LISP atom, LISP after)
{
    // Wraparound for EST_String.after function 
    EST_String pname = get_c_string(atom);
    EST_String a = get_c_string(after);
    EST_String n = pname.after(a);

    return strintern(n);
}

void init_subrs_str(void)
{
 init_lsubr("string-append",string_append,
 "(string-append STR1 STR2 ...)\n\
 Return a string made from the concatenation of the print names of STR1\n\
 STR2 ...");
 init_subr_1("string-length",string_length,
  "(string-length SYMBOL)\n\
 Return the number of characters in the print name of SYMBOL.");
 init_subr_1("print_string",lisp_to_string,
 "(print_string DATA)\n\
  Returns a string representing the printing of DATA." );
 init_subr_1("read-from-string",read_from_lstring,
  "(read-from-string SYMBOL)\n\
 Return first s-expression in print name of SYMBOL.");
 init_subr_1("downcase",string_downcase,
 "(downcase SYMBOL)\n\
  Returns a string with the downcased version of SYMBOL's printname.");
 init_subr_1("upcase",string_upcase,
 "(upcase SYMBOL)\n\
  Returns a string with the upcased version of SYMBOL's printname.");
 init_subr_2("string-matches",l_matches,
 "(string-matches ATOM REGEX)\n\
  Returns t if ATOM's printname matches the regular expression REGEX,\n\
  otherwise it returns nil.");
 init_subr_2("string-equal",l_strequal,
 "(string-equal ATOM1 ATOM2)\n\
  Returns t if ATOM's printname is equal to ATOM's print name, otherwise\n\
  it returns nil.");
 init_subr_3("substring", l_substring,
 "(substring STRING START LENGTH)\n\
  Return a substring of STRING starting at START of length LENGTH.");
 init_subr_2("string-before",l_sbefore,
 "(string-before ATOM BEFORE)\n\
  Returns an atom whose printname is the substring of ATOM's printname \n\
  which appears before BEFORE.  This is a wraparound for the EST_String.before \n\
  function in C++, and hence has the same conditions for boundary cases.");
 init_subr_2("string-after",l_safter,
 "(string-after ATOM AFTER)\n\
  Returns an atom whose printname is the substring of ATOM's printname \n\
  which appears after AFTER.  This is a wraparound for the EST_String.after \n\
  function in C++, and hence has the same conditions for boundary cases.");

 init_lsubr("symbolconc",symbolconc,
 "(symbolconc SYMBOL1 SYMBOL2 ...)\n\
  Form new symbol by concatenation of the print forms of each of SYMBOL1\n\
  SYMBOL2 etc.");
 init_subr_1("symbolexplode",symbolexplode,
 "(symbolexplode SYMBOL)\n\
  Returns list of atoms one for each character in the print name of SYMBOL.");

 init_subr_1("parse-number",parse_number,
 "(parse-number SYMBOL)\n\
  Returns a number form a symbol or string whose print name is a number.");

 init_subr_2("basename",symbol_basename,
 "(basename PATH SUFFIX)\n\
  Return a string with directory removed from basename.  If SUFFIX is\n\
  specified remove that from end of PATH.  Basically the same function\n\
  as the UNIX command of the same name.");


 init_subr_1("path-is-filename", path_is_filename,
 "(path-is-filename PATHNAME)\n\
  Is PATH a non-directory name.");

 init_subr_1("path-as-directory", path_as_directory,
 "(path-as-directory PATHNAME)\n\
  Return PATH as a directory name.");

 init_subr_1("path-as-file", path_as_file,
 "(path-as-file PATHNAME)\n\
  Return PATH as a non-directory name.");

 init_lsubr("path-append", path_append,
 "(path-append DIRECTORY-PATH ADDITION1 ADDITION2 ...)\n\
  Return a the path for ADDITION in DIRECTORY.");

 init_subr_1("path-basename", path_basename,
 "(path-basename PATHNAME)\n\
  Return name part of PATH.");


 init_subr_1("path-is-dirname", path_is_dirname,
 "(path-is-dirname PATHNAME)\n\
  Is PATH a directory name.");

}