File: readln.pl

package info (click to toggle)
swi-prolog 6.6.6-1~bpo70%2B1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy-backports
  • size: 82,312 kB
  • sloc: ansic: 322,250; perl: 245,822; sh: 6,651; java: 5,254; makefile: 4,423; cpp: 4,153; ruby: 1,594; yacc: 843; xml: 82; sed: 12; sql: 6
file content (208 lines) | stat: -rw-r--r-- 6,251 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
/*  $Id$

    Read a sentence from the current input stream and convert it
    into a list of atoms and numbers.
    Letters(A-Z, a-z) are converted to atoms
    Digits (0-9) (and a '.' if a real number) are converted to numbers
	Some obscure 'rounding' is done, so you have most of the times
	only 6 significant digits with an exponent part. (This is caused
	by the system predicate 'name'. If you want looonnnggg numbers
	then define digits as parts of words).
	(N.B. reals work only if '.' is not defined as 'stop-char' but
		'escape' will work in this case)

    The reader is >>flexible<<, you can define yourself:
	- the character on which reading will stop
		(this character is escapable with \
		 to read a \ type this character twice!!)
	- the character(s) that make up a word (execpt the
	  characters A-Z, a-z that always make up words!!
	  and (real)-numbers that always are grouped together!!)
	- whether you want conversion of uppercase letters to
	  lowercase letters.

    readln/1
	The default setting for readln/1 is
		- read up till newline
		- see underscore('_') and numbers 0-9 as part of words
		- make lowercase

        - If nothing is read readln/1 succeeds with []
        - If an end_of_file is read readln/1 succeeds with [..|end_of_file]


    readln/5
	This predicate gives you the flexibility.
	It succeeds with arg1 = list of word&atoms
			 arg2 = Ascii code of last character
				(but '-1' in case of ^D).
	To change one or more of the defaults you have to
	instantiate argument3 and/or argument4 and/or argument5.
	 !! Uninstantiated arguments are defaulted !!
	- stop character(s):
		instantiate argument 3 with the list of ASCII code's
		of the desired stop characters (Note: you can also
		say: ".!?", what is equivalent to [46,33,63]).
	- word character(s):
		instantiate argument 4 with the list of ASCII code's
		of the desired word-part characters (Note: wou can also
		say: "", what is equivalent to [] ; i.e. no extra
		characters).
	- lowercase conversion:
		instantiate argument 5 with lowercase


Main predicates provided:

    readln(P)		- Read a sentence up till NewLine and
			  unify <P> with the list of atoms/numbers
			  (identical to:
				 readln(P, [10],"_01213456789",uppercase).)
    readln(P, LastCh)   - idem as above but the second argument is unified
			  with the last character read (the ascii-code for
			  the stop-character or -1)
    readln(P, LastCh, Arg1, Arg2, Arg3)
			- idem as above but the default setting is changed
			  for the instantiated args:
			  Arg1: List of stop characters
			  Arg2: List of word_part characters
			  Arg3: uppercase/lowercase conversion

Examples:
	read_sentence(P,Case) :-
		readln(P,_,".!?","_0123456789",Case).

	read_in(P) :-				% with numbers as separate
		readln(P,Eof,_,"", _).	% entities.

	read_atom(A) :-			% stop on newline,
		readln(A,_,_," ",_).		% space is part of word


   Author: Wouter Jansweijer
   Date: 26 april 1985

   Modified: Jan Wielemaker
   Date: 19 feb 2001

   Modernised a bit and fixed some end_of_file/-1 issues.  As we have modules
   since a while I removed the ugly $ stuff :-)
******************************************************************************/

:- module(readln,
	  [ readln/1,			% -Line
	    readln/2,			% -Line, +EOL
	    readln/5			% See above
	  ]).
:- use_module(library(lists)).
:- license(swipl).

readln(Read) :-			% the default is read up to EOL
	rl_readln(Line, LastCh, [10], "_0123456789", uppercase),
	(   LastCh == -1
	->  append(Line,[end_of_file], Read)
	;   Read = Line
	).

readln(Read, LastCh):-
	rl_readln(Read, LastCh, [10], "_0123456789", uppercase).

readln(P, EOF, StopChars, WordChars, Case) :-
	(   var(StopChars)
	->  Arg1 = [10]
	;   Arg1 = StopChars
	),
	(   var(WordChars)
	->  Arg2 = "01234567890_"
	;   Arg2 = WordChars
	),
	(   var(Case)
	->  Arg3 = lowercase
	;   Arg3 = Case
	),
	rl_readln(P, EOF, Arg1, Arg2, Arg3).

rl_readln(P, EOF, StopChars, WordChars, Case) :-
	rl_initread(L, EOF, StopChars),
	rl_blanks(L, LL), !,
	rl_words(P, LL,[], options(WordChars, Case)), !.

rl_initread(S, EOF, StopChars) :-
	get0(K),
	rl_readrest(K, S, EOF, StopChars).

rl_readrest(-1, [], end_of_file, _) :- !.
rl_readrest(0'\\, [K1|R], EOF, StopChars) :-
	get0(K1),			% skip it, take next char
	get0(K2),
	rl_readrest(K2, R, EOF, StopChars).
rl_readrest(K, [K], K, StopChars) :-	% the stop char(s)
	member(K, StopChars), !.
rl_readrest(K, [K|R], EOF, StopChars) :-	% the normal case
	get0(K1),
	rl_readrest(K1, R, EOF, StopChars).

rl_words([W|Ws], S1, S4, Options) :-
	rl_word(W, S1, S2, Options), !,
	rl_blanks(S2, S3),
	rl_words(Ws, S3, S4, Options).
rl_words([], S1, S2, _) :-
	rl_blanks(S1, S2), !.
rl_words([], S, S, _).

rl_word(N, [46|S1], S3, _) :-		% the dot can be in the beginning of
	rl_basic_num(N1, S1, S2),	!,	% a real number.
	rl_basic_nums(Rest, S2, S3, dot),	% only ONE dot IN a number !!
	name(N,[48, 46, N1|Rest]).	% i.e '0.<number>'
rl_word(N, S0, S2, _) :-
	rl_basic_num(N1, S0, S1), !,
	rl_basic_nums(Rest, S1, S2, _),
	name(N,[N1|Rest]).
rl_word(W, S0, S2, Options) :-
	rl_basic_char(C1, S0, S1, Options), !,
	rl_basic_chars(Rest, S1, S2, Options),
	name(W, [C1|Rest]).
rl_word(P,[C|R], R, _) :-
	name(P, [C]), !.

rl_basic_chars([A|As], S0, S2, Options) :-
	rl_basic_char(A, S0, S1, Options), !,
	rl_basic_chars(As, S1, S2, Options).
rl_basic_chars([], S, S, _).

rl_basic_nums([46,N|As], [46|S1], S3, Dot) :- % a dot followed by >= one digit
	var(Dot),			% but not found a dot already
	rl_basic_num(N, S1, S2), !,
	rl_basic_nums(As, S2, S3, dot).
rl_basic_nums([A|As], S0, S2, Dot) :-
	rl_basic_num(A, S0, S1), !,
	rl_basic_nums(As, S1, S2, Dot).
rl_basic_nums([], S, S, _).

rl_blanks([C|S0], S1) :-
	rl_blank(C), !,
	rl_blanks(S0, S1).
rl_blanks(S, S).

/* Basic Character types that form rl_words together */

rl_basic_char(A, [C|S], S, options(WordChars, Case)) :-
	rl_lc(C, A, WordChars, Case).

rl_basic_num(N, [N|R], R) :-
	code_type(N, digit).

rl_blank(X) :-
	code_type(X, space).

rl_lc(X, X1, _, Case) :-
	code_type(X, upper), !,
	rl_fix_case(Case, X, X1).
rl_lc(X, X, _, _) :-
	code_type(X, lower).
rl_lc(X, X, WordChars, _) :-
	memberchk(X, WordChars).

rl_fix_case(lowercase, U, L) :- !,
	code_type(L, lower(U)).
rl_fix_case(_, C, C).