File: chopword.cc

package info (click to toggle)
gri 2.4.2-1
  • links: PTS
  • area: main
  • in suites: potato
  • size: 4,540 kB
  • ctags: 1,966
  • sloc: cpp: 32,542; lisp: 3,243; perl: 806; makefile: 548; sh: 253
file content (145 lines) | stat: -rw-r--r-- 3,693 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#include "gr.hh"

// DESCRIPTION:  Split string `s' into words, storing a pointer to the
// start of each word in w[0], w[1], ..., and set `*nw' to the number
// of words stored.
//
// Words are separated by blanks, tabs, newlines or carriage returns.
// A word that begins with a double-quote extends to the next
// double-quote that is not preceded by a backslash; both quotes (and
// any \" sequences inside) are left in the word, and must be removed
// later by the caller if needed.  If the closing quote is missing, the
// word simply runs to the end of `s'.
//
// `max' is decremented before scanning, so at most max-1 words are
// stored in `w'.
//
// Always returns true.
//
// NOTE: input string `s' is destroyed in the process!  The character
// following each word is overwritten with '\0', and the pointers put
// into `w' point into `s' itself.
//
// NB: 11 Feb 95: not using 'isspace' speeds by factor of 1.9, which
//     yields a 9% speedup in 'read grid data' with 55,000 elements.
//
// 1999-12-15: permit TAB type separator
bool
chop_into_words(char *s, char **w, int *nw, int max)
{
	char *cp = s;
	max--;			// loop below stores at most max-1 words
	*nw = 0;
	if (*cp == '\0')
		return true;
	// Traverse s, getting pointers to words and terminating ends
	while (*nw < max) {
		char c;		// character that followed the word just found
		// Skip whitespace; break if nothing but whitespace is left.
		while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r')
			cp++;
		if (!*cp)
			break;
		// Now at a non-blank, which starts the next word.
		w[(*nw)++] = cp;
		if (*cp == '"') {
			// Quoted word: scan to the next quote that is not
			// protected by a backslash, or to the end of the string.
			char last = *cp;
			while (*++cp && !(*cp == '"' && last != '\\'))
				last = *cp;
			// Keep the closing quote in the word.  Only step over it
			// if it is really there: for an unterminated quote, cp is
			// on the final '\0' and must not move past the end of s.
			if (*cp == '"')
				cp++;
		} else {
			// Ordinary word: scan to the next whitespace or the end.
			while (*++cp
			       && !(*cp == ' '
				    || *cp == '\t'
				    || *cp == '\n'
				    || *cp == '\r')) {
				;		// EMPTY
			}
		}
		// Terminate the word, remembering what was there.
		c = *cp;
		*cp++ = '\0';
		// Break if that was the end of the input string
		if (!c)
			break;
	}
	return true;
}

// Split data line `s' into words, obeying the separator stored in the
// global `_input_data_separator' (see 'set input data separator').
//
// Pointers to the words are stored in w[0], w[1], ..., and `*nw' is
// set to the number of words stored.  `max' is decremented before
// scanning, so at most max-1 words are stored.
//
// If the separator is ' ':  words are separated by any run of blanks,
//   tabs, newlines or carriage returns.  A word starting with a
//   double-quote extends to the next double-quote not preceded by a
//   backslash (quotes are left in the word); if the closing quote is
//   missing the word runs to the end of `s'.
// If the separator is '\t': every tab ends a field, so adjacent tabs,
//   a leading tab, or a trailing tab produce empty words.
// For any other separator: `*nw' is set to 0 and false is returned.
//
// Returns true in the first two cases.
//
// NOTE: input string `s' is destroyed in the process!  The character
// following each word is overwritten with '\0', and the pointers put
// into `w' point into `s' itself.
bool
chop_into_data_words(char *s, char **w, int *nw, int max)
{
	extern char _input_data_separator;
	if (_input_data_separator == ' ') {
		char *cp = s;
		max--;		// loop below stores at most max-1 words
		*nw = 0;
		if (*cp == '\0')
			return true;
		// Traverse s, getting pointers to words and terminating ends
		while (*nw < max) {
			char c;	// character that followed the word just found
			// Skip whitespace; break if nothing else is left.
			while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r')
				cp++;
			if (!*cp)
				break;
			// Now at a non-blank, which starts the next word.
			w[(*nw)++] = cp;
			if (*cp == '"') {
				// Quoted word: scan to the next quote that is not
				// protected by a backslash, or to the end of the string.
				char last = *cp;
				while (*++cp && !(*cp == '"' && last != '\\'))
					last = *cp;
				// Keep the closing quote in the word.  Only step over
				// it if it is really there: for an unterminated quote,
				// cp is on the final '\0' and must not move past it.
				if (*cp == '"')
					cp++;
			} else {
				// Ordinary word: scan to next whitespace or the end.
				while (*++cp
				       && !(*cp == ' '
					    || *cp == '\t'
					    || *cp == '\n'
					    || *cp == '\r')) {
					;		// EMPTY
				}
			}
			// Terminate the word, remembering what was there.
			c = *cp;
			*cp++ = '\0';
			// Break if that was the end of the input string
			if (!c)
				break;
		}
		return true;
	} else if (_input_data_separator == '\t') {
		char *cp = s;
		max--;		// loop below stores at most max-1 words
		*nw = 0;
		if (*cp == '\0')
			return true;
		while (*nw < max) {
			char c;	// character that ended the field just found
			w[(*nw)++] = cp;
			// Scan to the tab or '\0' ending this field.  The scan
			// starts at cp itself, so an empty field (cp already on a
			// tab, or on the final '\0' after a trailing tab) is
			// handled without stepping past the end of the string.
			while (*cp != '\0' && *cp != '\t')
				cp++;
			c = *cp;
			*cp++ = '\0';
			if (!c)
				break;
		}
		return true;
	} else {
		// Unsupported separator: report no words and signal failure.
		*nw = 0;
		return false;
	}

}