File: tokenre.awk

package info (click to toggle)
runawk 1.6.1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 712 kB
  • sloc: awk: 1,127; ansic: 736; sh: 420; makefile: 103
file content (73 lines) | stat: -rw-r--r-- 1,578 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Written by Aleksey Cheusov <vle@gmx.net>, public domain
#
# This awk module is a part of RunAWK distribution,
#        http://sourceforge.net/projects/runawk
#
############################################################

# =head2 tokenre.awk
#
# By default AWK splits input lines into tokens according to regular
# expression that defines "spaces" between tokens using special
# variable FS. In many situations it is more useful to define regular
# expressions for tokens themselves. This is what this module does.
#
# =over 2
#
# =item I<tokenre(STRING, REGEXP)>
#
# extracts substrings from STRING
# according to REGEXP from the left to the right and assigns $1, $2
# etc. and NF variable.
#
# =item I<tokenre0(REGEXP)>
#
# Does the same as `tokenre' but splits $0 instead.
#
# =item I<splitre(STRING, ARR, REGEXP)>
#
# The same as `tokenre' but the tokens are assigned to ARR[1], ARR[2]...
# The number of extracted tokens is returned.
#
# =item I<TRE>
#
# global variable. If it is set to non-empty string, all input
# lines are split automatically.
#
# =back
#

# See example/demo_tokenre for a usage example

# tokenre(s, re) -- split string `s' into the fields $1, $2, ... where
# every field is a substring of `s' matched by the regexp `re', taken
# from left to right.  NF is set to the number of tokens found.
#
#   s  -- string to tokenize (only the local copy is consumed)
#   re -- regexp describing a single token
#
# Side effects: the current record is rebuilt (NF and $1..$NF are
# overwritten); RSTART and RLENGTH are clobbered by match().
function tokenre (s, re){
	# Drop all existing fields before assigning the new ones.
	NF = 0
	while (match(s, re)){
		if (RLENGTH == 0){
			# `re' matched the empty string.  Without special care
			# `s' would never get shorter and the loop would spin
			# forever.  An empty match is not a token: stop if the
			# input is exhausted, otherwise step over one character
			# and look for the next non-empty match.
			if (s == "")
				break
			s = substr(s, RSTART+1)
			continue
		}
		++NF
		$NF = substr(s, RSTART, RLENGTH)
		# Continue scanning right after the token just extracted.
		s = substr(s, RSTART+RLENGTH)
	}
}

# tokenre0(re) -- tokenize the current input record ($0) with tokenre(),
# using `re' as the regexp that describes a single token.
# `record' is a local variable, not an argument.
function tokenre0 (re,              record){
	record = $0
	tokenre(record, re)
}

# splitre(s, arr, re) -- store the substrings of `s' matched by the
# regexp `re', taken from left to right, in arr[1], arr[2], ...
#
#   s   -- string to tokenize (only the local copy is consumed)
#   arr -- array receiving the tokens; it is not cleared first, so
#          elements beyond the returned count keep their old values
#   re  -- regexp describing a single token
#
# Returns the number of tokens stored.  RSTART and RLENGTH are
# clobbered by match().  `cnt' is a local variable, not an argument.
function splitre (s, arr, re,             cnt){
	cnt = 0
	while (match(s, re)){
		if (RLENGTH == 0){
			# `re' matched the empty string.  Without special care
			# `s' would never get shorter and the loop would spin
			# forever.  An empty match is not a token: stop if the
			# input is exhausted, otherwise step over one character
			# and look for the next non-empty match.
			if (s == "")
				break
			s = substr(s, RSTART+1)
			continue
		}
		++cnt
		arr [cnt] = substr(s, RSTART, RLENGTH)
		# Continue scanning right after the token just extracted.
		s = substr(s, RSTART+RLENGTH)
	}
	return cnt
}

# splitre0(arr, re) -- run splitre() on the current input record ($0),
# storing the tokens in `arr'.  Returns whatever splitre() returns.
# `total' is a local variable, not an argument.
function splitre0 (arr, re,              total){
	total = splitre($0, arr, re)
	return total
}

# Automatic mode: whenever the global TRE is a non-empty string, every
# input record is re-split by passing TRE to tokenre0().
TRE != "" {
	tokenre0(TRE)
}