File: multisub.awk

package info (click to toggle)
runawk 1.6.1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 712 kB
  • sloc: awk: 1,127; ansic: 736; sh: 420; makefile: 103
file content (100 lines) | stat: -rw-r--r-- 2,441 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# Written by Aleksey Cheusov <vle@gmx.net>, public domain
#
# This awk module is a part of RunAWK distribution,
#        http://sourceforge.net/projects/runawk
#
############################################################

# =head2 multisub.awk
#
# =over 2
#
# =item I<multisub(STRING, SUBST_REPLS[, KEEP])>
#
# is a substitution function. It searches STRING for
# the substrings listed in SUBST_REPLS, in left-most longest
# order, and replaces each fragment found with the
# corresponding replacement.
# SUBST_REPLS format: "SUBSTRING1:REPLACEMENT1   SUBSTRING2:REPLACEMENT2...".
# Three spaces separate the substring:replacement pairs from each other.
# If KEEP is specified and some REPLACEMENT(N) is equal to it, then
# the corresponding SUBSTRING(N) is treated as a regular expression
# and the text it matches is kept as is, i.e. not changed.
#
# For example:
#      print multisub("ABBABBBBBBAAB", "ABB:c   BBA:d   AB:e")
#      |- ccBBde
#
# =back
#

#use "alt_assert.awk"
#use "str2regexp.awk"

# Counter used to number the replacement tables.  It starts at -1
# because __runawk_multisub_prepare() pre-increments it before using
# it, so the first table gets id 0.
BEGIN {
	__runawk_multisub_num = -1
}

# __runawk_multisub_prepare(repls, keep)
#
# Parses the "SUBSTRING:REPLACEMENT   SUBSTRING:REPLACEMENT..." list
# given in REPLS (pairs are separated by three spaces) and builds the
# lookup tables used by multisub():
#
#   __runawk_multisub  [repls]         -> table id
#   __runawk_tr_repl   [id, substring] -> replacement text
#   __runawk_tr_regexp [id]            -> "alt1|alt2|..." regexp
#
# Returns the table id, or -1 if REPLS is empty or contains no usable
# pattern.  A negative id makes multisub() return its input unchanged.
#
# NOTE: the cache is keyed by REPLS alone, so KEEP only takes effect
# the first time a given REPLS string is seen.
function __runawk_multisub_prepare (repls, keep,
	arr, i, repl_left, repl_right, re, cnt) # local vars
{
	if (!repls){
		return -1
	}
	if (repls in __runawk_multisub){
		# already parsed: reuse the cached id
		return __runawk_multisub [repls]
	}

	++__runawk_multisub_num
	__runawk_multisub [repls] = __runawk_multisub_num

	# Walk the pairs in their written order; "for (i in arr)" visits
	# elements in an unspecified order, which made the winner among
	# duplicate SUBSTRINGs implementation-dependent.
	cnt = split(repls, arr, /   /)
	for (i = 1; i <= cnt; ++i){
		# split into 'repl_left' and 'repl_right':
		# the left part ends at the first colon,
		# the right part starts after the last one
		repl_right = repl_left = arr [i]
		sub(/:.*$/, "", repl_left)
		sub(/^.*:/, "", repl_right)

		# An empty pattern (empty pair or ":REPLACEMENT") would add
		# an empty alternative to the regexp, which matches a
		# zero-length string everywhere.  Ignore it.
		if (repl_left == ""){
			continue
		}

		# substr to repl
		# An unset KEEP compares equal to an empty REPLACEMENT, so
		# "SUBSTRING:" must not be mistaken for a KEEP entry.
		if (keep == "" || repl_right != keep){
			__runawk_tr_repl [__runawk_multisub_num, repl_left] = repl_right
			# str2regexp() comes from str2regexp.awk; presumably it
			# escapes regexp metacharacters so that the substring is
			# matched literally -- TODO confirm
			repl_left = str2regexp(repl_left)
		}

		# whole regexp
		if (re != ""){
			re = re "|"
		}
		re = re repl_left
	}

	if (re == ""){
		# nothing to search for: remember that for later calls too
		__runawk_multisub [repls] = -1
		return -1
	}

	__runawk_tr_regexp [__runawk_multisub_num] = re

	return __runawk_multisub_num
}

# multisub(str, repls [, keep])
#
# Returns STR with every match of the patterns listed in REPLS
# replaced by the corresponding replacement.  Matched text that has no
# entry in the replacement table (i.e. text matched by a KEEP regexp)
# is copied to the result unchanged.
#
# STR itself is returned, untouched, when REPLS yields no usable
# table or when nothing in STR matches.
#
# Side effect: RSTART and RLENGTH are overwritten by match().
function multisub (str, repls, keep,
	n,middle,beg,end,ret,re) #local vars
{
	n = __runawk_multisub_prepare(repls, keep)
	if (n < 0){
		return str
	}

	re = __runawk_tr_regexp [n]
	if (!match(str, re)){
		return str
	}

	# Consume the string left to right in a loop.  The previous
	# implementation recursed once per match, so its call depth grew
	# with the number of matches.
	ret = ""
	end = str
	do {
		if (RLENGTH == 0){
			# Zero-length match (possible with a KEEP regexp such
			# as "a*").  Nothing to replace; copy everything up to
			# and including the next character so that the scan
			# always advances, or stop at the end of the string.
			if (RSTART > length(end)){
				break
			}
			ret = ret substr(end, 1, RSTART)
			end = substr(end, RSTART+1)
			continue
		}

		beg    = substr(end, 1, RSTART-1)
		middle = substr(end, RSTART, RLENGTH)
		end    = substr(end, RSTART+RLENGTH)

		if ((n SUBSEP middle) in __runawk_tr_repl)
			ret = ret beg __runawk_tr_repl [n, middle]
		else
			ret = ret beg middle
	} while (match(end, re))

	return ret end
}

# Load-time self-test: runs the example from the documentation above
# and reports a failure through assert() if the result differs.
BEGIN {
	assert(multisub("ABBABBBBBBAAB", "ABB:c   BBA:d   AB:e") == "ccBBde",
		   "Email bug to the author")
}