1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
|
# Written by Aleksey Cheusov <vle@gmx.net>, public domain
#
# This awk module is a part of RunAWK distribution,
# http://sourceforge.net/projects/runawk
#
############################################################
# =head2 multisub.awk
#
# =over 2
#
# =item I<multisub(STRING, SUBST_REPLS[, KEEP])>
#
# is a substitution function. It searches STRING for
# the substrings listed in SUBST_REPLS, in leftmost-longest
# order, and replaces every fragment found with
# the corresponding replacement.
# SUBST_REPLS format: "SUBSTRING1:REPLACEMENT1 SUBSTRING2:REPLACEMENT2...".
# A single space separates substring:replacement pairs from each other.
# If KEEP is specified and some REPLACEMENT(N) is equal to it, then
# the corresponding SUBSTRING(N) is treated as a regular expression
# and the text it matches is kept as is, i.e. left unchanged.
#
# For example:
# print multisub("ABBABBBBBBAAB", "ABB:c BBA:d AB:e")
# |- ccBBde
#
# =back
#
#use "alt_assert.awk"
#use "str2regexp.awk"
# Index of the most recently prepared substitution table.
# It starts at -1 because __runawk_multisub_prepare increments it
# before use, so the first prepared table gets index 0.
BEGIN { __runawk_multisub_num = -1 }
# __runawk_multisub_prepare(REPLS, KEEP)
#
# Private helper of multisub(). Parses the REPLS specification once,
# caches the result and returns the index of the prepared table,
# or -1 if REPLS is empty.
#
# For a table index N the function fills in
#   __runawk_tr_regexp [N]          - alternation regexp matching any SUBSTRING
#   __runawk_tr_repl [N, SUBSTRING] - replacement text of a literal SUBSTRING
# Pairs whose REPLACEMENT equals KEEP get no __runawk_tr_repl entry and
# their SUBSTRING is put into the regexp unescaped, i.e. as a regexp.
#
# NOTE: an unset KEEP compares equal to "", so in that case a pair with
# an empty REPLACEMENT is handled as a KEEP pair.
function __runawk_multisub_prepare (repls, keep,
    arr, cnt, i, key, repl_left, repl_right, re) # local vars
{
    if (!repls){
        return -1
    }

    # The prepared table depends on KEEP as well as on REPLS, so both
    # take part in the cache key.
    key = repls SUBSEP keep
    if (key in __runawk_multisub){
        return __runawk_multisub [key]
    }

    ++__runawk_multisub_num
    __runawk_multisub [key] = __runawk_multisub_num

    # Walk the pairs in the order they were written; "for (i in arr)"
    # would visit them in an unspecified order.
    cnt = split(repls, arr, / /)
    for (i = 1; i <= cnt; ++i){
        # split the pair at its FIRST colon so that the replacement
        # itself may contain colons
        repl_right = repl_left = arr [i]
        sub(/:.*$/, "", repl_left)
        sub(/^[^:]*:/, "", repl_right)

        # An empty substring (e.g. produced by doubled, leading or
        # trailing separators) would add an empty alternative that
        # matches everywhere, so it is ignored.
        if (repl_left == ""){
            continue
        }

        # substr to repl
        if (repl_right != keep){
            __runawk_tr_repl [__runawk_multisub_num, repl_left] = repl_right
            repl_left = str2regexp(repl_left)
        }

        # whole regexp
        if (re != ""){
            re = re "|"
        }
        re = re repl_left
    }

    __runawk_tr_regexp [__runawk_multisub_num] = re
    return __runawk_multisub_num
}

# multisub(STR, REPLS[, KEEP])
#
# Returns STR in which every leftmost-longest occurrence of a SUBSTRING
# listed in REPLS is replaced with its REPLACEMENT. Text matched by
# a pair marked with KEEP is copied to the result unchanged.
# STR is returned as is when REPLS is empty or nothing matches.
# The function calls match() and therefore modifies RSTART and RLENGTH.
function multisub (str, repls, keep,
    n, re, middle, ret) # local vars
{
    n = __runawk_multisub_prepare(repls, keep)
    if (n < 0){
        return str
    }

    re = __runawk_tr_regexp [n]
    if (re == "" || !match(str, re)){
        return str
    }

    # Consume STR from left to right; a loop is used instead of recursion
    # so that the nesting depth does not grow with the number of matches.
    ret = ""
    do {
        if (RLENGTH == 0){
            # Only a KEEP regexp can match the empty string. Step over
            # one character, otherwise the same empty match would be
            # found again and again.
            if (RSTART > length(str)){
                break
            }
            ret = ret substr(str, 1, RSTART)
            str = substr(str, RSTART + 1)
            continue
        }

        middle = substr(str, RSTART, RLENGTH)
        ret = ret substr(str, 1, RSTART - 1)
        if ((n SUBSEP middle) in __runawk_tr_repl){
            ret = ret __runawk_tr_repl [n, middle]
        }else{
            # matched by a KEEP regexp: keep the text as is
            ret = ret middle
        }
        str = substr(str, RSTART + RLENGTH)
    } while (match(str, re))

    return ret str
}
# Self-test run when the module is loaded: the example from the
# documentation header must produce the documented result.
BEGIN {
    assert(multisub("ABBABBBBBBAAB", "ABB:c BBA:d AB:e") == "ccBBde",
           "Email bug to the author")
}
|