File: rosetta-tokenizer.tcl

package info (click to toggle)
nsf 2.4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 13,208 kB
  • sloc: ansic: 32,687; tcl: 10,723; sh: 660; pascal: 176; javascript: 135; lisp: 41; makefile: 24
file content (59 lines) | stat: -rw-r--r-- 1,554 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Assumes Tcl 8.6 (coroutine support)
if {[catch {package req Tcl 8.6}]} return

#
# == Rosetta example: Tokenize a string with escaping
#
#
# Write a class which allows for splitting a string at each non-escaped
# occurrence of a separator character.
# 
# See https://rosettacode.org/wiki/Tokenize_a_string_with_escaping
#

package req nx
package req nx::test

nx::Class create Tokenizer {
    :property s:required
    :method init {} {
	:require namespace
        set coro [coroutine [current]::nextCoro [current] iter ${:s}]
        :public object forward next $coro
    }
    :public method iter {s} {
        yield [info coroutine]
        for {set i 0} {$i < [string length $s]} {incr i} {
            yield [string index $s $i]
        }
        return -code break
    }
    :public object method tokenize {{-sep |} {-escape ^} s} {
	set t [[current] new -s $s]
	set part ""
	set parts [list]
	while {1} {
	    set c [$t next]
	    if {$c eq $escape} {
		append part [$t next]
	    } elseif {$c eq $sep} {
		lappend parts $part
		set part ""
	    } else {
		append part $c
	    }
	}
	lappend parts $part
	return $parts	
    }
}

# Run some tests including the escape character. Each "?" line pairs
# a script with the result it is expected to produce ("?" is presumably
# provided by nx::test, which is loaded above -- confirm).

# An escaped separator is taken literally and does not split.
? {Tokenizer tokenize -sep | -escape ^ ^|} {|}
# Two escaped separators in a row end up in one single token.
? {Tokenizer tokenize -sep | -escape ^ ^|^|} {||}
# An escaped escape character followed by an escaped separator.
? {Tokenizer tokenize -sep | -escape ^ ^^^|} {^|}
# A lone, unescaped separator yields two empty tokens.
? {Tokenizer tokenize -sep | -escape ^ |} {{} {}}

# Test for the output required by the Rosetta example:
? {Tokenizer tokenize -sep | -escape ^ one^|uno||three^^^^|four^^^|^cuatro|} {one|uno {} three^^ four^|cuatro {}}