File: split.tcl

package info (click to toggle)
tcllib 1.8-1
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 13,628 kB
  • ctags: 4,897
  • sloc: tcl: 88,012; sh: 7,856; ansic: 4,174; xml: 1,765; yacc: 753; perl: 84; f90: 84; makefile: 60; python: 33; ruby: 13; php: 11
file content (152 lines) | stat: -rw-r--r-- 4,824 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
namespace eval ::textutil {

    namespace eval split {

	namespace export splitx splitn

	# This will be redefined later. We need it just to let
	# a chance for the next import subcommand to work
	#
	proc splitx [list str [list regexp "\[\t \r\n\]+"]] {}
	proc splitn {str {len 1}} {}
    }

    namespace import -force split::splitx split::splitn
    namespace export splitx splitn

}

########################################################################
# This one was written by Bob Techentin (RWT in Tcl'ers Wiki):
# http://www.techentin.net
# mailto:techentin.robert@mayo.edu
#
# Later, he send me an email stated that I can use it anywhere, because
# no copyright was added, so the code is defacto in the public domain.
#
# You can found it in the Tcl'ers Wiki here:
# http://mini.net/cgi-bin/wikit/460.html
#
# Bob wrote:
# If you need to split string into list using some more complicated rule
# than builtin split command allows, use following function. It mimics
# Perl split operator which allows regexp as element separator, but,
# like builtin split, it expects string to split as first arg and regexp
# as second (optional) By default, it splits by any amount of whitespace. 
# Note that if you add parenthesis into regexp, parenthesed part of separator
# would be added into list as additional element. Just like in Perl. -- cary 
#
# Speed improvement by Reinhard Max:
# Instead of repeatedly copying around the not yet matched part of the
# string, I use [regexp]'s -start option to restrict the match to that
# part. This reduces the complexity from something like O(n^1.5) to
# O(n). My test case for that was:
# 
# foreach i {1 10 100 1000 10000} {
#     set s [string repeat x $i]
#     puts [time {splitx $s .}]
# }
#

if {[package vsatisfies [package provide Tcl] 8.3]} {

    proc ::textutil::split::splitx {str {regexp {[\t \r\n]+}}} {
        # Bugfix 476988
        if {[string length $str] == 0} {
            return {}
        }
        if {[string length $regexp] == 0} {
            return [::split $str ""]
        }
        set list  {}
        set start 0
        while {[regexp -start $start -indices -- $regexp $str match submatch]} {
            foreach {subStart subEnd} $submatch break
            foreach {matchStart matchEnd} $match break
            incr matchStart -1
            incr matchEnd
            lappend list [string range $str $start $matchStart]
            if {$subStart >= $start} {
                lappend list [string range $str $subStart $subEnd]
            }
            set start $matchEnd
        }
        lappend list [string range $str $start end]
        return $list
    }

} else {
    
    # For tcl <= 8.2 we do not have regexp -start...
    proc ::textutil::split::splitx [list str [list regexp "\[\t \r\n\]+"]] {

        if {[string length $str] == 0} {
            return {}
        }
        if {[string length $regexp] == 0} {
            return [::split $str {}]
        }

        set list  {}
        while {[regexp -indices -- $regexp $str match submatch]} {
            lappend list [string range $str 0 [expr {[lindex $match 0] -1}]]
            if {[lindex $submatch 0] >= 0} {
                lappend list [string range $str [lindex $submatch 0] \
                                  [lindex $submatch 1]]
            }
            set str [string range $str [expr {[lindex $match 1]+1}] end]
        }
        lappend list $str
        return $list
    }
    
}

#
# splitn --
#
# splitn splits the string $str into chunks of length $len.  These
# chunks are returned as a list.
#
# If $str really contains a ByteArray object (as retrieved from binary
# encoded channels) splitn must honor this by splitting the string
# into chunks of $len bytes.
#
# It is an error to call splitn with a nonpositive $len.
#
# If splitn is called with an empty string, it returns the empty list.
#
# If the length of $str is not an entire multiple of the chunk length,
# the last chunk in the generated list will be shorter than $len.
#
# The implementation presented here was given by Bryan Oakley, as
# part of a ``contest'' I staged on c.l.t in July 2004.  I selected
# this version, as it does not rely on runtime generated code, is
# very fast for chunk size one, not too bad in all the other cases,
# and uses [split] or [string range] which have been around for quite
# some time.
#
# -- Robert Suetterlin (robert@mpe.mpg.de)
#
proc ::textutil::split::splitn {str {len 1}} {

    if {$len <= 0} {
        return -code error "len must be > 0"
    }

    if {$len == 1} {
        return [split $str {}]
    }

    set result [list]
    set max [string length $str]
    set i 0
    set j [expr {$len -1}]
    while {$i < $max} {
        lappend result [string range $str $i $j]
        incr i $len
        incr j $len
    }

    return $result
}