File: split.tcl

package info (click to toggle)
tcllib 2.0%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 83,560 kB
  • sloc: tcl: 306,798; ansic: 14,272; sh: 3,035; xml: 1,766; yacc: 1,157; pascal: 881; makefile: 124; perl: 84; f90: 84; python: 33; ruby: 13; php: 11
file content (144 lines) | stat: -rw-r--r-- 4,570 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# split.tcl --
#
#	Various ways of splitting a string.
#
# Copyright (c) 2000      by Ajuba Solutions.
# Copyright (c) 2000      by Eric Melski <ericm@ajubasolutions.com>
# Copyright (c) 2001      by Reinhard Max <max@suse.de>
# Copyright (c) 2003      by Pat Thoyts <patthoyts@users.sourceforge.net>
# Copyright (c) 2001-2006 by Andreas Kupries <andreas_kupries@users.sourceforge.net>
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
# 
# RCS: @(#) $Id: split.tcl,v 1.7 2006/04/21 04:42:28 andreas_kupries Exp $

# ### ### ### ######### ######### #########
## Requirements

package require Tcl 8.5 9

namespace eval ::textutil::split {}

########################################################################
# This one was written by Bob Techentin (RWT in Tcl'ers Wiki):
# http://www.techentin.net
# mailto:techentin.robert@mayo.edu
#
# Later, he send me an email stated that I can use it anywhere, because
# no copyright was added, so the code is defacto in the public domain.
#
# You can found it in the Tcl'ers Wiki here:
# http://mini.net/cgi-bin/wikit/460.html
#
# Bob wrote:
# If you need to split string into list using some more complicated rule
# than builtin split command allows, use following function. It mimics
# Perl split operator which allows regexp as element separator, but,
# like builtin split, it expects string to split as first arg and regexp
# as second (optional) By default, it splits by any amount of whitespace. 
# Note that if you add parenthesis into regexp, parenthesed part of separator
# would be added into list as additional element. Just like in Perl. -- cary 
#
# Speed improvement by Reinhard Max:
# Instead of repeatedly copying around the not yet matched part of the
# string, I use [regexp]'s -start option to restrict the match to that
# part. This reduces the complexity from something like O(n^1.5) to
# O(n). My test case for that was:
# 
# foreach i {1 10 100 1000 10000} {
#     set s [string repeat x $i]
#     puts [time {splitx $s .}]
# }
#

proc ::textutil::split::splitx {str {regexp {[\t \r\n]+}}} {
    # Bugfix 476988
    if {[string length $str] == 0} {
        return {}
    }
    if {[string length $regexp] == 0} {
        return [::split $str ""]
    }
    if {[regexp $regexp {}]} {
        return -code error \
            "splitting on regexp \"$regexp\" would cause infinite loop"
    }

    set list  {}
    set start 0
    while {[regexp -start $start -indices -- $regexp $str match submatch]} {
        foreach {subStart subEnd} $submatch break
        foreach {matchStart matchEnd} $match break
        incr matchStart -1
        incr matchEnd
        lappend list [string range $str $start $matchStart]
        if {$subStart >= $start} {
            lappend list [string range $str $subStart $subEnd]
        }
        set start $matchEnd
    }
    lappend list [string range $str $start end]
    return $list
}

#
# splitn --
#
# splitn splits the string $str into chunks of length $len.  These
# chunks are returned as a list.
#
# If $str really contains a ByteArray object (as retrieved from binary
# encoded channels) splitn must honor this by splitting the string
# into chunks of $len bytes.
#
# It is an error to call splitn with a nonpositive $len.
#
# If splitn is called with an empty string, it returns the empty list.
#
# If the length of $str is not an entire multiple of the chunk length,
# the last chunk in the generated list will be shorter than $len.
#
# The implementation presented here was given by Bryan Oakley, as
# part of a ``contest'' I staged on c.l.t in July 2004.  I selected
# this version, as it does not rely on runtime generated code, is
# very fast for chunk size one, not too bad in all the other cases,
# and uses [split] or [string range] which have been around for quite
# some time.
#
# -- Robert Suetterlin (robert@mpe.mpg.de)
#
proc ::textutil::split::splitn {str {len 1}} {

    if {$len <= 0} {
        return -code error "len must be > 0"
    }

    if {$len == 1} {
        return [split $str {}]
    }

    set result [list]
    set max [string length $str]
    set i 0
    set j [expr {$len -1}]
    while {$i < $max} {
        lappend result [string range $str $i $j]
        incr i $len
        incr j $len
    }

    return $result
}

# ### ### ### ######### ######### #########
## Data structures

namespace eval ::textutil::split {
    namespace export splitx splitn
}

# ### ### ### ######### ######### #########
## Ready

package provide textutil::split 0.9