File: split.tcl

package info (click to toggle)
tcllib 1.8-1
links: PTS
area: main
in suites: etch, etch-m68k
size: 13,628 kB
ctags: 4,897
sloc: tcl: 88,012; sh: 7,856; ansic: 4,174; xml: 1,765; yacc: 753; perl: 84; f90: 84; makefile: 60; python: 33; ruby: 13; php: 11
file content (152 lines) | stat: -rw-r--r-- 4,824 bytes
namespace eval ::textutil {

    namespace eval split {

	namespace export splitx splitn

	# This will be redefined later. We need it just to let
	# a chance for the next import subcommand to work
	#
	proc splitx [list str [list regexp "\[\t \r\n\]+"]] {}
	proc splitn {str {len 1}} {}
    }

    namespace import -force split::splitx split::splitn
    namespace export splitx splitn

}

########################################################################
# This one was written by Bob Techentin (RWT in Tcl'ers Wiki):
# http://www.techentin.net
# mailto:techentin.robert@mayo.edu
#
# Later, he send me an email stated that I can use it anywhere, because
# no copyright was added, so the code is defacto in the public domain.
#
# You can found it in the Tcl'ers Wiki here:
# http://mini.net/cgi-bin/wikit/460.html
#
# Bob wrote:
# If you need to split string into list using some more complicated rule
# than builtin split command allows, use following function. It mimics
# Perl split operator which allows regexp as element separator, but,
# like builtin split, it expects string to split as first arg and regexp
# as second (optional) By default, it splits by any amount of whitespace. 
# Note that if you add parenthesis into regexp, parenthesed part of separator
# would be added into list as additional element. Just like in Perl. -- cary 
#
# Speed improvement by Reinhard Max:
# Instead of repeatedly copying around the not yet matched part of the
# string, I use [regexp]'s -start option to restrict the match to that
# part. This reduces the complexity from something like O(n^1.5) to
# O(n). My test case for that was:
# 
# foreach i {1 10 100 1000 10000} {
#     set s [string repeat x $i]
#     puts [time {splitx $s .}]
# }
#

if {[package vsatisfies [package provide Tcl] 8.3]} {

    proc ::textutil::split::splitx {str {regexp {[\t \r\n]+}}} {
        # Bugfix 476988
        if {[string length $str] == 0} {
            return {}
        }
        if {[string length $regexp] == 0} {
            return [::split $str ""]
        }
        set list  {}
        set start 0
        while {[regexp -start $start -indices -- $regexp $str match submatch]} {
            foreach {subStart subEnd} $submatch break
            foreach {matchStart matchEnd} $match break
            incr matchStart -1
            incr matchEnd
            lappend list [string range $str $start $matchStart]
            if {$subStart >= $start} {
                lappend list [string range $str $subStart $subEnd]
            }
            set start $matchEnd
        }
        lappend list [string range $str $start end]
        return $list
    }

} else {
    
    # For tcl <= 8.2 we do not have regexp -start...
    proc ::textutil::split::splitx [list str [list regexp "\[\t \r\n\]+"]] {

        if {[string length $str] == 0} {
            return {}
        }
        if {[string length $regexp] == 0} {
            return [::split $str {}]
        }

        set list  {}
        while {[regexp -indices -- $regexp $str match submatch]} {
            lappend list [string range $str 0 [expr {[lindex $match 0] -1}]]
            if {[lindex $submatch 0] >= 0} {
                lappend list [string range $str [lindex $submatch 0] \
                                  [lindex $submatch 1]]
            }
            set str [string range $str [expr {[lindex $match 1]+1}] end]
        }
        lappend list $str
        return $list
    }
    
}

#
# splitn --
#
# splitn splits the string $str into chunks of length $len.  These
# chunks are returned as a list.
#
# If $str really contains a ByteArray object (as retrieved from binary
# encoded channels) splitn must honor this by splitting the string
# into chunks of $len bytes.
#
# It is an error to call splitn with a nonpositive $len.
#
# If splitn is called with an empty string, it returns the empty list.
#
# If the length of $str is not an entire multiple of the chunk length,
# the last chunk in the generated list will be shorter than $len.
#
# The implementation presented here was given by Bryan Oakley, as
# part of a ``contest'' I staged on c.l.t in July 2004.  I selected
# this version, as it does not rely on runtime generated code, is
# very fast for chunk size one, not too bad in all the other cases,
# and uses [split] or [string range] which have been around for quite
# some time.
#
# -- Robert Suetterlin (robert@mpe.mpg.de)
#
proc ::textutil::split::splitn {str {len 1}} {

    if {$len <= 0} {
        return -code error "len must be > 0"
    }

    if {$len == 1} {
        return [split $str {}]
    }

    set result [list]
    set max [string length $str]
    set i 0
    set j [expr {$len -1}]
    while {$i < $max} {
        lappend result [string range $str $i $j]
        incr i $len
        incr j $len
    }

    return $result
}