File: mixedsort.R

package info (click to toggle)
gtools 2.6.2-1
  • links: PTS
  • area: main
  • in suites: squeeze
  • size: 328 kB
  • ctags: 5
  • sloc: asm: 127; ansic: 69; makefile: 1
file content (102 lines) | stat: -rw-r--r-- 2,853 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# $Id: mixedsort.R 1087 2007-04-07 13:41:51Z warnes $

mixedsort <- function(x) x[mixedorder(x)]

mixedorder <- function(x)
  {
    # - Split each each character string into an vector of strings and
    #   numbers
    # - Separately rank numbers and strings
    # - Combine orders so that strings follow numbers

    delim="\\$\\@\\$"

    numeric <- function(x)
      {
        optwarn = options("warn")
        on.exit( options(optwarn) )
        options(warn=-1)
        as.numeric(x)
      }

    nonnumeric <- function(x)
      {
        optwarn = options("warn")
        on.exit( options(optwarn) )
        options(warn=-1)

        ifelse(is.na(as.numeric(x)), toupper(x), NA)
      }


    x <- as.character(x)

    which.nas <- which(is.na(x))
    which.blanks <- which(x=="")

    if(length(which.blanks) >0)
    x[ which.blanks ] <- -Inf
    if(length(which.nas) >0)
    x[ which.nas ] <- Inf

    ####
    # - Convert each character string into an vector containing single
    #   character and  numeric values.
    ####

    # find and mark numbers in the form of +1.23e+45.67
    delimited <- gsub("([+-]{0,1}[0-9\\.]+([eE][\\+\\-]{0,1}[0-9\\.]+){0,1})",
                      paste(delim,"\\1",delim,sep=""), x)

    # separate out numbers
    step1 <- strsplit(delimited, delim)

    # remove empty elements
    step1 <- lapply( step1, function(x) x[x>""] )

    # create numeric version of data
    step1.numeric <- lapply( step1, numeric )

    # create non-numeric version of data
    step1.character <- lapply( step1, nonnumeric )

    # now transpose so that 1st vector contains 1st element from each
    # original string
    maxelem <- max(sapply(step1, length))

    step1.numeric.t <- lapply(1:maxelem,
                              function(i)
                                 sapply(step1.numeric,
                                        function(x)x[i])
                              )

    step1.character.t <- lapply(1:maxelem,
                              function(i)
                                 sapply(step1.character,
                                        function(x)x[i])
                              )

    # now order them
    rank.numeric   <- sapply(step1.numeric.t,rank)
    rank.character <- sapply(step1.character.t,
                             function(x) as.numeric(factor(x)))

    # and merge
    rank.numeric[!is.na(rank.character)] <- 0  # mask off string values

    rank.character <- t(
                        t(rank.character) +
                        apply(matrix(rank.numeric),2,max,na.rm=TRUE)
                        )
    
    rank.overall <- ifelse(is.na(rank.character),rank.numeric,rank.character)

    order.frame <- as.data.frame(rank.overall)
    if(length(which.nas) > 0)
      order.frame[which.nas,] <- Inf
    retval <- do.call("order",order.frame)

    return(retval)
  }