File: mixedsort.R

package info (click to toggle)
gtools 3.4.1-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 384 kB
  • ctags: 5
  • sloc: asm: 127; ansic: 69; makefile: 1
file content (104 lines) | stat: -rw-r--r-- 2,867 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# $Id: mixedsort.R 1774 2014-03-01 20:02:08Z warnes $

mixedsort <- function(x) x[mixedorder(x)]

mixedorder <- function(x)
  {
    # - Split each each character string into an vector of strings and
    #   numbers
    # - Separately rank numbers and strings
    # - Combine orders so that strings follow numbers

    if(length(x)<1)
        return(NULL)
    else if(length(x)==1)
        return(1)

    if( is.numeric(x) )
        return( order(x) )


    delim="\\$\\@\\$"

    numeric <- function(x)
      {
        suppressWarnings( as.numeric(x) )
      }

    nonnumeric <- function(x)
      {
        suppressWarnings( ifelse(is.na(as.numeric(x)), toupper(x), NA) )
      }

    x <- as.character(x)

    which.nas <- which(is.na(x))
    which.blanks <- which(x=="")

    if(length(which.blanks) >0)
        x[ which.blanks ] <- -Inf

    if(length(which.nas) >0)
        x[ which.nas ] <- Inf

    ####
    # - Convert each character string into an vector containing single
    #   character and  numeric values.
    ####

    # find and mark numbers in the form of +1.23e+45.67
    delimited <- gsub("([+-]{0,1}[0-9]+\\.{0,1}[0-9]*([eE][\\+\\-]{0,1}[0-9]+\\.{0,1}[0-9]*){0,1})",
                      paste(delim,"\\1",delim,sep=""), x)

    # separate out numbers
    step1 <- strsplit(delimited, delim)

    # remove empty elements
    step1 <- lapply( step1, function(x) x[x>""] )

    # create numeric version of data
    step1.numeric <- lapply( step1, numeric )

    # create non-numeric version of data
    step1.character <- lapply( step1, nonnumeric )

    # now transpose so that 1st vector contains 1st element from each
    # original string
    maxelem <- max(sapply(step1, length))

    step1.numeric.t <- lapply(1:maxelem,
                              function(i)
                                 sapply(step1.numeric,
                                        function(x)x[i])
                              )

    step1.character.t <- lapply(1:maxelem,
                              function(i)
                                 sapply(step1.character,
                                        function(x)x[i])
                              )

    # now order them
    rank.numeric   <- sapply(step1.numeric.t,rank)
    rank.character <- sapply(step1.character.t,
                             function(x) as.numeric(factor(x)))

    # and merge
    rank.numeric[!is.na(rank.character)] <- 0  # mask off string values

    rank.character <- t(
                        t(rank.character) +
                        apply(matrix(rank.numeric),2,max,na.rm=TRUE)
                        )

    rank.overall <- ifelse(is.na(rank.character),rank.numeric,rank.character)

    order.frame <- as.data.frame(rank.overall)
    if(length(which.nas) > 0)
      order.frame[which.nas,] <- Inf
    retval <- do.call("order",order.frame)

    return(retval)
  }