File: string.R

package info (click to toggle)
r-cran-nlp 0.3-2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 456 kB
  • sloc: makefile: 2
file content (99 lines) | stat: -rw-r--r-- 2,500 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
## A simple string class.

## Construct a String object from the first element of x.
##
## x is coerced to character; `[[1L]]` is used on purpose so that an
## empty input raises an error rather than yielding NA_character_.
String <- function(x)
{
    chars <- as.character(x)
    .String_from_string(chars[[1L]])
}

## Note subscripting by [[: this insists on the first element, and
## hence gives an error instead of NA_character_ if there is none.

## S3 generic for coercing an object to a String; the work is done by
## the method selected for the class of x.
as.String <- function(x)
{
    UseMethod("as.String")
}

## A String is already a String: hand it back unchanged.
as.String.String <- function(x) x

## Default coercion: glue the elements of x together, one per line
## (newline separated), and wrap the result as a String.
as.String.default <- function(x)
{
    joined <- paste(x, collapse = "\n")
    String(joined)
}

## Predicate: TRUE if x inherits from class "String", FALSE otherwise.
is.String <- function(x)
{
    inherits(x, what = "String")
}

## Print method for String objects.
##
## Writes the string to standard output via writeLines() (so embedded
## newlines produce actual line breaks) and, following the convention
## for print methods, returns x invisibly so that the result of
## print(x) can still be assigned or passed on.
##
## Arguments:
##   x    a String object.
##   ...  accepted for compatibility with the print() generic; unused.
print.String <-
function(x, ...)
{
    writeLines(x)
    invisible(x)
}

## Provide a '[' method performing slicing (as we cannot provide S3
## methods for substr, and clearly substrings of strings should be
## strings).

## Note that we have no distinction between spans and span arrays (same
## issue as having no distinction between strings and string arrays in
## base R).  Hence, we take spans to always operate in an array context
## (for now: we could add a drop = FALSE argument to have subscripting
## turn character vectors of length one back to strings again).

## Subscript method for String objects.
##
## With a single subscript i (j missing):
##   * i a Span object: character vector of the substrings delimited by
##     i$start and i$end;
##   * i an empty list: an empty list;
##   * i a list consisting only of Span objects: a list with one
##     character vector of substrings per element.
## In every other case x[i, j] is the substring from position i to
## position j, returned as a String.
`[.String` <-
function(x, i, j)
{
    ## substring() only recycles to the longest argument when that
    ## length is positive, so zero-length positions are handled here.
    extract <- function(s, from, to) {
        if(length(from)) substring(s, from, to) else character()
    }

    if(missing(j)) {
        if(is.Span(i))
            return(extract(x, i$start, i$end))
        if(is.list(i)) {
            if(!length(i))
                return(list())
            if(all(vapply(i, is.Span, NA)))
                return(lapply(i,
                              function(sp)
                                  extract(x, sp$start, sp$end)))
        }
    }

    ## Plain scalar slicing: x[i, j] is substr(x, i, j) as a String.
    String(substr(x, i, j))
}

## More methods?
##
## A popular mailing list discussion item is to use a Java style '+'
## operator for concatenating strings (not uniformly liked as the
## corresponding operation is not commutative):

## Concatenation operator: both operands are coerced with as.String()
## and joined without a separator; the result is a String.
`+.String` <-
function(e1, e2)
{
    lhs <- as.String(e1)
    rhs <- as.String(e2)
    .String_from_string(paste0(lhs, rhs))
}

## Also provide Python-style string repetition.

## Repetition operator: one operand must be a numeric vector of length
## one (the repeat count); the other operand is repeated that many
## times and the pieces are pasted together into a single String.
## Signals an error if neither operand is a length-one numeric.
`*.String` <-
function(e1, e2)
{
    pieces <-
        if(is.numeric(e1) && (length(e1) == 1L))
            rep.int(e2, e1)
        else if(is.numeric(e2) && (length(e2) == 1L))
            rep.int(e1, e2)
        else
            stop("Invalid operands.")
    .String_from_string(paste(pieces, collapse = ""))
}

## What about c.String?

## Low-level constructor: convert x with enc2utf8() and set its class
## attribute to "String".  No length checking is done here.
.String_from_string <-
function(x)
    `class<-`(enc2utf8(x), "String")