File: splitByPattern.R

package info (click to toggle)
r-cran-r.utils 2.5.0-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 2,936 kB
  • sloc: sh: 18; makefile: 6
file content (74 lines) | stat: -rwxr-xr-x 2,029 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#########################################################################/**
# @RdocDefault splitByPattern
#
# @title "Splits a single character string by pattern"
# 
# \description{
#   @get "title".  The main difference compared to @see "base::strsplit"
#   is that this method also returns the part of the string that matched
#   the pattern. Also, it only takes a single character string.
# }
#
# @synopsis
#
# \arguments{
#  \item{str}{A single @character string to be split.}
#  \item{pattern}{A regular expression @character string.}
#  \item{...}{Not used.}
# }
#
# \value{
#   Returns a named @character @vector with names equal to \code{"TRUE"}
#   if element is a pattern part and \code{"FALSE"} otherwise.
#   If there is nothing to return, for instance when \code{str} is the
#   empty string, the result is an empty named @character @vector.
# }
#
# \details{
#   The pattern is matched repeatedly against what remains of the string
#   after the previous match has been cut off, and text parts and pattern
#   parts alternate in the result.  An empty text part between two
#   adjacent matches is therefore kept, whereas an empty text part at the
#   very beginning is dropped, and an empty remainder after the last match
#   is not included.
#
#   A match of zero length is never returned as a pattern part.  Instead
#   the search moves one character forward, and the characters stepped
#   over become part of the surrounding text part.
# }
#
# @examples "../incl/splitByPattern.Rex"
#
# @author
#
# \seealso{
#   Compare to @see "base::strsplit".
# }
#
# @keyword programming
#*/#########################################################################
setMethodS3("splitByPattern", "default",  function(str, pattern, ...) {
  # Argument 'str':
  str <- Arguments$getCharacter(str);

  # Argument 'pattern':
  pattern <- Arguments$getCharacter(pattern);

  # A zero-length 'str' holds no text; treat it like the empty string.
  if (length(str) == 0L)
    str <- "";

  # Parts are collected pairwise as (text, match) in a list and flattened
  # once at the end, instead of regrowing a vector with c() for each match.
  chunks <- list();
  nbrOfChunks <- 0L;

  # Text stepped over while moving past empty matches; it belongs to the
  # text part that precedes the next non-empty match (or to the remainder).
  skipped <- "";

  while (nchar(str) > 0L) {
    hit <- regexpr(pattern, str);
    pos <- as.integer(hit);
    if (pos == -1L)
      break;
    matchLength <- attr(hit, "match.length");

    if (matchLength == 0L) {
      # An empty match consumes nothing, so cutting the string at it would
      # leave 'str' unchanged and the loop would never end.  Move one
      # character past the match position and keep everything stepped
      # over as ordinary text.
      skipped <- paste0(skipped, substring(str, first=1L, last=pos));
      str <- substring(str, first=pos+1L);
      next;
    }

    lastPos <- pos + matchLength - 1L;
    # 'last' may be zero here, in which case substring() returns "".
    text <- paste0(skipped, substring(str, first=1L, last=pos-1L));
    flag <- substring(str, first=pos, last=lastPos);
    str <- substring(str, first=lastPos+1L);

    chunks[[nbrOfChunks + 1L]] <- text;
    chunks[[nbrOfChunks + 2L]] <- flag;
    nbrOfChunks <- nbrOfChunks + 2L;
    skipped <- "";
  }

  # Whatever is left after the last match is a trailing text part.
  rest <- paste0(skipped, str);
  if (nchar(rest) > 0L)
    chunks[[nbrOfChunks + 1L]] <- rest;

  # as.character() turns the NULL from an empty list into character(0).
  parts <- as.character(unlist(chunks, use.names=FALSE));

  # Add indicator if a pattern string or not.  Parts alternate
  # text/match, starting with a text part.
  isPattern <- rep(c(FALSE, TRUE), length.out=length(parts));
  names(parts) <- isPattern;

  # Drop an empty leading text part (the string started with a match).
  if (length(parts) > 0L && nchar(parts[1L]) == 0L)
    parts <- parts[-1L];

  parts;
}) # splitByPattern()


############################################################################
# HISTORY: 
# 2005-07-27
# o BUG FIX: Used 'Argument' instead of 'Arguments'.
# 2005-07-06
# o Created.
############################################################################