File: dbcal.R

package info (click to toggle)
r-cran-readstata13 0.11.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 804 kB
  • sloc: cpp: 1,770; ansic: 278; makefile: 2
file content (175 lines) | stat: -rw-r--r-- 6,433 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#
# Copyright (C) 2014-2025 Jan Marvin Garbuszus and Sebastian Jeworutzki
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.


#' Parse Stata business calendar files
#'
#' Create conversion table for business calendar dates.
#'
#' @param stbcalfile \emph{stbcal-file} Stata business calendar file created by
#'  Stata.
#' @return Returns a data.frame with two cols:
#' \describe{
#' \item{range:}{The date matching the businessdate. Date format.}
#' \item{buisdays:}{The Stata business calendar day. Integer format.}
#' }
#' @details Stata 12 introduced business calendar format. Business dates are
#' integer numbers in a certain range of days, weeks, months or years. In this
#' range some days are omitted (e.g. weekends or holidays). If a business
#' calendar was created, a stbcal file matching this calendar was created. This
#' file is required to read the business calendar. This parser reads the stbcal-
#' file and returns a data.frame with dates matching business calendar dates.
#'
#' A dta-file containing Stata business dates imported with read.stata13() shows
#' in formats which stdcal file is required (e.g. "%tbsp500" requires
#' sp500.stbcal).
#'
#' Stata allows adding a short description called purpose. This is added as an
#' attribute of the resulting data.frame.
#' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
#' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
#' @examples
#' sp500 <- stbcal(system.file("extdata/sp500.stbcal", package="readstata13"))
#' @importFrom stats complete.cases
#' @export
stbcal <- function(stbcalfile) {

  # Otherwise localised dates will be used.
  lct <- Sys.getlocale("LC_TIME"); Sys.setlocale("LC_TIME", "C")

  # Parse full file
  stbcal <- file(stbcalfile, "rb")
  x <- readLines(stbcal, file.info(stbcalfile)$size)
  close(stbcal)

  # Dateformat can be ymd, ydm, myd, mdy, dym or dmy
  if(any(grepl("dateformat ymd", x)))
    dateformat <- "%Y%b%d"
  if(any(grepl("dateformat ydm", x)))
    dateformat <- "%Y%d%b"
  if(any(grepl("dateformat myd", x)))
    dateformat <- "%b%Y%d"
  if(any(grepl("dateformat mdy", x)))
    dateformat <- "%b%d%Y"
  if(any(grepl("dateformat dym", x)))
    dateformat <- "%b%Y%d"
  if(any(grepl("dateformat dmy", x)))
    dateformat <- "%d%b%Y"

  # Range of stbcal. Range is required, contains start and end.
  rangepos <- grep("range", x)
  range <- x[rangepos]
  range <- strsplit(range, " ")
  rangestart <- range[[1]][2]
  rangestop <- range[[1]][3]
  range <- seq(from= as.Date(rangestart, dateformat),
               to= as.Date(rangestop, dateformat), "days")

  # Centerdate of stbcal. Date that matches 0.
  centerpos <- grep("centerdate", x)
  centerdate <- x[centerpos]
  centerdate <- gsub("centerdate ","",centerdate)
  centerdate <- as.Date(centerdate, dateformat)

  # Omit Dayofweek
  omitdayofweekpos <- grep ("omit dayofweek", x)
  omitdayofweek <- x[omitdayofweekpos]

  # Mo, Tu, We, Th, Fr, Sa, Su
  daysofweek <- weekdays(as.Date(range))

  stbcal <- data.frame(range = range, daysofweek=daysofweek)

  # Weekdays every week
  if (any(grepl("Mo", omitdayofweek)))
    stbcal$daysofweek[stbcal$daysofweek=="Monday"] <- NA
  if (any(grepl("Tu", omitdayofweek)))
    stbcal$daysofweek[stbcal$daysofweek=="Tuesday"] <- NA
  if (any(grepl("We", omitdayofweek)))
    stbcal$daysofweek[stbcal$daysofweek=="Wednesday"] <- NA
  if (any(grepl("Th", omitdayofweek)))
    stbcal$daysofweek[stbcal$daysofweek=="Thursday"] <- NA
  if (any(grepl("Fr", omitdayofweek)))
    stbcal$daysofweek[stbcal$daysofweek=="Friday"] <- NA
  if (any(grepl("Sa", omitdayofweek)))
    stbcal$daysofweek[stbcal$daysofweek=="Saturday"] <- NA
  if (any(grepl("Su", omitdayofweek)))
    stbcal$daysofweek[stbcal$daysofweek=="Sunday"] <- NA

  # Special days to be omitted
  if (any(grepl("omit date", x))) {
    dates <- grep("omit date", x)

    omitdates <- x[dates]
    omitdates <- gsub("omit date ", "", omitdates)
    dates <- as.Date(omitdates, dateformat)

    stbcal$daysofweek[which(stbcal$range%in%dates)] <- NA

    # Keep only wanted days stbcal$daysofweek behalten
    stbcal <- stbcal[complete.cases(stbcal$daysofweek),]
  }

  # In case centerdate is not rangestart:
  stbcal$buisdays <- NA
  stbcal$buisdays[stbcal$range==centerdate] <- 0
  stbcal$buisdays[stbcal$range<centerdate] <- seq(
    from=-length(stbcal$range[stbcal$range<centerdate]),
    to=-1)
  stbcal$buisdays[stbcal$range>centerdate] <- seq(
    from=1,
    to=length(stbcal$range[stbcal$range>centerdate]))

  # Add purpose
  if (any(grepl("purpose", x))) {
    purposepos <- grep("purpose", x)
    purpose <- x[purposepos]
    attr(stbcal, "purpose") <- purpose
  }

  # restore locale
  Sys.setlocale("LC_TIME", lct)

  return(stbcal)
}

#' Convert Stata business calendar dates in readable dates.
#'
#' Convert Stata business calendar dates in readable dates.
#'
#' @param buisdays numeric Vector of business dates
#' @param cal data.frame Conversion table for business calendar dates
#' @param format character String with date format as in \code{\link{as.Date}}
#' @return Returns a vector of readable dates.
#' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de}
#' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de}
#' @examples
#' # read business calendar and data
#' sp500 <- stbcal(system.file("extdata/sp500.stbcal", package="readstata13"))
#' dat <- read.dta13(system.file("extdata/statacar.dta", package="readstata13"))
#'
#' # convert dates and check
#' dat$ldatescal2 <- as.caldays(dat$ldate, sp500)
#' all(dat$ldatescal2==dat$ldatescal)
#' @export
as.caldays  <- function(buisdays, cal, format="%Y-%m-%d") {
  rownames(cal) <- cal$buisdays
  dates  <- cal[as.character(buisdays), "range"]

  if(!is.null(format))
    as.Date(dates, format = format)
  return(dates)
}