File: CFCalendar.R

package info (click to toggle)
r-cran-cftime 1.5.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,752 kB
  • sloc: sh: 13; makefile: 2
file content (357 lines) | stat: -rw-r--r-- 16,736 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
#' @import R6
NULL

#' @title Basic CF calendar
#'
#' @description This class represents a basic CF calendar. It should not be
#'   instantiated directly; instead, use one of the descendant classes.
#'
#'   This internal class stores the information to represent date and time
#'   values using the CF conventions. An instance is created by the exported
#'   [CFTime] class, which also exposes the relevant properties of this class.
#'
#'   The following calendars are supported:
#'
#' \itemize{
#'   \item [`gregorian\standard`][CFCalendarStandard], the international standard calendar for civil use.
#'   \item [`proleptic_gregorian`][CFCalendarProleptic], the standard calendar but extending before 1582-10-15
#'   when the Gregorian calendar was adopted.
#'   \item [`tai`][CFCalendarTAI], International Atomic Time clock with dates expressed using the Gregorian calendar.
#'   \item [`utc`][CFCalendarUTC], Coordinated Universal Time clock with dates expressed using the Gregorian calendar.
#'   \item [`julian`][CFCalendarJulian], every fourth year is a leap year (so including the years 1700, 1800, 1900, 2100, etc).
#'   \item [`noleap\365_day`][CFCalendar365], all years have 365 days.
#'   \item [`all_leap\366_day`][CFCalendar366], all years have 366 days.
#'   \item [`360_day`][CFCalendar360], all years have 360 days, divided over 12 months of 30 days.
#' }
#' @references
#'   https://cfconventions.org/Data/cf-conventions/cf-conventions-1.12/cf-conventions.html#calendar
#' @docType class
CFCalendar <- R6::R6Class("CFCalendar",
  public = list(
    #' @field name Descriptive name of the calendar, as per the CF Metadata
    #' Conventions.
    name = "",

    #' @field definition The string that defines the units and the origin, as
    #' per the CF Metadata Conventions.
    definition = "",

    #' @field unit The numeric id of the unit of the calendar.
    unit = -1L,

    #' @field origin `data.frame` with fields for the origin of the calendar.
    origin = data.frame(),

    #' @description Create a new CF calendar.
    #' @param nm The name of the calendar. This must follow the CF Metadata
    #' Conventions.
    #' @param definition The string that defines the units and the origin, as
    #' per the CF Metadata Conventions.
    initialize = function(nm, definition) {
      stopifnot(length(definition) ==  1L, length(nm) == 1L)
      self$name <- tolower(nm)
      self$definition <- definition

      parts <- strsplit(definition, " ")[[1L]]
      if ((length(parts) < 3L) || !(tolower(parts[2L]) %in% c("since", "after", "from", "ref", "per")))
        stop("Definition string does not appear to be a CF-compliant time coordinate description", call. = FALSE)
      u <- which(CFt$CFunits$unit == tolower(parts[1L]))
      if (length(u) == 0L) stop("Unsupported unit: ", parts[1L], call. = FALSE)
      self$unit <- CFt$CFunits$id[u]

      dt <- self$parse(paste(parts[3L:length(parts)], collapse = " "))
      if (is.na(dt$year[1L]))
        stop("Definition string does not appear to be a CF-compliant time coordinate description: invalid base date specification", call. = FALSE)
      self$origin <- dt
    },

    #' @description Print information about the calendar to the console.
    #' @param ... Ignored.
    #' @return `self`, invisibly.
    print = function(...) {
      tz <- self$timezone
      if (tz == "+0000") tz <- ""
      cat("CF calendar:",
          "\n  Origin  : ", self$origin_date, " ", self$origin_time, tz,
          "\n  Units   : ", CFt$units$name[self$unit],
          "\n  Type    : ", self$name, "\n",
          sep = "")
      invisible(self)
    },

    #' @description Indicate which of the supplied dates are valid.
    #' @param ymd `data.frame` with dates parsed into their parts in columns
    #'   `year`, `month` and `day`. Any other columns are disregarded.
    #' @return `NULL`. A warning will be generated to the effect that a
    #' descendant class should be used for this method.
    valid_days = function(ymd) {
      warning("Use a descendant class from `CFCalendar` to call this method.", call. = FALSE) # nocov
      NULL                                                                                    # nocov
    },

    #' @description Indicate if the time series described using this calendar
    #'   can be safely converted to a standard date-time type (`POSIXct`,
    #'   `POSIXlt`, `Date`).
    #'
    #'   Only the 'standard' calendar and the 'proleptic_gregorian' calendar
    #'   when all dates in the time series are more recent than 1582-10-15
    #'   (inclusive) can be safely converted, so this method returns `FALSE` by
    #'   default to cover the majority of cases.
    #' @param offsets The offsets from the CFtime instance.
    #' @return `FALSE` by default.
    POSIX_compatible = function(offsets) {
      FALSE
    },

    #' @description This method tests if the `CFCalendar` instance in argument
    #'   `cal` is compatible with `self`, meaning that they are of the same
    #'   class and have the same unit. Calendars "standard", and "gregorian" are
    #'   compatible, as are the pairs of "365_day" and "no_leap", and "366_day"
    #'   and "all_leap".
    #' @param cal Instance of a descendant of the `CFCalendar` class.
    #' @return `TRUE` if the instance in argument `cal` is compatible with
    #'   `self`, `FALSE` otherwise.
    is_compatible = function(cal) {
      self$unit == cal$unit && class(self)[1L] == class(cal)[1L]
    },

    #' @description This method tests if the `CFCalendar` instance in argument
    #'   `cal` is equivalent to `self`, meaning that they are of the same class,
    #'   have the same unit, and equivalent origins. Calendars "standard", and
    #'   "gregorian" are equivalent, as are the pairs of "365_day" and
    #'   "no_leap", and "366_day" and "all_leap".
    #'
    #'   Note that the origins need not be identical, but their parsed values
    #'   have to be. "2000-01" is parsed the same as "2000-01-01 00:00:00", for
    #'   instance.
    #' @param cal Instance of a descendant of the `CFCalendar` class.
    #' @return `TRUE` if the instance in argument `cal` is equivalent to
    #'   `self`, `FALSE` otherwise.
    is_equivalent = function(cal) {
      sum(self$origin[1L,1L:6L] == cal$origin[1L,1L:6L]) == 6L &&  # Offset column is NA
      self$is_compatible(cal)
    },

    #' @description Parsing a vector of date-time character strings into parts.
    #' @param d character. A character vector of date-times.
    #' @return A `data.frame` with columns year, month, day, hour, minute,
    #'   second, time zone, and offset. Invalid input data will appear as `NA`.
    parse = function(d) {
      # Parsers

      # UDUNITS broken timestamp definition, with some changes
      # broken_timestamp	{broken_date}({space|T}+{broken_clock})? -- T not in definition but present in lexer code
      # broken_date		    {year}-{month}(-{day})?
      # year		        	[+-]?[0-9]{1,4}
      # month			        0?[1-9]|1[0-2]
      # day			          0?[1-9]|[1-2][0-9]|30|31
      # broken_clock		  {hour}:{minute}(:{second})?
      # hour			        [0-1]?[0-9]|2[0-3]      -- sign on hour not allowed, but see timezone
      # minute			      [0-5]?[0-9]
      # second		      	{minute}?               -- leap second not supported
      # fractional part   (\.[0-9]*)?
      # timezone          [+-]?{hour}(:{minute})? -- added, present in lexer code
      broken <- paste0(
        "^",                                    # anchor string at start
        "([+-]?[0-9]{1,4})",                    # year, with optional sign
        "-(0?[1-9]|1[012])",                    # month
        "(?:-(0?[1-9]|[12][0-9]|3[01]))?",      # day, optional
        "(?:[T ]",                              # if a time is following, separate with a single whitespace character or a "T"
        "([01]?[0-9]|2[0-3])",                  # hour
        ":([0-5]?[0-9])",                       # minute
        "(?::([0-5]?[0-9]))?",                  # second, optional
        "(?:\\.([0-9]*))?",                     # optional fractional part of the smallest specified unit
        ")?",                                   # close optional time capture group
        "(?:\\s",                               # if a time zone offset is following, separate with a single whitespace character
        "([+-])?([01]?[0-9]|2[0-3])",           # tz hour, with optional sign
        "(?::(00|15|30|45))?",                  # optional tz minute, only 4 possible values
        ")?",                                   # close optional timezone capture group
        "$"                                     # anchor string at end
      )

      iso8601 <- paste0(
        "^",
        "([0-9]{4})",
        "-(0[1-9]|1[012])",
        "-(0[1-9]|[12][0-9]|3[01])?",
        "(?:",
        "[T ]([01][0-9]|2[0-3])",
        "(?::([0-5][0-9]))?",
        "(?::([0-5][0-9]))?",
        "(?:[\\.,]([0-9]*))?",
        ")?",
        "(?:([Z+-])([01][0-9]|2[0-3])?(?::(00|15|30|45))?", ## FIXME: Z?, smaller number of captures
        ")?$"
      )

      # UDUNITS packed timestamp definition - NOT YET USED
      # packed_timestamp	{packed_date}({space|T}+{packed_clock})?  -- T and space only allowed in packed time follows
      # packed_date	    	{year}({month}{day}?)?           -- must be YYYYMMDD or else format is ambiguous, as per lexer code
      # packed_clock		  {hour}({minute}{second}?)?       -- must be HHMMSS to be unambiguous
      # timezone          [+-]?{hour}({minute})?           -- added, present in lexer code, must be HHMM
      # packed <- stringi::stri_join(
      #   "^",                                    # anchor string at start
      #   "([+-]?[0-9]{4})",                      # year, with optional sign
      #   "(0[1-9]|1[012])?",                     # month, optional
      #   "(0[1-9]|[12][0-9]|3[01])?",            # day, optional
      #   "(?:[T,\\s]",                           # if a time is following, separate with a single whitespace character or a "T"
      #     "([01][0-9]|2[0-3])?",                # hour
      #     "([0-5][0-9])?",                      # minute, optional
      #     "([0-5]?[0-9](?:\\.[0-9]*)?)?",       # second, optional, with optional fractional part
      #   ")?",                                   # close optional time capture group
      #   "(?:\\s",                               # if a time zone offset is following, separate with a single whitespace character
      #     "([+-]?[01][0-9]|2[0-3])?",           # hour, with optional sign
      #     "(00|15|30|45)?",                     # minute, only 4 possible values
      #   ")?",                                   # close optional timezone capture group
      #   "$"                                     # anchor string at end
      # )

      parse <- data.frame(year = integer(), month = integer(), day = integer(),
                          hour = integer(), minute = integer(), second = numeric(), frac = character(),
                          tz_sign = character(), tz_hour = character(), tz_min = character())

      # Drop "UTC", if given
      d <- trimws(gsub("UTC$", "", d))

      cap <- utils::strcapture(iso8601, d, parse)
      missing <- which(is.na(cap$year))
      if (length(missing) > 0L)
        cap[missing,] <- utils::strcapture(broken, d[missing], parse)

      # Assign any fraction to the appropriate time part
      cap$frac[is.na(cap$frac)] <- "0"
      frac <- as.numeric(paste0("0.", cap$frac))
      if (sum(frac) > 0) {
        ndx <- which(!(is.na(cap$second)) & frac > 0)
        if (length(ndx) > 0L) cap$second[ndx] <- cap$second[ndx] + frac[ndx]
        ndx <- which(!(is.na(cap$minute)) & is.na(cap$second) & frac > 0)
        if (length(ndx) > 0L) cap$second[ndx] <- 60L * frac[ndx]
        ndx <- which(!(is.na(cap$hour)) & is.na(cap$minute) & frac > 0)
        if (length(ndx) > 0L) {
          secs <- 3600 * frac
          cap$minute[ndx] <- secs[ndx] %/% 60
          cap$second[ndx] <- secs[ndx] %% 60
        }
      }
      cap$frac <- NULL

      # Convert NA time parts to 0 - in CF default time is 00:00:00 when not specified
      cap$hour[is.na(cap$hour)] <- 0L
      cap$minute[is.na(cap$minute)] <- 0L
      cap$second[is.na(cap$second)] <- 0L

      # Set timezone to default value where needed
      ndx <- which(cap$tz_sign == "Z")
      if (length(ndx) > 0L) {
        cap$tz_sign[ndx] <- "+"
        cap$tz_hour[ndx] <- "00"
        cap$tz_min[ndx] <- "00"
      }
      cap$tz <- paste0(ifelse(cap$tz_sign == "", "+", cap$tz_sign),
                       ifelse(cap$tz_hour == "", "00", cap$tz_hour),
                       ifelse(cap$tz_min == "", "00", cap$tz_min))
      cap$tz <- ifelse(cap$tz =="NANANA", "+0000", cap$tz)
      cap$tz_sign <- cap$tz_hour <- cap$tz_min <- NULL

      # Set optional date parts to 1 if not specified
      cap$month[is.na(cap$month)] <- 1L
      cap$day[is.na(cap$day)] <- 1L

      # Check date validity
      invalid <- !self$valid_days(cap)
      invalid[is.na(invalid)] <- TRUE
      if (sum(invalid) > 0L) cap[invalid,] <- rep(NA, 7)

      # Calculate offsets
      if (nrow(self$origin) == 0L) {        # if there's no origin yet, don't calculate offsets
        cap$offset <- rep(0, nrow(cap))     # this happens, f.i., when a CFCalendar is created
      } else {
        days <- self$date2offset(cap)
        cap$offset <- round((days * 86400 + (cap$hour - self$origin$hour[1]) * 3600 +
                             (cap$minute - self$origin$minute[1]) * 60 +
                             cap$second - self$origin$second) / CFt$units$seconds[self$unit], 6)
      }
      cap
    },

    #' @description Decompose a vector of offsets, in units of the calendar, to
    #'   their timestamp values. This adds a specified amount of time to the
    #'   origin of a `CFTime` object.
    #'
    #'   This method may introduce inaccuracies where the calendar unit is
    #'   "months" or "years", due to the ambiguous definition of these units.
    #' @param offsets Vector of numeric offsets to add to the origin of the
    #'   calendar.
    #' @return A `data.frame` with columns for the timestamp elements and as
    #'   many rows as there are offsets.
    offsets2time = function(offsets = NULL) {
      if(is.null(offsets) || (len <- length(offsets)) == 0L)
        return(data.frame(year = integer(), month = integer(), day = integer(),
                          hour = integer(), minute = integer(), second = numeric(),
                          tz = character(), offset = numeric()))

      if (self$unit <= 4L) { # Days, hours, minutes, seconds
        # First add time: convert to seconds first, then recompute time parts
        secs <- offsets * CFt$units$seconds[self$unit] +
                self$origin$hour * 3600 + self$origin$minute * 60 + self$origin$second
        days <- secs %/% 86400L            # overflow days
        secs <- round(secs %% 86400L, 3L)  # drop overflow days from time, round down to milli-seconds to avoid errors

        # Time elements for output
        hrs <- secs %/% 3600L
        mins <- (secs %% 3600L) %/% 60L
        secs <- secs %% 60L

        # Now add days using the calendar
        out <- if (any(days != 0L))
          self$offset2date(days)
        else
          data.frame(year = rep(self$origin$year, len),
                     month = rep(self$origin$month, len),
                     day = rep(self$origin$day, len))

        # Put it all back together again
        out$hour <- hrs
        out$minute <- mins
        out$second <- secs
        out$tz <- rep(self$timezone, len)
      } else { # Months, years
        out <- self$origin[rep(1L, len), ]
        if (self$unit == 5L) { # Offsets are months
          months <- out$month + offsets - 1L
          out$month <- months %% 12L + 1L
          out$year <- out$year + months %/% 12L
        } else {               # Offsets are years
          out$year <- out$year + offsets
        }
      }
      out$offset <- offsets
      out
    }

  ),
  active = list(
    #' @field origin_date (read-only) Character string with the date of the
    #'   calendar.
    origin_date = function(value) {
      if (missing(value)) {
        sprintf("%04d-%02d-%02d", self$origin$year, self$origin$month, self$origin$day)
      }
    },

    #' @field origin_time (read-only) Character string with the time of the
    #'   calendar.
    origin_time = function(value) {
      if (missing(value)) {
        .format_time(self$origin)
      }
    },

    #' @field timezone (read-only) Character string with the time zone of the
    #' origin of the calendar.
    timezone = function(value) {
      if (missing(value))
        self$origin$tz
    }
  )
)