"""FrequencyInferer analog for cftime.datetime objects"""
# The infer_freq function and the _CFTimeFrequencyInferer class defined here
# were copied and adapted for use with cftime.datetime objects, based on the
# source code of pandas.tseries.frequencies._FrequencyInferer.
# For reference, here is a copy of the pandas copyright notice:
# (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team
# All rights reserved.
# Copyright (c) 2008-2011 AQR Capital Management, LLC
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
# * Neither the name of the copyright holder nor the names of any
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import numpy as np
import pandas as pd
from ..core.common import _contains_datetime_like_objects
from .cftime_offsets import _MONTH_ABBREVIATIONS
from .cftimeindex import CFTimeIndex
# Durations expressed as integer counts of microseconds; each unit is derived
# from the next smaller one so the whole table shares a single base.
_ONE_MICRO = 1
_ONE_MILLI = 1000 * _ONE_MICRO
_ONE_SECOND = 1000 * _ONE_MILLI
_ONE_MINUTE = _ONE_SECOND * 60
_ONE_HOUR = _ONE_MINUTE * 60
_ONE_DAY = _ONE_HOUR * 24
def infer_freq(index):
    """
    Infer the most likely frequency given the input index.

    Parameters
    ----------
    index : CFTimeIndex, DataArray, DatetimeIndex, TimedeltaIndex, Series
        A Series or DataArray is first converted to an index built from its
        values (NOT ITS INDEX): ``datetime64[ns]`` values become a
        ``pandas.DatetimeIndex``, ``timedelta64[ns]`` values become a
        ``pandas.TimedeltaIndex`` and anything else becomes a CFTimeIndex.
        A CFTimeIndex is handled here; every other index is handed to
        `pandas.infer_freq`.

    Returns
    -------
    str or None
        None if no discernible frequency.

    Raises
    ------
    ValueError
        If a Series or DataArray is not 1D or does not contain datetime-like
        objects, or if a CFTimeIndex has fewer than three values.

    Notes
    -----
    Exceptions raised by `pandas.infer_freq` for non-CFTimeIndex input are
    propagated unchanged.
    """
    # Imported here rather than at module level, as in the original code
    # (presumably to avoid a circular import -- confirm before moving).
    from xarray.core.dataarray import DataArray

    if isinstance(index, (DataArray, pd.Series)):
        if index.ndim != 1:
            raise ValueError("'index' must be 1D")
        if not _contains_datetime_like_objects(DataArray(index)):
            raise ValueError("'index' must contain datetime-like objects")

        # Pick the index flavour from the dtype of the underlying values.
        values_dtype = np.asarray(index).dtype
        if values_dtype == "datetime64[ns]":
            index = pd.DatetimeIndex(index.values)
        elif values_dtype == "timedelta64[ns]":
            index = pd.TimedeltaIndex(index.values)
        else:
            index = CFTimeIndex(index.values)

    if not isinstance(index, CFTimeIndex):
        return pd.infer_freq(index)

    return _CFTimeFrequencyInferer(index).get_freq()
class _CFTimeFrequencyInferer:  # (pd.tseries.frequencies._FrequencyInferer):
    """Infer a frequency string for an index of cftime dates.

    The index must expose ``asi8``, ``year``, ``month``, ``is_unique`` and the
    ``is_monotonic_*`` flags, and support ``len``, iteration and ``[0]``.

    Raises
    ------
    ValueError
        On construction, if the index holds fewer than three dates.
    """

    def __init__(self, index):
        self.index = index
        self.values = index.asi8

        if len(index) < 3:
            raise ValueError("Need at least 3 dates to infer frequency")

        idx = self.index
        self.is_monotonic = (
            idx.is_monotonic_decreasing or idx.is_monotonic_increasing
        )

        # Lazily computed by the ``deltas`` / ``year_deltas`` / ``month_deltas``
        # properties below.
        self._deltas = None
        self._year_deltas = None
        self._month_deltas = None

    def get_freq(self):
        """Find the appropriate frequency string to describe the inferred frequency of self.index

        Adapted from `pandas.tseries.frequencies._FrequencyInferer.get_freq` for CFTimeIndexes.

        Returns
        -------
        str or None
            None when the index is not monotonic, not unique, or no rule fits.
        """
        if not (self.is_monotonic and self.index.is_unique):
            return None

        smallest = self.deltas[0]  # Smallest delta
        if _is_multiple(smallest, _ONE_DAY):
            return self._infer_daily_rule()

        # There is no possible intraday frequency with a non-unique delta
        # Different from pandas: we don't need to manage DST and business offsets in cftime
        if len(self.deltas) != 1:
            return None

        # Try the coarsest unit first; microseconds is the catch-all.
        intraday_units = (
            ("H", _ONE_HOUR),
            ("T", _ONE_MINUTE),
            ("S", _ONE_SECOND),
            ("L", _ONE_MILLI),
        )
        for alias, unit in intraday_units:
            if _is_multiple(smallest, unit):
                return _maybe_add_count(alias, smallest / unit)
        return _maybe_add_count("U", smallest / _ONE_MICRO)

    def _infer_daily_rule(self):
        """Return an annual, quarterly, monthly or daily alias, else None."""
        annual = self._get_annual_rule()
        if annual:
            year_step = self.year_deltas[0]
            anchor = _MONTH_ABBREVIATIONS[self.index[0].month]
            return _maybe_add_count(f"{annual}-{anchor}", year_step)

        quarterly = self._get_quartely_rule()
        if quarterly:
            quarter_step = self.month_deltas[0] / 3
            # Map the first date's month (mod 3) onto the anchor month
            # number used in the alias.
            anchor_by_remainder = {0: 12, 2: 11, 1: 10}
            anchor_number = anchor_by_remainder[self.index[0].month % 3]
            anchor = _MONTH_ABBREVIATIONS[anchor_number]
            return _maybe_add_count(f"{quarterly}-{anchor}", quarter_step)

        monthly = self._get_monthly_rule()
        if monthly:
            return _maybe_add_count(monthly, self.month_deltas[0])

        if len(self.deltas) == 1:
            # Daily as there is no "Weekly" offsets with CFTime
            return _maybe_add_count("D", self.deltas[0] / _ONE_DAY)

        # CFTime has no business freq and no "week of month" (WOM)
        return None

    def _get_annual_rule(self):
        """Return "AS"/"A" for a constant year step within one month, else None."""
        if len(self.year_deltas) > 1 or len(np.unique(self.index.month)) > 1:
            return None
        anchor = month_anchor_check(self.index)
        return {"cs": "AS", "ce": "A"}.get(anchor)

    def _get_quartely_rule(self):
        """Return "QS"/"Q" for a constant month step divisible by 3, else None."""
        if len(self.month_deltas) > 1 or self.month_deltas[0] % 3 != 0:
            return None
        anchor = month_anchor_check(self.index)
        return {"cs": "QS", "ce": "Q"}.get(anchor)

    def _get_monthly_rule(self):
        """Return "MS"/"M" for a constant month step, else None."""
        if len(self.month_deltas) > 1:
            return None
        anchor = month_anchor_check(self.index)
        return {"cs": "MS", "ce": "M"}.get(anchor)

    @property
    def deltas(self):
        """Sorted unique timedeltas as microseconds."""
        cached = self._deltas
        if cached is None:
            cached = self._deltas = _unique_deltas(self.values)
        return cached

    @property
    def year_deltas(self):
        """Sorted unique year deltas."""
        cached = self._year_deltas
        if cached is None:
            cached = self._year_deltas = _unique_deltas(self.index.year)
        return cached

    @property
    def month_deltas(self):
        """Sorted unique month deltas."""
        cached = self._month_deltas
        if cached is None:
            # Months counted on a single running axis so year changes are
            # ordinary steps.
            running_month = self.index.year * 12 + self.index.month
            cached = self._month_deltas = _unique_deltas(running_month)
        return cached
def _unique_deltas(arr):
    """Return the sorted unique differences between consecutive elements.

    Parameters
    ----------
    arr : array-like
        One-dimensional sequence of numbers.

    Returns
    -------
    numpy.ndarray
        Unique values of ``np.diff(arr)`` in ascending order.
    """
    # np.unique already returns its result sorted, so no separate sort
    # pass is needed.
    return np.unique(np.diff(arr))
def _is_multiple(us, mult: int):
    """Return whether ``us`` divides evenly by ``mult`` (zero remainder)."""
    remainder = us % mult
    return remainder == 0
def _maybe_add_count(base: str, count: float):
    """Prefix ``base`` with ``count`` unless ``count`` equals 1.

    ``count`` must be integer-valued (e.g. ``3`` or ``3.0``); it is rendered
    as an int, so a negative count yields a prefix such as ``-2``.
    """
    if count == 1:
        return base
    # Internal invariant: callers only pass whole multiples.
    assert count == int(count)
    multiplier = int(count)
    return f"{multiplier}{base}"
def month_anchor_check(dates):
    """Return the monthly offset string.

    Return "ce" if every date falls on the last day of its month
    (``date.day == date.daysinmonth``), "cs" if every date falls on the first
    day of its month, and None otherwise. "ce" is checked first, so it is
    also the result for an empty ``dates``.

    Replicated pandas._libs.tslibs.resolution.month_position_check
    but without business offset handling.
    """
    all_first_day = True
    all_last_day = True

    for stamp in dates:
        if all_first_day:
            all_first_day = stamp.day == 1

        if all_last_day:
            all_last_day = stamp.day == stamp.daysinmonth
        elif not all_first_day:
            # Neither anchor can hold any more; stop scanning.
            break

    if all_last_day:
        return "ce"
    if all_first_day:
        return "cs"
    return None
|