1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
|
module Stamp
  # Translates an example date/time string into a collection of emitter
  # objects by classifying each token of the example (month name, weekday
  # name, time zone abbreviation, one/two/four-digit number, and so on).
  # Tokens that are not recognized are kept as literal string emitters.
  class Translator
    # Full list of time zone abbreviations from
    # http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations
    TIME_ZONE_ABBREVIATIONS = %w[
      ACDT ACST ACT ADT AEDT AEST AFT AKDT AKST AMST AMT ART AST AWDT AWST AZOST AZT
      BDT BIOT BIT BOT BRT BST BTT CAT CCT CDT CEDT CEST CET CHADT CHAST CHOT ChST CHUT
      CIST CIT CKT CLST CLT COST COT CST CT CVT CWST CXT DAVT DDUT DFT EASST EAST EAT
      ECT EDT EEDT EEST EET EGST EGT EIT EST FET FJT FKST FKT FNT GALT GAMT GET GFT
      GILT GIT GMT GST GYT HADT HAEC HAST HKT HMT HOVT HST ICT IDT IOT IRDT IRKT IRST
      IST JST KGT KOST KRAT KST LHST LINT MAGT MART MAWT MDT MET MEST MHT MIST MIT MMT
      MSK MST MUT MVT MYT NCT NDT NFT NPT NST NT NUT NZDT NZST OMST ORAT PDT PET PETT
      PGT PHOT PHT PKT PMDT PMST PONT PST RET ROTT SAKT SAMT SAST SBT SCT SGT SLT SRT
      SST SYOT TAHT THA TFT TJT TKT TLT TMT TOT TVT UCT ULAT UTC UYST UYT UZT VET VLAT
      VOLT VOST VUT WAKT WAST WAT WEDT WEST WET WST YAKT YEKT
    ].freeze

    # Whole-token matchers. They are anchored with \A and \z so that a token
    # must match in its entirety (^ and $ would only anchor to a line).
    # The time zone matcher is case-sensitive; the name matchers are not.
    TIMEZONE_REGEXP        = /\A(#{TIME_ZONE_ABBREVIATIONS.join('|')})\z/
    MONTHNAMES_REGEXP      = /\A(#{Date::MONTHNAMES.compact.join('|')})\z/i
    ABBR_MONTHNAMES_REGEXP = /\A(#{Date::ABBR_MONTHNAMES.compact.join('|')})\z/i
    DAYNAMES_REGEXP        = /\A(#{Date::DAYNAMES.join('|')})\z/i
    ABBR_DAYNAMES_REGEXP   = /\A(#{Date::ABBR_DAYNAMES.join('|')})\z/i

    ONE_DIGIT_REGEXP  = /\A\d{1}\z/
    TWO_DIGIT_REGEXP  = /\A\d{2}\z/
    FOUR_DIGIT_REGEXP = /\A\d{4}\z/

    # Deliberately unanchored: used to locate a time inside a longer example.
    # Capture groups, in order: hour, ":", minutes, whitespace, optional ":",
    # optional seconds, optional whitespace, optional am/pm (any case).
    TIME_REGEXP = /(\d{1,2})(:)(\d{2})(\s*)(:)?(\d{2})?(\s*)?([ap]m)?/i

    MERIDIAN_LOWER_REGEXP = /\A(a|p)m\z/
    MERIDIAN_UPPER_REGEXP = /\A(A|P)M\z/
    ORDINAL_DAY_REGEXP    = /\A(\d{1,2})(st|nd|rd|th)\z/

    # Disambiguate based on value
    OBVIOUS_24_HOUR = 13..23
    OBVIOUS_DAY     = 13..31
    OBVIOUS_YEAR    = 32..99

    # Shared emitters for two-digit date parts. The year emitter reduces the
    # year modulo 100 before it is emitted.
    TWO_DIGIT_YEAR_EMITTER  = Emitters::TwoDigit.new(:year) { |year| year % 100 }
    TWO_DIGIT_MONTH_EMITTER = Emitters::TwoDigit.new(:month)
    TWO_DIGIT_DAY_EMITTER   = Emitters::TwoDigit.new(:day)

    # Maps an hour in 0..23 onto 1..12 (0 -> 12, 12 -> 12, 13 -> 1).
    HOUR_TO_12_HOUR = lambda { |h| ((h - 1) % 12) + 1 }

    # Builds the emitters for an example string.
    #
    # The example is split around the first substring that looks like a time;
    # the text before it is tokenized on word boundaries and treated as date
    # parts, the time itself is broken into its TIME_REGEXP capture groups,
    # and whatever follows is translated recursively.
    #
    # @param example [String] example date/time text
    # @return [Emitters::Composite] the emitters collected for the example
    def translate(example)
      # extract any substrings that look like times, like "23:59" or "8:37 am"
      before, time_example, after = example.partition(TIME_REGEXP)

      # build emitters from the example date
      emitters = Emitters::Composite.new
      emitters << build_emitters(before.split(/\b/)) { |token| date_emitter(token) }

      # build emitters from the example time
      unless time_example.empty?
        # Optional capture groups that did not participate come back as nil;
        # time_emitter returns nil for them, so build_emitters wraps them in
        # Emitters::String.new(nil).
        # NOTE(review): confirm Emitters::String tolerates a nil value.
        time_parts = time_example.scan(TIME_REGEXP).first
        emitters << build_emitters(time_parts) { |token| time_emitter(token) }
      end

      # recursively process any remaining text
      emitters << translate(after) unless after.empty?

      emitters
    end

    # Transforms tokens that look like date/time parts to emitter objects.
    #
    # @param tokens [Array] tokens to classify
    # @yield [token] block returning an emitter for the token, or nil/false
    #   when the token is not recognized
    # @return [Array] one emitter per token; unrecognized tokens become
    #   Emitters::String instances wrapping the token itself
    def build_emitters(tokens)
      tokens.map do |token|
        yield(token) || Emitters::String.new(token)
      end
    end

    # Chooses an emitter for one piece of an example time.
    #
    # @param token [String, nil] a TIME_REGEXP capture
    # @return [Object, nil] an emitter, or nil when the token is not a
    #   meridian indicator or a one/two-digit number
    def time_emitter(token)
      case token
      when MERIDIAN_LOWER_REGEXP
        Emitters::AmPm.new
      when MERIDIAN_UPPER_REGEXP
        Emitters::AmPm.new { |v| v.upcase }
      when TWO_DIGIT_REGEXP
        # A two-digit number may be the hour, the minutes or the seconds.
        Emitters::Ambiguous.new(
          two_digit_hour_emitter(token),
          Emitters::TwoDigit.new(:min),
          Emitters::TwoDigit.new(:sec))
      when ONE_DIGIT_REGEXP
        # 12-hour clock without leading zero
        Emitters::Delegate.new(:hour, &HOUR_TO_12_HOUR)
      end
    end

    # Chooses the hour emitter for a two-digit token from an example time.
    #
    # @param token [String] two-digit text
    # @return [Emitters::TwoDigit] a 24-hour emitter when the value is in
    #   OBVIOUS_24_HOUR, otherwise a 12-hour emitter
    def two_digit_hour_emitter(token)
      case token.to_i
      when OBVIOUS_24_HOUR
        # 24-hour clock
        Emitters::TwoDigit.new(:hour)
      else
        # 12-hour clock with leading zero
        Emitters::TwoDigit.new(:hour, &HOUR_TO_12_HOUR)
      end
    end

    # Chooses an emitter for one token of an example date.
    #
    # The branches are tried in order, so a token that matches both the full
    # and the abbreviated month list (such as "May") is treated as a full
    # month name.
    #
    # @param token [String] a token from the date portion of the example
    # @return [Object, nil] an emitter, or nil when the token is not
    #   recognized as a date part
    def date_emitter(token)
      case token
      when MONTHNAMES_REGEXP
        Emitters::Lookup.new(:month, Date::MONTHNAMES)
      when ABBR_MONTHNAMES_REGEXP
        Emitters::Lookup.new(:month, Date::ABBR_MONTHNAMES)
      when DAYNAMES_REGEXP
        Emitters::Lookup.new(:wday, Date::DAYNAMES)
      when ABBR_DAYNAMES_REGEXP
        Emitters::Lookup.new(:wday, Date::ABBR_DAYNAMES)
      when TIMEZONE_REGEXP
        Emitters::Delegate.new(:zone)
      when FOUR_DIGIT_REGEXP
        Emitters::Delegate.new(:year)
      when ORDINAL_DAY_REGEXP
        Emitters::Ordinal.new(:day)
      when TWO_DIGIT_REGEXP
        case token.to_i
        when OBVIOUS_DAY
          TWO_DIGIT_DAY_EMITTER
        when OBVIOUS_YEAR
          TWO_DIGIT_YEAR_EMITTER
        else
          # Values outside both ranges could be a month, a day or a year.
          Emitters::Ambiguous.new(
            TWO_DIGIT_MONTH_EMITTER,
            TWO_DIGIT_DAY_EMITTER,
            TWO_DIGIT_YEAR_EMITTER)
        end
      when ONE_DIGIT_REGEXP
        # A single digit could be a month or a day without leading zero.
        Emitters::Ambiguous.new(
          Emitters::Delegate.new(:month),
          Emitters::Delegate.new(:day))
      end
    end
  end
end
|