File: translator.rb

package info (click to toggle)
ruby-stamp 0.6.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 220 kB
  • sloc: ruby: 371; makefile: 4
file content (153 lines) | stat: -rw-r--r-- 5,017 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
module Stamp
  # Turns a human-readable example string (e.g. "Jan 1, 1999 8:37 am") into a
  # composite of emitter objects, one per recognized date/time part, with any
  # unrecognized text carried along as literal string emitters.
  #
  # The emitter classes themselves live under Stamp::Emitters and are defined
  # outside this file.
  class Translator

    # Full list of time zone abbreviations from
    # http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations
    TIME_ZONE_ABBREVIATIONS = %w{
        ACDT ACST ACT ADT AEDT AEST AFT AKDT AKST AMST AMT ART AST AWDT AWST AZOST AZT
        BDT BIOT BIT BOT BRT BST BTT CAT CCT CDT CEDT CEST CET CHADT CHAST CHOT ChST CHUT
        CIST CIT CKT CLST CLT COST COT CST CT CVT CWST CXT DAVT DDUT DFT EASST EAST EAT
        ECT EDT EEDT EEST EET EGST EGT EIT EST FET FJT FKST FKT FNT GALT GAMT GET GFT
        GILT GIT GMT GST GYT HADT HAEC HAST HKT HMT HOVT HST ICT IDT IOT IRDT IRKT IRST
        IST JST KGT KOST KRAT KST LHST LINT MAGT MART MAWT MDT MET MEST MHT MIST MIT MMT
        MSK MST MUT MVT MYT NCT NDT NFT NPT NST NT NUT NZDT NZST OMST ORAT PDT PET PETT
        PGT PHOT PHT PKT PMDT PMST PONT PST RET ROTT SAKT SAMT SAST SBT SCT SGT SLT SRT
        SST SYOT TAHT THA TFT TJT TKT TLT TMT TOT TVT UCT ULAT UTC UYST UYT UZT VET VLAT
        VOLT VOST VUT WAKT WAST WAT WEDT WEST WET WST YAKT YEKT
      }.freeze

    # Whole-token matchers. \A and \z anchor to the start and end of the token;
    # ^ and $ would only anchor to a line and so could match one line of a
    # multi-line token.
    TIMEZONE_REGEXP        = /\A(#{TIME_ZONE_ABBREVIATIONS.join('|')})\z/
    MONTHNAMES_REGEXP      = /\A(#{Date::MONTHNAMES.compact.join('|')})\z/i
    ABBR_MONTHNAMES_REGEXP = /\A(#{Date::ABBR_MONTHNAMES.compact.join('|')})\z/i
    DAYNAMES_REGEXP        = /\A(#{Date::DAYNAMES.join('|')})\z/i
    ABBR_DAYNAMES_REGEXP   = /\A(#{Date::ABBR_DAYNAMES.join('|')})\z/i

    ONE_DIGIT_REGEXP       = /\A\d{1}\z/
    TWO_DIGIT_REGEXP       = /\A\d{2}\z/
    FOUR_DIGIT_REGEXP      = /\A\d{4}\z/

    # Deliberately unanchored: used to locate a time anywhere inside the example.
    # Capture groups, in order: hour, ":", minute, whitespace, optional ":",
    # optional second, optional whitespace, optional am/pm.
    TIME_REGEXP            = /(\d{1,2})(:)(\d{2})(\s*)(:)?(\d{2})?(\s*)?([ap]m)?/i

    MERIDIAN_LOWER_REGEXP  = /\A(a|p)m\z/
    MERIDIAN_UPPER_REGEXP  = /\A(A|P)M\z/

    ORDINAL_DAY_REGEXP     = /\A(\d{1,2})(st|nd|rd|th)\z/

    # Disambiguate based on value
    OBVIOUS_24_HOUR        = 13..23
    OBVIOUS_DAY            = 13..31
    OBVIOUS_YEAR           = 32..99

    # Shared emitter instances reused for every two-digit date token.
    TWO_DIGIT_YEAR_EMITTER  = Emitters::TwoDigit.new(:year) { |year| year % 100 }
    TWO_DIGIT_MONTH_EMITTER = Emitters::TwoDigit.new(:month)
    TWO_DIGIT_DAY_EMITTER   = Emitters::TwoDigit.new(:day)

    # Maps an hour in 0..23 onto the 12-hour clock (0 -> 12, 13 -> 1, 12 -> 12).
    HOUR_TO_12_HOUR         = lambda { |h| ((h - 1) % 12) + 1 }

    # Builds the emitters for an example string.
    #
    # The text before the first time-like substring is tokenized on word
    # boundaries and treated as date parts; the time-like substring is broken
    # into its TIME_REGEXP capture groups and treated as time parts; whatever
    # follows is processed recursively the same way.
    #
    # example - the example String to translate.
    #
    # Returns an Emitters::Composite holding everything that was appended.
    def translate(example)
      # extract the first substring that looks like a time, like "23:59" or "8:37 am"
      before, time_example, after = example.partition(TIME_REGEXP)

      emitters = Emitters::Composite.new

      # build emitters from the example date
      date_tokens = before.split(/\b/)
      emitters << build_emitters(date_tokens) { |token| date_emitter(token) }

      # build emitters from the example time
      unless time_example.empty?
        # NOTE(review): capture groups that did not participate in the match
        # come back as nil and end up wrapped in Emitters::String via
        # build_emitters -- confirm that emitter tolerates a nil value.
        time_parts = time_example.scan(TIME_REGEXP).first
        emitters << build_emitters(time_parts) { |token| time_emitter(token) }
      end

      # recursively process any remaining text
      emitters << translate(after) unless after.empty?
      emitters
    end

    # Transforms tokens that look like date/time parts to emitter objects.
    #
    # tokens - an Array of tokens.
    #
    # Yields each token to the block. A nil/false block result means the token
    # was not recognized, in which case it is wrapped in an Emitters::String.
    #
    # Returns an Array with one emitter per token, in token order.
    def build_emitters(tokens)
      tokens.map do |token|
        yield(token) || Emitters::String.new(token)
      end
    end

    # Chooses an emitter for one piece of a time example.
    #
    # token - a single TIME_REGEXP capture (may be nil).
    #
    # Returns an emitter, or nil when the token is not a recognized time part.
    def time_emitter(token)
      case token
      when MERIDIAN_LOWER_REGEXP
        Emitters::AmPm.new

      when MERIDIAN_UPPER_REGEXP
        Emitters::AmPm.new { |v| v.upcase }

      when TWO_DIGIT_REGEXP
        # a two-digit number could be the hour, the minute or the second
        Emitters::Ambiguous.new(
          two_digit_hour_emitter(token),
          Emitters::TwoDigit.new(:min),
          Emitters::TwoDigit.new(:sec))

      when ONE_DIGIT_REGEXP
        # 12-hour clock without leading zero
        Emitters::Delegate.new(:hour, &HOUR_TO_12_HOUR)
      end
    end

    # Chooses the hour emitter for a two-digit hour token based on its value.
    #
    # token - a String of two digits.
    #
    # Returns an Emitters::TwoDigit for :hour.
    def two_digit_hour_emitter(token)
      case token.to_i
      when OBVIOUS_24_HOUR
        # 24-hour clock
        Emitters::TwoDigit.new(:hour)
      else
        # 12-hour clock with leading zero
        Emitters::TwoDigit.new(:hour, &HOUR_TO_12_HOUR)
      end
    end

    # Chooses an emitter for one token of a date example.
    #
    # Branch order matters where patterns overlap: "May" is both a full and an
    # abbreviated month name, and the full-name branch wins because it is
    # tested first.
    #
    # token - a String token from the date portion of the example.
    #
    # Returns an emitter, or nil when the token is not a recognized date part.
    def date_emitter(token)
      case token
      when MONTHNAMES_REGEXP
        Emitters::Lookup.new(:month, Date::MONTHNAMES)

      when ABBR_MONTHNAMES_REGEXP
        Emitters::Lookup.new(:month, Date::ABBR_MONTHNAMES)

      when DAYNAMES_REGEXP
        Emitters::Lookup.new(:wday, Date::DAYNAMES)

      when ABBR_DAYNAMES_REGEXP
        Emitters::Lookup.new(:wday, Date::ABBR_DAYNAMES)

      when TIMEZONE_REGEXP
        Emitters::Delegate.new(:zone)

      when FOUR_DIGIT_REGEXP
        Emitters::Delegate.new(:year)

      when ORDINAL_DAY_REGEXP
        Emitters::Ordinal.new(:day)

      when TWO_DIGIT_REGEXP
        two_digit_date_emitter(token)

      when ONE_DIGIT_REGEXP
        # a single digit can only be a month or a day
        Emitters::Ambiguous.new(
          Emitters::Delegate.new(:month),
          Emitters::Delegate.new(:day))
      end
    end

    private

    # Chooses the emitter for a two-digit date token based on its value:
    # 13..31 can only be a day, 32..99 can only be a year, and anything else
    # could be a month, a day or a year.
    def two_digit_date_emitter(token)
      case token.to_i
      when OBVIOUS_DAY
        TWO_DIGIT_DAY_EMITTER
      when OBVIOUS_YEAR
        TWO_DIGIT_YEAR_EMITTER
      else
        Emitters::Ambiguous.new(
          TWO_DIGIT_MONTH_EMITTER,
          TWO_DIGIT_DAY_EMITTER,
          TWO_DIGIT_YEAR_EMITTER)
      end
    end

  end
end