# encoding: UTF-8
module Stringex
  module Localization
    # Regular expressions used when converting strings.
    #
    # Every table is defined as a constant and, for the names listed at the
    # bottom of the module, is also reachable through a lower-case
    # module-level reader (e.g. +ConversionExpressions.whitespace+ returns
    # +WHITESPACE+).
    module ConversionExpressions
      # Dotted abbreviations such as "U.S.A." bounded by whitespace,
      # parentheses or a line edge.
      ABBREVIATION = /(\s|\(|^)([[:alpha:]](\.[[:alpha:]])+(\.?)[[:alpha:]]*(\s|\)|$))/

      # Named entities for accented letters; group 1 is the base letter,
      # group 2 the accent name.
      ACCENTED_HTML_ENTITY = /&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/

      APOSTROPHE = /(^|[[:alpha:]])'|`([[:alpha:]]|$)/

      # Individual symbols, keyed by the name of the symbol.
      CHARACTERS = {
        and: /\s*&\s*/,
        at: /\s*@\s*/,
        degrees: /\s*°\s*/,
        divide: /\s*÷\s*/,
        dot: /(\S|^)\.(\S)/,
        ellipsis: /\s*\.{3,}\s*/,
        equals: /\s*=\s*/,
        number: /\s*#/,
        percent: /\s*%\s*/,
        plus: /\s*\+\s*/,
        # Written with %r{} so the forward slash needs no escaping. The third
        # alternative used to be a bare "/", which terminated the literal
        # early and left the group unbalanced. It duplicated the second
        # alternative, so it is presumably a normalised full-width solidus
        # (U+FF0F) -- TODO confirm against the upstream source.
        slash: %r{\s*(\\|/|\uFF0F)\s*},
        star: /\s*\*\s*/,
      }

      # Things that just get converted to spaces
      CLEANUP_CHARACTERS = /[\.,:;(){}\[\]\?!\^'ʼ"`~_\|<>]/
      CLEANUP_HTML_ENTITIES = /&[^;]+;/

      # Currency symbols, keyed by currency name.
      CURRENCIES_SUPPORTED_SIMPLE = {
        generic: /¤/,
        dollars: /\$/,
        euros: /€/,
        pounds: /£/,
        yen: /¥/,
        reais: /R\$/
      }

      # Maps a currency name to the key used for its "units.fraction" pattern.
      # Currencies missing from this table get no such pattern.
      CURRENCIES_SUPPORTED_COMPLEX = {
        dollars: :dollars_cents,
        euros: :euros_cents,
        pounds: :pounds_pence,
        reais: :reais_cents
      }

      # Alternation of every supported currency symbol.
      CURRENCIES_SUPPORTED = Regexp.new(CURRENCIES_SUPPORTED_SIMPLE.values.join('|'))

      # Symbol followed by optional digits, delimited by whitespace or a line
      # edge; group 1 holds the digits.
      CURRENCIES_SIMPLE = CURRENCIES_SUPPORTED_SIMPLE.each_with_object({}) do |(currency, symbol), patterns|
        patterns[currency] = /(?:\s|^)#{symbol}(\d*)(?:\s|$)/
      end

      # Symbol followed by "digits.digits"; group 1 is the part before the
      # dot, group 2 the part after it.
      CURRENCIES_COMPLEX = CURRENCIES_SUPPORTED_SIMPLE.each_with_object({}) do |(currency, symbol), patterns|
        complex_key = CURRENCIES_SUPPORTED_COMPLEX[currency]
        next unless complex_key

        patterns[complex_key] = /(?:\s|^)#{symbol}(\d+)\.(\d+)(?:\s|$)/
      end

      # Simple and complex currency patterns in one table.
      CURRENCIES = CURRENCIES_SIMPLE.merge(CURRENCIES_COMPLEX)

      # Built inside a lambda so the intermediate table does not leak into the
      # module body. Each entry lists the spellings (numeric code and names)
      # that go between "&" and ";".
      HTML_ENTITIES = lambda do
        spellings = {
          amp: %w{#38 amp},
          cent: %w{#162 cent},
          copy: %w{#169 copy},
          deg: %w{#176 deg},
          divide: %w{#247 divide},
          double_quote: %w{#34 #822[012] quot ldquo rdquo dbquo},
          ellipsis: %w{#8230 hellip},
          en_dash: %w{#8211 ndash},
          em_dash: %w{#8212 mdash},
          frac14: %w{#188 frac14},
          frac12: %w{#189 frac12},
          frac34: %w{#190 frac34},
          gt: %w{#62 gt},
          lt: %w{#60 lt},
          nbsp: %w{#160 nbsp},
          pound: %w{#163 pound},
          reg: %w{#174 reg},
          single_quote: %w{#39 #821[678] apos lsquo rsquo sbquo},
          times: %w{#215 times},
          trade: %w{#8482 trade},
          yen: %w{#165 yen},
        }
        spellings.each_with_object({}) do |(entity, alternatives), patterns|
          patterns[entity] = /&(#{alternatives.join('|')});/
        end
      end.call

      # A single HTML/XML-like tag, assembled from smaller pieces.
      HTML_TAG = lambda do
        name = /[\w:-]+/
        value = /([A-Za-z0-9]+|('[^']*?'|"[^"]*?"))/
        attr = /(#{name}(\s*=\s*#{value})?)/
        /<[!\/?\[]?(#{name}|--)(\s+(#{attr}(\s+#{attr})*))?\s*([!\/?\]]+|--)?>/
      end.call

      # Pattern => plain replacement text.
      SMART_PUNCTUATION = {
        /(“|”|\302\223|\302\224|\303\222|\303\223)/ => '"',
        /(‘|’|\302\221|\302\222|\303\225)/ => "'",
        /…/ => "...",
      }

      UNREADABLE_CONTROL_CHARACTERS = /[[:cntrl:]]/

      # Ordered by denominator then numerator of the value.
      #
      # Each pattern previously repeated the same glyph two or three times,
      # which looks like HTML entities that were decoded in transit. They are
      # spelled out again here as the numeric entity, the named entity where
      # one appears in HTML_ENTITIES, and the literal glyph. This only widens
      # what matches -- TODO confirm against the upstream source.
      VULGAR_FRACTIONS = {
        half: /(&#189;|&frac12;|½)/,
        one_third: /(&#8531;|⅓)/,
        two_thirds: /(&#8532;|⅔)/,
        one_fourth: /(&#188;|&frac14;|¼)/,
        three_fourths: /(&#190;|&frac34;|¾)/,
        one_fifth: /(&#8533;|⅕)/,
        two_fifths: /(&#8534;|⅖)/,
        three_fifths: /(&#8535;|⅗)/,
        four_fifths: /(&#8536;|⅘)/,
        one_sixth: /(&#8537;|⅙)/,
        five_sixths: /(&#8538;|⅚)/,
        one_eighth: /(&#8539;|⅛)/,
        three_eighths: /(&#8540;|⅜)/,
        five_eighths: /(&#8541;|⅝)/,
        seven_eighths: /(&#8542;|⅞)/,
      }

      WHITESPACE = /\s+/

      class << self
        # Define one reader per name below; each looks up the constant whose
        # name is the upper-cased method name.
        %w{
          abbreviation
          accented_html_entity
          apostrophe
          characters
          cleanup_characters
          cleanup_html_entities
          currencies
          currencies_simple
          currencies_complex
          html_entities
          html_tag
          smart_punctuation
          unreadable_control_characters
          vulgar_fractions
          whitespace
        }.each do |conversion_type|
          define_method(conversion_type) do
            const_get(conversion_type.upcase)
          end
        end
      end
    end
  end
end
|