
|
# encoding: UTF-8
module Stringex
  module Localization
    # Catalogue of the regular expressions used for text conversion.
    #
    # Every pattern is available both as a constant and through a lowercase
    # module-level reader of the same name (for example +CHARACTERS+ and
    # +ConversionExpressions.characters+).
    module ConversionExpressions
      # A letter followed by one or more ".letter" pairs (optionally a trailing
      # dot and further letters), preceded by whitespace, "(" or a line start
      # and followed by whitespace, ")" or a line end.
      ABBREVIATION = /(\s|\(|^)([[:alpha:]](\.[[:alpha:]])+(\.?)[[:alpha:]]*(\s|\)|$))/

      # An HTML entity made of one ASCII letter plus an accent name; captures
      # the letter and the accent name.
      ACCENTED_HTML_ENTITY = /&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/

      # Either an apostrophe preceded by a line start or a letter, or a
      # backtick followed by a letter or a line end.
      # NOTE(review): the top-level alternation makes the two branches
      # asymmetric -- confirm that this is intended.
      APOSTROPHE = /(^|[[:alpha:]])'|`([[:alpha:]]|$)/

      # Symbol characters keyed by name. Most patterns also swallow the
      # whitespace around the symbol; +number+ only swallows leading whitespace
      # and +dot+ captures the characters on either side of the period.
      CHARACTERS = {
        and:      /\s*&\s*/,
        at:       /\s*@\s*/,
        degrees:  /\s*°\s*/,
        divide:   /\s*÷\s*/,
        dot:      /(\S|^)\.(\S)/,
        ellipsis: /\s*\.{3,}\s*/,
        equals:   /\s*=\s*/,
        number:   /\s*#/,
        percent:  /\s*%\s*/,
        plus:     /\s*\+\s*/,
        # Backslash, slash, or U+FF0F FULLWIDTH SOLIDUS.
        # NOTE(review): the third alternative was a bare "/" which ended the
        # regex literal early (unmatched parenthesis, file did not parse). It
        # is restored as the fullwidth solidus, written as an escape so that it
        # survives ASCII normalisation -- confirm against the upstream file.
        slash:    /\s*(\\|\/|\uFF0F)\s*/,
        star:     /\s*\*\s*/,
      }

      # Things that just get converted to spaces
      CLEANUP_CHARACTERS = /[\.,:;(){}\[\]\?!\^'ʼ"`~_\|<>]/

      # Any remaining "&...;" run.
      CLEANUP_HTML_ENTITIES = /&[^;]+;/

      # Currency symbols keyed by currency name.
      CURRENCIES_SUPPORTED_SIMPLE = {
        generic: /¤/,
        dollars: /\$/,
        euros:   /€/,
        pounds:  /£/,
        yen:     /¥/,
        reais:   /R\$/
      }

      # Currencies that also have a "whole.fraction" form, mapped to the key
      # used for that form in CURRENCIES_COMPLEX.
      CURRENCIES_SUPPORTED_COMPLEX = {
        dollars: :dollars_cents,
        euros:   :euros_cents,
        pounds:  :pounds_pence,
        reais:   :reais_cents
      }

      # Alternation of every simple currency symbol pattern.
      CURRENCIES_SUPPORTED = Regexp.new(CURRENCIES_SUPPORTED_SIMPLE.values.join('|'))

      # Currency symbol followed by optional digits (captured), delimited by
      # whitespace or line edges on both sides.
      CURRENCIES_SIMPLE = CURRENCIES_SUPPORTED_SIMPLE.each_with_object({}) do |(currency, symbol), expressions|
        expressions[currency] = /(?:\s|^)#{symbol}(\d*)(?:\s|$)/
      end

      # Currency symbol followed by "digits.digits" (both parts captured), for
      # the currencies listed in CURRENCIES_SUPPORTED_COMPLEX only.
      CURRENCIES_COMPLEX = CURRENCIES_SUPPORTED_SIMPLE.each_with_object({}) do |(currency, symbol), expressions|
        # Do we really need to not worry about complex currencies if there are none for the currency?
        complex_key = CURRENCIES_SUPPORTED_COMPLEX[currency]
        next unless complex_key

        expressions[complex_key] = /(?:\s|^)#{symbol}(\d+)\.(\d+)(?:\s|$)/
      end

      # Simple and complex currency patterns in one hash (simple keys first).
      CURRENCIES = CURRENCIES_SIMPLE.merge(CURRENCIES_COMPLEX)

      # HTML entities keyed by name. Each value matches "&" + any of the
      # listed numeric or named codes + ";" and captures the code.
      HTML_ENTITIES = {
        amp:          %w[#38 amp],
        cent:         %w[#162 cent],
        copy:         %w[#169 copy],
        deg:          %w[#176 deg],
        divide:       %w[#247 divide],
        double_quote: %w[#34 #822[012] quot ldquo rdquo dbquo],
        ellipsis:     %w[#8230 hellip],
        en_dash:      %w[#8211 ndash],
        em_dash:      %w[#8212 mdash],
        frac14:       %w[#188 frac14],
        frac12:       %w[#189 frac12],
        frac34:       %w[#190 frac34],
        gt:           %w[#62 gt],
        lt:           %w[#60 lt],
        nbsp:         %w[#160 nbsp],
        pound:        %w[#163 pound],
        reg:          %w[#174 reg],
        single_quote: %w[#39 #821[678] apos lsquo rsquo sbquo],
        times:        %w[#215 times],
        trade:        %w[#8482 trade],
        yen:          %w[#165 yen],
      }.each_with_object({}) do |(entity, codes), expressions|
        expressions[entity] = /&(#{codes.join('|')});/
      end

      # An HTML/XML-like tag: optional leading "!", "/", "?" or "[", a tag name
      # or "--", optional attributes, optional closing punctuation, then ">".
      # Built inside a lambda so the helper patterns stay local.
      HTML_TAG = lambda do
        name  = /[\w:-]+/
        value = /([A-Za-z0-9]+|('[^']*?'|"[^"]*?"))/
        attr  = /(#{name}(\s*=\s*#{value})?)/
        /<[!\/?\[]?(#{name}|--)(\s+(#{attr}(\s+#{attr})*))?\s*([!\/?\]]+|--)?>/
      end.call

      # Typographic punctuation patterns mapped to their plain replacements.
      # NOTE(review): the octal byte escapes presumably cover the same marks as
      # they appear in other encodings -- confirm before touching them.
      SMART_PUNCTUATION = {
        /(“|”|\302\223|\302\224|\303\222|\303\223)/ => '"',
        /(‘|’|\302\221|\302\222|\303\225)/ => "'",
        /…/ => "...",
      }

      # Any control character.
      UNREADABLE_CONTROL_CHARACTERS = /[[:cntrl:]]/

      # Ordered by denominator then numerator of the value
      #
      # Each entry lists the fraction glyph followed by its HTML entity codes
      # (numeric always, named for the three fractions that also appear under a
      # named code in HTML_ENTITIES). The resulting pattern matches the
      # "&code;" spellings as well as the glyph itself.
      # NOTE(review): the alternatives previously repeated the bare glyph
      # (e.g. three identical branches for +half+), which looks like the entity
      # spellings had been decoded; building them from codes keeps them intact.
      VULGAR_FRACTIONS = {
        half:          %w[½ #189 frac12],
        one_third:     %w[⅓ #8531],
        two_thirds:    %w[⅔ #8532],
        one_fourth:    %w[¼ #188 frac14],
        three_fourths: %w[¾ #190 frac34],
        one_fifth:     %w[⅕ #8533],
        two_fifths:    %w[⅖ #8534],
        three_fifths:  %w[⅗ #8535],
        four_fifths:   %w[⅘ #8536],
        one_sixth:     %w[⅙ #8537],
        five_sixths:   %w[⅚ #8538],
        one_eighth:    %w[⅛ #8539],
        three_eighths: %w[⅜ #8540],
        five_eighths:  %w[⅝ #8541],
        seven_eighths: %w[⅞ #8542],
      }.each_with_object({}) do |(fraction, (glyph, *codes)), expressions|
        spellings = codes.map { |code| "&#{code};" } << glyph
        expressions[fraction] = /(#{spellings.join('|')})/
      end

      # One or more whitespace characters.
      WHITESPACE = /\s+/

      class << self
        # Defines one module-level reader per expression; each returns the
        # constant whose name is the upcased reader name.
        %w[
          abbreviation
          accented_html_entity
          apostrophe
          characters
          cleanup_characters
          cleanup_html_entities
          currencies
          currencies_simple
          currencies_complex
          html_entities
          html_tag
          smart_punctuation
          unreadable_control_characters
          vulgar_fractions
          whitespace
        ].each do |conversion_type|
          define_method(conversion_type) do
            const_get(conversion_type.upcase)
          end
        end
      end
    end
  end
end
|