1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333
|
module Mtx::IANALanguageSubtagRegistry
# ERB template for the generated C++ source file. It is rendered by
# do_create_cpp via `ERB#result(binding)`; every `<%= content_of[...] %>`
# placeholder below is looked up in the `content_of` hash that do_create_cpp
# builds. Everything between the heredoc markers is emitted verbatim, so it
# must not be edited for Ruby-side reasons.
@@list_cpp_content = <<EOERB
/*
mkvmerge -- utility for splicing together matroska files
from component media subtypes
Distributed under the GPL v2
see the file COPYING for details
or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
IANA language subtag registry
Written by Moritz Bunkus <moritz@bunkus.org>.
*/
// ----------------------------------------------------------------------------------------------
// NOTE: this file is auto-generated by the "dev:iana_language_subtag_registry_list" rake target.
// ----------------------------------------------------------------------------------------------
#include "common/common_pch.h"
#include "common/bcp47.h"
#include "common/iana_language_subtag_registry.h"
namespace mtx::iana::language_subtag_registry {
std::vector<entry_t> g_extlangs, g_variants, g_grandfathered;
std::vector<std::pair<mtx::bcp47::language_c, mtx::bcp47::language_c>> g_preferred_values;
std::unordered_map<std::string, std::string> g_suppress_scripts;
using VS = std::vector<std::string>;
struct extlang_variant_init_t {
char const *code, *description;
char const *prefixes[<%= content_of[:max_num_prefixes] + 1 %>];
bool is_deprecated;
};
struct suppress_script_init_t {
char const *first, *second;
};
struct preferred_values_init_t {
struct sub_t {
char const *tag, *region, *variant;
mtx::bcp47::language_c parse() const;
};
sub_t from, to;
};
mtx::bcp47::language_c
preferred_values_init_t::sub_t::parse()
const {
auto language = tag ? mtx::bcp47::language_c::parse(tag) : mtx::bcp47::language_c{};
if (region)
language.set_region(region);
if (variant)
language.set_variants({ variant });
if (!tag)
language.set_valid(true);
return language;
}
static extlang_variant_init_t s_extlangs_init[] = {
<%= content_of[:extlangs_init] %>
};
static extlang_variant_init_t s_variants_init[] = {
<%= content_of[:variants_init] %>
};
static extlang_variant_init_t s_grandfathered_init[] = {
<%= content_of[:grandfathered_init] %>
};
static suppress_script_init_t s_suppress_scripts_init[] = {
<%= content_of[:suppress_scripts_init] %>
};
static preferred_values_init_t s_preferred_values_init[] = {
<%= content_of[:preferred_values_init] %>
};
void
init() {
g_extlangs.reserve(<%= content_of[:num_extlangs] %>);
for (auto const *extlang = s_extlangs_init, *end = extlang + <%= content_of[:num_extlangs] %>; extlang < end; ++extlang) {
g_extlangs.emplace_back(extlang->code, extlang->description, extlang->is_deprecated);
auto &new_extlang = g_extlangs.back();
for (auto prefix = extlang->prefixes; *prefix; ++prefix)
new_extlang.prefixes.emplace_back(*prefix);
}
g_variants.reserve(<%= content_of[:num_variants] %>);
for (auto const *variant = s_variants_init, *end = variant + <%= content_of[:num_variants] %>; variant < end; ++variant) {
g_variants.emplace_back(variant->code, variant->description, variant->is_deprecated);
auto &new_variant = g_variants.back();
for (auto prefix = variant->prefixes; *prefix; ++prefix)
new_variant.prefixes.emplace_back(*prefix);
}
g_suppress_scripts.reserve(<%= content_of[:num_suppress_scripts] %>);
for (auto const *suppress_script = s_suppress_scripts_init, *end = suppress_script + <%= content_of[:num_suppress_scripts] %>; suppress_script < end; ++suppress_script)
g_suppress_scripts.insert_or_assign(suppress_script->first, suppress_script->second);
g_grandfathered.reserve(<%= content_of[:num_grandfathered] %>);
for (auto const *grandfathered = s_grandfathered_init, *end = grandfathered + <%= content_of[:num_grandfathered] %>; grandfathered < end; ++grandfathered)
g_grandfathered.emplace_back(grandfathered->code, grandfathered->description, grandfathered->is_deprecated);
}
void
init_preferred_values() {
mtx::bcp47::language_c::set_normalization_mode(mtx::bcp47::normalization_mode_e::none);
g_preferred_values.reserve(<%= content_of[:num_preferred_values] %>);
for (auto const *preferred_value = s_preferred_values_init, *end = preferred_value + <%= content_of[:num_preferred_values] %>; preferred_value < end; ++preferred_value)
g_preferred_values.emplace_back(preferred_value->from.parse(), preferred_value->to.parse());
mtx::bcp47::language_c::set_normalization_mode(mtx::bcp47::normalization_mode_e::default_mode);
}
} // namespace mtx::iana::language_subtag_registry
EOERB
# Serializes the check-and-fill of @@registry inside fetch_registry.
@@registry_mutex = Mutex.new
# Parsed registry as returned by fetch_registry; nil until the first call.
@@registry = nil
# Downloads the IANA language subtag registry, parses it and caches the result.
#
# The downloaded text is treated as a sequence of records separated by "%%"
# lines. Each record becomes a Hash whose keys are the field names, downcased,
# with "-" replaced by "_" and converted to symbols (so "Preferred-Value"
# becomes :preferred_value). "Prefix" fields are collected into an Array under
# :prefix; for every other field a later occurrence overwrites an earlier one.
# A line starting with spaces is appended (after a single space) to the value
# of the most recently seen non-Prefix field.
#
# Returns a Hash mapping each record's "Type" value to the Array of records of
# that type, in file order. Records without a "Type" field are dropped.
#
# The result is only stored in @@registry once parsing has finished, so an
# exception raised while downloading or parsing does not leave an empty or
# partial registry behind for later callers.
def self.fetch_registry
  @@registry_mutex.synchronize do
    return @@registry if @@registry

    # Subtags whose description has its parenthesized remarks removed.
    shorten_description_for = %w{1959acad abl1943 ao1990 colb1945}

    registry = {}
    entry    = {}

    # Files the record collected so far under its type and starts a new one.
    process = lambda do
      type = entry[:type]

      if shorten_description_for.include? entry[:subtag]
        entry[:description].gsub!(%r{ +\(.*?\)}, '')
      end

      if type
        registry[type] ||= []
        registry[type] << entry
      end

      entry = {}
    end

    # Name of the field a continuation line belongs to; nil right after a
    # record separator or a Prefix line, in which case continuations are ignored.
    current_sym = nil

    Mtx::OnlineFile.download("https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry").
      split(%r{\n+}).
      map(&:chomp).
      each do |line|
        if line == '%%'
          process.call
          current_sym = nil

        elsif %r{^Prefix: *(.+)}i.match(line)
          entry[:prefix] ||= []
          entry[:prefix] << $1
          current_sym = nil

        elsif %r{^(.*?): *(.+)}i.match(line)
          key, value = $1, $2
          current_sym = key.downcase.gsub(%r{-}, '_').to_sym
          entry[current_sym] = value

        elsif %r{^ +(.+)}.match(line) && current_sym
          entry[current_sym] += " #{$1}"
        end
      end

    # The last record is not followed by a "%%" separator.
    process.call

    @@registry = registry
  end

  return @@registry
end
# Downloads the ISDCF language registry, parses it as JSON and returns the
# value stored under its top-level "data" key.
def self.fetch_isdcf_languages
  raw_json = Mtx::OnlineFile.download("https://registry.isdcf.com/languages", "isdcf_languages")
  document = JSON.parse(raw_json)

  document["data"]
end
# Builds the four initializer columns for one extlang or variant record:
# the lower-cased subtag and the description as C string literals, a
# NULL-terminated initializer list of the sorted prefixes, and "true"/"false"
# depending on whether the record has a :deprecated key.
def self.format_one_extlang_variant entry
  prefixes = entry[:prefix]

  prefix_list =
    if prefixes
      "{ #{prefixes.sort.map(&:to_c_string).join(', ')}, NULL }"
    else
      '{ NULL }'
    end

  code        = entry[:subtag].downcase.to_c_string
  description = entry[:description].to_u8_c_string
  deprecated  = entry.key?(:deprecated) ? 'true' : 'false'

  [ code, description, prefix_list, deprecated ]
end
# Formats all records of the given registry type ("extlang" or "variant" at
# the visible call sites) as initializer rows.
#
# Returns a two-element Array: the number of records and the sorted rows,
# laid out by format_table and joined with newlines. The `name` argument is
# accepted but not used.
def self.format_extlangs_variants entries, type, name
  records = entries[type]
  rows    = records.map { |record| self.format_one_extlang_variant record }.sort
  lines   = format_table(rows, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },")

  [ records.size, lines.join("\n") ]
end
# Builds the four initializer columns for one grandfathered record. The
# columns line up with those of format_one_extlang_variant; the prefix list is
# always empty and the deprecation flag is always "true".
def self.format_one_grandfathered entry
  tag         = entry[:tag].to_c_string
  description = entry[:description].to_u8_c_string

  [ tag, description, '{ NULL }', 'true' ]
end
# Formats all "grandfathered" records as initializer rows.
#
# Returns a two-element Array: the number of records and the sorted rows,
# laid out by format_table and joined with newlines.
def self.format_grandfathered entries
  records = entries["grandfathered"]
  rows    = records.map { |record| self.format_one_grandfathered record }.sort
  lines   = format_table(rows, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },")

  [ records.size, lines.join("\n") ]
end
# Decides how the original (to-be-replaced) value of a preferred-value mapping
# is represented: anything containing a hyphen is a complete :tag, everything
# else keeps the record's own type as a symbol.
def self.preferred_value_type_original type, pv
  return :tag if %r{-}.match?(pv)

  type.to_sym
end
# Decides how the preferred (replacement) value of a mapping is represented:
# a value containing a hyphen, or one consisting of two or three lower-case
# ASCII letters, is a complete :tag; everything else keeps the record's own
# type as a symbol.
def self.preferred_value_type_target type, pv
  return :tag if %r{-|^[a-z]{2,3}$}.match?(pv)

  type.to_sym
end
# Builds the initializer columns for one side (`from` or `to`) of a
# preferred-value mapping, matching the `tag, region, variant` members of
# `preferred_values_init_t::sub_t` in the C++ template.
#
# pv_type - one of :tag, :language, :region or :variant. :tag and :language
#           both fill the tag column.
# pv      - the value; it is placed as a C string literal in the column
#           selected by pv_type, the other two columns are "NULL".
#
# Returns a four-element Array of column strings; the first carries the
# opening brace and the last is the closing brace.
#
# Raises RuntimeError for any other pv_type. All three columns are always
# non-nil strings, so the check has to look at pv_type itself; testing the
# columns for falseness can never fire and would let an unsupported kind
# silently produce an all-NULL initializer.
def self.format_one_preferred_value_construction pv_type, pv
  fail "unknown pv_type #{pv_type}" unless [:tag, :language, :region, :variant].include?(pv_type)

  pv_str  = pv.to_c_string
  tag     = [:tag, :language].include?(pv_type) ? pv_str : "NULL"
  region  = :region  == pv_type                  ? pv_str : "NULL"
  variant = :variant == pv_type                  ? pv_str : "NULL"

  [ "{ #{tag}", region, variant, "}" ]
end
# Builds the initializer columns for the target (preferred) side of a
# mapping, given the record type and the preferred value.
#
# The kind of value is resolved with preferred_value_type_target and the
# columns are produced by format_one_preferred_value_construction, so both
# code paths format identically. The former implementation called
# `preferred_value_type`, which this module does not define.
#
# Returns the same four-element column Array as
# format_one_preferred_value_construction.
def self.format_one_preferred_value_target type, pv
  pv_type = self.preferred_value_type_target type, pv

  self.format_one_preferred_value_construction pv_type, pv
end
# Builds one full preferred-value row from a registry record: the columns
# for the original value (entry[:original_value]) followed by the columns
# for the preferred value (entry[:preferred_value]).
def self.format_one_preferred_value entry
  type      = entry[:type]
  original  = entry[:original_value]
  preferred = entry[:preferred_value]

  from_columns = self.format_one_preferred_value_construction(self.preferred_value_type_original(type, original), original)
  to_columns   = self.format_one_preferred_value_construction(self.preferred_value_type_target(type, preferred), preferred)

  from_columns + to_columns
end
# Builds one preferred-value row from an ISDCF entry: its "dcncTag" maps to
# its "rfc5646Tag", and both sides are formatted as complete tags.
def self.format_one_preferred_value_isdcf entry
  %w{dcncTag rfc5646Tag}.flat_map do |key|
    self.format_one_preferred_value_construction(:tag, entry[key])
  end
end
# Formats every preferred-value mapping as an initializer row.
#
# Registry records that carry a :preferred_value come first. Each of them gets
# an :original_value key written into the record itself: the record's subtag
# (or tag), preceded by its first prefix and a hyphen if it has prefixes. They
# are ordered so that values with more hyphens come first, then by downcased
# value. ISDCF entries follow in their given order, restricted to those whose
# "dcncTag" matches Q[A-T][A-Z] and whose "rfc5646Tag" is not blank.
#
# Returns a two-element Array: the number of rows and the rows laid out by
# format_table, joined with newlines.
def self.format_preferred_values entries, isdcf_entries
  mapped = entries.values.flat_map do |records|
    records.select { |record| record.key?(:preferred_value) }
  end

  mapped.each do |record|
    lead = record.key?(:prefix) ? record[:prefix].first + "-" : ""
    record[:original_value] = lead + (record[:subtag] || record[:tag])
  end

  ordered = mapped.sort_by do |record|
    original = record[:original_value]
    [ 10 - original.count("-"), original.downcase ]
  end

  registry_rows = ordered.map { |record| self.format_one_preferred_value record }

  isdcf_rows = isdcf_entries.
    select { |item| %r{^Q[A-T][A-Z]$}.match?(item["dcncTag"] || "") && !item["rfc5646Tag"].blank? }.
    map    { |item| self.format_one_preferred_value_isdcf item }

  rows  = registry_rows + isdcf_rows
  lines = format_table(rows, :column_suffix => ',', :row_prefix => " { ", :row_suffix => " },")

  [ rows.size, lines.join("\n") ]
end
# Formats the suppress-script table: one row per distinct pair of tag (or
# subtag) and :suppress_script value, taken from the "language" and "extlang"
# records that have a non-blank :suppress_script.
#
# Returns a two-element Array: the number of rows and the sorted rows, laid
# out by format_table and joined with newlines.
def self.format_suppress_scripts entries
  candidates = entries["language"] + entries["extlang"]

  pairs = candidates.
    reject { |record| record[:suppress_script].blank? }.
    map    { |record| [ record[:tag] || record[:subtag], record[:suppress_script] ] }.
    sort.
    uniq

  rows  = pairs.map { |pair| pair.map(&:to_c_string) }
  lines = format_table(rows, :column_suffix => ",", :row_prefix => " { ", :row_suffix => " },")

  [ rows.size, lines.join("\n") ]
end
# Returns the largest number of prefixes found on any "extlang" or "variant"
# record; records without a :prefix key count as zero. Returns nil if there
# are no such records at all.
def self.calculate_max_num_prefixes entries
  records = entries.values_at("extlang", "variant").flatten

  prefix_counts = records.map do |record|
    prefixes = record[:prefix]
    prefixes ? prefixes.length : 0
  end

  prefix_counts.max
end
# Renders the C++ list file from the ERB template in @@list_cpp_content and
# writes it to src/common/iana_language_subtag_registry_list.cpp below
# $source_dir.
#
# entries       - parsed registry as returned by fetch_registry.
# isdcf_entries - ISDCF entries as returned by fetch_isdcf_languages.
#
# The write happens inside the block handed to runq, which ends with 0;
# the method returns whatever runq returns.
def self.do_create_cpp entries, isdcf_entries
  cpp_file_name = "src/common/iana_language_subtag_registry_list.cpp"

  max_num_prefixes                            = self.calculate_max_num_prefixes(entries)
  num_extlangs,         extlangs_init         = self.format_extlangs_variants(entries, "extlang", "extlangs")
  num_variants,         variants_init         = self.format_extlangs_variants(entries, "variant", "variants")
  num_suppress_scripts, suppress_scripts_init = self.format_suppress_scripts(entries)
  num_grandfathered,    grandfathered_init    = self.format_grandfathered(entries)
  num_preferred_values, preferred_values_init = self.format_preferred_values(entries, isdcf_entries)

  # The template reads `content_of` through the binding passed to ERB below,
  # so this local variable must keep exactly this name.
  content_of = {
    :max_num_prefixes      => max_num_prefixes,
    :num_extlangs          => num_extlangs,
    :extlangs_init         => extlangs_init,
    :num_variants          => num_variants,
    :variants_init         => variants_init,
    :num_suppress_scripts  => num_suppress_scripts,
    :suppress_scripts_init => suppress_scripts_init,
    :num_grandfathered     => num_grandfathered,
    :grandfathered_init    => grandfathered_init,
    :num_preferred_values  => num_preferred_values,
    :preferred_values_init => preferred_values_init,
  }

  content = ERB.new(@@list_cpp_content).result(binding)

  runq("write", cpp_file_name) do
    IO.write("#{$source_dir}/#{cpp_file_name}", content)
    0
  end
end
# Entry point: fetches the IANA registry and the ISDCF language list (in that
# order) and generates the C++ list file from them.
def self.create_cpp
  registry        = self.fetch_registry
  isdcf_languages = self.fetch_isdcf_languages

  do_create_cpp(registry, isdcf_languages)
end
end
|