1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
|
// rime_table_decompiler.cc
// nopdan <me@nopdan.com>
//
#include <cmath>
#include <fstream>
#include <iomanip>
#include <ios>
#include <iostream>
#include <string>
#include <rime/dict/table.h>
#include "codepage.h"
// usage:
// rime_table_decompiler <rime-table-file> [save-path]
// example:
// rime_table_decompiler pinyin.table.bin pinyin.dict.yaml
// Writes the syllables of `code` to `fout`, separated by single spaces.
// Each syllable id is converted to text through table->GetSyllableById().
// Nothing is written when `code` is empty.
void outCode(rime::Table* table, const rime::Code code, std::ofstream& fout) {
  bool need_separator = false;
  for (const auto syllable_id : code) {
    // A space goes between syllables, never before the first one.
    if (need_separator) {
      fout << " ";
    }
    fout << table->GetSyllableById(syllable_id);
    need_separator = true;
  }
}
// Writes every remaining entry of `accessor` to `fout`, one entry per line:
//   <text> TAB <space-separated code> [TAB <exp(weight)>]
// The weight column is only written when the stored weight is >= 0.
// `accessor` is taken by value, so the caller's accessor is not advanced.
void access(rime::Table* table,
            rime::TableAccessor accessor,
            std::ofstream& fout) {
  for (; !accessor.exhausted(); accessor.Next()) {
    const auto* entry = accessor.entry();
    fout << table->GetEntryText(*entry) << "\t";
    outCode(table, accessor.code(), fout);
    const auto weight = entry->weight;
    if (weight >= 0) {
      // Force the double overload: which overload an unqualified exp() picks
      // for a float argument depends on what the standard library's <cmath>
      // exposes in the global namespace, which could make the printed value
      // differ between toolchains.
      fout << "\t" << std::exp(static_cast<double>(weight));
    }
    // '\n' instead of std::endl: flushing the file after every single entry
    // is needless work; the stream is flushed when it is closed.
    fout << '\n';
  }
}
// recursively traverse table
// Depth-first walk over the table index driven by `query`.
// For every syllable id in [0, num_syllables):
//   1. dump the entries that query->Access(id) yields at the current level;
//   2. try query->Advance(id); if that fails, move on to the next id;
//   3. while query->level() is below 3, recurse; otherwise dump the entries
//      of query->Access(0) instead of recursing
//      (NOTE(review): presumably the syllable id is irrelevant at that
//      level - confirm against rime::TableQuery);
//   4. query->Backdate() undoes the Advance before the next id is tried.
void recursion(rime::Table* table,
               rime::TableQuery* query,
               std::ofstream& fout) {
  const auto syllable_count = table->metadata()->num_syllables;
  for (int syllable_id = 0; syllable_id < syllable_count; ++syllable_id) {
    access(table, query->Access(syllable_id), fout);
    if (!query->Advance(syllable_id)) {
      continue;
    }
    if (query->level() < 3) {
      recursion(table, query, fout);
    } else {
      access(table, query->Access(0), fout);
    }
    query->Backdate();
  }
}
// Prints the table's syllable and entry counts to stdout, then writes every
// entry found by walking the table index to `fout`.
void traversal(rime::Table* table, std::ofstream& fout) {
  auto meta = table->metadata();
  std::cout << "num_syllables: " << meta->num_syllables << std::endl
            << "num_entries: " << meta->num_entries << std::endl;
  // Floating-point values written to fout from here on use fixed notation
  // with no decimals.
  fout << std::fixed << std::setprecision(0);
  rime::TableQuery query(meta->index.get());
  recursion(table, &query, fout);
}
// Derives an output path from the input path when none is given:
//   - "<name>.table.bin" becomes "<name>.dict.yaml";
//   - any other "<name>.bin" becomes "<name>.yaml";
//   - everything else gets ".yaml" appended to the full file name.
rime::path InferredOutputPath(rime::path input_path) {
  const bool had_bin_suffix = input_path.extension() == ".bin";
  if (had_bin_suffix) {
    // Drop ".bin" so the next extension() call sees what preceded it.
    input_path.replace_extension();
  }
  if (had_bin_suffix && input_path.extension() == ".table") {
    input_path.replace_extension(".dict.yaml");
    return input_path;
  }
  input_path.concat(".yaml");
  return input_path;
}
// Entry point: loads the table file named by argv[1] and writes it out as a
// Rime dictionary YAML file, to argv[2] if given, otherwise to a path
// inferred from the input file name.
// Returns 0 on success or after printing usage, and 1 when the table cannot
// be loaded or the output file cannot be opened or written.
int main(int argc, char* argv[]) {
  // Remember the previous console code page so it can be restored on every
  // exit path.
  unsigned int codepage = SetConsoleOutputCodePage();
  if (argc < 2 || argc > 3) {
    std::cout << "Usage: rime_table_decompiler <rime-table-file> [save-path]"
              << std::endl;
    std::cout
        << "Example: rime_table_decompiler pinyin.table.bin pinyin.dict.yaml"
        << std::endl;
    SetConsoleOutputCodePage(codepage);
    return 0;
  }

  rime::path file_path(argv[1]);
  rime::Table table(file_path);
  bool success = table.Load();
  if (!success) {
    std::cerr << "Failed to load table." << std::endl;
    SetConsoleOutputCodePage(codepage);
    return 1;
  }

  rime::path output_path =
      (argc == 3) ? rime::path(argv[2]) : InferredOutputPath(file_path);
  std::ofstream fout;
  fout.open(output_path.c_str());
  if (!fout.is_open()) {
    std::cerr << "Failed to open file " << output_path << std::endl;
    SetConsoleOutputCodePage(codepage);
    return 1;
  }

  // YAML header of the dictionary; the name is taken from the input file.
  // NOTE(review): for an input such as "pinyin.table.bin", stem() yields
  // "pinyin.table" - confirm whether the ".table" part is wanted in `name`.
  fout << "# Rime dictionary\n\n";
  fout << "---\n"
          "name: "
       << file_path.stem().u8string()
       << "\n"
          "version: \"1.0\"\n"
          "...\n\n";
  traversal(&table, fout);

  // Close before reporting, so that buffered data is flushed and any write
  // error (e.g. a full disk) is detected instead of silently ignored.
  fout.close();
  if (fout.fail()) {
    std::cerr << "Failed to write file " << output_path << std::endl;
    SetConsoleOutputCodePage(codepage);
    return 1;
  }

  std::cout << "Save to: " << output_path << std::endl;
  SetConsoleOutputCodePage(codepage);
  return 0;
}
|