File: cld2_dynamic_data_tool.cc

package info (click to toggle)
cld2 0.0.0-git20150806-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 120,756 kB
  • ctags: 2,721
  • sloc: cpp: 864,716; sh: 255; ansic: 207; makefile: 17
file content (184 lines) | stat: -rw-r--r-- 6,353 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <assert.h>
#include <stdio.h>
#include <fstream>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#include "cld2_dynamic_data.h"
#include "cld2_dynamic_data_extractor.h"
#include "cld2_dynamic_data_loader.h"
#include "integral_types.h"
#include "cld2tablesummary.h"
#include "utf8statetable.h"
#include "scoreonescriptspan.h"

// Forward declarations of the scoring tables and their sizes. main() below
// uses them to assemble a real ScoringTables object and its Supplement. The
// definitions are not in this file; presumably they come from the generated
// table sources that this tool is linked against.
namespace CLD2 {

// Table objects referenced by the ScoringTables initializer in main().
extern const UTF8PropObj cld_generated_CjkUni_obj;
extern const CLD2TableSummary kCjkCompat_obj;
extern const CLD2TableSummary kCjkDeltaBi_obj;
extern const CLD2TableSummary kDistinctBiTable_obj;
extern const CLD2TableSummary kQuad_obj;
extern const CLD2TableSummary kQuad_obj2;
extern const CLD2TableSummary kDeltaOcta_obj;
extern const CLD2TableSummary kDistinctOcta_obj;

// Average delta-octa score array and its size.
extern const short kAvgDeltaOctaScore[];
extern const uint32 kAvgDeltaOctaScoreSize;

// Sizes collected into the indirectTableSizes array in main().
extern const uint32 kCompatTableIndSize;
extern const uint32 kCjkDeltaBiIndSize;
extern const uint32 kDistinctBiTableIndSize;
extern const uint32 kQuadChromeIndSize;
extern const uint32 kQuadChrome2IndSize;
extern const uint32 kDeltaOctaIndSize;
extern const uint32 kDistinctOctaIndSize;

}  // namespace CLD2

int main(int argc, char** argv) {
  if (!CLD2DynamicData::isLittleEndian()) {
    fprintf(stderr, "System is big-endian: currently not supported.\n");
    return -1;
  }
  if (!CLD2DynamicData::coreAssumptionsOk()) {
    fprintf(stderr, "Core assumptions violated, unsafe to continue.\n");
    return -2;
  }

  // Get command-line flags
  int flags = 0;
  bool get_vector = false;
  char* fileName = NULL;
  const char* USAGE = "\
CLD2 Dynamic Data Tool:\n\
Dump, verify or print summaries of scoring tables for CLD2.\n\
\n\
The files output by this tool are suitable for all little-endian platforms,\n\
and should work on both 32- and 64-bit platforms.\n\
\n\
IMPORTANT: The files output by this tool WILL NOT work on big-endian platforms.\n\
\n\
Usage:\n\
  --dump [FILE]     Dump the scoring tables that this tool was linked against\n\
                    to the specified file. The tables are automatically verified\n\
                    after writing, just as if the tool was run again with\n\
                    '--verify'.\n\
  --verify [FILE]   Verify that a given file precisely matches the scoring\n\
                    tables that this tool was linked against. This can be used\n\
                    to verify that a file is compatible.\n\
  --head [FILE]     Print headers from the specified file to stdout.\n\
  --verbose         Be verbose.\n\
";
  int mode = 0; //1=dump, 2=verify, 3=head
  for (int i = 1; i < argc; ++i) {
    if (strcmp(argv[i], "--verbose") == 0) {
      CLD2DynamicDataExtractor::setDebug(1);
      CLD2DynamicData::setDebug(1);
    }
    else if (strcmp(argv[i], "--dump") == 0
              || strcmp(argv[i], "--verify") == 0
              || strcmp(argv[i], "--head") == 0) {

      // set mode flag properly
      if (strcmp(argv[i], "--dump") == 0) mode=1;
      else if (strcmp(argv[i], "--verify") == 0) mode=2;
      else mode=3;
      if (i < argc - 1) {
        fileName = argv[++i];
      } else {
        fprintf(stderr, "Missing file name argument\n\n");
        fprintf(stderr, "%s", USAGE);
        return -1;
      }
    } else if (strcmp(argv[i], "--help") == 0) {
      fprintf(stdout, "%s", USAGE);
      return 0;
    } else {
      fprintf(stderr, "Unsupported option: %s\n\n", argv[i]);
      fprintf(stderr, "%s", USAGE);
      return -1;
    }
  }

  if (mode == 0) {
    fprintf(stderr, "%s", USAGE);
    return -1;
  }

  CLD2::ScoringTables realData = {
    &CLD2::cld_generated_CjkUni_obj,
    &CLD2::kCjkCompat_obj,
    &CLD2::kCjkDeltaBi_obj,
    &CLD2::kDistinctBiTable_obj,
    &CLD2::kQuad_obj,
    &CLD2::kQuad_obj2,
    &CLD2::kDeltaOcta_obj,
    &CLD2::kDistinctOcta_obj,
    CLD2::kAvgDeltaOctaScore,
  };
  const CLD2::uint32 indirectTableSizes[7] = {
    CLD2::kCompatTableIndSize,
    CLD2::kCjkDeltaBiIndSize,
    CLD2::kDistinctBiTableIndSize,
    CLD2::kQuadChromeIndSize,
    CLD2::kQuadChrome2IndSize,
    CLD2::kDeltaOctaIndSize,
    CLD2::kDistinctOctaIndSize
  };
  const CLD2DynamicData::Supplement supplement = {
    CLD2::kAvgDeltaOctaScoreSize,
    indirectTableSizes
  };
  if (mode == 1) { // dump
    CLD2DynamicDataExtractor::writeDataFile(
      static_cast<const CLD2::ScoringTables*>(&realData),
      &supplement,
      fileName);
  } else if (mode == 3) { // head
    CLD2DynamicData::FileHeader* header = CLD2DynamicDataLoader::loadHeaderFromFile(fileName);
    if (header == NULL) {
      fprintf(stderr, "Cannot read header from file: %s\n", fileName);
      return -1;
    }
    CLD2DynamicData::dumpHeader(header);
    delete[] header->tableHeaders;
    delete header;
  }
  
  if (mode == 1 || mode == 2) { // dump || verify (so perform verification)
    void* mmapAddress = NULL;
    uint32_t mmapLength = 0;
    CLD2::ScoringTables* loadedData = CLD2DynamicDataLoader::loadDataFile(fileName, &mmapAddress, &mmapLength);

    if (loadedData == NULL) {
      fprintf(stderr, "Failed to read data file: %s\n", fileName);
      return -1;
    }
    bool result = CLD2DynamicData::verify(
      static_cast<const CLD2::ScoringTables*>(&realData),
      &supplement,
      static_cast<const CLD2::ScoringTables*>(loadedData));
    CLD2DynamicDataLoader::unloadDataFile(&loadedData, &mmapAddress, &mmapLength);
    if (loadedData != NULL || mmapAddress != NULL || mmapLength != 0) {
      fprintf(stderr, "Warning: failed to clean up memory for ScoringTables.\n");
    }
    if (!result) {
      fprintf(stderr, "Verification failed!\n");
      return -1;
    }
  }
}