File: textlineprojection_test.cc

package info (click to toggle)
tesseract 5.5.0-1
  • links: PTS
  • area: main
  • in suites: forky, sid, trixie
  • size: 43,508 kB
  • sloc: cpp: 154,570; makefile: 1,519; java: 1,143; ansic: 852; sh: 763; python: 51
file content (254 lines) | stat: -rw-r--r-- 10,609 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
// (C) Copyright 2017, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <allheaders.h>
#include <string> // for std::string

#include "include_gunit.h"

#include <tesseract/baseapi.h>
#include <tesseract/osdetect.h>
#include "colfind.h"
#include "log.h" // for LOG
#include "mutableiterator.h"
#include "pageres.h"
#include "tesseractclass.h"
#include "textlineprojection.h"

namespace tesseract {

// Lowest projection score at which a textline counts as a STRONG_CHAIN.
// NOTE: This value must be kept in sync with textlineprojection.cc.
constexpr int kMinStrongTextValue = 6;

// The fixture for testing Tesseract.
class TextlineProjectionTest : public testing::Test {
protected:
  std::string OutputNameToPath(const std::string &name) {
    file::MakeTmpdir();
    return file::JoinPath(FLAGS_test_tmpdir, name);
  }

  TextlineProjectionTest() {
    src_pix_ = nullptr;
    bin_pix_ = nullptr;
    finder_ = nullptr;
    denorm_ = nullptr;
    projection_ = nullptr;
  }
  ~TextlineProjectionTest() override {
    src_pix_.destroy();
    bin_pix_.destroy();
    delete finder_;
  }

  void SetImage(const char *filename) {
    src_pix_.destroy();
    src_pix_ = pixRead(file::JoinPath(TESTING_DIR, filename).c_str());
    api_.Init(TESSDATA_DIR, "eng", tesseract::OEM_TESSERACT_ONLY);
    api_.SetPageSegMode(tesseract::PSM_AUTO_OSD);
    api_.SetImage(src_pix_);
  }

  // Ugly hacked-together function sets up projection_ and denorm_ by setting
  // up for auto pagelayout, setting up a ColumnFinder, running it, and
  // using accessors to get at the internal denorm and projection.
  // If the coordinates have been rotated, the denorm should match
  // correctly and transform coordinates back to the projection.
  // We throw away all the blocks, blobs etc, and test the projection with
  // the resultiterator from a separate BaseAPI run.
  void SetupProjection() {
    tesseract::TessdataManager mgr;
    auto osd_tess = std::make_unique<Tesseract>();
    OSResults osr;
    EXPECT_EQ(osd_tess->init_tesseract(TESSDATA_DIR, "", "osd", tesseract::OEM_TESSERACT_ONLY,
                                       nullptr, 0, nullptr, nullptr, false, &mgr),
              0);
    tesseract_ = std::make_unique<Tesseract>();
    EXPECT_EQ(tesseract_->init_tesseract(TESSDATA_DIR, "", "eng", tesseract::OEM_TESSERACT_ONLY,
                                         nullptr, 0, nullptr, nullptr, false, &mgr),
              0);
    bin_pix_ = api_.GetThresholdedImage();
    *tesseract_->mutable_pix_binary() = bin_pix_.clone();
    osd_tess->set_source_resolution(api_.tesseract()->source_resolution());
    tesseract_->set_source_resolution(api_.tesseract()->source_resolution());
    int width = pixGetWidth(bin_pix_);
    int height = pixGetHeight(bin_pix_);
    // First make a single block covering the whole image.
    auto *block = new BLOCK("", true, 0, 0, 0, 0, width, height);
    block->set_right_to_left(false);
    BLOCK_LIST src_blocks;
    BLOCK_IT block_it(&src_blocks);
    block_it.add_to_end(block);
    Image photomask_pix = nullptr;
    // The blocks made by the ColumnFinder. Moved to blocks before return.
    BLOCK_LIST found_blocks;
    TO_BLOCK_LIST temp_blocks;
    finder_ =
        tesseract_->SetupPageSegAndDetectOrientation(tesseract::PSM_AUTO_OSD, &src_blocks, osd_tess.get(),
                                                     &osr, &temp_blocks, &photomask_pix, nullptr);
    TO_BLOCK_IT to_block_it(&temp_blocks);
    TO_BLOCK *to_block = to_block_it.data();
    denorm_ = finder_->denorm();
    TO_BLOCK_LIST to_blocks;
    BLOBNBOX_LIST diacritic_blobs;
    EXPECT_GE(finder_->FindBlocks(tesseract::PSM_AUTO, nullptr, 1, to_block, photomask_pix, nullptr,
                                  nullptr, nullptr, &found_blocks, &diacritic_blobs, &to_blocks),
              0);
    projection_ = finder_->projection();
    photomask_pix.destroy();
  }

  // Helper evaluates the given box, expects the result to be greater_than
  // or !greater_than the target_value and provides diagnostics if not.
  void EvaluateBox(const TBOX &box, bool greater_or_equal, int target_value, const char *text,
                   const char *message) {
    int value = projection_->EvaluateBox(box, denorm_, false);
    if (greater_or_equal != (value > target_value)) {
      LOG(INFO) << "EvaluateBox too " << (greater_or_equal ? "low" : "high")
        << ":" << value << " vs " << target_value << " for " << message << " word '" << text << "' at:";
      box.print();
      value = projection_->EvaluateBox(box, denorm_, true);
    } else {
      LOG(INFO) << "EvaluateBox OK(" << value << ") for " << message << " word '" << text << "'";
    }
    if (greater_or_equal) {
      EXPECT_GE(value, target_value);
    } else {
      EXPECT_LT(value, target_value);
    }
  }

  // Helper evaluates the DistanceOfBoxFromBox function by expecting that
  // box should be nearer to true_box than false_box.
  void EvaluateDistance(const TBOX &box, const TBOX &true_box, const TBOX &false_box,
                        const char *text, const char *message) {
    int true_dist = projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, false);
    int false_dist = projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, false);
    if (false_dist <= true_dist) {
      LOG(INFO) << "Distance wrong:" << false_dist << " vs " << true_dist
        << " for " << message << " word '" << text << "' at:";
      true_box.print();
      projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, true);
      projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, true);
    } else {
      LOG(INFO) << "Distance OK(" << false_dist << " vs " << true_dist
        << ") for " << message << " word '" << text << "'";
    }
  }

  // Tests the projection on the word boxes of the given image.
  // line_height is the cap + descender size of the text.
  void VerifyBoxes(const char *imagefile, int line_height) {
    SetImage(imagefile);
    api_.Recognize(nullptr);
    SetupProjection();
    MutableIterator *it = api_.GetMutableIterator();
    do {
      char *text = it->GetUTF8Text(tesseract::RIL_WORD);
      const PAGE_RES_IT *pr_it = it->PageResIt();
      WERD_RES *word = pr_it->word();
      // The word_box refers to the internal, possibly rotated, coords.
      TBOX word_box = word->word->bounding_box();
      bool small_word = word_box.height() * 1.5 < line_height;
      bool tall_word = word_box.height() * 1.125 > line_height;
      // We pad small and tall words differently because ascenders and
      // descenders affect the position and size of the upper/lower boxes.
      int padding;
      if (small_word) {
        padding = word_box.height();
      } else if (tall_word) {
        padding = word_box.height() / 3;
      } else {
        padding = word_box.height() / 2;
      }
      // Test that the word box gets a good score.
      EvaluateBox(word_box, true, kMinStrongTextValue, text, "Real Word");

      // Now test a displaced box, both above and below the word.
      TBOX upper_box(word_box);
      upper_box.set_bottom(word_box.top());
      upper_box.set_top(word_box.top() + padding);
      EvaluateBox(upper_box, false, kMinStrongTextValue, text, "Upper Word");
      EvaluateBox(upper_box, true, -1, text, "Upper Word not vertical");
      TBOX lower_box = word_box;
      lower_box.set_top(word_box.bottom());
      lower_box.set_bottom(word_box.bottom() - padding);
      if (tall_word) {
        lower_box.move(ICOORD(0, padding / 2));
      }
      EvaluateBox(lower_box, false, kMinStrongTextValue, text, "Lower Word");
      EvaluateBox(lower_box, true, -1, text, "Lower Word not vertical");

      // Since some words have no text below and some words have no text above
      // check that at least one of the boxes satisfies BoxOutOfTextline.
      bool upper_or_lower_out_of_textline =
          projection_->BoxOutOfHTextline(upper_box, denorm_, false) ||
          projection_->BoxOutOfHTextline(lower_box, denorm_, false);
      if (!upper_or_lower_out_of_textline) {
        projection_->BoxOutOfHTextline(upper_box, denorm_, true);
        projection_->BoxOutOfHTextline(lower_box, denorm_, true);
      }
      EXPECT_TRUE(upper_or_lower_out_of_textline);

      // Now test DistanceOfBoxFromBox by faking a challenger word, and asking
      // that each pad box be nearer to its true textline than the
      // challenger. Due to the tight spacing of latin text, getting
      // the right position and size of these test boxes is quite fiddly.
      padding = line_height / 4;
      upper_box.set_top(upper_box.bottom() + padding);
      TBOX target_box(word_box);
      if (!small_word) {
        upper_box.move(ICOORD(0, -padding * 3 / 2));
      }
      target_box.set_top(upper_box.bottom());
      TBOX upper_challenger(upper_box);
      upper_challenger.set_bottom(upper_box.top());
      upper_challenger.set_top(upper_box.top() + word_box.height());
      EvaluateDistance(upper_box, target_box, upper_challenger, text, "Upper Word");
      if (tall_word) {
        lower_box.move(ICOORD(0, padding / 2));
      }
      lower_box.set_bottom(lower_box.top() - padding);
      target_box = word_box;
      target_box.set_bottom(lower_box.top());
      TBOX lower_challenger(lower_box);
      lower_challenger.set_top(lower_box.bottom());
      lower_challenger.set_bottom(lower_box.bottom() - word_box.height());
      EvaluateDistance(lower_box, target_box, lower_challenger, text, "Lower Word");

      delete[] text;
    } while (it->Next(tesseract::RIL_WORD));
    delete it;
  }

  Image src_pix_;
  Image bin_pix_;
  BLOCK_LIST blocks_;
  std::string ocr_text_;
  tesseract::TessBaseAPI api_;
  std::unique_ptr<Tesseract> tesseract_;
  ColumnFinder *finder_;
  const DENORM *denorm_;
  const TextlineProjection *projection_;
};

// Runs the word-box checks of VerifyBoxes on the unrotated test image.
TEST_F(TextlineProjectionTest, Unrotated) {
  // Value passed as VerifyBoxes' line_height (cap + descender size).
  constexpr int kLineHeight = 31;
  VerifyBoxes("phototest.tif", kLineHeight);
}

// Runs the word-box checks of VerifyBoxes on phototestrot.tif, which by its
// name is the rotated variant of the test image.
TEST_F(TextlineProjectionTest, Rotated) {
  // Value passed as VerifyBoxes' line_height (cap + descender size).
  constexpr int kLineHeight = 31;
  VerifyBoxes("phototestrot.tif", kLineHeight);
}

} // namespace tesseract