File: osra_ocr_tesseract.cpp

package info (click to toggle)
osra 2.1.3-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, trixie
  • size: 3,468 kB
  • sloc: cpp: 52,893; sh: 3,085; perl: 390; makefile: 328; python: 279; xml: 169; java: 160
file content (60 lines) | stat: -rw-r--r-- 1,918 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
/******************************************************************************
 OSRA: Optical Structure Recognition Application
 
 Created by Igor Filippov, 2007-2013 (igor.v.filippov@gmail.com)

 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
 Foundation; either version 2 of the License, or (at your option) any later
 version.

 This program is distributed in the hope that it will be useful, but WITHOUT ANY
 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 PARTICULAR PURPOSE.  See the GNU General Public License for more details.

 You should have received a copy of the GNU General Public License along with
 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
 St, Fifth Floor, Boston, MA 02110-1301, USA
 *****************************************************************************/
#include "config.h"
#ifdef HAVE_TESSERACT_LIB
#include <stddef.h> // NULL
#include <stdlib.h> // free()
#include <ctype.h> // isalnum()
#include <string.h> // strlen()

#include <string> // std::string

#include <tesseract/baseapi.h>

// Placeholder returned by osra_tesseract_ocr() when the recognized text does
// not pass its acceptance checks.
const char UNKNOWN_CHAR = '_';

// Global variable: the single Tesseract engine instance used by
// osra_tesseract_init(), osra_tesseract_ocr() and osra_tesseract_destroy().
tesseract::TessBaseAPI tess;

// Initializes the global Tesseract engine `tess` with the "eng" language and
// the default OCR engine mode. Must run before osra_tesseract_ocr() is used.
// NOTE(review): whatever Init() returns is discarded here, so a failed
// initialization would not be reported to the caller -- confirm this is
// intended.
void osra_tesseract_init()
{
  const char *const language = "eng";

  // Every argument other than the language and the engine mode is passed as
  // null / zero / false.
  tess.Init(NULL, language, tesseract::OEM_DEFAULT,
            NULL, 0,
            NULL, NULL,
            false);
}

// Calls End() on the global Tesseract engine `tess`.
// Counterpart of osra_tesseract_init().
void osra_tesseract_destroy()
{
  tess.End();
}

// Recognizes a single character in an image using the global Tesseract engine.
//
// Parameters:
//   pixmap      - image buffer handed to Tesseract with 1 byte per pixel and
//                 `width` bytes per line
//   width       - image width, also used as the recognition rectangle width
//   height      - image height, also used as the recognition rectangle height
//   char_filter - set of acceptable characters; when empty, any alphanumeric
//                 character is accepted
//
// Returns the first character of the recognized text when the text is exactly
// three bytes long, that character is alphanumeric and it passes `char_filter`;
// otherwise returns UNKNOWN_CHAR.
char osra_tesseract_ocr(unsigned char *pixmap, int width, int height, const std::string &char_filter)
{
  char result = UNKNOWN_CHAR;

  // The recognition rectangle covers the whole image: origin (0, 0), size width x height.
  char *text = tess.TesseractRect(pixmap, 1, width, 0, 0, width, height);

  // Only a result of exactly 3 bytes is accepted. Presumably this is one
  // recognized character followed by the two newline characters Tesseract
  // appends to its text output -- TODO confirm and give examples.
  if (text != NULL && strlen(text) == 3)
    {
      const char candidate = text[0];

      // <ctype.h> functions require a value representable as unsigned char;
      // the recognized text is UTF-8, so a byte >= 0x80 would be negative as a
      // plain (signed) char and must be converted first.
      const bool is_alphanumeric = isalnum(static_cast<unsigned char>(candidate)) != 0;
      const bool passes_filter = char_filter.empty() || char_filter.find(candidate, 0) != std::string::npos;

      if (is_alphanumeric && passes_filter)
        result = candidate;
    }

  // The Tesseract API documents that the returned string must be released
  // with delete[] (not free()). delete[] on a null pointer is a no-op.
  delete[] text;

  return result;
}

#endif