File: ocradcheck.cc

package info (click to toggle)
ocrad 0.29-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 812 kB
  • sloc: cpp: 9,710; sh: 388; makefile: 155
file content (193 lines) | stat: -rw-r--r-- 6,668 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
/* Ocrcheck - A test program for the library ocradlib
   Copyright (C) 2009-2024 Antonio Diaz Diaz.

   This program is free software: you have unlimited permission to
   copy, distribute, and modify it.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <stdint.h>
#include <unistd.h>

#include "arg_parser.h"
#include "ocradlib.h"


namespace {

const char * const program_name = "ocradcheck";
const char * invocation_name = program_name;		// default value


// Print the program description, the usage line, and the list of options
// to standard output.
void show_help()
  {
  // Only the usage line needs formatting (it embeds the invocation name).
  std::printf( "Ocradcheck is a test program for the library ocradlib. It reads the image\n"
               "files specified, feeds them to the OCR engine, and sends the resulting text\n"
               "to stdout.\n"
               "\nUsage: %s [options] [files]\n", invocation_name );

  // The option summary is constant text, so it is written out verbatim.
  const char * const options_text =
    "\nOptions:\n"
    "  -h, --help                display this help and exit\n"
    "  -V, --version             output version information and exit\n"
    "  -i, --invert              invert image levels (white on black)\n"
    "  -l, --layout              perform layout analysis\n"
    "  -u, --utf8                output text in UTF-8 format [default byte]\n"
    "\nIf no files are specified, or if a file is '-', ocradcheck reads the image\n"
    "from standard input.\n";
  std::fputs( options_text, stdout );
  }


// Print the version this program was built as (PROGVERSION), followed by
// the version string reported at run time by the library.
void show_version()
  {
  std::printf( "%s %s\n", program_name, PROGVERSION );

  const char * const library_version = OCRAD_version();
  std::printf( "Using ocradlib %s\n", library_version );
  }


// Write a diagnostic to stderr.
//   msg:     text of the message; nothing is printed for a null or empty msg.
//   errcode: if positive, ": " and the std::strerror description of errcode
//            are appended to the message.
//   help:    if true, a hint pointing at the --help option is also printed
//            (even when msg is null or empty).
void show_error( const char * const msg, const int errcode = 0,
                 const bool help = false )
  {
  if( msg && msg[0] )
    {
    const char * separator = "";
    const char * reason = "";
    if( errcode > 0 )			// only then is strerror consulted
      { separator = ": "; reason = std::strerror( errcode ); }
    std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg,
                  separator, reason );
    }

  if( !help ) return;
  std::fprintf( stderr, "Try '%s --help' for more information.\n",
                invocation_name );
  }

} // end namespace


/* Entry point of ocradcheck.
   Parses the command-line options, verifies that the library version
   matches the one this program was built against, and then, for each
   input file (or standard input if no file is given), runs the OCR
   engine and writes the recognized text to stdout. The character totals
   reported by the library per block, per line, and overall are
   cross-checked against each other and against a count made from the
   text itself.

   Exit status as implemented below:
     0  all files processed and all checks passed (also --help/--version)
     1  bad option, out of memory, unreadable input file, or mismatching
        character counts
     3  internal error (uncaught option, library version mismatch, or a
        library call rejected its arguments)
*/
int main( const int argc, const char * const argv[] )
  {
  bool invert = false;
  bool layout = false;
  bool utf8 = false;
  if( argc > 0 ) invocation_name = argv[0];

  const Arg_parser::Option options[] =
    {
    { 'h', "help",        Arg_parser::no  },
    { 'i', "invert",      Arg_parser::no  },
    { 'l', "layout",      Arg_parser::no  },
    { 'u', "utf8",        Arg_parser::no  },
    { 'V', "version",     Arg_parser::no  },
    {  0 , 0,             Arg_parser::no  } };

  const Arg_parser parser( argc, argv, options );
  if( parser.error().size() )				// bad option
    { show_error( parser.error().c_str(), 0, true ); return 1; }

  // Consume the leading options; argind is left at the first non-option.
  int argind = 0;
  for( ; argind < parser.arguments(); ++argind )
    {
    const int code = parser.code( argind );
    if( !code ) break;					// no more options
    switch( code )
      {
      case 'h': show_help(); return 0;
      case 'i': invert = true; break;
      case 'l': layout = true; break;
      case 'u': utf8 = true; break;
      case 'V': show_version(); return 0;
      default: std::fprintf( stderr, "%s: internal error: uncaught option.\n",
                             program_name ); return 3;
      }
    } // end process options

  // The version reported by the library at run time must start with the
  // same character as the version string of the header compiled against.
  if( OCRAD_version()[0] != OCRAD_version_string[0] )
    { std::fputs( "wrong library version.\n", stderr ); return 3; }

  // The program version and the header version string must be identical.
  if( std::strcmp( PROGVERSION, OCRAD_version_string ) != 0 )
    { std::fputs( "wrong library version_string.\n", stderr ); return 3; }

  // process any remaining command-line arguments (input files)
  bool stdin_used = false;
  // 'first' forces one iteration when no file names were given, so that
  // standard input ("-") is read in that case.
  for( bool first = true; first || argind < parser.arguments(); first = false )
    {
    const char * infile_name;
    if( argind < parser.arguments() )
      infile_name = parser.argument( argind++ ).c_str();
    else
      { infile_name = "-"; if( stdin_used ) continue; else stdin_used = true; }

    // A fresh descriptor is opened for every input file and must be
    // closed on every path that leaves this iteration.
    OCRAD_Descriptor * const ocrdes = OCRAD_open();
    if( !ocrdes || OCRAD_get_errno( ocrdes ) != OCRAD_ok )
      {
      OCRAD_close( ocrdes );
      std::fputs( "Not enough memory.\n", stderr );
      return 1;
      }

    if( OCRAD_set_image_from_file( ocrdes, infile_name, invert ) < 0 )
      {
      // Read the error code before closing the descriptor.
      const OCRAD_Errno ocr_errno = OCRAD_get_errno( ocrdes );
      OCRAD_close( ocrdes );
      if( ocr_errno == OCRAD_mem_error )
        std::fputs( "Not enough memory.\n", stderr );
      else
        std::fprintf( stderr, "%s: Can't open file for reading.\n", infile_name );
      return 1;
      }
//    std::fprintf( stderr, "ocradcheck: testing file '%s'\n", infile_name );

    if( ( utf8 && OCRAD_set_utf8_format( ocrdes, true ) < 0 ) ||
        OCRAD_set_threshold( ocrdes, -1 ) < 0 ||	// auto threshold
        OCRAD_recognize( ocrdes, layout ) < 0 )
      {
      const OCRAD_Errno ocr_errno = OCRAD_get_errno( ocrdes );
      OCRAD_close( ocrdes );
      if( ocr_errno == OCRAD_mem_error )
        { std::fputs( "Not enough memory.\n", stderr ); return 1; }
      std::fprintf( stderr, "%s: internal error: invalid argument.\n",
                    program_name ); return 3;
      }

    // Print the text block by block while accumulating three independent
    // character totals to compare with OCRAD_result_chars_total.
    const int blocks = OCRAD_result_blocks( ocrdes );
    int chars_total_by_block = 0;
    int chars_total_by_line = 0;
    int chars_total_by_count = 0;
    for( int b = 0; b < blocks; ++b )
      {
      const int lines = OCRAD_result_lines( ocrdes, b );
      chars_total_by_block += OCRAD_result_chars_block( ocrdes, b );
      for( int l = 0; l < lines; ++l )
        {
        const char * const s = OCRAD_result_line( ocrdes, b, l );
        chars_total_by_line += OCRAD_result_chars_line( ocrdes, b, l );
        if( s && s[0] )
          {
          std::fputs( s, stdout );
          // The last byte of the line is left out of the count
          // (presumably the terminating newline; confirm in ocradlib).
          const int len = std::strlen( s ) - 1;
          if( !utf8 )
            chars_total_by_count += len;		// one byte per character
          else
            // Count every byte except those in the range 0x80-0xBF, which
            // are UTF-8 continuation bytes, so each character counts once.
            for( int i = 0; i < len; ++i )
              if( (uint8_t)s[i] < 128 || (uint8_t)s[i] >= 0xC0 )
                ++chars_total_by_count;
          }
        }
      std::fputc( '\n', stdout );		// blank line after each block
      }
    const int chars_total = OCRAD_result_chars_total( ocrdes );
    if( chars_total_by_block != chars_total ||
        chars_total_by_line != chars_total ||
        chars_total_by_count != chars_total )
      {
      std::fprintf( stderr, "library_error: character counts differ.\n"
                    "%d  %d  %d  %d\n", chars_total, chars_total_by_block,
                    chars_total_by_line, chars_total_by_count );
      // Release the descriptor on this error path too, as every other
      // error path in this loop does.
      OCRAD_close( ocrdes );
      return 1;
      }
    OCRAD_close( ocrdes );
    }
  return 0;
  }