File: mfx.cpp

package info (click to toggle)
tesseract 2.04-2%2Bsqueeze1
  • links: PTS
  • area: main
  • in suites: squeeze
  • size: 7,336 kB
  • ctags: 6,860
  • sloc: cpp: 81,388; sh: 3,446; java: 1,220; makefile: 376
file content (436 lines) | stat: -rw-r--r-- 17,159 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
/******************************************************************************
 **      Filename:       mfx.c
 **      Purpose:        Micro feature extraction routines
 **      Author:         Dan Johnson
 **      History:        7/21/89, DSJ, Created.
 **
 **      (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
/**----------------------------------------------------------------------------
          Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "mfdefs.h"
#include "variables.h"
#include "sigmenu.h"
#include "mfoutline.h"
#include "clusttool.h"           //NEEDED
#include "const.h"
#include "intfx.h"
#include <math.h>

/* Default values for the tunable knobs.  MIN_SLOPE, MAX_SLOPE and
   NOISE_SEGMENT_LENGTH are the defaults handed to float_variable in
   InitMicroFxVars(). */
/* The slope limits are tangents of angles: the current defaults are
   tan(22.5 degrees) and tan(67.5 degrees); the old numbers corresponded
   to 10.0 degrees and 80.0 degrees. */
                                 /* PREV DEFAULT 0.176326981 approx. 10.0 degrees */
#define MIN_SLOPE               0.414213562
                                 /* PREV DEFAULT 5.671281820 approx. 80.0 degrees */
#define MAX_SLOPE               2.414213562
                                 /* a length of zero means no noise filtering */
#define NOISE_SEGMENT_LENGTH    (0.00)
                                 /* no feature splitting; NOTE(review): this
                                    constant is not referenced by any code
                                    visible in this file */
#define MAX_FEATURE_LENGTH      (MAXFLOAT)

/**----------------------------------------------------------------------------
          Macros
----------------------------------------------------------------------------**/
/* miscellaneous macros */
/* NormalizeAngle(A): converts an angle A (radians, since it is compared
   against and divided by 2*PI) into a fraction of a full turn.  A negative
   A is first shifted up by 2*PI; the result is then divided by 2*PI.
   A appears more than once in the expansion, so it is evaluated more than
   once - do not pass an expression with side effects. */
#define NormalizeAngle(A)       ( (((A)<0)?((A)+2*PI):(A)) / (2*PI) )

/*----------------------------------------------------------------------------
          Private Function Prototypes
-----------------------------------------------------------------------------*/
/* Fills in the FIRSTBULGE and SECONDBULGE parameters of MicroFeature for
   the outline span running from Start to End. */
void ComputeBulges(MFOUTLINE Start, MFOUTLINE End, MICROFEATURE MicroFeature);

/* Returns the direction of the vector from Start to End as a fraction of a
   full turn. */
FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End);

/* Pushes the micro-features found along Outline onto MicroFeatures and
   returns the resulting list. */
MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline,
                                     MICROFEATURES MicroFeatures);

/* Allocates a micro-feature and fills in its parameters for the outline
   span running from Start to End. */
MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End);

/* Adds a random offset to both bulge parameters of every feature in the
   list. */
void SmearBulges(MICROFEATURES MicroFeatures, FLOAT32 XScale, FLOAT32 YScale);

/*
#if defined(__STDC__) || defined(__cplusplus)
# define _ARGS(s) s
#else
# define _ARGS(s) ()
#endif*/

/* /users/danj/wiseowl/src/danj/microfeatures/mfx.c
void ComputeBulges
  _ARGS((MFOUTLINE Start,
  MFOUTLINE End,
  MICROFEATURE MicroFeature));

FLOAT32 ComputeOrientation
  _ARGS((MFEDGEPT *Start,
  MFEDGEPT *End));

MICROFEATURES ConvertToMicroFeatures
  _ARGS((MFOUTLINE Outline,
  MICROFEATURES MicroFeatures));

MICROFEATURE ExtractMicroFeature
  _ARGS((MFOUTLINE Start,
  MFOUTLINE End));

void SmearBulges
  _ARGS((MICROFEATURES MicroFeatures,
  FLOAT32 XScale,
  FLOAT32 YScale));

#undef _ARGS
*/

/**----------------------------------------------------------------------------
        Global Data Definitions and Declarations
----------------------------------------------------------------------------**/
/* tuning knobs that can be adjusted without recompilation; each one is
   handed to float_variable, together with its default, in InitMicroFxVars() */
/* slope below which lines are called horizontal (default MIN_SLOPE) */
static FLOAT32 MinSlope;
/* slope above which lines are called vertical (default MAX_SLOPE) */
static FLOAT32 MaxSlope;
/* length below which outline segments are treated as noise
   (default NOISE_SEGMENT_LENGTH) */
static FLOAT32 NoiseSegmentLength;

/**----------------------------------------------------------------------------
            Public Code
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
void InitMicroFxVars() {
/*
 **      Parameters: none
 **      Globals:
 **              MinSlope        slope below which lines are called horizontal
 **              MaxSlope        slope above which lines are called vertical
 **              NoiseSegmentLength      length below which outline segments
 **                              are treated as noise
 **      Operation: Set up the micro-feature extractor variables (knobs)
 **              that can be tuned without recompiling.  Each of the three
 **              globals listed above is handed to float_variable together
 **              with its name string and its compile-time default
 **              (MIN_SLOPE, MAX_SLOPE, NOISE_SEGMENT_LENGTH).  No other
 **              knob is touched by this routine.
 **      Return: none
 **      Exceptions: none
 **      History: Mon May 14 11:24:40 1990, DSJ, Created.
 */
  /* NOTE(review): dummy is never named directly in this body; presumably
     float_variable is a macro whose expansion uses it - confirm before
     removing this declaration. */
  VALUE dummy;

  /* float_variable is declared outside this file; presumably it stores the
     default in the variable and makes it settable by name - confirm. */
  float_variable (MinSlope, "MinSlope", MIN_SLOPE);
  float_variable (MaxSlope, "MaxSlope", MAX_SLOPE);
  float_variable (NoiseSegmentLength, "NoiseSegmentLength",
    NOISE_SEGMENT_LENGTH);
}                                /* InitMicroFxVars */


/*---------------------------------------------------------------------------*/
CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, LINE_STATS *LineStats) {
/*
 **      Parameters:
 **              Blob            blob to extract micro-features from; may
 **                              be NULL, in which case the empty list is
 **                              returned
 **              LineStats       text line statistics; not referenced by
 **                              this routine (line normalization is not
 **                              performed here)
 **      Globals:
 **              MinSlope, MaxSlope      passed to FindDirectionChanges
 **              NoiseSegmentLength      passed to FilterEdgeNoise
 **      Operation:
 **              Converts the blob to a list of outlines, runs
 **              ExtractIntFeat on the blob and derives an x and a y scale
 **              factor from its results.  Every outline is first
 **              normalized with CharNormalizeOutline (using the Xmean and
 **              Ymean results and the two scale factors); every outline
 **              is then marked up and converted to micro-features, which
 **              are accumulated on one list.  Finally the bulges of all
 **              features are smeared and the outlines are freed.
 **      Return: List of micro-features extracted from the blob, cast to
 **              CHAR_FEATURES.
 **      Exceptions: none
 **      History: 7/21/89, DSJ, Created.
 */
  MICROFEATURES MicroFeatures = NIL;
  FLOAT32 XScale, YScale;
  LIST Outlines;
  LIST Cursor;
  MFOUTLINE Outline;
  INT_FEATURE_ARRAY blfeatures;
  INT_FEATURE_ARRAY cnfeatures;
  INT_FX_RESULT_STRUCT FxResults;

  /* no blob: hand back the (still empty) feature list */
  if (Blob == NULL)
    return ((CHAR_FEATURES) MicroFeatures);

  Outlines = ConvertBlob (Blob);

  /* only FxResults is consumed below; the two feature arrays are outputs
     required by the ExtractIntFeat call */
  ExtractIntFeat(Blob, blfeatures, cnfeatures, &FxResults);

  /* NOTE(review): the x scale is derived from Ry and the y scale from Rx;
     this pairing is preserved exactly as found - confirm it is intended.
     Neither divisor is checked against zero. */
  XScale = 0.2f / FxResults.Ry;
  YScale = 0.2f / FxResults.Rx;

  /* pass 1: normalize every outline */
  Cursor = Outlines;
  iterate(Cursor) {
    Outline = (MFOUTLINE) first_node (Cursor);
    CharNormalizeOutline (Outline,
      FxResults.Xmean, FxResults.Ymean,
      XScale, YScale);
  }

  /* pass 2: mark up every outline and harvest its micro-features */
  Cursor = Outlines;
  iterate(Cursor) {
    Outline = (MFOUTLINE) first_node (Cursor);
    FindDirectionChanges(Outline, MinSlope, MaxSlope);
    FilterEdgeNoise(Outline, NoiseSegmentLength);
    MarkDirectionChanges(Outline);
    SmearExtremities(Outline, XScale, YScale);
    MicroFeatures = ConvertToMicroFeatures (Outline, MicroFeatures);
  }

  SmearBulges(MicroFeatures, XScale, YScale);
  FreeOutlines(Outlines);

  return ((CHAR_FEATURES) MicroFeatures);
}                                /* BlobMicroFeatures */


/**----------------------------------------------------------------------------
              Private Macros
----------------------------------------------------------------------------**/
/**********************************************************************
 * angle_of
 *
 * Return the angle, in radians, of the line from (x1,y1) to (x2,y2).
 * When x2-x1 is zero atan2 is not called: the result is -PI/2 if
 * y2 < y1 and PI/2 otherwise (including when the two points coincide).
 *
 * Every argument is parenthesized in the expansion so that arbitrary
 * expressions (e.g. a+b) can be passed.  Arguments are still evaluated
 * more than once, so they must not have side effects.
 **********************************************************************/
#define angle_of(x1,y1,x2,y2)                          \
((((x2) - (x1))) ?                                     \
  (atan2 ((y2) - (y1), (x2) - (x1))) :                 \
  (((y2) < (y1)) ? (- PI / 2.0) : (PI / 2.0)))


/**********************************************************************
 * scale_angle
 *
 * Map an angle x, in radians, onto a fraction of a full turn: a
 * negative x is first shifted up by 2*PI so that the angle is
 * non-negative, then the result is divided by 2*PI.
 *
 * The argument is parenthesized in the expansion so that arbitrary
 * expressions (e.g. a-b) can be passed.  It is still evaluated more
 * than once, so it must not have side effects.
 **********************************************************************/

#define scale_angle(x)                                 \
((((x) < 0) ? (2.0 * PI + (x)) : (x)) * 0.5 / PI)

/*---------------------------------------------------------------------------
            Private Code
---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
void ComputeBulges(MFOUTLINE Start, MFOUTLINE End, MICROFEATURE MicroFeature) {
/*
 **      Parameters:
 **              Start           outline node where the micro-feature begins
 **              End             outline node where the micro-feature ends
 **              MicroFeature    feature whose FIRSTBULGE and SECONDBULGE
 **                              entries are written; its ORIENTATION and
 **                              MFLENGTH entries must already be set
 **      Globals: none
 **      Operation:
 **              The bulges measure how far the outline strays from the
 **              straight line joining Start and End, sampled at 1/3 and
 **              2/3 of the way along that line.  The outline points are
 **              mapped into a frame whose origin is the Start point and
 **              which is rotated by minus the feature orientation; the
 **              outline is then walked until the mapped x coordinate
 **              reaches each sample position, and the crossing is
 **              obtained with XIntersectionOf.  Both values are finally
 **              divided by BULGENORMALIZER * MFLENGTH.  The original
 **              author documents the normalized range as -0.5 to 0.5,
 **              positive meaning a counterclockwise deviation.  When End
 **              is the point immediately after Start both bulges are 0.
 **              Start and End are assumed not to be the same point.
 **      Return: none
 **      Exceptions: none
 **      History: 7/27/89, DSJ, Created.
 */
  MATRIX_2D ToFeatureFrame;
  MFEDGEPT *StartPt;
  MFOUTLINE Cursor;
  FPOINT Here, Previous;
  FLOAT32 SamplePos;

  /* a single segment is its own straight-line approximation */
  if (End == NextPointAfter (Start)) {
    MicroFeature[FIRSTBULGE] = MicroFeature[SECONDBULGE] = 0;
    return;
  }

  /* build the transform into the feature's own frame */
  StartPt = PointAt (Start);
  InitMatrix(&ToFeatureFrame);
  RotateMatrix (&ToFeatureFrame, MicroFeature[ORIENTATION] * -2.0 * PI);
  TranslateMatrix (&ToFeatureFrame, -StartPt->Point.x, -StartPt->Point.y);

  /* walk forward until the mapped outline passes the 1/3 position */
  Cursor = Start;
  FillPoint (Here, 0, 0);
  SamplePos = MicroFeature[MFLENGTH] / 3;
  CopyPoint(Here, Previous);
  while (Here.x < SamplePos) {
    Cursor = NextPointAfter (Cursor);
    CopyPoint(Here, Previous);
    MapPoint (&ToFeatureFrame, PointAt (Cursor)->Point, Here);
  }
  MicroFeature[FIRSTBULGE] = XIntersectionOf(Previous, Here, SamplePos);

  /* continue from where the first walk stopped, now up to 2/3 */
  SamplePos *= 2;

  // Only refresh Previous when the walk below is going to move Here;
  // if the current segment already spans the 2/3 position the pair
  // (Previous, Here) from the first walk must be kept, otherwise the
  // second bulge would come out as nan.
  if (Here.x < SamplePos)
    CopyPoint(Here, Previous);
  while (Here.x < SamplePos) {
    Cursor = NextPointAfter (Cursor);
    CopyPoint(Here, Previous);
    MapPoint (&ToFeatureFrame, PointAt (Cursor)->Point, Here);
  }
  MicroFeature[SECONDBULGE] = XIntersectionOf(Previous, Here, SamplePos);

  /* normalize both bulges by the feature length */
  MicroFeature[FIRSTBULGE] /= BULGENORMALIZER * MicroFeature[MFLENGTH];
  MicroFeature[SECONDBULGE] /= BULGENORMALIZER * MicroFeature[MFLENGTH];
}                                /* ComputeBulges */


/*---------------------------------------------------------------------------*/
FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End) {
/*
 **      Parameters:
 **              Start           edge point where the micro-feature begins
 **              End             edge point where the micro-feature ends
 **      Globals: none
 **      Operation:
 **              Takes the angle of the vector from Start to End (AngleFrom)
 **              and expresses it as a fraction of a full turn with
 **              NormalizeAngle, so that 0 stands for 0 degrees and 1
 **              would stand for 360 degrees.  The value returned lies in
 **              [0,1): 1 is excluded because it is actually the same
 **              orientation as 0.  Start and End are assumed not to be
 **              the same point.
 **      Return: Orientation parameter for the specified micro-feature.
 **      Exceptions: none
 **      History: 7/27/89, DSJ, Created.
 */
  FLOAT32 Turn = NormalizeAngle (AngleFrom (Start->Point, End->Point));

  /* round-off may push the circular parameter just outside [0,1);
     any such value is folded back to 0 */
  return (((Turn < 0) || (Turn >= 1)) ? 0 : Turn);
}                                /* ComputeOrientation */


/*---------------------------------------------------------------------------*/
MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline,
                                     MICROFEATURES MicroFeatures) {
/*
 **      Parameters:
 **              Outline         outline to extract micro-features from
 **              MicroFeatures   list of micro-features to add to
 **      Globals: none
 **      Operation:
 **              Walks once around the outline from extremity to
 **              extremity (NextExtremity).  Each pair of consecutive
 **              extremities is handed to ExtractMicroFeature and every
 **              non-NULL result is pushed onto the list.  The walk ends
 **              when it arrives back at the extremity it started from.
 **              A degenerate outline (DegenerateOutline) contributes
 **              nothing.
 **      Return: List of micro-features with new features added to front.
 **      Exceptions: none
 **      History: 7/26/89, DSJ, Created.
 */
  MFOUTLINE FirstExtremity;
  MFOUTLINE SpanStart;
  MFOUTLINE SpanEnd;
  MICROFEATURE Feature;

  if (DegenerateOutline (Outline))
    return (MicroFeatures);

  FirstExtremity = NextExtremity (Outline);
  SpanStart = FirstExtremity;
  for (;;) {
    SpanEnd = NextExtremity (SpanStart);
    Feature = ExtractMicroFeature (SpanStart, SpanEnd);
    if (Feature != NULL)
      MicroFeatures = push (MicroFeatures, Feature);

    /* back at the first extremity: the outline has been covered */
    if (SpanEnd == FirstExtremity)
      break;
    SpanStart = SpanEnd;
  }

  return (MicroFeatures);
}                                /* ConvertToMicroFeatures */


/*---------------------------------------------------------------------------*/
MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End) {
/*
 **      Parameters:
 **              Start           starting point of micro-feature
 **              End             ending point of micro-feature
 **      Globals: none
 **      Operation:
 **              This routine computes the feature parameters which describe
 **              the micro-feature that starts at Start and ends at End.
 **              A new micro-feature is allocated, filled with the feature
 **              parameters (position = midpoint of the two end points,
 **              length = distance between them, orientation, and the two
 **              bulges), and returned.
 **              If Start and End are the same node, or two nodes with
 **              identical coordinates, the feature has zero length and no
 **              defined orientation.  Such a feature is rejected: NULL is
 **              returned before anything is allocated.  No diagnostic is
 **              printed.
 **      Return: New micro-feature or NULL if the feature was rejected.
 **      Exceptions: none
 **      History: 7/26/89, DSJ, Created.
 **              11/17/89, DSJ, Added handling for Start and End same point.
 */
  MICROFEATURE NewFeature;
  MFEDGEPT *P1, *P2;

  P1 = PointAt (Start);
  P2 = PointAt (End);

  /* A zero-length feature would make MFLENGTH zero, and MFLENGTH is used
     as a divisor both in ComputeBulges and in SmearBulges.  Reject it
     here, as the caller already expects a possible NULL result. */
  if ((Start == End) ||
      ((P1->Point.x == P2->Point.x) && (P1->Point.y == P2->Point.y)))
    return (NULL);

  NewFeature = NewMicroFeature ();
  NewFeature[XPOSITION] = AverageOf (P1->Point.x, P2->Point.x);
  NewFeature[YPOSITION] = AverageOf (P1->Point.y, P2->Point.y);
  NewFeature[MFLENGTH] = DistanceBetween (P1->Point, P2->Point);
  NewFeature[ORIENTATION] =
    NormalizedAngleFrom (&((P1)->Point), &((P2)->Point), 1.0);
  /* the bulges depend on ORIENTATION and MFLENGTH, so they come last */
  ComputeBulges(Start, End, NewFeature);
  return (NewFeature);
}                                /* ExtractMicroFeature */


/*---------------------------------------------------------------------------*/
void SmearBulges(MICROFEATURES MicroFeatures, FLOAT32 XScale, FLOAT32 YScale) {
/*
 **      Parameters:
 **              MicroFeatures   features to be smeared
 **              XScale          # of normalized units per pixel in x dir
 **              YScale          # of normalized units per pixel in y dir
 **      Globals: none
 **      Operation: Perturb both bulge parameters of every feature by a
 **              uniformly distributed random amount.  The original author
 **              documents the amount as lying between -0.5 and 0.5 pixels.
 **              The size of a pixel in normalized units is taken to be
 **              YScale*|cos| + XScale*|sin| of the feature orientation and
 **              is divided by BULGENORMALIZER * MFLENGTH, the same divisor
 **              that ComputeBulges applies to the bulges.  This is done to
 **              prevent the prototypes generated in training from being
 **              unrealistically tight.
 **      Return: none
 **      Exceptions: none
 **      History: Thu Jun 28 18:03:38 1990, DSJ, Created.
 */
  MICROFEATURE Feature;
  double Angle;
  FLOAT32 AbsCos, AbsSin;
  FLOAT32 PixelSize;
  FLOAT32 Lowest;
  FLOAT32 Highest;

  iterate(MicroFeatures) {
    Feature = NextFeatureOf (MicroFeatures);

    /* ORIENTATION is stored as a fraction of a turn: back to radians */
    Angle = 2.0 * PI * Feature[ORIENTATION];
    AbsCos = fabs (cos (Angle));
    AbsSin = fabs (sin (Angle));
    PixelSize = YScale * AbsCos + XScale * AbsSin;

    /* half a pixel either way, in normalized bulge units */
    Lowest = -0.5 * PixelSize / (BULGENORMALIZER * Feature[MFLENGTH]);
    Highest = 0.5 * PixelSize / (BULGENORMALIZER * Feature[MFLENGTH]);

    /* two independent draws, first bulge first */
    Feature[FIRSTBULGE] += UniformRandomNumber (Lowest, Highest);
    Feature[SECONDBULGE] += UniformRandomNumber (Lowest, Highest);
  }
}                                /* SmearBulges */