File: group.py

package info (click to toggle)
gamera 1:3.4.3-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 15,912 kB
  • sloc: xml: 122,324; cpp: 50,730; python: 35,044; ansic: 258; makefile: 114; sh: 101
file content (173 lines) | stat: -rw-r--r-- 7,408 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# -*- mode: python; indent-tabs-mode: nil; tab-width: 3 -*-
# vim: set tabstop=3 shiftwidth=3 expandtab:
#
# Copyright (C) 2001-2005 Ichiro Fujinaga, Michael Droettboom,
#                          and Karl MacMillan
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#

from __future__ import generators
from gamera import util
import sys, re

# GROUPING IN GENERAL:
#
# The grouping classifier interface allows glyphs to be grouped according to
# their physical relationships to each other.  The user classifies a set of
# glyphs as a "group", and other glyphs matching this general structure can
# later be located on the page.
#
# For documentation, there are two kinds of glyphs involved in this process:
#   part glyphs -- glyphs that should be grouped to form larger glyphs
#   union glyphs -- glyphs made up of part glyphs

class GridIndex:
   """Indexes glyphs using a grid, so glyphs near a given glyph are easier
   to find.

   The bounding rectangle of the initial glyphs (``grid_rect``) is divided
   into ``cell_nrows`` x ``cell_ncols`` cells of ``cell_width`` x
   ``cell_height`` pixels.  The cells are kept in the flat list ``grid`` in
   row-major order (index = row * cell_ncols + col), and a glyph is filed
   in the cell that contains its center point."""

   # Relative (row, col) offsets of the cells visited by
   # get_glyphs_around_glyph, in visiting order.
   search_order = ((0,0),                          # center
                   (-1,0), (0,-1), (1,0), (0,1),   # +
                   (-1,-1), (-1,1), (1,-1), (1,1)) # x

   def __init__(self, glyphs, max_width=100, max_height=100):
      """Creates a grid index to store the given set of glyphs.  Note that
      the init function only creates a grid big enough to hold the glyphs,
      it does not actually store them...  That must be done by calling
      GridIndex.add_glyph.  max_width and max_height are the maximum size
      (in pixels) of each cell.

      Raises ValueError if glyphs is empty."""
      glyphs = util.make_sequence(glyphs)
      if len(glyphs) == 0:
         raise ValueError(
             "GridIndex must be initialised with at least one glyph")
      self.grid_rect = glyphs[0].union_rects(glyphs)
      self.cell_width = int(max_width)
      self.cell_height = int(max_height)
      # Always allocate one extra row/column so that a center lying on the
      # far edge of grid_rect still maps to a valid cell.
      self.cell_ncols = int(self.grid_rect.width // self.cell_width) + 1
      self.cell_nrows = int(self.grid_rect.height // self.cell_height) + 1
      self._create_cells()

   def _create_cells(self):
      """Allocates one empty list per cell.  Subclasses override this to
      use a different per-cell container."""
      self.grid = [[] for _ in range(self.cell_ncols * self.cell_nrows)]

   def _cell_coords(self, glyph):
      """Returns the (row, col) of the cell containing the center of the
      given glyph.  Raises ValueError if the center is outside grid_rect."""
      if not self.grid_rect.contains_point(glyph.center):
         raise ValueError(
             "glyph is not within the bounding box of the initial set of images")
      # Floor division: the results are used as list indices, so they must
      # stay integers on Python 3 as well as Python 2.
      row = (glyph.center_y - self.grid_rect.ul_y) // self.cell_height
      col = (glyph.center_x - self.grid_rect.ul_x) // self.cell_width
      return row, col

   def add_glyph(self, glyph):
      """Stores the glyph in the cell containing its center.  Raises
      ValueError if the center is outside grid_rect."""
      row, col = self._cell_coords(glyph)
      self.grid[row * self.cell_ncols + col].append(glyph)

   def get_cell(self, row, col):
      """Returns the list of glyphs stored in cell (row, col), or an empty
      list if the coordinates are outside the grid."""
      if row < 0 or row >= self.cell_nrows:
         return []
      if col < 0 or col >= self.cell_ncols:
         return []
      return self.grid[row * self.cell_ncols + col]

   def get_cell_at_glyph(self, glyph):
      """Returns the list of glyphs stored in the cell containing the
      center of the given glyph.  Raises ValueError if the center is outside
      grid_rect."""
      row, col = self._cell_coords(glyph)
      return self.get_cell(row, col)

   def get_glyphs_around_glyph(self, glyph):
      """Generates the glyphs stored in the cell containing the center of
      the given glyph and in its (up to) eight neighbouring cells, visited
      in search_order.  Neighbours that fall outside the grid are skipped.

      Because this is a generator, the ValueError for a glyph outside
      grid_rect is raised when iteration starts, not when it is called."""
      row, col = self._cell_coords(glyph)
      cell_nrows = self.cell_nrows
      cell_ncols = self.cell_ncols
      for r, c in self.search_order:
         ri = r + row
         ci = c + col
         if ri < 0 or ri >= cell_nrows or ci < 0 or ci >= cell_ncols:
            continue
         for neighbor in self.grid[ri * cell_ncols + ci]:
            yield neighbor

class GridIndexWithKeys(GridIndex):
   """Extends the basic GridIndex class to allow glyphs to also be stored and
   retrieved by an arbitrary key.

   Each cell of ``grid`` is a dictionary mapping a key to the list of glyphs
   stored under that key in that cell.  ``flat`` maps each key to the list of
   all glyphs stored under it, regardless of cell.  Since the cells are
   dictionaries rather than lists, glyphs must be added with
   add_glyph_by_key; the inherited add_glyph expects list cells."""

   def _create_cells(self):
      """Allocates one empty dictionary per cell plus the key-wide index."""
      self.flat = {}
      self.grid = [{} for _ in range(self.cell_ncols * self.cell_nrows)]

   def _glyph_cell(self, glyph):
      """Returns the (row, col) of the cell containing the center of the
      given glyph.  Raises ValueError if the center is outside grid_rect.

      Defined locally so this class does not rely on any private helper of
      GridIndex."""
      if not self.grid_rect.contains_point(glyph.center):
         raise ValueError(
             "glyph is not within the bounding box of the initial set of images")
      # Floor division: the results are used as list indices, so they must
      # stay integers on Python 3 as well as Python 2.
      row = (glyph.center_y - self.grid_rect.ul_y) // self.cell_height
      col = (glyph.center_x - self.grid_rect.ul_x) // self.cell_width
      return row, col

   def _cells_around(self, glyph):
      """Generates the cell dictionaries around the given glyph: its own
      cell first, then the neighbours in search_order.  Neighbours outside
      the grid are skipped."""
      row, col = self._glyph_cell(glyph)
      cell_nrows = self.cell_nrows
      cell_ncols = self.cell_ncols
      for r, c in self.search_order:
         ri = r + row
         ci = c + col
         if ri < 0 or ri >= cell_nrows or ci < 0 or ci >= cell_ncols:
            continue
         yield self.grid[ri * cell_ncols + ci]

   def add_glyph_by_key(self, glyph, key):
      """Stores the glyph under the given key, both in the cell containing
      its center and in the key-wide index.  Raises ValueError if the center
      is outside grid_rect."""
      row, col = self._glyph_cell(glyph)
      cell_index = row * self.cell_ncols + col
      self.grid[cell_index].setdefault(key, []).append(glyph)
      self.flat.setdefault(key, []).append(glyph)

   def get_cell_by_key(self, row, col, key):
      """Returns the list of glyphs stored under key in cell (row, col).
      Returns an empty list if the coordinates are outside the grid or the
      cell holds nothing under that key."""
      if row < 0 or row >= self.cell_nrows:
         return []
      if col < 0 or col >= self.cell_ncols:
         return []
      return self.grid[row * self.cell_ncols + col].get(key, [])

   def get_cell_at_glyph_by_key(self, glyph, key):
      """Returns the list of glyphs stored under key in the cell containing
      the center of the given glyph.  Raises ValueError if the center is
      outside grid_rect."""
      row, col = self._glyph_cell(glyph)
      return self.get_cell_by_key(row, col, key)

   def get_glyphs_around_glyph(self, glyph):
      """Generates every glyph, under any key, stored in the cell containing
      the center of the given glyph and in its neighbouring cells.

      Because this is a generator, the ValueError for a glyph outside
      grid_rect is raised when iteration starts, not when it is called."""
      for cell in self._cells_around(glyph):
         for keyed_glyphs in cell.values():
            for neighbor in keyed_glyphs:
               yield neighbor

   def get_glyphs_around_glyph_by_key(self, glyph, key):
      """Generates the glyphs stored under key in the cell containing the
      center of the given glyph and in its neighbouring cells.

      Because this is a generator, the ValueError for a glyph outside
      grid_rect is raised when iteration starts, not when it is called."""
      for cell in self._cells_around(glyph):
         for neighbor in cell.get(key, []):
            yield neighbor

   def get_glyphs_by_key(self, key):
      """Returns the list of all glyphs stored under key, or an empty list
      if nothing has been stored under it."""
      return self.flat.get(key, [])