File: cluster.py

package info (click to toggle)
gamera 1:3.4.3-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 15,912 kB
  • sloc: xml: 122,324; cpp: 50,730; python: 35,044; ansic: 258; makefile: 114; sh: 101
file content (168 lines) | stat: -rw-r--r-- 5,490 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# -*- mode: python; indent-tabs-mode: nil; tab-width: 3 -*-
# vim: set tabstop=3 shiftwidth=3 expandtab:
#
# Copyright (C) 2002-2003 Karl MacMillan and Michael Droettboom
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#

from gamera import core, knn, graph, gamera_xml
# Module-level side effect: core.init_gamera() runs as soon as this module
# is imported, before any of the functions below are defined.
core.init_gamera()

def get_lengths(node, depth, lengths, cur_depth=0, path=None):
   """Collect the costs of the edges reachable from *node* within *depth* hops.

   Performs a depth-limited, depth-first walk over ``node.edges``. Every edge
   that has not been seen yet has its ``cost`` appended to *lengths* and is
   then followed via ``edge.traverse(node)``.

   node      -- object exposing an ``edges`` iterable; each edge must provide
                ``cost`` and ``traverse(node)``.
   depth     -- maximum number of hops to follow; nothing is collected when
                ``cur_depth >= depth``.
   lengths   -- list that is extended in place with the edge costs.
   cur_depth -- current recursion depth (callers normally leave this at 0).
   path      -- dict whose keys are the edges already visited; it is updated
                in place. When omitted, a fresh dict is created for the call.

   Returns None; the results are delivered through *lengths* (and *path*).
   """
   # A literal ``{}`` default would be created once and shared by every call
   # that omits *path*, so edges visited by one call would be skipped by the
   # next. Create the dict per call instead.
   if path is None:
      path = {}
   if cur_depth >= depth:
      return
   for edge in node.edges:
      if edge in path:
         continue
      path[edge] = None
      lengths.append(edge.cost)
      get_lengths(edge.traverse(node), depth, lengths, cur_depth + 1, path)

def label(graph, node, label_start, label):
   """Classify every node reached from *node* with the same cluster name.

   Walks ``graph.DFS(node)`` and, for each node yielded, calls
   ``classify_automatic`` on the node's value (``visited()``) with the string
   ``label_start + str(label)``.

   graph       -- graph object providing ``DFS(start_node)``.
   node        -- node at which the depth-first search starts.
   label_start -- string prefix of the name.
   label       -- value appended to the prefix after ``str()`` conversion.
   """
   suffix = str(label)
   for visited in graph.DFS(node):
      glyph = visited()
      glyph.classify_automatic(label_start + suffix)
                        
def make_subtrees_stddev(graph, ratio, distance, relabel=1, lab="cluster."):
   """Cut "outlier" edges out of *graph* and optionally relabel the pieces.

   For every edge, the costs of the edges within *distance* hops of the
   edge's ``from_node`` are gathered with ``get_lengths`` and the edge's own
   cost is removed from that list. If more than one cost remains, the edge is
   marked for removal when ``stats.samplestdev([mean_of_neighbours,
   edge.cost])`` exceeds *ratio*.

   graph    -- graph providing ``get_edges()``, ``get_nodes()``,
               ``remove_edge()`` and ``DFS()``; it is modified in place.
   ratio    -- threshold compared against the sample standard deviation.
   distance -- hop limit handed to ``get_lengths``.
   relabel  -- when true, every node value is first reset with
               ``classify_manual("")`` and then each group of nodes reached
               by a DFS from a still-unnamed node is named
               ``lab + str(counter)``.
   lab      -- prefix of the generated names.

   Returns a list with the value (``node()``) of every node in the graph.

   Requires a ``stats`` module that provides ``mean`` and ``samplestdev``.
   """
   import stats

   # Removal is deferred until the scan is finished (presumably so the edge
   # collection is not modified while it is being iterated).
   doomed = []
   for candidate in graph.get_edges():
      neighbourhood = []
      get_lengths(candidate.from_node, distance, neighbourhood, 0, {})
      # The walk also reports the candidate itself; compare against the
      # other edges only.
      neighbourhood.remove(candidate.cost)
      if len(neighbourhood) <= 1:
         continue
      average = stats.mean(neighbourhood)
      spread = stats.samplestdev([average, candidate.cost])
      if spread > ratio:
         doomed.append(candidate)

   for candidate in doomed:
      graph.remove_edge(candidate)

   if relabel:
      next_id = 0
      for vertex in graph.get_nodes():
         vertex().classify_manual("")
      for vertex in graph.get_nodes():
         # Still empty means no earlier DFS has reached this node yet.
         if vertex().get_main_id() == "":
            label(graph, vertex, lab, next_id)
            next_id += 1

   return [vertex() for vertex in graph.get_nodes()]

def make_spanning_tree(glyphs, k=None):
   """Build an undirected minimum spanning tree over *glyphs*.

   glyphs -- the glyphs to connect.
   k      -- object providing ``distance_matrix(glyphs, flag)``; when None a
             new ``knn.kNNInteractive()`` is created.

   The distances come from ``k.distance_matrix(glyphs, 0)``.
   NOTE(review): the meaning of the second argument (0) is not visible here;
   confirm against the kNN API.

   Returns the new ``graph.Undirected`` instance.
   """
   classifier = knn.kNNInteractive() if k is None else k
   distances = classifier.distance_matrix(glyphs, 0)
   tree = graph.Undirected()
   tree.create_minimum_spanning_tree(glyphs, distances)
   return tree

def cluster(glyphs, ratio=1.0, distance=2, label="cluster.", k=None, relabel=1):
   """Cluster *glyphs* by cutting a minimum spanning tree.

   Builds the tree with ``make_spanning_tree`` and passes it to
   ``make_subtrees_stddev``.

   glyphs   -- the glyphs to cluster.
   ratio    -- removal threshold passed to ``make_subtrees_stddev``.
   distance -- hop limit passed to ``make_subtrees_stddev``.
   label    -- prefix for the generated cluster names.
   k        -- optional kNN object used to compute the distances.
   relabel  -- when true (the default) the glyphs are renamed per cluster.

   Returns the list produced by ``make_subtrees_stddev``.
   """
   g = make_spanning_tree(glyphs, k)

   # Forward relabel explicitly: it used to be accepted but never passed on,
   # so callers asking for relabel=0 still had their glyphs renamed.
   return make_subtrees_stddev(g, ratio, distance, relabel=relabel, lab=label)


def cluster2(glyphs):
   """Two-pass clustering that refines the result of ``cluster``.

   1. All glyphs are clustered once and grouped with
      ``knn.glyphs_by_category``.
   2. Groups with fewer than 10 glyphs are pooled and clustered again
      (ratio 1, distance 1, prefix ``"cluster_small."``).
   3. Every remaining group is clustered on its own (ratio .6, distance 4)
      under a prefix derived from the id of its first glyph.

   The same kNN object is reused for every pass. The number of pooled
   "small" glyphs is printed to standard output.

   Returns one list holding the results of steps 2 and 3.
   """
   # Was misspelled ``kNNInterative``, which is not the name used by
   # make_spanning_tree and fails with AttributeError.
   ko = knn.kNNInteractive()
   gc = knn.glyphs_by_category(cluster(glyphs, k=ko))
   small = []
   large = []
   for members in gc.values():
      if len(members) < 10:
         small.extend(members)
      else:
         large.append(members)
   print(len(small))
   output = cluster(small, 1, 1, label="cluster_small.", k=ko)
   cur_label = 0
   for members in large:
      prefix = members[0].get_main_id() + ".cluster_large" + str(cur_label) + "."
      cur_label += 1
      output.extend(cluster(members, .6, 4, label=prefix, k=ko))
   return output

def do_tests(filename):
   """Cluster the connected components of an image at several thresholds.

   Loads *filename*, runs ``cc_analysis()`` on it, builds one minimum
   spanning tree and then, for each ratio in 1.5, 2.0, 2.5 and 3.0, runs
   ``make_subtrees_stddev`` on a copy of that tree and writes the result with
   ``gamera_xml.glyphs_to_xml``.

   The output files are written next to the input file and are named
   ``cluster_<ratio>_<original name>`` with the dot in the ratio replaced by
   an underscore (for example ``cluster_1_5_page.png``).

   filename -- path of the image to load.
   """
   from gamera import gamera_xml
   from gamera import core
   import os.path

   image = core.load_image(filename)
   glyphs = image.cc_analysis()
   g_orig = make_spanning_tree(glyphs)

   out_dir = os.path.dirname(filename)
   base = os.path.basename(filename)
   # make_subtrees_stddev requires a distance argument; it was missing here.
   # NOTE(review): 2 mirrors the default used by cluster() - confirm this is
   # the intended value for these test runs.
   distance = 2
   for ratio, tag in ((1.5, "1_5"), (2.0, "2_0"), (2.5, "2_5"), (3.0, "3_0")):
      # make_subtrees_stddev removes edges in place, so use a fresh copy.
      g = g_orig.copy()
      c = make_subtrees_stddev(g, ratio, distance)
      # os.path.join supplies the separator that plain string concatenation
      # of dirname and file name left out.
      out_name = os.path.join(out_dir, "cluster_" + tag + "_" + base)
      gamera_xml.glyphs_to_xml(c, os.path.abspath(out_name))

def make_unique_names(glyphs):
   """Append each glyph's position in *glyphs* to its current id.

   Every glyph is re-classified via ``classify_automatic`` with
   ``get_main_id() + str(index)``, where index is the glyph's zero-based
   position in the sequence. The glyphs are modified in place; nothing is
   returned.
   """
   for position, glyph in enumerate(glyphs):
      glyph.classify_automatic(glyph.get_main_id() + str(position))

def graphvis_output(G, filename):
   """Write graph *G* to *filename* as Graphviz DOT-style text.

   Directed graphs (``G.is_directed()`` true) are written as ``digraph G``
   with one ``->`` line per entry of each node's ``out_edges``. Undirected
   graphs are written as ``graph G`` with one ``--`` line per edge of
   ``G.get_edges()``, using the main ids of the two end-point values as node
   names, the edge cost as label and a quarter of the cost as ``len``.

   G        -- graph object to dump.
   filename -- path of the file to create (an existing file is overwritten).
   """
   fd = open(filename, 'w')
   # try/finally guarantees the file is closed even if a graph accessor or a
   # write raises part-way through; previously the handle leaked in that
   # case. (try/finally rather than ``with`` keeps the syntax valid on old
   # Python 2 interpreters.)
   try:
      if G.is_directed():
         fd.write("digraph G {\n")
         for node in G.get_nodes():
            for edge in node.out_edges:
               # NOTE(review): this branch formats ``edge()`` with %f while
               # the undirected branch uses ``edge.cost`` - confirm that
               # calling an edge yields a number.
               fd.write('   %s -> %s [ label = %f ];\n' % (node(), edge.to_node(), edge()))
         fd.write("}\n")
      else:
         fd.write("graph G {\n")
         for edge in G.get_edges():
            fd.write('   "%s" -- "%s" [label="%.2f",len="%f"];\n'
                     % (edge.from_node().get_main_id(), edge.to_node().get_main_id(),
                        edge.cost, edge.cost / 4))
         fd.write("}\n")
   finally:
      fd.close()

# This function is an unfinished stub that does not do anything.
# Until we figure out what it was supposed to do, it stays
# commented out. (CD)
# def analysis(glyphs):
#    by_id = {}
#    for x in glyphs:
#       id = x.get_main_id()
#       if not by_id.has_key(id):
#          by_id[id] = []
#       by_id[id].append(x)
#    num_features = len(x.features)

#    for x in by_id:
#       for i in range(len(x)):
#          sum_vec