Package: wordnet / 1:3.0-35

10_wordnet_structures.patch Patch series | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
Adapt wordnet_structures, which is used to convert WordNet to dict format,
so that it parses the WordNet 3.0 file format instead of the WordNet 2.1 format
Author: Sebastian Hagen <sebastian_hagen@memespace.net>

--- a/contrib/wordnet_structures/wordnet_structures.py
+++ b/contrib/wordnet_structures/wordnet_structures.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-#Copyright 2007 Sebastian Hagen
+#Copyright 2007,2008 Sebastian Hagen
 # This file is part of wordnet_tools.
 
 # wordnet_tools is free software; you can redistribute it and/or modify
@@ -21,7 +21,7 @@
 # files usable by dictd.
 # This is basically a reimplementation of the wnfilter program by Rik Faith,
 # which unfortunately doesn't work correctly for wordnet files in the newer
-# formats. This version of wordnet_structures whould parse wordnet 2.1 files
+# formats. This version of wordnet_structures should parse wordnet 3.0 files
 # correctly, and create output very similar to what wnfilter would have 
 # written.
 
@@ -117,27 +117,37 @@ class WordIndexDictFormatter(WordIndex):
    linesep = '\n'
    LINE_WIDTH_MAX = 68
    prefix_fmtf_line_first = '    %s 1: '
-   prefix_fmtn_line_first = '        %s'
+   prefix_fmtn_line_first = '        %*s'
    prefix_fmtf_line_nonfirst = '    %d: '
-   prefix_fmtn_line_nonfirst = '       '
+   prefix_fmtn_line_nonfirst = '    %*s  '
    
    def dict_str(self):
-      """Build a human-readable definition for this word, including data for each subset
-      
-      Optional synset_map argument is used to look up data for antonyms."""
+      """Build a human-readable definition for this word, including data for each synset"""
       tw = TextWrapper(width=self.LINE_WIDTH_MAX,
          initial_indent=(self.prefix_fmtf_line_first % self.category_map_rev[self.category]),
-         subsequent_indent=(self.prefix_fmtn_line_first % (' '*len(self.category_map_rev[self.category]))))
+         subsequent_indent=(self.prefix_fmtn_line_first % (len(self.category_map_rev[self.category]), '')))
          
       lines = (tw.wrap(self.synsets[0].synset_get().dict_str()))
       i = 2
+      
+      prefix_fmtn_line_nonfirst = self.prefix_fmtn_line_nonfirst
+      pfln_len = 0
       for ss_wrap in self.synsets[1:]:
+         # adjust indenting based on index-number width
+         pfln_len_new = len('%d' % (i,))
+         if (pfln_len_new > pfln_len):
+            pfln_len = pfln_len_new
+            pfln_str = (self.prefix_fmtn_line_nonfirst % (pfln_len, ''))
+         
+         # format data for this synset
          synset = ss_wrap.synset_get()
          tw = TextWrapper(width=self.LINE_WIDTH_MAX,
             initial_indent=(self.prefix_fmtf_line_nonfirst % i),
-            subsequent_indent=self.prefix_fmtn_line_nonfirst)
+            subsequent_indent=pfln_str)
          lines.extend(tw.wrap(synset.dict_str()))
+         
          i += 1
+         
       return self.linesep.join(lines)
 
 
@@ -209,9 +219,7 @@ class Synset:
       return (rv, comments)
 
    def dict_str(self):
-      """Format this synset into a human-readable line-wrapped dict block.
-      
-      Takes an optional synset_map argument, to look up antonyms."""
+      """Format this synset into a human-readable line-wrapped dict block."""
       rv = self.gloss.rstrip()
       if (len(self.words) > 1):
          rv += ' [syn: %s]' % (', '.join([('{%s}' % word) for word in self.words]))
@@ -399,7 +407,7 @@ if (__name__ == '__main__'):
    op.add_option('-i', '--outindex', dest='oi', default='wn.index', help='filename of index file to write to')
    op.add_option('-d', '--outdata', dest='od', default='wn.dict', help='filename of data file to write to')
    op.add_option('--wn_url', dest='wn_url', default='ftp://ftp.cogsci.princeton.edu/pub/wordnet/2.0', help='URL for wordnet sources')
-   op.add_option('--db_desc_short', dest='desc_short', default='     WordNet (r) 2.1 (2005)', help='short dict DB description')
+   op.add_option('--db_desc_short', dest='desc_short', default='     WordNet (r) 3.0 (2006)', help='short dict DB description')
    op.add_option('--db_desc_long', dest='desc_long', default='    WordNet (r): A Lexical Database for English from the\n     Cognitive Science Laboratory at Princeton University', help='long dict DB description')
    
    (options, args) = op.parse_args()