1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
|
Adapt wordnet_structures, which is used to convert WordNet data to dict
format, from the WordNet 2.1 file format to the WordNet 3.0 file format.
Author: Sebastian Hagen <sebastian_hagen@memespace.net>
--- a/contrib/wordnet_structures/wordnet_structures.py
+++ b/contrib/wordnet_structures/wordnet_structures.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-#Copyright 2007 Sebastian Hagen
+#Copyright 2007,2008 Sebastian Hagen
# This file is part of wordnet_tools.
# wordnet_tools is free software; you can redistribute it and/or modify
@@ -21,7 +21,7 @@
# files usable by dictd.
# This is basically a reimplementation of the wnfilter program by Rik Faith,
# which unfortunately doesn't work correctly for wordnet files in the newer
-# formats. This version of wordnet_structures whould parse wordnet 2.1 files
+# formats. This version of wordnet_structures should parse wordnet 3.0 files
# correctly, and create output very similar to what wnfilter would have
# written.
@@ -117,27 +117,37 @@ class WordIndexDictFormatter(WordIndex):
linesep = '\n'
LINE_WIDTH_MAX = 68
prefix_fmtf_line_first = ' %s 1: '
- prefix_fmtn_line_first = ' %s'
+ prefix_fmtn_line_first = ' %*s'
prefix_fmtf_line_nonfirst = ' %d: '
- prefix_fmtn_line_nonfirst = ' '
+ prefix_fmtn_line_nonfirst = ' %*s '
def dict_str(self):
- """Build a human-readable definition for this word, including data for each subset
-
- Optional synset_map argument is used to look up data for antonyms."""
+ """Build a human-readable definition for this word, including data for each synset"""
tw = TextWrapper(width=self.LINE_WIDTH_MAX,
initial_indent=(self.prefix_fmtf_line_first % self.category_map_rev[self.category]),
- subsequent_indent=(self.prefix_fmtn_line_first % (' '*len(self.category_map_rev[self.category]))))
+ subsequent_indent=(self.prefix_fmtn_line_first % (len(self.category_map_rev[self.category]), '')))
lines = (tw.wrap(self.synsets[0].synset_get().dict_str()))
i = 2
+
+ prefix_fmtn_line_nonfirst = self.prefix_fmtn_line_nonfirst
+ pfln_len = 0
for ss_wrap in self.synsets[1:]:
+ # adjust indenting based on index-number width
+ pfln_len_new = len('%d' % (i,))
+ if (pfln_len_new > pfln_len):
+ pfln_len = pfln_len_new
+ pfln_str = (self.prefix_fmtn_line_nonfirst % (pfln_len, ''))
+
+ # format data for this synset
synset = ss_wrap.synset_get()
tw = TextWrapper(width=self.LINE_WIDTH_MAX,
initial_indent=(self.prefix_fmtf_line_nonfirst % i),
- subsequent_indent=self.prefix_fmtn_line_nonfirst)
+ subsequent_indent=pfln_str)
lines.extend(tw.wrap(synset.dict_str()))
+
i += 1
+
return self.linesep.join(lines)
@@ -209,9 +219,7 @@ class Synset:
return (rv, comments)
def dict_str(self):
- """Format this synset into a human-readable line-wrapped dict block.
-
- Takes an optional synset_map argument, to look up antonyms."""
+ """Format this synset into a human-readable line-wrapped dict block."""
rv = self.gloss.rstrip()
if (len(self.words) > 1):
rv += ' [syn: %s]' % (', '.join([('{%s}' % word) for word in self.words]))
@@ -399,7 +407,7 @@ if (__name__ == '__main__'):
op.add_option('-i', '--outindex', dest='oi', default='wn.index', help='filename of index file to write to')
op.add_option('-d', '--outdata', dest='od', default='wn.dict', help='filename of data file to write to')
op.add_option('--wn_url', dest='wn_url', default='ftp://ftp.cogsci.princeton.edu/pub/wordnet/2.0', help='URL for wordnet sources')
- op.add_option('--db_desc_short', dest='desc_short', default=' WordNet (r) 2.1 (2005)', help='short dict DB description')
+ op.add_option('--db_desc_short', dest='desc_short', default=' WordNet (r) 3.0 (2006)', help='short dict DB description')
op.add_option('--db_desc_long', dest='desc_long', default=' WordNet (r): A Lexical Database for English from the\n Cognitive Science Laboratory at Princeton University', help='long dict DB description')
(options, args) = op.parse_args()
|