File: dict.py

package info (click to toggle)
phenny 2~hg28-3
links: PTS, VCS
area: main
in suites: buster, stretch
size: 448 kB
ctags: 190
sloc: python: 6,520; sh: 607; makefile: 15
file content (123 lines) | stat: -rwxr-xr-x 3,811 bytes
parent folder | download | duplicates (3)
#!/usr/bin/env python
"""
dict.py - Phenny Dictionary Module
Copyright 2008, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.

http://inamidst.com/phenny/
"""

import re, urllib
import web
from tools import deprecated

# Base URI of the WordNet web form; f_wordnet appends the URL-quoted term.
formuri = 'http://wordnet.princeton.edu/perl/webwn?s='

# One <li>...</li> element (non-greedy; case-insensitive, multiline, dotall).
r_li = re.compile(r'(?ims)<li>.*?</li>')
# Any markup tag, used to strip HTML from a list item.
r_tag = re.compile(r'<[^>]+>')
# Text enclosed in parentheses, allowing one level of nested parentheses;
# the surrounding parentheses themselves are not part of the match.
r_parens = re.compile(r'(?<=\()(?:[^()]+|\([^)]+\))*(?=\))')
# Accepted lookup terms: ASCII letters, digits, apostrophe, space and hyphen.
r_word = re.compile(r'^[A-Za-z0-9\' -]+$')

@deprecated
def f_wordnet(self, origin, match, args): 
   """Gives the definition of a word using Wordnet.

   Fetches the WordNet web form result for the term in ``match.group(2)``,
   extracts noun, verb and adjective definitions from its list items and
   sends at most three of them to ``origin.sender`` via ``self.msg``.

   Arguments:
      self   -- bot object; only ``self.msg(recipient, text)`` is used
      origin -- object providing ``sender`` and ``nick``
      match  -- regexp match object; group 2 is the word to look up
      args   -- not used by this function
   """
   term = match.group(2)
   if term is None: 
      # No word was supplied: answer instead of failing on None.encode().
      self.msg(origin.sender, origin.nick + ": Please give me a word.")
      return
   term = term.encode('utf-8')

   # Terms are validated everywhere except in the '#inamidst' channel.
   if origin.sender != '#inamidst': 
      if not r_word.match(term): 
         msg = "Words must match the regexp %s" % r_word.pattern
         return self.msg(origin.sender, origin.nick + ": " + msg)
      if ('--' in term) or ("''" in term) or ('  ' in term): 
         self.msg(origin.sender, origin.nick + ": That's not in WordNet.")
         return

   page = web.get(formuri + urllib.quote(term))

   nouns, verbs, adjectives = [], [], []
   by_kind = {'n': nouns, 'v': verbs, 'adj': adjectives}
   for item in r_li.findall(page): 
      # After stripping tags, the first two parenthesised chunks are taken
      # as the part of speech and the definition.
      chunks = r_parens.findall(r_tag.sub('', item))
      if len(chunks) < 2: 
         continue

      kind, defn = chunks[0], chunks[1]
      # Keep only the text before the first semicolon.
      defn = defn.split(';')[0]
      if not defn: 
         continue

      if kind in by_kind: 
         by_kind[kind].append(defn[0].upper() + defn[1:])

   if not (nouns or verbs or adjectives): 
      self.msg(origin.sender, "I couldn't find '%s' in WordNet." % term)
      return

   # Trim to three definitions in total, always dropping from the longest
   # list; max() returns the first maximum, so ties go to nouns, then verbs.
   groups = (nouns, verbs, adjectives)
   while sum(len(group) for group in groups) > 3: 
      max(groups, key=len).pop()

   # The definition that will be sent last gets a closing full stop.
   for group in (adjectives, verbs, nouns): 
      if group: 
         group[-1] = group[-1] + '.'
         break

   for label, group in (('n', nouns), ('v', verbs), ('a', adjectives)): 
      for (i, defn) in enumerate(group): 
         self.msg(origin.sender, '%s %s. %r: %s' % (term, label, i+1, defn))
f_wordnet.commands = ['wordnet']
f_wordnet.priority = 'low'

# URI template for an Encarta dictionary entry; %s is the URL-quoted word.
uri = 'http://encarta.msn.com/dictionary_/%s.html'
# Two alternatives, each with one capture group: group 1 is the text that
# follows 'ResultBody"><br /><br />' up to '&nbsp;', group 2 is the content
# of a <b> element. dict() treats group 1 as the word kind and group 2 as
# a sense.
r_info = re.compile(
   r'(?:ResultBody"><br /><br />(.*?)&nbsp;)|(?:<b>(.*?)</b>)'
)

def dict(phenny, input): 
   """Look a word up in the Encarta dictionary and report its senses.

   The word is taken from ``input.group(2)``. For every word kind found on
   the result page, up to two senses are reported, in one line sent with
   ``phenny.say``. If no word was given or no sense was found, a short
   notice is sent with ``phenny.reply`` instead.

   Arguments:
      phenny -- bot object; ``reply(text)`` and ``say(text)`` are used
      input  -- object whose ``group(2)`` yields the word to define
   """
   word = input.group(2)
   if not word: 
      # No argument: answer instead of failing on None.encode().
      phenny.reply('Nothing to define.')
      return

   def trim(thing): 
      # Drop one trailing '&nbsp;' entity, then surrounding ' ', ':' and '.'.
      if thing.endswith('&nbsp;'): 
         thing = thing[:-6]
      return thing.strip(' :.')

   page = web.get(uri % urllib.quote(word.encode('utf-8')))

   # Senses are grouped under the most recently seen word kind; senses seen
   # before any kind end up under the key None.
   results = {}
   wordkind = None
   for kind, sense in r_info.findall(page): 
      kind, sense = trim(kind), trim(sense)
      if kind: 
         wordkind = kind
      elif sense: 
         results.setdefault(wordkind, []).append(sense)

   # Test the parsed data directly instead of guessing from the shape and
   # length of the output line, which misfired for long words.
   if not results: 
      phenny.reply('Sorry, no definition found.')
      return

   entries = []
   for key in sorted(results.keys()): 
      senses = results[key]
      entry = (key or '') + ' 1. ' + senses[0]
      if len(senses) > 1: 
         entry += ', 2. ' + senses[1]
      entries.append(entry)

   result = word.encode('utf-8') + ' - ' + '; '.join(entries)
   phenny.say(result.rstrip('; '))
dict.commands = ['dict']

if __name__ == '__main__': 
   # Run as a script: show the module docstring. The call form with a single
   # argument prints the same on Python 2 and also parses on Python 3.
   print(__doc__.strip())