File: spellcheck.py

package info (click to toggle)
python-jtoolkit 0.7.8-2
links: PTS
area: main
in suites: etch, etch-m68k
size: 1,436 kB
ctags: 2,536
sloc: python: 15,143; makefile: 20
file content (150 lines) | stat: -rwxr-xr-x 5,203 bytes
parent folder | download | duplicates (2)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""simple python wrapper for aspell"""

# Copyright 2004 St James Software
# 
# This file is part of jToolkit.
#
# jToolkit is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# jToolkit is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with jToolkit; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import os
try:
    import subprocess
except ImportError:
    subprocess = None

class SpellingError(Exception):
    """error raised when the aspell executable cannot be found"""

def find_aspell():
    """
    Locate Aspell's executable in the system.

    The places searched are, in order: every directory listed in the PATH
    environment variable, then (on Windows only) the directory stored in the
    "Path" value of the Software/Aspell registry key under HKEY_LOCAL_MACHINE
    and HKEY_CURRENT_USER, and finally the current directory.

    Returns the path of the executable (just its file name when it was found
    in the current directory).
    Raises SpellingError if no aspell executable could be found.
    """
    aspellname = "aspell"
    if os.name == "nt":
        aspellname += os.extsep + "exe"
    # PATH may be missing from the environment altogether; treat that as an
    # empty search path rather than failing on None.split()
    search_path = os.environ.get('PATH', '')
    if search_path:
        directories = search_path.split(os.pathsep)
    else:
        directories = []
    for directory in directories:
        candidate = os.path.join(directory, aspellname)
        # isfile rather than exists: a directory called "aspell" is not
        # something that can be executed
        if os.path.isfile(candidate):
            return candidate
    # try check in the registry otherwise...
    if os.name == "nt":
        from jToolkit import winreg
        import win32con
        hklm_key = winreg.regkey(win32con.HKEY_LOCAL_MACHINE, "Software").childkey("Aspell")
        hkcu_key = winreg.regkey(win32con.HKEY_CURRENT_USER, "Software").childkey("Aspell")
        for aspell_key in (hklm_key, hkcu_key):
            try:
                path = aspell_key.getvalue("Path")
            except AttributeError:
                # presumably the key does not exist and childkey gave back an
                # object without getvalue - verify against jToolkit.winreg
                continue
            # only the first element of the returned value is the directory
            # (presumably a (value, type) pair - verify against jToolkit.winreg)
            path = path[0]
            candidate = os.path.join(path, aspellname)
            if os.path.isfile(candidate):
                return candidate
    # this will just look in the current directory
    if os.path.isfile(aspellname):
        return aspellname
    raise SpellingError("Could not find aspell executable")

def have_checker():
    """returns True if an aspell executable can be located, False otherwise"""
    try:
        find_aspell()
    except SpellingError:
        # find_aspell signals "not found" by raising SpellingError
        available = False
    else:
        available = True
    return available

def run_aspell(text, lang=None):
    """runs aspell on the given text, returning output and error_output

    text is the text to be checked; unicode text is sent to aspell encoded as
    utf-8. When text is empty or None nothing is written to aspell at all.
    lang, if given, is passed to aspell as --lang=<lang>.

    Returns a (results, error_results) tuple with what aspell wrote to its
    standard output and standard error, both decoded from utf-8.
    Raises SpellingError (from find_aspell) if aspell cannot be found.
    """
    aspellexe = find_aspell()

    args = [aspellexe, '-a']
    if lang:
        args.append("--lang=%s" % lang)
    if isinstance(text, unicode):
        text = text.encode('utf-8')
        # TODO: we can do this only if aspell supports --encoding (e.g. version 0.50, not version 0.33!)
        # args.append("--encoding=utf-8")
    # NOTE(review): the text is sent to aspell unescaped, as before. Aspell's
    # pipe mode appears to treat lines starting with characters such as '*',
    # '#' or '!' as commands - confirm against the Aspell manual whether each
    # line should be protected with a leading '^'.
    if text:
        # the leading '!' line puts aspell into terse mode (so no *'s)
        payload = '!\n' + text
    else:
        payload = ''
    if subprocess:
        PIPE = subprocess.PIPE
        try:
            # the arguments are passed as a list, without a shell, so neither
            # the executable path nor lang can be interpreted as shell syntax
            process = subprocess.Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        except OSError:
            # a shell would have reported an executable that cannot be started
            # on stderr; keep handing that to the caller as error output
            import sys
            results, error_results = '', str(sys.exc_info()[1])
        else:
            # communicate() feeds stdin while draining stdout and stderr and
            # then waits for aspell to exit; writing and reading the pipes one
            # after the other can deadlock once a pipe buffer fills up
            results, error_results = process.communicate(payload)
    else:
        # no subprocess module available: fall back to the original
        # shell command run through os.popen3
        command = " ".join(['"%s"' % aspellexe] + args[1:])
        child_in, child_out, child_err = os.popen3(command)
        if payload:
            child_in.write(payload)
        child_in.close()
        results = child_out.read()
        child_out.close()
        error_results = child_err.read()
        child_err.close()
    if isinstance(results, str):
        results = results.decode('utf-8')
    if isinstance(error_results, str):
        error_results = error_results.decode('utf-8')
    return results, error_results

# remembers the outcome of can_check_lang for each language already tried
lang_cache = {}

def can_check_lang(lang=None, cache=True):
    """checks whether the given language can be spellchecked

    lang is the language to pass on to aspell (None leaves the choice to aspell).
    cache controls whether lang_cache is consulted for, and updated with, the
    answer.

    Returns False when no spell checker is present or when running aspell for
    the language produces error output, True otherwise.
    """
    if not have_checker():
        return False
    if cache and lang in lang_cache:
        return lang_cache[lang]
    # run aspell without any text: only its error output is of interest here
    results, error_results = run_aspell("", lang)
    # whitespace-only error output is not an error (check() treats it the same way)
    can_check = not error_results.strip()
    if cache:
        lang_cache[lang] = can_check
    return can_check

def check(text=None,lang=None):
    """
    performs a spell check on the text and returns a list of tuples in the form
    (word,index,[suggestions])

    word is the misspelled word as reported by aspell, index is the position at
    which it was located in text (-1 if it could not be located) and
    suggestions is the list of replacements offered by aspell

    raises IOError if aspell wrote anything but whitespace to its error output

    note:
    words in the returned sequence occur in the same order as they were found in the text
    """
    results, error_results = run_aspell(text, lang)
    if error_results.strip():
        raise IOError("error running aspell:\n"+error_results)
    # split it up and get rid of all those empty lines
    parts = [part.strip() for part in results.split('\n') if part.strip()]
    # the first line is always rubbish (it is not a result for any word)
    parts = parts[1:]

    # get all the mistakes into a nice list of tuples
    start = 0
    mistakes = []
    for part in parts:
        # only lines of the form "& word count offset: sugg1, sugg2, ..." are
        # reported.
        # NOTE(review): '#' lines (in ispell-compatible pipe output these mark
        # misspellings without suggestions - confirm against the Aspell manual)
        # are skipped, as they always were: the original '#' branch could
        # never execute.
        if not part.startswith('&'):
            continue
        # split on the first colon only, whatever the suggestions contain
        front, back = part.split(':', 1)
        word = front.split()[1]
        # suggestions are comma separated and may themselves contain spaces,
        # so only the whitespace around each of them is removed
        suggestions = [suggestion.strip() for suggestion in back.split(',') if suggestion.strip()]
        index = text.find(word, start)
        if index >= 0:
            # carry on searching after this occurrence; when the word was not
            # located the search position is left where it was
            start = index + len(word)
        mistakes.append((word, index, suggestions))

    return mistakes