#!/usr/bin/env python
#---
# $Id: duali,v 1.17 2003/12/09 13:20:29 elzubeir Exp $
#
# ------------
# Description:
# ------------
#
# The Duali spellchecker
#
# (C) Copyright 2003, Arabeyes, Mohammed Elzubeir
# -----------------
# Revision Details: (Updated by Revision Control System)
# -----------------
# $Date: 2003/12/09 13:20:29 $
# $Author: elzubeir $
# $Revision: 1.17 $
# $Source: /home/arabeyes/cvs/projects/duali/pyduali/duali,v $
#
# This program is written under the BSD License.
#---
import sys, getopt, os, ConfigParser
import pyduali.aradict, pyduali.aralex, pyduali.araspell
#from pyduali.arabic import *
# Name this program was invoked as, without its directory or extension;
# interpolated into the usage and error messages.
scriptname = os.path.basename(sys.argv[0])
scriptname = os.path.splitext(scriptname)[0]

# Version string printed by -V / --version.
scriptversion = "0.2.0"

# Location of the system-wide configuration file read by readconf().
conf = "/etc/duali.conf"
def usage():
    """Print the copyright banner, the synopsis, the option list and the
    license note to standard output.

    The synopsis line is filled in with the module-level ``scriptname``.
    Returns nothing.
    """
    banner = "(C) Copyright 2003, Arabeyes, Mohammed Elzubeir\n"
    synopsis = "Usage: %s -c filename [OPTIONS]" % scriptname
    option_help = (
        "\t[-h | --help ]\toutputs this usage message",
        "\t[-V | --version ]\tprogram version",
        "\t[-c | --check= filename]\tinput file to spellcheck",
        "\t[-C | --charset ]\tcharacter encoding (cp1256, utf-8)",
        "\t[-n | --normalize ]\tturn normalize mode off (default on)",
        "\t[-p | --path ]\tpath to dictionary database",
        "\t[-v | --verbose ]\tverbose out (for debugging)",
    )
    footer = "\r\nThis program is licensed under the BSD License\n"

    # One print per entry, in the same order as the help text reads.
    for text in (banner, synopsis) + option_help + (footer,):
        print(text)
def grabargs():
    """Parse the command line and merge it with the configuration file.

    Values given on the command line take precedence over the values
    returned by readconf().  The configuration file is consulted only
    after the options have been parsed, so -h/--help and -V/--version
    work even when the configuration file is missing or malformed.

    Returns:
        A tuple ``(fname, charset, path, normalize, verbose)``:
        fname     -- value of -c/--check ('' when not given)
        charset   -- value of -C/--charset (default 'utf-8')
        path      -- value of -p/--path, else the path from readconf()
        normalize -- 0 when -n/--normalize was given, else the flag
                     from readconf()
        verbose   -- 1 when -v/--verbose was given, else 0

    Exits the process with status 0 after printing the usage text (no
    arguments, or -h) or the version (-V), and with status 2 when getopt
    rejects the command line.
    """
    verbose = 0
    charset = 'utf-8'
    fname = ''
    # None means "not set on the command line; fall back to the config".
    path = None
    normalize = None

    if not sys.argv[1:]:
        usage()
        sys.exit(0)

    try:
        # Each long option is listed exactly once: a duplicated entry makes
        # getopt treat a unique abbreviation of it as ambiguous.
        # Positional arguments are accepted by getopt but not used here.
        opts, _extra = getopt.getopt(
            sys.argv[1:], "hVvnC:p:c:",
            ["help", "version", "verbose", "normalize",
             "charset=", "path=", "check="])
    except getopt.GetoptError:
        # Report what getopt objected to before showing the usage text.
        sys.stderr.write("%s: %s\n" % (scriptname, sys.exc_info()[1]))
        usage()
        sys.exit(2)

    for o, val in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif o in ("-V", "--version"):
            print(scriptversion)
            sys.exit(0)
        elif o in ("-v", "--verbose"):
            verbose = 1
        elif o in ("-c", "--check"):
            fname = val
        elif o in ("-n", "--normalize"):
            normalize = 0
        elif o in ("-C", "--charset"):
            charset = val
        elif o in ("-p", "--path"):
            path = val

    # Fill in whatever the command line left unset from the config file.
    conf_path, conf_normalize = readconf()
    if path is None:
        path = conf_path
    if normalize is None:
        normalize = conf_normalize

    return (fname, charset, path, normalize, verbose)
def readconf():
    """Read the dictionary path and normalize flag from the config file.

    The file named by the module-level ``conf`` must contain a ``Main``
    section with the options ``DictPath`` and ``Normalize``.

    Returns:
        A tuple ``(dict_path, normalize)``.  When the file cannot be
        opened or read, a warning is printed and ``('', 1)`` is returned.

    Exits the process with status 1, after printing the "Bad config file"
    message, when the file cannot be parsed, has no ``Main`` section,
    lacks either option, or holds a non-boolean ``Normalize`` value.
    """
    bad_config = "Bad config file. Please refer to documentation. Exiting.."
    config = ConfigParser.ConfigParser()

    try:
        conf_file = open(conf)
        try:
            config.readfp(conf_file)
        finally:
            # Release the handle whether or not parsing succeeded.
            conf_file.close()
    except IOError:
        print("Warning. Unable to open '%s' configuration file" % conf)
        return ('', 1)
    except ConfigParser.Error:
        print(bad_config)
        sys.exit(1)

    if not config.has_section('Main'):
        print(bad_config)
        sys.exit(1)

    # Each option is read exactly once; a missing option or a value that
    # is not a boolean is reported like any other malformed config file.
    try:
        dict_path = config.get('Main', 'DictPath')
        normalize = config.getboolean('Main', 'Normalize')
    except (ConfigParser.Error, ValueError):
        print(bad_config)
        sys.exit(1)

    return (dict_path, normalize)
def main():
    """Spellcheck the file named on the command line and report bad words.

    Each line of the input file is passed through the lexer's
    stripextras, stripPunctuations and stripDiacritics steps and split on
    single spaces.  Every non-empty word is optionally normalized and
    handed to the spellchecker; for each word the spellchecker rejects, a
    "Line: N Word M: Incorrect: [...]" line is printed.

    Exits the process with status 0 when the whole file has been
    processed, 1 when the interpreter is too old or the input file cannot
    be opened, and 2 when no input file was given.
    """
    # Compare (major, minor) as one tuple; testing the two numbers
    # separately would reject every x.0 and x.1 release.
    if sys.version_info[:2] < (2, 2):
        print("\n%s requires Python 2.2.x at least. "
              "Please upgrade your Python version.\n" % scriptname)
        sys.exit(1)

    fname, charset, path, normalize, verbose = grabargs()

    if not fname:
        print("I need a file to check!")
        usage()
        sys.exit(2)

    # Read the input up front so an unreadable file is reported before
    # the lexer, dictionary and spellchecker are set up.
    try:
        infile = open(fname, 'r')
    except IOError:
        sys.stderr.write("%s: unable to open '%s'\n" % (scriptname, fname))
        sys.exit(1)
    try:
        lines = infile.readlines()
    finally:
        infile.close()

    #
    # initialize the aralex, aradict and araspell classes
    #
    mylex = pyduali.aralex.aralex(nostem=1, verbose=verbose)
    if path:
        mydict = pyduali.aradict.aradict(data_path=path, verbose=verbose)
    else:
        # No path configured: let aradict use its own default.
        mydict = pyduali.aradict.aradict(verbose=verbose)
    myspell = pyduali.araspell.araspell(charset=charset, verbose=verbose,
                                        mydict=mydict, mylex=mylex)

    # Line and word numbers are 1-based in the report.
    line_no = 1
    for line in lines:
        line = mylex.stripextras(line)
        # The decode/encode round trips hand text decoded from `charset`
        # to the lexer and turn its result back into `charset` bytes.
        line = mylex.stripPunctuations(line.decode(charset)).encode(charset)
        line = mylex.stripDiacritics(line.decode(charset)).encode(charset)

        word_no = 1
        for word in line.split(' '):
            word = word.strip()
            if verbose:
                print("Word: [%s]" % word)
            if not word:
                # Consecutive spaces produce empty tokens; skip them.
                continue
            if normalize:
                word = mylex.normalize(word.decode(charset)).encode(charset)
            if not myspell.spellcheck(word):
                print("Line: %d Word %d: Incorrect: [%s]"
                      % (line_no, word_no, word))
            # NOTE(review): the original indentation of this increment was
            # lost; counting only non-empty words is assumed - confirm.
            word_no += 1
        line_no += 1

    sys.exit(0)
# Run the spellchecker only when this file is executed as a script.
if __name__ == "__main__":
    main()