1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
|
import argparse
import json
import logging
import sys
from pathlib import Path
import wn
from wn import lmf
from wn._util import format_lexicon_specifier
from wn.project import iterpackages
from wn.validate import validate
def _download(args):
    """Handle the ``download`` subcommand.

    When ``args.index`` is set, it is loaded as the project index via
    ``wn.config.load_index`` before anything is downloaded.  Every entry
    of ``args.target`` is then passed to ``wn.download`` in order, with
    ``args.add`` forwarded as the ``add`` keyword.
    """
    index_path = args.index
    if index_path:
        wn.config.load_index(index_path)

    for spec in args.target:
        wn.download(spec, add=args.add)
def _lexicons(args):
    """Handle the ``lexicons`` subcommand.

    Queries ``wn.lexicons`` with the ``args.lang`` and ``args.lexicon``
    filters and prints one tab-separated line per result: id, version,
    the language in square brackets, and the label.
    """
    found = wn.lexicons(lang=args.lang, lexicon=args.lexicon)
    for lexicon in found:
        columns = (
            lexicon.id,
            lexicon.version,
            f"[{lexicon.language}]",
            lexicon.label,
        )
        print("\t".join(columns))
def _projects(args):
    """Handle the ``projects`` subcommand.

    Prints one tab-separated line per entry yielded by ``wn.projects()``:
    a status key, the project id, the version, the language in square
    brackets, and the label.  A missing (falsy) language or label is
    shown as ``---``.

    The status key always starts with ``i``; its second character is
    ``c`` when the entry's ``"cache"`` value is truthy and ``-`` otherwise.
    """
    for project in wn.projects():
        cached_flag = "c" if project["cache"] else "-"
        status = "i" + cached_flag
        # TODO: check if project is added to db (would append 'a' or '-')
        row = (
            status,
            project["id"],
            project["version"],
            f"[{project['language'] or '---'}]",
            project["label"] or "---",
        )
        print("\t".join(row))
def _validate(args):
    """Handle the ``validate`` subcommand.

    ``args.select`` is split on commas into the list of checks to run.
    Every lexicon of every package found under ``args.FILE`` is validated
    and reported on one line as its specifier followed by ``passed`` or
    ``failed``.  For a failing lexicon, the checks that flagged nothing
    are removed from the report, which is then either dumped as JSON to
    ``args.output_file`` or printed check by check.

    The process always exits from here: status 0 when every lexicon
    passed, 1 otherwise.
    """
    selected_checks = [name.strip() for name in args.select.split(",")]
    any_failed = False

    for package in iterpackages(args.FILE):
        resource = lmf.load(package.resource_file())
        for lexicon in resource["lexicons"]:
            spec = format_lexicon_specifier(lexicon["id"], lexicon["version"])
            print(f"{spec:<20}", end="")
            report = validate(lexicon, select=selected_checks)

            if not any(check.get("items", []) for check in report.values()):
                print("passed")
                continue

            print("failed")
            any_failed = True

            # Keep only the checks that actually flagged something.
            for code in list(report):
                if not report[code].get("items"):
                    del report[code]

            if args.output_file:
                # NOTE(review): the file is reopened in "w" mode for each
                # failing lexicon, so with several failures only the last
                # report appears to survive -- confirm this is intended.
                with open(args.output_file, "w") as outfile:
                    json.dump(report, outfile, indent=2)
                continue

            for check in report.values():
                if not check["items"]:
                    continue
                print(f" {check['message']}")
                for item_id, context in check["items"].items():
                    print(f" {item_id}: {context}" if context else f" {item_id}")

    sys.exit(1 if any_failed else 0)
def _path_type(arg):
    """Argparse ``type`` callable that wraps the raw argument in a ``Path``.

    No check is made that the path exists.
    """
    path = Path(arg)
    return path
def _file_path_type(arg):
    """Argparse ``type`` callable for an argument that must be an existing file.

    Args:
        arg: The raw command-line argument.

    Returns:
        ``Path(arg)`` when it points at an existing regular file.

    Raises:
        argparse.ArgumentTypeError: If the path does not exist or is not a
            file (for example, a directory).  argparse reports this to the
            user as a usage error.
    """
    path = Path(arg)
    if not path.is_file():
        # The message previously read "cannot file file"; fixed the typo.
        raise argparse.ArgumentTypeError(f"cannot find file: {arg}")
    return path
# Top-level parser carrying the options shared by every subcommand.
parser = argparse.ArgumentParser(
    description="Manage Wn's wordnet data from the command line.",
    prog="python3 -m wn",
)
parser.add_argument(
    "-V",
    "--version",
    version=f"Wn {wn.__version__}",
    action="version",
)
# Each repetition of -v adds one to args.verbosity (0 when absent).
parser.add_argument(
    "-v",
    "--verbose",
    dest="verbosity",
    default=0,
    action="count",
    help="increase verbosity (can repeat: -vv, -vvv)",
)
parser.add_argument(
    "-d",
    "--dir",
    help="data directory for Wn's database and cache",
    type=_path_type,
)
# Fallback handler: print the help text when no subcommand sets ``func``.
parser.set_defaults(func=lambda _: parser.print_help())

sub_parsers = parser.add_subparsers(title="subcommands")
# "download" subcommand, dispatched to _download().
parser_download = sub_parsers.add_parser(
    "download",
    help="download wordnets",
    description="Download wordnets and add them to Wn's database.",
)
parser_download.add_argument(
    "target",
    help="project specifiers or URLs",
    nargs="+",
)
parser_download.add_argument(
    "--index",
    help="project index to use for downloading",
    type=_file_path_type,
)
# store_false: args.add is True unless --no-add is given.
parser_download.add_argument(
    "--no-add",
    dest="add",
    action="store_false",
    help="download and cache without adding to the database",
)
parser_download.set_defaults(func=_download)
# "lexicons" subcommand, dispatched to _lexicons().
parser_lexicons = sub_parsers.add_parser(
    "lexicons",
    help="list installed lexicons",
    description="Display a list of installed lexicons.",
)
parser_lexicons.add_argument(
    "-l",
    "--lang",
    help="BCP 47 language code",
)
parser_lexicons.add_argument(
    "--lexicon",
    help="lexicon specifiers",
)
parser_lexicons.set_defaults(func=_lexicons)
# "projects" subcommand, dispatched to _projects(); it takes no options.
parser_projects = sub_parsers.add_parser(
    "projects",
    help="list known projects",
    description=(
        "Display a list of known projects. "
        "The first column shows the status for a project "
        "(i=indexed, c=cached)."
    ),
)
parser_projects.set_defaults(func=_projects)
# "validate" subcommand, dispatched to _validate().
parser_validate = sub_parsers.add_parser(
    "validate",
    help="validate a lexicon",
    description="Validate a WN-LMF lexicon",
)
parser_validate.add_argument(
    "FILE",
    help="WN-LMF (XML) lexicon file to validate",
    type=_file_path_type,
)
# The raw comma-separated string is split by the handler, not by argparse.
parser_validate.add_argument(
    "--select",
    default="E,W",
    metavar="CHECKS",
    help="comma-separated list of checks to run (default: E,W)",
)
parser_validate.add_argument(
    "--output-file",
    metavar="FILE",
    help="write report to a JSON file",
)
parser_validate.set_defaults(func=_validate)
# Parse the command line, apply the global options, then run the handler
# stored in ``func`` (a subcommand's handler or the help fallback).
args = parser.parse_args()

# Start at ERROR and lower the threshold by one level (10) per -v, for at
# most three steps, which bottoms out at DEBUG.
logging.basicConfig(level=logging.ERROR - 10 * min(args.verbosity, 3))

if args.dir:
    wn.config.data_directory = args.dir

args.func(args)
|