File: common.py

package info (click to toggle)

frog 0.12.15-3

links: PTS, VCS
area: main
in suites: wheezy
size: 2,096 kB
sloc: sh: 11,167; cpp: 4,661; python: 765; makefile: 32

file content (27 lines) | stat: -rwxr-xr-x 684 bytes

parent folder | download | duplicates (2)

import unicodedata


# Only the column-index constants are exported by ``from common import *``.
__all__ = [
	"ID", "FORM", "LEMMA", "CPOSTAG", "POSTAG",
	"FEATS", "HEAD", "DEPREL", "PHEAD", "PDEPREL",
]

# Integer indices 0..9, one per token field, in the order listed above.
# NOTE(review): the names look like the CoNLL-X column layout -- confirm.
(ID, FORM, LEMMA, CPOSTAG, POSTAG,
	FEATS, HEAD, DEPREL, PHEAD, PDEPREL) = range(10)


def isScoringToken(token):
	"""Return True unless *token* contains an "other punctuation" character.

	A token is treated as non-scoring as soon as any one of its characters
	has the Unicode general category ``Po``.  Characters in the remaining
	punctuation categories (dashes, brackets, quotes, ...) do not affect
	the result, and an empty token is scoring.

	token -- the word form, either as a UTF-8 encoded byte string or as
	         already decoded text.

	Raises UnicodeDecodeError if a byte string is not valid UTF-8.
	"""
	# Only byte strings need decoding.  Calling .decode() on text that is
	# already decoded fails on Python 3 and forces an implicit ASCII
	# round-trip on Python 2, so such input is used unchanged.
	if isinstance(token, bytes):
		token = token.decode("utf-8")

	return not any(unicodedata.category(char) == "Po" for char in token)


def pairIterator(sentence, options):
	"""Yield (dependent, head) pairs of distinct tokens from *sentence*.

	Every ordered pair of two different items of *sentence* is considered;
	a pair is yielded only if it passes both optional filters:

	options.skipNonScoring -- when true, pairs whose dependent has a FORM
	                          rejected by isScoringToken() are dropped.
	options.maxDist        -- when true (non-zero), pairs whose ID values
	                          differ by more than this amount are dropped.

	*sentence* is iterated once in the outer loop and once per dependent
	in the inner loop, so it has to support repeated iteration.
	"""
	for dependent in sentence:
		for head in sentence:
			# A token is never paired with itself (identity, not equality).
			if dependent is head:
				continue

			# Only the dependent's form decides whether a pair is scored.
			if options.skipNonScoring and \
					not isScoringToken(dependent[FORM]):
				continue

			# Distance between the two tokens, measured on their ID fields.
			gap = abs(int(dependent[ID]) - int(head[ID]))
			if not options.maxDist or gap <= options.maxDist:
				yield dependent, head