File: common.py

package info (click to toggle)
frog 0.12.15-3
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 2,096 kB
  • sloc: sh: 11,167; cpp: 4,661; python: 765; makefile: 32
file content (27 lines) | stat: -rwxr-xr-x 684 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import unicodedata


# A star-import of this module exposes only the column-index constants below.
__all__ = [
	"ID",
	"FORM",
	"LEMMA",
	"CPOSTAG",
	"POSTAG",
	"FEATS",
	"HEAD",
	"DEPREL",
	"PHEAD",
	"PDEPREL",
]

# Integer positions 0..9 used to index into a token row (e.g. token[FORM],
# token[ID]).  The names match the ten columns of the CoNLL-X dependency
# format -- presumably the row layout produced by the reader; verify there.
(ID, FORM, LEMMA, CPOSTAG, POSTAG,
 FEATS, HEAD, DEPREL, PHEAD, PDEPREL) = range(10)


def isScoringToken(token):
	"""Return True unless the token contains a Unicode "Po" character.

	token may be a UTF-8 encoded byte string, which is decoded first, or an
	already-decoded text string, which is inspected as is.  A single
	character of general category "Po" (Punctuation, other) anywhere in the
	token makes the whole token non-scoring.  An empty token is scoring.

	Raises UnicodeDecodeError if a byte string is not valid UTF-8.
	"""
	# Only byte strings need decoding.  Calling .decode() on text fails on
	# Python 3 and triggers an implicit ASCII encode on Python 2, which
	# breaks on non-ASCII input.
	if isinstance(token, bytes):
		token = token.decode("utf-8")

	return not any(unicodedata.category(char) == "Po" for char in token)


def pairIterator(sentence, options):
	"""Yield (dependent, head) pairs of distinct tokens from a sentence.

	sentence is iterated once in the outer loop and again for every
	dependent, so it must be re-iterable (e.g. a list).  Each token is
	indexed with the module's ID and FORM constants, and its ID field must
	be convertible with int().

	options must provide two attributes:
	  skipNonScoring -- when true, tokens whose FORM is rejected by
	                    isScoringToken() are never used as the dependent
	                    (they can still appear as the head).
	  maxDist        -- when true (non-zero), only pairs whose IDs differ by
	                    at most this much are yielded; a false value means
	                    no distance limit.

	Pairs are produced in sentence order of the dependent, then of the head.
	A token is never paired with itself (identity comparison).
	"""
	for dependent in sentence:
		# Whether the dependent counts depends only on the dependent, so
		# decide once here instead of once per candidate head.
		if options.skipNonScoring and not isScoringToken(dependent[FORM]):
			continue

		# Likewise convert the dependent's ID a single time.
		dependentId = int(dependent[ID])

		for head in sentence:
			if dependent is head:
				continue

			dist = abs(dependentId - int(head[ID]))
			if not options.maxDist or dist <= options.maxDist:
				yield dependent, head