File: common.py

package info (click to toggle)
frog 0.12.17-7.1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 2,140 kB
  • ctags: 575
  • sloc: sh: 11,167; cpp: 5,146; python: 765; makefile: 38
file content (27 lines) | stat: -rwxr-xr-x 684 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import unicodedata


# Names exported by "from common import *": only the column-index
# constants below, not the helper functions.
__all__ = [
	"ID",
	"FORM",
	"LEMMA",
	"CPOSTAG",
	"POSTAG",
	"FEATS",
	"HEAD",
	"DEPREL",
	"PHEAD",
	"PDEPREL",
]

# Zero-based positions used to index into a token row (pairIterator reads
# token[FORM] and token[ID]).
# NOTE(review): the names look like the ten CoNLL-X columns -- confirm
# against whatever code parses the input.
ID = 0
FORM = 1
LEMMA = 2
CPOSTAG = 3
POSTAG = 4
FEATS = 5
HEAD = 6
DEPREL = 7
PHEAD = 8
PDEPREL = 9


def isScoringToken(token):
	"""Return True unless the token contains an "other punctuation" character.

	A token is non-scoring as soon as any one of its characters has the
	Unicode general category "Po" (for example "!", "," or ".").  Other
	punctuation categories, such as dashes or brackets, do not count.
	An empty token is scoring.

	token may be a UTF-8 encoded byte string, which is decoded first, or
	already-decoded text, which is inspected as is.

	Raises UnicodeDecodeError if a byte string is not valid UTF-8.
	"""
	# Decode byte strings only.  Text objects either have no decode()
	# at all (Python 3 str) or would be pushed through an implicit ASCII
	# encode first (Python 2 unicode), which fails on non-ASCII tokens.
	if isinstance(token, bytes):
		token = token.decode("utf-8")

	return not any(unicodedata.category(char) == "Po" for char in token)


def pairIterator(sentence, options):
	"""Generate the ordered (dependent, head) token pairs of a sentence.

	Every token is paired with every other token of the same sentence; a
	token is never paired with itself (compared by identity).  Pairs are
	produced lazily, dependent-major, in sentence order.

	sentence -- iterable of token rows that can be traversed repeatedly;
	            each row is indexed with FORM and ID, and row[ID] must be
	            convertible with int().
	options  -- object providing two attributes:
	            skipNonScoring: when true, pairs whose dependent fails
	                isScoringToken(dependent[FORM]) are dropped;
	            maxDist: when true (non-zero), pairs whose ID distance
	                exceeds it are dropped; a false value means no limit.
	"""
	for dependent in sentence:
		for head in sentence:
			# A token cannot be its own head.
			if head is dependent:
				continue

			# Only the dependent side is tested for being a scoring token.
			if options.skipNonScoring and \
				   not isScoringToken(dependent[FORM]):
				continue

			# The distance is always computed, even when no limit is set.
			distance = abs(int(dependent[ID]) - int(head[ID]))
			withinRange = not options.maxDist or distance <= options.maxDist
			if withinRange:
				yield dependent, head