File: morphlength_from_annotations.py

package info (click to toggle)
morfessor 2.0.6-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 332 kB
  • sloc: python: 2,456; makefile: 147
file content (21 lines) | stat: -rw-r--r-- 513 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from __future__ import division
import fileinput


def main():
    """Print the average morph length (in characters) of an annotation set.

    Input is read through ``fileinput.input()``.  Every non-blank line must
    consist of a word, whitespace, and one or more comma-separated
    alternative segmentations; each segmentation is a whitespace-separated
    list of morphs.  Tokens that are empty once ``"~"`` characters are
    stripped are not counted as morphs.

    For each word the morph count is averaged over its alternative
    segmentations.  The printed value is the total number of word
    characters divided by the total of those per-word average morph counts.

    Raises:
        ValueError: if a non-blank line has a word but no segmentation.
        SystemExit: if the input yields no morphs, in which case the
            average is undefined.
    """
    tot_morph_count = 0
    tot_length = 0

    try:
        for line in fileinput.input():
            fields = line.strip().split(None, 1)
            if not fields:
                # Blank (or whitespace-only) line: nothing to measure.
                continue
            if len(fields) != 2:
                raise ValueError(
                    "%s:%d: expected '<word> <segmentation>[, ...]', got %r"
                    % (fileinput.filename(), fileinput.filelineno(), line))

            word, segm = fields
            # One morph count per alternative segmentation of this word.
            num_morphs = [
                sum(1 for morph in alternative.split() if morph.strip("~"))
                for alternative in segm.split(',')
            ]

            # str.split(',') always yields at least one item, so the
            # divisor below can never be zero.
            tot_morph_count += sum(num_morphs) / len(num_morphs)
            tot_length += len(word)
    finally:
        # Release the module-level fileinput state even when a malformed
        # line aborts the loop, so a later fileinput.input() call works.
        fileinput.close()

    if not tot_morph_count:
        raise SystemExit(
            "error: no morphs found in input; "
            "average morph length is undefined")

    print(tot_length / tot_morph_count)


# Run the computation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()