File: tag.go

package info (click to toggle)
golang-github-jdkato-prose 1.1.0%2Bgit20171031.e27abfd-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 12,848 kB
  • sloc: python: 115; makefile: 55; sh: 21
file content (37 lines) | stat: -rw-r--r-- 952 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/*
Package tag implements functions for tagging parts of speech.
*/
package tag

import "strings"

// Token pairs a section of text with the tag assigned to it.
//
// Text holds the section of text and Tag holds its tag.
type Token struct {
	Text, Tag string
}

// TupleSlice is a slice of tuples in the form (words, tags).
//
// ReadTagged builds each element as a two-item [][]string: index 0 holds the
// words of one sentence and index 1 holds the tags, one tag per word.
type TupleSlice [][][]string

// Len reports how many tuples the TupleSlice holds.
func (t TupleSlice) Len() int {
	return len(t)
}

// Swap exchanges the tuples stored at positions i and j of the TupleSlice.
func (t TupleSlice) Swap(i, j int) {
	atJ, atI := t[j], t[i]
	t[i] = atJ
	t[j] = atI
}

// ReadTagged converts pre-tagged input into a TupleSlice suitable for training.
//
// text holds one sentence per line. Within a line, tokens are separated by a
// single space and each token has the form word<sep>tag. Every sentence
// contributes one element of the form [][]string{words, tags}. When a token
// contains sep more than once, the word is the piece before the first sep and
// the tag is the piece between the first and second sep.
//
// Tokens that do not contain sep (such as the empty tokens produced by blank
// lines, a trailing newline, or repeated spaces) are skipped rather than
// causing an index-out-of-range panic. Lines that yield no tokens add no
// element to the result. The returned TupleSlice is never nil.
func ReadTagged(text, sep string) TupleSlice {
	t := TupleSlice{}
	for _, sent := range strings.Split(text, "\n") {
		tokens := []string{}
		tags := []string{}
		for _, token := range strings.Split(sent, " ") {
			parts := strings.Split(token, sep)
			if len(parts) < 2 {
				// No separator in this token, so there is no tag to read.
				continue
			}
			tokens = append(tokens, parts[0])
			tags = append(tags, parts[1])
		}
		if len(tokens) == 0 {
			// Blank or entirely untagged line: nothing to train on.
			continue
		}
		t = append(t, [][]string{tokens, tags})
	}
	return t
}