File: tagsets.Rd

package info (click to toggle)
r-cran-nlp 0.1-9-1~bpo8%2B1
  • links: PTS, VCS
  • area: main
  • in suites: jessie-backports
  • size: 376 kB
  • sloc: makefile: 1
file content (61 lines) | stat: -rw-r--r-- 1,930 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
\name{tagsets}
\alias{Penn_Treebank_POS_tags}
\alias{Brown_POS_tags}
\alias{Universal_POS_tags}
\alias{Universal_POS_tags_map}
\title{NLP Tag Sets}
\description{
  Tag sets frequently used in Natural Language Processing.
}
\usage{
Penn_Treebank_POS_tags
Brown_POS_tags
Universal_POS_tags
Universal_POS_tags_map
}
\details{
  \code{Penn_Treebank_POS_tags} and \code{Brown_POS_tags} provide,
  respectively, the Penn Treebank POS tags 
  (\url{http://www.cis.upenn.edu/~treebank}) and the POS tags used for
  the Brown corpus
  (\url{http://www.hit.uib.no/icame/brown/bcm.html}), both as data
  frames with the following variables:
  \describe{
    \item{entry}{a character vector with the POS tags}
    \item{description}{a character vector with short descriptions of the
      tags}
    \item{examples}{a character vector with examples for the tags}
  }

  \code{Universal_POS_tags} provides the universal POS tagset introduced
  by Slav Petrov, Dipanjan Das, and Ryan McDonald
  (\url{http://arxiv.org/abs/1104.2086}), as a data frame with character
  variables \code{entry} and \code{description}.

  \code{Universal_POS_tags_map} is a named list of mappings from
  language- and treebank-specific POS tagsets to the universal POS tags,
  with elements named \samp{en-ptb} and \samp{en-brown} giving the
  mappings for the Penn Treebank and Brown POS tags, respectively.
}
\source{
  \url{http://www.comp.leeds.ac.uk/ccalas/tagsets/upenn.html},
  \url{http://www.comp.leeds.ac.uk/ccalas/tagsets/brown.html},
  \url{https://code.google.com/p/universal-pos-tags/}.
}
\examples{
## Penn Treebank POS tags
dim(Penn_Treebank_POS_tags)
## Inspect the first 20 entries:
write.dcf(head(Penn_Treebank_POS_tags, 20L))

## Brown POS tags
dim(Brown_POS_tags)
## Inspect the first 20 entries:
write.dcf(head(Brown_POS_tags, 20L))

## Universal POS tags
Universal_POS_tags

## Available mappings to universal POS tags
names(Universal_POS_tags_map)
}