File: tagsets.Rd

package info (click to toggle)
r-cran-nlp 0.3-2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 456 kB
  • sloc: makefile: 2
file content (63 lines) | stat: -rw-r--r-- 2,059 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
\name{tagsets}
\alias{Penn_Treebank_POS_tags}
\alias{Brown_POS_tags}
\alias{Universal_POS_tags}
\alias{Universal_POS_tags_map}
\title{NLP Tag Sets}
\description{
  Tag sets frequently used in Natural Language Processing.
}
\usage{
Penn_Treebank_POS_tags
Brown_POS_tags
Universal_POS_tags
Universal_POS_tags_map
}
\details{
  \code{Penn_Treebank_POS_tags} and \code{Brown_POS_tags} provide,
  respectively, the Penn Treebank POS tags
  (\url{https://catalog.ldc.upenn.edu/docs/LDC95T7/cl93.html}, Table 2)
  and the POS tags used for the Brown corpus
  (\url{https://en.wikipedia.org/wiki/Brown_Corpus}),
  both as data frames with the following variables:
  \describe{
    \item{\code{entry}}{a character vector with the POS tags}
    \item{\code{description}}{a character vector with short descriptions
      of the tags}
    \item{\code{examples}}{a character vector with examples for the tags}
  }

  \code{Universal_POS_tags} provides the universal POS tagset introduced
  by Slav Petrov, Dipanjan Das, and Ryan McDonald
  (\doi{10.48550/arXiv.1104.2086}), as a data frame with character
  variables \code{entry} and \code{description}.

  \code{Universal_POS_tags_map} is a named list of mappings from
  language- and treebank-specific POS tagsets to the universal POS tags,
  with elements named \samp{en-ptb} and \samp{en-brown} giving the
  mappings, respectively, for the Penn Treebank and Brown POS tags.
}
\source{
  \url{https://catalog.ldc.upenn.edu/docs/LDC95T7/cl93.html},
  % As of 2024-07, \url{http://www.hit.uib.no/icame/brown/bcm.html} no
  % longer works, so use the download info from Wikipedia.
  \url{https://www.nltk.org/nltk_data/},
  \url{https://github.com/slavpetrov/universal-pos-tags}.
}
\examples{
## Penn Treebank POS tags
dim(Penn_Treebank_POS_tags)
## Inspect first 20 entries:
write.dcf(head(Penn_Treebank_POS_tags, 20L))

## Brown POS tags
dim(Brown_POS_tags)
## Inspect first 20 entries:
write.dcf(head(Brown_POS_tags, 20L))

## Universal POS tags
Universal_POS_tags

## Available mappings to universal POS tags
names(Universal_POS_tags_map)
}