File: cleannlp.R

package info (click to toggle)
r-cran-nlp 0.3-2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 456 kB
  • sloc: makefile: 2
file content (61 lines) | stat: -rw-r--r-- 1,328 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
## Viewer methods for objects of class "cnlp_annotation" as obtained by
## cleanNLP::cnlp_annotate().

## Words viewer for "cnlp_annotation" objects.
##
## x:   an object whose `token` component is a table with a `token` column.
## ...: ignored.
##
## Returns the `token` column of the token table, one entry per row.
words.cnlp_annotation <- function(x, ...) {
    token_table <- x$token
    token_table$token
}

## Sentences viewer for "cnlp_annotation" objects.
##
## x:   an object whose `token` component is a table with `token` and
##      `sid` columns.
## ...: ignored.
##
## Returns a list obtained by splitting the `token` column on the `sid`
## column; list names are the group labels produced by split().
sents.cnlp_annotation <- function(x, ...) {
    token_table <- x$token
    sentence_ids <- token_table$sid
    split(token_table$token, sentence_ids)
}

## paras.cnlp_annotation <-
## function(x, ...)
## {
##     x <- x$token
##     if(is.na(match("pid", names(x))))
##         stop("unavailable paragraph ids")
##     lapply(split(x, x$pid),
##            function(e) split(e$token, e$sid))
## }

## Tagged words viewer for "cnlp_annotation" objects.
##
## x:     an object whose `token` component is a table with a `token`
##        column and a tag column named by `which`.
## which: name of the tag column to use, either "upos" (the default) or
##        "pos"; validated via match.arg().
## ...:   ignored.
##
## Returns the result of Tagged_Token() (defined elsewhere in the
## package) applied to the tokens and the selected tags.
## Signals an error if the token table has no column named `which`.
tagged_words.cnlp_annotation <-
function(x, which = c("upos", "pos"), ...)
{
    x <- x$token
    which <- match.arg(which)
    ## Fail early with a clear message instead of silently handing NULL
    ## tags to Tagged_Token() when the requested tag column is absent.
    if(is.na(match(which, names(x))))
        stop(sprintf("unavailable '%s' tags", which))
    Tagged_Token(x$token, x[[which]])
}

## Tagged sentences viewer for "cnlp_annotation" objects.
##
## x:     an object whose `token` component is a table with `token` and
##        `sid` columns and a tag column named by `which`.
## which: name of the tag column to use, either "upos" (the default) or
##        "pos"; validated via match.arg().
## ...:   ignored.
##
## Returns the list built by .tagged_sents_from_cnlp_token_frame(), i.e.
## one Tagged_Token() result per group of rows sharing a `sid` value.
## Signals an error if the token table has no column named `which`.
tagged_sents.cnlp_annotation <-
function(x, which = c("upos", "pos"), ...)
{
    x <- x$token
    which <- match.arg(which)
    ## Fail early with a clear message instead of silently handing NULL
    ## tags to Tagged_Token() for every sentence when the requested tag
    ## column is absent.
    if(is.na(match(which, names(x))))
        stop(sprintf("unavailable '%s' tags", which))
    .tagged_sents_from_cnlp_token_frame(x, which)
}

## Internal helper: build per-sentence tagged tokens from a token table.
##
## x:     a data frame with `sid` and `token` columns and a tag column
##        named by `which`.
## which: name of the tag column to pair with the tokens.
##
## Returns a list with one element per group produced by splitting the
## rows of `x` on `sid`; each element is the Tagged_Token() result for
## that group's tokens and tags.
.tagged_sents_from_cnlp_token_frame <- function(x, which) {
    tag_sentence <- function(rows) {
        Tagged_Token(rows$token, rows[[which]])
    }
    sentence_frames <- split(x, x$sid)
    lapply(sentence_frames, tag_sentence)
}

## tagged_paras.cnlp_annotation <-
## function(x, which = c("upos", "pos"), ...)
## {
##     x <- x$token
##     if(is.na(match("pid", names(x))))
##         stop("unavailable paragraph ids")
##     which <- match.arg(which)
##     lapply(split(x, x$pid),
##            .tagged_sents_from_cnlp_token_frame,
##            which)
## }