File: rdf_parse.R

package info (click to toggle)
r-cran-rdflib 0.2.9%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 596 kB
  • sloc: xml: 66; sh: 13; makefile: 2
file content (98 lines) | stat: -rw-r--r-- 3,137 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#' Parse RDF Files
#'
#' Reads an RDF document into a redland model. String and URL inputs are
#' first materialised as a temporary local file, and JSON-LD input is
#' converted to N-Quads (via the optional jsonld package) before being
#' handed to the redland parser. All temporary files and the parser are
#' released when the function exits, whether it succeeds or fails.
#'
#' @param doc path, URL, or literal string of the rdf document to parse
#' @param format rdf serialization format of the doc,
#' one of "rdfxml", "nquads", "ntriples", "turtle"
#' or "jsonld". If not provided, will try to guess based
#' on file extension and fall back on rdfxml.
#' @param rdf an existing rdf triplestore to extend with triples from
#' the parsed file.  Default will create a new rdf object.
#' @param base the base URI to assume for any relative URIs (blank nodes).
#' The same value is used for the JSON-LD to N-Quads conversion and for
#' the redland parser.
#' @param ... additional parameters (not implemented)
#'
#' @return an rdf object, containing the redland world
#'  and model objects
#' @importClassesFrom redland World Storage Model Parser
#' @importMethodsFrom redland parseFileIntoModel freeParser
#' @export
#'
#' @examples
#' doc <- system.file("extdata", "dc.rdf", package="redland")
#' rdf <- rdf_parse(doc)
#'
rdf_parse <- function(doc,
                      format = c("guess",
                                 "rdfxml",
                                 "nquads",
                                 "ntriples",
                                 "turtle",
                                 "jsonld"),
                      rdf = NULL,
                      base = getOption("rdf_base_uri", "localhost://"),
                      ...){

  format <- match.arg(format)
  if (format == "guess") {
    format <- guess_format(doc)
  }

  ## Scratch locations: one for string/URL input, one for the N-Quads
  ## produced from JSON-LD. Register the cleanup immediately so the files
  ## are removed even if a later step (download, conversion, parse) fails.
  ## unlink() silently ignores paths that were never created.
  tmp_string <- tempfile()
  tmp_json <- tempfile()
  on.exit(unlink(c(tmp_string, tmp_json)), add = TRUE)

  # convert string input or url to local file
  doc <- text_or_url_to_doc(doc, tmp_string)

  ## redland doesn't support jsonld, so rewrite as nquads using the jsonld
  ## package. We write to a temp file to avoid altering the input doc, since
  ## parsing a local file should be a read-only task!
  if (format == "jsonld") {

    if (!requireNamespace("jsonld", quietly = TRUE)) {
      stop("please install the jsonld package to use this functionality.")
    }

    ## Use the caller-supplied `base` (its default is the same
    ## "rdf_base_uri" option) so relative IRIs are resolved consistently
    ## with the redland parse step below.
    nquads <- jsonld::jsonld_to_rdf(doc,
                                    options = list(
                                      base = base,
                                      format = "application/nquads"))
    writeLines(nquads, tmp_json)
    format <- "nquads"
    doc <- tmp_json
  }

  if (is.null(rdf)) {
    rdf <- rdf()
  }

  mimetype <- unname(rdf_mimetypes[format])
  parser <- new("Parser", rdf$world, name = format, mimeType = mimetype)
  ## Free the parser on exit so it is released even when parsing errors out.
  on.exit(redland::freeParser(parser), add = TRUE)

  redland::parseFileIntoModel(parser, rdf$world, doc, rdf$model, baseUri = base)

  ## return rdf object (pointer)
  rdf
}



# rdf functions like working with local files
# this helper function allows us to also use URLs or strings.
#
# x:   character input. A single string is treated, in order, as
#      (1) a path to an existing file, (2) an http(s) URL to download,
#      or (3) literal document text. A character vector with more than
#      one element (e.g. the output of readLines()) is always treated as
#      literal text, one element per line.
# tmp: path the downloaded or literal content is written to.
#
# Returns the path of a local file holding the document: `x` itself when
# it already names an existing file, otherwise `tmp`.
#' @importFrom utils download.file
text_or_url_to_doc <- function(x, tmp = tempfile()){
  if (!is.character(x) || length(x) == 0L) {
    stop("`doc` must be a file path, URL, or character string",
         call. = FALSE)
  }

  ## A multi-element vector cannot be a single path or URL; testing it in
  ## `if` would fail with "the condition has length > 1", so write it out
  ## as literal text directly.
  if (length(x) > 1L) {
    writeLines(x, tmp)
    return(tmp)
  }

  if (file.exists(x)) {
    return(x)
  }

  if (grepl("^https?://", x)) {
    utils::download.file(x, tmp, quiet = TRUE)
  } else {
    writeLines(x, tmp)
  }
  tmp
}