<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
-
- This file is part of the OpenLink Software Virtuoso Open-Source (VOS)
- project.
-
- Copyright (C) 1998-2018 OpenLink Software
-
- This project is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the
- Free Software Foundation; only version 2 of the License, dated June 1991.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License along
- with this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-->
<refentry id="fn_ttlp_mt_local_file">
<refmeta>
<refentrytitle>DB.DBA.TTLP_MT_LOCAL_FILE</refentrytitle>
<refmiscinfo>rdf</refmiscinfo>
</refmeta>
<refnamediv>
<refname>DB.DBA.TTLP_MT_LOCAL_FILE</refname>
<refpurpose>Parses a local TTL (TURTLE or N3) file and places its triples into DB.DBA.RDF_QUAD using multiple threads.</refpurpose>
</refnamediv>
<refsynopsisdiv>
<funcsynopsis id="fsyn_ttlp_mt_local_file">
<funcprototype id="fproto_ttlp_mt_local_file">
<funcdef><function>DB.DBA.TTLP_MT_LOCAL_FILE</function></funcdef>
<paramdef>in <parameter>path</parameter> varchar</paramdef>
<paramdef>in <parameter>base</parameter> varchar</paramdef>
<paramdef><optional>in <parameter>graph</parameter> varchar</optional></paramdef>
<paramdef><optional>in <parameter>flags</parameter> integer</optional></paramdef>
<paramdef><optional>in <parameter>log_mode</parameter> integer</optional></paramdef>
<paramdef><optional>in <parameter>threads</parameter> integer</optional></paramdef>
</funcprototype>
</funcsynopsis>
</refsynopsisdiv>
<refsect1 id="desc_ttlp_mt_local_file">
<title>Description</title>
<para>Loads the TTL (TURTLE or N3 resource) file on multiple threads, using parallel I/O and multiprocessing if available. The function commits partial transactions while it runs, so the transaction log may contain part of the load. Moreover, the function may or may not leave a transaction log, depending on <parameter>log_mode</parameter>. Hence, after a successful load, one may need to execute the checkpoint statement to make sure that a server restart does not wipe out the results.</para>
</refsect1>
<refsect1 id="params_ttlp_mt_local_file">
<title>Parameters</title>
<refsect2><title>path</title>
<para>path to the file in the local filesystem</para>
</refsect2>
<refsect2><title>base</title>
<para>base IRI to resolve relative IRIs to absolute</para>
</refsect2>
<refsect2><title>graph</title>
<para>target graph IRI, parsed triples will appear in that graph.</para>
</refsect2>
<refsect2><title>flags</title>
<para>Bitmask of parsing flags that permit certain kinds of syntax errors in the resource.
Default is 0, meaning no permitted deviations from the spec. Other supported bits are:</para>
<programlisting><![CDATA[
1 - Single quoted and double quoted strings may contain newlines.
2 - Allows bnode predicates (but SPARQL processor may ignore them!).
4 - Allows variables, but triples with variables are ignored.
8 - Allows literal subjects, but triples with them are ignored.
16 - Allows '/', '#', '%' and '+' in local part of QName ("Qname with path")
32 - Allows invalid symbols between '<' and '>', i.e. in relative IRIs.
64 - Relax TURTLE syntax to include popular violations.
128 - Try to recover from lexical errors as much as it is possible.
256 - Allows Trig syntax, thus loading data in more than one graph.
512 - Allows loading N-quad dataset files with an optional context value to indicate provenance, as detailed
at http://sw.deri.org/2008/07/n-quads/.
]]></programlisting>
</refsect2>
<refsect2><title>log_mode</title>
<para>Detail level of writing the effect of loading to the transaction log. 0 means log nothing, 1 means log only allocations of internal IDs for new IRIs and literals, 2 means log everything. If the database crashes while the loading is in progress, or after the loading but before a checkpoint is made, then mode 0 means that the database becomes inconsistent; mode 1 means that the database is consistent but loaded quads may disappear, so the loading should be repeated, and log replay may produce wrong results if actions in it depend on the content of the quad store; mode 2 means no danger (so the default is 2). Hence loading with mode 1, and especially mode 0, is faster than the usual mode 2, but these modes require a checkpoint after each data load, and mode 0 additionally requires a checkpoint and a database backup right before the loading.</para>
</refsect2>
<refsect2><title>threads</title>
<para>number of threads that insert quads into the database. It should not be less than 1, obviously; it is better to not set it greater than <emphasis>((N-2)/k)-1</emphasis> where <emphasis>N</emphasis> is the number of available CPU cores and <emphasis>k</emphasis> is the number of loadings that happen at the same time.</para>
</refsect2>
</refsect1>
<refsect1 id="ret_ttlp_mt_local_file"><title>Return Types</title>
<para>The return value is not specified and may be changed in future versions.</para>
</refsect1>
<refsect1 id="examples_ttlp_mt_local_file">
<title>Examples</title>
<example id="ex_ttlp_mt_local_file"><title></title>
<para><emphasis>Sample Example 1</emphasis></para>
<screen><![CDATA[
SQL>DB.DBA.TTLP_MT_LOCAL_FILE ('tmp/users.ttl', '', 'http://example.com');
Done. -- 381 msec.
SQL>sparql
select *
from <http://example.com>
where {?s ?p ?o}
limit 10;
s p o
VARCHAR VARCHAR VARCHAR
_______________________________________________________________________________
http://www.openlinksw.com/virtrdf-data-formats#SysUsers-GranteeId-format http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.openlinksw.com/schemas/virtrdf#QuadMapFormat
http://www.openlinksw.com/virtrdf-data-formats#SysUsers-GranteeId-format http://www.openlinksw.com/schemas/virtrdf#qmfSuperFormats nodeID://1000272018
http://www.openlinksw.com/virtrdf-data-formats#SysUsers-GranteeId-format http://www.openlinksw.com/schemas/virtrdf#inheritFrom http://www.openlinksw.com/virtrdf-data-formats#sql-integer-uri-fn
http://www.openlinksw.com/virtrdf-data-formats#SysUsers-GranteeId-format http://www.openlinksw.com/schemas/virtrdf#noInherit http://www.openlinksw.com/schemas/virtrdf#qmfCustomString1
http://www.openlinksw.com/virtrdf-data-formats#SysUsers-GranteeId-format http://www.openlinksw.com/schemas/virtrdf#qmfCustomString1 DB.DBA.RDF_DF_GRANTEE_ID_URI
http://www.openlinksw.com/virtrdf-data-formats#SysUsers-UserId-format http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.openlinksw.com/schemas/virtrdf#QuadMapFormat
http://www.openlinksw.com/virtrdf-data-formats#SysUsers-UserId-format http://www.openlinksw.com/schemas/virtrdf#qmfSuperFormats nodeID://1000272019
http://www.openlinksw.com/virtrdf-data-formats#SysUsers-UserId-format http://www.openlinksw.com/schemas/virtrdf#inheritFrom http://www.openlinksw.com/virtrdf-data-formats#sql-integer-uri
http://www.openlinksw.com/virtrdf-data-formats#SysUsers-UserId-format http://www.openlinksw.com/schemas/virtrdf#noInherit http://www.openlinksw.com/schemas/virtrdf#qmfCustomString1
http://www.openlinksw.com/virtrdf-data-formats#SysUsers-UserId-format http://www.openlinksw.com/schemas/virtrdf#qmfCustomString1 http://example.com/sys/user?id=%d
10 Rows. -- 30 msec.
]]></screen>
<para><emphasis>Sample Example 2</emphasis></para>
<screen><![CDATA[
SQL>create procedure SPARQL_DAWG_LOAD_REMOTE_DATFILE (in full_uri varchar, in in_resultset integer := 0)
{
declare REPORT varchar;
declare graph_uri, dattext varchar;
declare app_env any;
app_env := null;
whenever sqlstate '*' goto err_rep;
if (not in_resultset)
result_names (REPORT);
dattext := cast (XML_URI_GET_AND_CACHE (full_uri) as varchar);
SPARQL_REPORT (sprintf ('Downloading %s: %d bytes',
full_uri, length (dattext) ) );
graph_uri := full_uri;
delete from RDF_QUAD where G = DB.DBA.RDF_MAKE_IID_OF_QNAME (graph_uri);
if ((full_uri like '%.ttl') or (full_uri like '%.nt') or (full_uri like '%.n3'))
DB.DBA.TTLP_MT_LOCAL_FILE (dattext, full_uri, graph_uri);
else -- if (rel_path like '%.rdf')
DB.DBA.RDF_LOAD_RDFXML_MT (dattext, full_uri, graph_uri);
return graph_uri;
err_rep:
result (sprintf ('%s: %s', __SQL_STATE, __SQL_MESSAGE));
return graph_uri;
}
;
Done. -- 891 msec
SQL> select SPARQL_DAWG_LOAD_REMOTE_DATFILE('http://www.openlinksw.com/dataspace/kidehen@openlinksw.com/weblog/kidehen@openlinksw.com%27s%20BLOG%20%5B127%5D/sioc.ttl');
REPORT
VARCHAR
_______________________________________________________________________________
Downloading http://www.openlinksw.com/dataspace/kidehen@openlinksw.com/weblog/kidehen@openlinksw.com%27s%20BLOG%20%5B127%5D/sioc.ttl: 12768 bytes
http://www.openlinksw.com/dataspace/kidehen@openlinksw.com/weblog/kidehen@openlinksw.com%27s%20BLOG%20%5B127%5D/sioc.ttl
2 Rows. -- 1382 msec.
]]></screen>
</example>
</refsect1>
<refsect1 id="seealso_ttlp_mt_local_file">
<title>See Also</title>
<para><link linkend="fn_rdf_audit_metadata"><function>DB.DBA.RDF_AUDIT_METADATA()</function></link></para>
<para><link linkend="fn_rdf_backup_metadata"><function>DB.DBA.RDF_BACKUP_METADATA()</function></link></para>
<para><link linkend="fn_rdf_load_rdfxml"><function>DB.DBA.RDF_LOAD_RDFXML()</function></link></para>
<para><link linkend="fn_rdf_load_rdfxml_mt"><function>DB.DBA.RDF_LOAD_RDFXML_MT()</function></link></para>
<para><link linkend="fn_ttlp"><function>DB.DBA.TTLP()</function></link></para>
<para><link linkend="fn_ttlp_mt"><function>DB.DBA.TTLP_MT()</function></link></para>
</refsect1>
</refentry>