1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
|
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [
<!ENTITY version SYSTEM "version.xml">
]>
<refentry id="raptor-section-unicode">
<refmeta>
<refentrytitle role="top_of_page">Unicode</refentrytitle>
<manvolnum>3</manvolnum>
<refmiscinfo>RAPTOR Library</refmiscinfo>
</refmeta>
<refnamediv>
<refname>Unicode</refname>
<refpurpose>Unicode and UTF-8 utility functions.</refpurpose>
<!--[<xref linkend="desc" endterm="desc.title"/>]-->
</refnamediv>
<refsynopsisdiv role="synopsis">
<title role="synopsis.title">Synopsis</title>
<synopsis>
<link linkend="int">int</link> <link linkend="raptor-unicode-char-to-utf8">raptor_unicode_char_to_utf8</link> (long <link linkend="c">c</link> ,
unsigned <link linkend="char">char</link> *output);
<link linkend="int">int</link> <link linkend="raptor-utf8-to-unicode-char">raptor_utf8_to_unicode_char</link> (unsigned <link linkend="long">long</link> *output,
unsigned <link linkend="char">char</link> *input,
<link linkend="int">int</link> length);
<link linkend="int">int</link> <link linkend="raptor-unicode-is-xml11-namestartchar">raptor_unicode_is_xml11_namestartchar</link>
(long <link linkend="c">c</link> );
<link linkend="int">int</link> <link linkend="raptor-unicode-is-xml10-namestartchar">raptor_unicode_is_xml10_namestartchar</link>
(long <link linkend="c">c</link> );
<link linkend="int">int</link> <link linkend="raptor-unicode-is-xml11-namechar">raptor_unicode_is_xml11_namechar</link>
(long <link linkend="c">c</link> );
<link linkend="int">int</link> <link linkend="raptor-unicode-is-xml10-namechar">raptor_unicode_is_xml10_namechar</link>
(long <link linkend="c">c</link> );
<link linkend="int">int</link> <link linkend="raptor-utf8-check">raptor_utf8_check</link> (unsigned <link linkend="char">char</link> *string,
<link linkend="size-t">size_t</link> length);
</synopsis>
</refsynopsisdiv>
<refsect1 role="desc">
<title role="desc.title">Description</title>
<para>
Functions for converting between Unicode characters and UTF-8, which is
the native internal string format of all the Redland libraries.
Also includes functions for checking whether Unicode characters are legal
in XML names using either the XML 1.0 or XML 1.1 rules.
</para>
</refsect1>
<refsect1 role="details">
<title role="details.title">Details</title>
<refsect2>
<title><anchor id="raptor-unicode-char-to-utf8" role="function"/>raptor_unicode_char_to_utf8 ()</title>
<indexterm><primary>raptor_unicode_char_to_utf8</primary></indexterm><programlisting><link linkend="int">int</link> raptor_unicode_char_to_utf8 (long <link linkend="c">c</link> ,
unsigned <link linkend="char">char</link> *output);</programlisting>
<para>
Convert a Unicode character to UTF-8 encoding.
</para>
<para>
Based on <link linkend="librdf-unicode-char-to-utf8"><function>librdf_unicode_char_to_utf8()</function></link> with no need to calculate
length since the encoded character is always copied into a buffer
with sufficient size.</para>
<para>
</para><variablelist role="params">
<varlistentry><term><parameter>c</parameter> :</term>
<listitem><simpara> Unicode character
</simpara></listitem></varlistentry>
<varlistentry><term><parameter>output</parameter> :</term>
<listitem><simpara> UTF-8 string buffer or NULL
</simpara></listitem></varlistentry>
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> bytes encoded to output buffer or &lt;0 on failure
</simpara></listitem></varlistentry>
</variablelist></refsect2>
<refsect2>
<title><anchor id="raptor-utf8-to-unicode-char" role="function"/>raptor_utf8_to_unicode_char ()</title>
<indexterm><primary>raptor_utf8_to_unicode_char</primary></indexterm><programlisting><link linkend="int">int</link> raptor_utf8_to_unicode_char (unsigned <link linkend="long">long</link> *output,
unsigned <link linkend="char">char</link> *input,
<link linkend="int">int</link> length);</programlisting>
<para>
Convert a UTF-8 encoded buffer to a Unicode character.
</para>
<para>
If output is NULL, only the number of bytes that would be used from
the input buffer is calculated; the conversion is not performed.</para>
<para>
</para><variablelist role="params">
<varlistentry><term><parameter>output</parameter> :</term>
<listitem><simpara> Pointer to the Unicode character or NULL
</simpara></listitem></varlistentry>
<varlistentry><term><parameter>input</parameter> :</term>
<listitem><simpara> UTF-8 string buffer
</simpara></listitem></varlistentry>
<varlistentry><term><parameter>length</parameter> :</term>
<listitem><simpara> buffer size
</simpara></listitem></varlistentry>
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> bytes used from input buffer or &lt;0 on failure: -1 input buffer too short or length error, -2 overlong UTF-8 sequence, -3 illegal code positions, -4 code out of range U+0000 to U+10FFFF. In cases -2, -3 and -4 the coded character is stored in the output.
</simpara></listitem></varlistentry>
</variablelist></refsect2>
<refsect2>
<title><anchor id="raptor-unicode-is-xml11-namestartchar" role="function"/>raptor_unicode_is_xml11_namestartchar ()</title>
<indexterm><primary>raptor_unicode_is_xml11_namestartchar</primary></indexterm><programlisting><link linkend="int">int</link> raptor_unicode_is_xml11_namestartchar
(long <link linkend="c">c</link> );</programlisting>
<para>
Check if Unicode character is legal to start an XML 1.1 Name
</para>
<para>
Namespaces in XML 1.1 REC 2004-02-04
http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-NameStartChar
updating
Extensible Markup Language (XML) 1.1 REC 2004-02-04
http://www.w3.org/TR/2004/REC-xml11-20040204/ sec 2.3, [4a]
excluding the ':'</para>
<para>
</para><variablelist role="params">
<varlistentry><term><parameter>c</parameter> :</term>
<listitem><simpara> Unicode character to check
</simpara></listitem></varlistentry>
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> non-0 if legal
</simpara></listitem></varlistentry>
</variablelist></refsect2>
<refsect2>
<title><anchor id="raptor-unicode-is-xml10-namestartchar" role="function"/>raptor_unicode_is_xml10_namestartchar ()</title>
<indexterm><primary>raptor_unicode_is_xml10_namestartchar</primary></indexterm><programlisting><link linkend="int">int</link> raptor_unicode_is_xml10_namestartchar
(long <link linkend="c">c</link> );</programlisting>
<para>
Check if Unicode character is legal to start an XML 1.0 Name
</para>
<para>
Namespaces in XML REC 1999-01-14
http://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-NCName
updating
Extensible Markup Language (XML) 1.0 (Third Edition) REC 2004-02-04
http://www.w3.org/TR/2004/REC-xml-20040204/
excluding the ':'</para>
<para>
</para><variablelist role="params">
<varlistentry><term><parameter>c</parameter> :</term>
<listitem><simpara> Unicode character to check
</simpara></listitem></varlistentry>
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> non-0 if legal
</simpara></listitem></varlistentry>
</variablelist></refsect2>
<refsect2>
<title><anchor id="raptor-unicode-is-xml11-namechar" role="function"/>raptor_unicode_is_xml11_namechar ()</title>
<indexterm><primary>raptor_unicode_is_xml11_namechar</primary></indexterm><programlisting><link linkend="int">int</link> raptor_unicode_is_xml11_namechar
(long <link linkend="c">c</link> );</programlisting>
<para>
Check if a Unicode codepoint is legal to continue an XML 1.1 Name
</para>
<para>
Namespaces in XML 1.1 REC 2004-02-04
http://www.w3.org/TR/2004/REC-xml11-20040204/
updating
Extensible Markup Language (XML) 1.1 REC 2004-02-04
http://www.w3.org/TR/2004/REC-xml11-20040204/ sec 2.3, [4a]
excluding the ':'</para>
<para>
</para><variablelist role="params">
<varlistentry><term><parameter>c</parameter> :</term>
<listitem><simpara> Unicode codepoint to check
</simpara></listitem></varlistentry>
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> non-0 if legal
</simpara></listitem></varlistentry>
</variablelist></refsect2>
<refsect2>
<title><anchor id="raptor-unicode-is-xml10-namechar" role="function"/>raptor_unicode_is_xml10_namechar ()</title>
<indexterm><primary>raptor_unicode_is_xml10_namechar</primary></indexterm><programlisting><link linkend="int">int</link> raptor_unicode_is_xml10_namechar
(long <link linkend="c">c</link> );</programlisting>
<para>
Check if a Unicode codepoint is legal to continue an XML 1.0 Name
</para>
<para>
Namespaces in XML REC 1999-01-14
http://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-NCNameChar
updating
Extensible Markup Language (XML) 1.0 (Third Edition) REC 2004-02-04
http://www.w3.org/TR/2004/REC-xml-20040204/
excluding the ':'</para>
<para>
</para><variablelist role="params">
<varlistentry><term><parameter>c</parameter> :</term>
<listitem><simpara> Unicode codepoint to check
</simpara></listitem></varlistentry>
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> non-0 if legal
</simpara></listitem></varlistentry>
</variablelist></refsect2>
<refsect2>
<title><anchor id="raptor-utf8-check" role="function"/>raptor_utf8_check ()</title>
<indexterm><primary>raptor_utf8_check</primary></indexterm><programlisting><link linkend="int">int</link> raptor_utf8_check (unsigned <link linkend="char">char</link> *string,
<link linkend="size-t">size_t</link> length);</programlisting>
<para>
Check that a string is UTF-8.</para>
<para>
</para><variablelist role="params">
<varlistentry><term><parameter>string</parameter> :</term>
<listitem><simpara> UTF-8 string
</simpara></listitem></varlistentry>
<varlistentry><term><parameter>length</parameter> :</term>
<listitem><simpara> length of string
</simpara></listitem></varlistentry>
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> Non-0 if the string is UTF-8
</simpara></listitem></varlistentry>
</variablelist></refsect2>
</refsect1>
</refentry>
|