1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
<?xml version="1.0" encoding="utf-8"?>
<!-- $Revision: 291316 $ -->
<refentry xml:id="function.grapheme-extract" xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink">
<refnamediv>
<refname>grapheme_extract</refname>
<refpurpose>Function to extract a sequence of default grapheme clusters from a text buffer, which must be encoded in UTF-8.</refpurpose>
</refnamediv>
<refsect1 role="description">
&reftitle.description;
<para>Procedural style</para>
<methodsynopsis>
<type>string</type><methodname>grapheme_extract</methodname>
<methodparam><type>string</type><parameter>haystack</parameter></methodparam>
<methodparam><type>int</type><parameter>size</parameter></methodparam>
<methodparam choice='opt'><type>int</type><parameter>extract_type</parameter><initializer>GRAPHEME_EXTR_COUNT</initializer></methodparam>
<methodparam choice='opt'><type>int</type><parameter>start</parameter><initializer>0</initializer></methodparam>
<methodparam choice='opt'><type>int</type><parameter role="reference">next</parameter></methodparam>
</methodsynopsis>
<para>
Function to extract a sequence of default grapheme clusters from a text buffer, which must be encoded in UTF-8.
</para>
</refsect1>
<refsect1 role="parameters">
&reftitle.parameters;
<para>
<variablelist>
<varlistentry>
<term><parameter>haystack</parameter></term>
<listitem>
<para>
String to extract grapheme clusters from. It must be encoded in UTF-8.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><parameter>size</parameter></term>
<listitem>
<para>
Maximum number of items - based on the $extract_type - to return.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><parameter>extract_type</parameter></term>
<listitem>
<para>
Defines the type of units referred to by the $size parameter:
</para>
<para>
<simplelist>
<member>GRAPHEME_EXTR_COUNT (default) - $size is the number of default
grapheme clusters to extract.</member>
<member>GRAPHEME_EXTR_MAXBYTES - $size is the maximum number of bytes
returned.</member>
<member>GRAPHEME_EXTR_MAXCHARS - $size is the maximum number of UTF-8
characters returned.</member>
</simplelist>
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><parameter>start</parameter></term>
<listitem>
<para>
Starting position in $haystack in bytes - if given, it must be zero or a
positive value that is less than or equal to the length of $haystack in
bytes. If $start does not point to the first byte of a UTF-8
character, the start position is moved to the next character boundary.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><parameter>next</parameter></term>
<listitem>
<para>
Reference to a value that will be set to the next starting position.
When the call returns, this may point to the first byte position past the end of the string.
</para>
</listitem>
</varlistentry>
</variablelist>
</para>
</refsect1>
<refsect1 role="returnvalues">
&reftitle.returnvalues;
<para>
A string starting at offset $start and ending on a default grapheme cluster
boundary that conforms to the $size and $extract_type specified.
</para>
</refsect1>
<refsect1 role="examples">
&reftitle.examples;
<example>
<title><function>grapheme_extract</function> example</title>
<programlisting role="php">
<![CDATA[
<?php
$char_a_ring_nfd = "a\xCC\x8A"; // 'LATIN SMALL LETTER A WITH RING ABOVE' (U+00E5) normalization form "D"
$char_o_diaeresis_nfd = "o\xCC\x88"; // 'LATIN SMALL LETTER O WITH DIAERESIS' (U+00F6) normalization form "D"
print urlencode(grapheme_extract( $char_a_ring_nfd . $char_o_diaeresis_nfd, 1, GRAPHEME_EXTR_COUNT, 2));
?>
]]>
</programlisting>
</example>
&example.outputs;
<screen>
<![CDATA[
o%CC%88
]]>
</screen>
</refsect1>
<refsect1 role="seealso">
&reftitle.seealso;
<para>
<simplelist>
<member><function>grapheme_substr</function></member>
<member>
<link xlink:href="&uri.unicode.graphemes;">
Unicode Text Segmentation: Grapheme Cluster Boundaries
</link>
</member>
</simplelist>
</para>
</refsect1>
</refentry>
<!-- Keep this comment at the end of the file
Local variables:
mode: sgml
sgml-omittag:t
sgml-shorttag:t
sgml-minimize-attributes:nil
sgml-always-quote-attributes:t
sgml-indent-step:1
sgml-indent-data:t
indent-tabs-mode:nil
sgml-parent-document:nil
sgml-default-dtd-file:"~/.phpdoc/manual.ced"
sgml-exposed-tags:nil
sgml-local-catalogs:nil
sgml-local-ecat-files:nil
End:
vim600: syn=xml fen fdm=syntax fdl=2 si
vim: et tw=78 syn=sgml
vi: ts=1 sw=1
-->
|