1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
|
<?xml version="1.0" encoding="utf-8"?>
<!-- $Revision$ -->
<refentry xml:id="uconverter.transcode" xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink">
<refnamediv>
<refname>UConverter::transcode</refname>
<refpurpose>Convert a string from one character encoding to another</refpurpose>
</refnamediv>
<refsect1 role="description">
&reftitle.description;
<methodsynopsis role="UConverter">
<modifier>public</modifier> <modifier>static</modifier> <type class="union"><type>string</type><type>false</type></type><methodname>UConverter::transcode</methodname>
<methodparam><type>string</type><parameter>str</parameter></methodparam>
<methodparam><type>string</type><parameter>toEncoding</parameter></methodparam>
<methodparam><type>string</type><parameter>fromEncoding</parameter></methodparam>
<methodparam choice="opt"><type class="union"><type>array</type><type>null</type></type><parameter>options</parameter><initializer>&null;</initializer></methodparam>
</methodsynopsis>
<para>
Converts <parameter>str</parameter> from <parameter>fromEncoding</parameter> to <parameter>toEncoding</parameter>.
</para>
</refsect1>
<refsect1 role="parameters">
&reftitle.parameters;
<variablelist>
<varlistentry>
<term><parameter>str</parameter></term>
<listitem>
<para>
The &string; to be converted.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><parameter>toEncoding</parameter></term>
<listitem>
<para>
The desired encoding of the result.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><parameter>fromEncoding</parameter></term>
<listitem>
<para>
The current encoding used to interpret <parameter>str</parameter>.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><parameter>options</parameter></term>
<listitem>
<para>
An optional &array;, which may contain the following keys:
<simplelist>
<member>
<literal>'to_subst'</literal> - the substitution character to use
in place of any character of <parameter>str</parameter> which cannot
be encoded in <parameter>toEncoding</parameter>. If specified, it must
represent a single character in the target encoding.
</member>
</simplelist>
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1 role="returnvalues">
&reftitle.returnvalues;
<para>
Returns the converted string&return.falseforfailure;.
</para>
</refsect1>
<refsect1 role="examples">
&reftitle.examples;
<example>
<title>Converting from UTF-8 to UTF-16 and back</title>
<programlisting role="php">
<![CDATA[
<?php
$utf8_string = "\x5A\x6F\xC3\xAB"; // 'Zoë' in UTF-8
$utf16_string = UConverter::transcode($utf8_string, 'UTF-16BE', 'UTF-8');
echo bin2hex($utf16_string), "\n";
$new_utf8_string = UConverter::transcode($utf16_string, 'UTF-8', 'UTF-16BE');
echo bin2hex($new_utf8_string), "\n";
?>
]]>
</programlisting>
&example.outputs;
<screen>
<![CDATA[
005a006f00eb
5a6fc3ab
]]>
</screen>
</example>
<example>
<title>Invalid characters in input</title>
<para>
If the input string contains a sequence of bytes which is not valid in
the encoding specified by <parameter>fromEncoding</parameter>, they are
replaced by Unicode code point U+FFFD (Replacement Character) before
converting to <parameter>toEncoding</parameter>.
</para>
<programlisting role="php">
<![CDATA[
<?php
$invalid_utf8_string = "\xC3"; // incomplete multi-byte UTF-8 sequence
$utf16_string = UConverter::transcode($invalid_utf8_string, 'UTF-16BE', 'UTF-8');
echo bin2hex($utf16_string), "\n";
?>
]]>
</programlisting>
&example.outputs;
<screen>
<![CDATA[
fffd
]]>
</screen>
</example>
<example>
<title>Characters which cannot be encoded</title>
<para>
If the input string contains characters which cannot be represented
in <parameter>toEncoding</parameter>, they are replaced with a single
character. The default character to use depends on the encoding, and
can be controlled using the <literal>'to_subst'</literal> option.
</para>
<programlisting role="php">
<![CDATA[
<?php
$utf8_string = "\xE2\x82\xAC"; // € (Euro Sign) does not exist in ISO 8859-1
// Default replacement in ISO 8859-1 is "\x1A" (Substitute)
$iso8859_1_string = UConverter::transcode($utf8_string, 'ISO-8859-1', 'UTF-8');
echo bin2hex($iso8859_1_string), "\n";
// Specify a replacement of '?' ("\x3F") instead
$iso8859_1_string = UConverter::transcode(
$utf8_string, 'ISO-8859-1', 'UTF-8', ['to_subst' => '?']
);
echo bin2hex($iso8859_1_string), "\n";
// Since ISO 8859-1 cannot map U+FFFD, invalid input is also replaced by to_subst
$invalid_utf8_string = "\xC3"; // incomplete multi-byte UTF-8 sequence
$iso8859_1_string = UConverter::transcode(
$invalid_utf8_string, 'ISO-8859-1', 'UTF-8', ['to_subst' => '?']
);
echo bin2hex($iso8859_1_string), "\n";
?>
]]>
</programlisting>
&example.outputs;
<screen>
<![CDATA[
1a
3f
3f
]]>
</screen>
</example>
</refsect1>
<refsect1 role="seealso">
&reftitle.seealso;
<para>
<simplelist>
<member><function>mb_convert_encoding</function></member>
<member><function>iconv</function></member>
</simplelist>
</para>
</refsect1>
</refentry>
<!-- Keep this comment at the end of the file
Local variables:
mode: sgml
sgml-omittag:t
sgml-shorttag:t
sgml-minimize-attributes:nil
sgml-always-quote-attributes:t
sgml-indent-step:1
sgml-indent-data:t
indent-tabs-mode:nil
sgml-parent-document:nil
sgml-default-dtd-file:"~/.phpdoc/manual.ced"
sgml-exposed-tags:nil
sgml-local-catalogs:nil
sgml-local-ecat-files:nil
End:
vim600: syn=xml fen fdm=syntax fdl=2 si
vim: et tw=78 syn=sgml
vi: ts=1 sw=1
-->
|