File: charconv.php

package info (click to toggle)
bbclone 0.4.6-8
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 4,304 kB
  • ctags: 528
  • sloc: php: 15,858; sh: 349; makefile: 41
file content (123 lines) | stat: -rw-r--r-- 3,669 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
<?php
# This file is part of BBClone (The PHP web counter on steroids)

# $Header: /cvs/bbclone/lib/charconv.php,v 1.11 2005/02/21 00:31:17 olliver Exp $

# Copyright (C) 2001-2005, the BBClone Team (see file doc/authors.txt
# distributed with this library)

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# See doc/copying.txt for details

# Guess the encoding of a string that is assumed to hold Cyrillic text.
#
# Returns "UTF-8" when mbstring is available and the string is valid UTF-8,
# otherwise "KOI8-R" or "Windows-1251" depending on which of two byte ranges
# holds more of the string's bytes.
#
# The byte heuristic relies on lowercase letters dominating ordinary text:
# lowercase Cyrillic occupies 0xE0-0xFF (224-255) in Windows-1251 but
# 0xC0-0xDF (192-223) in KOI8-R. A tie (including a string without any such
# bytes) resolves to "Windows-1251".
function bbc_get_cyrillic($str) {
  # note that iso-8859-1 is only a placeholder. The focus lies on detecting
  # UTF-8. Strict mode keeps malformed sequences from being reported as UTF-8,
  # and the extension check avoids a fatal error where mbstring is missing.
  if (extension_loaded("mbstring") &&
      (mb_detect_encoding($str, "UTF-8, iso-8859-1", true) == "UTF-8")) return "UTF-8";

  $win = 0;
  $koi = 0;

  for ($i = 0, $j = strlen($str); $i < $j; $i++) {
    $byte = ord($str[$i]);

    # both ranges are inclusive so that the boundary letters are counted too
    if (($byte >= 224) && ($byte <= 255)) $win++;
    elseif (($byte >= 192) && ($byte <= 223)) $koi++;
  }
  return (($win < $koi) ? "KOI8-R" : "Windows-1251");
}

# Detect the encoding of $str, guided by the global $BBC_LANGUAGE.
#
# Returns an encoding name, or false when no encoding could be determined.
#   "ja"             -> whatever mb_detect_encoding() picks from
#                       JIS, UTF-8, EUC-JP and SJIS
#   "ru", "bg", "uk" -> result of bbc_get_cyrillic()
#   anything else    -> "UTF-8" if the string is valid UTF-8, otherwise false
#
# Without the mbstring extension the "ja" and default branches return false
# instead of calling an undefined function.
function bbc_get_encoding($str) {
  global $BBC_LANGUAGE;

  $has_mb = extension_loaded("mbstring");

  switch ($BBC_LANGUAGE) {
    case "ja":
      return ($has_mb ? mb_detect_encoding($str, "JIS, UTF-8, EUC-JP, SJIS") : false);

    case "ru":
    case "bg":
    case "uk":
      return bbc_get_cyrillic($str);

    default:
      # note that iso-8859-1 is only a placeholder. The focus lies on detecting
      # UTF-8; strict mode keeps malformed or truncated sequences from being
      # reported as UTF-8.
      return ($has_mb && (mb_detect_encoding($str, "UTF-8, iso-8859-1", true) == "UTF-8")) ? "UTF-8" : false;
  }
}

# Convert $str from encoding $from to encoding $to using whichever conversion
# extension is available.
#
# $from may be false when the source encoding is unknown; in that case only
# recode is attempted, since mbstring and iconv both need a source encoding.
#
# Order of preference:
#   1. mbstring, when the target is EUC-JP, an iso-8859-* charset, or when the
#      global $BBC_CUSTOM_CHARSET is set and is found within "UTF-8"
#   2. iconv with transliteration
#   3. recode
#   4. the unmodified string
function bbc_convert_keys($str, $from, $to) {
  # without this declaration the custom charset test below could never match
  global $BBC_CUSTOM_CHARSET;

  if (($from !== false) && extension_loaded("mbstring") && (($to == "EUC-JP") ||
      (strpos($to, "iso-8859-") !== false) || (!empty($BBC_CUSTOM_CHARSET) &&
       stristr("UTF-8", $BBC_CUSTOM_CHARSET)))) {
    return mb_convert_encoding($str, $to, $from);
  }
  elseif (($from !== false) && extension_loaded("iconv")) {
    $converted = iconv($from, $to."//TRANSLIT", $str);

    # iconv() returns false on failure; hand back the input rather than false
    return (($converted !== false) ? $converted : $str);
  }
  elseif (extension_loaded("recode")) return recode_string($to, $str);
  # bail out with unmodified string
  else return $str;
}

# Note: A custom charset overrides the per-language default specified below,
# so you need not worry about your personal UTF-8 (or any other) language file
# or change anything here. Just specify $BBC_CUSTOM_CHARSET and everything
# will be all right.
# Convert $str from encoding $from to the output charset.
# A non-empty $char is used as the target as is; otherwise the target is
# selected from the global $BBC_LANGUAGE, with iso-8859-15 for any language
# not listed. The conversion itself is delegated to bbc_convert_keys().
function bbc_convert_lang($str, $from, $char) {
  global $BBC_LANGUAGE;

  $target = $char;

  if (empty($target)) {
    switch ($BBC_LANGUAGE) {
      case "bg":
      case "ru":
      case "uk":
        $target = "Windows-1251";
        break;

      case "cs":
      case "hu":
      case "pl":
      case "ro":
      case "sk":
      case "sl":
        $target = "iso-8859-2";
        break;

      case "el":
        $target = "iso-8859-7";
        break;

      case "ja":
        $target = "EUC-JP";
        break;

      case "lt":
        $target = "Windows-1257";
        break;

      case "tr":
        $target = "Windows-1254";
        break;

      case "zh-cn":
        $target = "gb2312";
        break;

      case "zh-tw":
        $target = "big5";
        break;

      default:
        $target = "iso-8859-15";
    }
  }

  return bbc_convert_keys($str, $from, $target);
}
?>