# KInterbasDB Python Package - Type Conv : Text/Unicode
#
# Version 3.1
#
# The following contributors hold Copyright (C) over their respective
# portions of code (see license.txt for details):
#
# [Original Author (maintained through version 2.0-0.3.1):]
#   1998-2001 [alex]  Alexander Kuznetsov   <alexan@users.sourceforge.net>
# [Maintainers (after version 2.0-0.3.1):]
#   2001-2002 [maz]   Marek Isalski         <kinterbasdb@maz.nu>
#   2002-2004 [dsr]   David Rushby          <woodsplitter@rocketmail.com>
# [Contributors:]
#   2001      [eac]   Evgeny A. Cherkashin  <eugeneai@icc.ru>
#   2001-2002 [janez] Janez Jere            <janez.jere@void.si>

__all__ = (
    'unicode_conv_in', 'unicode_conv_out', 'DB_TO_PYTHON_ENCODING_MAP',
  )

import sys, types
from kinterbasdb.k_exceptions import *

if sys.version_info < (2,2):
    from kinterbasdb.typeconv_util_isinstance import isinstance

# The database character set codes (the *keys* in DB_TO_PYTHON_ENCODING_MAP)
# are defined on pages 221-225 of the Interbase 6 Data Definition Guide.
# The Python codec names (the *values* in DB_TO_PYTHON_ENCODING_MAP) are defined
# in section 4.9.2 "Standard Encodings" of the Python Library Reference.
#
# The character sets supported by a given database can be determined with the
# following query:
#   select rdb$character_set_id, rdb$character_set_name
#   from rdb$character_sets order by rdb$character_set_id

DB_TO_PYTHON_ENCODING_MAP = {
  # The following three database character set codes are not handled by
  # kinterbasdb's TEXT_UNICODE dynamic type translation (they're handled by
  # TEXT instead, and deal with plain Python strings):
  #  0 -> 'NONE'
  #  1 -> 'OCTETS'
  #  2 -> 'ASCII'

  # DB CODE  : PYTHON NAME          : DB NAME
  # ---------------------------------------------------------------------------
            3: 'utf_8',            #: 'UNICODE_FSS'
            5: 'shift_jis',        #: 'SJIS_0208'
            6: 'euc_jp',           #: 'EUCJ_0208'
            9: 'cp737',            #: 'DOS737'
           10: 'cp437',            #: 'DOS437'
           11: 'cp850',            #: 'DOS850'
           12: 'cp865',            #: 'DOS865'
           13: 'cp860',            #: 'DOS860'
           14: 'cp863',            #: 'DOS863'
           15: 'cp775',            #: 'DOS775'
         # 16: NOT SUPPORTED,      #: 'DOS858'
           17: 'cp862',            #: 'DOS862'
           18: 'cp864',            #: 'DOS864'
         # 19: NOT SUPPORTED,      #: 'NEXT'
           21: 'iso8859_1',        #: 'ISO8859_1'
           22: 'iso8859_2',        #: 'ISO8859_2'
           23: 'iso8859_3',        #: 'ISO8859_3'
           34: 'iso8859_4',        #: 'ISO8859_4'
           35: 'iso8859_5',        #: 'ISO8859_5'
           36: 'iso8859_6',        #: 'ISO8859_6'
           37: 'iso8859_7',        #: 'ISO8859_7'
           38: 'iso8859_8',        #: 'ISO8859_8'
           39: 'iso8859_9',        #: 'ISO8859_9'
           40: 'iso8859_13',       #: 'ISO8859_13'
           44: 'euc_kr',           #: 'KSC_5601'
           45: 'cp852',            #: 'DOS852'
           46: 'cp857',            #: 'DOS857'
           47: 'cp861',            #: 'DOS861'
           48: 'cp866',            #: 'DOS866'
           49: 'cp869',            #: 'DOS869'
         # 50: NOT SUPPORTED,      #: 'CYRL'
           51: 'cp1250',           #: 'WIN1250'
           52: 'cp1251',           #: 'WIN1251'
           53: 'cp1252',           #: 'WIN1252'
           54: 'cp1253',           #: 'WIN1253'
           55: 'cp1254',           #: 'WIN1254'
           56: 'big5',             #: 'BIG_5'
           57: 'gb2312',           #: 'GB_2312'
           58: 'cp1255',           #: 'WIN1255'
           59: 'cp1256',           #: 'WIN1256'
           60: 'cp1257',           #: 'WIN1257'
  }

_UNKNOWN_CHARSET_MSG = (
    "Don't know how to %s value %s charset with numeric ID %d."
    " If you are using an unofficial character set, you should add a"
    " corresponding entry to kinterbasdb's translation table, as in:\n"
    "  kinterbasdb.typeconv_text_unicode.DB_TO_PYTHON_ENCODING_MAP[%d] = 'XX'\n"
    "where XX is the name of a Python codec."
    " Standard Python codecs are listed in section 4.9.2 ('Standard Encodings')"
    " of the Python 2.3 documentation."
  )

def unicode_conv_in((unicodeString, dbCharacterSetCode)):
    # Modulate dbCharacterSetCode by 256 to get rid of collation info.
    dbCharacterSetCode %= 256

    if unicodeString is None:
        return None

    pyEncodingName = DB_TO_PYTHON_ENCODING_MAP.get(dbCharacterSetCode, None)
    if pyEncodingName is not None:
        return unicodeString.encode(pyEncodingName)
    else:
        raise OperationalError( _UNKNOWN_CHARSET_MSG % (
            'encode', 'to', dbCharacterSetCode, dbCharacterSetCode
          ))

if sys.version_info < (2,2):
    # Maintain Python 2.1 compatibility.
    import codecs
    def unicode_conv_out((rawString, dbCharacterSetCode)):
        # Modulate dbCharacterSetCode by 256 to get rid of collation info.
        dbCharacterSetCode %= 256

        if rawString is None:
            return None

        pyEncodingName = DB_TO_PYTHON_ENCODING_MAP.get(dbCharacterSetCode, None)
        if pyEncodingName is not None:
            decoder = codecs.lookup(pyEncodingName)[1]
            return decoder(rawString)[0]
        else:
            raise OperationalError( _UNKNOWN_CHARSET_MSG % (
                'decode', 'from', dbCharacterSetCode, dbCharacterSetCode
              ))
else:
    def unicode_conv_out((rawString, dbCharacterSetCode)):
        # Modulate dbCharacterSetCode by 256 to get rid of collation info.
        dbCharacterSetCode %= 256

        if rawString is None:
            return None

        pyEncodingName = DB_TO_PYTHON_ENCODING_MAP.get(dbCharacterSetCode, None)
        if pyEncodingName is not None:
            return rawString.decode(pyEncodingName)
        else:
            raise OperationalError( _UNKNOWN_CHARSET_MSG % (
                'decode', 'from', dbCharacterSetCode, dbCharacterSetCode
              ))