File: enca.c

package info (click to toggle)
enca 1.13-3
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 4,440 kB
  • ctags: 1,230
  • sloc: sh: 11,125; ansic: 10,330; xml: 2,926; makefile: 698; perl: 232
file content (163 lines) | stat: -rw-r--r-- 4,680 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
/*
  @(#) $Id: enca.c,v 1.9 2003/12/22 22:24:33 yeti Exp $
  encoding-guessing library; the high-level analyser interface

  Copyright (C) 2000-2003 David Necas (Yeti) <yeti@physics.muni.cz>

  This program is free software; you can redistribute it and/or modify it
  under the terms of version 2 of the GNU General Public License as published
  by the Free Software Foundation.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along
  with this program; if not, write to the Free Software Foundation, Inc.,
  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/
#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif /* HAVE_CONFIG_H */

#include "enca.h"
#include "internal.h"

/**
 * enca_analyser_alloc:
 * @langname: Language for which the analyser should be initialized.
 *
 * Allocates an analyser and initializes it for language @langname.
 *
 * The analyser, once created, can be used only for the language for which it
 * was initialized.  If you need to detect encodings of texts in more than one
 * language, you must allocate an analyser for each one.  Note however, an
 * analyser may occupy a considerable amount of memory (a few hundreds of kB),
 * so it's generally not a good idea to have several hundreds of them floating
 * around.
 *
 * @langname is a two-letter ISO 639:1989 language code.  Locale names in form
 * language_territory and ISO-639 English language names also may be accepted
 * in the future.  To be on the safe side, use only names returned by
 * enca_get_languages().
 *
 * Returns: The newly created #EncaAnalyser on success, #NULL on failure
 *          (namely when @langname is #NULL, unknown or otherwise invalid,
 *          or when no analyser state could be allocated).
 **/
EncaAnalyser
enca_analyser_alloc(const char *langname)
{
  EncaAnalyserState *analyser;

  if (langname == NULL)
    return NULL;

  analyser = NEW(EncaAnalyserState, 1);
  /* NEW() is defined elsewhere; guard against a NULL result rather than
   * handing it on to the initializers below. */
  if (analyser == NULL)
    return NULL;

  /* Language setup is the only initialization step whose result is checked;
   * when it fails the freshly allocated state is released again. */
  if (!enca_language_init(analyser, langname)) {
    enca_free(analyser);
    return NULL;
  }

  enca_guess_init(analyser);
  enca_double_utf8_init(analyser);
  enca_pair_init(analyser);

  return analyser;
}

/**
 * enca_analyser_free:
 * @analyser: An analyser to be destroyed.  Must not be #NULL (checked with
 *            assert()).
 *
 * Frees memory used by #EncaAnalyser @analyser.
 *
 * The destroy functions are called in the reverse order of the matching
 * init calls in enca_analyser_alloc(); the analyser state itself is
 * released last.
 **/
void
enca_analyser_free(EncaAnalyser analyser)
{
  assert(analyser != NULL);

  /* Tear down in the opposite order to enca_analyser_alloc(). */
  enca_pair_destroy(analyser);
  enca_double_utf8_destroy(analyser);
  enca_guess_destroy(analyser);
  enca_language_destroy(analyser);
  /* Finally release the analyser state itself. */
  enca_free(analyser);
}

/**
 * enca_errno:
 * @analyser: An analyser.  Must not be #NULL (checked with assert()).
 *
 * Reports the error code currently stored in @analyser.
 *
 * Reading the code does not modify it.  Any other analyser call, i.e. a
 * call to a function taking @analyser as parameter, may however change it.
 *
 * Returns: Error code of reason why last analyser call failed.
 **/
int
enca_errno(EncaAnalyser analyser)
{
  int code;

  assert(analyser != NULL);
  code = analyser->gerrno;

  return code;
}

/**
 * enca_strerror:
 * @analyser: An analyser.
 * @errnum: An analyser error code.
 *
 * Returns string describing the error code.
 *
 * The returned string must be considered constant and must NOT be freed.
 * It is however guaranteed not to be modified or invalidated by subsequent
 * calls to any libenca functions, including enca_strerror().
 *
 * The analyser error code is not changed for a successful call, and it is set
 * to #ENCA_EINVALUE upon error, i.e. when @errnum has no description.
 * @analyser is accessed only in that error case; when it is #NULL the error
 * code is simply not recorded.
 *
 * Returns: String describing the error code.
 **/
const char*
enca_strerror(EncaAnalyser analyser,
              int errnum)
{
  /* Indexed directly by @errnum.  The entries are listed in the same order
   * as the codes in the EncaErrno documentation; presumably that matches the
   * enum values -- verify against the enum declaration when adding codes. */
  static const char *const DESCRIPTION_LIST[] = {
    "OK",
    "Invalid value",
    "Sample is empty",
    "After filtering, (almost) nothing remained",
    "Multibyte tests failed, language contains no 8bit charsets",
    "Not enough significant characters",
    "No clear winner",
    "Sample is just garbage"
  };

  /* The cast turns negative values into very large unsigned ones, so this
   * single comparison rejects both negative and too-large codes. */
  if ((size_t)errnum >= ELEMENTS(DESCRIPTION_LIST)) {
    /* Only this path touches @analyser; tolerate NULL so that looking up a
     * bad code without an analyser does not crash. */
    if (analyser != NULL)
      analyser->gerrno = ENCA_EINVALUE;
    return "Unknown error! (FIXME!)";
  }

  return DESCRIPTION_LIST[errnum];
}

/***** Documentation *********************************************************/

/**
 * EncaErrno:
 * @ENCA_EOK: OK.
 * @ENCA_EINVALUE: Invalid value (usually of an option).
 * @ENCA_EEMPTY: Sample is empty.
 * @ENCA_EFILTERED: After filtering, (almost) nothing remained.
 * @ENCA_ENOCS8: Multibyte tests failed and language contains no 8bit charsets.
 * @ENCA_ESIGNIF: Too few significant characters.
 * @ENCA_EWINNER: No clear winner.
 * @ENCA_EGARBAGE: Sample is garbage.
 *
 * Error codes.
 **/

/* vim: ts=2 sw=2 et
 */