1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
|
/*
@(#) $Id: enca.c,v 1.9 2003/12/22 22:24:33 yeti Exp $
encoding-guessing library; the high-level analyser interface
Copyright (C) 2000-2003 David Necas (Yeti) <yeti@physics.muni.cz>
This program is free software; you can redistribute it and/or modify it
under the terms of version 2 of the GNU General Public License as published
by the Free Software Foundation.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif /* HAVE_CONFIG_H */
#include "enca.h"
#include "internal.h"
/**
 * enca_analyser_alloc:
 * @langname: Language for which the analyser should be initialized.
 *
 * Allocates an analyser and initializes it for language @langname.
 *
 * Once created, an analyser can be used only for the language it was
 * initialized for.  If you need to detect encodings of texts in more than
 * one language, you must allocate an analyser for each one.  Note however,
 * an analyser may occupy a considerable amount of memory (a few hundreds of
 * kB), so it's generally not a good idea to have several hundreds of them
 * floating around.
 *
 * @langname is a two-letter ISO 639:1989 language code.  Locale names in
 * the form language_territory and ISO-639 English language names may also be
 * accepted in the future.  To be on the safe side, use only names returned
 * by enca_get_languages().
 *
 * Returns: The newly created #EncaAnalyser on success, #NULL on failure
 *          (namely when @langname is #NULL, unknown or otherwise invalid).
 **/
EncaAnalyser
enca_analyser_alloc(const char *langname)
{
  EncaAnalyserState *state;

  /* Without a language name there is nothing to initialize. */
  if (!langname)
    return NULL;

  state = NEW(EncaAnalyserState, 1);
  if (enca_language_init(state, langname)) {
    /* Language set-up succeeded; initialize the remaining parts. */
    enca_guess_init(state);
    enca_double_utf8_init(state);
    enca_pair_init(state);
    return state;
  }

  /* Language set-up failed; release the half-built state. */
  enca_free(state);
  return NULL;
}
/**
 * enca_analyser_free:
 * @analyser: An analyser to be destroyed.
 *
 * Frees memory used by #EncaAnalyser @analyser.
 *
 * @analyser must not be #NULL; this is checked with assert().
 **/
void
enca_analyser_free(EncaAnalyser analyser)
{
  assert(analyser != NULL);

  /* Tear the parts down in the reverse order of their initialization in
   * enca_analyser_alloc(). */
  enca_pair_destroy(analyser);
  enca_double_utf8_destroy(analyser);
  enca_guess_destroy(analyser);
  enca_language_destroy(analyser);

  /* Finally release the analyser state itself. */
  enca_free(analyser);
}
/**
 * enca_errno:
 * @analyser: An analyser.
 *
 * Returns analyser error code.
 *
 * Reading the error code does not modify it.  However, any other analyser
 * call, i.e. a call to a function taking @analyser as a parameter, can
 * change the error code.
 *
 * @analyser must not be #NULL; this is checked with assert().
 *
 * Returns: Error code of the reason why the last analyser call failed.
 **/
int
enca_errno(EncaAnalyser analyser)
{
  int code;

  assert(analyser != NULL);
  code = analyser->gerrno;

  return code;
}
/**
 * enca_strerror:
 * @analyser: An analyser.  May be #NULL, in which case no error code is
 *            recorded when @errnum is invalid.
 * @errnum: An analyser error code.
 *
 * Returns string describing the error code.
 *
 * The returned string must be considered constant and must NOT be freed.
 * It is however guaranteed not to be modified or invalidated by subsequent
 * calls to any libenca functions, including enca_strerror().
 *
 * The analyser error code is not changed for a successful call, and it is
 * set to #ENCA_EINVALUE upon error (i.e. when @errnum is not a known error
 * code).
 *
 * Returns: String describing the error code; a generic "unknown error"
 *          string when @errnum is not a known error code.
 **/
const char*
enca_strerror(EncaAnalyser analyser,
              int errnum)
{
  /* Indexed directly by the error code; presumably in the order of the
   * EncaErrno values -- keep the two in sync. */
  static const char *const DESCRIPTION_LIST[] = {
    "OK",
    "Invalid value",
    "Sample is empty",
    "After filtering, (almost) nothing remained",
    "Multibyte tests failed, language contains no 8bit charsets",
    "Not enough significant characters",
    "No clear winner",
    "Sample is just garbage"
  };

  /* The cast turns negative codes into huge unsigned values, so a single
   * comparison rejects both negative and too-large codes. */
  if ((size_t)errnum >= ELEMENTS(DESCRIPTION_LIST)) {
    /* Record the failure only if there is an analyser to record it in, so
     * a NULL analyser cannot crash the error-reporting path. */
    if (analyser != NULL)
      analyser->gerrno = ENCA_EINVALUE;
    return "Unknown error! (FIXME!)";
  }

  return DESCRIPTION_LIST[errnum];
}
/***** Documentation *********************************************************/
/**
* EncaErrno:
* @ENCA_EOK: OK.
* @ENCA_EINVALUE: Invalid value (usually of an option).
* @ENCA_EEMPTY: Sample is empty.
* @ENCA_EFILTERED: After filtering, (almost) nothing remained.
* @ENCA_ENOCS8: Multibyte tests failed and language contains no 8bit charsets.
* @ENCA_ESIGNIF: Too few significant characters.
* @ENCA_EWINNER: No clear winner.
* @ENCA_EGARBAGE: Sample is garbage.
*
* Error codes.
**/
/* vim: ts=2 sw=2 et
*/
|