File: tracker-encoding-meegotouch.cpp

package info (click to toggle)
tracker 1.10.5-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 41,032 kB
  • ctags: 21,997
  • sloc: ansic: 238,235; python: 8,639; sh: 4,649; makefile: 3,902; xml: 569; perl: 106; cpp: 61
file content (90 lines) | stat: -rw-r--r-- 2,726 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
/*
 * Copyright (C) 2011 Nokia <ivan.frade@nokia.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 */

#include "config.h"

#include <MCharsetDetector>
#include <MCharsetMatch>

#include <glib.h>
#include <libtracker-common/tracker-locale.h>
#include "tracker-encoding-meegotouch.h"

/*
 * See http://apidocs.meego.com/git-tip/mtf/class_m_charset_detector.html
 */

/**
 * tracker_encoding_guess_meegotouch:
 * @buffer: bytes whose character encoding should be guessed
 * @size: number of bytes available in @buffer
 *
 * Guesses the character encoding of @buffer with MCharsetDetector, passing
 * the string returned by tracker_locale_get (TRACKER_LOCALE_LANGUAGE) to the
 * detector as its declared locale.
 *
 * Returns: a newly-allocated string holding the name of the best match, to
 * be released with g_free(); or %NULL when @buffer is %NULL or empty, when
 * the detector reports an error, or when the confidence of the best match
 * is not greater than 30.
 */
gchar *
tracker_encoding_guess_meegotouch (const gchar *buffer,
                                   gsize        size)
{
	/* Matches at or below this confidence are discarded. Keep the value in
	 * sync with the "(< 30%%)" text of the debug message below.
	 */
	const int min_confidence = 30;
	gchar *locale;
	gchar *encoding = NULL;
	int length;

	/* Nothing to analyse */
	if (buffer == NULL || size == 0) {
		return NULL;
	}

	/* The detector takes an int length: clamp instead of letting the cast
	 * truncate (and possibly go negative) for buffers above G_MAXINT.
	 * Detection then runs on the first G_MAXINT bytes only.
	 */
	length = (size > (gsize) G_MAXINT) ? G_MAXINT : (int) size;

	/* Initialize detector */
	MCharsetDetector detector ((const char *) buffer, length);

	if (detector.hasError ()) {
		g_warning ("Charset detector error when creating: %s",
		           detector.errorString ().toUtf8 (). data ());
		return NULL;
	}

	locale = tracker_locale_get (TRACKER_LOCALE_LANGUAGE);
	detector.setDeclaredLocale (locale);

	MCharsetMatch bestMatch = detector.detect ();

	if (detector.hasError ()) {
		g_warning ("Charset detector error when detecting: %s",
		           detector.errorString ().toUtf8 (). data ());
	} else if (bestMatch.confidence () > min_confidence) {
		/* g_strdup() copies the name before the temporary QByteArray
		 * goes away at the end of the statement.
		 */
		encoding = g_strdup (bestMatch.name ().toUtf8 ().data ());

		g_debug ("Guessing charset as '%s' with %d%% confidence",
		         encoding, bestMatch.confidence ());
	} else {
		g_debug ("Ignoring charset as '%s' with %d%% (< 30%%) confidence",
		         bestMatch.name ().toUtf8 ().data (),
		         bestMatch.confidence ());
	}

	/* Single release point for the locale string on every path that got it */
	g_free (locale);

	return encoding;
}