File: cldapp.cc

package info (click to toggle)
thunderbird 1%3A78.8.0-1~deb10u1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 2,910,720 kB
  • sloc: cpp: 5,890,568; javascript: 4,416,354; ansic: 3,051,657; python: 917,001; asm: 304,197; xml: 206,624; sh: 109,232; java: 108,679; makefile: 22,984; perl: 15,867; yacc: 4,565; objc: 3,026; pascal: 1,787; lex: 1,720; ada: 1,681; cs: 879; exp: 505; awk: 485; sql: 452; php: 436; lisp: 432; ruby: 99; sed: 69; csh: 45
file content (107 lines) | stat: -rw-r--r-- 2,633 bytes parent folder | download | duplicates (34)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "public/compact_lang_det.h"

// Number of language guesses handled per detection run: sizes the result
// arrays handed to the CLD2 detection calls below and the
// LanguageInfo::languages array.
#define MAX_RESULTS 3

// Thin, immutable wrapper around a single CLD2 language identifier.
class Language {
  // The wrapped CLD2 identifier; fixed at construction time.
  const CLD2::Language mLang;

public:
  // Wraps `lang`. Intentionally left implicit, as in the original interface.
  Language(CLD2::Language lang)
    : mLang(lang)
  {
  }

  // Returns the string CLD2::LanguageCode() reports for the wrapped
  // identifier.
  const char* getLanguageCode() const { return CLD2::LanguageCode(mLang); }
};

// A single candidate language together with the percentage value that the
// detector reported for it.
class LanguageGuess : public Language {
  // Percentage associated with this guess, stored narrowed to a char.
  const char mPercent;

public:
  // `lang` is the candidate language, `percent` the value reported for it.
  LanguageGuess(CLD2::Language lang, char percent)
    : Language(lang)
    , mPercent(percent)
  {
  }

  // Returns the percentage supplied at construction.
  char getPercent() const { return mPercent; }
};


class LanguageInfo : public Language {
public:
  static LanguageInfo* detectLanguage(const char* buffer, bool isPlainText)
  {
    CLD2::Language languages[MAX_RESULTS] = {};
    int percentages[MAX_RESULTS] = {};
    bool isReliable = false;

    // This is ignored.
    int textBytes;

    CLD2::Language bestGuess = DetectLanguageSummary(
      buffer, strlen(buffer), isPlainText,
      languages, percentages, &textBytes,
      &isReliable);

    return new LanguageInfo(isReliable, bestGuess, languages, percentages);
  }

  static LanguageInfo* detectLanguage(const char* buffer, bool isPlainText,
                                      const char* tldHint, int encodingHint,
                                      const char* languageHint)
  {
    CLD2::CLDHints hints = {languageHint, tldHint, encodingHint, CLD2::UNKNOWN_LANGUAGE};

    CLD2::Language languages[MAX_RESULTS] = {};
    int percentages[MAX_RESULTS] = {};
    bool isReliable = false;

    // These are ignored.
    double scores[MAX_RESULTS];
    int textBytes;

    CLD2::Language bestGuess = ExtDetectLanguageSummary(
      buffer, strlen(buffer), isPlainText,
      &hints, 0,
      languages, percentages, scores,
      nullptr, &textBytes, &isReliable);

    return new LanguageInfo(isReliable, bestGuess, languages, percentages);
  }

  ~LanguageInfo()
  {
    for (int i = 0; i < MAX_RESULTS; i++) {
      delete languages[i];
    }
  }

  bool getIsReliable() const
  {
    return mIsReliable;
  }

  const LanguageGuess* languages[MAX_RESULTS];

private:
  LanguageInfo(bool isReliable, CLD2::Language bestGuess,
               CLD2::Language languageIDs[MAX_RESULTS],
               int percentages[MAX_RESULTS]) :
    Language(bestGuess), mIsReliable(isReliable)
  {
    for (int i = 0; i < MAX_RESULTS; i++) {
      languages[i] = new LanguageGuess(languageIDs[i], percentages[i]);
    }
  }

  const bool mIsReliable;
};

#include "cld.cpp"