File: cldapp.cc

package info (click to toggle)
firefox-esr 128.13.0esr-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,230,012 kB
  • sloc: cpp: 7,103,971; javascript: 6,088,450; ansic: 3,653,980; python: 1,212,330; xml: 594,604; asm: 420,652; java: 182,969; sh: 71,124; makefile: 20,747; perl: 13,449; objc: 12,399; yacc: 4,583; cs: 3,846; pascal: 2,973; lex: 1,720; ruby: 1,194; exp: 762; php: 436; lisp: 258; awk: 247; sql: 66; sed: 54; csh: 10
file content (107 lines) | stat: -rw-r--r-- 2,633 bytes parent folder | download | duplicates (31)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "public/compact_lang_det.h"

// Number of result slots used for each detection: it sizes the language,
// percentage and score arrays handed to CLD2 as well as
// LanguageInfo::languages.
#define MAX_RESULTS 3

class Language {
public:
  Language(CLD2::Language lang) : mLang(lang) {}

  const char* getLanguageCode() const
  {
    return CLD2::LanguageCode(mLang);
  }

private:
  const CLD2::Language mLang;
};

class LanguageGuess : public Language {
public:
  LanguageGuess(CLD2::Language lang, char percent) :
    Language(lang), mPercent(percent) {}

  char getPercent() const
  {
    return mPercent;
  }

private:
  const char mPercent;
};


/**
 * Result of one CLD2 detection run: the best-guess language (exposed through
 * the Language base class), a reliability flag, and MAX_RESULTS individual
 * guesses stored in `languages`.
 *
 * Instances can only be created through the static detectLanguage()
 * factories, which return a heap-allocated object; the caller is responsible
 * for releasing it with `delete`.
 */
class LanguageInfo : public Language {
public:
  /**
   * Runs DetectLanguageSummary() over `buffer`.
   *
   * @param buffer      NUL-terminated text. Its length is measured with
   *                    strlen(), so it must not be null.
   * @param isPlainText Forwarded unchanged to the detector.
   * @return A newly allocated LanguageInfo; the caller must delete it.
   */
  static LanguageInfo* detectLanguage(const char* buffer, bool isPlainText)
  {
    CLD2::Language languages[MAX_RESULTS] = {};
    int percentages[MAX_RESULTS] = {};
    bool isReliable = false;

    // Output slot required by the detector; its value is ignored.
    int textBytes;

    CLD2::Language bestGuess = DetectLanguageSummary(
      buffer, static_cast<int>(strlen(buffer)), isPlainText,
      languages, percentages, &textBytes,
      &isReliable);

    return new LanguageInfo(isReliable, bestGuess, languages, percentages);
  }

  /**
   * Runs ExtDetectLanguageSummary() over `buffer`, supplying detection hints.
   *
   * @param buffer       NUL-terminated text. Its length is measured with
   *                     strlen(), so it must not be null.
   * @param isPlainText  Forwarded unchanged to the detector.
   * @param tldHint      Stored in the CLDHints structure passed to CLD2.
   * @param encodingHint Stored in the CLDHints structure passed to CLD2.
   * @param languageHint Stored in the CLDHints structure passed to CLD2.
   * @return A newly allocated LanguageInfo; the caller must delete it.
   */
  static LanguageInfo* detectLanguage(const char* buffer, bool isPlainText,
                                      const char* tldHint, int encodingHint,
                                      const char* languageHint)
  {
    // Note the field order: the language hint comes before the TLD hint.
    CLD2::CLDHints hints = {languageHint, tldHint, encodingHint, CLD2::UNKNOWN_LANGUAGE};

    CLD2::Language languages[MAX_RESULTS] = {};
    int percentages[MAX_RESULTS] = {};
    bool isReliable = false;

    // Output slots required by the detector; their values are ignored.
    double scores[MAX_RESULTS];
    int textBytes;

    CLD2::Language bestGuess = ExtDetectLanguageSummary(
      buffer, static_cast<int>(strlen(buffer)), isPlainText,
      &hints, 0,
      languages, percentages, scores,
      nullptr, &textBytes, &isReliable);

    return new LanguageInfo(isReliable, bestGuess, languages, percentages);
  }

  // Each instance owns the LanguageGuess objects in `languages` and deletes
  // them in its destructor. A memberwise copy would share those pointers and
  // delete them a second time, so copying is disabled.
  LanguageInfo(const LanguageInfo&) = delete;
  LanguageInfo& operator=(const LanguageInfo&) = delete;

  // Releases the LanguageGuess objects allocated by the constructor.
  ~LanguageInfo()
  {
    for (int i = 0; i < MAX_RESULTS; i++) {
      delete languages[i];
    }
  }

  /**
   * @return The reliability flag reported by the detector for this run.
   */
  bool getIsReliable() const
  {
    return mIsReliable;
  }

  // Individual guesses, one per result slot. The pointed-to objects are owned
  // by this LanguageInfo and are destroyed together with it.
  const LanguageGuess* languages[MAX_RESULTS];

private:
  // Wraps the raw detector output. `languageIDs` and `percentages` must each
  // provide MAX_RESULTS entries; entry i of both arrays becomes languages[i].
  LanguageInfo(bool isReliable, CLD2::Language bestGuess,
               CLD2::Language languageIDs[MAX_RESULTS],
               int percentages[MAX_RESULTS]) :
    Language(bestGuess), mIsReliable(isReliable)
  {
    for (int i = 0; i < MAX_RESULTS; i++) {
      // LanguageGuess stores the percentage as a char; make the narrowing
      // from int explicit.
      languages[i] = new LanguageGuess(languageIDs[i],
                                       static_cast<char>(percentages[i]));
    }
  }

  const bool mIsReliable;
};

#include "cld.cpp"