File: lm_unigram.cpp

package info (click to toggle)
onboard 1.4.1-5
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye
  • size: 31,548 kB
  • sloc: python: 29,215; cpp: 5,965; ansic: 5,735; xml: 1,026; sh: 163; makefile: 39
file content (57 lines) | stat: -rw-r--r-- 1,958 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
/*
 * Copyright © 2013 marmuta <marmvta@gmail.com>
 *
 * This file is part of Onboard.
 *
 * Onboard is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Onboard is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */


#include "lm_unigram.h"
#include <numeric>

using namespace std;

//------------------------------------------------------------------------
// UnigramModel
//------------------------------------------------------------------------

// Calculate a vector of probabilities for the ngrams formed
// by history + word[i], for all i.
// Input:  constant history and a vector of candidate words
// Output: vector of probabilities, one value per candidate word
void UnigramModel::get_probs(const std::vector<WordId>& history,
                             const std::vector<WordId>& words,
                             std::vector<double>& probabilities)
{
    std::vector<double>& vp = probabilities;
    int size = words.size();   // number of candidate words
    int num_word_types = get_num_word_types(); 
    int cs = accumulate(m_counts.begin(), m_counts.end(), 0); // total number of occurencess
    if (cs)
    {
        vp.resize(size);
        for(int i=0; i<size; i++)
        {
            WordId wid = words[i];
            CountType count = m_counts.at(wid);
            vp[i] = count / (double) cs;
        }
    }
    else
    {
        fill(vp.begin(), vp.end(), 1.0/num_word_types); // uniform distribution
    }
}