File: dictdbuilder.cpp

package info (click to toggle)
dictconv 0.2-7
  • links: PTS
  • area: main
  • in suites: bullseye, buster, jessie-kfreebsd, lenny, sid, squeeze, stretch, wheezy
  • size: 1,536 kB
  • ctags: 434
  • sloc: sh: 8,876; cpp: 1,979; makefile: 10
file content (129 lines) | stat: -rw-r--r-- 3,987 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/***************************************************************************
 *   Copyright (C) 2007 by Raul Fernandes                                  *
 *   rgfernandes@yahoo.com                                                 *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
 ***************************************************************************/

#include "dictdbuilder.h"

#include <iostream>

// Records the index file name and derives the data (.dict) file name from it.
//
// filename: path of the index file, normally ending in ".index".
//
// The data file name is obtained by replacing a trailing six-character
// extension (dot included, e.g. ".index") with ".dict". If the name has no
// '.' six characters before its end, ".dict" is appended instead, so that no
// part of the base name is cut off.
DictdBuilder::DictdBuilder( std::string filename )
{
  // Number of characters in ".index", including the dot.
  const std::string::size_type extlen = 6;
  const std::string::size_type len = filename.length();

  m_idxfilename = filename;

  if( len >= extlen && filename[ len - extlen ] == '.' )
  {
    m_dictfilename = filename.substr( 0, len - extlen ) + ".dict";
  }
  else
  {
    m_dictfilename = filename + ".dict";
  }

  m_entriescount = 0;
}


// Empty destructor: performs no explicit cleanup.
DictdBuilder::~DictdBuilder()
{
}

bool DictdBuilder::addHeadword( std::string word, std::string def, std::vector<std::string> /*alternates*/ )
{
  m_entriescount++;
  struct entry entry;
  std::string definition = def;
  entry.position = m_definition.length();
  entry.size = definition.length();
  std::string headword;

  headword = word;
  dic.insert( make_pair( headword, entry ) );

  // TODO: syn file
  /*
  // Alternate forms
  std::vector<std::string>::iterator iter;
  for(iter = alternates.begin();iter != alternates.end(); iter++)
  {
  dic.insert( make_pair( *iter, entry ) );
}*/

  m_definition += definition;
  return true;
}

// Writes the collected data to disk: first the index file, then the dict file.
//
// Index file (m_idxfilename): one line per element of dic, in iteration order,
// formatted as
//   headword TAB b64(position) TAB b64(size) NEWLINE
// Dict file (m_dictfilename): the raw contents of m_definition.
//
// Both files are opened in binary mode so that every '\n' is written as a
// single byte; the positions and sizes stored in the index are byte counts
// into m_definition and would not match a newline-translated file.
//
// Returns true when both files were opened, written and closed without a
// stream error; false otherwise. If the dict file fails, an index file that
// was already written is left in place.
bool DictdBuilder::finish()
{
  m_wordcount = dic.size();


  //////////////////
  // Index file
  /////////////////

  // Discard any error state left from earlier use of the stream; older
  // standard libraries do not reset it on open().
  file.clear();
  file.open( m_idxfilename.c_str(), std::ios::out | std::ios::binary );
  if( !file.is_open() )
  {
    return false;
  }

  for( dictionary::iterator iter = dic.begin(); iter != dic.end(); ++iter ) {
    file.write( iter->first.data(), iter->first.length() );
    file.put( '\t' );

    // b64_encode() returns a pointer into a shared static buffer, so each
    // encoded value must be written out before the next call overwrites it.
    const char *encoded = b64_encode( iter->second.position );
    file.write( encoded, strlen( encoded ) );
    file.put( '\t' );

    encoded = b64_encode( iter->second.size );
    file.write( encoded, strlen( encoded ) );
    file.put( '\n' );
  }
  file.close();
  // Stream error flags are sticky: a failed write or a failed close shows up here.
  if( file.fail() )
  {
    return false;
  }


  //////////////////
  // Dict file
  /////////////////

  file.clear();
  file.open( m_dictfilename.c_str(), std::ios::out | std::ios::binary );
  if( !file.is_open() )
  {
    return false;
  }
  file.write( m_definition.data(), m_definition.length() );
  file.close();
  if( file.fail() )
  {
    return false;
  }

  return true;
}

// Adapted from the libmaa/base64.c file in the dictd sources
// available at http://www.dict.org/
//
// Encodes the low 32 bits of val as base64 digits, most significant digit
// first, without leading 'A' (zero) digits; at least one digit is always
// produced, so 0 encodes as "A".
//
// The returned pointer refers to a static buffer that is overwritten by the
// next call; copy or consume the string before calling again.
const char* DictdBuilder::b64_encode( unsigned long val )
{
  static const char alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  static char digits[7];

  const int lastdigit = 5;
  const unsigned long low32 = val & 0xffffffffUL;

  // Fill the buffer from the least significant 6-bit group backwards.
  // The leftmost digit only receives the two remaining bits (30 and 31).
  unsigned long rest = low32;
  for( int slot = lastdigit; slot >= 0; --slot )
  {
    digits[slot] = alphabet[ rest & 0x3f ];
    rest >>= 6;
  }
  digits[lastdigit + 1] = '\0';

  // Skip leading zero digits but always keep the final one.
  const char *start = digits;
  while( start < digits + lastdigit && *start == alphabet[0] )
    ++start;

  return start;
}