1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
|
// https://bitbucket.org/xwang/mdict-analysis
// https://github.com/zhansliu/writemdict/blob/master/fileformat.md
// Octopus MDict Dictionary File (.mdx) and Resource File (.mdd) Analyser
//
// Copyright (C) 2012, 2013 Xiaoqiang Wang <xiaoqiangwang AT gmail DOT com>
// Copyright (C) 2013 Timon Wong <timon86.wang AT gmail DOT com>
// Copyright (C) 2015 Zhe Wang <0x1998 AT gmail DOT com>
//
// This program is a free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 3 of the License.
//
// You can get a copy of GNU General Public License along this program
// But you can always get it from http://www.gnu.org/licenses/gpl.txt
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
#ifndef __MDICTPARSER_HH_INCLUDED__
#define __MDICTPARSER_HH_INCLUDED__
#include <string>
#include <vector>
#include <map>
#include <utility>
#include <QPointer>
#include <QFile>
namespace Mdict
{
using std::string;
using std::vector;
using std::pair;
using std::map;
// A helper class to handle memory map for QFile
class ScopedMemMap
{
QFile & file;
uchar * address;
public:
ScopedMemMap( QFile & file, qint64 offset, qint64 size ) :
file( file ),
address( file.map( offset, size ) )
{
}
~ScopedMemMap()
{
if ( address )
file.unmap( address );
}
inline uchar * startAddress()
{
return address;
}
};
// Parser for Octopus MDict dictionary (.mdx) and resource (.mdd) files.
//
// Only the interface is declared here; apart from the small inline helpers
// below, the implementation lives outside this header.
class MdictParser
{
public:
  enum
  {
    // NOTE(review): presumably bumped whenever the parser's output changes,
    // so that dependent caches can be invalidated - confirm with the users
    // of this constant.
    kParserVersion = 0x000000d
  };

  // Index entry describing one record block.
  //
  // The comparison operators treat an entry as the half-open interval
  // [shadowStartPos, shadowEndPos) and compare it against a single position,
  // which lets a position be located within a sorted list of entries.
  // NOTE(review): the "shadow" positions look like offsets in the
  // decompressed stream and startPos/endPos like offsets in the compressed
  // stream - confirm against the implementation.
  struct RecordIndex
  {
    qint64 startPos;
    qint64 endPos;
    qint64 shadowStartPos;
    qint64 shadowEndPos;
    qint64 compressedSize;
    qint64 decompressedSize;

    // True when rhs lies inside [shadowStartPos, shadowEndPos).
    inline bool operator==( qint64 rhs ) const
    {
      return ( shadowStartPos <= rhs ) && ( rhs < shadowEndPos );
    }

    // True when the whole interval lies below rhs.
    inline bool operator<( qint64 rhs ) const
    {
      return shadowEndPos <= rhs;
    }

    // True when the whole interval lies above rhs.
    inline bool operator>( qint64 rhs ) const
    {
      return shadowStartPos > rhs;
    }

    // Defined elsewhere. Presumably returns the index in `offsets` of the
    // entry containing `val`, using the comparisons above - TODO confirm the
    // value returned when nothing matches.
    static size_t bsearch( vector<RecordIndex> const & offsets, qint64 val );
  };

  // Location data for a single record, handed to RecordHandler::handleRecord().
  // NOTE(review): field meanings are taken from their names only; verify
  // against the code that fills this structure.
  struct RecordInfo
  {
    qint64 compressedBlockPos;
    qint64 recordOffset;
    qint64 decompressedBlockSize;
    qint64 compressedBlockSize;
    qint64 recordSize;
  };

  // Callback interface passed to readRecordBlock().
  class RecordHandler
  {
  public:
    // Virtual destructor so that implementations can be destroyed safely
    // through a RecordHandler pointer.
    virtual ~RecordHandler() {}

    virtual void handleRecord( QString const & name, RecordInfo const & recordInfo ) = 0;
  };

  typedef vector< pair<qint64, qint64> > BlockInfoVector;
  typedef vector< pair<qint64, QString> > HeadWordIndex;
  typedef map<qint32, pair<QString, QString> > StyleSheets;

  // Read-only accessors for the corresponding data members.
  inline QString const & title() const
  {
    return title_;
  }

  inline QString const & description() const
  {
    return description_;
  }

  inline StyleSheets const & styleSheets() const
  {
    return styleSheets_;
  }

  inline quint32 wordCount() const
  {
    return wordCount_;
  }

  inline QString const & encoding() const
  {
    return encoding_;
  }

  inline QString const & filename() const
  {
    return filename_;
  }

  inline bool isRightToLeft() const
  {
    return rtl_;
  }

  MdictParser();

  // NOTE(review): file_ is a QPointer, which tracks a QObject without owning
  // it, and this destructor is empty. Confirm that the QFile is released
  // somewhere else, otherwise it leaks.
  ~MdictParser() {}

  // The three functions below are defined elsewhere; their bool result
  // presumably signals success - TODO confirm.
  bool open( const char * filename );
  bool readNextHeadWordIndex( HeadWordIndex & headWordIndex );
  bool readRecordBlock( HeadWordIndex & headWordIndex, RecordHandler & recordHandler );

  // helpers

  // Defined elsewhere. Presumably converts `fromSize` bytes at `from`,
  // encoded as `fromCode`, into a QString - TODO confirm.
  static QString toUtf16( const char * fromCode, const char * from, size_t fromSize );

  // Convenience overload taking the encoding name as a QString; the name is
  // converted to Latin-1 and forwarded to the overload above.
  static inline QString toUtf16( QString const & fromCode, const char * from, size_t fromSize )
  {
    return toUtf16( fromCode.toLatin1().constData(), from, fromSize );
  }

  static bool parseCompressedBlock( qint64 compressedBlockSize, const char * compressedBlockPtr,
                                    qint64 decompressedBlockSize, QByteArray & decompressedBlock );

  // Defined elsewhere. Takes the article by non-const reference and returns
  // a reference; presumably edits `article` in place and returns it.
  static QString & substituteStylesheet( QString & article, StyleSheets const & styleSheets );

  // std::string variant of the function above: `article` is decoded as
  // UTF-8, processed, and encoded back to UTF-8.
  static inline string substituteStylesheet( string const & article, StyleSheets const & styleSheets )
  {
    // Pass the length explicitly so that the text is not cut at an embedded
    // NUL byte.
    QString s = QString::fromUtf8( article.data(), static_cast< int >( article.size() ) );
    substituteStylesheet( s, styleSheets );

    // Encode explicitly as UTF-8 instead of relying on toStdString(), whose
    // encoding differs between Qt versions; this keeps the round trip
    // symmetric with the fromUtf8() call above.
    QByteArray const utf8 = s.toUtf8();
    return string( utf8.constData(), static_cast< size_t >( utf8.size() ) );
  }

protected:
  // Low-level reading and decoding helpers, all defined elsewhere.
  qint64 readNumber( QDataStream & in );
  static quint32 readU8OrU16( QDataStream & in, bool isU16 );
  static bool checkAdler32( const char * buffer, unsigned int len, quint32 checksum );
  static bool decryptHeadWordIndex( char * buffer, qint64 len );
  bool readHeader( QDataStream & in );
  bool readHeadWordBlockInfos( QDataStream & in );
  bool readRecordBlockInfos();
  BlockInfoVector decodeHeadWordBlockInfo( QByteArray const & headWordBlockInfo );
  HeadWordIndex splitHeadWordBlock( QByteArray const & block );

protected:
  QString filename_;
  QPointer<QFile> file_;
  StyleSheets styleSheets_;
  BlockInfoVector headWordBlockInfos_;
  // NOTE(review): presumably iterates over headWordBlockInfos_; if so, a
  // copy of this object would carry an iterator into the source object's
  // vector - verify before copying parser instances.
  BlockInfoVector::iterator headWordBlockInfosIter_;
  vector<RecordIndex> recordBlockInfos_;

  QString encoding_;
  QString title_;
  QString description_;

  double version_;
  qint64 numHeadWordBlocks_;
  qint64 headWordBlockInfoSize_;
  qint64 headWordBlockSize_;
  qint64 headWordBlockInfoPos_;
  qint64 headWordPos_;
  qint64 totalRecordsSize_;
  qint64 recordPos_;

  quint32 wordCount_;
  int numberTypeSize_;
  int encrypted_;
  bool rtl_;
};
}
#endif // __MDICTPARSER_HH_INCLUDED__
|