File: ebook_epub.cpp

package info (click to toggle)
kchmviewer 8.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,900 kB
  • sloc: cpp: 8,099; sh: 145; makefile: 3
file content (385 lines) | stat: -rw-r--r-- 9,071 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
/*
 *  Kchmviewer - a CHM and EPUB file viewer with broad language support
 *  Copyright (C) 2004-2014 George Yunaev, gyunaev@ulduzsoft.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#if defined (WIN32)
    #include <io.h>     // dup
#else
    #include <unistd.h>
#endif

#include <QMessageBox>
#include <QtXml/QXmlSimpleReader>

#include "ebook_epub.h"
#include "helperxmlhandler_epubcontainer.h"
#include "helperxmlhandler_epubcontent.h"
#include "helperxmlhandler_epubtoc.h"

// URL scheme identifying links that point inside the loaded EPUB archive.
// pathToUrl() also uses this same string as the host part of the URLs it builds.
static const char * URL_SCHEME_EPUB = "epub";

// Constructs a book object with no ZIP archive attached (m_zipFile is null).
EBook_EPUB::EBook_EPUB()
	: EBook(),
	  m_zipFile( 0 )
{
}

// Releases the ZIP archive, if one is still open, when the object goes away.
EBook_EPUB::~EBook_EPUB()
{
	// Inside the destructor the call always resolves to this class' close()
	EBook_EPUB::close();
}

// Opens the EPUB file archiveName and parses its book descriptor.
//
// Any previously opened archive is closed first. Returns true when the
// archive was opened and parseBookinfo() succeeded; returns false otherwise,
// in which case neither the file nor the archive is left open.
bool EBook_EPUB::load(const QString &archiveName)
{
	close();

	// We use QFile and zip_fdopen instead of zip_open because latter does not support Unicode file names
	m_epubFile.setFileName( archiveName );

	if ( !m_epubFile.open( QIODevice::ReadOnly ) )
	{
		qWarning("Could not open file %s: %s", qPrintable(archiveName), qPrintable( m_epubFile.errorString()));
		return false;
	}

	// Open the ZIP archive: http://www.nih.at/libzip/zip_fdopen.html
	// On success zip_fdopen takes control over the passed descriptor,
	// so we need to pass a duplicate of it for this to work correctly
	int fdcopy = dup( m_epubFile.handle() );

	if ( fdcopy < 0 )
	{
		qWarning("Could not duplicate descriptor" );
		m_epubFile.close();
		return false;
	}

	int errcode = 0;
	m_zipFile = zip_fdopen( fdcopy, 0, &errcode );

	if ( !m_zipFile )
	{
		qWarning("Could not open file %s: error %d", qPrintable(archiveName), errcode);

		// On failure libzip leaves the descriptor untouched, so it is still ours to close.
		// The global qualifier is required: the member close() hides the POSIX function.
		::close( fdcopy );
		m_epubFile.close();
		return false;
	}

	// Parse the book descriptor file
	if ( !parseBookinfo() )
	{
		// Not a usable EPUB: do not keep the archive or the file open
		close();
		m_epubFile.close();
		return false;
	}

	return true;
}

// Closes the ZIP archive if one is open and resets the handle to null.
// Safe to call repeatedly and on an object that never loaded anything.
void EBook_EPUB::close()
{
	// Nothing to release when no archive is attached
	if ( !m_zipFile )
		return;

	zip_close( m_zipFile );
	m_zipFile = 0;

	// NOTE(review): m_epubFile is not closed here; the code doing so
	// ("if ( m_epubFile.isOpen() ) m_epubFile.close();") was disabled.
	// Confirm whether that is still required.
}

// Reads the archive entry addressed by url into str as text.
// Returns the result of getFileAsString() for the path extracted from the URL.
bool EBook_EPUB::getFileContentAsString(QString &str, const QUrl &url) const
{
	const QString archivePath = urlToPath( url );
	return getFileAsString( str, archivePath );
}

// Reads the archive entry addressed by url into data as raw bytes.
// Returns the result of getFileAsBinary() for the path extracted from the URL.
bool EBook_EPUB::getFileContentAsBinary(QByteArray &data, const QUrl &url) const
{
	const QString archivePath = urlToPath( url );
	return getFileAsBinary( data, archivePath );
}

// Copies the list of manifest URLs collected by parseBookinfo() into files.
// Always returns true.
bool EBook_EPUB::enumerateFiles(QList<QUrl> &files)
{
	files = m_ebookManifest;
	return true;
}

// Returns the book title, which parseBookinfo() takes from the "title"
// metadata entry of the content descriptor.
QString EBook_EPUB::title() const
{
	return m_title;
}

// Returns the URL of the first table-of-contents entry, which serves as the
// book's start page. Returns an empty QUrl when no TOC entries exist (for
// example when no book has been loaded) instead of indexing an empty list.
QUrl EBook_EPUB::homeUrl() const
{
	if ( m_tocEntries.isEmpty() )
		return QUrl();

	return m_tocEntries.first().url;
}

// Reports whether the EPUB backend supports the given feature.
// Only the table of contents (FEATURE_TOC) is supported; FEATURE_INDEX,
// FEATURE_ENCODING and any other value yield false.
bool EBook_EPUB::hasFeature(EBook::Feature code) const
{
	return code == FEATURE_TOC;
}

// Copies the table-of-contents entries built by parseBookinfo() into toc.
// Always returns true.
bool EBook_EPUB::getTableOfContents( QList<EBookTocEntry> &toc ) const
{
	toc = m_tocEntries;
	return true;
}

// The EPUB backend provides no index (hasFeature(FEATURE_INDEX) is false):
// the output list is left untouched and false is returned.
bool EBook_EPUB::getIndex(QList<EBookIndexEntry> &) const
{
	return false;
}

// Looks up the topic title recorded for url in the URL-to-title map.
// Returns an empty string when the URL is unknown.
QString EBook_EPUB::getTopicByUrl(const QUrl& url)
{
	// Single lookup; the explicit fallback keeps the "" result for unknown URLs
	return m_urlTitleMap.value( url, QString( "" ) );
}

// Returns the fixed string "UTF-8"; this backend has no selectable encoding
// (hasFeature(FEATURE_ENCODING) is false).
QString EBook_EPUB::currentEncoding() const
{
    return "UTF-8";
}

// Changing the encoding is not supported by this backend: hasFeature()
// reports FEATURE_ENCODING as false. Calling this terminates the process
// via abort(), so it never returns.
bool EBook_EPUB::setCurrentEncoding(const char *)
{
	abort();
}

bool EBook_EPUB::isSupportedUrl(const QUrl &url)
{
	return url.scheme() == URL_SCHEME_EPUB;
}

bool EBook_EPUB::parseXML(const QString &uri, QXmlDefaultHandler * parser)
{
	QByteArray container;

	if ( !getFileAsBinary( container, uri ) )
	{
		qDebug("Failed to retrieve XML file %s", qPrintable( uri ) );
		return false;
	}

	// Use it as XML source
	QXmlInputSource source;
	source.setData( container );

	// Init the reader
	QXmlSimpleReader reader;
	reader.setContentHandler( parser );
	reader.setErrorHandler( parser );

	return reader.parse( source );
}

bool EBook_EPUB::parseBookinfo()
{
    // Parse the container.xml to find the content descriptor
    HelperXmlHandler_EpubContainer container_parser;

    if ( !parseXML( "META-INF/container.xml", &container_parser )
         || container_parser.contentPath.isEmpty() )
        return false;

    // Parse the content.opf
    HelperXmlHandler_EpubContent content_parser;

    if ( !parseXML( container_parser.contentPath, &content_parser ) )
        return false;

    // At least title and the TOC must be present
    if ( !content_parser.metadata.contains("title") || content_parser.tocname.isEmpty() )
        return false;

    // All the files, including TOC, are relative to the container_parser.contentPath
    m_documentRoot.clear();
    int sep = container_parser.contentPath.lastIndexOf( '/' );

    if ( sep != -1 )
        m_documentRoot = container_parser.contentPath.left( sep + 1 );	// Keep the trailing slash

    // Parse the TOC
    HelperXmlHandler_EpubTOC toc_parser( this );

    if ( !parseXML( content_parser.tocname, &toc_parser ) )
        return false;

    // Get the data
    m_title = content_parser.metadata[ "title" ];

    // Move the manifest entries into the list
    Q_FOREACH( QString f, content_parser.manifest.values() )
        m_ebookManifest.push_back( pathToUrl( f ) );

    // Copy the manifest information and fill up the other maps if we have it
    if ( !toc_parser.entries.isEmpty() )
    {
        Q_FOREACH( EBookTocEntry e, toc_parser.entries )
        {
            // Add into url-title map
            m_urlTitleMap[ e.url ] = e.name;
            m_tocEntries.push_back( e );
        }
    }
    else
    {
        // Copy them from spline
        Q_FOREACH( QString u, content_parser.spine )
        {
            EBookTocEntry e;
            QString url = u;

            if ( content_parser.manifest.contains( u ) )
                url = content_parser.manifest[ u ];

            e.name = url;
            e.url= pathToUrl( url );
            e.iconid = EBookTocEntry::IMAGE_NONE;
            e.indent = 0;

            // Add into url-title map
            m_urlTitleMap[ pathToUrl( url ) ] = url;
            m_tocEntries.push_back( e );
        }
    }

    // EPub with an empty TOC is not valid
    if ( m_tocEntries.isEmpty() )
        return false;

    return true;
}

// Converts an in-archive link ("dir/file.html" or "dir/file.html#anchor")
// into a URL using the EPUB scheme, with the scheme string also used as host.
//
// The part before '#' becomes the percent-decoded path, prefixed with '/'
// if it does not already start with one; the part after '#' becomes the
// fragment.
QUrl EBook_EPUB::pathToUrl(const QString &link) const
{
    // Separate the file part from an optional "#fragment"
    const int hashPos = link.indexOf( '#' );
    const bool hasFragment = ( hashPos != -1 );

    QString filePart = hasFragment ? link.left( hashPos ) : link;

    // The URL path must be absolute
    if ( !filePart.startsWith( '/' ) )
        filePart.prepend( '/' );

    QUrl result;
    result.setScheme( URL_SCHEME_EPUB );
    result.setHost( URL_SCHEME_EPUB );

    if ( hasFragment )
        result.setFragment( link.mid( hashPos + 1 ) );

    result.setPath( QUrl::fromPercentEncoding( filePart.toUtf8() ) );

    return result;
}

// Extracts the path component from an EPUB-scheme URL.
// Returns an empty string for URLs using any other scheme.
QString EBook_EPUB::urlToPath(const QUrl &link) const
{
    // Foreign schemes do not map to anything in the archive
    if ( link.scheme() != URL_SCHEME_EPUB )
        return "";

    return link.path();
}

// Returns the URL scheme string used for links into EPUB archives ("epub").
const char *EBook_EPUB::urlScheme()
{
    return URL_SCHEME_EPUB;
}

// Reads the archive entry at path and decodes it as UTF-8 into str.
//
// If the data starts with an XML declaration that mentions "UTF-16" before
// its closing "?>", a critical message box is shown and false is returned.
// Also returns false when the entry cannot be read.
bool EBook_EPUB::getFileAsString(QString &str, const QString &path) const
{
	QByteArray raw;

	if ( !getFileAsBinary( raw, path ) )
		return false;

	// I have never seen yet an UTF16 epub
	const bool hasXmlDeclaration = raw.startsWith("<?xml" );

	if ( hasXmlDeclaration )
	{
		const int declarationEnd = raw.indexOf( "?>" );
		const int utf16Pos = raw.indexOf("UTF-16");
		const bool declaresUtf16 = ( utf16Pos > 0 ) && ( utf16Pos < declarationEnd );

		if ( declaresUtf16 )
		{
			QMessageBox::critical( 0,
								   ("Unsupported encoding"),
								   ("The encoding of this ebook is not supported yet. Please send it to gyunaev@ulduzsoft.com for support to be added") );
			return false;
		}
	}

	str = QString::fromUtf8( raw );
	return true;
}

// Reads the complete archive entry at path into data.
//
// path is resolved relative to the document root; a leading '/' is dropped
// before the root is prepended. Returns false when no archive is open, the
// entry does not exist, its size or index is unknown, the size does not fit
// into a QByteArray, or the entry cannot be read completely. When reading
// fails, data is cleared.
bool EBook_EPUB::getFileAsBinary(QByteArray &data, const QString &path) const
{
	// Nothing can be read without an open archive
	if ( !m_zipFile )
		return false;

	QString completeUrl;

	if ( !path.isEmpty() && path[0] == '/' )
		completeUrl = m_documentRoot + path.mid( 1 );
	else
		completeUrl = m_documentRoot + path;

	//qDebug("URL requested: %s (%s)", qPrintable(path), qPrintable(completeUrl));

	// Retrieve the file size: http://www.nih.at/libzip/zip_stat.html
	struct zip_stat fileinfo;

	if ( zip_stat( m_zipFile, completeUrl.toUtf8().constData(), 0, &fileinfo) != 0 )
	{
		qDebug("File %s is not found in the archive", qPrintable(completeUrl));
		return false;
	}

	// Make sure the size and index fields are valid
	if ( (fileinfo.valid & ZIP_STAT_SIZE) == 0 || (fileinfo.valid & ZIP_STAT_INDEX) == 0 )
		return false;

	// QByteArray sizes are ints; reject entries whose 64-bit size does not survive the conversion
	const int size = static_cast<int>( fileinfo.size );

	if ( size < 0 || static_cast<zip_uint64_t>( size ) != fileinfo.size )
		return false;

	// Open the file
	struct zip_file * file = zip_fopen_index( m_zipFile, fileinfo.index, 0 );

	if ( !file )
		return false;

	// Allocate the memory and read the file
	data.resize( size );

	// zip_fread reports the number of bytes actually read, so keep
	// reading until the whole entry is in or an error / early EOF occurs
	int total = 0;

	while ( total < size )
	{
		const zip_int64_t chunk = zip_fread( file, data.data() + total, size - total );

		if ( chunk <= 0 )
			break;

		total += static_cast<int>( chunk );
	}

	zip_fclose( file );

	if ( total != size )
	{
		// Do not hand back a partially filled buffer
		data.clear();
		return false;
	}

	return true;
}