File: WP1Heuristics.cpp

package info (click to toggle)
libwpd 0.10.3-2
  • links: PTS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 4,948 kB
  • sloc: cpp: 28,095; sh: 4,433; makefile: 616; ansic: 4
file content (185 lines) | stat: -rw-r--r-- 5,931 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
/* libwpd
 * Version: MPL 2.0 / LGPLv2.1+
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Major Contributor(s):
 * Copyright (C) 2003 William Lachance (wrlach@gmail.com)
 * Copyright (C) 2003 Marc Maurer (uwog@uwog.net)
 * Copyright (C) 2006 Fridrich Strba (fridrich.strba@bluewin.ch)
 *
 * For minor contributions see the git repository.
 *
 * Alternatively, the contents of this file may be used under the terms
 * of the GNU Lesser General Public License Version 2.1 or later
 * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are
 * applicable instead of those above.
 *
 * For further information visit http://libwpd.sourceforge.net
 */

/* "This product is not manufactured, approved, or supported by
 * Corel Corporation or Corel Corporation Limited."
 */

#include "WP1Heuristics.h"
#include "WP1FileStructure.h"
#include "libwpd_internal.h"
#include <limits>
#include <memory>

using namespace libwpd;

/* Check whether the given password matches the checksum stored in the stream.
 *
 * The stream is rewound to its start and must begin with the bytes
 * FE FF 61 61; the 16-bit value that follows is compared with the checksum
 * that WPXEncryption derives from the password.
 *
 * Returns:
 *   WPD_PASSWORD_MATCH_DONTKNOW  no password was given, the leading bytes do
 *                                not match, or reading the stream threw
 *   WPD_PASSWORD_MATCH_OK        the stored value equals the password checksum
 *   WPD_PASSWORD_MATCH_NONE      the stored value differs from it
 */
WPDPasswordMatch WP1Heuristics::verifyPassword(librevenge::RVNGInputStream *input, const char *password) try
{
	// Without a password there is nothing to verify.
	if (password == nullptr)
		return WPD_PASSWORD_MATCH_DONTKNOW;

	input->seek(0, librevenge::RVNG_SEEK_SET);

	// Leading bytes expected in front of the stored checksum. Reading stops
	// at the first byte that does not match.
	static const unsigned char signature[] = { 0xFE, 0xFF, 0x61, 0x61 };
	for (const unsigned char expectedByte : signature)
	{
		if (readU8(input, nullptr) != expectedByte)
			return WPD_PASSWORD_MATCH_DONTKNOW;
	}

	// The encryption object is built before the stored value is read.
	WPXEncryption encryption(password, 6);
	const bool checksumMatches = (readU16(input, nullptr, true) == encryption.getCheckSum());
	return checksumMatches ? WPD_PASSWORD_MATCH_OK : WPD_PASSWORD_MATCH_NONE;
}
catch (...)
{
	// Any failure while reading (e.g. a truncated stream) means we cannot tell.
	return WPD_PASSWORD_MATCH_DONTKNOW;
}

/* Heuristically decide whether the stream looks like a WP1 document.
 *
 * input     stream to inspect; it is rewound and scanned up to its end
 * password  optional password; may be null
 *
 * Steps:
 *   1. If the stream starts with FE FF 61 61, the following 16-bit value is
 *      compared with the password checksum (when a password is given) or
 *      with zero (when it is not). On mismatch the function returns
 *      WPD_CONFIDENCE_SUPPORTED_ENCRYPTION.
 *   2. The stream is then scanned byte by byte (starting at offset 6 and
 *      decrypting when an encryption object was created, otherwise from
 *      offset 0) and every multi-byte function group is checked for a
 *      matching closing byte at the expected position.
 *
 * Returns:
 *   WPD_CONFIDENCE_SUPPORTED_ENCRYPTION  see step 1
 *   WPD_CONFIDENCE_NONE                  a structural check failed, a 0xFF
 *                                        byte was met outside of a group, no
 *                                        function group was seen in an
 *                                        unencrypted stream, or reading threw
 *   WPD_CONFIDENCE_EXCELLENT             otherwise
 */
WPDConfidence WP1Heuristics::isWP1FileFormat(librevenge::RVNGInputStream *input, const char *password) try
{
	input->seek(0, librevenge::RVNG_SEEK_SET);
	std::unique_ptr<WPXEncryption> encryption;

	if (readU8(input, nullptr) == 0xFE && readU8(input, nullptr) == 0xFF &&
	        readU8(input, nullptr) == 0x61 && readU8(input, nullptr) == 0x61)
	{
		if (password)
		{
			encryption.reset(new WPXEncryption(password, 6));
			if (readU16(input, nullptr, true) != encryption->getCheckSum())
				return WPD_CONFIDENCE_SUPPORTED_ENCRYPTION;
		}
		else
		{
			if (readU16(input,nullptr) != 0x0000)
				return WPD_CONFIDENCE_SUPPORTED_ENCRYPTION;
		}
	}

	// Start scanning behind the 6 header bytes when decrypting, otherwise
	// from the very beginning. (encryption is only ever set when a password
	// was supplied.)
	input->seek(0, librevenge::RVNG_SEEK_SET);
	if (encryption)
		input->seek(6, librevenge::RVNG_SEEK_SET);

	int functionGroupCount = 0;

	WPD_DEBUG_MSG(("WP1Heuristics::isWP1FileFormat()\n"));

	while (!input->isEnd())
	{
		const unsigned char readVal = readU8(input, encryption.get());

		WPD_DEBUG_MSG(("WP1Heuristics, Offset 0x%.8x, value 0x%.2x (%c)\n", (unsigned int)input->tell() - 1, readVal, readVal));

		if (readVal < (unsigned char)0x20)
		{
			// line breaks et al, skip
		}
		else if (readVal <= (unsigned char)0x7F)
		{
			// normal ASCII characters, skip
		}
		else if (readVal <= (unsigned char)0xBF)
		{
			// single character function codes (0x80..0xBF), just count them
			functionGroupCount++;
		}
		else if (readVal == (unsigned char)0xFF)
		{
			// special codes that should not be found as separate functions
			return WPD_CONFIDENCE_NONE;
		}
		else
		{
			// multi character function group (0xC0..0xFE)
			// check that the size constraints are valid, and that every group
			// member is properly closed at the right place

			if (WP1_FUNCTION_GROUP_SIZE[readVal-0xC0] == -1)
			{
				// variable length function group

				// We are checking following structure:
				//   <function code>{function length}...{function length}<function code>
				//   that we observed in variable length WP1 functions

				const unsigned functionLength = readU32(input, encryption.get(), true);
				// Reject empty groups and lengths that would not fit the
				// signed offset passed to seek() below.
				if (functionLength == 0 || functionLength > ((std::numeric_limits<unsigned>::max)() / 2))
					return WPD_CONFIDENCE_NONE;
				WPD_DEBUG_MSG(("WP1Heuristics functionLength = 0x%.8x\n", (unsigned int)functionLength));

				// Skip the payload. A failed seek means the declared length
				// is not backed by the stream, so treat it exactly like the
				// fixed length case does instead of reading from wherever
				// the stream happens to be positioned.
				if (input->seek(static_cast<long>(functionLength), librevenge::RVNG_SEEK_CUR))
					return WPD_CONFIDENCE_NONE;

				const unsigned closingFunctionLength = readU32(input, encryption.get(), true);
				WPD_DEBUG_MSG(("WP1Heuristics closingFunctionLength = 0x%.8x\n", (unsigned int)closingFunctionLength));
				if (functionLength != closingFunctionLength)
					return WPD_CONFIDENCE_NONE;

				// when passed the complete file, we don't allow for open groups when we've reached EOF
				if (input->isEnd())
					return WPD_CONFIDENCE_NONE;

				const unsigned char closingGate = readU8(input, encryption.get());
				WPD_DEBUG_MSG(("WP1Heuristics closingGate = 0x%.2x\n", closingGate));
				if (closingGate != readVal)
					return WPD_CONFIDENCE_NONE;

				functionGroupCount++;
			}
			else
			{
				// fixed length function group

				// seek to the position where the closing gate should be
				const int res = input->seek(WP1_FUNCTION_GROUP_SIZE[readVal-0xC0]-2, librevenge::RVNG_SEEK_CUR);
				// when passed the complete file, we should be able to do that
				if (res)
					return WPD_CONFIDENCE_NONE;

				// read the closing gate
				const unsigned char readNextVal = readU8(input, encryption.get());
				if (readNextVal != readVal)
					return WPD_CONFIDENCE_NONE;

				functionGroupCount++;
			}
		}
	}

	/* When we get here, the document is in a format that we *could* import properly.
	However, if we didn't encounter a single WP1 function group, we need to be more careful:
	this would be the case when passed a plaintext file for example, which libwpd is not
	supposed to handle. */
	if (!functionGroupCount && !encryption)
		return WPD_CONFIDENCE_NONE;
	return WPD_CONFIDENCE_EXCELLENT;
}
catch (...)
{
	// Any exception while reading (e.g. hitting the end of the stream in the
	// middle of a group) means the stream is not a well-formed WP1 document.
	return WPD_CONFIDENCE_NONE;
}

/* vim:set shiftwidth=4 softtabstop=4 noexpandtab: */