File: SortKeyBuffer.cs

package info (click to toggle)
mono 6.14.1%2Bds2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,282,732 kB
  • sloc: cs: 11,182,461; xml: 2,850,281; ansic: 699,123; cpp: 122,919; perl: 58,604; javascript: 30,841; asm: 21,845; makefile: 19,602; sh: 10,973; python: 4,772; pascal: 925; sql: 859; sed: 16; php: 1
file content (297 lines) | stat: -rw-r--r-- 9,359 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
//
// SortKeyBuffer.cs : buffer implementation for GetSortKey()
//
// Author:
//	Atsushi Enomoto  <atsushi@ximian.com>
//
// Copyright (C) 2005 Novell, Inc (http://www.novell.com)
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
// 
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//

using System;
using System.IO;
using System.Globalization;

namespace Mono.Globalization.Unicode
{
	// Internal sort key storage that is reused during GetSortKey.
	//
	// The Append* methods accumulate weights into one byte buffer per
	// level. GetResult() then concatenates the levels into a single key:
	//
	//   level1 01 level2 01 level3 01 [l4s FF l4t 02 l4k FF l4w FF] 01 level5 00
	//
	// The bracketed part is emitted only when at least one kana was
	// appended. Reset() rewinds the write positions but keeps the
	// allocated buffers, so bytes beyond the current write position may
	// be stale data from a previous key and must never be read.
	internal class SortKeyBuffer
	{
		// Per-level buffers.
		// l4s = small kana sensitivity, l4t = mark type,
		// l4k = katakana flag, l4w = kana width sensitivity
		byte [] l1b, l2b, l3b, l4sb, l4tb, l4kb, l4wb, l5b;
//		int level5LastPos;

		// The string the key is being built for.
		string source;
		// Number of bytes currently used in each of the buffers above.
		int l1, l2, l3, l4s, l4t, l4k, l4w, l5;
		int lcid;
		CompareOptions options;
		// False when CompareOptions.IgnoreNonSpace is set; level 2 is
		// then not collected at all.
		bool processLevel2;
		// When true, level 2 is reversed once in GetResult().
		bool frenchSort;
		// Guards against reversing level 2 more than once.
		bool frenchSorted;

		// Creates an empty buffer. No storage is allocated until
		// Initialize() is called; Initialize() also replaces the lcid
		// given here.
		public SortKeyBuffer (int lcid)
		{
			this.lcid = lcid;
		}

		// Rewinds every level to empty so that the buffer can be used
		// for another key. The byte arrays are kept (and not cleared).
		public void Reset ()
		{
			l1 = l2 = l3 = l4s = l4t = l4k = l4w = l5 = 0;
//			level5LastPos = 0;
			frenchSorted = false;
		}

		// It is used for CultureInfo.ClearCachedData().
		// Drops all byte arrays; Initialize() allocates them again.
		internal void ClearBuffer ()
		{
			l1b = l2b = l3b = l4sb = l4tb = l4kb = l4wb = l5b = null;
		}

		// Prepares the buffer for building the sort key of `s`.
		//   options    - compare options; IgnoreNonSpace disables level 2
		//                and StringSort affects AppendNormal().
		//   lcid       - culture id stored into the resulting SortKey.
		//   s          - source string; must not be null.
		//   frenchSort - whether level 2 is reversed in GetResult().
		// Existing arrays are kept when they are large enough. This does
		// not rewind the write positions; see Reset().
		internal void Initialize (CompareOptions options, int lcid, string s, bool frenchSort)
		{
			this.source = s;
			this.lcid = lcid;
			this.options = options;
			int len = s.Length;
			processLevel2 = (options & CompareOptions.IgnoreNonSpace) == 0;
			this.frenchSort = frenchSort;

			// For Korean text it is likely to be much bigger (for
			// Jamo), but even in ko-KR most of the compared
			// strings won't be Hangul.
			if (l1b == null || l1b.Length < len)
				l1b = new byte [len * 2 + 10];

			if (processLevel2 && (l2b == null || l2b.Length < len))
				l2b = new byte [len + 10];
			if (l3b == null || l3b.Length < len)
				l3b = new byte [len + 10];

			// This weight is used only in Japanese text.
			// We could expand the initial length as well as
			// primary length (actually x3), but even in ja-JP
			// most of the compared strings won't be Japanese.
			// These start small and grow on demand in
			// AppendBufferPrimitive().
			if (l4sb == null)
				l4sb = new byte [10];
			if (l4tb == null)
				l4tb = new byte [10];
			if (l4kb == null)
				l4kb = new byte [10];
			if (l4wb == null)
				l4wb = new byte [10];

			if (l5b == null)
				l5b = new byte [10];
		}

		// Appends a CJK extension character: four level 1 bytes
		// (FE FF lv1msb lv1lsb) and the default weight 2 on level 2
		// (when collected) and level 3.
		internal void AppendCJKExtension (byte lv1msb, byte lv1lsb)
		{
			AppendBufferPrimitive (0xFE, ref l1b, ref l1);
			AppendBufferPrimitive (0xFF, ref l1b, ref l1);
			AppendBufferPrimitive (lv1msb, ref l1b, ref l1);
			AppendBufferPrimitive (lv1lsb, ref l1b, ref l1);
			if (processLevel2)
				AppendBufferPrimitive (2, ref l2b, ref l2);
			AppendBufferPrimitive (2, ref l3b, ref l3);
		}

		// LAMESPEC: Windows handles some of Hangul Jamo as to have
		// more than two primary weight values. However this causes
		// incorrect zero-termination. So I just ignore them and
		// treat it as usual character.
		/*
		internal void AppendJamo (byte category, byte lv1msb, byte lv1lsb)
		{
			AppendNormal (category, lv1msb, 0, 0);
			AppendBufferPrimitive (0xFF, ref l1b, ref l1);
			AppendBufferPrimitive (lv1lsb, ref l1b, ref l1);
			AppendBufferPrimitive (0xFF, ref l1b, ref l1);
			// FIXME: those values look extraneous but might be
			// some advanced use. Worthy of digging into it.
			AppendBufferPrimitive (0, ref l1b, ref l1);
			AppendBufferPrimitive (0xFF, ref l1b, ref l1);
			AppendBufferPrimitive (0, ref l1b, ref l1);
		}
		*/

		// Appends a kana character: the ordinary weights go through
		// AppendNormal(), then one byte is added to each of the four
		// Japanese sub-levels. The three boolean flags are stored as
		// 0xC4 when set and 0xE4 otherwise; markType is stored as is.
		internal void AppendKana (byte category, byte lv1, byte lv2, byte lv3, bool isSmallKana, byte markType, bool isKatakana, bool isHalfWidth)
		{
			AppendNormal (category, lv1, lv2, lv3);

			AppendBufferPrimitive ((byte) (isSmallKana ? 0xC4 : 0xE4), ref l4sb, ref l4s);
			AppendBufferPrimitive (markType, ref l4tb, ref l4t);
			AppendBufferPrimitive ((byte) (isKatakana ? 0xC4 : 0xE4), ref l4kb, ref l4k);
			AppendBufferPrimitive ((byte) (isHalfWidth ? 0xC4 : 0xE4), ref l4wb, ref l4w);
		}

		// Append sort key value from table normally.
		// A zero lv2 or lv3 is replaced by the default weight 2.
		internal void AppendNormal (byte category, byte lv1, byte lv2, byte lv3)
		{
			if (lv2 == 0)
				lv2 = 2;
			if (lv3 == 0)
				lv3 = 2;

			// Special weight processing: unless StringSort is
			// requested, category 6 goes to level 5 only.
			if (category == 6 && (options & CompareOptions.StringSort) == 0) {
				AppendLevel5 (category, lv1);
				return;
			}

			// non-primary diacritical weight is added to that of
			// the previous character (and does not reset level 3
			// weight).
			// The previous level 2/3 entries are popped here and
			// pushed back (merged) below.
			if (processLevel2 && category == 1 && l1 > 0) {
				lv2 = (byte) (lv2 + l2b [--l2]);
				lv3 = l3b [--l3];
			}

			// Category 1 never contributes to level 1.
			if (category != 1) {
				AppendBufferPrimitive (category, ref l1b, ref l1);
				AppendBufferPrimitive (lv1, ref l1b, ref l1);
			}
			if (processLevel2)
				AppendBufferPrimitive (lv2, ref l2b, ref l2);
			AppendBufferPrimitive (lv3, ref l3b, ref l3);
		}

		// Append variable-weight character.
		// It uses level 2 index for counting offsets (since level1
		// might be longer than 1).
		// Each entry is four bytes: two offset bytes followed by
		// category and lv1.
		private void AppendLevel5 (byte category, byte lv1)
		{
			// offset
#if false
			// If it strictly matches to Windows, offsetValue is always l2.
			int offsetValue = l2 - level5LastPos;
			// If it strictly matches to Windows, no 0xFF here.
			for (; offsetValue > 8192; offsetValue -= 8192)
				AppendBufferPrimitive (0xFF, ref l5b, ref l5);
#else
			// LAMESPEC: Windows cannot compute lv5 values for
			// those string that has length larger than 8064.
			// (It reminds me of SQL Server varchar length).
			int offsetValue = (l2 + 1) % 8192;
#endif
			AppendBufferPrimitive ((byte) ((offsetValue / 64) + 0x80), ref l5b, ref l5);
			AppendBufferPrimitive ((byte) (offsetValue % 64 * 4 + 3), ref l5b, ref l5);

//			level5LastPos = l2;

			// sortkey value
			AppendBufferPrimitive (category, ref l5b, ref l5);
			AppendBufferPrimitive (lv1, ref l5b, ref l5);
		}

		// Stores `value` at buf [bidx] and advances bidx. When that
		// fills the array it is replaced by a copy of twice the size,
		// so there is always room for the next write.
		private void AppendBufferPrimitive (byte value, ref byte [] buf, ref int bidx)
		{
			buf [bidx++] = value;
			if (bidx == buf.Length) {
				byte [] tmp = new byte [bidx * 2];
				Array.Copy (buf, tmp, buf.Length);
				buf = tmp;
			}
		}

		// Builds the sort key and then rewinds the buffer for reuse.
		public SortKey GetResultAndReset ()
		{
			SortKey ret = GetResult ();
			Reset ();
			return ret;
		}

		// For level2-5, 02 is the default and could be cut (implied).
		// 02 02 02 -> 0
		// 02 03 02 -> 2
		// 03 04 05 -> 3
		// Returns the length of the first `len` bytes of `data` once
		// trailing `defaultValue` bytes are dropped. `data` is not
		// touched when `len` is 0, so it may be null in that case.
		private int GetOptimizedLength (byte [] data, int len, byte defaultValue)
		{
			int end = len;
			while (end > 0 && data [end - 1] == defaultValue)
				end--;
			return end;
		}

		// Builds the SortKey for the data appended so far. See the
		// layout described at the top of this class.
		// The write positions are left untouched, so calling this
		// again without further appends yields the same key bytes.
		public SortKey GetResult ()
		{
			if (source.Length == 0)
				return new SortKey (lcid, source, new byte [0], options, 0, 0, 0, 0, 0, 0, 0, 0);

			if (frenchSort && !frenchSorted && l2b != null) {
				// Reverse exactly the bytes written for this key.
				// The array may hold stale data beyond l2 from a
				// previous use, so its content cannot be used to
				// find where the data ends.
				Array.Reverse (l2b, 0, l2);
				frenchSorted = true;
			}

			// Trimmed lengths are kept in locals so that the fields
			// keep describing what has actually been appended.
			int len2 = GetOptimizedLength (l2b, l2, 2);
			int len3 = GetOptimizedLength (l3b, l3, 2);
			// Decided on the untrimmed count: any kana at all
			// causes the Japanese section to be emitted.
			bool hasJapaneseWeight = (l4s > 0);
			int len4s = GetOptimizedLength (l4sb, l4s, 0xE4);
			int len4t = GetOptimizedLength (l4tb, l4t, 3);
			int len4k = GetOptimizedLength (l4kb, l4k, 0xE4);
			int len4w = GetOptimizedLength (l4wb, l4w, 0xE4);
			int len5 = GetOptimizedLength (l5b, l5, 2);

			// 5 = four end-of-level marks + one end-of-data mark.
			int length = l1 + len2 + len3 + len5 + 5;
			int jpLength = len4s + len4t + len4k + len4w;
			// 4 = the four Japanese sub-section terminators.
			if (hasJapaneseWeight)
				length += jpLength + 4;

			byte [] ret = new byte [length];
			Array.Copy (l1b, ret, l1);
			ret [l1] = 1; // end-of-level mark
			int cur = l1 + 1;
			if (len2 > 0)
				Array.Copy (l2b, 0, ret, cur, len2);
			cur += len2;
			ret [cur++] = 1; // end-of-level mark
			if (len3 > 0)
				Array.Copy (l3b, 0, ret, cur, len3);
			cur += len3;
			ret [cur++] = 1; // end-of-level mark
			if (hasJapaneseWeight) {
				Array.Copy (l4sb, 0, ret, cur, len4s);
				cur += len4s;
				ret [cur++] = 0xFF; // end-of-jp-subsection
				Array.Copy (l4tb, 0, ret, cur, len4t);
				cur += len4t;
				ret [cur++] = 2; // end-of-jp-middle-subsection
				Array.Copy (l4kb, 0, ret, cur, len4k);
				cur += len4k;
				ret [cur++] = 0xFF; // end-of-jp-subsection
				Array.Copy (l4wb, 0, ret, cur, len4w);
				cur += len4w;
				ret [cur++] = 0xFF; // end-of-jp-subsection
			}
			ret [cur++] = 1; // end-of-level mark
			if (len5 > 0)
				Array.Copy (l5b, 0, ret, cur, len5);
			cur += len5;
			ret [cur++] = 0; // end-of-data mark
			return new SortKey (lcid, source, ret, options, l1, len2, len3, len4s, len4t, len4k, len4w, len5);
		}
	}
}