File: two_way.ha

package info (click to toggle)
hare 0.25.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,948 kB
  • sloc: asm: 1,264; makefile: 123; sh: 114; lisp: 101
file content (95 lines) | stat: -rw-r--r-- 1,757 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use types;

// Implements the so called Two-way string matching algorithm by Crochemore and
// Perrin. It pre-processes the needle in O(len(needle)) steps and performs the
// matching in O(len(haystack)) steps. It does so without any space overhead.
fn index_tw(haystack: []u8, needle: []u8) (size | void) = {
	const n = len(haystack);
	const m = len(needle);
	if (n < m) {
		return void;
	};
	if (n == m) {
		return if (equal(haystack, needle)) 0 else void;
	};

	// pre-processing
	let tup1 = max_suf(needle, false);
	let i = tup1.0, p = tup1.1;
	let tup2 = max_suf(needle, true);
	let j = tup2.0, q = tup2.1;
	let ms = i;
	let per = p;
	if (i + 1 < j + 1) {
		ms = j;
		per = q;
	};

	let mem0 = 0z, mem = 0z;
	if (equal(haystack[..ms + 1], haystack[per..per + ms + 1])) {
		if (ms + 1 > m - ms - 1) {
			per = ms + 2;
		} else {
			per = m - ms;
		};
		mem0 = m - per;
	};


	j = 0;
	for (j <= n - m) {
		// right half
		i = if (ms + 1 > mem) ms + 1 else mem;
		for (i < m && needle[i] == haystack[j + i]) {
			i += 1;
		};
		if (i < m) {
			j += i - ms;
			mem = 0;
			continue;
		};

		// left half
		i = ms + 1;
		for (i > mem && needle[i - 1] == haystack[j + i - 1]) {
			i -= 1;
		};
		if (i <= mem) {
			return j;
		};
		j += per;
		mem = mem0;
	};
};

fn max_suf(x: []u8, inv: bool) (size, size) = {
	let i = types::SIZE_MAX;
	let j = 0z;
	let k = 1z, p = 1z;
	let m = len(x);
	for (j + k < m) {
		let a = x[j + k];
		let b = x[i + k];
		if (a == b) {
			if (k == p) {
				j += p;
				k = 1;
			} else {
				k += 1;
			};
		} else if (a < b ^^ inv) {
			j += k;
			k = 1;
			p = j - i;
		} else {
			i = j;
			j += 1;
			k = 1;
			p = 1;
		};
	};
	return (i, p);
};