File: asciiset.go

package info (click to toggle)
golang-github-elliotwutingfeng-asciiset 0.0~git20240214.24af97c-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 772 kB
  • sloc: makefile: 16
file content (120 lines) | stat: -rw-r--r-- 3,510 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
// Package asciiset implements a bitset of ASCII characters.
package asciiset

import (
	"unicode/utf8"
)

// ASCIISet is a 36-byte value made up of nine 32-bit words.
//
// Each bit in the first 32 bytes (words 0 to 7) represents the presence of a
// given character in the set. The remaining 4 bytes (word 8) hold a counter
// for the number of ASCII characters in the set.
//
// The 128 bits of the first 16 bytes, starting with the least-significant bit
// of the lowest word to the most-significant bit of the highest word,
// map to the full range of all 128 ASCII characters.
// The 128 bits of the next 16 bytes are left zeroed by Add and Remove,
// ensuring that any non-ASCII character will be reported as not in the set.
// Rejecting non-ASCII characters in this way avoids bounds checks in ASCIISet.Contains.
type ASCIISet [9]uint32

// MakeASCIISet builds a set from the bytes of chars.
//
// ok is true when every byte of chars is ASCII. On the first non-ASCII byte
// the function stops and returns the set accumulated so far together with
// ok set to false.
func MakeASCIISet(chars string) (as ASCIISet, ok bool) {
	for _, b := range []byte(chars) {
		if b >= utf8.RuneSelf {
			// Not ASCII: give up and report the failure to the caller.
			return as, false
		}
		as.Add(b)
	}
	return as, true
}

// Add inserts character c into the set.
//
// Non-ASCII bytes are ignored. The size counter in the last word is only
// incremented when c was not already a member.
func (as *ASCIISet) Add(c byte) {
	if c >= utf8.RuneSelf {
		return // not an ASCII byte; leave the set untouched
	}
	word, mask := c/32, uint32(1)<<(c%32)
	if as[word]&mask != 0 {
		return // already present, nothing to count
	}
	as[word] |= mask
	as[8]++
}

// Contains reports whether c is inside the set.
//
// Any byte value may be passed: c/32 is at most 7, which is always a valid
// index into the nine-word array.
func (as *ASCIISet) Contains(c byte) bool {
	word := as[c/32]
	mask := uint32(1) << (c % 32)
	return word&mask != 0
}

// Remove deletes character c from the set.
//
// If c is not in the set (including any non-ASCII byte), the set contents
// and its size counter remain unchanged.
func (as *ASCIISet) Remove(c byte) {
	if c >= utf8.RuneSelf {
		return // not an ASCII byte; it can never be a member
	}
	word, mask := c/32, uint32(1)<<(c%32)
	if as[word]&mask == 0 {
		return // not present, nothing to remove
	}
	as[word] &^= mask
	as[8]--
}

// Size returns the number of characters in the set.
//
// The value is read from the counter kept in the last word of the array;
// the character bits themselves are not inspected.
func (as *ASCIISet) Size() int {
	count := as[8]
	return int(count)
}

// Union returns a new set containing all characters that belong to either as or as2.
//
// The size counter of the returned set is recomputed from the resulting
// bits, so Size reports the number of characters in the union.
func (as *ASCIISet) Union(as2 ASCIISet) (as3 ASCIISet) {
	// Only the first four words carry character bits; the remaining bitmap
	// words of as3 keep their zero value.
	for i := 0; i < 4; i++ {
		word := as[i] | as2[i]
		as3[i] = word
		// Count the members in this word: every step clears the lowest set bit.
		for ; word != 0; word &= word - 1 {
			as3[8]++
		}
	}
	return as3
}

// Intersection returns a new set containing all characters that belong to both as and as2.
//
// The size counter of the returned set is recomputed from the resulting
// bits, so Size reports the number of characters in the intersection.
func (as *ASCIISet) Intersection(as2 ASCIISet) (as3 ASCIISet) {
	// Only the first four words carry character bits; the remaining bitmap
	// words of as3 keep their zero value.
	for i := 0; i < 4; i++ {
		word := as[i] & as2[i]
		as3[i] = word
		// Count the members in this word: every step clears the lowest set bit.
		for ; word != 0; word &= word - 1 {
			as3[8]++
		}
	}
	return as3
}

// Subtract returns a new set containing all characters that belong to as but not as2.
//
// The size counter of the returned set is recomputed from the resulting
// bits, so Size reports the number of characters in the difference.
func (as *ASCIISet) Subtract(as2 ASCIISet) (as3 ASCIISet) {
	// Only the first four words carry character bits; the remaining bitmap
	// words of as3 keep their zero value.
	for i := 0; i < 4; i++ {
		word := as[i] &^ as2[i]
		as3[i] = word
		// Count the members in this word: every step clears the lowest set bit.
		for ; word != 0; word &= word - 1 {
			as3[8]++
		}
	}
	return as3
}

// Equals reports whether as contains the same characters as as2.
//
// Only the four words holding the ASCII character bits are compared;
// the size counter and the remaining words are not inspected.
func (as *ASCIISet) Equals(as2 ASCIISet) bool {
	for i := 0; i < 4; i++ {
		if as[i] != as2[i] {
			return false
		}
	}
	return true
}

// Visit calls do for every character in the set, in ascending numerical order.
//
// If do returns true, Visit stops immediately, skipping any remaining
// characters, and returns true. Otherwise it returns false once all
// characters have been visited.
//
// It is safe for do to add or remove characters from the set
// via the (*ASCIISet).Add or (*ASCIISet).Remove methods.
//
// The behavior of Visit is undefined if do changes the set in any other way.
func (as *ASCIISet) Visit(do func(n byte) (skip bool)) (aborted bool) {
	// Walk the 128 ASCII code points directly instead of nesting word/bit loops.
	for ch := uint(0); ch < 128; ch++ {
		// The word is re-read on every step so changes made by do are observed.
		present := as[ch/32]&(1<<(ch%32)) != 0
		if present && do(byte(ch)) {
			return true
		}
	}
	return false
}