File: segment.go

package info (click to toggle)
gitleaks 8.26.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,368 kB
  • sloc: makefile: 22; python: 19; xml: 13
file content (173 lines) | stat: -rw-r--r-- 4,502 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
package codec

import (
	"fmt"
)

// EncodedSegment represents a portion of text that is encoded in some way.
// It records where the segment sits before and after decoding, together with
// the segments of the previous decoding pass, so that positions in decoded
// text can be mapped back to the original text (see toOriginal).
type EncodedSegment struct {
	// predecessors are all of the segments from the previous decoding pass.
	// toOriginal follows this field to climb up one pass at a time.
	predecessors []*EncodedSegment

	// original start/end indices before decoding
	original startEnd

	// encoded start/end indices relative to the previous decoding pass.
	// If it's a top level segment, original and encoded will be the
	// same.
	encoded startEnd

	// decoded start/end indices in this pass after decoding
	decoded startEnd

	// decodedValue contains the decoded string for this segment
	decodedValue string

	// encodings holds the encodings that make up this segment. encodingKind
	// values can be or'd together to hold multiple encodings (Tags combines
	// the encodings of several segments this way).
	encodings encodingKind

	// depth is how many decoding passes it took to decode this segment
	depth int
}

// Tags builds the metadata tags describing how the given segments were
// decoded: one "decoded:<kind>" tag for every encoding kind found across the
// segments, followed by a single "decode-depth:<n>" tag. An empty (non-nil)
// list is returned when no segments are supplied.
func Tags(segments []*EncodedSegment) []string {
	if len(segments) == 0 {
		return []string{}
	}

	// Fold the encodings of every segment into one combined value
	var combined encodingKind
	for _, segment := range segments {
		combined |= segment.encodings
	}

	kinds := combined.kinds()
	tags := make([]string, 0, len(kinds)+1)
	for _, kind := range kinds {
		tags = append(tags, fmt.Sprintf("decoded:%s", kind))
	}

	// Decoding is done in passes, so all of the segments should share the
	// same depth; the first segment supplies the value for the depth tag
	return append(tags, fmt.Sprintf("decode-depth:%d", segments[0].depth))
}

// CurrentLine returns the part of currentRaw spanning the line(s) touched by
// the given segments.
//
// The decoded ranges of all segments are combined with merge. The result
// begins at the closest newline at or before the start of the combined range
// (that newline character is part of the result), or at the beginning of
// currentRaw when there is none. It stops just before the first newline at or
// after the end of the combined range, or at the end of currentRaw when there
// is none.
//
// If no segments are provided, currentRaw is returned unchanged. Range
// indices that fall outside of currentRaw are clamped instead of causing an
// out-of-range panic.
func CurrentLine(segments []*EncodedSegment, currentRaw string) string {
	// Return the whole thing if no segments are provided
	if len(segments) == 0 {
		return currentRaw
	}

	// Merge the ranges together into a single decoded range
	decoded := segments[0].decoded
	for _, segment := range segments[1:] {
		decoded = decoded.merge(segment.decoded)
	}

	start := 0
	end := len(currentRaw)

	// Find the start of the range by scanning backwards for a newline. The
	// scan never begins past the last valid index, so an empty currentRaw or
	// a range starting at/after its end cannot index out of bounds.
	from := decoded.start
	if from > len(currentRaw)-1 {
		from = len(currentRaw) - 1
	}
	for i := from; i >= 0; i-- {
		if currentRaw[i] == '\n' {
			start = i
			break
		}
	}

	// Find the end of the range by scanning forwards for a newline. The scan
	// never begins before start, which keeps start <= end for the final
	// slice expression and rules out negative indices.
	to := decoded.end
	if to < start {
		to = start
	}
	for i := to; i < end; i++ {
		if currentRaw[i] == '\n' {
			end = i
			break
		}
	}

	return currentRaw[start:end]
}

// AdjustMatchIndex maps a match index from the current decode pass back to
// its location in the original text.
//
// matchIndex must hold the start of the match in its first element and the
// end of the match in its second element. The returned slice holds the
// adjusted start and end, in that order.
//
// matchIndex is returned unchanged when no segments are provided, or when it
// has fewer than two elements and therefore cannot describe a range.
func AdjustMatchIndex(segments []*EncodedSegment, matchIndex []int) []int {
	// Nothing to adjust against, or nothing that can be adjusted; the length
	// check also prevents an index-out-of-range panic below
	if len(segments) == 0 || len(matchIndex) < 2 {
		return matchIndex
	}

	// Map the match to its original location
	adjusted := toOriginal(segments, startEnd{matchIndex[0], matchIndex[1]})

	return []int{
		adjusted.start,
		adjusted.end,
	}
}

// SegmentsWithDecodedOverlap returns the segments whose decoded range
// overlaps the range described by start and end. Matching segments keep the
// order they had in the input, and the result is never nil.
func SegmentsWithDecodedOverlap(segments []*EncodedSegment, start, end int) []*EncodedSegment {
	target := startEnd{start, end}
	matched := []*EncodedSegment{}

	for i := range segments {
		if candidate := segments[i]; candidate.decoded.overlaps(target) {
			matched = append(matched, candidate)
		}
	}

	return matched
}

// toOriginal maps a start/end to its start/end in the original text.
// The provided start/end should be relative to the decoded values of the
// given predecessors.
//
// It works one decoding pass at a time: the range is translated into the
// encoded coordinates of the overlapping segments, and the process then
// repeats with the predecessors of those segments until a segment fully
// contains the range (its original start/end is returned) or there are no
// more predecessors (the range is returned as it stands).
func toOriginal(predecessors []*EncodedSegment, decoded startEnd) startEnd {
	current := decoded
	level := predecessors

	for len(level) > 0 {
		// The current range mapped one level up, where it was encoded. A
		// zero end marks that nothing has been mapped yet.
		var encoded startEnd

		for _, segment := range level {
			if !segment.decoded.overlaps(current) {
				continue // Not in scope
			}

			// A fully containing segment answers with its own original
			// start/end
			if segment.decoded.contains(current) {
				return segment.original
			}

			// Make the range relative to this segment's encoded position,
			// combining it with whatever earlier segments contributed
			mapped := segment.encoded.add(segment.decoded.overflow(current))
			if encoded.end != 0 {
				mapped = encoded.merge(mapped)
			}
			encoded = mapped
		}

		// Should only get here if the range passed in wasn't in a decoded
		// value. This shouldn't be the case
		if encoded.end == 0 {
			return current
		}

		// Climb up another level
		// (NOTE: each segment references all the predecessors)
		current = encoded
		level = level[0].predecessors
	}

	return current
}