File: cleanpath.S

package info (click to toggle)
dtrace 2.0.5-1
  • links: PTS
  • area: main
  • in suites: sid
  • size: 24,408 kB
  • sloc: ansic: 61,247; sh: 17,997; asm: 1,717; lex: 947; awk: 754; yacc: 695; perl: 37; sed: 17; makefile: 15
file content (462 lines) | stat: -rw-r--r-- 11,269 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2024, Oracle and/or its affiliates.
 */

/*
 * Conceptually, the idea behind cleanpath() is relatively simple:
 * simplify gratuitous "//", "/./" and "/foo/../" sequences in a path name.
 *
 * In practice, the algorithm is a little tricky, all the more since we
 * want to implement this in BPF.
 *
 * Significantly, the BPF verifier really does not like conditional
 * statements, since it has to verify every conceivable execution path,
 * and the number of such paths can explode exponentially.  So, we go to
 * great lengths to replace conditional execution with nonconditional
 * execution.  For example,
 *      if (nup > 0)
 *      	nup--;
 * can be implemented as:
 *      tmp = (-nup) >> 63;
 *      nup -= tmp;
 *
 * Here is the high-level algorithm.  Milestones are designated with
 * "#" so that they can be found easily in the code.  Comments like
 * "execute if TMP==1" indicate which variable is set to 1 (or 0) to
 * indicate that a code block should be executed (or not).
 *
 *      // #1.  handle empty string: cleanpath("") = ""
 *      if (strlen(src) == 0) {
 *      	dst = "";
 *      	return;
 *      }
 *
 *      // #2.  handle single-char string: cleanpath("x") = "x", even for '.' and '/'
 *      if (strlen(src) == 1)
 *      	return;
 *
 *      lastchar = src[strlen(src) - 1];
 *
 *      // #3.  regularize processing by prepending and appending a '/'
 *      dst = '/' + dst + '/';
 *
 *      // #4.  loop forward to handle "//" and "/./" strings
 *      seq = 0;      // word whose bytes are the most recent chars we have seen
 *      for (to = from = 0; ; to++, from++) {    // "from" advances faster than "to"
 *      	char chr;
 *
 *      	chr = dst[from];
 *      	dst[to] = chr;
 *      	if (chr == '\0')
 *      		break;
 *      	seq = (seq << 8) | chr;
 *
 *      	if (recent two chars in seq are "//") {
 *      		// #5.  execute if TMP==1
 *      		to--;
 *      		seq >>= 8;
 *      	}
 *
 *      	if (recent three chars in seq are "/./") {
 *      		// #6.  execute if TMP==1
 *      		to -= 2;
 *      		seq >>= 16;
 *      	}
 *      }
 *
 *      // #7.  loop backward to handle "/../" strings
 *      nup = 0;      // how many ".." are unresolved
 *      brk = ito;    // index of last '/' we have seen
 *      seq = 0;      // word whose bytes are the most recent chars we have seen
 *      for (to = from = brk - 1; from >= 0; to--, from--) { // "from" descends faster than "to"
 *      	char chr;
 *
 *      	chr = dst[from];
 *      	dst[to] = chr;
 *      	seq = (seq << 8) | chr;
 *
 *      	if (recent four chars in seq are "/../") {  // drop 3 chars and increment nup
 *      		// #8.  execute if TMP==1
 *      		nup++;
 *      		to += 3;
 *      		seq >>= 24;
 *      	} else if (chr == '/') {        // completed a dir name that is not ".."
 *      		// #9.  execute if TMP==1
 *      		if (nup > 0) {          // we have to drop the dir name due to a ".."
 *      			// #10.  execute if TM4==1
 *      			to = brk;       // rewind to last brk index
 *      			nup--;          // decrement nup
 *      		} else {                // normal case, keep this dir name
 *      			// #11.  execute if TM2==1
 *      			brk = ito;      // note brk for future use
 *      		}
 *      	}
 *      }
 *
 *      // #12.  point to the first char in the output
 *      to++;
 *
 *      // #13.  for absolute paths, forget about nup:  "/.." is equivalent to "/"
 *      if (src[0] == '/')
 *      	goto Lout;
 *
 *      // #14.  for relative paths, advance past the initial, artificial '/'
 *      to++;
 *
 *      // #15.  for each nup, prepend a "../"
 *      while (nup > 0) {
 *      	ito -= 3;
 *      	dst[ITO + 0] = '.';
 *      	dst[ITO + 1] = '.';
 *      	dst[ITO + 2] = '/';
 *      	nup--;
 *      }
 *
 *      Lout:
 *
 *      // #16.  move the result string to the beginning of the dst buffer
 *      strcpy(dst, &dst[to]);
 *
 *      // #17.  check for empty string
 *      if (strlen(dst) < 1) {
 *      	dst = ".";
 *      	return;
 *      }
 *
 *      // #18.  check for single-char string
 *      if (strlen(dst) < 2)
 *      	return;
 *
 *      // #19.  strip off the trailing '/'
 *      dst[strlen(dst) - 1] = '\0';
 */

#include <bpf_asm_helpers.h>

/*
 * void dt_cleanpath(const dt_dctx_t *dctx, char *src, char *dst);
 */
	.text
	.align	4
	.global	dt_cleanpath
	.type	dt_cleanpath, @function
dt_cleanpath :

#define DST %r9
	/* Cache dst pointer. */
	mov	DST, %r3

	/*
	 * Copy src to &dst[1].  Then we can directly dereference individual chars.
	 * The first byte, dst[0], is reserved for prepending a '/'.
	 */
	mov	%r3, %r2
	lddw	%r2, STRSZ
	add	%r2, 1
	mov	%r1, DST
	add	%r1, 1
	call	BPF_FUNC_probe_read_str
	/* At this point, %r0 has strlen(src) + 1. */

	/* #1.  Handle empty string: cleanpath("") = "". */
	jsgt	%r0, 1, .Lnotempty
	stb	[DST+0], 0
	mov	%r0, 0
	exit
.Lnotempty:

	/*
	 * #2.  Handle single-char string: cleanpath("x") = "x", even for '.' and '/'.
	 * Load the first char (which we had put in dst[1]) and stuff it in dst[0].
	 */
	ldxb	%r1, [DST+1]
	jgt	%r0, 2, .Lnotsinglechar
	stxb	[DST+0], %r1
	stb	[DST+1], 0
	mov	%r0, 0
	exit
.Lnotsinglechar:

#define FIRSTCHAR [%fp+-1]
	/* Save the first char. */
	stxb	FIRSTCHAR, %r1

	/* Save the last char. */
	mov	%r8, %r0
	add	%r8, DST
	ldxb	%r8, [%r8+-1]

	/* #3.  Prepend a '/'. */
	stb	[DST+0], '/'

	/* #3.  Append a '/'. */
	add	%r0, DST
	stb	[%r0+0], '/'
	stb	[%r0+1], 0

#define TMP %r8		/* temp reg */
#define ITO %r7		/* index to copy "to" */
#define IFR %r6		/* index to copy "from" */
#define SEQ %r5		/* sequence of most-recent chars */
#define CHR %r4		/* char being copied */
#define SIZ %r3		/* STRSZ */

	/* #4.  Loop forward through the string. */
	mov	ITO, 0
	mov	IFR, 0
	mov	SEQ, 0
	lddw	SIZ, STRSZ
.Lforward:
	jge	IFR, SIZ, .Lret		/* reassure the BPF verifier */

	/* Load the char from the "from" index. */
	mov	TMP, IFR
	add	TMP, DST
	ldxb	CHR, [TMP + 0]

	/* Store the char to the "to" index. */
	mov	TMP, ITO
	jlt	TMP, 0, .Lret		/* reassure the BPF verifier */
	jgt	TMP, IFR, .Lret		/* reassure the BPF verifier */
	add	TMP, DST
	stxb	[TMP + 0], CHR

	/* If the char is '\0', break out of the loop. */
	jeq	CHR, 0, .Lforward_brk

	/* Add this char to the sequence of chars. */
	lsh	SEQ, 8
	or	SEQ, CHR

	/* #5.  Compute TMP = 1 (if we should execute), else 0. */
	mov	TMP, (('/' << 8) | '/')
	xor	TMP, SEQ
	and	TMP, 0xffff
	neg	TMP
	rsh	TMP, 63
	xor	TMP, 1

	/* Use this to back up one char if "//". */
	sub	ITO, TMP

	/* Update SEQ accordingly. */
	mul	TMP, 8
	rsh	SEQ, TMP

	/* #6.  Compute TMP = 1 (if we should execute), else 0. */
	mov	TMP, (('/' << 16) | ('.' << 8) | '/')
	xor	TMP, SEQ
	and	TMP, 0xffffff
	neg	TMP
	rsh	TMP, 63
	xor	TMP, 1

	/* Use this to back up two chars if "/./". */
	mul	TMP, 2
	sub	ITO, TMP

	/* Update SEQ accordingly. */
	mul	TMP, 8
	rsh	SEQ, TMP

	/* Advance the indices and iterate. */
	add	ITO, 1
	add	IFR, 1
	ja	.Lforward
#undef SIZ

.Lforward_brk:

#define TM2 %r3		/* another temp reg */
#define TM3 %r2		/* another temp reg */
#define BRK %r1		/* index of last directory-level break '/' */
#define NUP %r0		/* num of parent directories to go up due to "../" */

	/* #7.  Handle double-dot "/../", looping backward. */
	mov	NUP, 0
	mov	SEQ, 0
	mov	BRK, ITO
	jslt	BRK, 1, .Lret		/* reassure the BPF verifier */
	lddw	TMP, STRSZ
	jge	BRK, TMP, .Lret		/* reassure the BPF verifier */
	mov	IFR, BRK
	sub	IFR, 1
	mov	ITO, IFR
.Lbackward:
	/* Load the char from the "from" index. */
	mov	TMP, IFR
	add	TMP, DST
	ldxb	CHR, [TMP + 0]

	/* Store the char to the "to" index. */
	mov	TMP, ITO
	lddw	TM2, STRSZ
	jge	TMP, TM2, .Lret		/* reassure the BPF verifier */
	jlt	TMP, IFR, .Lret		/* reassure the BPF verifier */
	add	TMP, DST
	stxb	[TMP + 0], CHR

	/* Add this char to the sequence of chars. */
	lsh	SEQ, 8
	or	SEQ, CHR

	/* #8.  Compute TMP = 1 (if we should execute), else 0. */
	mov	TMP, ('/' | ('.' << 8) | ('.' << 16) | ('/' << 24))
	xor	TMP, SEQ
	lsh	TMP, 32
	rsh	TMP, 32
	neg	TMP
	rsh	TMP, 63
	xor	TMP, 1

	/* Use this to increase NUP if "/../". */
	add	NUP, TMP

	/* Use this to back up three chars if "/../". */
	mov	TM2, TMP
	mul	TM2, 3
	add	ITO, TM2

	/* Update SEQ accordingly. */
	mul	TM2, 8
	rsh	SEQ, TM2

	/* Now switch to the not-"/.../" case. */
	xor	TMP, 1

/* Rename CHR's reg TM4;  TM4 is yet another temp reg and starts with CHR's value. */
#undef CHR
#define TM4 %r4

	/* If the last char is "/", TM4 = 1.  Else, TM4 = 0. */
	xor	TM4, '/'
	neg	TM4
	rsh	TM4, 63
	xor	TM4, 1

	/* #9.  Compute TMP = 1 (if we should execute), else 0. */
	mul	TMP, TM4

	/* If NUP > 0, TM2 = 1.  Else, TM2 = 0. */
	mov	TM2, NUP
	neg	TM2
	rsh	TM2, 63

	/* #10.  Compute TM4 = 1 (if we should execute), else 0. */
	mov	TM4, TMP
	mul	TM4, TM2

	/* If TM4 is 1, then ITO = BRK (revert to last BRK index). */
	mov	TM3, BRK
	sub	TM3, ITO
	mul	TM3, TM4
	add	ITO, TM3

	/* If TM4 is 1, then NUP--. */
	sub	NUP, TM4

	/* #11.  Compute TM2 = 1 (if we should execute), else 0. */
	xor	TM2, 1
	mul	TM2, TMP

	/* If TM2 is 1, then BRK = ITO. */
	mov	TM4, ITO
	sub	TM4, BRK
	mul	TM4, TM2
	add	BRK, TM4

	/* Advance the indices and iterate. */
	sub	ITO, 1
	sub	IFR, 1
	jsge	IFR, 0, .Lbackward
#undef BRK
#undef TM4
#undef TM2
#undef SEQ
#undef TM3
#undef IFR

	/*
	 * At this point, we've finished the backward loop.
	 */

	/* #12.  Increment ITO to point to the beginning of the output. */
	add	ITO, 1

	/* #13.  If we have an absolute path (src[0] == '/'), go to Lout. */
	ldxb	TMP, FIRSTCHAR
	jeq	TMP, '/', .Lout
#undef FIRSTCHAR

	/* #14.  Otherwise, advance past the initial, spurious '/'. */
	add	ITO, 1

	/* #15.  Add a "../" prefix NUP times. */
	/*
	 * FIXME:  The BPF verifier needs some assurance that this loop
	 * is not endless.  We arbitrarily assume NUP is at most 15,
	 * equivalent to some clean path of the form
	 * "../../../../../../../../../../../../../../../foo/bar".
	 */
	and	NUP, 15

	lddw	TMP, STRSZ
	jgt	ITO, TMP, .Lret		/* reassure the BPF verifier */
.Lprefix:
	jsle	NUP, 0, .Lout

	sub	ITO, 3
	jslt	ITO, 0, .Lret		/* reassure the BPF verifier */

	mov	TMP, ITO
	add	TMP, DST
	stb	[TMP+0], '.'
	stb	[TMP+1], '.'
	stb	[TMP+2], '/'

	sub	NUP, 1
	ja	.Lprefix
#undef NUP
#undef TMP

.Lout:
	jslt	ITO, 0, .Lret		/* reassure the BPF verifier */
	lddw	%r1, STRSZ
	jgt	ITO, %r1, .Lret		/* reassure the BPF verifier */

	/* #16.  Copy from index "ito" to the beginning of the DST buffer. */
	/*
	 * FIXME:  Is this okay?  We are copying a string in the DST
	 * array from offset ITO down to offset 0.  The input and output
	 * strings might (indeed probably will) overlap.  Can we use
	 * the BPF helper function in this way?
	 */
	mov	%r1, DST
	lddw	%r2, STRSZ
	add	%r2, 1
	sub	%r2, ITO
	mov	%r3, DST
	add	%r3, ITO
	call	BPF_FUNC_probe_read_str
	/* At this point, %r0 has strlen(dst) + 1. */
#undef ITO

	/* #17.  If the string is empty, make it ".\0" and return. */
	jsge	%r0, 2, .Lnotempty2
	sth	[DST+0], '.' /* little-endian uses "\0.", or just '.' */
	ja	.Lret
.Lnotempty2:

	/* #18.  If the string is a single char (plus NUL char), then return. */
	jle	%r0, 2, .Lret

	/* #19.  Strip off the trailing '/'. */
	add	%r0, DST
	stb	[%r0+-2], 0
#undef DST

.Lret:
	mov	%r0, 0
	exit
	.size	dt_cleanpath, .-dt_cleanpath