File: bomstrip.fs

package info (click to toggle)
bomstrip 9-3
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 212 kB
  • ctags: 29
  • sloc: pascal: 41; cpp: 34; ansic: 34; java: 33; python: 31; makefile: 28; sh: 24; perl: 7; haskell: 6; php: 6; ruby: 6; awk: 2; sed: 1
file content (45 lines) | stat: -rwxr-xr-x 1,410 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
\ by Peter Pentchev, 2008, public domain.
\ 
\ This works with GNU Forth.  It really ought to work with other
\ Forths, too - as long as they have the STDIN and STDOUT words,
\ which at least FICL seems to be missing.

\ The buffer where the input is read - 8KB should be enough for everyone ;)
\ STRIP-FIRST reads the first three bytes into it and STRIP-REST reuses it
\ for every following block.  The size reserved here has to stay in step
\ with the 8192 literal that STRIP-REST passes to READ-FILE.
CREATE STRIP-BUF 8192 CHARS ALLOT

\ The UTF-8 BOM to compare to
\ Three bytes stored back to back: 239 187 191 decimal, i.e. EF BB BF hex.
\ STRIP-FIRST compares the first three input bytes against this sequence.
CREATE UTF-8-BOM 239 C, 187 C, 191 C,

\ Read three bytes, skip them if it is the BOM, output them otherwise.
\ Leaves the data stack exactly as it found it on every path that returns.
\ Any read or write error ends the program via BYE, the same way the
\ other words in this file treat I/O errors.
: STRIP-FIRST ( -- )
	( read up to three bytes; READ-FILE leaves: count ior )
	STRIP-BUF 3 STDIN READ-FILE IF BYE THEN
	( fewer than three bytes read? )
	DUP 3 < IF
		( too short to be a BOM - just write them to stdout )
		STRIP-BUF SWAP STDOUT WRITE-FILE IF BYE THEN
	ELSE
		( COMPARE yields zero when the bytes equal the UTF-8-BOM )
		STRIP-BUF SWAP 2DUP UTF-8-BOM 3 COMPARE IF
			( not a BOM - these bytes are data, write them to stdout )
			STDOUT WRITE-FILE IF BYE THEN
		ELSE
			( it is a BOM - discard the unused buffer address and count )
			2DROP
		THEN
	THEN ;

\ Read as much as we can from stdin and copy it to stdout, in 8192-byte blocks.
\ Written as an explicit BEGIN ... WHILE ... REPEAT loop: a trailing RECURSE
\ is only a loop on systems that optimise tail calls, otherwise every block
\ costs a return-stack cell and a large input can overflow the return stack.
\ Returns with an empty stack effect at EOF; any I/O error ends the program.
: STRIP-REST ( -- )
	BEGIN
		( read up to 8KB; READ-FILE leaves: count ior )
		STRIP-BUF 8192 STDIN READ-FILE IF BYE THEN
		( a zero count means EOF; ?DUP keeps a copy only when non-zero )
		?DUP
	WHILE
		( copy the block just read to stdout )
		STRIP-BUF SWAP STDOUT WRITE-FILE IF BYE THEN
	REPEAT ;

\ The whole filter: deal with a possible BOM at the start of the input,
\ then pass everything after it through unchanged.
: BOMSTRIP ( -- )
	STRIP-FIRST	( handle the first three bytes )
	STRIP-REST	( copy the remainder of stdin to stdout )
;

\ Script entry point: act as a stdin-to-stdout filter ...
BOMSTRIP
\ ... and then leave the interpreter instead of dropping to its prompt.
BYE