\ BOMSTRIP - copy standard input to standard output, skipping a leading UTF-8 byte order mark
\ by Peter Pentchev, 2008, public domain.
\
\ This works with GNU Forth. It really ought to work with other
\ Forths, too - as long as they have the STDIN and STDOUT words,
\ which at least FICL seems to be missing.
\ Scratch buffer shared by all reads and writes below: 8192 characters.
\ STRIP-BUF pushes the address of its first character.
CREATE STRIP-BUF
  8192 CHARS ALLOT
\ Reference copy of the three-byte UTF-8 byte order mark, used for comparison.
\ UTF-8-BOM pushes the address of the first of the three bytes.
CREATE UTF-8-BOM
  239 C,   \ hex EF
  187 C,   \ hex BB
  191 C,   \ hex BF
\ Read the first three bytes of stdin into STRIP-BUF.  If they are exactly the
\ UTF-8 BOM they are discarded; otherwise whatever was read is written to
\ stdout unchanged.
\ On a read error the word returns without writing anything; on a write error
\ the interpreter is terminated with BYE.
\ The data stack is left balanced on every path, as the ( -- ) effect promises.
: STRIP-FIRST ( -- )
  \ READ-FILE returns the byte count and an ior; the count must be dropped
  \ before the early return so it does not leak onto the caller's stack.
  STRIP-BUF 3 STDIN READ-FILE IF DROP EXIT THEN   \ -- u
  DUP 3 < IF
    \ Fewer than three bytes: cannot be a BOM, pass them straight through.
    STRIP-BUF SWAP STDOUT WRITE-FILE IF BYE THEN
  ELSE
    \ COMPARE yields 0 when both three-byte strings are identical.
    STRIP-BUF SWAP 2DUP UTF-8-BOM 3 COMPARE IF    \ -- addr u
      \ Not a BOM: these bytes are real data, copy them to stdout.
      STDOUT WRITE-FILE IF BYE THEN
    ELSE
      \ It is the BOM: skip it by discarding the saved address/length pair.
      2DROP
    THEN
  THEN ;
\ Copy everything remaining on stdin to stdout in blocks of up to 8192 bytes,
\ using STRIP-BUF as the transfer buffer.  Returns at end of input, when
\ READ-FILE reports zero bytes read.  Any read or write error terminates the
\ interpreter with BYE.
\ Written as an explicit loop rather than with RECURSE so that return-stack
\ usage does not grow with the size of the input on systems that do not
\ eliminate tail calls.
: STRIP-REST ( -- )
  BEGIN
    STRIP-BUF 8192 STDIN READ-FILE IF BYE THEN   \ -- u
    \ ?DUP duplicates only a non-zero count, so at end of input the zero is
    \ consumed by WHILE and nothing is left on the stack.
    ?DUP
  WHILE
    STRIP-BUF SWAP STDOUT WRITE-FILE IF BYE THEN
  REPEAT ;
\ The complete filter: handle the possible BOM in the first three bytes,
\ then stream the remainder of stdin to stdout.
: BOMSTRIP ( -- )
  STRIP-FIRST   \ inspect and, unless it is the BOM, emit the first three bytes
  STRIP-REST    \ copy the rest of the input
;
\ Loading this file runs it as a filter: perform the copy once, then leave
\ the Forth interpreter.
BOMSTRIP
BYE
\ End of the bomstrip filter.