File: bomstrip.pas

package info (click to toggle)
bomstrip 9-15
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 252 kB
  • sloc: pascal: 41; ansic: 34; cpp: 34; java: 33; python: 31; sh: 24; makefile: 20; perl: 7; php: 6; ruby: 6; haskell: 6; awk: 2; sed: 1
file content (67 lines) | stat: -rwxr-xr-x 1,198 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
Program Bomstrip;
(* Berteun Damman, 2005, Public Domain 
 * Compiles with FreePascal and GNU Pascal (at least) and
 * besides that also seems to work....
 *)

(* Copies standard input to standard output, dropping a leading UTF-8
 * byte order mark (the bytes EF BB BF) when one is present.
 * Input is processed through the Pascal text-file routines, so every
 * end of line is re-emitted with WriteLn.
 *)
Type
	BOMT = Array[1..3] of Byte;

Const
	(* Byte sequence that is compared against the start of the input. *)
	UTF8BOM: BOMT = ($EF, $BB, $BF);

Var
	C: Char;          (* character most recently read from input *)
	F: Boolean;       (* True when the leading bytes equal UTF8BOM *)
	PrevCR: Boolean;  (* True when the last end-of-line char read was CR *)
	I, J: Integer;    (* I: number of leading bytes read; J: loop index *)
	BOM: BOMT;        (* the (up to three) leading bytes of the input *)

Begin
	(* Try to read 3 bytes. Stop early on end of file or end of line:
	   a BOM cannot contain a line break, and stopping there leaves the
	   line break to be handled by the copy loop below.
	 *)
	I := 0;
	While Not EOF And Not EOLn And (I <= 2) Do
	Begin
		Inc(I);
		Read(C);
		BOM[I] := Ord(C);
	End;

	(* Only a full three-byte prefix can be a BOM; compare what was read. *)
	F := (I = 3);
	For J := 1 To I Do
		If BOM[J] <> UTF8BOM[J] Then
			F := False;

	(* Not a BOM: the bytes are ordinary content, so print them. *)
	If Not F Then
		For J := 1 To I Do
			Write(Chr(BOM[J]));

	(* Print the remainder. The loop condition already covers the case
	   where the input ended within the first three bytes.
	 *)
	PrevCR := False;
	While Not EOF Do
	Begin
		If EOLn Then
		Begin
			(* GPC reads an EOLn as a space, FPC returns the actual
			 * end-of-line character. Emitting the line break with
			 * WriteLn instead of echoing C works either way.
			 *
			 * Where EOLn is true at both CR and LF (FPC's text
			 * driver appears to behave this way -- TODO confirm),
			 * a CR/LF pair is seen as two ends of line. Skip the
			 * WriteLn for an LF that directly follows a CR so the
			 * pair yields a single line break. Where C is a space
			 * here, the condition never fires and every EOLn still
			 * produces exactly one WriteLn.
			 *)
			Read(C);
			If Not (PrevCR And (C = Chr(10))) Then
				WriteLn;
			PrevCR := (C = Chr(13));
		End
		Else
		Begin
			Read(C);
			Write(C);
			PrevCR := False;
		End;
	End;
End.