1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
|
Program Bomstrip;
(* Berteun Damman, 2005, Public Domain
 * Compiles with FreePascal and GNU Pascal (at least) and
 * besides that also seems to work....
 *
 * Copies standard input to standard output. If the input starts with
 * the three bytes EF BB BF (the UTF-8 byte order mark) those bytes are
 * dropped; everything else is passed through.
 *)
Type
  BOMT = Array[1..3] of Byte;
Const
  UTF8BOM: BOMT = ($EF, $BB, $BF);
Var
  C: Char;
  F: Boolean;        (* True when the first three bytes are the BOM *)
  Done: Boolean;     (* True once the prefix read has to stop early *)
  I, J: Integer;     (* I = number of prefix bytes stored in BOM *)
  BOM: BOMT;
Begin
  (* Read up to 3 bytes of the first line. Stop early at end of file or
   * at a line end, so that line ends are only ever handled by the copy
   * loop further down.
   * The tests are nested instead of being joined with And: EOLn must
   * not be evaluated once EOF is true, and nesting guarantees that
   * without relying on short-circuit Boolean evaluation.
   *)
  I := 0;
  Done := False;
  While (I < 3) And Not Done Do
    If EOF Then
      Done := True
    Else If EOLn Then
      Done := True
    Else
    Begin
      Inc(I);
      Read(C);
      BOM[I] := Ord(C);
    End;

  (* Only a full three byte prefix can be a BOM; compare it byte by
   * byte. Entries of BOM beyond I were never assigned, so they are
   * not inspected.
   *)
  F := (I = 3);
  If F Then
    For J := 1 To 3 Do
      If BOM[J] <> UTF8BOM[J] Then
        F := False;

  (* Not a BOM: the bytes read so far are ordinary data, print them. *)
  If Not F Then
    For J := 1 To I Do
      Write(Chr(BOM[J]));

  (* Copy the remainder. Nothing happens here if the input is already
   * exhausted, so no separate end-of-file exit is needed.
   *)
  While Not EOF Do
    If EOLn Then
    Begin
      (* Emit one line break and let ReadLn consume the whole line
       * terminator. Reading the terminator with Read(C) takes a single
       * character, so a two character CR/LF ending was seen as two
       * line ends and produced two line breaks. ReadLn also makes it
       * irrelevant which character Read would deliver at a line end
       * (GPC delivers a space there).
       * Note: the line break written is whatever WriteLn produces on
       * the platform, so line endings are normalised, not preserved.
       *)
      WriteLn;
      ReadLn;
    End
    Else
    Begin
      Read(C);
      Write(C);
    End;
End.
|