File: test_charEncoding.ml

package info (click to toggle)
camomile 2.0.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 39,888 kB
  • sloc: ml: 14,026; xml: 230; makefile: 22
file content (132 lines) | stat: -rw-r--r-- 4,145 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
(* $Id: test-charEncoding.ml,v 1.8 2006/08/13 21:23:08 yori Exp $ *)
(* Copyright 2002, 2010 Yamagata Yoriyuki *)

open Printf
open Blender
open Camomile
open UPervasives
open CharEncoding
open OOChannel
module UL = ULine.Make (UTF8)

(** [print_uchar u] formats [u] as a backslash, the letter u, and the integer
    returned by [int_of_uchar u] written as eight zero-padded lowercase
    hexadecimal digits. It is passed as the [~printer] of the expectations
    in this file. *)
let print_uchar u = u |> int_of_uchar |> sprintf "\\u%08x"
(** [enc_filename file] places [file] under the data/enc directory and passes
    the resulting relative path through [input_filename]. *)
let enc_filename file = file |> Filename.concat "data/enc" |> input_filename

(** [test_decoding enc_name file utf8_file] checks that reading [file] with
    the encoding named [enc_name] produces the same sequence of characters as
    reading [utf8_file] as UTF-8.

    Both file names are resolved with [enc_filename]. The comparison is
    wrapped in a [test] described as ["Decoding: " ^ enc_name].

    Exceptions raised by [of_name] or [open_in_bin] (for instance
    [Sys_error]) escape to the caller before [test] is invoked.

    Both files are closed when the test body returns or raises; if the
    second file cannot be opened, the first one is closed before the
    exception is re-raised. *)
let test_decoding enc_name file utf8_file =
  let file = enc_filename file in
  let utf8_file = enc_filename utf8_file in
  let enc = of_name enc_name in
  let src_ic = open_in_bin file in
  let src = new in_channel enc src_ic in
  let dst_ic =
    (* Do not leak the first handle when the reference file is missing. *)
    try open_in_bin utf8_file
    with exn ->
      close_in_noerr src_ic;
      raise exn
  in
  let dst = new in_channel utf8 dst_ic in
  let close_inputs () =
    close_in_noerr src_ic;
    close_in_noerr dst_ic
  in
  test ~desc:("Decoding: " ^ enc_name) ~body:(fun () ->
      expect_pass ~body:(fun () ->
          (* Release both descriptors however the comparison ends. *)
          Fun.protect ~finally:close_inputs (fun () ->
              (* 1-based position of the character being compared. *)
              let c = ref 0 in
              (try
                 while true do
                   incr c;
                   (* End_of_file from the reference side ends the loop. *)
                   let u = dst#get () in
                   expect_equal_app
                     ~msg:(lazy (sprintf "location %d" !c))
                     ~printer:print_uchar
                     (fun () -> u)
                     ()
                     (fun () -> src#get ())
                     ()
                 done
               with End_of_file -> ());
              (* Once the reference is exhausted, one more read on each side
                 must behave the same way. *)
              expect_equal_app
                ~msg:(lazy "EOF")
                ~printer:print_uchar
                (fun () -> dst#get ())
                ()
                (fun () -> src#get ())
                ())))

(** [test_encoding enc_name utf8_file] checks that the characters read from
    [utf8_file] survive a round trip through the encoding named [enc_name]:
    the file is read as UTF-8, converted with [char_stream_of], converted
    back with [ustream_of], and compared character by character with a
    second, independent reading of the same file.

    [utf8_file] is resolved with [enc_filename]. The comparison is wrapped
    in a [test] described as ["Encoding: " ^ enc_name].

    Exceptions raised by [of_name] or [open_in_bin] escape to the caller
    before [test] is invoked.

    Both handles on the file are closed when the test body returns or
    raises; if the second open fails, the first handle is closed before the
    exception is re-raised. *)
let test_encoding enc_name utf8_file =
  let enc = of_name enc_name in
  let utf8_file = enc_filename utf8_file in
  (* Reference reading of the UTF-8 file. *)
  let ref_ic = open_in_bin utf8_file in
  let us = stream_of_channel (new in_channel utf8 ref_ic) in
  (* Second reading of the same file, fed through the round trip. *)
  let trip_ic =
    try open_in_bin utf8_file
    with exn ->
      close_in_noerr ref_ic;
      raise exn
  in
  let us0 = stream_of_channel (new in_channel utf8 trip_ic) in
  let cs = char_stream_of enc us0 in
  let us' = ustream_of enc cs in
  let close_inputs () =
    close_in_noerr ref_ic;
    close_in_noerr trip_ic
  in
  test ~desc:("Encoding: " ^ enc_name) ~body:(fun () ->
      expect_pass ~body:(fun () ->
          (* Release both descriptors however the comparison ends. *)
          Fun.protect ~finally:close_inputs (fun () ->
              Stream.iter
                (fun u ->
                  expect_equal_app
                    ~msg:(lazy (sprintf "location %d" (Stream.count us)))
                    ~printer:print_uchar
                    (fun () -> u)
                    () Stream.next us')
                us;
              (* With the reference exhausted, both streams must react the
                 same way to one more read. *)
              expect_equal_app
                ~msg:(lazy "EOF")
                ~printer:print_uchar Stream.next us Stream.next us')))

(** [test_enc file] runs the decoding and encoding tests for one sample file.

    The encoding name is the base name of [file], cut just before its first
    # character when there is one. The UTF-8 reference file is [file] with
    the suffix ..UTF8 appended.

    [Not_found] is reported on standard error as an unsupported encoding and
    [Sys_error] is reported as an exception; neither is re-raised. *)
let test_enc file =
  let base = Filename.basename file in
  let enc_name =
    match String.index_opt base '#' with
    | Some cut -> String.sub base 0 cut
    | None -> base
  in
  let report text = prerr_endline text in
  match
    let utf8_file = file ^ "..UTF8" in
    test_decoding enc_name file utf8_file;
    test_encoding enc_name utf8_file
  with
  | () -> ()
  | exception Not_found ->
      report ("Warning: " ^ enc_name ^ " is not supported")
  | exception (Sys_error _ as exn) ->
      report ("Exception: " ^ Printexc.to_string exn)

(** [filter name] is [false] when [name] matches the pattern below from its
    first character, i.e. when it contains two consecutive dots, and [true]
    otherwise. It keeps the ..UTF8 reference files from being treated as
    encoding samples. *)
let filter =
  (* Compile the pattern once rather than on every call. *)
  let double_dot = Str.regexp ".*\\.\\..*" in
  fun name -> not (Str.string_match double_dot name 0)
(* Apply [test_enc] to the entries of the "data/enc" input directory that
   [filter] accepts. NOTE(review): [foreach_file] and [input_filename] are not
   defined in this file (presumably Blender helpers); confirm there how the
   directory is traversed and which name is passed to [test_enc]. *)
let _ = foreach_file (input_filename "data/enc") ~filter test_enc

(* Tests for Japanese auto-detection: each sample is decoded with the "jauto"
   encoding and compared with its UTF-8 reference file. *)

let () =
  List.iter
    (fun (sample, reference) -> test_decoding "jauto" sample reference)
    [
      ("ISO-2022-JP", "ISO-2022-JP..UTF8");
      ("ISO-2022-JP-2", "ISO-2022-JP-2..UTF8");
      ("EUC-JP", "EUC-JP..UTF8");
    ]

(** In-memory byte sink. [output] appends the requested slice to the buffer
    given at construction and returns the number of bytes it was asked to
    write; [flush] and [close_out] do nothing. *)
class buffer_io (sink : Buffer.t) =
  object
    method output chunk ofs count =
      let () = Buffer.add_subbytes sink chunk ofs count in
      count

    method flush () = ()
    method close_out () = ()
  end

(* Output test: write four lines through a UTF-8 line writer backed by an
   in-memory buffer, flushing after each line, and compare the collected
   bytes with the expected text. *)
let () =
  let lines =
    [
      "---------------";
      "1 first line  1";
      "2 second line 2";
      "3 third line  3";
    ]
  in
  let expected =
    "---------------\n1 first line  1\n2 second line 2\n3 third line  3\n"
  in
  let check () =
    let sink = Buffer.create 0 in
    let byte_out = new buffer_io sink in
    let uchar_out =
      new CharEncoding.uchar_output_channel_of CharEncoding.utf8 byte_out
    in
    let writer = new UL.output_line uchar_out in
    List.iter
      (fun line ->
        writer#put line;
        writer#flush ())
      lines;
    (* Two more flushes with nothing pending; the expected text below allows
       no extra output from them. *)
    writer#flush ();
    writer#flush ();
    expect_equal
      ~msg:(lazy (sprintf "output %s\n" (Buffer.contents sink)))
      (Buffer.contents sink) expected
  in
  test ~desc:"Output Test" ~body:(fun () -> expect_pass ~body:check)

(* Call the Blender test framework's entry point after all tests above have
   been declared. NOTE(review): presumably this reports the collected
   results; confirm in Blender. *)
let () = Blender.main ()