File: ultra.ml

package info (click to toggle)
hevea 2.32-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 3,692 kB
  • sloc: ml: 19,109; sh: 493; makefile: 301; ansic: 132
file content (551 lines) | stat: -rw-r--r-- 14,096 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
(***********************************************************************)
(*                                                                     *)
(*                          HEVEA                                      *)
(*                                                                     *)
(*  Luc Maranget, projet Moscova, INRIA Rocquencourt                   *)
(*                                                                     *)
(*  Copyright 2001 Institut National de Recherche en Informatique et   *)
(*  Automatique.  Distributed only by permission.                      *)
(*                                                                     *)
(*  $Id: ultra.ml,v 1.14 2012-06-05 14:55:39 maranget Exp $             *)
(***********************************************************************)

open Printf
open Tree
open Htmltext
open Util

let same_prop f s =
  try
    let p = Htmltext.get_prop f.nat in
    List.exists (fun s -> p s.nat) s 
  with
  | NoProp -> false

let rec part_factor some blanks i s keep leave = function
  | [] -> keep,leave
  | ((f,_) as x)::rem when there f s ||
    same_prop f s ||
    (blanks && Htmltext.blanksNeutral f)->
      part_factor some blanks i s (x::keep) leave rem
  | (f,j)::rem ->
      part_factor some blanks i s keep
        (some f j (i-1) leave) rem

let there_factor s fs =  List.exists (fun (f,_) -> same_style s f) fs

let rec start_factor i fs start = function
  | [] -> start
  | s::rem when there_factor s fs ->
      start_factor i fs start rem
  | s::rem ->
      start_factor i fs ((s,i)::start) rem

let extend_factors some blanks i s r fs =
  let keep,leave = part_factor some blanks i s [] r fs in
  start_factor i fs keep s,leave


let rec part_factor_neutral some i keep leave = function
  | [] -> keep,leave
  | ((f,_) as x)::rem when Htmltext.blanksNeutral f ->
      part_factor_neutral some i (x::keep) leave rem
  | (f,j)::rem ->
      part_factor_neutral some i keep (some f j (i-1) leave) rem

let extend_factors_neutral some i r fs = part_factor_neutral some i [] r fs
  

let finish_factors some i r fs = part_factor some false i [] [] r fs

let pfactor chan fs =
  List.iter
    (fun ((i,j),f) ->
      Printf.fprintf chan " %d,%d:%s" i j f.txt)
    fs ;
  output_char chan '\n'

let covers (i1:int) (j1:int) i2 j2 =
  (i1 <= i2 && j2 < j1) ||
  (i1 < i2 &&  j2 <= j1)


let rec all_blanks ts i j =
  if i <= j then
    is_blank ts.(i) && all_blanks ts (i+1) j
  else
    true

let rec get_same ts i j f = function
  | [] -> ((i,j),f)
  | ((ii,jj),_)::_ when
       covers i j ii jj &&
       all_blanks ts i (ii-1) &&
       all_blanks ts (jj+1) j -> ((ii,jj),f)
  | _::rem -> get_same ts i j f rem

let get_sames ts fs =
  let rec do_rec r = function
    | [] -> r
    | (((i,j),f) as x)::rem ->
        do_rec
          (if blanksNeutral f then
            get_same ts i j f fs::r
          else
            x::r)
          rem in
  do_rec [] fs

let group_font ts fs =
  let fonts,no_fonts =
    List.partition (fun (_,f) -> is_font f.nat) fs in
  get_sames ts fonts@no_fonts

let group_span ts fs =
  let span,no_span =
    List.partition (fun (_,f) -> is_span f.nat) fs in
  get_sames ts span@no_span
                      
let conflict_low i1 j1 i2 j2 =  i1 < i2 && i2 <= j1 && j1 < j2

let correct_cfl_low ts i1 j1 i2 j2 =
  if conflict_low i1 j1 i2 j2 &&
    all_blanks ts i1 (i2-1)
  then
    i1
  else
    i2

and correct_cfl_high ts i1 j1 i2 j2 =
  if conflict_low i1 j1 i2 j2 &&
    all_blanks ts (j1+1) j2
  then
    j2
  else
    j1

let rec mk_cover_one ts i j f = function
  | [] -> (i,j),f
  | ((ii,jj),_)::rem ->
      mk_cover_one
        ts
        (correct_cfl_low ts ii jj i j)
        (correct_cfl_high ts i j ii jj)
        f rem

let rec mk_cover ts fs = function
  | [] -> []
  | ((i,j),f)::rem ->
      mk_cover_one ts i j f fs :: mk_cover ts fs rem

let extend_neutrals ts fs =
  let neutral,not_neutral =
    List.partition (fun (_,f) -> blanksNeutral f) fs in
  mk_cover ts fs neutral @ not_neutral

let factorize low high ts =
  if low >= high then []
  else
    let limit_blanks_right i =
    let rec do_rec i =
      if i <= low then low
      else begin
        if is_blank ts.(i) then
          do_rec (i-1)
        else
          i
      end in
    do_rec i in

  let correct_prop f i j env =
    try
      let _ = Htmltext.get_prop f.nat in
      let rec find_same  k = match ts.(k) with
        | Node (s,_) when there f s -> k
        | _ -> find_same (k-1) in
      let j = find_same j in
      if j=i || (blanksNeutral f && all_blanks ts i (j-1)) then
        env
      else
        ((i,j),f)::env
    with
    | NoProp -> ((i,j),f)::env in

  let some f i j env =
      if not (Htmltext.blanksNeutral f) then begin
        if j-i > 0 then
          correct_prop f i j env
        else
          env
      end else begin
        let r = ref 0 in
        for k = i to j do
          if not (is_blank ts.(k)) then incr r
        done ;
        if !r > 1 then
          correct_prop f i (limit_blanks_right j) env
        else
          env
      end in
      
  let rec do_rec i r fs =
    if i <= high then begin
      let fs,r = match ts.(i) with
        | Node (s,ts) ->
            extend_factors some (is_blanks ts) i s r fs
        | t ->
            if is_blank t then
              extend_factors_neutral some i r fs
            else
              finish_factors some i r fs in
      do_rec (i+1) r fs
    end else
      let _,r = finish_factors some i r fs in
      r in
  let r = do_rec low [] [] in
  let r = group_font ts r in
  let r = group_span ts r in
  let r = extend_neutrals ts r in
  if r <> [] && !Emisc.verbose > 1 then begin
    Printf.fprintf stderr "Factors in %d %d\n" low high ;
    for i=low to high do
      Pp.tree stderr ts.(i)
    done ;
    prerr_endline "\n*********" ;
    pfactor stderr r
  end ;
  r

let same ((i1,j1),_) ((i2,j2),_) = i1=i2 && j1=j2

let covers_cost ((((i1:int),(j1:int)),_),_) (((i2,j2),_),_) =
  covers i1 j1 i2 j2

let biggest fs =
  let rec through r = function
    | [] -> r
    | x::rem ->
        if List.exists (fun y -> covers_cost y x) rem then
          through r rem
        else
          through (x::r) rem in
  through [] (through [] fs)

let conflicts ((i1,j1),_) ((i2,j2),_) =
  (i1 < i2 && i2 <= j1 && j1 < j2) ||
  (i2 < i1 && i1 <= j2 && j2 < j1)


let num_conflicts f fs = 
  List.fold_left
    (fun r g ->
      if conflicts f g then 1+r else r)
    0 fs

let put_conflicts fs =
  List.fold_left
    (fun r g -> (g,num_conflicts g fs)::r)
    [] fs


let rec add f = function
  | [] -> let i,f = f in [i,[f]]
  | x::rem as r ->
      if same f x then
        let _,f = f
        and i,r = x in
        (i,(f::r))::rem
      else if conflicts f x then
        r
      else
        x::add f rem

let get_them fs =
  List.fold_left
    (fun r (f,_) ->  add f r)
    [] fs

let pfactorc chan fs =
  List.iter
    (fun (((i,j),f),c) ->
      Printf.fprintf chan " %d,%d:%s(%d)" i j f.txt c)
    fs ;
  output_char chan '\n'

let slen f =
  (if is_font f.nat then 
    5
  else
    0) + String.length f.txt + String.length f.ctxt

let order_factors (((_i1,_j1),f1),(c1:int)) (((_i2,_j2),f2),c2) =
  match compare c1 c2 with
  | 0 -> compare (slen f2) (slen f1) (* NB comparison reversed *)
  | r -> r

let select_factors fs =
  let fs1 = put_conflicts fs in
  let fs2 = biggest fs1 in
  let fs3 = List.sort order_factors fs2 in
  if !Emisc.verbose > 1 then begin
    prerr_string "fs1:" ; pfactorc stderr fs1 ;
    prerr_string "fs2:" ; pfactorc stderr fs2 ;
    prerr_string "fs3:" ; pfactorc stderr fs3
  end ;
  List.sort
    (fun ((_,j1),_) ((i2,_),_) -> Pervasives.compare (j1:int) i2)
    (get_them fs3)


let some_font s = List.exists (fun s -> is_font s.nat) s

let rec font_tree = function
  | Node (s,ts) ->
      some_font s || font_trees ts
  | Blanks _ -> true
  | _ -> false

and font_trees ts = List.for_all font_tree ts

let other_props s =
  let rec other r = function
    | [] -> r
    | s::rem when is_font s.nat ->
        other
          (List.fold_left
             (fun r p -> if p s.nat then r else p::r)
             [] r)
          rem
    | _::rem -> other r rem in
  other font_props s

let rec all_props r ts = match r with
| [] -> []
| _  -> match ts with
  | [] -> r
  | Node (s,_)::rem when some_font s ->
      all_props
        (List.filter
           (fun p -> List.exists (fun s -> is_font s.nat && p s.nat) s)
           r)
        rem
  | Node (_,ts)::rem ->
      all_props (all_props r ts) rem
  | Blanks _::rem ->
      all_props
        (List.filter neutral_prop r)
        rem
  | _ -> assert false

let extract_props ps s =
  List.partition
    (fun s ->
      is_font s.nat &&
      List.exists (fun p -> p s.nat) ps)
    s


let  clean t k = match t with
  | Node ([],ts) -> ts@k
  | _ -> t::k

let rec neutrals started r = function
  | [] -> r
  | Blanks _::rem -> neutrals started r rem
  | Node (s, _)::rem ->
      if started then
        neutrals true (inter r (List.filter blanksNeutral s)) rem
      else
        neutrals true (List.filter blanksNeutral s) rem        
  | _ -> []

let rec remove_list fs ts = match ts with
  | [] -> []
  | Node (gs,args)::rem ->
      begin match sub gs fs with
      | [] -> args @ remove_list fs rem
      | ks -> Node (ks,args) :: remove_list fs rem
      end
  | t::rem -> t::remove_list fs rem

let lift_neutral fs ts k = match neutrals false [] ts with
| [] -> Node (fs,ts)::k
| lift -> Node (lift@fs, remove_list lift ts)::k
  

let check_node fs ts k = match ts with
  | Node (si,args)::rem when
    some_font fs && font_trees ts ->
    begin match all_props (other_props fs) ts with
    | [] -> lift_neutral fs ts k
    | ps ->
        let lift,keep = extract_props ps si in
        lift_neutral
          (lift@fs) (clean (Node (keep,args)) rem) k
    end
  | _ -> lift_neutral fs ts k

let rec as_list i j ts k =
  if i > j then k
  else
    (clean ts.(i)) (as_list (i+1) j ts k)

let remove s = function
  | Node (os,ts) -> node (sub os s) ts
  | t -> t


and is_text_blank = function
  | Text _ | Blanks _ -> true
  | _ -> false

and is_node = function
  | Node (_::_,_) -> true
  | _ -> false
    
let rec cut_begin p ts l i =
  if i >= l then l,[]
  else
    if p ts.(i) then
      let j,l = cut_begin p ts l (i+1) in
      j,ts.(i)::l
    else
      i,[]

let cut_end p ts l =
  let rec do_rec r i =
    if i < 0 then i,r
    else
      if p ts.(i) then
        do_rec (ts.(i)::r) (i-1)
      else
        i,r in
  do_rec [] (l-1)

let is_other s = match s.nat with
| Other -> true
| _ -> false

let rec deeper i j ts k =
  let rec again r i =
    if i > j then r
    else match ts.(i) with    
    | Node ([],args) ->
        let b1 =  List.exists is_node args in
        again (b1 || r) (i+1)
    | Node (s,args) when List.exists is_other s ->
        let r = again r (i+1) in
        if not r then
          ts.(i) <- Node (s,opt true (Array.of_list args) []) ;
        r
    | _ -> again r (i+1) in
  if again false i then begin
    let ts = as_list i j ts [] in    
    let rs = opt true  (Array.of_list ts) k in
    rs
  end else
    as_list i j ts k
          
    
and trees i j ts k =
  if i > j then  k
  else
    match factorize i j ts with
    | [] -> deeper i j ts k
    | fs ->
        let rec zyva cur fs k = match fs with
        | [] -> deeper cur j ts k
        | ((ii,jj),gs)::rem ->
            for k=ii to jj do
              ts.(k) <- remove gs ts.(k)
            done ;
            deeper cur (ii-1) ts
              (check_node gs (trees ii jj ts [])
                 (zyva (jj+1) rem k)) in
        let fs = select_factors fs in
        if !Emisc.verbose > 1 then begin
          prerr_endline "selected" ;
          List.iter
            (fun ((i,j),fs) ->
              Printf.fprintf stderr " %d,%d:" i j ;
              List.iter
                (fun f -> output_string stderr (" "^f.txt))
                fs)
            fs ;
          prerr_endline ""
        end ;
        zyva i fs k

and opt_onodes ts i = match ts.(i) with
  |  ONode (o,c,args) -> begin match opt false (Array.of_list args) [] with
      | [Node (s,args)] when false ->
	  let s1, s2 = partition_color s in
	  ts.(i) <-
	     begin match s1, s2 with
	     | [],[] -> assert false
	     | [],s  -> ONode (o,c,[Node (s, args)])
	     | s,[]  -> Node (s,[ONode (o,c,args)])
	     | _,_   -> Node (s1, [ONode (o,c,[Node (s2, args)])])
	     end
      | t ->
          ts.(i) <- ONode (o,c,t)
  end
  | _ -> ()

and opt top ts k =
  let l = Array.length ts in  
  for i = 0 to l-1 do
    opt_onodes ts i
  done ;
  let p = is_text_blank in
  let start,pre = cut_begin p ts l 0 in
  if start >= l then pre@k
  else
    let fin,post  = cut_end p ts l in
    if top then pre@trees start fin ts (post@k)
    else
      extend_blanks pre (trees start fin ts []) post k

and extend_blanks pre ts post k = match ts with
| [Node (s,args)] when
    pre <> [] && post <> [] &&
    List.exists blanksNeutral s &&
    is_blanks pre && is_blanks post ->
      let neutral,not_neutral =
        List.partition blanksNeutral s in
      [Node
          (neutral,
           (match not_neutral with
           | [] -> pre@args@post@k
           | _  -> pre@Node (not_neutral,args)::post@k))]
| _ -> pre@ts@post@k
    


let main chan ts =
  if !Emisc.verbose > 2 then begin
    eprintf "**Ultra input **\n" ;
    Pp.ptrees stderr ts ;
    eprintf "** Ultra end**\n%!" ;
    ()
  end ;
  let ci = costs Htmllex.cost ts in
  let rs =  opt true (Array.of_list (Explode.trees ts)) [] in
  let cf = costs Htmltext.cost rs in
  if compare ci cf < 0 then begin
    if !Emisc.verbose > 1 then begin
      prerr_endline "*********** Pessimization ***********" ;
      Pp.ptrees stderr ts ;
      prerr_endline "***********   Into        ***********" ;
      Pp.trees stderr rs
    end ;
    Pp.ptrees chan ts
  end else begin
    if !Emisc.verbose > 2 then begin
      eprintf "** Ultra output **\n" ;
      Pp.trees stderr rs ;
      eprintf "** Ultra end**\n%!" ;
      ()
    end ;
    Pp.trees chan rs 
  end