File: code_group_order.ml

package info (click to toggle)
galax 1.1-10
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 12,832 kB
  • sloc: ml: 96,603; xml: 26,602; ansic: 4,875; sh: 3,977; makefile: 1,667; java: 1,146
file content (502 lines) | stat: -rwxr-xr-x 17,348 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
(***********************************************************************)
(*                                                                     *)
(*                                 GALAX                               *)
(*                              XQuery Engine                          *)
(*                                                                     *)
(*  Copyright 2001-2007.                                               *)
(*  Distributed only by permission.                                    *)
(*                                                                     *)
(***********************************************************************)

(* $Id: code_group_order.ml,v 1.13 2007/08/23 21:01:32 simeon Exp $ *)

(* Module: Code_group_order
   Description:
     This module contains code building for operators that implement
     group-by.
*)

open Cs_util_coercion 

open Error

open Physical_value_util 
open Physical_table (* Create_Tuple *)
open Physical_sequence (* materialized_of_list *)

open Xquery_common_ast
open Xquery_algebra_ast
open Xquery_algebra_ast_util
open Xquery_algebra_ast_annotation_util

open Algebra_type
open Execution_context
open Cs_util

open Code_selection_context
open Code_util_materialize
(**************************************************)
(* Some of this section could probably reside in other 
  locations (dm.mli) but
   we haven't used the yet *)
(**************************************************)

(* AtomicValue Hash *)
module AtomicValueHashType = 
 struct 
   type t = Dm_atomic.atomicValue
   let equal x y = x#atomic_value_eq y
   let hash x = 
     let x_str = x#string_value () in
     let ret   = (Hashtbl.hash x_str) 
		 * (Hashtbl.hash (x#getAtomicValueKind ())) in	
       ret 
 end

module AtomicValueHash = Hashtbl.Make(AtomicValueHashType)
(* There are potentially many sorts and many group desc *)
(* Right now we implement them one after the other as decomposed operators *)

(**************************************************)
type select_single_op_fn =   
    Code_selection_context.code_selection_context -> algop_expr -> 
      Code_selection_context.code_selection_context

  (* Note:
     All of the evaluations of e2 must yield values (or nodes) of
     the same type.
     - Jerome *)
let eval_e2 (eval:eval_fun)
    (alg_ctxt:algebra_context) 
    (dep:algop_expr) =
    (* Evaluates the sortspec expression *)
    (* This would be nice to get rid of... *)
    item_cursor_of_physical_value
      (eval alg_ctxt dep)
  
(* Compare function.
   This can be updated since we have very particular semantics (no
   need to evaluate the functions).
*)
let compare_function restore_fn sk_esk_list eval alg_ctxt compare_deps input_value1 input_value2 =
  let atomic_comp = Code_util.make_atomic_gt in
  let rec compare_fun deps sk_esk_list =
    match (deps,sk_esk_list) with
      | [],[] -> 0
      | (dep::deps',(sk,esk) :: sk_esk_list') ->
	  let eval_e2' x = 
	    restore_fn x; (* setup evaluation tuple fields *)
	    eval_e2 eval alg_ctxt dep
	  in
	  let comp1 =	      
	    (Code_util.forest_compare sk esk 
	       (eval_e2' input_value1)
	       (eval_e2' input_value2))
	      atomic_comp
	  in
	    if comp1 = 0
	    then
	      compare_fun deps' sk_esk_list'
	    else
	      comp1
      | _ ->
	  raise (Query (Code_Selection "Incompatible number of sub-exprs and order by modifiers in one order-by expression"))
  in
    (* Tail because the first dep is to be applied, not grouped by
    *)
    
    compare_fun compare_deps sk_esk_list

(* Comparison operation *)

(* Helper function to find the index of a given variable name *)
let name_to_index needed_names vn =
  let n_needed_names = Array.length needed_names in
  let rec name_to_index_helper index vn = 
    if index < n_needed_names then
      if needed_names.(index) = vn then
	index
      else
	name_to_index_helper (index+1) vn
    else
      raise (Query (Code_Selection ("name not found during grouping: " ^ 
				  (Namespace_names.prefixed_string_of_rqname vn))))
  in
    name_to_index_helper 0 vn

(* Aggregate Cursor Semantic:
   Assume this is sorted by the group names in order.   
*)
let aggregate_cursor nsenv code_ctxt (store_aggregate_value: Physical_value.dom_value -> unit)
  (restore_fn:Code_util_materialize.restore_function)
  needed_names group_names odt valid_names
  group_create_op 

  (* These are the typical runtime parameters *)
  eval alg_ctxt sorted_input =
  let stat_ctxt      = static_context_from_code_selection_context code_ctxt in
  let start_offset   = ref 0 in
  let current_offset = ref 0 in

  let input_length   = Array.length sorted_input in
  (* First lookup the indexes of the names *)
  let group_indexes  =
    Array.of_list 
      (List.map (name_to_index needed_names)
	 group_names) in
  let n_indexes        = Array.length group_indexes in
  let required_indexes = Array.of_list
			   (List.map (name_to_index needed_names)
			      valid_names) in
  
  let required_length = Array.length required_indexes in
  (* Helper function determine if row1 = row2 *)
  let named_contents_equal row1 row2 =
    let is_equal = ref true in
      for i = 0 to n_indexes - 1 do
	let cur_index = group_indexes.(i) in
	  is_equal := !is_equal &&
	  (sorted_input.(row1).(cur_index) = 
	     sorted_input.(row2).(cur_index))
      done;
      !is_equal
  in

  (* Helper function to see if any grouping attribute
     is empty *)
  let exists_empty_name rown = 
    let is_empty = ref false in 
    let i = ref 0 in
      while (!i < required_length) && 
	(not (!is_empty)) do
	let cur_index = required_indexes.(!i) in
	  if (sorted_input.(rown).(cur_index)
	     = empty_dom_sequence) then
	    begin
	      is_empty := true
	    end
	  ;
	  incr i
      done;
      !is_empty
  in
  (****************************)
  (* Actual Cursor Definition *)
  (****************************)

  (* If we have input length = 0
     - we clear the aggregate value 
     o On empty input
     the apply clause is the empty
     list to the name.
     
     This to prevent dirty output
   *)
  if (input_length = 0) then
    begin
      (fun () ->	       	     
	begin
	  store_aggregate_value (materialized_of_list []);
	  None
	end
      )
    end
  else
    (* Typical grouping *)
    (fun () ->	   
      (* In this case, we have reached the end,
	 so we return None *)
      if (!start_offset >= input_length) then	    
	begin
	  None	       
	end     
      else
	begin
	  let return_sequence = ref [] in
	  let start_off = !start_offset in
	  
	  (* Restore the current tuple *)
	  restore_fn sorted_input.(start_off);
	  
	  (* Special Case: 
             There exists x in GroupingNames = () =>
             The aggregate should be set to () and
             we advance *)		 
	  if (exists_empty_name start_off) then
	    begin
	      store_aggregate_value empty_dom_sequence;
	      incr start_offset;
	      empty_tuple_opt			     
	    end
	  else
	    begin
	      (* General Case: 
		 This is an actual grouping *)
	      
	      (* Start counting and looping *)
	      (* Could be faster to split this into two loops
		 Loop 1 gets the bounds in the array
		 -- and --  
		 Loop 2 does the evaluation and storest the results in *)
	      
	      (* For performance we should really have two different 
		 functions a duplicate removing
		 one and a non-duplicate removing version *)
	      
	      current_offset := start_off;
	      while !current_offset < input_length && (* still in bounds *)
		(named_contents_equal start_off !current_offset) do
		(* restore the current value for evaluation *)
		restore_fn sorted_input.(!current_offset);
		(* retrieve the current return *)
		let current_ungrouped_value = item_list_of_physical_value
		    (eval alg_ctxt group_create_op) in
		
		return_sequence := (!return_sequence @ current_ungrouped_value);			     
		incr current_offset
	      done;		 
	      
	      start_offset := !current_offset; (* update the new start offset *)
	      
	      begin
		match odt with
		| None ->
		    store_aggregate_value
		      (materialized_of_list !return_sequence);
		| Some dt ->
		    let type_matched_cursor = Code_util_matching.dynamic_type_check stat_ctxt dt (Cursor.cursor_of_list !return_sequence) in
		    store_aggregate_value (Physical_sequence.materialized_of_cursor type_matched_cursor)
	      end;
	      empty_tuple_opt (* bogus return *)
	    end
	end)

(*******************************************)
(* This does not select the code to do so! *)
(*******************************************)
let build_comparison_dep code_selection code_ctxt g_names =
  let (comp_ctxt:Algebra_type.alg_compile_context) = Code_selection_context.annotated_compile_context_from_code_selection_context code_ctxt in 
  let build_op gn =    
    Cs_annotate.annotate_statement comp_ctxt
      (logical_aalgop_mkop (AOEAccessTuple gn) NoSub NoSub None None Finfo.bogus)
  in
  let return = List.map build_op g_names in
    List.iter (fun x -> ignore(code_selection code_ctxt x)) return;
    return
      

(* GROUPING CODE TAKES THE OP SELECTION FUNCTION
   SO THAT IT CAN BUILD COMPARISON OPERATORS *)
(* This build for a single grouping *)
(* This is a really slow way to do this. We materialize PER
   group. Each group consumes the previous.  There is no code to share
   the sort orders (in most cases we just require one
   sort/materialization and several uses of it) Also, there are some
   cases when we could have no materialization at all (if we are just
   sorting on doc order).  *)

let append_aggregate_return code_ctxt agg =
  let annot = retrieve_annotation "append_aggregate_return" code_ctxt in 
  let rf    = agg :: (get_returned_fields annot) in 
  let annot =   mk_annotation (get_use_counts annot) (get_bound_use_counts annot)
                  (get_accessed_fields annot) rf (get_tuple_field_use_counts annot) in
      store_annotation code_ctxt (Some annot)

let build_group_code_aux op_selection code_ctxt group_desc index =
  let agg_name  = get_aggregate_name group_desc in
  let agg_type  = get_aggregate_type group_desc in 
    (* Store the new aggregate so the next cursor materializes it *)
    (* Retrieve before append so that we don't materialize our own
    aggregate (no need too) *)
  let annot     = retrieve_annotation "build_group_code_aux" code_ctxt in 
  let code_ctxt = append_aggregate_return code_ctxt agg_name in
  let stat_ctxt = static_context_from_code_selection_context code_ctxt in
  let norm_ctxt = Typing_context.norm_context_from_stat_context stat_ctxt in
  let nsenv     = Norm_context.nsenv_from_norm_context norm_ctxt in

  let sort_function = Array.stable_sort in
  let store_aggregate_value = build_create_dom_tuple_code code_ctxt agg_name in

  let group_names = get_group_names    group_desc in
  let valid_names = get_valid_names    group_desc in 

  let sk_esk_list = List.map (fun x -> (Ascending, EmptyGreatest)) group_names in

  let materialize_fun, restore_fn, needed_names = materialize_cursor_to_dom_value_array code_ctxt annot () in
  let built_cursor = aggregate_cursor nsenv code_ctxt store_aggregate_value restore_fn needed_names group_names agg_type valid_names in
  let compare_deps = build_comparison_dep op_selection code_ctxt group_names in

    (fun (deps:algop_expr array) 
	 (eval:eval_fun)
	 (alg_ctxt: algebra_context) 
	 input_table ->     
	   (* Then sort !! *)
	   Debug.print_join_debug ">>> In GroupBy, materialization";
	   let materialized = materialize_fun eval alg_ctxt input_table in
	     (* Now sorts it in place *)
	   Debug.print_join_debug ">>> In GroupBy, AFTER materialization";
	   Debug.print_join_debug ">>> In GroupBy, sorting";
	   sort_function
	       (compare_function restore_fn sk_esk_list eval alg_ctxt compare_deps) materialized;
	   Debug.print_join_debug ">>> In GroupBy, AFTER sorting";
	   Debug.print_join_debug ">>> In GroupBy, NOW creating output_cursor";
	     Cursor.cursor_of_function (built_cursor deps.(index) eval alg_ctxt materialized)
    ), code_ctxt

(* Order by *)
(* Maybe should make actual tuples array based *)
let build_default_tuple_order_by_code code_ctxt stable sk_esk_list gt_table =
  (* The following function needs to be called to get the value of the
     sorting expression. *)
  let annot = retrieve_annotation "build_default_tuple_order_by_code" code_ctxt in
  let materialize_fun, restore_fn, needed_names = materialize_cursor_to_dom_value_array code_ctxt annot () in

   (* Note:
       All of the evaluations of e2 must yield values (or nodes) of
       the same type.
     - Jerome *)
  let eval_e2 (eval:eval_fun)
      (alg_ctxt:algebra_context) 
      (dep:algop_expr) =
    (* Evaluates the sortspec expression *)
    (* This would be nice to get rid of... *)
    item_cursor_of_physical_value
      (eval alg_ctxt dep)
  in

  (* Here is the local comparison function *)

  (* Note:
       This applies both evaluation of the sorting expression, and
       actual comparison for each resulting value taking all the
       necessary parameters into account.
     - Jerome *)

  let op_gt alg_ctxt arg1 arg2 =
    let arg1 = [arg1] in
    let arg2 = [arg2] in
    let result =
      Code_overloaded_fn.build_default_overloaded_fn_code
	code_ctxt
	Namespace_builtin.op_gt
	2
	gt_table
	alg_ctxt
	[|arg1;arg2|]
    in
    Physical_util.get_boolean (Cursor.cursor_of_list result)
  in
  
  let compare_function eval alg_ctxt deps input_value1 input_value2 =
    let deps = Array.to_list deps in
    let rec compare_fun deps sk_esk_list =
      match (deps,sk_esk_list) with
      | [],[] -> 0
      | (dep::deps',(sk,esk) :: sk_esk_list') ->
	  let eval_e2' = (fun x ->
	    restore_fn x; (* setup evaluation tuple fields *)
	    eval_e2 eval alg_ctxt dep) in
	  let comp1 =
	    (Code_util.forest_compare sk esk 
	       (eval_e2' input_value1)
	       (eval_e2' input_value2))
	      (op_gt alg_ctxt)
	  in
	  if comp1 = 0
	  then
	    compare_fun deps' sk_esk_list'
	  else
	    comp1
      | _ ->
	  raise (Query (Code_Selection "Incompatible number of sub-exprs and order by modifiers in one order by expression"))
    in
    compare_fun deps sk_esk_list
  in

  (* The sort function *)
  let sort_function =
    match stable with
    | Stable ->
	Array.stable_sort
    | NonStable ->
	Array.sort
  in
  (* Should be a common operation.. *)
  let our_cursor m = 
    let offset = ref 0 in
    let len = Array.length m in
    
    (fun () ->
      let res = 
	if !offset < len then	   
	  begin
	    restore_fn m.(!offset);
	    empty_tuple_opt
	  end
	else
	  None
      in
      incr offset;
      res)
  in
  (fun (deps:algop_expr array) 
      (eval:eval_fun)
      (alg_ctxt: algebra_context) 
      input_table ->     
	let materialized = materialize_fun eval alg_ctxt input_table in
	(* Now sorts it in place *)
	sort_function (compare_function eval alg_ctxt deps) materialized;
	Cursor.cursor_of_function (our_cursor materialized))

let build_order_by_code code_ctxt algop (stablekind, sort_spec_list, gt_table) =
  let _   = access_onesub algop.psub_expression in
  let dep = access_manysub algop.pdep_sub_expression in
  let fn = build_default_tuple_order_by_code code_ctxt stablekind sort_spec_list gt_table in
  (coerce_manydep fn dep coerce_unary_tuple_cursor_to_tuple_cursor), code_ctxt

(**************************)
(* Exposed build function *)
(**************************)

let build_default_group_code op_select code_ctxt gd_list =
  let build_helper code_ctxt index gd =
    build_group_code_aux op_select code_ctxt gd index 
  in
  let build_fold gd (cursors, code_ctxt, index) =
    let cursor, code_ctxt = build_helper code_ctxt index gd  in
      (cursor :: cursors, code_ctxt, (index-1))
  in
  (* These must be built in reverse order so that the annotation for
     the new aggregate is kept. This annotation tells cursor{i+1} to materialize
     the aggregates of the previous aggregates
  *)
  let cursor_list, code_ctxt, _ = 
    List.fold_right build_fold gd_list ([], code_ctxt, ((List.length gd_list)-1) ) 
  in
  let cursor_array = Array.of_list cursor_list in
  let gd_length    = Array.length cursor_array in
  (fun deps eval alg_ctxt input_table ->
    (* The first are applied last since this is the semantic of the operand
       new ones can be appended *)
    (* apply in reverse order *)
    let rec apply_cursor index input = 
      match index with
      | -1 ->
	  input
      | i  -> 
	  apply_cursor (index-1) (cursor_array.(index) deps eval alg_ctxt input)
    in
    apply_cursor (gd_length - 1) input_table)

let build_group_code code_ctxt algop single_op_default_code_selection gd_list =
  (* Add in the created tuples in the return *)
  let add_aggs  = List.map Xquery_algebra_ast_util.get_aggregate_name gd_list in
  let code_ctxt = List.fold_left add_tuple_reference code_ctxt add_aggs in
  let indep = access_onesub algop.psub_expression in 	
  let code_ctxt = store_annotation code_ctxt indep.compile_annotations in
  (* THIS IS AWFUL - we are passing the function inside the grouping code...*)
  (* We do this to do code selection for the sort operands over sequences *)
  let fn    = build_default_group_code (single_op_default_code_selection) code_ctxt gd_list in
  let dep   = access_manysub algop.pdep_sub_expression in 
  (coerce_manydep fn dep coerce_unary_tuple_cursor_to_tuple_cursor), code_ctxt