File: scheduling.ml

package info (click to toggle)
ocaml 1.05-2
  • links: PTS
  • area: non-free
  • in suites: hamm, slink
  • size: 7,472 kB
  • ctags: 12,935
  • sloc: ml: 37,142; ansic: 24,745; asm: 11,632; lisp: 1,957; sh: 1,801; makefile: 1,512; perl: 29; sed: 28
file content (251 lines) | stat: -rw-r--r-- 8,814 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
(***********************************************************************)
(*                                                                     *)
(*                           Objective Caml                            *)
(*                                                                     *)
(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
(*                                                                     *)
(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
(*  Automatique.  Distributed only by permission.                      *)
(*                                                                     *)
(***********************************************************************)

(* $Id: scheduling.ml,v 1.8 1996/04/30 14:42:58 xleroy Exp $ *)

(* Instruction scheduling *)

open Misc
open Reg
open Mach
open Linearize

(* Determine whether an instruction ends a basic block or not *)

let in_basic_block instr =
  match instr.desc with
    Lop op ->
      begin match op with
        Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ -> false
      | Iextcall(_, _) -> false
      | Istackoffset _ -> false
      | Istore(_, _) -> false
      | Ialloc _ -> false
      | op -> Proc.oper_latency op >= 0
        (* The processor description can return a latency of -1 to signal
           a specific instruction that terminates a basic block, e.g.
           Istore_symbol for the I386. *)
      end
  | Lreloadretaddr -> true
  | _ -> false

(* Estimate the delay needed to evaluate an instruction. *)

let reload_retaddr_latency =
  Proc.oper_latency (Iload(Cmm.Word, Arch.identity_addressing))

let instr_latency instr =
  match instr.desc with
    Lop op -> Proc.oper_latency op
  | Lreloadretaddr -> reload_retaddr_latency
  | _ -> fatal_error "Scheduling.instr_latency"

(* Representation of the code DAG. *)

type code_dag_node =
  { instr: instruction;                 (* The instruction *)
    delay: int;                         (* How many cycles it needs *)
    mutable sons: (code_dag_node * int) list;
                                        (* Instructions that depend on it *)
    mutable date: int;                  (* Start date *)
    mutable length: int;                (* Length of longest path to result *)
    mutable ancestors: int;             (* Number of ancestors *)
    mutable emitted_ancestors: int }    (* Number of emitted ancestors *)

let dummy_node =
  { instr = end_instr; delay = 0; sons = []; date = 0;
    length = -1; ancestors = 0; emitted_ancestors = 0 }

(* The code dag itself is represented by two tables from registers to nodes:
   - "results" maps registers to the instructions that produced them;
   - "uses" maps registers to the instructions that use them. *)

let code_results = (Hashtbl.create 31 : (location, code_dag_node) Hashtbl.t)
let code_uses = (Hashtbl.create 31 : (location, code_dag_node) Hashtbl.t)

let clear_code_dag () =
  Hashtbl.clear code_results;
  Hashtbl.clear code_uses

(* Add an instruction to the code DAG *)

let add_edge ancestor son delay =
  ancestor.sons <- (son, delay) :: ancestor.sons;
  son.ancestors <- son.ancestors + 1

let add_instruction ready_queue instr =
  let delay = instr_latency instr in
  let node =
    { instr = instr;
      delay = delay;
      sons = [];
      date = 0;
      length = -1;
      ancestors = 0;
      emitted_ancestors = 0 } in
  (* Add edges from all instructions that define one of the registers used *)
  for i = 0 to Array.length instr.arg - 1 do
    try
      let ancestor = Hashtbl.find code_results instr.arg.(i).loc in
      add_edge ancestor node ancestor.delay
    with Not_found ->
      ()
  done;
  (* Also add edges from all instructions that use one of the results
     of this instruction, so that evaluation order is preserved. *)
  for i = 0 to Array.length instr.res - 1 do
    let ancestors = Hashtbl.find_all code_uses instr.res.(i).loc in
    List.iter (fun ancestor -> add_edge ancestor node 0) ancestors
  done;
  (* Also add edges from all instructions that have already defined one
     of the results of this instruction, so that evaluation order
     is preserved. *)
  for i = 0 to Array.length instr.res - 1 do
    try
      let ancestor = Hashtbl.find code_results instr.res.(i).loc in
      add_edge ancestor node 0
    with Not_found ->
      ()
  done;
  (* Remember the registers used and produced by this instruction *)
  for i = 0 to Array.length instr.res - 1 do
    Hashtbl.add code_results instr.res.(i).loc node
  done;
  for i = 0 to Array.length instr.arg - 1 do
    Hashtbl.add code_uses instr.arg.(i).loc node
  done;
  (* If this is a root instruction (all arguments already computed),
     add it to the ready queue *)
  if node.ancestors = 0 then node :: ready_queue else ready_queue

(* Compute length of longest path to a result.
   For leafs of the DAG, see whether their result is used in the instruction
   immediately following the basic block (a "critical" output). *)

let is_critical critical_outputs results =
  try
    for i = 0 to Array.length results - 1 do
      let r = results.(i).loc in
      for j = 0 to Array.length critical_outputs - 1 do
        if critical_outputs.(j).loc = r then raise Exit
      done
    done;
    false
  with Exit ->
    true

let rec longest_path critical_outputs node =
  if node.length < 0 then begin
    match node.sons with
      [] ->
        node.length <-
          if is_critical critical_outputs node.instr.res
          or node.instr.desc = Lreloadretaddr (* alway critical *)
          then node.delay
          else 0
    | sons ->
        node.length <- 
          List.fold_left
            (fun len (son, delay) ->
              max len (longest_path critical_outputs son + delay))
            0 sons
  end;
  node.length

(* Given a list of instructions with estimated start date, choose one
   that we can start (start date <= current date) and that has
   maximal distance to result. If we can't find any, return None. *)

let extract_ready_instr date queue =
  let rec extract best = function
    [] ->
      if best == dummy_node then None else Some best
  | instr :: rem ->
      let new_best =
        if instr.date <= date & instr.length > best.length
        then instr else best in
      extract new_best rem in
  extract dummy_node queue

(* Remove an instruction from the ready queue *)

let rec remove_instr node = function
    [] -> []
  | instr :: rem ->
      if instr == node then rem else instr :: remove_instr node rem

(* Schedule a basic block, adding its instructions in front of the given
   instruction sequence *)

let rec reschedule ready_queue date cont =
  match ready_queue with
    [] -> cont
  | _ ->
      (* Find "most ready" instruction in queue *)
      match extract_ready_instr date ready_queue with
        None ->
          (* Try again, one cycle later *)
          reschedule ready_queue (date + 1) cont
      | Some node ->
          (* Update the start date and number of ancestors emitted of
             all descendents of this node. Enter those that become ready
             in the queue. *)
          let new_queue = ref (remove_instr node ready_queue) in
          List.iter
            (fun (son, delay) ->
              let completion_date = date + delay in
              if son.date < completion_date then son.date <- completion_date;
              son.emitted_ancestors <- son.emitted_ancestors + 1;
              if son.emitted_ancestors = son.ancestors then
                new_queue := son :: !new_queue)
            node.sons;
          instr_cons node.instr.desc node.instr.arg node.instr.res
            (reschedule !new_queue (date + 1) cont)

(* Schedule basic blocks in an instruction sequence *)

let rec schedule i =
  match i.desc with
    Lend -> i
  | _ ->
      if in_basic_block i then begin
        clear_code_dag();
        schedule_block [] i
      end else
        { desc = i.desc; arg = i.arg; res = i.res; live = i.live;
          next = schedule i.next }

and schedule_block ready_queue i =
  if in_basic_block i then
    schedule_block (add_instruction ready_queue i) i.next
  else begin
    let critical_outputs =
      match i.desc with
        Lop(Icall_ind | Itailcall_ind) -> [| i.arg.(0) |]
      | Lop(Icall_imm _ | Itailcall_imm _ | Iextcall(_, _)) -> [||]
      | Lreturn -> [||]
      | _ -> i.arg in
    List.iter (longest_path critical_outputs) ready_queue;
    reschedule ready_queue 0 (schedule i)
  end

(* Entry point *)
(* Don't bother to schedule for initialization code and the like. *)

let fundecl f =
  if Proc.need_scheduling & f.fun_fast then begin
    let new_body = schedule f.fun_body in
    clear_code_dag();
    { fun_name = f.fun_name;
      fun_body = new_body;
      fun_fast = f.fun_fast }
  end else
    f