1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
|
(***********************************************************************)
(* *)
(* Objective Caml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* Automatique. Distributed only by permission. *)
(* *)
(***********************************************************************)
(* $Id: scheduling.ml,v 1.8 1996/04/30 14:42:58 xleroy Exp $ *)
(* Instruction scheduling *)
open Misc
open Reg
open Mach
open Linearize
(* Determine whether an instruction ends a basic block or not *)
let in_basic_block instr =
match instr.desc with
Lop op ->
begin match op with
Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ -> false
| Iextcall(_, _) -> false
| Istackoffset _ -> false
| Istore(_, _) -> false
| Ialloc _ -> false
| op -> Proc.oper_latency op >= 0
(* The processor description can return a latency of -1 to signal
a specific instruction that terminates a basic block, e.g.
Istore_symbol for the I386. *)
end
| Lreloadretaddr -> true
| _ -> false
(* Estimate the delay needed to evaluate an instruction. *)
let reload_retaddr_latency =
Proc.oper_latency (Iload(Cmm.Word, Arch.identity_addressing))
let instr_latency instr =
match instr.desc with
Lop op -> Proc.oper_latency op
| Lreloadretaddr -> reload_retaddr_latency
| _ -> fatal_error "Scheduling.instr_latency"
(* Representation of the code DAG. *)
type code_dag_node =
{ instr: instruction; (* The instruction *)
delay: int; (* How many cycles it needs *)
mutable sons: (code_dag_node * int) list;
(* Instructions that depend on it *)
mutable date: int; (* Start date *)
mutable length: int; (* Length of longest path to result *)
mutable ancestors: int; (* Number of ancestors *)
mutable emitted_ancestors: int } (* Number of emitted ancestors *)
let dummy_node =
{ instr = end_instr; delay = 0; sons = []; date = 0;
length = -1; ancestors = 0; emitted_ancestors = 0 }
(* The code dag itself is represented by two tables from registers to nodes:
- "results" maps registers to the instructions that produced them;
- "uses" maps registers to the instructions that use them. *)
let code_results = (Hashtbl.create 31 : (location, code_dag_node) Hashtbl.t)
let code_uses = (Hashtbl.create 31 : (location, code_dag_node) Hashtbl.t)
let clear_code_dag () =
Hashtbl.clear code_results;
Hashtbl.clear code_uses
(* Add an instruction to the code DAG *)
let add_edge ancestor son delay =
ancestor.sons <- (son, delay) :: ancestor.sons;
son.ancestors <- son.ancestors + 1
let add_instruction ready_queue instr =
let delay = instr_latency instr in
let node =
{ instr = instr;
delay = delay;
sons = [];
date = 0;
length = -1;
ancestors = 0;
emitted_ancestors = 0 } in
(* Add edges from all instructions that define one of the registers used *)
for i = 0 to Array.length instr.arg - 1 do
try
let ancestor = Hashtbl.find code_results instr.arg.(i).loc in
add_edge ancestor node ancestor.delay
with Not_found ->
()
done;
(* Also add edges from all instructions that use one of the results
of this instruction, so that evaluation order is preserved. *)
for i = 0 to Array.length instr.res - 1 do
let ancestors = Hashtbl.find_all code_uses instr.res.(i).loc in
List.iter (fun ancestor -> add_edge ancestor node 0) ancestors
done;
(* Also add edges from all instructions that have already defined one
of the results of this instruction, so that evaluation order
is preserved. *)
for i = 0 to Array.length instr.res - 1 do
try
let ancestor = Hashtbl.find code_results instr.res.(i).loc in
add_edge ancestor node 0
with Not_found ->
()
done;
(* Remember the registers used and produced by this instruction *)
for i = 0 to Array.length instr.res - 1 do
Hashtbl.add code_results instr.res.(i).loc node
done;
for i = 0 to Array.length instr.arg - 1 do
Hashtbl.add code_uses instr.arg.(i).loc node
done;
(* If this is a root instruction (all arguments already computed),
add it to the ready queue *)
if node.ancestors = 0 then node :: ready_queue else ready_queue
(* Compute length of longest path to a result.
For leafs of the DAG, see whether their result is used in the instruction
immediately following the basic block (a "critical" output). *)
let is_critical critical_outputs results =
try
for i = 0 to Array.length results - 1 do
let r = results.(i).loc in
for j = 0 to Array.length critical_outputs - 1 do
if critical_outputs.(j).loc = r then raise Exit
done
done;
false
with Exit ->
true
let rec longest_path critical_outputs node =
if node.length < 0 then begin
match node.sons with
[] ->
node.length <-
if is_critical critical_outputs node.instr.res
or node.instr.desc = Lreloadretaddr (* alway critical *)
then node.delay
else 0
| sons ->
node.length <-
List.fold_left
(fun len (son, delay) ->
max len (longest_path critical_outputs son + delay))
0 sons
end;
node.length
(* Given a list of instructions with estimated start date, choose one
that we can start (start date <= current date) and that has
maximal distance to result. If we can't find any, return None. *)
let extract_ready_instr date queue =
let rec extract best = function
[] ->
if best == dummy_node then None else Some best
| instr :: rem ->
let new_best =
if instr.date <= date & instr.length > best.length
then instr else best in
extract new_best rem in
extract dummy_node queue
(* Remove an instruction from the ready queue *)
let rec remove_instr node = function
[] -> []
| instr :: rem ->
if instr == node then rem else instr :: remove_instr node rem
(* Schedule a basic block, adding its instructions in front of the given
instruction sequence *)
let rec reschedule ready_queue date cont =
match ready_queue with
[] -> cont
| _ ->
(* Find "most ready" instruction in queue *)
match extract_ready_instr date ready_queue with
None ->
(* Try again, one cycle later *)
reschedule ready_queue (date + 1) cont
| Some node ->
(* Update the start date and number of ancestors emitted of
all descendents of this node. Enter those that become ready
in the queue. *)
let new_queue = ref (remove_instr node ready_queue) in
List.iter
(fun (son, delay) ->
let completion_date = date + delay in
if son.date < completion_date then son.date <- completion_date;
son.emitted_ancestors <- son.emitted_ancestors + 1;
if son.emitted_ancestors = son.ancestors then
new_queue := son :: !new_queue)
node.sons;
instr_cons node.instr.desc node.instr.arg node.instr.res
(reschedule !new_queue (date + 1) cont)
(* Schedule basic blocks in an instruction sequence *)
let rec schedule i =
match i.desc with
Lend -> i
| _ ->
if in_basic_block i then begin
clear_code_dag();
schedule_block [] i
end else
{ desc = i.desc; arg = i.arg; res = i.res; live = i.live;
next = schedule i.next }
and schedule_block ready_queue i =
if in_basic_block i then
schedule_block (add_instruction ready_queue i) i.next
else begin
let critical_outputs =
match i.desc with
Lop(Icall_ind | Itailcall_ind) -> [| i.arg.(0) |]
| Lop(Icall_imm _ | Itailcall_imm _ | Iextcall(_, _)) -> [||]
| Lreturn -> [||]
| _ -> i.arg in
List.iter (longest_path critical_outputs) ready_queue;
reschedule ready_queue 0 (schedule i)
end
(* Entry point *)
(* Don't bother to schedule for initialization code and the like. *)
let fundecl f =
if Proc.need_scheduling & f.fun_fast then begin
let new_body = schedule f.fun_body in
clear_code_dag();
{ fun_name = f.fun_name;
fun_body = new_body;
fun_fast = f.fun_fast }
end else
f
|