(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
(* Instruction scheduling *)
open Reg
open Mach
open Linear
(* Representation of the code DAG. *)

(* One DAG node per instruction of the basic block being scheduled.
   Edges run from an instruction to the instructions that depend on it;
   each edge carries the latency (in cycles) that must elapse between
   the two. *)
type code_dag_node = {
  instr: instruction;            (* The instruction *)
  delay: int;                    (* How many cycles before result is available *)
  mutable sons: (code_dag_node * int) list;
                                 (* Instructions that depend on it,
                                    paired with the required latency *)
  mutable date: int;             (* Start date *)
  mutable length: int;           (* Length of longest path to result;
                                    -1 while not yet computed *)
  mutable ancestors: int;        (* Number of ancestors *)
  mutable emitted_ancestors: int (* Number of emitted ancestors *)
}
(* Sentinel node used as the initial "best" candidate when scanning the
   ready queue; its length of -1 loses against any real node, and it is
   recognized by physical equality. *)
let dummy_node =
  { instr = end_instr; delay = 0; sons = []; date = 0;
    length = -1; ancestors = 0; emitted_ancestors = 0 }
(* The code dag itself is represented by two tables from registers to nodes:
   - "results" maps registers to the instructions that produced them;
   - "uses" maps registers to the instructions that use them.
   In addition:
   - code_stores contains the latest store nodes emitted so far
   - code_loads contains all load nodes emitted since the last store
   - code_checkbounds contains the latest checkbound node not matched
     by a subsequent load or store. *)

(* Per-basic-block mutable state; a fresh value is built by [empty_dag]
   for each block.  The hash tables are keyed by register locations and
   may hold several bindings per location (see Hashtbl.add below). *)
type code_dag = {
  results : (location, code_dag_node) Hashtbl.t;
  uses : (location, code_dag_node) Hashtbl.t;
  mutable stores : code_dag_node list;
  mutable loads : code_dag_node list;
  mutable checkbounds : code_dag_node list;
}
(* Build a fresh, empty code DAG, ready to accumulate the instructions
   of a new basic block. *)
let empty_dag () =
  { stores = []; loads = []; checkbounds = [];
    results = Hashtbl.create 31;
    uses = Hashtbl.create 31 }
(* Add an edge to the code DAG: [son] depends on [ancestor], and must
   start at least [delay] cycles after [ancestor] issues. *)
let add_edge ancestor son delay =
  son.ancestors <- son.ancestors + 1;
  ancestor.sons <- (son, delay) :: ancestor.sons
(* [add_edge_after son ancestor] records that [son] must be emitted after
   [ancestor], with no extra latency.  The flipped argument order makes
   [add_edge_after node] convenient to partially apply in [List.iter]. *)
let add_edge_after son ancestor = add_edge ancestor son 0
(* Add an edge from the instruction (if any) that most recently defined
   the register holding argument [arg] of node [node] (RAW dependency):
   [node] must wait for that producer's full latency. *)
let add_RAW_dependencies t node arg =
  match Hashtbl.find_opt t.results arg.loc with
  | Some ancestor -> add_edge ancestor node ancestor.delay
  | None -> ()
(* Add edges from all instructions that read register [res], which node
   [node] is about to overwrite (WAR dependencies): [node] must not be
   emitted before any of those readers. *)
let add_WAR_dependencies t node res =
  List.iter (add_edge_after node) (Hashtbl.find_all t.uses res.loc)
(* Add an edge from the instruction (if any) that previously defined the
   register [res] that node [node] also defines (WAW dependency), so the
   two writes keep their original order. *)
let add_WAW_dependencies t node res =
  match Hashtbl.find_opt t.results res.loc with
  | Some ancestor -> add_edge ancestor node 0
  | None -> ()
(* Compute length of longest path to a result.
   For leafs of the DAG, see whether their result is used in the instruction
   immediately following the basic block (a "critical" output). *)

(* [is_critical critical_outputs results] is true iff some register of
   [results] occupies the same location as a register read by the
   instruction that terminates the block. *)
let is_critical critical_outputs results =
  Array.exists
    (fun res ->
       Array.exists (fun critical -> critical.loc = res.loc) critical_outputs)
    results
(* Compute, and memoize in [node.length], the length in cycles of the
   longest path from [node] to a result.  A leaf is only worth issuing
   early if its result is critical (or it is a Lreloadretaddr), hence
   the 0/delay distinction. *)
let rec longest_path critical_outputs node =
  if node.length < 0 then begin
    node.length <-
      (match node.sons with
       | [] ->
           if is_critical critical_outputs node.instr.res
              || node.instr.desc = Lreloadretaddr (* always critical *)
           then node.delay
           else 0
       | sons ->
           List.fold_left
             (fun len (son, delay) ->
                Int.max len (longest_path critical_outputs son + delay))
             0 sons)
  end;
  node.length
(* Remove the first occurrence (by physical equality) of [node] from the
   ready queue; the queue is returned unchanged if [node] is absent. *)
let rec remove_instr node queue =
  match queue with
  | [] -> []
  | head :: tail ->
      if head == node then tail
      else head :: remove_instr node tail
(* We treat Lreloadretaddr as a word-sized load *)

(* A representative word-sized, non-atomic load operation, used to obtain
   latency and issue-cycle estimates for Lreloadretaddr from the generic
   [oper_latency] / [oper_issue_cycles] methods. *)
let some_load =
  Iload {
    memory_chunk = Cmm.Word_int;
    addressing_mode = Arch.identity_addressing;
    mutability = Mutable;
    is_atomic = false }
(* The generic scheduler *)

(* List scheduler, parameterized by per-processor estimates.  Each back
   end subclasses this, providing at least [oper_latency] and
   [oper_issue_cycles]; the predicates below may also be overridden to
   describe processor-specific instructions. *)
class virtual scheduler_generic = object (self)

  (* Determine whether an operation ends a basic block or not.
     Can be overridden for some processors to signal specific instructions
     that terminate a basic block. *)
  method oper_in_basic_block = function
      Icall_ind -> false
    | Icall_imm _ -> false
    | Itailcall_ind -> false
    | Itailcall_imm _ -> false
    | Iextcall _ -> false
    | Istackoffset _ -> false
    | Ialloc _ | Ipoll _ -> false
    | _ -> true

  (* Determine whether an instruction ends a basic block or not *)
  (* PR#2719: it is generally incorrect to schedule checkbound instructions
     within a try ... with Invalid_argument _ -> ...
     Hence, a checkbound instruction within a try...with block ends the
     current basic block. *)
  method private instr_in_basic_block instr try_nesting =
    match instr.desc with
      Lop op ->
        self#oper_in_basic_block op &&
        not (try_nesting > 0 && self#is_checkbound op)
    | Lreloadretaddr -> true
    | _ -> false

  (* Determine whether an operation is a memory store or a memory load.
     Can be overridden for some processors to signal specific
     load or store instructions (e.g. on the I386). *)
  (* Stores are not reordered with other stores nor with loads.
     Loads can be reordered with other loads, but not with stores.
     Atomic loads must not be reordered, so we treat them like stores. *)
  method is_store = function
      Istore(_, _, _) -> true
    | Iload {is_atomic = true} -> true
    | _ -> false

  method is_load = function
      Iload {is_atomic = false} -> true
    | _ -> false

  method is_checkbound = function
      Iintop(Icheckbound) -> true
    | Iintop_imm(Icheckbound, _) -> true
    | _ -> false

  (* The three predicates below lift [is_store] / [is_load] /
     [is_checkbound] from Mach operations to Linear instructions. *)
  method private instr_is_store instr =
    match instr.desc with
      Lop op -> self#is_store op
    | _ -> false

  method private instr_is_load instr =
    match instr.desc with
      Lop op -> self#is_load op
    | _ -> false

  method private instr_is_checkbound instr =
    match instr.desc with
      Lop op -> self#is_checkbound op
    | _ -> false

  (* Estimate the latency of an operation. *)
  method virtual oper_latency : Mach.operation -> int

  (* Estimate the latency of a Lreloadretaddr operation. *)
  method reload_retaddr_latency = self#oper_latency some_load

  (* Estimate the delay needed to evaluate an instruction *)
  method private instr_latency instr =
    match instr.desc with
      Lop op -> self#oper_latency op
    | Lreloadretaddr -> self#reload_retaddr_latency
      (* Only Lop and Lreloadretaddr can be part of a basic block,
         per [instr_in_basic_block] above. *)
    | _ -> assert false

  (* Estimate the number of cycles consumed by emitting an operation. *)
  method virtual oper_issue_cycles : Mach.operation -> int

  (* Estimate the number of cycles consumed by emitting a Lreloadretaddr. *)
  method reload_retaddr_issue_cycles = self#oper_issue_cycles some_load

  (* Estimate the number of cycles consumed by emitting an instruction. *)
  method private instr_issue_cycles instr =
    match instr.desc with
      Lop op -> self#oper_issue_cycles op
    | Lreloadretaddr -> self#reload_retaddr_issue_cycles
    | _ -> assert false

  (* Pseudoregisters destroyed by an instruction *)
  method private destroyed_by_instr instr =
    match instr.desc with
    | Lop op -> Proc.destroyed_at_oper (Iop op)
    | Lreloadretaddr -> [||]
    | _ -> assert false

  (* Add an instruction to the code dag [t], wiring up all its dependency
     edges, and return [ready_queue] augmented with the new node if it
     turned out to have no ancestors (i.e. it is immediately ready). *)
  method private add_instruction t ready_queue instr =
    let delay = self#instr_latency instr in
    let destroyed = self#destroyed_by_instr instr in
    let node =
      { instr = instr;
        delay = delay;
        sons = [];
        date = 0;
        length = -1;
        ancestors = 0;
        emitted_ancestors = 0 } in
    (* Add edges from all instructions that define one of the registers used
       (RAW dependencies) *)
    Array.iter (add_RAW_dependencies t node) instr.arg;
    (* Also add edges from all instructions that use one of the result regs
       of this instruction, or a reg destroyed by this instruction
       (WAR dependencies). *)
    Array.iter (add_WAR_dependencies t node) instr.res;
    Array.iter (add_WAR_dependencies t node) destroyed; (* PR#5731 *)
    (* Also add edges from all instructions that have already defined one
       of the results of this instruction, or a reg destroyed by
       this instruction (WAW dependencies). *)
    Array.iter (add_WAW_dependencies t node) instr.res;
    Array.iter (add_WAW_dependencies t node) destroyed; (* PR#5731 *)
    (* If this is a load, add edges from the most recent store viewed so
       far (if any) and remember the load. Also add edges from the most
       recent checkbound and forget that checkbound. *)
    if self#instr_is_load instr then begin
      List.iter (add_edge_after node) t.stores;
      t.loads <- node :: t.loads;
      List.iter (add_edge_after node) t.checkbounds;
      t.checkbounds <- []
    end
    (* If this is a store, add edges from the most recent store,
       as well as all loads viewed since then, and also the most recent
       checkbound. Remember the store,
       discarding the previous stores, loads and checkbounds. *)
    else if self#instr_is_store instr then begin
      List.iter (add_edge_after node) t.stores;
      List.iter (add_edge_after node) t.loads;
      List.iter (add_edge_after node) t.checkbounds;
      t.stores <- [node];
      t.loads <- [];
      t.checkbounds <- []
    end
    else if self#instr_is_checkbound instr then begin
      t.checkbounds <- [node]
    end;
    (* Remember the registers used and produced by this instruction *)
    for i = 0 to Array.length instr.res - 1 do
      Hashtbl.add t.results instr.res.(i).loc node
    done;
    for i = 0 to Array.length destroyed - 1 do
      Hashtbl.add t.results destroyed.(i).loc node (* PR#5731 *)
    done;
    for i = 0 to Array.length instr.arg - 1 do
      Hashtbl.add t.uses instr.arg.(i).loc node
    done;
    (* If this is a root instruction (all arguments already computed),
       add it to the ready queue *)
    if node.ancestors = 0 then node :: ready_queue else ready_queue

  (* Given a list of instructions and a date, choose one or several
     that are ready to be computed (start date <= current date)
     and that we can emit in one cycle. Favor instructions with
     maximal distance to result. If we can't find any, return None.
     This does not take multiple issues into account, though. *)
  method private ready_instruction date queue =
    let rec extract best = function
        [] ->
          (* [dummy_node] is the initial "best"; if it survived, nothing
             in the queue was ready at [date]. *)
          if best == dummy_node then None else Some best
      | instr :: rem ->
          let new_best =
            if instr.date <= date && instr.length > best.length
            then instr else best in
          extract new_best rem in
    extract dummy_node queue

  (* Schedule a basic block, adding its instructions in front of the given
     instruction sequence.  [date] is the current issue date in cycles. *)
  method private reschedule ready_queue date cont =
    if ready_queue = [] then cont else begin
      match self#ready_instruction date ready_queue with
        None ->
          (* Nothing ready at this date: let one cycle pass and retry. *)
          self#reschedule ready_queue (date + 1) cont
      | Some node ->
          (* Remove node from queue *)
          let new_queue = ref (remove_instr node ready_queue) in
          (* Update the start date and number of ancestors emitted of
             all descendants of this node. Enter those that become ready
             in the queue. *)
          let issue_cycles = self#instr_issue_cycles node.instr in
          List.iter
            (fun (son, delay) ->
               let completion_date = date + issue_cycles + delay - 1 in
               if son.date < completion_date then son.date <- completion_date;
               son.emitted_ancestors <- son.emitted_ancestors + 1;
               if son.emitted_ancestors = son.ancestors then
                 new_queue := son :: !new_queue)
            node.sons;
          { node.instr with next =
              self#reschedule !new_queue (date + issue_cycles) cont }
    end

  (* Entry point *)
  (* Don't bother to schedule for initialization code and the like. *)
  method schedule_fundecl f =
    (* [schedule i try_nesting] walks the instruction sequence, scheduling
       each basic block it finds; [try_nesting] counts how deep we are
       inside Lpushtrap ... Lpoptrap regions (see PR#2719 above). *)
    let rec schedule i try_nesting =
      match i.desc with
      | Lend -> i
      | Lpushtrap { lbl_handler = _; }
        -> { i with next = schedule i.next (try_nesting + 1) }
      | Lpoptrap -> { i with next = schedule i.next (try_nesting - 1) }
      | _ ->
          if self#instr_in_basic_block i try_nesting then begin
            schedule_block (empty_dag ()) [] i try_nesting
          end else
            { i with next = schedule i.next try_nesting }
    (* [schedule_block t ready_queue i try_nesting] accumulates the DAG
       for the current basic block; when the block ends, it computes the
       critical paths and emits the rescheduled instructions. *)
    and schedule_block t ready_queue i try_nesting =
      if self#instr_in_basic_block i try_nesting then
        schedule_block t (self#add_instruction t ready_queue i) i.next try_nesting
      else begin
        (* Registers read by the instruction terminating the block:
           producing them early shortens the block's critical path. *)
        let critical_outputs =
          match i.desc with
            Lop(Icall_ind | Itailcall_ind) -> [| i.arg.(0) |]
          | Lop(Icall_imm _ | Itailcall_imm _ | Iextcall _) -> [||]
          | Lreturn -> [||]
          | _ -> i.arg in
        List.iter (fun x -> ignore (longest_path critical_outputs x)) ready_queue;
        self#reschedule ready_queue 0 (schedule i try_nesting)
      end in
    if f.fun_fast && !Clflags.insn_sched then begin
      let new_body = schedule f.fun_body 0 in
      { f with fun_body = new_body }
    end else
      f

end