File: X86OPTIMISE.ML

package info (click to toggle)
polyml 5.6-8
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 31,892 kB
  • ctags: 34,453
  • sloc: cpp: 44,983; ansic: 24,520; asm: 14,850; sh: 11,730; makefile: 551; exp: 484; python: 253; awk: 91; sed: 9
file content (403 lines) | stat: -rw-r--r-- 20,069 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
(*
    Copyright David C. J. Matthews 2010, 2012

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.
    
    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
    
    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*)

functor X86OPTIMISE(
    structure X86CODE: X86CODESIG
) :
sig
    type operation
    type code
    type operations = operation list

    val optimise: code * operations -> operations

    structure Sharing:
    sig
        type operation = operation
        type code = code
    end
end =
struct
    open X86CODE
    exception InternalError = Misc.InternalError

    fun optimise(code, ops) =
    let
        open RegSet
        (* Print the pre-optimised code if required. *)
        val () = printLowLevelCode(ops, code)

        (* All these instructions can't be eliminated after a jump. *)
        fun labelInstruction(JumpLabel(Labels{uses=ref uses, ...})) = uses > 0
        |   labelInstruction(StartHandler _) = true
        |   labelInstruction _ = false

        (* If we remove a branch the use counts are reduced. *)
        fun eliminate(UncondBranch(Labels{uses, ...})) =
            (!uses > 0 orelse raise InternalError "eliminate"; uses := !uses-1)
        |   eliminate(ConditionalBranch{label=Labels{uses, ...}, ...}) =
            (!uses > 0 orelse raise InternalError "eliminate"; uses := !uses-1)
        |   eliminate _ = ()
    
        (* Optimise the code list by repeated scans up and down the list.
           Scan forward through the list reversing it as we go.  Then scan the
           reversed list and turn it back into the original order. *)
        fun forward([], list, rep) = reverse(list, [], rep)

            (* Eliminate dead instructions after RaiseException up to the next label. *)
        |   forward((u as RaiseException) :: next :: tl, list, rep) =
            if labelInstruction next
            then forward(next :: tl, u :: list, rep)
            else (eliminate next; forward(u :: tl, list, true))

            (* Eliminate Unconditional branches to the next instruction. *)
        |   forward((u as UncondBranch(Labels{forward=source, ...})) ::
                    (next as JumpLabel(Labels{forward=dest, ...})) :: tl, list, rep) =
            if source = dest
            then (eliminate u; forward(next :: tl, list, true))
            else forward(next :: tl, u :: list, rep)
           
            (* Eliminate dead instructions after UnconditionalBranch up to the next label. *)
        |   forward((u as UncondBranch _) :: next :: tl, list, rep) =
            if labelInstruction next
            then forward(next :: tl, u :: list, rep)
            else (eliminate next; forward(u :: tl, list, true))

            (* Eliminate unreferenced labels.  *)
        |   forward(JumpLabel(Labels{uses=ref 0, ...}) :: tl, list, _) =
                forward(tl, list, true)

            (* Branch chaining. If we have a label followed by an unconditional branch set the
               chain entry of the label to destination of the branch. *)
        |   forward((l as JumpLabel(Labels{chain as ref NONE, ...})) ::
                    (u as UncondBranch(uncondLab as Labels{chain=ref chainL, ...})) :: tl, list, _) =
            (
                if isSome chainL then chain := chainL else chain := SOME uncondLab;
                (* It's essential to include the branch in the output at this point.
                   Otherwise we could eliminate the branch if it happened to be followed
                   by its destination, set the use count of the destination label to
                   zero, eliminate that; all before we had updated the incoming
                   branches. *)
                forward(tl, u :: l :: list, true)
            )

            (* Reorder conditional and unconditional branches.  If we have a conditional branch
               followed by an unconditional branch followed by the destination of the conditional
               branch we can turn the test round. *)
        |   forward((c as ConditionalBranch{label=condDest, test, predict, ...}) ::
                    (u as UncondBranch uncondLab) ::
                    (d as JumpLabel(Labels{forward=nextLabel, ...})) :: tl, list, rep) =
            let
                (* See where the ultimate destination of the conditional branch is.
                   If it's already been forwarded we don't want to change that. *)
                fun follow(Labels{chain=ref(SOME c), ...}) = follow c
                |   follow c = c

                val Labels{forward=condRef, ...} = follow condDest

                fun reverseTest JE  = JNE
                |   reverseTest JNE = JE
                |   reverseTest JA  = JNA
                |   reverseTest JB  = JNB
                |   reverseTest JNA = JA
                |   reverseTest JNB = JB
                |   reverseTest JL  = JGE
                |   reverseTest JG  = JLE
                |   reverseTest JLE = JG
                |   reverseTest JGE = JL
                |   reverseTest JO  = JNO
                |   reverseTest JNO = JO

                fun reversePrediction PredictNeutral = PredictNeutral
                |   reversePrediction PredictTaken = PredictNotTaken
                |   reversePrediction PredictNotTaken = PredictTaken
            in
                if condRef = nextLabel
                then
                (
                    eliminate c;
                    forward(d :: tl,
                        ConditionalBranch{label=uncondLab, test=reverseTest test, predict=reversePrediction predict} ::
                            list, true)
                )
                else forward(u :: d :: tl, c :: list, rep)
            end

        |   forward(ResetStack count1 :: ResetStack count2 :: tl, list, _) =
                (* Combine adjacent resets. *)
                forward(ResetStack(count1+count2) :: tl, list, true)

        |   forward((a as ArithRConst{ opc=opA, output=outA, source=constA }) ::
                    (b as ArithRConst{ opc=opB, output=outB, source=constB }) :: tl, list, rep) =
            if outA = outB andalso (opA = ADD orelse opA = SUB) andalso (opB = ADD orelse opB = SUB)
            then
            let
                val (opc, result) =
                    case (opA, opB) of
                        (ADD, ADD) => (ADD, constA+constB)
                    |   (SUB, SUB) => (SUB, constA+constB)
                    |   (ADD, SUB) =>
                            if constA >= constB then (ADD, constA-constB)
                            else (SUB, constB-constA)
                    |   (SUB, ADD) =>
                            if constA >= constB then (SUB, constA-constB)
                            else (ADD, constB-constA)
                    |   _ => raise InternalError "forward: ArithRConst"
            in
                (* We could extract the case where the result is zero but that
                   doesn't seem to occur. *)
                forward(ArithRConst{ opc=opc, output=outA, source=result } :: tl, list, true)
            end
            else forward(b :: tl, a :: list, rep)

        |   forward(hd :: tl, list, rep) = forward(tl, hd :: list, rep)
        
        and reverse([], list, rep) = (list, rep)

            (* Eliminate unreferenced labels.  *)
        |   reverse(JumpLabel(Labels{uses=ref 0, ...}) :: tl, list, _) =
                reverse(tl, list, true)

            (* Combine multiple labels by setting the earlier to point to the later.  This may
               subsequently eliminate the first label.  This simplifies branch chaining.
               This can only be done after we've checked that the later label is
               actually referenced. *)
        |   reverse((a as JumpLabel(aL as Labels{chain=ref chainA, ...})) ::
                    (b as JumpLabel(Labels{chain as ref NONE, ...})) :: tl,
                    list, _) =
            (
                if isSome chainA then chain := chainA else chain := SOME aL;
                reverse(b :: tl, a :: list, true)
            )

            (* Branch chaining.  If we have a branch to a destination that is chained
               we replace this by a branch to the ultimate destination.  This is done on
               the reverse pass because generally branches are forward and the extensions will
               have been added after the branches were seen on the forward pass. *)
        |   reverse(UncondBranch(Labels{chain=ref(SOME dest), uses=oldUses, ...}) :: tl, list, _) =
            let
                fun follow(Labels{chain=ref(SOME c), ...}) = follow c
                |   follow c = c
                val dest as Labels{uses=newUses, ...} = follow dest
            in
                oldUses := !oldUses - 1;
                ! newUses >= 1 orelse raise InternalError "UncondBranch1";
                newUses := !newUses + 1;
                ! oldUses >= 0 orelse raise InternalError "UncondBranch2";
                reverse(UncondBranch dest :: tl, list, true)
            end

        |   reverse(
                ConditionalBranch{
                    label=Labels{chain=ref(SOME dest), uses=oldUses, ...}, test, predict}
                    :: tl, list, _) =
            let
                fun follow(Labels{chain=ref(SOME c), ...}) = follow c
                |   follow c = c
                val dest as Labels{uses=newUses, ...} = follow dest
            in
                oldUses := !oldUses - 1;
                ! newUses >= 1 orelse raise InternalError "ConditionalBranch1";
                newUses := !newUses + 1;
                ! oldUses >= 0 orelse raise InternalError "ConditionalBranch2";
                reverse(ConditionalBranch{label=dest, test=test, predict=predict} :: tl, list, true)
            end

            (* Branch chaining with indexed branches.  We may be able to forward
               branch table entries to their new location. *)
        |   reverse(
                (i as IndexedCase{testReg, workReg, min, cases}) :: tl, list, rep) =
            let
                val changed = ref false
                fun forwardBranch(Labels{chain=ref(SOME dest), uses=oldUses, ...}) =
                let
                    fun follow(Labels{chain=ref(SOME c), ...}) = follow c
                    |   follow c = c
                    val dest as Labels{uses=newUses, ...} = follow dest
                in
                    oldUses := !oldUses - 1;
                    ! newUses >= 1 orelse raise InternalError "IndexedBranch1";
                    newUses := !newUses + 1;
                    ! oldUses >= 0 orelse raise InternalError "IndexedBranch2";
                    changed := true;
                    dest
                end
                |   forwardBranch lab = lab
                val newList = List.map forwardBranch cases
            in
                if ! changed
                then reverse(tl,
                        IndexedCase{testReg=testReg, workReg=workReg, min=min, cases=newList} :: list,
                        true)
                else reverse(tl, i :: list, rep)
            end

            (* If we free a floating point register after loading it onto the stack we want to
               propagate that information.  Otherwise push the RegisterStatusChange back up
               the list. *)
        |   reverse((r as FreeRegisters regs) :: (f as FPLoadFromFPReg{source, ...}) :: tl, list, _) =
            if inSet(source, regs)
            then
            let
                val left = regSetMinus(regs, singleton source)
            in
                if left = noRegisters
                then reverse(FPLoadFromFPReg{source=source, lastRef=true} :: tl, list, true)
                else reverse(FPLoadFromFPReg{source=source, lastRef=true} ::
                             FreeRegisters left :: tl, list, true)
            end
            else reverse(f :: r :: tl, list, true) (* Push it back *)

        |   reverse((r as FreeRegisters regs) :: (f as FPStoreToFPReg{output, ...}) :: tl, list, rep) =
            if inSet(output, regs)
            then (* We're discarding this register without using it.  Why?
                    Split the sets but otherwise do nothing for the moment. *)
            let
                val left = regSetMinus(regs, singleton output)
            in
                if left = noRegisters
                then reverse(f :: tl, FreeRegisters(singleton output) :: list, rep)
                else reverse(f :: FreeRegisters left :: tl, FreeRegisters(singleton output) :: list, rep)
            end
            else (* We're saving into a different register.  Push back the Free. *)
                reverse(f :: r :: tl, list, true) (* Push it back *)

        |   reverse((r as FreeRegisters _) :: (f as FPArithConst _) :: tl, list, _) =
                reverse(f :: r :: tl, list, true) (* Push it back *)

        |   reverse((r as FreeRegisters _) :: (f as FPArithMemory _) :: tl, list, _) =
                reverse(f :: r :: tl, list, true) (* Push it back *)

(*  This isn't right.  The register may be being freed because its last use was in the
    test associated with the conditional but it could be because the register contains a
    value that is not used in the "fall through" branch but IS used in the branch we're
    going to.
        |   reverse((r as FreeRegisters _) :: (f as ConditionalBranch _) :: tl, list, _) =
                reverse(f :: r :: tl, list, true) (* Push it back *)
*)

        |   reverse((r as FreeRegisters _) :: (f as MakeSafe _) :: tl, list, _) =
                reverse(f :: r :: tl, list, true) (* Push it back *)

        |   reverse((r as FreeRegisters _) :: (f as ArithRConst _) :: tl, list, _) =
                reverse(f :: r :: tl, list, true) (* Push it back *)

        |   reverse((r as FreeRegisters _) :: (f as FPStatusToEAX) :: tl, list, _) =
                reverse(f :: r :: tl, list, true) (* Push it back *)

        |   reverse((r as FreeRegisters _) :: (f as FPLoadFromConst _) :: tl, list, _) =
                reverse(f :: r :: tl, list, true) (* Push it back *)

        |   reverse((r as FreeRegisters _) :: (f as MoveRR _) :: tl, list, _) =
                reverse(f :: r :: tl, list, true) (* Push it back *)

        |   reverse((r as FreeRegisters regs) :: (f as FPArithR{source, ...}) :: tl, list, rep) =
            if inSet(source, regs)
            then (* We're freeing the register after the arithmetic.  Split the sets but
                    otherwise do nothing. *)
            let
                val left = regSetMinus(regs, singleton source)
            in
                if left = noRegisters
                then reverse(f :: tl, FreeRegisters(singleton source) :: list, rep)
                else reverse(f :: FreeRegisters left :: tl, FreeRegisters(singleton source) :: list, rep)
            end
            else reverse(f :: r :: tl, list, true) (* Push it back *)

            (* Merge multiple sets. *)
        |   reverse(FreeRegisters a :: FreeRegisters b :: tl, list, _) =
                reverse(FreeRegisters(RegSet.regSetUnion(a,b)) :: tl, list, true)

            (* We store a result, then load it. *)
        |   reverse((l as FPLoadFromFPReg{source, lastRef}) ::
                    (s as FPStoreToFPReg{output, andPop=true}) :: tl, list, rep) =
            if source = output
            then if lastRef
            then (* We're not reusing the register so we don't need to store. *)
                reverse(tl, list, true)
            else (* We're reusing the register later.  Store it there but don't pop. *)
                reverse(FPStoreToFPReg{output=output, andPop=false} :: tl, list, true)
            else reverse(s :: tl, l :: list, rep)

            (* See if we can merge two allocations. *)
        |   reverse((l as AllocStore{size=aSize, output=aOut}) :: tl, list, rep) =
            let
                fun searchAlloc([], _, _, _) = []
                |   searchAlloc (AllocStore{size=bSize, output=bOut} :: tl, instrs, modRegs, true) =
                    (* We can merge this allocation unless the output register
                       has been modified in the meantime. *)
                    if inSet(bOut, modRegs)
                    then []
                    else (* Construct a new list with the allocation replaced by an
                            addition, the original instructions in between and the
                            first allocation now allocating the original space plus
                            space for the additional object and its length word. *)
                        LoadAddress{output=aOut, offset=(bSize+1) * Address.wordSize,
                                    base=SOME bOut, index=NoIndex} ::
                            List.filter (fn StoreInitialised => false | _ => true) (List.rev instrs) @
                            (AllocStore{size=aSize+bSize+1, output=bOut} :: tl)
                    (* Check the correct matching of allocation and completion. *)
                |   searchAlloc (AllocStore _ :: _, _, _, false) =
                        raise InternalError "AllocStore found but last allocation not complete"
                |   searchAlloc((s as StoreInitialised) :: tl, instrs, modRegs, false) =
                        searchAlloc(tl, s :: instrs, modRegs, true)
                |   searchAlloc(StoreInitialised :: _, _, _, true) =
                        raise InternalError "StoreInitialised found with no allocation"
                    (* For the moment we allow only a limited range of instructions here*)
                |   searchAlloc((s as StoreConstToMemory _) :: tl, instrs, modRegs, alloc) =
                        searchAlloc(tl, s :: instrs, modRegs, alloc)
                |   searchAlloc((s as StoreRegToMemory _) :: tl, instrs, modRegs, alloc) =
                        searchAlloc(tl, s :: instrs, modRegs, alloc)
                |   searchAlloc((s as StoreLongConstToMemory _) :: tl, instrs, modRegs, alloc) =
                        searchAlloc(tl, s :: instrs, modRegs, alloc)
                |   searchAlloc((s as ResetStack _) :: tl, instrs, modRegs, alloc) =
                        searchAlloc(tl, s :: instrs, modRegs, alloc)
                |   searchAlloc((s as LoadMemR{output, ...}) :: tl, instrs, modRegs, alloc) =
                        if output = aOut then []
                        else searchAlloc(tl, s :: instrs, regSetUnion(modRegs, singleton output), alloc)
                |   searchAlloc((s as MoveRR{output, ...}) :: tl, instrs, modRegs, alloc) =
                        if output = aOut then []
                        else searchAlloc(tl, s :: instrs, regSetUnion(modRegs, singleton output), alloc)                        
                    (* Anything else terminates the search. *)
                |   searchAlloc _ = []
            in
                case searchAlloc(tl, [], noRegisters, false) of
                    [] => reverse(tl, l :: list, rep)
                |   newTail => reverse(newTail, list, true)
            end

        |   reverse(hd :: tl, list, rep) = reverse(tl, hd :: list, rep)

        (* Repeat scans through the code until there are no further changes. *)
        fun repeat ops =
            case forward(ops, [], false) of
                (list, false) => list
            |   (list, true) => repeat list

    in
        if lowLevelOptimise code
        then repeat ops
        else ops
    end

    structure Sharing =
    struct
        type operation = operation
        type code = code
    end
end;