1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
|
/*
* Creation Date: <2001/03/17 18:00:05 samuel>
* Time-stamp: <2001/06/23 13:48:30 samuel>
*
* <ptintercept.S>
*
* Handles writes to the (mac) hash table
*
* Copyright (C) 2001 Samuel Rydh (samuel@ibrium.se)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation
*
*/
// NEEDS TO BE FIXED WITH RESPECT TO r6 and r7
// Performance: 1.15 MHz (350 MHz G3)
//////////////////////////////////////////////////////////////////////
// Implementing the tlbie instruction properly is a tricky
// problem. Since a tlbie is supposed to invalidate an equivalence
// class of PTEs, it really does not map well to the huge
// 'on chip PTE table' MOL utilizes (i.e. the linux PTE hash).
// Moreover, keeping track of mapped pages is not without cost.
//
// One solution is intercepting all PTE accesses (at least the writes)
// and performing any PTE invalidation at this time. That is,
// the 'on chip PTE table' is forced to match the (mac) PTE hash
// at all times. Any explicit tlbie can then safely be ignored.
//////////////////////////////////////////////////////////////////////
// SET_MSR_DR scr
//
// Sets the MSR_DR bit in the machine state register.
// scr: scratch GPR, clobbered (holds the value written to the MSR).
.macro SET_MSR_DR scr
mfmsr \scr
ori \scr,\scr,MSR_DR
mtmsr \scr
isync
.endm
// CLEAR_MSR_DR scr
//
// Clears the MSR_DR bit in the machine state register.
// scr: scratch GPR, clobbered (holds the value written to the MSR).
.macro CLEAR_MSR_DR scr
mfmsr \scr
rlwinm \scr,\scr,0,28,26 // clear MSR_DR (bit 27): the wrapped mask 28..26 keeps every bit except 27
mtmsr \scr
isync
.endm
////////////////////////////////////////////////////////////////////////
// check_pthash_hit
//
// Checks whether the pending DSI is a write protection fault that
// targets the (mac) PTE hash table. If it is not, the routine returns
// to the caller through LR. If it is, the faulting store is copied
// into pt_store_patch and re-executed with the hash table segment
// mapped in; the PTE double word is sampled before and after the
// store, and do_intercept_tlbie is called through call_kernel when a
// previously valid PTE was modified. Otherwise the routine leaves
// through emulation_done.
//
// In: r1 = stack / kernel variable base (used for all K_xxx and
// xXXX offsets), DSISR and DAR describe the fault.
//
// m: r0,r2-r5, cr
check_pthash_hit:
mfdsisr r3
rlwinm. r4,r3,0,6,6 // Is this a write
beqlr
rlwinm. r4,r3,0,4,4 // and a protection violation?
beqlr
mfdar r2 // Is this a pthash hit?
lwz r3,K_TLBHASH_BASE_EA(r1) // First check EA...
lwz r4,K_HASH_MASK(r1)
sub r5,r2,r3 // r5 = offset of DAR from the hash base
cmplw r5,r4 // unsigned compare, so an EA below the base also fails
bgtlr+
lwz r5,K_SR_DATA(r1) // Correct context for EA?
rlwinm r3,r2,4+2,26,29 // #sr << 2
lwz r4,K_TLBHASH_SR(r1)
lwzx r3,r3,r5 // r3 = K_SR_DATA entry for this segment
cmpw r3,r4
bnelr
/////////////////////////////////////////////////
// Handle page table write, r2=dar, r4=segreg
/////////////////////////////////////////////////
lwz r3,xINST_OPCODE(r1) // r3 = the faulting store instruction
LI_PHYS( r5,pt_store_patch ) // r5 = address of pt_store_patch
stw r3,0(r5) // patch the store into the instruction stream
dcbst 0,r5 // Start flushing the cache
mfsrin r3,r2
stw r3,K_TMP_SCRATCH0(r1) // save old segment register
mtsrin r4,r2 // set new segment register value
bl secint_pt_load_1 // returns at once via blrl; LR is left pointing into secint_pt_load_1
SET_MSR_DR /**/ r4
rlwinm r3,r2,0,0,28 // r3 = dar aligned down to the 8-byte PTE
lwz r0,0(r3) // Load original double PTE-word (r0 = PTE0, r2 = PTE1)
lwz r2,4(r3)
CLEAR_MSR_DR /**/ r4
stw r0,K_TMP_SCRATCH2(r1) // Store PTE
stw r2,K_TMP_SCRATCH3(r1)
sync // Make sure the d-cache flush has finished
icbi 0,r5 // Flush the i-cache
sync // Needed on the 7400
mfsrr0 r0 // save xNIP
lwz r5,xGPR1(r1)
xGPR_LOAD_RANGE r2,r4
mtlr r5 // save mac-r1
stw r0,xNIP(r1)
xGPR_LOAD r0,r5,r1
SET_MSR_DR /**/ r1
mflr r1 // r1 = mac-r1 for the patched store
bl secint_pt_store // returns at once via blrl; LR is left pointing into secint_pt_store
pt_store_patch:
nop // overwritten at run time with the faulting store instruction
mtlr r1 // save new mac-r1
CLEAR_MSR_DR /**/ r1
mfsprg3 r1 // restore stack pointer
xGPR_SAVE_RANGE r2,r5
mflr r2
stw r0,xGPR0(r1)
stw r2,xGPR1(r1)
mfdar r2 // r2 = DAR
rlwinm r3,r2,0,0,28 // r3 = dar aligned down to the 8-byte PTE
bl secint_pt_load_2 // returns at once via blrl; LR is left pointing into secint_pt_load_2
SET_MSR_DR /**/ r4
lwz r0,0(r3) // r0 = new PTE0
lwz r5,4(r3) // r5 = new PTE1
CLEAR_MSR_DR /**/ r4
lwz r3,K_TMP_SCRATCH0(r1) // Restore segment register
mtsrin r3,r2
////////////////////////////////////////////////
// New PTE: r0,r5. Old PTE: K_TMP_SCRATCH[2,3]
///////////////////////////////////////////////
// Is a tlbie of the overwritten PTE needed? Even if it is only
// the R/C bits that change, we will actually need to flush the PTE.
lwz r2,K_TMP_SCRATCH2(r1) // r2 = old PTE0
rlwinm. r3,r2,0,0,0 // old V-bit set?
cmpw cr2,r0,r2 // first PTE word equal?
beq emulation_done // old PTE was not valid, nothing to flush
lwz r3,K_TMP_SCRATCH3(r1) // r3 = old PTE1
cmpw cr3,r3,r5 // second PTE word equal?
bne cr2,1f
beq cr3,emulation_done // both words unchanged
// Tlbie needed
1: mfsrr0 r4 // inc NIP
addi r4,r4,4
mtsrr0 r4
bl save_middle_regs
mfdar r6 // Needed to reconstruct the EA
// r4 is dead here and is rewritten below, so it serves as scratch for
// the hash base. r2 must not be used: it still holds the old PTE0.
lwz r4,K_TLBHASH_BASE_EA(r1) // Calculate tlb offset
sub r6,r6,r4
rlwinm r6,r6,0,0,28 // double word align
mr r4,r2 // r4 = old PTE0
mr r5,r3 // r5 = old PTE1
LI_VIRT( r3, do_intercept_tlbie )
b call_kernel
//////////////////////////////////////////////////////////////////////
// secint_pt_store
//
// Called with bl immediately before pt_store_patch. The leading blrl
// returns to the caller at once and leaves LR pointing at the code
// below. That code dispatches on the exception vector index in r3.
// NOTE(review): presumably it is entered through that LR value when
// the patched store raises an exception; the mechanism is not
// visible in this file.
//
// r1: stack (sprg1 = old r1)
// r3: vector index (sprg0 = old r3)
// srr0/srr1: kernel nip/msr
//
// xGPR(0-5) are valid
secint_pt_store:
blrl
TRACE( 0x7778, "secint_pt_store" )
lwz r5,xNIP(r1) // Restore NIP & MSR
mtsrr0 r5
lwz r0,x_MSR(r1)
mtsrr1 r0
mfdar r4 // restore segment register
lwz r2,K_TMP_SCRATCH0(r1) // since we will take an exception
mtsrin r2,r4
cmpwi r3,0x300 // Probably a DSI
beq+ 1f
cmpwi r3,0x600 // Alignment [quite unlikely]
beq 2f
cmpwi r3,0x800 // FPU unavailable [ UNIMPLEMENTED ]
beq 3f
cmpwi r3,0xf20 // AltiVec unavailable [ UNIMPLEMENTED ]
beq 4f
// NOTE(review): assumes DEBUGGER_SAVE does not return; otherwise an
// unknown vector would fall through into the DSI path below.
DEBUGGER_SAVE( 0x6918 ) // ERROR...
1: bl save_middle_regs // *** DSI (0x300) ***
b dsi_cont
2: bl save_middle_regs // *** Alignment (0x600) ***
b alignment_cont
// NOTE(review): the two cases below have no branch after them, so they
// rely on DEBUGGER_SAVE not returning - confirm.
3: DEBUGGER_SAVE( 0x6919 ) // *** FPU Unavailable (0x800) ***
4: DEBUGGER_SAVE( 0x6920 ) // *** AltiVec Unavailable (0xf20) ***
// secint_pt_load_1
//
// Called with bl before the first (pre-store) PTE read in
// check_pthash_hit. The blrl returns to the caller at once and leaves
// LR pointing at the code below, which reloads srr0/srr1 from xNIP
// and x_MSR and then invokes DEBUGGER_SAVE.
//
// These should only occur on SMP (the PTE is *known* to be valid)
secint_pt_load_1:
blrl
lwz r2,xNIP(r1) // Restore NIP & MSR
mtsrr0 r2
lwz r3,x_MSR(r1)
mtsrr1 r3
DEBUGGER_SAVE( 0x1313 )
// secint_pt_load_2
//
// Called with bl before the second (post-store) PTE read in
// check_pthash_hit. The blrl returns to the caller at once and leaves
// LR pointing at the DEBUGGER_SAVE below. Unlike secint_pt_load_1,
// srr0/srr1 are not reloaded here.
secint_pt_load_2:
blrl
DEBUGGER_SAVE( 0x1314 )
#if 0
/************************************************************************/
/* Store instruction emulation */
/************************************************************************/
// Disabled code: everything up to the matching #endif is compiled out.
// It decodes the faulting store (stw/stb/sth in d-form, x-form and
// their update variants), optionally writes the EA back to rA, and
// hands the value, byte count and hash table offset to do_pt_store
// through call_kernel. Other opcodes go to st_unimplemented.
/////////////////////////////////////////////////////
// Decode store instruction
////////////////////////////////////////////////////
// [2.06 MHz]
mfsrr0 r5 // inc NIP
addi r5,r5,4
mtsrr0 r5
lwz r3,xINST_OPCODE(r1)
extrwi r0,r3,6,0 // r0 = opcode For the following opcodes, bit 5 is the update bit
li r5,4 // r5 = #bytes to store
cmpwi cr0,r0,31 // rS,rA,rB type instructions
beq+ st_opcode31
clrrwi r2,r0,1 // r2 = opcode with the update bit cleared
rlwinm. r0,r0,0,31,31 // cr0.eq = no update (u-bit clear)
cmpwi cr1,r2,36 // stw[u] rS,d(rA), r0[31] = u-bit
cmpwi cr2,r2,38 // stb[u] rS,d(rA), r0[31] = u-bit
cmpwi cr3,r2,44 // sth[u] rS,d(rA), r0[31] = u-bit
beq+ cr1,st_wU
beq cr2,st_bU
beq cr3,st_hU
b st_unimplemented
st_opcode31:
rlwinm r2,r3,0,21,31 // Secondary opcode
crset eq // No-update form
cmpwi cr1,r2,151*2 // stwx
cmpwi cr2,r2,215*2 // stbx
cmpwi cr3,r2,407*2 // sthx
beq+ cr1,st_wU
beq cr2,st_bU
beq cr3,st_hU
crclr eq // Update form
cmpwi cr1,r2,183*2 // stwux
cmpwi cr2,r2,247*2 // stbux
cmpwi cr3,r2,439*2 // sthux
beq+ cr1,st_wU
beq cr2,st_bU
beq cr3,st_hU
b st_unimplemented
// cr0.eq if not updating, r3 = instruction, r5 = 4
// The size labels fall through: byte entry gives 4-1-2 = 1,
// halfword entry gives 4-2 = 2, word entry leaves 4.
st_bU: subi r5,r5,1
st_hU: subi r5,r5,2
st_wU:
// [1.8 MHz]
beq+ 1f // update rA?
mfdar r0 // r0 = ea (to be stored in rA)
rlwinm r2,r3,11+5+3,24,28 // r2 = regnum (rA) << 3
EMU_STORE_GPR r2, /**/ r4 // m: lr
1:
rlwinm r2,r3,6+5+3,24,28 // rS << 3
EMU_LOAD_GPR r2, /**/ r4 // r0 = value
// r4 = htab offset, r5 = #bytes, r6=value
bl save_middle_regs // r6-r31, ctr...
mr r6,r0
mfdar r4
lwz r2,K_TLBHASH_BASE_EA(r1)
sub r4,r4,r2 // r4 = DAR - hash base EA
LI_VIRT( r3, do_pt_store )
#if 0
// [1.89 MHz]
b emulation_done_noinc
#endif
b call_kernel
st_unimplemented:
mfsrr0 r3 // undo the NIP increment done at the top
subi r3,r3,4
mtsrr0 r3
blr
#endif
|