1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
|
/*
===========================================================================
Copyright (C) 1999-2005 Id Software, Inc.
This file is part of Quake III Arena source code.
Quake III Arena source code is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.
Quake III Arena source code is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Quake III Arena source code; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
===========================================================================
*/
/* This file is only compiled for PowerPC builds with Altivec support.
Altivec intrinsics need to be in a separate file, so GCC's -maltivec
command line can enable them, but give us the option to _not_ use that
on other files, where the compiler might then generate Altivec
instructions for normal floating point, crashing on G3 (etc) processors. */
#include "client.h"
#include "snd_local.h"
#if idppc_altivec
#if !defined(__APPLE__)
#include <altivec.h>
#endif
/*
===================
S_PaintChannelFrom16_altivec

Mixes `count` output frames of the 16-bit sound `sc`, as played on channel
`ch`, into `paintbuffer` starting at index `bufferOffset`.

paintbuffer   - destination mix buffer; each entry's left/right is accumulated into
snd_vol       - master volume, multiplied with the channel's leftvol/rightvol
ch            - channel state (leftvol, rightvol, doppler, dopplerScale, oldDopplerScale)
sc            - sound effect (soundChannels, soundData chunk list)
count         - number of output frames to mix
sampleOffset  - starting position within the sound, in frames
bufferOffset  - first paintbuffer index to write

Returns without touching the buffer when sc->soundChannels <= 0.

Without doppler (or with dopplerScale exactly 1.0) samples are mixed 1:1,
using Altivec for mono sources once the destination is aligned.  With
doppler, each output frame is the average of the source samples covered by
one dopplerScale-sized step.
===================
*/
void S_PaintChannelFrom16_altivec( portable_samplepair_t paintbuffer[PAINTBUFFER_SIZE], int snd_vol, channel_t *ch, const sfx_t *sc, int count, int sampleOffset, int bufferOffset ) {
	int						data, aoff, boff;
	int						leftvol, rightvol;
	int						i, j;
	portable_samplepair_t	*samp;
	sndBuffer				*chunk;
	short					*samples;
	float					ooff, fdata[2], fdiv, fleftvol, frightvol;

	if (sc->soundChannels <= 0) {
		return;
	}

	samp = &paintbuffer[ bufferOffset ];

	if (ch->doppler) {
		sampleOffset = sampleOffset*ch->oldDopplerScale;
	}

	/* For stereo, convert the frame offset into an offset in shorts and
	   keep it even so it always lands on a left-channel sample. */
	if ( sc->soundChannels == 2 ) {
		sampleOffset *= sc->soundChannels;

		if ( sampleOffset & 1 ) {
			sampleOffset &= ~1;
		}
	}

	/* Seek to the chunk containing sampleOffset, wrapping back to the first
	   chunk when the list runs out. */
	chunk = sc->soundData;
	while (sampleOffset>=SND_CHUNK_SIZE) {
		chunk = chunk->next;
		sampleOffset -= SND_CHUNK_SIZE;
		if (!chunk) {
			chunk = sc->soundData;
		}
	}

	if (!ch->doppler || ch->dopplerScale==1.0f) {
		vector signed short volume_vec;
		vector unsigned int volume_shift;
		int vectorCount, samplesLeft, chunkSamplesLeft;

		leftvol = ch->leftvol*snd_vol;
		rightvol = ch->rightvol*snd_vol;
		samples = chunk->sndChunk;

		/* Volume lanes are laid out L,L,R,R,L,L,R,R so that vec_mule/vec_mulo
		   against the permuted sample data yield left,right,left,right
		   products.  NOTE(review): the volumes are narrowed to short here;
		   assumes leftvol/rightvol fit in 16 signed bits - confirm. */
		((short *)&volume_vec)[0] = leftvol;
		((short *)&volume_vec)[1] = leftvol;
		((short *)&volume_vec)[4] = leftvol;
		((short *)&volume_vec)[5] = leftvol;
		((short *)&volume_vec)[2] = rightvol;
		((short *)&volume_vec)[3] = rightvol;
		((short *)&volume_vec)[6] = rightvol;
		((short *)&volume_vec)[7] = rightvol;
		volume_shift = vec_splat_u32(8);

		i = 0;
		while(i < count) {
			/* Scalar path.  Used while the destination is unaligned (the
			   0x1f mask demands 32-byte alignment, which also satisfies the
			   16 bytes the vector accesses need), while fewer than 8 frames
			   or 8 source samples of the current chunk remain, and for ALL
			   stereo data: the vector path below duplicates every source
			   sample into both output channels, so it is only correct for
			   mono sources. */
			while(i < count && ((sc->soundChannels == 2) || ((unsigned long)&samp[i] & 0x1f) || ((count-i) < 8) || ((SND_CHUNK_SIZE - sampleOffset) < 8))) {
				data = samples[sampleOffset++];
				samp[i].left += (data * leftvol)>>8;

				if ( sc->soundChannels == 2 ) {
					data = samples[sampleOffset++];
				}
				samp[i].right += (data * rightvol)>>8;

				if (sampleOffset == SND_CHUNK_SIZE) {
					chunk = chunk->next;
					/* Wrap like the initial seek does instead of
					   dereferencing a NULL chunk. */
					if (!chunk) {
						chunk = sc->soundData;
					}
					samples = chunk->sndChunk;
					sampleOffset = 0;
				}
				i++;
			}

			/* Destination is now aligned. Process as many 8-sample
			   chunks as we can before we run out of room from the current
			   sound chunk. We do 8 per loop to avoid extra source data reads. */
			samplesLeft = count - i;
			chunkSamplesLeft = SND_CHUNK_SIZE - sampleOffset;
			if(samplesLeft > chunkSamplesLeft)
				samplesLeft = chunkSamplesLeft;

			vectorCount = samplesLeft / 8;

			if(vectorCount)
			{
				vector unsigned char tmp;
				vector short s0, s1, sampleData0, sampleData1;
				vector signed int merge0, merge1;
				vector signed int d0, d1, d2, d3;
				vector unsigned char samplePermute0 =
					VECCONST_UINT8(0, 1, 4, 5, 0, 1, 4, 5, 2, 3, 6, 7, 2, 3, 6, 7);
				vector unsigned char samplePermute1 =
					VECCONST_UINT8(8, 9, 12, 13, 8, 9, 12, 13, 10, 11, 14, 15, 10, 11, 14, 15);
				vector unsigned char loadPermute0, loadPermute1;

				// Rather than permute the vectors after we load them to do the sample
				// replication and rearrangement, we permute the alignment vector so
				// we do everything in one step below and avoid data shuffling.
				tmp = vec_lvsl(0,&samples[sampleOffset]);
				loadPermute0 = vec_perm(tmp,tmp,samplePermute0);
				loadPermute1 = vec_perm(tmp,tmp,samplePermute1);

				s0 = *(vector short *)&samples[sampleOffset];
				while(vectorCount)
				{
					/* Load up source (16-bit) sample data.  The address of
					   the last of the 8 samples is used so the load stays
					   within the data this iteration consumes. */
					s1 = *(vector short *)&samples[sampleOffset+7];

					/* Load up destination sample data */
					d0 = *(vector signed int *)&samp[i];
					d1 = *(vector signed int *)&samp[i+2];
					d2 = *(vector signed int *)&samp[i+4];
					d3 = *(vector signed int *)&samp[i+6];

					sampleData0 = vec_perm(s0,s1,loadPermute0);
					sampleData1 = vec_perm(s0,s1,loadPermute1);

					merge0 = vec_mule(sampleData0,volume_vec);
					merge0 = vec_sra(merge0,volume_shift); /* Shift down to proper range */

					merge1 = vec_mulo(sampleData0,volume_vec);
					merge1 = vec_sra(merge1,volume_shift);

					d0 = vec_add(merge0,d0);
					d1 = vec_add(merge1,d1);

					merge0 = vec_mule(sampleData1,volume_vec);
					merge0 = vec_sra(merge0,volume_shift); /* Shift down to proper range */

					merge1 = vec_mulo(sampleData1,volume_vec);
					merge1 = vec_sra(merge1,volume_shift);

					d2 = vec_add(merge0,d2);
					d3 = vec_add(merge1,d3);

					/* Store destination sample data */
					*(vector signed int *)&samp[i] = d0;
					*(vector signed int *)&samp[i+2] = d1;
					*(vector signed int *)&samp[i+4] = d2;
					*(vector signed int *)&samp[i+6] = d3;

					i += 8;
					vectorCount--;
					s0 = s1;
					sampleOffset += 8;
				}

				if (sampleOffset == SND_CHUNK_SIZE) {
					chunk = chunk->next;
					/* Same wrap as the scalar path and the initial seek. */
					if (!chunk) {
						chunk = sc->soundData;
					}
					samples = chunk->sndChunk;
					sampleOffset = 0;
				}
			}
		}
	} else {
		fleftvol = ch->leftvol*snd_vol;
		frightvol = ch->rightvol*snd_vol;

		ooff = sampleOffset;
		samples = chunk->sndChunk;

		for ( i=0 ; i<count ; i++ ) {
			/* [aoff, boff) is the span of source samples that this output
			   frame averages over. */
			aoff = ooff;
			ooff = ooff + ch->dopplerScale * sc->soundChannels;
			boff = ooff;
			fdata[0] = fdata[1] = 0;
			for (j=aoff; j<boff; j += sc->soundChannels) {
				if (j == SND_CHUNK_SIZE) {
					chunk = chunk->next;
					if (!chunk) {
						chunk = sc->soundData;
					}
					samples = chunk->sndChunk;
					ooff -= SND_CHUNK_SIZE;
				}
				/* Indices are masked so they stay inside the current chunk
				   after j has run past SND_CHUNK_SIZE. */
				if ( sc->soundChannels == 2 ) {
					fdata[0] += samples[j&(SND_CHUNK_SIZE-1)];
					fdata[1] += samples[(j+1)&(SND_CHUNK_SIZE-1)];
				} else {
					fdata[0] += samples[j&(SND_CHUNK_SIZE-1)];
					fdata[1] += samples[j&(SND_CHUNK_SIZE-1)];
				}
			}
			fdiv = 256 * (boff-aoff) / sc->soundChannels;
			/* An empty span leaves fdiv at 0 with nothing accumulated; skip
			   it rather than convert the 0/0 result to an integer. */
			if ( fdiv != 0.0f ) {
				samp[i].left += (fdata[0] * fleftvol)/fdiv;
				samp[i].right += (fdata[1] * frightvol)/fdiv;
			}
		}
	}
}
#endif
|