1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
|
/*
@author herumi
JPEG quantize sample
This program generates a quantization routine by using fast division algorithm in run-time.
time(sec)
quality 1(high) 10 50 100(low)
VC2005 8.0 8.0 8.0 8.0
Xbyak 1.6 0.8 0.5 0.5
; generated code at q = 1
push esi
push edi
mov edi,dword ptr [esp+0Ch]
mov esi,dword ptr [esp+10h]
mov eax,dword ptr [esi]
shr eax,4
mov dword ptr [edi],eax
mov eax,dword ptr [esi+4]
mov edx,0BA2E8BA3h
mul eax,edx
shr edx,3
...
; generated code at q = 100
push esi
push edi
mov edi,dword ptr [esp+0Ch]
mov esi,dword ptr [esp+10h]
mov eax,dword ptr [esi]
mov dword ptr [edi],eax
mov eax,dword ptr [esi+4]
mov dword ptr [edi+4],eax
mov eax,dword ptr [esi+8]
mov dword ptr [edi+8],eax
mov eax,dword ptr [esi+0Ch]
...
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "xbyak/xbyak.h"
#ifdef _MSC_VER
#pragma warning(disable : 4996) // scanf
#endif
const int N = 64;
class Quantize : public Xbyak::CodeGenerator {
static int ilog2(int x)
{
int shift = 0;
while ((1 << shift) <= x) shift++;
return shift - 1;
}
public:
/*
input : esi
output : eax = [esi+offset] / dividend
destroy : edx
*/
void udiv(uint32_t dividend, int offset)
{
mov(eax, ptr[esi + offset]);
/* dividend = odd x 2^exponent */
int exponent = 0, odd = dividend;
while ((odd & 1) == 0) {
odd >>= 1; exponent++;
}
if (odd == 1) { // trivial case
if (exponent) {
shr(eax, exponent);
}
return;
}
uint64_t mLow, mHigh;
int len = ilog2(odd) + 1;
{
uint64_t roundUp = uint64_t(1) << (32 + len);
uint64_t k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd));
mLow = roundUp / odd;
mHigh = (roundUp + k) / odd;
}
while (((mLow >> 1) < (mHigh >> 1)) && (len > 0)) {
mLow >>= 1; mHigh >>= 1; len--;
}
uint64_t m; int a;
if ((mHigh >> 32) == 0) {
m = mHigh; a = 0;
} else {
len = ilog2(odd);
uint64_t roundDown = uint64_t(1) << (32 + len);
mLow = roundDown / odd;
int r = (int)(roundDown % odd);
m = (r <= (odd >> 1)) ? mLow : mLow + 1;
a = 1;
}
while ((m & 1) == 0) {
m >>= 1; len--;
}
len += exponent;
mov(edx, int(m));
mul(edx);
if (a) {
add(eax, int(m));
adc(edx, 0);
}
if (len) {
shr(edx, len);
}
mov(eax, edx);
}
/*
quantize(uint32_t dest[64], const uint32_t src[64]);
*/
Quantize(const uint32_t qTbl[64])
{
push(esi);
push(edi);
const int P_ = 4 * 2;
mov(edi, ptr [esp+P_+4]); // dest
mov(esi, ptr [esp+P_+8]); // src
for (int i = 0; i < N; i++) {
udiv(qTbl[i], i * 4);
mov(ptr[edi+i*4], eax);
}
pop(edi);
pop(esi);
ret();
}
};
void quantize(uint32_t dest[64], const uint32_t src[64], const uint32_t qTbl[64])
{
for (int i = 0; i < N; i++) {
dest[i] = src[i] / qTbl[i];
}
}
#ifdef XBYAK64
int main()
{
puts("not implemented for 64bit");
return 1;
}
#else
int main(int argc, char *argv[])
{
int q;
if (argc > 1) {
q = atoi(argv[1]);
} else {
printf("input quantize=");
if (scanf("%d", &q) != 1) {
fprintf(stderr, "bad number\n");
return 1;
}
}
printf("q=%d\n", q);
uint32_t qTbl[] = {
16, 11, 10, 16, 24, 40, 51, 61,
12, 12, 14, 19, 26, 58, 60, 55,
14, 13, 16, 24, 40, 57, 69, 56,
14, 17, 22, 29, 51, 87, 80, 62,
18, 22, 37, 56, 68, 109, 103, 77,
24, 35, 55, 64, 81, 104, 113, 92,
49, 64, 78, 87, 103, 121, 120, 101,
72, 92, 95, 98, 112, 100, 103, 99
};
for (int i = 0; i < N; i++) {
qTbl[i] /= q;
if (qTbl[i] == 0) qTbl[i] = 1;
}
try {
uint32_t src[N];
uint32_t dest[N];
uint32_t dest2[N];
for (int i = 0; i < N; i++) {
src[i] = rand() % 2048;
}
Quantize jit(qTbl);
//printf("jit size=%d, ptr=%p\n", jit.getSize(), jit.getCode());
void (*quantize2)(uint32_t*, const uint32_t*, const uint32_t *) = jit.getCode<void (*)(uint32_t*, const uint32_t*, const uint32_t *)>();
quantize(dest, src, qTbl);
quantize2(dest2, src, qTbl);
for (int i = 0; i < N; i++) {
if (dest[i] != dest2[i]) {
printf("err[%d] %u %u\n", i, dest[i], dest2[i]);
}
}
const int count = 10000000;
int begin;
begin = clock();
for (int i = 0; i < count; i++) {
quantize(dest, src, qTbl);
}
printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC));
begin = clock();
for (int i = 0; i < count; i++) {
quantize2(dest, src, qTbl);
}
printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC));
} catch (std::exception& e) {
printf("ERR:%s\n", e.what());
} catch (...) {
printf("unknown error\n");
}
}
#endif
|