File: testharness.h

package info (click to toggle)
x265 4.1-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 15,452 kB
  • sloc: asm: 187,063; cpp: 118,996; ansic: 741; makefile: 146; sh: 91; python: 11
file content (183 lines) | stat: -rw-r--r-- 7,196 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
/*****************************************************************************
 * Copyright (C) 2013-2020 MulticoreWare, Inc
 *
 * Authors: Steve Borho <steve@borho.org>
 *          Min Chen <chenm003@163.com>
 *          Yimeng Su <yimeng.su@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

#ifndef _TESTHARNESS_H_
#define _TESTHARNESS_H_ 1

#include "common.h"
#include "primitives.h"

#if _MSC_VER
#pragma warning(disable: 4324) // structure was padded due to __declspec(align())
#endif

#define PIXEL_MIN 0
#define SHORT_MAX  32767
#define SHORT_MIN -32767
#define UNSIGNED_SHORT_MAX 65535

using namespace X265_NS;

extern const char* lumaPartStr[NUM_PU_SIZES];
extern const char* const* chromaPartStr[X265_CSP_COUNT];

class TestHarness
{
public:

    TestHarness() {}

    virtual ~TestHarness() {}

    virtual bool testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt) = 0;

    virtual void measureSpeed(const EncoderPrimitives& ref, const EncoderPrimitives& opt) = 0;

    virtual const char *getName() const = 0;

protected:

    /* Temporary variables for stack checks */
    int      m_ok;

    uint64_t m_rand;
};

#ifdef _MSC_VER
#include <intrin.h>
#elif HAVE_RDTSC
#include <intrin.h>
#elif (!defined(__APPLE__) && (defined (__GNUC__) && (defined(__x86_64__) || defined(__i386__))))
#include <x86intrin.h>
#elif ( !defined(__APPLE__) && defined (__GNUC__) && defined(__ARM_NEON__))
#include <arm_neon.h>
#else
/* fallback for older GCC/MinGW */
static inline uint32_t __rdtsc(void)
{
    uint32_t a = 0;

#if X265_ARCH_X86
    asm volatile("rdtsc" : "=a" (a) ::"edx");
#elif X265_ARCH_ARM
    // TOD-DO: verify following inline asm to get cpu Timestamp Counter for ARM arch
    // asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a));

    // TO-DO: replace clock() function with appropriate ARM cpu instructions
    a = clock();
#elif  X265_ARCH_ARM64
    asm volatile("isb" : : : "memory");
    asm volatile("mrs %0, cntvct_el0" : "=r"(a));
#endif
    return a;
}
#endif // ifdef _MSC_VER

#define BENCH_RUNS 2000

/* Adapted from checkasm.c, runs each optimized primitive four times, measures rdtsc
 * and discards invalid times. Repeats BENCH_RUNS times to get a good average.
 * Then measures the C reference with BENCH_RUNS / 4 runs and reports X factor and average cycles.*/
#define REPORT_SPEEDUP(RUNOPT, RUNREF, ...) \
    { \
        uint32_t cycles = 0; int runs = 0; \
        RUNOPT(__VA_ARGS__); \
        for (int ti = 0; ti < BENCH_RUNS; ti++) { \
            uint32_t t0 = (uint32_t)__rdtsc(); \
            RUNOPT(__VA_ARGS__); \
            RUNOPT(__VA_ARGS__); \
            RUNOPT(__VA_ARGS__); \
            RUNOPT(__VA_ARGS__); \
            uint32_t t1 = (uint32_t)__rdtsc() - t0; \
            if (t1 * runs <= cycles * 4 && ti > 0) { cycles += t1; runs++; } \
        } \
        uint32_t refcycles = 0; int refruns = 0; \
        RUNREF(__VA_ARGS__); \
        for (int ti = 0; ti < BENCH_RUNS / 4; ti++) { \
            uint32_t t0 = (uint32_t)__rdtsc(); \
            RUNREF(__VA_ARGS__); \
            RUNREF(__VA_ARGS__); \
            RUNREF(__VA_ARGS__); \
            RUNREF(__VA_ARGS__); \
            uint32_t t1 = (uint32_t)__rdtsc() - t0; \
            if (t1 * refruns <= refcycles * 4 && ti > 0) { refcycles += t1; refruns++; } \
        } \
        x265_emms(); \
        float optperf = (10.0f * cycles / runs) / 4; \
        float refperf = (10.0f * refcycles / refruns) / 4; \
        printf(" | \t%3.2fx | ", refperf / optperf); \
        printf("\t %-8.2lf | \t %-8.2lf\n", optperf, refperf); \
    }

extern "C" {
#if X265_ARCH_X86
int PFX(stack_pagealign)(int (*func)(), int align);

/* detect when callee-saved regs aren't saved
 * needs an explicit asm check because it only sometimes crashes in normal use. */
intptr_t PFX(checkasm_call)(intptr_t (*func)(), int *ok, ...);
float PFX(checkasm_call_float)(float (*func)(), int *ok, ...);
#elif (X265_ARCH_ARM == 0 && X265_ARCH_ARM64 == 0)
#define PFX(stack_pagealign)(func, align) func()
#endif

#if X86_64

/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
 * This is done by clobbering the stack with junk around the stack pointer and calling the
 * assembly function through x265_checkasm_call with added dummy arguments which forces all
 * real arguments to be passed on the stack and not in registers. For 32-bit argument the
 * upper half of the 64-bit register location on the stack will now contain junk. Note that
 * this is dependent on compiler behavior and that interrupts etc. at the wrong time may
 * overwrite the junk written to the stack so there's no guarantee that it will always
 * detect all functions that assumes zero-extension.
 */
void PFX(checkasm_stack_clobber)(uint64_t clobber, ...);
#define checked(func, ...) ( \
        m_ok = 1, m_rand = (rand() & 0xffff) * 0x0001000100010001ULL, \
        PFX(checkasm_stack_clobber)(m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand), /* max_args+6 */ \
        PFX(checkasm_call)((intptr_t(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))

#define checked_float(func, ...) ( \
        m_ok = 1, m_rand = (rand() & 0xffff) * 0x0001000100010001ULL, \
        PFX(checkasm_stack_clobber)(m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand), /* max_args+6 */ \
        PFX(checkasm_call_float)((float(*)())func, &m_ok, 0, 0, 0, 0, __VA_ARGS__))
#define reportfail() if (!m_ok) { fflush(stdout); fprintf(stderr, "stack clobber check failed at %s:%d", __FILE__, __LINE__); abort(); }
#elif ARCH_X86
#define checked(func, ...) PFX(checkasm_call)((intptr_t(*)())func, &m_ok, __VA_ARGS__);
#define checked_float(func, ...) PFX(checkasm_call_float)((float(*)())func, &m_ok, __VA_ARGS__);

#else // if X86_64
#define checked(func, ...) func(__VA_ARGS__)
#define checked_float(func, ...) func(__VA_ARGS__)
#define reportfail()
#endif // if X86_64
}

#endif // ifndef _TESTHARNESS_H_