File: SearchLoopBottom.inc

package info (click to toggle)
transcode 3%3A1.1.7-3
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 11,644 kB
  • sloc: ansic: 116,927; sh: 11,468; xml: 2,849; makefile: 1,891; perl: 1,492; pascal: 526; php: 191; python: 144; sed: 43
file content (131 lines) | stat: -rw-r--r-- 4,281 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
// -*- c++ -*-

#ifdef IS_SSE2
//sse2 code deleted for now
#else

// Version for non-SSE2

// Choose the output pixels for the current 8-byte group and leave them in mm0.
// Register roles, as stated by the inline comments below:
//   mm6 = bob estimate, mm5 = weave estimate, mm7 = weave difference.
// NOTE(review): these registers are loaded before this fragment (outside this
// file's view) -- confirm against the matching loop-top include.
#ifdef SKIP_SEARCH
            "movq    %%mm6, %%mm0\n\t"            // just use the results of our weird bob
#else


            // JA 9/Dec/2002
            // failed experiment
            // but leave in placeholder for me to play about
#ifdef DONT_USE_STRANGE_BOB
            // Use the best weave if diffs less than 10 as that
            // means the image is still or moving cleanly
            // if there is motion we will clip which will catch anything
            // NOTE(review): the text above says 10 but the code subtracts _FOURS -- confirm intent.
            "psubusb "_FOURS", %%mm7\n\t"          // saturating subtract: byte becomes 0 where weave diff <= FOURS byte (presumably 4)
            "pxor    %%mm0, %%mm0\n\t"            // mm0 = 0, used as the comparison reference
            "pcmpeqb %%mm0, %%mm7\n\t"            // mm7 = ff where the byte reached 0 (weave better), else 00
            "pcmpeqb %%mm7, %%mm0\n\t"            // mm0 = ff where mm7 is 00, i.e. inverse mask (bob better)
            "pand    %%mm6, %%mm0\n\t"            // keep bob bytes where the bob mask is set
            "pand    %%mm5, %%mm7\n\t"            // keep weave bytes where the weave mask is set
            "por     %%mm7, %%mm0\n\t"            // merge the two disjoint selections into mm0
#else
            // Use the better of bob or weave
            //      pminub  mm4, TENS           // the most we care about
            // NOTE(review): mm4 presumably holds the bob difference, and the third
            // macro argument looks like a scratch register -- confirm against the
            // V_PMINUB definition.
            V_PMINUB ("%%mm4", _TENS, "%%mm0")   // cap mm4 at the TENS value: the most we care about

            "psubusb %%mm4, %%mm7\n\t"            // forgive that much from weave est? (saturating subtract)
            "psubusb "_FOURS", %%mm7\n\t"       // bias it a bit toward weave
            "pxor    %%mm0, %%mm0\n\t"            // mm0 = 0, used as the comparison reference
            "pcmpeqb %%mm0, %%mm7\n\t"            // mm7 = ff where the byte reached 0 (weave better), else 00
            "pcmpeqb %%mm7, %%mm0\n\t"            // mm0 = ff where mm7 is 00, i.e. inverse mask (bob better)
            "pand    %%mm6, %%mm0\n\t"            // keep bob bytes where the bob mask is set
            "pand    %%mm5, %%mm7\n\t"            // keep weave bytes where the weave mask is set
            "por     %%mm7, %%mm0\n\t"            // merge the two disjoint selections into mm0
#endif


                // Clamp the chosen value between Min_Vals and Max_Vals.
                //      pminub  mm0, Max_Vals       // but clip to catch the stray error
                V_PMINUB ("%%mm0", _Max_Vals, "%%mm1") // but clip to catch the stray error
                //      pmaxub  mm0, Min_Vals
                V_PMAXUB ("%%mm0", _Min_Vals)

#endif


            // eax = pDest; the stores that follow address the output as (eax + edx).
            "movl     "_pDest", %%eax\n\t"

#ifdef USE_VERTICAL_FILTER
            // Vertical-filter store: the result in mm0 is averaged with two
            // neighbouring source quadwords and written to two output lines.
            // eax holds pDest (loaded just above) and edx is the byte offset
            // within the line.
            // NOTE(review): ebx / ebx+ecx presumably address the source lines
            // above and below; they are set up outside this file -- confirm.
            // NOTE(review): the 3rd/4th V_PAVGB arguments look like a scratch
            // register and the shift mask for the non-SSE fallback -- confirm
            // against the macro definition.
            "movq    %%mm0, %%mm1\n\t"            // second copy for the second output line
            //      pavgb   mm0, qword ptr[ebx]
            V_PAVGB ("%%mm0", "(%%ebx)", "%%mm2", _ShiftMask)
            //      movntq  qword ptr[eax+edx], mm0
            V_MOVNTQ ("(%%eax, %%edx)", "%%mm0")
            //      pavgb   mm1, qword ptr[ebx+ecx]
            V_PAVGB ("%%mm1", "(%%ebx, %%ecx)", "%%mm2", _ShiftMask)
            // Each instruction literal must end in "\n\t": adjacent literals are
            // concatenated, and without the terminator this addl would run
            // straight into the text of the following store instruction.
            "addl   "_dst_pitchw", %%eax\n\t"      // eax += dst_pitchw: address the second output line
            //      movntq  qword ptr[eax+edx], mm1
            V_MOVNTQ ("(%%eax, %%edx)", "%%mm1")
#else

            // Plain store of the selected pixels.
            //      movntq  qword ptr[eax+edx], mm0
            V_MOVNTQ ("(%%eax, %%edx)", "%%mm0")
#endif

            // Loop tail: advance the in-line byte offset by one quadword and
            // repeat while it is still (unsigned) below Last8.
            "leal    8(%%edx), %%edx\n\t"       // bump offset pointer: edx += 8
            "cmpl    "_Last8", %%edx\n\t"       // done with line?
            "jb      1b\n\t"                    // no (edx < Last8): back to local label 1
#endif  // closes the IS_SSE2 / non-SSE2 selection opened at the top of this file

            // Restore ebx.
            // NOTE(review): the matching push is not in this file; presumably it is
            // in the loop-top include -- confirm the push/pop pairing there.
            "popl %%ebx\n\t"

        // The asm statement has no output operands; results are written through
        // memory, which is why "memory" appears in the clobber list.
        : /* no outputs */
        // Input operands, all passed as memory references ("m").
        // The order matches the _pBob ... _YMask names undefined right after this
        // statement.
        // NOTE(review): those names presumably expand to positional "%N" operand
        // references, in which case this order must not change -- confirm
        // against their #define site.
        : "m"(pBob),
          "m"(src_pitch2),
          "m"(ShiftMask),
          "m"(pDest),
          "m"(dst_pitchw),
          "m"(Last8),
          "m"(pSrc),
          "m"(pSrcP),
          "m"(pBobP),
          "m"(DiffThres[0]),
          "m"(Min_Vals[0]),
          "m"(Max_Vals[0]),
          "m"(FOURS[0]),
          "m"(TENS[0]),
          "m"(ONES[0]),
          "m"(UVMask[0]),
          "m"(Max_Mov[0]),
          "m"(YMask[0])

        // Clobbers: the general registers used as scratch, every MMX register,
        // the x87 stack registers (which the MMX registers alias), memory and flags.
        // ebx is not listed; it is restored by the popl above instead.
        : "eax", "ecx", "edx", "esi", "edi",
          "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
          "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
          "memory", "cc"
        );

// Drop the per-operand alias macros used inside the asm text above, one per
// input operand and in the same order as the operand list.
// NOTE(review): presumably done so a later inclusion of the loop-top file can
// redefine them without redefinition warnings -- confirm.
#undef _pBob
#undef _src_pitch2
#undef _ShiftMask
#undef _pDest
#undef _dst_pitchw
#undef _Last8
#undef _pSrc
#undef _pSrcP
#undef _pBobP
#undef _DiffThres
#undef _Min_Vals
#undef _Max_Vals
#undef _FOURS
#undef _TENS
#undef _ONES
#undef _UVMask
#undef _Max_Mov
#undef _YMask

        // adjust for next line
        // The four source-side pointers step by src_pitch2; the destination
        // pointer steps by dst_pitch2.
        // NOTE(review): the "2" suffix suggests twice the line pitch (stepping
        // over the other field's line) -- confirm where these are computed.
        pSrc  += src_pitch2;
        pSrcP += src_pitch2;
        pDest += dst_pitch2;
        pBob += src_pitch2;
        pBobP += src_pitch2;
    }

    return 0;