File: DI_MoComp2.c

package info (click to toggle)
zapping 0.10~cvs6-9
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 10,956 kB
  • ctags: 13,278
  • sloc: ansic: 111,145; asm: 11,770; sh: 9,816; xml: 2,742; makefile: 1,255; perl: 488
file content (408 lines) | stat: -rw-r--r-- 11,914 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
/*///////////////////////////////////////////////////////////////////////////
// $Id: DI_MoComp2.c,v 1.5 2006/04/12 01:44:02 mschimek Exp $
/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2003 Tom Barry & John Adcock.  All rights reserved.
// Copyright (c) 2005 Michael H. Schimek
/////////////////////////////////////////////////////////////////////////////
//
//  This file is subject to the terms of the GNU General Public License as
//  published by the Free Software Foundation.  A copy of this license is
//  included with this software distribution in the file COPYING.  If you
//  do not have a copy, you may obtain a copy by writing to the Free
//  Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
//
//  This software is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details
//
//  (From Tom Barry)
//  Also, this program is "Philanthropy-Ware".  That is, if you like it and 
//  feel the need to reward or inspire the author then please feel free (but
//  not obligated) to consider joining or donating to the Electronic Frontier
//  Foundation. This will help keep cyber space free of barbed wire and
//  bullsh*t.  See www.eff.org for details
/////////////////////////////////////////////////////////////////////////////
// CVS Log
//
// $Log: DI_MoComp2.c,v $
// Revision 1.5  2006/04/12 01:44:02  mschimek
// s/CPU_FEATURE_SSE/CPU_FEATURE_SSE_INT.
//
// Revision 1.4  2005/07/29 17:39:30  mschimek
// *** empty log message ***
//
// Revision 1.3  2005/06/28 19:17:10  mschimek
// *** empty log message ***
//
// Revision 1.2  2005/06/28 00:48:07  mschimek
// New file integrating all the code formerly in
// plugins/deinterlace/DI_MoComp2. Converted the MMX inline asm to vector
// intrinsics. Added support for 3DNow, SSE2, x86-64 and AltiVec. Cleaned
// up.
//
// Revision 1.1.2.5  2005/06/17 02:54:20  mschimek
// *** empty log message ***
//
// Revision 1.1.2.4  2005/05/31 02:40:34  mschimek
// *** empty log message ***
//
// Revision 1.1.2.3  2005/05/20 05:45:14  mschimek
// *** empty log message ***
//
// Revision 1.1.2.2  2005/05/17 19:58:32  mschimek
// *** empty log message ***
//
// Revision 1.1.2.1  2005/05/05 09:46:01  mschimek
// *** empty log message ***
//
// Revision 1.3  2005/02/05 22:18:27  mschimek
// Completed l18n.
//
// Revision 1.2  2005/01/31 07:03:38  mschimek
// Don't define size_t or we run into conflicts with system headers.
//
// Revision 1.1  2005/01/08 14:33:33  mschimek
// TomsMoCompMethod, MoComp2Method, VideoWeaveMethod, VideoBobMethod,
// TwoFrameMethod, OldGameMethod, Greedy2FrameMethod, GreedyMethod,
// DI_GreedyHSettings: Localized.
//
// Revision 1.3  2003/06/17 12:46:28  adcockj
// Added Help for new deinterlace methods
//
// Revision 1.2  2003/03/05 13:55:20  adcockj
// Allow SSE optimizations
//
// Revision 1.1  2003/01/02 13:15:00  adcockj
// Added new plug-ins ready for development by copying TomsMoComp and Gamma
//
//
// Log: DI_TomsMoComp.c,v
// Revision 1.5  2002/12/10 16:32:19  adcockj
// Fix StrangeBob for MMX
//
// Revision 1.4  2002/11/26 21:32:14  adcockj
// Made new strange bob method optional
//
// Revision 1.3  2002/07/08 18:16:43  adcockj
// final fixes for alpha 3
//
// Revision 1.2  2002/07/08 17:44:58  adcockj
// Corrected Settings messages
//
// Revision 1.1  2002/07/07 20:07:24  trbarry
// First cut at TomsMoComp, motion compensated deinterlace
//
// Revision 1.0  2002/05/04 16:13:33  trbarry
//
///////////////////////////////////////////////////////////////////////////*/

#include "windows.h"
#include <DS_Deinterlace.h>

SIMD_FN_PROTOS (DEINTERLACE_FUNC, DeinterlaceMoComp2);

#if SIMD & (CPU_FEATURE_MMX | CPU_FEATURE_3DNOW |			\
	    CPU_FEATURE_SSE_INT | CPU_FEATURE_SSE2 | CPU_FEATURE_SSE3 |	\
	    CPU_FEATURE_ALTIVEC)

/*
 * Write one vector-wide column of two output lines without any motion
 * analysis.
 *
 * pDest:   first of the two output lines.
 * pBob:    source line; the line src_bpl further on is read as well.
 * dst_bpl: offset from the first to the second output line.
 * src_bpl: offset from the first to the second source line.
 *
 * The first output line receives the vector loaded at pBob unchanged,
 * the second one receives fast_vavgu8() of the two source vectors.
 */
static void
simple_bob			(uint8_t *		pDest,
				 const uint8_t *	pBob,
				 unsigned long		dst_bpl,
				 unsigned long		src_bpl)
{
    vu8 upper;
    vu8 lower;

    /* Upper source line goes out as is. */
    upper = vload (pBob, 0);
    vstorent (pDest, 0, upper);

    /* The output line below it is interpolated from the source lines
       on either side. */
    lower = vload (pBob, src_bpl);
    vstorent (pDest, dst_bpl, fast_vavgu8 (upper, lower));
}

/*
 * MoComp2 deinterlacer.  This definition is compiled once per SIMD
 * variant; SIMD_NAME supplies the variant specific function name.
 *
 * pInfo supplies
 *   - the output buffer (Overlay) and its line pitch (OverlayPitch),
 *   - four input fields (PictureHistory[0..3]->pData, called F4..F1
 *     below) and their line pitch (InputPitch),
 *   - the number of bytes per line (LineLength) and the number of
 *     rows of one field (FieldHeight).
 *
 * Every row of F3 is copied to the output.  For the rows in between,
 * each vector of pixels is either interpolated within F3 ("bob",
 * edge directed) or averaged from F2 and F4 ("weave"), depending on
 * the differences found between the fields.  The first and last
 * vector of each row are handled by simple_bob(), the first and last
 * output rows are plain copies of F3.
 *
 * Returns TRUE after the frame was written.  Returns FALSE, without
 * writing anything, if the picture has fewer than two field rows or
 * fewer than two vectors per line.  In the SSE2 and SSE3 variants the
 * call is handed to DeinterlaceMoComp2_SSE, and its result returned,
 * if any buffer address, pitch or the line length is not a multiple
 * of 16.
 */
BOOL
SIMD_NAME (DeinterlaceMoComp2)	(TDeinterlaceInfo *pInfo)
{
    uint8_t *Dest;
    const uint8_t *F4;
    const uint8_t *F3;
    const uint8_t *F2;
    const uint8_t *F1;
    unsigned int byte_width;
    unsigned int height;
    unsigned long dst_bpl;
    unsigned long src_bpl;
    unsigned long dst_padding;
    unsigned long src_padding1;
    unsigned long src_padding2;

    if (SIMD & (CPU_FEATURE_SSE2 | CPU_FEATURE_SSE3)) {
	/* OR everything together: a single test of the low four bits
	   then tells whether any address or size is not a multiple
	   of 16.  (Presumably the 16 byte vector loads and stores
	   used below require that alignment -- confirm in the SIMD
	   headers.) */
	if (((unsigned long) pInfo->Overlay |
	     (unsigned long) pInfo->PictureHistory[0]->pData |
	     (unsigned long) pInfo->PictureHistory[1]->pData |
	     (unsigned long) pInfo->PictureHistory[2]->pData |
	     (unsigned long) pInfo->PictureHistory[3]->pData |
	     (unsigned long) pInfo->OverlayPitch |
	     (unsigned long)  pInfo->InputPitch |
	     (unsigned long) pInfo->LineLength) & 15)
	    return DeinterlaceMoComp2_SSE (pInfo);
    }

    byte_width = pInfo->LineLength;

    /* Both loop counters below are unsigned and computed as
       "size - 2".  With fewer than two field rows or fewer than two
       vectors per line they would wrap around to a huge value and
       the loops would run far beyond the buffers, so refuse such
       pictures before anything is written. */
    if (pInfo->FieldHeight < 2 || byte_width < 2 * sizeof (vu8))
	return FALSE;

    dst_bpl = pInfo->OverlayPitch;
    src_bpl = pInfo->InputPitch;

    /*
	dest	src (F3) odd				src (F3) even
	0	F2.0 (F3.0*)				F3.0
	1     { F3.0					F2.0 (F3.0*)
	2     { weave.0 (F1|3.n|n+1 + F2.n+1 + F4.n+1)	F3.1		}
	3       F3.1		weave.1 (F1|3.n|n+1 + F2.n + F4.n)	}
	4	weave.1					F3.2
	h-3	F3.h/2-2				weave.h/2-2
	h-2	F2.h/2-1 (F3.h/2-1*)			F3.h/2-1
	h-1	F3.h/2-1				F2.h/2-1 (F3.h/2-1*)

	F4	next field, opposite parity
	F3	this field
	F2	previous field, opposite parity
	F1	previous field, same parity
	h	frame height

	(*)	Actual data stored here. Why? Ask Tom. :-)
    */

    Dest = (uint8_t *) pInfo->Overlay;

    F4 = (const uint8_t *) pInfo->PictureHistory[0]->pData;
    F3 = (const uint8_t *) pInfo->PictureHistory[1]->pData;
    F2 = (const uint8_t *) pInfo->PictureHistory[2]->pData;
    F1 = (const uint8_t *) pInfo->PictureHistory[3]->pData;

    /* Top border.  An odd F3 contributes one copied line here, an
       even F3 two lines; in the even case F3 and F1 also move on
       to their second row. */
    if (pInfo->PictureHistory[1]->Flags & PICTURE_INTERLACED_ODD) {
	copy_line (Dest, F3, byte_width);
	Dest += dst_bpl;
    } else {
	copy_line_pair (Dest, F3, byte_width, dst_bpl);
	Dest += dst_bpl * 2;

	F3 += src_bpl;
	F1 += src_bpl;
    }

    /* We start in field row 1, no access in first column. */
    F4 += src_bpl + sizeof (vu8);
    F2 += src_bpl + sizeof (vu8);
    F1 += sizeof (vu8);

    /* We write two frame lines in parallel, incrementing Dest
       except after the last column. */
    dst_padding = dst_bpl * 2 - byte_width + sizeof (vu8);

    /* We read one field line, incrementing the pointer
       except after the last column. */
    src_padding1 = src_bpl - byte_width + sizeof (vu8);

    /* We read one field line and skip over first and last column. */
    src_padding2 = src_padding1 + sizeof (vu8);

    /* All but first and last field line where we cannot read the
       line above and below. */
    for (height = pInfo->FieldHeight - 2; height > 0; --height) {
	vu8 a, b, c, d, e, f;
	unsigned int count;

	/* Simple bob first column. */
	simple_bob (Dest, F3, dst_bpl, src_bpl);

	F3 += sizeof (vu8);
	Dest += sizeof (vu8);

	/* All but first and last column where we cannot read the
	   pixels left and right. */
	for (count = byte_width / sizeof (vu8) - 2; count > 0; --count) {
	    vu8 diff_af, diff_cd, diff_be;
	    vu8 avg_af, avg_cd, avg_be;
	    vu8 best, diag, bob, weave;
	    vu8 mm1, mm2, mm4, mm6, next, prev, above, below;
	    v32 tm, cm, bm;

	    /* StrangeBob */

	    /*///////////////////////////////////////////////////////////////
	    // This implements the algorithm described in:
	    // T. Chen, H.R. Wu; & Z.H. Yu, 
	    // "Efficient deinterlacing algorithm using edge-based line
	    // average interpolation", 
	    // Optical Engineering, Volume 39, Issue 8, (2000)
	    ///////////////////////////////////////////////////////////////*/

	    /* First, get and save our possible Bob values.
	       Assume our pixels are laid out as follows with x the
	       calc'd bob value and the other pixels are from the
	       current field.
	       a b c 	current field   (F3.n)
	         x	calculated line (F2.n)
	       d e f 	current field   (F3.n+1)
	    */
	    /* XXX could calculate a, d from b, e of previous loop
	       and integrate first 8/16 bytes into the loop. */
	    uload2t (&a, &b, &c, F3, 0);
	    uload2t (&d, &e, &f, F3, src_bpl);
	    F3 += sizeof (vu8);

	    /* Difference and average along both diagonals. */
	    diff_af = vabsdiffu8 (a, f);
	    avg_af = fast_vavgu8 (a, f);

	    diff_cd = vabsdiffu8 (c, d);
	    avg_cd = fast_vavgu8 (c, d);

	    /* The F3 line goes unchanged into the first of the two
	       output lines. */
	    vstorent (Dest, 0, b);

	    /* Choose between the a-f and the c-d diagonal by comparing
	       the averaged differences of the neighbouring pixel pairs
	       (mm6 against mm4). */
	    mm4 = fast_vavgu8 (vabsdiffu8 (b, d), vabsdiffu8 (c, e));
	    mm6 = fast_vavgu8 (vabsdiffu8 (a, e), vabsdiffu8 (b, f));
	    mm1 = (vu8) vcmpleu8 (mm6, mm4);
	    best = vsel (mm1, diff_af, diff_cd);
	    diag = vsel (mm1, avg_af, avg_cd);

	    /* Difference and average in the vertical direction. */
	    diff_be = vabsdiffu8 (b, e);
	    avg_be = fast_vavgu8 (b, e);

	    mm2 = (vu8) vcmpleu8 (diff_be, best);
	    /* we only want luma from diagonals */
	    mm2 = vor (mm2, (vu8) UVMask);
	    mm1 = (vu8) vcmpleu8 (diff_be, vsplatu8_15);
	    /* we let bob through always if diff is small */
	    mm1 = vor (mm1, mm2);

	    bob = vsel (mm1, avg_be, diag);

	    /* SimpleWeave */

	    next = vload (F4, 0);
	    F4 += sizeof (vu8);

	    prev = vload (F2, 0);
	    F2 += sizeof (vu8);

	    /* "movement" in the centre */
	    cm = (v32) vabsdiffu8 (prev, next);
	    weave = fast_vavgu8 (prev, next);

	    /* operate only on luma as we will always bob the chroma */
	    cm = (v32) yuyv2yy ((vu8) cm);

	    above = vload (F1, 0);
	    below = vload (F1, src_bpl);
	    F1 += sizeof (vu8);

	    /* "movement" in the top and bottom btw curr and prev frame */
	    tm = (v32) yuyv2yy (vabsdiffu8 (b, above));
	    bm = (v32) yuyv2yy (vabsdiffu8 (e, below));

	    /* 0xff where movement (xm > 15) in either of *two* pixels */
	    tm = (v32) vcmpnz32 ((v32) vsubsu8 ((vu8) tm, vsplatu8_15));
	    cm = (v32) vcmpnz32 ((v32) vsubsu8 ((vu8) cm, vsplatu8_15));
	    bm = (v32) vcmpnz32 ((v32) vsubsu8 ((vu8) bm, vsplatu8_15));

	    mm2 = (vu8) vand (tm, bm); /* top and bottom moving */
	    mm1 = vor ((vu8) cm, mm2); /* where we should bob */
	    mm1 = vor (mm1, (vu8) UVMask);

	    /* Second output line: the bob value where mm1 asks for it,
	       the weave value elsewhere. */
	    vstorent (Dest, dst_bpl, vsel (mm1, bob, weave));

	    Dest += sizeof (vu8);
	}

	/* Simple bob last column. */
	simple_bob (Dest, F3, dst_bpl, src_bpl);

	/* Move all pointers to their start position in the next row
	   (two rows down for Dest). */
	F4 += src_padding2;
	F3 += src_padding1;
	F2 += src_padding2;
	F1 += src_padding2;
	Dest += dst_padding;
    }

    /* Remaining lines. */

    if (pInfo->PictureHistory[1]->Flags & PICTURE_INTERLACED_ODD) {
	copy_line (Dest, F3, byte_width);
	Dest += dst_bpl;
	F3 += src_bpl;
    }

    copy_line_pair (Dest, F3, byte_width, dst_bpl);

    vempty ();

    return TRUE;
}

#elif !SIMD

/*
 * Template descriptor of the MoComp2 method.  It is not handed out
 * directly: DI_MoComp2_GetDeinterlacePluginInfo() returns a copy with
 * pfnAlgorithm filled in.
 *
 * All initializers are positional, so their order has to match the
 * member order of DEINTERLACE_METHOD, which is declared outside this
 * file.  NOTE(review): the meaning of the unlabelled values below
 * cannot be told from this file -- check them against that declaration
 * before changing any.
 */
static const DEINTERLACE_METHOD
MoComp2Method = {
    sizeof(DEINTERLACE_METHOD), /* size of this structure */
    DEINTERLACE_CURRENT_VERSION,
    /* Presumably the name shown to the user, with N_() marking it for
       translation -- confirm. */
    N_("Video (MoComp2)"), 
    "MoComp2",
    FALSE, 
    FALSE, 
    /* pfnAlgorithm */ NULL, /* set on the copy returned to the caller */
    50, 
    60,
    0,
    NULL,
    INDEX_VIDEO_MOCOMP2,
    NULL,
    NULL,
    NULL,
    NULL,
    4, /* number fields needed */
    0,
    0,
    0,
    NULL,
    0,
    FALSE,
    FALSE,
    IDH_MOCOMP2,
};

/*
 * Create the plugin descriptor of the MoComp2 deinterlacer.
 *
 * SIMD_FN_SELECT picks an implementation of DeinterlaceMoComp2 among
 * the listed CPU features.  If it yields one, the function returns a
 * malloc()ed copy of MoComp2Method with pfnAlgorithm pointing to that
 * implementation.  Nothing in this file frees the copy, so releasing
 * it is presumably up to the caller.
 *
 * Returns NULL if no implementation was selected or if the allocation
 * failed.
 */
DEINTERLACE_METHOD *
DI_MoComp2_GetDeinterlacePluginInfo (void)
{
    DEINTERLACE_METHOD *m;
    DEINTERLACE_FUNC *f;

    f = SIMD_FN_SELECT (DeinterlaceMoComp2,
			CPU_FEATURE_MMX | CPU_FEATURE_3DNOW |
			CPU_FEATURE_SSE_INT | CPU_FEATURE_SSE2 |
			CPU_FEATURE_SSE3 | CPU_FEATURE_ALTIVEC);

    if (!f)
	return NULL;

    m = malloc (sizeof (*m));

    /* Out of memory: report "no plugin" rather than writing through
       a null pointer. */
    if (!m)
	return NULL;

    *m = MoComp2Method;
    m->pfnAlgorithm = f;

    return m;
}

#endif /* !SIMD */

/*
Local Variables:
c-basic-offset: 4
End:
 */