File: vp_warpA.m4

package info (click to toggle)
volpack 1.0b3-10
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 4,864 kB
sloc: ansic: 12,208; sh: 9,078; makefile: 90; csh: 76
file content (556 lines) | stat: -rw-r--r-- 19,292 bytes
parent folder | download | duplicates (6)
/*
 * vp_warpA.m4
 *
 * One-pass image warping routine for affine transformations.
 *
 * Copyright (c) 1994 The Board of Trustees of The Leland Stanford
 * Junior University.  All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation for any purpose is hereby granted without fee, provided
 * that the above copyright notice and this permission notice appear in
 * all copies of this software and that you do not sell the software.
 * Commercial licensing is available by contacting the author.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Author:
 *    Phil Lacroute
 *    Computer Systems Laboratory
 *    Electrical Engineering Dept.
 *    Stanford University
 */

/*
 * $Date: 1994/12/30 23:52:38 $
 * $Revision: 1.21 $
 */

#include "vp_global.h"

#include <string.h>

dnl Description:
dnl    This is an m4 source file which defines a C procedure to warp
dnl    a 2D image according to an affine transformation.  The macro
dnl    definitions allow the procedure to be specialized for a particular
dnl    output pixel type.
dnl
dnl    To produce a C source file, run this file through m4 with the
dnl    following m4 macros defined:
dnl
dnl    FuncName			name of the C function to produce
dnl    IntPixelType		intermediate image element type (GrayIntPixel
dnl				or RGBIntPixel)	
dnl    BGRPixel			1 to store color pixel in BGR order,
dnl				0 otherwise
dnl    ColorChannels		number of channels for color (0, 1 or 3)
dnl    OpacityChannels		number of channels for opacity (0, 1)
dnl
dnl    OR, define "SourceFile" to be a file name of the form
dnl         vp_warpA????.c
dnl    where the four wildcard characters (call them W, X, Y and Z)
dnl    can have one of the following values:
dnl	    W: number of color channels in intermediate image ("1" or "3")
dnl	    X: number of color channels in the output image ("0", "1" or "3")
dnl	    Y: number of opacity channels in output image ("0" or "1")
dnl	    Z: "R" to store color pixel in reverse order (ABGR),
dnl	       "N" to store color pixel in normal order (RGBA)

dnl When SourceFile is defined (form vp_warpA????.c), derive all of the
dnl specialization macros from fixed character positions in the name.
dnl Positions are 0-based: position 7 is the "A" that follows "vp_warp",
dnl and positions 8, 9, 10 and 11 hold the W, X, Y and Z codes.  The
dnl generated function is named VPWarp followed by the five characters
dnl starting at position 7.
dnl NOTE(review): a W code other than 1 or 3 leaves IntPixelType
dnl undefined, and any Z code other than R selects normal order.
ifdef(`SourceFile', `
    define(FuncNameStr,		`substr(SourceFile, 7, 5)')
    define(ColorChannelsIn,	`substr(SourceFile, 8, 1)')
    define(ColorChannels,	`substr(SourceFile, 9, 1)')
    define(OpacityChannels,	`substr(SourceFile, 10, 1)')
    define(ReverseOrder,	`substr(SourceFile, 11, 1)')

    define(FuncName,		`VPWarp'FuncNameStr())
    ifelse(ColorChannelsIn, `1', `define(IntPixelType, GrayIntPixel)',
	   ColorChannelsIn, `3', `define(IntPixelType, RGBIntPixel)')
    ifelse(ReverseOrder, `R', `define(BGRPixel, 1)', `define(BGRPixel, 0)')
')

dnl Macros defined below:
dnl    DeclareAccumulator()	declare a pixel accumulator
dnl    ClearAccumulator()	clear the pixel accumulator
dnl    Accumulate1(wgt, ipixel)	multiply ipixel by wgt and add to accumulator
dnl    Accumulate4(wgt0, ipixel0, wgt1, ipixel1, wgt2, ipixel2, wgt3, ipixel3)
dnl				multiply each pixel by corresponding weight
dnl				and store in accumulator
dnl    StoreAccumulator(dst)	write the pixel accumulator to dst
dnl

dnl
dnl Definitions that depend on the number of color channels.
dnl

dnl ColorChannels = 0: the output pixel carries no color, so every
dnl color accumulator macro expands to nothing.
ifelse(ColorChannels, 0, `
    define(`DeclareColorAccumulator', `')
    define(`ClearColorAccumulator', `')
    define(`ColorAccumulate1', `')
    define(`ColorAccumulate4', `')
    define(`StoreColorAccumulator', `')')

dnl ColorChannels = 1: a single gray value is accumulated in the float
dnl gray_acc from the clrflt field of the intermediate pixels.
dnl    ColorAccumulate1(wgt, ipixel)	adds wgt * ipixel.clrflt
dnl    ColorAccumulate4(w0, p0, ..., w3, p3)
dnl				overwrites the accumulator with the
dnl				four-term weighted sum (no clear needed)
dnl    StoreColorAccumulator(dst)	truncates to an integer, clamps the
dnl				result to at most 255 and assigns it
dnl				to dst (only the first argument is used)
dnl NOTE(review): there is no clamp at zero; presumably weights and
dnl pixel values are never negative -- confirm.
ifelse(ColorChannels, 1, `
    define(`DeclareColorAccumulator', `float gray_acc; int gray_acc_int;')
    define(`ClearColorAccumulator', `gray_acc = 0;')
    define(`ColorAccumulate1', `gray_acc += ($1) * ($2.clrflt);')
    define(`ColorAccumulate4', `
	gray_acc = ($1) * ($2.clrflt) +
		   ($3) * ($4.clrflt) +	
		   ($5) * ($6.clrflt) +
		   ($7) * ($8.clrflt);')
    define(`StoreColorAccumulator', `
	gray_acc_int = gray_acc;
	if (gray_acc_int > 255)
	    gray_acc_int = 255;
	($1) = gray_acc_int;')')

dnl ColorChannels = 3: red, green and blue are accumulated separately
dnl in the floats r_acc, g_acc and b_acc from the rclrflt, gclrflt and
dnl bclrflt fields of the intermediate pixels.
dnl    ColorAccumulate1(wgt, ipixel)	adds wgt * ipixel.<channel> to
dnl				each of the three accumulators
dnl    ColorAccumulate4(w0, p0, ..., w3, p3)
dnl				overwrites each accumulator with the
dnl				four-term weighted sum (no clear needed)
dnl    StoreColorAccumulator(rdst, gdst, bdst)
dnl				truncates each channel to an integer,
dnl				clamps it to at most 255 and assigns
dnl				red to rdst, green to gdst, blue to bdst
dnl NOTE(review): there is no clamp at zero; presumably weights and
dnl pixel values are never negative -- confirm.
ifelse(ColorChannels, 3, `
    define(`DeclareColorAccumulator', `float r_acc, g_acc, b_acc;
    int r_acc_int, g_acc_int, b_acc_int;')
    define(`ClearColorAccumulator', `r_acc = g_acc = b_acc = 0;')
    define(`ColorAccumulate1', `
		r_acc += ($1) * ($2.rclrflt);
		g_acc += ($1) * ($2.gclrflt);
		b_acc += ($1) * ($2.bclrflt);')
    define(`ColorAccumulate4', `
		r_acc = ($1) * ($2.rclrflt) +
			($3) * ($4.rclrflt) +	
			($5) * ($6.rclrflt) +
			($7) * ($8.rclrflt);
		g_acc = ($1) * ($2.gclrflt) +
			($3) * ($4.gclrflt) +	
			($5) * ($6.gclrflt) +
			($7) * ($8.gclrflt);
		b_acc = ($1) * ($2.bclrflt) +
			($3) * ($4.bclrflt) +	
			($5) * ($6.bclrflt) +
			($7) * ($8.bclrflt);')
    define(`StoreColorAccumulator', `
		r_acc_int = r_acc;
		if (r_acc_int > 255)
		    r_acc_int = 255;
		($1) = r_acc_int;
		g_acc_int = g_acc;
		if (g_acc_int > 255)
		    g_acc_int = 255;
		($2) = g_acc_int;
		b_acc_int = b_acc;
		if (b_acc_int > 255)
		    b_acc_int = 255;
		($3) = b_acc_int;')')

dnl
dnl Definitions that depend on the number of opacity channels.
dnl

dnl OpacityChannels = 0: the output pixel carries no opacity, so every
dnl opacity accumulator macro expands to nothing.
ifelse(OpacityChannels, 0, `
    define(`DeclareOpacityAccumulator', `')
    define(`ClearOpacityAccumulator', `')
    define(`OpacityAccumulate1', `')
    define(`OpacityAccumulate4', `')
    define(`StoreOpacityAccumulator', `')')

dnl OpacityChannels = 1: opacity is accumulated in the float opc_acc
dnl from the opcflt field of the intermediate pixels.
dnl    OpacityAccumulate1(wgt, ipixel)	adds wgt * ipixel.opcflt
dnl    OpacityAccumulate4(w0, p0, ..., w3, p3)
dnl				overwrites the accumulator with the
dnl				four-term weighted sum (no clear needed)
dnl    StoreOpacityAccumulator(dst)	multiplies by 255, truncates to an
dnl				integer, clamps the result to at most
dnl				255 and assigns it to dst
dnl Unlike the color channels, opacity is rescaled by 255 on store;
dnl presumably opcflt lies in the range 0 to 1 -- confirm.
ifelse(OpacityChannels, 1, `
    define(`DeclareOpacityAccumulator', `float opc_acc; int opc_acc_int;')
    define(`ClearOpacityAccumulator', `opc_acc = 0;')
    define(`OpacityAccumulate1', `opc_acc += ($1) * ($2.opcflt);')
    define(`OpacityAccumulate4', `
	opc_acc = ($1) * ($2.opcflt) +
		  ($3) * ($4.opcflt) +	
		  ($5) * ($6.opcflt) +
		  ($7) * ($8.opcflt);')
    define(`StoreOpacityAccumulator', `
	opc_acc_int = opc_acc * (float)255.;
	if (opc_acc_int > 255)
	    opc_acc_int = 255;
	($1) = opc_acc_int;')')

dnl
dnl Other definitions.
dnl

dnl OutputChannels is the total number of channels in one output pixel
dnl (color plus opacity); the warper advances its output pointer by
dnl this many elements per pixel.
define(`OutputChannels', ((ColorChannels) + (OpacityChannels)))
dnl Each macro below simply expands the color variant followed by the
dnl opacity variant, so the warper handles both with a single call.
define(`DeclareAccumulator', `
	DeclareColorAccumulator
	DeclareOpacityAccumulator')
define(`ClearAccumulator', `
	ClearColorAccumulator
	ClearOpacityAccumulator')
define(`Accumulate1', `
	ColorAccumulate1($1, $2)
	OpacityAccumulate1($1, $2)')
define(`Accumulate4', `
	ColorAccumulate4($1, $2, $3, $4, $5, $6, $7, $8)
	OpacityAccumulate4($1, $2, $3, $4, $5, $6, $7, $8)')
dnl StoreAccumulator(dst) writes the accumulated pixel into the array
dnl dst, one element per output channel.
dnl    Normal order (BGRPixel = 0): the color channels come first,
dnl	followed by opacity at position ColorChannels.
dnl    Reverse order (BGRPixel = 1): opacity comes first, followed by
dnl	the color channels in reverse (B, G, R for three channels).
dnl The position of the first color argument in reverse order is
dnl computed from ColorChannels, so that a one-channel (gray) pixel is
dnl stored directly after the opacity channel and never outside the
dnl pixel.  For three color channels the expansion is unchanged.
dnl The unused second and third arguments are discarded by the
dnl one-channel and zero-channel variants of StoreColorAccumulator.
ifelse(BGRPixel, 0, `
	define(`StoreAccumulator', `
	    StoreColorAccumulator(($1)[0], ($1)[1], ($1)[2])
	    StoreOpacityAccumulator(($1)[ColorChannels])')', `
	define(`StoreAccumulator', `
	    StoreOpacityAccumulator(($1)[0])
	    StoreColorAccumulator(($1)[OpacityChannels+ColorChannels-1],
		($1)[OpacityChannels+1], ($1)[OpacityChannels+0])')')

/* scale a fraction f by 2^31 and truncate it to an int, giving a
   fixed-point number with 31 fraction bits; f is normally in the
   interval [0,1), but the per-pixel increments passed by the warper
   may also be negative fractions */
#define FLTFRAC_TO_FIX31(f)	((int)(2147483648.0 * (f)))

/* keep only the most significant WARP_WEIGHT_INDEX_BITS fraction bits
   of a non-negative 31-bit fixed-point value, for use as a weight
   table subscript */
#define FIX31_TO_WGTIND(f)	((f) >> (31 - (WARP_WEIGHT_INDEX_BITS)))

/* bilirp weight table, declared here and defined elsewhere; the warper
   subscripts it first by the quantized y fraction and then by the
   quantized x fraction, and uses the four weights found there for the
   input pixels at (x,y), (x+1,y), (x,y+1) and (x+1,y+1) in that order;
   presumably filled in by VPComputeWarpTables -- confirm */
extern float VPBilirpWeight[WARP_WEIGHT_ENTRIES][WARP_WEIGHT_ENTRIES][4];

/*
 * FuncName
 *
 * One-pass warper.  Transforms in_image to out_image according to
 * the affine warp specified by warp_matrix.  The resampling filter
 * is a bilirp (suitable for upsampling only).
 *
 * Each output scanline is split into five consecutive runs: zero
 * pixels on the left, left edge pixels whose 2x2 filter footprint
 * only partly overlaps the input image, pixels whose footprint lies
 * fully inside the input image, right edge pixels, and zero pixels
 * on the right.  The run boundaries are taken from the trapezoids
 * computed by VPAffineImageOverlap.
 *
 * The position in the input image is tracked incrementally as an
 * integer pixel coordinate plus a fixed-point fraction with 31
 * fraction bits; the most significant fraction bits select the
 * four bilirp weights from VPBilirpWeight.
 *
 * NOTE(review): warp_matrix is inverted without checking for a zero
 * determinant, and rows of the input image are stepped by in_width
 * pixels even though the start of each scan is located with
 * in_bytes_per_scan; presumably callers always pass an invertible
 * matrix and a tightly packed input image -- confirm.
 */

void
FuncName (in_image, in_width, in_height, in_bytes_per_scan,
	  out_image, out_width, out_height, out_bytes_per_scan,
	  warp_matrix)
IntPixelType *in_image;		/* input image data */
int in_width;			/* size of input image */
int in_height;
int in_bytes_per_scan;		/* bytes per scanline in input image */
char *out_image;		/* output image data */
int out_width;			/* size of output image */
int out_height;
int out_bytes_per_scan;		/* bytes per scanline in output image */
vpMatrix3 warp_matrix;		/* [ outx ]                 [ inx ] */
				/* [ outy ] = warp_matrix * [ iny ] */
				/* [   1  ]                 [  1  ] */
{
    Trapezoid full_overlap[9];	/* description of the area of overlap
				   of output image (shrunk by the size
				   of the filter kernel) with input image */
    Trapezoid part_overlap[9];	/* description of the area of overlap
				   of output image (enlarged by the size
				   of the filter kernel) with input image */
    int region;			/* index into full/part_overlap */
    char *out_ptr;		/* pointer to current pixel of output image */
    int out_scan_y;		/* coordinate of current output scanline */
    int scans_to_next_vertex;	/* number of scans left to process before
				   the next vertex is reached */
    IntPixelType *in_ptr;	/* pointer to current pixel of input image */
    double x_lft_full, x_rgt_full; /* intersection of scan with full_overlap */
    double x_lft_part, x_rgt_part; /* intersection of scan with part_overlap */
    int no_full_pixels;		/* true if full_overlap is empty for scan */
    double in_x, in_y;		/* exact coordinates in the input image of
				   the current output image pixel */
    int in_x_int, in_y_int;	/* coordinates of the nearest input image
				   pixel to the upper-left of the current
				   output image pixel */
    int xfrac, yfrac;		/* in_x - in_x_int and in_y - in_y_int,
				   stored as a fixed-point number with 31 bits
				   of fraction */
    int xfrac_incr, yfrac_incr;	/* increments to xfrac and yfrac to give
				   the fractions for the next output image
				   pixel in the current scan */
    unsigned xfrac_sum, yfrac_sum; /* xfrac + xfrac_incr and yfrac +
				   yfrac_incr before the carry (bit 31)
				   is stripped off */
    double in_x_incr, in_y_incr;/* increments to in_x and in_y to give the
				   input image coordinates of the next
				   output image pixel in the current scan
				   (equal to dx_in/dx_out and dy_in/dx_out) */
    int in_x_incr_int, in_y_incr_int; /* integer part of in_x/y_incr */
    int in_x_incr_dlt, in_y_incr_dlt; /* sign of in_x/y_incr */
    float *wptr;		/* pointer into weight table */
    int lft_zero_cnt;		/* # zero pixels on left edge of scan */
    int lft_edge_cnt;		/* # pixels on left w/ part filter overlap */
    int full_cnt;		/* # pixels w/ full filter overlap */
    int rgt_edge_cnt;		/* # pixels on rgt w/ part filter overlap */
    int rgt_zero_cnt;		/* # zero pixels on right edge of scan */
    int x;			/* pixel index */
    DeclareAccumulator		/* pixel accumulator */
    double denom;		/* reciprocal of the determinant of the
				   upper-left 2x2 part of warp_matrix */
    int c;			/* channel counter (DEBUG) or byte count */

#ifdef DEBUG
    /* prefill the output with 255 so that pixels which are never
       written stand out */
    {
	int y;

	for (y = 0; y < out_height; y++) {
	    out_ptr = out_image + y*out_bytes_per_scan;
	    for (x = 0; x < out_width; x++) {
		for (c = 0; c < OutputChannels; c++)
		    *out_ptr++ = 255;
	    }
	}
    }
#endif

    /* initialize tables */
    VPComputeWarpTables();

    /* compute the intersection of the input image and the output image */
    /* filter width = 2.0 in input image space (triangle filter) */
    VPAffineImageOverlap(in_width, in_height, out_width, out_height,
			 warp_matrix, 2., full_overlap, part_overlap);

    /* compute the output image */
    out_ptr = out_image;
    out_scan_y = 0;
    denom = 1. / (warp_matrix[0][0] * warp_matrix[1][1] -
		  warp_matrix[0][1] * warp_matrix[1][0]);

    /* step in the input image for one pixel step along an output scan,
       split into an integer part (truncated toward zero), a sign and
       a fixed-point fraction */
    in_x_incr = warp_matrix[1][1]*denom;
    in_y_incr = -warp_matrix[1][0]*denom;
    if (in_x_incr < 0) {
	in_x_incr_int = (int)ceil(in_x_incr);
	in_x_incr_dlt = -1;
    } else {
	in_x_incr_int = (int)floor(in_x_incr);
	in_x_incr_dlt = 1;
    }
    if (in_y_incr < 0) {
	in_y_incr_int = (int)ceil(in_y_incr);
	in_y_incr_dlt = -1;
    } else {
	in_y_incr_int = (int)floor(in_y_incr);
	in_y_incr_dlt = 1;
    }
    xfrac_incr = FLTFRAC_TO_FIX31(in_x_incr - in_x_incr_int);
    yfrac_incr = FLTFRAC_TO_FIX31(in_y_incr - in_y_incr_int);
    for (region = 0; region < 9; region++) {
	/* check for empty region */
	if (part_overlap[region].miny >= out_height) {
	    break;
	}

	/* check if this region of the output image is unaffected by
	   the input image */
	if (part_overlap[region].x_top_lft >
	    part_overlap[region].x_top_rgt) {
	    c = (part_overlap[region].maxy - part_overlap[region].miny + 1) *
		out_bytes_per_scan;
	    memset(out_ptr, 0, c);
	    out_ptr += c;
	    out_scan_y += part_overlap[region].maxy -
			  part_overlap[region].miny + 1;
	    continue;
	}

	/* process scanlines of this region */
	scans_to_next_vertex = part_overlap[region].maxy -
			       part_overlap[region].miny + 1;
	x_lft_full = full_overlap[region].x_top_lft;
	x_rgt_full = full_overlap[region].x_top_rgt;
	x_lft_part = part_overlap[region].x_top_lft;
	x_rgt_part = part_overlap[region].x_top_rgt;
	if (x_lft_full > x_rgt_full)
	    no_full_pixels = 1;
	else
	    no_full_pixels = 0;
	ASSERT(scans_to_next_vertex > 0);
	ASSERT(out_scan_y == part_overlap[region].miny);
	while (scans_to_next_vertex > 0) {
	    /* compute the portions of the scanline which are zero
	       and which intersect the full and partially-full regions;
	       every boundary is clamped to [0, out_width] and every
	       count is forced to be non-negative */
	    lft_zero_cnt = (int)floor(x_lft_part);
	    if (lft_zero_cnt < 0)
		lft_zero_cnt = 0;
	    else if (lft_zero_cnt > out_width)
		lft_zero_cnt = out_width;
	    if (no_full_pixels) {
		/* no pixel has full overlap: everything between the
		   zero runs is handled as left edge pixels */
		lft_edge_cnt = (int)ceil(x_rgt_part);
		if (lft_edge_cnt < 0)
		    lft_edge_cnt = 0;
		else if (lft_edge_cnt > out_width)
		    lft_edge_cnt = out_width;
		lft_edge_cnt -= lft_zero_cnt;
		if (lft_edge_cnt < 0)
		    lft_edge_cnt = 0;
		full_cnt = 0;
		rgt_edge_cnt = 0;
		rgt_zero_cnt = out_width - lft_zero_cnt - lft_edge_cnt;
	    } else {
		lft_edge_cnt = (int)ceil(x_lft_full);
		if (lft_edge_cnt < 0)
		    lft_edge_cnt = 0;
		else if (lft_edge_cnt > out_width)
		    lft_edge_cnt = out_width;
		lft_edge_cnt -= lft_zero_cnt;
		if (lft_edge_cnt < 0)
		    lft_edge_cnt = 0;
		full_cnt = (int)floor(x_rgt_full);
		if (full_cnt < 0)
		    full_cnt = 0;
		else if (full_cnt > out_width)
		    full_cnt = out_width;
		full_cnt -= lft_edge_cnt + lft_zero_cnt;
		if (full_cnt < 0)
		    full_cnt = 0;
		rgt_edge_cnt = (int)ceil(x_rgt_part);
		if (rgt_edge_cnt < 0)
		    rgt_edge_cnt = 0;
		else if (rgt_edge_cnt > out_width)
		    rgt_edge_cnt = out_width;
		rgt_edge_cnt -= full_cnt + lft_edge_cnt + lft_zero_cnt;
		if (rgt_edge_cnt < 0)
		    rgt_edge_cnt = 0;
		rgt_zero_cnt = out_width - lft_zero_cnt - lft_edge_cnt -
			       full_cnt - rgt_edge_cnt;
	    }

	    /* reverse map the first left-edge output pixel coordinate into
	       the input image coordinate system */
	    in_x = ((lft_zero_cnt - warp_matrix[0][2]) * warp_matrix[1][1] -
		    (out_scan_y - warp_matrix[1][2])*warp_matrix[0][1])*denom;
	    in_y = (-(lft_zero_cnt - warp_matrix[0][2]) * warp_matrix[1][0] +
		    (out_scan_y - warp_matrix[1][2])*warp_matrix[0][0])*denom;
	    in_x_int = (int)floor(in_x);
	    in_y_int = (int)floor(in_y);
	    in_ptr = (IntPixelType *)(((char *)in_image + in_y_int *
				       in_bytes_per_scan)) + in_x_int;

	    /* compute the weight lookup table indices and increments */
	    xfrac = FLTFRAC_TO_FIX31(in_x - in_x_int);
	    yfrac = FLTFRAC_TO_FIX31(in_y - in_y_int);

	    /* zero out unaffected pixels on left edge of scan */
	    if (lft_zero_cnt > 0) {
		memset(out_ptr, 0, lft_zero_cnt * OutputChannels);
		out_ptr += lft_zero_cnt * OutputChannels;
	    }

	    /* process left edge case pixels */
	    for (x = lft_zero_cnt; x < lft_zero_cnt + lft_edge_cnt; x++) {
		wptr = VPBilirpWeight[FIX31_TO_WGTIND(yfrac)]
				     [FIX31_TO_WGTIND(xfrac)];
		ClearAccumulator();
		/* add only the input pixels that lie inside the image */
		if (in_x_int >= 0 && in_x_int < in_width) {
		    if (in_y_int >= 0 && in_y_int < in_height) {
			Accumulate1(wptr[0], in_ptr[0]);
		    }
		    if (in_y_int+1 >= 0 && in_y_int+1 < in_height) {
			Accumulate1(wptr[2], in_ptr[in_width]);
		    }
		}
		if (in_x_int+1 >= 0 && in_x_int+1 < in_width) {
		    if (in_y_int >= 0 && in_y_int < in_height) {
			Accumulate1(wptr[1], in_ptr[1]);
		    }
		    if (in_y_int+1 >= 0 && in_y_int+1 < in_height) {
			Accumulate1(wptr[3], in_ptr[in_width + 1]);
		    }
		}
		StoreAccumulator(out_ptr);
		out_ptr += OutputChannels;

		/* advance the fractions with unsigned arithmetic, since
		   the sum can need more than 31 bits; bit 31 of the sum
		   signals a carry (positive step) or a borrow (negative
		   step) into the integer coordinate */
		xfrac_sum = (unsigned)xfrac + (unsigned)xfrac_incr;
		yfrac_sum = (unsigned)yfrac + (unsigned)yfrac_incr;
		xfrac = (int)(xfrac_sum & 0x7fffffffU);
		yfrac = (int)(yfrac_sum & 0x7fffffffU);
		if (xfrac_sum & 0x80000000U) {
		    in_x_int += in_x_incr_int + in_x_incr_dlt;
		    in_ptr += in_x_incr_int + in_x_incr_dlt;
		} else {
		    in_x_int += in_x_incr_int;
		    in_ptr += in_x_incr_int;
		}
		if (yfrac_sum & 0x80000000U) {
		    in_y_int += in_y_incr_int + in_y_incr_dlt;
		    in_ptr += in_width * (in_y_incr_int + in_y_incr_dlt);
		} else {
		    in_y_int += in_y_incr_int;
		    in_ptr += in_width * in_y_incr_int;
		}
	    }

	    /* process output pixels affected by four input pixels */
	    for (x = lft_zero_cnt + lft_edge_cnt;
		 x < lft_zero_cnt + lft_edge_cnt + full_cnt; x++) {
		ASSERT(in_x_int >= 0 && in_x_int < in_width-1);
		ASSERT(in_y_int >= 0 && in_y_int < in_height-1);
		ASSERT((IntPixelType *)(((char *)in_image + in_y_int *
				in_bytes_per_scan)) + in_x_int == in_ptr);
		wptr = VPBilirpWeight[FIX31_TO_WGTIND(yfrac)]
				     [FIX31_TO_WGTIND(xfrac)];
		Accumulate4(wptr[0], in_ptr[0], wptr[2], in_ptr[in_width],
			    wptr[1], in_ptr[1], wptr[3], in_ptr[in_width+1]);
		StoreAccumulator(out_ptr);
		out_ptr += OutputChannels;

		/* advance to the next output pixel (see left edge loop) */
		xfrac_sum = (unsigned)xfrac + (unsigned)xfrac_incr;
		yfrac_sum = (unsigned)yfrac + (unsigned)yfrac_incr;
		xfrac = (int)(xfrac_sum & 0x7fffffffU);
		yfrac = (int)(yfrac_sum & 0x7fffffffU);
		if (xfrac_sum & 0x80000000U) {
		    in_x_int += in_x_incr_int + in_x_incr_dlt;
		    in_ptr += in_x_incr_int + in_x_incr_dlt;
		} else {
		    in_x_int += in_x_incr_int;
		    in_ptr += in_x_incr_int;
		}
		if (yfrac_sum & 0x80000000U) {
		    in_y_int += in_y_incr_int + in_y_incr_dlt;
		    in_ptr += in_width * (in_y_incr_int + in_y_incr_dlt);
		} else {
		    in_y_int += in_y_incr_int;
		    in_ptr += in_width * in_y_incr_int;
		}
	    }

	    /* process right edge case pixels */
	    for (x = lft_zero_cnt + lft_edge_cnt + full_cnt;
		 x < lft_zero_cnt + lft_edge_cnt + full_cnt + rgt_edge_cnt;
		 x++) {
		wptr = VPBilirpWeight[FIX31_TO_WGTIND(yfrac)]
				     [FIX31_TO_WGTIND(xfrac)];
		ClearAccumulator();
		/* add only the input pixels that lie inside the image */
		if (in_x_int >= 0 && in_x_int < in_width) {
		    if (in_y_int >= 0 && in_y_int < in_height) {
			Accumulate1(wptr[0], in_ptr[0]);
		    }
		    if (in_y_int+1 >= 0 && in_y_int+1 < in_height) {
			Accumulate1(wptr[2], in_ptr[in_width]);
		    }
		}
		if (in_x_int+1 >= 0 && in_x_int+1 < in_width) {
		    if (in_y_int >= 0 && in_y_int < in_height) {
			Accumulate1(wptr[1], in_ptr[1]);
		    }
		    if (in_y_int+1 >= 0 && in_y_int+1 < in_height) {
			Accumulate1(wptr[3], in_ptr[in_width + 1]);
		    }
		}
		StoreAccumulator(out_ptr);
		out_ptr += OutputChannels;

		/* advance to the next output pixel (see left edge loop) */
		xfrac_sum = (unsigned)xfrac + (unsigned)xfrac_incr;
		yfrac_sum = (unsigned)yfrac + (unsigned)yfrac_incr;
		xfrac = (int)(xfrac_sum & 0x7fffffffU);
		yfrac = (int)(yfrac_sum & 0x7fffffffU);
		if (xfrac_sum & 0x80000000U) {
		    in_x_int += in_x_incr_int + in_x_incr_dlt;
		    in_ptr += in_x_incr_int + in_x_incr_dlt;
		} else {
		    in_x_int += in_x_incr_int;
		    in_ptr += in_x_incr_int;
		}
		if (yfrac_sum & 0x80000000U) {
		    in_y_int += in_y_incr_int + in_y_incr_dlt;
		    in_ptr += in_width * (in_y_incr_int + in_y_incr_dlt);
		} else {
		    in_y_int += in_y_incr_int;
		    in_ptr += in_width * in_y_incr_int;
		}
	    }

	    /* zero out unaffected pixels on right edge of scan */
	    if (rgt_zero_cnt > 0) {
		memset(out_ptr, 0, rgt_zero_cnt * OutputChannels);
		out_ptr += rgt_zero_cnt * OutputChannels;
	    }

	    /* go on to next scan: skip any padding at the end of the
	       output scanline and move the overlap boundaries down by
	       one scanline */
	    scans_to_next_vertex--;
	    out_scan_y++;
	    out_ptr += out_bytes_per_scan - out_width * OutputChannels;
	    x_lft_full += full_overlap[region].x_incr_lft;
	    x_rgt_full += full_overlap[region].x_incr_rgt;
	    x_lft_part += part_overlap[region].x_incr_lft;
	    x_rgt_part += part_overlap[region].x_incr_rgt;
	} /* next scanline in region */
    } /* next region */
    ASSERT(out_scan_y == out_height);
}