File: machine.c

package info (click to toggle)
gwc 0.22.04-1.1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 3,676 kB
  • sloc: ansic: 40,072; sh: 797; makefile: 661; perl: 37
file content (170 lines) | stat: -rwxr-xr-x 4,184 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170

/**************************************************************************
**
** Copyright (C) 1993 David E. Stewart & Zbigniew Leyk, all rights reserved.
**
**			     Meschach Library
** 
** This Meschach Library is provided "as is" without any express 
** or implied warranty of any kind with respect to this software. 
** In particular the authors shall not be liable for any direct, 
** indirect, special, incidental or consequential damages arising 
** in any way from use of the software.
** 
** Everyone is granted permission to copy, modify and redistribute this
** Meschach Library, provided:
**  1.  All copies contain this copyright notice.
**  2.  All modified copies shall carry a notice stating who
**      made the last modification and the date of such modification.
**  3.  No charge is made for this software or works derived from it.  
**      This clause shall not be construed as constraining other software
**      distributed on the same medium as this software, nor is a
**      distribution fee considered a charge.
**
***************************************************************************/

/*
  This file contains basic routines which are used by the functions
  in meschach.a etc.
  These are the routines that should be modified in order to take
  full advantage of specialised architectures (pipelining, vector
  processors etc).
  */

/* RCS/CVS "$Id$" keyword string recording the revision of this file;
   it is not referenced by any of the routines below */
static	char	*rcsid = "$Id: machine.c,v 1.4 1994/01/13 05:28:56 des Exp $";

#include	"machine.h"

/*
 * __ip__ -- inner product
 *
 * Returns the sum of dp1[k]*dp2[k] for k = 0 .. len-1.  The sum is
 * accumulated in Real and returned as double; a len of zero or less
 * gives 0.0.
 *
 * When VUNROLL is defined, the leading 4*(len/4) entries are handled four
 * at a time with four separate partial sums, which are combined before the
 * remaining len%4 entries are added by the plain loop.
 */
#ifndef ANSI_C
double	__ip__(dp1,dp2,len)
register Real	*dp1, *dp2;
int	len;
#else
double	__ip__(const Real *dp1, const Real *dp2, int len)
#endif
{
    register Real	acc = 0.0;
    register int	k;
#ifdef VUNROLL
    register Real	acc1 = 0.0, acc2 = 0.0, acc3 = 0.0;
    register int	quads = len / 4;

    /* only the leftover entries remain for the plain loop below */
    len %= 4;

    for ( k = quads; k > 0; k-- )
    {
	acc  += dp1[0]*dp2[0];
	acc1 += dp1[1]*dp2[1];
	acc2 += dp1[2]*dp2[2];
	acc3 += dp1[3]*dp2[3];
	/* step both arrays past the group just processed */
	dp1 += 4;
	dp2 += 4;
    }
    acc += acc1 + acc2 + acc3;
#endif

    for ( k = 0; k < len; k++ )
	acc += dp1[k]*dp2[k];

    return acc;
}

/*
 * __mltadd__ -- scalar multiply and add, cf. v_mltadd()
 *
 * Updates dp1 in place: dp1[k] += s*dp2[k] for k = 0 .. len-1.
 * Nothing is written when len is zero or negative.
 *
 * When VUNROLL is defined, the leading 4*(len/4) entries are updated in
 * groups of four and the remaining len%4 entries by the plain loop.
 */
#ifndef ANSI_C
void	__mltadd__(dp1,dp2,s,len)
register Real	*dp1, *dp2;
register double s;
register int	len;
#else
void	__mltadd__(Real *dp1, const Real *dp2, double s, int len)
#endif
{
    register int	k;
#ifdef VUNROLL
    register int	quads = len / 4;

    /* only the leftover entries remain for the plain loop below */
    len %= 4;

    /* both pointers advance by one group of four per pass */
    for ( k = quads; k > 0; k--, dp1 += 4, dp2 += 4 )
    {
	dp1[0] += s*dp2[0];
	dp1[1] += s*dp2[1];
	dp1[2] += s*dp2[2];
	dp1[3] += s*dp2[3];
    }
#endif

    for ( k = 0; k < len; k++ )
	dp1[k] += s*dp2[k];
}

/*
 * __smlt__ -- scalar multiply of an array, cf. sv_mlt()
 *
 * Stores s*dp[k] in out[k] for k = 0 .. len-1, in increasing order of k.
 * No entry is written when len is zero or negative.
 */
#ifndef ANSI_C
void	__smlt__(dp,s,out,len)
register Real	*dp, *out;
register double s;
register int	len;
#else
void	__smlt__(const Real *dp, double s, Real *out, int len)
#endif
{
    /* walk both arrays in step, counting the remaining entries down */
    for ( ; len > 0; len--, dp++, out++ )
	*out = s*(*dp);
}

/*
 * __add__ -- add arrays, cf. v_add()
 *
 * Stores dp1[k] + dp2[k] in out[k] for k = 0 .. len-1, in increasing
 * order of k.  No entry is written when len is zero or negative.
 */
#ifndef ANSI_C
void	__add__(dp1,dp2,out,len)
register Real	*dp1, *dp2, *out;
register int	len;
#else
void	__add__(const Real *dp1, const Real *dp2, Real *out, int len)
#endif
{
    register int	k = 0;

    while ( k < len )
    {
	out[k] = dp1[k] + dp2[k];
	k++;
    }
}

/*
 * __sub__ -- subtract arrays, cf. v_sub()
 *
 * Stores dp1[k] - dp2[k] in out[k] for k = 0 .. len-1, in increasing
 * order of k.  No entry is written when len is zero or negative.
 */
#ifndef ANSI_C
void	__sub__(dp1,dp2,out,len)
register Real	*dp1, *dp2, *out;
register int	len;
#else
void	__sub__(const Real *dp1, const Real *dp2, Real *out, int len)
#endif
{
    /* step all three pointers together, one entry per pass */
    for ( ; len > 0; len-- )
    {
	*out = *dp1 - *dp2;
	dp1++;
	dp2++;
	out++;
    }
}

/*
 * __zero__ -- zeros an array of floating point numbers
 *
 * Sets dp[0] .. dp[len-1] to zero.  A len of zero or less is a no-op in
 * both build variants.
 *
 * If CHAR0ISDBL0 is defined, the array is cleared as a block of bytes via
 * MEM_ZERO() (defined outside this file); otherwise each entry is assigned
 * 0.0 in turn.
 */
#ifndef ANSI_C
void	__zero__(dp,len)
register Real	*dp;
register int	len;
#else
void	__zero__(Real *dp, int len)
#endif
{
#ifndef CHAR0ISDBL0
    int	i;
#endif

    /* A negative len would be converted to a huge unsigned byte count in
       len*sizeof(Real) below, so reject non-positive lengths up front.
       This matches the entry-by-entry loop, which does nothing for them. */
    if ( len <= 0 )
	return;

#ifdef CHAR0ISDBL0
    /* if a floating point zero is equivalent to a string of nulls */
    MEM_ZERO((char *)dp,len*sizeof(Real));
#else
    /* else, need to zero the array entry by entry */
    for ( i = 0; i < len; i++ )
	dp[i] = 0.0;
#endif
}