File: ATL_hpr.c

package info (click to toggle)
atlas 3.6.0-22
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 109,212 kB
  • ctags: 58,065
  • sloc: ansic: 315,180; fortran: 47,890; asm: 8,751; makefile: 1,368; sh: 515; awk: 321; exp: 43
file content (162 lines) | stat: -rw-r--r-- 5,479 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
/* ---------------------------------------------------------------------
 *
 * -- Automatically Tuned Linear Algebra Software (ATLAS)
 *    (C) Copyright 2000 All Rights Reserved
 *
 * -- ATLAS routine -- Version 3.2 -- December 25, 2000
 *
 * -- Suggestions,  comments,  bugs reports should be sent to the follo-
 *    wing e-mail address: atlas@cs.utk.edu
 *
 * Author         : Antoine P. Petitet
 * Contributor(s) : R. Clint Whaley
 * University of Tennessee - Innovative Computing Laboratory
 * Knoxville TN, 37996-1301, USA.
 *
 * ---------------------------------------------------------------------
 *
 * -- Copyright notice and Licensing terms:
 *
 *  Redistribution  and  use in  source and binary forms, with or without
 *  modification, are  permitted provided  that the following  conditions
 *  are met:
 *
 * 1. Redistributions  of  source  code  must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce  the above copyright
 *    notice,  this list of conditions, and the  following disclaimer in
 *    the documentation and/or other materials provided with the distri-
 *    bution.
 * 3. The name of the University,  the ATLAS group,  or the names of its
 *    contributors  may not be used to endorse or promote products deri-
 *    ved from this software without specific written permission.
 *
 * -- Disclaimer:
 *
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,  INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO,  PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEO-
 * RY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  (IN-
 * CLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include "atlas_misc.h"
#include "atlas_level1.h"
#include "atlas_kernel2.h"
#include "atlas_lvl2.h"
#include "atlas_mv.h"
#include "atlas_r1.h"

#include "atlas_reflvl2.h"          /* temporary for building purposes */
#include "atlas_reflevel2.h"        /* used for gbmv, gpmv and gpr.    */

void Mjoin( PATL, hpr )
(
   const enum ATLAS_UPLO      UPLO,
   const int                  N,
   const TYPE                 ALPHA,
   const TYPE                 * X,
   const int                  INCX,
   TYPE                       * A
)
{
/*
 * Purpose
 * =======
 *
 * Mjoin( PATL, hpr ) performs the Hermitian rank 1 operation
 *
 *    A := alpha* x * conjg( x' ) + A,
 *
 * where  alpha is a real scalar, x is an n-element vector and A is an n
 * by n Hermitian matrix, supplied in packed form.
 *
 * This is a blocked version of the algorithm.  For a more detailed des-
 * cription of  the arguments of this function, see the reference imple-
 * mentation in the ATLAS/src/blas/reference directory.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   TYPE                       Calph[2];
#ifdef TREAL
#define    one                ATL_rone
#else
   const TYPE                 one[2] = { ATL_rone, ATL_rzero };
#endif
   void                       * vx = NULL;
   TYPE                       * x, * y, * A0, * y0;
   int                        incA0, incX, incY, incy, lda, lda0, mb, mb1,
                              n, nb;
#define   gpr1L   Mjoin( PATL, gpr1cL_a1_x1_yX )
#define   gpr1U   Mjoin( PATL, gpr1cU_a1_x1_yX )
/* ..
 * .. Executable Statements ..
 *
 */
   if( ( N == 0 ) || ( ALPHA == ATL_rzero ) ) return;

   if( ( INCX == 1 ) && ( ALPHA == ATL_rone ) )
   {
      x = y = (TYPE *)(X); incy = INCX;
   }
   else
   {
      vx = (void *)malloc( ATL_Cachelen + ATL_MulBySize( N ) );
      ATL_assert( vx ); x = ATL_AlignPtr( vx );
      *Calph = ALPHA; Calph[1] = ATL_rzero;
      Mjoin( PATL, cpsc )( N, Calph, X, INCX, x, 1 );
      y = (TYPE *)(X); incy = INCX;
   }

   ATL_GetPartP1( A, N, mb, nb );

   mb1  = N - ( ( N - 1 ) / mb ) * mb;

   if( UPLO == AtlasLower )
   {
      lda = lda0 = N; incA0 = ( incX = (mb SHIFT) );

      Mjoin( PATL, hprL )( mb1, x, y, incy, A, lda );
      A0 = (TYPE *)( A + (mb1 SHIFT) ); MLpnext( mb1, A, lda );
      x += (mb1 SHIFT);

      for( n = mb1; n < N; n += mb, A0 += incA0, x += incX )
      {
         gpr1L( mb, n, one, x, 1, y, incy, A0, lda0 );
         Mjoin( PATL, hprL )( mb, x, y + n*(incy SHIFT), incy, A, lda );
         MLpnext( mb, A, lda );
      }
   }
   else
   {
      incA0 = ( incX = (mb SHIFT) ); incY = incX * incy;
      lda0 = lda = 1; A0 = (TYPE *)(A); MUpnext( mb, A0, lda0 );

      for( n  = N - mb, y0 = y + incY; n > 0;
           n -= mb, x += incX, y += incY, y0 += incY )
      {
         Mjoin( PATL, hprU )( mb, x, y, incy, A, lda );
         gpr1U(  mb, n, one, x, 1, y0, incy, A0 - incA0, lda0 );
         lda = lda0; A = A0; MUpnext( mb, A0, lda0 );
      }
      Mjoin( PATL, hprU )( mb1, x, y, incy, A, lda );
   }

   if( vx ) free( vx );
/*
 * End of Mjoin( PATL, hpr )
 */
}