Teuchos - Trilinos Tools Package  Version of the Day
Teuchos_BLAS.cpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Teuchos: Common Tools Package
5 // Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #include "Teuchos_BLAS.hpp"
44 
45 /* for INTEL_CXML, the second arg may need to be changed to 'one'. If so
46 the appropriate declaration of one will need to be added back into
47 functions that include the macro:
48 */
49 
50 namespace {
51 #if defined (INTEL_CXML)
52  unsigned int one=1;
53 #endif
54 } // namespace
55 
56 #ifdef CHAR_MACRO
57 #undef CHAR_MACRO
58 #endif
59 #if defined (INTEL_CXML)
60 #define CHAR_MACRO(char_var) &char_var, one
61 #else
62 #define CHAR_MACRO(char_var) &char_var
63 #endif
64 
65 
66 const char Teuchos::ESideChar[] = {'L' , 'R' };
67 const char Teuchos::ETranspChar[] = {'N' , 'T' , 'C' };
68 const char Teuchos::EUploChar[] = {'U' , 'L' };
69 const char Teuchos::EDiagChar[] = {'U' , 'N' };
70 const char Teuchos::ETypeChar[] = {'G' , 'L', 'U', 'H', 'B', 'Q', 'Z' };
71 //const char Teuchos::EFactChar[] = {'F', 'N' };
72 //const char Teuchos::ENormChar[] = {'O', 'I' };
73 //const char Teuchos::ECompQChar[] = {'N', 'I', 'V' };
74 //const char Teuchos::EJobChar[] = {'E', 'V', 'B' };
75 //const char Teuchos::EJobSChar[] = {'E', 'S' };
76 //const char Teuchos::EJobVSChar[] = {'V', 'N' };
77 //const char Teuchos::EHowmnyChar[] = {'A', 'S' };
78 //const char Teuchos::ECMachChar[] = {'E', 'S', 'B', 'P', 'N', 'R', 'M', 'U', 'L', 'O' };
79 //const char Teuchos::ESortChar[] = {'N', 'S'};
80 
81 
82 namespace {
83 
84 
85 template<typename Scalar>
86 Scalar generic_dot(const int n, const Scalar* x, const int incx,
87  const Scalar* y, const int incy)
88 {
90  Scalar dot = 0.0;
91  if (incx==1 && incy==1) {
92  for (int i = 0; i < n; ++i)
93  dot += (*x++)*ST::conjugate(*y++);
94  }
95  else {
96  if (incx < 0)
97  x = x - incx*(n-1);
98  if (incy < 0)
99  y = y - incy*(n-1);
100  for (int i = 0; i < n; ++i, x+=incx, y+=incy)
101  dot += (*x)*ST::conjugate(*y);
102  }
103  return dot;
104 }
105 
106 
107 } // namespace
108 
109 
110 namespace Teuchos {
111 
112 //Explicitly instantiating these templates for windows due to an issue with
113 //resolving them when linking dlls.
114 #ifdef _WIN32
115 # ifdef HAVE_TEUCHOS_COMPLEX
116  template BLAS<long int, std::complex<float> >;
117  template BLAS<long int, std::complex<double> >;
118 # endif
119  template BLAS<long int, float>;
120  template BLAS<long int, double>;
121 #endif
122 
123  // *************************** BLAS<int,float> DEFINITIONS ******************************
124 
125  void BLAS<int, float>::ROTG(float* da, float* db, float* c, float* s) const
126  { SROTG_F77(da, db, c, s ); }
127 
128  void BLAS<int, float>::ROT(const int n, float* dx, const int incx, float* dy, const int incy, float* c, float* s) const
129  { SROT_F77(&n, dx, &incx, dy, &incy, c, s); }
130 
131 
132  float BLAS<int, float>::ASUM(const int n, const float* x, const int incx) const
133  {
134 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
135  return cblas_sasum(n, x, incx);
136 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
137  float tmp = SASUM_F77(&n, x, &incx);
138  return tmp;
139 #else
140  typedef ScalarTraits<float> ST;
141  float sum = 0.0;
142  if (incx == 1) {
143  for (int i = 0; i < n; ++i)
144  sum += ST::magnitude(*x++);
145  }
146  else {
147  for (int i = 0; i < n; ++i, x+=incx)
148  sum += ST::magnitude(*x);
149  }
150  return sum;
151 #endif
152  }
153 
154  void BLAS<int, float>::AXPY(const int n, const float alpha, const float* x, const int incx, float* y, const int incy) const
155  { SAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
156 
157  void BLAS<int, float>::COPY(const int n, const float* x, const int incx, float* y, const int incy) const
158  { SCOPY_F77(&n, x, &incx, y, &incy); }
159 
160  float BLAS<int, float>::DOT(const int n, const float* x, const int incx, const float* y, const int incy) const
161  {
162 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
163  return cblas_sdot(n, x, incx, y, incy);
164 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
165  return SDOT_F77(&n, x, &incx, y, &incy);
166 #else
167  return generic_dot(n, x, incx, y, incy);
168 #endif
169  }
170 
171  int BLAS<int, float>::IAMAX(const int n, const float* x, const int incx) const
172  { return ISAMAX_F77(&n, x, &incx); }
173 
174  float BLAS<int, float>::NRM2(const int n, const float* x, const int incx) const
175  {
176 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
177  return cblas_snrm2(n, x, incx);
178 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
179  return SNRM2_F77(&n, x, &incx);
180 #else
181  return ScalarTraits<float>::squareroot(generic_dot(n, x, incx, x, incx));
182 #endif
183  }
184 
185  void BLAS<int, float>::SCAL(const int n, const float alpha, float* x, const int incx) const
186  { SSCAL_F77(&n, &alpha, x, &incx); }
187 
188  void BLAS<int, float>::GEMV(ETransp trans, const int m, const int n, const float alpha, const float* A, const int lda, const float* x, const int incx, const float beta, float* y, const int incy) const
189  { SGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
190 
191  void BLAS<int, float>::GER(const int m, const int n, const float alpha, const float* x, const int incx, const float* y, const int incy, float* A, const int lda) const
192  { SGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
193 
194  void BLAS<int, float>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const float* A, const int lda, float* x, const int incx) const
195  { STRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
196 
197  void BLAS<int, float>::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const
198  { SGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
199 
200  void BLAS<int, float>::SWAP(const int n, float* const x, const int incx, float* const y, const int incy) const
201  {
202  SSWAP_F77 (&n, x, &incx, y, &incy);
203  }
204 
205  void BLAS<int, float>::SYMM(ESide side, EUplo uplo, const int m, const int n, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const
206  { SSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
207 
208  void BLAS<int, float>::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const float alpha, const float* A, const int lda, const float beta, float* C, const int ldc) const
209  { SSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
210 
211  void BLAS<int, float>::HERK(EUplo uplo, ETransp trans, const int n, const int k, const float alpha, const float* A, const int lda, const float beta, float* C, const int ldc) const
212  { SSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
213 
214  void BLAS<int, float>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const
215  { STRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
216 
217  void BLAS<int, float>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const
218  { STRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
219 
220  // *************************** BLAS<int,double> DEFINITIONS ******************************
221 
222  void BLAS<int, double>::ROTG(double* da, double* db, double* c, double* s) const
223  { DROTG_F77(da, db, c, s); }
224 
225  void BLAS<int, double>::ROT(const int n, double* dx, const int incx, double* dy, const int incy, double* c, double* s) const
226  { DROT_F77(&n, dx, &incx, dy, &incy, c, s); }
227 
228  double BLAS<int, double>::ASUM(const int n, const double* x, const int incx) const
229  { return DASUM_F77(&n, x, &incx); }
230 
231  void BLAS<int, double>::AXPY(const int n, const double alpha, const double* x, const int incx, double* y, const int incy) const
232  { DAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
233 
234  void BLAS<int, double>::COPY(const int n, const double* x, const int incx, double* y, const int incy) const
235  { DCOPY_F77(&n, x, &incx, y, &incy); }
236 
237  double BLAS<int, double>::DOT(const int n, const double* x, const int incx, const double* y, const int incy) const
238  {
239  return DDOT_F77(&n, x, &incx, y, &incy);
240  }
241 
242  int BLAS<int, double>::IAMAX(const int n, const double* x, const int incx) const
243  { return IDAMAX_F77(&n, x, &incx); }
244 
245  double BLAS<int, double>::NRM2(const int n, const double* x, const int incx) const
246  { return DNRM2_F77(&n, x, &incx); }
247 
248  void BLAS<int, double>::SCAL(const int n, const double alpha, double* x, const int incx) const
249  { DSCAL_F77(&n, &alpha, x, &incx); }
250 
251  void BLAS<int, double>::GEMV(ETransp trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* x, const int incx, const double beta, double* y, const int incy) const
252  { DGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
253 
254  void BLAS<int, double>::GER(const int m, const int n, const double alpha, const double* x, const int incx, const double* y, const int incy, double* A, const int lda) const
255  { DGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
256 
257  void BLAS<int, double>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const double* A, const int lda, double* x, const int incx) const
258  { DTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
259 
260  void BLAS<int, double>::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const double alpha, const double* A, const int lda, const double* B, const int ldb, const double beta, double* C, const int ldc) const
261  { DGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
262 
263  void BLAS<int, double>::SWAP(const int n, double* const x, const int incx, double* const y, const int incy) const
264  {
265  DSWAP_F77 (&n, x, &incx, y, &incy);
266  }
267 
268  void BLAS<int, double>::SYMM(ESide side, EUplo uplo, const int m, const int n, const double alpha, const double* A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc) const
269  { DSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
270 
271  void BLAS<int, double>::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const double alpha, const double* A, const int lda, const double beta, double* C, const int ldc) const
272  { DSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
273 
274  void BLAS<int, double>::HERK(EUplo uplo, ETransp trans, const int n, const int k, const double alpha, const double* A, const int lda, const double beta, double* C, const int ldc) const
275  { DSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
276 
277  void BLAS<int, double>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const
278  { DTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
279 
280  void BLAS<int, double>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const
281  { DTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
282 
283 #ifdef HAVE_TEUCHOS_COMPLEX
284 
285  // *************************** BLAS<int,std::complex<float> > DEFINITIONS ******************************
286 
287  void BLAS<int, std::complex<float> >::ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const
288  { CROTG_F77(da, db, c, s ); }
289 
290  void BLAS<int, std::complex<float> >::ROT(const int n, std::complex<float>* dx, const int incx, std::complex<float>* dy, const int incy, float* c, std::complex<float>* s) const
291  { CROT_F77(&n, dx, &incx, dy, &incy, c, s); }
292 
293  float BLAS<int, std::complex<float> >::ASUM(const int n, const std::complex<float>* x, const int incx) const
294  {
295 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
296  return cblas_scasum(n, x, incx);
297 #elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
298  return (float) SCASUM_F77(&n, x, &incx);
299 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
300  return SCASUM_F77(&n, x, &incx);
301 #else // Wow, you just plain don't have this routine.
302  // mfh 01 Feb 2013: See www.netlib.org/blas/scasum.f.
303  // I've enhanced this by accumulating in double precision.
304  double result = 0;
305  if (incx == 1) {
306  for (int i = 0; i < n; ++i) {
307  result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
308  }
309  } else {
310  const int nincx = n * incx;
311  for (int i = 0; i < nincx; i += incx) {
312  result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
313  }
314  }
315  return static_cast<float> (result);
316 #endif
317  }
318 
319  void BLAS<int, std::complex<float> >::AXPY(const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const
320  { CAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
321 
322  void BLAS<int, std::complex<float> >::COPY(const int n, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const
323  { CCOPY_F77(&n, x, &incx, y, &incy); }
324 
325  std::complex<float> BLAS<int, std::complex<float> >::DOT(const int n, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy) const
326  {
327 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
328  std::complex<float> z;
329  cblas_cdotc_sub(n,x,incx,y,incy,&z);
330  return z;
331 #elif defined(HAVE_COMPLEX_BLAS_PROBLEM) && defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
332  std::complex<float> z;
333  CDOT_F77(&z, &n, x, &incx, y, &incy);
334  return z;
335 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
336  return CDOT_F77(&n, x, &incx, y, &incy);
337 #else // Wow, you just plain don't have this routine.
338  // mfh 01 Feb 2013: See www.netlib.org/blas/cdotc.f.
339  // I've enhanced this by accumulating in double precision.
340  std::complex<double> result (0, 0);
341  if (n >= 0) {
342  if (incx == 1 && incy == 1) {
343  for (int i = 0; i < n; ++i) {
344  result += std::conj (x[i]) * y[i];
345  }
346  } else {
347  int ix = 0;
348  int iy = 0;
349  if (incx < 0) {
350  ix = (1-n) * incx;
351  }
352  if (incy < 0) {
353  iy = (1-n) * incy;
354  }
355  for (int i = 0; i < n; ++i) {
356  result += std::conj (x[ix]) * y[iy];
357  ix += incx;
358  iy += incy;
359  }
360  }
361  }
362  return static_cast<std::complex<float> > (result);
363 #endif
364  }
365 
366  int BLAS<int, std::complex<float> >::IAMAX(const int n, const std::complex<float>* x, const int incx) const
367  { return ICAMAX_F77(&n, x, &incx); }
368 
369  float BLAS<int, std::complex<float> >::NRM2(const int n, const std::complex<float>* x, const int incx) const
370  {
371 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
372  return cblas_scnrm2(n, x, incx);
373 #elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
374  return (float) SCNRM2_F77(&n, x, &incx);
375 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
376  return SCNRM2_F77(&n, x, &incx);
377 #else // Wow, you just plain don't have this routine.
378  // mfh 01 Feb 2013: See www.netlib.org/blas/scnrm2.f.
379  // I've enhanced this by accumulating in double precision.
380  if (n < 1 || incx < 1) {
381  return 0;
382  } else {
383  double scale = 0;
384  double ssq = 1;
385 
386  const int upper = 1 + (n-1)*incx;
387  for (int ix = 0; ix < upper; ix += incx) {
388  // The reference BLAS implementation cleverly scales the
389  // intermediate result. so that even if the square of the norm
390  // would overflow, computing the norm itself does not. Hence,
391  // "ssq" for "scaled square root."
392  if (std::real (x[ix]) != 0) {
393  const double temp = std::abs (std::real (x[ix]));
394  if (scale < temp) {
395  const double scale_over_temp = scale / temp;
396  ssq = 1 + ssq * scale_over_temp*scale_over_temp;
397  // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
398  scale = temp;
399  } else {
400  const double temp_over_scale = temp / scale;
401  ssq = ssq + temp_over_scale*temp_over_scale;
402  }
403  }
404  if (std::imag (x[ix]) != 0) {
405  const double temp = std::abs (std::imag (x[ix]));
406  if (scale < temp) {
407  const double scale_over_temp = scale / temp;
408  ssq = 1 + ssq * scale_over_temp*scale_over_temp;
409  // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
410  scale = temp;
411  } else {
412  const double temp_over_scale = temp / scale;
413  ssq = ssq + temp_over_scale*temp_over_scale;
414  }
415  }
416  }
417  return static_cast<float> (scale * std::sqrt (ssq));
418  }
419 #endif
420  }
421 
422  void BLAS<int, std::complex<float> >::SCAL(const int n, const std::complex<float> alpha, std::complex<float>* x, const int incx) const
423  { CSCAL_F77(&n, &alpha, x, &incx); }
424 
425  void BLAS<int, std::complex<float> >::GEMV(ETransp trans, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* x, const int incx, const std::complex<float> beta, std::complex<float>* y, const int incy) const
426  { CGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
427 
428  void BLAS<int, std::complex<float> >::GER(const int m, const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy, std::complex<float>* A, const int lda) const
429  { CGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
430 
431  void BLAS<int, std::complex<float> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<float>* A, const int lda, std::complex<float>* x, const int incx) const
432  { CTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
433 
434  void BLAS<int, std::complex<float> >::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const
435  { CGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
436 
437  void BLAS<int, std::complex<float> >::SWAP(const int n, std::complex<float>* const x, const int incx, std::complex<float>* const y, const int incy) const
438  {
439  CSWAP_F77 (&n, x, &incx, y, &incy);
440  }
441 
442  void BLAS<int, std::complex<float> >::SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const
443  { CSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
444 
445  void BLAS<int, std::complex<float> >::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> beta, std::complex<float>* C, const int ldc) const
446  { CSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
447 
448  void BLAS<int, std::complex<float> >::HERK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> beta, std::complex<float>* C, const int ldc) const
449  { CHERK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
450 
451  void BLAS<int, std::complex<float> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const
452  { CTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
453 
454  void BLAS<int, std::complex<float> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const
455  { CTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
456 
457  // *************************** BLAS<int,std::complex<double> > DEFINITIONS ******************************
458 
459  void BLAS<int, std::complex<double> >::ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const
460  { ZROTG_F77(da, db, c, s); }
461 
462  void BLAS<int, std::complex<double> >::ROT(const int n, std::complex<double>* dx, const int incx, std::complex<double>* dy, const int incy, double* c, std::complex<double>* s) const
463  { ZROT_F77(&n, dx, &incx, dy, &incy, c, s); }
464 
465  double BLAS<int, std::complex<double> >::ASUM(const int n, const std::complex<double>* x, const int incx) const
466  { return ZASUM_F77(&n, x, &incx); }
467 
468  void BLAS<int, std::complex<double> >::AXPY(const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const
469  { ZAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
470 
471  void BLAS<int, std::complex<double> >::COPY(const int n, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const
472  { ZCOPY_F77(&n, x, &incx, y, &incy); }
473 
474  std::complex<double> BLAS<int, std::complex<double> >::DOT(const int n, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy) const
475  {
476 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
477  std::complex<double> z;
478  cblas_zdotc_sub(n,x,incx,y,incy,&z);
479  return z;
480 #elif defined(HAVE_COMPLEX_BLAS_PROBLEM)
481 # if defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
482  std::complex<double> z;
483  ZDOT_F77(&z, &n, x, &incx, y, &incy);
484  return z;
485 # else
486  // mfh 01 Feb 2013: Your complex BLAS is broken, but the problem
487  // doesn't have the easy workaround. I'll just reimplement the
488  // missing routine here. See www.netlib.org/blas/zdotc.f.
489  std::complex<double> ztemp (0, 0);
490  if (n > 0) {
491  if (incx == 1 && incy == 1) {
492  for (int i = 0; i < n; ++i) {
493  ztemp += std::conj (x[i]) * y[i];
494  }
495  } else {
496  int ix = 0;
497  int iy = 0;
498  if (incx < 0) {
499  ix = (1-n)*incx;
500  }
501  if (incy < 0) {
502  iy = (1-n)*incy;
503  }
504  for (int i = 0; i < n; ++i) {
505  ztemp += std::conj (x[ix]) * y[iy];
506  ix += incx;
507  iy += incy;
508  }
509  }
510  }
511  return ztemp;
512 
513 # endif // defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
514 #else
515  return ZDOT_F77(&n, x, &incx, y, &incy);
516 #endif
517  }
518 
519  int BLAS<int, std::complex<double> >::IAMAX(const int n, const std::complex<double>* x, const int incx) const
520  { return IZAMAX_F77(&n, x, &incx); }
521 
522  double BLAS<int, std::complex<double> >::NRM2(const int n, const std::complex<double>* x, const int incx) const
523  { return ZNRM2_F77(&n, x, &incx); }
524 
525  void BLAS<int, std::complex<double> >::SCAL(const int n, const std::complex<double> alpha, std::complex<double>* x, const int incx) const
526  { ZSCAL_F77(&n, &alpha, x, &incx); }
527 
528  void BLAS<int, std::complex<double> >::GEMV(ETransp trans, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* x, const int incx, const std::complex<double> beta, std::complex<double>* y, const int incy) const
529  { ZGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
530 
531  void BLAS<int, std::complex<double> >::GER(const int m, const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy, std::complex<double>* A, const int lda) const
532  { ZGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
533 
534  void BLAS<int, std::complex<double> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<double>* A, const int lda, std::complex<double>* x, const int incx) const
535  { ZTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
536 
537  void BLAS<int, std::complex<double> >::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* B, const int ldb, const std::complex<double> beta, std::complex<double>* C, const int ldc) const
538  { ZGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
539 
540  void BLAS<int, std::complex<double> >::SWAP(const int n, std::complex<double>* const x, const int incx, std::complex<double>* const y, const int incy) const
541  {
542  ZSWAP_F77 (&n, x, &incx, y, &incy);
543  }
544 
545  void BLAS<int, std::complex<double> >::SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> *B, const int ldb, const std::complex<double> beta, std::complex<double> *C, const int ldc) const
546  { ZSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
547 
548  void BLAS<int, std::complex<double> >::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> beta, std::complex<double>* C, const int ldc) const
549  { ZSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
550 
551  void BLAS<int, std::complex<double> >::HERK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> beta, std::complex<double>* C, const int ldc) const
552  { ZHERK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
553 
554  void BLAS<int, std::complex<double> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const
555  { ZTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
556 
557  void BLAS<int, std::complex<double> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const
558  { ZTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
559 
560 #endif // HAVE_TEUCHOS_COMPLEX
561 
562 }
void SWAP(const OrdinalType n, ScalarType *const x, const OrdinalType incx, ScalarType *const y, const OrdinalType incy) const
Swap the entries of x and y.
ScalarType DOT(const OrdinalType n, const x_type *x, const OrdinalType incx, const y_type *y, const OrdinalType incy) const
Form the dot product of the vectors x and y.
void GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type *x, const OrdinalType incx, const y_type *y, const OrdinalType incy, ScalarType *A, const OrdinalType lda) const
Performs the rank 1 operation: A <- alpha*x*y&#39;+A.
void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, const B_type *B, const OrdinalType ldb, const beta_type beta, ScalarType *C, const OrdinalType ldc) const
Performs the matrix-matrix operation: C <- alpha*A*B+beta*C or C <- alpha*B*A+beta*C where A is an m ...
Templated interface class to BLAS routines.
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, ScalarType *B, const OrdinalType ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices...
static T squareroot(T x)
Returns a number of magnitudeType that is the square root of this scalar type x.
void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, const x_type *x, const OrdinalType incx, const beta_type beta, ScalarType *y, const OrdinalType incy) const
Performs the matrix-vector operation: y <- alpha*A*x+beta*y or y <- alpha*A&#39;*x+beta*y where A is a ge...
void SYRK(EUplo uplo, ETransp trans, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type *A, const OrdinalType lda, const beta_type beta, ScalarType *C, const OrdinalType ldc) const
Performs the symmetric rank k operation: C <- alpha*A*A&#39;+beta*C or C <- alpha*A&#39;*A+beta*C, where alpha and beta are scalars, C is an n by n symmetric matrix and A is an n by k matrix in the first case or k by n matrix in the second case.
void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type *A, const OrdinalType lda, const B_type *B, const OrdinalType ldb, const beta_type beta, ScalarType *C, const OrdinalType ldc) const
General matrix-matrix multiply.
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType n, const ScalarType *x, const OrdinalType incx) const
Compute the 2-norm of the vector x.
OrdinalType IAMAX(const OrdinalType n, const ScalarType *x, const OrdinalType incx) const
Return the index of the element of x with the maximum magnitude.
void ROTG(ScalarType *da, ScalarType *db, rotg_c_type *c, ScalarType *s) const
Computes a Givens plane rotation.
The Templated BLAS wrappers.
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type *A, const OrdinalType lda, ScalarType *x, const OrdinalType incx) const
Performs the matrix-vector operation: x <- A*x or x <- A&#39;*x where A is a unit/non-unit n by n upper/l...
This structure defines some basic traits for a scalar field type.
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type *A, const OrdinalType lda, ScalarType *B, const OrdinalType ldb) const
Performs the matrix-matrix operation: B <- alpha*op(A)*B or B <- alpha*B*op(A) where op(A) is an unit...
void ROT(const OrdinalType n, ScalarType *dx, const OrdinalType incx, ScalarType *dy, const OrdinalType incy, MagnitudeType *c, ScalarType *s) const
Applies a Givens plane rotation.
void COPY(const OrdinalType n, const ScalarType *x, const OrdinalType incx, ScalarType *y, const OrdinalType incy) const
Copy the vector x to the vector y.
void AXPY(const OrdinalType n, const alpha_type alpha, const x_type *x, const OrdinalType incx, ScalarType *y, const OrdinalType incy) const
Perform the operation: y <- y+alpha*x.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos, as well as a number of utility routines.
ScalarTraits< ScalarType >::magnitudeType ASUM(const OrdinalType n, const ScalarType *x, const OrdinalType incx) const
Sum the absolute values of the entries of x.
void SCAL(const OrdinalType n, const ScalarType alpha, ScalarType *x, const OrdinalType incx) const
Scale the vector x by the constant alpha.