Tpetra parallel linear algebra  Version of the Day
Tpetra_Experimental_BlockView.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP
43 #define TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP
44 
52 
53 #include "TpetraCore_config.h"
54 #include "Kokkos_ArithTraits.hpp"
55 #include "Kokkos_Complex.hpp"
56 
57 namespace Tpetra {
58 
70 namespace Experimental {
71 
72 namespace Impl {
73 
80 template<class ViewType1,
81  class ViewType2,
82  const int rank1 = ViewType1::rank>
83 struct AbsMax {
84  static KOKKOS_INLINE_FUNCTION void
85  run (const ViewType2& Y, const ViewType1& X);
86 };
87 
92 template<class ViewType1,
93  class ViewType2>
94 struct AbsMax<ViewType1, ViewType2, 2> {
97  static KOKKOS_INLINE_FUNCTION void
98  run (const ViewType2& Y, const ViewType1& X)
99  {
100  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
101  "AbsMax: ViewType1 and ViewType2 must have the same rank.");
102  typedef typename std::remove_reference<decltype (Y(0,0)) >::type STY;
103  static_assert(! std::is_const<STY>::value,
104  "AbsMax: The type of each entry of Y must be nonconst.");
105  typedef typename std::decay<decltype (X(0,0)) >::type STX;
106  static_assert( std::is_same<STX, STY>::value,
107  "AbsMax: The type of each entry of X and Y must be the same.");
108  typedef Kokkos::Details::ArithTraits<STY> KAT;
109 
110  const int numCols = Y.dimension_1 ();
111  const int numRows = Y.dimension_0 ();
112  for (int j = 0; j < numCols; ++j) {
113  for (int i = 0; i < numRows; ++i) {
114  STY& Y_ij = Y(i,j); // use ref here to avoid 2nd op() call on Y
115  const STX X_ij = X(i,j);
116  // NOTE: no std::max (not a CUDA __device__ function); must
117  // cast back up to complex.
118  const auto Y_ij_abs = KAT::abs (Y_ij);
119  const auto X_ij_abs = KAT::abs (X_ij);
120  Y_ij = (Y_ij_abs >= X_ij_abs) ?
121  static_cast<STY> (Y_ij_abs) :
122  static_cast<STY> (X_ij_abs);
123  }
124  }
125  }
126 };
127 
132 template<class ViewType1,
133  class ViewType2>
134 struct AbsMax<ViewType1, ViewType2, 1> {
137  static KOKKOS_INLINE_FUNCTION void
138  run (const ViewType2& Y, const ViewType1& X)
139  {
140  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
141  "AbsMax: ViewType1 and ViewType2 must have the same rank.");
142 
143  typedef typename std::remove_reference<decltype (Y(0)) >::type STY;
144  static_assert(! std::is_const<STY>::value,
145  "AbsMax: The type of each entry of Y must be nonconst.");
146  typedef typename std::remove_const<typename std::remove_reference<decltype (X(0)) >::type>::type STX;
147  static_assert( std::is_same<STX, STY>::value,
148  "AbsMax: The type of each entry of X and Y must be the same.");
149  typedef Kokkos::Details::ArithTraits<STY> KAT;
150 
151  const int numRows = Y.dimension_0 ();
152  for (int i = 0; i < numRows; ++i) {
153  STY& Y_i = Y(i); // use ref here to avoid 2nd op() call on Y
154  const STX X_i = X(i);
155  // NOTE: no std::max (not a CUDA __device__ function); must
156  // cast back up to complex.
157  const auto Y_i_abs = KAT::abs (Y_i);
158  const auto X_i_abs = KAT::abs (X_i);
159  Y_i = (Y_i_abs >= X_i_abs) ?
160  static_cast<STY> (Y_i_abs) :
161  static_cast<STY> (X_i_abs);
162  }
163  }
164 };
165 
172 template<class ViewType1, class ViewType2, const int rank = ViewType1::rank>
173 KOKKOS_INLINE_FUNCTION void
174 absMax (const ViewType2& Y, const ViewType1& X)
175 {
176  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
177  "absMax: ViewType1 and ViewType2 must have the same rank.");
179 }
180 
185 template<class ViewType,
186  class CoefficientType,
187  class LayoutType = typename ViewType::array_layout,
188  class IndexType = int,
189  const int rank = ViewType::rank>
190 struct SCAL {
191  static KOKKOS_INLINE_FUNCTION void
192  run (const CoefficientType& alpha, const ViewType& x);
193 };
194 
197 template<class ViewType,
198  class CoefficientType,
199  class LayoutType,
200  class IndexType>
201 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 1> {
203  static KOKKOS_INLINE_FUNCTION void
204  run (const CoefficientType& alpha, const ViewType& x)
205  {
206  const IndexType numRows = static_cast<IndexType> (x.dimension_0 ());
207  // BLAS _SCAL doesn't check whether alpha is 0.
208  for (IndexType i = 0; i < numRows; ++i) {
209  x(i) = alpha * x(i);
210  }
211  }
212 };
213 
216 template<class ViewType,
217  class CoefficientType,
218  class LayoutType,
219  class IndexType>
220 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 2> {
222  static KOKKOS_INLINE_FUNCTION void
223  run (const CoefficientType& alpha, const ViewType& A)
224  {
225  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
226  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
227 
228  // BLAS _SCAL doesn't check whether alpha is 0.
229  for (IndexType i = 0; i < numRows; ++i) {
230  for (IndexType j = 0; j < numCols; ++j) {
231  A(i,j) = alpha * A(i,j);
232  }
233  }
234  }
235 };
236 
242 template<class ViewType,
243  class CoefficientType,
244  class IndexType>
245 struct SCAL<ViewType, CoefficientType, Kokkos::LayoutRight, IndexType, 2> {
247  static KOKKOS_INLINE_FUNCTION void
248  run (const CoefficientType& alpha, const ViewType& A)
249  {
250  const IndexType N = A.size ();
251  typedef typename std::decay<decltype (A(0,0)) >::type scalar_type;
252  scalar_type* const A_raw = A.ptr_on_device ();
253 
254  for (IndexType i = 0; i < N; ++i) {
255  A_raw[i] = alpha * A_raw[i];
256  }
257  }
258 };
259 
260 
265 template<class ViewType,
266  class InputType,
267  class LayoutType = typename ViewType::array_layout,
268  class IndexType = int,
269  const int rank = ViewType::rank>
270 struct FILL {
271  static KOKKOS_INLINE_FUNCTION void
272  run (const ViewType& x, const InputType& val);
273 };
274 
277 template<class ViewType,
278  class InputType,
279  class LayoutType,
280  class IndexType>
281 struct FILL<ViewType, InputType, LayoutType, IndexType, 1> {
282  static KOKKOS_INLINE_FUNCTION void
283  run (const ViewType& x, const InputType& val)
284  {
285  const IndexType numRows = static_cast<IndexType> (x.dimension_0 ());
286  for (IndexType i = 0; i < numRows; ++i) {
287  x(i) = val;
288  }
289  }
290 };
291 
294 template<class ViewType,
295  class InputType,
296  class LayoutType,
297  class IndexType>
298 struct FILL<ViewType, InputType, LayoutType, IndexType, 2> {
299  static KOKKOS_INLINE_FUNCTION void
300  run (const ViewType& X, const InputType& val)
301  {
302  const IndexType numRows = static_cast<IndexType> (X.dimension_0 ());
303  const IndexType numCols = static_cast<IndexType> (X.dimension_1 ());
304  for (IndexType j = 0; j < numCols; ++j) {
305  for (IndexType i = 0; i < numRows; ++i) {
306  X(i,j) = val;
307  }
308  }
309  }
310 };
311 
316 template<class CoefficientType,
317  class ViewType1,
318  class ViewType2,
319  class LayoutType1 = typename ViewType1::array_layout,
320  class LayoutType2 = typename ViewType2::array_layout,
321  class IndexType = int,
322  const int rank = ViewType1::rank>
323 struct AXPY {
324  static KOKKOS_INLINE_FUNCTION void
325  run (const CoefficientType& alpha,
326  const ViewType1& x,
327  const ViewType2& y);
328 };
329 
332 template<class CoefficientType,
333  class ViewType1,
334  class ViewType2,
335  class LayoutType1,
336  class LayoutType2,
337  class IndexType>
338 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
340  static KOKKOS_INLINE_FUNCTION void
341  run (const CoefficientType& alpha,
342  const ViewType1& x,
343  const ViewType2& y)
344  {
345  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
346  "AXPY: x and y must have the same rank.");
347  const IndexType numRows = static_cast<IndexType> (y.dimension_0 ());
348  if (alpha != 0.0) {
349  for (IndexType i = 0; i < numRows; ++i) {
350  y(i) += alpha * x(i);
351  }
352  }
353  }
354 };
355 
358 template<class CoefficientType,
359  class ViewType1,
360  class ViewType2,
361  class LayoutType1,
362  class LayoutType2,
363  class IndexType>
364 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
366  static KOKKOS_INLINE_FUNCTION void
367  run (const CoefficientType& alpha,
368  const ViewType1& X,
369  const ViewType2& Y)
370  {
371  static_assert (ViewType1::rank == ViewType2::rank,
372  "AXPY: X and Y must have the same rank.");
373  const IndexType numRows = static_cast<IndexType> (Y.dimension_0 ());
374  const IndexType numCols = static_cast<IndexType> (Y.dimension_1 ());
375 
376  if (alpha != 0.0) {
377  for (IndexType i = 0; i < numRows; ++i) {
378  for (IndexType j = 0; j < numCols; ++j) {
379  Y(i,j) += alpha * X(i,j);
380  }
381  }
382  }
383  }
384 };
385 
389 template<class CoefficientType,
390  class ViewType1,
391  class ViewType2,
392  class IndexType>
393 struct AXPY<CoefficientType, ViewType1, ViewType2, Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
395  static KOKKOS_INLINE_FUNCTION void
396  run (const CoefficientType& alpha,
397  const ViewType1& X,
398  const ViewType2& Y)
399  {
400  static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
401  "AXPY: X and Y must have the same rank.");
402  typedef typename std::decay<decltype (X(0,0)) >::type SX;
403  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
404 
405  const IndexType N = static_cast<IndexType> (Y.size ());
406  const SX* const X_raw = X.ptr_on_device ();
407  SY* const Y_raw = Y.ptr_on_device ();
408 
409  if (alpha != 0.0) {
410  for (IndexType i = 0; i < N; ++i) {
411  Y_raw[i] += alpha * X_raw[i];
412  }
413  }
414  }
415 };
416 
420 template<class CoefficientType,
421  class ViewType1,
422  class ViewType2,
423  class IndexType>
424 struct AXPY<CoefficientType, ViewType1, ViewType2, Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
426  static KOKKOS_INLINE_FUNCTION void
427  run (const CoefficientType& alpha,
428  const ViewType1& X,
429  const ViewType2& Y)
430  {
431  static_assert (ViewType1::rank == ViewType2::rank,
432  "AXPY: X and Y must have the same rank.");
433  typedef typename std::decay<decltype (X(0,0)) >::type SX;
434  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
435 
436  const IndexType N = static_cast<IndexType> (Y.size ());
437  const SX* const X_raw = X.ptr_on_device ();
438  SY* const Y_raw = Y.ptr_on_device ();
439 
440  if (alpha != 0.0) {
441  for (IndexType i = 0; i < N; ++i) {
442  Y_raw[i] += alpha * X_raw[i];
443  }
444  }
445  }
446 };
447 
452 template<class ViewType1,
453  class ViewType2,
454  class LayoutType1 = typename ViewType1::array_layout,
455  class LayoutType2 = typename ViewType2::array_layout,
456  class IndexType = int,
457  const int rank = ViewType1::rank>
458 struct COPY {
459  static KOKKOS_INLINE_FUNCTION void
460  run (const ViewType1& x, const ViewType2& y);
461 };
462 
465 template<class ViewType1,
466  class ViewType2,
467  class LayoutType1,
468  class LayoutType2,
469  class IndexType>
470 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
472  static KOKKOS_INLINE_FUNCTION void
473  run (const ViewType1& x, const ViewType2& y)
474  {
475  const IndexType numRows = static_cast<IndexType> (x.dimension_0 ());
476  for (IndexType i = 0; i < numRows; ++i) {
477  y(i) = x(i);
478  }
479  }
480 };
481 
484 template<class ViewType1,
485  class ViewType2,
486  class LayoutType1,
487  class LayoutType2,
488  class IndexType>
489 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
491  static KOKKOS_INLINE_FUNCTION void
492  run (const ViewType1& X, const ViewType2& Y)
493  {
494  const IndexType numRows = static_cast<IndexType> (Y.dimension_0 ());
495  const IndexType numCols = static_cast<IndexType> (Y.dimension_1 ());
496 
497  // BLAS _SCAL doesn't check whether alpha is 0.
498  for (IndexType i = 0; i < numRows; ++i) {
499  for (IndexType j = 0; j < numCols; ++j) {
500  Y(i,j) = X(i,j);
501  }
502  }
503  }
504 };
505 
509 template<class ViewType1,
510  class ViewType2,
511  class IndexType>
512 struct COPY<ViewType1, ViewType2, Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
514  static KOKKOS_INLINE_FUNCTION void
515  run (const ViewType1& X, const ViewType2& Y)
516  {
517  typedef typename std::decay<decltype (X(0,0)) >::type SX;
518  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
519 
520  const IndexType N = static_cast<IndexType> (Y.size ());
521  const SX* const X_raw = X.ptr_on_device ();
522  SY* const Y_raw = Y.ptr_on_device ();
523 
524  // BLAS _SCAL doesn't check whether alpha is 0.
525  for (IndexType i = 0; i < N; ++i) {
526  Y_raw[i] = X_raw[i];
527  }
528  }
529 };
530 
534 template<class ViewType1,
535  class ViewType2,
536  class IndexType>
537 struct COPY<ViewType1, ViewType2, Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
539  static KOKKOS_INLINE_FUNCTION void
540  run (const ViewType1& X, const ViewType2& Y)
541  {
542  typedef typename std::decay<decltype (X(0,0)) >::type SX;
543  typedef typename std::decay<decltype (Y(0,0)) >::type SY;
544 
545  const IndexType N = static_cast<IndexType> (Y.size ());
546  const SX* const X_raw = X.ptr_on_device ();
547  SY* const Y_raw = Y.ptr_on_device ();
548 
549  // BLAS _SCAL doesn't check whether alpha is 0.
550  for (IndexType i = 0; i < N; ++i) {
551  Y_raw[i] = X_raw[i];
552  }
553  }
554 };
555 
556 
557 template<class VecType1,
558  class BlkType,
559  class VecType2,
560  class CoeffType,
561  class IndexType = int,
562  class VecLayoutType1 = typename VecType1::array_layout,
563  class BlkLayoutType = typename BlkType::array_layout,
564  class VecLayoutType2 = typename VecType2::array_layout>
565 struct GEMV {
566  static KOKKOS_INLINE_FUNCTION void
567  run (const CoeffType& alpha,
568  const BlkType& A,
569  const VecType1& x,
570  const VecType2& y)
571  {
572  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
573  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
574  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
575  typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
576 
577  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
578  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
579 
580  for (IndexType i = 0; i < numRows; ++i) {
581  y_elt_type y_i = y(i);
582  for (IndexType j = 0; j < numCols; ++j) {
583  y_i += alpha * A(i,j) * x(j);
584  }
585  y(i) = y_i;
586  }
587  }
588 };
589 
590 template<class VecType1,
591  class BlkType,
592  class VecType2,
593  class CoeffType,
594  class IndexType>
595 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
596  Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight>
597 {
598  static KOKKOS_INLINE_FUNCTION void
599  run (const CoeffType& alpha,
600  const BlkType& A,
601  const VecType1& x,
602  const VecType2& y)
603  {
604  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
605  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
606  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
607  typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
608  typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
609 
610  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
611  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
612  const A_elt_type* const A_raw = A.ptr_on_device ();
613 
614  for (IndexType i = 0; i < numRows; ++i) {
615  y_elt_type y_i = y(i);
616  const A_elt_type* const A_i = A_raw + i*numCols;
617  for (IndexType j = 0; j < numCols; ++j) {
618  y_i += alpha * A_i[j] * x(j);
619  }
620  y(i) = y_i;
621  }
622  }
623 };
624 
625 template<class VecType1,
626  class BlkType,
627  class VecType2,
628  class CoeffType,
629  class IndexType>
630 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
631  Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft>
632 {
633  static KOKKOS_INLINE_FUNCTION void
634  run (const CoeffType& alpha,
635  const BlkType& A,
636  const VecType1& x,
637  const VecType2& y)
638  {
639  static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
640  static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
641  static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
642  typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
643 
644  const A_elt_type* const A_raw = A.ptr_on_device ();
645  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
646  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
647  for (IndexType j = 0; j < numCols; ++j) {
648  const A_elt_type* const A_j = A_raw + j*numRows;
649  for (IndexType i = 0; i < numRows; ++i) {
650  y(i) += alpha * A_j[i] * x(i);
651  }
652  }
653  }
654 };
655 
656 } // namespace Impl
657 
660 template<class ViewType,
661  class CoefficientType,
662  class LayoutType = typename ViewType::array_layout,
663  class IndexType = int,
664  const int rank = ViewType::rank>
665 KOKKOS_INLINE_FUNCTION void
666 SCAL (const CoefficientType& alpha, const ViewType& x)
667 {
669 }
670 
672 template<class ViewType,
673  class InputType,
674  class LayoutType = typename ViewType::array_layout,
675  class IndexType = int,
676  const int rank = ViewType::rank>
677 KOKKOS_INLINE_FUNCTION void
678 FILL (const ViewType& x, const InputType& val)
679 {
681 }
682 
688 template<class CoefficientType,
689  class ViewType1,
690  class ViewType2,
691  class LayoutType1 = typename ViewType1::array_layout,
692  class LayoutType2 = typename ViewType2::array_layout,
693  class IndexType = int,
694  const int rank = ViewType1::rank>
695 KOKKOS_INLINE_FUNCTION void
696 AXPY (const CoefficientType& alpha,
697  const ViewType1& x,
698  const ViewType2& y)
699 {
700  static_assert (static_cast<int> (ViewType1::rank) ==
701  static_cast<int> (ViewType2::rank),
702  "AXPY: x and y must have the same rank.");
703  Impl::AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2,
704  IndexType, rank>::run (alpha, x, y);
705 }
706 
715 template<class ViewType1,
716  class ViewType2,
717  class LayoutType1 = typename ViewType1::array_layout,
718  class LayoutType2 = typename ViewType2::array_layout,
719  class IndexType = int,
720  const int rank = ViewType1::rank>
721 KOKKOS_INLINE_FUNCTION void
722 COPY (const ViewType1& x, const ViewType2& y)
723 {
724  static_assert (static_cast<int> (ViewType1::rank) ==
725  static_cast<int> (ViewType2::rank),
726  "COPY: x and y must have the same rank.");
727  Impl::COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType,
728  rank>::run (x, y);
729 }
730 
742 template<class VecType1,
743  class BlkType,
744  class VecType2,
745  class CoeffType,
746  class IndexType = int>
747 KOKKOS_INLINE_FUNCTION void
748 GEMV (const CoeffType& alpha,
749  const BlkType& A,
750  const VecType1& x,
751  const VecType2& y)
752 {
753  Impl::GEMV<VecType1, BlkType, VecType2, CoeffType,
754  IndexType>::run (alpha, A, x, y);
755 }
756 
764 template<class ViewType1,
765  class ViewType2,
766  class ViewType3,
767  class CoefficientType,
768  class IndexType = int>
769 KOKKOS_INLINE_FUNCTION void
770 GEMM (const char transA[],
771  const char transB[],
772  const CoefficientType& alpha,
773  const ViewType1& A,
774  const ViewType2& B,
775  const CoefficientType& beta,
776  const ViewType3& C)
777 {
778  // Assert that A, B, and C are in fact matrices
779  static_assert (ViewType1::rank == 2, "GEMM: A must have rank 2 (be a matrix).");
780  static_assert (ViewType2::rank == 2, "GEMM: B must have rank 2 (be a matrix).");
781  static_assert (ViewType3::rank == 2, "GEMM: C must have rank 2 (be a matrix).");
782 
783  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
784  typedef Kokkos::Details::ArithTraits<Scalar> STS;
785  const Scalar ZERO = STS::zero();
786  const Scalar ONE = STS::one();
787 
788  // Get the dimensions
789  IndexType m, n, k;
790  if(transA[0] == 'N' || transA[0] == 'n') {
791  m = static_cast<IndexType> (A.dimension_0 ());
792  n = static_cast<IndexType> (A.dimension_1 ());
793  }
794  else {
795  m = static_cast<IndexType> (A.dimension_1 ());
796  n = static_cast<IndexType> (A.dimension_0 ());
797  }
798  k = static_cast<IndexType> (C.dimension_1 ());
799 
800  // quick return if possible
801  if(alpha == ZERO && beta == ONE)
802  return;
803 
804  // And if alpha equals zero...
805  if(alpha == ZERO) {
806  if(beta == ZERO) {
807  for(IndexType i=0; i<m; i++) {
808  for(IndexType j=0; j<k; j++) {
809  C(i,j) = ZERO;
810  }
811  }
812  }
813  else {
814  for(IndexType i=0; i<m; i++) {
815  for(IndexType j=0; j<k; j++) {
816  C(i,j) = beta*C(i,j);
817  }
818  }
819  }
820  }
821 
822  // Start the operations
823  if(transB[0] == 'n' || transB[0] == 'N') {
824  if(transA[0] == 'n' || transA[0] == 'N') {
825  // Form C = alpha*A*B + beta*C
826  for(IndexType j=0; j<n; j++) {
827  if(beta == ZERO) {
828  for(IndexType i=0; i<m; i++) {
829  C(i,j) = ZERO;
830  }
831  }
832  else if(beta != ONE) {
833  for(IndexType i=0; i<m; i++) {
834  C(i,j) = beta*C(i,j);
835  }
836  }
837  for(IndexType l=0; l<k; l++) {
838  Scalar temp = alpha*B(l,j);
839  for(IndexType i=0; i<m; i++) {
840  C(i,j) = C(i,j) + temp*A(i,l);
841  }
842  }
843  }
844  }
845  else {
846  // Form C = alpha*A**T*B + beta*C
847  for(IndexType j=0; j<n; j++) {
848  for(IndexType i=0; i<m; i++) {
849  Scalar temp = ZERO;
850  for(IndexType l=0; l<k; l++) {
851  temp = temp + A(l,i)*B(l,j);
852  }
853  if(beta == ZERO) {
854  C(i,j) = alpha*temp;
855  }
856  else {
857  C(i,j) = alpha*temp + beta*C(i,j);
858  }
859  }
860  }
861  }
862  }
863  else {
864  if(transA[0] == 'n' || transA[0] == 'N') {
865  // Form C = alpha*A*B**T + beta*C
866  for(IndexType j=0; j<n; j++) {
867  if(beta == ZERO) {
868  for(IndexType i=0; i<m; i++) {
869  C(i,j) = ZERO;
870  }
871  }
872  else if(beta != ONE) {
873  for(IndexType i=0; i<m; i++) {
874  C(i,j) = beta*C(i,j);
875  }
876  }
877  for(IndexType l=0; l<k; l++) {
878  Scalar temp = alpha*B(j,l);
879  for(IndexType i=0; i<m; i++) {
880  C(i,j) = C(i,j) + temp*A(i,l);
881  }
882  }
883  }
884  }
885  else {
886  // Form C = alpha*A**T*B**T + beta*C
887  for(IndexType j=0; j<n; j++) {
888  for(IndexType i=0; i<m; i++) {
889  Scalar temp = ZERO;
890  for(IndexType l=0; l<k; l++) {
891  temp = temp + A(l,i)*B(j,l);
892  }
893  if(beta == ZERO) {
894  C(i,j) = alpha*temp;
895  }
896  else {
897  C(i,j) = alpha*temp + beta*C(i,j);
898  }
899  }
900  }
901  }
902  }
903 }
904 
906 template<class LittleBlockType,
907  class LittleVectorType>
908 KOKKOS_INLINE_FUNCTION void
909 GETF2 (const LittleBlockType& A, const LittleVectorType& ipiv, int& info)
910 {
911  // The type of an entry of ipiv is the index type.
912  typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
913  static_assert (std::is_integral<IndexType>::value,
914  "GETF2: The type of each entry of ipiv must be an integer type.");
915  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
916  static_assert (! std::is_const<Scalar>::value,
917  "GETF2: A must not be a const View (or LittleBlock).");
918  static_assert (! std::is_const<std::remove_reference<decltype (ipiv(0))>>::value,
919  "GETF2: ipiv must not be a const View (or LittleBlock).");
920  static_assert (LittleBlockType::rank == 2, "GETF2: A must have rank 2 (be a matrix).");
921  typedef Kokkos::Details::ArithTraits<Scalar> STS;
922  const Scalar ZERO = STS::zero();
923 
924  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
925  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
926  const IndexType pivDim = static_cast<IndexType> (ipiv.dimension_0 ());
927 
928  // std::min is not a CUDA device function
929  const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
930  if (pivDim < minPivDim) {
931  info = -2;
932  return;
933  }
934 
935  // Initialize info
936  info = 0;
937 
938  for(IndexType j=0; j < pivDim; j++)
939  {
940  // Find pivot and test for singularity
941  IndexType jp = j;
942  for(IndexType i=j+1; i<numRows; i++)
943  {
944  if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
945  jp = i;
946  }
947  }
948  ipiv(j) = jp+1;
949 
950  if(A(jp,j) != ZERO)
951  {
952  // Apply the interchange to columns 1:N
953  if(jp != j)
954  {
955  for(IndexType i=0; i < numCols; i++)
956  {
957  Scalar temp = A(jp,i);
958  A(jp,i) = A(j,i);
959  A(j,i) = temp;
960  }
961  }
962 
963  // Compute elements J+1:M of J-th column
964  for(IndexType i=j+1; i<numRows; i++) {
965  A(i,j) = A(i,j) / A(j,j);
966  }
967  }
968  else if(info == 0) {
969  info = j;
970  }
971 
972  // Update trailing submatrix
973  for(IndexType r=j+1; r < numRows; r++)
974  {
975  for(IndexType c=j+1; c < numCols; c++) {
976  A(r,c) = A(r,c) - A(r,j) * A(j,c);
977  }
978  }
979  }
980 }
981 
982 namespace Impl {
983 
987 template<class LittleBlockType,
988  class LittleIntVectorType,
989  class LittleScalarVectorType,
990  const int rank = LittleScalarVectorType::rank>
991 struct GETRS {
992  static KOKKOS_INLINE_FUNCTION void
993  run (const char mode[],
994  const LittleBlockType& A,
995  const LittleIntVectorType& ipiv,
996  const LittleScalarVectorType& B,
997  int& info);
998 };
999 
1001 template<class LittleBlockType,
1002  class LittleIntVectorType,
1003  class LittleScalarVectorType>
1004 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
1005  static KOKKOS_INLINE_FUNCTION void
1006  run (const char mode[],
1007  const LittleBlockType& A,
1008  const LittleIntVectorType& ipiv,
1009  const LittleScalarVectorType& B,
1010  int& info)
1011  {
1012  // The type of an entry of ipiv is the index type.
1013  typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1014  // IndexType must be signed, because this code does a countdown loop
1015  // to zero. Unsigned integers are always >= 0, even on underflow.
1016  static_assert (std::is_integral<IndexType>::value &&
1017  std::is_signed<IndexType>::value,
1018  "GETRS: The type of each entry of ipiv must be a signed integer.");
1019  typedef typename std::decay<decltype (A(0,0))>::type Scalar;
1020  static_assert (! std::is_const<std::remove_reference<decltype (B(0))>>::value,
1021  "GETRS: B must not be a const View (or LittleBlock).");
1022  static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
1023  static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
1024  static_assert (LittleScalarVectorType::rank == 1, "GETRS: For this specialization, B must have rank 1.");
1025 
1026  typedef Kokkos::Details::ArithTraits<Scalar> STS;
1027  const Scalar ZERO = STS::zero();
1028  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
1029  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
1030  const IndexType pivDim = static_cast<IndexType> (ipiv.dimension_0 ());
1031 
1032  info = 0;
1033 
1034  // Ensure that the matrix is square
1035  if (numRows != numCols) {
1036  info = -2;
1037  return;
1038  }
1039 
1040  // Ensure that the pivot array is sufficiently large
1041  if (pivDim < numRows) {
1042  info = -3;
1043  return;
1044  }
1045 
1046  // No transpose case
1047  if(mode[0] == 'n' || mode[0] == 'N') {
1048  // Apply row interchanges to the RHS
1049  for(IndexType i=0; i<numRows; i++) {
1050  if(ipiv(i) != i+1) {
1051  Scalar temp = B(i);
1052  B(i) = B(ipiv(i)-1);
1053  B(ipiv(i)-1) = temp;
1054  }
1055  }
1056 
1057  // Solve Lx=b, overwriting b with x
1058  for(IndexType r=1; r < numRows; r++) {
1059  for(IndexType c=0; c < r; c++) {
1060  B(r) = B(r) - A(r,c)*B(c);
1061  }
1062  }
1063 
1064  // Solve Ux=b, overwriting b with x
1065  for(IndexType r=numRows-1; r >= 0; r--) {
1066  // Check whether U is singular
1067  if(A(r,r) == ZERO) {
1068  info = r+1;
1069  return;
1070  }
1071 
1072  for(IndexType c=r+1; c < numCols; c++) {
1073  B(r) = B(r) - A(r,c)*B(c);
1074  }
1075  B(r) = B(r) / A(r,r);
1076  }
1077  }
1078  // Transpose case
1079  else if(mode[0] == 't' || mode[0] == 'T') {
1080  info = -1; // NOT YET IMPLEMENTED
1081  return;
1082  }
1083  // Conjugate transpose case
1084  else if (mode[0] == 'c' || mode[0] == 'C') {
1085  info = -1; // NOT YET IMPLEMENTED
1086  return;
1087  }
1088  else { // invalid mode
1089  info = -1;
1090  return;
1091  }
1092  }
1093 };
1094 
1095 
1097 template<class LittleBlockType,
1098  class LittleIntVectorType,
1099  class LittleScalarVectorType>
1100 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1101  static KOKKOS_INLINE_FUNCTION void
1102  run (const char mode[],
1103  const LittleBlockType& A,
1104  const LittleIntVectorType& ipiv,
1105  const LittleScalarVectorType& B,
1106  int& info)
1107  {
1108  // The type of an entry of ipiv is the index type.
1109  typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
1110  static_assert (std::is_integral<IndexType>::value,
1111  "GETRS: The type of each entry of ipiv must be an integer type.");
1112  static_assert (! std::is_const<std::remove_reference<decltype (B(0)) > >::value,
1113  "GETRS: B must not be a const View (or LittleBlock).");
1114  static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
1115  static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
1116  static_assert (LittleScalarVectorType::rank == 2, "GETRS: For this specialization, B must have rank 2.");
1117 
1118  // The current implementation iterates over one right-hand side at
1119  // a time. It might be faster to do this differently, but this
1120  // should work for now.
1121  const IndexType numRhs = B.dimension_1 ();
1122  info = 0;
1123 
1124  for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1125  auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
1127  if (info != 0) {
1128  return;
1129  }
1130  }
1131  }
1132 };
1133 
1134 } // namespace Impl
1135 
1139 template<class LittleBlockType,
1140  class LittleIntVectorType,
1141  class LittleScalarVectorType>
1142 KOKKOS_INLINE_FUNCTION void
1143 GETRS (const char mode[],
1144  const LittleBlockType& A,
1145  const LittleIntVectorType& ipiv,
1146  const LittleScalarVectorType& B,
1147  int& info)
1148 {
1149  Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1150  LittleScalarVectorType::rank>::run (mode, A, ipiv, B, info);
1151 }
1152 
1153 
1168 template<class LittleBlockType,
1169  class LittleIntVectorType,
1170  class LittleScalarVectorType>
1171 KOKKOS_INLINE_FUNCTION void
1172 GETRI (const LittleBlockType& A,
1173  const LittleIntVectorType& ipiv,
1174  const LittleScalarVectorType& work,
1175  int& info)
1176 {
1177  // The type of an entry of ipiv is the index type.
1178  typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1179  // IndexType must be signed, because this code does a countdown loop
1180  // to zero. Unsigned integers are always >= 0, even on underflow.
1181  static_assert (std::is_integral<IndexType>::value &&
1182  std::is_signed<IndexType>::value,
1183  "GETRI: The type of each entry of ipiv must be a signed integer.");
1184  typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
1185  static_assert (! std::is_const<std::remove_reference<decltype (A(0,0))>>::value,
1186  "GETRI: A must not be a const View (or LittleBlock).");
1187  static_assert (! std::is_const<std::remove_reference<decltype (work(0))>>::value,
1188  "GETRI: work must not be a const View (or LittleBlock).");
1189  static_assert (LittleBlockType::rank == 2, "GETRI: A must have rank 2 (be a matrix).");
1190  typedef Kokkos::Details::ArithTraits<Scalar> STS;
1191  const Scalar ZERO = STS::zero();
1192  const Scalar ONE = STS::one();
1193 
1194  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
1195  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
1196  const IndexType pivDim = static_cast<IndexType> (ipiv.dimension_0 ());
1197  const IndexType workDim = static_cast<IndexType> (work.dimension_0 ());
1198 
1199  info = 0;
1200 
1201  // Ensure that the matrix is square
1202  if (numRows != numCols) {
1203  info = -1;
1204  return;
1205  }
1206 
1207  // Ensure that the pivot array is sufficiently large
1208  if (pivDim < numRows) {
1209  info = -2;
1210  return;
1211  }
1212 
1213  // Ensure that the work array is sufficiently large
1214  if (workDim < numRows) {
1215  info = -3;
1216  return;
1217  }
1218 
1219  // Form Uinv in place
1220  for(IndexType j=0; j < numRows; j++) {
1221  if(A(j,j) == ZERO) {
1222  info = j+1;
1223  return;
1224  }
1225 
1226  A(j,j) = ONE / A(j,j);
1227 
1228  // Compute elements 1:j-1 of j-th column
1229  for(IndexType r=0; r < j; r++) {
1230  A(r,j) = A(r,r)*A(r,j);
1231  for(IndexType c=r+1; c < j; c++) {
1232  A(r,j) = A(r,j) + A(r,c)*A(c,j);
1233  }
1234  }
1235  for(IndexType r=0; r < j; r++) {
1236  A(r,j) = -A(j,j)*A(r,j);
1237  }
1238  }
1239 
1240  // Compute Ainv by solving A\L = Uinv
1241  for(IndexType j = numCols-2; j >= 0; j--) {
1242  // Copy lower triangular data to work array and replace with 0
1243  for(IndexType r=j+1; r < numRows; r++) {
1244  work(r) = A(r,j);
1245  A(r,j) = 0;
1246  }
1247 
1248  for(IndexType r=0; r < numRows; r++) {
1249  for(IndexType i=j+1; i < numRows; i++) {
1250  A(r,j) = A(r,j) - work(i)*A(r,i);
1251  }
1252  }
1253  }
1254 
1255  // Apply column interchanges
1256  for(IndexType j=numRows-1; j >= 0; j--) {
1257  IndexType jp = ipiv(j)-1;
1258  if(j != jp) {
1259  for(IndexType r=0; r < numRows; r++) {
1260  Scalar temp = A(r,j);
1261  A(r,j) = A(r,jp);
1262  A(r,jp) = temp;
1263  }
1264  }
1265  }
1266 }
1267 
1268 
1269 // mfh 08 Nov 2015: I haven't tested this overload yet. It also needs
1270 // an implementation for trans != 'N' (the transpose and conjugate
1271 // transpose cases).
1272 #if 0
1273 template<class LittleBlockType,
1274  class LittleVectorTypeX,
1275  class LittleVectorTypeY,
1276  class CoefficientType,
1277  class IndexType = int>
1278 KOKKOS_INLINE_FUNCTION void
1279 GEMV (const char trans,
1280  const CoefficientType& alpha,
1281  const LittleBlockType& A,
1282  const LittleVectorTypeX& x,
1283  const CoefficientType& beta,
1284  const LittleVectorTypeY& y)
1285 {
1286  // y(0) returns a reference to the 0-th entry of y. Remove that
1287  // reference to get the type of each entry of y. It's OK if y has
1288  // zero entries -- this doesn't actually do y(i), it just returns
1289  // the type of that expression.
1290  typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1291  const IndexType numRows = static_cast<IndexType> (A.dimension_0 ());
1292  const IndexType numCols = static_cast<IndexType> (A.dimension_1 ());
1293 
1294  if (beta == 0.0) {
1295  if (alpha == 0.0) {
1296  for (IndexType i = 0; i < numRows; ++i) {
1297  y(i) = 0.0;
1298  }
1299  }
1300  else {
1301  for (IndexType i = 0; i < numRows; ++i) {
1302  y_value_type y_i = 0.0;
1303  for (IndexType j = 0; j < numCols; ++j) {
1304  y_i += A(i,j) * x(j);
1305  }
1306  y(i) = y_i;
1307  }
1308  }
1309  }
1310  else { // beta != 0
1311  if (alpha == 0.0) {
1312  if (beta == 0.0) {
1313  for (IndexType i = 0; i < numRows; ++i) {
1314  y(i) = 0.0;
1315  }
1316  }
1317  else {
1318  for (IndexType i = 0; i < numRows; ++i) {
1319  y(i) *= beta;
1320  }
1321  }
1322  }
1323  else {
1324  for (IndexType i = 0; i < numRows; ++i) {
1325  y_value_type y_i = beta * y(i);
1326  for (IndexType j = 0; j < numCols; ++j) {
1327  y_i += alpha * A(i,j) * x(j);
1328  }
1329  y(i) = y_i;
1330  }
1331  }
1332  }
1333 }
1334 
1335 #endif // 0
1336 
1337 } // namespace Experimental
1338 } // namespace Tpetra
1339 
1340 #endif // TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
Implementation of Tpetra&#39;s ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, or the small dense vectors in BlockMultiVector and BlockVector.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
Implementation of Tpetra::Experimental::FILL function.
Implementation of Tpetra::Experimental::SCAL function.
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra&#39;s ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Replace old values with zero.
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
Implementation of Tpetra::Experimental::COPY function.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Implementation of Tpetra::Experimental::AXPY function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)