42 #ifndef TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP 43 #define TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP 53 #include "TpetraCore_config.h" 54 #include "Kokkos_ArithTraits.hpp" 55 #include "Kokkos_Complex.hpp" 70 namespace Experimental {
80 template<
class ViewType1,
82 const int rank1 = ViewType1::rank>
84 static KOKKOS_INLINE_FUNCTION
void 85 run (
const ViewType2& Y,
const ViewType1& X);
92 template<
class ViewType1,
94 struct AbsMax<ViewType1, ViewType2, 2> {
97 static KOKKOS_INLINE_FUNCTION
void 98 run (
const ViewType2& Y,
const ViewType1& X)
100 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
101 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
102 typedef typename std::remove_reference<decltype (Y(0,0)) >::type STY;
103 static_assert(! std::is_const<STY>::value,
104 "AbsMax: The type of each entry of Y must be nonconst.");
105 typedef typename std::decay<decltype (X(0,0)) >::type STX;
106 static_assert( std::is_same<STX, STY>::value,
107 "AbsMax: The type of each entry of X and Y must be the same.");
108 typedef Kokkos::Details::ArithTraits<STY> KAT;
110 const int numCols = Y.dimension_1 ();
111 const int numRows = Y.dimension_0 ();
112 for (
int j = 0; j < numCols; ++j) {
113 for (
int i = 0; i < numRows; ++i) {
115 const STX X_ij = X(i,j);
118 const auto Y_ij_abs = KAT::abs (Y_ij);
119 const auto X_ij_abs = KAT::abs (X_ij);
120 Y_ij = (Y_ij_abs >= X_ij_abs) ?
121 static_cast<STY> (Y_ij_abs) :
122 static_cast<STY
> (X_ij_abs);
132 template<
class ViewType1,
137 static KOKKOS_INLINE_FUNCTION
void 138 run (
const ViewType2& Y,
const ViewType1& X)
140 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
141 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
143 typedef typename std::remove_reference<decltype (Y(0)) >::type STY;
144 static_assert(! std::is_const<STY>::value,
145 "AbsMax: The type of each entry of Y must be nonconst.");
146 typedef typename std::remove_const<typename std::remove_reference<decltype (X(0)) >::type>::type STX;
147 static_assert( std::is_same<STX, STY>::value,
148 "AbsMax: The type of each entry of X and Y must be the same.");
149 typedef Kokkos::Details::ArithTraits<STY> KAT;
151 const int numRows = Y.dimension_0 ();
152 for (
int i = 0; i < numRows; ++i) {
154 const STX X_i = X(i);
157 const auto Y_i_abs = KAT::abs (Y_i);
158 const auto X_i_abs = KAT::abs (X_i);
159 Y_i = (Y_i_abs >= X_i_abs) ?
160 static_cast<STY> (Y_i_abs) :
161 static_cast<STY
> (X_i_abs);
172 template<
class ViewType1,
class ViewType2, const
int rank = ViewType1::rank>
173 KOKKOS_INLINE_FUNCTION
void 174 absMax (
const ViewType2& Y,
const ViewType1& X)
176 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
177 "absMax: ViewType1 and ViewType2 must have the same rank.");
185 template<
class ViewType,
186 class CoefficientType,
187 class LayoutType =
typename ViewType::array_layout,
188 class IndexType = int,
189 const int rank = ViewType::rank>
191 static KOKKOS_INLINE_FUNCTION
void 192 run (
const CoefficientType& alpha,
const ViewType& x);
197 template<
class ViewType,
198 class CoefficientType,
201 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 1> {
203 static KOKKOS_INLINE_FUNCTION
void 204 run (
const CoefficientType& alpha,
const ViewType& x)
206 const IndexType numRows =
static_cast<IndexType
> (x.dimension_0 ());
208 for (IndexType i = 0; i < numRows; ++i) {
216 template<
class ViewType,
217 class CoefficientType,
220 struct SCAL<ViewType, CoefficientType, LayoutType, IndexType, 2> {
222 static KOKKOS_INLINE_FUNCTION
void 223 run (
const CoefficientType& alpha,
const ViewType& A)
225 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
226 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
229 for (IndexType i = 0; i < numRows; ++i) {
230 for (IndexType j = 0; j < numCols; ++j) {
231 A(i,j) = alpha * A(i,j);
242 template<
class ViewType,
243 class CoefficientType,
245 struct SCAL<ViewType, CoefficientType,
Kokkos::LayoutRight, IndexType, 2> {
247 static KOKKOS_INLINE_FUNCTION
void 248 run (
const CoefficientType& alpha,
const ViewType& A)
250 const IndexType N = A.size ();
251 typedef typename std::decay<decltype (A(0,0)) >::type scalar_type;
252 scalar_type*
const A_raw = A.ptr_on_device ();
254 for (IndexType i = 0; i < N; ++i) {
255 A_raw[i] = alpha * A_raw[i];
265 template<
class ViewType,
267 class LayoutType =
typename ViewType::array_layout,
268 class IndexType = int,
269 const int rank = ViewType::rank>
271 static KOKKOS_INLINE_FUNCTION
void 272 run (
const ViewType& x,
const InputType& val);
277 template<
class ViewType,
281 struct FILL<ViewType, InputType, LayoutType, IndexType, 1> {
282 static KOKKOS_INLINE_FUNCTION
void 283 run (
const ViewType& x,
const InputType& val)
285 const IndexType numRows =
static_cast<IndexType
> (x.dimension_0 ());
286 for (IndexType i = 0; i < numRows; ++i) {
294 template<
class ViewType,
298 struct FILL<ViewType, InputType, LayoutType, IndexType, 2> {
299 static KOKKOS_INLINE_FUNCTION
void 300 run (
const ViewType& X,
const InputType& val)
302 const IndexType numRows =
static_cast<IndexType
> (X.dimension_0 ());
303 const IndexType numCols =
static_cast<IndexType
> (X.dimension_1 ());
304 for (IndexType j = 0; j < numCols; ++j) {
305 for (IndexType i = 0; i < numRows; ++i) {
316 template<
class CoefficientType,
319 class LayoutType1 =
typename ViewType1::array_layout,
320 class LayoutType2 =
typename ViewType2::array_layout,
321 class IndexType = int,
322 const int rank = ViewType1::rank>
324 static KOKKOS_INLINE_FUNCTION
void 325 run (
const CoefficientType& alpha,
332 template<
class CoefficientType,
338 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
340 static KOKKOS_INLINE_FUNCTION
void 341 run (
const CoefficientType& alpha,
345 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
346 "AXPY: x and y must have the same rank.");
347 const IndexType numRows =
static_cast<IndexType
> (y.dimension_0 ());
349 for (IndexType i = 0; i < numRows; ++i) {
350 y(i) += alpha * x(i);
358 template<
class CoefficientType,
364 struct AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
366 static KOKKOS_INLINE_FUNCTION
void 367 run (
const CoefficientType& alpha,
371 static_assert (ViewType1::rank == ViewType2::rank,
372 "AXPY: X and Y must have the same rank.");
373 const IndexType numRows =
static_cast<IndexType
> (Y.dimension_0 ());
374 const IndexType numCols =
static_cast<IndexType
> (Y.dimension_1 ());
377 for (IndexType i = 0; i < numRows; ++i) {
378 for (IndexType j = 0; j < numCols; ++j) {
379 Y(i,j) += alpha * X(i,j);
389 template<
class CoefficientType,
393 struct AXPY<CoefficientType, ViewType1, ViewType2,
Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
395 static KOKKOS_INLINE_FUNCTION
void 396 run (
const CoefficientType& alpha,
400 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
401 "AXPY: X and Y must have the same rank.");
402 typedef typename std::decay<decltype (X(0,0)) >::type SX;
403 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
405 const IndexType N =
static_cast<IndexType
> (Y.size ());
406 const SX*
const X_raw = X.ptr_on_device ();
407 SY*
const Y_raw = Y.ptr_on_device ();
410 for (IndexType i = 0; i < N; ++i) {
411 Y_raw[i] += alpha * X_raw[i];
420 template<
class CoefficientType,
424 struct AXPY<CoefficientType, ViewType1, ViewType2,
Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
426 static KOKKOS_INLINE_FUNCTION
void 427 run (
const CoefficientType& alpha,
431 static_assert (ViewType1::rank == ViewType2::rank,
432 "AXPY: X and Y must have the same rank.");
433 typedef typename std::decay<decltype (X(0,0)) >::type SX;
434 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
436 const IndexType N =
static_cast<IndexType
> (Y.size ());
437 const SX*
const X_raw = X.ptr_on_device ();
438 SY*
const Y_raw = Y.ptr_on_device ();
441 for (IndexType i = 0; i < N; ++i) {
442 Y_raw[i] += alpha * X_raw[i];
452 template<
class ViewType1,
454 class LayoutType1 =
typename ViewType1::array_layout,
455 class LayoutType2 =
typename ViewType2::array_layout,
456 class IndexType = int,
457 const int rank = ViewType1::rank>
459 static KOKKOS_INLINE_FUNCTION
void 460 run (
const ViewType1& x,
const ViewType2& y);
465 template<
class ViewType1,
470 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 1> {
472 static KOKKOS_INLINE_FUNCTION
void 473 run (
const ViewType1& x,
const ViewType2& y)
475 const IndexType numRows =
static_cast<IndexType
> (x.dimension_0 ());
476 for (IndexType i = 0; i < numRows; ++i) {
484 template<
class ViewType1,
489 struct COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType, 2> {
491 static KOKKOS_INLINE_FUNCTION
void 492 run (
const ViewType1& X,
const ViewType2& Y)
494 const IndexType numRows =
static_cast<IndexType
> (Y.dimension_0 ());
495 const IndexType numCols =
static_cast<IndexType
> (Y.dimension_1 ());
498 for (IndexType i = 0; i < numRows; ++i) {
499 for (IndexType j = 0; j < numCols; ++j) {
509 template<
class ViewType1,
512 struct COPY<ViewType1, ViewType2,
Kokkos::LayoutRight, Kokkos::LayoutRight, IndexType, 2> {
514 static KOKKOS_INLINE_FUNCTION
void 515 run (
const ViewType1& X,
const ViewType2& Y)
517 typedef typename std::decay<decltype (X(0,0)) >::type SX;
518 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
520 const IndexType N =
static_cast<IndexType
> (Y.size ());
521 const SX*
const X_raw = X.ptr_on_device ();
522 SY*
const Y_raw = Y.ptr_on_device ();
525 for (IndexType i = 0; i < N; ++i) {
534 template<
class ViewType1,
537 struct COPY<ViewType1, ViewType2,
Kokkos::LayoutLeft, Kokkos::LayoutLeft, IndexType, 2> {
539 static KOKKOS_INLINE_FUNCTION
void 540 run (
const ViewType1& X,
const ViewType2& Y)
542 typedef typename std::decay<decltype (X(0,0)) >::type SX;
543 typedef typename std::decay<decltype (Y(0,0)) >::type SY;
545 const IndexType N =
static_cast<IndexType
> (Y.size ());
546 const SX*
const X_raw = X.ptr_on_device ();
547 SY*
const Y_raw = Y.ptr_on_device ();
550 for (IndexType i = 0; i < N; ++i) {
557 template<
class VecType1,
561 class IndexType = int,
562 class VecLayoutType1 =
typename VecType1::array_layout,
563 class BlkLayoutType =
typename BlkType::array_layout,
564 class VecLayoutType2 =
typename VecType2::array_layout>
566 static KOKKOS_INLINE_FUNCTION
void 567 run (
const CoeffType& alpha,
572 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
573 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
574 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
575 typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
577 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
578 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
580 for (IndexType i = 0; i < numRows; ++i) {
581 y_elt_type y_i = y(i);
582 for (IndexType j = 0; j < numCols; ++j) {
583 y_i += alpha * A(i,j) * x(j);
590 template<
class VecType1,
595 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
596 Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight>
598 static KOKKOS_INLINE_FUNCTION
void 599 run (
const CoeffType& alpha,
604 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
605 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
606 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
607 typedef typename std::decay<decltype (y(0)) >::type y_elt_type;
608 typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
610 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
611 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
612 const A_elt_type*
const A_raw = A.ptr_on_device ();
614 for (IndexType i = 0; i < numRows; ++i) {
615 y_elt_type y_i = y(i);
616 const A_elt_type*
const A_i = A_raw + i*numCols;
617 for (IndexType j = 0; j < numCols; ++j) {
618 y_i += alpha * A_i[j] * x(j);
625 template<
class VecType1,
630 struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType,
631 Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft>
633 static KOKKOS_INLINE_FUNCTION
void 634 run (
const CoeffType& alpha,
639 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
640 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
641 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
642 typedef typename std::decay<decltype (A(0,0)) >::type A_elt_type;
644 const A_elt_type*
const A_raw = A.ptr_on_device ();
645 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
646 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
647 for (IndexType j = 0; j < numCols; ++j) {
648 const A_elt_type*
const A_j = A_raw + j*numRows;
649 for (IndexType i = 0; i < numRows; ++i) {
650 y(i) += alpha * A_j[i] * x(i);
660 template<
class ViewType,
661 class CoefficientType,
662 class LayoutType =
typename ViewType::array_layout,
663 class IndexType = int,
664 const int rank = ViewType::rank>
665 KOKKOS_INLINE_FUNCTION
void 666 SCAL (
const CoefficientType& alpha,
const ViewType& x)
672 template<
class ViewType,
674 class LayoutType =
typename ViewType::array_layout,
675 class IndexType = int,
676 const int rank = ViewType::rank>
677 KOKKOS_INLINE_FUNCTION
void 678 FILL (
const ViewType& x,
const InputType& val)
688 template<
class CoefficientType,
691 class LayoutType1 =
typename ViewType1::array_layout,
692 class LayoutType2 =
typename ViewType2::array_layout,
693 class IndexType = int,
694 const int rank = ViewType1::rank>
695 KOKKOS_INLINE_FUNCTION
void 696 AXPY (
const CoefficientType& alpha,
700 static_assert (static_cast<int> (ViewType1::rank) ==
701 static_cast<int> (ViewType2::rank),
702 "AXPY: x and y must have the same rank.");
703 Impl::AXPY<CoefficientType, ViewType1, ViewType2, LayoutType1, LayoutType2,
704 IndexType, rank>::run (alpha, x, y);
715 template<
class ViewType1,
717 class LayoutType1 =
typename ViewType1::array_layout,
718 class LayoutType2 =
typename ViewType2::array_layout,
719 class IndexType = int,
720 const int rank = ViewType1::rank>
721 KOKKOS_INLINE_FUNCTION
void 722 COPY (
const ViewType1& x,
const ViewType2& y)
724 static_assert (static_cast<int> (ViewType1::rank) ==
725 static_cast<int> (ViewType2::rank),
726 "COPY: x and y must have the same rank.");
727 Impl::COPY<ViewType1, ViewType2, LayoutType1, LayoutType2, IndexType,
742 template<
class VecType1,
746 class IndexType =
int>
747 KOKKOS_INLINE_FUNCTION
void 753 Impl::GEMV<VecType1, BlkType, VecType2, CoeffType,
754 IndexType>::run (alpha, A, x, y);
764 template<
class ViewType1,
767 class CoefficientType,
768 class IndexType =
int>
769 KOKKOS_INLINE_FUNCTION
void 772 const CoefficientType& alpha,
775 const CoefficientType& beta,
779 static_assert (ViewType1::rank == 2,
"GEMM: A must have rank 2 (be a matrix).");
780 static_assert (ViewType2::rank == 2,
"GEMM: B must have rank 2 (be a matrix).");
781 static_assert (ViewType3::rank == 2,
"GEMM: C must have rank 2 (be a matrix).");
783 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
784 typedef Kokkos::Details::ArithTraits<Scalar> STS;
785 const Scalar
ZERO = STS::zero();
786 const Scalar ONE = STS::one();
790 if(transA[0] ==
'N' || transA[0] ==
'n') {
791 m =
static_cast<IndexType
> (A.dimension_0 ());
792 n =
static_cast<IndexType
> (A.dimension_1 ());
795 m =
static_cast<IndexType
> (A.dimension_1 ());
796 n =
static_cast<IndexType
> (A.dimension_0 ());
798 k =
static_cast<IndexType
> (C.dimension_1 ());
801 if(alpha ==
ZERO && beta == ONE)
807 for(IndexType i=0; i<m; i++) {
808 for(IndexType j=0; j<k; j++) {
814 for(IndexType i=0; i<m; i++) {
815 for(IndexType j=0; j<k; j++) {
816 C(i,j) = beta*C(i,j);
823 if(transB[0] ==
'n' || transB[0] ==
'N') {
824 if(transA[0] ==
'n' || transA[0] ==
'N') {
826 for(IndexType j=0; j<n; j++) {
828 for(IndexType i=0; i<m; i++) {
832 else if(beta != ONE) {
833 for(IndexType i=0; i<m; i++) {
834 C(i,j) = beta*C(i,j);
837 for(IndexType l=0; l<k; l++) {
838 Scalar temp = alpha*B(l,j);
839 for(IndexType i=0; i<m; i++) {
840 C(i,j) = C(i,j) + temp*A(i,l);
847 for(IndexType j=0; j<n; j++) {
848 for(IndexType i=0; i<m; i++) {
850 for(IndexType l=0; l<k; l++) {
851 temp = temp + A(l,i)*B(l,j);
857 C(i,j) = alpha*temp + beta*C(i,j);
864 if(transA[0] ==
'n' || transA[0] ==
'N') {
866 for(IndexType j=0; j<n; j++) {
868 for(IndexType i=0; i<m; i++) {
872 else if(beta != ONE) {
873 for(IndexType i=0; i<m; i++) {
874 C(i,j) = beta*C(i,j);
877 for(IndexType l=0; l<k; l++) {
878 Scalar temp = alpha*B(j,l);
879 for(IndexType i=0; i<m; i++) {
880 C(i,j) = C(i,j) + temp*A(i,l);
887 for(IndexType j=0; j<n; j++) {
888 for(IndexType i=0; i<m; i++) {
890 for(IndexType l=0; l<k; l++) {
891 temp = temp + A(l,i)*B(j,l);
897 C(i,j) = alpha*temp + beta*C(i,j);
906 template<
class LittleBlockType,
907 class LittleVectorType>
908 KOKKOS_INLINE_FUNCTION
void 909 GETF2 (
const LittleBlockType& A,
const LittleVectorType& ipiv,
int& info)
912 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
913 static_assert (std::is_integral<IndexType>::value,
914 "GETF2: The type of each entry of ipiv must be an integer type.");
915 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
916 static_assert (! std::is_const<Scalar>::value,
917 "GETF2: A must not be a const View (or LittleBlock).");
918 static_assert (! std::is_const<std::remove_reference<decltype (ipiv(0))>>::value,
919 "GETF2: ipiv must not be a const View (or LittleBlock).");
920 static_assert (LittleBlockType::rank == 2,
"GETF2: A must have rank 2 (be a matrix).");
921 typedef Kokkos::Details::ArithTraits<Scalar> STS;
922 const Scalar
ZERO = STS::zero();
924 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
925 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
926 const IndexType pivDim =
static_cast<IndexType
> (ipiv.dimension_0 ());
929 const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
930 if (pivDim < minPivDim) {
938 for(IndexType j=0; j < pivDim; j++)
942 for(IndexType i=j+1; i<numRows; i++)
944 if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
955 for(IndexType i=0; i < numCols; i++)
957 Scalar temp = A(jp,i);
964 for(IndexType i=j+1; i<numRows; i++) {
965 A(i,j) = A(i,j) / A(j,j);
973 for(IndexType r=j+1; r < numRows; r++)
975 for(IndexType c=j+1; c < numCols; c++) {
976 A(r,c) = A(r,c) - A(r,j) * A(j,c);
987 template<
class LittleBlockType,
988 class LittleIntVectorType,
989 class LittleScalarVectorType,
990 const int rank = LittleScalarVectorType::rank>
992 static KOKKOS_INLINE_FUNCTION
void 993 run (
const char mode[],
994 const LittleBlockType& A,
995 const LittleIntVectorType& ipiv,
996 const LittleScalarVectorType& B,
1001 template<
class LittleBlockType,
1002 class LittleIntVectorType,
1003 class LittleScalarVectorType>
1004 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
1005 static KOKKOS_INLINE_FUNCTION
void 1006 run (
const char mode[],
1007 const LittleBlockType& A,
1008 const LittleIntVectorType& ipiv,
1009 const LittleScalarVectorType& B,
1013 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1016 static_assert (std::is_integral<IndexType>::value &&
1017 std::is_signed<IndexType>::value,
1018 "GETRS: The type of each entry of ipiv must be a signed integer.");
1019 typedef typename std::decay<decltype (A(0,0))>::type Scalar;
1020 static_assert (! std::is_const<std::remove_reference<decltype (B(0))>>::value,
1021 "GETRS: B must not be a const View (or LittleBlock).");
1022 static_assert (LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
1023 static_assert (LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
1024 static_assert (LittleScalarVectorType::rank == 1,
"GETRS: For this specialization, B must have rank 1.");
1026 typedef Kokkos::Details::ArithTraits<Scalar> STS;
1027 const Scalar
ZERO = STS::zero();
1028 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
1029 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
1030 const IndexType pivDim =
static_cast<IndexType
> (ipiv.dimension_0 ());
1035 if (numRows != numCols) {
1041 if (pivDim < numRows) {
1047 if(mode[0] ==
'n' || mode[0] ==
'N') {
1049 for(IndexType i=0; i<numRows; i++) {
1050 if(ipiv(i) != i+1) {
1052 B(i) = B(ipiv(i)-1);
1053 B(ipiv(i)-1) = temp;
1058 for(IndexType r=1; r < numRows; r++) {
1059 for(IndexType c=0; c < r; c++) {
1060 B(r) = B(r) - A(r,c)*B(c);
1065 for(IndexType r=numRows-1; r >= 0; r--) {
1067 if(A(r,r) ==
ZERO) {
1072 for(IndexType c=r+1; c < numCols; c++) {
1073 B(r) = B(r) - A(r,c)*B(c);
1075 B(r) = B(r) / A(r,r);
1079 else if(mode[0] ==
't' || mode[0] ==
'T') {
1084 else if (mode[0] ==
'c' || mode[0] ==
'C') {
1097 template<
class LittleBlockType,
1098 class LittleIntVectorType,
1099 class LittleScalarVectorType>
1100 struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1101 static KOKKOS_INLINE_FUNCTION
void 1102 run (
const char mode[],
1103 const LittleBlockType& A,
1104 const LittleIntVectorType& ipiv,
1105 const LittleScalarVectorType& B,
1109 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
1110 static_assert (std::is_integral<IndexType>::value,
1111 "GETRS: The type of each entry of ipiv must be an integer type.");
1112 static_assert (! std::is_const<std::remove_reference<decltype (B(0)) > >::value,
1113 "GETRS: B must not be a const View (or LittleBlock).");
1114 static_assert (LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
1115 static_assert (LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
1116 static_assert (LittleScalarVectorType::rank == 2,
"GETRS: For this specialization, B must have rank 2.");
1121 const IndexType numRhs = B.dimension_1 ();
1124 for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1125 auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
1139 template<
class LittleBlockType,
1140 class LittleIntVectorType,
1141 class LittleScalarVectorType>
1142 KOKKOS_INLINE_FUNCTION
void 1144 const LittleBlockType& A,
1145 const LittleIntVectorType& ipiv,
1146 const LittleScalarVectorType& B,
1149 Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1150 LittleScalarVectorType::rank>::run (mode, A, ipiv, B, info);
1168 template<
class LittleBlockType,
1169 class LittleIntVectorType,
1170 class LittleScalarVectorType>
1171 KOKKOS_INLINE_FUNCTION
void 1173 const LittleIntVectorType& ipiv,
1174 const LittleScalarVectorType& work,
1178 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1181 static_assert (std::is_integral<IndexType>::value &&
1182 std::is_signed<IndexType>::value,
1183 "GETRI: The type of each entry of ipiv must be a signed integer.");
1184 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
1185 static_assert (! std::is_const<std::remove_reference<decltype (A(0,0))>>::value,
1186 "GETRI: A must not be a const View (or LittleBlock).");
1187 static_assert (! std::is_const<std::remove_reference<decltype (work(0))>>::value,
1188 "GETRI: work must not be a const View (or LittleBlock).");
1189 static_assert (LittleBlockType::rank == 2,
"GETRI: A must have rank 2 (be a matrix).");
1190 typedef Kokkos::Details::ArithTraits<Scalar> STS;
1191 const Scalar
ZERO = STS::zero();
1192 const Scalar ONE = STS::one();
1194 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
1195 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
1196 const IndexType pivDim =
static_cast<IndexType
> (ipiv.dimension_0 ());
1197 const IndexType workDim =
static_cast<IndexType
> (work.dimension_0 ());
1202 if (numRows != numCols) {
1208 if (pivDim < numRows) {
1214 if (workDim < numRows) {
1220 for(IndexType j=0; j < numRows; j++) {
1221 if(A(j,j) ==
ZERO) {
1226 A(j,j) = ONE / A(j,j);
1229 for(IndexType r=0; r < j; r++) {
1230 A(r,j) = A(r,r)*A(r,j);
1231 for(IndexType c=r+1; c < j; c++) {
1232 A(r,j) = A(r,j) + A(r,c)*A(c,j);
1235 for(IndexType r=0; r < j; r++) {
1236 A(r,j) = -A(j,j)*A(r,j);
1241 for(IndexType j = numCols-2; j >= 0; j--) {
1243 for(IndexType r=j+1; r < numRows; r++) {
1248 for(IndexType r=0; r < numRows; r++) {
1249 for(IndexType i=j+1; i < numRows; i++) {
1250 A(r,j) = A(r,j) - work(i)*A(r,i);
1256 for(IndexType j=numRows-1; j >= 0; j--) {
1257 IndexType jp = ipiv(j)-1;
1259 for(IndexType r=0; r < numRows; r++) {
1260 Scalar temp = A(r,j);
1273 template<
class LittleBlockType,
1274 class LittleVectorTypeX,
1275 class LittleVectorTypeY,
1276 class CoefficientType,
1277 class IndexType =
int>
1278 KOKKOS_INLINE_FUNCTION
void 1279 GEMV (
const char trans,
1280 const CoefficientType& alpha,
1281 const LittleBlockType& A,
1282 const LittleVectorTypeX& x,
1283 const CoefficientType& beta,
1284 const LittleVectorTypeY& y)
1290 typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1291 const IndexType numRows =
static_cast<IndexType
> (A.dimension_0 ());
1292 const IndexType numCols =
static_cast<IndexType
> (A.dimension_1 ());
1296 for (IndexType i = 0; i < numRows; ++i) {
1301 for (IndexType i = 0; i < numRows; ++i) {
1302 y_value_type y_i = 0.0;
1303 for (IndexType j = 0; j < numCols; ++j) {
1304 y_i += A(i,j) * x(j);
1313 for (IndexType i = 0; i < numRows; ++i) {
1318 for (IndexType i = 0; i < numRows; ++i) {
1324 for (IndexType i = 0; i < numRows; ++i) {
1325 y_value_type y_i = beta * y(i);
1326 for (IndexType j = 0; j < numCols; ++j) {
1327 y_i += alpha * A(i,j) * x(j);
1340 #endif // TPETRA_EXPERIMENTAL_BLOCKVIEW_HPP static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, or the small dense vectors in BlockMultiVector and BlockVector.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
Implementation of Tpetra::Experimental::FILL function.
Computes the solution to Ax=b.
Implementation of Tpetra::Experimental::SCAL function.
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix, and the small dense vectors in BlockMultiVector and BlockVector.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Replace old values with zero.
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
Implementation of Tpetra::Experimental::COPY function.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Implementation of Tpetra::Experimental::AXPY function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)