9 #ifndef stk_util_parallel_ParallelReduce_hpp 10 #define stk_util_parallel_ParallelReduce_hpp 15 #include <stk_util/parallel/Parallel.hpp> 16 #include <stk_util/util/SimpleArrayOps.hpp> 33 const std::string & );
37 const double * local ,
double * global ,
unsigned count );
41 const float * local ,
float * global ,
unsigned count );
45 const int * local ,
int * global ,
unsigned count );
49 const size_t * local ,
size_t * global ,
unsigned count );
53 const unsigned * local ,
54 unsigned * global ,
unsigned count );
74 template <
class ReduceOp >
84 #ifndef DOXYGEN_COMPILE 93 void copyin( WorkType & )
const {}
94 void copyout( WorkType & )
const {}
95 static void op( WorkType & , WorkType & ) {}
100 template <
class Op,
typename T,
class Next>
107 typename Next::WorkType m_next ;
115 void copyin( WorkType & w )
const 116 { Copy<N>( w.m_value , m_value ); m_next.copyin( w.m_next ); }
119 void copyout( WorkType & w )
const 120 { Copy<N>( m_value , w.m_value ); m_next.copyout( w.m_next ); }
123 static void op( WorkType & out , WorkType & in )
124 { Op(
out.m_value , in.m_value ); Next::op(
out.m_next , in.m_next ); }
127 template<
class OpB,
typename TB>
128 Reduce<OpB, TB, Reduce<Op,T,Next> >
129 operator & (
const Reduce<OpB,TB,ReduceEnd> & rhs )
130 {
return Reduce<OpB, TB, Reduce<Op,T,Next> >( rhs , *this ); }
133 Reduce(
const Reduce<Op,T, ReduceEnd> & arg_val ,
const Next & arg_next )
134 : m_next( arg_next ), m_value( arg_val.m_value ) {}
137 explicit Reduce( Type * arg_value )
138 : m_next(), m_value( arg_value ) {}
143 template <
class Op,
typename T,
class Next>
144 void Reduce<Op,T,Next>::void_op(
void*inv,
void*inoutv,
int*,
ParallelDatatype*)
146 op( * reinterpret_cast<WorkType*>( inoutv ) ,
147 * reinterpret_cast<WorkType*>( inv ) );
158 template<
unsigned N,
typename T>
160 Reduce< Sum<N> , T, ReduceEnd>
ReduceSum( T * value )
161 {
return Reduce< Sum<N>, T, ReduceEnd >( value ); }
163 template<
unsigned N,
typename T>
165 Reduce< Prod<N>, T, ReduceEnd >
ReduceProd( T * value )
166 {
return Reduce< Prod<N>, T, ReduceEnd >( value ); }
168 template<
unsigned N,
typename T>
170 Reduce< Max<N>, T, ReduceEnd>
ReduceMax( T * value )
171 {
return Reduce< Max<N>, T, ReduceEnd>( value ); }
173 template<
unsigned N,
typename T>
175 Reduce< Min<N>, T, ReduceEnd>
ReduceMin( T * value )
176 {
return Reduce<Min<N>, T, ReduceEnd>( value ); }
178 template<
unsigned N,
typename T>
180 Reduce< BitOr<N>, T, ReduceEnd> ReduceBitOr( T * value )
181 {
return Reduce< BitOr<N>, T, ReduceEnd>( value ); }
183 template<
unsigned N,
typename T>
185 Reduce< BitAnd<N>, T, ReduceEnd> ReduceBitAnd( T * value )
186 {
return Reduce< BitAnd<N>, T, ReduceEnd>( value ); }
192 typedef void (*ParallelReduceOp)
197 ParallelReduceOp arg_op ,
204 template <
class ReduceOp >
207 typedef typename ReduceOp::WorkType WorkType ;
209 WorkType inbuf , outbuf ;
212 reinterpret_cast<ParallelReduceOp
>( & ReduceOp::void_op );
214 all_reduce( comm , f , & inbuf, & outbuf,
sizeof(WorkType) );
215 op.copyout( outbuf );
220 template <
class ReduceOp >
223 { all_reduce_driver<ReduceOp>( comm , op ); }
void all_reduce_bor(ParallelMachine comm, const unsigned *local, unsigned *global, unsigned count)
Parallel bitwise-or to all processors.
void all_reduce_sum(ParallelMachine comm, const double *local, double *global, unsigned count)
Parallel summation to all processors.
std::ostream & out()
Normal output stream.
Reduce< Sum, T * > * ReduceSum(T *t, T *u, size_t length)
Member function ReduceSum ...
Reduce< Prod, T * > * ReduceProd(T *t, T *u, size_t length)
Member function ReduceProd ...
void all_write_string(ParallelMachine arg_comm, std::ostream &arg_root_os, const std::string &arg_msg)
Write string from any or all processors to the ostream on the root processor.
Reduce< Max, T * > * ReduceMax(T *t, T *u, size_t length)
Member function ReduceMax ...
MPI_Datatype ParallelDatatype
Reduce< Min, T * > * ReduceMin(T *t, T *u, size_t length)
Member function ReduceMin ...