Teuchos - Trilinos Tools Package  Version of the Day
Teuchos_TimeMonitor.cpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Teuchos: Common Tools Package
5 // Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #include "Teuchos_TimeMonitor.hpp"
43 #include "Teuchos_CommHelpers.hpp"
44 #include "Teuchos_DefaultComm.hpp"
45 #include "Teuchos_TableColumn.hpp"
46 #include "Teuchos_TableFormat.hpp"
47 #include "Teuchos_StandardParameterEntryValidators.hpp"
48 #include "Teuchos_ScalarTraits.hpp"
49 #include <functional>
50 
51 
52 namespace Teuchos {
105  template<class Ordinal, class ScalarType, class IndexType>
106  class MaxLoc :
107  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
108  public:
109  void
110  reduce (const Ordinal count,
111  const std::pair<ScalarType, IndexType> inBuffer[],
112  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
113  };
114 
115  template<class Ordinal>
116  class MaxLoc<Ordinal, double, int> :
117  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
118  public:
119  void
120  reduce (const Ordinal count,
121  const std::pair<double, int> inBuffer[],
122  std::pair<double, int> inoutBuffer[]) const
123  {
124  for (Ordinal ind = 0; ind < count; ++ind) {
125  const std::pair<double, int>& in = inBuffer[ind];
126  std::pair<double, int>& inout = inoutBuffer[ind];
127 
128  if (in.first > inout.first) {
129  inout.first = in.first;
130  inout.second = in.second;
131  } else if (in.first < inout.first) {
132  // Don't need to do anything; inout has the values.
133  } else { // equal, or at least one is NaN.
134  inout.first = in.first;
135  inout.second = std::min (in.second, inout.second);
136  }
137  }
138  }
139  };
140 
167  template<class Ordinal, class ScalarType, class IndexType>
168  class MinLoc :
169  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
170  public:
171  void
172  reduce (const Ordinal count,
173  const std::pair<ScalarType, IndexType> inBuffer[],
174  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
175  };
176 
177  template<class Ordinal>
178  class MinLoc<Ordinal, double, int> :
179  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
180  public:
181  void
182  reduce (const Ordinal count,
183  const std::pair<double, int> inBuffer[],
184  std::pair<double, int> inoutBuffer[]) const
185  {
186  for (Ordinal ind = 0; ind < count; ++ind) {
187  const std::pair<double, int>& in = inBuffer[ind];
188  std::pair<double, int>& inout = inoutBuffer[ind];
189 
190  if (in.first < inout.first) {
191  inout.first = in.first;
192  inout.second = in.second;
193  } else if (in.first > inout.first) {
194  // Don't need to do anything; inout has the values.
195  } else { // equal, or at least one is NaN.
196  inout.first = in.first;
197  inout.second = std::min (in.second, inout.second);
198  }
199  }
200  }
201  };
202 
206  template<class Ordinal, class ScalarType, class IndexType>
208  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
209  public:
210  void
211  reduce (const Ordinal count,
212  const std::pair<ScalarType, IndexType> inBuffer[],
213  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
214  };
215 
216  template<class Ordinal>
217  class MinLocNonzero<Ordinal, double, int> :
218  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
219  public:
220  void
221  reduce (const Ordinal count,
222  const std::pair<double, int> inBuffer[],
223  std::pair<double, int> inoutBuffer[]) const
224  {
225  for (Ordinal ind = 0; ind < count; ++ind) {
226  const std::pair<double, int>& in = inBuffer[ind];
227  std::pair<double, int>& inout = inoutBuffer[ind];
228 
229  if ( (in.first < inout.first && in.first != 0) || (inout.first == 0 && in.first != 0) ) {
230  inout.first = in.first;
231  inout.second = in.second;
232  } else if (in.first > inout.first) {
233  // Don't need to do anything; inout has the values.
234  } else { // equal, or at least one is NaN.
235  inout.first = in.first;
236  inout.second = std::min (in.second, inout.second);
237  }
238  }
239  }
240  };
241 
242  // Typedef used internally by TimeMonitor::summarize() and its
243  // helper functions. The map is keyed on timer label (a string).
244  // Each value is a pair: (total number of seconds over all calls to
245  // that timer, total number of calls to that timer).
// Because this is a std::map, iterating over it visits the timers in
// sorted order of their labels.
246  typedef std::map<std::string, std::pair<double, int> > timer_map_t;
247 
248  TimeMonitor::TimeMonitor (Time& timer, bool reset)
249  : PerformanceMonitorBase<Time>(timer, reset)
250  {
251  if (!isRecursiveCall()) counter().start(reset);
252  }
253 
255  if (!isRecursiveCall()) counter().stop();
256  }
257 
258  void
259  TimeMonitor::disableTimer (const std::string& name)
260  {
261  RCP<Time> timer = lookupCounter (name);
263  timer == null, std::invalid_argument,
264  "TimeMonitor::disableTimer: Invalid timer \"" << name << "\"");
265  timer->disable ();
266  }
267 
268  void
269  TimeMonitor::enableTimer (const std::string& name)
270  {
271  RCP<Time> timer = lookupCounter (name);
273  timer == null, std::invalid_argument,
274  "TimeMonitor::enableTimer: Invalid timer \"" << name << "\"");
275  timer->enable ();
276  }
277 
278  void
280  {
281  typedef std::map<std::string, RCP<Time> > map_type;
282  typedef map_type::iterator iter_type;
283  map_type& ctrs = counters ();
284 
285  // In debug mode, loop first to check whether any of the timers
286  // are running, before resetting them. This ensures that this
287  // method satisfies the strong exception guarantee (either it
288  // completes normally, or there are no side effects).
289 #ifdef TEUCHOS_DEBUG
290  for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
291  // We throw a runtime_error rather than a logic_error, because
292  // logic_error suggests a bug in the implementation of
293  // TimeMonitor. Calling zeroOutTimers() when a timer is running
294  // is not TimeMonitor's fault.
296  it->second->isRunning (), std::runtime_error,
297  "Timer \"" << it->second->name () << "\" is currently running. "
298  "You are not allowed to reset running timers.");
299  }
300 #endif // TEUCHOS_DEBUG
301 
302  for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
303  it->second->reset ();
304  }
305  }
306 
307  // An anonymous namespace is the standard way of limiting linkage of
308  // its contained routines to file scope.
309  namespace {
// \brief Build a placeholder (name, (time, calls)) entry.
//
// The returned entry pairs the given name with an elapsed time of
// zero and a call count of zero.  No timer object is created.
//
// \param name [in] The timer's name.
std::pair<std::string, std::pair<double, int> >
makeEmptyTimerDatum (const std::string& name)
{
  const std::pair<double, int> noTimeNoCalls (0.0, 0);
  return std::pair<std::string, std::pair<double, int> > (name, noTimeNoCalls);
}
321 
322  // \fn collectLocalTimerData
323  // \brief Collect and sort local timer data by timer names.
324  //
325  // \param localData [out] Map whose keys are the timer names, and
326  // whose value for each key is the total elapsed time (in
327  // seconds) and the call count for the timer with that name.
328  //
329  // \param localCounters [in] Timers from which to extract data.
330  //
331  // \param filter [in] Filter for timer labels. If filter is not
332  // empty, this method will only collect data for local timers
333  // whose labels begin with this string.
334  //
335  // Extract the total elapsed time and call count from each timer
336  // in the given array. Merge results for timers with duplicate
337  // labels, by summing their total elapsed times and call counts
338  // pairwise.
339  void
340  collectLocalTimerData (timer_map_t& localData,
341  const std::map<std::string, RCP<Time> >& localCounters,
342  const std::string& filter="")
343  {
344  using std::make_pair;
345  typedef timer_map_t::iterator iter_t;
346 
347  timer_map_t theLocalData;
348  for (std::map<std::string, RCP<Time> >::const_iterator it = localCounters.begin();
349  it != localCounters.end(); ++it) {
350  const std::string& name = it->second->name ();
351 
352  // Filter current timer name, if provided filter is nonempty.
353  // Filter string must _start_ the timer label, not just be in it.
354  const bool skipThisOne = (filter != "" && name.find (filter) != 0);
355  if (! skipThisOne) {
356  const double timing = it->second->totalElapsedTime ();
357  const int numCalls = it->second->numCalls ();
358 
359  // Merge timers with duplicate labels, by summing their
360  // total elapsed times and call counts.
361  iter_t loc = theLocalData.find (name);
362  if (loc == theLocalData.end()) {
363  // Use loc as an insertion location hint.
364  theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
365  }
366  else {
367  loc->second.first += timing;
368  loc->second.second += numCalls;
369  }
370  }
371  }
372  // This avoids copying the map, and also makes this method
373  // satisfy the strong exception guarantee.
374  localData.swap (theLocalData);
375  }
376 
377  // \brief Locally filter out timer data with zero call counts.
378  //
379  // \param timerData [in/out]
380  void
381  filterZeroData (timer_map_t& timerData)
382  {
383  // FIXME (mfh 15 Mar 2013) Should use std::map::erase with
384  // iterator hint, instead of rebuilding the map completely.
385  timer_map_t newTimerData;
386  for (timer_map_t::const_iterator it = timerData.begin();
387  it != timerData.end(); ++it) {
388  if (it->second.second > 0) {
389  newTimerData[it->first] = it->second;
390  }
391  }
392  timerData.swap (newTimerData);
393  }
394 
416  void
417  collectLocalTimerDataAndNames (timer_map_t& localTimerData,
418  Array<std::string>& localTimerNames,
419  const std::map<std::string, RCP<Time> >& localTimers,
420  const bool writeZeroTimers,
421  const std::string& filter="")
422  {
423  // Collect and sort local timer data by timer names.
424  collectLocalTimerData (localTimerData, localTimers, filter);
425 
426  // Filter out zero data locally first. This ensures that if we
427  // are writing global stats, and if a timer name exists in the
428  // set of global names, then that timer has a nonzero call count
429  // on at least one MPI process.
430  if (! writeZeroTimers) {
431  filterZeroData (localTimerData);
432  }
433 
434  // Extract the set of local timer names. The std::map keeps
435  // them sorted alphabetically.
436  localTimerNames.reserve (localTimerData.size());
437  for (timer_map_t::const_iterator it = localTimerData.begin();
438  it != localTimerData.end(); ++it) {
439  localTimerNames.push_back (it->first);
440  }
441  }
442 
477  void
478  collectGlobalTimerData (timer_map_t& globalTimerData,
479  Array<std::string>& globalTimerNames,
480  timer_map_t& localTimerData,
481  Array<std::string>& localTimerNames,
482  Ptr<const Comm<int> > comm,
483  const bool alwaysWriteLocal,
484  const ECounterSetOp setOp)
485  {
486  // There may be some global timers that are not local timers on
487  // the calling MPI process(es). In that case, if
488  // alwaysWriteLocal is true, then we need to fill in the
489  // "missing" local timers. That will ensure that both global
490  // and local timer columns in the output table have the same
491  // number of rows. The collectLocalTimerDataAndNames() method
492  // may have already filtered out local timers with zero call
493  // counts (if its writeZeroTimers argument was false), but we
494  // won't be filtering again. Thus, any local timer data we
495  // insert here won't get filtered out.
496  //
497  // Note that calling summarize() with writeZeroTimers == false
498  // will still do what it says, even if we insert local timers
499  // with zero call counts here.
500 
501  // This does the correct and inexpensive thing (just copies the
502  // timer data) if numProcs == 1. Otherwise, it initiates a
503  // communication with \f$O(\log P)\f$ messages along the
504  // critical path, where \f$P\f$ is the number of participating
505  // processes.
506  mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp);
507 
508 #ifdef TEUCHOS_DEBUG
509  {
510  // Sanity check that all processes have the name number of
511  // global timer names.
512  const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
513  timer_map_t::size_type minNumGlobalNames = 0;
514  timer_map_t::size_type maxNumGlobalNames = 0;
515  reduceAll (*comm, REDUCE_MIN, myNumGlobalNames,
516  outArg (minNumGlobalNames));
517  reduceAll (*comm, REDUCE_MAX, myNumGlobalNames,
518  outArg (maxNumGlobalNames));
519  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
520  std::logic_error, "Min # global timer names = " << minNumGlobalNames
521  << " != max # global timer names = " << maxNumGlobalNames
522  << ". Please report this bug to the Teuchos developers.");
523  TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
524  std::logic_error, "My # global timer names = " << myNumGlobalNames
525  << " != min # global timer names = " << minNumGlobalNames
526  << ". Please report this bug to the Teuchos developers.");
527  }
528 #endif // TEUCHOS_DEBUG
529 
530  // mergeCounterNames() just merges the counters' names, not
531  // their actual data. Now we need to fill globalTimerData with
532  // this process' timer data for the timers in globalTimerNames.
533  //
534  // All processes need the full list of global timers, since
535  // there may be some global timers that are not local timers.
536  // That's why mergeCounterNames() has to be an all-reduce, not
537  // just a reduction to Proc 0.
538  //
539  // Insertion optimization: if the iterator given to map::insert
540  // points right before where we want to insert, insertion is
541  // O(1). globalTimerNames is sorted, so feeding the iterator
542  // output of map::insert into the next invocation's input should
543  // make the whole insertion O(N) where N is the number of
544  // entries in globalTimerNames.
545  timer_map_t::iterator globalMapIter = globalTimerData.begin();
546  timer_map_t::iterator localMapIter;
547  for (Array<string>::const_iterator it = globalTimerNames.begin();
548  it != globalTimerNames.end(); ++it) {
549  const std::string& globalName = *it;
550  localMapIter = localTimerData.find (globalName);
551 
552  if (localMapIter == localTimerData.end()) {
553  if (alwaysWriteLocal) {
554  // If there are some global timers that are not local
555  // timers, and if we want to print local timers, we insert
556  // a local timer datum with zero elapsed time and zero
557  // call count into localTimerData as well. This will
558  // ensure that both global and local timer columns in the
559  // output table have the same number of rows.
560  //
561  // We really only need to do this on Proc 0, which is the
562  // only process that currently may print local timers.
563  // However, we do it on all processes, just in case
564  // someone later wants to modify this function to print
565  // out local timer data for some process other than Proc
566  // 0. This extra computation won't affect the cost along
567  // the critical path, for future computations in which
568  // Proc 0 participates.
569  localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName));
570 
571  // Make sure the missing global name gets added to the
572  // list of local names. We'll re-sort the list of local
573  // names below.
574  localTimerNames.push_back (globalName);
575  }
576  // There's a global timer that's not a local timer. Add it
577  // to our pre-merge version of the global timer data so that
578  // we can safely merge the global timer data later.
579  globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName));
580  }
581  else {
582  // We have this global timer name in our local timer list.
583  // Fill in our pre-merge version of the global timer data
584  // with our local data.
585  globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second));
586  }
587  }
588 
589  if (alwaysWriteLocal) {
590  // Re-sort the list of local timer names, since we may have
591  // inserted "missing" names above.
592  std::sort (localTimerNames.begin(), localTimerNames.end());
593  }
594 
595 #ifdef TEUCHOS_DEBUG
596  {
597  // Sanity check that all processes have the name number of
598  // global timers.
599  const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
600  timer_map_t::size_type minNumGlobalTimers = 0;
601  timer_map_t::size_type maxNumGlobalTimers = 0;
602  reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers,
603  outArg (minNumGlobalTimers));
604  reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers,
605  outArg (maxNumGlobalTimers));
606  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
607  std::logic_error, "Min # global timers = " << minNumGlobalTimers
608  << " != max # global timers = " << maxNumGlobalTimers
609  << ". Please report this bug to the Teuchos developers.");
610  TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
611  std::logic_error, "My # global timers = " << myNumGlobalTimers
612  << " != min # global timers = " << minNumGlobalTimers
613  << ". Please report this bug to the Teuchos developers.");
614  }
615 #endif // TEUCHOS_DEBUG
616  }
617 
664  void
665  computeGlobalTimerStats (stat_map_type& statData,
666  std::vector<std::string>& statNames,
667  Ptr<const Comm<int> > comm,
668  const timer_map_t& globalTimerData,
669  const bool ignoreZeroTimers)
670  {
671  using Teuchos::ScalarTraits;
672 
673  const int numTimers = static_cast<int> (globalTimerData.size());
674  const int numProcs = comm->getSize();
675 
676  // Extract pre-reduction timings and call counts into a
677  // sequential array. This array will be in the same order as
678  // the global timer names are in the map.
679  Array<std::pair<double, int> > timingsAndCallCounts;
680  timingsAndCallCounts.reserve (numTimers);
681  for (timer_map_t::const_iterator it = globalTimerData.begin();
682  it != globalTimerData.end(); ++it) {
683  timingsAndCallCounts.push_back (it->second);
684  }
685 
686  // For each timer name, compute the min timing and its
687  // corresponding call count. If two processes have the same
688  // timing but different call counts, the minimum call count will
689  // be used.
690  Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers);
691  if (numTimers > 0) {
692  if (ignoreZeroTimers)
693  reduceAll (*comm, MinLocNonzero<int, double, int>(), numTimers,
694  &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
695  else
696  reduceAll (*comm, MinLoc<int, double, int>(), numTimers,
697  &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
698  }
699 
700  // For each timer name, compute the max timing and its
701  // corresponding call count. If two processes have the same
702  // timing but different call counts, the minimum call count will
703  // be used.
704  Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers);
705  if (numTimers > 0) {
706  reduceAll (*comm, MaxLoc<int, double, int>(), numTimers,
707  &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]);
708  }
709 
710  // For each timer name, compute the mean-over-processes timing,
711  // the mean call count, and the mean-over-call-counts timing.
712  // The mean call count is reported as a double to allow a
713  // fractional value.
714  //
715  // Each local timing is really the total timing over all local
716  // invocations. The number of local invocations is the call
717  // count. Thus, the mean-over-call-counts timing is the sum of
718  // all the timings (over all processes), divided by the sum of
719  // all the call counts (over all processes). We compute it in a
720  // different way to over unnecessary overflow.
721  Array<double> meanOverCallCountsTimings (numTimers);
722  Array<double> meanOverProcsTimings (numTimers);
723  Array<double> meanCallCounts (numTimers);
724  Array<int> ICallThisTimer (numTimers);
725  Array<int> numProcsCallingEachTimer (numTimers);
726  {
727  // Figure out how many processors actually call each timer.
728  if (ignoreZeroTimers) {
729  for (int k = 0; k < numTimers; ++k) {
730  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
731  if (callCount > 0) ICallThisTimer[k] = 1;
732  else ICallThisTimer[k] = 0;
733  }
734  if (numTimers > 0) {
735  reduceAll (*comm, REDUCE_SUM, numTimers, &ICallThisTimer[0],
736  &numProcsCallingEachTimer[0]);
737  }
738  }
739 
740  // When summing, first scale by the number of processes. This
741  // avoids unnecessary overflow, and also gives us the mean
742  // call count automatically.
743  Array<double> scaledTimings (numTimers);
744  Array<double> scaledCallCounts (numTimers);
745  const double P = static_cast<double> (numProcs);
746 
747  if (ignoreZeroTimers) {
748  for (int k = 0; k < numTimers; ++k) {
749  const double timing = timingsAndCallCounts[k].first;
750  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
751 
752  scaledTimings[k] = timing / numProcsCallingEachTimer[k];
753  scaledCallCounts[k] = callCount / numProcsCallingEachTimer[k];
754  }
755  }
756  else {
757  for (int k = 0; k < numTimers; ++k) {
758  const double timing = timingsAndCallCounts[k].first;
759  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
760 
761  scaledTimings[k] = timing / P;
762  scaledCallCounts[k] = callCount / P;
763  }
764  }
765 
766  if (numTimers > 0) {
767  reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0],
768  &meanOverProcsTimings[0]);
769  reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0],
770  &meanCallCounts[0]);
771  }
772  // We don't have to undo the scaling for the mean timings;
773  // just divide by the scaled call count.
774  for (int k = 0; k < numTimers; ++k) {
775  if (meanCallCounts[k] > ScalarTraits<double>::zero ()) {
776  meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k];
777  }
778  else {
779  meanOverCallCountsTimings[k] = ScalarTraits<double>::zero ();
780  }
781  }
782  }
783 
784  // Reformat the data into the map of statistics. Be sure that
785  // each value (the std::vector of (timing, call count) pairs,
786  // each entry of which is a different statistic) preserves the
787  // order of statNames.
788  statNames.resize (4);
789  statNames[0] = "MinOverProcs";
790  statNames[1] = "MeanOverProcs";
791  statNames[2] = "MaxOverProcs";
792  statNames[3] = "MeanOverCallCounts";
793 
794  stat_map_type::iterator statIter = statData.end();
795  timer_map_t::const_iterator it = globalTimerData.begin();
796  for (int k = 0; it != globalTimerData.end(); ++k, ++it) {
797  std::vector<std::pair<double, double> > curData (4);
798  curData[0] = minTimingsAndCallCounts[k];
799  curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]);
800  curData[2] = maxTimingsAndCallCounts[k];
801  curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]);
802 
803  // statIter gives an insertion location hint that makes each
804  // insertion O(1), since we remember the location of the last
805  // insertion.
806  statIter = statData.insert (statIter, std::make_pair (it->first, curData));
807  }
808  }
809 
810 
827  RCP<const Comm<int> >
828  getDefaultComm ()
829  {
830  // The default communicator. If Trilinos was built with MPI
831  // enabled, this should be MPI_COMM_WORLD. (If MPI has not yet
832  // been initialized, it's not valid to use the communicator!)
833  // Otherwise, this should be a "serial" (no MPI, one "process")
834  // communicator.
835  RCP<const Comm<int> > comm = DefaultComm<int>::getComm ();
836 
837 #ifdef HAVE_MPI
838  {
839  int mpiHasBeenStarted = 0;
840  MPI_Initialized (&mpiHasBeenStarted);
841  if (! mpiHasBeenStarted) {
842  // Make pComm a new "serial communicator."
843  comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ()));
844  }
845  }
846 #endif // HAVE_MPI
847  return comm;
848  }
849 
850  } // namespace (anonymous)
851 
852 
853  void
855  std::vector<std::string>& statNames,
856  Ptr<const Comm<int> > comm,
857  const ECounterSetOp setOp,
858  const std::string& filter)
859  {
860  // Collect local timer data and names. Filter out timers with
861  // zero call counts if writeZeroTimers is false. Also, apply the
862  // timer label filter at this point, so we don't have to compute
863  // statistics on timers we don't want to display anyway.
864  timer_map_t localTimerData;
865  Array<std::string> localTimerNames;
866  const bool writeZeroTimers = false;
867  collectLocalTimerDataAndNames (localTimerData, localTimerNames,
868  counters(), writeZeroTimers, filter);
869  // Merge the local timer data and names into global timer data and
870  // names.
871  timer_map_t globalTimerData;
872  Array<std::string> globalTimerNames;
873  const bool alwaysWriteLocal = false;
874  collectGlobalTimerData (globalTimerData, globalTimerNames,
875  localTimerData, localTimerNames,
876  comm, alwaysWriteLocal, setOp);
877  // Compute statistics on the data.
878  computeGlobalTimerStats (statData, statNames, comm, globalTimerData, false);
879  }
880 
881 
882  void
884  std::ostream& out,
885  const bool alwaysWriteLocal,
886  const bool writeGlobalStats,
887  const bool writeZeroTimers,
888  const ECounterSetOp setOp,
889  const std::string& filter,
890  const bool ignoreZeroTimers)
891  {
892  //
893  // We can't just call computeGlobalTimerStatistics(), since
894  // summarize() has different options that affect whether global
895  // statistics are computed and printed.
896  //
897  const int numProcs = comm->getSize();
898  const int myRank = comm->getRank();
899 
900  // Collect local timer data and names. Filter out timers with
901  // zero call counts if writeZeroTimers is false. Also, apply the
902  // timer label filter at this point, so we don't have to compute
903  // statistics on timers we don't want to display anyway.
904  timer_map_t localTimerData;
905  Array<std::string> localTimerNames;
906  collectLocalTimerDataAndNames (localTimerData, localTimerNames,
907  counters(), writeZeroTimers, filter);
908 
909  // If we're computing global statistics, merge the local timer
910  // data and names into global timer data and names, and compute
911  // global timer statistics. Otherwise, leave the global data
912  // empty.
913  timer_map_t globalTimerData;
914  Array<std::string> globalTimerNames;
915  stat_map_type statData;
916  std::vector<std::string> statNames;
917  if (writeGlobalStats) {
918  collectGlobalTimerData (globalTimerData, globalTimerNames,
919  localTimerData, localTimerNames,
920  comm, alwaysWriteLocal, setOp);
921  // Compute statistics on the data, but only if the communicator
922  // contains more than one process. Otherwise, statistics don't
923  // make sense and we don't print them (see below).
924  if (numProcs > 1) {
925  computeGlobalTimerStats (statData, statNames, comm, globalTimerData, ignoreZeroTimers);
926  }
927  }
928 
929  // Precision of floating-point numbers in the table.
930  const int precision = format().precision();
931 
932  // All columns of the table, in order.
933  Array<TableColumn> tableColumns;
934 
935  // Labels of all the columns of the table.
936  // We will append to this when we add each column.
937  Array<std::string> titles;
938 
939  // Widths (in number of characters) of each column.
940  // We will append to this when we add each column.
941  Array<int> columnWidths;
942 
943  // Table column containing all timer names. If writeGlobalStats
944  // is true, we use the global timer names, otherwise we use the
945  // local timer names. We build the table on all processes
946  // redundantly, but only print on Rank 0.
947  {
948  titles.append ("Timer Name");
949 
950  // The column labels depend on whether we are computing global statistics.
951  TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames);
952  tableColumns.append (nameCol);
953 
954  // Each column is as wide as it needs to be to hold both its
955  // title and all of the column data. This column's title is the
956  // current last entry of the titles array.
957  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol));
958  }
959 
960  // Table column containing local timer stats, if applicable. We
961  // only write local stats if asked, only on MPI Proc 0, and only
962  // if there is more than one MPI process in the communicator
963  // (otherwise local stats == global stats, so we just print the
964  // global stats). In this case, we've padded the local data on
965  // Proc 0 if necessary to match the global timer list, so that the
966  // columns have the same number of rows.
967  if (alwaysWriteLocal && numProcs > 1 && myRank == 0) {
968  titles.append ("Local time (num calls)");
969 
970  // Copy local timer data out of the array-of-structs into
971  // separate arrays, for display in the table.
972  Array<double> localTimings;
973  Array<double> localNumCalls;
974  for (timer_map_t::const_iterator it = localTimerData.begin();
975  it != localTimerData.end(); ++it) {
976  localTimings.push_back (it->second.first);
977  localNumCalls.push_back (static_cast<double> (it->second.second));
978  }
979  TableColumn timeAndCalls (localTimings, localNumCalls, precision, true);
980  tableColumns.append (timeAndCalls);
981  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
982  }
983 
984  if (writeGlobalStats) {
985  // If there's only 1 process in the communicator, don't display
986  // statistics; statistics don't make sense in that case. Just
987  // display the timings and call counts. If there's more than 1
988  // process, do display statistics.
989  if (numProcs == 1) {
990  // Extract timings and the call counts from globalTimerData.
991  Array<double> globalTimings;
992  Array<double> globalNumCalls;
993  for (timer_map_t::const_iterator it = globalTimerData.begin();
994  it != globalTimerData.end(); ++it) {
995  globalTimings.push_back (it->second.first);
996  globalNumCalls.push_back (static_cast<double> (it->second.second));
997  }
998  // Print the table column.
999  titles.append ("Global time (num calls)");
1000  TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, true);
1001  tableColumns.append (timeAndCalls);
1002  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1003  }
1004  else { // numProcs > 1
1005  // Print a table column for each statistic. statNames and
1006  // each value in statData use the same ordering, so we can
1007  // iterate over valid indices of statNames to display the
1008  // statistics in the right order.
1009  const timer_map_t::size_type numGlobalTimers = globalTimerData.size();
1010  for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) {
1011  // Extract lists of timings and their call counts for the
1012  // current statistic.
1013  Array<double> statTimings (numGlobalTimers);
1014  Array<double> statCallCounts (numGlobalTimers);
1015  stat_map_type::const_iterator it = statData.begin();
1016  for (int k = 0; it != statData.end(); ++it, ++k) {
1017  statTimings[k] = (it->second[statInd]).first;
1018  statCallCounts[k] = (it->second[statInd]).second;
1019  }
1020  // Print the table column.
1021  const std::string& statisticName = statNames[statInd];
1022  const std::string titleString = statisticName;
1023  titles.append (titleString);
1024  TableColumn timeAndCalls (statTimings, statCallCounts, precision, true);
1025  tableColumns.append (timeAndCalls);
1026  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1027  }
1028  }
1029  }
1030 
1031  // Print the whole table to the given output stream on MPI Rank 0.
1032  format().setColumnWidths (columnWidths);
1033  if (myRank == 0) {
1034  std::ostringstream theTitle;
1035  theTitle << "TimeMonitor results over " << numProcs << " processor"
1036  << (numProcs > 1 ? "s" : "");
1037  format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
1038  }
1039  }
1040 
1041  void
1042  TimeMonitor::summarize (std::ostream &out,
1043  const bool alwaysWriteLocal,
1044  const bool writeGlobalStats,
1045  const bool writeZeroTimers,
1046  const ECounterSetOp setOp,
1047  const std::string& filter,
1048  const bool ignoreZeroTimers)
1049  {
1050  // The default communicator. If Trilinos was built with MPI
1051  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1052  // be a "serial" (no MPI, one "process") communicator.
1053  RCP<const Comm<int> > comm = getDefaultComm();
1054 
1055  summarize (comm.ptr(), out, alwaysWriteLocal,
1056  writeGlobalStats, writeZeroTimers, setOp, filter, ignoreZeroTimers);
1057  }
1058 
1059  void
1061  std::vector<std::string>& statNames,
1062  const ECounterSetOp setOp,
1063  const std::string& filter)
1064  {
1065  // The default communicator. If Trilinos was built with MPI
1066  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1067  // be a "serial" (no MPI, one "process") communicator.
1068  RCP<const Comm<int> > comm = getDefaultComm();
1069 
1070  computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp, filter);
1071  }
1072 
1073 
namespace {
  // Turn a timer or statistic label into text that can be written as
  // a YAML key or sequence entry.
  //
  // Rules implemented here:
  //  - An empty label is returned as is (YAML permits empty keys).
  //  - A label that both starts and ends with a double quote is
  //    treated as already quoted: the outer quotes are stripped,
  //    the interior is escaped, and the outer quotes are put back.
  //  - Every double quote or backslash in the (interior of the)
  //    label is preceded by a backslash.
  //  - The result is wrapped in double quotes if the label was
  //    already quoted, or if it contains a colon, a double quote,
  //    or a backslash.  Otherwise it is returned unquoted.
  std::string
  quoteLabelForYaml (const std::string& label)
  {
    if (label.empty ()) {
      return label;
    }

    const std::string::size_type len = label.size ();
    const bool hasOuterQuotes =
      len >= 2 && label[0] == '"' && label[len - 1] == '"';

    // Range of characters to scan; skip the outer quotes if present.
    std::string::const_iterator cur = label.begin ();
    std::string::const_iterator stop = label.end ();
    if (hasOuterQuotes) {
      ++cur;
      --stop;
    }

    // An already quoted label always comes back quoted.
    bool mustQuote = hasOuterQuotes;

    std::string escaped;
    escaped.reserve (len);
    for ( ; cur != stop; ++cur) {
      switch (*cur) {
      case '"':
      case '\\':
        escaped += '\\'; // Escape the quote or backslash.
        mustQuote = true;
        break;
      case ':':
        mustQuote = true;
        break;
      default:
        break;
      }
      escaped += *cur;
    }

    if (! mustQuote) {
      return escaped;
    }

    std::string quoted;
    quoted.reserve (escaped.size () + 2);
    quoted += '"';
    quoted += escaped;
    quoted += '"';
    return quoted;
  }

} // namespace (anonymous)
1147 
1148 
1149  void TimeMonitor::
1150  summarizeToYaml (Ptr<const Comm<int> > comm,
1151  std::ostream &out,
1152  const ETimeMonitorYamlFormat yamlStyle,
1153  const std::string& filter)
1154  {
1155  using Teuchos::FancyOStream;
1156  using Teuchos::fancyOStream;
1157  using Teuchos::getFancyOStream;
1158  using Teuchos::OSTab;
1159  using Teuchos::RCP;
1160  using Teuchos::rcpFromRef;
1161  using std::endl;
1162  typedef std::vector<std::string>::size_type size_type;
1163 
1164  const bool compact = (yamlStyle == YAML_FORMAT_COMPACT);
1165 
1166  // const bool writeGlobalStats = true;
1167  // const bool writeZeroTimers = true;
1168  // const bool alwaysWriteLocal = false;
1169  const ECounterSetOp setOp = Intersection;
1170 
1171  stat_map_type statData;
1172  std::vector<std::string> statNames;
1173  computeGlobalTimerStatistics (statData, statNames, comm, setOp, filter);
1174 
1175  const int numProcs = comm->getSize();
1176 
1177  // HACK (mfh 20 Aug 2012) For some reason, creating OSTab with "-
1178  // " as the line prefix does not work, else I would prefer that
1179  // method for printing each line of a YAML block sequence (see
1180  // Section 8.2.1 of the YAML 1.2 spec).
1181  //
1182  // Also, I have to set the tab indent string here, rather than in
1183  // OSTab's constructor. This is because line prefix (which for
1184  // some reason is what OSTab's constructor takes, rather than tab
1185  // indent string) means something different from tab indent
1186  // string, and turning on the line prefix prints all sorts of
1187  // things including "|" for some reason.
1188  RCP<FancyOStream> pfout = getFancyOStream (rcpFromRef (out));
1189  pfout->setTabIndentStr (" ");
1190  FancyOStream& fout = *pfout;
1191 
1192  fout << "# Teuchos::TimeMonitor report" << endl
1193  << "---" << endl;
1194 
1195  // mfh 19 Aug 2012: An important goal of our chosen output format
1196  // was to minimize the nesting depth. We have managed to keep the
1197  // nesting depth to 3, which is the limit that the current version
1198  // of PylotDB imposes for its YAML input.
1199 
1200  // Outermost level is a dictionary. (Individual entries of a
1201  // dictionary do _not_ begin with "- ".) We always print the
1202  // outermost level in standard style, not flow style, for better
1203  // readability. We begin the outermost level with metadata.
1204  fout << "Output mode: " << (compact ? "compact" : "spacious") << endl
1205  << "Number of processes: " << numProcs << endl
1206  << "Time unit: s" << endl;
1207  // For a key: value pair where the value is a sequence or
1208  // dictionary on the following line, YAML requires a space after
1209  // the colon.
1210  fout << "Statistics collected: ";
1211  // Print list of the names of all the statistics we collected.
1212  if (compact) {
1213  fout << " [";
1214  for (size_type i = 0; i < statNames.size (); ++i) {
1215  fout << quoteLabelForYaml (statNames[i]);
1216  if (i + 1 < statNames.size ()) {
1217  fout << ", ";
1218  }
1219  }
1220  fout << "]" << endl;
1221  }
1222  else {
1223  fout << endl;
1224  OSTab tab1 (pfout);
1225  for (size_type i = 0; i < statNames.size (); ++i) {
1226  fout << "- " << quoteLabelForYaml (statNames[i]) << endl;
1227  }
1228  }
1229 
1230  // Print the list of timer names.
1231  //
1232  // It might be nicer instead to print a map from timer name to all
1233  // of its data, but keeping the maximum nesting depth small
1234  // ensures better compatibility with different parsing tools.
1235  fout << "Timer names: ";
1236  if (compact) {
1237  fout << " [";
1238  size_type ind = 0;
1239  for (stat_map_type::const_iterator it = statData.begin();
1240  it != statData.end(); ++it, ++ind) {
1241  fout << quoteLabelForYaml (it->first);
1242  if (ind + 1 < statData.size ()) {
1243  fout << ", ";
1244  }
1245  }
1246  fout << "]" << endl;
1247  }
1248  else {
1249  fout << endl;
1250  OSTab tab1 (pfout);
1251  for (stat_map_type::const_iterator it = statData.begin();
1252  it != statData.end(); ++it) {
1253  fout << "- " << quoteLabelForYaml (it->first) << endl;
1254  }
1255  }
1256 
1257  // Print times for each timer, as a map from statistic name to its time.
1258  fout << "Total times: ";
1259  if (compact) {
1260  fout << " {";
1261  size_type outerInd = 0;
1262  for (stat_map_type::const_iterator outerIter = statData.begin();
1263  outerIter != statData.end(); ++outerIter, ++outerInd) {
1264  // Print timer name.
1265  fout << quoteLabelForYaml (outerIter->first) << ": ";
1266  // Print that timer's data.
1267  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1268  fout << "{";
1269  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1270  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1271  << curData[innerInd].first;
1272  if (innerInd + 1 < curData.size ()) {
1273  fout << ", ";
1274  }
1275  }
1276  fout << "}";
1277  if (outerInd + 1 < statData.size ()) {
1278  fout << ", ";
1279  }
1280  }
1281  fout << "}" << endl;
1282  }
1283  else {
1284  fout << endl;
1285  OSTab tab1 (pfout);
1286  size_type outerInd = 0;
1287  for (stat_map_type::const_iterator outerIter = statData.begin();
1288  outerIter != statData.end(); ++outerIter, ++outerInd) {
1289  // Print timer name.
1290  fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1291  // Print that timer's data.
1292  OSTab tab2 (pfout);
1293  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1294  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1295  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1296  << curData[innerInd].first << endl;
1297  }
1298  }
1299  }
1300 
1301  // Print call counts for each timer, for each statistic name.
1302  fout << "Call counts:";
1303  if (compact) {
1304  fout << " {";
1305  size_type outerInd = 0;
1306  for (stat_map_type::const_iterator outerIter = statData.begin();
1307  outerIter != statData.end(); ++outerIter, ++outerInd) {
1308  // Print timer name.
1309  fout << quoteLabelForYaml (outerIter->first) << ": ";
1310  // Print that timer's data.
1311  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1312  fout << "{";
1313  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1314  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1315  << curData[innerInd].second;
1316  if (innerInd + 1 < curData.size ()) {
1317  fout << ", ";
1318  }
1319  }
1320  fout << "}";
1321  if (outerInd + 1 < statData.size ()) {
1322  fout << ", ";
1323  }
1324  }
1325  fout << "}" << endl;
1326  }
1327  else {
1328  fout << endl;
1329  OSTab tab1 (pfout);
1330  size_type outerInd = 0;
1331  for (stat_map_type::const_iterator outerIter = statData.begin();
1332  outerIter != statData.end(); ++outerIter, ++outerInd) {
1333  // Print timer name.
1334  fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1335  // Print that timer's data.
1336  OSTab tab2 (pfout);
1337  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1338  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1339  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1340  << curData[innerInd].second << endl;
1341  }
1342  }
1343  }
1344  }
1345 
1346  void TimeMonitor::
1347  summarizeToYaml (std::ostream &out,
1348  const ETimeMonitorYamlFormat yamlStyle,
1349  const std::string& filter)
1350  {
1351  // The default communicator. If Trilinos was built with MPI
1352  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1353  // be a "serial" (no MPI, one "process") communicator.
1354  RCP<const Comm<int> > comm = getDefaultComm ();
1355 
1356  summarizeToYaml (comm.ptr (), out, yamlStyle, filter);
1357  }
1358 
1359  // Default value is false. We'll set to true once
1360  // setReportParameters() completes successfully.
1361  bool TimeMonitor::setParams_ = false;
1362 
1363  // We have to declare all of these here in order to avoid linker errors.
1364  TimeMonitor::ETimeMonitorReportFormat TimeMonitor::reportFormat_ = TimeMonitor::REPORT_FORMAT_TABLE;
1365  TimeMonitor::ETimeMonitorYamlFormat TimeMonitor::yamlStyle_ = TimeMonitor::YAML_FORMAT_SPACIOUS;
1366  ECounterSetOp TimeMonitor::setOp_ = Intersection;
1367  bool TimeMonitor::alwaysWriteLocal_ = false;
1368  bool TimeMonitor::writeGlobalStats_ = true;
1369  bool TimeMonitor::writeZeroTimers_ = true;
1370 
1371  void
1372  TimeMonitor::setReportFormatParameter (ParameterList& plist)
1373  {
1374  const std::string name ("Report format");
1375  const std::string defaultValue ("Table");
1376  const std::string docString ("Output format for report of timer statistics");
1377  Array<std::string> strings;
1378  Array<std::string> docs;
1379  Array<ETimeMonitorReportFormat> values;
1380 
1381  strings.push_back ("YAML");
1382  docs.push_back ("YAML (see yaml.org) format");
1383  values.push_back (REPORT_FORMAT_YAML);
1384  strings.push_back ("Table");
1385  docs.push_back ("Tabular format via Teuchos::TableFormat");
1386  values.push_back (REPORT_FORMAT_TABLE);
1387 
1388  setStringToIntegralParameter<ETimeMonitorReportFormat> (name, defaultValue,
1389  docString,
1390  strings (), docs (),
1391  values (), &plist);
1392  }
1393 
1394  void
1395  TimeMonitor::setYamlFormatParameter (ParameterList& plist)
1396  {
1397  const std::string name ("YAML style");
1398  const std::string defaultValue ("spacious");
1399  const std::string docString ("YAML-specific output format");
1400  Array<std::string> strings;
1401  Array<std::string> docs;
1402  Array<ETimeMonitorYamlFormat> values;
1403 
1404  strings.push_back ("compact");
1405  docs.push_back ("Compact format: use \"flow style\" (see YAML 1.2 spec at "
1406  "yaml.org) for most sequences except the outermost sequence");
1407  values.push_back (YAML_FORMAT_COMPACT);
1408 
1409  strings.push_back ("spacious");
1410  docs.push_back ("Spacious format: avoid flow style");
1411  values.push_back (YAML_FORMAT_SPACIOUS);
1412 
1413  setStringToIntegralParameter<ETimeMonitorYamlFormat> (name, defaultValue,
1414  docString,
1415  strings (), docs (),
1416  values (), &plist);
1417  }
1418 
1419  void
1420  TimeMonitor::setSetOpParameter (ParameterList& plist)
1421  {
1422  const std::string name ("How to merge timer sets");
1423  const std::string defaultValue ("Intersection");
1424  const std::string docString ("How to merge differing sets of timers "
1425  "across processes");
1426  Array<std::string> strings;
1427  Array<std::string> docs;
1428  Array<ECounterSetOp> values;
1429 
1430  strings.push_back ("Intersection");
1431  docs.push_back ("Compute intersection of timer sets over processes");
1432  values.push_back (Intersection);
1433  strings.push_back ("Union");
1434  docs.push_back ("Compute union of timer sets over processes");
1435  values.push_back (Union);
1436 
1437  setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString,
1438  strings (), docs (), values (),
1439  &plist);
1440  }
1441 
1442  RCP<const ParameterList>
1444  {
1445  // Our implementation favors recomputation over persistent
1446  // storage. That is, we simply recreate the list every time we
1447  // need it.
1448  RCP<ParameterList> plist = parameterList ("TimeMonitor::report");
1449 
1450  const bool alwaysWriteLocal = false;
1451  const bool writeGlobalStats = true;
1452  const bool writeZeroTimers = true;
1453 
1454  setReportFormatParameter (*plist);
1455  setYamlFormatParameter (*plist);
1456  setSetOpParameter (*plist);
1457  plist->set ("alwaysWriteLocal", alwaysWriteLocal,
1458  "Always output local timers' values on Proc 0");
1459  plist->set ("writeGlobalStats", writeGlobalStats, "Always output global "
1460  "statistics, even if there is only one process in the "
1461  "communicator");
1462  plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for "
1463  "timers that have never been called");
1464 
1465  return rcp_const_cast<const ParameterList> (plist);
1466  }
1467 
1468  void
1469  TimeMonitor::setReportParameters (const RCP<ParameterList>& params)
1470  {
1471  ETimeMonitorReportFormat reportFormat = REPORT_FORMAT_TABLE;
1472  ETimeMonitorYamlFormat yamlStyle = YAML_FORMAT_SPACIOUS;
1473  ECounterSetOp setOp = Intersection;
1474  bool alwaysWriteLocal = false;
1475  bool writeGlobalStats = true;
1476  bool writeZeroTimers = true;
1477 
1478  if (params.is_null ()) {
1479  // If we've set parameters before, leave their current values.
1480  // Otherwise, set defaults (below).
1481  if (setParams_) {
1482  return;
1483  }
1484  }
1485  else { // params is nonnull. Let's read it!
1486  params->validateParametersAndSetDefaults (*getValidReportParameters ());
1487 
1488  reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format");
1489  yamlStyle = getIntegralValue<ETimeMonitorYamlFormat> (*params, "YAML style");
1490  setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets");
1491  alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal");
1492  writeGlobalStats = params->get<bool> ("writeGlobalStats");
1493  writeZeroTimers = params->get<bool> ("writeZeroTimers");
1494  }
1495  // Defer setting state until here, to ensure the strong exception
1496  // guarantee for this method (either it throws with no externally
1497  // visible state changes, or it returns normally).
1498  reportFormat_ = reportFormat;
1499  yamlStyle_ = yamlStyle;
1500  setOp_ = setOp;
1501  alwaysWriteLocal_ = alwaysWriteLocal;
1502  writeGlobalStats_ = writeGlobalStats;
1503  writeZeroTimers_ = writeZeroTimers;
1504 
1505  setParams_ = true; // Yay, we successfully set parameters!
1506  }
1507 
1508  void
1510  std::ostream& out,
1511  const std::string& filter,
1512  const RCP<ParameterList>& params)
1513  {
1514  setReportParameters (params);
1515 
1516  if (reportFormat_ == REPORT_FORMAT_YAML) {
1517  summarizeToYaml (comm, out, yamlStyle_, filter);
1518  }
1519  else if (reportFormat_ == REPORT_FORMAT_TABLE) {
1520  summarize (comm, out, alwaysWriteLocal_, writeGlobalStats_,
1521  writeZeroTimers_, setOp_, filter);
1522  }
1523  else {
1524  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: "
1525  "Invalid report format. This should never happen; ParameterList "
1526  "validation should have caught this. Please report this bug to the "
1527  "Teuchos developers.");
1528  }
1529  }
1530 
1531  void
1533  std::ostream& out,
1534  const RCP<ParameterList>& params)
1535  {
1536  report (comm, out, "", params);
1537  }
1538 
1539  void
1540  TimeMonitor::report (std::ostream& out,
1541  const std::string& filter,
1542  const RCP<ParameterList>& params)
1543  {
1544  RCP<const Comm<int> > comm = getDefaultComm ();
1545  report (comm.ptr (), out, filter, params);
1546  }
1547 
1548  void
1549  TimeMonitor::report (std::ostream& out,
1550  const RCP<ParameterList>& params)
1551  {
1552  RCP<const Comm<int> > comm = getDefaultComm ();
1553  report (comm.ptr (), out, "", params);
1554  }
1555 
1556 } // namespace Teuchos
Array< T > & append(const T &x)
Add a new entry at the end of the array.
std::map< std::string, std::vector< std::pair< double, double > > > stat_map_type
Global statistics collected from timer data.
basic_OSTab< char > OSTab
void setColumnWidths(const Array< int > &colWidths)
Set the column widths to be used for subsequent rows.
TimeMonitor(Time &timer, bool reset=false)
Constructor: starts the timer.
~TimeMonitor()
Destructor: stops the timer.
static RCP< Time > lookupCounter(const std::string &name)
Return the first counter with the given name, or null if none.
basic_FancyOStream< char > FancyOStream
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
bool isRecursiveCall() const
Whether we are currently in a recursive call of the counter.
static void disableTimer(const std::string &name)
Disable the timer with the given name.
Teuchos version of MPI_MINLOC.
static Teuchos::RCP< const Comm< OrdinalType > > getComm()
Return the default global communicator.
static RCP< const ParameterList > getValidReportParameters()
Default parameters (with validators) for report().
T * get() const
Get the raw C++ pointer to the underlying object.
This structure defines some basic traits for a scalar field type.
Teuchos version of MPI_MAXLOC.
bool is_null() const
Returns true if the underlying pointer is null.
Base interface class for user-defined reduction operations for objects that use value semantics...
void start(bool reset=false)
Start the timer, if the timer is enabled (see disable()).
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
A column of TableEntry objects.
static void summarize(Ptr< const Comm< int > > comm, std::ostream &out=std::cout, const bool alwaysWriteLocal=false, const bool writeGlobalStats=true, const bool writeZeroTimers=true, const ECounterSetOp setOp=Intersection, const std::string &filter="", const bool ignoreZeroTimers=false)
Print summary statistics for all timers on the given communicator.
double stop()
Stop the timer, if the timer is enabled (see disable()).
Wall-clock timer.
size_type size() const
static void computeGlobalTimerStatistics(stat_map_type &statData, std::vector< std::string > &statNames, Ptr< const Comm< int > > comm, const ECounterSetOp setOp=Intersection, const std::string &filter="")
Compute global timer statistics for all timers on the given communicator.
void writeWholeTable(std::ostream &out, const std::string &tableTitle, const Array< std::string > &columnNames, const Array< TableColumn > &columns) const
Provides utilities for formatting tabular output.
TEUCHOS_DEPRECATED void reduceAll(const Comm< Ordinal > &comm, const EReductionType reductType, const Packet &send, Packet *globalReduct)
Deprecated.
void mergeCounterNames(const Comm< int > &comm, const Array< std::string > &localNames, Array< std::string > &globalNames, const ECounterSetOp setOp)
Merge counter names over all processors.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
A list of parameters of arbitrary type.
std::vector< T >::const_iterator const_iterator
The type of a const forward iterator.
int precision() const
Get the precision for writing doubles. Default is 4.
static std::map< std::string, RCP< Time > > & counters()
Array of all counters that were created with getNewCounter() on the calling (MPI) process...
reference back()
Abstract interface for distributed-memory communication.
void push_back(const value_type &x)
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos, as well as a number of utility routines.
Defines basic traits for the scalar field type.
static T zero()
Returns representation of zero for this scalar type.
Scope protection wrapper for Teuchos::Time, with timer reporting functionality.
Smart reference counting pointer class for automatic garbage collection.
const Time & counter() const
Constant access to the instance's counter reference.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
ECounterSetOp
Set operation type for mergeCounterNames() to perform.
Common capabilities for collecting and reporting performance data across processors.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
same as MinLoc, but don't allow zero
Ptr< T > ptr() const
Get a safer wrapper raw C++ pointer to the underlying object.
Simple wrapper class for raw pointers to single objects where no persisting relationship exists...
static void enableTimer(const std::string &name)
Enable the timer with the given name.
static TableFormat & format()
Table format that will be used to print a summary of timer results.
static void zeroOutTimers()
Reset all global timers to zero.
static void report(Ptr< const Comm< int > > comm, std::ostream &out, const std::string &filter, const RCP< ParameterList > &params=null)
Report timer statistics to the given output stream.