48 #ifdef HAVE_TEUCHOS_BOOST 49 # include "boost/shared_ptr.hpp" 62 double relCpuSpeed = 1e-2;
63 int maxArraySize = 10000;
64 double maxRcpRawCreateDestroyRatio = 10.0;
65 double maxRcpRawAdjustRefCountRatio = 100.0;
66 double maxRcpSpAdjustRefCountRatio = 5.0;
67 double maxRcpRawObjAccessRatio = 13.5;
69 const int intPrec = 8;
70 const int dblPrec = 6;
78 "rel-cpu-speed", &relCpuSpeed,
79 "The relative speed of the CPU (higher means the machine runs faster)" 82 "max-array-size", &maxArraySize,
83 "The maximum size of the arrays created" 86 "max-rcp-create-destroy-ratio", &maxRcpRawCreateDestroyRatio,
87 "The ratio of the final CPU time ratio of creating and destroying" 88 "std::vector<char>(size) objects wrapped in an RCP object versus" 89 "using just raw new and delete." 92 "max-rcp-raw-adjust-ref-count-ratio", &maxRcpRawAdjustRefCountRatio,
93 "The ratio of the final CPU time ratio for adjusting the reference" 94 "count of RCP objects versus a raw pointer." 97 "max-rcp-sp-adjust-ref-count-ratio", &maxRcpSpAdjustRefCountRatio,
98 "The ratio of the final CPU time ratio for adjusting the reference" 99 "count of RCP objects versus boost::shared_ptr objects." 102 "max-rcp-raw-obj-access-ratio", &maxRcpRawObjAccessRatio,
103 "The ratio of the final CPU time ratio for accessing the object for RCP" 104 "versus a raw pointer." 111 struct DeleteDeleter {};
116 out <<
"\nPrinting the size the RCP and RCPNodeImpl objects ...\n";
129 #ifdef HAVE_TEUCHOS_BOOST 134 sizeof(boost::detail::sp_counted_impl_pd<std::vector<double>,
135 DeleteDeleter<std::vector<double> > >),
146 const int maxLoopIters = 1000;
147 const double relTestCost = 1e-3;
148 const double numInnerLoops = relCpuSpeed / relTestCost;
151 <<
"Messuring the overhead of creating and destorying objects of different sizes\n" 152 <<
"using raw C++ pointers, shared_ptr, and using RCP.\n" 154 <<
"Number of loops = relCpuSpeed/relTestCost = " 155 << relCpuSpeed <<
"/" << relTestCost <<
" = " << numInnerLoops <<
"\n" 158 TabularOutputter outputter(out);
159 outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
160 outputter.setFieldTypePrecision(TO::INT, intPrec);
162 outputter.pushFieldSpec(
"obj size", TO::INT);
163 outputter.pushFieldSpec(
"num loops", TO::INT);
164 outputter.pushFieldSpec(
"raw", TO::DOUBLE);
165 #ifdef HAVE_TEUCHOS_BOOST 166 outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
168 outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
169 #ifdef HAVE_TEUCHOS_BOOST 170 outputter.pushFieldSpec(
"shared_ptr/raw", TO::DOUBLE);
172 outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
174 outputter.outputHeader();
176 double finalRcpRawRatio = 100000.0;
179 for (
int test_case_k = 0;
180 test_case_k < maxLoopIters && arraySize <= maxArraySize;
186 outputter.outputField(arraySize);
189 const int numActualLoops =
192 (numInnerLoops / arraySize)
193 * std::log(static_cast<double>(arraySize+1))
197 outputter.outputField(numActualLoops);
201 std::vector<std::vector<char>*> p_raw_vec(numActualLoops);
205 p_raw_vec[i] =
new std::vector<char>(arraySize, 1);
212 #ifdef HAVE_TEUCHOS_BOOST 215 typedef boost::shared_ptr<std::vector<char> > shared_ptr_t;
216 std::vector<shared_ptr_t > sp_vec(numActualLoops);
220 sp_vec[i] = shared_ptr_t(
new std::vector<char>(arraySize, 1));
230 std::vector<RCP<std::vector<char> > > p_vec(numActualLoops);
234 p_vec[i] =
rcp(
new std::vector<char>(arraySize, 1));
240 #ifdef HAVE_TEUCHOS_BOOST 242 const double spRatio = spTime / rawPtrTime;
243 outputter.outputField(spRatio);
247 const double rcpRatio = rcpTime / rawPtrTime;
248 outputter.outputField(rcpRatio);
253 finalRcpRawRatio =
TEUCHOS_MIN(rcpRatio, finalRcpRawRatio);
258 TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawCreateDestroyRatio );
269 const double relTestCost = 5e-3;
270 const int maxLoopIters = 1000;
271 const double numInnerLoops = relCpuSpeed / relTestCost;
274 <<
"Messuring the overhead of incrementing and deincrementing the reference count\n" 275 <<
"comparing RCP to raw pointer and boost::shared_ptr.\n" 278 TabularOutputter outputter(out);
279 outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
280 outputter.setFieldTypePrecision(TO::INT, intPrec);
282 outputter.pushFieldSpec(
"array dim", TO::INT);
283 outputter.pushFieldSpec(
"num loops", TO::INT);
284 outputter.pushFieldSpec(
"raw", TO::DOUBLE);
285 outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
286 outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
287 outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
288 outputter.pushFieldSpec(
"RCP/shared_ptr", TO::DOUBLE);
290 outputter.outputHeader();
292 double finalRcpRawRatio = 100000.0;
293 double finalRcpSpRatio = 100000.0;
298 test_case_k < maxLoopIters && arraySize <= maxArraySize;
304 outputter.outputField(arraySize);
307 const int numActualLoops =
310 (numInnerLoops / arraySize)
311 * std::log(static_cast<double>(arraySize+1))
315 outputter.outputField(numActualLoops);
319 char dummy_char =
'n';
320 std::vector<char*> p_raw_vec(arraySize);
323 for (
int i=0; i < arraySize; ++i) {
324 p_raw_vec[i] = &dummy_char;
330 #ifdef HAVE_TEUCHOS_BOOST 333 typedef boost::shared_ptr<char> shared_ptr_t;
334 shared_ptr_t sp(
new char(
'n'));
335 std::vector<shared_ptr_t> sp_vec(arraySize);
338 for (
int i=0; i < arraySize; ++i) {
345 outputter.outputField(
"-");
350 RCP<char> p(
new char(
'n'));
351 std::vector<RCP<char> > p_vec(arraySize);
354 for (
int i=0; i < arraySize; ++i) {
365 const double rcpRawRatio = rcpTime / rawPtrTime;
366 finalRcpRawRatio =
TEUCHOS_MIN(rcpRawRatio, finalRcpRawRatio);
367 outputter.outputField(rcpRawRatio);
369 #ifdef HAVE_TEUCHOS_BOOST 371 const double rcpSpRatio = rcpTime / spTime;
372 finalRcpSpRatio =
TEUCHOS_MIN(rcpSpRatio, finalRcpSpRatio);
373 outputter.outputField(rcpSpRatio);
375 outputter.outputField(
"-");
385 TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawAdjustRefCountRatio );
386 #ifdef HAVE_TEUCHOS_BOOST 388 TEST_COMPARE( finalRcpSpRatio, <=, maxRcpSpAdjustRefCountRatio );
391 (void)finalRcpSpRatio;
402 const double relTestCost = 1e-4;
403 const int maxLoopIters = 1000;
404 const double numInnerLoops = relCpuSpeed / relTestCost;
407 <<
"Messuring the overhead of dereferencing RCP, shared_ptr and a raw pointer.\n" 410 TabularOutputter outputter(out);
411 outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
412 outputter.setFieldTypePrecision(TO::INT, intPrec);
414 outputter.pushFieldSpec(
"array dim", TO::INT);
415 outputter.pushFieldSpec(
"num loops", TO::INT);
416 outputter.pushFieldSpec(
"raw", TO::DOUBLE);
417 outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
418 outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
419 outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
420 outputter.pushFieldSpec(
"RCP/shared_ptr", TO::DOUBLE);
422 outputter.outputHeader();
424 double finalRcpRawRatio = 100000.0;
426 const int dummy_int_val = 1;
427 int overall_dummy_int_out = 0;
432 test_case_k < maxLoopIters && arraySize <= maxArraySize;
438 outputter.outputField(arraySize);
441 const int numActualLoops =
444 (numInnerLoops / arraySize)
445 * std::log(static_cast<double>(arraySize+1))
449 outputter.outputField(numActualLoops);
451 int dummy_int_out = 0;
455 int dummy_int = dummy_int_val;
456 std::vector<int*> p_raw_vec(arraySize);
457 for (
int i=0; i < arraySize; ++i) {
458 p_raw_vec[i] = &dummy_int;
463 for (
int i=0; i < arraySize; ++i) {
464 dummy_int_out += *p_raw_vec[i];
469 overall_dummy_int_out += dummy_int_out;
472 #ifdef HAVE_TEUCHOS_BOOST 474 typedef boost::shared_ptr<int> shared_ptr_t;
475 shared_ptr_t sp(
new int(dummy_int_val));
476 std::vector<shared_ptr_t> sp_vec(arraySize);
477 for (
int i=0; i < arraySize; ++i) {
483 for (
int i=0; i < arraySize; ++i) {
484 dummy_int_out += *sp_vec[i];
489 overall_dummy_int_out += dummy_int_out;
491 outputter.outputField(
"-");
496 RCP<int> p(
new int(dummy_int_val));
497 std::vector<RCP<int> > p_vec(arraySize);
498 for (
int i=0; i < arraySize; ++i) {
504 for (
int i=0; i < arraySize; ++i) {
505 dummy_int_out += *p_vec[i];
510 overall_dummy_int_out += dummy_int_out;
513 const double rcpRawRatio = rcpTime / rawPtrTime;
514 finalRcpRawRatio =
TEUCHOS_MIN(rcpRawRatio, finalRcpRawRatio);
515 outputter.outputField(rcpRawRatio);
517 #ifdef HAVE_TEUCHOS_BOOST 519 const double rcpSpRatio = rcpTime / spTime;
520 outputter.outputField(rcpSpRatio);
522 outputter.outputField(
"-");
532 TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawObjAccessRatio );
537 if (overall_dummy_int_out == 0)
544 SomeStruct(
int member_in) : member(member_in) {}
554 const double relTestCost = 1e-4;
555 const int maxLoopIters = 1000;
556 const double numInnerLoops = relCpuSpeed / relTestCost;
559 <<
"Messuring the overhead of dereferencing RCP, shared_ptr and a raw pointer.\n" 562 TabularOutputter outputter(out);
563 outputter.setFieldTypePrecision(TO::DOUBLE, dblPrec);
564 outputter.setFieldTypePrecision(TO::INT, intPrec);
566 outputter.pushFieldSpec(
"array dim", TO::INT);
567 outputter.pushFieldSpec(
"num loops", TO::INT);
568 outputter.pushFieldSpec(
"raw", TO::DOUBLE);
569 outputter.pushFieldSpec(
"shared_ptr", TO::DOUBLE);
570 outputter.pushFieldSpec(
"RCP", TO::DOUBLE);
571 outputter.pushFieldSpec(
"RCP/raw", TO::DOUBLE);
572 outputter.pushFieldSpec(
"RCP/shared_ptr", TO::DOUBLE);
574 outputter.outputHeader();
576 double finalRcpRawRatio = 100000.0;
578 const int dummy_int_val = 1;
579 int overall_dummy_int_out = 0;
583 test_case_k < maxLoopIters && arraySize <= maxArraySize;
589 outputter.outputField(arraySize);
592 const int numActualLoops =
595 (numInnerLoops / arraySize)
596 * std::log(static_cast<double>(arraySize+1))
600 outputter.outputField(numActualLoops);
602 int dummy_int_out = 0;
606 SomeStruct dummy_SomeStruct(dummy_int_val);
607 std::vector<SomeStruct*> p_raw_vec(arraySize);
608 for (
int i=0; i < arraySize; ++i) {
609 p_raw_vec[i] = &dummy_SomeStruct;
614 for (
int i=0; i < arraySize; ++i) {
615 dummy_int_out += p_raw_vec[i]->member;
620 overall_dummy_int_out += dummy_int_out;
623 #ifdef HAVE_TEUCHOS_BOOST 625 typedef boost::shared_ptr<SomeStruct> shared_ptr_t;
626 shared_ptr_t sp(
new SomeStruct(dummy_int_val));
627 std::vector<shared_ptr_t> sp_vec(arraySize);
628 for (
int i=0; i < arraySize; ++i) {
634 for (
int i=0; i < arraySize; ++i) {
635 dummy_int_out += sp_vec[i]->member;
640 overall_dummy_int_out += dummy_int_out;
642 outputter.outputField(
"-");
647 RCP<SomeStruct> p(
new SomeStruct(dummy_int_val));
648 std::vector<RCP<SomeStruct> > p_vec(arraySize);
649 for (
int i=0; i < arraySize; ++i) {
655 for (
int i=0; i < arraySize; ++i) {
656 dummy_int_out += p_vec[i]->member;
661 overall_dummy_int_out += dummy_int_out;
664 const double rcpRawRatio = rcpTime / rawPtrTime;
665 finalRcpRawRatio =
TEUCHOS_MIN(rcpRawRatio, finalRcpRawRatio);
666 outputter.outputField(rcpRawRatio);
668 #ifdef HAVE_TEUCHOS_BOOST 670 const double rcpSpRatio = rcpTime / spTime;
671 outputter.outputField(rcpSpRatio);
673 outputter.outputField(
"-");
683 TEST_COMPARE( finalRcpRawRatio, <=, maxRcpRawObjAccessRatio );
688 if (overall_dummy_int_out == 0)
RCP< T > rcp(const boost::shared_ptr< T > &sptr)
Conversion function that takes in a boost::shared_ptr object and spits out a Teuchos::RCP object...
ERCPStrength
Used to specify if the pointer is weak or strong.
#define TEST_INEQUALITY_CONST(v1, v2)
Assert the inequality of v1 and constant v2.
static CommandLineProcessor & getCLP()
Return the CLP to add options to.
#define TEST_COMPARE(v1, comp, v2)
Assert that v1 comp v2 (where comp = '==', '>=', '!=', etc).
#define TEUCHOS_UNIT_TEST(TEST_GROUP, TEST_NAME)
Macro for defining a (non-templated) unit test.
Utility class that makes it easy to create formatted tables of output.
Policy class for deallocator that uses delete to delete a pointer which is used by RCP...
Node class to keep track of address and the reference count for a reference-counted utility class and...
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
Templated implementation class of RCPNode that has the responsibility for deleting the reference-coun...
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
Set a boolean option.
#define TEUCHOS_START_PERF_OUTPUT_TIMER_INNERLOOP(OUTPUTTER, NUMLOOPS, NUMINNERLOOPS)
Start a timer block using a TabularOutputter object.
#define TEUCHOS_END_PERF_OUTPUT_TIMER(OUTPUTTER, VARNAME)
End a timer block, output the time field to a TabularOutputter object, and set a variable with the ti...
#define TEUCHOS_MAX(x, y)
Handle class that manages the RCPNode's reference counting.
Smart reference counting pointer class for automatic garbage collection.
#define TEUCHOS_START_PERF_OUTPUT_TIMER(OUTPUTTER, NUMLOOPS)
Start a timer block using a TabularOutputter object.
#define TEUCHOS_MIN(x, y)
Reference-counted pointer class and non-member templated function implementations.
Class that helps parse command line input arguments from (argc,argv[]) and set options.