Motivation
C++11 has introduced the move semantic which can also be used in containers instead of "old-school" pointers. The specific containers owning objects has been proposed earlier by third-party libraries like Boost (for example, ptr_map
or ptr_vector
).
The goal of the test is to compare the speed of manipulations on the objects having move semantics stored in the standard vector container with the container stores the pointers only.
Scenario
We'll test the sequential and random read-write access to the objects stored in
- statically sized vectors
- dynamically sized vectors (objects are pushed back)
The containers are allocated in the heap. The count of objects in the container should be sufficiently huge to see the difference. By default it is 1 million but may be changed.
Program
Here is the simple console program compiled with Microsoft (R) C/C++ Optimizing Compiler Version 19.15.26730
for 64-bits Windows target.
#include <iostream> #include <string> #include <memory> #include <vector> #include <random> #include <ctime> #include <chrono> #include <cassert> using namespace std; using namespace std::chrono; class TestData { public: TestData() : int_value(0), str_value("") { } TestData(int intval, string strval) : int_value(intval), str_value(strval) { } TestData(TestData&& src) : int_value(std::move(src.int_value)), str_value(std::move(src.str_value)) { } TestData& operator = (TestData&& src) { int_value = std::move(src.int_value); str_value = std::move(src.str_value); return *this; } int int_value; string str_value; }; using TestDataVec = vector<TestData>; using TestDataPtrVec = vector<TestData*>; class Timer { public: Timer() {} void Start(string timer_title) { title = timer_title; t1 = system_clock::now(); } void Stop() { t2 = system_clock::now(); } void StopAndPrint(string timer_title) { Stop(); auto ms1 = duration_cast<std::chrono::microseconds>(t2 - t1); cout << timer_title << endl; cout << "Duration, mcsec: " << ms1.count() << endl; } void StopAndPrint() { StopAndPrint(title); } private: system_clock::time_point t1 = system_clock::now(); system_clock::time_point t2; string title; }; // Globals: tested objects const int MaxObjectCount = 1000000; unique_ptr<int[]> random_access(new int[MaxObjectCount]); TestDataVec* v1 = nullptr; TestDataPtrVec* v2 = nullptr; void TestFillAllocated() { Timer t1; t1.Start("Allocating TestDataVec"); v1 = new TestDataVec(MaxObjectCount); t1.StopAndPrint(); t1.Start("Allocating TestDataPtrVec"); v2 = new TestDataPtrVec(MaxObjectCount); t1.StopAndPrint(); t1.Start("Fill with TestData"); for (int i = 0; i < MaxObjectCount; i++) { TestData data(i, "Mov " + std::to_string(i)); (*v1)[i] = std::move(data); } t1.StopAndPrint(); t1.Start("Fill with TestData*"); for (int i = 0; i < MaxObjectCount; i++) { TestData* data = new TestData(i, "Ptr " + std::to_string(i)); (*v2)[i] = data; } t1.StopAndPrint(); } void TestFillPushBack() { Timer t1; t1.Start("Allocating TestDataVec"); v1 = new TestDataVec(); t1.StopAndPrint(); t1.Start("Allocating TestDataPtrVec"); v2 = new TestDataPtrVec(); t1.StopAndPrint(); t1.Start("Fill with TestData"); for (int i = 0; i < MaxObjectCount; i++) { TestData data(i, "Mov " + std::to_string(i)); v1->push_back(std::move(data)); } t1.StopAndPrint(); t1.Start("Fill with TestData*"); for (int i = 0; i < MaxObjectCount; i++) { TestData* data = new TestData(i, "Ptr " + std::to_string(i)); v2->push_back(data); } t1.StopAndPrint(); } void TestAccess() { Timer t1; t1.Start("Sequential access TestData"); for (int i = 0; i < MaxObjectCount; i++) { (*v1)[i].int_value++; assert(v1[i].int_value == i + 1); } t1.StopAndPrint(); t1.Start("Sequential access TestData*"); for (int i = 0; i < MaxObjectCount; i++) { (*v2)[i]->int_value++; assert(v2[i]->int_value == i + 1); } t1.StopAndPrint(); t1.Start("Random access TestData"); for (int i = 0; i < MaxObjectCount; i++) { int index = random_access[i]; int old_value = (*v1)[index].int_value; (*v1)[index].int_value++; assert(v1[index].int_value == old_value + 1); } t1.StopAndPrint(); t1.Start("Random access TestData*"); for (int i = 0; i < MaxObjectCount; i++) { int index = random_access[i]; int old_value = (*v2)[index]->int_value; (*v2)[index]->int_value++; assert(v2[index]->int_value == old_value + 1); } t1.StopAndPrint(); } void TestDelete() { Timer t1; t1.Start("Deleting TestData"); delete v1; t1.StopAndPrint(); t1.Start("Deleting TestData*"); for (TestDataPtrVec::iterator it = v2->begin(); it != v2->end(); it++) delete *it; delete v2; t1.StopAndPrint(); } int main() { // Random access order random_device rnd_device; default_random_engine rnd_engine(rnd_device()); uniform_int_distribution<int> uniform_dist(0, MaxObjectCount - 1); for (size_t i = 0; i < MaxObjectCount; i++) random_access[i] = uniform_dist(rnd_engine); // Benchmarks cout << "Test preallocated vector data" << endl; TestFillAllocated(); TestAccess(); TestDelete(); cout << endl << "Test dynamically added vector data" << endl; TestFillPushBack(); TestAccess(); TestDelete(); }
Results
Test preallocated vector data Allocating TestDataVec Duration, mcsec: 16527 Allocating TestDataPtrVec Duration, mcsec: 2840 Fill with TestData Duration, mcsec: 31316 Fill with TestData* Duration, mcsec: 80240 Sequential access TestData Duration, mcsec: 3173 Sequential access TestData* Duration, mcsec: 5039 Random access TestData Duration, mcsec: 13619 Random access TestData* Duration, mcsec: 153940 Deleting TestData Duration, mcsec: 7648 Deleting TestData* Duration, mcsec: 37663 Test dynamically added vector data Allocating TestDataVec Duration, mcsec: 0 Allocating TestDataPtrVec Duration, mcsec: 0 Fill with TestData Duration, mcsec: 88315 Fill with TestData* Duration, mcsec: 86523 Sequential access TestData Duration, mcsec: 3102 Sequential access TestData* Duration, mcsec: 5079 Random access TestData Duration, mcsec: 12926 Random access TestData* Duration, mcsec: 154061 Deleting TestData Duration, mcsec: 8067 Deleting TestData* Duration, mcsec: 36407
Conclusions
One can see that manipulations with objects having move semantic is more efficient that the manipulations with pointers except the initial allocation (in statically sized container scenario). The "overhead" is "The rule of 5" conformity: you need to implement the move semantic for all user defined classes, to test and to maintain it after. Whether the drawbacks of additional code will exceed the performance profits, is the decision to make but the advantage seems to be significant.