Allocator Builder
Policy Based C++ Template Allocator Library
shared_heap.hpp
//
// Copyright 2014 Felix Petriconi
//
// License: http://boost.org/LICENSE_1_0.txt, Boost License 1.0
//
// Authors: http://petriconi.net, Felix Petriconi
//
#pragma once

#include "allocator_base.hpp"
#include "internal/shared_helpers.hpp"
#include "internal/dynastic.hpp"
#include "internal/reallocator.hpp"
#include "internal/heap_helpers.hpp"

#include <atomic>
#include <algorithm>
#include <numeric>
#include <boost/thread.hpp>

#ifdef min
#undef min
#endif

#ifdef max
#undef max
#endif

#define CAS(ATOMIC, EXPECT, VALUE) ATOMIC.compare_exchange_strong(EXPECT, VALUE)

#define CAS_P(ATOMIC, EXPECT, VALUE) ATOMIC->compare_exchange_strong(EXPECT, VALUE)
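// Both helpers wrap std::atomic<>::compare_exchange_strong: the word is updated to
// VALUE only if it still holds EXPECT; on failure EXPECT receives the value that was
// actually observed, so callers typically reload and retry.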

namespace alb {
  inline namespace v_100 {
    template <class Allocator, size_t NumberOfChunks, size_t ChunkSize>
    class shared_heap {
      const uint64_t all_set = std::numeric_limits<uint64_t>::max();
      const uint64_t all_zero = 0u;

      internal::dynastic<(NumberOfChunks == internal::DynasticDynamicSet ? internal::DynasticDynamicSet : NumberOfChunks),
                         internal::DynasticDynamicSet> numberOfChunks_;

      internal::dynastic<(ChunkSize == internal::DynasticDynamicSet ? internal::DynasticDynamicSet : ChunkSize),
                         internal::DynasticDynamicSet> chunk_size_;

      block buffer_;
      block controlBuffer_;

      // bit field where a 0 bit marks a used chunk and a 1 bit marks a free chunk
      std::atomic<uint64_t> *control_;
      size_t controlSize_;

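      // Locking scheme: updates confined to a single control word take a shared_lock
      // around their CAS, while paths that must inspect or claim several control words
      // atomically (multi-register allocation, expansion, deallocate_all) take a
      // unique_lock on this mutex.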
      boost::shared_mutex mutex_;
      Allocator allocator_;

      void shrink() noexcept {
        allocator_.deallocate(controlBuffer_);
        allocator_.deallocate(buffer_);
        control_ = nullptr;
      }

      shared_heap(const shared_heap &) = delete;
      shared_heap &operator=(const shared_heap &) = delete;

    public:
      using allocator = Allocator;
      static constexpr bool supports_truncated_deallocation = true;
      static constexpr unsigned alignment = Allocator::alignment;

      shared_heap() noexcept {
        init();
      }

      shared_heap(size_t numberOfChunks, size_t chunkSize) noexcept
        : all_set(std::numeric_limits<uint64_t>::max())
        , all_zero(0) {

        numberOfChunks_.value(internal::round_to_alignment(64, numberOfChunks));
        chunk_size_.value(internal::round_to_alignment(alignment, chunkSize));
        init();
      }

      shared_heap(shared_heap &&x) noexcept {
        *this = std::move(x);
      }

      shared_heap &operator=(shared_heap &&x) noexcept {
        if (this == &x) {
          return *this;
        }
        boost::unique_lock<boost::shared_mutex> guardThis(mutex_);
        shrink();
        boost::unique_lock<boost::shared_mutex> guardX(x.mutex_);
        numberOfChunks_ = std::move(x.numberOfChunks_);
        chunk_size_ = std::move(x.chunk_size_);
        buffer_ = std::move(x.buffer_);
        controlBuffer_ = std::move(x.controlBuffer_);
        control_ = std::move(x.control_);
        controlSize_ = std::move(x.controlSize_);
        allocator_ = std::move(x.allocator_);

        x.control_ = nullptr;

        return *this;
      }

      size_t number_of_chunk() const noexcept {
        return numberOfChunks_.value();
      }

      size_t chunk_size() const noexcept {
        return chunk_size_.value();
      }

      ~shared_heap() {
        shrink();
      }

      bool owns(const block &b) const noexcept {
        return b && buffer_.ptr <= b.ptr &&
               b.ptr < (static_cast<char *>(buffer_.ptr) + buffer_.length);
      }

      block allocate(size_t n) noexcept {
        block result;
        if (n == 0) {
          return result;
        }

        // The heap cannot handle such a big request
        if (n > chunk_size_.value() * numberOfChunks_.value()) {
          return result;
        }

        size_t numberOfAlignedBytes = internal::round_to_alignment(chunk_size_.value(), n);
        size_t numberOfBlocks = numberOfAlignedBytes / chunk_size_.value();
        numberOfBlocks = std::max(size_t(1), numberOfBlocks);

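        // Dispatch on the number of chunks needed: fewer than 64 fit into a single
        // control word, exactly 64 claim one whole word, multiples of 64 claim several
        // whole words; if those paths fail, or the count does not fit any of them,
        // allocation falls through to the register-overlap path.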
        if (numberOfBlocks < 64) {
          result = allocate_within_single_control_register(numberOfBlocks);
          if (result) {
            return result;
          }
        }
        else if (numberOfBlocks == 64) {
          result = allocate_within_complete_control_register(numberOfBlocks);
          if (result) {
            return result;
          }
        }
        else if ((numberOfBlocks % 64) == 0) {
          result = allocate_multiple_complete_control_registers(numberOfBlocks);
          if (result) {
            return result;
          }
        }
        result = allocate_with_register_overlap(numberOfBlocks);
        return result;
      }

      void deallocate(block &b) noexcept {
        if (!b) {
          return;
        }

        if (!owns(b)) {
          assert(!"It is not wise to let me deallocate a foreign Block!");
          return;
        }

        const auto context = block_to_context(b);

        // printf("Used Block %d in thread %d\n", blockIndex,
        //        std::this_thread::get_id());
        if (context.subIndex + context.usedChunks <= 64) {
          set_within_single_register<shared_helpers::SharedLock, true>(context);
        }
        else if ((context.usedChunks % 64) == 0) {
          deallocate_for_multiple_complete_control_register(context);
        }
        else {
          deallocate_with_control_register_overlap(context);
        }
        b.reset();
      }

      void deallocate_all() noexcept {
        boost::unique_lock<boost::shared_mutex> guard(mutex_);
        std::fill(control_, control_ + controlSize_, all_set);
      }

      bool reallocate(block &b, size_t n) noexcept {
        if (internal::is_reallocation_handled_default(*this, b, n)) {
          return true;
        }

        const auto numberOfBlocks = static_cast<int>(b.length / chunk_size_.value());
        const auto numberOfNewNeededBlocks =
            static_cast<int>(internal::round_to_alignment(chunk_size_.value(), n) / chunk_size_.value());

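        // Shrinking is handled in place by marking the no longer needed trailing
        // chunks as free again; growing falls back to an allocate-copy-deallocate
        // cycle via internal::reallocate_with_copy at the end of this function.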
        if (numberOfBlocks == numberOfNewNeededBlocks) {
          return true;
        }
        if (b.length > n) {
          auto context = block_to_context(b);
          if (context.subIndex + context.usedChunks <= 64) {
            set_within_single_register<shared_helpers::SharedLock, true>(
                BlockContext{ context.registerIndex, context.subIndex + numberOfNewNeededBlocks,
                              context.usedChunks - numberOfNewNeededBlocks });
          }
          else {
            deallocate_with_control_register_overlap(
                BlockContext{ context.registerIndex, context.subIndex + numberOfNewNeededBlocks,
                              context.usedChunks - numberOfNewNeededBlocks });
          }
          b.length = numberOfNewNeededBlocks * chunk_size_.value();
          return true;
        }
        return internal::reallocate_with_copy(*this, *this, b, n);
      }

      bool expand(block &b, size_t delta) noexcept {
        if (delta == 0) {
          return true;
        }

        const auto context = block_to_context(b);
        const auto numberOfAdditionalNeededBlocks = static_cast<int>(
            internal::round_to_alignment(chunk_size_.value(), delta) / chunk_size_.value());

        if (context.subIndex + context.usedChunks + numberOfAdditionalNeededBlocks <= 64) {
          if (test_and_set_within_single_register<false>(
                  BlockContext{ context.registerIndex, context.subIndex + context.usedChunks,
                                numberOfAdditionalNeededBlocks })) {
            b.length += numberOfAdditionalNeededBlocks * chunk_size_.value();
            return true;
          }
          return false;
        }
        if (test_and_set_over_multiple_registers<false>(BlockContext{ context.registerIndex,
                                                                      context.subIndex + context.usedChunks,
                                                                      numberOfAdditionalNeededBlocks })) {
          b.length += numberOfAdditionalNeededBlocks * chunk_size_.value();
          return true;
        }

        return false;
      }

    private:
      void init() noexcept {
        controlSize_ = numberOfChunks_.value() / 64;
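        // e.g. 512 chunks are tracked by 512 / 64 = 8 control words; the runtime
        // constructor rounds numberOfChunks up to a multiple of 64, so the division
        // is exact there.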
        controlBuffer_ = allocator_.allocate(sizeof(std::atomic<uint64_t>) * controlSize_);
        assert((bool)controlBuffer_);

        control_ = static_cast<std::atomic<uint64_t> *>(controlBuffer_.ptr);
        new (control_) std::atomic<uint64_t>[controlSize_]();

        buffer_ = allocator_.allocate(chunk_size_.value() * numberOfChunks_.value());
        assert((bool)buffer_);

        deallocate_all();
      }

      struct BlockContext
      {
        int registerIndex;
        int subIndex;
        int usedChunks;
      };
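      // registerIndex selects the 64-bit control word, subIndex the first bit inside
      // that word, and usedChunks the number of chunks the block covers. With a
      // chunk_size() of 64, a block at byte offset 8192 into the buffer with length
      // 256 maps to BlockContext{ 2, 0, 4 }.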

      BlockContext block_to_context(const block &b) noexcept {
        const auto blockIndex = static_cast<int>(
            (static_cast<char *>(b.ptr) - static_cast<char *>(buffer_.ptr)) / chunk_size_.value());

        return { blockIndex / 64, blockIndex % 64, static_cast<int>(b.length / chunk_size_.value()) };
      }

      template <bool Used>
      bool test_and_set_within_single_register(const BlockContext &context) noexcept {
        assert(context.subIndex + context.usedChunks <= 64);

        uint64_t mask = (context.usedChunks == 64) ? all_set : (((uint64_t(1) << context.usedChunks) - 1)
                                                                << context.subIndex);
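        // e.g. usedChunks == 3 and subIndex == 5 yield 0b111 shifted left by 5,
        // i.e. mask == 0xE0: the three bits that describe the block's chunks.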

        uint64_t currentRegister, newRegister;
        do {
          currentRegister = control_[context.registerIndex].load();
          if ((currentRegister & mask) != mask) {
            return false;
          }
          newRegister = helpers::set_used<Used>(currentRegister, mask);
          boost::shared_lock<boost::shared_mutex> guard(mutex_);
        } while (!CAS(control_[context.registerIndex], currentRegister, newRegister));
        return true;
      }

      template <class LockPolicy, bool Used>
      void set_over_multiple_registers(const BlockContext &context) noexcept {
        size_t chunksToTest = context.usedChunks;
        size_t subIndexStart = context.subIndex;
        size_t registerIndex = context.registerIndex;
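        // Walk the affected control words one at a time: first the (possibly partial)
        // word starting at subIndexStart, then whole words, until all usedChunks bits
        // have been set.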
        do {
          uint64_t mask;
          if (subIndexStart > 0)
            mask = ((uint64_t(1) << (64 - subIndexStart)) - 1) << subIndexStart;
          else
            mask = (chunksToTest >= 64) ? all_set : ((uint64_t(1) << chunksToTest) - 1);

          assert(registerIndex < controlSize_);

          uint64_t currentRegister, newRegister;
          do {
            currentRegister = control_[registerIndex].load();
            newRegister = helpers::set_used<Used>(currentRegister, mask);
            LockPolicy guard(mutex_);
          } while (!CAS(control_[registerIndex], currentRegister, newRegister));

          if (subIndexStart + chunksToTest > 64) {
            chunksToTest = subIndexStart + chunksToTest - 64;
            subIndexStart = 0;
          }
          else {
            chunksToTest = 0;
          }
          registerIndex++;
        } while (chunksToTest > 0);
      }

      template <bool Used>
      bool test_and_set_over_multiple_registers(const BlockContext &context) noexcept {
        static_assert(sizeof(std::atomic<uint64_t>) == sizeof(uint64_t),
                      "Current assumption that std::atomic has no overhead on "
                      "integral types is not fulfilled!");

        // This branch works on multiple chunks at the same time and so a real lock
        // is necessary.
        size_t chunksToTest = context.usedChunks;
        size_t subIndexStart = context.subIndex;
        size_t registerIndex = context.registerIndex;

        boost::unique_lock<boost::shared_mutex> guard(mutex_);
        do {
          uint64_t mask;
          if (subIndexStart > 0)
            mask = ((uint64_t(1) << (64 - subIndexStart)) - 1) << subIndexStart;
          else
            mask = (chunksToTest >= 64) ? all_set : ((uint64_t(1) << chunksToTest) - 1);

          auto currentRegister = control_[registerIndex].load();

          if ((currentRegister & mask) != mask) {
            return false;
          }

          if (subIndexStart + chunksToTest > 64) {
            chunksToTest = subIndexStart + chunksToTest - 64;
            subIndexStart = 0;
          }
          else {
            chunksToTest = 0;
          }
          registerIndex++;
          // stop before running past the last control word
          if (chunksToTest > 0 && registerIndex >= controlSize_) {
            return false;
          }
        } while (chunksToTest > 0);

        set_over_multiple_registers<shared_helpers::NullLock, Used>(context);

        return true;
      }

      template <class LockPolicy, bool Used>
      void set_within_single_register(const BlockContext &context) noexcept {
        assert(context.subIndex + context.usedChunks <= 64);

        uint64_t mask = (context.usedChunks == 64) ? all_set : (((uint64_t(1) << context.usedChunks) - 1)
                                                                << context.subIndex);

        uint64_t currentRegister, newRegister;
        do {
          currentRegister = control_[context.registerIndex].load();
          newRegister = helpers::set_used<Used>(currentRegister, mask);
          LockPolicy guard(mutex_);
        } while (!CAS(control_[context.registerIndex], currentRegister, newRegister));
      }

      block allocate_within_single_control_register(size_t numberOfBlocks) noexcept {
        block result;
        // We may find a free spot, but another thread can claim it before our CAS
        // succeeds, so we keep retrying.
        do {
          // first we have to look for at least one free block
          size_t controlIndex = 0;
          while (controlIndex < controlSize_) {
            auto currentControlRegister = control_[controlIndex].load();

            // 0 means all 64 chunks of this register are in use; skip it
            if (currentControlRegister != 0) {
              uint64_t mask = (numberOfBlocks == 64) ? all_set : ((uint64_t(1) << numberOfBlocks) - 1);

              size_t i = 0;
              // Search for numberOfBlocks consecutive bits that are set to one
              while (i <= 64 - numberOfBlocks) {
                if ((currentControlRegister & mask) == mask) {
                  auto newControlRegister = helpers::set_used<false>(currentControlRegister, mask);

                  boost::shared_lock<boost::shared_mutex> guard(mutex_);

                  if (CAS(control_[controlIndex], currentControlRegister, newControlRegister)) {
                    size_t ptrOffset = (controlIndex * 64 + i) * chunk_size_.value();

                    result.ptr = static_cast<char *>(buffer_.ptr) + ptrOffset;
                    result.length = numberOfBlocks * chunk_size_.value();

                    return result;
                  }
                }
                i++;
                mask <<= 1;
              }
            }
            controlIndex++;
          }
          if (controlIndex == controlSize_) {
            return result;
          }
        } while (true);
      }

      block allocate_within_complete_control_register(size_t numberOfBlocks) noexcept {
        // We may find a completely free control register, but another thread can
        // claim it before our CAS succeeds, so we keep retrying.
        do {
          // first we have to look for at least one completely free control register
          auto freeChunk =
              std::find_if(control_, control_ + controlSize_,
                           [this](const std::atomic<uint64_t> &v) { return v.load() == all_set; });

          if (freeChunk == control_ + controlSize_) {
            return block();
          }

          boost::shared_lock<boost::shared_mutex> guard(mutex_);

          // use a local expected value: CAS_P writes the observed value back on failure
          auto expected = all_set;
          if (CAS_P(freeChunk, expected, all_zero)) {
            size_t ptrOffset = ((freeChunk - control_) * 64) * chunk_size_.value();

            return block(static_cast<char *>(buffer_.ptr) + ptrOffset,
                         numberOfBlocks * chunk_size_.value());
          }
        } while (true);
      }

      block allocate_multiple_complete_control_registers(size_t numberOfBlocks) noexcept {
        block result;
        // This branch works on multiple chunks at the same time and so a real
        // lock is necessary.
        boost::unique_lock<boost::shared_mutex> guard(mutex_);

        const auto neededChunks = static_cast<int>(numberOfBlocks / 64);
        auto freeFirstChunk = std::search_n(
            control_, control_ + controlSize_, neededChunks, all_set,
            [](const std::atomic<uint64_t> &v, const uint64_t &p) { return v.load() == p; });

        if (freeFirstChunk == control_ + controlSize_) {
          return result;
        }
        auto p = freeFirstChunk;
        while (p < freeFirstChunk + neededChunks) {
          // use a local expected value: CAS_P writes the observed value back on failure
          auto expected = all_set;
          CAS_P(p, expected, all_zero);
          ++p;
        }

        size_t ptrOffset = ((freeFirstChunk - control_) * 64) * chunk_size_.value();
        result.ptr = static_cast<char *>(buffer_.ptr) + ptrOffset;
        result.length = numberOfBlocks * chunk_size_.value();
        return result;
      }

      block allocate_with_register_overlap(size_t numberOfBlocks) noexcept {
        block result;
        // search for free area
        static_assert(sizeof(std::atomic<uint64_t>) == sizeof(uint64_t),
                      "Current assumption that std::atomic has no overhead on "
                      "integral types is not fulfilled!");

        auto p = reinterpret_cast<unsigned char *>(control_);
        const auto lastp =
            reinterpret_cast<unsigned char *>(control_) + controlSize_ * sizeof(uint64_t);

        auto freeBlocksCount = size_t(0);
        unsigned char *chunkStart = nullptr;

        // This branch works on multiple chunks at the same time and so a real
        // lock is necessary.
        boost::unique_lock<boost::shared_mutex> guard(mutex_);

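        // The scan below inspects the control words one byte at a time: 0xff marks
        // eight consecutive free chunks, so only runs that start on an 8-chunk
        // boundary are considered.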
        while (p < lastp) {
          if (*p == 0xff) { // free
            if (!chunkStart) {
              chunkStart = p;
            }

            freeBlocksCount += 8;
            if (freeBlocksCount >= numberOfBlocks) {
              break;
            }
          }
          else {
            freeBlocksCount = 0;
            chunkStart = nullptr;
          }
          p++;
        }

        if (p != lastp && freeBlocksCount >= numberOfBlocks) {
          size_t ptrOffset =
              (chunkStart - reinterpret_cast<unsigned char *>(control_)) * 8 * chunk_size_.value();

          result.ptr = static_cast<char *>(buffer_.ptr) + ptrOffset;
          result.length = numberOfBlocks * chunk_size_.value();

          set_over_multiple_registers<shared_helpers::NullLock, false>(block_to_context(result));
          return result;
        }

        return result;
      }

      void deallocate_for_multiple_complete_control_register(const BlockContext &context) noexcept {
        const auto registerToFree = context.registerIndex + context.usedChunks / 64;
        for (auto i = context.registerIndex; i < registerToFree; i++) {
          // a unique lock is not necessary here
          boost::shared_lock<boost::shared_mutex> guard(mutex_);
          control_[i] = static_cast<uint64_t>(-1);
        }
      }

      void deallocate_with_control_register_overlap(const BlockContext &context) noexcept
      {
        set_over_multiple_registers<shared_helpers::SharedLock, true>(context);
      }
    };
  }
  using namespace v_100;
}

#undef CAS
#undef CAS_P
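
A minimal usage sketch (not part of the header). It assumes alb::mallocator from the same library as the backing allocator and flat include paths; adjust both to your build setup. Any allocator that provides allocate/deallocate and an alignment constant should work in its place.

#include "shared_heap.hpp"
#include "mallocator.hpp" // assumed backing allocator

int main()
{
  // 512 chunks of 64 bytes each: a 32 KiB heap tracked by 8 control words
  alb::shared_heap<alb::mallocator, 512, 64> heap;

  alb::block b = heap.allocate(100); // rounded up to two 64-byte chunks
  if (b) {
    // ... use b.ptr and b.length ...
    heap.deallocate(b); // marks both chunks as free again
  }
  return 0;
}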