#include "allocator_base.hpp"
#include "internal/shared_helpers.hpp"
#include "internal/dynastic.hpp"
#include "internal/reallocator.hpp"
#include "internal/heap_helpers.hpp"

#include <boost/thread.hpp>

#define CAS(ATOMIC, EXPECT, VALUE) ATOMIC.compare_exchange_strong(EXPECT, VALUE)

#define CAS_P(ATOMIC, EXPECT, VALUE) ATOMIC->compare_exchange_strong(EXPECT, VALUE)
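// Both macros expand to std::atomic<T>::compare_exchange_strong(EXPECT, VALUE):
// the register is only replaced if it still holds EXPECT, and on failure EXPECT
// is overwritten with the current value, so the surrounding do-while loops can
// simply retry with fresh data.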
inline namespace v_100 {
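  /**
   * A thread-safe heap carved out of a single buffer obtained from the parent
   * Allocator. The buffer is split into chunks of ChunkSize bytes; each chunk
   * is represented by one bit in the 64-bit atomic control registers
   * (1 = free, 0 = used). Single-register updates are claimed with CAS loops
   * under a shared lock on mutex_; operations that have to touch several
   * registers at once take the mutex exclusively.
   */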
  template <class Allocator, size_t NumberOfChunks, size_t ChunkSize>
  class shared_heap {
    const uint64_t all_set = std::numeric_limits<uint64_t>::max();
    const uint64_t all_zero = 0u;

    std::atomic<uint64_t> *control_;

    boost::shared_mutex mutex_;

    void shrink() noexcept {
      allocator_.deallocate(controlBuffer_);
      allocator_.deallocate(buffer_);
    }
    using allocator = Allocator;

    static constexpr bool supports_truncated_deallocation = true;
    static constexpr unsigned alignment = Allocator::alignment;
    shared_heap(size_t numberOfChunks, size_t chunkSize) noexcept
      : all_set(std::numeric_limits<uint64_t>::max())
      boost::unique_lock<boost::shared_mutex> guardThis(mutex_);

      boost::unique_lock<boost::shared_mutex> guardX(x.mutex_);
      numberOfChunks_ = std::move(x.numberOfChunks_);
      chunk_size_ = std::move(x.chunk_size_);
      buffer_ = std::move(x.buffer_);
      controlBuffer_ = std::move(x.controlBuffer_);
      control_ = std::move(x.control_);
      controlSize_ = std::move(x.controlSize_);
      allocator_ = std::move(x.allocator_);

      x.control_ = nullptr;
    size_t number_of_chunk() const noexcept {
      return numberOfChunks_.value();
    }

    size_t chunk_size() const noexcept {
      return chunk_size_.value();
    }
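    /**
     * Returns true if the given block points into the single buffer managed by
     * this heap, i.e. its pointer lies in [buffer_.ptr, buffer_.ptr + buffer_.length).
     */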
    bool owns(const block &b) const noexcept {
      return b && buffer_.ptr <= b.ptr &&
             b.ptr < (static_cast<char *>(buffer_.ptr) + buffer_.length);
    }
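    /**
     * Reserves a block of at least n bytes, rounded up to whole chunks.
     * Requests larger than the whole heap are rejected up front; the rest is
     * dispatched by the number of chunks needed: fewer than 64 chunks are
     * searched for inside a single control register, exactly 64 chunks claim
     * one complete register, multiples of 64 claim a run of complete
     * registers, and everything else has to overlap register boundaries.
     */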
    block allocate(size_t n) noexcept {

      if (n > chunk_size_.value() * numberOfChunks_.value()) {

      size_t numberOfBlocks = numberOfAlignedBytes / chunk_size_.value();
      numberOfBlocks = std::max(size_t(1), numberOfBlocks);

      if (numberOfBlocks < 64) {
        result = allocate_within_single_control_register(numberOfBlocks);
      }
      else if (numberOfBlocks == 64) {
        result = allocate_within_complete_control_register(numberOfBlocks);
      }
      else if ((numberOfBlocks % 64) == 0) {
        result = allocate_multiple_complete_control_registers(numberOfBlocks);
      }
      else {
        result = allocate_with_register_overlap(numberOfBlocks);
      }
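    /**
     * Returns the chunks of a block to the heap by setting their bits back to
     * free. Blocks not owned by this heap are rejected; the bookkeeping path
     * mirrors allocate(): inside one register, across complete registers, or
     * overlapping a register boundary.
     */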
    void deallocate(block &b) noexcept {

        assert(!"It is not wise to let me deallocate a foreign Block!");

      const auto context = block_to_context(b);

      if (context.subIndex + context.usedChunks <= 64) {
        set_within_single_register<shared_helpers::SharedLock, true>(context);
      }
      else if ((context.usedChunks % 64) == 0) {
        deallocate_for_multiple_complete_control_register(context);
      }
      else {
        deallocate_with_control_register_overlap(context);
      }

    void deallocate_all() noexcept {
      boost::unique_lock<boost::shared_mutex> guard(mutex_);
      std::fill(control_, control_ + controlSize_, all_set);
    }
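    /**
     * Resizes a block to n bytes. The trivial cases (empty block, zero size)
     * are filtered by internal::is_reallocation_handled_default, and an
     * unchanged chunk count is a no-op; shrinking releases the no longer
     * needed chunks at the tail of the block and adjusts b.length.
     */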
    bool reallocate(block &b, size_t n) noexcept {
      if (internal::is_reallocation_handled_default(*this, b, n)) {

      const auto numberOfBlocks = static_cast<int>(b.length / chunk_size_.value());
      const auto numberOfNewNeededBlocks =

      if (numberOfBlocks == numberOfNewNeededBlocks) {

      auto context = block_to_context(b);
      if (context.subIndex + context.usedChunks <= 64) {
        set_within_single_register<shared_helpers::SharedLock, true>(
            BlockContext{ context.registerIndex, context.subIndex + numberOfNewNeededBlocks,
                          context.usedChunks - numberOfNewNeededBlocks });
      }
      else {
        deallocate_with_control_register_overlap(
            BlockContext{ context.registerIndex, context.subIndex + numberOfNewNeededBlocks,
                          context.usedChunks - numberOfNewNeededBlocks });
      }
      b.length = numberOfNewNeededBlocks * chunk_size_.value();
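    /**
     * Tries to grow the block in place by delta bytes. This only succeeds if
     * the chunks directly behind the block are still free and can be claimed
     * atomically; on success b.length grows accordingly.
     */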
    bool expand(block &b, size_t delta) noexcept {

      const auto context = block_to_context(b);
      const auto numberOfAdditionalNeededBlocks = static_cast<int>(

      if (context.subIndex + context.usedChunks + numberOfAdditionalNeededBlocks <= 64) {
        if (test_and_set_within_single_register<false>(
                BlockContext{ context.registerIndex, context.subIndex + context.usedChunks,
                              numberOfAdditionalNeededBlocks })) {
          b.length += numberOfAdditionalNeededBlocks * chunk_size_.value();

      if (test_and_set_over_multiple_registers<false>(
              BlockContext{ context.registerIndex, context.subIndex + context.usedChunks,
                            numberOfAdditionalNeededBlocks })) {
        b.length += numberOfAdditionalNeededBlocks * chunk_size_.value();
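    /**
     * Fetches both buffers from the parent allocator: one control register
     * (std::atomic<uint64_t>) per 64 chunks for the bookkeeping, plus the
     * payload buffer of chunk_size * numberOfChunks bytes.
     */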
    void init() noexcept {
      controlSize_ = numberOfChunks_.value() / 64;
      controlBuffer_ = allocator_.allocate(sizeof(std::atomic<uint64_t>) * controlSize_);
      assert((bool)controlBuffer_);

      control_ = static_cast<std::atomic<uint64_t> *>(controlBuffer_.ptr);
      new (control_) std::atomic<uint64_t>[controlSize_]();

      buffer_ = allocator_.allocate(chunk_size_.value() * numberOfChunks_.value());
      assert((bool)buffer_);
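    /**
     * Maps a block back to its bookkeeping coordinates: the index of the
     * control register, the bit offset inside that register, and the number of
     * chunks the block occupies. For example, a block starting at chunk 70 and
     * covering 3 chunks yields { registerIndex 1, subIndex 6, usedChunks 3 }.
     */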
    BlockContext block_to_context(const block &b) noexcept {
      const auto blockIndex = static_cast<int>(
          (static_cast<char *>(b.ptr) - static_cast<char *>(buffer_.ptr)) / chunk_size_.value());

      return { blockIndex / 64, blockIndex % 64, static_cast<int>(b.length / chunk_size_.value()) };
    }
    template <bool Used>
    bool test_and_set_within_single_register(const BlockContext &context) noexcept {
      assert(context.subIndex + context.usedChunks <= 64);

      uint64_t mask = (context.usedChunks == 64)
                          ? all_set
                          : (((uint64_t(1) << context.usedChunks) - 1) << context.subIndex);

      uint64_t currentRegister, newRegister;
      do {
        currentRegister = control_[context.registerIndex].load();
        if ((currentRegister & mask) != mask) {

        newRegister = helpers::set_used<Used>(currentRegister, mask);
        boost::shared_lock<boost::shared_mutex> guard(mutex_);
      } while (!CAS(control_[context.registerIndex], currentRegister, newRegister));
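    /**
     * Marks a run of chunks that crosses register boundaries as used or free.
     * The first register is touched from subIndex up to its end, the following
     * registers from bit 0 on, each with its own CAS loop guarded by the
     * LockPolicy passed in.
     */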
    template <class LockPolicy, bool Used>
    void set_over_multiple_registers(const BlockContext &context) noexcept {
      size_t chunksToTest = context.usedChunks;
      size_t subIndexStart = context.subIndex;
      size_t registerIndex = context.registerIndex;

      uint64_t mask;
      do {
        if (subIndexStart > 0)
          mask = ((uint64_t(1) << (64 - subIndexStart)) - 1) << subIndexStart;
        else
          mask = (chunksToTest >= 64) ? all_set : ((uint64_t(1) << chunksToTest) - 1);

        assert(registerIndex < controlSize_);

        uint64_t currentRegister, newRegister;
        do {
          currentRegister = control_[registerIndex].load();
          newRegister = helpers::set_used<Used>(currentRegister, mask);
          LockPolicy guard(mutex_);
        } while (!CAS(control_[registerIndex], currentRegister, newRegister));

        if (subIndexStart + chunksToTest > 64) {
          chunksToTest = subIndexStart + chunksToTest - 64;

      } while (chunksToTest > 0);
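    /**
     * Atomic variant of the register-spanning update: under an exclusive lock
     * it first verifies that every affected bit still has the expected free
     * state and only then flips them, so a competing allocation cannot claim
     * part of the same range in between.
     */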
    template <bool Used>
    bool test_and_set_over_multiple_registers(const BlockContext &context) noexcept {
      static_assert(sizeof(std::atomic<uint64_t>) == sizeof(uint64_t),
                    "Current assumption that std::atomic has no overhead on "
                    "integral types is not fulfilled!");

      size_t chunksToTest = context.usedChunks;
      size_t subIndexStart = context.subIndex;
      size_t registerIndex = context.registerIndex;

      boost::unique_lock<boost::shared_mutex> guard(mutex_);

      uint64_t mask;
      do {
        if (subIndexStart > 0)
          mask = ((uint64_t(1) << (64 - subIndexStart)) - 1) << subIndexStart;
        else
          mask = (chunksToTest >= 64) ? all_set : ((uint64_t(1) << chunksToTest) - 1);

        auto currentRegister = control_[registerIndex].load();

        if ((currentRegister & mask) != mask) {

        if (subIndexStart + chunksToTest > 64) {
          chunksToTest = subIndexStart + chunksToTest - 64;

        if (registerIndex > controlSize_) {

      } while (chunksToTest > 0);

      set_over_multiple_registers<shared_helpers::NullLock, Used>(context);
    template <class LockPolicy, bool Used>
    void set_within_single_register(const BlockContext &context) noexcept {
      assert(context.subIndex + context.usedChunks <= 64);

      uint64_t mask = (context.usedChunks == 64)
                          ? all_set
                          : (((uint64_t(1) << context.usedChunks) - 1) << context.subIndex);

      uint64_t currentRegister, newRegister;
      do {
        currentRegister = control_[context.registerIndex].load();
        newRegister = helpers::set_used<Used>(currentRegister, mask);
        LockPolicy guard(mutex_);
      } while (!CAS(control_[context.registerIndex], currentRegister, newRegister));
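    /**
     * Scans the control registers for a contiguous run of free bits long
     * enough for numberOfBlocks chunks, claims it with a CAS under a shared
     * lock and translates the bit position back into a pointer into buffer_.
     */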
    block allocate_within_single_control_register(size_t numberOfBlocks) noexcept {

      size_t controlIndex = 0;
      while (controlIndex < controlSize_) {
        auto currentControlRegister = control_[controlIndex].load();

        if (currentControlRegister != 0) {
          uint64_t mask = (numberOfBlocks == 64) ? all_set : ((uint64_t(1) << numberOfBlocks) - 1);

          while (i <= 64 - numberOfBlocks) {
            if ((currentControlRegister & mask) == mask) {
              auto newControlRegister = helpers::set_used<false>(currentControlRegister, mask);

              boost::shared_lock<boost::shared_mutex> guard(mutex_);

              if (CAS(control_[controlIndex], currentControlRegister, newControlRegister)) {
                size_t ptrOffset = (controlIndex * 64 + i) * chunk_size_.value();

                result.ptr = static_cast<char *>(buffer_.ptr) + ptrOffset;
                result.length = numberOfBlocks * chunk_size_.value();

      if (controlIndex == controlSize_) {
    block allocate_within_complete_control_register(size_t numberOfBlocks) noexcept {

      auto freeChunk =
          std::find_if(control_, control_ + controlSize_,
                       [this](const std::atomic<uint64_t> &v) { return v.load() == all_set; });

      if (freeChunk == control_ + controlSize_) {

      boost::shared_lock<boost::shared_mutex> guard(mutex_);

      if (CAS_P(freeChunk, const_cast<uint64_t &>(all_set), all_zero)) {
        size_t ptrOffset = ((freeChunk - control_) * 64) * chunk_size_.value();

        return block(static_cast<char *>(buffer_.ptr) + ptrOffset,
                     numberOfBlocks * chunk_size_.value());
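    /**
     * Handles requests that are an exact multiple of 64 chunks: under an
     * exclusive lock it searches for the needed number of consecutive,
     * completely free registers and claims them one by one.
     */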
    block allocate_multiple_complete_control_registers(size_t numberOfBlocks) noexcept {

      boost::unique_lock<boost::shared_mutex> guard(mutex_);

      const auto neededChunks = static_cast<int>(numberOfBlocks / 64);
      auto freeFirstChunk = std::search_n(
          control_, control_ + controlSize_, neededChunks, all_set,
          [](const std::atomic<uint64_t> &v, const uint64_t &p) { return v.load() == p; });

      if (freeFirstChunk == control_ + controlSize_) {

      auto p = freeFirstChunk;
      while (p < freeFirstChunk + neededChunks) {
        CAS_P(p, const_cast<uint64_t &>(all_set), all_zero);
        ++p;
      }

      size_t ptrOffset = ((freeFirstChunk - control_) * 64) * chunk_size_.value();
      result.ptr = static_cast<char *>(buffer_.ptr) + ptrOffset;
      result.length = numberOfBlocks * chunk_size_.value();
    block allocate_with_register_overlap(size_t numberOfBlocks) noexcept {

      static_assert(sizeof(std::atomic<uint64_t>) == sizeof(uint64_t),
                    "Current assumption that std::atomic has no overhead on "
                    "integral types is not fulfilled!");

      auto p = reinterpret_cast<unsigned char *>(control_);
      const auto lastp =
          reinterpret_cast<unsigned char *>(control_) + controlSize_ * sizeof(uint64_t);

      auto freeBlocksCount = size_t(0);
      unsigned char *chunkStart = nullptr;

      boost::unique_lock<boost::shared_mutex> guard(mutex_);

            freeBlocksCount += 8;
            if (freeBlocksCount >= numberOfBlocks) {

          chunkStart = nullptr;

      if (p != lastp && freeBlocksCount >= numberOfBlocks) {
        size_t ptrOffset =
            (chunkStart - reinterpret_cast<unsigned char *>(control_)) * 8 * chunk_size_.value();

        result.ptr = static_cast<char *>(buffer_.ptr) + ptrOffset;
        result.length = numberOfBlocks * chunk_size_.value();

        set_over_multiple_registers<shared_helpers::NullLock, false>(block_to_context(result));
    void deallocate_for_multiple_complete_control_register(const BlockContext &context) noexcept {
      const auto registerToFree = context.registerIndex + context.usedChunks / 64;
      for (auto i = context.registerIndex; i < registerToFree; i++) {

        boost::shared_lock<boost::shared_mutex> guard(mutex_);
        control_[i] = static_cast<uint64_t>(-1);

    void deallocate_with_control_register_overlap(const BlockContext &context) noexcept {
      set_over_multiple_registers<shared_helpers::SharedLock, true>(context);
using namespace v_100;
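// Illustrative usage sketch (not part of the header). It assumes alb::mallocator
// as the backing allocator and the shown include paths; adjust both to the
// actual project layout.
//
//   #include "alb/shared_heap.hpp"
//   #include "alb/mallocator.hpp"
//
//   int main()
//   {
//     // 512 chunks of 64 bytes each, carved out of one mallocator allocation.
//     alb::shared_heap<alb::mallocator, 512, 64> heap(512, 64);
//
//     auto b = heap.allocate(200);   // rounded up to whole chunks
//     if (heap.owns(b)) {
//       heap.reallocate(b, 100);     // shrinks in place, frees the tail chunks
//     }
//     heap.deallocate(b);
//     return 0;
//   }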