/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <atomic>

#include "tensorflow/core/common_runtime/bfc_allocator.h"

#include "tensorflow/core/common_runtime/allocator_retry.h"
#include "tensorflow/core/lib/core/bits.h"
#include "tensorflow/core/lib/gtl/stl_util.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
                           bool allow_growth, const string& name)
    : suballocator_(sub_allocator),
      name_(name),
      free_chunks_list_(kInvalidChunkHandle),
      next_allocation_id_(1) {
  if (allow_growth) {
    // 1MiB smallest initial allocation, unless total memory available
    // is less.
    curr_region_allocation_bytes_ =
        RoundedBytes(std::min(total_memory, size_t{1048576}));
  } else {
    curr_region_allocation_bytes_ = RoundedBytes(total_memory);
  }

  // Remember the requested memory limit.
  memory_limit_ = total_memory;
  stats_.bytes_limit = static_cast<int64>(total_memory);

  // Create a bunch of bins of various good sizes.

  // We create bins covering every allocation size from 256 bytes up to
  // (and including) the memory limit.
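  // Bin sizes grow by powers of two: with BinNumToSize(b) == 256 << b (per the
  // header), the bin sizes are 256, 512, 1KiB, 2KiB, and so on.  The CHECKs
  // below verify that every size in [bin_size, 2 * bin_size) maps to the same
  // bin as bin_size itself.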
  for (BinNum b = 0; b < kNumBins; b++) {
    size_t bin_size = BinNumToSize(b);
    VLOG(1) << "Creating bin of max chunk size "
            << strings::HumanReadableNumBytes(bin_size);
    new (BinFromIndex(b)) Bin(this, bin_size);
    CHECK_EQ(BinForSize(bin_size), BinFromIndex(b));
    CHECK_EQ(BinForSize(bin_size + 255), BinFromIndex(b));
    CHECK_EQ(BinForSize(bin_size * 2 - 1), BinFromIndex(b));
    if (b + 1 < kNumBins) {
      CHECK_NE(BinForSize(bin_size * 2), BinFromIndex(b));
    }
  }
}

BFCAllocator::~BFCAllocator() {
  // Return all memory to the suballocator.
  VLOG(2) << "Number of regions allocated: "
          << region_manager_.regions().size();
  for (const auto& region : region_manager_.regions()) {
    suballocator_->Free(region.ptr(), region.memory_size());
  }

  for (BinNum b = 0; b < kNumBins; b++) {
    BinFromIndex(b)->~Bin();
  }
}

BFCAllocator::Chunk* BFCAllocator::ChunkFromHandle(ChunkHandle h) {
  DCHECK_GE(h, 0);
  DCHECK_LT(h, static_cast<int>(chunks_.size()));
  return &(chunks_[h]);
}

bool BFCAllocator::Extend(size_t rounded_bytes) {
  size_t available_bytes = memory_limit_ - total_region_allocated_bytes_;
  // Round available_bytes down to the nearest multiple of kMinAllocationSize.
  available_bytes = (available_bytes / kMinAllocationSize) * kMinAllocationSize;

  // Do we have enough space to handle the client's request?
  // If not, fail immediately.
  if (rounded_bytes > available_bytes) {
    return false;
  }

  // If curr_region_allocation_bytes_ is not enough to satisfy the
  // allocation, keep doubling it until it is.
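  // For example, if curr_region_allocation_bytes_ is 1MiB and rounded_bytes is
  // 5MiB, the loop below doubles it to 2MiB, 4MiB, and finally 8MiB.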
  bool increased_allocation = false;
  while (rounded_bytes > curr_region_allocation_bytes_) {
    curr_region_allocation_bytes_ *= 2;
    increased_allocation = true;
  }

  // Try allocating.
  size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes);
  void* mem_addr = suballocator_->Alloc(32, bytes);
  if (mem_addr == nullptr && !started_backpedal_) {
    // Only backpedal once.
    started_backpedal_ = true;

    static constexpr float kBackpedalFactor = 0.9;

    // Try allocating less memory.
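    // Each retry shrinks the request by roughly 10%; for example, a failed
    // 4GiB request is retried at about 3.6GiB, then 3.2GiB, and so on,
    // stopping once the shrunken size would no longer cover rounded_bytes.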
    while (mem_addr == nullptr) {
      bytes = RoundedBytes(bytes * kBackpedalFactor);
      if (bytes < rounded_bytes) break;
      mem_addr = suballocator_->Alloc(32, bytes);
    }
  }

  if (mem_addr == nullptr) {
    return false;
  }

  if (!increased_allocation) {
    // Increase the region size of the next required allocation.
    curr_region_allocation_bytes_ *= 2;
  }

  VLOG(1) << "Extending allocation by " << strings::HumanReadableNumBytes(bytes)
          << " bytes.";

  total_region_allocated_bytes_ += bytes;
  VLOG(1) << "Total allocated bytes: "
          << strings::HumanReadableNumBytes(total_region_allocated_bytes_);

  VLOG(1) << "Allocated memory at " << mem_addr << " to "
          << static_cast<void*>(static_cast<char*>(mem_addr) + bytes);
  region_manager_.AddAllocationRegion(mem_addr, bytes);

  // Create one large chunk for the whole memory space that will
  // be chunked later.
  ChunkHandle h = AllocateChunk();
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  c->ptr = mem_addr;
  c->size = bytes;
  c->allocation_id = -1;
  c->prev = kInvalidChunkHandle;
  c->next = kInvalidChunkHandle;

  region_manager_.set_handle(c->ptr, h);

  // TODO(vrv): Try to merge this new region with an existing region,
  // if the address space is contiguous, to avoid fragmentation
  // across regions.

  // Insert the chunk into the right bin.
  InsertFreeChunkIntoBin(h);

  // Invoke visitors on newly allocated region.
  for (const auto& visitor : region_visitors_) {
    visitor(mem_addr, bytes);
  }
  return true;
}

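// Returns a handle to an unused Chunk slot, reusing one from the free-handles
// list when possible and otherwise growing chunks_ by one element.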
BFCAllocator::ChunkHandle BFCAllocator::AllocateChunk() {
  if (free_chunks_list_ != kInvalidChunkHandle) {
    ChunkHandle h = free_chunks_list_;
    Chunk* c = ChunkFromHandle(h);
    free_chunks_list_ = c->next;
    return h;
  } else {
    ChunkHandle h = chunks_.size();
    chunks_.resize(h + 1);
    return h;
  }
}

void BFCAllocator::DeallocateChunk(ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  c->next = free_chunks_list_;
  free_chunks_list_ = h;
}

void* BFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes) {
  // Fast path: try once to allocate without getting the retry_helper_
  // involved.
  void* r = AllocateRawInternal(unused_alignment, num_bytes, false);
  if (r != nullptr) {
    return r;
  } else {
    static const int64 kMaxMillisToWait = 10000;  // 10 seconds
    return retry_helper_.AllocateRaw(
        [this](size_t a, size_t nb, bool v) {
          return AllocateRawInternal(a, nb, v);
        },
        kMaxMillisToWait, unused_alignment, num_bytes);
  }
}

void* BFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes,
                                const AllocationAttributes& allocation_attr) {
  if (allocation_attr.no_retry_on_failure) {
    // Return immediately upon the first failure if this is for allocating an
    // optional scratch space.
    bool dump_log_on_failure = VLOG_IS_ON(2);
    void* result =
        AllocateRawInternal(unused_alignment, num_bytes, dump_log_on_failure);
    if (result == nullptr) {
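      // Best-effort cap on the number of warnings; the relaxed, non-atomic
      // read-modify-write below may occasionally lose an increment under
      // concurrency, which is acceptable for rate-limiting log output.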
      static std::atomic<int32> log_counter{0};
      int32 counter_value = log_counter.load(std::memory_order_relaxed);
      if (counter_value < 10) {
        log_counter.store(counter_value + 1, std::memory_order_relaxed);
        LOG(WARNING)
            << "Allocator (" << Name() << ") ran out of memory trying "
            << "to allocate " << strings::HumanReadableNumBytes(num_bytes)
            << ". The caller indicates that this is not a failure, but"
            << " may mean that there could be performance gains if more"
            << " memory were available.";
      }
    }
    return result;
  } else {
    return AllocateRaw(unused_alignment, num_bytes);
  }
}
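
// A minimal usage sketch (not part of this file): callers normally reach these
// entry points through the Allocator interface, e.g.
//
//   BFCAllocator a(sub_allocator, 1 << 30, /*allow_growth=*/true, "bfc");
//   void* p = a.AllocateRaw(Allocator::kAllocatorAlignment, 1024);
//   a.DeallocateRaw(p);
//
// Here `sub_allocator` stands for any concrete SubAllocator implementation.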

// static
size_t BFCAllocator::RoundedBytes(size_t bytes) {
  size_t rounded_bytes =
      (kMinAllocationSize *
       ((bytes + kMinAllocationSize - 1) / kMinAllocationSize));
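  // For example, assuming kMinAllocationSize is 256 (1 << kMinAllocationBits
  // in the header), RoundedBytes(1) == 256 and RoundedBytes(300) == 512.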
  DCHECK_EQ(size_t{0}, rounded_bytes % kMinAllocationSize);
  return rounded_bytes;
}

void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
                                        size_t num_bytes,
                                        bool dump_log_on_failure) {
  if (num_bytes == 0) {
    LOG(ERROR) << "tried to allocate 0 bytes";
    return nullptr;
  }
  // First, always allocate memory of at least kMinAllocationSize
  // bytes, and always allocate multiples of kMinAllocationSize bytes
  // so all memory addresses are nicely byte aligned.
  size_t rounded_bytes = RoundedBytes(num_bytes);

  // The BFC allocator tries to find the best fit first.
  BinNum bin_num = BinNumForSize(rounded_bytes);

  mutex_lock l(lock_);
  void* ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes);
  if (ptr != nullptr) {
    return ptr;
  }

  // Try to extend.
  if (Extend(rounded_bytes)) {
    ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes);
    if (ptr != nullptr) {
      return ptr;
    }
  }

  // We searched all bins for an existing free chunk to use and
  // couldn't find one.  This means we must have run out of memory.
  // Dump the memory log for analysis.
  if (dump_log_on_failure) {
    LOG(WARNING) << "Allocator (" << Name() << ") ran out of memory trying "
                 << "to allocate " << strings::HumanReadableNumBytes(num_bytes)
                 << ". Current allocation summary follows.";
    DumpMemoryLog(rounded_bytes);
    LOG(WARNING) << RenderOccupancy();
  }
  return nullptr;
}

void* BFCAllocator::FindChunkPtr(BinNum bin_num, size_t rounded_bytes,
                                 size_t num_bytes) {
  // First identify the first bin that could satisfy rounded_bytes.
  for (; bin_num < kNumBins; bin_num++) {
    // Start searching from the first bin for the smallest chunk that fits
    // rounded_bytes.
    Bin* b = BinFromIndex(bin_num);
    for (auto citer = b->free_chunks.begin(); citer != b->free_chunks.end();
         ++citer) {
      const BFCAllocator::ChunkHandle h = (*citer);
      BFCAllocator::Chunk* chunk = ChunkFromHandle(h);
      DCHECK(!chunk->in_use());
      if (chunk->size >= rounded_bytes) {
        // We found a free chunk large enough for this request, so remove it
        // from the free bin structure prior to using it.
        RemoveFreeChunkIterFromBin(&b->free_chunks, citer);

        // If we can break the size of the chunk into two reasonably large
        // pieces, do so.  In any case don't waste more than
        // kMaxInternalFragmentation bytes on padding this alloc.
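        // For example, a 200MiB free chunk used to serve a 1MiB request is
        // split rather than handed out whole, since roughly 199MiB of padding
        // would exceed kMaxInternalFragmentation.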
        const int64 kMaxInternalFragmentation = 128 << 20;  // 128MiB
        if (chunk->size >= rounded_bytes * 2 ||
            static_cast<int64>(chunk->size) - rounded_bytes >=
                kMaxInternalFragmentation) {
          SplitChunk(h, rounded_bytes);
          chunk = ChunkFromHandle(h);  // Update chunk pointer in case it moved
        }

        // The requested size of the returned chunk is what the user
        // has allocated.
        chunk->requested_size = num_bytes;
        // Assign a unique id and increment the id counter, marking the
        // chunk as being in use.
        chunk->allocation_id = next_allocation_id_++;

        // Update stats.
        ++stats_.num_allocs;
        stats_.bytes_in_use += chunk->size;
        stats_.max_bytes_in_use =
            std::max(stats_.max_bytes_in_use, stats_.bytes_in_use);
        stats_.max_alloc_size =
            std::max<std::size_t>(stats_.max_alloc_size, chunk->size);

        VLOG(4) << "Returning: " << chunk->ptr;
        if (VLOG_IS_ON(4)) {
          LOG(INFO) << "A: " << RenderOccupancy();
        }
        return chunk->ptr;
      }
    }
  }

  return nullptr;
}

void BFCAllocator::SplitChunk(BFCAllocator::ChunkHandle h, size_t num_bytes) {
  // Allocate the new chunk before we do any ChunkFromHandle.
  ChunkHandle h_new_chunk = AllocateChunk();

  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));

  // Create a new chunk starting num_bytes after c.
  BFCAllocator::Chunk* new_chunk = ChunkFromHandle(h_new_chunk);
  new_chunk->ptr = static_cast<void*>(static_cast<char*>(c->ptr) + num_bytes);
  region_manager_.set_handle(new_chunk->ptr, h_new_chunk);

  // Set the new sizes of the chunks.
  new_chunk->size = c->size - num_bytes;
  c->size = num_bytes;

  // The new chunk is not in use.
  new_chunk->allocation_id = -1;

  // Maintain the pointers.
  // c <-> c_neighbor becomes
  // c <-> new_chunk <-> c_neighbor
  BFCAllocator::ChunkHandle h_neighbor = c->next;
  new_chunk->prev = h;
  new_chunk->next = h_neighbor;
  c->next = h_new_chunk;
  if (h_neighbor != kInvalidChunkHandle) {
    Chunk* c_neighbor = ChunkFromHandle(h_neighbor);
    c_neighbor->prev = h_new_chunk;
  }

  // Add the newly free chunk to the free bin.
  InsertFreeChunkIntoBin(h_new_chunk);
}

void BFCAllocator::DeallocateRaw(void* ptr) {
  DeallocateRawInternal(ptr);
  retry_helper_.NotifyDealloc();
}

void BFCAllocator::DeallocateRawInternal(void* ptr) {
  if (ptr == nullptr) {
    LOG(ERROR) << "tried to deallocate nullptr";
    return;
  }
  mutex_lock l(lock_);

  // Find the chunk from the ptr.
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle);

  // Consider coalescing it.
  FreeAndMaybeCoalesce(h);

  if (VLOG_IS_ON(4)) {
    LOG(INFO) << "F: " << RenderOccupancy();
  }
}

// Merges h1 and h2 when Chunk(h1)->next is h2 and Chunk(h2)->prev is h1.
// We merge Chunk(h2) into Chunk(h1).
void BFCAllocator::Merge(BFCAllocator::ChunkHandle h1,
                         BFCAllocator::ChunkHandle h2) {
  Chunk* c1 = ChunkFromHandle(h1);
  Chunk* c2 = ChunkFromHandle(h2);
  // We can only merge chunks that are not in use.
  CHECK(!c1->in_use() && !c2->in_use());

  // c1's prev doesn't change, still points to the same ptr, and is
  // still not in use.

  // Fix up neighbor pointers.
  //
  // c1 <-> c2 <-> c3 should become
  // c1 <-> c3

  BFCAllocator::ChunkHandle h3 = c2->next;
  c1->next = h3;
  CHECK(c2->prev == h1);
  if (h3 != kInvalidChunkHandle) {
    BFCAllocator::Chunk* c3 = ChunkFromHandle(h3);
    c3->prev = h1;
  }

  // Set the new size.
  c1->size += c2->size;

  DeleteChunk(h2);
}

void BFCAllocator::DeleteChunk(ChunkHandle h) {
  // Delete h and clean up all state.
  Chunk* c = ChunkFromHandle(h);
  // VLOG(4) << "Removing: " << c->ptr;
  region_manager_.erase(c->ptr);
  DeallocateChunk(h);
}

void BFCAllocator::InsertFreeChunkIntoBin(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));
  BinNum bin_num = BinNumForSize(c->size);
  Bin* new_bin = BinFromIndex(bin_num);
  c->bin_num = bin_num;
  new_bin->free_chunks.insert(h);
}

void BFCAllocator::RemoveFreeChunkIterFromBin(
    BFCAllocator::Bin::FreeChunkSet* free_chunks,
    const BFCAllocator::Bin::FreeChunkSet::iterator& citer) {
  ChunkHandle h = *citer;
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
  free_chunks->erase(citer);
  c->bin_num = kInvalidBinNum;
}

void BFCAllocator::RemoveFreeChunkFromBin(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
  CHECK_GT(BinFromIndex(c->bin_num)->free_chunks.erase(h), 0)
      << "Could not find chunk in bin";
  c->bin_num = kInvalidBinNum;
}

void BFCAllocator::FreeAndMaybeCoalesce(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(c->in_use() && (c->bin_num == kInvalidBinNum));

  // Mark the chunk as no longer in use.
  c->allocation_id = -1;

  // Update the stats.
  stats_.bytes_in_use -= c->size;

  // This chunk is no longer in use; consider coalescing it
  // with adjacent chunks.
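  // For example, if the neighbors of this chunk in address order are
  //   [A: free][B: this chunk][C: free]
  // then freeing B below produces a single free chunk spanning A, B and C.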
  ChunkHandle chunk_to_reassign = h;

  // If the next chunk is free, coalesce the two.
  if (c->next != kInvalidChunkHandle) {
    Chunk* cnext = ChunkFromHandle(c->next);
    if (!cnext->in_use()) {
      // VLOG(8) << "Chunk at " << cnext->ptr << " merging with c " <<
      // c->ptr;

      chunk_to_reassign = h;

      // Deletes c->next
      RemoveFreeChunkFromBin(c->next);
      Merge(h, ChunkFromHandle(h)->next);
    }
  }

  // If the previous chunk is free, coalesce the two.
  c = ChunkFromHandle(h);
  if (c->prev != kInvalidChunkHandle) {
    Chunk* cprev = ChunkFromHandle(c->prev);
    if (!cprev->in_use()) {
      // VLOG(8) << "Chunk at " << c->ptr << " merging into c->prev "
      // << cprev->ptr;

      chunk_to_reassign = c->prev;

      // Deletes c
      RemoveFreeChunkFromBin(c->prev);
      Merge(ChunkFromHandle(h)->prev, h);
      c = ChunkFromHandle(h);
    }
  }

  InsertFreeChunkIntoBin(chunk_to_reassign);
}

void BFCAllocator::AddAllocVisitor(Visitor visitor) {
  VLOG(1) << "AddVisitor";
  mutex_lock l(lock_);
  region_visitors_.push_back(visitor);
  for (const auto& region : region_manager_.regions()) {
    visitor(region.ptr(), region.memory_size());
  }
}

bool BFCAllocator::TracksAllocationSizes() { return true; }

size_t BFCAllocator::RequestedSize(const void* ptr) {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for requested size of pointer we never allocated: " << ptr;
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->requested_size;
}

size_t BFCAllocator::AllocatedSize(const void* ptr) {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for allocated size of pointer we never allocated: " << ptr;
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->size;
}

int64 BFCAllocator::AllocationId(const void* ptr) {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for allocation id of pointer we never allocated: " << ptr;
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->allocation_id;
}

namespace {

void RenderRegion(char* rendered, const size_t resolution,
                  const size_t total_render_size, const size_t offset,
                  const void* base_ptr, const void* ptr, const size_t size,
                  const char c) {
  const char* base_ptr_c = static_cast<const char*>(base_ptr);
  const char* ptr_c = static_cast<const char*>(ptr);

  size_t start_location =
      ((ptr_c - base_ptr_c + offset) * resolution) / total_render_size;
  CHECK_GE(start_location, 0);
  CHECK_LT(start_location, resolution);
  size_t end_location =
      ((ptr_c + size - 1 - base_ptr_c + offset) * resolution) /
      total_render_size;
  CHECK_GE(end_location, 0);
  CHECK_LT(end_location, resolution);

  for (size_t i = start_location; i <= end_location; ++i) {
    rendered[i] = c;
  }
}

}  // namespace

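// Renders a fixed-width ASCII map of all allocation regions: '_' marks free
// space, '*' marks the client-requested bytes of in-use chunks, and 'x' marks
// padding within in-use chunks.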
string BFCAllocator::RenderOccupancy() {
  // Make a buffer for the ASCII-art representation.
  const size_t resolution = 100;
  char rendered[resolution];

  // Compute the total region size to render over.
  size_t total_region_size = 0;
  for (const auto& region : region_manager_.regions()) {
    total_region_size += region.memory_size();
  }

  if (total_region_size == 0) {
    return "<allocator contains no memory>";
  }

  // Start out with everything empty.
  RenderRegion(rendered, resolution, total_region_size, 0, nullptr, nullptr,
               total_region_size, '_');

  size_t region_offset = 0;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    // Then render each chunk left to right.
    while (h != kInvalidChunkHandle) {
      Chunk* c = ChunkFromHandle(h);
      if (c->in_use()) {
        // Render the wasted space.
        size_t wasted = c->size - c->requested_size;
        if (wasted > 0) {
          RenderRegion(rendered, resolution, total_region_size,
                       region_offset + c->requested_size, region.ptr(), c->ptr,
                       wasted, 'x');
        }
        // Then the occupied space.
        RenderRegion(rendered, resolution, total_region_size, region_offset,
                     region.ptr(), c->ptr, c->requested_size, '*');
      }
      h = c->next;
    }
    region_offset += region.memory_size();
  }

  return StringPiece(rendered, resolution).ToString();
}

void BFCAllocator::DumpMemoryLog(size_t num_bytes) {
  const std::array<BinDebugInfo, kNumBins> bin_infos = get_bin_debug_info();
  for (BinNum bin_num = 0; bin_num < kNumBins; bin_num++) {
    Bin* b = BinFromIndex(bin_num);
    const BinDebugInfo& bin_info = bin_infos[bin_num];
    CHECK_EQ(b->free_chunks.size(),
             bin_info.total_chunks_in_bin - bin_info.total_chunks_in_use);

    LOG(INFO) << "Bin (" << b->bin_size
              << "): \tTotal Chunks: " << bin_info.total_chunks_in_bin
              << ", Chunks in use: " << bin_info.total_chunks_in_use << ". "
              << strings::HumanReadableNumBytes(bin_info.total_bytes_in_bin)
              << " allocated for chunks. "
              << strings::HumanReadableNumBytes(bin_info.total_bytes_in_use)
              << " in use in bin. "
              << strings::HumanReadableNumBytes(
                     bin_info.total_requested_bytes_in_use)
              << " client-requested in use in bin.";
  }

  // Find the bin that we would have liked to allocate in, so we
  // can get some further analysis about fragmentation.
  Bin* b = BinForSize(num_bytes);

  LOG(INFO) << "Bin for " << strings::HumanReadableNumBytes(num_bytes)
            << " was " << strings::HumanReadableNumBytes(b->bin_size)
            << ", Chunk State: ";

  for (ChunkHandle h : b->free_chunks) {
    Chunk* c = ChunkFromHandle(h);
    LOG(INFO) << c->DebugString(this, true);
  }

  // Next show the chunks that are in use, and also summarize their
  // number by size.
  std::map<size_t, int> in_use_by_size;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    while (h != kInvalidChunkHandle) {
      const Chunk* c = ChunkFromHandle(h);
      if (c->in_use()) {
        in_use_by_size[c->size]++;
      }
      LOG(INFO) << (c->in_use() ? "Chunk" : "Free ") << " at " << c->ptr
                << " of size " << c->size;
      h = c->next;
    }
  }

  LOG(INFO) << " Summary of in-use Chunks by size: ";
  size_t total_bytes = 0;
  for (auto& it : in_use_by_size) {
    LOG(INFO) << it.second << " Chunks of size " << it.first << " totalling "
              << strings::HumanReadableNumBytes(it.first * it.second);
    total_bytes += (it.first * it.second);
  }
  LOG(INFO) << "Sum Total of in-use chunks: "
            << strings::HumanReadableNumBytes(total_bytes);
  LOG(INFO) << "Stats: \n" << stats_.DebugString();
}

void BFCAllocator::GetStats(AllocatorStats* stats) {
  mutex_lock l(lock_);
  *stats = stats_;
}

void BFCAllocator::ClearStats() {
  mutex_lock l(lock_);
  stats_.num_allocs = 0;
  stats_.max_bytes_in_use = stats_.bytes_in_use;
  stats_.max_alloc_size = 0;
}

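// Walks every chunk in every region and aggregates per-bin statistics; for
// free chunks it also cross-checks that the chunk is recorded in its bin's
// free-chunk set.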
std::array<BFCAllocator::BinDebugInfo, BFCAllocator::kNumBins>
BFCAllocator::get_bin_debug_info() {
  std::array<BinDebugInfo, kNumBins> bin_infos;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    while (h != kInvalidChunkHandle) {
      const Chunk* c = ChunkFromHandle(h);
      BinNum bin_num = BinNumForSize(c->size);
      BinDebugInfo& bin_info = bin_infos[bin_num];
      bin_info.total_bytes_in_bin += c->size;
      bin_info.total_chunks_in_bin++;
      if (c->in_use()) {
        bin_info.total_bytes_in_use += c->size;
        bin_info.total_requested_bytes_in_use += c->requested_size;
        bin_info.total_chunks_in_use++;
      } else {
        Bin* bin = BinFromIndex(bin_num);
        CHECK_EQ(bin->free_chunks.count(h), 1);
        CHECK_EQ(c->bin_num, bin_num);
      }
      h = c->next;
    }
  }
  return bin_infos;
}

}  // namespace tensorflow