/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/bfc_allocator.h"

#include <atomic>

#include "tensorflow/core/common_runtime/allocator_retry.h"
#include "tensorflow/core/lib/core/bits.h"
#include "tensorflow/core/lib/gtl/stl_util.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
                           bool allow_growth, const string& name)
    : suballocator_(sub_allocator),
      name_(name),
      free_chunks_list_(kInvalidChunkHandle),
      next_allocation_id_(1) {
  if (allow_growth) {
    // 1MiB smallest initial allocation, unless total memory available
    // is less.
    curr_region_allocation_bytes_ =
        RoundedBytes(std::min(total_memory, size_t{1048576}));
  } else {
    curr_region_allocation_bytes_ = RoundedBytes(total_memory);
  }
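
  // Example (illustrative): when allow_growth is set and total_memory is
  // large, the first region Extend() allocates is 1MiB, and the region
  // size doubles as further regions are needed.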

  // Allocate the requested amount of memory.
  memory_limit_ = total_memory;
  stats_.bytes_limit = static_cast<int64>(total_memory);

  // Create bins of exponentially increasing maximum chunk sizes.

  // The bins cover every possible allocation size for memory_limit_,
  // from allocations of at most 256 bytes up to allocations as large
  // as (and including) the memory limit itself.
  for (BinNum b = 0; b < kNumBins; b++) {
    size_t bin_size = BinNumToSize(b);
    VLOG(1) << "Creating bin of max chunk size "
            << strings::HumanReadableNumBytes(bin_size);
    new (BinFromIndex(b)) Bin(this, bin_size);
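    // Bin b is expected to cover chunk sizes in [bin_size, 2 * bin_size);
    // the checks below verify those boundaries.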
    CHECK_EQ(BinForSize(bin_size), BinFromIndex(b));
    CHECK_EQ(BinForSize(bin_size + 255), BinFromIndex(b));
    CHECK_EQ(BinForSize(bin_size * 2 - 1), BinFromIndex(b));
    if (b + 1 < kNumBins) {
      CHECK_NE(BinForSize(bin_size * 2), BinFromIndex(b));
    }
  }
}

BFCAllocator::~BFCAllocator() {
  // Return all allocated regions to the suballocator.
  VLOG(2) << "Number of regions allocated: "
          << region_manager_.regions().size();
  for (const auto& region : region_manager_.regions()) {
    suballocator_->Free(region.ptr(), region.memory_size());
  }

  for (BinNum b = 0; b < kNumBins; b++) {
    BinFromIndex(b)->~Bin();
  }
}

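// Note: the returned Chunk* aliases an element of chunks_, so it is
// invalidated whenever AllocateChunk() grows that vector; callers re-fetch
// chunk pointers after any call that might allocate a chunk (see
// SplitChunk).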
BFCAllocator::Chunk* BFCAllocator::ChunkFromHandle(ChunkHandle h) {
  DCHECK_GE(h, 0);
  DCHECK_LT(h, static_cast<int>(chunks_.size()));
  return &(chunks_[h]);
}

bool BFCAllocator::Extend(size_t rounded_bytes) {
  size_t available_bytes = memory_limit_ - total_region_allocated_bytes_;
  // Rounds available_bytes down to the nearest multiple of kMinAllocationSize.
  available_bytes = (available_bytes / kMinAllocationSize) * kMinAllocationSize;

  // Do we have enough space to handle the client's request?
  // If not, fail immediately.
  if (rounded_bytes > available_bytes) {
    return false;
  }

  // If curr_region_allocation_bytes_ is not enough to satisfy the
  // allocation, keep multiplying by a power of two until that is
  // sufficient.
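  // For example, a 1MiB region size grows to 2MiB, 4MiB, ... until it can
  // hold rounded_bytes.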
  bool increased_allocation = false;
  while (rounded_bytes > curr_region_allocation_bytes_) {
    curr_region_allocation_bytes_ *= 2;
    increased_allocation = true;
  }

  // Try allocating.
  size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes);
  void* mem_addr = suballocator_->Alloc(32, bytes);
  if (mem_addr == nullptr && !started_backpedal_) {
    // Only backpedal once.
    started_backpedal_ = true;

    static constexpr float kBackpedalFactor = 0.9;

    // Try allocating less memory.
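    // e.g. a failed 1GiB request is retried at ~0.9GiB, ~0.81GiB, ...
    // (each rounded via RoundedBytes), giving up once the region would be
    // too small to hold rounded_bytes.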
    while (mem_addr == nullptr) {
      bytes = RoundedBytes(bytes * kBackpedalFactor);
      if (bytes < rounded_bytes) break;
      mem_addr = suballocator_->Alloc(32, bytes);
    }
  }

  if (mem_addr == nullptr) {
    return false;
  }

  if (!increased_allocation) {
    // Increase the region size of the next required allocation.
    curr_region_allocation_bytes_ *= 2;
  }

  VLOG(1) << "Extending allocation by "
          << strings::HumanReadableNumBytes(bytes) << ".";

  total_region_allocated_bytes_ += bytes;
  VLOG(1) << "Total allocated bytes: "
          << strings::HumanReadableNumBytes(total_region_allocated_bytes_);

  VLOG(1) << "Allocated memory at " << mem_addr << " to "
          << static_cast<void*>(static_cast<char*>(mem_addr) + bytes);
  region_manager_.AddAllocationRegion(mem_addr, bytes);

  // Create one large chunk for the whole memory space that will
  // be chunked later.
  ChunkHandle h = AllocateChunk();
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  c->ptr = mem_addr;
  c->size = bytes;
  c->allocation_id = -1;
  c->prev = kInvalidChunkHandle;
  c->next = kInvalidChunkHandle;

  region_manager_.set_handle(c->ptr, h);

  // TODO(vrv): Try to merge this new region with an existing region,
  // if the address space is contiguous, to avoid fragmentation
  // across regions.

  // Insert the chunk into the right bin.
  InsertFreeChunkIntoBin(h);

  // Invoke visitors on newly allocated region.
  for (const auto& visitor : region_visitors_) {
    visitor(mem_addr, bytes);
  }
  return true;
}

BFCAllocator::ChunkHandle BFCAllocator::AllocateChunk() {
  if (free_chunks_list_ != kInvalidChunkHandle) {
    ChunkHandle h = free_chunks_list_;
    Chunk* c = ChunkFromHandle(h);
    free_chunks_list_ = c->next;
    return h;
  } else {
    ChunkHandle h = chunks_.size();
    chunks_.resize(h + 1);
    return h;
  }
}

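// Pushes h onto an intrusive singly-linked free list threaded through
// Chunk::next; AllocateChunk() pops from this list so chunk handles are
// recycled without ever shrinking chunks_.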
void BFCAllocator::DeallocateChunk(ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  c->next = free_chunks_list_;
  free_chunks_list_ = h;
}

void* BFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes) {
  // Fast path: Try once to allocate without getting the retry_helper_ involved
  void* r = AllocateRawInternal(unused_alignment, num_bytes, false);
  if (r != nullptr) {
    return r;
  } else {
    static const int64 kMaxMillisToWait = 10000;  // 10 seconds
    return retry_helper_.AllocateRaw(
        [this](size_t a, size_t nb, bool v) {
          return AllocateRawInternal(a, nb, v);
        },
        kMaxMillisToWait, unused_alignment, num_bytes);
  }
}

void* BFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes,
                                const AllocationAttributes& allocation_attr) {
  if (allocation_attr.no_retry_on_failure) {
    // Return immediately upon the first failure if this is for allocating an
    // optional scratch space.
    bool dump_log_on_failure = VLOG_IS_ON(2);
    void* result =
        AllocateRawInternal(unused_alignment, num_bytes, dump_log_on_failure);
    if (result == nullptr) {
      static std::atomic<int32> log_counter{0};
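      // Best-effort throttle: the separate load and store below can race
      // across threads, so slightly more than 10 warnings may be logged;
      // that is acceptable for log spam control.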
      int32 counter_value = log_counter.load(std::memory_order_relaxed);
      if (counter_value < 10) {
        log_counter.store(counter_value + 1, std::memory_order_relaxed);
        LOG(WARNING)
            << "Allocator (" << Name() << ") ran out of memory trying "
            << "to allocate " << strings::HumanReadableNumBytes(num_bytes)
            << ". The caller indicates that this is not a failure, but it"
            << " may mean that there could be performance gains if more"
            << " memory were available.";
      }
    }
    return result;
  } else {
    return AllocateRaw(unused_alignment, num_bytes);
  }
}

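// Rounds bytes up to a multiple of kMinAllocationSize.  For example,
// assuming kMinAllocationSize == 256 (1 << kMinAllocationBits in
// bfc_allocator.h), RoundedBytes(1) == 256, RoundedBytes(256) == 256, and
// RoundedBytes(257) == 512.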
// static
size_t BFCAllocator::RoundedBytes(size_t bytes) {
  size_t rounded_bytes =
      (kMinAllocationSize *
       ((bytes + kMinAllocationSize - 1) / kMinAllocationSize));
  DCHECK_EQ(size_t{0}, rounded_bytes % kMinAllocationSize);
  return rounded_bytes;
}

void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
                                        size_t num_bytes,
                                        bool dump_log_on_failure) {
  if (num_bytes == 0) {
    LOG(ERROR) << "tried to allocate 0 bytes";
    return nullptr;
  }
  // First, always allocate memory of at least kMinAllocationSize
  // bytes, and always allocate multiples of kMinAllocationSize bytes
  // so that chunk start addresses stay kMinAllocationSize-aligned
  // relative to the start of their region.
  size_t rounded_bytes = RoundedBytes(num_bytes);

  // The BFC allocator tries to find the best fit first.
  BinNum bin_num = BinNumForSize(rounded_bytes);

  mutex_lock l(lock_);
  void* ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes);
  if (ptr != nullptr) {
    return ptr;
  }

  // Try to extend the region.
  if (Extend(rounded_bytes)) {
    ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes);
    if (ptr != nullptr) {
      return ptr;
    }
  }

  // We searched all bins for an existing free chunk to use and
  // couldn't find one.  This means we must have run out of memory.
  // Dump the memory log for analysis.
  if (dump_log_on_failure) {
    LOG(WARNING) << "Allocator (" << Name() << ") ran out of memory trying "
                 << "to allocate " << strings::HumanReadableNumBytes(num_bytes)
                 << ". Current allocation summary follows.";
    DumpMemoryLog(rounded_bytes);
    LOG(WARNING) << RenderOccupancy();
  }
  return nullptr;
}

void* BFCAllocator::FindChunkPtr(BinNum bin_num, size_t rounded_bytes,
                                 size_t num_bytes) {
  // First identify the first bin that could satisfy rounded_bytes.
  for (; bin_num < kNumBins; bin_num++) {
    // Start searching from the first bin for the smallest chunk that fits
    // rounded_bytes.
    Bin* b = BinFromIndex(bin_num);
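    // The free chunks within a bin are ordered by increasing chunk size
    // (see ChunkComparator in bfc_allocator.h), so the first chunk that
    // fits rounded_bytes is the tightest fit available in this bin.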
    for (auto citer = b->free_chunks.begin(); citer != b->free_chunks.end();
         ++citer) {
      const BFCAllocator::ChunkHandle h = (*citer);
      BFCAllocator::Chunk* chunk = ChunkFromHandle(h);
      DCHECK(!chunk->in_use());
      if (chunk->size >= rounded_bytes) {
        // We found a free chunk that is large enough, so remove it from
        // the free bin structure before using it.
        RemoveFreeChunkIterFromBin(&b->free_chunks, citer);

        // If we can break the size of the chunk into two reasonably large
        // pieces, do so.  In any case don't waste more than
        // kMaxInternalFragmentation bytes on padding this alloc.
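        // For example, a 10MiB request served from a 15MiB chunk is not
        // split (the 5MiB remainder is below both thresholds), so the
        // extra 5MiB is carried as padding on this allocation.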
        const int64 kMaxInternalFragmentation = 128 << 20;  // 128 MiB
        if (chunk->size >= rounded_bytes * 2 ||
            static_cast<int64>(chunk->size) - rounded_bytes >=
                kMaxInternalFragmentation) {
          SplitChunk(h, rounded_bytes);
          chunk = ChunkFromHandle(h);  // Update chunk pointer in case it moved
        }

        // Record the size the client requested; chunk->size may be larger
        // due to rounding and the splitting thresholds above.
        chunk->requested_size = num_bytes;
        // Assign a unique id and increment the id counter, marking the
        // chunk as being in use.
        chunk->allocation_id = next_allocation_id_++;

        // Update stats.
        ++stats_.num_allocs;
        stats_.bytes_in_use += chunk->size;
        stats_.max_bytes_in_use =
            std::max(stats_.max_bytes_in_use, stats_.bytes_in_use);
        stats_.max_alloc_size =
            std::max<std::size_t>(stats_.max_alloc_size, chunk->size);

        VLOG(4) << "Returning: " << chunk->ptr;
        if (VLOG_IS_ON(4)) {
          LOG(INFO) << "A: " << RenderOccupancy();
        }
        return chunk->ptr;
      }
    }
  }

  return nullptr;
}

void BFCAllocator::SplitChunk(BFCAllocator::ChunkHandle h, size_t num_bytes) {
  // Allocate the new chunk before any calls to ChunkFromHandle, since
  // AllocateChunk may grow chunks_ and invalidate outstanding Chunk
  // pointers.
  ChunkHandle h_new_chunk = AllocateChunk();

  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));

  // Create a new chunk starting num_bytes after c.
  BFCAllocator::Chunk* new_chunk = ChunkFromHandle(h_new_chunk);
  new_chunk->ptr = static_cast<void*>(static_cast<char*>(c->ptr) + num_bytes);
  region_manager_.set_handle(new_chunk->ptr, h_new_chunk);

  // Set the new sizes of the chunks.
  new_chunk->size = c->size - num_bytes;
  c->size = num_bytes;

  // The new chunk is not in use.
  new_chunk->allocation_id = -1;

  // Maintain the pointers.
  // c <-> c_neighbor becomes
  // c <-> new_chunk <-> c_neighbor
  BFCAllocator::ChunkHandle h_neighbor = c->next;
  new_chunk->prev = h;
  new_chunk->next = h_neighbor;
  c->next = h_new_chunk;
  if (h_neighbor != kInvalidChunkHandle) {
    Chunk* c_neighbor = ChunkFromHandle(h_neighbor);
    c_neighbor->prev = h_new_chunk;
  }

  // Add the newly free chunk to the free bin.
  InsertFreeChunkIntoBin(h_new_chunk);
}

void BFCAllocator::DeallocateRaw(void* ptr) {
  DeallocateRawInternal(ptr);
  retry_helper_.NotifyDealloc();
}

void BFCAllocator::DeallocateRawInternal(void* ptr) {
  if (ptr == nullptr) {
    LOG(ERROR) << "tried to deallocate nullptr";
    return;
  }
  mutex_lock l(lock_);

  // Find the chunk from the ptr.
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle);

  // Consider coalescing it.
  FreeAndMaybeCoalesce(h);

  if (VLOG_IS_ON(4)) {
    LOG(INFO) << "F: " << RenderOccupancy();
  }
}

// Merges h1 and h2 when Chunk(h1)->next is h2 and Chunk(h2)->prev is h1.
// We merge Chunk(h2) into Chunk(h1).
void BFCAllocator::Merge(BFCAllocator::ChunkHandle h1,
                         BFCAllocator::ChunkHandle h2) {
  Chunk* c1 = ChunkFromHandle(h1);
  Chunk* c2 = ChunkFromHandle(h2);
  // We can only merge chunks that are not in use.
  CHECK(!c1->in_use() && !c2->in_use());

  // c1's prev doesn't change, still points to the same ptr, and is
  // still not in use.

  // Fix up neighbor pointers.
  //
  // c1 <-> c2 <-> c3 should become
  // c1 <-> c3

  BFCAllocator::ChunkHandle h3 = c2->next;
  c1->next = h3;
  CHECK(c2->prev == h1);
  if (h3 != kInvalidChunkHandle) {
    BFCAllocator::Chunk* c3 = ChunkFromHandle(h3);
    c3->prev = h1;
  }

  // Set the new size.
  c1->size += c2->size;

  DeleteChunk(h2);
}

void BFCAllocator::DeleteChunk(ChunkHandle h) {
  // Delete h and clean up all state.
  Chunk* c = ChunkFromHandle(h);
  // VLOG(4) << "Removing: " << c->ptr;
  region_manager_.erase(c->ptr);
  DeallocateChunk(h);
}

void BFCAllocator::InsertFreeChunkIntoBin(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));
  BinNum bin_num = BinNumForSize(c->size);
  Bin* new_bin = BinFromIndex(bin_num);
  c->bin_num = bin_num;
  new_bin->free_chunks.insert(h);
}

void BFCAllocator::RemoveFreeChunkIterFromBin(
    BFCAllocator::Bin::FreeChunkSet* free_chunks,
    const BFCAllocator::Bin::FreeChunkSet::iterator& citer) {
  ChunkHandle h = *citer;
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
  free_chunks->erase(citer);
  c->bin_num = kInvalidBinNum;
}

void BFCAllocator::RemoveFreeChunkFromBin(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
  CHECK_GT(BinFromIndex(c->bin_num)->free_chunks.erase(h), 0)
      << "Could not find chunk in bin";
  c->bin_num = kInvalidBinNum;
}

void BFCAllocator::FreeAndMaybeCoalesce(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(c->in_use() && (c->bin_num == kInvalidBinNum));

  // Mark the chunk as no longer in use.
  c->allocation_id = -1;

  // Updates the stats.
  stats_.bytes_in_use -= c->size;

  // This chunk is no longer in use; consider coalescing it with its
  // adjacent chunks.
  ChunkHandle chunk_to_reassign = h;

  // If the next chunk is free, coalesce the two.
  if (c->next != kInvalidChunkHandle) {
    Chunk* cnext = ChunkFromHandle(c->next);
    if (!cnext->in_use()) {
      // VLOG(8) << "Chunk at " << cnext->ptr << " merging with c " <<
      // c->ptr;

      chunk_to_reassign = h;

      // Deletes c->next.
      RemoveFreeChunkFromBin(c->next);
      Merge(h, ChunkFromHandle(h)->next);
    }
  }

  // If the previous chunk is free, coalesce the two.
  c = ChunkFromHandle(h);
  if (c->prev != kInvalidChunkHandle) {
    Chunk* cprev = ChunkFromHandle(c->prev);
    if (!cprev->in_use()) {
      // VLOG(8) << "Chunk at " << c->ptr << " merging into c->prev "
      // << cprev->ptr;

      chunk_to_reassign = c->prev;

      // Deletes c.
      RemoveFreeChunkFromBin(c->prev);
      Merge(ChunkFromHandle(h)->prev, h);
      c = ChunkFromHandle(h);
    }
  }

  InsertFreeChunkIntoBin(chunk_to_reassign);
}

void BFCAllocator::AddAllocVisitor(Visitor visitor) {
  VLOG(1) << "AddVisitor";
  mutex_lock l(lock_);
  region_visitors_.push_back(visitor);
  for (const auto& region : region_manager_.regions()) {
    visitor(region.ptr(), region.memory_size());
  }
}

bool BFCAllocator::TracksAllocationSizes() { return true; }

size_t BFCAllocator::RequestedSize(const void* ptr) {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for requested size of pointer we never allocated: " << ptr;
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->requested_size;
}

size_t BFCAllocator::AllocatedSize(const void* ptr) {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for allocated size of pointer we never allocated: " << ptr;
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->size;
}

int64 BFCAllocator::AllocationId(const void* ptr) {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for allocation id of pointer we never allocated: " << ptr;
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->allocation_id;
}

namespace {

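// Paints character 'c' over the cells of the 'resolution'-cell canvas
// 'rendered' (which spans 'total_render_size' bytes overall) covered by
// the byte range [ptr, ptr + size); 'offset' is the byte offset of the
// region starting at base_ptr within the overall rendering.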
void RenderRegion(char* rendered, const size_t resolution,
                  const size_t total_render_size, const size_t offset,
                  const void* base_ptr, const void* ptr, const size_t size,
                  const char c) {
  const char* base_ptr_c = static_cast<const char*>(base_ptr);
  const char* ptr_c = static_cast<const char*>(ptr);

  size_t start_location =
      ((ptr_c - base_ptr_c + offset) * resolution) / total_render_size;
  CHECK_GE(start_location, 0);
  CHECK_LT(start_location, resolution);
  size_t end_location =
      ((ptr_c + size - 1 - base_ptr_c + offset) * resolution) /
      total_render_size;
  CHECK_GE(end_location, 0);
  CHECK_LT(end_location, resolution);

  for (size_t i = start_location; i <= end_location; ++i) {
    rendered[i] = c;
  }
}

}  // namespace

string BFCAllocator::RenderOccupancy() {
  // Make a buffer for the ASCII-art representation.
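  // Legend: '_' marks free space, '*' marks client-requested bytes in
  // use, and 'x' marks bytes wasted as padding within in-use chunks.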
  const size_t resolution = 100;
  char rendered[resolution];

  // Compute the total region size to render over.
  size_t total_region_size = 0;
  for (const auto& region : region_manager_.regions()) {
    total_region_size += region.memory_size();
  }

  if (total_region_size == 0) {
    return "<allocator contains no memory>";
  }

  // Start out with everything empty.
  RenderRegion(rendered, resolution, total_region_size, 0, nullptr, nullptr,
               total_region_size, '_');

  size_t region_offset = 0;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    // Then render each chunk left to right.
    while (h != kInvalidChunkHandle) {
      Chunk* c = ChunkFromHandle(h);
      if (c->in_use()) {
        // Render the wasted space.
        size_t wasted = c->size - c->requested_size;
        if (wasted > 0) {
          RenderRegion(rendered, resolution, total_region_size,
                       region_offset + c->requested_size, region.ptr(), c->ptr,
                       wasted, 'x');
        }
        // Then the occupied space.
        RenderRegion(rendered, resolution, total_region_size, region_offset,
                     region.ptr(), c->ptr, c->requested_size, '*');
      }
      h = c->next;
    }
    region_offset += region.memory_size();
  }

  return StringPiece(rendered, resolution).ToString();
}

void BFCAllocator::DumpMemoryLog(size_t num_bytes) {
  const std::array<BinDebugInfo, kNumBins> bin_infos = get_bin_debug_info();
  for (BinNum bin_num = 0; bin_num < kNumBins; bin_num++) {
    Bin* b = BinFromIndex(bin_num);
    const BinDebugInfo& bin_info = bin_infos[bin_num];
    CHECK_EQ(b->free_chunks.size(),
             bin_info.total_chunks_in_bin - bin_info.total_chunks_in_use);

    LOG(INFO) << "Bin (" << b->bin_size
              << "): \tTotal Chunks: " << bin_info.total_chunks_in_bin
              << ", Chunks in use: " << bin_info.total_chunks_in_use << ". "
              << strings::HumanReadableNumBytes(bin_info.total_bytes_in_bin)
              << " allocated for chunks. "
              << strings::HumanReadableNumBytes(bin_info.total_bytes_in_use)
              << " in use in bin. "
              << strings::HumanReadableNumBytes(
                     bin_info.total_requested_bytes_in_use)
              << " client-requested in use in bin.";
  }

  // Find the bin that we would have liked to allocate in, so we
  // can get some further analysis about fragmentation.
  Bin* b = BinForSize(num_bytes);

  LOG(INFO) << "Bin for " << strings::HumanReadableNumBytes(num_bytes)
            << " was " << strings::HumanReadableNumBytes(b->bin_size)
            << ", Chunk State: ";

  for (ChunkHandle h : b->free_chunks) {
    Chunk* c = ChunkFromHandle(h);
    LOG(INFO) << c->DebugString(this, true);
  }

  // Next show the chunks that are in use, and also summarize their
  // number by size.
  std::map<size_t, int> in_use_by_size;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    while (h != kInvalidChunkHandle) {
      const Chunk* c = ChunkFromHandle(h);
      if (c->in_use()) {
        in_use_by_size[c->size]++;
      }
      LOG(INFO) << (c->in_use() ? "Chunk" : "Free ") << " at " << c->ptr
                << " of size " << c->size;
      h = c->next;
    }
  }

  LOG(INFO) << " Summary of in-use Chunks by size: ";
  size_t total_bytes = 0;
  for (auto& it : in_use_by_size) {
    LOG(INFO) << it.second << " Chunks of size " << it.first << " totalling "
              << strings::HumanReadableNumBytes(it.first * it.second);
    total_bytes += (it.first * it.second);
  }
  LOG(INFO) << "Sum Total of in-use chunks: "
            << strings::HumanReadableNumBytes(total_bytes);
  LOG(INFO) << "Stats: \n" << stats_.DebugString();
}

void BFCAllocator::GetStats(AllocatorStats* stats) {
  mutex_lock l(lock_);
  *stats = stats_;
}

void BFCAllocator::ClearStats() {
  mutex_lock l(lock_);
  stats_.num_allocs = 0;
  stats_.max_bytes_in_use = stats_.bytes_in_use;
  stats_.max_alloc_size = 0;
}

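// Walks every chunk in every region and aggregates per-bin statistics,
// cross-checking each free chunk against its bin's free_chunks set.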
std::array<BFCAllocator::BinDebugInfo, BFCAllocator::kNumBins>
BFCAllocator::get_bin_debug_info() {
  std::array<BinDebugInfo, kNumBins> bin_infos;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    while (h != kInvalidChunkHandle) {
      const Chunk* c = ChunkFromHandle(h);
      BinNum bin_num = BinNumForSize(c->size);
      BinDebugInfo& bin_info = bin_infos[bin_num];
      bin_info.total_bytes_in_bin += c->size;
      bin_info.total_chunks_in_bin++;
      if (c->in_use()) {
        bin_info.total_bytes_in_use += c->size;
        bin_info.total_requested_bytes_in_use += c->requested_size;
        bin_info.total_chunks_in_use++;
      } else {
        Bin* bin = BinFromIndex(bin_num);
        CHECK_EQ(bin->free_chunks.count(h), 1);
        CHECK_EQ(c->bin_num, bin_num);
      }
      h = c->next;
    }
  }
  return bin_infos;
}

}  // namespace tensorflow