/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMMON_RUNTIME_BFC_ALLOCATOR_H_
#define TENSORFLOW_COMMON_RUNTIME_BFC_ALLOCATOR_H_

#include <algorithm>
#include <array>
#include <cstdint>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "tensorflow/core/common_runtime/allocator_retry.h"
#include "tensorflow/core/common_runtime/visitable_allocator.h"
#include "tensorflow/core/lib/gtl/stl_util.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/protobuf/config.pb.h"

namespace tensorflow {

// A memory allocator that implements a 'best-fit with coalescing'
// algorithm.  This is essentially a very simple version of Doug Lea's
// malloc (dlmalloc).
//
// The goal of this allocator is to support defragmentation via
// coalescing.  One assumption we make is that the process using this
// allocator owns pretty much all of the memory, and that nearly
// all requests to allocate memory go through this interface.
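//
// A minimal usage sketch (illustrative only: `MySubAllocator` stands in
// for a concrete SubAllocator implementation, and the sizes here are
// arbitrary):
//
//   BFCAllocator allocator(new MySubAllocator,  // ownership transferred
//                          1 << 30,             // 1 GiB memory limit
//                          /*allow_growth=*/true, "example_bfc");
//   void* p = allocator.AllocateRaw(/*alignment=*/256, /*num_bytes=*/4096);
//   // ... use p ...
//   allocator.DeallocateRaw(p);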
class BFCAllocator : public VisitableAllocator {
 public:
  // Takes ownership of sub_allocator.
  BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
               bool allow_growth, const string& name);
  ~BFCAllocator() override;

  string Name() override { return name_; }
  void* AllocateRaw(size_t alignment, size_t num_bytes) override;
  void* AllocateRaw(size_t alignment, size_t num_bytes,
                    const AllocationAttributes& allocation_attr) override;
  void DeallocateRaw(void* ptr) override;

  void AddAllocVisitor(Visitor visitor) override;

  // Does nothing, because memory is never freed back to the underlying
  // sub-allocator.
  void AddFreeVisitor(Visitor visitor) override {}

  bool TracksAllocationSizes() override;

  size_t RequestedSize(const void* ptr) override;

  size_t AllocatedSize(const void* ptr) override;

  int64 AllocationId(const void* ptr) override;

  void GetStats(AllocatorStats* stats) override;

  void ClearStats() override;

 private:
  struct Bin;

  void* AllocateRawInternal(size_t alignment, size_t num_bytes,
                            bool dump_log_on_failure);
  void DeallocateRawInternal(void* ptr);

  // A ChunkHandle is an index into the chunks_ vector in BFCAllocator;
  // kInvalidChunkHandle means an invalid chunk.
  typedef size_t ChunkHandle;
  static const ChunkHandle kInvalidChunkHandle = SIZE_MAX;
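  // Chunks are referred to by handle (index into chunks_) rather than by
  // raw pointer, so references remain valid when the chunks_ vector grows
  // and reallocates its storage.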

  typedef int BinNum;
  static const int kInvalidBinNum = -1;
  static const int kNumBins = 21;

  // Chunks point to memory.  Their prev/next pointers form a
  // doubly-linked list of addresses sorted by base address that
  // must be contiguous.  Chunks record whether they are in use or
  // free, and which bin (if any) they belong to.
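  //
  // For example (illustrative sizes), a free 1024-byte chunk split to
  // satisfy a 256-byte request becomes two contiguous chunks linked by
  // prev/next:
  //
  //   [A: ptr=base, size=256, in use] <-> [B: ptr=base+256, size=768, free]
  //
  // When A is later freed, it can be coalesced with B back into a single
  // 1024-byte free chunk.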
  struct Chunk {
    size_t size = 0;  // Full size of buffer.

    // We sometimes give chunks that are larger than needed to reduce
    // fragmentation.  requested_size keeps track of what the client
    // actually wanted so we can understand whether our splitting
    // strategy is efficient.
    size_t requested_size = 0;

    // allocation_id is set to -1 when the chunk is not in use. It is
    // assigned a value greater than zero before the chunk is returned from
    // AllocateRaw, and this value is unique among values assigned by
    // the parent allocator.
    int64 allocation_id = -1;
    void* ptr = nullptr;  // pointer to granted subbuffer.

    // If not kInvalidChunkHandle, the memory referred to by 'prev' directly
    // precedes the memory used by this chunk, i.e. it starts at
    // 'ptr - prev->size'.
    ChunkHandle prev = kInvalidChunkHandle;

    // If not kInvalidChunkHandle, the memory referred to by 'next' directly
    // follows the memory used by this chunk, i.e. it starts at
    // 'ptr + size'.
    ChunkHandle next = kInvalidChunkHandle;

    // What bin are we in?
    BinNum bin_num = kInvalidBinNum;

    bool in_use() const { return allocation_id != -1; }

    string DebugString(BFCAllocator* a,
                       bool recurse) NO_THREAD_SAFETY_ANALYSIS {
      string dbg;
      strings::StrAppend(
          &dbg, "  Size: ", strings::HumanReadableNumBytes(size),
          " | Requested Size: ",
          strings::HumanReadableNumBytes(requested_size),
          " | in_use: ", in_use());
      if (recurse && prev != BFCAllocator::kInvalidChunkHandle) {
        Chunk* p = a->ChunkFromHandle(prev);
        strings::StrAppend(&dbg, ", prev: ", p->DebugString(a, false));
      }
      if (recurse && next != BFCAllocator::kInvalidChunkHandle) {
        Chunk* n = a->ChunkFromHandle(next);
        strings::StrAppend(&dbg, ", next: ", n->DebugString(a, false));
      }
      return dbg;
    }
  };

  // A Bin is a collection of similar-sized free chunks.
  struct Bin {
    // All chunks in this bin have >= bin_size memory.
    size_t bin_size = 0;

    struct ChunkComparator {
      explicit ChunkComparator(BFCAllocator* allocator)
          : allocator_(allocator) {}
      // Sort first by size and then use pointer address as a tie breaker.
      bool operator()(const ChunkHandle ha,
                      const ChunkHandle hb) const NO_THREAD_SAFETY_ANALYSIS {
        const Chunk* a = allocator_->ChunkFromHandle(ha);
        const Chunk* b = allocator_->ChunkFromHandle(hb);
        if (a->size != b->size) {
          return a->size < b->size;
        }
        return a->ptr < b->ptr;
      }

     private:
      BFCAllocator* allocator_;  // The parent allocator.
    };

    typedef std::set<ChunkHandle, ChunkComparator> FreeChunkSet;
    // List of free chunks within the bin, sorted by chunk size.  Chunks
    // are referenced by handle; the bin does not own them.
    FreeChunkSet free_chunks;
    Bin(BFCAllocator* allocator, size_t bs)
        : bin_size(bs), free_chunks(ChunkComparator(allocator)) {}
  };

  static const size_t kMinAllocationBits = 8;
  static const size_t kMinAllocationSize = 1 << kMinAllocationBits;
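
  // With kMinAllocationSize == 256, bin i holds free chunks of at least
  // (256 << i) bytes: bin 0 covers sizes in [256, 512), bin 1 covers
  // [512, 1024), and so on, up to the catch-all bin 20 for chunks of
  // 256 MiB and larger (see BinNumForSize below).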

  // AllocationRegion maps pointers to ChunkHandles for a single
  // contiguous memory region.
  //
  // This class is thread-compatible.
  class AllocationRegion {
   public:
    AllocationRegion(void* ptr, size_t memory_size)
        : ptr_(ptr),
          memory_size_(memory_size),
          end_ptr_(
              static_cast<void*>(static_cast<char*>(ptr_) + memory_size_)) {
      DCHECK_EQ(0, memory_size % kMinAllocationSize);
      const size_t n_handles =
          (memory_size + kMinAllocationSize - 1) / kMinAllocationSize;
      handles_ = new ChunkHandle[n_handles];
      for (size_t i = 0; i < n_handles; i++) {
        handles_[i] = kInvalidChunkHandle;
      }
    }

    AllocationRegion() {}

    ~AllocationRegion() { delete[] handles_; }

    AllocationRegion(AllocationRegion&& other) { Swap(other); }

    AllocationRegion& operator=(AllocationRegion&& other) {
      Swap(other);
      return *this;
    }
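
    // AllocationRegion is move-only (see TF_DISALLOW_COPY_AND_ASSIGN
    // below) because it owns handles_ through a raw pointer; a move swaps
    // state with a default-constructed region, which is sufficient for
    // storage in RegionManager's std::vector.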

    void* ptr() const { return ptr_; }
    void* end_ptr() const { return end_ptr_; }
    size_t memory_size() const { return memory_size_; }
    ChunkHandle get_handle(const void* p) const {
      return handles_[IndexFor(p)];
    }
    void set_handle(const void* p, ChunkHandle h) { handles_[IndexFor(p)] = h; }
    void erase(const void* p) { set_handle(p, kInvalidChunkHandle); }

   private:
    void Swap(AllocationRegion& other) {
      std::swap(ptr_, other.ptr_);
      std::swap(memory_size_, other.memory_size_);
      std::swap(end_ptr_, other.end_ptr_);
      std::swap(handles_, other.handles_);
    }

    int IndexFor(const void* p) const {
      std::uintptr_t p_int = reinterpret_cast<std::uintptr_t>(p);
      std::uintptr_t base_int = reinterpret_cast<std::uintptr_t>(ptr_);
      DCHECK_GE(p_int, base_int);
      DCHECK_LT(p_int, base_int + memory_size_);
      return static_cast<int>(((p_int - base_int) >> kMinAllocationBits));
    }
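
    // Worked example: with kMinAllocationSize == 256 (kMinAllocationBits
    // == 8), a pointer 1000 bytes past the region base maps to index
    // 1000 >> 8 == 3, and a 1 MiB region needs (1 << 20) / 256 == 4096
    // handle slots.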

    // Metadata about the allocation region.
    void* ptr_ = nullptr;
    size_t memory_size_ = 0;
    void* end_ptr_ = nullptr;

    // Array of size "memory_size / kMinAllocationSize".  It is indexed by
    // (p - base) / kMinAllocationSize and contains the ChunkHandle for the
    // memory allocation represented by "p".
    ChunkHandle* handles_ = nullptr;

    TF_DISALLOW_COPY_AND_ASSIGN(AllocationRegion);
  };

  // RegionManager aggregates one or more "AllocationRegions" and provides
  // a layer of indirection from pointers to the underlying ChunkHandle,
  // allowing allocation across multiple discontiguous memory regions.
  //
  // This class is thread-compatible.
  class RegionManager {
   public:
    RegionManager() {}
    ~RegionManager() {}

    void AddAllocationRegion(void* ptr, size_t memory_size) {
      // Insert sorted by end_ptr.
      auto entry =
          std::upper_bound(regions_.begin(), regions_.end(), ptr, &Comparator);
      regions_.insert(entry, AllocationRegion(ptr, memory_size));
    }

    ChunkHandle get_handle(const void* p) const {
      return RegionFor(p)->get_handle(p);
    }

    void set_handle(const void* p, ChunkHandle h) {
      return MutableRegionFor(p)->set_handle(p, h);
    }
    void erase(const void* p) { return MutableRegionFor(p)->erase(p); }

    const std::vector<AllocationRegion>& regions() const { return regions_; }

   private:
    static bool Comparator(const void* ptr, const AllocationRegion& other) {
      return ptr < other.end_ptr();
    }
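
    // Because regions_ is kept sorted by end_ptr, std::upper_bound with
    // this comparator returns the first region whose end_ptr is greater
    // than 'p', i.e. the region containing 'p' when such a region exists.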

    AllocationRegion* MutableRegionFor(const void* p) {
      return const_cast<AllocationRegion*>(RegionFor(p));
    }

    const AllocationRegion* RegionFor(const void* p) const {
      auto entry =
          std::upper_bound(regions_.begin(), regions_.end(), p, &Comparator);

      if (entry != regions_.end()) {
        return &(*entry);
      }

      LOG(FATAL) << "Could not find Region for " << p;
      return nullptr;
    }

    std::vector<AllocationRegion> regions_;
  };

  // Returns 'bytes' rounded up to the next multiple of kMinAllocationSize.
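  // For example, with kMinAllocationSize == 256, RoundedBytes(300) == 512
  // and RoundedBytes(256) == 256.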
  size_t RoundedBytes(size_t bytes);

  // Try to add a new memory region that can satisfy an allocation of
  // 'rounded_bytes' bytes.  Returns true on success and false on
  // failure.
  bool Extend(size_t rounded_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Returns a pointer to an underlying allocated chunk of size
  // 'rounded_bytes'.
  void* FindChunkPtr(BinNum bin_num, size_t rounded_bytes, size_t num_bytes)
      EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Splits the chunk specified by 'h' into two chunks, one at least
  // of size 'num_bytes'.
  void SplitChunk(ChunkHandle h, size_t num_bytes)
      EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Merges the two chunk handles.  Requires that the chunks are
  // contiguous in their allocation.
  void Merge(ChunkHandle h, ChunkHandle h2) EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Frees the memory represented by 'h', coalescing the chunk if
  // possible.
  void FreeAndMaybeCoalesce(ChunkHandle h) EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Adds the chunk 'h' to the proper free bin.
  void InsertFreeChunkIntoBin(ChunkHandle h) EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Removes the free chunk pointed to by 'c' from the set free_chunks.
  void RemoveFreeChunkIterFromBin(Bin::FreeChunkSet* free_chunks,
                                  const Bin::FreeChunkSet::iterator& c)
      EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Removes a free chunk from the bin.
  void RemoveFreeChunkFromBin(ChunkHandle h) EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Removes the chunk metadata represented by 'h'.
  void DeleteChunk(ChunkHandle h) EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Debugging helpers: RenderOccupancy() returns a textual rendering of
  // region occupancy, and DumpMemoryLog() logs the memory map when an
  // allocation of 'num_bytes' cannot be satisfied.
  string RenderOccupancy() EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void DumpMemoryLog(size_t num_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_);

  ChunkHandle AllocateChunk() EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void DeallocateChunk(ChunkHandle h) EXCLUSIVE_LOCKS_REQUIRED(lock_);

  Chunk* ChunkFromHandle(ChunkHandle h) EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Information about a Bin that is useful for debugging.
  struct BinDebugInfo {
    size_t total_bytes_in_use = 0;
    size_t total_bytes_in_bin = 0;
    size_t total_requested_bytes_in_use = 0;
    size_t total_chunks_in_use = 0;
    size_t total_chunks_in_bin = 0;
  };

  // Computes and returns a BinDebugInfo for each Bin.
  std::array<BinDebugInfo, kNumBins> get_bin_debug_info()
      EXCLUSIVE_LOCKS_REQUIRED(lock_);

  AllocatorRetry retry_helper_;

  // Structures immutable after construction.
  size_t memory_limit_ = 0;

  // Portable fallback for Log2FloorNonZero below; requires n > 0.
  inline int Log2FloorNonZeroSlow(uint64 n) {
    int r = 0;
    while (n > 0) {
      r++;
      n >>= 1;
    }
    return r - 1;
  }

  // Returns floor(log2(n)); n must be nonzero.
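  // For example, Log2FloorNonZero(4096) == 12 and
  // Log2FloorNonZero(4095) == 11.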
  inline int Log2FloorNonZero(uint64 n) {
#if defined(__GNUC__)
    return 63 ^ __builtin_clzll(n);
#elif defined(PLATFORM_WINDOWS)
    unsigned long index;
    _BitScanReverse64(&index, n);
    return index;
#else
    return Log2FloorNonZeroSlow(n);
#endif
  }

  // Helpers that map between bin indices, bin sizes, and Bins.
  Bin* BinFromIndex(BinNum index) {
    return reinterpret_cast<Bin*>(&(bins_space_[index * sizeof(Bin)]));
  }
  size_t BinNumToSize(BinNum index) {
    return static_cast<size_t>(256) << index;
  }
  BinNum BinNumForSize(size_t bytes) {
    uint64 v = std::max<size_t>(bytes, 256) >> kMinAllocationBits;
    int b = std::min(kNumBins - 1, Log2FloorNonZero(v));
    return b;
  }
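  // For example, BinNumForSize(1024) computes Log2FloorNonZero(1024 >> 8)
  // == Log2FloorNonZero(4) == 2, so a 1 KiB request starts its search in
  // bin 2 (whose bin size is 1024).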
  Bin* BinForSize(size_t bytes) { return BinFromIndex(BinNumForSize(bytes)); }

  // Raw storage for the kNumBins Bin objects, which are constructed in
  // place; BinFromIndex() above recovers a typed pointer to bin 'index'.
  char bins_space_[sizeof(Bin) * kNumBins];

  // The size of the current region allocation.
  size_t curr_region_allocation_bytes_;

  // The total number of bytes allocated by the allocator.
  size_t total_region_allocated_bytes_ = 0;

  // An indicator that expansion of a region has hit the limits
  // of the available memory.
  bool started_backpedal_ = false;

  std::unique_ptr<SubAllocator> suballocator_;
  string name_;

  // Structures mutable after construction.
  mutable mutex lock_;
  RegionManager region_manager_ GUARDED_BY(lock_);

  std::vector<Chunk> chunks_ GUARDED_BY(lock_);

  // Pointer to head of linked list of free Chunks.
  ChunkHandle free_chunks_list_ GUARDED_BY(lock_);

  // Visitors to be called once on each region, as soon as possible after
  // the region is allocated.
  std::vector<Visitor> region_visitors_ GUARDED_BY(lock_);

  // Counter containing the next unique identifier to assign to a
  // newly-created chunk.
  int64 next_allocation_id_ GUARDED_BY(lock_);

  // Stats.
  AllocatorStats stats_ GUARDED_BY(lock_);

  friend class GPUBFCAllocatorPrivateMethodsTest;
  TF_DISALLOW_COPY_AND_ASSIGN(BFCAllocator);
};

}  // namespace tensorflow

#endif  // TENSORFLOW_COMMON_RUNTIME_BFC_ALLOCATOR_H_