1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_ASSIGNMENT_H_ |
17 | #define TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_ASSIGNMENT_H_ |
18 | |
19 | #include <functional> |
20 | #include <iosfwd> |
21 | #include <memory> |
22 | #include <string> |
23 | #include <vector> |
24 | |
25 | #include "tensorflow/compiler/xla/service/buffer_liveness.h" |
26 | #include "tensorflow/compiler/xla/service/heap_simulator.h" |
27 | #include "tensorflow/compiler/xla/service/hlo.pb.h" |
28 | #include "tensorflow/compiler/xla/service/hlo_computation.h" |
29 | #include "tensorflow/compiler/xla/service/hlo_instruction.h" |
30 | #include "tensorflow/compiler/xla/service/hlo_module.h" |
31 | #include "tensorflow/compiler/xla/service/logical_buffer.h" |
32 | #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h" |
33 | #include "tensorflow/compiler/xla/statusor.h" |
34 | #include "tensorflow/compiler/xla/types.h" |
35 | #include "tensorflow/compiler/xla/xla_data.pb.h" |
36 | #include "tensorflow/core/lib/gtl/array_slice.h" |
37 | #include "tensorflow/core/lib/gtl/flatmap.h" |
38 | #include "tensorflow/core/lib/gtl/flatset.h" |
39 | #include "tensorflow/core/platform/logging.h" |
40 | #include "tensorflow/core/platform/macros.h" |
41 | #include "tensorflow/core/platform/types.h" |
42 | |
43 | namespace xla { |
44 | |
45 | // This class abstracts an allocation of contiguous memory which can hold the |
46 | // values described by LogicalBuffers. Each LogicalBuffer occupies a sub-range |
47 | // of the allocation, represented by a Slice. A single BufferAllocation may hold |
48 | // LogicalBuffers with disjoint liveness, which may have overlapping Slices. A |
49 | // single BufferAllocation may also hold LogicalBuffers with overlapping |
50 | // liveness, which must have disjoint Slices. |
51 | // |
52 | // The abstraction includes information required by the backends for allocation, |
53 | // use, and deallocation of the buffer. This includes the LogicalBuffers which |
54 | // are held in this allocation through the execution of the computation. |
55 | class BufferAllocation { |
56 | public: |
57 | // Holds a unique identifier for each allocation. Values are assigned |
58 | // contiguously and can be used as array indexes. |
59 | using Index = int64; |
60 | |
61 | BufferAllocation(Index index, int64 size, bool is_thread_local, |
62 | bool is_reusable, LogicalBuffer::Color color) |
63 | : index_(index), |
64 | size_(size), |
65 | is_thread_local_(is_thread_local), |
66 | is_reusable_(is_reusable), |
67 | color_(color) {} |
68 | ~BufferAllocation() {} |
69 | |
70 | // Returns the index of this allocation. |
71 | Index index() const { return index_; } |
72 | |
73 | // Whether this allocation is used in a parallel calling context such as |
74 | // inside of a map or reduce computation. Such allocations need to be thread |
75 | // local. |
76 | bool is_thread_local() const { return is_thread_local_; } |
77 | |
78 | // Whether this allocation can be used by more than one logical buffer. |
79 | bool is_reusable() const { return is_reusable_; } |
80 | |
81 | // Whether this allocation holds a LogicalBuffer from a parameter of the entry |
82 | // computation. These buffers have lifetimes which may be longer than the |
83 | // XLA computation. |
84 | bool is_entry_computation_parameter() const { |
85 | return is_entry_computation_parameter_; |
86 | } |
87 | // If this allocation holds a Buffer from a parameter of the entry |
88 | // computation, this methods returns the parameter number. CHECKs otherwise. |
89 | int64 parameter_number() const { |
90 | CHECK(is_entry_computation_parameter_); |
91 | return parameter_number_; |
92 | } |
93 | |
94 | // If this allocation is for a parameter of the entry computation, this |
95 | // function returns which subshape of the parameter the allocation is for. |
96 | const ShapeIndex& param_shape_index() const { |
97 | CHECK(is_entry_computation_parameter_); |
98 | return param_shape_index_; |
99 | } |
100 | |
101 | // Returns whether this allocation is assigned a LogicalBuffer which may |
102 | // be live out of the entry computation. |
103 | bool maybe_live_out() const { return maybe_live_out_; } |
104 | |
105 | // Returns the size of the allocation. Necessarily this must be at least as |
106 | // large as any LogicalBuffer assigned to this allocation. |
107 | int64 size() const { return size_; } |
108 | |
109 | // Returns the color of the allocation. Only logical buffers with a matching |
110 | // color can reside in this allocation. |
111 | LogicalBuffer::Color color() const { return color_; } |
112 | |
113 | struct OffsetSize { |
114 | int64 offset = 0; |
115 | int64 size = 0; |
116 | }; |
117 | |
118 | // Access to the logical buffers assigned to this allocation, and their |
119 | // associated logical offsets and sizes. |
120 | const tensorflow::gtl::FlatMap<const LogicalBuffer*, OffsetSize>& |
121 | assigned_buffers() const { |
122 | return assigned_buffers_; |
123 | } |
124 | |
125 | // A Slice represents a contiguous portion of a memory allocation. It is used |
126 | // to identify the memory range that a LogicalBuffer corresponds to. |
127 | class Slice { |
128 | public: |
129 | Slice() {} |
130 | Slice(const BufferAllocation* allocation, int64 offset, int64 size) |
131 | : allocation_(allocation), offset_(offset), size_(size) {} |
132 | |
133 | const BufferAllocation* allocation() const { return allocation_; } |
134 | Index index() const { return allocation_->index(); } |
135 | int64 offset() const { return offset_; } |
136 | int64 size() const { return size_; } |
137 | |
138 | bool operator==(const Slice& other) const { |
139 | return index() == other.index() && offset_ == other.offset_ && |
140 | size_ == other.size_; |
141 | } |
142 | bool operator!=(const Slice& other) const { return !(*this == other); } |
143 | bool operator<(const Slice& other) const { |
144 | if (index() != other.index()) return index() < other.index(); |
145 | if (offset_ != other.offset_) return offset_ < other.offset_; |
146 | return size_ < other.size_; |
147 | } |
148 | |
149 | // Returns true iff this slice's memory range has a non-empty intersection |
150 | // with the other slice's memory range. |
151 | bool OverlapsWith(const Slice& other) const { |
152 | const int64 end = offset_ + size_; |
153 | const int64 other_end = other.offset_ + other.size_; |
154 | return index() == other.index() && offset_ < other_end && |
155 | end > other.offset_; |
156 | } |
157 | |
158 | struct Hasher { |
159 | size_t operator()(Slice s) const; |
160 | }; |
161 | |
162 | string ToString() const; |
163 | |
164 | private: |
165 | const BufferAllocation* allocation_ = nullptr; |
166 | int64 offset_ = 0; |
167 | int64 size_ = 0; |
168 | }; |
169 | |
170 | // GetSlice returns the Slice of contiguous memory that holds the value |
171 | // described by the given 'buffer'. |
172 | // REQUIRES: 'buffer' must be assigned to this allocation. |
173 | Slice GetSlice(const LogicalBuffer& buffer) const; |
174 | |
175 | string ToString() const; |
176 | BufferAllocationProto ToProto() const; |
177 | |
178 | // Whether the buffer is a parameter to or live out of the entry computation. |
179 | bool IsInputOrOutput() const { |
180 | return is_entry_computation_parameter() || maybe_live_out(); |
181 | } |
182 | |
183 | // Whether the buffer is a temporary buffer allocated before |
184 | // Executable::ExecuteOnStream. |
185 | bool IsPreallocatedTempBuffer() const { |
186 | // Parameters do not need temporary buffers. |
187 | return !is_entry_computation_parameter() && |
188 | // LogicalBuffers that maybe pointed to by the output should live out |
189 | // of the computation. |
190 | !maybe_live_out() && |
191 | // Thread-local buffers are allocated using `alloca`s. |
192 | !is_thread_local(); |
193 | } |
194 | |
195 | // Add a heap trace which was used to assign slices to logical buffers in this |
196 | // allocation. A single BufferAllocation may include multiple heap traces |
197 | // in the case of the temporary block where there is a heap trace per |
198 | // computation. |
199 | void AddHeapTrace(const HeapSimulatorTrace& heap_trace) { |
200 | heap_traces_.push_back(heap_trace); |
201 | } |
202 | |
203 | // Return the set of heap traces used to assign slices to logical buffers in |
204 | // this allocation. |
205 | const std::vector<HeapSimulatorTrace> HeapTraces() const { |
206 | return heap_traces_; |
207 | } |
208 | |
209 | // Compute and return the LogicalBuffers which are live at the point of peak |
210 | // memory usage for the given allocation. The point of peak memory usage is |
211 | // the point at which the total size of all live logical buffers is |
212 | // maximal. If peak memory is reached at multiple points, the set of logical |
213 | // buffers live at the earliest maximal point is returned. The vector is |
214 | // stabily asserted by LogicalBuffer::Index. |
215 | // |
216 | // The return value is a pair of total size of the logical buffers at peak, |
217 | // and the buffers themselves. |
218 | std::pair<int64, std::vector<const LogicalBuffer*>> |
219 | ComputePeakMemoryLogicalBuffers() const; |
220 | |
221 | // Get the number of bytes lost to fragmentation. This is equal to the |
222 | // difference between the size of the allocation and the size of the maximal |
223 | // live set. |
224 | int64 fragmentation_bytes() const { return fragmentation_bytes_; } |
225 | |
226 | bool operator==(const BufferAllocation& other) const { |
227 | return index_ == other.index_; |
228 | } |
229 | bool operator!=(const BufferAllocation& other) const { |
230 | return !(*this == other); |
231 | } |
232 | bool operator<(const BufferAllocation& other) const { |
233 | return index() < other.index(); |
234 | } |
235 | |
236 | private: |
237 | // Only BufferAssigner and BufferAssignment can modify BufferAllocation. |
238 | friend class BufferAssigner; |
239 | friend class BufferAssignment; |
240 | |
241 | // Adds a LogicalBuffer to the set assigned to this buffer. |
242 | void AddAssignment(const LogicalBuffer& buffer, int64 offset, int64 size); |
243 | |
244 | void set_entry_computation_parameter(int64 parameter_number, |
245 | ShapeIndex param_shape_index) { |
246 | is_entry_computation_parameter_ = true; |
247 | parameter_number_ = parameter_number; |
248 | param_shape_index_ = std::move(param_shape_index); |
249 | } |
250 | void set_maybe_live_out(bool value) { maybe_live_out_ = value; } |
251 | void set_index(Index index) { index_ = index; } |
252 | void set_size(int64 size) { size_ = size; } |
253 | |
254 | // The index of the allocation in the BufferAssignment. |
255 | Index index_; |
256 | |
257 | // Size of the allocation in bytes. |
258 | int64 size_; |
259 | |
260 | // Whether this buffer needs to be thread-local. |
261 | bool is_thread_local_; |
262 | |
263 | // Whether this buffer is usable by more than one logical buffer. |
264 | bool is_reusable_; |
265 | |
266 | // Color of the allocation. |
267 | LogicalBuffer::Color color_; |
268 | |
269 | // Whether this allocation holds an entry computation parameter. Entry |
270 | // computation parameters are special be cause they have lifetimes which may |
271 | // outlast the computation. |
272 | bool is_entry_computation_parameter_ = false; |
273 | |
274 | // If this allocation holds an entry computation parameter, this field |
275 | // indicates the index (starting from 0) of the parameter. |
276 | int64 parameter_number_ = 0; |
277 | |
278 | // If this buffer is for an entry computation parameter, which subshape of the |
279 | // parameter is it for? |
280 | ShapeIndex param_shape_index_; |
281 | |
282 | // Whether the allocation contains a LogicalBuffer which may be live-out of |
283 | // the entry computation. Note that this flag is conservatively computed by |
284 | // TuplePointsToAnalysis. That is, an allocation marked `maybe_live_out_` |
285 | // might not actually escape. |
286 | bool maybe_live_out_ = false; |
287 | |
288 | // Mapping from the set of buffers assigned to this allocation to their |
289 | // logical offsets and sizes. |
290 | tensorflow::gtl::FlatMap<const LogicalBuffer*, OffsetSize> assigned_buffers_; |
291 | |
292 | int64 fragmentation_bytes_ = 0; |
293 | std::vector<HeapSimulatorTrace> heap_traces_; |
294 | }; |
295 | |
296 | // Add stream operators for nicer output of CHECK/RET_CHECK failures. |
297 | std::ostream& operator<<(std::ostream& out, const BufferAllocation& s); |
298 | std::ostream& operator<<(std::ostream& out, const BufferAllocation::Slice& s); |
299 | |
300 | // This class encapsulates an assignment of the LogicalBuffers in an XLA |
301 | // module to a set of BufferAllocations. |
302 | class BufferAssignment { |
303 | public: |
304 | // Returns the vector containing all buffer allocations in this assignment. |
305 | const std::vector<BufferAllocation>& Allocations() const { |
306 | return allocations_; |
307 | } |
308 | |
309 | // Returns the total size allocation holding all temporary buffers. |
310 | int64 temp_allocation_total_size() const { |
311 | return temp_allocation_total_size_; |
312 | } |
313 | |
314 | // Returns whether the given buffer has been assigned an allocation. |
315 | bool HasAllocation(const LogicalBuffer& buffer) const; |
316 | |
317 | // Returns the allocation that a particular LogicalBuffer has been assigned |
318 | // to. CHECKs if buffer has not been assigned an allocation. |
319 | const BufferAllocation& GetAssignedAllocation( |
320 | const LogicalBuffer& buffer) const; |
321 | |
322 | // Returns the allocation with the given index. CHECKs if no allocation exists |
323 | // with the given index. |
324 | const BufferAllocation& GetAllocation(BufferAllocation::Index index) const; |
325 | |
326 | // Builds and returns a vector containing the slices which might contain the |
327 | // subvalue at the given index of given instruction. |
328 | std::set<BufferAllocation::Slice> GetAllSlices( |
329 | const HloInstruction* instruction, const ShapeIndex& index) const; |
330 | |
331 | // Convenience function which returns whether the buffer of the |
332 | // instruction at the given index is assigned an allocation. |
333 | bool HasAllocationAt(const HloInstruction* instruction, |
334 | const ShapeIndex& index) const; |
335 | |
336 | // Convenience function which returns whether the top-level buffer of the |
337 | // instruction (index == {}) is assigned an allocation. |
338 | bool HasTopLevelAllocation(const HloInstruction* instruction) const; |
339 | |
340 | // Convenience function which returns the unique slice containing the buffer |
341 | // at the given index of the given instruction. If a slice is not assigned or |
342 | // the slice cannot be determined at compile time then an error is returned. |
343 | StatusOr<BufferAllocation::Slice> GetUniqueSlice( |
344 | const HloInstruction* instruction, const ShapeIndex& index) const; |
345 | // Like GetUniqueSlice but fixes the index to the top-level of the shape |
346 | // (index = {}). |
347 | StatusOr<BufferAllocation::Slice> GetUniqueTopLevelSlice( |
348 | const HloInstruction* instruction) const; |
349 | // Like GetUniqueTopLevelSlice but returns the slice for the output of the |
350 | // entry computation of the HLO module (ie, the result of the XLA |
351 | // computation). |
352 | StatusOr<BufferAllocation::Slice> GetUniqueTopLevelOutputSlice() const; |
353 | |
354 | // Returns the set LogicalBuffers which may be the source of the value at the |
355 | // given index and instruction. |
356 | const PointsToSet::BufferList& GetSourceBuffers( |
357 | const HloInstruction* instruction, const ShapeIndex& index) const { |
358 | return GetPointsToSet(instruction).element(index); |
359 | } |
360 | |
361 | // Returns true if 'hlo_a{shape_index_a}' and 'hlo_b{shape_index_b}' |
362 | // share the same BufferAllocation::Slice. |
363 | // Returns false otherwise. |
364 | // REQUIRES: BufferAssignment assigned allocations to both instructions. |
365 | bool SharesSliceAtIndex(const HloInstruction* hlo_a, |
366 | const ShapeIndex& shape_index_a, |
367 | const HloInstruction* hlo_b, |
368 | const ShapeIndex& shape_index_b) const; |
369 | |
370 | // Returns true if the top-level buffers of hlo_a and hlo_b are the same. |
371 | // REQUIRES: HasTopLevelAllocation(hlo_a) && HasTopLevelAllocation(hlo_b). |
372 | bool SharesTopLevelSlice(const HloInstruction* hlo_a, |
373 | const HloInstruction* hlo_b) const { |
374 | return SharesSliceAtIndex(hlo_a, {}, hlo_b, {}); |
375 | } |
376 | |
377 | // Returns true if hlo_a and hlo_b both have at least one buffer assigned for |
378 | // their top-level and each of their nested shape indices, and if hlo_a's |
379 | // buffers are all different from hlo_b's buffers. |
380 | bool HaveDisjointSlices(const HloInstruction* hlo_a, |
381 | const HloInstruction* hlo_b) const; |
382 | |
383 | // Returns the underlying points-to analysis used for this assignment. |
384 | const TuplePointsToAnalysis& points_to_analysis() const { |
385 | return liveness_->points_to_analysis(); |
386 | } |
387 | |
388 | // Returns the BufferLiveness object used to construct this assignment. |
389 | const BufferLiveness& liveness() const { return *liveness_; } |
390 | |
391 | string ToString() const; |
392 | BufferAssignmentProto ToProto() const; |
393 | |
394 | // Statistics for the assignment. Values initialized to -1 are not always |
395 | // collected; fragmentation is only collected for instructions that have a |
396 | // sequential total ordering. |
397 | struct Stats { |
398 | int64 parameter_allocation_count = 0; |
399 | int64 parameter_allocation_bytes = 0; |
400 | int64 maybe_live_out_allocation_count = 0; |
401 | int64 maybe_live_out_allocation_bytes = 0; |
402 | int64 preallocated_temp_allocation_count = 0; |
403 | int64 preallocated_temp_allocation_bytes = 0; |
404 | int64 preallocated_temp_fragmentation_bytes = -1; |
405 | int64 total_allocation_count = 0; |
406 | int64 total_allocation_bytes = 0; |
407 | int64 total_fragmentation_bytes = -1; |
408 | |
409 | string ToString() const; |
410 | }; |
411 | const Stats& GetStats() const { return stats_; } |
412 | |
413 | private: |
414 | // Only BufferAssigner can build or modify BufferAssignments. |
415 | friend class BufferAssigner; |
416 | |
417 | explicit BufferAssignment(const HloModule* module, |
418 | std::unique_ptr<BufferLiveness> liveness, |
419 | LogicalBuffer::SizeFunction buffer_size, |
420 | LogicalBuffer::AlignmentFunction color_alignment) |
421 | : module_(module), |
422 | liveness_(std::move(liveness)), |
423 | buffer_size_(std::move(buffer_size)), |
424 | color_alignment_(std::move(color_alignment)) {} |
425 | |
426 | // Creates and returns a new BufferAllocation, with no assigned |
427 | // LogicalBuffers. Ownership is maintained internally. |
428 | BufferAllocation* NewEmptyAllocation(int64 size, bool is_thread_local, |
429 | bool is_reusable, |
430 | LogicalBuffer::Color color); |
431 | |
432 | // Helper that calls NewEmptyAllocation and AddAssignment in one call, |
433 | // creating an allocation containing a single LogicalBuffer. |
434 | BufferAllocation* NewAllocation(const LogicalBuffer& buffer, int64 size, |
435 | bool is_thread_local, bool is_reusable); |
436 | |
437 | // Adds a LogicalBuffer to the set assigned to the given allocation. |
438 | void AddAssignment(BufferAllocation* allocation, const LogicalBuffer& buffer, |
439 | int64 offset, int64 size); |
440 | |
441 | // Returns the HloModule used to construct this assignment. |
442 | const HloModule& module() const { return *module_; } |
443 | |
444 | // Convenience function which returns the PointsToSet for the given |
445 | // instruction. Extracted from the liveness object. |
446 | const PointsToSet& GetPointsToSet(const HloInstruction* instruction) const; |
447 | |
448 | // Mutable accessors for allocations. |
449 | BufferAllocation* GetMutableAssignedAllocation(const LogicalBuffer& buffer); |
450 | BufferAllocation* GetMutableAllocation(BufferAllocation::Index index); |
451 | |
452 | // Combines allocations of temporary buffers into one big BufferAllocation. |
453 | void CombineTempAllocations(); |
454 | |
455 | // Computes stats for the assignment, to be retrieved by GetStats. |
456 | Status ComputeSummaryStats(); |
457 | |
458 | // The vector of buffer allocations. Indexed by BufferAllocation::Index. |
459 | std::vector<BufferAllocation> allocations_; |
460 | |
461 | // The total size of all temporary buffers. |
462 | int64 temp_allocation_total_size_ = 0; |
463 | |
464 | // Maps Buffers to the index of the BufferAllocation which holds the buffer. |
465 | tensorflow::gtl::FlatMap<const LogicalBuffer*, BufferAllocation::Index> |
466 | allocation_index_for_buffer_; |
467 | |
468 | const HloModule* module_; |
469 | const std::unique_ptr<BufferLiveness> liveness_; |
470 | |
471 | // Function which returns the buffer size for a given logical buffer (shape). |
472 | LogicalBuffer::SizeFunction buffer_size_; |
473 | |
474 | // Function which returns the alignment for a given logical buffer color. |
475 | LogicalBuffer::AlignmentFunction color_alignment_; |
476 | |
477 | Stats stats_; |
478 | |
479 | TF_DISALLOW_COPY_AND_ASSIGN(BufferAssignment); |
480 | }; |
481 | |
482 | // A class which constructs a buffer assignment. |
483 | class BufferAssigner { |
484 | public: |
485 | // Build and return a BufferAssignment for the given module. The given |
486 | // HloOrdering is used to determine buffer liveness. buffer_size and |
487 | // color_alignment are functions which returns the size and alignment of a |
488 | // LogicalBuffer. allow_input_output_aliasing specifies whether input buffer |
489 | // are allowed to be reused as outbut buffers by the client code. |
490 | static StatusOr<std::unique_ptr<BufferAssignment>> Run( |
491 | const HloModule* module, std::unique_ptr<HloOrdering> hlo_ordering, |
492 | LogicalBuffer::SizeFunction buffer_size, |
493 | LogicalBuffer::AlignmentFunction color_alignment, |
494 | bool allow_input_output_aliasing = false, |
495 | BufferLiveness::Colorer colorer = BufferLiveness::DefaultColorer()); |
496 | |
497 | private: |
498 | BufferAssigner(bool allow_input_output_aliasing, |
499 | BufferLiveness::Colorer colorer) |
500 | : allow_input_output_aliasing_(allow_input_output_aliasing), |
501 | colorer_(colorer) {} |
502 | virtual ~BufferAssigner() = default; |
503 | |
504 | // Create a buffer assignment. |
505 | StatusOr<std::unique_ptr<BufferAssignment>> CreateAssignment( |
506 | const HloModule* module, std::unique_ptr<HloOrdering> hlo_ordering, |
507 | LogicalBuffer::SizeFunction buffer_size, |
508 | LogicalBuffer::AlignmentFunction color_alignment); |
509 | |
510 | // Assigns buffers to the instructions in the given computation. "assignment" |
511 | // is modified to reflect the new buffer assignments. If is_thread_local is |
512 | // true, then all assigned buffers have the is_thread_local flag set to |
513 | // true. |
514 | Status AssignBuffersForComputation( |
515 | const HloComputation* computation, const DebugOptions& debug_options, |
516 | bool is_thread_local, |
517 | const tensorflow::gtl::FlatSet<const LogicalBuffer*>& colocated_buffers, |
518 | const tensorflow::gtl::FlatSet<BufferAllocation::Index>& |
519 | colocated_allocations, |
520 | tensorflow::gtl::FlatMap<const HloComputation*, |
521 | tensorflow::gtl::FlatSet<const LogicalBuffer*>>* |
522 | buffers_to_assign_sequentially, |
523 | BufferAssignment* assignment); |
524 | |
525 | // Assigns 'buffers_to_assign_sequentially' using heap simulation, assuming |
526 | // the HLO instructions will be executed in the sequential order given by |
527 | // assignment->liveness().hlo_ordering().SequentialOrder. If |
528 | // 'run_whole_module_heap_simulation' is true, the heap simulation will be run |
529 | // assuming all global computations are sequentially ordered. |
530 | Status AssignBuffersWithSequentialOrdering( |
531 | const tensorflow::gtl::FlatMap< |
532 | const HloComputation*, |
533 | tensorflow::gtl::FlatSet<const LogicalBuffer*>>& |
534 | buffers_to_assign_sequentially, |
535 | bool run_whole_module_heap_simulation, BufferAssignment* assignment); |
536 | |
537 | // Uses the results of the heap simulator to create a single allocation, with |
538 | // LogicalBuffers packed to specific offsets. |
539 | void AssignBuffersFromHeapSimulator(const HeapSimulator::Result& result, |
540 | BufferAssignment* assignment, |
541 | LogicalBuffer::Color color); |
542 | |
543 | // Tries to assign the given instruction to the given buffer. Returns if the |
544 | // assignment was successful. |
545 | bool MaybeAssignBuffer(BufferAllocation* allocation, |
546 | const LogicalBuffer& buffer, |
547 | BufferAssignment* assignment); |
548 | |
549 | // Colocated buffers are logical buffers from different computations which |
550 | // alias. Explicitly handling these colocated buffers is necessary because |
551 | // points-to analysis is computation level scope and does not recognize |
552 | // aliasing across computations (b/32491382). |
553 | using ColocatedBufferSet = tensorflow::gtl::FlatSet<const LogicalBuffer*>; |
554 | |
555 | // Returns a vector of ColocatedBufferSet objects, where each |
556 | // ColocatedBufferSet aggregates a set of related LogicalBuffers from 'module' |
557 | // which should be colocated in the same buffer allocation. |
558 | void BuildColocatedBufferSets( |
559 | const HloModule* module, const BufferLiveness& buffer_liveness, |
560 | const LogicalBuffer::SizeFunction& buffer_size, |
561 | std::vector<ColocatedBufferSet>* colocated_buffer_sets); |
562 | |
563 | // For each buffer set in 'colocated_buffer_sets', assigns all buffers in the |
564 | // same set to the same buffer allocation in 'assignment'. |
565 | void AssignColocatedBufferSets( |
566 | const std::vector<ColocatedBufferSet>& colocated_buffer_sets, |
567 | BufferAssignment* assignment, |
568 | tensorflow::gtl::FlatSet<const LogicalBuffer*>* colocated_buffers, |
569 | tensorflow::gtl::FlatSet<BufferAllocation::Index>* colocated_allocations); |
570 | |
571 | // Adds the 'colocated_set' of buffers to 'colocated_buffer_sets', maintaining |
572 | // the invariant that all sets in 'colocated_buffer_sets' are disjoint. |
573 | void AddSetToColocatedBufferSets( |
574 | const std::vector<const LogicalBuffer*>& colocated_set, |
575 | std::vector<ColocatedBufferSet>* colocated_buffer_sets); |
576 | |
577 | // Given a list of colocated buffer sets (each colocated buffer set represents |
578 | // the logical buffers that would be assigned to the same physical buffer), |
579 | // try to merge the sets if the buffers can be shared. Returns the merged set. |
580 | std::vector<ColocatedBufferSet> MergeColocatedBufferSets( |
581 | const std::vector<ColocatedBufferSet>& colocated_buffer_sets, |
582 | const BufferLiveness& buffer_liveness, |
583 | const LogicalBuffer::SizeFunction& buffer_size); |
584 | |
585 | // Split a set of buffers into several sets, each of which contains buffers |
586 | // colored with the same color. |
587 | tensorflow::gtl::FlatMap<LogicalBuffer::Color, |
588 | tensorflow::gtl::FlatSet<const LogicalBuffer*>, |
589 | LogicalBuffer::Color::Hasher> |
590 | SplitBuffersByColor( |
591 | const tensorflow::gtl::FlatSet<const LogicalBuffer*>& buffers); |
592 | |
593 | // If true, buffer assignments assumes that input parameter buffers and output |
594 | // buffers can be shared if their sizes match. |
595 | bool allow_input_output_aliasing_; |
596 | |
597 | // Functor used to assign colors to newly allocated logical buffers. |
598 | BufferLiveness::Colorer colorer_; |
599 | |
600 | TF_DISALLOW_COPY_AND_ASSIGN(BufferAssigner); |
601 | }; |
602 | |
603 | } // namespace xla |
604 | |
605 | #endif // TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_ASSIGNMENT_H_ |
606 | |