tf_1.8_xla_doc
compiler.h
Go to the documentation of this file.
1 
3 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7  http://www.apache.org/licenses/LICENSE-2.0
8 Unless required by applicable law or agreed to in writing, software
9 distributed under the License is distributed on an "AS IS" BASIS,
10 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 See the License for the specific language governing permissions and
12 limitations under the License.
13 ==============================================================================*/
14 // The compiler API is used by the XLA service to generate executables that
15 // run on a given platform. This is a registry and abstract interface, for
16 // pluggability by the various platforms.
17 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_COMPILER_H_
18 #define TENSORFLOW_COMPILER_XLA_SERVICE_COMPILER_H_
19 #include <functional>
20 #include <map>
21 #include <memory>
22 #include <string>
23 #include "tensorflow/compiler/xla/service/executable.h"
25 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
26 #include "tensorflow/compiler/xla/service/logical_buffer.h"
27 #include "tensorflow/compiler/xla/statusor.h"
28 #include "tensorflow/compiler/xla/types.h"
29 #include "tensorflow/core/lib/gtl/array_slice.h"
30 #include "tensorflow/core/platform/mutex.h"
31 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
32 #include "tensorflow/core/platform/thread_annotations.h"
33 namespace xla {
34 // The following types are used for ahead-of-time compilation.
35 // Contains the object file data created as a result of ahead-of-time
36 // compilation, held as a vector of chars.
37 using ObjectFileData = std::vector<char>;
38 // Contains the buffer size information needed to allocate buffers to execute
39 // an ahead-of-time computation. Entries which contain -1 designate a parameter
40 // which should be skipped over during allocation.
41 using BufferSizes = std::vector<int64>;
42 // Abstract superclass describing the result of an ahead-of-time compilation. Declares no data or operations of its own; it is non-copyable and can only be constructed through a subclass.
43 class AotCompilationResult {
44  public:
45  AotCompilationResult(const AotCompilationResult&) = delete;  // Copy construction is disabled.
46  AotCompilationResult& operator=(AotCompilationResult const&) = delete;  // Copy assignment is disabled.
47  virtual ~AotCompilationResult() = default;  // Virtual so results can be destroyed through a base pointer.
48  protected:
49  AotCompilationResult() = default;  // Protected: only subclasses may construct.
50 };
51 // Abstract superclass describing options to an ahead-of-time compilation. Non-copyable; subclasses must implement PlatformId().
52 class AotCompilationOptions {
53  public:
54  AotCompilationOptions(const AotCompilationOptions&) = delete;  // Copy construction is disabled.
55  AotCompilationOptions& operator=(AotCompilationOptions const&) = delete;  // Copy assignment is disabled.
56  virtual ~AotCompilationOptions() = default;
57  // Returns the ID of the platform to which these options apply.
58  virtual perftools::gputools::Platform::Id PlatformId() const = 0;
59  // Optional allocator that may be used for allocating temp space on the device
60  // during compilation. Stored as a raw pointer; nullptr until set_device_allocator is called.
61  DeviceMemoryAllocator* device_allocator() const { return device_allocator_; }
62  void set_device_allocator(DeviceMemoryAllocator* device_allocator) {
63  device_allocator_ = device_allocator;
64  }
65  const DebugOptions& debug_options() const { return debug_options_; }  // Read-only view of the stored debug options.
66  DebugOptions* mutable_debug_options() { return &debug_options_; }  // Pointer to the stored debug options, for in-place modification.
67  protected:
68  AotCompilationOptions();  // Protected: only subclasses may construct. Defined outside this header.
69  private:
70  DeviceMemoryAllocator* device_allocator_ = nullptr;  // Defaults to "no allocator".
71  DebugOptions debug_options_;
72 };
92 class Compiler {  // Abstract per-platform compiler interface, plus a static registry of compiler factories keyed by platform ID.
93  public:
94  virtual ~Compiler() {}
95  // Returns the ID of the platform that this compiler targets.
96  virtual perftools::gputools::Platform::Id PlatformId() const = 0;
97  // Runs Hlo passes to optimize the given Hlo module, returns the optimized
98  // module.
99  //
100  // If device_allocator is not null, the compiler may use it to allocate temp
101  // space on the device for use during compilation. For example, the compiler
102  // may allocate buffers on the device and then run variants of a given
103  // algorithm over those buffers, to see which variant is fastest. Any space
104  // allocated should be deallocated before this function returns.
105  virtual StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
106  std::unique_ptr<HloModule> module,
107  perftools::gputools::StreamExecutor* executor,
108  DeviceMemoryAllocator* device_allocator) = 0;
109  // Compiles the HLO module for execution on a device given by the executor,
110  // and returns an executable object or an error status. No HLO passes are
111  // applied to module. Generally a module should be passed through RunHloPasses
112  // prior to calling this method because some HLO passes are required for
113  // correctness. Takes ownership of the HLO module and is free to transform it.
114  //
115  // The compiler may optionally specialize to the individual device
116  // (not just type of device) indicated by the executor.
117  //
118  // device_allocator is optional; see RunHloPasses.
119  //
120  // Use Compile() below to compile computations that run in parallel.
121  virtual StatusOr<std::unique_ptr<Executable>> RunBackend(
122  std::unique_ptr<HloModule> module,
123  perftools::gputools::StreamExecutor* executor,
124  DeviceMemoryAllocator* device_allocator) = 0;
125  // Compiles a set of HLO modules that can run in parallel, potentially
126  // communicating data between the modules, and returns a corresponding
127  // sequence of executable objects.
128  //
129  // device_allocator is optional; see RunHloPasses.
130  //
131  // TODO(b/68666782): Remove this method after adding support for multiple
132  // modules to RunHloPasses and RunBackend.
133  virtual StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
134  std::vector<std::unique_ptr<HloModule>> modules,
135  std::vector<std::vector<perftools::gputools::StreamExecutor*>>
136  stream_exec,
137  DeviceMemoryAllocator* device_allocator) = 0;
138  // Compiles the HLO modules for ahead-of-time execution. This is intended for
139  // use in static compilation.
140  virtual StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
141  CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
142  const AotCompilationOptions& options) = 0;
144  // The Compiler class also serves as a point to register compiler objects
145  // for the various platforms.
146  using CompilerFactory = std::function<std::unique_ptr<Compiler>()>;
147  // Registers the factory that creates the compiler for the given platform.
148  // The compiler itself is treated as a per-platform singleton (see GetForPlatform).
149  //
150  // Precondition: a platform ID must not be registered more than once.
151  static void RegisterCompilerFactory(
152  perftools::gputools::Platform::Id platform_id,
153  CompilerFactory compiler_factory);
154  // Returns the compiler singleton pointer if it is available for the given
155  // platform, or an error status if it is not.
156  static StatusOr<Compiler*> GetForPlatform(
157  const perftools::gputools::Platform* platform);
158  // Returns a function that computes the size in bytes of the logical
159  // buffer that contains a shape.
160  virtual HloCostAnalysis::ShapeSizeFunction ShapeSizeBytesFunction() const = 0;
161  // Returns a function that computes the size in bytes of a given
162  // logical buffer, by applying ShapeSizeBytesFunction() to the buffer's shape.
163  std::function<int64(const LogicalBuffer&)> BufferSizeBytesFunction() {
164  HloCostAnalysis::ShapeSizeFunction shape_size = ShapeSizeBytesFunction();
165  return [shape_size](const LogicalBuffer& buffer) {  // shape_size is captured by copy; the lambda does not capture `this`.
166  return shape_size(buffer.shape());
167  };
168  }
169  private:
170  // Mutex that guards the platform-compiler map.
171  static tensorflow::mutex platform_compiler_mutex_;
172  // Map from platform kind to compiler factory.
173  static std::map<perftools::gputools::Platform::Id, CompilerFactory>*
174  GetPlatformCompilerFactories();
175  // Map from platform kind to compiler instance, if we made one already (based
176  // on the factories above).
177  static std::map<perftools::gputools::Platform::Id, std::unique_ptr<Compiler>>*
178  GetPlatformCompilers();
179 };
180 } // namespace xla
181 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_COMPILER_H_
182 
183 
namespace for xla
Definition: client_library.cc:26
Definition: compiler.h:92