.. index:: pair: example; sycl_single_op_partition.cpp .. _doxid-sycl_single_op_partition_8cpp-example: sycl_single_op_partition.cpp ============================ This is an example to demonstrate how to build a simple op graph and run it on gpu. Annotated version: :ref:`Single op partition on GPU ` This is an example to demonstrate how to build a simple op graph and run it on gpu. Annotated version: :ref:`Single op partition on GPU ` .. ref-code-block:: cpp /******************************************************************************* * Copyright 2024 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*******************************************************************************/ //[Headers and namespace] #include "oneapi/dnnl/dnnl_graph.hpp" #include "oneapi/dnnl/dnnl_graph_sycl.hpp" #include "oneapi/dnnl/dnnl_sycl.hpp" using namespace :ref:`dnnl::graph `; using namespace :ref:`sycl `; #include <assert.h> #include <iostream> #include <memory> #include <vector> #include <unordered_map> #include <unordered_set> #include "example_utils.hpp" #include "graph_example_utils.hpp" using namespace :ref:`dnnl::graph `; using :ref:`data_type ` = :ref:`logical_tensor::data_type `; using :ref:`layout_type ` = :ref:`logical_tensor::layout_type `; using dim = :ref:`logical_tensor::dim `; using dims = :ref:`logical_tensor::dims `; //[Headers and namespace] void gpu_single_op_partition_tutorial() { dim M = 32, K = 1024, N = 2048; dims src0_dims {M, K}; dims src1_dims {K, N}; //[Create matmul] :ref:`logical_tensor ` matmul_src0_desc {0, :ref:`data_type::f32 `}; :ref:`logical_tensor ` matmul_src1_desc {1, :ref:`data_type::f32 `}; :ref:`logical_tensor ` matmul_dst_desc {2, :ref:`data_type::f32 `}; :ref:`op ` :ref:`matmul `(0, op::kind::MatMul, {matmul_src0_desc, matmul_src1_desc}, {matmul_dst_desc}, "matmul"); :ref:`matmul `.set_attr<bool>(:ref:`op::attr::transpose_a `, false); :ref:`matmul `.set_attr<bool>(:ref:`op::attr::transpose_b `, false); //[Create matmul] //[Create allocator] :ref:`allocator ` alloc = :ref:`sycl_interop::make_allocator `( sycl_malloc_wrapper, sycl_free_wrapper); //[Create allocator] //[Define sycl queue] sycl::queue q = sycl::queue( sycl::gpu_selector_v, sycl::property::queue::in_order {}); //[Define sycl queue] //[Create engine] :ref:`dnnl::engine ` eng = :ref:`sycl_interop::make_engine_with_allocator `( q.get_device(), q.get_context(), alloc); //[Create engine] //[Create stream] :ref:`dnnl::stream ` strm = :ref:`dnnl::sycl_interop::make_stream `(eng, q); //[Create stream] // Memory buffers bound to the partition input/output tensors // that helps manage the lifetime of these tensors std::vector<std::shared_ptr<void>> data_buffer; // Mapping from logical 
tensor id to the concrete shapes. // In practical usage, concrete shapes and layouts are not given // until compilation stage, hence need this mapping to mock the step. std::unordered_map<size_t, dims> concrete_shapes { {0, src0_dims}, {1, src1_dims}}; // Compile and execute the partitions, including the following steps: // // 1. Update the input/output logical tensors with concrete shape and layout // 2. Compile the partition // 3. Update the output logical tensors with queried ones after compilation // 4. Allocate memory and bind the data buffer for the partition // 5. Execute the partition // // Although they are not part of the APIs, these steps are essential for // the integration of Graph API, hence users need to implement similar // logic. //[Create partition] :ref:`partition ` part(:ref:`matmul `, :ref:`dnnl::engine::kind::gpu `); //[Create partition] if (!part.is_supported()) { std::cout << "sycl_single_op_partition: Got unsupported partition, " "users need to handle the operators by themselves." 
<< std::endl; return; } std::vector<logical_tensor> inputs = part.get_input_ports(); std::vector<logical_tensor> outputs = part.get_output_ports(); // Update input logical tensors with concrete shape and layout for (auto &input : inputs) { const auto id = input.get_id(); // Create logical tensor with strided layout input = :ref:`logical_tensor ` {id, input.:ref:`get_data_type `(), concrete_shapes[id], layout_type::strided}; } // Update output logical tensors with concrete shape and layout for (auto &output : outputs) { const auto id = output.get_id(); output = :ref:`logical_tensor ` {id, output.:ref:`get_data_type `(), :ref:`DNNL_GRAPH_UNKNOWN_NDIMS `, // do not require concrete shape as the shape will be inferred // based on input shapes during compilation layout_type::strided}; } //[Compile partition] :ref:`compiled_partition ` cp = part.compile(inputs, outputs, eng); //[Compile partition] // Update output logical tensors with queried one for (auto &output : outputs) { const auto id = output.get_id(); output = cp.:ref:`query_logical_tensor `(id); } // Allocate memory for the partition, and bind the data buffers with // input and output logical tensors std::vector<tensor> inputs_ts, outputs_ts; allocate_sycl_graph_mem(inputs_ts, inputs, data_buffer, q, eng); allocate_sycl_graph_mem(outputs_ts, outputs, data_buffer, q, eng); //[Execute compiled partition] cp.:ref:`execute `(strm, inputs_ts, outputs_ts); //[Execute compiled partition] // Wait for all compiled partition's execution finished strm.wait(); std::cout << "Graph:" << std::endl << " [matmul_src0] [matmul_src1]" << std::endl << " \\ /" << std::endl << " matmul" << std::endl << " |" << std::endl << " [matmul_dst]" << std::endl << "Note:" << std::endl << " '[]' represents a logical tensor, which refers to " "inputs/outputs of the graph. " << std::endl; } int main(int argc, char **argv) { return handle_example_errors({validate_engine_kind(:ref:`engine::kind::gpu `)}, gpu_single_op_partition_tutorial); }