.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "gyexamples/plot_benchmark_op_short.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        Click :ref:`here <sphx_glr_download_gyexamples_plot_benchmark_op_short.py>`
        to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_gyexamples_plot_benchmark_op_short.py:


.. _example-ort-training-benchmark:

Benchmark operator Slice
========================

This short code compares the execution of the operator *Slice* on CPU and GPU
in three configurations.

.. contents::
    :local:

A simple example
++++++++++++++++

.. GENERATED FROM PYTHON SOURCE LINES 17-37

.. code-block:: default


    import numpy
    from numpy.testing import assert_almost_equal
    from pandas import DataFrame, pivot_table
    from onnxruntime import InferenceSession, get_device
    from onnxruntime.capi._pybind_state import (  # pylint: disable=E0611
        OrtValue as C_OrtValue)
    from skl2onnx.common.data_types import FloatTensorType
    from skl2onnx.algebra.onnx_ops import OnnxSlice, OnnxAdd, OnnxMul
    from cpyquickhelper.numbers.speed_measure import measure_time
    from mlprodict.testing.experimental_c_impl.experimental_c import code_optimisation
    from mlprodict.onnxrt import OnnxInference
    from mlprodict.plotting.plotting_onnx import plot_onnx
    from onnxcustom.utils.onnxruntime_helper import get_ort_device
    from tqdm import tqdm

    print([code_optimisation(), get_device()])

.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    ['AVX-omp=8', 'CPU']

.. GENERATED FROM PYTHON SOURCE LINES 38-39

The graph to compare.

.. GENERATED FROM PYTHON SOURCE LINES 39-79

.. code-block:: default


    def build_ort_op(op_version=14, save=None, slices=None):  # opset=13, 14, ...
        if slices is None:
            starts = numpy.array([1, 1], dtype=numpy.int64)
            ends = numpy.array([-1, -1], dtype=numpy.int64)
            axes = None
        else:
            starts, ends = slices
            if starts[0] is None:
                indexes = [i for i in range(len(starts)) if starts[i] is not None]
                starts = numpy.array(
                    [n for n in starts if n is not None], dtype=numpy.int64)
                ends = numpy.array(
                    [n for n in ends if n is not None], dtype=numpy.int64)
                axes = numpy.array(indexes, dtype=numpy.int64)
            else:
                starts = numpy.array(starts, dtype=numpy.int64)
                ends = numpy.array(ends, dtype=numpy.int64)
                axes = None

        if axes is None:
            node1 = OnnxSlice('X', starts, ends, op_version=op_version)
        else:
            node1 = OnnxSlice('X', starts, ends, axes, op_version=op_version)
        node2 = OnnxAdd(node1, numpy.array([1], dtype=numpy.float32),
                        op_version=op_version)
        if axes is None:
            node3 = OnnxSlice(node2, starts, ends, op_version=op_version)
        else:
            node3 = OnnxSlice(node2, starts, ends, axes, op_version=op_version)
        node4 = OnnxMul(node3, numpy.array([2], dtype=numpy.float32),
                        op_version=op_version, output_names=['Y'])
        onx = node4.to_onnx(inputs=[('X', FloatTensorType([None, None]))],
                            target_opset=op_version)
        return onx


    onx = build_ort_op()
    plot_onnx(onx)

.. image-sg:: /gyexamples/images/sphx_glr_plot_benchmark_op_short_001.png
   :alt: plot benchmark op short
   :srcset: /gyexamples/images/sphx_glr_plot_benchmark_op_short_001.png
   :class: sphx-glr-single-img

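
The benchmark further below exercises three ways of specifying the slice
boundaries. The snippet below is a small illustrative sketch, not part of the
generated script: it shows how the ``slices`` argument of ``build_ort_op``
selects which axes are sliced (the variable names are only for illustration).

.. code-block:: python

    # Sketch: the three slice configurations used in the benchmark below.
    # Explicit starts/ends on both axes.
    onx_both_axes = build_ort_op(slices=([1, 1], [-1, -1]))
    # Starts/ends of length 1: only the first axis is sliced, no axes input.
    onx_first_axis = build_ort_op(slices=([1], [-1]))
    # None on the first axis: build_ort_op adds an explicit axes=[1] input,
    # so only the second axis is sliced.
    onx_second_axis = build_ort_op(slices=([None, 1], [None, -1]))

Each variant corresponds to one entry of the ``slices`` list used in the
benchmark section.
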

.. GENERATED FROM PYTHON SOURCE LINES 80-82

Execution on CPU
++++++++++++++++

.. GENERATED FROM PYTHON SOURCE LINES 82-88

.. code-block:: default


    x = numpy.random.rand(50, 50).astype(numpy.float32)

    oinf = OnnxInference(onx)
    oinf.run({'X': x}, verbose=1, fLOG=print)

.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    +ki='Sl_Slicecst': (2,) (dtype=int64 min=1 max=1)
    +ki='Sl_Slicecst1': (2,) (dtype=int64 min=-1 max=-1)
    +ki='Ad_Addcst': (1,) (dtype=float32 min=1.0 max=1.0)
    +ki='Mu_Mulcst': (1,) (dtype=float32 min=2.0 max=2.0)
    -- OnnxInference: run 6 nodes with 1 inputs
    Onnx-Identity(Sl_Slicecst) -> Sl_Slicecst2    (name='Sl_Slicecst2_op')
    +kr='Sl_Slicecst2': (2,) (dtype=int64 min=1 max=1)
    Onnx-Identity(Sl_Slicecst1) -> Sl_Slicecst3    (name='Sl_Slicecst3_op')
    +kr='Sl_Slicecst3': (2,) (dtype=int64 min=-1 max=-1)
    Onnx-Slice(X, Sl_Slicecst, Sl_Slicecst1) -> Sl_output01    (name='Sl_Slice')
    +kr='Sl_output01': (48, 48) (dtype=float32 min=9.875727846520022e-05 max=0.9997695088386536)
    Onnx-Add(Sl_output01, Ad_Addcst) -> Ad_C0    (name='Ad_Add')
    +kr='Ad_C0': (48, 48) (dtype=float32 min=1.000098705291748 max=1.9997694492340088)
    Onnx-Slice(Ad_C0, Sl_Slicecst2, Sl_Slicecst3) -> Sl_output0    (name='Sl_Slice1')
    +kr='Sl_output0': (46, 46) (dtype=float32 min=1.000098705291748 max=1.9997694492340088)
    Onnx-Mul(Sl_output0, Mu_Mulcst) -> Y    (name='Mu_Mul')
    +kr='Y': (46, 46) (dtype=float32 min=2.000197410583496 max=3.9995388984680176)
    {'Y': array([[3.419556 , 2.5125964, 2.6644747, ..., 2.2184076, 3.9621744, 2.5592406],
           [3.822313 , 3.8818226, 2.3897986, ..., 3.829173 , 3.9068456, 3.6176982],
           [2.5244346, 2.5971172, 2.0926566, ..., 2.8305347, 3.1156988, 2.5614848],
           ...,
           [2.336208 , 3.693772 , 2.6583784, ..., 2.9779284, 3.9650922, 2.4234152],
           [2.973298 , 3.8292289, 2.6124973, ..., 3.71129  , 2.1220028, 3.7119985],
           [3.939609 , 3.942213 , 2.0477579, ..., 2.6146688, 2.7894301, 2.039695 ]],
          dtype=float32)}

.. GENERATED FROM PYTHON SOURCE LINES 89-90

With onnxruntime.

.. GENERATED FROM PYTHON SOURCE LINES 90-96

.. code-block:: default


    sess = InferenceSession(onx.SerializeToString(),
                            providers=["CPUExecutionProvider"])
    y_cpu = sess.run(None, {'X': x})[0]

.. GENERATED FROM PYTHON SOURCE LINES 97-101

Execution on GPU
++++++++++++++++

If available...

.. GENERATED FROM PYTHON SOURCE LINES 101-126

.. code-block:: default


    if get_device().upper() == 'GPU':
        dev = get_ort_device('cuda:0')
        try:
            gx = C_OrtValue.ortvalue_from_numpy(x, dev)
            cuda = True
        except RuntimeError as e:
            print(e)
            cuda = False
    else:
        cuda = False

    if cuda:
        sessg = InferenceSession(onx.SerializeToString(),
                                 providers=["CUDAExecutionProvider"])

        io_binding = sessg.io_binding()._iobinding
        io_binding.bind_input(
            'X', dev, numpy.float32, gx.shape(), gx.data_ptr())
        io_binding.bind_output('Y', dev)
        sessg._sess.run_with_iobinding(io_binding, None)
        y_gpu = io_binding.copy_outputs_to_cpu()[0]
        assert_almost_equal(y_cpu, y_gpu)

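
Before timing anything, it can be reassuring to check that the graph really
computes the expected expression. For the default slices, the model amounts to
``(X[1:-1, 1:-1] + 1)[1:-1, 1:-1] * 2``. The check below is a small sketch,
not part of the generated script, and the tolerance ``decimal=5`` is an
arbitrary choice.

.. code-block:: python

    # Sketch: reference computation in plain numpy for the default slices,
    # compared against the onnxruntime CPU result obtained above.
    expected = (x[1:-1, 1:-1] + 1)[1:-1, 1:-1] * 2
    assert_almost_equal(expected, y_cpu, decimal=5)
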

.. GENERATED FROM PYTHON SOURCE LINES 127-129

Benchmark
+++++++++

.. GENERATED FROM PYTHON SOURCE LINES 129-193

.. code-block:: default


    data = []
    shapes = ([(n, n) for n in [10, 100, 1000]] +
              [(n, 100) for n in [10, 100, 1000, 10000]] +
              [(100, n) for n in [10, 100, 1000, 10000]])
    slices = [([1, 1], [-1, -1]), ([1], [-1]), ([None, 1], [None, -1])]
    shape_slices = [(sh, sl) for sh in shapes for sl in slices]

    for shape, slices in tqdm(shape_slices):
        onx = build_ort_op(slices=slices)
        x = numpy.random.rand(*shape).astype(numpy.float32)

        number = 100
        if x.size >= 100000:
            number = 10

        sess = InferenceSession(
            onx.SerializeToString(), providers=["CPUExecutionProvider"])
        sess.run(None, {'X': x})

        obs = dict(shape=str(shape).replace(" ", ""),
                   slice=str(slices).replace(" ", ""))
        r = measure_time(lambda: sess.run(None, {'X': x}),
                         number=number, div_by_number=True,
                         context={})
        obs.update(r)
        obs['provider'] = 'CPU'
        data.append(obs)

        if cuda:

            def sess_run(sess, io_binding, x, dev):
                io_binding.bind_input(
                    'X', dev, numpy.float32, gx.shape(), gx.data_ptr())
                io_binding.bind_output('Y', dev)
                sess._sess.run_with_iobinding(io_binding)

            sess = InferenceSession(
                onx.SerializeToString(), providers=["CUDAExecutionProvider"])
            io_binding = sess.io_binding()._iobinding
            dev = get_ort_device('cuda:0')
            gx = C_OrtValue.ortvalue_from_numpy(x, dev)
            sess_run(sess, io_binding, gx, dev)

            obs = dict(shape=str(shape).replace(" ", ""),
                       slice=str(slices).replace(" ", ""))
            r = measure_time(
                lambda: sess_run(sess, io_binding, gx, dev),
                number=number, div_by_number=True,
                context={'sess': sess, 'gx': gx, 'io_binding': io_binding,
                         'dev': dev, 'sess_run': sess_run})
            obs.update(r)
            obs['provider'] = 'GPU'
            data.append(obs)

    df = DataFrame(data)
    print(df)

.. rst-class:: sphx-glr-script-out

 .. code-block:: none

      0%|          | 0/33 [00:00

.. rst-class:: sphx-glr-timing

   **Total running time of the script:** ( 0 minutes 9.562 seconds)


.. _sphx_glr_download_gyexamples_plot_benchmark_op_short.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example


    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: plot_benchmark_op_short.py <plot_benchmark_op_short.py>`

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: plot_benchmark_op_short.ipynb <plot_benchmark_op_short.ipynb>`

.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_