.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "gyexamples/plot_gexternal_lightgbm_reg_per.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        Click :ref:`here <sphx_glr_download_gyexamples_plot_gexternal_lightgbm_reg_per.py>`
        to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_gyexamples_plot_gexternal_lightgbm_reg_per.py:

.. _example-lightgbm-reg-one-off:

Batch predictions vs one-off predictions
========================================

.. index:: LightGBM

The goal is to compare the processing time between
batch predictions and one-off predictions for the same number of
predictions on trees. onnxruntime parallelizes the prediction by
trees or by rows. The rule is fixed and cannot be changed but it seems
to have some loopholes.

.. contents::
    :local:

Train a LGBMRegressor
+++++++++++++++++++++

.. GENERATED FROM PYTHON SOURCE LINES 21-57

.. code-block:: default


    import warnings
    import time
    import os
    from packaging.version import Version
    import numpy
    from pandas import DataFrame
    import onnx
    import matplotlib.pyplot as plt
    from tqdm import tqdm
    from lightgbm import LGBMRegressor
    from onnxruntime import InferenceSession
    from skl2onnx import update_registered_converter, to_onnx
    from skl2onnx.common.shape_calculator import calculate_linear_regressor_output_shapes  # noqa
    from onnxmltools import __version__ as oml_version
    from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm  # noqa

    N = 1000
    Ntrees = [10, 100, 200]
    X = numpy.random.randn(N, 1000)
    y = (numpy.random.randn(N) +
         numpy.random.randn(N) * 100 * numpy.random.randint(0, 1, N))

    filenames = [f"plot_lightgbm_regressor_{nt}_{X.shape[1]}.onnx"
                 for nt in Ntrees]

    regs = []
    for nt, filename in zip(Ntrees, filenames):
        if not os.path.exists(filename):
            print(f"training with shape={X.shape} and {nt} trees")
            r = LGBMRegressor(n_estimators=nt).fit(X, y)
            regs.append(r)
            print("done.")
        else:
            regs.append(None)

.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    training with shape=(1000, 1000) and 10 trees
    done.
    training with shape=(1000, 1000) and 100 trees
    done.
    training with shape=(1000, 1000) and 200 trees
    done.


.. GENERATED FROM PYTHON SOURCE LINES 58-60

Register the converter for LGBMRegressor
++++++++++++++++++++++++++++++++++++++++

.. GENERATED FROM PYTHON SOURCE LINES 60-81

.. code-block:: default


    def skl2onnx_convert_lightgbm(scope, operator, container):
        options = scope.get_options(operator.raw_operator)
        if 'split' in options:
            if Version(oml_version) < Version('1.9.2'):
                warnings.warn(
                    "Option split was released in version 1.9.2 but %s is "
                    "installed. It will be ignored." % oml_version)
            operator.split = options['split']
        else:
            operator.split = None
        convert_lightgbm(scope, operator, container)


    update_registered_converter(
        LGBMRegressor, 'LightGbmLGBMRegressor',
        calculate_linear_regressor_output_shapes,
        skl2onnx_convert_lightgbm,
        options={'split': None})


.. GENERATED FROM PYTHON SOURCE LINES 82-88

Convert
+++++++

We convert the same model following the two scenarios: one single
TreeEnsembleRegressor node, or more. The *split* parameter is the number of
trees per TreeEnsembleRegressor node.

.. GENERATED FROM PYTHON SOURCE LINES 88-107

.. code-block:: default


    models_onnx = []
    for i, filename in enumerate(filenames):
        print(i, filename)
        if os.path.exists(filename):
            with open(filename, "rb") as f:
                model_onnx = onnx.load(f)
            models_onnx.append(model_onnx)
        else:
            model_onnx = to_onnx(regs[i], X[:1].astype(numpy.float32),
                                 target_opset={'': 17, 'ai.onnx.ml': 3})
            models_onnx.append(model_onnx)
            with open(filename, "wb") as f:
                f.write(model_onnx.SerializeToString())

    sesss = [InferenceSession(m.SerializeToString(),
                              providers=['CPUExecutionProvider'])
             for m in models_onnx]


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    0 plot_lightgbm_regressor_10_1000.onnx
    1 plot_lightgbm_regressor_100_1000.onnx
    2 plot_lightgbm_regressor_200_1000.onnx


.. GENERATED FROM PYTHON SOURCE LINES 108-111

Processing time
+++++++++++++++

.. GENERATED FROM PYTHON SOURCE LINES 111-146

.. code-block:: default


    repeat = 7
    data = []
    for N in tqdm(list(range(10, 100, 10)) +
                  list(range(100, 1000, 100)) +
                  list(range(1000, 10001, 1000))):

        X32 = numpy.random.randn(N, X.shape[1]).astype(numpy.float32)
        obs = dict(N=N)
        for sess, T in zip(sesss, Ntrees):
            times = []
            for _ in range(repeat):
                begin = time.perf_counter()
                sess.run(None, {'X': X32})
                end = time.perf_counter() - begin
                times.append(end / X32.shape[0])
            times.sort()
            obs[f"batch-{T}"] = sum(times[2:-2]) / (len(times) - 4)

            times = []
            for _ in range(repeat):
                begin = time.perf_counter()
                for i in range(X32.shape[0]):
                    sess.run(None, {'X': X32[i: i + 1]})
                end = time.perf_counter() - begin
                times.append(end / X32.shape[0])
            times.sort()
            obs[f"one-off-{T}"] = sum(times[2:-2]) / (len(times) - 4)
        data.append(obs)

    df = DataFrame(data).set_index("N")
    df.reset_index(drop=False).to_csv(
        "plot_gexternal_lightgbm_reg_per.csv", index=False)
    print(df)


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    0%| | 0/28 [00:00

.. NOTE(review): this copy is truncated at this point. The rest of the
   captured output, any later sections, and the start of the download footer
   appear to have been lost (text between "<" and ">" was stripped). The
   footer below is reconstructed from the standard Sphinx-Gallery layout;
   regenerate the page from the source Python file to recover the rest.

.. _sphx_glr_download_gyexamples_plot_gexternal_lightgbm_reg_per.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: plot_gexternal_lightgbm_reg_per.py <plot_gexternal_lightgbm_reg_per.py>`

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: plot_gexternal_lightgbm_reg_per.ipynb <plot_gexternal_lightgbm_reg_per.ipynb>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_