Profiling predictions functions#

Overview#

The following plots show the results of profiling many classifiers trained on a modified iris dataset. They were automatically generated by running the following command line (module mlprodict).

python -m mlprodict asv_bench --location . -n "4,50" -d "1,1000" -o -1 --add_pyspy 1 --runtime "scikit-learn,python_compiled,onnxruntime1" --conf_params "project,asv-skl2onnx;project_url,https://github.com/sdpython/asv-skl2onnx" --models SVC,RandomForestClassifier,DecisionTreeClassifier,AdaBoostClassifier,LogisticRegression,KNeighborsClassifier,MLPClassifier,MultinomialNB,BernoulliNB,OneVsRestClassifier -v 1

It generates many files following the same pattern. The first file works for the module asv.

import numpy  # pylint: disable=W0611
from onnx.defs import onnx_opset_version
# Import specific to this model.
from sklearn.tree import DecisionTreeClassifier

from mlprodict.asv_benchmark import _CommonAsvSklBenchmarkClassifier
from mlprodict.onnx_conv import to_onnx  # pylint: disable=W0611
from mlprodict.onnxrt import OnnxInference  # pylint: disable=W0611

class DecisionTreeClassifier_default_b_cl_benchClassifier(
        _CommonAsvSklBenchmarkClassifier):
    """
    :epkg:`asv` benchmark of ``DecisionTreeClassifier`` for the
    problem ``'b-cl'`` in the scenario ``'default'``; the method
    under check is ``predict_proba``.
    The full template this class derives from is available in
    `common_asv_skl.py <https://github.com/sdpython/mlprodict/
    blob/master/mlprodict/asv_benchmark/common_asv_skl.py>`_.
    """
    # Grid of benchmark parameters, one entry per axis.
    # NOTE(review): the axes presumably map to
    # (runtime, N, nf, opset, dtype, optim) -- confirm against
    # the base class.
    params = [
        ['skl', 'pyrt', 'ort'],
        (1, 10, 100, 10000, 100000),
        (4, 20),
        [12],
        ['float'],
        [{}],
    ]

    # Description of the benchmarked model and scenario.
    par_modelname = 'DecisionTreeClassifier'
    par_scenario = 'default'
    par_problem = 'b-cl'
    chk_method_name = 'predict_proba'
    par_extra = dict(random_state=42)

    # No optimisation and no conversion options for this scenario.
    par_optimisation = None
    par_convopts = None

    def setup_cache(self):  # pylint: disable=W0235
        # Explicit override which only delegates to the parent class.
        super().setup_cache()

    def _create_model(self):
        # Same constructor argument as the one listed in ``par_extra``.
        return DecisionTreeClassifier(random_state=42)

The second file calls several runtimes (scikit-learn, onnxruntime, mlprodict). The prediction function from scikit-learn is first timed over 100 calls to estimate how many calls fit in about 20 seconds (with a minimum of 100). Every runtime is then run that same number of times.

from bench_DecisionTreeClassifier_default_b_cl import DecisionTreeClassifier_default_b_cl_benchClassifier
import time
from datetime import datetime

def start():
    """
    Creates the benchmark object and fills its cache.

    :return: the benchmark instance, after ``setup_cache`` was called on it
    """
    bench = DecisionTreeClassifier_default_b_cl_benchClassifier()
    bench.setup_cache()
    return bench

def profile0(iter, cl, runtime, N, nf, opset, dtype, optim):
    """
    Calibration run: measures 100 calls to ``cl.time_predict`` and
    derives how many calls would last about 20 seconds.

    :param iter: ignored, the value is recomputed by this function
    :param cl: benchmark object exposing ``time_predict``
    :param runtime: forwarded to ``cl.time_predict``
    :param N: forwarded to ``cl.time_predict``
    :param nf: forwarded to ``cl.time_predict``
    :param opset: forwarded to ``cl.time_predict``
    :param dtype: forwarded to ``cl.time_predict``
    :param optim: forwarded to ``cl.time_predict``
    :return: number of iterations, never less than 100
    """
    forwarded = (runtime, N, nf, opset, dtype, optim)
    t0 = time.perf_counter()
    for _ in range(100):
        cl.time_predict(*forwarded)
    elapsed = time.perf_counter() - t0
    # 100 calls took `elapsed` seconds, scale that to a 20 second budget.
    return max(100, int(20 / elapsed * 100))

def setup_profile0(iter, cl, runtime, N, nf, opset, dtype, optim):
    """
    Calls ``cl.setup`` for the given configuration, then runs
    the calibration step ``profile0`` on it.

    :return: the value returned by ``profile0``
    """
    bench_args = (runtime, N, nf, opset, dtype, optim)
    cl.setup(*bench_args)
    return profile0(iter, cl, *bench_args)

def profile(iter, cl, runtime, N, nf, opset, dtype, optim):
    """
    Calls ``cl.time_predict`` *iter* times with the same arguments.

    :param iter: number of calls to make
    :param cl: benchmark object exposing ``time_predict``
    :param runtime: forwarded to ``cl.time_predict``
    :param N: forwarded to ``cl.time_predict``
    :param nf: forwarded to ``cl.time_predict``
    :param opset: forwarded to ``cl.time_predict``
    :param dtype: forwarded to ``cl.time_predict``
    :param optim: forwarded to ``cl.time_predict``
    :return: *iter*, unchanged
    """
    call_args = (runtime, N, nf, opset, dtype, optim)
    for _ in range(iter):
        cl.time_predict(*call_args)
    return iter

def setup_profile(iter, cl, runtime, N, nf, opset, dtype, optim):
    """
    Calls ``cl.setup`` for the given configuration, then runs
    ``profile`` on it for *iter* iterations.

    :return: the value returned by ``profile``
    """
    bench_args = (runtime, N, nf, opset, dtype, optim)
    cl.setup(*bench_args)
    return profile(iter, cl, *bench_args)

# Script body: builds the benchmark object once, calibrates the number
# of iterations with the 'skl' runtime, then runs the same number of
# iterations with the runtimes 'skl', 'pyrt' and 'ort'.
# NOTE(review): each runtime goes through its own wrapper function,
# presumably so that it shows up under a distinct name in the
# profile -- confirm.
cl = start()
# Unknown until the calibration step below computes it.
iter = None
print(datetime.now(), "begin")

# Calibration step with runtime 'skl'.
def profile0_skl(iter, cl, N, nf, opset, dtype, optim):
    return setup_profile0(iter, cl, 'skl', N, nf, opset, dtype, optim)
# Positional arguments: N=1, nf=4, opset=12, dtype='float', optim=''.
iter = profile0_skl(iter, cl, 1, 4, 12, 'float', '')
print(datetime.now(), "iter", iter)

# Profiled run with runtime 'skl'.
def profile_skl(iter, cl, N, nf, opset, dtype, optim):
    return setup_profile(iter, cl, 'skl', N, nf, opset, dtype, optim)
# The returned value is not used: `iter` keeps the calibrated value.
profile_skl(iter, cl, 1, 4, 12, 'float', '')
print(datetime.now(), "iter", iter)

# Profiled run with runtime 'pyrt', same number of iterations.
def profile_pyrt(iter, cl, N, nf, opset, dtype, optim):
    return setup_profile(iter, cl, 'pyrt', N, nf, opset, dtype, optim)
profile_pyrt(iter, cl, 1, 4, 12, 'float', '')
print(datetime.now(), "iter", iter)

# Profiled run with runtime 'ort', same number of iterations.
def profile_ort(iter, cl, N, nf, opset, dtype, optim):
    return setup_profile(iter, cl, 'ort', N, nf, opset, dtype, optim)
profile_ort(iter, cl, 1, 4, 12, 'float', '')
print(datetime.now(), "iter", iter)

Then py-spy is used to produce the following profilings with and without option --function.

py-spy record --native --function --rate=10 -o bench_LinReg_default_b_reg_1_4_12_float__fct.svg -- python bench_LinReg_default_b_reg_1_4_12_float__fct.py
py-spy record --native --rate=10 -o bench_DecisionTreeClassifier_default_b_cl_1_20_12_float__line.svg -- python bench_LinReg_default_b_reg_1_4_12_float__fct.py

Results#

They walk through many models, problems and scenarios.